From 70fdaa75b6e7660dcefdf217702390596d9df43c Mon Sep 17 00:00:00 2001 From: Simon Jakobi Date: Tue, 3 May 2022 16:31:41 +0200 Subject: [PATCH] Add invariant checking (#444) Context: #366 --- Data/HashMap/Internal.hs | 35 ++++++++ Data/HashMap/Internal/Debug.hs | 149 ++++++++++++++++++++++++++++++++ tests/Properties/HashMapLazy.hs | 35 +++++--- unordered-containers.cabal | 1 + 4 files changed, 207 insertions(+), 13 deletions(-) create mode 100644 Data/HashMap/Internal/Debug.hs diff --git a/Data/HashMap/Internal.hs b/Data/HashMap/Internal.hs index 1a37d77d..e982fbcf 100644 --- a/Data/HashMap/Internal.hs +++ b/Data/HashMap/Internal.hs @@ -117,7 +117,10 @@ module Data.HashMap.Internal , mask , index , bitsPerSubkey + , maxChildren + , isLeafOrCollision , fullBitmap + , subkeyMask , nextShift , sparseIndex , two @@ -210,10 +213,42 @@ instance NFData2 Leaf where -- each key can map to at most one value. data HashMap k v = Empty + -- ^ Invariants: + -- + -- * 'Empty' is not a valid sub-node. It can only appear at the root. (INV1) | BitmapIndexed !Bitmap !(A.Array (HashMap k v)) + -- ^ Invariants: + -- + -- * Only the lower @maxChildren@ bits of the 'Bitmap' may be set. The + -- remaining upper bits must be 0. (INV2) + -- * The array of a 'BitmapIndexed' node stores at least 1 and at most + -- @'maxChildren' - 1@ sub-nodes. (INV3) + -- * The number of sub-nodes is equal to the number of 1-bits in its + -- 'Bitmap'. (INV4) + -- * If a 'BitmapIndexed' node has only one sub-node, this sub-node must + -- be a 'BitmapIndexed' or a 'Full' node. (INV5) | Leaf !Hash !(Leaf k v) + -- ^ Invariants: + -- + -- * The location of a 'Leaf' or 'Collision' node in the tree must be + -- compatible with its 'Hash'. (INV6) + -- (TODO: Document this properly (#425)) + -- * The 'Hash' of a 'Leaf' node must be the 'hash' of its key. (INV7) | Full !(A.Array (HashMap k v)) + -- ^ Invariants: + -- + -- * The array of a 'Full' node stores exactly 'maxChildren' sub-nodes. (INV8) | Collision !Hash !(A.Array (Leaf k v)) + -- ^ Invariants: + -- + -- * The location of a 'Leaf' or 'Collision' node in the tree must be + -- compatible with its 'Hash'. (INV6) + -- (TODO: Document this properly (#425)) + -- * The array of a 'Collision' node must contain at least two sub-nodes. (INV9) + -- * The 'hash' of each key in a 'Collision' node must be the one stored in + -- the node. (INV7) + -- * No two keys stored in a 'Collision' can be equal according to their + -- 'Eq' instance. (INV10) type role HashMap nominal representational diff --git a/Data/HashMap/Internal/Debug.hs b/Data/HashMap/Internal/Debug.hs new file mode 100644 index 00000000..e09901e8 --- /dev/null +++ b/Data/HashMap/Internal/Debug.hs @@ -0,0 +1,149 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE TypeApplications #-} + +-- | = WARNING +-- +-- This module is considered __internal__. +-- +-- The Package Versioning Policy __does not apply__. +-- +-- The contents of this module may change __in any way whatsoever__ +-- and __without any warning__ between minor versions of this package. +-- +-- Authors importing this module are expected to track development +-- closely. +-- +-- = Description +-- +-- Debugging utilities for 'HashMap's. + +module Data.HashMap.Internal.Debug + ( valid + , Validity(..) + , Error(..) + , SubHash + , SubHashPath + ) where + +import Data.Bits (complement, countTrailingZeros, popCount, shiftL, + unsafeShiftL, (.&.), (.|.)) +import Data.Hashable (Hashable) +import Data.HashMap.Internal (Bitmap, Hash, HashMap (..), Leaf (..), + bitsPerSubkey, fullBitmap, hash, + isLeafOrCollision, maxChildren, sparseIndex) +import Data.Semigroup (Sum (..)) + +import qualified Data.HashMap.Internal.Array as A + + +#if !MIN_VERSION_base(4,11,0) +import Data.Semigroup (Semigroup (..)) +#endif + +data Validity k = Invalid (Error k) SubHashPath | Valid + deriving (Eq, Show) + +instance Semigroup (Validity k) where + Valid <> y = y + x <> _ = x + +instance Monoid (Validity k) where + mempty = Valid + mappend = (<>) + +-- | An error corresponding to a broken invariant. +-- +-- See 'HashMap' for the documentation of the invariants. +data Error k + = INV1_internal_Empty + | INV2_Bitmap_unexpected_1_bits !Bitmap + | INV3_bad_BitmapIndexed_size !Int + | INV4_bitmap_array_size_mismatch !Bitmap !Int + | INV5_BitmapIndexed_invalid_single_subtree + | INV6_misplaced_hash !Hash + | INV7_key_hash_mismatch k !Hash + | INV8_bad_Full_size !Int + | INV9_Collision_size !Int + | INV10_Collision_duplicate_key k !Hash + deriving (Eq, Show) + +-- TODO: Name this 'Index'?! +-- (https://github.com/haskell-unordered-containers/unordered-containers/issues/425) +-- | A part of a 'Hash' with 'bitsPerSubkey' bits. +type SubHash = Word + +data SubHashPath = SubHashPath + { partialHash :: !Word + -- ^ The bits we already know, starting from the lower bits. + -- The unknown upper bits are @0@. + , lengthInBits :: !Int + -- ^ The number of bits known. + } deriving (Eq, Show) + +initialSubHashPath :: SubHashPath +initialSubHashPath = SubHashPath 0 0 + +addSubHash :: SubHashPath -> SubHash -> SubHashPath +addSubHash (SubHashPath ph l) sh = + SubHashPath (ph .|. (sh `unsafeShiftL` l)) (l + bitsPerSubkey) + +hashMatchesSubHashPath :: SubHashPath -> Hash -> Bool +hashMatchesSubHashPath (SubHashPath ph l) h = maskToLength h l == ph + where + -- Note: This needs to use `shiftL` instead of `unsafeShiftL` because + -- @l'@ may be greater than 32/64 at the deepest level. + maskToLength h' l' = h' .&. complement (complement 0 `shiftL` l') + +valid :: Hashable k => HashMap k v -> Validity k +valid Empty = Valid +valid t = validInternal initialSubHashPath t + where + validInternal p Empty = Invalid INV1_internal_Empty p + validInternal p (Leaf h l) = validHash p h <> validLeaf p h l + validInternal p (Collision h ary) = validHash p h <> validCollision p h ary + validInternal p (BitmapIndexed b ary) = validBitmapIndexed p b ary + validInternal p (Full ary) = validFull p ary + + validHash p h | hashMatchesSubHashPath p h = Valid + | otherwise = Invalid (INV6_misplaced_hash h) p + + validLeaf p h (L k _) | hash k == h = Valid + | otherwise = Invalid (INV7_key_hash_mismatch k h) p + + validCollision p h ary = validCollisionSize <> A.foldMap (validLeaf p h) ary <> distinctKeys + where + n = A.length ary + validCollisionSize | n < 2 = Invalid (INV9_Collision_size n) p + | otherwise = Valid + distinctKeys = A.foldMap (\(L k _) -> appearsOnce k) ary + appearsOnce k | A.foldMap (\(L k' _) -> if k' == k then Sum @Int 1 else Sum 0) ary == 1 = Valid + | otherwise = Invalid (INV10_Collision_duplicate_key k h) p + + validBitmapIndexed p b ary = validBitmap <> validArraySize <> validSubTrees p b ary + where + validBitmap | b .&. complement fullBitmap == 0 = Valid + | otherwise = Invalid (INV2_Bitmap_unexpected_1_bits b) p + n = A.length ary + validArraySize | n < 1 || n >= maxChildren = Invalid (INV3_bad_BitmapIndexed_size n) p + | popCount b == n = Valid + | otherwise = Invalid (INV4_bitmap_array_size_mismatch b n) p + + validSubTrees p b ary + | A.length ary == 1 + , isLeafOrCollision (A.index ary 0) + = Invalid INV5_BitmapIndexed_invalid_single_subtree p + | otherwise = go b + where + go 0 = Valid + go b' = validInternal (addSubHash p (fromIntegral c)) (A.index ary i) <> go b'' + where + c = countTrailingZeros b' + m = 1 `unsafeShiftL` c + i = sparseIndex b m + b'' = b' .&. complement m + + validFull p ary = validArraySize <> validSubTrees p fullBitmap ary + where + n = A.length ary + validArraySize | n == maxChildren = Valid + | otherwise = Invalid (INV8_bad_Full_size n) p diff --git a/tests/Properties/HashMapLazy.hs b/tests/Properties/HashMapLazy.hs index e7bc9bab..4532d154 100644 --- a/tests/Properties/HashMapLazy.hs +++ b/tests/Properties/HashMapLazy.hs @@ -14,20 +14,21 @@ module MODULE_NAME (tests) where -import Control.Applicative (Const (..)) -import Control.Monad (guard) +import Control.Applicative (Const (..)) +import Control.Monad (guard) import Data.Bifoldable -import Data.Function (on) -import Data.Functor.Identity (Identity (..)) -import Data.Hashable (Hashable (hashWithSalt)) -import Data.Ord (comparing) -import Test.QuickCheck (Arbitrary (..), Property, elements, forAll, - property, (===), (==>)) -import Test.QuickCheck.Function (Fun, apply) -import Test.QuickCheck.Poly (A, B) -import Test.Tasty (TestTree, testGroup) -import Test.Tasty.QuickCheck (testProperty) -import Util.Key (Key, keyToInt) +import Data.Function (on) +import Data.Functor.Identity (Identity (..)) +import Data.Hashable (Hashable (hashWithSalt)) +import Data.HashMap.Internal.Debug (Validity (..), valid) +import Data.Ord (comparing) +import Test.QuickCheck (Arbitrary (..), Property, elements, forAll, + property, (===), (==>)) +import Test.QuickCheck.Function (Fun, apply) +import Test.QuickCheck.Poly (A, B) +import Test.Tasty (TestTree, testGroup) +import Test.Tasty.QuickCheck (testProperty) +import Util.Key (Key, keyToInt) import qualified Data.Foldable as Foldable import qualified Data.List as List @@ -319,6 +320,9 @@ pIntersection xs ys = `eq_` HM.intersection (HM.fromList xs) $ ys +pIntersectionValid :: HashMap Key () -> HashMap Key () -> Property +pIntersectionValid x y = valid (HM.intersection x y) === Valid + pIntersectionWith :: [(Key, Int)] -> [(Key, Int)] -> Property pIntersectionWith xs ys = M.intersectionWith (-) (M.fromList xs) `eq_` HM.intersectionWith (-) (HM.fromList xs) $ ys @@ -421,6 +425,9 @@ instance Hashable a => Hashable (Magma a) where pFromList :: [(Key, Int)] -> Property pFromList = id `eq_` id +pFromListValid :: [(Key, ())] -> Property +pFromListValid xs = valid (HM.fromList xs) === Valid + pFromListWith :: [(Key, Int)] -> Property pFromListWith kvs = (M.toAscList $ M.fromListWith Op kvsM) === (toAscList $ HM.fromListWith Op kvsM) @@ -529,6 +536,7 @@ tests = [ testProperty "difference" pDifference , testProperty "differenceWith" pDifferenceWith , testProperty "intersection" pIntersection + , testProperty "intersection produces valid HashMap" pIntersectionValid , testProperty "intersectionWith" pIntersectionWith , testProperty "intersectionWithKey" pIntersectionWithKey ] @@ -544,6 +552,7 @@ tests = [ testProperty "elems" pElems , testProperty "keys" pKeys , testProperty "fromList" pFromList + , testProperty "fromList.valid" pFromListValid , testProperty "fromListWith" pFromListWith , testProperty "fromListWithKey" pFromListWithKey , testProperty "toList" pToList diff --git a/unordered-containers.cabal b/unordered-containers.cabal index 7b4489d2..0271e963 100644 --- a/unordered-containers.cabal +++ b/unordered-containers.cabal @@ -45,6 +45,7 @@ library exposed-modules: Data.HashMap.Internal Data.HashMap.Internal.Array + Data.HashMap.Internal.Debug Data.HashMap.Internal.List Data.HashMap.Internal.Strict Data.HashMap.Lazy