Skip to content

Commit

Permalink
Add invariant checking (#444)
Browse files Browse the repository at this point in the history
Context: #366
  • Loading branch information
sjakobi authored May 3, 2022
1 parent 163a3ec commit 70fdaa7
Show file tree
Hide file tree
Showing 4 changed files with 207 additions and 13 deletions.
35 changes: 35 additions & 0 deletions Data/HashMap/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,10 @@ module Data.HashMap.Internal
, mask
, index
, bitsPerSubkey
, maxChildren
, isLeafOrCollision
, fullBitmap
, subkeyMask
, nextShift
, sparseIndex
, two
Expand Down Expand Up @@ -210,10 +213,42 @@ instance NFData2 Leaf where
-- each key can map to at most one value.
data HashMap k v
= Empty
-- ^ Invariants:
--
-- * 'Empty' is not a valid sub-node. It can only appear at the root. (INV1)
| BitmapIndexed !Bitmap !(A.Array (HashMap k v))
-- ^ Invariants:
--
-- * Only the lower @maxChildren@ bits of the 'Bitmap' may be set. The
-- remaining upper bits must be 0. (INV2)
-- * The array of a 'BitmapIndexed' node stores at least 1 and at most
-- @'maxChildren' - 1@ sub-nodes. (INV3)
-- * The number of sub-nodes is equal to the number of 1-bits in its
-- 'Bitmap'. (INV4)
-- * If a 'BitmapIndexed' node has only one sub-node, this sub-node must
-- be a 'BitmapIndexed' or a 'Full' node. (INV5)
| Leaf !Hash !(Leaf k v)
-- ^ Invariants:
--
-- * The location of a 'Leaf' or 'Collision' node in the tree must be
-- compatible with its 'Hash'. (INV6)
-- (TODO: Document this properly (#425))
-- * The 'Hash' of a 'Leaf' node must be the 'hash' of its key. (INV7)
| Full !(A.Array (HashMap k v))
-- ^ Invariants:
--
-- * The array of a 'Full' node stores exactly 'maxChildren' sub-nodes. (INV8)
| Collision !Hash !(A.Array (Leaf k v))
-- ^ Invariants:
--
-- * The location of a 'Leaf' or 'Collision' node in the tree must be
-- compatible with its 'Hash'. (INV6)
-- (TODO: Document this properly (#425))
-- * The array of a 'Collision' node must contain at least two sub-nodes. (INV9)
-- * The 'hash' of each key in a 'Collision' node must be the one stored in
-- the node. (INV7)
-- * No two keys stored in a 'Collision' can be equal according to their
-- 'Eq' instance. (INV10)

type role HashMap nominal representational

Expand Down
149 changes: 149 additions & 0 deletions Data/HashMap/Internal/Debug.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE TypeApplications #-}

-- | = WARNING
--
-- This module is considered __internal__.
--
-- The Package Versioning Policy __does not apply__.
--
-- The contents of this module may change __in any way whatsoever__
-- and __without any warning__ between minor versions of this package.
--
-- Authors importing this module are expected to track development
-- closely.
--
-- = Description
--
-- Debugging utilities for 'HashMap's.

module Data.HashMap.Internal.Debug
( valid
, Validity(..)
, Error(..)
, SubHash
, SubHashPath
) where

import Data.Bits (complement, countTrailingZeros, popCount, shiftL,
unsafeShiftL, (.&.), (.|.))
import Data.Hashable (Hashable)
import Data.HashMap.Internal (Bitmap, Hash, HashMap (..), Leaf (..),
bitsPerSubkey, fullBitmap, hash,
isLeafOrCollision, maxChildren, sparseIndex)
import Data.Semigroup (Sum (..))

import qualified Data.HashMap.Internal.Array as A


#if !MIN_VERSION_base(4,11,0)
import Data.Semigroup (Semigroup (..))
#endif

data Validity k = Invalid (Error k) SubHashPath | Valid
deriving (Eq, Show)

instance Semigroup (Validity k) where
Valid <> y = y
x <> _ = x

instance Monoid (Validity k) where
mempty = Valid
mappend = (<>)

-- | An error corresponding to a broken invariant.
--
-- See 'HashMap' for the documentation of the invariants.
data Error k
= INV1_internal_Empty
| INV2_Bitmap_unexpected_1_bits !Bitmap
| INV3_bad_BitmapIndexed_size !Int
| INV4_bitmap_array_size_mismatch !Bitmap !Int
| INV5_BitmapIndexed_invalid_single_subtree
| INV6_misplaced_hash !Hash
| INV7_key_hash_mismatch k !Hash
| INV8_bad_Full_size !Int
| INV9_Collision_size !Int
| INV10_Collision_duplicate_key k !Hash
deriving (Eq, Show)

-- TODO: Name this 'Index'?!
-- (https://github.com/haskell-unordered-containers/unordered-containers/issues/425)
-- | A part of a 'Hash' with 'bitsPerSubkey' bits.
type SubHash = Word

data SubHashPath = SubHashPath
{ partialHash :: !Word
-- ^ The bits we already know, starting from the lower bits.
-- The unknown upper bits are @0@.
, lengthInBits :: !Int
-- ^ The number of bits known.
} deriving (Eq, Show)

initialSubHashPath :: SubHashPath
initialSubHashPath = SubHashPath 0 0

addSubHash :: SubHashPath -> SubHash -> SubHashPath
addSubHash (SubHashPath ph l) sh =
SubHashPath (ph .|. (sh `unsafeShiftL` l)) (l + bitsPerSubkey)

hashMatchesSubHashPath :: SubHashPath -> Hash -> Bool
hashMatchesSubHashPath (SubHashPath ph l) h = maskToLength h l == ph
where
-- Note: This needs to use `shiftL` instead of `unsafeShiftL` because
-- @l'@ may be greater than 32/64 at the deepest level.
maskToLength h' l' = h' .&. complement (complement 0 `shiftL` l')

valid :: Hashable k => HashMap k v -> Validity k
valid Empty = Valid
valid t = validInternal initialSubHashPath t
where
validInternal p Empty = Invalid INV1_internal_Empty p
validInternal p (Leaf h l) = validHash p h <> validLeaf p h l
validInternal p (Collision h ary) = validHash p h <> validCollision p h ary
validInternal p (BitmapIndexed b ary) = validBitmapIndexed p b ary
validInternal p (Full ary) = validFull p ary

validHash p h | hashMatchesSubHashPath p h = Valid
| otherwise = Invalid (INV6_misplaced_hash h) p

validLeaf p h (L k _) | hash k == h = Valid
| otherwise = Invalid (INV7_key_hash_mismatch k h) p

validCollision p h ary = validCollisionSize <> A.foldMap (validLeaf p h) ary <> distinctKeys
where
n = A.length ary
validCollisionSize | n < 2 = Invalid (INV9_Collision_size n) p
| otherwise = Valid
distinctKeys = A.foldMap (\(L k _) -> appearsOnce k) ary
appearsOnce k | A.foldMap (\(L k' _) -> if k' == k then Sum @Int 1 else Sum 0) ary == 1 = Valid
| otherwise = Invalid (INV10_Collision_duplicate_key k h) p

validBitmapIndexed p b ary = validBitmap <> validArraySize <> validSubTrees p b ary
where
validBitmap | b .&. complement fullBitmap == 0 = Valid
| otherwise = Invalid (INV2_Bitmap_unexpected_1_bits b) p
n = A.length ary
validArraySize | n < 1 || n >= maxChildren = Invalid (INV3_bad_BitmapIndexed_size n) p
| popCount b == n = Valid
| otherwise = Invalid (INV4_bitmap_array_size_mismatch b n) p

validSubTrees p b ary
| A.length ary == 1
, isLeafOrCollision (A.index ary 0)
= Invalid INV5_BitmapIndexed_invalid_single_subtree p
| otherwise = go b
where
go 0 = Valid
go b' = validInternal (addSubHash p (fromIntegral c)) (A.index ary i) <> go b''
where
c = countTrailingZeros b'
m = 1 `unsafeShiftL` c
i = sparseIndex b m
b'' = b' .&. complement m

validFull p ary = validArraySize <> validSubTrees p fullBitmap ary
where
n = A.length ary
validArraySize | n == maxChildren = Valid
| otherwise = Invalid (INV8_bad_Full_size n) p
35 changes: 22 additions & 13 deletions tests/Properties/HashMapLazy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,21 @@

module MODULE_NAME (tests) where

import Control.Applicative (Const (..))
import Control.Monad (guard)
import Control.Applicative (Const (..))
import Control.Monad (guard)
import Data.Bifoldable
import Data.Function (on)
import Data.Functor.Identity (Identity (..))
import Data.Hashable (Hashable (hashWithSalt))
import Data.Ord (comparing)
import Test.QuickCheck (Arbitrary (..), Property, elements, forAll,
property, (===), (==>))
import Test.QuickCheck.Function (Fun, apply)
import Test.QuickCheck.Poly (A, B)
import Test.Tasty (TestTree, testGroup)
import Test.Tasty.QuickCheck (testProperty)
import Util.Key (Key, keyToInt)
import Data.Function (on)
import Data.Functor.Identity (Identity (..))
import Data.Hashable (Hashable (hashWithSalt))
import Data.HashMap.Internal.Debug (Validity (..), valid)
import Data.Ord (comparing)
import Test.QuickCheck (Arbitrary (..), Property, elements, forAll,
property, (===), (==>))
import Test.QuickCheck.Function (Fun, apply)
import Test.QuickCheck.Poly (A, B)
import Test.Tasty (TestTree, testGroup)
import Test.Tasty.QuickCheck (testProperty)
import Util.Key (Key, keyToInt)

import qualified Data.Foldable as Foldable
import qualified Data.List as List
Expand Down Expand Up @@ -319,6 +320,9 @@ pIntersection xs ys =
`eq_` HM.intersection (HM.fromList xs)
$ ys

pIntersectionValid :: HashMap Key () -> HashMap Key () -> Property
pIntersectionValid x y = valid (HM.intersection x y) === Valid

pIntersectionWith :: [(Key, Int)] -> [(Key, Int)] -> Property
pIntersectionWith xs ys = M.intersectionWith (-) (M.fromList xs) `eq_`
HM.intersectionWith (-) (HM.fromList xs) $ ys
Expand Down Expand Up @@ -421,6 +425,9 @@ instance Hashable a => Hashable (Magma a) where
pFromList :: [(Key, Int)] -> Property
pFromList = id `eq_` id

pFromListValid :: [(Key, ())] -> Property
pFromListValid xs = valid (HM.fromList xs) === Valid

pFromListWith :: [(Key, Int)] -> Property
pFromListWith kvs = (M.toAscList $ M.fromListWith Op kvsM) ===
(toAscList $ HM.fromListWith Op kvsM)
Expand Down Expand Up @@ -529,6 +536,7 @@ tests =
[ testProperty "difference" pDifference
, testProperty "differenceWith" pDifferenceWith
, testProperty "intersection" pIntersection
, testProperty "intersection produces valid HashMap" pIntersectionValid
, testProperty "intersectionWith" pIntersectionWith
, testProperty "intersectionWithKey" pIntersectionWithKey
]
Expand All @@ -544,6 +552,7 @@ tests =
[ testProperty "elems" pElems
, testProperty "keys" pKeys
, testProperty "fromList" pFromList
, testProperty "fromList.valid" pFromListValid
, testProperty "fromListWith" pFromListWith
, testProperty "fromListWithKey" pFromListWithKey
, testProperty "toList" pToList
Expand Down
1 change: 1 addition & 0 deletions unordered-containers.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ library
exposed-modules:
Data.HashMap.Internal
Data.HashMap.Internal.Array
Data.HashMap.Internal.Debug
Data.HashMap.Internal.List
Data.HashMap.Internal.Strict
Data.HashMap.Lazy
Expand Down

0 comments on commit 70fdaa7

Please sign in to comment.