Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

zipWith' to packZipWith as per #208 #295

Merged
merged 13 commits into from
Oct 11, 2020
Merged
17 changes: 7 additions & 10 deletions Data/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ module Data.ByteString (
-- * Zipping and unzipping ByteStrings
zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
packZipWith, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)

-- * Ordered ByteStrings
Expand Down Expand Up @@ -1622,14 +1623,10 @@ zipWith f ps qs
| otherwise = f (unsafeHead ps) (unsafeHead qs) : zipWith f (unsafeTail ps) (unsafeTail qs)
{-# NOINLINE [1] zipWith #-}

--
-- | A specialised version of zipWith for the common case of a
-- simultaneous map over two bytestrings, to build a 3rd. Rewrite rules
-- are used to automatically covert zipWith into zipWith' when a pack is
-- performed on the result of zipWith.
--
zipWith' :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
zipWith' f (BS fp l) (BS fq m) = unsafeDupablePerformIO $
-- | A specialised version of `zipWith` for the common case of a
-- simultaneous map over two ByteStrings, to build a 3rd.
packZipWith :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
packZipWith f (BS fp l) (BS fq m) = unsafeDupablePerformIO $
withForeignPtr fp $ \a ->
withForeignPtr fq $ \b ->
create len $ go a b
Expand All @@ -1646,11 +1643,11 @@ zipWith' f (BS fp l) (BS fq m) = unsafeDupablePerformIO $
zipWith_ (n+1) r

len = min l m
{-# INLINE zipWith' #-}
{-# INLINE packZipWith #-}

{-# RULES
"ByteString specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
zipWith f p q = unpack (zipWith' f p q)
zipWith f p q = unpack (packZipWith f p q)
#-}

-- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
Expand Down
9 changes: 9 additions & 0 deletions Data/ByteString/Char8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ module Data.ByteString.Char8 (
-- * Zipping and unzipping ByteStrings
zip, -- :: ByteString -> ByteString -> [(Char,Char)]
zipWith, -- :: (Char -> Char -> c) -> ByteString -> ByteString -> [c]
packZipWith, -- :: (Char -> Char -> Char) -> ByteString -> ByteString -> ByteString
unzip, -- :: [(Char,Char)] -> (ByteString,ByteString)

-- * Ordered ByteStrings
Expand Down Expand Up @@ -837,6 +838,14 @@ zip ps qs
zipWith :: (Char -> Char -> a) -> ByteString -> ByteString -> [a]
zipWith f = B.zipWith ((. w2c) . f . w2c)

-- | A specialised version of `zipWith` for the common case of a
-- simultaneous map over two ByteStrings, to build a 3rd.
packZipWith :: (Char -> Char -> Char) -> ByteString -> ByteString -> ByteString
Bodigrim marked this conversation as resolved.
Show resolved Hide resolved
packZipWith f = B.packZipWith f'
where
f' c1 c2 = c2w $ f (w2c c1) (w2c c2)
{-# INLINE packZipWith #-}

-- | 'unzip' transforms a list of pairs of Chars into a pair of
-- ByteStrings. Note that this performs two 'pack' operations.
unzip :: [(Char,Char)] -> (ByteString,ByteString)
Expand Down
13 changes: 13 additions & 0 deletions Data/ByteString/Lazy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ module Data.ByteString.Lazy (
-- * Zipping and unzipping ByteStrings
zip, -- :: ByteString -> ByteString -> [(Word8,Word8)]
zipWith, -- :: (Word8 -> Word8 -> c) -> ByteString -> ByteString -> [c]
packZipWith, -- :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
unzip, -- :: [(Word8,Word8)] -> (ByteString,ByteString)

-- * Ordered ByteStrings
Expand Down Expand Up @@ -1131,6 +1132,18 @@ zipWith f (Chunk a as) (Chunk b bs) = go a as b bs
to _ (Chunk x' xs) y ys | not (S.null y) = go x' xs y ys
to _ (Chunk x' xs) _ (Chunk y' ys) = go x' xs y' ys

-- | A specialised version of `zipWith` for the common case of a
-- simultaneous map over two ByteStrings, to build a 3rd.
packZipWith :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
packZipWith _ Empty _ = Empty
packZipWith _ _ Empty = Empty
packZipWith f (Chunk a@(S.BS _ al) as) (Chunk b@(S.BS _ bl) bs) = Chunk (S.packZipWith f a b) $
case compare al bl of
LT -> packZipWith f as $ Chunk (S.drop al b) bs
EQ -> packZipWith f as bs
GT -> packZipWith f (Chunk (S.drop bl a) as) bs
{-# INLINE packZipWith #-}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about adding a RULE similar to the one for the strict version?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that in future we intend to decomission the rewrite rule, we can probably remove the sentence advertising it already.

  • Bodigrim

Don't know how I feel about adding a rule only for it to be decommisioned later, still added the rule to Data.ByteString,Lazy though

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sjakobi As discussed in #208, such rule is harmful when the output of zipWith is consumed lazily. We cannot drop the existing rule for strict zipWith lightly, without further discussion, but from my perspective we should not introduce it for lazy bytestrings.

Copy link
Contributor

@Bodigrim Bodigrim Oct 10, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of copying a rule from the strict version, I would rather add

{-# RULES "Specialise zipWith" pack (zipWith f p q) = packZipWith f p q #-} 

(both for strict and lazy bytestrings)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After reading #208 again I agree. I changed the rewrite rule

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Data/ByteString.hs:1649:1: warning: [-Winline-rule-shadowing]
    Rule "ByteString specialise zipWith" may never fire
      because ‘pack’ might inline first
    Probable fix: add an INLINE[n] or NOINLINE[n] pragma for ‘pack’
     |
1649 | "ByteString specialise zipWith" forall (f :: Word8 -> Word8 -> Word8) p q .
     | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^...

GHC is complaining. I'm not that read up on the inline/noinline pragmas. What needs to be changed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added {-# INLINE [1] pack #-} to Data.ByteString and Data.ByteString.Lazy since they only resolve to packBytes and I'm sure it would get inlined anyways.

I added {-# INLINE [1] zipWith #-} to Data.ByteString.Lazy but I'm not so sure about the performance characteristics of that. Shouldn't be too big of an issue since it just resolves to go a as b bs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for experimenting. I don't mind {-# INLINE [1] zipWith #-}, but not quite sure about {-# INLINE [1] pack #-}. It seems innocent, but deserves a thorough investigation of the impact, because pack is used pretty much everywhere.

To stop going down the rabbit hole, let's reset back to d93cdd4 and be with it for now. Rules issue can be resolved in a separate PR later. Sorry that it became so laborious.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apologies for the confusion – I had missed the discussion in #208.

-- | /O(n)/ 'unzip' transforms a list of pairs of bytes into a pair of
-- ByteStrings. Note that this performs two 'pack' operations.
unzip :: [(Word8,Word8)] -> (ByteString,ByteString)
Expand Down
9 changes: 9 additions & 0 deletions Data/ByteString/Lazy/Char8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ module Data.ByteString.Lazy.Char8 (
-- * Zipping and unzipping ByteStrings
zip, -- :: ByteString -> ByteString -> [(Char,Char)]
zipWith, -- :: (Char -> Char -> c) -> ByteString -> ByteString -> [c]
packZipWith, -- :: (Char -> Char -> Char) -> ByteString -> ByteString -> ByteString
-- unzip, -- :: [(Char,Char)] -> (ByteString,ByteString)

-- * Ordered ByteStrings
Expand Down Expand Up @@ -683,6 +684,14 @@ zip ps qs
zipWith :: (Char -> Char -> a) -> ByteString -> ByteString -> [a]
zipWith f = L.zipWith ((. w2c) . f . w2c)

-- | A specialised version of `zipWith` for the common case of a
-- simultaneous map over two ByteStrings, to build a 3rd.
packZipWith :: (Char -> Char -> Char) -> ByteString -> ByteString -> ByteString
packZipWith f = L.packZipWith f'
where
f' c1 c2 = c2w $ f (w2c c1) (w2c c2)
{-# INLINE packZipWith #-}

-- | 'lines' breaks a ByteString up into a list of ByteStrings at
-- newline Chars (@'\\n'@). The resulting strings do not contain newlines.
--
Expand Down
30 changes: 20 additions & 10 deletions tests/Properties.hs
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,8 @@ prop_unfoldrBL =
((\n f a -> take n $
unfoldr f a) :: Int -> (X -> Maybe (W,X)) -> X -> [W])

prop_packZipWithBL = L.packZipWith `eq3` (zipWith :: (W -> W -> W) -> [W] -> [W] -> [W])

--
-- And finally, check correspondance between Data.ByteString and List
--
Expand Down Expand Up @@ -540,7 +542,7 @@ prop_scanr1CL f = eqnotnull2
(scanr1 :: (Char -> Char -> Char) -> [Char] -> [Char])
(castFn f)

-- prop_zipWithPL' = P.zipWith' `eq3` (zipWith :: (W -> W -> W) -> [W] -> [W] -> [W])
prop_packZipWithPL = P.packZipWith `eq3` (zipWith :: (W -> W -> W) -> [W] -> [W] -> [W])

prop_zipWithPL = (P.zipWith :: (W -> W -> X) -> P -> P -> [X]) `eq3`
(zipWith :: (W -> W -> X) -> [W] -> [W] -> [X])
Expand Down Expand Up @@ -1350,7 +1352,12 @@ prop_zip1BB xs ys = P.zip xs ys == zip (P.unpack xs) (P.unpack ys)
prop_zipWithBB xs ys = P.zipWith (,) xs ys == P.zip xs ys
prop_zipWithCC xs ys = C.zipWith (,) xs ys == C.zip xs ys
prop_zipWithLC xs ys = LC.zipWith (,) xs ys == LC.zip xs ys
-- prop_zipWith'BB xs ys = P.pack (P.zipWith (+) xs ys) == P.zipWith' (+) xs ys

prop_packZipWithBB f xs ys = P.pack (P.zipWith f xs ys) == P.packZipWith f xs ys
prop_packZipWithLL f xs ys = L.pack (L.zipWith f xs ys) == L.packZipWith f xs ys
prop_packZipWithBC f xs ys = C.pack (C.zipWith f xs ys) == C.packZipWith f xs ys
prop_packZipWithLC f xs ys = LC.pack (LC.zipWith f xs ys) == LC.packZipWith f xs ys


prop_unzipBB x = let (xs,ys) = unzip x in (P.pack xs, P.pack ys) == P.unzip x

Expand Down Expand Up @@ -1887,6 +1894,7 @@ bl_tests =
, testProperty "elemIndexEnd"prop_elemIndexEndBL
, testProperty "elemIndices" prop_elemIndicesBL
, testProperty "concatMap" prop_concatMapBL
, testProperty "zipWith/packZipWithLazy" prop_packZipWithBL
]

------------------------------------------------------------------------
Expand Down Expand Up @@ -2076,10 +2084,9 @@ pl_tests =
, testProperty "unzip" prop_unzipPL
, testProperty "unzip" prop_unzipLL
, testProperty "unzip" prop_unzipCL
, testProperty "zipWith" prop_zipWithPL
-- , testProperty "zipWith" prop_zipWithCL
, testProperty "zipWith rules" prop_zipWithPL_rules
-- , testProperty "zipWith/zipWith'" prop_zipWithPL'
, testProperty "zipWithPL" prop_zipWithPL
, testProperty "zipWithPL rules" prop_zipWithPL_rules
, testProperty "packZipWithPL" prop_packZipWithPL

, testProperty "isPrefixOf" prop_isPrefixOfPL
, testProperty "isSuffixOf" prop_isSuffixOfPL
Expand Down Expand Up @@ -2330,10 +2337,13 @@ bb_tests =
, testProperty "zip" prop_zipBB
, testProperty "zip" prop_zipLC
, testProperty "zip1" prop_zip1BB
, testProperty "zipWith" prop_zipWithBB
, testProperty "zipWith" prop_zipWithCC
, testProperty "zipWith" prop_zipWithLC
-- , testProperty "zipWith'" prop_zipWith'BB
, testProperty "zipWithBB" prop_zipWithBB
, testProperty "zipWithCC" prop_zipWithCC
, testProperty "zipWithLC" prop_zipWithLC
, testProperty "packZipWithBB" prop_packZipWithBB
, testProperty "packZipWithLL" prop_packZipWithLL
, testProperty "packZipWithBC" prop_packZipWithBC
, testProperty "packZipWithLC" prop_packZipWithLC
, testProperty "unzip" prop_unzipBB
, testProperty "concatMap" prop_concatMapBB
-- , testProperty "join/joinByte" prop_join_spec
Expand Down