Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add lazy dropEnd and friends #395

Merged
merged 17 commits into from
Jul 28, 2021
118 changes: 118 additions & 0 deletions Data/ByteString/Lazy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,18 @@ module Data.ByteString.Lazy (

-- ** Breaking strings
take,
takeEnd,
drop,
dropEnd,
splitAt,
takeWhile,
takeWhileEnd,
dropWhile,
dropWhileEnd,
span,
spanEnd,
break,
breakEnd,
group,
groupBy,
inits,
Expand Down Expand Up @@ -684,6 +690,28 @@ take i cs0 = take' i cs0
then Chunk (S.take (fromIntegral n) c) Empty
else Chunk c (take' (n - fromIntegral (S.length c)) cs)

-- | /O(c)/ @'takeEnd' n xs@ is equivalent to @'drop' ('length' xs - n) xs@.
-- Takes @n@ elements from end of bytestring.
--
-- >>> takeEnd 3 "abcdefg"
-- "efg"
-- >>> takeEnd 0 "abcdefg"
-- ""
-- >>> takeEnd 4 "abc"
-- "abc"
--
-- A non-positive @n@ yields the empty bytestring.
--
-- @since 0.11.2.0
takeEnd :: Int64 -> ByteString -> ByteString
takeEnd i cs0
  | i <= 0    = Empty
  | otherwise = snd (foldrChunks step (i, Empty) cs0)
  where
    -- Accumulator: (bytes still wanted, suffix collected so far).
    -- The count is kept as 'Int64'; narrowing it to 'Int' up front would
    -- wrap for large requests on platforms where 'Int' is 32 bits wide.
    step _ done@(0, _) = done
    step c (n, acc)
      | n > len   = (n - len, Chunk c acc)
      -- Here n <= len, so n fits in 'Int' and the conversion is exact.
      | otherwise = (0, Chunk (S.takeEnd (fromIntegral n) c) acc)
      where
        len = fromIntegral (S.length c)

-- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
-- elements, or @[]@ if @n > 'length' xs@.
drop :: Int64 -> ByteString -> ByteString
Expand All @@ -696,6 +724,28 @@ drop i cs0 = drop' i cs0
then Chunk (S.drop (fromIntegral n) c) cs
else drop' (n - fromIntegral (S.length c)) cs

-- | /O(c)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@.
-- Drops @n@ elements from end of bytestring.
--
-- >>> dropEnd 3 "abcdefg"
-- "abcd"
-- >>> dropEnd 0 "abcdefg"
-- "abcdefg"
-- >>> dropEnd 4 "abc"
-- ""
--
-- A non-positive @n@ returns the input unchanged.
--
-- @since 0.11.2.0
dropEnd :: Int64 -> ByteString -> ByteString
dropEnd i p
  | i <= 0    = p
  | otherwise = snd (foldrChunks step (i, Empty) p)
  where
    -- Accumulator: (bytes still to discard, prefix kept so far).
    -- The count is kept as 'Int64'; narrowing it to 'Int' up front would
    -- wrap for large requests on platforms where 'Int' is 32 bits wide.
    step c (0, kept) = (0, Chunk c kept)
    step c (n, _)
      | n > len   = (n - len, Empty)
      -- Here n <= len, so n fits in 'Int' and the conversion is exact.
      | otherwise = (0, fromStrict (S.dropEnd (fromIntegral n) c))
      where
        len = fromIntegral (S.length c)

-- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
splitAt i cs0 | i <= 0 = (Empty, cs0)
Expand All @@ -722,6 +772,23 @@ takeWhile f = takeWhile'
n | n < S.length c -> Chunk (S.take n c) Empty
| otherwise -> Chunk c (takeWhile' cs)

-- | Returns the longest (possibly empty) suffix of elements
-- satisfying the predicate.
--
-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@.
Comment on lines +806 to +809
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An example would be nice to have here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've used Chunk (pack [1,2]) (Chunk (pack [3,4,6])) Empty as an example of a lazy bytestring. Hope it's not too verbose, but textual representation of a bytestring is always tricky.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that would be helpful for users who will mostly not be aware of ByteString's internal constructors. If you want to represent the bytes as numbers, you can use the OverloadedLists syntax, e.g. [1,2,3,4,6].

--
-- @since 0.11.2.0
takeWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
takeWhileEnd f = snd . foldrChunks step (True, Empty)
  where
    -- The flag records whether every chunk seen so far (scanning from the
    -- right) consisted entirely of bytes satisfying the predicate.
    step _ done@(False, _) = done
    step c (True, acc)
      -- The whole chunk matches: keep it and continue to the left.
      | S.length kept == S.length c = (True, Chunk c acc)
      -- Only a suffix of the chunk matches: keep it and stop.
      | otherwise                   = (False, fromStrict kept `append` acc)
      where
        kept = S.takeWhileEnd f c

-- | Similar to 'P.dropWhile',
-- drops the longest (possibly empty) prefix of elements
-- satisfying the predicate and returns the remainder.
Expand All @@ -733,6 +800,21 @@ dropWhile f = dropWhile'
n | n < S.length c -> Chunk (S.drop n c) cs
| otherwise -> dropWhile' cs

-- | Similar to 'P.dropWhileEnd': removes the longest (possibly empty)
-- suffix whose elements all satisfy the predicate and returns what is left.
--
-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@.
--
-- @since 0.11.2.0
dropWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
dropWhileEnd f = foldrChunks step Empty
  where
    -- Nothing survived to the right of this chunk, so the matching suffix
    -- may extend into it: trim the chunk itself.
    step c Empty = fromStrict (S.dropWhileEnd f c)
    -- Something to the right was kept, so this chunk is kept whole.
    step c rest  = Chunk c rest

-- | Similar to 'P.break',
-- returns the longest (possibly empty) prefix of elements which __do not__
-- satisfy the predicate and the remainder of the string.
Expand All @@ -750,6 +832,23 @@ break f = break'
| otherwise -> let (cs', cs'') = break' cs
in (Chunk c cs', cs'')


-- | Splits off the longest (possibly empty) suffix of elements which
-- __do not__ satisfy the predicate. The result is @(remainder, suffix)@:
-- the first component is the part of the string before that suffix and
-- the second component is the suffix itself.
--
-- 'breakEnd' @p@ is equivalent to @'spanEnd' (not . p)@ and to
-- @('dropWhileEnd' (not . p) &&& 'takeWhileEnd' (not . p))@.
--
-- @since 0.11.2.0
breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
breakEnd f = go
  where
    go Empty = (Empty, Empty)
    go (Chunk c cs) =
      case go cs of
        -- Nothing to the right of this chunk ended up in the first
        -- component, so the split point is in this chunk or further left.
        (Empty, suffix) ->
          let (before, after) = S.breakEnd f c
          in  (fromStrict before, fromStrict after `append` suffix)
        -- The split point was already found further right, so this chunk
        -- belongs wholly to the first component.
        (prefix, suffix) -> (Chunk c prefix, suffix)

--
-- TODO
--
Expand Down Expand Up @@ -799,6 +898,25 @@ spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
span p = break (not . p)

-- | Splits off the longest (possibly empty) suffix of elements
-- satisfying the predicate. The result is @(remainder, suffix)@: the
-- first component is the part of the string before that suffix and the
-- second component is the suffix itself.
--
-- 'spanEnd' @p@ is equivalent to @'breakEnd' (not . p)@ and to
-- @('dropWhileEnd' p &&& 'takeWhileEnd' p)@.
--
-- We have
--
-- > spanEnd (not . isSpace) "x y z" == ("x y ", "z")
--
-- and
--
-- > spanEnd (not . isSpace) ps
-- >    ==
-- > let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x)
--
-- @since 0.11.2.0
spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
spanEnd p = breakEnd (not . p)

-- | /O(n)/ Splits a 'ByteString' into components delimited by
-- separators, where the predicate returns True for a separator element.
-- The resulting components do not contain the separators. Two adjacent
Expand Down
54 changes: 53 additions & 1 deletion Data/ByteString/Lazy/Char8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,18 @@ module Data.ByteString.Lazy.Char8 (

-- ** Breaking strings
take,
takeEnd,
drop,
dropEnd,
splitAt,
takeWhile,
takeWhileEnd,
dropWhile,
dropWhileEnd,
span,
spanEnd,
break,
breakEnd,
group,
groupBy,
inits,
Expand Down Expand Up @@ -203,7 +209,7 @@ module Data.ByteString.Lazy.Char8 (
import Data.ByteString.Lazy
(fromChunks, toChunks
,empty,null,length,tail,init,append,reverse,transpose,cycle
,concat,take,drop,splitAt,intercalate
,concat,take,takeEnd,drop,dropEnd,splitAt,intercalate
,isPrefixOf,isSuffixOf,group,inits,tails,copy
,stripPrefix,stripSuffix
,hGetContents, hGet, hPut, getContents
Expand Down Expand Up @@ -456,22 +462,68 @@ takeWhile :: (Char -> Bool) -> ByteString -> ByteString
takeWhile f = L.takeWhile (f . w2c)
{-# INLINE takeWhile #-}

-- | Returns the longest (possibly empty) suffix of characters that all
-- satisfy the predicate.
--
-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@.
--
-- @since 0.11.2.0
takeWhileEnd :: (Char -> Bool) -> ByteString -> ByteString
takeWhileEnd p = L.takeWhileEnd (\w -> p (w2c w))
{-# INLINE takeWhileEnd #-}

-- | 'dropWhile' @p xs@ is what remains of @xs@ once the prefix selected by
-- 'takeWhile' @p xs@ has been removed.
dropWhile :: (Char -> Bool) -> ByteString -> ByteString
dropWhile p = L.dropWhile (\w -> p (w2c w))
{-# INLINE dropWhile #-}

-- | Similar to 'P.dropWhileEnd': removes the longest (possibly empty)
-- suffix whose characters all satisfy the predicate and returns what is
-- left.
--
-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@.
--
-- @since 0.11.2.0
dropWhileEnd :: (Char -> Bool) -> ByteString -> ByteString
dropWhileEnd p = L.dropWhileEnd (\w -> p (w2c w))
{-# INLINE dropWhileEnd #-}

-- | 'break' @p@ gives the same result as @'span' ('not' . p)@.
break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
break p = L.break (\w -> p (w2c w))
{-# INLINE break #-}

-- | 'breakEnd' works like 'break', but scans from the end of the
-- 'ByteString'.
--
-- > breakEnd p == spanEnd (not . p)
--
-- @since 0.11.2.0
breakEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
breakEnd p = L.breakEnd (\w -> p (w2c w))
{-# INLINE breakEnd #-}

-- | 'span' @p xs@ splits the ByteString into two pieces; the result is the
-- same as @('takeWhile' p xs, 'dropWhile' p xs)@.
span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
span p = L.span (\w -> p (w2c w))
{-# INLINE span #-}

-- | 'spanEnd' works like 'span', but scans from the end of the
-- 'ByteString'. For example
--
-- > spanEnd (not . isSpace) "x y z" == ("x y ", "z")
--
-- and in general
--
-- > spanEnd (not . isSpace) ps
-- >    ==
-- > let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x)
--
-- @since 0.11.2.0
spanEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
spanEnd p = L.spanEnd (\w -> p (w2c w))
{-# INLINE spanEnd #-}

{-
-- | 'breakChar' breaks its ByteString argument at the first occurence
-- of the specified Char. It is more efficient than 'break' as it is
Expand Down
7 changes: 2 additions & 5 deletions tests/Properties/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,12 @@ tests =
\(toElem -> c) x -> (B.unpack *** B.unpack) (B.break (/= c) x) === break (/= c) (B.unpack x)
, testProperty "break span" $
\f x -> B.break f x === B.span (not . f) x
#ifndef BYTESTRING_LAZY
, testProperty "breakEnd" $
\f x -> B.breakEnd f x === swap ((B.reverse *** B.reverse) (B.break f (B.reverse x)))
, testProperty "breakEnd" $
\f x -> B.breakEnd f x === B.spanEnd (not . f) x

#ifndef BYTESTRING_LAZY
, testProperty "break breakSubstring" $
\(toElem -> c) x -> B.break (== c) x === B.breakSubstring (B.singleton c) x
, testProperty "breakSubstring" $
Expand Down Expand Up @@ -248,7 +249,6 @@ tests =
\x -> B.unpack (B.takeWhile isSpace x) === takeWhile isSpace (B.unpack x)
#endif

#ifndef BYTESTRING_LAZY
, testProperty "dropEnd" $
\n x -> B.dropEnd n x === B.take (B.length x - n) x
, testProperty "dropWhileEnd" $
Expand All @@ -257,7 +257,6 @@ tests =
\n x -> B.takeEnd n x === B.drop (B.length x - n) x
, testProperty "takeWhileEnd" $
\f x -> B.takeWhileEnd f x === B.reverse (B.takeWhile f (B.reverse x))
#endif

#ifdef BYTESTRING_LAZY
, testProperty "invariant" $
Expand Down Expand Up @@ -350,10 +349,8 @@ tests =
\(toElem -> c) x -> (B.unpack *** B.unpack) (B.span (== c) x) === span (== c) (B.unpack x)
, testProperty "span /=" $
\(toElem -> c) x -> (B.unpack *** B.unpack) (B.span (/= c) x) === span (/= c) (B.unpack x)
#ifndef BYTESTRING_LAZY
, testProperty "spanEnd" $
\f x -> B.spanEnd f x === swap ((B.reverse *** B.reverse) (B.span f (B.reverse x)))
#endif
, testProperty "split" $
\(toElem -> c) x -> map B.unpack (B.split c x) === split c (B.unpack x)
, testProperty "split empty" $
Expand Down