Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add lazy dropEnd and friends #395

Merged
merged 17 commits into from
Jul 28, 2021
127 changes: 127 additions & 0 deletions Data/ByteString/Lazy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,18 @@ module Data.ByteString.Lazy (

-- ** Breaking strings
take,
takeEnd,
drop,
dropEnd,
splitAt,
takeWhile,
takeWhileEnd,
dropWhile,
dropWhileEnd,
span,
spanEnd,
break,
breakEnd,
group,
groupBy,
inits,
Expand Down Expand Up @@ -684,6 +690,30 @@ take i cs0 = take' i cs0
then Chunk (S.take (fromIntegral n) c) Empty
else Chunk c (take' (n - fromIntegral (S.length c)) cs)

-- | /O(c)/ @'takeEnd' n xs@ is equivalent to @'drop' ('length' xs - n) xs@.
-- Takes @n@ elements from end of bytestring.
--
-- >>> takeEnd 3 "abcdefg"
-- "efg"
-- >>> takeEnd 0 "abcdefg"
-- ""
-- >>> takeEnd 4 "abc"
-- "abc"
--
-- A non-positive @n@ yields the empty string. The chunk list is walked
-- once, from the last chunk backwards, so the total length of the string
-- is never recomputed per chunk.
--
-- @since 0.11.1.1
takeEnd :: Int64 -> ByteString -> ByteString
takeEnd i _ | i <= 0 = Empty
takeEnd i cs0 = snd (go cs0)
  where
    -- @go cs@ returns the number of bytes still wanted after all of @cs@
    -- has been considered, paired with the part of @cs@ that is kept.
    go Empty = (i, Empty)
    go (Chunk c cs) = case go cs of
      -- The later chunks already supplied every requested byte, so this
      -- chunk contributes nothing.
      done@(0, _) -> done
      (n, kept)
        -- The whole chunk is needed and more is still wanted.
        | n > len -> (n - len, Chunk c kept)
        -- Only the last @n@ bytes of this chunk are needed. Here
        -- @0 < n <= len@, so narrowing @n@ to 'Int' is lossless.
        | otherwise -> (0, Chunk (S.takeEnd (fromIntegral n) c) kept)
        where
          len = fromIntegral (S.length c)

-- | /O(n\/c)/ 'drop' @n xs@ returns the suffix of @xs@ after the first @n@
-- elements, or @[]@ if @n > 'length' xs@.
drop :: Int64 -> ByteString -> ByteString
Expand All @@ -696,6 +726,30 @@ drop i cs0 = drop' i cs0
then Chunk (S.drop (fromIntegral n) c) cs
else drop' (n - fromIntegral (S.length c)) cs

-- | /O(c)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@.
-- Drops @n@ elements from end of bytestring.
--
-- >>> dropEnd 3 "abcdefg"
-- "abcd"
-- >>> dropEnd 0 "abcdefg"
-- "abcdefg"
-- >>> dropEnd 4 "abc"
-- ""
--
-- A non-positive @n@ returns the string unchanged. The chunk list is
-- walked once, from the last chunk backwards, so the total length of the
-- string is never recomputed per chunk.
--
-- @since 0.11.1.1
dropEnd :: Int64 -> ByteString -> ByteString
dropEnd i p | i <= 0 = p
dropEnd i cs0 = snd (go cs0)
  where
    -- @go cs@ returns the number of bytes that still have to be dropped
    -- after all of @cs@ has been considered, paired with what is left of
    -- @cs@.
    go Empty = (i, Empty)
    go (Chunk c cs) = case go cs of
      -- Nothing left to drop: keep this chunk in front of the remainder.
      (0, kept) -> (0, Chunk c kept)
      -- While bytes remain to be dropped, everything after this chunk has
      -- already been discarded, so the remainder is necessarily 'Empty'.
      (n, _)
        -- The whole chunk goes as well.
        | n >= len -> (n - len, Empty)
        -- Here @0 < n < len@: a non-empty prefix of this chunk survives
        -- and narrowing @n@ to 'Int' is lossless.
        | otherwise -> (0, Chunk (S.dropEnd (fromIntegral n) c) Empty)
        where
          len = fromIntegral (S.length c)

-- | /O(n\/c)/ 'splitAt' @n xs@ is equivalent to @('take' n xs, 'drop' n xs)@.
splitAt :: Int64 -> ByteString -> (ByteString, ByteString)
splitAt i cs0 | i <= 0 = (Empty, cs0)
Expand All @@ -722,6 +776,23 @@ takeWhile f = takeWhile'
n | n < S.length c -> Chunk (S.take n c) Empty
| otherwise -> Chunk c (takeWhile' cs)

-- | Returns the longest (possibly empty) suffix of elements
-- satisfying the predicate.
--
-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@.
Comment on lines +806 to +809
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An example would be nice to have here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've used Chunk (pack [1,2]) (Chunk (pack [3,4,6]) Empty) as an example of a lazy bytestring. Hope it's not too verbose, but textual representation of a bytestring is always tricky.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that would be helpful for users who will mostly not be aware of ByteString's internal constructors. If you want to represent the bytes as numbers, you can use the OverloadedLists syntax, e.g. [1,2,3,4,6].

--
-- For example, with the predicate 'even' only the trailing run of even
-- bytes is kept:
--
-- > takeWhileEnd even (pack [1,2,3,4,6]) == pack [4,6]
--
-- @since 0.11.1.1
takeWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
takeWhileEnd f = snd . go
  where
    -- @go cs@ returns whether every byte of @cs@ satisfied the predicate
    -- (so the matching suffix may extend into the preceding chunk),
    -- paired with the matching suffix found so far.
    go Empty = (True, Empty)
    go (Chunk c cs) = case go cs of
      -- The suffix already ended inside a later chunk.
      (False, kept) -> (False, kept)
      (True, kept)
        -- The whole chunk matches: keep it and continue to the left.
        | S.length c' == S.length c -> (True, Chunk c kept)
        -- No trailing byte of this chunk matches. An empty strict chunk
        -- must never be stored in a lazy 'ByteString', so nothing is
        -- prepended.
        | S.null c' -> (False, kept)
        -- The suffix ends part-way through this chunk.
        | otherwise -> (False, Chunk c' kept)
        where
          c' = S.takeWhileEnd f c

-- | Similar to 'P.dropWhile',
-- drops the longest (possibly empty) prefix of elements
-- satisfying the predicate and returns the remainder.
Expand All @@ -733,6 +804,23 @@ dropWhile f = dropWhile'
n | n < S.length c -> Chunk (S.drop n c) cs
| otherwise -> dropWhile' cs

-- | Similar to 'P.dropWhileEnd':
-- removes the longest (possibly empty) suffix whose elements all satisfy
-- the predicate and returns what is left.
--
-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@.
--
-- @since 0.11.1.1
dropWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
dropWhileEnd f = go
  where
    go Empty = Empty
    go (Chunk c rest)
      -- Everything after this chunk was dropped (or there was nothing
      -- after it), so the trimming continues inside this chunk.
      | Empty <- trimmed = fromStrict (S.dropWhileEnd f c)
      -- Some later byte survived, so this chunk is kept whole.
      | otherwise = Chunk c trimmed
      where
        trimmed = go rest

-- | Similar to 'P.break',
-- returns the longest (possibly empty) prefix of elements which __do not__
-- satisfy the predicate and the remainder of the string.
Expand All @@ -750,6 +838,26 @@ break f = break'
| otherwise -> let (cs', cs'') = break' cs
in (Chunk c cs', cs'')


-- | Splits off the longest (possibly empty) suffix of elements which
-- __do not__ satisfy the predicate. The result is a pair whose first
-- component is the leading remainder of the string and whose second
-- component is that suffix.
--
-- 'breakEnd' @p@ is equivalent to @'spanEnd' (not . p)@ and to
-- @('dropWhileEnd' (not . p) &&& 'takeWhileEnd' (not . p))@.
--
-- @since 0.11.1.1
breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
breakEnd f = go
  where
    go Empty = (Empty, Empty)
    -- Last chunk: the strict 'S.breakEnd' decides the split on its own.
    go (Chunk c Empty) = (fromStrict front, fromStrict back)
      where
        (front, back) = S.breakEnd f c
    go (Chunk c rest) = case go rest of
      -- All later chunks belong to the suffix, so the split point lies in
      -- this chunk (or further left).
      (Empty, back) -> (fromStrict front, fromStrict mid <> back)
        where
          (front, mid) = S.breakEnd f c
      -- The split point was found in a later chunk: this chunk goes in
      -- front of the leading part unchanged.
      (front, back) -> (Chunk c front, back)

--
-- TODO
--
Expand Down Expand Up @@ -799,6 +907,25 @@ spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
span :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
span p = break (not . p)

-- | Splits off the longest (possibly empty) suffix of elements
-- satisfying the predicate. The result is a pair whose first component is
-- the leading remainder of the string and whose second component is that
-- suffix.
--
-- 'spanEnd' @p@ is equivalent to @'breakEnd' (not . p)@ and to
-- @('dropWhileEnd' p &&& 'takeWhileEnd' p)@.
--
-- We have
--
-- > spanEnd (not . isSpace) "x y z" == ("x y ", "z")
--
-- and
--
-- > spanEnd (not . isSpace) ps
-- >    ==
-- > let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x)
--
-- @since 0.11.1.1
spanEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
spanEnd p = breakEnd (\w -> not (p w))

-- | /O(n)/ Splits a 'ByteString' into components delimited by
-- separators, where the predicate returns True for a separator element.
-- The resulting components do not contain the separators. Two adjacent
Expand Down
54 changes: 53 additions & 1 deletion Data/ByteString/Lazy/Char8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,18 @@ module Data.ByteString.Lazy.Char8 (

-- ** Breaking strings
take,
takeEnd,
drop,
dropEnd,
splitAt,
takeWhile,
takeWhileEnd,
dropWhile,
dropWhileEnd,
span,
spanEnd,
break,
breakEnd,
group,
groupBy,
inits,
Expand Down Expand Up @@ -203,7 +209,7 @@ module Data.ByteString.Lazy.Char8 (
import Data.ByteString.Lazy
(fromChunks, toChunks
,empty,null,length,tail,init,append,reverse,transpose,cycle
,concat,take,drop,splitAt,intercalate
,concat,take,takeEnd,drop,dropEnd,splitAt,intercalate
,isPrefixOf,isSuffixOf,group,inits,tails,copy
,stripPrefix,stripSuffix
,hGetContents, hGet, hPut, getContents
Expand Down Expand Up @@ -456,22 +462,68 @@ takeWhile :: (Char -> Bool) -> ByteString -> ByteString
takeWhile f = L.takeWhile (f . w2c)
{-# INLINE takeWhile #-}

-- | Returns the longest (possibly empty) suffix of the string whose
-- characters all satisfy the predicate.
--
-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@.
--
-- Each byte is converted with @w2c@ before the predicate is applied.
--
-- @since 0.11.1.1
takeWhileEnd :: (Char -> Bool) -> ByteString -> ByteString
takeWhileEnd p = L.takeWhileEnd (\w -> p (w2c w))
{-# INLINE takeWhileEnd #-}

-- | 'dropWhile' @p xs@ returns what is left of @xs@ once the prefix
-- selected by 'takeWhile' @p xs@ has been removed.
--
-- Each byte is converted with @w2c@ before the predicate is applied.
dropWhile :: (Char -> Bool) -> ByteString -> ByteString
dropWhile p = L.dropWhile (\w -> p (w2c w))
{-# INLINE dropWhile #-}

-- | Similar to 'P.dropWhileEnd':
-- removes the longest (possibly empty) suffix whose characters all satisfy
-- the predicate and returns what is left.
--
-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@.
--
-- Each byte is converted with @w2c@ before the predicate is applied.
--
-- @since 0.11.1.1
dropWhileEnd :: (Char -> Bool) -> ByteString -> ByteString
dropWhileEnd p = L.dropWhileEnd (\w -> p (w2c w))
{-# INLINE dropWhileEnd #-}

-- | 'break' @p@ is equivalent to @'span' ('not' . p)@.
--
-- Each byte is converted with @w2c@ before the predicate is applied.
break :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
break p = L.break (\w -> p (w2c w))
{-# INLINE break #-}

-- | 'breakEnd' behaves like 'break' but works from the end of the
-- 'ByteString'.
--
-- > breakEnd p == spanEnd (not . p)
--
-- Each byte is converted with @w2c@ before the predicate is applied.
--
-- @since 0.11.1.1
breakEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
breakEnd p = L.breakEnd (\w -> p (w2c w))
{-# INLINE breakEnd #-}

-- | 'span' @p xs@ splits the ByteString into two segments and is
-- equivalent to @('takeWhile' p xs, 'dropWhile' p xs)@.
--
-- Each byte is converted with @w2c@ before the predicate is applied.
span :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
span p = L.span (\w -> p (w2c w))
{-# INLINE span #-}

-- | 'spanEnd' behaves like 'span' but works from the end of the
-- 'ByteString'.
--
-- We have
--
-- > spanEnd (not . isSpace) "x y z" == ("x y ", "z")
--
-- and
--
-- > spanEnd (not . isSpace) ps
-- >    ==
-- > let (x, y) = span (not . isSpace) (reverse ps) in (reverse y, reverse x)
--
-- Each byte is converted with @w2c@ before the predicate is applied.
--
-- @since 0.11.1.1
spanEnd :: (Char -> Bool) -> ByteString -> (ByteString, ByteString)
spanEnd p = L.spanEnd (\w -> p (w2c w))
{-# INLINE spanEnd #-}

{-
-- | 'breakChar' breaks its ByteString argument at the first occurence
-- of the specified Char. It is more efficient than 'break' as it is
Expand Down
7 changes: 2 additions & 5 deletions tests/Properties/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,12 @@ tests =
\(toElem -> c) x -> (B.unpack *** B.unpack) (B.break (/= c) x) === break (/= c) (B.unpack x)
, testProperty "break span" $
\f x -> B.break f x === B.span (not . f) x
#ifndef BYTESTRING_LAZY
, testProperty "breakEnd" $
\f x -> B.breakEnd f x === swap ((B.reverse *** B.reverse) (B.break f (B.reverse x)))
, testProperty "breakEnd" $
\f x -> B.breakEnd f x === B.spanEnd (not . f) x

#ifndef BYTESTRING_LAZY
, testProperty "break breakSubstring" $
\(toElem -> c) x -> B.break (== c) x === B.breakSubstring (B.singleton c) x
, testProperty "breakSubstring" $
Expand Down Expand Up @@ -248,7 +249,6 @@ tests =
\x -> B.unpack (B.takeWhile isSpace x) === takeWhile isSpace (B.unpack x)
#endif

#ifndef BYTESTRING_LAZY
, testProperty "dropEnd" $
\n x -> B.dropEnd n x === B.take (B.length x - n) x
, testProperty "dropWhileEnd" $
Expand All @@ -257,7 +257,6 @@ tests =
\n x -> B.takeEnd n x === B.drop (B.length x - n) x
, testProperty "takeWhileEnd" $
\f x -> B.takeWhileEnd f x === B.reverse (B.takeWhile f (B.reverse x))
#endif

#ifdef BYTESTRING_LAZY
, testProperty "invariant" $
Expand Down Expand Up @@ -350,10 +349,8 @@ tests =
\(toElem -> c) x -> (B.unpack *** B.unpack) (B.span (== c) x) === span (== c) (B.unpack x)
, testProperty "span /=" $
\(toElem -> c) x -> (B.unpack *** B.unpack) (B.span (/= c) x) === span (/= c) (B.unpack x)
#ifndef BYTESTRING_LAZY
, testProperty "spanEnd" $
\f x -> B.spanEnd f x === swap ((B.reverse *** B.reverse) (B.span f (B.reverse x)))
#endif
, testProperty "split" $
\(toElem -> c) x -> map B.unpack (B.split c x) === split c (B.unpack x)
, testProperty "split empty" $
Expand Down