Skip to content

Commit

Permalink
Speed up Data.Text.intersperse
Browse files Browse the repository at this point in the history
  • Loading branch information
Bodigrim committed Aug 30, 2021
1 parent e0f5cd7 commit 28a8cd0
Showing 1 changed file with 59 additions and 3 deletions.
62 changes: 59 additions & 3 deletions src/Data/Text.hs
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ import Data.Binary (Binary(get, put))
import Data.Monoid (Monoid(..))
import Data.Semigroup (Semigroup(..))
import Data.String (IsString(..))
import Data.Text.Internal.Encoding.Utf8 (utf8Length, utf8LengthByLeader, chr2, chr3, chr4)
import Data.Text.Internal.Encoding.Utf8 (utf8Length, utf8LengthByLeader, chr2, chr3, chr4, ord2, ord3, ord4)
import qualified Data.Text.Internal.Fusion as S
import qualified Data.Text.Internal.Fusion.Common as S
import Data.Text.Encoding (decodeUtf8', encodeUtf8)
Expand Down Expand Up @@ -669,8 +669,61 @@ intercalate t = concat . L.intersperse t
-- "S.H.I.E.L.D"
--
-- Performs replacement on invalid scalar values.
-- Stream-based implementation: the input is converted to a stream, the
-- sanitised separator ('safe') is interspersed between its characters, and
-- the resulting stream is materialised back into a 'Text'.
intersperse :: Char -> Text -> Text
intersperse sep txt = unstream interspersed
  where
    interspersed = S.intersperse (safe sep) (stream txt)
-- Direct UTF-8 implementation: a single pass over the source bytes that
-- copies each code point into a freshly allocated array and writes the
-- encoded separator right after it.
--
-- The separator is passed through 'safe' first, so that an invalid scalar
-- value is replaced before it is encoded (the stream-based implementation
-- did the same). Without this, a surrogate 'Char' would be encoded verbatim
-- as three bytes, yielding ill-formed UTF-8 in the result.
intersperse :: Char -> Text -> Text
intersperse c t@(Text src o l) = if l == 0 then mempty else runST $ do
  let -- Sanitised separator; every use below must go through 'sep', not 'c'.
      !sep = safe c
      !cLen = utf8Length sep
      -- Reserve room for a separator after /every/ character, including the
      -- last one: the copy loop below always writes a separator, and the
      -- superfluous trailing one is cut off when the result is built.
      dstLen = l + length t P.* cLen
      -- First source offset past the end of the input slice.
      !srcEnd = o + l

  dst <- A.new dstLen

  -- Pre-encode the separator once; 'writeSep' only stores the bytes.
  let writeSep = case cLen of
        1 -> \dstOff ->
          A.unsafeWrite dst dstOff (ord8 sep)
        2 -> let (c0, c1) = ord2 sep in \dstOff -> do
          A.unsafeWrite dst dstOff c0
          A.unsafeWrite dst (dstOff + 1) c1
        3 -> let (c0, c1, c2) = ord3 sep in \dstOff -> do
          A.unsafeWrite dst dstOff c0
          A.unsafeWrite dst (dstOff + 1) c1
          A.unsafeWrite dst (dstOff + 2) c2
        _ -> let (c0, c1, c2, c3) = ord4 sep in \dstOff -> do
          A.unsafeWrite dst dstOff c0
          A.unsafeWrite dst (dstOff + 1) c1
          A.unsafeWrite dst (dstOff + 2) c2
          A.unsafeWrite dst (dstOff + 3) c3

  -- Copy one code point per iteration, followed by the separator.
  let go !srcOff !dstOff = if srcOff >= srcEnd then return () else do
        let m0 = A.unsafeIndex src srcOff
            -- m1..m3 are lazy bindings: each is only forced in the branches
            -- below that actually use it, i.e. when the leader byte says the
            -- code point is that long.
            m1 = A.unsafeIndex src (srcOff + 1)
            m2 = A.unsafeIndex src (srcOff + 2)
            m3 = A.unsafeIndex src (srcOff + 3)
            !d = utf8LengthByLeader m0
        case d of
          1 -> do
            A.unsafeWrite dst dstOff m0
            writeSep (dstOff + 1)
            go (srcOff + 1) (dstOff + 1 + cLen)
          2 -> do
            A.unsafeWrite dst dstOff m0
            A.unsafeWrite dst (dstOff + 1) m1
            writeSep (dstOff + 2)
            go (srcOff + 2) (dstOff + 2 + cLen)
          3 -> do
            A.unsafeWrite dst dstOff m0
            A.unsafeWrite dst (dstOff + 1) m1
            A.unsafeWrite dst (dstOff + 2) m2
            writeSep (dstOff + 3)
            go (srcOff + 3) (dstOff + 3 + cLen)
          _ -> do
            A.unsafeWrite dst dstOff m0
            A.unsafeWrite dst (dstOff + 1) m1
            A.unsafeWrite dst (dstOff + 2) m2
            A.unsafeWrite dst (dstOff + 3) m3
            writeSep (dstOff + 4)
            go (srcOff + 4) (dstOff + 4 + cLen)

  go o 0
  arr <- A.unsafeFreeze dst
  -- Drop the trailing separator written after the last character.
  return (Text arr 0 (dstLen - cLen))
{-# INLINE [1] intersperse #-}

-- | /O(n)/ Reverse the characters of a string.
Expand Down Expand Up @@ -1956,6 +2009,9 @@ copy (Text arr off len) = Text (A.run go) 0 len
A.copyI len marr 0 arr off
return marr

-- | Narrow a character's code point to a single byte via 'P.fromIntegral'.
-- Code points that do not fit into a 'Word8' are truncated, so this is only
-- meaningful for characters whose encoding is one byte long.
ord8 :: Char -> Word8
ord8 ch = P.fromIntegral (ord ch)

-- | Convert an 'Int' to a 'CSize' using 'P.fromIntegral'.
intToCSize :: Int -> CSize
intToCSize n = P.fromIntegral n

Expand Down

0 comments on commit 28a8cd0

Please sign in to comment.