diff --git a/src/Data/Text.hs b/src/Data/Text.hs
index b9aa0b27..532c4156 100644
--- a/src/Data/Text.hs
+++ b/src/Data/Text.hs
@@ -225,7 +225,7 @@ import Data.Binary (Binary(get, put))
 import Data.Monoid (Monoid(..))
 import Data.Semigroup (Semigroup(..))
 import Data.String (IsString(..))
-import Data.Text.Internal.Encoding.Utf8 (utf8Length, utf8LengthByLeader, chr2, chr3, chr4)
+import Data.Text.Internal.Encoding.Utf8 (utf8Length, utf8LengthByLeader, chr2, chr3, chr4, ord2, ord3, ord4)
 import qualified Data.Text.Internal.Fusion as S
 import qualified Data.Text.Internal.Fusion.Common as S
 import Data.Text.Encoding (decodeUtf8', encodeUtf8)
@@ -669,8 +669,70 @@ intercalate t = concat . L.intersperse t
 -- "S.H.I.E.L.D"
 --
 -- Performs replacement on invalid scalar values.
-intersperse     :: Char -> Text -> Text
-intersperse c t = unstream (S.intersperse (safe c) (stream t))
+intersperse :: Char -> Text -> Text
+intersperse c' t@(Text src o l) = if l == 0 then mempty else runST $ do
+    -- Replace an invalid scalar value up front, as the Haddock above
+    -- promises; nothing further down performs that replacement.
+    let !c = safe c'
+        !cLen = utf8Length c
+        -- One separator is budgeted per character, including the last one;
+        -- the surplus trailing separator is trimmed from the result below.
+        dstLen = l + length t P.* cLen
+
+    dst <- A.new dstLen
+
+    -- Encode the separator once, then reuse the resulting writer per character.
+    let writeSep = case cLen of
+          1 -> \dstOff ->
+            A.unsafeWrite dst dstOff (ord8 c)
+          2 -> let (c0, c1) = ord2 c in \dstOff -> do
+            A.unsafeWrite dst dstOff c0
+            A.unsafeWrite dst (dstOff + 1) c1
+          3 -> let (c0, c1, c2) = ord3 c in \dstOff -> do
+            A.unsafeWrite dst dstOff c0
+            A.unsafeWrite dst (dstOff + 1) c1
+            A.unsafeWrite dst (dstOff + 2) c2
+          _ -> let (c0, c1, c2, c3) = ord4 c in \dstOff -> do
+            A.unsafeWrite dst dstOff c0
+            A.unsafeWrite dst (dstOff + 1) c1
+            A.unsafeWrite dst (dstOff + 2) c2
+            A.unsafeWrite dst (dstOff + 3) c3
+    let go !srcOff !dstOff = if srcOff >= o + l then return () else do
+          -- m1..m3 carry no bang: they are only demanded by the branches
+          -- for characters long enough to contain them.
+          let m0 = A.unsafeIndex src srcOff
+              m1 = A.unsafeIndex src (srcOff + 1)
+              m2 = A.unsafeIndex src (srcOff + 2)
+              m3 = A.unsafeIndex src (srcOff + 3)
+              !d = utf8LengthByLeader m0
+          case d of
+            1 -> do
+              A.unsafeWrite dst dstOff m0
+              writeSep (dstOff + 1)
+              go (srcOff + 1) (dstOff + 1 + cLen)
+            2 -> do
+              A.unsafeWrite dst dstOff m0
+              A.unsafeWrite dst (dstOff + 1) m1
+              writeSep (dstOff + 2)
+              go (srcOff + 2) (dstOff + 2 + cLen)
+            3 -> do
+              A.unsafeWrite dst dstOff m0
+              A.unsafeWrite dst (dstOff + 1) m1
+              A.unsafeWrite dst (dstOff + 2) m2
+              writeSep (dstOff + 3)
+              go (srcOff + 3) (dstOff + 3 + cLen)
+            _ -> do
+              A.unsafeWrite dst dstOff m0
+              A.unsafeWrite dst (dstOff + 1) m1
+              A.unsafeWrite dst (dstOff + 2) m2
+              A.unsafeWrite dst (dstOff + 3) m3
+              writeSep (dstOff + 4)
+              go (srcOff + 4) (dstOff + 4 + cLen)
+
+    go o 0
+    arr <- A.unsafeFreeze dst
+    -- Drop the separator written after the final character.
+    return (Text arr 0 (dstLen - cLen))
 {-# INLINE [1] intersperse #-}
 
 -- | /O(n)/ Reverse the characters of a string.
@@ -1956,6 +2018,11 @@ copy (Text arr off len) = Text (A.run go) 0 len
       A.copyI len marr 0 arr off
       return marr
 
+-- | Narrow a code point to a single byte. Lossless only for one-byte
+-- (ASCII) characters, which is how 'intersperse' uses it.
+ord8 :: Char -> Word8
+ord8 = P.fromIntegral . ord
+
 intToCSize :: Int -> CSize
 intToCSize = P.fromIntegral
 