Do not benchmark ByteString

haskell · Jun 10, 2021 · e4f37ec · e4f37ec
1 parent 93561e2
commit e4f37ec
Show file tree

Hide file tree

Showing 8 changed files with 16 additions and 154 deletions.
diff --git a/benchmarks/cbits-bench/time_iconv.c b/benchmarks/cbits-bench/time_iconv.c
diff --git a/benchmarks/haskell/Benchmarks/Builder.hs b/benchmarks/haskell/Benchmarks/Builder.hs
@@ -10,11 +10,7 @@ module Benchmarks.Builder
     ) where
 
 import Test.Tasty.Bench (Benchmark, bgroup, bench, nf)
-import Data.Binary.Builder as B
 import Data.ByteString.Char8 ()
-import qualified Data.ByteString.Builder as Blaze
-import qualified Data.ByteString as SB
-import qualified Data.ByteString.Lazy as LB
 import qualified Data.Text as T
 import qualified Data.Text.Lazy as LT
 import qualified Data.Text.Lazy.Builder as LTB
@@ -26,12 +22,6 @@ benchmark = bgroup "Builder"
     [ bgroup "Comparison"
       [ bench "LazyText" $ nf
           (LT.length . LTB.toLazyText . mconcat . map LTB.fromText) texts
-      , bench "Binary" $ nf
-          (LB.length . B.toLazyByteString . mconcat . map B.fromByteString)
-          byteStrings
-      , bench "Blaze" $ nf
-          (LB.length . Blaze.toLazyByteString . mconcat . map Blaze.stringUtf8)
-          strings
       ]
     , bgroup "Int"
       [ bgroup "Decimal"
@@ -61,13 +51,3 @@ benchmark = bgroup "Builder"
 texts :: [T.Text]
 texts = take 200000 $ cycle ["foo", "λx", "由の"]
 {-# NOINLINE texts #-}
-
--- Note that the non-ascii characters will be chopped
-byteStrings :: [SB.ByteString]
-byteStrings = take 200000 $ cycle ["foo", "λx", "由の"]
-{-# NOINLINE byteStrings #-}
-
--- Note that the non-ascii characters will be chopped
-strings :: [String]
-strings = take 200000 $ cycle ["foo", "λx", "由の"]
-{-# NOINLINE strings #-}
diff --git a/benchmarks/haskell/Benchmarks/DecodeUtf8.hs b/benchmarks/haskell/Benchmarks/DecodeUtf8.hs
@@ -21,14 +21,9 @@ module Benchmarks.DecodeUtf8
     , benchmarkASCII
     ) where
 
-import Foreign.C.Types
-import Data.ByteString.Internal (ByteString(..))
 import Data.ByteString.Lazy.Internal (ByteString(..))
-import Foreign.Ptr (Ptr, plusPtr)
-import Foreign.ForeignPtr (withForeignPtr)
-import Data.Word (Word8)
 import qualified Test.Tasty.Bench as C
-import Test.Tasty.Bench (Benchmark, bgroup, nf, whnfIO)
+import Test.Tasty.Bench (Benchmark, bgroup, nf)
 import qualified Data.ByteString as B
 import qualified Data.ByteString.Lazy as BL
 import qualified Data.Text as T
@@ -56,7 +51,6 @@ benchmark kind ~(bs, lbs) =
     in bgroup "DecodeUtf8"
         [ bench "Strict" $ nf T.decodeUtf8 bs
         , bench "Stream" $ nf decodeStream lbs
-        , bench "IConv" $ whnfIO $ iconv bs
         , bench "StrictLength" $ nf (T.length . T.decodeUtf8) bs
         , bench "StrictInitLength" $ nf (T.length . T.init . T.decodeUtf8) bs
         , bench "Lazy" $ nf TL.decodeUtf8 lbs
@@ -74,9 +68,3 @@ benchmarkASCII ~(bs, lbs) =
         , C.bench "lazy decodeLatin1" $ nf TL.decodeLatin1 lbs
         , C.bench "lazy decodeASCII" $ nf TL.decodeASCII lbs
         ]
-
-iconv :: B.ByteString -> IO CInt
-iconv (PS fp off len) = withForeignPtr fp $ \ptr ->
-                        time_iconv (ptr `plusPtr` off) (fromIntegral len)
-
-foreign import ccall unsafe time_iconv :: Ptr Word8 -> CSize -> IO CInt
diff --git a/benchmarks/haskell/Benchmarks/ReadNumbers.hs b/benchmarks/haskell/Benchmarks/ReadNumbers.hs
@@ -23,38 +23,25 @@ module Benchmarks.ReadNumbers
 
 import Test.Tasty.Bench (Benchmark, bgroup, bench, whnf)
 import Data.List (foldl')
-import Numeric (readDec, readFloat, readHex)
-import qualified Data.ByteString.Char8 as B
-import qualified Data.ByteString.Lazy.Char8 as BL
-import qualified Data.ByteString.Lex.Fractional as B
 import qualified Data.Text as T
 import qualified Data.Text.IO as T
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Lazy.IO as TL
 import qualified Data.Text.Lazy.Read as TL
 import qualified Data.Text.Read as T
 
-type Env = ([String], [T.Text], [TL.Text], [B.ByteString], [BL.ByteString])
+type Env = ([T.Text], [TL.Text])
 
 initEnv :: FilePath -> IO Env
 initEnv fp = do
-    -- Read all files into lines: string, text, lazy text, bytestring, lazy
-    -- bytestring
-    s <- lines `fmap` readFile fp
     t <- T.lines `fmap` T.readFile fp
     tl <- TL.lines `fmap` TL.readFile fp
-    b <- B.lines `fmap` B.readFile fp
-    bl <- BL.lines `fmap` BL.readFile fp
-    return (s, t, tl, b, bl)
+    return (t, tl)
 
 benchmark :: Env -> Benchmark
-benchmark ~(s, t, tl, b, bl) =
+benchmark ~(t, tl) =
     bgroup "ReadNumbers"
-        [ bench "DecimalString"     $ whnf (int . string readDec) s
-        , bench "HexadecimalString" $ whnf (int . string readHex) s
-        , bench "DoubleString"      $ whnf (double . string readFloat) s
-
-        , bench "DecimalText"     $ whnf (int . text (T.signed T.decimal)) t
+        [ bench "DecimalText"     $ whnf (int . text (T.signed T.decimal)) t
         , bench "HexadecimalText" $ whnf (int . text (T.signed T.hexadecimal)) t
         , bench "DoubleText"      $ whnf (double . text T.double) t
         , bench "RationalText"    $ whnf (double . text T.rational) t
@@ -67,12 +54,6 @@ benchmark ~(s, t, tl, b, bl) =
             whnf (double . text TL.double) tl
         , bench "RationalLazyText" $
             whnf (double . text TL.rational) tl
-
-        , bench "DecimalByteString" $ whnf (int . byteString B.readInt) b
-        , bench "DoubleByteString"  $ whnf (double . byteString B.readDecimal) b
-
-        , bench "DecimalLazyByteString" $
-            whnf (int . byteString BL.readInt) bl
         ]
   where
     -- Used for fixing types
@@ -81,20 +62,8 @@ benchmark ~(s, t, tl, b, bl) =
     double :: Double -> Double
     double = id
 
-string :: (Ord a, Num a) => (t -> [(a, t)]) -> [t] -> a
-string reader = foldl' go 1000000
-  where
-    go z t = case reader t of [(n, _)] -> min n z
-                              _        -> z
-
 text :: (Ord a, Num a) => (t -> Either String (a,t)) -> [t] -> a
 text reader = foldl' go 1000000
   where
     go z t = case reader t of Left _       -> z
                               Right (n, _) -> min n z
-
-byteString :: (Ord a, Num a) => (t -> Maybe (a,t)) -> [t] -> a
-byteString reader = foldl' go 1000000
-  where
-    go z t = case reader t of Nothing     -> z
-                              Just (n, _) -> min n z
diff --git a/benchmarks/haskell/Benchmarks/Replace.hs b/benchmarks/haskell/Benchmarks/Replace.hs
@@ -11,40 +11,26 @@ module Benchmarks.Replace
     ) where
 
 import Test.Tasty.Bench (Benchmark, bgroup, bench, nf)
-import qualified Data.ByteString.Char8 as B
-import qualified Data.ByteString.Lazy as BL
-import qualified Data.ByteString.Lazy.Search as BL
-import qualified Data.ByteString.Search as B
 import qualified Data.Text as T
-import qualified Data.Text.Encoding as T
 import qualified Data.Text.Lazy as TL
-import qualified Data.Text.Lazy.Encoding as TL
 import qualified Data.Text.Lazy.IO as TL
 
-type Env = (T.Text, B.ByteString, TL.Text, BL.ByteString)
+type Env = (T.Text, TL.Text)
 
 initEnv :: FilePath -> IO Env
 initEnv fp = do
     tl <- TL.readFile fp
-    bl <- BL.readFile fp
     let !t = TL.toStrict tl
-        !b = T.encodeUtf8 t
-    return (t, b, tl, bl)
+    return (t, tl)
 
 benchmark :: String -> String -> Env -> Benchmark
-benchmark pat sub ~(t, b, tl, bl) =
+benchmark pat sub ~(t, tl) =
     bgroup "Replace" [
           bench "Text"           $ nf (T.length . T.replace tpat tsub) t
-        , bench "ByteString"     $ nf (BL.length . B.replace bpat bsub) b
         , bench "LazyText"       $ nf (TL.length . TL.replace tlpat tlsub) tl
-        , bench "LazyByteString" $ nf (BL.length . BL.replace blpat blsub) bl
         ]
   where
     tpat  = T.pack pat
     tsub  = T.pack sub
     tlpat = TL.pack pat
     tlsub = TL.pack sub
-    bpat = T.encodeUtf8 tpat
-    bsub = T.encodeUtf8 tsub
-    blpat = B.concat $ BL.toChunks $ TL.encodeUtf8 tlpat
-    blsub = B.concat $ BL.toChunks $ TL.encodeUtf8 tlsub
diff --git a/benchmarks/haskell/Benchmarks/Search.hs b/benchmarks/haskell/Benchmarks/Search.hs
@@ -10,44 +10,28 @@ module Benchmarks.Search
     ) where
 
 import Test.Tasty.Bench (Benchmark, bench, bgroup, whnf)
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Lazy as BL
-import qualified Data.ByteString.Lazy.Search as BL
-import qualified Data.ByteString.Search as B
 import qualified Data.Text as T
-import qualified Data.Text.Encoding as T
 import qualified Data.Text.IO as T
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Lazy.IO as TL
 
-type Env = (B.ByteString, BL.ByteString, T.Text, TL.Text)
+type Env = (T.Text, TL.Text)
 
 initEnv :: FilePath -> IO Env
 initEnv fp = do
-    b  <- B.readFile fp
-    bl <- BL.readFile fp
     t  <- T.readFile fp
     tl <- TL.readFile fp
-    return (b, bl, t, tl)
+    return (t, tl)
 
 benchmark :: T.Text -> Env -> Benchmark
-benchmark needleT ~(b, bl, t, tl) =
+benchmark needleT ~(t, tl) =
     bgroup "FileIndices"
-        [ bench "ByteString"     $ whnf (byteString needleB)     b
-        , bench "LazyByteString" $ whnf (lazyByteString needleB) bl
-        , bench "Text"           $ whnf (text needleT)           t
+        [ bench "Text"           $ whnf (text needleT)           t
         , bench "LazyText"       $ whnf (lazyText needleTL)      tl
         ]
   where
-    needleB = T.encodeUtf8 needleT
     needleTL = TL.fromChunks [needleT]
 
-byteString :: B.ByteString -> B.ByteString -> Int
-byteString needle = length . B.indices needle
-
-lazyByteString :: B.ByteString -> BL.ByteString -> Int
-lazyByteString needle = length . BL.indices needle
-
 text :: T.Text -> T.Text -> Int
 text = T.count
 

diff --git a/benchmarks/haskell/Benchmarks/WordFrequencies.hs b/benchmarks/haskell/Benchmarks/WordFrequencies.hs
@@ -14,29 +14,23 @@ module Benchmarks.WordFrequencies
     ) where
 
 import Test.Tasty.Bench (Benchmark, bench, bgroup, whnf)
-import Data.Char (toLower)
 import Data.List (foldl')
 import Data.Map (Map)
-import qualified Data.ByteString.Char8 as B
 import qualified Data.Map as M
 import qualified Data.Text as T
 import qualified Data.Text.IO as T
 
-type Env = (String, B.ByteString, T.Text)
+type Env = T.Text
 
 initEnv :: FilePath -> IO Env
 initEnv fp = do
-    s <- readFile fp
-    b <- B.readFile fp
     t <- T.readFile fp
-    return (s, b, t)
+    return t
 
 benchmark :: Env -> Benchmark
-benchmark ~(s, b, t) =
+benchmark ~t =
     bgroup "WordFrequencies"
-        [ bench "String"     $ whnf (frequencies . words . map toLower)     s
-        , bench "ByteString" $ whnf (frequencies . B.words . B.map toLower) b
-        , bench "Text"       $ whnf (frequencies . T.words . T.toLower)     t
+        [ bench "Text"       $ whnf (frequencies . T.words . T.toLower)     t
         ]
 
 frequencies :: Ord a => [a] -> Map a Int

diff --git a/text.cabal b/text.cabal
@@ -233,19 +233,15 @@ benchmark text-benchmarks
     ghc-options:  "-with-rtsopts=-A32m"
 
   build-depends:  base,
-                  binary,
                   bytestring >= 0.10.4,
-                  bytestring-lexing >= 0.5.0,
                   containers,
                   deepseq,
                   filepath,
-                  stringsearch,
                   tasty-bench >= 0.2,
                   text,
                   transformers,
                   vector
 
-  c-sources:      benchmarks/cbits-bench/time_iconv.c
   hs-source-dirs: benchmarks/haskell
   main-is:        Benchmarks.hs
   other-modules: