Skip to content

Commit

Permalink
Use qsort to sort short ByteString (#267)
Browse files Browse the repository at this point in the history
* Add benchmarks for sorting

* Write c_sort, which is a binding to qsort

* Use qsort to sort tiny bytestrings

* Update cbits/fpstring.c

Co-authored-by: Viktor Dukhovni <[email protected]>

* Change unsigned long to size_t

* Change unsigned long to size_t in return type

* Bonus points

* Remove clong

Co-authored-by: Viktor Dukhovni <[email protected]>
  • Loading branch information
Bodigrim and vdukhovni authored Aug 25, 2020
1 parent 371f224 commit fc9409e
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 37 deletions.
7 changes: 6 additions & 1 deletion Data/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1515,7 +1515,12 @@ tails p | null p = [empty]

-- | /O(n)/ Sort a ByteString efficiently, using qsort for inputs of at most 20 bytes and counting sort otherwise.
sort :: ByteString -> ByteString
sort (BS input l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
sort (BS input l)
-- qsort outperforms counting sort for small arrays
| l <= 20 = unsafeCreate l $ \ptr -> withForeignPtr input $ \inp -> do
memcpy ptr inp (fromIntegral l)
c_sort ptr (fromIntegral l)
| otherwise = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do

_ <- memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
withForeignPtr input (\x -> countOccurrences arr x l)
Expand Down
28 changes: 16 additions & 12 deletions Data/ByteString/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,12 @@ module Data.ByteString.Internal (
memset, -- :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)

-- * cbits functions
c_reverse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
c_intersperse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
c_maximum, -- :: Ptr Word8 -> CInt -> IO Word8
c_minimum, -- :: Ptr Word8 -> CInt -> IO Word8
c_count, -- :: Ptr Word8 -> CInt -> Word8 -> IO CInt
c_reverse, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
c_intersperse, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> Word8 -> IO ()
c_maximum, -- :: Ptr Word8 -> CSize -> IO Word8
c_minimum, -- :: Ptr Word8 -> CSize -> IO Word8
c_count, -- :: Ptr Word8 -> CSize -> Word8 -> IO CSize
c_sort, -- :: Ptr Word8 -> CSize -> IO ()

-- * Chars
w2c, c2w, isSpaceWord8, isSpaceChar8,
Expand All @@ -100,9 +101,9 @@ import Foreign.Ptr (Ptr, FunPtr, plusPtr)
import Foreign.Storable (Storable(..))

#if MIN_VERSION_base(4,5,0) || __GLASGOW_HASKELL__ >= 703
import Foreign.C.Types (CInt(..), CSize(..), CULong(..))
import Foreign.C.Types (CInt(..), CSize(..))
#else
import Foreign.C.Types (CInt, CSize, CULong)
import Foreign.C.Types (CInt, CSize)
#endif

import Foreign.C.String (CString)
Expand Down Expand Up @@ -770,16 +771,19 @@ memset p w s = c_memset p (fromIntegral w) s
--

foreign import ccall unsafe "static fpstring.h fps_reverse" c_reverse
:: Ptr Word8 -> Ptr Word8 -> CULong -> IO ()
:: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()

foreign import ccall unsafe "static fpstring.h fps_intersperse" c_intersperse
:: Ptr Word8 -> Ptr Word8 -> CULong -> Word8 -> IO ()
:: Ptr Word8 -> Ptr Word8 -> CSize -> Word8 -> IO ()

foreign import ccall unsafe "static fpstring.h fps_maximum" c_maximum
:: Ptr Word8 -> CULong -> IO Word8
:: Ptr Word8 -> CSize -> IO Word8

foreign import ccall unsafe "static fpstring.h fps_minimum" c_minimum
:: Ptr Word8 -> CULong -> IO Word8
:: Ptr Word8 -> CSize -> IO Word8

foreign import ccall unsafe "static fpstring.h fps_count" c_count
:: Ptr Word8 -> CULong -> Word8 -> IO CULong
:: Ptr Word8 -> CSize -> Word8 -> IO CSize

-- | Binding to the C function @fps_sort@ declared in @fpstring.h@.
-- Takes a pointer to a byte buffer and the number of bytes in it; the C
-- side sorts that buffer in place by calling @qsort@ with a byte comparator.
foreign import ccall unsafe "static fpstring.h fps_sort" c_sort
:: Ptr Word8 -> CSize -> IO ()
16 changes: 6 additions & 10 deletions Data/ByteString/Short/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,7 @@ copyByteArrayToAddr# = GHC.Exts.copyByteArrayToAddr#
#else

copyAddrToByteArray# src dst dst_off len s =
unIO_ (memcpy_AddrToByteArray dst (clong dst_off) src 0 (csize len)) s
unIO_ (memcpy_AddrToByteArray dst (csize dst_off) src 0 (csize len)) s

copyAddrToByteArray0 :: Addr# -> MutableByteArray# s -> Int#
-> State# RealWorld -> State# RealWorld
Expand All @@ -587,14 +587,14 @@ copyAddrToByteArray0 src dst len s =
= copyAddrToByteArray0 src dst len s #-}

foreign import ccall unsafe "fpstring.h fps_memcpy_offsets"
memcpy_AddrToByteArray :: MutableByteArray# s -> CLong -> Addr# -> CLong -> CSize -> IO ()
memcpy_AddrToByteArray :: MutableByteArray# s -> CSize -> Addr# -> CSize -> CSize -> IO ()

-- | Binding to C @memcpy@ from @string.h@, typed to copy from a raw address
-- into a mutable byte array. Arguments are destination, source, and byte
-- count; the pointer that @memcpy@ returns is ignored (result type @IO ()@).
-- NOTE(review): the trailing @0@ presumably means "no offsets" — confirm
-- against the callers.
foreign import ccall unsafe "string.h memcpy"
memcpy_AddrToByteArray0 :: MutableByteArray# s -> Addr# -> CSize -> IO ()


copyByteArrayToAddr# src src_off dst len s =
unIO_ (memcpy_ByteArrayToAddr dst 0 src (clong src_off) (csize len)) s
unIO_ (memcpy_ByteArrayToAddr dst 0 src (csize src_off) (csize len)) s

copyByteArrayToAddr0 :: ByteArray# -> Addr# -> Int#
-> State# RealWorld -> State# RealWorld
Expand All @@ -608,7 +608,7 @@ copyByteArrayToAddr0 src dst len s =
= copyByteArrayToAddr0 src dst len s #-}

foreign import ccall unsafe "fpstring.h fps_memcpy_offsets"
memcpy_ByteArrayToAddr :: Addr# -> CLong -> ByteArray# -> CLong -> CSize -> IO ()
memcpy_ByteArrayToAddr :: Addr# -> CSize -> ByteArray# -> CSize -> CSize -> IO ()

-- | Binding to C @memcpy@ from @string.h@, typed to copy from a byte array
-- to a raw address. Arguments are destination, source, and byte count; the
-- pointer that @memcpy@ returns is ignored (result type @IO ()@).
-- NOTE(review): the trailing @0@ presumably means "no offsets" — confirm
-- against the callers.
foreign import ccall unsafe "string.h memcpy"
memcpy_ByteArrayToAddr0 :: Addr# -> ByteArray# -> CSize -> IO ()
Expand All @@ -617,9 +617,6 @@ foreign import ccall unsafe "string.h memcpy"
-- | Turn an @IO ()@ action into a function on the @State# RealWorld@ token:
-- the action is unwrapped with @unIO@ (defined outside this excerpt), applied
-- to the incoming token, and only the resulting token is returned; the unit
-- result is dropped.
unIO_ :: IO () -> State# RealWorld -> State# RealWorld
unIO_ io s = case unIO io s of (# s, _ #) -> s

clong :: Int# -> CLong
clong i# = fromIntegral (I# i#)

-- | Convert an unboxed 'Int#' to a 'CSize' by boxing it with 'I#' and
-- applying 'fromIntegral'. Used for the offsets and lengths handed to the
-- C @memcpy@ wrappers.
csize :: Int# -> CSize
csize i# = fromIntegral (I# i#)
#endif
Expand All @@ -629,14 +626,13 @@ copyByteArray# = GHC.Exts.copyByteArray#
#else
copyByteArray# src src_off dst dst_off len s =
unST_ (unsafeIOToST
(memcpy_ByteArray dst (clong dst_off) src (clong src_off) (csize len))) s
(memcpy_ByteArray dst (csize dst_off) src (csize src_off) (csize len))) s
where
unST (ST st) = st
unST_ st s = case unST st s of (# s, _ #) -> s

foreign import ccall unsafe "fpstring.h fps_memcpy_offsets"
memcpy_ByteArray :: MutableByteArray# s -> CLong
-> ByteArray# -> CLong -> CSize -> IO ()
memcpy_ByteArray :: MutableByteArray# s -> CSize -> ByteArray# -> CSize -> CSize -> IO ()
#endif

-- | /O(n)./ Construct a new @ShortByteString@ from a @CString@. The
Expand Down
5 changes: 5 additions & 0 deletions bench/BenchAll.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import Gauge
import Prelude hiding (words)

import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L

import Data.ByteString.Builder
Expand Down Expand Up @@ -225,6 +226,9 @@ sanityCheckInfo =
]
]

-- | Inputs for the @sort@ benchmark group: the prefixes of length 10 through
-- 25 of the descending byte sequence 122, 121 .. 32.
sortInputs :: [S.ByteString]
sortInputs = [S.take len descending | len <- [10 .. 25]]
  where
    -- Strictly descending bytes from 122 down to 32.
    descending = S.pack [122, 121 .. 32]

main :: IO ()
main = do
mapM_ putStrLn sanityCheckInfo
Expand Down Expand Up @@ -387,4 +391,5 @@ main = do
, bench "balancedSlow" $ partitionLazy (\x -> hashWord8 x < w 128)
]
]
, bgroup "sort" $ map (\s -> bench (S8.unpack s) $ nf S.sort s) sortInputs
]
21 changes: 14 additions & 7 deletions cbits/fpstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#include "fpstring.h"

/* copy a string in reverse */
void fps_reverse(unsigned char *q, unsigned char *p, unsigned long n) {
void fps_reverse(unsigned char *q, unsigned char *p, size_t n) {
p += n-1;
while (n-- != 0)
*q++ = *p--;
Expand All @@ -42,7 +42,7 @@ void fps_reverse(unsigned char *q, unsigned char *p, unsigned long n) {
of the duplicated string */
void fps_intersperse(unsigned char *q,
unsigned char *p,
unsigned long n,
size_t n,
unsigned char c) {

while (n > 1) {
Expand All @@ -55,7 +55,7 @@ void fps_intersperse(unsigned char *q,
}

/* find maximum char in a packed string */
unsigned char fps_maximum(unsigned char *p, unsigned long len) {
unsigned char fps_maximum(unsigned char *p, size_t len) {
unsigned char *q, c = *p;
for (q = p; q < p + len; q++)
if (*q > c)
Expand All @@ -64,7 +64,7 @@ unsigned char fps_maximum(unsigned char *p, unsigned long len) {
}

/* find minimum char in a packed string */
unsigned char fps_minimum(unsigned char *p, unsigned long len) {
unsigned char fps_minimum(unsigned char *p, size_t len) {
unsigned char *q, c = *p;
for (q = p; q < p + len; q++)
if (*q < c)
Expand All @@ -73,7 +73,7 @@ unsigned char fps_minimum(unsigned char *p, unsigned long len) {
}

/* count the number of occurrences of a char in a string */
unsigned long fps_count(unsigned char *p, unsigned long len, unsigned char w) {
size_t fps_count(unsigned char *p, size_t len, unsigned char w) {
unsigned long c;
for (c = 0; len-- != 0; ++p)
if (*p == w)
Expand All @@ -84,7 +84,14 @@ unsigned long fps_count(unsigned char *p, unsigned long len, unsigned char w) {
/* This wrapper is here so that we can copy a sub-range of a ByteArray#.
We cannot construct a pointer to the interior of an unpinned ByteArray#,
except by doing an unsafe ffi call, and adjusting the pointer C-side. */
void * fps_memcpy_offsets(void *dst, unsigned long dst_off,
const void *src, unsigned long src_off, size_t n) {
void * fps_memcpy_offsets(void *dst, size_t dst_off, const void *src, size_t src_off, size_t n) {
    /* Copies n bytes from (src + src_off) to (dst + dst_off) and returns the
       destination address handed to memcpy, i.e. dst advanced by dst_off.
       Offsets are in bytes. ISO C does not define arithmetic on void *
       (it is a GNU extension), so the offsets are applied through char *. */
    return memcpy((char *)dst + dst_off, (const char *)src + src_off, n);
}

/* qsort comparator for single bytes. Returns a negative, zero, or positive
   value when the byte at a is less than, equal to, or greater than the byte
   at b, both read as unsigned char. */
int fps_compare(const void *a, const void *b) {
    /* keep the const qualifier that qsort's comparator signature promises */
    const unsigned char x = *(const unsigned char *)a;
    const unsigned char y = *(const unsigned char *)b;
    return (int)x - (int)y;
}

/* Sort the len bytes starting at p in place, in ascending unsigned order,
   by handing the buffer to the C library's qsort with fps_compare. */
void fps_sort(unsigned char *p, size_t len) {
    /* plain call: ISO C forbids `return <expression>;` in a void function */
    qsort(p, len, 1, fps_compare);
}
14 changes: 7 additions & 7 deletions include/fpstring.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@

#include <string.h>
#include <stdlib.h>

void fps_reverse(unsigned char *dest, unsigned char *from, unsigned long len);
void fps_intersperse(unsigned char *dest, unsigned char *from, unsigned long len, unsigned char c);
unsigned char fps_maximum(unsigned char *p, unsigned long len);
unsigned char fps_minimum(unsigned char *p, unsigned long len);
unsigned long fps_count(unsigned char *p, unsigned long len, unsigned char w);

void fps_reverse(unsigned char *dest, unsigned char *from, size_t len);
void fps_intersperse(unsigned char *dest, unsigned char *from, size_t len, unsigned char c);
unsigned char fps_maximum(unsigned char *p, size_t len);
unsigned char fps_minimum(unsigned char *p, size_t len);
size_t fps_count(unsigned char *p, size_t len, unsigned char w);
void fps_sort(unsigned char *p, size_t len);

0 comments on commit fc9409e

Please sign in to comment.