Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use qsort to sort short ByteString #267

Merged
merged 8 commits into from
Aug 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion Data/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1514,7 +1514,12 @@ tails p | null p = [empty]

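-- | /O(n)/ Sort a ByteString efficiently, using counting sort. Inputs of at most 20 bytes are sorted with C @qsort@ instead, which outperforms counting sort for short strings.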
sort :: ByteString -> ByteString
sort (BS input l) = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
sort (BS input l)
-- qsort outperforms counting sort for small arrays
| l <= 20 = unsafeCreate l $ \ptr -> withForeignPtr input $ \inp -> do
memcpy ptr inp (fromIntegral l)
c_sort ptr (fromIntegral l)
| otherwise = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do

_ <- memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
withForeignPtr input (\x -> countOccurrences arr x l)
Expand Down
28 changes: 16 additions & 12 deletions Data/ByteString/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,12 @@ module Data.ByteString.Internal (
memset, -- :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)

-- * cbits functions
c_reverse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> IO ()
c_intersperse, -- :: Ptr Word8 -> Ptr Word8 -> CInt -> Word8 -> IO ()
c_maximum, -- :: Ptr Word8 -> CInt -> IO Word8
c_minimum, -- :: Ptr Word8 -> CInt -> IO Word8
c_count, -- :: Ptr Word8 -> CInt -> Word8 -> IO CInt
c_reverse, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
c_intersperse, -- :: Ptr Word8 -> Ptr Word8 -> CSize -> Word8 -> IO ()
c_maximum, -- :: Ptr Word8 -> CSize -> IO Word8
c_minimum, -- :: Ptr Word8 -> CSize -> IO Word8
c_count, -- :: Ptr Word8 -> CSize -> Word8 -> IO CSize
c_sort, -- :: Ptr Word8 -> CSize -> IO ()

-- * Chars
w2c, c2w, isSpaceWord8, isSpaceChar8,
Expand All @@ -100,9 +101,9 @@ import Foreign.Ptr (Ptr, FunPtr, plusPtr)
import Foreign.Storable (Storable(..))

#if MIN_VERSION_base(4,5,0) || __GLASGOW_HASKELL__ >= 703
import Foreign.C.Types (CInt(..), CSize(..), CULong(..))
import Foreign.C.Types (CInt(..), CSize(..))
#else
import Foreign.C.Types (CInt, CSize, CULong)
import Foreign.C.Types (CInt, CSize)
#endif

import Foreign.C.String (CString)
Expand Down Expand Up @@ -745,16 +746,19 @@ memset p w s = c_memset p (fromIntegral w) s
--

foreign import ccall unsafe "static fpstring.h fps_reverse" c_reverse
:: Ptr Word8 -> Ptr Word8 -> CULong -> IO ()
:: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()

foreign import ccall unsafe "static fpstring.h fps_intersperse" c_intersperse
:: Ptr Word8 -> Ptr Word8 -> CULong -> Word8 -> IO ()
:: Ptr Word8 -> Ptr Word8 -> CSize -> Word8 -> IO ()

foreign import ccall unsafe "static fpstring.h fps_maximum" c_maximum
:: Ptr Word8 -> CULong -> IO Word8
:: Ptr Word8 -> CSize -> IO Word8

foreign import ccall unsafe "static fpstring.h fps_minimum" c_minimum
:: Ptr Word8 -> CULong -> IO Word8
:: Ptr Word8 -> CSize -> IO Word8

foreign import ccall unsafe "static fpstring.h fps_count" c_count
:: Ptr Word8 -> CULong -> Word8 -> IO CULong
:: Ptr Word8 -> CSize -> Word8 -> IO CSize

-- | Sort a buffer of bytes in place. The first argument points at the
-- buffer and the second is its length in bytes. Binds @fps_sort@ from
-- @fpstring.h@, which hands the buffer to C @qsort@ with a comparator
-- that orders bytes by their unsigned value.
foreign import ccall unsafe "static fpstring.h fps_sort" c_sort
:: Ptr Word8 -> CSize -> IO ()
16 changes: 6 additions & 10 deletions Data/ByteString/Short/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,7 @@ copyByteArrayToAddr# = GHC.Exts.copyByteArrayToAddr#
#else

copyAddrToByteArray# src dst dst_off len s =
unIO_ (memcpy_AddrToByteArray dst (clong dst_off) src 0 (csize len)) s
unIO_ (memcpy_AddrToByteArray dst (csize dst_off) src 0 (csize len)) s

copyAddrToByteArray0 :: Addr# -> MutableByteArray# s -> Int#
-> State# RealWorld -> State# RealWorld
Expand All @@ -587,14 +587,14 @@ copyAddrToByteArray0 src dst len s =
= copyAddrToByteArray0 src dst len s #-}

foreign import ccall unsafe "fpstring.h fps_memcpy_offsets"
memcpy_AddrToByteArray :: MutableByteArray# s -> CLong -> Addr# -> CLong -> CSize -> IO ()
memcpy_AddrToByteArray :: MutableByteArray# s -> CSize -> Addr# -> CSize -> CSize -> IO ()

-- | Direct binding to C @memcpy@ from @string.h@, typed so that the
-- destination is a 'MutableByteArray#' and the source is an 'Addr#'.
-- The final argument is the number of bytes to copy. This variant takes
-- no offsets, unlike the @fps_memcpy_offsets@ binding above; the
-- binding's result type is @IO ()@, so nothing is returned to Haskell.
foreign import ccall unsafe "string.h memcpy"
memcpy_AddrToByteArray0 :: MutableByteArray# s -> Addr# -> CSize -> IO ()


copyByteArrayToAddr# src src_off dst len s =
unIO_ (memcpy_ByteArrayToAddr dst 0 src (clong src_off) (csize len)) s
unIO_ (memcpy_ByteArrayToAddr dst 0 src (csize src_off) (csize len)) s

copyByteArrayToAddr0 :: ByteArray# -> Addr# -> Int#
-> State# RealWorld -> State# RealWorld
Expand All @@ -608,7 +608,7 @@ copyByteArrayToAddr0 src dst len s =
= copyByteArrayToAddr0 src dst len s #-}

foreign import ccall unsafe "fpstring.h fps_memcpy_offsets"
memcpy_ByteArrayToAddr :: Addr# -> CLong -> ByteArray# -> CLong -> CSize -> IO ()
memcpy_ByteArrayToAddr :: Addr# -> CSize -> ByteArray# -> CSize -> CSize -> IO ()

-- | Direct binding to C @memcpy@ from @string.h@, typed so that the
-- destination is an 'Addr#' and the source is a 'ByteArray#'. The final
-- argument is the number of bytes to copy. This variant takes no
-- offsets, unlike the @fps_memcpy_offsets@ binding above; the binding's
-- result type is @IO ()@, so nothing is returned to Haskell.
foreign import ccall unsafe "string.h memcpy"
memcpy_ByteArrayToAddr0 :: Addr# -> ByteArray# -> CSize -> IO ()
Expand All @@ -617,9 +617,6 @@ foreign import ccall unsafe "string.h memcpy"
-- | Run an @IO ()@ action as a raw state-token transformer: the action
-- is unwrapped with 'unIO', applied to the incoming token, and only the
-- resulting token is kept (the unit result is dropped).
unIO_ :: IO () -> State# RealWorld -> State# RealWorld
unIO_ action s0 =
  case unIO action s0 of
    (# s1, _ #) -> s1

clong :: Int# -> CLong
clong i# = fromIntegral (I# i#)

-- | Convert an unboxed 'Int#' to a 'CSize': the value is boxed with
-- 'I#' and then converted with 'fromIntegral'.
csize :: Int# -> CSize
csize n# = fromIntegral boxed
  where
    boxed = I# n#
#endif
Expand All @@ -629,14 +626,13 @@ copyByteArray# = GHC.Exts.copyByteArray#
#else
copyByteArray# src src_off dst dst_off len s =
unST_ (unsafeIOToST
(memcpy_ByteArray dst (clong dst_off) src (clong src_off) (csize len))) s
(memcpy_ByteArray dst (csize dst_off) src (csize src_off) (csize len))) s
where
unST (ST st) = st
unST_ st s = case unST st s of (# s, _ #) -> s

foreign import ccall unsafe "fpstring.h fps_memcpy_offsets"
memcpy_ByteArray :: MutableByteArray# s -> CLong
-> ByteArray# -> CLong -> CSize -> IO ()
memcpy_ByteArray :: MutableByteArray# s -> CSize -> ByteArray# -> CSize -> CSize -> IO ()
#endif

-- | /O(n)./ Construct a new @ShortByteString@ from a @CString@. The
Expand Down
5 changes: 5 additions & 0 deletions bench/BenchAll.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import Gauge
import Prelude hiding (words)

import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L

import Data.ByteString.Builder
Expand Down Expand Up @@ -225,6 +226,9 @@ sanityCheckInfo =
]
]

-- | Inputs for the @sort@ benchmark group: the prefixes of lengths 10
-- through 25 of the descending byte string @122, 121 .. 32@.
sortInputs :: [S.ByteString]
sortInputs = [S.take len descending | len <- [10 .. 25]]
  where
    descending = S.pack [122, 121 .. 32]

main :: IO ()
main = do
mapM_ putStrLn sanityCheckInfo
Expand Down Expand Up @@ -387,4 +391,5 @@ main = do
, bench "balancedSlow" $ partitionLazy (\x -> hashWord8 x < w 128)
]
]
, bgroup "sort" $ map (\s -> bench (S8.unpack s) $ nf S.sort s) sortInputs
]
21 changes: 14 additions & 7 deletions cbits/fpstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#include "fpstring.h"

/* copy a string in reverse */
void fps_reverse(unsigned char *q, unsigned char *p, unsigned long n) {
void fps_reverse(unsigned char *q, unsigned char *p, size_t n) {
p += n-1;
while (n-- != 0)
*q++ = *p--;
Expand All @@ -42,7 +42,7 @@ void fps_reverse(unsigned char *q, unsigned char *p, unsigned long n) {
of the duplicated string */
void fps_intersperse(unsigned char *q,
unsigned char *p,
unsigned long n,
size_t n,
unsigned char c) {

while (n > 1) {
Expand All @@ -55,7 +55,7 @@ void fps_intersperse(unsigned char *q,
}

/* find maximum char in a packed string */
unsigned char fps_maximum(unsigned char *p, unsigned long len) {
unsigned char fps_maximum(unsigned char *p, size_t len) {
unsigned char *q, c = *p;
for (q = p; q < p + len; q++)
if (*q > c)
Expand All @@ -64,7 +64,7 @@ unsigned char fps_maximum(unsigned char *p, unsigned long len) {
}

/* find minimum char in a packed string */
unsigned char fps_minimum(unsigned char *p, unsigned long len) {
unsigned char fps_minimum(unsigned char *p, size_t len) {
unsigned char *q, c = *p;
for (q = p; q < p + len; q++)
if (*q < c)
Expand All @@ -73,7 +73,7 @@ unsigned char fps_minimum(unsigned char *p, unsigned long len) {
}

/* count the number of occurrences of a char in a string */
unsigned long fps_count(unsigned char *p, unsigned long len, unsigned char w) {
size_t fps_count(unsigned char *p, size_t len, unsigned char w) {
unsigned long c;
for (c = 0; len-- != 0; ++p)
if (*p == w)
Expand All @@ -84,7 +84,14 @@ unsigned long fps_count(unsigned char *p, unsigned long len, unsigned char w) {
/* This wrapper is here so that we can copy a sub-range of a ByteArray#.
We cannot construct a pointer to the interior of an unpinned ByteArray#,
except by doing an unsafe ffi call, and adjusting the pointer C-side. */
void * fps_memcpy_offsets(void *dst, unsigned long dst_off,
const void *src, unsigned long src_off, size_t n) {
void * fps_memcpy_offsets(void *dst, size_t dst_off, const void *src, size_t src_off, size_t n) {
return memcpy(dst + dst_off, src + src_off, n);
}

/* qsort comparator for single bytes.
   a and b each point at one byte; the bytes are compared by their
   unsigned char value. Returns a negative value if *a < *b, zero if they
   are equal, and a positive value if *a > *b. */
int fps_compare(const void *a, const void *b) {
    /* Keep the const qualifier in the casts: the arguments are pointers to
       const data and this function only reads through them. */
    const unsigned char x = *(const unsigned char *)a;
    const unsigned char y = *(const unsigned char *)b;

    /* Both operands are widened to int first, so the difference lies in
       [-255, 255] and cannot overflow. */
    return (int)x - (int)y;
}

/* Sort the len bytes starting at p in place, in ascending order of their
   unsigned char values. Delegates to the C library's qsort with an
   element size of 1 and fps_compare as the comparator. */
void fps_sort(unsigned char *p, size_t len) {
    /* This function returns void, so qsort is called as a plain statement:
       ISO C does not permit `return <expression>;` in a void function. */
    qsort(p, len, 1, fps_compare);
}
14 changes: 7 additions & 7 deletions include/fpstring.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@

#include <string.h>
#include <stdlib.h>

void fps_reverse(unsigned char *dest, unsigned char *from, unsigned long len);
void fps_intersperse(unsigned char *dest, unsigned char *from, unsigned long len, unsigned char c);
unsigned char fps_maximum(unsigned char *p, unsigned long len);
unsigned char fps_minimum(unsigned char *p, unsigned long len);
unsigned long fps_count(unsigned char *p, unsigned long len, unsigned char w);

void fps_reverse(unsigned char *dest, unsigned char *from, size_t len);
void fps_intersperse(unsigned char *dest, unsigned char *from, size_t len, unsigned char c);
unsigned char fps_maximum(unsigned char *p, size_t len);
unsigned char fps_minimum(unsigned char *p, size_t len);
size_t fps_count(unsigned char *p, size_t len, unsigned char w);
void fps_sort(unsigned char *p, size_t len);