From 731caea2f057801850e9ac1d3685b99d70132ebe Mon Sep 17 00:00:00 2001
From: Julian Ospald <hasufell@posteo.de>
Date: Tue, 15 Feb 2022 23:00:53 +0100
Subject: [PATCH] Merge `shortbytestring` package back into `bytestring` wrt
 #444 (#471)

* Merge `shortbytestring` package back into `bytestring` wrt #444

* Fix build on ARM

Reusing compareByteArrays and avoiding
excessive pointer arithmetic.

* Speed up reverse by using byteSwap64 tricks

* Remove phase control from inlines

* Improve performance of elemIndex

* Use setByteArray in replicate

* Implement intercalate manually

* Annotate partial functions with HasCallStack

* Fix build on base < 4.12.0.0

* Add uncons/unsnoc

* Correct complexities

* Exclude reverse optimization path from ARM

It seems to cause segfaults on armv7, suggesting
there are issues with 'indexWord8ArrayAsWord64#'.

All other platforms are fine and tests pass.

* Add benchmarks for ShortByteString

* Improve inlining

* Adjust haddock identifiers

* Get rid of writeCharArray#

* Haddock fixes

* Clean up tests

* Use -fexpose-all-unfoldings

* Improve reverse

* Cleanup 'reverse'

* Fix possible GC race with foreign imports

For more information, see
  https://github.com/haskell/bytestring/pull/471#issuecomment-1022674981

* Disable asserts in shortbytestring.c

* Remove redundant import

* Add documentation about partial functions

* Fold ShortByteString prop tests into ByteString

* Restore previous INLINEs

* Improve naming of bindings

* Consolidate error handling functions

* Remove trailing whitespace

* Fix uncons in documentation

* Rename indexWord64Array to indexWord8ArrayAsWord64

* Improve error message

* Clean up incorrect documentation

* Use div/mod instead of quot/rem

* Simplify branching in reverse

* Move asserts to Haskell

* Prefix C functions

* Fix return type of c_elem_index

* Fix documentation in unfoldrN

* Make unfoldrN more efficient

* Fix maintainer field

* Fix formatting

* Implement takeEnd, dropeEnd and splitAt manually

* Fix some haddock identifiers

* Fix unfoldrN doc

* Add a primops bounds-checking job to CI

* Document and clean up createAndTrim

* Rename errorEmptyList to errorEmptySBS

* Improve documentation for findFromEndUntil

* Improve documentation and naming

* Optimize out quotRem

* Document compareByteArraysOff

* Simplify findIndexOrLength and findFromEndUntil

* Use c_count for count

* Simplify elemIndex

* Remove use of 'mempty'

* Make sure breakSubstring is inlined into isInfixOf

* Simplify stripSuffix and stripPrefix

* Fix redundant import warnings

* Improve 'take'

* Use existing bounnds check in 'drop'

* Avoid 'create' when bytestring is empty

* Optimize filter

* Remove redundant INLINABLE

* Use shorter 'createAndTrim' in 'filter'

* Simplify 'take'

* Simplify 'drop'

* Better formatting

* Add comment to explain DNDEBUG

* Refactor elemIndex

* Optimize 'partition'

* Optimize hot loop in 'partition'
---
 .github/workflows/ci.yml            |   21 +
 Changelog.md                        |   10 +-
 Data/ByteString.hs                  |   10 +-
 Data/ByteString/Lazy.hs             |   10 +-
 Data/ByteString/Short.hs            |  107 +-
 Data/ByteString/Short/Internal.hs   | 1410 +++++++++++++++++++++++++--
 bench/BenchAll.hs                   |    2 +
 bench/BenchShort.hs                 |  235 +++++
 bytestring.cabal                    |    8 +-
 cbits/shortbytestring.c             |   35 +
 tests/Properties.hs                 |   18 +-
 tests/Properties/ByteString.hs      |   55 +-
 tests/Properties/ShortByteString.hs |    5 +
 tests/QuickCheckUtils.hs            |   14 +
 14 files changed, 1795 insertions(+), 145 deletions(-)
 create mode 100644 bench/BenchShort.hs
 create mode 100644 cbits/shortbytestring.c
 create mode 100644 tests/Properties/ShortByteString.hs

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 94bb6b29d..71ff54ca4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -92,3 +92,24 @@ jobs:
           ghc --version
           ghc --make -Iinclude -itests:tests/builder -o Main cbits/*.c tests/Main.hs +RTS -s
           ./Main +RTS -s
+
+  bounds-checking:
+    runs-on: ubuntu-latest
+    container:
+      image: fedora:34
+    steps:
+    - name: install deps
+      run: |
+          dnf install -y gcc gmp gmp-devel make ncurses ncurses-compat-libs xz perl
+          curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | BOOTSTRAP_HASKELL_NONINTERACTIVE=1 BOOTSTRAP_HASKELL_MINIMAL=1 sh
+          source ~/.ghcup/env
+          ghcup install ghc -u https://downloads.haskell.org/~ghcup/unofficial-bindists/ghc/9.3.20220124/ghc-9.3.20220124-x86_64-linux-fedora-34-bounds-checking-ddf50f4b.tar.xz --set 9.3.20220124
+          ghcup install cabal
+      shell: bash
+    - uses: actions/checkout@v1
+    - name: test
+      run: |
+          source ~/.ghcup/env
+          cabal update
+          cabal run -w ghc-9.3.20220124 --ghc-options='-fcheck-prim-bounds -fno-ignore-asserts' bytestring-tests
+      shell: bash
diff --git a/Changelog.md b/Changelog.md
index 5d8a3d7f0..fee6091d5 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -4,7 +4,15 @@
 * [`readInt` returns `Nothing`, if the sequence of digits cannot be represented by an `Int`, instead of overflowing silently](https://github.com/haskell/bytestring/pull/309)
 * [Remove `zipWith` rewrite rule](https://github.com/haskell/bytestring/pull/387)
 
-[0.12.0.0]: https://github.com/haskell/bytestring/compare/0.11.2.0...0.12.0.0
+[0.12.0.0]: https://github.com/haskell/bytestring/compare/0.11.3.0...0.12.0.0
+
+[0.11.3.0] — Unreleased
+
+* merge `shortbytestring` package back into `bytestring` wrt [#444](https://github.com/haskell/bytestring/issues/444),
+  adding lots of additional API:
+    - [Add `all`, `any`, `append`, `break`, `breakEnd`, `breakSubstring`, `concat`, `cons`, `count`, `drop`, `dropEnd`, `dropWhile`, `dropWhileEnd`, `elem`, `elemIndex`, `elemIndices`, `filter`, `find`, `findIndex`, `findIndices`, `foldl'`, `foldl`, `foldl1'`, `foldl1`, `foldr'`, `foldr`, `foldr1'`, `foldr1`, `head`, `init`, `intercalate`, `isInfixOf`, `isPrefixOf`, `isSuffixOf`, `last`, `map`, `partition`, `replicate`, `reverse`, `singleton`, `snoc`, `span`, `spanEnd`, `split`, `splitAt`, `splitWith`, `stripPrefix`, `stripSuffix`, `tail`, `take`, `takeEnd`, `takeWhile`, `takeWhileEnd`, `uncons`, `unfoldr`, `unfoldrN`, `unsnoc`](https://github.com/haskell/bytestring/pull/471)
+
+[0.11.3.0]: https://github.com/haskell/bytestring/compare/0.11.2.0...0.11.3.0
 
 [0.11.2.0] — December 2021
 
diff --git a/Data/ByteString.hs b/Data/ByteString.hs
index 569f0c572..840a8d7ee 100644
--- a/Data/ByteString.hs
+++ b/Data/ByteString.hs
@@ -951,7 +951,7 @@ splitAt n ps@(BS x l)
     | otherwise = (BS x n, BS (plusForeignPtr x n) (l-n))
 {-# INLINE splitAt #-}
 
--- | Similar to 'P.takeWhile',
+-- | Similar to 'Prelude.takeWhile',
 -- returns the longest (possibly empty) prefix of elements
 -- satisfying the predicate.
 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
@@ -979,7 +979,7 @@ takeWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
 takeWhileEnd f ps = unsafeDrop (findFromEndUntil (not . f) ps) ps
 {-# INLINE takeWhileEnd #-}
 
--- | Similar to 'P.dropWhile',
+-- | Similar to 'Prelude.dropWhile',
 -- drops the longest (possibly empty) prefix of elements
 -- satisfying the predicate and returns the remainder.
 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
@@ -997,7 +997,7 @@ dropWhile f ps = unsafeDrop (findIndexOrLength (not . f) ps) ps
     dropWhile (`eqWord8` x) = snd . spanByte x
   #-}
 
--- | Similar to 'P.dropWhileEnd',
+-- | Similar to 'Prelude.dropWhileEnd',
 -- drops the longest (possibly empty) suffix of elements
 -- satisfying the predicate and returns the remainder.
 --
@@ -1008,7 +1008,7 @@ dropWhileEnd :: (Word8 -> Bool) -> ByteString -> ByteString
 dropWhileEnd f ps = unsafeTake (findFromEndUntil (not . f) ps) ps
 {-# INLINE dropWhileEnd #-}
 
--- | Similar to 'P.break',
+-- | Similar to 'Prelude.break',
 -- returns the longest (possibly empty) prefix of elements which __do not__
 -- satisfy the predicate and the remainder of the string.
 --
@@ -1054,7 +1054,7 @@ breakByte c p = case elemIndex c p of
 breakEnd :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 breakEnd  p ps = splitAt (findFromEndUntil p ps) ps
 
--- | Similar to 'P.span',
+-- | Similar to 'Prelude.span',
 -- returns the longest (possibly empty) prefix of elements
 -- satisfying the predicate and the remainder of the string.
 --
diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs
index 9830810e0..7ac75243b 100644
--- a/Data/ByteString/Lazy.hs
+++ b/Data/ByteString/Lazy.hs
@@ -849,7 +849,7 @@ splitAt i cs0 = splitAt' i cs0
                    in (Chunk c cs', cs'')
 
 
--- | Similar to 'P.takeWhile',
+-- | Similar to 'Prelude.takeWhile',
 -- returns the longest (possibly empty) prefix of elements
 -- satisfying the predicate.
 takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
@@ -882,7 +882,7 @@ takeWhileEnd f = takeWhileEnd'
                 c' | S.length c' == S.length c -> (True, Chunk c bs)
                    | otherwise                 -> (False, fromStrict c' `append` bs)
 
--- | Similar to 'P.dropWhile',
+-- | Similar to 'Prelude.dropWhile',
 -- drops the longest (possibly empty) prefix of elements
 -- satisfying the predicate and returns the remainder.
 dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
@@ -893,7 +893,7 @@ dropWhile f = dropWhile'
             n | n < S.length c -> Chunk (S.drop n c) cs
               | otherwise      -> dropWhile' cs
 
--- | Similar to 'P.dropWhileEnd',
+-- | Similar to 'Prelude.dropWhileEnd',
 -- drops the longest (possibly empty) suffix of elements
 -- satisfying the predicate and returns the remainder.
 --
@@ -916,7 +916,7 @@ dropWhileEnd f = go []
                  x' | S.null x' -> dropEndBytes xs
                     | otherwise -> List.foldl' (flip Chunk) Empty (x' : xs)
 
--- | Similar to 'P.break',
+-- | Similar to 'Prelude.break',
 -- returns the longest (possibly empty) prefix of elements which __do not__
 -- satisfy the predicate and the remainder of the string.
 --
@@ -995,7 +995,7 @@ spanByte c (LPS ps) = case (spanByte' ps) of (a,b) -> (LPS a, LPS b)
                       | otherwise  -> (x' : [], x'' : xs)
 -}
 
--- | Similar to 'P.span',
+-- | Similar to 'Prelude.span',
 -- returns the longest (possibly empty) prefix of elements
 -- satisfying the predicate and the remainder of the string.
 --
diff --git a/Data/ByteString/Short.hs b/Data/ByteString/Short.hs
index cbdfd81e8..e1339e970 100644
--- a/Data/ByteString/Short.hs
+++ b/Data/ByteString/Short.hs
@@ -2,10 +2,10 @@
 
 -- |
 -- Module      : Data.ByteString.Short
--- Copyright   : (c) Duncan Coutts 2012-2013
+-- Copyright   : (c) Duncan Coutts 2012-2013, Julian Ospald 2022
 -- License     : BSD-style
 --
--- Maintainer  : duncan@community.haskell.org
+-- Maintainer  : hasufell@posteo.de
 -- Stability   : stable
 -- Portability : ghc only
 --
@@ -67,26 +67,113 @@ module Data.ByteString.Short (
     -- small unpinned strings are allocated in the same way as normal heap
     -- allocations, rather than in a separate pinned area.
 
-    -- * Conversions
-    toShort,
-    fromShort,
+    -- * Introducing and eliminating 'ShortByteString's
+    empty,
+    singleton,
     pack,
     unpack,
+    fromShort,
+    toShort,
 
-    -- * Other operations
-    empty, null, length, index, indexMaybe, (!?),
+    -- * Basic interface
+    snoc,
+    cons,
+    append,
+    last,
+    tail,
+    uncons,
+    head,
+    init,
+    unsnoc,
+    null,
+    length,
 
-    -- ** Encoding validation
+    -- * Encoding validation
     isValidUtf8,
 
+    -- * Transforming ShortByteStrings
+    map,
+    reverse,
+    intercalate,
+
+    -- * Reducing 'ShortByteString's (folds)
+    foldl,
+    foldl',
+    foldl1,
+    foldl1',
+
+    foldr,
+    foldr',
+    foldr1,
+    foldr1',
+
+    -- ** Special folds
+    all,
+    any,
+    concat,
+
+    -- ** Generating and unfolding ByteStrings
+    replicate,
+    unfoldr,
+    unfoldrN,
+
+    -- * Substrings
+
+    -- ** Breaking strings
+    take,
+    takeEnd,
+    takeWhileEnd,
+    takeWhile,
+    drop,
+    dropEnd,
+    dropWhile,
+    dropWhileEnd,
+    breakEnd,
+    break,
+    span,
+    spanEnd,
+    splitAt,
+    split,
+    splitWith,
+    stripSuffix,
+    stripPrefix,
+
+    -- * Predicates
+    isInfixOf,
+    isPrefixOf,
+    isSuffixOf,
+
+    -- ** Search for arbitrary substrings
+    breakSubstring,
+
+    -- * Searching ShortByteStrings
+
+    -- ** Searching by equality
+    elem,
+
+    -- ** Searching with a predicate
+    find,
+    filter,
+    partition,
+
+    -- * Indexing ShortByteStrings
+    index,
+    indexMaybe,
+    (!?),
+    elemIndex,
+    elemIndices,
+    count,
+    findIndex,
+    findIndices,
+
     -- * Low level conversions
     -- ** Packing 'Foreign.C.String.CString's and pointers
     packCString,
     packCStringLen,
 
-    -- ** Using ByteStrings as 'Foreign.C.String.CString's
+    -- ** Using ShortByteStrings as 'Foreign.C.String.CString's
     useAsCString,
-    useAsCStringLen
+    useAsCStringLen,
   ) where
 
 import Data.ByteString.Short.Internal
diff --git a/Data/ByteString/Short/Internal.hs b/Data/ByteString/Short/Internal.hs
index 8e3e657fa..e39c93423 100644
--- a/Data/ByteString/Short/Internal.hs
+++ b/Data/ByteString/Short/Internal.hs
@@ -1,18 +1,38 @@
-{-# LANGUAGE DeriveDataTypeable, CPP, BangPatterns, RankNTypes,
-             ForeignFunctionInterface, MagicHash, UnboxedTuples,
-             UnliftedFFITypes #-}
-{-# LANGUAGE TypeFamilies #-}
-{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
-{-# LANGUAGE Unsafe #-}
-{-# LANGUAGE TemplateHaskellQuotes #-}
+{-# LANGUAGE BangPatterns             #-}
+{-# LANGUAGE CPP                      #-}
+{-# LANGUAGE DeriveDataTypeable       #-}
+{-# LANGUAGE ForeignFunctionInterface #-}
+{-# LANGUAGE LambdaCase               #-}
+{-# LANGUAGE MagicHash                #-}
+{-# LANGUAGE MultiWayIf               #-}
+{-# LANGUAGE RankNTypes               #-}
+{-# LANGUAGE ScopedTypeVariables      #-}
+{-# LANGUAGE TemplateHaskellQuotes    #-}
+{-# LANGUAGE TupleSections            #-}
+{-# LANGUAGE TypeFamilies             #-}
+{-# LANGUAGE UnboxedTuples            #-}
+{-# LANGUAGE UnliftedFFITypes         #-}
+{-# LANGUAGE Unsafe                   #-}
+
+{-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-}
 {-# OPTIONS_HADDOCK not-home #-}
 
+-- Not all architectures are forgiving of unaligned accesses; whitelist ones
+-- which are known not to trap (either to the kernel for emulation, or crash).
+#if defined(i386_HOST_ARCH) || defined(x86_64_HOST_ARCH) \
+    || ((defined(arm_HOST_ARCH) || defined(aarch64_HOST_ARCH)) \
+        && defined(__ARM_FEATURE_UNALIGNED)) \
+    || defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) \
+    || defined(powerpc64le_HOST_ARCH)
+#define SAFE_UNALIGNED 1
+#endif
+
 -- |
 -- Module      : Data.ByteString.Short.Internal
--- Copyright   : (c) Duncan Coutts 2012-2013
+-- Copyright   : (c) Duncan Coutts 2012-2013, Julian Ospald 2022
 -- License     : BSD-style
 --
--- Maintainer  : duncan@community.haskell.org
+-- Maintainer  : hasufell@posteo.de
 -- Stability   : stable
 -- Portability : ghc only
 --
@@ -23,82 +43,229 @@ module Data.ByteString.Short.Internal (
     -- * The @ShortByteString@ type and representation
     ShortByteString(..),
 
-    -- * Conversions
-    toShort,
-    fromShort,
+    -- * Introducing and eliminating 'ShortByteString's
+    empty,
+    singleton,
     pack,
     unpack,
+    fromShort,
+    toShort,
 
-    -- * Other operations
-    empty, null, length, index, indexMaybe, (!?), unsafeIndex,
+    -- * Basic interface
+    snoc,
+    cons,
+    append,
+    last,
+    tail,
+    uncons,
+    head,
+    init,
+    unsnoc,
+    null,
+    length,
+
+    -- * Transforming ShortByteStrings
+    map,
+    reverse,
+    intercalate,
+
+    -- * Reducing 'ShortByteString's (folds)
+    foldl,
+    foldl',
+    foldl1,
+    foldl1',
+
+    foldr,
+    foldr',
+    foldr1,
+    foldr1',
+
+    -- ** Special folds
+    all,
+    any,
+    concat,
+
+    -- ** Generating and unfolding ByteStrings
+    replicate,
+    unfoldr,
+    unfoldrN,
+
+    -- * Substrings
+
+    -- ** Breaking strings
+    take,
+    takeEnd,
+    takeWhileEnd,
+    takeWhile,
+    drop,
+    dropEnd,
+    dropWhile,
+    dropWhileEnd,
+    breakEnd,
+    break,
+    span,
+    spanEnd,
+    splitAt,
+    split,
+    splitWith,
+    stripSuffix,
+    stripPrefix,
+
+    -- * Predicates
+    isInfixOf,
+    isPrefixOf,
+    isSuffixOf,
+
+    -- ** Search for arbitrary substrings
+    breakSubstring,
+
+    -- * Searching ShortByteStrings
+
+    -- ** Searching by equality
+    elem,
+
+    -- ** Searching with a predicate
+    find,
+    filter,
+    partition,
+
+    -- * Indexing ShortByteStrings
+    index,
+    indexMaybe,
+    (!?),
+    elemIndex,
+    elemIndices,
+    count,
+    findIndex,
+    findIndices,
 
     -- * Low level operations
-    createFromPtr, copyToPtr,
+    createFromPtr,
+    copyToPtr,
 
     -- ** Encoding validation
     isValidUtf8,
 
     -- * Low level conversions
-    -- ** Packing 'CString's and pointers
+    -- ** Packing 'Foreign.C.String.CString's and pointers
     packCString,
     packCStringLen,
 
-    -- ** Using ByteStrings as 'CString's
+    -- ** Using ShortByteStrings as 'Foreign.C.String.CString's
     useAsCString,
-    useAsCStringLen
+    useAsCStringLen,
   ) where
 
-import Data.ByteString.Internal (ByteString(..), accursedUnutterablePerformIO)
-import qualified Data.ByteString.Internal as BS
-
-import Data.Typeable    (Typeable)
-import Data.Data        (Data(..), mkNoRepType)
-import Data.Semigroup   (Semigroup((<>)))
-import Data.Monoid      (Monoid(..))
-import Data.String      (IsString(..))
-import Control.DeepSeq  (NFData(..))
-import qualified Data.List as List (length)
-import Foreign.C.String (CString, CStringLen)
-import Foreign.C.Types  (CSize(..), CInt(..))
-import Foreign.Marshal.Alloc (allocaBytes)
-import Foreign.ForeignPtr (touchForeignPtr)
-import Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr)
-import Foreign.Storable (pokeByteOff)
-
-import qualified GHC.Exts
-import GHC.Exts ( Int(I#), Int#, Ptr(Ptr), Addr#, Char(C#)
-                , State#, RealWorld
-                , ByteArray#, MutableByteArray#
-                , newByteArray#
-                , newPinnedByteArray#
-                , byteArrayContents#
-                , unsafeCoerce#
+import Data.ByteString.Internal
+  ( ByteString(..)
+  , accursedUnutterablePerformIO
+  , checkedAdd
+  )
+
+import Data.Bits
+  ( FiniteBits (finiteBitSize)
+  , shiftL
+#if MIN_VERSION_base(4,12,0) && defined(SAFE_UNALIGNED)
+  , shiftR
+#endif
+  , (.&.)
+  , (.|.)
+  )
+import Data.Data
+  ( Data(..)
+  , mkNoRepType
+  )
+import Data.Monoid
+  ( Monoid(..) )
+import Data.Semigroup
+  ( Semigroup((<>)) )
+import Data.String
+  ( IsString(..) )
+import Data.Typeable
+  ( Typeable )
+import Control.Applicative
+  ( pure )
+import Control.DeepSeq
+  ( NFData(..) )
+import Control.Exception
+  ( assert )
+import Control.Monad
+  ( (>>) )
+import Foreign.C.String
+  ( CString
+  , CStringLen
+  )
+import Foreign.C.Types
+  ( CSize(..)
+  , CInt(..)
+  , CPtrdiff(..)
+  )
+import Foreign.ForeignPtr
+  ( touchForeignPtr )
+import Foreign.ForeignPtr.Unsafe
+  ( unsafeForeignPtrToPtr )
+import Foreign.Marshal.Alloc
+  ( allocaBytes )
+import Foreign.Storable
+  ( pokeByteOff )
+import GHC.Exts
+  ( Int(I#), Int#, Ptr(Ptr), Addr#, Char(C#)
+  , State#, RealWorld
+  , ByteArray#, MutableByteArray#
+  , newByteArray#
+  , newPinnedByteArray#
+  , byteArrayContents#
+  , unsafeCoerce#
+  , copyMutableByteArray#
 #if MIN_VERSION_base(4,10,0)
-                , isByteArrayPinned#
-                , isTrue#
+  , isByteArrayPinned#
+  , isTrue#
 #endif
 #if MIN_VERSION_base(4,11,0)
-                , compareByteArrays#
+  , compareByteArrays#
+#endif
+  , sizeofByteArray#
+  , indexWord8Array#, indexCharArray#
+  , writeWord8Array#
+  , unsafeFreezeByteArray#
+#if MIN_VERSION_base(4,12,0) && defined(SAFE_UNALIGNED)
+  ,writeWord64Array#
+  ,indexWord8ArrayAsWord64#
 #endif
-                , sizeofByteArray#
-                , indexWord8Array#, indexCharArray#
-                , writeWord8Array#, writeCharArray#
-                , unsafeFreezeByteArray# )
+  , setByteArray#
+  )
 import GHC.IO
-import GHC.ForeignPtr (ForeignPtr(ForeignPtr), ForeignPtrContents(PlainPtr))
-import GHC.ST         (ST(ST), runST)
-import GHC.Stack.Types (HasCallStack)
+import GHC.ForeignPtr
+  ( ForeignPtr(ForeignPtr)
+  , ForeignPtrContents(PlainPtr)
+  )
+import GHC.ST
+  ( ST(ST)
+  , runST
+  )
+import GHC.Stack.Types
+  ( HasCallStack )
 import GHC.Word
+import Prelude
+  ( Eq(..), Ord(..), Ordering(..), Read(..), Show(..)
+  , ($), ($!), error, (++), (.), (||)
+  , String, userError
+  , Bool(..), (&&), otherwise
+  , (+), (-), fromIntegral
+  , (*)
+  , (^)
+  , (<$>)
+  , return
+  , Maybe(..)
+  , not
+  , snd
+  )
 
+import qualified Data.ByteString.Internal as BS
 
-import Prelude ( Eq(..), Ord(..), Ordering(..), Read(..), Show(..)
-               , ($), ($!), error, (++), (.)
-               , String, userError
-               , Bool(..), (&&), otherwise
-               , (+), (-), fromIntegral
-               , return
-               , Maybe(..) )
-
+import qualified Data.Foldable as Foldable
+import qualified Data.List as List
+import qualified GHC.Exts
 import qualified Language.Haskell.TH.Lib as TH
 import qualified Language.Haskell.TH.Syntax as TH
 
@@ -232,8 +399,8 @@ unsafeIndex sbs = indexWord8Array (asBA sbs)
 
 indexError :: HasCallStack => ShortByteString -> Int -> a
 indexError sbs i =
-  error $ "Data.ByteString.Short.index: error in array index; " ++ show i
-       ++ " not in range [0.." ++ show (length sbs) ++ ")"
+  moduleError "index" $ "error in array index: " ++ show i
+                        ++ " not in range [0.." ++ show (length sbs) ++ "]"
 
 -- | @since 0.11.2.0
 unsafePackLenLiteral :: Int -> Addr# -> ShortByteString
@@ -255,6 +422,66 @@ create len fill =
       return (SBS ba#)
 {-# INLINE create #-}
 
+-- | Given the maximum size needed and a function to make the contents
+-- of a ShortByteString, createAndTrim makes the 'ShortByteString'.
+-- The generating function is required to return the actual final size
+-- (<= the maximum size) and the result value. The resulting byte array
+-- is realloced to this size.
+createAndTrim :: Int -> (forall s. MBA s -> ST s (Int, a)) -> (ShortByteString, a)
+createAndTrim l fill =
+    runST $ do
+      mba <- newByteArray l
+      (l', res) <- fill mba
+      if assert (l' <= l) $ l' >= l
+          then do
+            BA# ba# <- unsafeFreezeByteArray mba
+            return (SBS ba#, res)
+          else do
+            mba2 <- newByteArray l'
+            copyMutableByteArray mba 0 mba2 0 l'
+            BA# ba# <- unsafeFreezeByteArray mba2
+            return (SBS ba#, res)
+{-# INLINE createAndTrim #-}
+
+createAndTrim' :: Int -> (forall s. MBA s -> ST s Int) -> ShortByteString
+createAndTrim' l fill =
+    runST $ do
+      mba <- newByteArray l
+      l' <- fill mba
+      if assert (l' <= l) $ l' >= l
+          then do
+            BA# ba# <- unsafeFreezeByteArray mba
+            return (SBS ba#)
+          else do
+            mba2 <- newByteArray l'
+            copyMutableByteArray mba 0 mba2 0 l'
+            BA# ba# <- unsafeFreezeByteArray mba2
+            return (SBS ba#)
+{-# INLINE createAndTrim' #-}
+
+createAndTrim'' :: Int -> (forall s. MBA s -> MBA s -> ST s (Int, Int)) -> (ShortByteString, ShortByteString)
+createAndTrim'' l fill =
+    runST $ do
+      mba1 <- newByteArray l
+      mba2 <- newByteArray l
+      (l1, l2) <- fill mba1 mba2
+      sbs1 <- freeze' l1 mba1
+      sbs2 <- freeze' l2 mba2
+      pure (sbs1, sbs2)
+  where
+    freeze' :: Int -> MBA s -> ST s ShortByteString
+    freeze' l' mba =
+      if assert (l' <= l) $ l' >= l
+          then do
+            BA# ba# <- unsafeFreezeByteArray mba
+            return (SBS ba#)
+          else do
+            mba2 <- newByteArray l'
+            copyMutableByteArray mba 0 mba2 0 l'
+            BA# ba# <- unsafeFreezeByteArray mba2
+            return (SBS ba#)
+{-# INLINE createAndTrim'' #-}
+
 ------------------------------------------------------------------------
 -- Conversion to and from ByteString
 
@@ -279,12 +506,12 @@ toShortIO (BS fptr len) = do
 --
 fromShort :: ShortByteString -> ByteString
 #if MIN_VERSION_base(4,10,0)
-fromShort (SBS b#)
-  | isTrue# (isByteArrayPinned# b#) = BS fp len
+fromShort (SBS ba#)
+  | isTrue# (isByteArrayPinned# ba#) = BS fp len
   where
-    addr# = byteArrayContents# b#
-    fp = ForeignPtr addr# (PlainPtr (unsafeCoerce# b#))
-    len = I# (sizeofByteArray# b#)
+    addr# = byteArrayContents# ba#
+    fp    = ForeignPtr addr# (PlainPtr (unsafeCoerce# ba#))
+    len   = I# (sizeofByteArray# ba#)
 #endif
 fromShort !sbs = unsafeDupablePerformIO (fromShortIO sbs)
 
@@ -297,6 +524,11 @@ fromShortIO sbs = do
                         (PlainPtr mba#)
     return (BS fp len)
 
+-- | /O(1)/ Convert a 'Word8' into a 'ShortByteString'
+--
+-- @since 0.11.3.0
+singleton :: Word8 -> ShortByteString
+singleton = \w -> create 1 (\mba -> writeWord8Array mba 0 w)
 
 ------------------------------------------------------------------------
 -- Packing and unpacking from lists
@@ -310,20 +542,10 @@ unpack :: ShortByteString -> [Word8]
 unpack = unpackBytes
 
 packChars :: [Char] -> ShortByteString
-packChars cs = packLenChars (List.length cs) cs
+packChars = \cs -> packLenBytes (List.length cs) (List.map BS.c2w cs)
 
 packBytes :: [Word8] -> ShortByteString
-packBytes cs = packLenBytes (List.length cs) cs
-
-packLenChars :: Int -> [Char] -> ShortByteString
-packLenChars len cs0 =
-    create len (\mba -> go mba 0 cs0)
-  where
-    go :: MBA s -> Int -> [Char] -> ST s ()
-    go !_   !_ []     = return ()
-    go !mba !i (c:cs) = do
-      writeCharArray mba i c
-      go mba (i+1) cs
+packBytes = \ws -> packLenBytes (List.length ws) ws
 
 packLenBytes :: Int -> [Word8] -> ShortByteString
 packLenBytes len ws0 =
@@ -347,10 +569,10 @@ packLenBytes len ws0 =
 -- unpackAppendChars do the chunks strictly.
 
 unpackChars :: ShortByteString -> [Char]
-unpackChars bs = unpackAppendCharsLazy bs []
+unpackChars sbs = unpackAppendCharsLazy sbs []
 
 unpackBytes :: ShortByteString -> [Word8]
-unpackBytes bs = unpackAppendBytesLazy bs []
+unpackBytes sbs = unpackAppendBytesLazy sbs []
 
 -- Why 100 bytes you ask? Because on a 64bit machine the list we allocate
 -- takes just shy of 4k which seems like a reasonable amount.
@@ -433,7 +655,7 @@ append src1 src2 =
         copyByteArray (asBA src2) 0 dst len1 len2
 
 concat :: [ShortByteString] -> ShortByteString
-concat sbss =
+concat = \sbss ->
     create (totalLen 0 sbss) (\dst -> copy dst 0 sbss)
   where
     totalLen !acc []          = acc
@@ -446,6 +668,898 @@ concat sbss =
       copyByteArray (asBA src) 0 dst off len
       copy dst (off + len) sbss
 
+-- ---------------------------------------------------------------------
+-- Basic interface
+
+infixr 5 `cons` --same as list (:)
+infixl 5 `snoc`
+
+-- | /O(n)/ Append a byte to the end of a 'ShortByteString'
+-- 
+-- Note: copies the entire byte array
+--
+-- @since 0.11.3.0
+snoc :: ShortByteString -> Word8 -> ShortByteString
+snoc = \sbs c -> let l  = length sbs
+                     nl = l + 1
+  in create nl $ \mba -> do
+      copyByteArray (asBA sbs) 0 mba 0 l
+      writeWord8Array mba l c
+
+-- | /O(n)/ 'cons' is analogous to (:) for lists.
+--
+-- Note: copies the entire byte array
+--
+-- @since 0.11.3.0
+cons :: Word8 -> ShortByteString -> ShortByteString
+cons c = \sbs -> let l  = length sbs
+                     nl = l + 1
+  in create nl $ \mba -> do
+      writeWord8Array mba 0 c
+      copyByteArray (asBA sbs) 0 mba 1 l
+
+-- | /O(1)/ Extract the last element of a ShortByteString, which must be finite and non-empty.
+-- An exception will be thrown in the case of an empty ShortByteString.
+--
+-- This is a partial function, consider using 'unsnoc' instead.
+--
+-- @since 0.11.3.0
+last :: HasCallStack => ShortByteString -> Word8
+last = \sbs -> case null sbs of
+  True -> errorEmptySBS "last"
+  False -> indexWord8Array (asBA sbs) (length sbs - 1)
+
+-- | /O(n)/ Extract the elements after the head of a ShortByteString, which must be non-empty.
+-- An exception will be thrown in the case of an empty ShortByteString.
+--
+-- This is a partial function, consider using 'uncons' instead.
+--
+-- Note: copies the entire byte array
+--
+-- @since 0.11.3.0
+tail :: HasCallStack => ShortByteString -> ShortByteString
+tail = \sbs -> 
+  let l  = length sbs
+      nl = l - 1
+  in case null sbs of
+      True -> errorEmptySBS "tail"
+      False -> create nl $ \mba -> copyByteArray (asBA sbs) 1 mba 0 nl
+
+-- | /O(n)/ Extract the head and tail of a ByteString, returning Nothing
+-- if it is empty.
+--
+-- @since 0.11.3.0
+uncons :: ShortByteString -> Maybe (Word8, ShortByteString)
+uncons = \sbs ->
+  let l  = length sbs
+      nl = l - 1
+  in if | l <= 0 -> Nothing
+        | otherwise -> let h = indexWord8Array (asBA sbs) 0
+                           t = create nl $ \mba -> copyByteArray (asBA sbs) 1 mba 0 nl
+                       in Just (h, t)
+
+-- | /O(1)/ Extract the first element of a ShortByteString, which must be non-empty.
+-- An exception will be thrown in the case of an empty ShortByteString.
+--
+-- This is a partial function, consider using 'uncons' instead.
+--
+-- @since 0.11.3.0
+head :: HasCallStack => ShortByteString -> Word8
+head = \sbs -> case null sbs of
+  True -> errorEmptySBS "head"
+  False -> indexWord8Array (asBA sbs) 0
+
+-- | /O(n)/ Return all the elements of a 'ShortByteString' except the last one.
+-- An exception will be thrown in the case of an empty ShortByteString.
+--
+-- This is a partial function, consider using 'unsnoc' instead.
+--
+-- Note: copies the entire byte array
+--
+-- @since 0.11.3.0
+init :: HasCallStack => ShortByteString -> ShortByteString
+init = \sbs ->
+  let l  = length sbs
+      nl = l - 1
+  in case null sbs of
+      True -> errorEmptySBS "init"
+      False -> create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl
+
+-- | /O(n)/ Extract the 'init' and 'last' of a ByteString, returning Nothing
+-- if it is empty.
+--
+-- @since 0.11.3.0
+unsnoc :: ShortByteString -> Maybe (ShortByteString, Word8)
+unsnoc = \sbs ->
+  let l  = length sbs
+      nl = l - 1
+  in if | l <= 0 -> Nothing
+        | otherwise -> let l' = indexWord8Array (asBA sbs) (l - 1)
+                           i  = create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl
+                       in Just (i, l')
+
+
+-- ---------------------------------------------------------------------
+-- Transformations
+
+-- | /O(n)/ 'map' @f xs@ is the ShortByteString obtained by applying @f@ to each
+-- element of @xs@.
+--
+-- @since 0.11.3.0
+map :: (Word8 -> Word8) -> ShortByteString -> ShortByteString
+map f = \sbs ->
+    let l  = length sbs
+        ba = asBA sbs
+    in create l (\mba -> go ba mba 0 l)
+  where
+    go :: BA -> MBA s -> Int -> Int -> ST s ()
+    go !ba !mba !i !l
+      | i >= l = return ()
+      | otherwise = do
+          let w = indexWord8Array ba i
+          writeWord8Array mba i (f w)
+          go ba mba (i+1) l
+
+
+-- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
+--
+-- @since 0.11.3.0
+reverse :: ShortByteString -> ShortByteString
+reverse = \sbs ->
+    let l  = length sbs
+        ba = asBA sbs
+-- https://gitlab.haskell.org/ghc/ghc/-/issues/21015
+#if MIN_VERSION_base(4,12,0) && defined(SAFE_UNALIGNED)
+    in create l (\mba -> go ba mba l)
+  where
+    go :: forall s. BA -> MBA s -> Int -> ST s ()
+    go !ba !mba !l = do
+      -- this is equivalent to: (q, r) = l `quotRem` 8 
+      let q = l `shiftR` 3
+          r = l .&. 7
+      i' <- goWord8Chunk 0 r
+      goWord64Chunk i' 0 q
+     where
+
+      goWord64Chunk :: Int -> Int -> Int -> ST s ()
+      goWord64Chunk !off !i' !cl = loop i'
+       where
+        loop :: Int -> ST s ()
+        loop !i
+          | i >= cl = return ()
+          | otherwise = do
+              let w = indexWord8ArrayAsWord64 ba (off + (i * 8))
+              writeWord64Array mba (cl - 1 - i) (byteSwap64 w)
+              loop (i+1)
+
+      goWord8Chunk :: Int -> Int -> ST s Int
+      goWord8Chunk !i' !cl = loop i'
+       where
+        loop :: Int -> ST s Int
+        loop !i
+          | i >= cl = return i
+          | otherwise = do
+              let w = indexWord8Array ba i
+              writeWord8Array mba (l - 1 - i) w
+              loop (i+1)
+#else
+    in create l (\mba -> go ba mba 0 l)
+   where
+    go :: BA -> MBA s -> Int -> Int -> ST s ()
+    go !ba !mba !i !l
+      | i >= l = return ()
+      | otherwise = do
+          let w = indexWord8Array ba i
+          writeWord8Array mba (l - 1 - i) w
+          go ba mba (i+1) l
+#endif
+
+
+-- | /O(n)/ The 'intercalate' function takes a 'ShortByteString' and a list of
+-- 'ShortByteString's and concatenates the list after interspersing the first
+-- argument between each element of the list.
+--
+-- @since 0.11.3.0
+intercalate :: ShortByteString -> [ShortByteString] -> ShortByteString
+intercalate sep = \case
+                    []      -> empty
+                    [x]     -> x -- This branch exists for laziness, not speed
+                    (sbs:t) -> let !totalLen = List.foldl' (\acc chunk -> acc +! length sep +! length chunk) (length sbs) t
+                               in create totalLen (\mba ->
+                                      let !l = length sbs
+                                      in copyByteArray (asBA sbs) 0 mba 0 l >> go mba l t)
+ where
+  ba  = asBA sep
+  lba = length sep
+
+  go :: MBA s -> Int -> [ShortByteString] -> ST s ()
+  go _ _ [] = pure ()
+  go mba !off (chunk:chunks) = do
+    let lc = length chunk
+    copyByteArray ba 0 mba off lba
+    copyByteArray (asBA chunk) 0 mba (off + lba) lc
+    go mba (off + lc + lba) chunks
+  (+!) = checkedAdd "Short.intercalate"
+
+
+-- ---------------------------------------------------------------------
+-- Reducing 'ByteString's
+
+-- | 'foldl', applied to a binary operator, a starting value (typically
+-- the left-identity of the operator), and a ShortByteString, reduces the
+-- ShortByteString using the binary operator, from left to right.
+--
+-- @since 0.11.3.0
+foldl :: (a -> Word8 -> a) -> a -> ShortByteString -> a
+foldl f v = List.foldl f v . unpack
+
+-- | 'foldl'' is like 'foldl', but strict in the accumulator.
+--
+-- @since 0.11.3.0
+foldl' :: (a -> Word8 -> a) -> a -> ShortByteString -> a
+foldl' f v = List.foldl' f v . unpack
+
+-- | 'foldr', applied to a binary operator, a starting value
+-- (typically the right-identity of the operator), and a ShortByteString,
+-- reduces the ShortByteString using the binary operator, from right to left.
+--
+-- @since 0.11.3.0
+foldr :: (Word8 -> a -> a) -> a -> ShortByteString -> a
+foldr f v = List.foldr f v . unpack
+
+-- | 'foldr'' is like 'foldr', but strict in the accumulator.
+--
+-- @since 0.11.3.0
+foldr' :: (Word8 -> a -> a) -> a -> ShortByteString -> a
+foldr' k v = Foldable.foldr' k v . unpack
+
+-- | 'foldl1' is a variant of 'foldl' that has no starting value
+-- argument, and thus must be applied to non-empty 'ShortByteString's.
+-- An exception will be thrown in the case of an empty ShortByteString.
+--
+-- @since 0.11.3.0
+foldl1 :: HasCallStack => (Word8 -> Word8 -> Word8) -> ShortByteString -> Word8
+foldl1 k = List.foldl1 k . unpack
+
+-- | 'foldl1'' is like 'foldl1', but strict in the accumulator.
+-- An exception will be thrown in the case of an empty ShortByteString.
+--
+-- @since 0.11.3.0
+foldl1' :: HasCallStack => (Word8 -> Word8 -> Word8) -> ShortByteString -> Word8
+foldl1' k = List.foldl1' k . unpack
+
+-- | 'foldr1' is a variant of 'foldr' that has no starting value argument,
+-- and thus must be applied to non-empty 'ShortByteString's
+-- An exception will be thrown in the case of an empty ShortByteString.
+--
+-- @since 0.11.3.0
+foldr1 :: HasCallStack => (Word8 -> Word8 -> Word8) -> ShortByteString -> Word8
+foldr1 k = List.foldr1 k . unpack
+
+-- | 'foldr1'' is a variant of 'foldr1', but is strict in the
+-- accumulator.
+--
+-- @since 0.11.3.0
+foldr1' :: HasCallStack => (Word8 -> Word8 -> Word8) -> ShortByteString -> Word8
+foldr1' k = \sbs -> if null sbs then errorEmptySBS "foldr1'" else foldr' k (last sbs) (init sbs)
+
+
+
+-- ---------------------------------------------------------------------
+-- Special folds
+
+-- | /O(n)/ Applied to a predicate and a 'ShortByteString', 'all' determines
+-- if all elements of the 'ShortByteString' satisfy the predicate.
+--
+-- @since 0.11.3.0
+all :: (Word8 -> Bool) -> ShortByteString -> Bool
+all k = \sbs ->
+  let l  = length sbs
+      ba = asBA sbs
+      w  = indexWord8Array ba
+      go !n | n >= l    = True
+            | otherwise = k (w n) && go (n + 1)
+  in go 0
+
+
+-- | /O(n)/ Applied to a predicate and a ByteString, 'any' determines if
+-- any element of the 'ByteString' satisfies the predicate.
+--
+-- @since 0.11.3.0
+any :: (Word8 -> Bool) -> ShortByteString -> Bool
+any k = \sbs ->
+  let l  = length sbs
+      ba = asBA sbs
+      w  = indexWord8Array ba
+      go !n | n >= l    = False
+            | otherwise = k (w n) || go (n + 1)
+  in go 0
+
+
+
+-- ---------------------------------------------------------------------
+-- Substrings
+
+-- | /O(n)/ 'take' @n@, applied to a ShortByteString @xs@, returns the prefix
+-- of @xs@ of length @n@, or @xs@ itself if @n > 'length' xs@.
+--
+-- Note: copies the entire byte array
+--
+-- @since 0.11.3.0
+take :: Int -> ShortByteString -> ShortByteString
+take = \n -> \sbs -> let sl = length sbs
+                     in if | n >= sl   -> sbs
+                           | n <= 0    -> empty
+                           | otherwise ->
+                               create n $ \mba -> copyByteArray (asBA sbs) 0 mba 0 n
+
+-- | Similar to 'Prelude.takeWhile',
+-- returns the longest (possibly empty) prefix of elements
+-- satisfying the predicate.
+--
+-- @since 0.11.3.0
+takeWhile :: (Word8 -> Bool) -> ShortByteString -> ShortByteString
+takeWhile f = \sbs -> take (findIndexOrLength (not . f) sbs) sbs
+
+-- | /O(n)/ @'takeEnd' n xs@ is equivalent to @'drop' ('length' xs - n) xs@.
+-- Takes @n@ elements from end of bytestring.
+--
+-- >>> takeEnd 3 "abcdefg"
+-- "efg"
+-- >>> takeEnd 0 "abcdefg"
+-- ""
+-- >>> takeEnd 4 "abc"
+-- "abc"
+--
+-- @since 0.11.3.0
+takeEnd :: Int -> ShortByteString -> ShortByteString
+takeEnd n = \sbs -> let sl = length sbs
+                    in if | n >= sl   -> sbs
+                          | n <= 0    -> empty
+                          | otherwise -> create n $ \mba -> copyByteArray (asBA sbs) (max 0 (sl - n)) mba 0 n
+
+
+-- | Returns the longest (possibly empty) suffix of elements
+-- satisfying the predicate.
+--
+-- @'takeWhileEnd' p@ is equivalent to @'reverse' . 'takeWhile' p . 'reverse'@.
+--
+-- @since 0.11.3.0
+takeWhileEnd :: (Word8 -> Bool) -> ShortByteString -> ShortByteString
+takeWhileEnd f = \sbs -> drop (findFromEndUntil (not . f) sbs) sbs
+
+-- | /O(n)/ 'drop' @n@ @xs@ returns the suffix of @xs@ after the first n elements, or @[]@ if @n > 'length' xs@.
+--
+-- Note: copies the entire byte array
+--
+-- @since 0.11.3.0
+drop :: Int -> ShortByteString -> ShortByteString
+drop = \n -> \sbs ->
+  let len = length sbs
+  in if | n <= 0    -> sbs
+        | n >= len  -> empty
+        | otherwise -> 
+            let newLen = len - n
+            in create newLen $ \mba -> copyByteArray (asBA sbs) n mba 0 newLen
+
+-- | /O(n)/ @'dropEnd' n xs@ is equivalent to @'take' ('length' xs - n) xs@.
+-- Drops @n@ elements from end of bytestring.
+--
+-- >>> dropEnd 3 "abcdefg"
+-- "abcd"
+-- >>> dropEnd 0 "abcdefg"
+-- "abcdefg"
+-- >>> dropEnd 4 "abc"
+-- ""
+--
+-- @since 0.11.3.0
+dropEnd :: Int -> ShortByteString -> ShortByteString
+dropEnd n = \sbs -> let sl = length sbs
+                        nl = sl - n
+                    in if | n >= sl   -> empty
+                          | n <= 0    -> sbs
+                          | otherwise -> create nl $ \mba -> copyByteArray (asBA sbs) 0 mba 0 nl
+
+-- | Similar to 'Prelude.dropWhile',
+-- drops the longest (possibly empty) prefix of elements
+-- satisfying the predicate and returns the remainder.
+--
+-- Note: copies the entire byte array
+--
+-- @since 0.11.3.0
+dropWhile :: (Word8 -> Bool) -> ShortByteString -> ShortByteString
+dropWhile f = \sbs -> drop (findIndexOrLength (not . f) sbs) sbs
+
+-- | Similar to 'Prelude.dropWhileEnd',
+-- drops the longest (possibly empty) suffix of elements
+-- satisfying the predicate and returns the remainder.
+--
+-- @'dropWhileEnd' p@ is equivalent to @'reverse' . 'dropWhile' p . 'reverse'@.
+--
+-- @since 0.11.3.0
+dropWhileEnd :: (Word8 -> Bool) -> ShortByteString -> ShortByteString
+dropWhileEnd f = \sbs -> take (findFromEndUntil (not . f) sbs) sbs
+
+-- | Returns the longest (possibly empty) suffix of elements which __do not__
+-- satisfy the predicate and the remainder of the string.
+--
+-- 'breakEnd' @p@ is equivalent to @'spanEnd' (not . p)@ and to @('takeWhileEnd' (not . p) &&& 'dropWhileEnd' (not . p))@.
+--
+-- @since 0.11.3.0
+breakEnd :: (Word8 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString)
+breakEnd p = \sbs -> splitAt (findFromEndUntil p sbs) sbs
+
+-- | Similar to 'Prelude.break',
+-- returns the longest (possibly empty) prefix of elements which __do not__
+-- satisfy the predicate and the remainder of the string.
+--
+-- 'break' @p@ is equivalent to @'span' (not . p)@ and to @('takeWhile' (not . p) &&& 'dropWhile' (not . p))@.
+--
+-- @since 0.11.3.0
+break :: (Word8 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString)
+break = \p -> \sbs -> case findIndexOrLength p sbs of n -> (take n sbs, drop n sbs)
+
+-- | Similar to 'Prelude.span',
+-- returns the longest (possibly empty) prefix of elements
+-- satisfying the predicate and the remainder of the string.
+--
+-- 'span' @p@ is equivalent to @'break' (not . p)@ and to @('takeWhile' p &&& 'dropWhile' p)@.
+--
+-- @since 0.11.3.0
+span :: (Word8 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString)
+span p = break (not . p)
+
+-- | Returns the longest (possibly empty) suffix of elements
+-- satisfying the predicate and the remainder of the string.
+--
+-- 'spanEnd' @p@ is equivalent to @'breakEnd' (not . p)@ and to @('takeWhileEnd' p &&& 'dropWhileEnd' p)@.
+--
+-- We have
+--
+-- > spanEnd (not . isSpace) "x y z" == ("x y ", "z")
+--
+-- and
+--
+-- > spanEnd (not . isSpace) sbs
+-- >    ==
+-- > let (x, y) = span (not . isSpace) (reverse sbs) in (reverse y, reverse x)
+--
+-- @since 0.11.3.0
+spanEnd :: (Word8 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString)
+spanEnd p = \sbs -> splitAt (findFromEndUntil (not . p) sbs) sbs
+
+-- | /O(n)/ 'splitAt' @n sbs@ is equivalent to @('take' n sbs, 'drop' n sbs)@.
+--
+-- Note: copies the substrings
+--
+-- @since 0.11.3.0
+splitAt :: Int -> ShortByteString -> (ShortByteString, ShortByteString)
+splitAt n = \sbs -> if
+  | n <= 0 -> (empty, sbs)
+  | otherwise ->
+      let slen = length sbs
+      in if | n >= length sbs -> (sbs, empty)
+            | otherwise -> 
+                let llen = min slen (max 0 n)
+                    rlen = max 0 (slen - max 0 n)
+                    lsbs = create llen $ \mba -> copyByteArray (asBA sbs) 0 mba 0 llen
+                    rsbs = create rlen $ \mba -> copyByteArray (asBA sbs) n mba 0 rlen
+                in (lsbs, rsbs)
+
+-- | /O(n)/ Break a 'ShortByteString' into pieces separated by the byte
+-- argument, consuming the delimiter. I.e.
+--
+-- > split 10  "a\nb\nd\ne" == ["a","b","d","e"]   -- fromEnum '\n' == 10
+-- > split 97  "aXaXaXa"    == ["","X","X","X",""] -- fromEnum 'a' == 97
+-- > split 120 "x"          == ["",""]             -- fromEnum 'x' == 120
+-- > split undefined ""     == []                  -- and not [""]
+--
+-- and
+--
+-- > intercalate [c] . split c == id
+-- > split == splitWith . (==)
+--
+-- Note: copies the substrings
+--
+-- @since 0.11.3.0
+split :: Word8 -> ShortByteString -> [ShortByteString]
+split w = splitWith (== w)
+
+
+-- | /O(n)/ Splits a 'ShortByteString' into components delimited by
+-- separators, where the predicate returns True for a separator element.
+-- The resulting components do not contain the separators.  Two adjacent
+-- separators result in an empty component in the output.  eg.
+--
+-- > splitWith (==97) "aabbaca" == ["","","bb","c",""] -- fromEnum 'a' == 97
+-- > splitWith undefined ""     == []                  -- and not [""]
+--
+-- @since 0.11.3.0
+splitWith :: (Word8 -> Bool) -> ShortByteString -> [ShortByteString]
+splitWith p = \sbs -> if
+  | null sbs  -> []
+  | otherwise -> go sbs
+  where
+    go sbs'
+      | null sbs' = [empty]
+      | otherwise =
+          case break p sbs' of
+            (a, b)
+              | null b    -> [a]
+              | otherwise -> a : go (tail b)
+
+
+-- | /O(n)/ The 'stripSuffix' function takes two ShortByteStrings and returns 'Just'
+-- the remainder of the second iff the first is its suffix, and otherwise
+-- 'Nothing'.
+--
+-- @since 0.11.3.0
+stripSuffix :: ShortByteString -> ShortByteString -> Maybe ShortByteString
+stripSuffix sbs1 = \sbs2 -> do
+  let l1 = length sbs1
+      l2 = length sbs2
+  if | isSuffixOf sbs1 sbs2 -> 
+         if null sbs1
+         then Just sbs2
+         else Just $! create (l2 - l1) $ \dst -> do
+                copyByteArray (asBA sbs2) 0 dst 0 (l2 - l1)
+     | otherwise -> Nothing
+
+-- | /O(n)/ The 'stripPrefix' function takes two ShortByteStrings and returns 'Just'
+-- the remainder of the second iff the first is its prefix, and otherwise
+-- 'Nothing'.
+--
+-- @since 0.11.3.0
+stripPrefix :: ShortByteString -> ShortByteString -> Maybe ShortByteString
+stripPrefix sbs1 = \sbs2 -> do
+  let l1 = length sbs1
+      l2 = length sbs2
+  if | isPrefixOf sbs1 sbs2 ->
+         if null sbs1
+         then Just sbs2
+         else Just $! create (l2 - l1) $ \dst -> do
+                copyByteArray (asBA sbs2) l1 dst 0 (l2 - l1)
+     | otherwise -> Nothing
+
+
+-- ---------------------------------------------------------------------
+-- Unfolds and replicates
+
+
+-- | /O(n)/ 'replicate' @n x@ is a ByteString of length @n@ with @x@
+-- the value of every element. The following holds:
+--
+-- > replicate w c = unfoldr w (\u -> Just (u,u)) c
+--
+-- @since 0.11.3.0
+replicate :: Int -> Word8 -> ShortByteString
+replicate w c
+    | w <= 0    = empty
+    | otherwise = create w (\mba -> setByteArray mba 0 w (fromIntegral c))
+
+
+-- | /O(n)/, where /n/ is the length of the result.  The 'unfoldr'
+-- function is analogous to the List \'unfoldr\'.  'unfoldr' builds a
+-- ShortByteString from a seed value.  The function takes the element and
+-- returns 'Nothing' if it is done producing the ShortByteString or returns
+-- 'Just' @(a,b)@, in which case, @a@ is the next byte in the string,
+-- and @b@ is the seed value for further production.
+--
+-- This function is not efficient/safe. It will build a list of @[Word8]@
+-- and run the generator until it returns `Nothing`, otherwise recurse infinitely,
+-- then finally create a 'ShortByteString'.
+--
+-- If you know the maximum length, consider using 'unfoldrN'.
+--
+-- Examples:
+--
+-- >    unfoldr (\x -> if x <= 5 then Just (x, x + 1) else Nothing) 0
+-- > == pack [0, 1, 2, 3, 4, 5]
+--
+-- @since 0.11.3.0
+unfoldr :: (a -> Maybe (Word8, a)) -> a -> ShortByteString
+unfoldr f = \x0 -> packBytesRev $ go x0 []
+ where
+   go x words' = case f x of
+                    Nothing      -> words'
+                    Just (w, x') -> go x' (w:words')
+
+-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a ShortByteString from a seed
+-- value.  However, the length of the result is limited by the first
+-- argument to 'unfoldrN'.  This function is more efficient than 'unfoldr'
+-- when the maximum length of the result is known.
+--
+-- The following equation relates 'unfoldrN' and 'unfoldr':
+--
+-- > fst (unfoldrN n f s) == take n (unfoldr f s)
+--
+-- @since 0.11.3.0
+unfoldrN :: forall a. Int -> (a -> Maybe (Word8, a)) -> a -> (ShortByteString, Maybe a)
+unfoldrN i f = \x0 ->
+  if | i < 0     -> (empty, Just x0)
+     | otherwise -> createAndTrim i $ \mba -> go mba x0 0
+
+  where
+    go :: forall s. MBA s -> a -> Int -> ST s (Int, Maybe a)
+    go !mba !x !n = go' x n
+      where
+        go' :: a -> Int -> ST s (Int, Maybe a)
+        go' !x' !n'
+          | n' == i   = return (n', Just x')
+          | otherwise = case f x' of
+                          Nothing       -> return (n', Nothing)
+                          Just (w, x'') -> do
+                                             writeWord8Array mba n' w
+                                             go' x'' (n'+1)
+
+
+
+-- --------------------------------------------------------------------
+-- Predicates
+
+-- | Check whether one string is a substring of another.
+--
+-- @since 0.11.3.0
+isInfixOf :: ShortByteString -> ShortByteString -> Bool
+isInfixOf sbs = \s -> null sbs || not (null $ snd $ (GHC.Exts.inline breakSubstring) sbs s)
+
+-- |/O(n)/ The 'isPrefixOf' function takes two ShortByteStrings and returns 'True'
+--
+-- @since 0.11.3.0
+isPrefixOf :: ShortByteString -> ShortByteString -> Bool
+isPrefixOf sbs1 = \sbs2 -> do
+  let l1 = length sbs1
+      l2 = length sbs2
+  if | l1 == 0   -> True
+     | l2 < l1   -> False
+     | otherwise ->
+         let i = compareByteArraysOff (asBA sbs1) 0 (asBA sbs2) 0 l1
+         in i == 0
+
+-- | /O(n)/ The 'isSuffixOf' function takes two ShortByteStrings and returns 'True'
+-- iff the first is a suffix of the second.
+--
+-- The following holds:
+--
+-- > isSuffixOf x y == reverse x `isPrefixOf` reverse y
+--
+-- @since 0.11.3.0
+isSuffixOf :: ShortByteString -> ShortByteString -> Bool
+isSuffixOf sbs1 = \sbs2 -> do
+  let l1 = length sbs1
+      l2 = length sbs2
+  if | l1 == 0   -> True
+     | l2 < l1   -> False
+     | otherwise ->
+         let i = compareByteArraysOff (asBA sbs1) 0 (asBA sbs2) (l2 - l1) l1
+         in i == 0
+
+-- | Break a string on a substring, returning a pair of the part of the
+-- string prior to the match, and the rest of the string.
+--
+-- The following relationships hold:
+--
+-- > break (== c) l == breakSubstring (singleton c) l
+--
+-- For example, to tokenise a string, dropping delimiters:
+--
+-- > tokenise x y = h : if null t then [] else tokenise x (drop (length x) t)
+-- >     where (h,t) = breakSubstring x y
+--
+-- To skip to the first occurence of a string:
+--
+-- > snd (breakSubstring x y)
+--
+-- To take the parts of a string before a delimiter:
+--
+-- > fst (breakSubstring x y)
+--
+-- Note that calling `breakSubstring x` does some preprocessing work, so
+-- you should avoid unnecessarily duplicating breakSubstring calls with the same
+-- pattern.
+--
+-- @since 0.11.3.0
+breakSubstring :: ShortByteString -- ^ String to search for
+               -> ShortByteString -- ^ String to search in
+               -> (ShortByteString, ShortByteString) -- ^ Head and tail of string broken at substring
+breakSubstring pat =
+  case lp of
+    0 -> (empty,)
+    1 -> breakByte (head pat)
+    _ -> if lp * 8 <= finiteBitSize (0 :: Word)
+             then shift
+             else karpRabin
+  where
+    lp = length pat
+    karpRabin :: ShortByteString -> (ShortByteString, ShortByteString)
+    karpRabin src
+        | length src < lp = (src,empty)
+        | otherwise       = search (rollingHash $ take lp src) lp
+      where
+        k           = 2891336453 :: Word32
+        rollingHash = foldl' (\h b -> h * k + fromIntegral b) 0
+        hp          = rollingHash pat
+        m           = k ^ lp
+        get = fromIntegral . unsafeIndex src
+        search !hs !i
+            | hp == hs && pat == take lp b = u
+            | length src <= i              = (src, empty) -- not found
+            | otherwise                    = search hs' (i + 1)
+          where
+            u@(_, b) = splitAt (i - lp) src
+            hs' = hs * k +
+                  get i -
+                  m * get (i - lp)
+    {-# INLINE karpRabin #-}
+
+    shift :: ShortByteString -> (ShortByteString, ShortByteString)
+    shift !src
+        | length src < lp = (src, empty)
+        | otherwise       = search (intoWord $ take lp src) lp
+      where
+        intoWord :: ShortByteString -> Word
+        intoWord = foldl' (\w b -> (w `shiftL` 8) .|. fromIntegral b) 0
+
+        wp    = intoWord pat
+        mask' = (1 `shiftL` (8 * lp)) - 1
+        search !w !i
+            | w == wp         = splitAt (i - lp) src
+            | length src <= i = (src, empty)
+            | otherwise       = search w' (i + 1)
+          where
+            b  = fromIntegral (unsafeIndex src i)
+            w' = mask' .&. ((w `shiftL` 8) .|. b)
+    {-# INLINE shift #-}
+
+
+-- --------------------------------------------------------------------
+-- Searching ShortByteString
+
+-- | /O(n)/ 'elem' is the 'ShortByteString' membership predicate.
+--
+-- @since 0.11.3.0
+elem :: Word8 -> ShortByteString -> Bool
+elem c = \sbs -> case elemIndex c sbs of Nothing -> False ; _ -> True
+
+-- | /O(n)/ 'filter', applied to a predicate and a ByteString,
+-- returns a ByteString containing those characters that satisfy the
+-- predicate.
+--
+-- @since 0.11.3.0
+filter :: (Word8 -> Bool) -> ShortByteString -> ShortByteString
+filter k = \sbs -> let l = length sbs
+                   in if | l <= 0    -> sbs
+                         | otherwise -> createAndTrim' l $ \mba -> go mba (asBA sbs) l
+  where
+    go :: forall s. MBA s -- mutable output bytestring
+       -> BA              -- input bytestring
+       -> Int             -- length of input bytestring
+       -> ST s Int
+    go !mba ba !l = go' 0 0
+      where
+        go' :: Int -- bytes read
+            -> Int -- bytes written
+            -> ST s Int
+        go' !br !bw
+          | br >= l   = return bw
+          | otherwise = do
+              let w = indexWord8Array ba br
+              if k w
+              then do
+                writeWord8Array mba bw w
+                go' (br+1) (bw+1)
+              else
+                go' (br+1) bw
+
+-- | /O(n)/ The 'find' function takes a predicate and a ByteString,
+-- and returns the first element in matching the predicate, or 'Nothing'
+-- if there is no such element.
+--
+-- > find f p = case findIndex f p of Just n -> Just (p ! n) ; _ -> Nothing
+--
+-- @since 0.11.3.0
+find :: (Word8 -> Bool) -> ShortByteString -> Maybe Word8
+find f = \sbs -> case findIndex f sbs of
+                    Just n -> Just (sbs `index` n)
+                    _      -> Nothing
+
+-- | /O(n)/ The 'partition' function takes a predicate a ByteString and returns
+-- the pair of ByteStrings with elements which do and do not satisfy the
+-- predicate, respectively; i.e.,
+--
+-- > partition p bs == (filter p sbs, filter (not . p) sbs)
+--
+-- @since 0.11.3.0
+partition :: (Word8 -> Bool) -> ShortByteString -> (ShortByteString, ShortByteString)
+partition k = \sbs -> let l = length sbs
+                   in if | l <= 0    -> (sbs, sbs)
+                         | otherwise -> createAndTrim'' l $ \mba1 mba2 -> go mba1 mba2 (asBA sbs) l
+  where
+    go :: forall s.
+          MBA s           -- mutable output bytestring1
+       -> MBA s           -- mutable output bytestring2
+       -> BA              -- input bytestring
+       -> Int             -- length of input bytestring
+       -> ST s (Int, Int) -- (length mba1, length mba2)
+    go !mba1 !mba2 ba !l = go' 0 0
+      where
+        go' :: Int -- bytes read
+            -> Int -- bytes written to bytestring 1
+            -> ST s (Int, Int) -- (length mba1, length mba2)
+        go' !br !bw1
+          | br >= l   = return (bw1, br - bw1)
+          | otherwise = do
+              let w = indexWord8Array ba br
+              if k w
+              then do
+                writeWord8Array mba1 bw1 w
+                go' (br+1) (bw1+1)
+              else do
+                writeWord8Array mba2 (br - bw1) w
+                go' (br+1) bw1
+
+
+-- --------------------------------------------------------------------
+-- Indexing ShortByteString
+
+-- | /O(n)/ The 'elemIndex' function returns the index of the first
+-- element in the given 'ShortByteString' which is equal to the query
+-- element, or 'Nothing' if there is no such element.
+--
+-- @since 0.11.3.0
+elemIndex :: Word8 -> ShortByteString -> Maybe Int
+elemIndex c = \sbs@(SBS ba#) -> do
+    let l = length sbs
+    accursedUnutterablePerformIO $ do
+      !s <- c_elem_index ba# c (fromIntegral l)
+      return $! if s < 0 then Nothing else Just (fromIntegral s)
+
+
+-- | /O(n)/ The 'elemIndices' function extends 'elemIndex', by returning
+-- the indices of all elements equal to the query element, in ascending order.
+--
+-- @since 0.11.3.0
+elemIndices :: Word8 -> ShortByteString -> [Int]
+elemIndices k = findIndices (==k)
+
+-- | count returns the number of times its argument appears in the ShortByteString
+--
+-- @since 0.11.3.0
+count :: Word8 -> ShortByteString -> Int
+count w = \sbs@(SBS ba#) -> accursedUnutterablePerformIO $
+    fromIntegral <$> c_count ba# (fromIntegral $ length sbs) w
+
+-- | /O(n)/ The 'findIndex' function takes a predicate and a 'ShortByteString' and
+-- returns the index of the first element in the ByteString
+-- satisfying the predicate.
+--
+-- @since 0.11.3.0
+findIndex :: (Word8 -> Bool) -> ShortByteString -> Maybe Int
+findIndex k = \sbs ->
+  let l  = length sbs
+      ba = asBA sbs
+      w  = indexWord8Array ba
+      go !n | n >= l    = Nothing
+            | k (w n)   = Just n
+            | otherwise = go (n + 1)
+  in go 0
+
+
+-- | /O(n)/ The 'findIndices' function extends 'findIndex', by returning the
+-- indices of all elements satisfying the predicate, in ascending order.
+--
+-- @since 0.11.3.0
+findIndices :: (Word8 -> Bool) -> ShortByteString -> [Int]
+findIndices k = \sbs ->
+  let l  = length sbs
+      ba = asBA sbs
+      w  = indexWord8Array ba
+      go !n | n >= l    = []
+            | k (w n)   = n : go (n + 1)
+            | otherwise = go (n + 1)
+  in go 0
+
+
 
 ------------------------------------------------------------------------
 -- Exported low level operations
@@ -482,6 +1596,11 @@ indexCharArray (BA# ba#) (I# i#) = C# (indexCharArray# ba# i#)
 indexWord8Array :: BA -> Int -> Word8
 indexWord8Array (BA# ba#) (I# i#) = W8# (indexWord8Array# ba# i#)
 
+#if MIN_VERSION_base(4,12,0) && defined(SAFE_UNALIGNED)
+indexWord8ArrayAsWord64 :: BA -> Int -> Word64
+indexWord8ArrayAsWord64 (BA# ba#) (I# i#) = W64# (indexWord8ArrayAsWord64# ba# i#)
+#endif
+
 newByteArray :: Int -> ST s (MBA s)
 newByteArray (I# len#) =
     ST $ \s -> case newByteArray# len# s of
@@ -497,16 +1616,18 @@ unsafeFreezeByteArray (MBA# mba#) =
     ST $ \s -> case unsafeFreezeByteArray# mba# s of
                  (# s, ba# #) -> (# s, BA# ba# #)
 
-writeCharArray :: MBA s -> Int -> Char -> ST s ()
-writeCharArray (MBA# mba#) (I# i#) (C# c#) =
-  ST $ \s -> case writeCharArray# mba# i# c# s of
-               s -> (# s, () #)
-
 writeWord8Array :: MBA s -> Int -> Word8 -> ST s ()
 writeWord8Array (MBA# mba#) (I# i#) (W8# w#) =
   ST $ \s -> case writeWord8Array# mba# i# w# s of
                s -> (# s, () #)
 
+#if MIN_VERSION_base(4,12,0) && defined(SAFE_UNALIGNED)
+writeWord64Array :: MBA s -> Int -> Word64 -> ST s ()
+writeWord64Array (MBA# mba#) (I# i#) (W64# w#) =
+  ST $ \s -> case writeWord64Array# mba# i# w# s of
+               s -> (# s, () #)
+#endif
+
 copyAddrToByteArray :: Ptr a -> MBA RealWorld -> Int -> Int -> ST RealWorld ()
 copyAddrToByteArray (Ptr src#) (MBA# dst#) (I# dst_off#) (I# len#) =
     ST $ \s -> case copyAddrToByteArray# src# dst# dst_off# len# s of
@@ -522,23 +1643,54 @@ copyByteArray (BA# src#) (I# src_off#) (MBA# dst#) (I# dst_off#) (I# len#) =
     ST $ \s -> case copyByteArray# src# src_off# dst# dst_off# len# s of
                  s -> (# s, () #)
 
+setByteArray :: MBA s -> Int -> Int -> Int -> ST s ()
+setByteArray (MBA# dst#) (I# off#) (I# len#) (I# c#) =
+    ST $ \s -> case setByteArray# dst# off# len# c# s of
+                 s -> (# s, () #)
+
+copyMutableByteArray :: MBA s -> Int -> MBA s -> Int -> Int -> ST s ()
+copyMutableByteArray (MBA# src#) (I# src_off#) (MBA# dst#) (I# dst_off#) (I# len#) =
+    ST $ \s -> case copyMutableByteArray# src# src_off# dst# dst_off# len# s of
+                 s -> (# s, () #)
+
 
 ------------------------------------------------------------------------
 -- FFI imports
-
+--
 compareByteArrays :: BA -> BA -> Int -> Int
+compareByteArrays ba1 ba2 = compareByteArraysOff ba1 0 ba2 0
+
+compareByteArraysOff :: BA  -- ^ array 1
+                     -> Int -- ^ offset for array 1
+                     -> BA  -- ^ array 2
+                     -> Int -- ^ offset for array 2
+                     -> Int -- ^ length to compare
+                     -> Int -- ^ like memcmp
 #if MIN_VERSION_base(4,11,0)
-compareByteArrays (BA# ba1#) (BA# ba2#) (I# len#) =
-  I# (compareByteArrays#  ba1# 0# ba2# 0# len#)
+compareByteArraysOff (BA# ba1#) (I# ba1off#) (BA# ba2#) (I# ba2off#) (I# len#) =
+  I# (compareByteArrays#  ba1# ba1off# ba2# ba2off# len#)
 #else
-compareByteArrays (BA# ba1#) (BA# ba2#) len =
-  fromIntegral $ accursedUnutterablePerformIO $
-    c_memcmp_ByteArray ba1# ba2# (fromIntegral len)
-
-foreign import ccall unsafe "string.h memcmp"
-  c_memcmp_ByteArray :: ByteArray# -> ByteArray# -> CSize -> IO CInt
+compareByteArraysOff (BA# ba1#) ba1off (BA# ba2#) ba2off len =
+  assert (ba1off + len <= (I# (sizeofByteArray# ba1#)))
+  $ assert (ba2off + len <= (I# (sizeofByteArray# ba2#)))
+  $ fromIntegral $ accursedUnutterablePerformIO $
+    c_memcmp_ByteArray ba1#
+                       ba1off
+                       ba2#
+                       ba2off
+                       (fromIntegral len)
+  
+
+foreign import ccall unsafe "static sbs_memcmp_off"
+  c_memcmp_ByteArray :: ByteArray# -> Int -> ByteArray# -> Int -> CSize -> IO CInt
 #endif
 
+foreign import ccall unsafe "static sbs_elem_index"
+    c_elem_index :: ByteArray# -> Word8 -> CSize -> IO CPtrdiff
+
+foreign import ccall unsafe "static fpstring.h fps_count" c_count
+    :: ByteArray# -> CSize -> Word8 -> IO CSize
+
 
 ------------------------------------------------------------------------
 -- Primop replacements
@@ -591,12 +1743,12 @@ packCStringLen (_, len) =
 --
 -- @since 0.10.10.0
 useAsCString :: ShortByteString -> (CString -> IO a) -> IO a
-useAsCString bs action =
+useAsCString sbs action =
   allocaBytes (l+1) $ \buf -> do
-      copyToPtr bs 0 buf (fromIntegral l)
+      copyToPtr sbs 0 buf (fromIntegral l)
       pokeByteOff buf l (0::Word8)
       action buf
-  where l = length bs
+  where l = length sbs
 
 -- | /O(n) construction./ Use a @ShortByteString@ with a function requiring a @CStringLen@.
 -- As for @useAsCString@ this function makes a copy of the original @ShortByteString@.
@@ -604,11 +1756,11 @@ useAsCString bs action =
 --
 -- @since 0.10.10.0
 useAsCStringLen :: ShortByteString -> (CStringLen -> IO a) -> IO a
-useAsCStringLen bs action =
+useAsCStringLen sbs action =
   allocaBytes l $ \buf -> do
-      copyToPtr bs 0 buf (fromIntegral l)
+      copyToPtr sbs 0 buf (fromIntegral l)
       action (buf, l)
-  where l = length bs
+  where l = length sbs
 
 -- | /O(n)/ Check whether a 'ShortByteString' represents valid UTF-8.
 --
@@ -630,3 +1782,55 @@ moduleErrorIO fun msg = throwIO . userError $ moduleErrorMsg fun msg
 
 moduleErrorMsg :: String -> String -> String
 moduleErrorMsg fun msg = "Data.ByteString.Short." ++ fun ++ ':':' ':msg
+
+
+-- Find from the end of the string using predicate.
+--
+-- Return '0' if the predicate returns false for the entire ShortByteString.
+findFromEndUntil :: (Word8 -> Bool) -> ShortByteString -> Int
+findFromEndUntil k sbs = go (length sbs - 1)
+  where
+    ba = asBA sbs
+    go !n | n < 0                    = 0
+          | k (indexWord8Array ba n) = n + 1
+          | otherwise                = go (n - 1)
+
+findIndexOrLength :: (Word8 -> Bool) -> ShortByteString -> Int
+findIndexOrLength k sbs = go 0
+  where
+    l = length sbs
+    ba = asBA sbs
+    go !n | n >= l                   = l
+          | k (indexWord8Array ba n) = n
+          | otherwise                = go (n + 1)
+
+
+packBytesRev :: [Word8] -> ShortByteString
+packBytesRev cs = packLenBytesRev (List.length cs) cs
+
+packLenBytesRev :: Int -> [Word8] -> ShortByteString
+packLenBytesRev len ws0 =
+    create len (\mba -> go mba len ws0)
+  where
+    go :: MBA s -> Int -> [Word8] -> ST s ()
+    go !_   !_ []     = return ()
+    go !mba !i (w:ws) = do
+      writeWord8Array mba (i - 1) w
+      go mba (i - 1) ws
+
+
+breakByte :: Word8 -> ShortByteString -> (ShortByteString, ShortByteString)
+breakByte c sbs = case elemIndex c sbs of
+    Nothing -> (sbs, empty)
+    Just n  -> (take n sbs, drop n sbs)
+
+-- Common up near identical calls to `error' to reduce the number
+-- constant strings created when compiled:
+errorEmptySBS :: HasCallStack => String -> a
+errorEmptySBS fun = moduleError fun "empty ShortByteString"
+{-# NOINLINE errorEmptySBS #-}
+
+moduleError :: HasCallStack => String -> String -> a
+moduleError fun msg = error (moduleErrorMsg fun msg)
+{-# NOINLINE moduleError #-}
+
diff --git a/bench/BenchAll.hs b/bench/BenchAll.hs
index 06fab0487..81b36c1e1 100644
--- a/bench/BenchAll.hs
+++ b/bench/BenchAll.hs
@@ -44,6 +44,7 @@ import BenchCount
 import BenchCSV
 import BenchIndices
 import BenchReadInt
+import BenchShort
 
 ------------------------------------------------------------------------------
 -- Benchmark support
@@ -479,4 +480,5 @@ main = do
     , benchCSV
     , benchIndices
     , benchReadInt
+    , benchShort
     ]
diff --git a/bench/BenchShort.hs b/bench/BenchShort.hs
new file mode 100644
index 000000000..f6c37662e
--- /dev/null
+++ b/bench/BenchShort.hs
@@ -0,0 +1,235 @@
+{-# LANGUAGE BangPatterns        #-}
+{-# LANGUAGE PackageImports      #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE MagicHash           #-}
+{-# LANGUAGE OverloadedStrings   #-}
+
+module BenchShort (benchShort) where
+
+import           Data.Foldable                         (foldMap)
+import           Data.Maybe                            (listToMaybe)
+import           Data.Monoid
+import           Data.String
+import           Test.Tasty.Bench
+import           Prelude                               hiding (words)
+
+import           Data.ByteString.Short                 (ShortByteString)
+import qualified Data.ByteString.Short                 as S
+
+import           Data.ByteString.Builder
+import           Data.ByteString.Builder.Extra         (byteStringCopy,
+                                                        byteStringInsert,
+                                                        intHost)
+import           Data.ByteString.Builder.Internal      (ensureFree)
+import           Data.ByteString.Builder.Prim          (BoundedPrim, FixedPrim,
+                                                        (>$<))
+import qualified Data.ByteString.Builder.Prim          as P
+import qualified Data.ByteString.Builder.Prim.Internal as PI
+
+import           Foreign
+
+import System.Random
+
+
+
+------------------------------------------------------------------------------
+-- Benchmark
+------------------------------------------------------------------------------
+
+-- input data (NOINLINE to ensure memoization)
+----------------------------------------------
+
+-- | Few-enough repetitions to avoid making GC too expensive.
+nRepl :: Int
+nRepl = 10000
+
+{-# NOINLINE intData #-}
+intData :: [Int]
+intData = [1..nRepl]
+
+{-# NOINLINE byteStringData #-}
+byteStringData :: S.ShortByteString
+byteStringData = S.pack $ map fromIntegral intData
+
+{-# NOINLINE loremIpsum #-}
+loremIpsum :: S.ShortByteString
+loremIpsum = mconcat
+  [ "  Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor"
+  , "incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis"
+  , "nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat."
+  , "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu"
+  , "fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in"
+  , "culpa qui officia deserunt mollit anim id est laborum."
+  ]
+
+-- benchmark wrappers
+---------------------
+
+{-# INLINE benchB' #-}
+benchB' :: String -> a -> (a -> ShortByteString) -> Benchmark
+benchB' name x b = bench name $ whnf (S.length . b) x
+
+
+-- We use this construction of just looping through @n,n-1,..,1@ to ensure that
+-- we measure the speed of the encoding and not the speed of generating the
+-- values to be encoded.
+{-# INLINE benchIntEncodingB #-}
+benchIntEncodingB :: Int              -- ^ Maximal 'Int' to write
+                  -> BoundedPrim Int  -- ^ 'BoundedPrim' to execute
+                  -> IO ()            -- ^ 'IO' action to benchmark
+benchIntEncodingB n0 w
+  | n0 <= 0   = return ()
+  | otherwise = do
+      fpbuf <- mallocForeignPtrBytes (n0 * PI.sizeBound w)
+      withForeignPtr fpbuf (loop n0) >> return ()
+  where
+    loop !n !op
+      | n <= 0    = return op
+      | otherwise = PI.runB w n op >>= loop (n - 1)
+
+
+-- Helpers
+-------------
+
+hashInt :: Int -> Int
+hashInt x = iterate step x !! 10
+  where
+    step a = e
+      where b = (a `xor` 61) `xor` (a `shiftR` 16)
+            c = b + (b `shiftL` 3)
+            d = c `xor` (c `shiftR` 4)
+            e = d * 0x27d4eb2d
+            f = e `xor` (e `shiftR` 15)
+
+w :: Int -> Word8
+w = fromIntegral
+
+hashWord8 :: Word8 -> Word8
+hashWord8 = fromIntegral . hashInt . fromIntegral
+
+foldInputs :: [S.ShortByteString]
+foldInputs = map (\k -> S.pack $ if k <= 6 then take (2 ^ k) [32..95] else concat (replicate (2 ^ (k - 6)) [32..95])) [0..16]
+
+largeTraversalInput :: S.ShortByteString
+largeTraversalInput = S.concat (replicate 10 byteStringData)
+
+smallTraversalInput :: S.ShortByteString
+smallTraversalInput = "The quick brown fox"
+
+zeroes :: S.ShortByteString
+zeroes = S.replicate 10000 0
+
+partitionStrict p = nf (S.partition p) . randomStrict $ mkStdGen 98423098
+  where randomStrict = fst . S.unfoldrN 10000 (Just . random)
+
+-- ASCII \n to ensure no typos
+nl :: Word8
+nl = 0xa
+{-# INLINE nl #-}
+
+-- non-inlined equality test
+nilEq :: Word8 -> Word8 -> Bool
+{-# NOINLINE nilEq #-}
+nilEq = (==)
+
+-- lines of 200 letters from a to e, followed by repeated letter f
+absurdlong :: S.ShortByteString
+absurdlong = S.replicate 200 0x61 <> S.singleton nl
+          <> S.replicate 200 0x62 <> S.singleton nl
+          <> S.replicate 200 0x63 <> S.singleton nl
+          <> S.replicate 200 0x64 <> S.singleton nl
+          <> S.replicate 200 0x65 <> S.singleton nl
+          <> S.replicate 999999 0x66
+
+bench_find_index_second :: ShortByteString -> Maybe Int
+bench_find_index_second bs =
+  let isNl = (== nl)
+   in case S.findIndex isNl bs of
+        Just !i -> S.findIndex isNl (S.drop (i+1) bs)
+        Nothing -> Nothing
+{-# INLINE bench_find_index_second #-}
+
+bench_elem_index_second :: ShortByteString -> Maybe Int
+bench_elem_index_second bs =
+    case S.elemIndex nl bs of
+        Just !i -> S.elemIndex nl (S.drop (i+1) bs)
+        Nothing -> Nothing
+{-# INLINE bench_elem_index_second #-}
+
+
+
+-- benchmarks
+-------------
+
+benchShort :: Benchmark
+benchShort = bgroup "ShortByteString"
+    [ bgroup "Small payload"
+      [ benchB' "mempty"        ()  (const mempty)
+      , benchB' "UTF-8 String (naive)" "hello world\0" fromString
+      , benchB' "String (naive)" "hello world!" fromString
+      ]
+    , bgroup "intercalate"
+      [ bench "intercalate (large)" $ whnf (S.intercalate $ " and also ") (replicate 300 "expression")
+      , bench "intercalate (small)" $ whnf (S.intercalate "&") (replicate 30 "foo")
+      , bench "intercalate (tiny)" $ whnf (S.intercalate "&") (["foo", "bar", "baz"])
+      ]
+    , bgroup "partition"
+      [
+        bgroup "strict"
+        [
+          bench "mostlyTrueFast"  $ partitionStrict (< (w 225))
+        , bench "mostlyFalseFast" $ partitionStrict (< (w 10))
+        , bench "balancedFast"    $ partitionStrict (< (w 128))
+
+        , bench "mostlyTrueSlow"  $ partitionStrict (\x -> hashWord8 x < w 225)
+        , bench "mostlyFalseSlow" $ partitionStrict (\x -> hashWord8 x < w 10)
+        , bench "balancedSlow"    $ partitionStrict (\x -> hashWord8 x < w 128)
+        ]
+      ]
+    , bgroup "folds"
+      [ bgroup "strict"
+        [ bgroup "foldl'" $ map (\s -> bench (show $ S.length s) $
+            nf (S.foldl' (\acc x -> acc + fromIntegral x) (0 :: Int)) s) foldInputs
+        , bgroup "foldr'" $ map (\s -> bench (show $ S.length s) $
+            nf (S.foldr' (\x acc -> fromIntegral x + acc) (0 :: Int)) s) foldInputs
+        , bgroup "foldr1'" $ map (\s -> bench (show $ S.length s) $
+            nf (S.foldr1' (\x  acc -> fromIntegral x + acc)) s) foldInputs
+        , bgroup "unfoldrN" $ map (\s -> bench (show $ S.length s) $
+            nf (S.unfoldrN (S.length s) (\a -> Just (a, a + 1))) 0) foldInputs
+        , bgroup "filter" $ map (\s -> bench (show $ S.length s) $
+            nf (S.filter odd) s) foldInputs
+        ]
+      ]
+    , bgroup "findIndexOrLength"
+      [ bench "takeWhile"      $ nf (S.takeWhile even) zeroes
+      , bench "dropWhile"      $ nf (S.dropWhile even) zeroes
+      , bench "break"          $ nf (S.break odd) zeroes
+      ]
+    , bgroup "findIndex_"
+      [ bench "findIndices"    $ nf (sum . S.findIndices (\x -> x ==  129 || x == 72)) byteStringData
+      , bench "find"           $ nf (S.find (>= 198)) byteStringData
+      ]
+    , bgroup "traversals"
+      [ bench "map (+1) large" $ nf (S.map (+ 1)) largeTraversalInput
+      , bench "map (+1) small" $ nf (S.map (+ 1)) smallTraversalInput
+      ]
+    , bgroup "ShortByteString strict first index" $
+        [ bench "FindIndices" $ nf (listToMaybe . S.findIndices (== nl)) absurdlong
+        , bench "ElemIndices" $ nf (listToMaybe . S.elemIndices     nl)  absurdlong
+        , bench "FindIndex"   $ nf (S.findIndex (== nl)) absurdlong
+        , bench "ElemIndex"   $ nf (S.elemIndex     nl)  absurdlong
+        ]
+    , bgroup "ShortByteString strict second index" $
+        [ bench "FindIndices" $ nf (listToMaybe . tail . S.findIndices (== nl)) absurdlong
+        , bench "ElemIndices" $ nf (listToMaybe . tail . S.elemIndices     nl)  absurdlong
+        , bench "FindIndex"   $ nf bench_find_index_second absurdlong
+        , bench "ElemIndex"   $ nf bench_elem_index_second absurdlong
+        ]
+    , bgroup "ShortByteString index equality inlining" $
+        [ bench "FindIndices/inlined"     $ nf (S.findIndices    (== nl)) absurdlong
+        , bench "FindIndices/non-inlined" $ nf (S.findIndices (nilEq nl)) absurdlong
+        , bench "FindIndex/inlined"       $ nf (S.findIndex      (== nl)) absurdlong
+        , bench "FindIndex/non-inlined"   $ nf (S.findIndex   (nilEq nl)) absurdlong
+        ]
+    ]
+
diff --git a/bytestring.cabal b/bytestring.cabal
index 262287626..eddbcff1a 100644
--- a/bytestring.cabal
+++ b/bytestring.cabal
@@ -1,5 +1,5 @@
 Name:                bytestring
-Version:             0.11.2.0
+Version:             0.11.3.0
 Synopsis:            Fast, compact, strict and lazy byte strings with a list interface
 Description:
     An efficient compact, immutable byte string type (both strict and lazy)
@@ -125,13 +125,15 @@ library
   
   c-sources:        cbits/fpstring.c
                     cbits/itoa.c
+                    cbits/shortbytestring.c
   
   if (arch(aarch64))
     c-sources:        cbits/aarch64/is-valid-utf8.c
   else
     c-sources:        cbits/is-valid-utf8.c
   
-  cc-options:        -std=c11
+  -- DNDEBUG disables asserts in cbits/
+  cc-options:        -std=c11 -DNDEBUG=1
  
   -- Required, due to the following issues:
   -- * https://gitlab.haskell.org/ghc/ghc/-/issues/20525#note_385580
@@ -158,6 +160,7 @@ test-suite bytestring-tests
                     Properties.ByteStringChar8
                     Properties.ByteStringLazy
                     Properties.ByteStringLazyChar8
+                    Properties.ShortByteString
                     QuickCheckUtils
   hs-source-dirs:   tests,
                     tests/builder
@@ -181,6 +184,7 @@ benchmark bytestring-bench
                     BenchCSV
                     BenchIndices
                     BenchReadInt
+                    BenchShort
   type:             exitcode-stdio-1.0
   hs-source-dirs:   bench
   default-language: Haskell2010
diff --git a/cbits/shortbytestring.c b/cbits/shortbytestring.c
new file mode 100644
index 000000000..3cadc94bc
--- /dev/null
+++ b/cbits/shortbytestring.c
@@ -0,0 +1,35 @@
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+
+
+int
+sbs_memcmp_off(const void *s1,
+            size_t off1,
+            const void *s2,
+            size_t off2,
+            size_t n)
+{
+    const void *s1o = s1 + off1;
+    const void *s2o = s2 + off2;
+
+    int r = memcmp(s1o, s2o, n);
+
+    return r;
+}
+
+ptrdiff_t
+sbs_elem_index(const void *s,
+            int c,
+            size_t n)
+{
+    const void *so = memchr(s, c, n);
+
+    if (so) {
+        ptrdiff_t diff = so - s;
+        assert(diff >= 0);
+        return diff;
+    } else {
+        return -1;
+    }
+}
diff --git a/tests/Properties.hs b/tests/Properties.hs
index 5591680b8..0b3c29f96 100644
--- a/tests/Properties.hs
+++ b/tests/Properties.hs
@@ -61,6 +61,7 @@ import QuickCheckUtils
 import Test.Tasty
 import Test.Tasty.QuickCheck
 
+import qualified Properties.ShortByteString as PropSBS
 import qualified Properties.ByteString as PropBS
 import qualified Properties.ByteStringChar8 as PropBS8
 import qualified Properties.ByteStringLazy as PropBL
@@ -551,14 +552,15 @@ explosiveTail = (`L.append` error "Tail of this byte string is undefined!")
 
 testSuite :: TestTree
 testSuite = testGroup "Properties"
-  [ testGroup "StrictWord8" PropBS.tests
-  , testGroup "StrictChar8" PropBS8.tests
-  , testGroup "LazyWord8"   PropBL.tests
-  , testGroup "LazyChar8"   PropBL8.tests
-  , testGroup "Misc"        misc_tests
-  , testGroup "IO"          io_tests
-  , testGroup "Short"       short_tests
-  , testGroup "Strictness"  strictness_checks
+  [ testGroup "ShortByteString" PropSBS.tests
+  , testGroup "StrictWord8"     PropBS.tests
+  , testGroup "StrictChar8"     PropBS8.tests
+  , testGroup "LazyWord8"       PropBL.tests
+  , testGroup "LazyChar8"       PropBL8.tests
+  , testGroup "Misc"            misc_tests
+  , testGroup "IO"              io_tests
+  , testGroup "Short"           short_tests
+  , testGroup "Strictness"      strictness_checks
   ]
 
 io_tests =
diff --git a/tests/Properties/ByteString.hs b/tests/Properties/ByteString.hs
index a07c62bbc..757059701 100644
--- a/tests/Properties/ByteString.hs
+++ b/tests/Properties/ByteString.hs
@@ -29,12 +29,19 @@
 -- Properties.ByteString{Char8,Lazy,LazyChar8}, which include this file.
 #ifndef BYTESTRING_CHAR8
 
-#ifndef BYTESTRING_LAZY
+#if defined(BYTESTRING_SHORT)
+module Properties.ShortByteString (tests) where
+import qualified Data.ByteString.Short as B
+import qualified Data.ByteString.Short.Internal as B
+#define BYTESTRING_TYPE B.ShortByteString
+#elif !(defined BYTESTRING_LAZY)
 module Properties.ByteString (tests) where
+#define BYTESTRING_TYPE B.ByteString
 import qualified Data.ByteString as B
 import GHC.IO.Encoding
 #else
 module Properties.ByteStringLazy (tests) where
+#define BYTESTRING_TYPE B.ByteString
 import qualified Data.ByteString.Lazy as B
 import qualified Data.ByteString.Lazy.Internal as B (invariant)
 #endif
@@ -44,10 +51,12 @@ import qualified Data.ByteString.Lazy.Internal as B (invariant)
 #ifndef BYTESTRING_LAZY
 module Properties.ByteStringChar8 (tests) where
 import qualified Data.ByteString.Char8 as B
+#define BYTESTRING_TYPE B.ByteString
 #else
 module Properties.ByteStringLazyChar8 (tests) where
 import qualified Data.ByteString.Lazy.Char8 as B
 import qualified Data.ByteString.Lazy.Internal as B (invariant)
+#define BYTESTRING_TYPE B.ByteString
 #endif
 
 import Data.Int
@@ -77,7 +86,7 @@ toElem :: Char8 -> Char
 toElem (Char8 c) = c
 
 class (Integral a, Show a) => RdInt a where
-    bread :: B.ByteString -> Maybe (a, B.ByteString)
+    bread :: BYTESTRING_TYPE -> Maybe (a, BYTESTRING_TYPE)
     sread :: String -> Maybe (a, String)
 
 instance RdInt Int    where { bread = B.readInt;     sread = readInt }
@@ -120,13 +129,14 @@ tests =
   , testProperty "unpack . pack" $
     \(map toElem -> xs) -> xs === B.unpack (B.pack xs)
   , testProperty "read . show" $
-    \x -> (x :: B.ByteString) === read (show x)
+    \x -> (x :: BYTESTRING_TYPE) === read (show x)
+#ifndef BYTESTRING_SHORT
   , testProperty "fromStrict . toStrict" $
     \x -> B.fromStrict (B.toStrict x) === x
   , testProperty "toStrict . fromStrict" $
     \x -> B.toStrict (B.fromStrict x) === x
-#ifndef BYTESTRING_LAZY
-#ifndef BYTESTRING_CHAR8
+#endif
+#if !defined(BYTESTRING_LAZY) && !defined(BYTESTRING_CHAR8) && !defined(BYTESTRING_SHORT)
   , testProperty "toFilePath >>= fromFilePath" $
     \x -> ioProperty $ do
       r <- B.toFilePath x >>= B.fromFilePath
@@ -141,24 +151,25 @@ tests =
     pure $ case textEncodingName enc of
       "ASCII" -> property (prop . getASCIIString)
       _       -> property prop
-#endif
 #endif
 
   , testProperty "==" $
     \x y -> (x == y) === (B.unpack x == B.unpack y)
   , testProperty "== refl" $
-    \x -> (x :: B.ByteString) == x
+    \x -> (x :: BYTESTRING_TYPE) == x
   , testProperty "== symm" $
-    \x y -> ((x :: B.ByteString) == y) === (y == x)
+    \x y -> ((x :: BYTESTRING_TYPE) == y) === (y == x)
   , testProperty "== pack unpack" $
     \x -> x == B.pack (B.unpack x)
+#ifndef BYTESTRING_SHORT
   , testProperty "== copy" $
     \x -> x == B.copy x
+#endif
 
   , testProperty "compare" $
     \x y -> compare x y === compare (B.unpack x) (B.unpack y)
   , testProperty "compare EQ" $
-    \x -> compare (x :: B.ByteString) x == EQ
+    \x -> compare (x :: BYTESTRING_TYPE) x == EQ
   , testProperty "compare GT" $
     \x (toElem -> c) -> compare (B.snoc x c) x == GT
   , testProperty "compare LT" $
@@ -204,6 +215,7 @@ tests =
     \x -> B.null x === null (B.unpack x)
   , testProperty "reverse" $
     \x -> B.unpack (B.reverse x) === reverse (B.unpack x)
+#ifndef BYTESTRING_SHORT
   , testProperty "transpose" $
     \xs -> map B.unpack (B.transpose xs) === List.transpose (map B.unpack xs)
   , testProperty "group" $
@@ -218,6 +230,7 @@ tests =
     \x -> map B.unpack (B.inits x) === List.inits (B.unpack x)
   , testProperty "tails" $
     \x -> map B.unpack (B.tails x) === List.tails (B.unpack x)
+#endif
   , testProperty "all" $
     \f x -> B.all f x === all f (B.unpack x)
   , testProperty "all ==" $
@@ -232,8 +245,10 @@ tests =
     \x y -> B.unpack (mappend x y) === B.unpack x `mappend` B.unpack y
   , testProperty "<>" $
     \x y -> B.unpack (x <> y) === B.unpack x <> B.unpack y
+#ifndef BYTESTRING_SHORT
   , testProperty "stimes" $
-    \(Sqrt (NonNegative n)) (Sqrt x) -> stimes (n :: Int) (x :: B.ByteString) === mtimesDefault n x
+    \(Sqrt (NonNegative n)) (Sqrt x) -> stimes (n :: Int) (x :: BYTESTRING_TYPE) === mtimesDefault n x
+#endif
 
   , testProperty "break" $
     \f x -> (B.unpack *** B.unpack) (B.break f x) === break f (B.unpack x)
@@ -261,10 +276,12 @@ tests =
     \x -> (B.unpack *** B.unpack) (B.break isSpace x) === break isSpace (B.unpack x)
 #endif
 
+#ifndef BYTESTRING_SHORT
   , testProperty "concatMap" $
     \f x -> B.unpack (B.concatMap f x) === concatMap (B.unpack . f) (B.unpack x)
   , testProperty "concatMap singleton" $
     \x -> B.unpack (B.concatMap B.singleton x) === concatMap (: []) (B.unpack x)
+#endif
 
   , testProperty "singleton" $
     \(toElem -> c) -> B.unpack (B.singleton c) === [c]
@@ -381,8 +398,10 @@ tests =
     \f x -> B.find f x === find f (B.unpack x)
   , testProperty "findIndex" $
     \f x -> B.findIndex f x === fmap fromIntegral (List.findIndex f (B.unpack x))
+#ifndef BYTESTRING_SHORT
   , testProperty "findIndexEnd" $
     \f x -> B.findIndexEnd f x === fmap fromIntegral (findIndexEnd f (B.unpack x))
+#endif
   , testProperty "findIndices" $
     \f x -> B.findIndices f x === fmap fromIntegral (List.findIndices f (B.unpack x))
   , testProperty "findIndices ==" $
@@ -390,12 +409,16 @@ tests =
 
   , testProperty "elem" $
     \(toElem -> c) x -> B.elem c x === elem c (B.unpack x)
+#ifndef BYTESTRING_SHORT
   , testProperty "notElem" $
     \(toElem -> c) x -> B.notElem c x === notElem c (B.unpack x)
+#endif
   , testProperty "elemIndex" $
     \(toElem -> c) x -> B.elemIndex c x === fmap fromIntegral (List.elemIndex c (B.unpack x))
+#ifndef BYTESTRING_SHORT
   , testProperty "elemIndexEnd" $
     \(toElem -> c) x -> B.elemIndexEnd c x === fmap fromIntegral (elemIndexEnd c (B.unpack x))
+#endif
   , testProperty "elemIndices" $
     \(toElem -> c) x -> B.elemIndices c x === fmap fromIntegral (List.elemIndices c (B.unpack x))
 
@@ -462,10 +485,12 @@ tests =
     \x -> not (B.null x) ==> B.unpack (B.init x) === init (B.unpack x)
   , testProperty "init length" $
     \x -> not (B.null x) ==> B.length x === 1 + B.length (B.init x)
+#ifndef BYTESTRING_SHORT
   , testProperty "maximum" $
     \x -> not (B.null x) ==> B.maximum x === maximum (B.unpack x)
   , testProperty "minimum" $
     \x -> not (B.null x) ==> B.minimum x === minimum (B.unpack x)
+#endif
 
   , testProperty "foldl" $
     \f (toElem -> c) x -> B.foldl ((toElem .) . f) c x === foldl ((toElem .) . f) c (B.unpack x)
@@ -509,6 +534,7 @@ tests =
   , testProperty "foldr1 max" $
     \x -> not (B.null x) ==> B.foldr1 max x === B.foldr max minBound x
 
+#ifndef BYTESTRING_SHORT
   , testProperty "scanl" $
     \f (toElem -> c) x -> B.unpack (B.scanl ((toElem .) . f) c x) === scanl ((toElem .) . f) c (B.unpack x)
   , testProperty "scanl foldl" $
@@ -524,14 +550,17 @@ tests =
     \f x -> B.unpack (B.scanr1 ((toElem .) . f) x) === scanr1 ((toElem .) . f) (B.unpack x)
   , testProperty "scanr1 empty" $
     \f -> B.scanr1 f B.empty === B.empty
+#endif
 
-#ifndef BYTESTRING_LAZY
+#if !defined(BYTESTRING_LAZY) && !defined(BYTESTRING_SHORT)
   , testProperty "sort" $
     \x -> B.unpack (B.sort x) === List.sort (B.unpack x)
 #endif
 
+#ifndef BYTESTRING_SHORT
   , testProperty "intersperse" $
     \(toElem -> c) x -> B.unpack (B.intersperse c x) === List.intersperse c (B.unpack x)
+#endif
   , testProperty "intercalate" $
     \(Sqrt x) (Sqrt ys) -> B.unpack (B.intercalate x ys) === List.intercalate (B.unpack x) (map B.unpack ys)
   , testProperty "intercalate 'c' [x,y]" $
@@ -539,6 +568,7 @@ tests =
   , testProperty "intercalate split" $
     \(toElem -> c) x -> B.intercalate (B.singleton c) (B.split c x) === x
 
+#ifndef BYTESTRING_SHORT
   , testProperty "mapAccumL" $
     \f (toElem -> c) x -> second B.unpack (B.mapAccumL ((second toElem .) . f) c x) ===
       List.mapAccumL ((second toElem .) . f) c (B.unpack x)
@@ -554,6 +584,7 @@ tests =
     \f x y -> B.unpack (B.packZipWith ((toElem .) . f) x y) === zipWith ((toElem .) . f) (B.unpack x) (B.unpack y)
   , testProperty "unzip" $
     \(fmap (toElem *** toElem) -> xs) -> (B.unpack *** B.unpack) (B.unzip xs) === unzip xs
+#endif
 
   , testProperty "index" $
     \(NonNegative n) x -> fromIntegral n < B.length x ==> B.index x (fromIntegral n) === B.unpack x !! n
@@ -631,11 +662,13 @@ unsnoc :: [a] -> Maybe ([a], a)
 unsnoc [] = Nothing
 unsnoc xs = Just (init xs, last xs)
 
+#ifndef BYTESTRING_SHORT
 findIndexEnd :: (a -> Bool) -> [a] -> Maybe Int
 findIndexEnd f xs = fmap (\n -> length xs - 1 - n) (List.findIndex f (reverse xs))
 
 elemIndexEnd :: Eq a => a -> [a] -> Maybe Int
 elemIndexEnd c xs = fmap (\n -> length xs - 1 - n) (List.elemIndex c (reverse xs))
+#endif
 
 stripSuffix :: Eq a => [a] -> [a] -> Maybe [a]
 stripSuffix x y = fmap reverse (List.stripPrefix (reverse x) (reverse y))
diff --git a/tests/Properties/ShortByteString.hs b/tests/Properties/ShortByteString.hs
new file mode 100644
index 000000000..a0d9154ef
--- /dev/null
+++ b/tests/Properties/ShortByteString.hs
@@ -0,0 +1,5 @@
+{-# LANGUAGE CPP #-}
+
+#define BYTESTRING_SHORT
+
+#include "ByteString.hs"
diff --git a/tests/QuickCheckUtils.hs b/tests/QuickCheckUtils.hs
index ea2f3db3d..631f78ec9 100644
--- a/tests/QuickCheckUtils.hs
+++ b/tests/QuickCheckUtils.hs
@@ -19,6 +19,7 @@ import Data.Int
 import System.IO
 import Foreign.C (CChar)
 
+import qualified Data.ByteString.Short as SB
 import qualified Data.ByteString      as P
 import qualified Data.ByteString.Lazy as L
 import qualified Data.ByteString.Lazy.Internal as L (checkInvariant,ByteString(..))
@@ -101,3 +102,16 @@ instance Arbitrary a => Arbitrary (Sqrt a) where
   arbitrary = Sqrt <$> sized
     (\n -> resize (round @Double $ sqrt $ fromIntegral @Int n) arbitrary)
   shrink = map Sqrt . shrink . unSqrt
+
+
+sizedShortByteString :: Int -> Gen SB.ShortByteString
+sizedShortByteString n = do m <- choose(0, n)
+                            fmap SB.pack $ vectorOf m arbitrary
+
+instance Arbitrary SB.ShortByteString where
+  arbitrary = sized sizedShortByteString
+  shrink = map SB.pack . shrink . SB.unpack
+
+instance CoArbitrary SB.ShortByteString where
+  coarbitrary s = coarbitrary (SB.unpack s)
+