diff --git a/Data/ByteString.hs b/Data/ByteString.hs
index bcab21170..dfb015d89 100644
--- a/Data/ByteString.hs
+++ b/Data/ByteString.hs
@@ -230,7 +230,7 @@ import Prelude hiding           (reverse,head,tail,last,init,null
 
 import Data.Bits                (finiteBitSize, shiftL, (.|.), (.&.))
 
-import Data.ByteString.Internal
+import Data.ByteString.Internal.Type
 import Data.ByteString.Lazy.Internal (fromStrict, toStrict)
 import Data.ByteString.Unsafe
 
@@ -267,7 +267,6 @@ import GHC.IO                   (unsafePerformIO, unsafeDupablePerformIO)
 import GHC.Foreign              (newCStringLen, peekCStringLen)
 import GHC.Stack.Types          (HasCallStack)
 import Data.Char                (ord)
-import Foreign.Marshal.Utils    (copyBytes)
 
 import GHC.Base                 (build)
 import GHC.Word hiding (Word8)
@@ -375,16 +374,16 @@ infixl 5 `snoc`
 -- | /O(n)/ 'cons' is analogous to (:) for lists, but of different
 -- complexity, as it requires making a copy.
 cons :: Word8 -> ByteString -> ByteString
-cons c (BS x l) = unsafeCreate (l+1) $ \p -> unsafeWithForeignPtr x $ \f -> do
-        poke p c
-        memcpy (p `plusPtr` 1) f l
+cons c (BS x l) = unsafeCreateFp (l+1) $ \p -> do
+        pokeFp p c
+        memcpyFp (p `plusForeignPtr` 1) x l
 {-# INLINE cons #-}
 
 -- | /O(n)/ Append a byte to the end of a 'ByteString'
 snoc :: ByteString -> Word8 -> ByteString
-snoc (BS x l) c = unsafeCreate (l+1) $ \p -> unsafeWithForeignPtr x $ \f -> do
-        memcpy p f l
-        poke (p `plusPtr` l) c
+snoc (BS x l) c = unsafeCreateFp (l+1) $ \p -> do
+        memcpyFp p x l
+        pokeFp (p `plusForeignPtr` l) c
 {-# INLINE snoc #-}
 
 -- | /O(1)/ Extract the first element of a ByteString, which must be non-empty.
@@ -459,8 +458,7 @@ append = mappend
 -- | /O(n)/ 'map' @f xs@ is the ByteString obtained by applying @f@ to each
 -- element of @xs@.
 map :: (Word8 -> Word8) -> ByteString -> ByteString
-map f (BS fp len) = unsafeDupablePerformIO $ unsafeWithForeignPtr fp $ \srcPtr ->
-    create len $ \dstPtr -> m srcPtr dstPtr
+map f (BS srcPtr len) = unsafeCreateFp len $ \dstPtr -> m srcPtr dstPtr
   where
     m !p1 !p2 = map_ 0
       where
@@ -468,15 +466,17 @@ map f (BS fp len) = unsafeDupablePerformIO $ unsafeWithForeignPtr fp $ \srcPtr -
       map_ !n
          | n >= len = return ()
          | otherwise = do
-              x <- peekByteOff p1 n
-              pokeByteOff p2 n (f x)
+              x <- peekFpByteOff p1 n
+              pokeFpByteOff p2 n (f x)
               map_ (n+1)
 {-# INLINE map #-}
 
 -- | /O(n)/ 'reverse' @xs@ efficiently returns the elements of @xs@ in reverse order.
 reverse :: ByteString -> ByteString
-reverse (BS x l) = unsafeCreate l $ \p -> unsafeWithForeignPtr x $ \f ->
-        c_reverse p f (fromIntegral l)
+reverse (BS x l) = unsafeCreateFp l $ \fp ->
+  unsafeWithForeignPtr fp $ \p ->
+    unsafeWithForeignPtr x  $ \f ->
+      c_reverse p f (fromIntegral l)
 
 -- | /O(n)/ The 'intersperse' function takes a 'Word8' and a
 -- 'ByteString' and \`intersperses\' that byte between the elements of
@@ -485,8 +485,10 @@ reverse (BS x l) = unsafeCreate l $ \p -> unsafeWithForeignPtr x $ \f ->
 intersperse :: Word8 -> ByteString -> ByteString
 intersperse c ps@(BS x l)
     | length ps < 2  = ps
-    | otherwise      = unsafeCreate (2*l-1) $ \p -> unsafeWithForeignPtr x $ \f ->
-        c_intersperse p f (fromIntegral l) c
+    | otherwise      = unsafeCreateFp (2*l-1) $ \fp ->
+      unsafeWithForeignPtr fp $ \p ->
+        unsafeWithForeignPtr x $ \f ->
+          c_intersperse p f (fromIntegral l) c
 
 -- | The 'transpose' function transposes the rows and columns of its
 -- 'ByteString' argument.
@@ -533,13 +535,13 @@ foldl' f v = \(BS fp len) ->
   let
     g ptr = go v ptr
       where
-        end  = ptr `plusPtr` len
+        end  = ptr `plusForeignPtr` len
         -- tail recursive; traverses array left to right
         go !z !p | p == end  = return z
-                 | otherwise = do x <- peek p
-                                  go (f z x) (p `plusPtr` 1)
+                 | otherwise = do x <- peekFp p
+                                  go (f z x) (p `plusForeignPtr` 1)
   in
-    accursedUnutterablePerformIO $ unsafeWithForeignPtr fp g
+    accursedUnutterablePerformIO $ g fp
 {-# INLINE foldl' #-}
 
 -- | 'foldr', applied to a binary operator, a starting value
@@ -567,15 +569,15 @@ foldr' :: (Word8 -> a -> a) -> a -> ByteString -> a
 foldr' k v = \(BS fp len) ->
           -- see fold inlining
   let
-    g ptr = go v (end `plusPtr` len)
+    g ptr = go v (end `plusForeignPtr` len)
       where
-        end = ptr `plusPtr` (-1)
+        end = ptr `plusForeignPtr` (-1)
         -- tail recursive; traverses array right to left
         go !z !p | p == end  = return z
-                 | otherwise = do x <- peek p
-                                  go (k x z) (p `plusPtr` (-1))
+                 | otherwise = do x <- peekFp p
+                                  go (k x z) (p `plusForeignPtr` (-1))
   in
-    accursedUnutterablePerformIO $ unsafeWithForeignPtr fp g
+    accursedUnutterablePerformIO $ g fp
 
 {-# INLINE foldr' #-}
 
@@ -630,15 +632,15 @@ concatMap f = concat . foldr ((:) . f) []
 -- any element of the 'ByteString' satisfies the predicate.
 any :: (Word8 -> Bool) -> ByteString -> Bool
 any _ (BS _ 0)   = False
-any f (BS x len) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x g
+any f (BS x len) = accursedUnutterablePerformIO $ g x
   where
     g ptr = go ptr
       where
-        end = ptr `plusPtr` len
+        end = ptr `plusForeignPtr` len
         go !p | p == end  = return False
-              | otherwise = do c <- peek p
+              | otherwise = do c <- peekFp p
                                if f c then return True
-                                      else go (p `plusPtr` 1)
+                                      else go (p `plusForeignPtr` 1)
 {-# INLINE [1] any #-}
 
 {-# RULES
@@ -659,15 +661,15 @@ anyByte c (BS x l) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p
 -- if all elements of the 'ByteString' satisfy the predicate.
 all :: (Word8 -> Bool) -> ByteString -> Bool
 all _ (BS _ 0)   = True
-all f (BS x len) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x g
+all f (BS x len) = accursedUnutterablePerformIO $ g x
   where
     g ptr = go ptr
       where
-        end = ptr `plusPtr` len
+        end = ptr `plusForeignPtr` len
         go !p | p == end  = return True  -- end of list
-              | otherwise = do c <- peek p
+              | otherwise = do c <- peekFp p
                                if f c
-                                  then go (p `plusPtr` 1)
+                                  then go (p `plusForeignPtr` 1)
                                   else return False
 {-# INLINE [1] all #-}
 
@@ -705,7 +707,7 @@ minimum xs@(BS x l)
 -- passing an accumulating parameter from left to right, and returning a
 -- final value of this accumulator together with the new ByteString.
 mapAccumL :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
-mapAccumL f acc = \(BS fp len) -> unsafeDupablePerformIO $ unsafeWithForeignPtr fp $ \a -> do
+mapAccumL f acc = \(BS a len) -> unsafeDupablePerformIO $ do
                -- see fold inlining
     gp   <- mallocByteString len
     let
@@ -714,11 +716,11 @@ mapAccumL f acc = \(BS fp len) -> unsafeDupablePerformIO $ unsafeWithForeignPtr
           mapAccumL_ !s !n
              | n >= len = return s
              | otherwise = do
-                  x <- peekByteOff src n
+                  x <- peekFpByteOff src n
                   let (s', y) = f s x
-                  pokeByteOff dst n y
+                  pokeFpByteOff dst n y
                   mapAccumL_ s' (n+1)
-    acc' <- unsafeWithForeignPtr gp (go a)
+    acc' <- go a gp
     return (acc', BS gp len)
 {-# INLINE mapAccumL #-}
 
@@ -727,7 +729,7 @@ mapAccumL f acc = \(BS fp len) -> unsafeDupablePerformIO $ unsafeWithForeignPtr
 -- passing an accumulating parameter from right to left, and returning a
 -- final value of this accumulator together with the new ByteString.
 mapAccumR :: (acc -> Word8 -> (acc, Word8)) -> acc -> ByteString -> (acc, ByteString)
-mapAccumR f acc = \(BS fp len) -> unsafeDupablePerformIO $ unsafeWithForeignPtr fp $ \a -> do
+mapAccumR f acc = \(BS a len) -> unsafeDupablePerformIO $ do
                -- see fold inlining
     gp   <- mallocByteString len
     let
@@ -735,11 +737,11 @@ mapAccumR f acc = \(BS fp len) -> unsafeDupablePerformIO $ unsafeWithForeignPtr
         where
           mapAccumR_ !s (-1) = return s
           mapAccumR_ !s !n   = do
-              x  <- peekByteOff src n
+              x  <- peekFpByteOff src n
               let (s', y) = f s x
-              pokeByteOff dst n y
+              pokeFpByteOff dst n y
               mapAccumR_ s' (n-1)
-    acc' <- unsafeWithForeignPtr gp (go a)
+    acc' <- go a gp
     return (acc', BS gp len)
 {-# INLINE mapAccumR #-}
 
@@ -765,21 +767,20 @@ scanl
     -- ^ input of length n
     -> ByteString
     -- ^ output of length n+1
-scanl f v = \(BS fp len) -> unsafeDupablePerformIO $ unsafeWithForeignPtr fp $ \a ->
+scanl f v = \(BS a len) -> unsafeCreateFp (len+1) $ \q -> do
          -- see fold inlining
-    create (len+1) $ \q -> do
-        poke q v
+        pokeFp q v
         let
           go src dst = scanl_ v 0
             where
               scanl_ !z !n
                   | n >= len  = return ()
                   | otherwise = do
-                      x <- peekByteOff src n
+                      x <- peekFpByteOff src n
                       let z' = f z x
-                      pokeByteOff dst n z'
+                      pokeFpByteOff dst n z'
                       scanl_ z' (n+1)
-        go a (q `plusPtr` 1)
+        go a (q `plusForeignPtr` 1)
 {-# INLINE scanl #-}
 
 -- | 'scanl1' is a variant of 'scanl' that has no starting value argument.
@@ -810,19 +811,18 @@ scanr
     -- ^ input of length n
     -> ByteString
     -- ^ output of length n+1
-scanr f v = \(BS fp len) -> unsafeDupablePerformIO $ unsafeWithForeignPtr fp $ \a ->
+scanr f v = \(BS a len) -> unsafeCreateFp (len+1) $ \b -> do
          -- see fold inlining
-    create (len+1) $ \b -> do
-        poke (b `plusPtr` len) v
+        pokeFpByteOff b len v
         let
           go p q = scanr_ v (len-1)
             where
               scanr_ !z !n
                   | n < 0     = return ()
                   | otherwise = do
-                      x <- peekByteOff p n
+                      x <- peekFpByteOff p n
                       let z' = f x z
-                      pokeByteOff q n z'
+                      pokeFpByteOff q n z'
                       scanr_ z' (n-1)
         go a b
 {-# INLINE scanr #-}
@@ -846,7 +846,8 @@ scanr1 f ps = case unsnoc ps of
 replicate :: Int -> Word8 -> ByteString
 replicate w c
     | w <= 0    = empty
-    | otherwise = unsafeCreate w $ \ptr ->
+    | otherwise = unsafeCreateFp w $ \fptr ->
+        unsafeWithForeignPtr fptr $ \ptr ->
                       void $ memset ptr c (fromIntegral w)
 {-# INLINE replicate #-}
 
@@ -882,7 +883,7 @@ unfoldr f = concat . unfoldChunk 32 64
 unfoldrN :: Int -> (a -> Maybe (Word8, a)) -> a -> (ByteString, Maybe a)
 unfoldrN i f x0
     | i < 0     = (empty, Just x0)
-    | otherwise = unsafePerformIO $ createAndTrim' i $ \p -> go p x0 0
+    | otherwise = unsafePerformIO $ createFpAndTrim' i $ \p -> go p x0 0
   where
     go !p !x !n = go' x n
       where
@@ -890,7 +891,7 @@ unfoldrN i f x0
           | n' == i    = return (0, n', Just x')
           | otherwise = case f x' of
                           Nothing      -> return (0, n', Nothing)
-                          Just (w,x'') -> do pokeByteOff p n' w
+                          Just (w,x'') -> do pokeFpByteOff p n' w
                                              go' x'' (n'+1)
 {-# INLINE unfoldrN #-}
 
@@ -1130,10 +1131,9 @@ splitWith _ (BS _  0) = []
 splitWith predicate (BS fp len) = splitWith0 0 len fp
   where splitWith0 !off' !len' !fp' =
           accursedUnutterablePerformIO $
-            unsafeWithForeignPtr fp $ \p ->
-              splitLoop p 0 off' len' fp'
+              splitLoop fp 0 off' len' fp'
 
-        splitLoop :: Ptr Word8
+        splitLoop :: ForeignPtr Word8
                   -> Int -> Int -> Int
                   -> ForeignPtr Word8
                   -> IO [ByteString]
@@ -1142,7 +1142,7 @@ splitWith predicate (BS fp len) = splitWith0 0 len fp
             go idx'
                 | idx' >= len'  = return [BS (plusForeignPtr fp' off') idx']
                 | otherwise = do
-                    w <- peekElemOff p (off'+idx')
+                    w <- peekFpByteOff p (off'+idx')
                     if predicate w
                        then return (BS (plusForeignPtr fp' off') idx' :
                                   splitWith0 (off'+idx'+1) (len'-idx'-1) fp')
@@ -1213,19 +1213,16 @@ groupBy k xs = case uncons xs of
 intercalate :: ByteString -> [ByteString] -> ByteString
 intercalate _ [] = mempty
 intercalate _ [x] = x -- This branch exists for laziness, not speed
-intercalate (BS fSepPtr sepLen) (BS fhPtr hLen : t) =
-  unsafeCreate totalLen $ \dstPtr0 ->
-    unsafeWithForeignPtr fSepPtr $ \sepPtr -> do
-      unsafeWithForeignPtr fhPtr $ \hPtr ->
-        memcpy dstPtr0 hPtr hLen
+intercalate (BS sepPtr sepLen) (BS hPtr hLen : t) =
+  unsafeCreateFp totalLen $ \dstPtr0 -> do
+      memcpyFp dstPtr0 hPtr hLen
       let go _ [] = pure ()
-          go dstPtr (BS fChunkPtr chunkLen : chunks) = do
-            memcpy dstPtr sepPtr sepLen
-            let destPtr' = dstPtr `plusPtr` sepLen
-            unsafeWithForeignPtr fChunkPtr $ \chunkPtr ->
-              memcpy destPtr' chunkPtr chunkLen
-            go (destPtr' `plusPtr` chunkLen) chunks
-      go (dstPtr0 `plusPtr` hLen) t
+          go dstPtr (BS chunkPtr chunkLen : chunks) = do
+            memcpyFp dstPtr sepPtr sepLen
+            let destPtr' = dstPtr `plusForeignPtr` sepLen
+            memcpyFp destPtr' chunkPtr chunkLen
+            go (destPtr' `plusForeignPtr` chunkLen) chunks
+      go (dstPtr0 `plusForeignPtr` hLen) t
   where
   totalLen = List.foldl' (\acc chunk -> acc +! sepLen +! length chunk) hLen t
   (+!) = checkedAdd "intercalate"
@@ -1317,12 +1314,12 @@ count w (BS x m) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x $ \p ->
 -- returns the index of the first element in the ByteString
 -- satisfying the predicate.
 findIndex :: (Word8 -> Bool) -> ByteString -> Maybe Int
-findIndex k (BS x l) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x g
+findIndex k (BS x l) = accursedUnutterablePerformIO $ g x
   where
     g !ptr = go 0
       where
         go !n | n >= l    = return Nothing
-              | otherwise = do w <- peek $ ptr `plusPtr` n
+              | otherwise = do w <- peekFp $ ptr `plusForeignPtr` n
                                if k w
                                  then return (Just n)
                                  else go (n+1)
@@ -1334,12 +1331,12 @@ findIndex k (BS x l) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x g
 --
 -- @since 0.10.12.0
 findIndexEnd :: (Word8 -> Bool) -> ByteString -> Maybe Int
-findIndexEnd k (BS x l) = accursedUnutterablePerformIO $ unsafeWithForeignPtr x g
+findIndexEnd k (BS x l) = accursedUnutterablePerformIO $ g x
   where
     g !ptr = go (l-1)
       where
         go !n | n < 0     = return Nothing
-              | otherwise = do w <- peekByteOff ptr n
+              | otherwise = do w <- peekFpByteOff ptr n
                                if k w
                                  then return (Just n)
                                  else go (n-1)
@@ -1382,24 +1379,25 @@ notElem c ps = not (c `elem` ps)
 -- returns a ByteString containing those characters that satisfy the
 -- predicate.
 filter :: (Word8 -> Bool) -> ByteString -> ByteString
-filter k = \ps@(BS x l) ->
+filter k = \ps@(BS pIn l) ->
         -- see fold inlining.
   if null ps
     then ps
     else
-      unsafePerformIO $ createAndTrim l $ \pOut -> unsafeWithForeignPtr x $ \pIn -> do
+      unsafeDupablePerformIO $ createFpAndTrim l $ \pOut -> do
         let
           go' pf pt = go pf pt
             where
-              end = pf `plusPtr` l
+              end = pf `plusForeignPtr` l
               go !f !t | f == end  = return t
                        | otherwise = do
-                           w <- peek f
+                           w <- peekFp f
                            if k w
-                             then poke t w >> go (f `plusPtr` 1) (t `plusPtr` 1)
-                             else             go (f `plusPtr` 1) t
+                             then pokeFp t w
+                               >> go (f `plusForeignPtr` 1) (t `plusForeignPtr` 1)
+                             else go (f `plusForeignPtr` 1) t
         t <- go' pIn pOut
-        return $! t `minusPtr` pOut -- actual length
+        return $! t `minusForeignPtr` pOut -- actual length
 {-# INLINE filter #-}
 
 {-
@@ -1445,34 +1443,33 @@ find f p = case findIndex f p of
 --
 partition :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 partition f s = unsafeDupablePerformIO $
-    do fp' <- mallocByteString len
-       unsafeWithForeignPtr fp' $ \p ->
-           do let end = p `plusPtr` (len - 1)
+    do        p <- mallocByteString len
+              let end = p `plusForeignPtr` (len - 1)
               mid <- sep 0 p end
               rev mid end
-              let i = mid `minusPtr` p
-              return (BS fp' i,
-                      BS (plusForeignPtr fp' i) (len - i))
+              let i = mid `minusForeignPtr` p
+              return (BS p i,
+                      BS (p `plusForeignPtr` i) (len - i))
   where
     len  = length s
-    incr = (`plusPtr` 1)
-    decr = (`plusPtr` (-1))
+    incr = (`plusForeignPtr` 1)
+    decr = (`plusForeignPtr` (-1))
 
     sep !i !p1 !p2
        | i == len  = return p1
-       | f w       = do poke p1 w
+       | f w       = do pokeFp p1 w
                         sep (i + 1) (incr p1) p2
-       | otherwise = do poke p2 w
+       | otherwise = do pokeFp p2 w
                         sep (i + 1) p1 (decr p2)
       where
         w = s `unsafeIndex` i
 
-    rev !p1 !p2
+    rev !p1 !p2 -- fixme: surely there are faster ways to do this
       | p1 >= p2  = return ()
-      | otherwise = do a <- peek p1
-                       b <- peek p2
-                       poke p1 b
-                       poke p2 a
+      | otherwise = do a <- peekFp p1
+                       b <- peekFp p2
+                       pokeFp p1 b
+                       pokeFp p2 a
                        rev (incr p1) (decr p2)
 
 -- --------------------------------------------------------------------
@@ -1663,20 +1660,18 @@ zipWith f ps qs = case uncons ps of
 --
 -- @since 0.11.1.0
 packZipWith :: (Word8 -> Word8 -> Word8) -> ByteString -> ByteString -> ByteString
-packZipWith f (BS fp l) (BS fq m) = unsafeDupablePerformIO $
-    unsafeWithForeignPtr fp $ \a ->
-    unsafeWithForeignPtr fq $ \b ->
-    create len $ go a b
+packZipWith f (BS a l) (BS b m) = unsafeDupablePerformIO $
+    createFp len $ go a b
   where
     go p1 p2 = zipWith_ 0
       where
-        zipWith_ :: Int -> Ptr Word8 -> IO ()
+        zipWith_ :: Int -> ForeignPtr Word8 -> IO ()
         zipWith_ !n !r
            | n >= len = return ()
            | otherwise = do
-                x <- peekByteOff p1 n
-                y <- peekByteOff p2 n
-                pokeByteOff r n (f x y)
+                x <- peekFpByteOff p1 n
+                y <- peekFpByteOff p2 n
+                pokeFpByteOff r n (f x y)
                 zipWith_ (n+1) r
 
     len = min l m
@@ -1709,10 +1704,10 @@ tails p | null p    = [empty]
 sort :: ByteString -> ByteString
 sort (BS input l)
   -- qsort outperforms counting sort for small arrays
-  | l <= 20 = unsafeCreate l $ \ptr -> unsafeWithForeignPtr input $ \inp -> do
-    memcpy ptr inp l
-    c_sort ptr (fromIntegral l)
-  | otherwise = unsafeCreate l $ \p -> allocaArray 256 $ \arr -> do
+  | l <= 20 = unsafeCreateFp l $ \destFP -> do
+    memcpyFp destFP input l
+    unsafeWithForeignPtr destFP $ \dest -> c_sort dest (fromIntegral l)
+  | otherwise = unsafeCreateFp l $ \p -> allocaArray 256 $ \arr -> do
 
     _ <- memset (castPtr arr) 0 (256 * fromIntegral (sizeOf (undefined :: CSize)))
     unsafeWithForeignPtr input (\x -> countOccurrences arr x l)
@@ -1721,7 +1716,7 @@ sort (BS input l)
         go i   !ptr = do n <- peekElemOff arr i
                          when (n /= 0) $ void $ memset ptr (fromIntegral i) n
                          go (i + 1) (ptr `plusPtr` fromIntegral n)
-    go 0 p
+    unsafeWithForeignPtr p (go 0)
   where
     -- Count the number of occurrences of each byte.
     -- Used by 'sort'
@@ -1771,8 +1766,8 @@ packCString cstr = do
 -- The @ByteString@ is a normal Haskell value and will be managed on the
 -- Haskell heap.
 packCStringLen :: CStringLen -> IO ByteString
-packCStringLen (cstr, len) | len >= 0 = create len $ \p ->
-    memcpy p (castPtr cstr) len
+packCStringLen (cstr, len) | len >= 0 = createFp len $ \fp ->
+    unsafeWithForeignPtr fp $ \p -> memcpy p (castPtr cstr) len
 packCStringLen (_, len) =
     moduleErrorIO "packCStringLen" ("negative length: " ++ show len)
 
@@ -1785,8 +1780,7 @@ packCStringLen (_, len) =
 -- is needed in the rest of the program.
 --
 copy :: ByteString -> ByteString
-copy (BS x l) = unsafeCreate l $ \p -> unsafeWithForeignPtr x $ \f ->
-    memcpy p f l
+copy (BS x l) = unsafeCreateFp l $ \p -> memcpyFp p x l
 
 -- ---------------------------------------------------------------------
 -- Line IO
@@ -1845,8 +1839,7 @@ hGetLine h =
 
 mkPS :: RawBuffer Word8 -> Int -> Int -> IO ByteString
 mkPS buf start end =
- create len $ \p ->
-   withRawBuffer buf $ \pbuf -> copyBytes p (pbuf `plusPtr` start) len
+ createFp len $ \fp -> memcpyFp fp (buf `plusForeignPtr` start) len
  where
    len = end - start
 
@@ -1899,7 +1892,8 @@ putStr = hPut stdout
 --
 hGet :: Handle -> Int -> IO ByteString
 hGet h i
-    | i >  0    = createAndTrim i $ \p -> hGetBuf h p i
+    | i >  0    = createFpAndTrim i $ \fp ->
+        unsafeWithForeignPtr fp $ \p -> hGetBuf h p i
     | i == 0    = return empty
     | otherwise = illegalBufferSize h "hGet" i
 
@@ -1913,7 +1907,8 @@ hGet h i
 --
 hGetNonBlocking :: Handle -> Int -> IO ByteString
 hGetNonBlocking h i
-    | i >  0    = createAndTrim i $ \p -> hGetBufNonBlocking h p i
+    | i >  0    = createFpAndTrim i $ \fp ->
+        unsafeWithForeignPtr fp $ \p -> hGetBufNonBlocking h p i
     | i == 0    = return empty
     | otherwise = illegalBufferSize h "hGetNonBlocking" i
 
@@ -1924,7 +1919,8 @@ hGetNonBlocking h i
 --
 hGetSome :: Handle -> Int -> IO ByteString
 hGetSome hh i
-    | i >  0    = createAndTrim i $ \p -> hGetBufSome hh p i
+    | i >  0    = createFpAndTrim i $ \fp ->
+        unsafeWithForeignPtr fp $ \p -> hGetBufSome hh p i
     | i == 0    = return empty
     | otherwise = illegalBufferSize hh "hGetSome" i
 
diff --git a/Data/ByteString/Builder/Internal.hs b/Data/ByteString/Builder/Internal.hs
index 800476154..a05beead9 100644
--- a/Data/ByteString/Builder/Internal.hs
+++ b/Data/ByteString/Builder/Internal.hs
@@ -133,7 +133,7 @@ import           Data.Semigroup (Semigroup((<>)))
 #endif
 
 import qualified Data.ByteString               as S
-import qualified Data.ByteString.Internal      as S
+import qualified Data.ByteString.Internal.Type as S
 import qualified Data.ByteString.Lazy.Internal as L
 import qualified Data.ByteString.Short.Internal as Sh
 
@@ -1099,8 +1099,8 @@ buildStepToCIOS (AllocationStrategy nextBuffer bufSize trim) =
         wrapChunk !op' mkCIOS
           | chunkSize == 0      = mkCIOS True
           | trim chunkSize size = do
-              bs <- S.create chunkSize $ \pbuf' ->
-                        copyBytes pbuf' pbuf chunkSize
+              bs <- S.createFp chunkSize $ \fpbuf' ->
+                        S.memcpyFp fpbuf' fpbuf chunkSize
               -- FIXME: We could reuse the trimmed buffer here.
               return $ Yield1 bs (mkCIOS False)
           | otherwise            =
diff --git a/Data/ByteString/Char8.hs b/Data/ByteString/Char8.hs
index 9b06468d3..1aa70e82f 100644
--- a/Data/ByteString/Char8.hs
+++ b/Data/ByteString/Char8.hs
@@ -275,7 +275,7 @@ import Data.ByteString (null,length,tail,init,append
                        ,useAsCString,useAsCStringLen
                        )
 
-import Data.ByteString.Internal
+import Data.ByteString.Internal.Type
 import Data.ByteString.ReadInt
 import Data.ByteString.ReadNat
 
@@ -975,11 +975,11 @@ unlines = \li -> let
   (+!) = checkedAdd "Char8.unlines"
 
   go [] _ = pure ()
-  go (BS srcFP len : srcs) dest = do
-    unsafeWithForeignPtr srcFP $ \src -> memcpy dest src len
-    pokeElemOff dest len (c2w '\n')
-    go srcs $ dest `plusPtr` (len + 1)
-  in  unsafeCreate totLen (go li)
+  go (BS src len : srcs) dest = do
+    memcpyFp dest src len
+    pokeFpByteOff dest len (c2w '\n')
+    go srcs $ dest `plusForeignPtr` (len + 1)
+  in  unsafeCreateFp totLen (go li)
 
 -- | 'words' breaks a ByteString up into a list of words, which
 -- were delimited by Chars representing white space.
diff --git a/Data/ByteString/Internal.hs b/Data/ByteString/Internal.hs
index f8125a2da..b4481a833 100644
--- a/Data/ByteString/Internal.hs
+++ b/Data/ByteString/Internal.hs
@@ -1,11 +1,3 @@
-{-# LANGUAGE CPP, ForeignFunctionInterface, BangPatterns #-}
-{-# LANGUAGE UnliftedFFITypes, MagicHash,
-            UnboxedTuples, DeriveDataTypeable #-}
-{-# LANGUAGE TupleSections #-}
-{-# LANGUAGE TypeFamilies #-}
-{-# LANGUAGE PatternSynonyms, ViewPatterns #-}
-{-# LANGUAGE Unsafe #-}
-{-# LANGUAGE TemplateHaskellQuotes #-}
 {-# OPTIONS_HADDOCK not-home #-}
 
 -- |
@@ -99,885 +91,4 @@ module Data.ByteString.Internal (
         unsafeWithForeignPtr
   ) where
 
-import Prelude hiding (concat, null)
-import qualified Data.List as List
-
-import Control.Monad            (void)
-
-import Foreign.ForeignPtr       (ForeignPtr, withForeignPtr)
-import Foreign.Ptr              (Ptr, FunPtr, plusPtr, minusPtr)
-import Foreign.Storable         (Storable(..))
-import Foreign.C.Types          (CInt(..), CSize(..))
-import Foreign.C.String         (CString)
-
-#if !MIN_VERSION_base(4,13,0)
-import Data.Semigroup           (Semigroup ((<>)))
-#endif
-import Data.Semigroup           (Semigroup (sconcat, stimes))
-import Data.List.NonEmpty       (NonEmpty ((:|)))
-
-import Control.DeepSeq          (NFData(rnf))
-
-import Data.String              (IsString(..))
-
-import Control.Exception        (assert, throw, Exception)
-
-import Data.Bits                ((.&.))
-import Data.Char                (ord)
-import Data.Word
-
-import Data.Typeable            (Typeable)
-import Data.Data                (Data(..), mkNoRepType)
-
-import GHC.Base                 (nullAddr#,realWorld#,unsafeChr)
-import GHC.Exts                 (IsList(..))
-import GHC.CString              (unpackCString#)
-import GHC.Prim                 (Addr#)
-
-#define TIMES_INT_2_AVAILABLE MIN_VERSION_ghc_prim(0,7,0)
-#if TIMES_INT_2_AVAILABLE
-import GHC.Prim                (timesInt2#)
-#else
-import GHC.Prim                ( timesWord2#
-                               , or#
-                               , uncheckedShiftRL#
-                               , int2Word#
-                               , word2Int#
-                               )
-import Data.Bits               (finiteBitSize)
-#endif
-
-import GHC.IO                   (IO(IO),unsafeDupablePerformIO)
-import GHC.ForeignPtr           (ForeignPtr(ForeignPtr)
-#if __GLASGOW_HASKELL__ < 900
-                                , newForeignPtr_
-#endif
-                                , mallocPlainForeignPtrBytes)
-
-#if MIN_VERSION_base(4,10,0)
-import GHC.ForeignPtr           (plusForeignPtr)
-#else
-import GHC.Prim                 (plusAddr#)
-#endif
-
-#if __GLASGOW_HASKELL__ >= 811
-import GHC.CString              (cstringLength#)
-import GHC.ForeignPtr           (ForeignPtrContents(FinalPtr))
-#else
-import GHC.Ptr                  (Ptr(..))
-#endif
-
-import GHC.Types                (Int (..))
-
-#if MIN_VERSION_base(4,15,0)
-import GHC.ForeignPtr           (unsafeWithForeignPtr)
-#endif
-
-import qualified Language.Haskell.TH.Lib as TH
-import qualified Language.Haskell.TH.Syntax as TH
-
-#if !MIN_VERSION_base(4,15,0)
-unsafeWithForeignPtr :: ForeignPtr a -> (Ptr a -> IO b) -> IO b
-unsafeWithForeignPtr = withForeignPtr
-#endif
-
--- CFILES stuff is Hugs only
-{-# CFILES cbits/fpstring.c #-}
-
-#if !MIN_VERSION_base(4,10,0)
--- |Advances the given address by the given offset in bytes.
---
--- The new 'ForeignPtr' shares the finalizer of the original,
--- equivalent from a finalization standpoint to just creating another
--- reference to the original. That is, the finalizer will not be
--- called before the new 'ForeignPtr' is unreachable, nor will it be
--- called an additional time due to this call, and the finalizer will
--- be called with the same address that it would have had this call
--- not happened, *not* the new address.
-plusForeignPtr :: ForeignPtr a -> Int -> ForeignPtr b
-plusForeignPtr (ForeignPtr addr guts) (I# offset) = ForeignPtr (plusAddr# addr offset) guts
-{-# INLINE [0] plusForeignPtr #-}
-{-# RULES
-"ByteString plusForeignPtr/0" forall fp .
-   plusForeignPtr fp 0 = fp
- #-}
-#endif
-
--- -----------------------------------------------------------------------------
-
--- | A space-efficient representation of a 'Word8' vector, supporting many
--- efficient operations.
---
--- A 'ByteString' contains 8-bit bytes, or by using the operations from
--- "Data.ByteString.Char8" it can be interpreted as containing 8-bit
--- characters.
---
-data ByteString = BS {-# UNPACK #-} !(ForeignPtr Word8) -- payload
-                     {-# UNPACK #-} !Int                -- length
-                     -- ^ @since 0.11.0.0
-    deriving (Typeable)
-
--- | Type synonym for the strict flavour of 'ByteString'.
---
--- @since 0.11.2.0
-type StrictByteString = ByteString
-
--- |
--- @'PS' foreignPtr offset length@ represents a 'ByteString' with data
--- backed by a given @foreignPtr@, starting at a given @offset@ in bytes
--- and of a specified @length@.
---
--- This pattern is used to emulate the legacy 'ByteString' data
--- constructor, so that pre-existing code generally doesn't need to
--- change to benefit from the simplified 'BS' constructor and can
--- continue to function unchanged.
---
--- /Note:/ Matching with this constructor will always be given a 0 offset,
--- as the base will be manipulated by 'plusForeignPtr' instead.
---
-pattern PS :: ForeignPtr Word8 -> Int -> Int -> ByteString
-pattern PS fp zero len <- BS fp ((0,) -> (zero, len)) where
-  PS fp o len = BS (plusForeignPtr fp o) len
-#if __GLASGOW_HASKELL__ >= 802
-{-# COMPLETE PS #-}
-#endif
-
-instance Eq  ByteString where
-    (==)    = eq
-
-instance Ord ByteString where
-    compare = compareBytes
-
-instance Semigroup ByteString where
-    (<>)    = append
-    sconcat (b:|bs) = concat (b:bs)
-    {-# INLINE stimes #-}
-    stimes  = stimesPolymorphic
-
-instance Monoid ByteString where
-    mempty  = empty
-    mappend = (<>)
-    mconcat = concat
-
-instance NFData ByteString where
-    rnf BS{} = ()
-
-instance Show ByteString where
-    showsPrec p ps r = showsPrec p (unpackChars ps) r
-
-instance Read ByteString where
-    readsPrec p str = [ (packChars x, y) | (x, y) <- readsPrec p str ]
-
--- | @since 0.10.12.0
-instance IsList ByteString where
-  type Item ByteString = Word8
-  fromList = packBytes
-  toList   = unpackBytes
-
--- | Beware: 'fromString' truncates multi-byte characters to octets.
--- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n�
-instance IsString ByteString where
-    {-# INLINE fromString #-}
-    fromString = packChars
-
-instance Data ByteString where
-  gfoldl f z txt = z packBytes `f` unpackBytes txt
-  toConstr _     = error "Data.ByteString.ByteString.toConstr"
-  gunfold _ _    = error "Data.ByteString.ByteString.gunfold"
-  dataTypeOf _   = mkNoRepType "Data.ByteString.ByteString"
-
--- | @since 0.11.2.0
-instance TH.Lift ByteString where
-#if MIN_VERSION_template_haskell(2,16,0)
-  lift (BS ptr len) = [| unsafePackLenLiteral |]
-    `TH.appE` TH.litE (TH.integerL (fromIntegral len))
-    `TH.appE` TH.litE (TH.BytesPrimL $ TH.Bytes ptr 0 (fromIntegral len))
-#else
-  lift bs@(BS _ len) = [| unsafePackLenLiteral |]
-    `TH.appE` TH.litE (TH.integerL (fromIntegral len))
-    `TH.appE` TH.litE (TH.StringPrimL $ unpackBytes bs)
-#endif
-
-#if MIN_VERSION_template_haskell(2,17,0)
-  liftTyped = TH.unsafeCodeCoerce . TH.lift
-#elif MIN_VERSION_template_haskell(2,16,0)
-  liftTyped = TH.unsafeTExpCoerce . TH.lift
-#endif
-
-------------------------------------------------------------------------
--- Internal indexing
-
--- | 'findIndexOrLength' is a variant of findIndex, that returns the length
--- of the string if no element is found, rather than Nothing.
-findIndexOrLength :: (Word8 -> Bool) -> ByteString -> Int
-findIndexOrLength k (BS x l) =
-    accursedUnutterablePerformIO $ unsafeWithForeignPtr x g
-  where
-    g ptr = go 0
-      where
-        go !n | n >= l    = return l
-              | otherwise = do w <- peek $ ptr `plusPtr` n
-                               if k w
-                                 then return n
-                                 else go (n+1)
-{-# INLINE findIndexOrLength #-}
-
-------------------------------------------------------------------------
--- Packing and unpacking from lists
-
-packBytes :: [Word8] -> ByteString
-packBytes ws = unsafePackLenBytes (List.length ws) ws
-
-packChars :: [Char] -> ByteString
-packChars cs = unsafePackLenChars (List.length cs) cs
-
-{-# INLINE [0] packChars #-}
-
-{-# RULES
-"ByteString packChars/packAddress" forall s .
-   packChars (unpackCString# s) = unsafePackLiteral s
- #-}
-
-unsafePackLenBytes :: Int -> [Word8] -> ByteString
-unsafePackLenBytes len xs0 =
-    unsafeCreate len $ \p -> go p xs0
-  where
-    go !_ []     = return ()
-    go !p (x:xs) = poke p x >> go (p `plusPtr` 1) xs
-
-unsafePackLenChars :: Int -> [Char] -> ByteString
-unsafePackLenChars len cs0 =
-    unsafeCreate len $ \p -> go p cs0
-  where
-    go !_ []     = return ()
-    go !p (c:cs) = poke p (c2w c) >> go (p `plusPtr` 1) cs
-
-
--- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
--- Addr\# (an arbitrary machine address assumed to point outside the
--- garbage-collected heap) into a @ByteString@. A much faster way to
--- create an 'Addr#' is with an unboxed string literal, than to pack a
--- boxed string. A unboxed string literal is compiled to a static @char
--- []@ by GHC. Establishing the length of the string requires a call to
--- @strlen(3)@, so the 'Addr#' must point to a null-terminated buffer (as
--- is the case with @\"string\"\#@ literals in GHC). Use 'Data.ByteString.Unsafe.unsafePackAddressLen'
--- if you know the length of the string statically.
---
--- An example:
---
--- > literalFS = unsafePackAddress "literal"#
---
--- This function is /unsafe/. If you modify the buffer pointed to by the
--- original 'Addr#' this modification will be reflected in the resulting
--- @ByteString@, breaking referential transparency.
---
--- Note this also won't work if your 'Addr#' has embedded @\'\\0\'@ characters in
--- the string, as @strlen@ will return too short a length.
---
-unsafePackAddress :: Addr# -> IO ByteString
-unsafePackAddress addr# = do
-#if __GLASGOW_HASKELL__ >= 811
-    unsafePackLenAddress (I# (cstringLength# addr#)) addr#
-#else
-    l <- c_strlen (Ptr addr#)
-    unsafePackLenAddress (fromIntegral l) addr#
-#endif
-{-# INLINE unsafePackAddress #-}
-
--- | See 'unsafePackAddress'. This function is similar,
--- but takes an additional length argument rather then computing
--- it with @strlen@.
--- Therefore embedding @\'\\0\'@ characters is possible.
---
--- @since 0.11.2.0
-unsafePackLenAddress :: Int -> Addr# -> IO ByteString
-unsafePackLenAddress len addr# = do
-#if __GLASGOW_HASKELL__ >= 811
-    return (BS (ForeignPtr addr# FinalPtr) len)
-#else
-    p <- newForeignPtr_ (Ptr addr#)
-    return $ BS p len
-#endif
-{-# INLINE unsafePackLenAddress #-}
-
--- | See 'unsafePackAddress'. This function has similar behavior. Prefer
--- this function when the address in known to be an @Addr#@ literal. In
--- that context, there is no need for the sequencing guarantees that 'IO'
--- provides. On GHC 9.0 and up, this function uses the @FinalPtr@ data
--- constructor for @ForeignPtrContents@.
---
--- @since 0.11.1.0
-unsafePackLiteral :: Addr# -> ByteString
-unsafePackLiteral addr# =
-#if __GLASGOW_HASKELL__ >= 811
-  unsafePackLenLiteral (I# (cstringLength# addr#)) addr#
-#else
-  let len = accursedUnutterablePerformIO (c_strlen (Ptr addr#))
-   in unsafePackLenLiteral (fromIntegral len) addr#
-#endif
-{-# INLINE unsafePackLiteral #-}
-
-
--- | See 'unsafePackLiteral'. This function is similar,
--- but takes an additional length argument rather then computing
--- it with @strlen@.
--- Therefore embedding @\'\\0\'@ characters is possible.
---
--- @since 0.11.2.0
-unsafePackLenLiteral :: Int -> Addr# -> ByteString
-unsafePackLenLiteral len addr# =
-#if __GLASGOW_HASKELL__ >= 811
-  BS (ForeignPtr addr# FinalPtr) len
-#else
-  -- newForeignPtr_ allocates a MutVar# internally. If that MutVar#
-  -- gets commoned up with the MutVar# of some unrelated ForeignPtr,
-  -- it may prevent automatic finalization for that other ForeignPtr.
-  -- So we avoid accursedUnutterablePerformIO here.
-  BS (unsafeDupablePerformIO (newForeignPtr_ (Ptr addr#))) len
-#endif
-{-# INLINE unsafePackLenLiteral #-}
-
-packUptoLenBytes :: Int -> [Word8] -> (ByteString, [Word8])
-packUptoLenBytes len xs0 =
-    unsafeCreateUptoN' len $ \p0 ->
-      let p_end = plusPtr p0 len
-          go !p []              = return (p `minusPtr` p0, [])
-          go !p xs | p == p_end = return (len, xs)
-          go !p (x:xs)          = poke p x >> go (p `plusPtr` 1) xs
-      in go p0 xs0
-
-packUptoLenChars :: Int -> [Char] -> (ByteString, [Char])
-packUptoLenChars len cs0 =
-    unsafeCreateUptoN' len $ \p0 ->
-      let p_end = plusPtr p0 len
-          go !p []              = return (p `minusPtr` p0, [])
-          go !p cs | p == p_end = return (len, cs)
-          go !p (c:cs)          = poke p (c2w c) >> go (p `plusPtr` 1) cs
-      in go p0 cs0
-
--- Unpacking bytestrings into lists efficiently is a tradeoff: on the one hand
--- we would like to write a tight loop that just blasts the list into memory, on
--- the other hand we want it to be unpacked lazily so we don't end up with a
--- massive list data structure in memory.
---
--- Our strategy is to combine both: we will unpack lazily in reasonable sized
--- chunks, where each chunk is unpacked strictly.
---
--- unpackBytes and unpackChars do the lazy loop, while unpackAppendBytes and
--- unpackAppendChars do the chunks strictly.
-
-unpackBytes :: ByteString -> [Word8]
-unpackBytes bs = unpackAppendBytesLazy bs []
-
-unpackChars :: ByteString -> [Char]
-unpackChars bs = unpackAppendCharsLazy bs []
-
-unpackAppendBytesLazy :: ByteString -> [Word8] -> [Word8]
-unpackAppendBytesLazy (BS fp len) xs
-  | len <= 100 = unpackAppendBytesStrict (BS fp len) xs
-  | otherwise  = unpackAppendBytesStrict (BS fp 100) remainder
-  where
-    remainder  = unpackAppendBytesLazy (BS (plusForeignPtr fp 100) (len-100)) xs
-
-  -- Why 100 bytes you ask? Because on a 64bit machine the list we allocate
-  -- takes just shy of 4k which seems like a reasonable amount.
-  -- (5 words per list element, 8 bytes per word, 100 elements = 4000 bytes)
-
-unpackAppendCharsLazy :: ByteString -> [Char] -> [Char]
-unpackAppendCharsLazy (BS fp len) cs
-  | len <= 100 = unpackAppendCharsStrict (BS fp len) cs
-  | otherwise  = unpackAppendCharsStrict (BS fp 100) remainder
-  where
-    remainder  = unpackAppendCharsLazy (BS (plusForeignPtr fp 100) (len-100)) cs
-
--- For these unpack functions, since we're unpacking the whole list strictly we
--- build up the result list in an accumulator. This means we have to build up
--- the list starting at the end. So our traversal starts at the end of the
--- buffer and loops down until we hit the sentinal:
-
-unpackAppendBytesStrict :: ByteString -> [Word8] -> [Word8]
-unpackAppendBytesStrict (BS fp len) xs =
-    accursedUnutterablePerformIO $ unsafeWithForeignPtr fp $ \base ->
-      loop (base `plusPtr` (-1)) (base `plusPtr` (-1+len)) xs
-  where
-    loop !sentinal !p acc
-      | p == sentinal = return acc
-      | otherwise     = do x <- peek p
-                           loop sentinal (p `plusPtr` (-1)) (x:acc)
-
-unpackAppendCharsStrict :: ByteString -> [Char] -> [Char]
-unpackAppendCharsStrict (BS fp len) xs =
-    accursedUnutterablePerformIO $ unsafeWithForeignPtr fp $ \base ->
-      loop (base `plusPtr` (-1)) (base `plusPtr` (-1+len)) xs
-  where
-    loop !sentinal !p acc
-      | p == sentinal = return acc
-      | otherwise     = do x <- peek p
-                           loop sentinal (p `plusPtr` (-1)) (w2c x:acc)
-
-------------------------------------------------------------------------
-
--- | The 0 pointer. Used to indicate the empty Bytestring.
-nullForeignPtr :: ForeignPtr Word8
-#if __GLASGOW_HASKELL__ >= 811
-nullForeignPtr = ForeignPtr nullAddr# FinalPtr
-#else
-nullForeignPtr = ForeignPtr nullAddr# (error "nullForeignPtr")
-#endif
-
--- ---------------------------------------------------------------------
--- Low level constructors
-
--- | /O(1)/ Build a ByteString from a ForeignPtr.
---
--- If you do not need the offset parameter then you should be using
--- 'Data.ByteString.Unsafe.unsafePackCStringLen' or
--- 'Data.ByteString.Unsafe.unsafePackCStringFinalizer' instead.
---
-fromForeignPtr :: ForeignPtr Word8
-               -> Int -- ^ Offset
-               -> Int -- ^ Length
-               -> ByteString
-fromForeignPtr fp o = BS (plusForeignPtr fp o)
-{-# INLINE fromForeignPtr #-}
-
--- | @since 0.11.0.0
-fromForeignPtr0 :: ForeignPtr Word8
-               -> Int -- ^ Length
-               -> ByteString
-fromForeignPtr0 = BS
-{-# INLINE fromForeignPtr0 #-}
-
--- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
-toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int) -- ^ (ptr, offset, length)
-toForeignPtr (BS ps l) = (ps, 0, l)
-{-# INLINE toForeignPtr #-}
-
--- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
---
--- @since 0.11.0.0
-toForeignPtr0 :: ByteString -> (ForeignPtr Word8, Int) -- ^ (ptr, length)
-toForeignPtr0 (BS ps l) = (ps, l)
-{-# INLINE toForeignPtr0 #-}
-
--- | A way of creating ByteStrings outside the IO monad. The @Int@
--- argument gives the final size of the ByteString.
-unsafeCreate :: Int -> (Ptr Word8 -> IO ()) -> ByteString
-unsafeCreate l f = unsafeDupablePerformIO (create l f)
-{-# INLINE unsafeCreate #-}
-
--- | Like 'unsafeCreate' but instead of giving the final size of the
--- ByteString, it is just an upper bound. The inner action returns
--- the actual size. Unlike 'createAndTrim' the ByteString is not
--- reallocated if the final size is less than the estimated size.
-unsafeCreateUptoN :: Int -> (Ptr Word8 -> IO Int) -> ByteString
-unsafeCreateUptoN l f = unsafeDupablePerformIO (createUptoN l f)
-{-# INLINE unsafeCreateUptoN #-}
-
--- | @since 0.10.12.0
-unsafeCreateUptoN' :: Int -> (Ptr Word8 -> IO (Int, a)) -> (ByteString, a)
-unsafeCreateUptoN' l f = unsafeDupablePerformIO (createUptoN' l f)
-{-# INLINE unsafeCreateUptoN' #-}
-
--- | Create ByteString of size @l@ and use action @f@ to fill its contents.
-create :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
-create l action = do
-    fp <- mallocByteString l
-    -- Cannot use unsafeWithForeignPtr, because action can diverge
-    withForeignPtr fp $ \p -> action p
-    return $! BS fp l
-{-# INLINE create #-}
-
--- | Given a maximum size @l@ and an action @f@ that fills the 'ByteString'
--- starting at the given 'Ptr' and returns the actual utilized length,
--- @`createUptoN'` l f@ returns the filled 'ByteString'.
-createUptoN :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
-createUptoN l action = do
-    fp <- mallocByteString l
-    -- Cannot use unsafeWithForeignPtr, because action can diverge
-    l' <- withForeignPtr fp $ \p -> action p
-    assert (l' <= l) $ return $! BS fp l'
-{-# INLINE createUptoN #-}
-
--- | Like 'createUptoN', but also returns an additional value created by the
--- action.
---
--- @since 0.10.12.0
-createUptoN' :: Int -> (Ptr Word8 -> IO (Int, a)) -> IO (ByteString, a)
-createUptoN' l action = do
-    fp <- mallocByteString l
-    -- Cannot use unsafeWithForeignPtr, because action can diverge
-    (l', res) <- withForeignPtr fp $ \p -> action p
-    assert (l' <= l) $ return (BS fp l', res)
-{-# INLINE createUptoN' #-}
-
--- | Given the maximum size needed and a function to make the contents
--- of a ByteString, createAndTrim makes the 'ByteString'. The generating
--- function is required to return the actual final size (<= the maximum
--- size), and the resulting byte array is reallocated to this size.
---
--- createAndTrim is the main mechanism for creating custom, efficient
--- ByteString functions, using Haskell or C functions to fill the space.
---
-createAndTrim :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
-createAndTrim l action = do
-    fp <- mallocByteString l
-    -- Cannot use unsafeWithForeignPtr, because action can diverge
-    withForeignPtr fp $ \p -> do
-        l' <- action p
-        if assert (l' <= l) $ l' >= l
-            then return $! BS fp l
-            else create l' $ \p' -> memcpy p' p l'
-{-# INLINE createAndTrim #-}
-
-createAndTrim' :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a)
-createAndTrim' l action = do
-    fp <- mallocByteString l
-    -- Cannot use unsafeWithForeignPtr, because action can diverge
-    withForeignPtr fp $ \p -> do
-        (off, l', res) <- action p
-        if assert (l' <= l) $ l' >= l
-            then return (BS fp l, res)
-            else do ps <- create l' $ \p' ->
-                            memcpy p' (p `plusPtr` off) l'
-                    return (ps, res)
-{-# INLINE createAndTrim' #-}
-
--- | Wrapper of 'Foreign.ForeignPtr.mallocForeignPtrBytes' with faster implementation for GHC
---
-mallocByteString :: Int -> IO (ForeignPtr a)
-mallocByteString = mallocPlainForeignPtrBytes
-{-# INLINE mallocByteString #-}
-
-------------------------------------------------------------------------
--- Implementations for Eq, Ord and Monoid instances
-
-eq :: ByteString -> ByteString -> Bool
-eq a@(BS fp len) b@(BS fp' len')
-  | len /= len' = False    -- short cut on length
-  | fp == fp'   = True     -- short cut for the same string
-  | otherwise   = compareBytes a b == EQ
-{-# INLINE eq #-}
--- ^ still needed
-
-compareBytes :: ByteString -> ByteString -> Ordering
-compareBytes (BS _   0)    (BS _   0)    = EQ  -- short cut for empty strings
-compareBytes (BS fp1 len1) (BS fp2 len2) =
-    accursedUnutterablePerformIO $
-      unsafeWithForeignPtr fp1 $ \p1 ->
-      unsafeWithForeignPtr fp2 $ \p2 -> do
-        i <- memcmp p1 p2 (min len1 len2)
-        return $! case i `compare` 0 of
-                    EQ  -> len1 `compare` len2
-                    x   -> x
-
-
--- | /O(1)/ The empty 'ByteString'
-empty :: ByteString
--- This enables bypassing #457 by not using (polymorphic) mempty in
--- any definitions used by the (Monoid ByteString) instance
-empty = BS nullForeignPtr 0
-
-append :: ByteString -> ByteString -> ByteString
-append (BS _   0)    b                  = b
-append a             (BS _   0)    = a
-append (BS fp1 len1) (BS fp2 len2) =
-    unsafeCreate (checkedAdd "append" len1 len2) $ \destptr1 -> do
-      let destptr2 = destptr1 `plusPtr` len1
-      unsafeWithForeignPtr fp1 $ \p1 -> memcpy destptr1 p1 len1
-      unsafeWithForeignPtr fp2 $ \p2 -> memcpy destptr2 p2 len2
-
-concat :: [ByteString] -> ByteString
-concat = \bss0 -> goLen0 bss0 bss0
-    -- The idea here is we first do a pass over the input list to determine:
-    --
-    --  1. is a copy necessary? e.g. @concat []@, @concat [mempty, "hello"]@,
-    --     and @concat ["hello", mempty, mempty]@ can all be handled without
-    --     copying.
-    --  2. if a copy is necessary, how large is the result going to be?
-    --
-    -- If a copy is necessary then we create a buffer of the appropriate size
-    -- and do another pass over the input list, copying the chunks into the
-    -- buffer. Also, since foreign calls aren't entirely free we skip over
-    -- empty chunks while copying.
-    --
-    -- We pass the original [ByteString] (bss0) through as an argument through
-    -- goLen0, goLen1, and goLen since we will need it again in goCopy. Passing
-    -- it as an explicit argument avoids capturing it in these functions'
-    -- closures which would result in unnecessary closure allocation.
-  where
-    -- It's still possible that the result is empty
-    goLen0 _    []                     = empty
-    goLen0 bss0 (BS _ 0     :bss)    = goLen0 bss0 bss
-    goLen0 bss0 (bs           :bss)    = goLen1 bss0 bs bss
-
-    -- It's still possible that the result is a single chunk
-    goLen1 _    bs []                  = bs
-    goLen1 bss0 bs (BS _ 0  :bss)    = goLen1 bss0 bs bss
-    goLen1 bss0 bs (BS _ len:bss)    = goLen bss0 (checkedAdd "concat" len' len) bss
-      where BS _ len' = bs
-
-    -- General case, just find the total length we'll need
-    goLen bss0 !total (BS _ len:bss) = goLen bss0 total' bss
-      where total' = checkedAdd "concat" total len
-    goLen bss0 total [] =
-      unsafeCreate total $ \ptr -> goCopy bss0 ptr
-
-    -- Copy the data
-    goCopy []                  !_   = return ()
-    goCopy (BS _  0  :bss) !ptr = goCopy bss ptr
-    goCopy (BS fp len:bss) !ptr = do
-      unsafeWithForeignPtr fp $ \p -> memcpy ptr p len
-      goCopy bss (ptr `plusPtr` len)
-{-# NOINLINE concat #-}
-
-{-# RULES
-"ByteString concat [] -> empty"
-   concat [] = empty
-"ByteString concat [bs] -> bs" forall x.
-   concat [x] = x
- #-}
-
--- | Repeats the given ByteString n times.
--- Polymorphic wrapper to make sure any generated
--- specializations are reasonably small.
-stimesPolymorphic :: Integral a => a -> ByteString -> ByteString
-{-# INLINABLE stimesPolymorphic #-}
-stimesPolymorphic nRaw = \ !bs -> case checkedIntegerToInt n of
-  Just nInt
-    | nInt >= 0  -> stimesNonNegativeInt nInt bs
-    | otherwise  -> stimesNegativeErr
-  Nothing
-    | n < 0  -> stimesNegativeErr
-    | BS _ 0 <- bs  -> empty
-    | otherwise     -> stimesOverflowErr
-  where  n = toInteger nRaw
-  -- By exclusively using n instead of nRaw, the semantics are kept simple
-  -- and the likelihood of potentially dangerous mistakes minimized.
-
-
-stimesNegativeErr :: ByteString
-stimesNegativeErr
-  = error "stimes @ByteString: non-negative multiplier expected"
-
-stimesOverflowErr :: ByteString
--- Although this only appears once, it is extracted here to prevent it
--- from being duplicated in specializations of 'stimesPolymorphic'
-stimesOverflowErr = overflowError "stimes"
-
--- | Repeats the given ByteString n times.
-stimesNonNegativeInt :: Int -> ByteString -> ByteString
-stimesNonNegativeInt n (BS fp len)
-  | n == 0 = empty
-  | n == 1 = BS fp len
-  | len == 0 = empty
-  | len == 1 = unsafeCreate n $ \destptr ->
-    unsafeWithForeignPtr fp $ \p -> do
-      byte <- peek p
-      void $ memset destptr byte (fromIntegral n)
-  | otherwise = unsafeCreate size $ \destptr ->
-    unsafeWithForeignPtr fp $ \p -> do
-      memcpy destptr p len
-      fillFrom destptr len
-  where
-    size = checkedMultiply "stimes" n len
-    halfSize = (size - 1) `div` 2 -- subtraction and division won't overflow
-
-    fillFrom :: Ptr Word8 -> Int -> IO ()
-    fillFrom destptr copied
-      | copied <= halfSize = do
-        memcpy (destptr `plusPtr` copied) destptr copied
-        fillFrom destptr (copied * 2)
-      | otherwise = memcpy (destptr `plusPtr` copied) destptr (size - copied)
-
-
-------------------------------------------------------------------------
-
--- | Conversion between 'Word8' and 'Char'. Should compile to a no-op.
-w2c :: Word8 -> Char
-w2c = unsafeChr . fromIntegral
-{-# INLINE w2c #-}
-
--- | Unsafe conversion between 'Char' and 'Word8'. This is a no-op and
--- silently truncates to 8 bits Chars > '\255'. It is provided as
--- convenience for ByteString construction.
-c2w :: Char -> Word8
-c2w = fromIntegral . ord
-{-# INLINE c2w #-}
-
--- | Selects words corresponding to white-space characters in the Latin-1 range
-isSpaceWord8 :: Word8 -> Bool
-isSpaceWord8 w8 =
-    -- Avoid the cost of narrowing arithmetic results to Word8,
-    -- the conversion from Word8 to Word is free.
-    let w :: Word
-        !w = fromIntegral w8
-     in w .&. 0x50 == 0    -- Quick non-whitespace filter
-        && w - 0x21 > 0x7e -- Second non-whitespace filter
-        && ( w == 0x20     -- SP
-          || w == 0xa0     -- NBSP
-          || w - 0x09 < 5) -- HT, NL, VT, FF, CR
-{-# INLINE isSpaceWord8 #-}
-
--- | Selects white-space characters in the Latin-1 range
-isSpaceChar8 :: Char -> Bool
-isSpaceChar8 = isSpaceWord8 . c2w
-{-# INLINE isSpaceChar8 #-}
-
-------------------------------------------------------------------------
-
--- | The type of exception raised by 'overflowError'
--- and on failure by overflow-checked arithmetic operations.
-newtype SizeOverflowException
-  = SizeOverflowException String
-
-instance Show SizeOverflowException where
-  show (SizeOverflowException err) = err
-
-instance Exception SizeOverflowException
-
--- | Raises a 'SizeOverflowException',
--- with a message using the given function name.
-overflowError :: String -> a
-overflowError fun = throw $ SizeOverflowException msg
-  where msg = "Data.ByteString." ++ fun ++ ": size overflow"
-
--- | Add two non-negative numbers.
--- Calls 'overflowError' on overflow.
-checkedAdd :: String -> Int -> Int -> Int
-{-# INLINE checkedAdd #-}
-checkedAdd fun x y
-  | r >= 0    = r
-  | otherwise = overflowError fun
-  where r = assert (min x y >= 0) $ x + y
-
--- | Multiplies two non-negative numbers.
--- Calls 'overflowError' on overflow.
-checkedMultiply :: String -> Int -> Int -> Int
-{-# INLINE checkedMultiply #-}
-checkedMultiply fun !x@(I# x#) !y@(I# y#) = assert (min x y >= 0) $
-#if TIMES_INT_2_AVAILABLE
-  case timesInt2# x# y# of
-    (# 0#, _, result #) -> I# result
-    _ -> overflowError fun
-#else
-  case timesWord2# (int2Word# x#) (int2Word# y#) of
-    (# hi, lo #) -> case or# hi (uncheckedShiftRL# lo shiftAmt) of
-      0## -> I# (word2Int# lo)
-      _   -> overflowError fun
-  where !(I# shiftAmt) = finiteBitSize (0 :: Word) - 1
-#endif
-
-
--- | Attempts to convert an 'Integer' value to an 'Int', returning
--- 'Nothing' if doing so would result in an overflow.
-checkedIntegerToInt :: Integer -> Maybe Int
-{-# INLINE checkedIntegerToInt #-}
--- We could use Data.Bits.toIntegralSized, but this hand-rolled
--- version is currently a bit faster as of GHC 9.2.
--- It's even faster to just match on the Integer constructors, but
--- we'd still need a fallback implementation for integer-simple.
-checkedIntegerToInt x
-  | x == toInteger res = Just res
-  | otherwise = Nothing
-  where  res = fromInteger x :: Int
-
-
-------------------------------------------------------------------------
-
--- | This \"function\" has a superficial similarity to 'System.IO.Unsafe.unsafePerformIO' but
--- it is in fact a malevolent agent of chaos. It unpicks the seams of reality
--- (and the 'IO' monad) so that the normal rules no longer apply. It lulls you
--- into thinking it is reasonable, but when you are not looking it stabs you
--- in the back and aliases all of your mutable buffers. The carcass of many a
--- seasoned Haskell programmer lie strewn at its feet.
---
--- Witness the trail of destruction:
---
--- * <https://github.com/haskell/bytestring/commit/71c4b438c675aa360c79d79acc9a491e7bbc26e7>
---
--- * <https://github.com/haskell/bytestring/commit/210c656390ae617d9ee3b8bcff5c88dd17cef8da>
---
--- * <https://ghc.haskell.org/trac/ghc/ticket/3486>
---
--- * <https://ghc.haskell.org/trac/ghc/ticket/3487>
---
--- * <https://ghc.haskell.org/trac/ghc/ticket/7270>
---
--- * <https://gitlab.haskell.org/ghc/ghc/-/issues/22204>
---
--- Do not talk about \"safe\"! You do not know what is safe!
---
--- Yield not to its blasphemous call! Flee traveller! Flee or you will be
--- corrupted and devoured!
---
-{-# INLINE accursedUnutterablePerformIO #-}
-accursedUnutterablePerformIO :: IO a -> a
-accursedUnutterablePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
-
--- ---------------------------------------------------------------------
---
--- Standard C functions
---
-
-foreign import ccall unsafe "string.h strlen" c_strlen
-    :: CString -> IO CSize
-
-foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
-    :: FunPtr (Ptr Word8 -> IO ())
-
-foreign import ccall unsafe "string.h memchr" c_memchr
-    :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8)
-
-memchr :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
-memchr p w = c_memchr p (fromIntegral w)
-
-foreign import ccall unsafe "string.h memcmp" c_memcmp
-    :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
-
-memcmp :: Ptr Word8 -> Ptr Word8 -> Int -> IO CInt
-memcmp p q s = c_memcmp p q (fromIntegral s)
-
-foreign import ccall unsafe "string.h memcpy" c_memcpy
-    :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8)
-
-memcpy :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
-memcpy p q s = void $ c_memcpy p q (fromIntegral s)
-
-{-
-foreign import ccall unsafe "string.h memmove" c_memmove
-    :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8)
-
-memmove :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
-memmove p q s = do c_memmove p q s
-                   return ()
--}
-
-foreign import ccall unsafe "string.h memset" c_memset
-    :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8)
-
-memset :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
-memset p w = c_memset p (fromIntegral w)
-
--- ---------------------------------------------------------------------
---
--- Uses our C code
---
-
-foreign import ccall unsafe "static fpstring.h fps_reverse" c_reverse
-    :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
-
-foreign import ccall unsafe "static fpstring.h fps_intersperse" c_intersperse
-    :: Ptr Word8 -> Ptr Word8 -> CSize -> Word8 -> IO ()
-
-foreign import ccall unsafe "static fpstring.h fps_maximum" c_maximum
-    :: Ptr Word8 -> CSize -> IO Word8
-
-foreign import ccall unsafe "static fpstring.h fps_minimum" c_minimum
-    :: Ptr Word8 -> CSize -> IO Word8
-
-foreign import ccall unsafe "static fpstring.h fps_count" c_count
-    :: Ptr Word8 -> CSize -> Word8 -> IO CSize
-
-foreign import ccall unsafe "static fpstring.h fps_sort" c_sort
-    :: Ptr Word8 -> CSize -> IO ()
+import Data.ByteString.Internal.Type
diff --git a/Data/ByteString/Internal/Type.hs b/Data/ByteString/Internal/Type.hs
new file mode 100644
index 000000000..0f69096ff
--- /dev/null
+++ b/Data/ByteString/Internal/Type.hs
@@ -0,0 +1,1065 @@
+{-# LANGUAGE CPP, ForeignFunctionInterface, BangPatterns #-}
+{-# LANGUAGE UnliftedFFITypes, MagicHash,
+            UnboxedTuples, DeriveDataTypeable #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE PatternSynonyms, ViewPatterns #-}
+{-# LANGUAGE Unsafe #-}
+{-# LANGUAGE TemplateHaskellQuotes #-}
+{-# OPTIONS_HADDOCK not-home #-}
+
+-- |
+-- Module      : Data.ByteString.Internal.Type
+-- Copyright   : (c) Don Stewart 2006-2008
+--               (c) Duncan Coutts 2006-2012
+-- License     : BSD-style
+-- Maintainer  : dons00@gmail.com, duncan@community.haskell.org
+-- Stability   : unstable
+-- Portability : non-portable
+--
+-- The 'ByteString' type, its instances, and whatever related
+-- utilities the bytestring developers see fit to use internally.
+--
+module Data.ByteString.Internal.Type (
+
+        -- * The @ByteString@ type and representation
+        ByteString
+        ( BS
+        , PS -- backwards compatibility shim
+        ),
+
+        StrictByteString,
+
+        -- * Internal indexing
+        findIndexOrLength,
+
+        -- * Conversion with lists: packing and unpacking
+        packBytes, packUptoLenBytes, unsafePackLenBytes,
+        packChars, packUptoLenChars, unsafePackLenChars,
+        unpackBytes, unpackAppendBytesLazy, unpackAppendBytesStrict,
+        unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict,
+        unsafePackAddress, unsafePackLenAddress,
+        unsafePackLiteral, unsafePackLenLiteral,
+
+        -- * Low level imperative construction
+        empty,
+        createFp,
+        createFpUptoN,
+        createFpUptoN',
+        createFpAndTrim,
+        createFpAndTrim',
+        unsafeCreateFp,
+        unsafeCreateFpUptoN,
+        unsafeCreateFpUptoN',
+        create,
+        createUptoN,
+        createUptoN',
+        createAndTrim,
+        createAndTrim',
+        unsafeCreate,
+        unsafeCreateUptoN,
+        unsafeCreateUptoN',
+        mallocByteString,
+
+        -- * Conversion to and from ForeignPtrs
+        fromForeignPtr,
+        toForeignPtr,
+        fromForeignPtr0,
+        toForeignPtr0,
+
+        -- * Utilities
+        nullForeignPtr,
+        peekFp,
+        pokeFp,
+        peekFpByteOff,
+        pokeFpByteOff,
+        minusForeignPtr,
+        memcpyFp,
+        SizeOverflowException,
+        overflowError,
+        checkedAdd,
+        checkedMultiply,
+
+        -- * Standard C Functions
+        c_strlen,
+        c_free_finalizer,
+
+        memchr,
+        memcmp,
+        memcpy,
+        memset,
+
+        -- * cbits functions
+        c_reverse,
+        c_intersperse,
+        c_maximum,
+        c_minimum,
+        c_count,
+        c_sort,
+
+        -- * Chars
+        w2c, c2w, isSpaceWord8, isSpaceChar8,
+
+        -- * Deprecated and unmentionable
+        accursedUnutterablePerformIO,
+
+        -- * Exported compatibility shim
+        plusForeignPtr,
+        unsafeWithForeignPtr
+  ) where
+
+import Prelude hiding (concat, null)
+import qualified Data.List as List
+
+import Control.Monad            (void)
+
+import Foreign.ForeignPtr       (ForeignPtr, withForeignPtr)
+import Foreign.Ptr              (Ptr, FunPtr, plusPtr)
+import Foreign.Storable         (Storable(..))
+import Foreign.C.Types          (CInt(..), CSize(..))
+import Foreign.C.String         (CString)
+
+#if !MIN_VERSION_base(4,13,0)
+import Data.Semigroup           (Semigroup ((<>)))
+#endif
+import Data.Semigroup           (Semigroup (sconcat, stimes))
+import Data.List.NonEmpty       (NonEmpty ((:|)))
+
+import Control.DeepSeq          (NFData(rnf))
+
+import Data.String              (IsString(..))
+
+import Control.Exception        (assert, throw, Exception)
+
+import Data.Bits                ((.&.))
+import Data.Char                (ord)
+import Data.Word
+
+import Data.Typeable            (Typeable)
+import Data.Data                (Data(..), mkNoRepType)
+
+import GHC.Base                 (nullAddr#,realWorld#,unsafeChr)
+import GHC.Exts                 (IsList(..))
+import GHC.CString              (unpackCString#)
+import GHC.Exts                 (Addr#, minusAddr#)
+
+#define TIMES_INT_2_AVAILABLE MIN_VERSION_ghc_prim(0,7,0)
+#if TIMES_INT_2_AVAILABLE
+import GHC.Prim                (timesInt2#)
+#else
+import GHC.Prim                ( timesWord2#
+                               , or#
+                               , uncheckedShiftRL#
+                               , int2Word#
+                               , word2Int#
+                               )
+import Data.Bits               (finiteBitSize)
+#endif
+
+import GHC.IO                   (IO(IO),unsafeDupablePerformIO)
+import GHC.ForeignPtr           (ForeignPtr(ForeignPtr)
+#if __GLASGOW_HASKELL__ < 900
+                                , newForeignPtr_
+#endif
+                                , mallocPlainForeignPtrBytes)
+
+#if MIN_VERSION_base(4,10,0)
+import GHC.ForeignPtr           (plusForeignPtr)
+#else
+import GHC.Prim                 (plusAddr#)
+#endif
+
+#if __GLASGOW_HASKELL__ >= 811
+import GHC.CString              (cstringLength#)
+import GHC.ForeignPtr           (ForeignPtrContents(FinalPtr))
+#else
+import GHC.Ptr                  (Ptr(..))
+#endif
+
+import GHC.Types                (Int (..))
+
+#if MIN_VERSION_base(4,15,0)
+import GHC.ForeignPtr           (unsafeWithForeignPtr)
+#endif
+
+import qualified Language.Haskell.TH.Lib as TH
+import qualified Language.Haskell.TH.Syntax as TH
+
+#if !MIN_VERSION_base(4,15,0)
+unsafeWithForeignPtr :: ForeignPtr a -> (Ptr a -> IO b) -> IO b
+unsafeWithForeignPtr = withForeignPtr
+#endif
+
+-- CFILES stuff is Hugs only
+{-# CFILES cbits/fpstring.c #-}
+
+#if !MIN_VERSION_base(4,10,0)
+-- |Advances the given address by the given offset in bytes.
+--
+-- The new 'ForeignPtr' shares the finalizer of the original,
+-- equivalent from a finalization standpoint to just creating another
+-- reference to the original. That is, the finalizer will not be
+-- called before the new 'ForeignPtr' is unreachable, nor will it be
+-- called an additional time due to this call, and the finalizer will
+-- be called with the same address that it would have had this call
+-- not happened, *not* the new address.
+plusForeignPtr :: ForeignPtr a -> Int -> ForeignPtr b
+plusForeignPtr (ForeignPtr addr guts) (I# offset) = ForeignPtr (plusAddr# addr offset) guts
+{-# INLINE [0] plusForeignPtr #-}
+{-# RULES
+"ByteString plusForeignPtr/0" forall fp .
+   plusForeignPtr fp 0 = fp
+ #-}
+#endif
+
+minusForeignPtr :: ForeignPtr a -> ForeignPtr b -> Int
+minusForeignPtr (ForeignPtr addr1 _) (ForeignPtr addr2 _)
+  = I# (minusAddr# addr1 addr2)
+
+peekFp :: Storable a => ForeignPtr a -> IO a
+peekFp fp = unsafeWithForeignPtr fp peek
+
+pokeFp :: Storable a => ForeignPtr a -> a -> IO ()
+pokeFp fp val = unsafeWithForeignPtr fp $ \p -> poke p val
+
+peekFpByteOff :: Storable a => ForeignPtr a -> Int -> IO a
+peekFpByteOff fp off = unsafeWithForeignPtr fp $ \p ->
+  peekByteOff p off
+
+pokeFpByteOff :: Storable a => ForeignPtr b -> Int -> a -> IO ()
+pokeFpByteOff fp off val = unsafeWithForeignPtr fp $ \p ->
+  pokeByteOff p off val
+
+-- -----------------------------------------------------------------------------
+
+-- | A space-efficient representation of a 'Word8' vector, supporting many
+-- efficient operations.
+--
+-- A 'ByteString' contains 8-bit bytes, or by using the operations from
+-- "Data.ByteString.Char8" it can be interpreted as containing 8-bit
+-- characters.
+--
+data ByteString = BS {-# UNPACK #-} !(ForeignPtr Word8) -- payload
+                     {-# UNPACK #-} !Int                -- length
+                     -- ^ @since 0.11.0.0
+    deriving (Typeable)
+
+-- | Type synonym for the strict flavour of 'ByteString'.
+--
+-- @since 0.11.2.0
+type StrictByteString = ByteString
+
+-- |
+-- @'PS' foreignPtr offset length@ represents a 'ByteString' with data
+-- backed by a given @foreignPtr@, starting at a given @offset@ in bytes
+-- and of a specified @length@.
+--
+-- This pattern is used to emulate the legacy 'ByteString' data
+-- constructor, so that pre-existing code generally doesn't need to
+-- change to benefit from the simplified 'BS' constructor and can
+-- continue to function unchanged.
+--
+-- /Note:/ Matching with this constructor will always be given a 0 offset,
+-- as the base will be manipulated by 'plusForeignPtr' instead.
+--
+pattern PS :: ForeignPtr Word8 -> Int -> Int -> ByteString
+pattern PS fp zero len <- BS fp ((0,) -> (zero, len)) where
+  PS fp o len = BS (plusForeignPtr fp o) len
+#if __GLASGOW_HASKELL__ >= 802
+{-# COMPLETE PS #-}
+#endif
+
+instance Eq  ByteString where
+    (==)    = eq
+
+instance Ord ByteString where
+    compare = compareBytes
+
+instance Semigroup ByteString where
+    (<>)    = append
+    sconcat (b:|bs) = concat (b:bs)
+    {-# INLINE stimes #-}
+    stimes  = stimesPolymorphic
+
+instance Monoid ByteString where
+    mempty  = empty
+    mappend = (<>)
+    mconcat = concat
+
+instance NFData ByteString where
+    rnf BS{} = ()
+
+instance Show ByteString where
+    showsPrec p ps r = showsPrec p (unpackChars ps) r
+
+instance Read ByteString where
+    readsPrec p str = [ (packChars x, y) | (x, y) <- readsPrec p str ]
+
+-- | @since 0.10.12.0
+instance IsList ByteString where
+  type Item ByteString = Word8
+  fromList = packBytes
+  toList   = unpackBytes
+
+-- | Beware: 'fromString' truncates multi-byte characters to octets.
+-- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n�
+instance IsString ByteString where
+    {-# INLINE fromString #-}
+    fromString = packChars
+
+instance Data ByteString where
+  gfoldl f z txt = z packBytes `f` unpackBytes txt
+  toConstr _     = error "Data.ByteString.ByteString.toConstr"
+  gunfold _ _    = error "Data.ByteString.ByteString.gunfold"
+  dataTypeOf _   = mkNoRepType "Data.ByteString.ByteString"
+
+-- | @since 0.11.2.0
+instance TH.Lift ByteString where
+#if MIN_VERSION_template_haskell(2,16,0)
+  lift (BS ptr len) = [| unsafePackLenLiteral |]
+    `TH.appE` TH.litE (TH.integerL (fromIntegral len))
+    `TH.appE` TH.litE (TH.BytesPrimL $ TH.Bytes ptr 0 (fromIntegral len))
+#else
+  lift bs@(BS _ len) = [| unsafePackLenLiteral |]
+    `TH.appE` TH.litE (TH.integerL (fromIntegral len))
+    `TH.appE` TH.litE (TH.StringPrimL $ unpackBytes bs)
+#endif
+
+#if MIN_VERSION_template_haskell(2,17,0)
+  liftTyped = TH.unsafeCodeCoerce . TH.lift
+#elif MIN_VERSION_template_haskell(2,16,0)
+  liftTyped = TH.unsafeTExpCoerce . TH.lift
+#endif
+
+------------------------------------------------------------------------
+-- Internal indexing
+
+-- | 'findIndexOrLength' is a variant of findIndex, that returns the length
+-- of the string if no element is found, rather than Nothing.
+findIndexOrLength :: (Word8 -> Bool) -> ByteString -> Int
+findIndexOrLength k (BS x l) =
+    accursedUnutterablePerformIO $ g x
+  where
+    g ptr = go 0
+      where
+        go !n | n >= l    = return l
+              | otherwise = do w <- peekFp $ ptr `plusForeignPtr` n
+                               if k w
+                                 then return n
+                                 else go (n+1)
+{-# INLINE findIndexOrLength #-}
+
+------------------------------------------------------------------------
+-- Packing and unpacking from lists
+
+packBytes :: [Word8] -> ByteString
+packBytes ws = unsafePackLenBytes (List.length ws) ws
+
+packChars :: [Char] -> ByteString
+packChars cs = unsafePackLenChars (List.length cs) cs
+
+{-# INLINE [0] packChars #-}
+
+{-# RULES
+"ByteString packChars/packAddress" forall s .
+   packChars (unpackCString# s) = unsafePackLiteral s
+ #-}
+
+unsafePackLenBytes :: Int -> [Word8] -> ByteString
+unsafePackLenBytes len xs0 =
+    unsafeCreateFp len $ \p -> go p xs0
+  where
+    go !_ []     = return ()
+    go !p (x:xs) = pokeFp p x >> go (p `plusForeignPtr` 1) xs
+
+unsafePackLenChars :: Int -> [Char] -> ByteString
+unsafePackLenChars len cs0 =
+    unsafeCreateFp len $ \p -> go p cs0
+  where
+    go !_ []     = return ()
+    go !p (c:cs) = pokeFp p (c2w c) >> go (p `plusForeignPtr` 1) cs
+
+
+-- | /O(n)/ Pack a null-terminated sequence of bytes, pointed to by an
+-- Addr\# (an arbitrary machine address assumed to point outside the
+-- garbage-collected heap) into a @ByteString@. A much faster way to
+-- create an 'Addr#' is with an unboxed string literal, than to pack a
+-- boxed string. A unboxed string literal is compiled to a static @char
+-- []@ by GHC. Establishing the length of the string requires a call to
+-- @strlen(3)@, so the 'Addr#' must point to a null-terminated buffer (as
+-- is the case with @\"string\"\#@ literals in GHC). Use 'Data.ByteString.Unsafe.unsafePackAddressLen'
+-- if you know the length of the string statically.
+--
+-- An example:
+--
+-- > literalFS = unsafePackAddress "literal"#
+--
+-- This function is /unsafe/. If you modify the buffer pointed to by the
+-- original 'Addr#' this modification will be reflected in the resulting
+-- @ByteString@, breaking referential transparency.
+--
+-- Note this also won't work if your 'Addr#' has embedded @\'\\0\'@ characters in
+-- the string, as @strlen@ will return too short a length.
+--
+unsafePackAddress :: Addr# -> IO ByteString
+unsafePackAddress addr# = do
+#if __GLASGOW_HASKELL__ >= 811
+    unsafePackLenAddress (I# (cstringLength# addr#)) addr#
+#else
+    l <- c_strlen (Ptr addr#)
+    unsafePackLenAddress (fromIntegral l) addr#
+#endif
+{-# INLINE unsafePackAddress #-}
+
+-- | See 'unsafePackAddress'. This function is similar,
+-- but takes an additional length argument rather then computing
+-- it with @strlen@.
+-- Therefore embedding @\'\\0\'@ characters is possible.
+--
+-- @since 0.11.2.0
+unsafePackLenAddress :: Int -> Addr# -> IO ByteString
+unsafePackLenAddress len addr# = do
+#if __GLASGOW_HASKELL__ >= 811
+    return (BS (ForeignPtr addr# FinalPtr) len)
+#else
+    p <- newForeignPtr_ (Ptr addr#)
+    return $ BS p len
+#endif
+{-# INLINE unsafePackLenAddress #-}
+
+-- | See 'unsafePackAddress'. This function has similar behavior. Prefer
+-- this function when the address in known to be an @Addr#@ literal. In
+-- that context, there is no need for the sequencing guarantees that 'IO'
+-- provides. On GHC 9.0 and up, this function uses the @FinalPtr@ data
+-- constructor for @ForeignPtrContents@.
+--
+-- @since 0.11.1.0
+unsafePackLiteral :: Addr# -> ByteString
+unsafePackLiteral addr# =
+#if __GLASGOW_HASKELL__ >= 811
+  unsafePackLenLiteral (I# (cstringLength# addr#)) addr#
+#else
+  let len = accursedUnutterablePerformIO (c_strlen (Ptr addr#))
+   in unsafePackLenLiteral (fromIntegral len) addr#
+#endif
+{-# INLINE unsafePackLiteral #-}
+
+
+-- | See 'unsafePackLiteral'. This function is similar,
+-- but takes an additional length argument rather then computing
+-- it with @strlen@.
+-- Therefore embedding @\'\\0\'@ characters is possible.
+--
+-- @since 0.11.2.0
+unsafePackLenLiteral :: Int -> Addr# -> ByteString
+unsafePackLenLiteral len addr# =
+#if __GLASGOW_HASKELL__ >= 811
+  BS (ForeignPtr addr# FinalPtr) len
+#else
+  -- newForeignPtr_ allocates a MutVar# internally. If that MutVar#
+  -- gets commoned up with the MutVar# of some unrelated ForeignPtr,
+  -- it may prevent automatic finalization for that other ForeignPtr.
+  -- So we avoid accursedUnutterablePerformIO here.
+  BS (unsafeDupablePerformIO (newForeignPtr_ (Ptr addr#))) len
+#endif
+{-# INLINE unsafePackLenLiteral #-}
+
+packUptoLenBytes :: Int -> [Word8] -> (ByteString, [Word8])
+packUptoLenBytes len xs0 =
+    unsafeCreateFpUptoN' len $ \p0 ->
+      let p_end = plusForeignPtr p0 len
+          go !p []              = return (p `minusForeignPtr` p0, [])
+          go !p xs | p == p_end = return (len, xs)
+          go !p (x:xs)          = pokeFp p x >> go (p `plusForeignPtr` 1) xs
+      in go p0 xs0
+
+packUptoLenChars :: Int -> [Char] -> (ByteString, [Char])
+packUptoLenChars len cs0 =
+    unsafeCreateFpUptoN' len $ \p0 ->
+      let p_end = plusForeignPtr p0 len
+          go !p []              = return (p `minusForeignPtr` p0, [])
+          go !p cs | p == p_end = return (len, cs)
+          go !p (c:cs)          = pokeFp p (c2w c) >> go (p `plusForeignPtr` 1) cs
+      in go p0 cs0
+
+-- Unpacking bytestrings into lists efficiently is a tradeoff: on the one hand
+-- we would like to write a tight loop that just blasts the list into memory, on
+-- the other hand we want it to be unpacked lazily so we don't end up with a
+-- massive list data structure in memory.
+--
+-- Our strategy is to combine both: we will unpack lazily in reasonable sized
+-- chunks, where each chunk is unpacked strictly.
+--
+-- unpackBytes and unpackChars do the lazy loop, while unpackAppendBytes and
+-- unpackAppendChars do the chunks strictly.
+
+unpackBytes :: ByteString -> [Word8]
+unpackBytes bs = unpackAppendBytesLazy bs []
+
+unpackChars :: ByteString -> [Char]
+unpackChars bs = unpackAppendCharsLazy bs []
+
+unpackAppendBytesLazy :: ByteString -> [Word8] -> [Word8]
+unpackAppendBytesLazy (BS fp len) xs
+  | len <= 100 = unpackAppendBytesStrict (BS fp len) xs
+  | otherwise  = unpackAppendBytesStrict (BS fp 100) remainder
+  where
+    remainder  = unpackAppendBytesLazy (BS (plusForeignPtr fp 100) (len-100)) xs
+
+  -- Why 100 bytes you ask? Because on a 64bit machine the list we allocate
+  -- takes just shy of 4k which seems like a reasonable amount.
+  -- (5 words per list element, 8 bytes per word, 100 elements = 4000 bytes)
+
+unpackAppendCharsLazy :: ByteString -> [Char] -> [Char]
+unpackAppendCharsLazy (BS fp len) cs
+  | len <= 100 = unpackAppendCharsStrict (BS fp len) cs
+  | otherwise  = unpackAppendCharsStrict (BS fp 100) remainder
+  where
+    remainder  = unpackAppendCharsLazy (BS (plusForeignPtr fp 100) (len-100)) cs
+
+-- For these unpack functions, since we're unpacking the whole list strictly we
+-- build up the result list in an accumulator. This means we have to build up
+-- the list starting at the end. So our traversal starts at the end of the
+-- buffer and loops down until we hit the sentinal:
+
+unpackAppendBytesStrict :: ByteString -> [Word8] -> [Word8]
+unpackAppendBytesStrict (BS fp len) xs =
+    accursedUnutterablePerformIO $ unsafeWithForeignPtr fp $ \base ->
+      loop (base `plusPtr` (-1)) (base `plusPtr` (-1+len)) xs
+  where
+    loop !sentinal !p acc
+      | p == sentinal = return acc
+      | otherwise     = do x <- peek p
+                           loop sentinal (p `plusPtr` (-1)) (x:acc)
+
+unpackAppendCharsStrict :: ByteString -> [Char] -> [Char]
+unpackAppendCharsStrict (BS fp len) xs =
+    accursedUnutterablePerformIO $ unsafeWithForeignPtr fp $ \base ->
+      loop (base `plusPtr` (-1)) (base `plusPtr` (-1+len)) xs
+  where
+    loop !sentinal !p acc
+      | p == sentinal = return acc
+      | otherwise     = do x <- peek p
+                           loop sentinal (p `plusPtr` (-1)) (w2c x:acc)
+
+------------------------------------------------------------------------
+
+-- | The 0 pointer. Used to indicate the empty Bytestring.
+nullForeignPtr :: ForeignPtr Word8
+#if __GLASGOW_HASKELL__ >= 811
+nullForeignPtr = ForeignPtr nullAddr# FinalPtr
+#else
+nullForeignPtr = ForeignPtr nullAddr# (error "nullForeignPtr")
+#endif
+
+-- ---------------------------------------------------------------------
+-- Low level constructors
+
+-- | /O(1)/ Build a ByteString from a ForeignPtr.
+--
+-- If you do not need the offset parameter then you should be using
+-- 'Data.ByteString.Unsafe.unsafePackCStringLen' or
+-- 'Data.ByteString.Unsafe.unsafePackCStringFinalizer' instead.
+--
+fromForeignPtr :: ForeignPtr Word8
+               -> Int -- ^ Offset
+               -> Int -- ^ Length
+               -> ByteString
+fromForeignPtr fp o = BS (plusForeignPtr fp o)
+{-# INLINE fromForeignPtr #-}
+
+-- | @since 0.11.0.0
+fromForeignPtr0 :: ForeignPtr Word8
+               -> Int -- ^ Length
+               -> ByteString
+fromForeignPtr0 = BS
+{-# INLINE fromForeignPtr0 #-}
+
+-- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
+toForeignPtr :: ByteString -> (ForeignPtr Word8, Int, Int) -- ^ (ptr, offset, length)
+toForeignPtr (BS ps l) = (ps, 0, l)
+{-# INLINE toForeignPtr #-}
+
+-- | /O(1)/ Deconstruct a ForeignPtr from a ByteString
+--
+-- @since 0.11.0.0
+toForeignPtr0 :: ByteString -> (ForeignPtr Word8, Int) -- ^ (ptr, length)
+toForeignPtr0 (BS ps l) = (ps, l)
+{-# INLINE toForeignPtr0 #-}
+
+-- | A way of creating ByteStrings outside the IO monad. The @Int@
+-- argument gives the final size of the ByteString.
+unsafeCreateFp :: Int -> (ForeignPtr Word8 -> IO ()) -> ByteString
+unsafeCreateFp l f = unsafeDupablePerformIO (createFp l f)
+{-# INLINE unsafeCreateFp #-}
+
+-- | Like 'unsafeCreateFp' but instead of giving the final size of the
+-- ByteString, it is just an upper bound. The inner action returns
+-- the actual size. Unlike 'createFpAndTrim' the ByteString is not
+-- reallocated if the final size is less than the estimated size.
+unsafeCreateFpUptoN :: Int -> (ForeignPtr Word8 -> IO Int) -> ByteString
+unsafeCreateFpUptoN l f = unsafeDupablePerformIO (createFpUptoN l f)
+{-# INLINE unsafeCreateFpUptoN #-}
+
+unsafeCreateFpUptoN'
+  :: Int -> (ForeignPtr Word8 -> IO (Int, a)) -> (ByteString, a)
+unsafeCreateFpUptoN' l f = unsafeDupablePerformIO (createFpUptoN' l f)
+{-# INLINE unsafeCreateFpUptoN' #-}
+
+-- | Create ByteString of size @l@ and use action @f@ to fill its contents.
+createFp :: Int -> (ForeignPtr Word8 -> IO ()) -> IO ByteString
+createFp l action = do
+    fp <- mallocByteString l
+    action fp
+    return $! BS fp l
+{-# INLINE createFp #-}
+
+-- | Given a maximum size @l@ and an action @f@ that fills the 'ByteString'
+-- starting at the given 'Ptr' and returns the actual utilized length,
+-- @`createFpUptoN'` l f@ returns the filled 'ByteString'.
+createFpUptoN :: Int -> (ForeignPtr Word8 -> IO Int) -> IO ByteString
+createFpUptoN l action = do
+    fp <- mallocByteString l
+    l' <- action fp
+    assert (l' <= l) $ return $! BS fp l'
+{-# INLINE createFpUptoN #-}
+
+-- | Like 'createFpUptoN', but also returns an additional value created by the
+-- action.
+createFpUptoN' :: Int -> (ForeignPtr Word8 -> IO (Int, a)) -> IO (ByteString, a)
+createFpUptoN' l action = do
+    fp <- mallocByteString l
+    (l', res) <- action fp
+    assert (l' <= l) $ return (BS fp l', res)
+{-# INLINE createFpUptoN' #-}
+
+-- | Given the maximum size needed and a function to make the contents
+-- of a ByteString, createFpAndTrim makes the 'ByteString'. The generating
+-- function is required to return the actual final size (<= the maximum
+-- size), and the resulting byte array is reallocated to this size.
+--
+-- createFpAndTrim is the main mechanism for creating custom, efficient
+-- ByteString functions, using Haskell or C functions to fill the space.
+--
+createFpAndTrim :: Int -> (ForeignPtr Word8 -> IO Int) -> IO ByteString
+createFpAndTrim l action = do
+    fp <- mallocByteString l
+    l' <- action fp
+    if assert (0 <= l' && l' <= l) $ l' >= l
+        then return $! BS fp l
+        else createFp l' $ \fp' -> memcpyFp fp' fp l'
+{-# INLINE createFpAndTrim #-}
+
+createFpAndTrim' :: Int -> (ForeignPtr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a)
+createFpAndTrim' l action = do
+    fp <- mallocByteString l
+    (off, l', res) <- action fp
+    if assert (0 <= l' && l' <= l) $ l' >= l
+        then return (BS fp l, res)
+        else do ps <- createFp l' $ \fp' ->
+                        memcpyFp fp' (fp `plusForeignPtr` off) l'
+                return (ps, res)
+{-# INLINE createFpAndTrim' #-}
+
+
+wrapAction :: (Ptr Word8 -> IO res) -> ForeignPtr Word8 -> IO res
+wrapAction = flip withForeignPtr
+  -- Cannot use unsafeWithForeignPtr, because action can diverge
+
+-- | A way of creating ByteStrings outside the IO monad. The @Int@
+-- argument gives the final size of the ByteString.
+unsafeCreate :: Int -> (Ptr Word8 -> IO ()) -> ByteString
+unsafeCreate l f = unsafeCreateFp l (wrapAction f)
+{-# INLINE unsafeCreate #-}
+
+-- | Like 'unsafeCreate' but instead of giving the final size of the
+-- ByteString, it is just an upper bound. The inner action returns
+-- the actual size. Unlike 'createAndTrim' the ByteString is not
+-- reallocated if the final size is less than the estimated size.
+unsafeCreateUptoN :: Int -> (Ptr Word8 -> IO Int) -> ByteString
+unsafeCreateUptoN l f = unsafeCreateFpUptoN l (wrapAction f)
+{-# INLINE unsafeCreateUptoN #-}
+
+-- | @since 0.10.12.0
+unsafeCreateUptoN' :: Int -> (Ptr Word8 -> IO (Int, a)) -> (ByteString, a)
+unsafeCreateUptoN' l f = unsafeCreateFpUptoN' l (wrapAction f)
+{-# INLINE unsafeCreateUptoN' #-}
+
+-- | Create ByteString of size @l@ and use action @f@ to fill its contents.
+create :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
+create l action = createFp l (wrapAction action)
+{-# INLINE create #-}
+
+-- | Given a maximum size @l@ and an action @f@ that fills the 'ByteString'
+-- starting at the given 'Ptr' and returns the actual utilized length,
+-- @`createUptoN'` l f@ returns the filled 'ByteString'.
+createUptoN :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
+createUptoN l action = createFpUptoN l (wrapAction action)
+{-# INLINE createUptoN #-}
+
+-- | Like 'createUptoN', but also returns an additional value created by the
+-- action.
+--
+-- @since 0.10.12.0
+createUptoN' :: Int -> (Ptr Word8 -> IO (Int, a)) -> IO (ByteString, a)
+createUptoN' l action = createFpUptoN' l (wrapAction action)
+{-# INLINE createUptoN' #-}
+
+-- | Given the maximum size needed and a function to make the contents
+-- of a ByteString, createAndTrim makes the 'ByteString'. The generating
+-- function is required to return the actual final size (<= the maximum
+-- size), and the resulting byte array is reallocated to this size.
+--
+-- createAndTrim is the main mechanism for creating custom, efficient
+-- ByteString functions, using Haskell or C functions to fill the space.
+--
+createAndTrim :: Int -> (Ptr Word8 -> IO Int) -> IO ByteString
+createAndTrim l action = createFpAndTrim l (wrapAction action)
+{-# INLINE createAndTrim #-}
+
+createAndTrim' :: Int -> (Ptr Word8 -> IO (Int, Int, a)) -> IO (ByteString, a)
+createAndTrim' l action = createFpAndTrim' l (wrapAction action)
+{-# INLINE createAndTrim' #-}
+
+
+-- | Wrapper of 'Foreign.ForeignPtr.mallocForeignPtrBytes' with faster implementation for GHC
+--
+mallocByteString :: Int -> IO (ForeignPtr a)
+mallocByteString = mallocPlainForeignPtrBytes
+{-# INLINE mallocByteString #-}
+
+------------------------------------------------------------------------
+-- Implementations for Eq, Ord and Monoid instances
+
+eq :: ByteString -> ByteString -> Bool
+eq a@(BS fp len) b@(BS fp' len')
+  | len /= len' = False    -- short cut on length
+  | fp == fp'   = True     -- short cut for the same string
+  | otherwise   = compareBytes a b == EQ
+{-# INLINE eq #-}
+-- ^ still needed
+
+compareBytes :: ByteString -> ByteString -> Ordering
+compareBytes (BS _   0)    (BS _   0)    = EQ  -- short cut for empty strings
+compareBytes (BS fp1 len1) (BS fp2 len2) =
+    accursedUnutterablePerformIO $
+      unsafeWithForeignPtr fp1 $ \p1 ->
+      unsafeWithForeignPtr fp2 $ \p2 -> do
+        i <- memcmp p1 p2 (min len1 len2)
+        return $! case i `compare` 0 of
+                    EQ  -> len1 `compare` len2
+                    x   -> x
+
+
+-- | /O(1)/ The empty 'ByteString'
+empty :: ByteString
+-- This enables bypassing #457 by not using (polymorphic) mempty in
+-- any definitions used by the (Monoid ByteString) instance
+empty = BS nullForeignPtr 0
+
+append :: ByteString -> ByteString -> ByteString
+append (BS _   0)    b                  = b
+append a             (BS _   0)    = a
+append (BS fp1 len1) (BS fp2 len2) =
+    unsafeCreateFp (checkedAdd "append" len1 len2) $ \destptr1 -> do
+      let destptr2 = destptr1 `plusForeignPtr` len1
+      memcpyFp destptr1 fp1 len1
+      memcpyFp destptr2 fp2 len2
+
+concat :: [ByteString] -> ByteString
+concat = \bss0 -> goLen0 bss0 bss0
+    -- The idea here is we first do a pass over the input list to determine:
+    --
+    --  1. is a copy necessary? e.g. @concat []@, @concat [mempty, "hello"]@,
+    --     and @concat ["hello", mempty, mempty]@ can all be handled without
+    --     copying.
+    --  2. if a copy is necessary, how large is the result going to be?
+    --
+    -- If a copy is necessary then we create a buffer of the appropriate size
+    -- and do another pass over the input list, copying the chunks into the
+    -- buffer. Also, since foreign calls aren't entirely free we skip over
+    -- empty chunks while copying.
+    --
+    -- We pass the original [ByteString] (bss0) through as an argument through
+    -- goLen0, goLen1, and goLen since we will need it again in goCopy. Passing
+    -- it as an explicit argument avoids capturing it in these functions'
+    -- closures which would result in unnecessary closure allocation.
+  where
+    -- It's still possible that the result is empty
+    goLen0 _    []                     = empty
+    goLen0 bss0 (BS _ 0     :bss)    = goLen0 bss0 bss
+    goLen0 bss0 (bs           :bss)    = goLen1 bss0 bs bss
+
+    -- It's still possible that the result is a single chunk
+    goLen1 _    bs []                  = bs
+    goLen1 bss0 bs (BS _ 0  :bss)    = goLen1 bss0 bs bss
+    goLen1 bss0 bs (BS _ len:bss)    = goLen bss0 (checkedAdd "concat" len' len) bss
+      where BS _ len' = bs
+
+    -- General case, just find the total length we'll need
+    goLen bss0 !total (BS _ len:bss) = goLen bss0 total' bss
+      where total' = checkedAdd "concat" total len
+    goLen bss0 total [] =
+      unsafeCreateFp total $ \ptr -> goCopy bss0 ptr
+
+    -- Copy the data
+    goCopy []                  !_   = return ()
+    goCopy (BS _  0  :bss) !ptr = goCopy bss ptr
+    goCopy (BS fp len:bss) !ptr = do
+      memcpyFp ptr fp len
+      goCopy bss (ptr `plusForeignPtr` len)
+{-# NOINLINE concat #-}
+
+{-# RULES
+"ByteString concat [] -> empty"
+   concat [] = empty
+"ByteString concat [bs] -> bs" forall x.
+   concat [x] = x
+ #-}
+
+-- | Repeats the given ByteString n times.
+-- Polymorphic wrapper to make sure any generated
+-- specializations are reasonably small.
+stimesPolymorphic :: Integral a => a -> ByteString -> ByteString
+{-# INLINABLE stimesPolymorphic #-}
+stimesPolymorphic nRaw = \ !bs -> case checkedIntegerToInt n of
+  Just nInt
+    | nInt >= 0  -> stimesNonNegativeInt nInt bs
+    | otherwise  -> stimesNegativeErr
+  Nothing
+    | n < 0  -> stimesNegativeErr
+    | BS _ 0 <- bs  -> empty
+    | otherwise     -> stimesOverflowErr
+  where  n = toInteger nRaw
+  -- By exclusively using n instead of nRaw, the semantics are kept simple
+  -- and the likelihood of potentially dangerous mistakes minimized.
+
+
+stimesNegativeErr :: ByteString
+stimesNegativeErr
+  = error "stimes @ByteString: non-negative multiplier expected"
+
+stimesOverflowErr :: ByteString
+-- Although this only appears once, it is extracted here to prevent it
+-- from being duplicated in specializations of 'stimesPolymorphic'
+stimesOverflowErr = overflowError "stimes"
+
+-- | Repeats the given ByteString n times.
+stimesNonNegativeInt :: Int -> ByteString -> ByteString
+stimesNonNegativeInt n (BS fp len)
+  | n == 0 = empty
+  | n == 1 = BS fp len
+  | len == 0 = empty
+  | len == 1 = unsafeCreateFp n $ \destfptr -> do
+      byte <- peekFp fp
+      void $ unsafeWithForeignPtr destfptr $ \destptr ->
+        memset destptr byte (fromIntegral n)
+  | otherwise = unsafeCreateFp size $ \destptr -> do
+      memcpyFp destptr fp len
+      fillFrom destptr len
+  where
+    size = checkedMultiply "stimes" n len
+    halfSize = (size - 1) `div` 2 -- subtraction and division won't overflow
+
+    fillFrom :: ForeignPtr Word8 -> Int -> IO ()
+    fillFrom destptr copied
+      | copied <= halfSize = do
+        memcpyFp (destptr `plusForeignPtr` copied) destptr copied
+        fillFrom destptr (copied * 2)
+      | otherwise = memcpyFp (destptr `plusForeignPtr` copied) destptr (size - copied)
+
+
+------------------------------------------------------------------------
+
+-- | Conversion between 'Word8' and 'Char'. Should compile to a no-op.
+w2c :: Word8 -> Char
+w2c = unsafeChr . fromIntegral
+{-# INLINE w2c #-}
+
+-- | Unsafe conversion between 'Char' and 'Word8'. This is a no-op and
+-- silently truncates to 8 bits Chars > '\255'. It is provided as
+-- convenience for ByteString construction.
+c2w :: Char -> Word8
+c2w = fromIntegral . ord
+{-# INLINE c2w #-}
+
+-- | Selects words corresponding to white-space characters in the Latin-1 range
+isSpaceWord8 :: Word8 -> Bool
+isSpaceWord8 w8 =
+    -- Avoid the cost of narrowing arithmetic results to Word8,
+    -- the conversion from Word8 to Word is free.
+    let w :: Word
+        !w = fromIntegral w8
+     in w .&. 0x50 == 0    -- Quick non-whitespace filter
+        && w - 0x21 > 0x7e -- Second non-whitespace filter
+        && ( w == 0x20     -- SP
+          || w == 0xa0     -- NBSP
+          || w - 0x09 < 5) -- HT, NL, VT, FF, CR
+{-# INLINE isSpaceWord8 #-}
+
+-- | Selects white-space characters in the Latin-1 range
+isSpaceChar8 :: Char -> Bool
+isSpaceChar8 = isSpaceWord8 . c2w
+{-# INLINE isSpaceChar8 #-}
+
+------------------------------------------------------------------------
+
+-- | The type of exception raised by 'overflowError'
+-- and on failure by overflow-checked arithmetic operations.
+newtype SizeOverflowException
+  = SizeOverflowException String
+
+instance Show SizeOverflowException where
+  show (SizeOverflowException err) = err
+
+instance Exception SizeOverflowException
+
+-- | Raises a 'SizeOverflowException',
+-- with a message using the given function name.
+overflowError :: String -> a
+overflowError fun = throw $ SizeOverflowException msg
+  where msg = "Data.ByteString." ++ fun ++ ": size overflow"
+
+-- | Add two non-negative numbers.
+-- Calls 'overflowError' on overflow.
+checkedAdd :: String -> Int -> Int -> Int
+{-# INLINE checkedAdd #-}
+checkedAdd fun x y
+  | r >= 0    = r
+  | otherwise = overflowError fun
+  where r = assert (min x y >= 0) $ x + y
+
+-- | Multiplies two non-negative numbers.
+-- Calls 'overflowError' on overflow.
+checkedMultiply :: String -> Int -> Int -> Int
+{-# INLINE checkedMultiply #-}
+checkedMultiply fun !x@(I# x#) !y@(I# y#) = assert (min x y >= 0) $
+#if TIMES_INT_2_AVAILABLE
+  case timesInt2# x# y# of
+    (# 0#, _, result #) -> I# result
+    _ -> overflowError fun
+#else
+  case timesWord2# (int2Word# x#) (int2Word# y#) of
+    (# hi, lo #) -> case or# hi (uncheckedShiftRL# lo shiftAmt) of
+      0## -> I# (word2Int# lo)
+      _   -> overflowError fun
+  where !(I# shiftAmt) = finiteBitSize (0 :: Word) - 1
+#endif
+
+
+-- | Attempts to convert an 'Integer' value to an 'Int', returning
+-- 'Nothing' if doing so would result in an overflow.
+checkedIntegerToInt :: Integer -> Maybe Int
+{-# INLINE checkedIntegerToInt #-}
+-- We could use Data.Bits.toIntegralSized, but this hand-rolled
+-- version is currently a bit faster as of GHC 9.2.
+-- It's even faster to just match on the Integer constructors, but
+-- we'd still need a fallback implementation for integer-simple.
+checkedIntegerToInt x
+  | x == toInteger res = Just res
+  | otherwise = Nothing
+  where  res = fromInteger x :: Int
+
+
+------------------------------------------------------------------------
+
+-- | This \"function\" has a superficial similarity to 'System.IO.Unsafe.unsafePerformIO' but
+-- it is in fact a malevolent agent of chaos. It unpicks the seams of reality
+-- (and the 'IO' monad) so that the normal rules no longer apply. It lulls you
+-- into thinking it is reasonable, but when you are not looking it stabs you
+-- in the back and aliases all of your mutable buffers. The carcass of many a
+-- seasoned Haskell programmer lie strewn at its feet.
+--
+-- Witness the trail of destruction:
+--
+-- * <https://github.com/haskell/bytestring/commit/71c4b438c675aa360c79d79acc9a491e7bbc26e7>
+--
+-- * <https://github.com/haskell/bytestring/commit/210c656390ae617d9ee3b8bcff5c88dd17cef8da>
+--
+-- * <https://ghc.haskell.org/trac/ghc/ticket/3486>
+--
+-- * <https://ghc.haskell.org/trac/ghc/ticket/3487>
+--
+-- * <https://ghc.haskell.org/trac/ghc/ticket/7270>
+--
+-- * <https://gitlab.haskell.org/ghc/ghc/-/issues/22204>
+--
+-- Do not talk about \"safe\"! You do not know what is safe!
+--
+-- Yield not to its blasphemous call! Flee traveller! Flee or you will be
+-- corrupted and devoured!
+--
+{-# INLINE accursedUnutterablePerformIO #-}
+accursedUnutterablePerformIO :: IO a -> a
+accursedUnutterablePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
+
+-- ---------------------------------------------------------------------
+--
+-- Standard C functions
+--
+
+foreign import ccall unsafe "string.h strlen" c_strlen
+    :: CString -> IO CSize
+
+foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
+    :: FunPtr (Ptr Word8 -> IO ())
+
+foreign import ccall unsafe "string.h memchr" c_memchr
+    :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8)
+
+memchr :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
+memchr p w = c_memchr p (fromIntegral w)
+
+foreign import ccall unsafe "string.h memcmp" c_memcmp
+    :: Ptr Word8 -> Ptr Word8 -> CSize -> IO CInt
+
+memcmp :: Ptr Word8 -> Ptr Word8 -> Int -> IO CInt
+memcmp p q s = c_memcmp p q (fromIntegral s)
+
+foreign import ccall unsafe "string.h memcpy" c_memcpy
+    :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8)
+
+memcpy :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
+memcpy p q s = void $ c_memcpy p q (fromIntegral s)
+
+memcpyFp :: ForeignPtr Word8 -> ForeignPtr Word8 -> Int -> IO ()
+memcpyFp fp fq s = unsafeWithForeignPtr fp $ \p ->
+                     unsafeWithForeignPtr fq $ \q -> memcpy p q s
+
+{-
+foreign import ccall unsafe "string.h memmove" c_memmove
+    :: Ptr Word8 -> Ptr Word8 -> CSize -> IO (Ptr Word8)
+
+memmove :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
+memmove p q s = do c_memmove p q s
+                   return ()
+-}
+
+foreign import ccall unsafe "string.h memset" c_memset
+    :: Ptr Word8 -> CInt -> CSize -> IO (Ptr Word8)
+
+memset :: Ptr Word8 -> Word8 -> CSize -> IO (Ptr Word8)
+memset p w = c_memset p (fromIntegral w)
+
+-- ---------------------------------------------------------------------
+--
+-- Uses our C code
+--
+
+foreign import ccall unsafe "static fpstring.h fps_reverse" c_reverse
+    :: Ptr Word8 -> Ptr Word8 -> CSize -> IO ()
+
+foreign import ccall unsafe "static fpstring.h fps_intersperse" c_intersperse
+    :: Ptr Word8 -> Ptr Word8 -> CSize -> Word8 -> IO ()
+
+foreign import ccall unsafe "static fpstring.h fps_maximum" c_maximum
+    :: Ptr Word8 -> CSize -> IO Word8
+
+foreign import ccall unsafe "static fpstring.h fps_minimum" c_minimum
+    :: Ptr Word8 -> CSize -> IO Word8
+
+foreign import ccall unsafe "static fpstring.h fps_count" c_count
+    :: Ptr Word8 -> CSize -> Word8 -> IO CSize
+
+foreign import ccall unsafe "static fpstring.h fps_sort" c_sort
+    :: Ptr Word8 -> CSize -> IO ()
diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs
index f046d61e5..d9ae76acc 100644
--- a/Data/ByteString/Lazy.hs
+++ b/Data/ByteString/Lazy.hs
@@ -231,7 +231,7 @@ import qualified Data.List              as List
 import qualified Data.Bifunctor         as BF
 import qualified Data.ByteString        as P  (ByteString) -- type name only
 import qualified Data.ByteString        as S  -- S for strict (hmm...)
-import qualified Data.ByteString.Internal as S
+import qualified Data.ByteString.Internal.Type as S
 import qualified Data.ByteString.Unsafe as S
 import qualified Data.ByteString.Lazy.Internal.Deque as D
 import Data.ByteString.Lazy.Internal
@@ -434,9 +434,11 @@ intersperse w (Chunk c cs) = Chunk (S.intersperse w c)
                                    (foldrChunks (Chunk . intersperse') Empty cs)
   where intersperse' :: P.ByteString -> P.ByteString
         intersperse' (S.BS fp l) =
-          S.unsafeCreate (2*l) $ \p' -> S.unsafeWithForeignPtr fp $ \p -> do
-            poke p' w
-            S.c_intersperse (p' `plusPtr` 1) p (fromIntegral l) w
+          S.unsafeCreateFp (2*l) $ \fp' ->
+            S.unsafeWithForeignPtr fp' $ \p' ->
+              S.unsafeWithForeignPtr fp $ \p -> do
+                poke p' w
+                S.c_intersperse (p' `plusPtr` 1) p (fromIntegral l) w
 
 -- | The 'transpose' function transposes the rows and columns of its
 -- 'ByteString' argument.
diff --git a/Data/ByteString/Lazy/Internal.hs b/Data/ByteString/Lazy/Internal.hs
index 63dc3f670..5962e5ae8 100644
--- a/Data/ByteString/Lazy/Internal.hs
+++ b/Data/ByteString/Lazy/Internal.hs
@@ -49,10 +49,9 @@ module Data.ByteString.Lazy.Internal (
 
 import Prelude hiding (concat)
 
-import qualified Data.ByteString.Internal as S
+import qualified Data.ByteString.Internal.Type as S
 
 import Data.Word (Word8)
-import Foreign.Ptr (plusPtr)
 import Foreign.Storable (Storable(sizeOf))
 
 #if MIN_VERSION_base(4,13,0)
@@ -313,14 +312,13 @@ toStrict = \cs -> goLen0 cs cs
     goLen cs0 !total (Chunk (S.BS _ cl) cs) =
       goLen cs0 (S.checkedAdd "Lazy.toStrict" total cl) cs
     goLen cs0 total Empty =
-      S.unsafeCreate total $ \ptr -> goCopy cs0 ptr
+      S.unsafeCreateFp total $ \ptr -> goCopy cs0 ptr
 
     -- Copy the data
     goCopy Empty                    !_   = return ()
     goCopy (Chunk (S.BS _  0  ) cs) !ptr = goCopy cs ptr
-    goCopy (Chunk (S.BS fp len) cs) !ptr =
-      S.unsafeWithForeignPtr fp $ \p -> do
-        S.memcpy ptr p len
-        goCopy cs (ptr `plusPtr` len)
+    goCopy (Chunk (S.BS fp len) cs) !ptr = do
+      S.memcpyFp ptr fp len
+      goCopy cs (ptr `S.plusForeignPtr` len)
 -- See the comment on Data.ByteString.Internal.concat for some background on
 -- this implementation.
diff --git a/Data/ByteString/Short/Internal.hs b/Data/ByteString/Short/Internal.hs
index 4813af803..7b50ebe33 100644
--- a/Data/ByteString/Short/Internal.hs
+++ b/Data/ByteString/Short/Internal.hs
@@ -407,6 +407,7 @@ indexError sbs i =
 -- | @since 0.11.2.0
 unsafePackLenLiteral :: Int -> Addr# -> ShortByteString
 unsafePackLenLiteral len addr# =
+    -- createFromPtr allocates, so accursedUnutterablePerformIO is wrong
     unsafeDupablePerformIO $ createFromPtr (Ptr addr#) len
 
 ------------------------------------------------------------------------
diff --git a/bytestring.cabal b/bytestring.cabal
index 9e3dea0a3..fd5656fc4 100644
--- a/bytestring.cabal
+++ b/bytestring.cabal
@@ -101,6 +101,7 @@ library
                      Data.ByteString.Builder.RealFloat.D2S
                      Data.ByteString.Builder.RealFloat.Internal
                      Data.ByteString.Builder.RealFloat.TableGenerator
+                     Data.ByteString.Internal.Type
                      Data.ByteString.Lazy.Internal.Deque
                      Data.ByteString.Lazy.ReadInt
                      Data.ByteString.Lazy.ReadNat