diff --git a/Data/ByteString/Builder/Internal.hs b/Data/ByteString/Builder/Internal.hs
index a05beead9..181808508 100644
--- a/Data/ByteString/Builder/Internal.hs
+++ b/Data/ByteString/Builder/Internal.hs
@@ -1,5 +1,5 @@
 {-# LANGUAGE ScopedTypeVariables, CPP, BangPatterns, RankNTypes, TupleSections #-}
-{-# LANGUAGE Unsafe #-}
+{-# LANGUAGE MagicHash, ViewPatterns, Unsafe #-}
 {-# OPTIONS_HADDOCK not-home #-}
 -- | Copyright : (c) 2010 - 2011 Simon Meier
 -- License     : BSD3-style (see LICENSE)
@@ -86,6 +86,8 @@ module Data.ByteString.Builder.Internal (
   , byteStringCopy
   , byteStringInsert
   , byteStringThreshold
+  , cstring
+  , cstringUtf8
 
   , lazyByteStringCopy
   , lazyByteStringInsert
@@ -127,6 +129,7 @@ module Data.ByteString.Builder.Internal (
 ) where
 
 import           Control.Arrow (second)
+import           Control.Monad (when)
 
 #if !(MIN_VERSION_base(4,11,0))
 import           Data.Semigroup (Semigroup((<>)))
@@ -140,10 +143,12 @@ import qualified Data.ByteString.Short.Internal as Sh
 import qualified GHC.IO.Buffer as IO (Buffer(..), newByteBuffer)
 import           GHC.IO.Handle.Internals (wantWritableHandle, flushWriteBuffer)
 import           GHC.IO.Handle.Types (Handle__, haByteBuffer, haBufferMode)
+import           GHC.Exts
 import           System.IO (hFlush, BufferMode(..), Handle)
 import           Data.IORef
 
 import           Foreign
+import           Foreign.C.String (CString)
 import           Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr)
 import           System.IO.Unsafe (unsafeDupablePerformIO)
 
@@ -857,6 +862,77 @@ byteStringInsert :: S.ByteString -> Builder
 byteStringInsert =
     \bs -> builder $ \k (BufferRange op _) -> return $ insertChunk op bs k
 
+
+------------------------------------------------------------------------------
+-- Raw CString encoding
+------------------------------------------------------------------------------
+
+-- | A null-terminated ASCII encoded 'Foreign.C.String.CString'.
+-- Null characters are not representable.
+--
+-- @since 0.11.5.0
+{-# INLINABLE cstring #-}
+cstring :: Addr# -> Builder
+cstring = \addr -> builder $ \k br -> do
+    let ip   = Ptr addr
+#if __GLASGOW_HASKELL__ >= 811
+        ipe  = Ptr (addr `plusAddr#` (cstringLength# addr))
+#else
+    !ipe <- plusPtr ip . fromIntegral <$> S.c_strlen ip
+#endif
+    wrappedBytesCopyStep (BufferRange ip ipe) k br
+
+-- | A null-terminated UTF-8 encoded 'Foreign.C.String.CString'.
+-- Null characters can be encoded as @0xc0 0x80@.
+--
+-- @since 0.11.5.0
+cstringUtf8 :: Addr# -> Builder
+cstringUtf8 = \addr0 -> builder $ \k br -> do
+#if __GLASGOW_HASKELL__ >= 811
+    let len = cstringLength# addr0
+#else
+    (I# len) <- fromIntegral <$> S.c_strlen (Ptr addr0)
+#endif
+    nullAt <- c_strstr (Ptr addr0) (Ptr "\xc0\x80"#)
+    cstringUtf8_step addr0 len nullAt k br
+{-# INLINABLE cstringUtf8 #-}
+
+cstringUtf8_step :: Addr# -> Int# -> Ptr Word8 -> BuildStep r -> BuildStep r
+cstringUtf8_step addr len ((== nullPtr) -> True) k br =
+    -- Contains no encoded nulls, use simple copy codepath
+    wrappedBytesCopyStep (BufferRange ip ipe) k br
+  where
+    ip  = Ptr addr
+    ipe = Ptr (addr `plusAddr#` len)
+cstringUtf8_step addr len !nullAt k (BufferRange op0 ope)
+    -- Copy as much of the null-free portion of the string as fits into the
+    -- available buffer space. If the string is long enough, we may have asked
+    -- for less than its full length, filling the buffer with the rest will go
+    -- into the next builder step.
+    | avail > nullFree = do
+        when (nullFree > 0) (S.memcpy op0 (Ptr addr) nullFree)
+        pokeElemOff op0 nullFree 0
+        let !op' = op0 `plusPtr` (nullFree + 1)
+            nread# = nullFree# +# 2#
+            addr' = addr `plusAddr#` nread#
+            len' = len -# nread#
+        nullAt' <- c_strstr (Ptr addr') (Ptr "\xc0\x80"#)
+        cstringUtf8_step addr' len' nullAt' k (BufferRange op' ope)
+    | otherwise = do
+        let !copy@(I# copy#) = min avail nullFree
+        when (copy > 0) (S.memcpy op0 (Ptr addr) copy)
+        let !op' = op0 `plusPtr` copy
+            addr' = addr `plusAddr#` copy#
+            len' = len -# copy#
+        return $ bufferFull 1 op' (cstringUtf8_step addr' len' nullAt k)
+  where
+    !avail = ope `minusPtr` op0
+    !nullFree@(I# nullFree#) = nullAt `minusPtr` (Ptr addr)
+
+foreign import ccall unsafe "string.h strstr" c_strstr
+    :: CString -> CString -> IO (Ptr Word8)
+
+
 -- Short bytestrings
 ------------------------------------------------------------------------------
 
diff --git a/Data/ByteString/Builder/Prim.hs b/Data/ByteString/Builder/Prim.hs
index 220bd310c..0fe7f3841 100644
--- a/Data/ByteString/Builder/Prim.hs
+++ b/Data/ByteString/Builder/Prim.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE BangPatterns, CPP, ScopedTypeVariables #-}
+{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}
 {-# LANGUAGE MagicHash, UnboxedTuples, PatternGuards #-}
 {-# OPTIONS_GHC -fno-warn-unused-imports #-}
 {-# LANGUAGE Trustworthy #-}
@@ -433,8 +433,8 @@ module Data.ByteString.Builder.Prim (
   -- a decimal number with UTF-8 encoded characters.
   , charUtf8
 
-  , cstring
-  , cstringUtf8
+  , cstring     -- Backwards-compatibility re-exports from Internal.hs
+  , cstringUtf8 -- these no longer make use of the BoundPrim API.
 
 {-
   -- * Testing support
@@ -468,6 +468,7 @@ import           Data.ByteString.Builder.Prim.ASCII
 
 import           Foreign
 import           Foreign.C.Types
+import           Foreign.C.String (CString)
 import           Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr)
 import           GHC.Int (Int (..))
 import           GHC.Word (Word8 (..))
@@ -664,97 +665,6 @@ primMapLazyByteStringBounded w =
     L.foldrChunks (\x b -> primMapByteStringBounded w x `mappend` b) mempty
 
 
-------------------------------------------------------------------------------
--- Raw CString encoding
-------------------------------------------------------------------------------
-
--- | A null-terminated ASCII encoded 'Foreign.C.String.CString'.
--- Null characters are not representable.
---
--- @since 0.11.0.0
-cstring :: Addr# -> Builder
-cstring = \addr0 -> builder $ \k br -> do
-#if __GLASGOW_HASKELL__ >= 811
-    let len = cstringLength# addr0
-#else
-    (I# len) <- fromIntegral <$> S.c_strlen (Ptr addr0)
-#endif
-    cstring_step addr0 len k br
-{-# INLINE cstring #-}
-
-cstring_step :: Addr# -> Int# -> BuildStep r -> BuildStep r
-cstring_step !addr !len !k br@(BufferRange op0 ope)
-    -- String is empty, process the continuation
-  | (I# len) == 0 = k br
-    -- Buffer is full, allocate some more...  We ask for just one more
-    -- byte, but the builder allocation strategy will in practice give
-    -- us more space, which we'll consume in a single step.
-  | op0 == ope =
-       return $ bufferFull 1 op0 (cstring_step addr len k)
-    -- Copy as much of the string as fits into the available buffer space.
-    -- If the string is long enough, we may have asked for less than its
-    -- full length, filling the buffer with the rest will go into the next
-    -- builder step.
-  | otherwise = do
-      let !avail@(I# avail#) = min (I# len) (ope `minusPtr` op0)
-          br' = BufferRange (op0 `plusPtr` avail) ope
-          addr' = addr `plusAddr#` avail#
-          len' = len -# avail#
-      S.memcpy op0 (Ptr addr) avail
-      cstring_step addr' len' k br'
-
--- | A null-terminated UTF-8 encoded 'Foreign.C.String.CString'.
--- Null characters can be encoded as @0xc0 0x80@.
---
--- @since 0.11.0.0
-cstringUtf8 :: Addr# -> Builder
-cstringUtf8 = \addr0 -> builder $ \k br -> do
-#if __GLASGOW_HASKELL__ >= 811
-    let len = cstringLength# addr0
-#else
-    (I# len) <- fromIntegral <$> S.c_strlen (Ptr addr0)
-#endif
-    nullAt <- S.c_strstr (Ptr addr0) (Ptr "\xc0\x80"#)
-    cstringUtf8_step addr0 len nullAt k br
-{-# INLINE cstringUtf8 #-}
-
-cstringUtf8_step :: Addr# -> Int# -> Ptr Word8 -> BuildStep r -> BuildStep r
-cstringUtf8_step !addr !len !nullAt !k br@(BufferRange op0@(Ptr op0#) ope)
-    -- String is empty, process the continuation
-  | (I# len) == 0 = k br
-    -- Contains no encoded nulls, use simpler 'cstring' code
-  | nullPtr == nullAt =
-      cstring_step addr len k br
-    -- Buffer is full, allocate some more...  We ask for just one more
-    -- byte, but the builder allocation strategy will in practice give
-    -- us more space, which we'll consume in a single step.
-  | op0 == ope =
-       return $ bufferFull 1 op0 (cstringUtf8_step addr len nullAt k)
-    -- We're at the encoded null-byte, append a '\0' to the buffer and
-    -- continue with the rest of the input string, after locating the
-    -- next encoded null-byte, if any.
-  | (Ptr addr) == nullAt = do
-      let !(W8# nullByte#) = 0
-      IO $ \s -> case writeWord8OffAddr# op0# 0# nullByte# s of
-                   s' -> (# s', () #)
-      let br' = BufferRange (op0 `plusPtr` 1) ope
-          addr' = addr `plusAddr#` 2#
-          len' = len -# 2#
-      nullAt' <- S.c_strstr (Ptr addr') (Ptr "\xc0\x80"#)
-      cstringUtf8_step addr' len' nullAt' k br'
-    -- Copy as much of the null-free portion of the string as fits into the
-    -- available buffer space. If the string is long enough, we may have asked
-    -- for less than its full length, filling the buffer with the rest will go
-    -- into the next builder step.
-  | otherwise = do
-      let !nullFree = nullAt `minusPtr` (Ptr addr)
-          !avail@(I# avail#) = min nullFree (ope `minusPtr` op0)
-          br' = BufferRange (op0 `plusPtr` avail) ope
-          addr' = addr `plusAddr#` avail#
-          len' = len -# avail#
-      S.memcpy op0 (Ptr addr) avail
-      cstringUtf8_step addr' len' nullAt k br'
-
 ------------------------------------------------------------------------------
 -- Char8 encoding
 ------------------------------------------------------------------------------
diff --git a/Data/ByteString/Internal.hs b/Data/ByteString/Internal.hs
index a0fd23d55..b4481a833 100644
--- a/Data/ByteString/Internal.hs
+++ b/Data/ByteString/Internal.hs
@@ -65,7 +65,6 @@ module Data.ByteString.Internal (
 
         -- * Standard C Functions
         c_strlen,
-        c_strstr,
         c_free_finalizer,
 
         memchr,
diff --git a/Data/ByteString/Internal/Type.hs b/Data/ByteString/Internal/Type.hs
index eb1d2b316..24596c4f6 100644
--- a/Data/ByteString/Internal/Type.hs
+++ b/Data/ByteString/Internal/Type.hs
@@ -82,7 +82,6 @@ module Data.ByteString.Internal.Type (
 
         -- * Standard C Functions
         c_strlen,
-        c_strstr,
         c_free_finalizer,
 
         memchr,
@@ -1002,9 +1001,6 @@ accursedUnutterablePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
 foreign import ccall unsafe "string.h strlen" c_strlen
     :: CString -> IO CSize
 
-foreign import ccall unsafe "string.h strstr" c_strstr
-    :: CString -> CString -> IO (Ptr Word8)
-
 foreign import ccall unsafe "static stdlib.h &free" c_free_finalizer
     :: FunPtr (Ptr Word8 -> IO ())
 
diff --git a/bench/BenchAll.hs b/bench/BenchAll.hs
index 3daa09463..6d40718fb 100644
--- a/bench/BenchAll.hs
+++ b/bench/BenchAll.hs
@@ -259,6 +259,8 @@ main = do
         , benchB' "UTF-8 String"  () $ \() -> P.cstringUtf8 "hello world\0"#
         , benchB' "String (naive)" "hello world!" fromString
         , benchB' "String"        () $ \() -> P.cstring "hello world!"#
+        , benchB' "AsciiLit64"   () $ \() -> P.cstring "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"#
+        , benchB' "Utf8Lit64"   () $ \() -> P.cstringUtf8 "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xc0\x80xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"#
         ]
 
       , bgroup "Encoding wrappers"
diff --git a/tests/builder/Data/ByteString/Builder/Prim/Tests.hs b/tests/builder/Data/ByteString/Builder/Prim/Tests.hs
index 9d499b80d..fa1ae5894 100644
--- a/tests/builder/Data/ByteString/Builder/Prim/Tests.hs
+++ b/tests/builder/Data/ByteString/Builder/Prim/Tests.hs
@@ -24,20 +24,7 @@ import           Test.Tasty.QuickCheck
 
 tests :: [TestTree]
 tests = concat [ testsBinary, testsASCII, testsChar8, testsUtf8
-               , testsCombinatorsB, [testCString, testCStringUtf8] ]
-
-testCString :: TestTree
-testCString = testProperty "cstring" $
-    toLazyByteString (BP.cstring "hello world!"#) ==
-      LC.pack "hello" `L.append` L.singleton 0x20 `L.append` LC.pack "world!"
-
-testCStringUtf8 :: TestTree
-testCStringUtf8 = testProperty "cstringUtf8" $
-    toLazyByteString (BP.cstringUtf8 "hello\xc0\x80world\xc0\x80!"#) ==
-      LC.pack "hello" `L.append` L.singleton 0x00
-                      `L.append` LC.pack "world"
-                      `L.append` L.singleton 0x00
-                      `L.append` LC.singleton '!'
+               , testsCombinatorsB ]
 
 ------------------------------------------------------------------------------
 -- Binary
diff --git a/tests/builder/Data/ByteString/Builder/Tests.hs b/tests/builder/Data/ByteString/Builder/Tests.hs
index fa58645e4..a8a6ab266 100644
--- a/tests/builder/Data/ByteString/Builder/Tests.hs
+++ b/tests/builder/Data/ByteString/Builder/Tests.hs
@@ -36,6 +36,7 @@ import           Data.Word
 import qualified Data.ByteString          as S
 import qualified Data.ByteString.Internal as S
 import qualified Data.ByteString.Lazy     as L
+import qualified Data.ByteString.Lazy.Char8 as LC
 import qualified Data.ByteString.Short    as Sh
 
 import           Data.ByteString.Builder
@@ -73,7 +74,8 @@ tests =
   testsASCII ++
   testsFloating ++
   testsChar8 ++
-  testsUtf8
+  testsUtf8 ++
+  testCString
 
 
 ------------------------------------------------------------------------------
@@ -988,3 +990,18 @@ testsUtf8 =
   [ testBuilderConstr "charUtf8" charUtf8_list charUtf8
   , testBuilderConstr "stringUtf8" (foldMap charUtf8_list) stringUtf8
   ]
+
+testCString :: [TestTree]
+testCString =
+    [ testProperty "cstring" $
+        toLazyByteString (BI.cstring "hello world!"#) ==
+        LC.pack "hello" `L.append` L.singleton 0x20
+                        `L.append` LC.pack "world!"
+    , testProperty "cstringUtf8" $
+        toLazyByteString (BI.cstringUtf8 "hello\xc0\x80\xc0\x80world\xc0\x80!"#) ==
+        LC.pack "hello" `L.append` L.singleton 0x00
+                        `L.append` L.singleton 0x00
+                        `L.append` LC.pack "world"
+                        `L.append` L.singleton 0x00
+                        `L.append` LC.singleton '!'
+    ]