From 0bcbdd0e0187a0d6d9acdd53b6cc76dfe730def8 Mon Sep 17 00:00:00 2001
From: Anders Kaseorg
Date: Wed, 13 Mar 2019 13:41:10 -0700
Subject: [PATCH] Add peekCString, withCString for converting NUL terminated C
 strings

Since NUL-terminated CString is much more common in foreign APIs than
CStringLen, one should not have to manually build these functions out
of `peekCStringLen`, `withCStringLen` (and perhaps get it wrong, like
I did in https://github.com/jgm/cmark-hs/pull/13).

`withCString` goes through `useAsCString`, which copies the encoded
bytes and appends the terminating NUL. The buffer produced by
`encodeUtf8` is not NUL terminated on its own, so exposing it in place
with `unsafeUseAsCString` would hand C code an unterminated string.

While here, document that `peekCStringLen`, `withCStringLen` are O(n)
as well.

Fixes #32.

Signed-off-by: Anders Kaseorg
---
 Data/Text/Foreign.hs | 38 +++++++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/Data/Text/Foreign.hs b/Data/Text/Foreign.hs
index 0dad97e1c..9b2929e7f 100644
--- a/Data/Text/Foreign.hs
+++ b/Data/Text/Foreign.hs
@@ -20,7 +20,9 @@ module Data.Text.Foreign
     , useAsPtr
     , asForeignPtr
     -- ** Encoding as UTF-8
+    , peekCString
     , peekCStringLen
+    , withCString
     , withCStringLen
     -- * Unsafe conversion code
     , lengthWord16
@@ -39,12 +41,13 @@ import Control.Monad.ST.Unsafe (unsafeIOToST)
 #else
 import Control.Monad.ST (unsafeIOToST)
 #endif
-import Data.ByteString.Unsafe (unsafePackCStringLen, unsafeUseAsCStringLen)
+import Data.ByteString (useAsCString)
+import Data.ByteString.Unsafe (unsafePackCString, unsafePackCStringLen, unsafeUseAsCStringLen)
 import Data.Text.Encoding (decodeUtf8, encodeUtf8)
 import Data.Text.Internal (Text(..), empty)
 import Data.Text.Unsafe (lengthWord16)
 import Data.Word (Word16)
-import Foreign.C.String (CStringLen)
+import Foreign.C.String (CString, CStringLen)
 import Foreign.ForeignPtr (ForeignPtr, mallocForeignPtrArray, withForeignPtr)
 import Foreign.Marshal.Alloc (allocaBytes)
 import Foreign.Ptr (Ptr, castPtr, plusPtr)
@@ -153,6 +156,16 @@ asForeignPtr t@(Text _arr _off len) = do
   withForeignPtr fp $ unsafeCopyToPtr t
   return (fp, I16 len)
 
+-- | /O(n)/ Decode a NUL terminated C string, which is assumed to have
+-- been encoded as UTF-8. If decoding fails, a 'UnicodeException' is
+-- thrown.
+--
+-- @since 1.2.4.0
+peekCString :: CString -> IO Text
+peekCString cs = do
+  bs <- unsafePackCString cs
+  return $! decodeUtf8 bs
+
 -- | /O(n)/ Decode a C string with explicit length, which is assumed
 -- to have been encoded as UTF-8. If decoding fails, a
 -- 'UnicodeException' is thrown.
@@ -163,9 +176,24 @@ peekCStringLen cs = do
   bs <- unsafePackCStringLen cs
   return $! decodeUtf8 bs
 
--- | Marshal a 'Text' into a C string encoded as UTF-8 in temporary
--- storage, with explicit length information. The encoded string may
--- contain NUL bytes, and is not followed by a trailing NUL byte.
+-- | /O(n)/ Marshal a 'Text' into a NUL terminated C string encoded as
+-- UTF-8 in temporary storage. The 'Text' must not contain any NUL
+-- characters: C code would see the string end at the first one.
+--
+-- The temporary storage is freed when the subcomputation terminates
+-- (either normally or via an exception), so the pointer to the
+-- temporary storage must /not/ be used after this function returns.
+--
+-- @since 1.2.4.0
+withCString :: Text -> (CString -> IO a) -> IO a
+-- 'encodeUtf8' yields no trailing NUL, so copy via 'useAsCString',
+-- which appends one, instead of exposing the buffer in place.
+withCString t act = useAsCString (encodeUtf8 t) act
+
+-- | /O(n)/ Marshal a 'Text' into a C string encoded as UTF-8 in
+-- temporary storage, with explicit length information. The encoded
+-- string may contain NUL bytes, and is not followed by a trailing NUL
+-- byte.
 --
 -- The temporary storage is freed when the subcomputation terminates
 -- (either normally or via an exception), so the pointer to the