Skip to content

Commit

Permalink
Add conversion routines for FilePath (#403)
Browse files Browse the repository at this point in the history
* Add conversion routines for FilePath

Despite FilePath being a type alias for String, the type is actually
quite distinct. An argument of type FilePath is expected to be encoded
using the file system encoding, and can be converted to a bytestring and
back exactly.

* Expand on to/fromFilePath documentation
  • Loading branch information
luke-clifton authored Jul 1, 2021
1 parent 8a0d222 commit 28b235f
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 0 deletions.
27 changes: 27 additions & 0 deletions Data/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ module Data.ByteString (
unpack,
fromStrict,
toStrict,
fromFilePath,
toFilePath,

-- * Basic interface
cons,
Expand Down Expand Up @@ -256,7 +258,9 @@ import GHC.IO.Handle.Internals
import GHC.IO.Handle.Types
import GHC.IO.Buffer
import GHC.IO.BufferedIO as Buffered
import GHC.IO.Encoding (getFileSystemEncoding)
import GHC.IO (unsafePerformIO, unsafeDupablePerformIO)
import GHC.Foreign (newCStringLen, peekCStringLen)
import Data.Char (ord)
import Foreign.Marshal.Utils (copyBytes)

Expand Down Expand Up @@ -320,6 +324,29 @@ unpackFoldr bs k z = foldr k z bs
unpackFoldr bs (:) [] = unpackBytes bs
#-}

-- | Encode a 'FilePath' as a 'ByteString'.
--
-- A 'FilePath' is expected to be in the file system encoding, i.e. the
-- one reported by 'GHC.IO.Encoding.getFileSystemEncoding'. On platforms
-- whose paths may contain arbitrary bytes, that encoding round-trips
-- arbitrary data. The conversion performed here is the same one that
-- 'System.IO.openFile' applies to its 'FilePath' argument.
fromFilePath :: FilePath -> IO ByteString
fromFilePath path =
    getFileSystemEncoding >>= \fsEnc ->
        -- Marshal the path with the file system encoding, then wrap the
        -- resulting buffer as a 'ByteString'.
        newCStringLen fsEnc path >>= unsafePackMallocCStringLen

-- | Decode a 'ByteString' into a 'FilePath'.
--
-- The bytes are interpreted using the file system encoding, so the
-- resulting 'FilePath' can be handed to the standard IO functions and
-- will still reference the correct path, even when that path contains
-- arbitrary non-UTF-8 data.
toFilePath :: ByteString -> IO FilePath
toFilePath path =
    -- Look up the file system encoding, then decode the bytes with it.
    useAsCStringLen path . peekCStringLen =<< getFileSystemEncoding

-- ---------------------------------------------------------------------
-- Basic interface

Expand Down
12 changes: 12 additions & 0 deletions tests/Properties/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,18 @@ tests =
\x -> B.fromStrict (B.toStrict x) === x
, testProperty "toStrict . fromStrict" $
\x -> B.toStrict (B.fromStrict x) === x
#ifndef BYTESTRING_LAZY
#ifndef BYTESTRING_CHAR8
, testProperty "toFilePath >>= fromFilePath" $
\x -> ioProperty $ do
r <- B.toFilePath x >>= B.fromFilePath
pure (r === x)
, testProperty "fromFilePath >>= toFilePath" $
\x -> ioProperty $ do
r <- B.fromFilePath x >>= B.toFilePath
pure (r === x)
#endif
#endif

, testProperty "==" $
\x y -> (x == y) === (B.unpack x == B.unpack y)
Expand Down

0 comments on commit 28b235f

Please sign in to comment.