From c99a7c5be985dc804678d54c34766b9859f112eb Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Mon, 16 Aug 2021 20:25:55 +0200 Subject: [PATCH] Add --- abstract-filepath.cabal | 3 + lib/AFP/AbstractFilePath.hs | 35 +-- lib/AFP/AbstractFilePath/Common.hs | 40 +++- lib/AFP/AbstractFilePath/Internal/Common.hs | 6 +- lib/AFP/OsString.hs | 4 - lib/AFP/OsString/Common.hs | 243 ++++++++++++++++++++ lib/AFP/OsString/Internal.hs | 60 ++--- lib/AFP/OsString/Internal/Types.hs | 21 +- lib/AFP/OsString/Posix.hs | 11 + lib/AFP/OsString/Windows.hs | 12 + 10 files changed, 367 insertions(+), 68 deletions(-) create mode 100644 lib/AFP/OsString/Common.hs create mode 100644 lib/AFP/OsString/Posix.hs create mode 100644 lib/AFP/OsString/Windows.hs diff --git a/abstract-filepath.cabal b/abstract-filepath.cabal index d9ca735..942767b 100644 --- a/abstract-filepath.cabal +++ b/abstract-filepath.cabal @@ -12,6 +12,7 @@ description: category: System extra-source-files: lib/AFP/AbstractFilePath/Common.hs + lib/AFP/OsString/Common.hs lib/AFP/AbstractFilePath/Internal/Common.hs lib/AFP/AbstractFilePath/Internal.hs-boot lib/AFP/AbstractFilePath.hs-boot @@ -36,6 +37,8 @@ library AFP.Data.ByteString.Short.Word16 AFP.Data.Word16 AFP.OsString + AFP.OsString.Posix + AFP.OsString.Windows AFP.OsString.Internal AFP.OsString.Internal.Types diff --git a/lib/AFP/AbstractFilePath.hs b/lib/AFP/AbstractFilePath.hs index b58f68f..92a53c1 100644 --- a/lib/AFP/AbstractFilePath.hs +++ b/lib/AFP/AbstractFilePath.hs @@ -64,18 +64,23 @@ module AFP.AbstractFilePath ( -- * Types AbstractFilePath - , WindowsFilePath - , PosixFilePath + , OsString + , OsWord -- * Construction , toAbstractFilePath , toAbstractFilePathIO , bsToAFP , afp + , packAFP -- * Deconstruction , fromAbstractFilePath , fromAbstractFilePathIO + , unpackAFP + + -- * Word construction + , fromChar -- * Separator predicates , pathSeparator @@ -140,7 +145,6 @@ module AFP.AbstractFilePath , makeValid , isFileName , hasParentDir - , module AFP.OsString ) where @@ -151,10 +155,13 @@ import AFP.AbstractFilePath.Internal , fromAbstractFilePathIO , toAbstractFilePath , toAbstractFilePathIO + , unpackAFP + , packAFP ) import AFP.AbstractFilePath.Internal.Types ( AbstractFilePath, PosixFilePath, WindowsFilePath ) import AFP.OsString +import AFP.OsString.Internal ( fromChar ) import AFP.OsString.Internal.Types #if defined(mingw32_HOST_OS) || defined(__MINGW32__) @@ -474,7 +481,7 @@ takeAllParents (OsString p) = fmap OsString $ AFP.takeAllParents p -- On Posix, \/ is a Drive. -- -- > Windows: splitDrive "/test" == ("","/test") --- > Windows: splitDrive "C:\\file" == ("C:","file") +-- > Windows: splitDrive "C:\\file" == ("C:\","file") -- > Posix: splitDrive "/test" == ("/","test") -- > splitDrive "//test" == ("//","test") -- > splitDrive "test/file" == ("","test/file") @@ -630,28 +637,28 @@ isRelative (OsString x) = AFP.isRelative x -- | Check if a path is absolute -- --- >>> Windows: isAbsolute "C:\\path" == True --- >>> Windows: isAbsolute "/path" == False --- >>> Posix: isAbsolute "/path" == True --- >>> isAbsolute "path" == False --- >>> isAbsolute "" == False +-- > Windows: isAbsolute "C:\\path" == True +-- > Windows: isAbsolute "/path" == False +-- > Posix: isAbsolute "/path" == True +-- > isAbsolute "path" == False +-- > isAbsolute "" == False isAbsolute :: AbstractFilePath -> Bool isAbsolute (OsString x) = AFP.isAbsolute x -- | Is a FilePath valid, i.e. could you create a file like it? -- --- >>> isValid "" == False --- >>> isValid "\0" == False --- >>> isValid "/random_path" == True +-- > isValid "" == False +-- > isValid "\0" == False +-- > isValid "/random_path" == True isValid :: AbstractFilePath -> Bool isValid (OsString filepath) = AFP.isValid filepath -- | Take a FilePath and make it valid; does not change already valid FilePaths. -- --- >>> makeValid "" == "_" --- >>> makeValid "file\0name" == "file_name" +-- > makeValid "" == "_" +-- > makeValid "file\0name" == "file_name" -- > if isValid p then makeValid p == p else makeValid p /= p -- > isValid (makeValid p) makeValid :: AbstractFilePath -> AbstractFilePath diff --git a/lib/AFP/AbstractFilePath/Common.hs b/lib/AFP/AbstractFilePath/Common.hs index aa0145d..665ea30 100644 --- a/lib/AFP/AbstractFilePath/Common.hs +++ b/lib/AFP/AbstractFilePath/Common.hs @@ -7,8 +7,33 @@ module AFP.AbstractFilePath.MODULE_NAME ( + -- * Types +#ifdef WINDOWS + WindowsString + , WindowsWord + , WindowsFilePath +#else + PosixString + , PosixWord + , PosixFilePath +#endif + -- * String construction + , toPlatformString + , toPlatformStringIO + , bsToPlatformString + , pstr + , packPlatformString + + -- * String deconstruction + , fromPlatformString + , fromPlatformStringIO + , unpackPlatformString + + -- * Word construction + , fromChar + -- * Separator predicates - pathSeparator + , pathSeparator , pathSeparators , isPathSeparator , searchPathSeparator @@ -88,6 +113,19 @@ import qualified AFP.AbstractFilePath.Internal.MODULE_NAME as IP import AFP.AbstractFilePath.Internal.Types import AFP.OsString.Internal.Types +import AFP.OsString.MODULE_NAME ( + toPlatformString + , toPlatformStringIO + , bsToPlatformString + , pstr + , packPlatformString + , fromPlatformString + , fromPlatformStringIO + , unpackPlatformString + , fromChar + ) + + import Control.Arrow ( second ) import Data.Bifunctor diff --git a/lib/AFP/AbstractFilePath/Internal/Common.hs b/lib/AFP/AbstractFilePath/Internal/Common.hs index 922ce49..ae93f01 100644 --- a/lib/AFP/AbstractFilePath/Internal/Common.hs +++ b/lib/AFP/AbstractFilePath/Internal/Common.hs @@ -1,6 +1,6 @@ {-# LANGUAGE CPP #-} -{-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE TypeSynonymInstances #-} -- This template expects CPP definitions for: -- MODULE_NAME = Posix | Windows -- IS_WINDOWS = False | True @@ -64,14 +64,14 @@ import Data.Word8 ) #endif -import Data.List - ( mapAccumL ) import Control.Arrow ( second ) import Data.ByteString ( ByteString ) import Data.ByteString.Short ( ShortByteString ) +import Data.List + ( mapAccumL ) import Data.Maybe ( isJust ) import Prelude hiding diff --git a/lib/AFP/OsString.hs b/lib/AFP/OsString.hs index 43d3645..66daa7c 100644 --- a/lib/AFP/OsString.hs +++ b/lib/AFP/OsString.hs @@ -18,8 +18,6 @@ module AFP.OsString ( -- * String types OsString - , WindowsString - , PosixString -- * String construction , toOsString @@ -35,8 +33,6 @@ module AFP.OsString -- * Word types , OsWord - , WindowsWord - , PosixWord -- * Word construction , fromChar diff --git a/lib/AFP/OsString/Common.hs b/lib/AFP/OsString/Common.hs new file mode 100644 index 0000000..e1a56dd --- /dev/null +++ b/lib/AFP/OsString/Common.hs @@ -0,0 +1,243 @@ +{-# LANGUAGE CPP #-} +-- This template expects CPP definitions for: +-- MODULE_NAME = Posix | Windows +-- IS_WINDOWS = False | True + +module AFP.OsString.MODULE_NAME + ( + -- * Types +#ifdef WINDOWS + WindowsString + , WindowsWord +#else + PosixString + , PosixWord +#endif + + -- * String construction + , toPlatformString + , toPlatformStringIO + , bsToPlatformString + , pstr + , packPlatformString + + -- * String deconstruction + , fromPlatformString + , fromPlatformStringIO + , unpackPlatformString + + -- * Word construction + , fromChar + + ) +where + + +import AFP.OsString.Internal.Types +#ifdef WINDOWS + ( WindowsString + , WindowsWord + ) +#else + ( PosixString + , PosixWord + ) +#endif +import AFP.Data.ByteString.Short.Encode + ( encodeUtf16LE, encodeUtf8 ) +import AFP.Data.ByteString.Short.Decode + ( decodeUtf16LE + , decodeUtf16LE' + , decodeUtf16LE'' + , decodeUtf16LEWith + , decodeUtf8 + , decodeUtf8' + , decodeUtf8With + ) +import AFP.AbstractFilePath.Internal.Types +import AFP.OsString.Internal.Types + +import Control.Arrow + ( second ) +import Data.Bifunctor + ( bimap, first ) +import Data.ByteString + ( ByteString ) +import Data.ByteString.Short + ( ShortByteString ) +import Data.Maybe + ( isJust ) +import Data.Word8 + ( Word8, _colon, _nul, _period, _slash, _underscore ) +import Control.Exception + ( throwIO ) +import Control.Monad.Catch + ( MonadThrow, throwM ) +import Data.ByteString + ( ByteString ) +import Data.Proxy + ( Proxy (..) ) +import Data.Text.Encoding.Error + ( UnicodeException (..), lenientDecode ) +import Data.Typeable +import GHC.Exts + ( IsString (..) ) +import GHC.IO.Encoding + ( getFileSystemEncoding ) +import GHC.IO.Exception + ( IOErrorType (InvalidArgument) ) +import Language.Haskell.TH +import Language.Haskell.TH.Quote + ( QuasiQuoter (..) ) +import Language.Haskell.TH.Syntax + ( Lift (..), lift ) +import System.IO.Error + ( catchIOError ) + +import qualified Data.Text as T +import qualified Data.Text.Encoding as E +import qualified GHC.Foreign as GHC +import qualified Language.Haskell.TH.Syntax as TH + +import qualified AFP.AbstractFilePath.Internal.MODULE_NAME as C +#ifdef WINDOWS +import qualified AFP.Data.ByteString.Short.Word16 as BS +#else +import qualified AFP.Data.ByteString.Short as BS +#endif +import AFP.Data.ByteString.Short (toShort) + + + +-- | Total Unicode-friendly encoding. +-- +-- On windows this encodes as UTF16, which is expected. +-- On unix this encodes as UTF8, which is a good guess. +toPlatformString :: String -> PLATFORM_STRING +#ifdef WINDOWS +toPlatformString = WS . encodeUtf16LE +#else +toPlatformString = PS . encodeUtf8 +#endif + +-- | Like 'toPlatformString', except on unix this uses the current +-- locale for encoding instead of always UTF8. +-- +-- Looking up the locale requires IO. If you're not worried about calls +-- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible. +toPlatformStringIO :: String -> IO PLATFORM_STRING +#ifdef WINDOWS +toPlatformStringIO = pure . WS . encodeUtf16LE +#else +toPlatformStringIO str = do + enc <- getFileSystemEncoding + cstr <- GHC.newCString enc str + PS <$> BS.packCString cstr +#endif + + +-- | Partial unicode friendly decoding. +-- +-- On windows this decodes as UTF16 (which is the expected filename encoding). +-- On unix this decodes as UTF8 (which is a good guess). Note that +-- filenames on unix are encoding agnostic char arrays. +-- +-- Throws a 'UnicodeException' if decoding fails. +fromPlatformString :: MonadThrow m => PLATFORM_STRING -> m String +#ifdef WINDOWS +fromPlatformString (WS ba) = either throwM pure $ decodeUtf16LE' ba +#else +fromPlatformString (PS ba) = either throwM pure $ decodeUtf8' ba +#endif + + +-- | Like 'fromPlatformStringIO', except on unix this uses the current +-- locale for decoding instead of always UTF8. +-- +-- Looking up the locale requires IO. If you're not worried about calls +-- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible. +-- +-- Throws 'UnicodeException' if decoding fails. +fromPlatformStringIO :: PLATFORM_STRING -> IO String +#ifdef WINDOWS +fromPlatformStringIO (WS ba) = either throwIO pure $ decodeUtf16LE' ba +#else +fromPlatformStringIO (PS ba) = flip catchIOError (\_ -> throwIO (DecodeError "fromAbstractFilePath' failed" Nothing)) + $ BS.useAsCString ba $ \fp -> getFileSystemEncoding >>= \enc -> GHC.peekCString enc fp +#endif + + +-- | Constructs an platform string from a ByteString. +-- +-- On windows, this ensures valid UTF16, on unix it is passed unchanged/unchecked. +-- +-- Throws 'UnicodeException' on invalid UTF16 on windows. +bsToPlatformString :: MonadThrow m + => ByteString + -> m PLATFORM_STRING +#ifdef WINDOWS +bsToPlatformString bs = + either throwM (const . pure . WS . toShort $ bs) $ decodeUtf16LE'' bs +#else +bsToPlatformString = pure . PS . toShort +#endif + + +qq :: (ByteString -> Q Exp) -> QuasiQuoter +qq quoteExp' = + QuasiQuoter +#ifdef WINDOWS + { quoteExp = (\s -> quoteExp' . E.encodeUtf16LE . T.pack $ s) + , quotePat = \_ -> + fail "illegal QuasiQuote (allowed as expression only, used as a pattern)" + , quoteType = \_ -> + fail "illegal QuasiQuote (allowed as expression only, used as a type)" + , quoteDec = \_ -> + fail "illegal QuasiQuote (allowed as expression only, used as a declaration)" + } +#else + { quoteExp = (\s -> quoteExp' . E.encodeUtf8 . T.pack $ s) + , quotePat = \_ -> + fail "illegal QuasiQuote (allowed as expression only, used as a pattern)" + , quoteType = \_ -> + fail "illegal QuasiQuote (allowed as expression only, used as a type)" + , quoteDec = \_ -> + fail "illegal QuasiQuote (allowed as expression only, used as a declaration)" + } +#endif + +mkPlatformString :: ByteString -> Q Exp +mkPlatformString bs = + case bsToPlatformString bs of + Just afp -> lift afp + Nothing -> error "invalid encoding" + +-- | QuasiQuote a 'PLATFORM_STRING'. This accepts Unicode characters +-- and encodes as UTF-8 on unix and UTF-16 on windows. +pstr :: QuasiQuoter +pstr = qq mkPlatformString + + +unpackPlatformString :: PLATFORM_STRING -> [PLATFORM_WORD] +#ifdef WINDOWS +unpackPlatformString (WS ba) = fmap WW $ BS.unpack ba +#else +unpackPlatformString (PS ba) = fmap PW $ BS.unpack ba +#endif + + +packPlatformString :: [PLATFORM_WORD] -> PLATFORM_STRING +#ifdef WINDOWS +packPlatformString ws = WS . BS.pack . fmap (\(WW w) -> w) $ ws +#else +packPlatformString ws = PS . BS.pack . fmap (\(PW w) -> w) $ ws +#endif + + +fromChar :: Char -> PLATFORM_WORD +#ifdef WINDOWS +fromChar = WW . fromIntegral . fromEnum +#else +fromChar = PW . fromIntegral . fromEnum +#endif + diff --git a/lib/AFP/OsString/Internal.hs b/lib/AFP/OsString/Internal.hs index 5532358..8d167d2 100644 --- a/lib/AFP/OsString/Internal.hs +++ b/lib/AFP/OsString/Internal.hs @@ -67,6 +67,13 @@ import qualified Data.Text as T import qualified Data.Text.Encoding as E import qualified GHC.Foreign as GHC import qualified Language.Haskell.TH.Syntax as TH +#if defined(mingw32_HOST_OS) || defined(__MINGW32__) +import AFP.OsString.Windows hiding ( fromChar ) +import qualified AFP.OsString.Windows as PF +#else +import AFP.OsString.Posix hiding ( fromChar ) +import qualified AFP.OsString.Posix as PF +#endif @@ -76,11 +83,7 @@ import qualified Language.Haskell.TH.Syntax as TH -- On windows this encodes as UTF16, which is expected. -- On unix this encodes as UTF8, which is a good guess. toOsString :: String -> OsString -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -toOsString = OsString . WS . encodeUtf16LE -#else -toOsString = OsString . PS . encodeUtf8 -#endif +toOsString = OsString . toPlatformString -- | Like 'toOsString', except on unix this uses the current -- locale for encoding instead of always UTF8. @@ -88,14 +91,7 @@ toOsString = OsString . PS . encodeUtf8 -- Looking up the locale requires IO. If you're not worried about calls -- to 'setFileSystemEncoding', then 'unsafePerformIO' may be feasible. toOsStringIO :: String -> IO OsString -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -toOsStringIO = OsString . WS . encodeUtf16LE -#else -toOsStringIO str = do - enc <- getFileSystemEncoding - cstr <- GHC.newCString enc str - OsString . PS <$> BSP.packCString cstr -#endif +toOsStringIO = fmap OsString . toPlatformStringIO -- | Partial unicode friendly decoding. @@ -106,11 +102,7 @@ toOsStringIO str = do -- -- Throws a 'UnicodeException' if decoding fails. fromOsString :: MonadThrow m => OsString -> m String -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -fromOsString (OsString (WS ba)) = either throwM pure $ decodeUtf16LE' ba -#else -fromOsString (OsString (PS ba)) = either throwM pure $ decodeUtf8' ba -#endif +fromOsString (OsString x) = fromPlatformString x -- | Like 'fromOsString', except on unix this uses the current @@ -121,12 +113,7 @@ fromOsString (OsString (PS ba)) = either throwM pure $ decodeUtf8' ba -- -- Throws 'UnicodeException' if decoding fails. fromOsStringIO :: OsString -> IO String -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -fromOsStringIO (OsString (WS ba)) = either throwIO pure $ decodeUtf16LE' ba -#else -fromOsStringIO (OsString (PS ba)) = flip catchIOError (\_ -> throwIO (DecodeError "fromAbstractFilePath' failed" Nothing)) - $ BSP.useAsCString ba $ \fp -> getFileSystemEncoding >>= \enc -> GHC.peekCString enc fp -#endif +fromOsStringIO (OsString x) = fromPlatformStringIO x -- | Constructs an @OsString@ from a ByteString. @@ -137,12 +124,7 @@ fromOsStringIO (OsString (PS ba)) = flip catchIOError (\_ -> throwIO (DecodeErro bsToOsString :: MonadThrow m => ByteString -> m OsString -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -bsToOsString bs = - either throwM (const . pure . OsString . WS . toShort $ bs) $ decodeUtf16LE'' bs -#else -bsToOsString = pure . OsString . PS . toShort -#endif +bsToOsString = fmap OsString . bsToPlatformString qq :: (ByteString -> Q Exp) -> QuasiQuoter @@ -181,24 +163,12 @@ osstr = qq mkOsString unpackOsString :: OsString -> [OsWord] -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -unpackOsString (OsString (WS ba)) = fmap (OsWord . WW) $ BS.unpack ba -#else -unpackOsString (OsString (PS ba)) = fmap (OsWord . PW) $ BS.unpack ba -#endif +unpackOsString (OsString x) = fmap OsWord $ unpackPlatformString x packOsString :: [OsWord] -> OsString -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -packOsString ws = OsString . WS . BS.pack . fmap (\(OsWord (WW w)) -> w) $ ws -#else -packOsString ws = OsString . PS . BS.pack . fmap (\(OsWord (PW w)) -> w) $ ws -#endif +packOsString = OsString . packPlatformString . fmap (\(OsWord x) -> x) fromChar :: Char -> OsWord -#if defined(mingw32_HOST_OS) || defined(__MINGW32__) -fromChar = OsWord . WW . fromIntegral . fromEnum -#else -fromChar = OsWord . PW . fromIntegral . fromEnum -#endif +fromChar = OsWord . PF.fromChar diff --git a/lib/AFP/OsString/Internal/Types.hs b/lib/AFP/OsString/Internal/Types.hs index a382688..8551811 100644 --- a/lib/AFP/OsString/Internal/Types.hs +++ b/lib/AFP/OsString/Internal/Types.hs @@ -50,11 +50,30 @@ import qualified Language.Haskell.TH.Syntax as TH -- | Commonly used windows string as UTF16 bytes. newtype WindowsString = WS { unWFP :: BS.ShortByteString } deriving (Eq, Ord, Semigroup, Monoid) + +instance Lift WindowsString where + lift (WS bs) + = [| (WS (BS.pack $(lift $ BS.unpack bs))) :: WindowsString |] +#if MIN_VERSION_template_haskell(2,17,0) + liftTyped = TH.unsafeCodeCoerce . TH.lift +#elif MIN_VERSION_template_haskell(2,16,0) + liftTyped = TH.unsafeTExpCoerce . TH.lift +#endif + -- | Commonly used Posix string as uninterpreted @char[]@ -- array. newtype PosixString = PS { unPFP :: BS.ShortByteString } deriving (Eq, Ord, Semigroup, Monoid) +instance Lift PosixString where + lift (PS bs) + = [| (PS (BS.pack $(lift $ BS.unpack bs))) :: PosixString |] +#if MIN_VERSION_template_haskell(2,17,0) + liftTyped = TH.unsafeCodeCoerce . TH.lift +#elif MIN_VERSION_template_haskell(2,16,0) + liftTyped = TH.unsafeTExpCoerce . TH.lift +#endif + instance Show WindowsString where show (WS bs) = ('\"': decodeUtf16LE bs) <> "\"" @@ -91,7 +110,7 @@ type PlatformWord = PosixWord -- depending on the platform. Both use unpinned -- 'ShortByteString' for efficiency and correctness. -- --- The constructor is only exported via "AbstractFilePath.Internal.Types", since +-- The constructor is only exported via "AFP.OsString.Internal.Types", since -- dealing with the internals isn't generally recommended, but supported -- in case you need to write platform specific code, such as the implementation -- of 'fromAbstractFilePath'. diff --git a/lib/AFP/OsString/Posix.hs b/lib/AFP/OsString/Posix.hs new file mode 100644 index 0000000..3e8fdeb --- /dev/null +++ b/lib/AFP/OsString/Posix.hs @@ -0,0 +1,11 @@ +{-# LANGUAGE CPP #-} +#define MODULE_NAME Posix +#define PLATFORM_STRING PosixString +#define PLATFORM_WORD PosixWord +#define IS_WINDOWS False +#include "Common.hs" +#undef MODULE_NAME +#undef FILEPATH_NAME +#undef OSSTRING_NAME +#undef IS_WINDOWS +#undef WINDOWS diff --git a/lib/AFP/OsString/Windows.hs b/lib/AFP/OsString/Windows.hs new file mode 100644 index 0000000..7c3ac46 --- /dev/null +++ b/lib/AFP/OsString/Windows.hs @@ -0,0 +1,12 @@ +{-# LANGUAGE CPP #-} +#define MODULE_NAME Windows +#define PLATFORM_STRING WindowsString +#define PLATFORM_WORD WindowsWord +#define IS_WINDOWS True +#define WINDOWS +#include "Common.hs" +#undef MODULE_NAME +#undef FILEPATH_NAME +#undef OSSTRING_NAME +#undef IS_WINDOWS +#undef WINDOWS