From 4b0a9a472c2a2a70217e5793f21b5d696593fb88 Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Fri, 21 Feb 2020 23:47:40 +0100 Subject: [PATCH] Add support for over-long filepaths via GNU extension Fixes #49 --- Codec/Archive/Tar/Index.hs | 4 +-- Codec/Archive/Tar/Pack.hs | 40 +++++++++++++++-------- Codec/Archive/Tar/Types.hs | 63 ++++++++++++++++++++++++++++--------- Codec/Archive/Tar/Unpack.hs | 55 ++++++++++++++++++++++---------- 4 files changed, 116 insertions(+), 46 deletions(-) diff --git a/Codec/Archive/Tar/Index.hs b/Codec/Archive/Tar/Index.hs index 8ea9d55..336bdd5 100644 --- a/Codec/Archive/Tar/Index.hs +++ b/Codec/Archive/Tar/Index.hs @@ -702,7 +702,7 @@ example1 = testEntry :: FilePath -> Int64 -> Entry testEntry name size = simpleEntry path (NormalFile mempty size) where - Right path = toTarPath False name + That path = toTarPath False name -- | Simple tar archive containing regular files only data SimpleTarArchive = SimpleTarArchive { @@ -767,7 +767,7 @@ instance Arbitrary SimpleTarArchive where mkList [] = [] mkList ((fp, bs):es) = entry : mkList es where - Right path = toTarPath False fp + That path = toTarPath False fp entry = simpleEntry path content content = NormalFile bs (LBS.length bs) diff --git a/Codec/Archive/Tar/Pack.hs b/Codec/Archive/Tar/Pack.hs index cc44cfa..4fd0b32 100644 --- a/Codec/Archive/Tar/Pack.hs +++ b/Codec/Archive/Tar/Pack.hs @@ -19,7 +19,7 @@ module Codec.Archive.Tar.Pack ( ) where import Codec.Archive.Tar.Types - +import Control.Monad (join) import qualified Data.ByteString.Lazy as BS import System.FilePath ( () ) @@ -72,8 +72,8 @@ preparePaths baseDir paths = packPaths :: FilePath -> [FilePath] -> IO [Entry] packPaths baseDir paths = - interleave - [ do tarpath <- either fail return (toTarPath isDir relpath) + fmap concat $ interleave + [ do let tarpath = toTarPath isDir relpath if isDir then packDirectoryEntry filepath tarpath else packFileEntry filepath tarpath | relpath <- paths @@ -98,19 +98,26 @@ interleave = unsafeInterleaveIO . go -- * The file contents is read lazily. -- packFileEntry :: FilePath -- ^ Full path to find the file on the local disk - -> TarPath -- ^ Path to use for the tar Entry in the archive - -> IO Entry + -> These SplitError TarPath -- ^ Path to use for the tar Entry in the archive + -> IO [Entry] packFileEntry filepath tarpath = do mtime <- getModTime filepath perms <- getPermissions filepath file <- openBinaryFile filepath ReadMode size <- hFileSize file content <- BS.hGetContents file - return (simpleEntry tarpath (NormalFile content (fromIntegral size))) { - entryPermissions = if executable perms then executableFilePermissions - else ordinaryFilePermissions, - entryTime = mtime - } + let entry tp = (simpleEntry tp (NormalFile content (fromIntegral size))) { + entryPermissions = if executable perms then executableFilePermissions + else ordinaryFilePermissions, + entryTime = mtime + } + + case tarpath of + This e -> fail $ show e + That tp -> return [entry tp] + These _ tp -> do + let lEntry = longLinkEntry filepath + return [lEntry, entry tp] -- | Construct a tar 'Entry' based on a local directory (but not its contents). -- @@ -118,14 +125,21 @@ packFileEntry filepath tarpath = do -- Directory ownership and detailed permissions are not preserved. -- packDirectoryEntry :: FilePath -- ^ Full path to find the file on the local disk - -> TarPath -- ^ Path to use for the tar Entry in the archive - -> IO Entry + -> These SplitError TarPath -- ^ Path to use for the tar Entry in the archive + -> IO [Entry] packDirectoryEntry filepath tarpath = do mtime <- getModTime filepath - return (directoryEntry tarpath) { + let dEntry tp = (directoryEntry tp) { entryTime = mtime } + case tarpath of + This e -> fail $ show e + That tp -> return [dEntry tp] + These _ tp -> do + let lEntry = longLinkEntry filepath + return [lEntry, dEntry tp] + -- | This is a utility function, much like 'getDirectoryContents'. The -- difference is that it includes the contents of subdirectories. -- diff --git a/Codec/Archive/Tar/Types.hs b/Codec/Archive/Tar/Types.hs index 30d6ab2..9707175 100644 --- a/Codec/Archive/Tar/Types.hs +++ b/Codec/Archive/Tar/Types.hs @@ -27,8 +27,11 @@ module Codec.Archive.Tar.Types ( DevMajor, DevMinor, Format(..), + These(..), + these, simpleEntry, + longLinkEntry, fileEntry, directoryEntry, @@ -37,6 +40,7 @@ module Codec.Archive.Tar.Types ( directoryPermissions, TarPath(..), + SplitError(..), toTarPath, fromTarPath, fromTarPathToPosixPath, @@ -84,7 +88,6 @@ import Control.Applicative ((<$>), (<*>), pure) import Data.Word (Word16) #endif - type FileSize = Int64 -- | The number of seconds since the UNIX epoch type EpochTime = Int64 @@ -235,6 +238,22 @@ fileEntry :: TarPath -> LBS.ByteString -> Entry fileEntry name fileContent = simpleEntry name (NormalFile fileContent (LBS.length fileContent)) + +-- | Gnu entry for when a filepath is too long to be in entryTarPath. +-- Gnu tar uses OtherEntryType 'L' then as the first Entry and puts path +-- in the entryContent. The Next entry will then be the "original" +-- entry with the entryTarPath truncated. +longLinkEntry :: FilePath -> Entry +longLinkEntry tarpath = Entry { + entryTarPath = TarPath (BS.Char8.pack "././@LongLink") BS.empty, + entryContent = OtherEntryType 'L' (LBS.fromStrict $ BS.Char8.pack tarpath) (fromIntegral $ length tarpath), + entryPermissions = ordinaryFilePermissions, + entryOwnership = Ownership "" "" 0 0, + entryTime = 0, + entryFormat = GnuFormat + } + + -- | A tar 'Entry' for a directory. -- -- Entry fields such as file permissions and ownership have default values. @@ -348,7 +367,7 @@ fromTarPathToWindowsPath (TarPath namebs prefixbs) = adjustDirectory $ -- toTarPath :: Bool -- ^ Is the path for a directory? This is needed because for -- directories a 'TarPath' must always use a trailing @\/@. - -> FilePath -> Either String TarPath + -> FilePath -> These SplitError TarPath toTarPath isDir = splitLongPath . addTrailingSep . FilePath.Posix.joinPath @@ -357,6 +376,11 @@ toTarPath isDir = splitLongPath addTrailingSep | isDir = FilePath.Posix.addTrailingPathSeparator | otherwise = id +data SplitError = FileNameEmpty + | FileNameTooLong + deriving Show + + -- | Take a sanitised path, split on directory separators and try to pack it -- into the 155 + 100 tar file name format. -- @@ -364,17 +388,17 @@ toTarPath isDir = splitLongPath -- and try to fit as many components into the 100 long name area as possible. -- If all the remaining components fit in the 155 name area then we win. -- -splitLongPath :: FilePath -> Either String TarPath +splitLongPath :: FilePath -> These SplitError TarPath splitLongPath path = case packName nameMax (reverse (FilePath.Posix.splitPath path)) of - Left err -> Left err - Right (name, []) -> Right $! TarPath (BS.Char8.pack name) - BS.empty + Left FileNameTooLong -> These FileNameTooLong $ TarPath (BS.Char8.pack $ take 100 path) BS.empty + Left e -> This e + Right (name, []) -> That $! TarPath (BS.Char8.pack name) BS.empty Right (name, first:rest) -> case packName prefixMax remainder of - Left err -> Left err - Right (_ , (_:_)) -> Left "File name too long (cannot split)" - Right (prefix, []) -> Right $! TarPath (BS.Char8.pack name) - (BS.Char8.pack prefix) + Left FileNameTooLong -> These FileNameTooLong $ TarPath (BS.Char8.pack $ take 100 path) BS.empty + Left e -> This e + Right (_ , (_:_)) -> These FileNameTooLong $ TarPath (BS.Char8.pack $ take 100 path) BS.empty + Right (prefix, []) -> That $! TarPath (BS.Char8.pack name) (BS.Char8.pack prefix) where -- drop the '/' between the name and prefix: remainder = init first : rest @@ -384,9 +408,9 @@ splitLongPath path = nameMax = 100 prefixMax = 155 - packName _ [] = Left "File name empty" + packName _ [] = Left FileNameEmpty packName maxLen (c:cs) - | n > maxLen = Left "File name too long" + | n > maxLen = Left FileNameTooLong | otherwise = Right (packName' maxLen n [c] cs) where n = length c @@ -544,6 +568,17 @@ instance NFData e => NFData (Entries e) where rnf (Fail e) = rnf e +data These a b = This a | That b | These a b + deriving (Eq, Ord, Read, Show) + + +-- | Case analysis for the 'These' type. +these :: (a -> c) -> (b -> c) -> (a -> b -> c) -> These a b -> c +these l _ _ (This a) = l a +these _ r _ (That x) = r x +these _ _ lr (These a x) = lr a x + + ------------------------- -- QuickCheck instances -- @@ -568,13 +603,13 @@ instance Arbitrary Entry where | perms' <- shrinkIntegral perms ] instance Arbitrary TarPath where - arbitrary = either error id + arbitrary = these (error . show) id (flip const) . toTarPath False . FilePath.Posix.joinPath <$> listOf1ToN (255 `div` 5) (elements (map (replicate 4) "abcd")) - shrink = map (either error id . toTarPath False) + shrink = map (these (error . show) id (flip const) . toTarPath False) . map FilePath.Posix.joinPath . filter (not . null) . shrinkList shrinkNothing diff --git a/Codec/Archive/Tar/Unpack.hs b/Codec/Archive/Tar/Unpack.hs index 6b82ac8..1a9775d 100644 --- a/Codec/Archive/Tar/Unpack.hs +++ b/Codec/Archive/Tar/Unpack.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE ViewPatterns #-} ----------------------------------------------------------------------------- -- | -- Module : Codec.Archive.Tar @@ -17,6 +18,8 @@ module Codec.Archive.Tar.Unpack ( import Codec.Archive.Tar.Types import Codec.Archive.Tar.Check +import Data.List (partition) +import qualified Data.ByteString.Char8 as Char8 import qualified Data.ByteString.Lazy as BS import System.FilePath ( () ) @@ -61,8 +64,12 @@ import System.IO.Error -- use 'checkSecurity' before 'checkTarbomb' or other checks. -- unpack :: Exception e => FilePath -> Entries e -> IO () -unpack baseDir entries = unpackEntries [] (checkSecurity entries) - >>= emulateLinks +unpack baseDir entries = do + uEntries <- unpackEntries [] (checkSecurity entries) + let (hardlinks, symlinks) = partition (\(_, _, x) -> x) uEntries + -- emulate hardlinks first, in case a symlink points to it + emulateLinks hardlinks + emulateLinks symlinks where -- We're relying here on 'checkSecurity' to make sure we're not scribbling @@ -71,16 +78,28 @@ unpack baseDir entries = unpackEntries [] (checkSecurity entries) unpackEntries _ (Fail err) = either throwIO throwIO err unpackEntries links Done = return links unpackEntries links (Next entry es) = case entryContent entry of - NormalFile file _ -> extractFile path file mtime + NormalFile file _ -> do + extractFile (entryPath entry) file (entryTime entry) + unpackEntries links es + Directory -> extractDir (entryPath entry) (entryTime entry) >> unpackEntries links es - Directory -> extractDir path mtime - >> unpackEntries links es - HardLink link -> (unpackEntries $! saveLink path link links) es - SymbolicLink link -> (unpackEntries $! saveLink path link links) es - _ -> unpackEntries links es --ignore other file types - where - path = entryPath entry - mtime = entryTime entry + HardLink link -> (unpackEntries $! saveLink True (entryPath entry) link links) es + SymbolicLink link -> (unpackEntries $! saveLink False (entryPath entry) link links) es + OtherEntryType 'L' (Char8.unpack . BS.toStrict -> fn) _ -> + case es of + (Next entry' es') -> case entryContent entry' of + NormalFile file _ -> do + extractFile fn file (entryTime entry') + unpackEntries links es' + Directory -> extractDir fn (entryTime entry') + >> unpackEntries links es' + HardLink link -> (unpackEntries $! saveLink True fn link links) es' + SymbolicLink link -> (unpackEntries $! saveLink False fn link links) es' + OtherEntryType 'L' _ _ -> throwIO $ userError "Two subsequent OtherEntryType 'L'" + _ -> unpackEntries links es' + (Fail err) -> either throwIO throwIO err + Done -> throwIO $ userError "././@LongLink without a subsequent entry" + _ -> unpackEntries links es --ignore other file types extractFile path content mtime = do -- Note that tar archives do not make sure each directory is created @@ -99,15 +118,17 @@ unpack baseDir entries = unpackEntries [] (checkSecurity entries) where absPath = baseDir path - saveLink path link links = seq (length path) - $ seq (length link') - $ (path, link'):links + saveLink isHardLink path link links = seq (length path) + $ seq (length link') + $ (path, link', isHardLink):links where link' = fromLinkTarget link - emulateLinks = mapM_ $ \(relPath, relLinkTarget) -> + emulateLinks = mapM_ $ \(relPath, relLinkTarget, isHardLink) -> let absPath = baseDir relPath - absTarget = FilePath.Native.takeDirectory absPath relLinkTarget - in copyFile absTarget absPath + -- hard links link targets are always "absolute" paths in + -- the context of the tar root + absTarget = if isHardLink then baseDir relLinkTarget else FilePath.Native.takeDirectory absPath relLinkTarget + in copyFile absTarget absPath setModTime :: FilePath -> EpochTime -> IO () setModTime path t =