diff --git a/Codec/Archive/Tar/Pack.hs b/Codec/Archive/Tar/Pack.hs index cc44cfa..4fd0b32 100644 --- a/Codec/Archive/Tar/Pack.hs +++ b/Codec/Archive/Tar/Pack.hs @@ -19,7 +19,7 @@ module Codec.Archive.Tar.Pack ( ) where import Codec.Archive.Tar.Types - +import Control.Monad (join) import qualified Data.ByteString.Lazy as BS import System.FilePath ( () ) @@ -72,8 +72,8 @@ preparePaths baseDir paths = packPaths :: FilePath -> [FilePath] -> IO [Entry] packPaths baseDir paths = - interleave - [ do tarpath <- either fail return (toTarPath isDir relpath) + fmap concat $ interleave + [ do let tarpath = toTarPath isDir relpath if isDir then packDirectoryEntry filepath tarpath else packFileEntry filepath tarpath | relpath <- paths @@ -98,19 +98,26 @@ interleave = unsafeInterleaveIO . go -- * The file contents is read lazily. -- packFileEntry :: FilePath -- ^ Full path to find the file on the local disk - -> TarPath -- ^ Path to use for the tar Entry in the archive - -> IO Entry + -> These SplitError TarPath -- ^ Path to use for the tar Entry in the archive + -> IO [Entry] packFileEntry filepath tarpath = do mtime <- getModTime filepath perms <- getPermissions filepath file <- openBinaryFile filepath ReadMode size <- hFileSize file content <- BS.hGetContents file - return (simpleEntry tarpath (NormalFile content (fromIntegral size))) { - entryPermissions = if executable perms then executableFilePermissions - else ordinaryFilePermissions, - entryTime = mtime - } + let entry tp = (simpleEntry tp (NormalFile content (fromIntegral size))) { + entryPermissions = if executable perms then executableFilePermissions + else ordinaryFilePermissions, + entryTime = mtime + } + + case tarpath of + This e -> fail $ show e + That tp -> return [entry tp] + These _ tp -> do + let lEntry = longLinkEntry filepath + return [lEntry, entry tp] -- | Construct a tar 'Entry' based on a local directory (but not its contents). -- @@ -118,14 +125,21 @@ packFileEntry filepath tarpath = do -- Directory ownership and detailed permissions are not preserved. -- packDirectoryEntry :: FilePath -- ^ Full path to find the file on the local disk - -> TarPath -- ^ Path to use for the tar Entry in the archive - -> IO Entry + -> These SplitError TarPath -- ^ Path to use for the tar Entry in the archive + -> IO [Entry] packDirectoryEntry filepath tarpath = do mtime <- getModTime filepath - return (directoryEntry tarpath) { + let dEntry tp = (directoryEntry tp) { entryTime = mtime } + case tarpath of + This e -> fail $ show e + That tp -> return [dEntry tp] + These _ tp -> do + let lEntry = longLinkEntry filepath + return [lEntry, dEntry tp] + -- | This is a utility function, much like 'getDirectoryContents'. The -- difference is that it includes the contents of subdirectories. -- diff --git a/Codec/Archive/Tar/Types.hs b/Codec/Archive/Tar/Types.hs index 260bbdb..5be3a04 100644 --- a/Codec/Archive/Tar/Types.hs +++ b/Codec/Archive/Tar/Types.hs @@ -27,8 +27,11 @@ module Codec.Archive.Tar.Types ( DevMajor, DevMinor, Format(..), + These(..), + these, simpleEntry, + longLinkEntry, fileEntry, directoryEntry, @@ -37,6 +40,7 @@ module Codec.Archive.Tar.Types ( directoryPermissions, TarPath(..), + SplitError(..), toTarPath, fromTarPath, fromTarPathToPosixPath, @@ -224,6 +228,22 @@ fileEntry :: TarPath -> LBS.ByteString -> Entry fileEntry name fileContent = simpleEntry name (NormalFile fileContent (LBS.length fileContent)) + +-- | Gnu entry for when a filepath is too long to be in entryTarPath. +-- Gnu tar uses OtherEntryType 'L' then as the first Entry and puts path +-- in the entryContent. The Next entry will then be the "original" +-- entry with the entryTarPath truncated. +longLinkEntry :: FilePath -> Entry +longLinkEntry tarpath = Entry { + entryTarPath = TarPath (BS.Char8.pack "././@LongLink") BS.empty, + entryContent = OtherEntryType 'L' (LBS.fromStrict $ BS.Char8.pack tarpath) (fromIntegral $ length tarpath), + entryPermissions = ordinaryFilePermissions, + entryOwnership = Ownership "" "" 0 0, + entryTime = 0, + entryFormat = GnuFormat + } + + -- | A tar 'Entry' for a directory. -- -- Entry fields such as file permissions and ownership have default values. @@ -337,7 +357,7 @@ fromTarPathToWindowsPath (TarPath namebs prefixbs) = adjustDirectory $ -- toTarPath :: Bool -- ^ Is the path for a directory? This is needed because for -- directories a 'TarPath' must always use a trailing @\/@. - -> FilePath -> Either String TarPath + -> FilePath -> These SplitError TarPath toTarPath isDir = splitLongPath . addTrailingSep . FilePath.Posix.joinPath @@ -346,6 +366,11 @@ toTarPath isDir = splitLongPath addTrailingSep | isDir = FilePath.Posix.addTrailingPathSeparator | otherwise = id +data SplitError = FileNameEmpty + | FileNameTooLong + deriving Show + + -- | Take a sanitised path, split on directory separators and try to pack it -- into the 155 + 100 tar file name format. -- @@ -353,18 +378,17 @@ toTarPath isDir = splitLongPath -- and try to fit as many components into the 100 long name area as possible. -- If all the remaining components fit in the 155 name area then we win. -- -splitLongPath :: FilePath -> Either String TarPath +splitLongPath :: FilePath -> These SplitError TarPath splitLongPath path = case packName nameMax (reverse (FilePath.Posix.splitPath path)) of - Left err -> Left err - Right (name, []) -> Right $! TarPath (BS.Char8.pack name) - BS.empty + Left FileNameTooLong -> These FileNameTooLong $ TarPath (BS.Char8.pack $ take 100 path) BS.empty + Left e -> This e + Right (name, []) -> That $! TarPath (BS.Char8.pack name) BS.empty Right (name, first:rest) -> case packName prefixMax remainder of - Left err -> Left err - Right (_ , (_:_)) -> Left $ "Filename " ++ path ++ - " too long (cannot split)" - Right (prefix, []) -> Right $! TarPath (BS.Char8.pack name) - (BS.Char8.pack prefix) + Left FileNameTooLong -> These FileNameTooLong $ TarPath (BS.Char8.pack $ take 100 path) BS.empty + Left e -> This e + Right (_ , (_:_)) -> These FileNameTooLong $ TarPath (BS.Char8.pack $ take 100 path) BS.empty + Right (prefix, []) -> That $! TarPath (BS.Char8.pack name) (BS.Char8.pack prefix) where -- drop the '/' between the name and prefix: remainder = init first : rest @@ -374,9 +398,9 @@ splitLongPath path = nameMax = 100 prefixMax = 155 - packName _ [] = Left "File name empty" + packName _ [] = Left FileNameEmpty packName maxLen (c:cs) - | n > maxLen = Left "File name too long" + | n > maxLen = Left FileNameTooLong | otherwise = Right (packName' maxLen n [c] cs) where n = length c @@ -529,3 +553,12 @@ instance NFData e => NFData (Entries e) where rnf (Next e es) = rnf e `seq` rnf es rnf Done = () rnf (Fail e) = rnf e + +data These a b = This a | That b | These a b + deriving (Eq, Ord, Read, Show) + +-- | Case analysis for the 'These' type. +these :: (a -> c) -> (b -> c) -> (a -> b -> c) -> These a b -> c +these l _ _ (This a) = l a +these _ r _ (That x) = r x +these _ _ lr (These a x) = lr a x diff --git a/Codec/Archive/Tar/Unpack.hs b/Codec/Archive/Tar/Unpack.hs index 4caa722..12bbea1 100644 --- a/Codec/Archive/Tar/Unpack.hs +++ b/Codec/Archive/Tar/Unpack.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE ViewPatterns #-} ----------------------------------------------------------------------------- -- | -- Module : Codec.Archive.Tar @@ -19,6 +20,8 @@ import Codec.Archive.Tar.Check import Data.Bits ( testBit ) +import Data.List (partition) +import qualified Data.ByteString.Char8 as Char8 import qualified Data.ByteString.Lazy as BS import System.FilePath ( () ) @@ -63,8 +66,12 @@ import System.IO.Error -- use 'checkSecurity' before 'checkTarbomb' or other checks. -- unpack :: Exception e => FilePath -> Entries e -> IO () -unpack baseDir entries = unpackEntries [] (checkSecurity entries) - >>= emulateLinks +unpack baseDir entries = do + uEntries <- unpackEntries [] (checkSecurity entries) + let (hardlinks, symlinks) = partition (\(_, _, x) -> x) uEntries + -- emulate hardlinks first, in case a symlink points to it + emulateLinks hardlinks + emulateLinks symlinks where -- We're relying here on 'checkSecurity' to make sure we're not scribbling @@ -73,16 +80,29 @@ unpack baseDir entries = unpackEntries [] (checkSecurity entries) unpackEntries _ (Fail err) = either throwIO throwIO err unpackEntries links Done = return links unpackEntries links (Next entry es) = case entryContent entry of - NormalFile file _ -> extractFile (entryPermissions entry) path file mtime - >> unpackEntries links es - Directory -> extractDir path mtime - >> unpackEntries links es - HardLink link -> (unpackEntries $! saveLink path link links) es - SymbolicLink link -> (unpackEntries $! saveLink path link links) es - _ -> unpackEntries links es --ignore other file types - where - path = entryPath entry - mtime = entryTime entry + NormalFile file _ -> do + extractFile (entryPermissions entry) (entryPath entry) file (entryTime entry) + unpackEntries links es + Directory -> do + extractDir (entryPath entry) (entryTime entry) + unpackEntries links es + HardLink link -> (unpackEntries $! saveLink True (entryPath entry) link links) es + SymbolicLink link -> (unpackEntries $! saveLink False (entryPath entry) link links) es + OtherEntryType 'L' (Char8.unpack . BS.toStrict -> fn) _ -> + case es of + (Next entry' es') -> case entryContent entry' of + NormalFile file _ -> do + extractFile (entryPermissions entry') fn file (entryTime entry') + unpackEntries links es' + Directory -> extractDir fn (entryTime entry') + >> unpackEntries links es' + HardLink link -> (unpackEntries $! saveLink True fn link links) es' + SymbolicLink link -> (unpackEntries $! saveLink False fn link links) es' + OtherEntryType 'L' _ _ -> throwIO $ userError "Two subsequent OtherEntryType 'L'" + _ -> unpackEntries links es' + (Fail err) -> either throwIO throwIO err + Done -> throwIO $ userError "././@LongLink without a subsequent entry" + _ -> unpackEntries links es --ignore other file types extractFile permissions path content mtime = do -- Note that tar archives do not make sure each directory is created @@ -102,15 +122,17 @@ unpack baseDir entries = unpackEntries [] (checkSecurity entries) where absPath = baseDir path - saveLink path link links = seq (length path) - $ seq (length link') - $ (path, link'):links + saveLink isHardLink path link links = seq (length path) + $ seq (length link') + $ (path, link', isHardLink):links where link' = fromLinkTarget link - emulateLinks = mapM_ $ \(relPath, relLinkTarget) -> + emulateLinks = mapM_ $ \(relPath, relLinkTarget, isHardLink) -> let absPath = baseDir relPath - absTarget = FilePath.Native.takeDirectory absPath relLinkTarget - in copyFile absTarget absPath + -- hard links link targets are always "absolute" paths in + -- the context of the tar root + absTarget = if isHardLink then baseDir relLinkTarget else FilePath.Native.takeDirectory absPath relLinkTarget + in copyFile absTarget absPath setModTime :: FilePath -> EpochTime -> IO () setModTime path t =