Skip to content

Commit

Permalink
Add support for over-long filepaths via GNU extension
Browse files Browse the repository at this point in the history
Fixes #49
  • Loading branch information
hasufell authored and Bodigrim committed Nov 15, 2023
1 parent 242a760 commit cb6d4ef
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 47 deletions.
40 changes: 27 additions & 13 deletions Codec/Archive/Tar/Pack.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ module Codec.Archive.Tar.Pack (
) where

import Codec.Archive.Tar.Types

import Control.Monad (join)
import qualified Data.ByteString.Lazy as BS
import System.FilePath
( (</>) )
Expand Down Expand Up @@ -72,8 +72,8 @@ preparePaths baseDir paths =

packPaths :: FilePath -> [FilePath] -> IO [Entry]
packPaths baseDir paths =
interleave
[ do tarpath <- either fail return (toTarPath isDir relpath)
fmap concat $ interleave
[ do let tarpath = toTarPath isDir relpath
if isDir then packDirectoryEntry filepath tarpath
else packFileEntry filepath tarpath
| relpath <- paths
Expand All @@ -98,34 +98,48 @@ interleave = unsafeInterleaveIO . go
-- * The file contents is read lazily.
--
packFileEntry :: FilePath -- ^ Full path to find the file on the local disk
-> TarPath -- ^ Path to use for the tar Entry in the archive
-> IO Entry
-> These SplitError TarPath -- ^ Path to use for the tar Entry in the archive
-> IO [Entry]
packFileEntry filepath tarpath = do
mtime <- getModTime filepath
perms <- getPermissions filepath
file <- openBinaryFile filepath ReadMode
size <- hFileSize file
content <- BS.hGetContents file
return (simpleEntry tarpath (NormalFile content (fromIntegral size))) {
entryPermissions = if executable perms then executableFilePermissions
else ordinaryFilePermissions,
entryTime = mtime
}
let entry tp = (simpleEntry tp (NormalFile content (fromIntegral size))) {
entryPermissions = if executable perms then executableFilePermissions
else ordinaryFilePermissions,
entryTime = mtime
}

case tarpath of
This e -> fail $ show e
That tp -> return [entry tp]
These _ tp -> do
let lEntry = longLinkEntry filepath
return [lEntry, entry tp]

-- | Construct a tar 'Entry' based on a local directory (but not its contents).
--
-- The only attribute of the directory that is used is its modification time.
-- Directory ownership and detailed permissions are not preserved.
--
packDirectoryEntry :: FilePath -- ^ Full path to find the file on the local disk
-> TarPath -- ^ Path to use for the tar Entry in the archive
-> IO Entry
-> These SplitError TarPath -- ^ Path to use for the tar Entry in the archive
-> IO [Entry]
packDirectoryEntry filepath tarpath = do
mtime <- getModTime filepath
return (directoryEntry tarpath) {
let dEntry tp = (directoryEntry tp) {
entryTime = mtime
}

case tarpath of
This e -> fail $ show e
That tp -> return [dEntry tp]
These _ tp -> do
let lEntry = longLinkEntry filepath
return [lEntry, dEntry tp]

-- | This is a utility function, much like 'getDirectoryContents'. The
-- difference is that it includes the contents of subdirectories.
--
Expand Down
57 changes: 45 additions & 12 deletions Codec/Archive/Tar/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,11 @@ module Codec.Archive.Tar.Types (
DevMajor,
DevMinor,
Format(..),
These(..),
these,

simpleEntry,
longLinkEntry,
fileEntry,
directoryEntry,

Expand All @@ -37,6 +40,7 @@ module Codec.Archive.Tar.Types (
directoryPermissions,

TarPath(..),
SplitError(..),
toTarPath,
fromTarPath,
fromTarPathToPosixPath,
Expand Down Expand Up @@ -224,6 +228,22 @@ fileEntry :: TarPath -> LBS.ByteString -> Entry
fileEntry name fileContent =
simpleEntry name (NormalFile fileContent (LBS.length fileContent))


-- | Gnu entry for when a filepath is too long to be in entryTarPath.
-- Gnu tar uses OtherEntryType 'L' then as the first Entry and puts path
-- in the entryContent. The Next entry will then be the "original"
-- entry with the entryTarPath truncated.
longLinkEntry :: FilePath -> Entry
longLinkEntry tarpath = Entry {
entryTarPath = TarPath (BS.Char8.pack "././@LongLink") BS.empty,
entryContent = OtherEntryType 'L' (LBS.fromStrict $ BS.Char8.pack tarpath) (fromIntegral $ length tarpath),
entryPermissions = ordinaryFilePermissions,
entryOwnership = Ownership "" "" 0 0,
entryTime = 0,
entryFormat = GnuFormat
}


-- | A tar 'Entry' for a directory.
--
-- Entry fields such as file permissions and ownership have default values.
Expand Down Expand Up @@ -337,7 +357,7 @@ fromTarPathToWindowsPath (TarPath namebs prefixbs) = adjustDirectory $
--
toTarPath :: Bool -- ^ Is the path for a directory? This is needed because for
-- directories a 'TarPath' must always use a trailing @\/@.
-> FilePath -> Either String TarPath
-> FilePath -> These SplitError TarPath
toTarPath isDir = splitLongPath
. addTrailingSep
. FilePath.Posix.joinPath
Expand All @@ -346,25 +366,29 @@ toTarPath isDir = splitLongPath
addTrailingSep | isDir = FilePath.Posix.addTrailingPathSeparator
| otherwise = id

data SplitError = FileNameEmpty
| FileNameTooLong
deriving Show


-- | Take a sanitised path, split on directory separators and try to pack it
-- into the 155 + 100 tar file name format.
--
-- The strategy is this: take the name-directory components in reverse order
-- and try to fit as many components into the 100 long name area as possible.
-- If all the remaining components fit in the 155 name area then we win.
--
splitLongPath :: FilePath -> Either String TarPath
splitLongPath :: FilePath -> These SplitError TarPath
splitLongPath path =
case packName nameMax (reverse (FilePath.Posix.splitPath path)) of
Left err -> Left err
Right (name, []) -> Right $! TarPath (BS.Char8.pack name)
BS.empty
Left FileNameTooLong -> These FileNameTooLong $ TarPath (BS.Char8.pack $ take 100 path) BS.empty
Left e -> This e
Right (name, []) -> That $! TarPath (BS.Char8.pack name) BS.empty
Right (name, first:rest) -> case packName prefixMax remainder of
Left err -> Left err
Right (_ , (_:_)) -> Left $ "Filename " ++ path ++
" too long (cannot split)"
Right (prefix, []) -> Right $! TarPath (BS.Char8.pack name)
(BS.Char8.pack prefix)
Left FileNameTooLong -> These FileNameTooLong $ TarPath (BS.Char8.pack $ take 100 path) BS.empty
Left e -> This e
Right (_ , (_:_)) -> These FileNameTooLong $ TarPath (BS.Char8.pack $ take 100 path) BS.empty
Right (prefix, []) -> That $! TarPath (BS.Char8.pack name) (BS.Char8.pack prefix)
where
-- drop the '/' between the name and prefix:
remainder = init first : rest
Expand All @@ -374,9 +398,9 @@ splitLongPath path =
nameMax = 100
prefixMax = 155

packName _ [] = Left "File name empty"
packName _ [] = Left FileNameEmpty
packName maxLen (c:cs)
| n > maxLen = Left "File name too long"
| n > maxLen = Left FileNameTooLong
| otherwise = Right (packName' maxLen n [c] cs)
where n = length c

Expand Down Expand Up @@ -529,3 +553,12 @@ instance NFData e => NFData (Entries e) where
rnf (Next e es) = rnf e `seq` rnf es
rnf Done = ()
rnf (Fail e) = rnf e

data These a b = This a | That b | These a b
deriving (Eq, Ord, Read, Show)

-- | Case analysis for the 'These' type.
these :: (a -> c) -> (b -> c) -> (a -> b -> c) -> These a b -> c
these l _ _ (This a) = l a
these _ r _ (That x) = r x
these _ _ lr (These a x) = lr a x
58 changes: 40 additions & 18 deletions Codec/Archive/Tar/Unpack.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{-# LANGUAGE ViewPatterns #-}
-----------------------------------------------------------------------------
-- |
-- Module : Codec.Archive.Tar
Expand All @@ -19,6 +20,8 @@ import Codec.Archive.Tar.Check

import Data.Bits
( testBit )
import Data.List (partition)
import qualified Data.ByteString.Char8 as Char8
import qualified Data.ByteString.Lazy as BS
import System.FilePath
( (</>) )
Expand Down Expand Up @@ -63,8 +66,12 @@ import System.IO.Error
-- use 'checkSecurity' before 'checkTarbomb' or other checks.
--
unpack :: Exception e => FilePath -> Entries e -> IO ()
unpack baseDir entries = unpackEntries [] (checkSecurity entries)
>>= emulateLinks
unpack baseDir entries = do
uEntries <- unpackEntries [] (checkSecurity entries)
let (hardlinks, symlinks) = partition (\(_, _, x) -> x) uEntries
-- emulate hardlinks first, in case a symlink points to it
emulateLinks hardlinks
emulateLinks symlinks

where
-- We're relying here on 'checkSecurity' to make sure we're not scribbling
Expand All @@ -73,16 +80,29 @@ unpack baseDir entries = unpackEntries [] (checkSecurity entries)
unpackEntries _ (Fail err) = either throwIO throwIO err
unpackEntries links Done = return links
unpackEntries links (Next entry es) = case entryContent entry of
NormalFile file _ -> extractFile (entryPermissions entry) path file mtime
>> unpackEntries links es
Directory -> extractDir path mtime
>> unpackEntries links es
HardLink link -> (unpackEntries $! saveLink path link links) es
SymbolicLink link -> (unpackEntries $! saveLink path link links) es
_ -> unpackEntries links es --ignore other file types
where
path = entryPath entry
mtime = entryTime entry
NormalFile file _ -> do
extractFile (entryPermissions entry) (entryPath entry) file (entryTime entry)
unpackEntries links es
Directory -> do
extractDir (entryPath entry) (entryTime entry)
unpackEntries links es
HardLink link -> (unpackEntries $! saveLink True (entryPath entry) link links) es
SymbolicLink link -> (unpackEntries $! saveLink False (entryPath entry) link links) es
OtherEntryType 'L' (Char8.unpack . BS.toStrict -> fn) _ ->
case es of
(Next entry' es') -> case entryContent entry' of
NormalFile file _ -> do
extractFile (entryPermissions entry') fn file (entryTime entry')
unpackEntries links es'
Directory -> extractDir fn (entryTime entry')
>> unpackEntries links es'
HardLink link -> (unpackEntries $! saveLink True fn link links) es'
SymbolicLink link -> (unpackEntries $! saveLink False fn link links) es'
OtherEntryType 'L' _ _ -> throwIO $ userError "Two subsequent OtherEntryType 'L'"
_ -> unpackEntries links es'
(Fail err) -> either throwIO throwIO err
Done -> throwIO $ userError "././@LongLink without a subsequent entry"
_ -> unpackEntries links es --ignore other file types

extractFile permissions path content mtime = do
-- Note that tar archives do not make sure each directory is created
Expand All @@ -102,15 +122,17 @@ unpack baseDir entries = unpackEntries [] (checkSecurity entries)
where
absPath = baseDir </> path

saveLink path link links = seq (length path)
$ seq (length link')
$ (path, link'):links
saveLink isHardLink path link links = seq (length path)
$ seq (length link')
$ (path, link', isHardLink):links
where link' = fromLinkTarget link

emulateLinks = mapM_ $ \(relPath, relLinkTarget) ->
emulateLinks = mapM_ $ \(relPath, relLinkTarget, isHardLink) ->
let absPath = baseDir </> relPath
absTarget = FilePath.Native.takeDirectory absPath </> relLinkTarget
in copyFile absTarget absPath
-- hard links link targets are always "absolute" paths in
-- the context of the tar root
absTarget = if isHardLink then baseDir </> relLinkTarget else FilePath.Native.takeDirectory absPath </> relLinkTarget
in copyFile absTarget absPath

setModTime :: FilePath -> EpochTime -> IO ()
setModTime path t =
Expand Down
4 changes: 2 additions & 2 deletions test/Codec/Archive/Tar/Index/Tests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ example1 =
testEntry :: FilePath -> Int64 -> Entry
testEntry name size = simpleEntry path (NormalFile mempty size)
where
Right path = toTarPath False name
That path = toTarPath False name

-- | Simple tar archive containing regular files only
data SimpleTarArchive = SimpleTarArchive {
Expand Down Expand Up @@ -223,7 +223,7 @@ instance Arbitrary SimpleTarArchive where
mkList [] = []
mkList ((fp, bs):es) = entry : mkList es
where
Right path = toTarPath False fp
That path = toTarPath False fp
entry = simpleEntry path content
content = NormalFile bs (LBS.length bs)

Expand Down
4 changes: 2 additions & 2 deletions test/Codec/Archive/Tar/Types/Tests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@ instance Arbitrary Entry where
| perms' <- shrinkIntegral perms ]

instance Arbitrary TarPath where
arbitrary = either error id
arbitrary = these (error . show) id (flip const)
. toTarPath False
. FilePath.Posix.joinPath
<$> listOf1ToN (255 `div` 5)
(elements (map (replicate 4) "abcd"))

shrink = map (either error id . toTarPath False)
shrink = map (these (error . show) id (flip const) . toTarPath False)
. map FilePath.Posix.joinPath
. filter (not . null)
. shrinkList shrinkNothing
Expand Down

0 comments on commit cb6d4ef

Please sign in to comment.