Skip to content

Commit

Permalink
Add support for over-long filepaths via GNU extension
Browse files Browse the repository at this point in the history
  • Loading branch information
hasufell committed Feb 21, 2020
1 parent bde33fe commit 452f667
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 33 deletions.
5 changes: 3 additions & 2 deletions Codec/Archive/Tar/Index.hs
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ import Data.ByteString.Lazy.Builder.Extras as BS (toLazyByteStringWith,
#endif

#ifdef TESTS
import Data.These
import qualified Prelude
import Test.QuickCheck
import Test.QuickCheck.Property (ioProperty)
Expand Down Expand Up @@ -710,7 +711,7 @@ example1 =
testEntry :: FilePath -> Int64 -> Entry
testEntry name size = simpleEntry path (NormalFile mempty size)
where
Right path = toTarPath False name
That path = toTarPath False name

-- | Simple tar archive containing regular files only
data SimpleTarArchive = SimpleTarArchive {
Expand Down Expand Up @@ -775,7 +776,7 @@ instance Arbitrary SimpleTarArchive where
mkList [] = []
mkList ((fp, bs):es) = entry : mkList es
where
Right path = toTarPath False fp
That path = toTarPath False fp
entry = simpleEntry path content
content = NormalFile bs (LBS.length bs)

Expand Down
41 changes: 28 additions & 13 deletions Codec/Archive/Tar/Pack.hs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ module Codec.Archive.Tar.Pack (
) where

import Codec.Archive.Tar.Types

import Control.Monad (join)
import qualified Data.ByteString.Lazy as BS
import Data.These
import System.FilePath
( (</>) )
import qualified System.FilePath as FilePath.Native
Expand Down Expand Up @@ -79,8 +80,8 @@ preparePaths baseDir paths =

packPaths :: FilePath -> [FilePath] -> IO [Entry]
packPaths baseDir paths =
interleave
[ do tarpath <- either fail return (toTarPath isDir relpath)
fmap concat $ interleave
[ do let tarpath = toTarPath isDir relpath
if isDir then packDirectoryEntry filepath tarpath
else packFileEntry filepath tarpath
| relpath <- paths
Expand All @@ -105,34 +106,48 @@ interleave = unsafeInterleaveIO . go
-- * The file contents is read lazily.
--
packFileEntry :: FilePath -- ^ Full path to find the file on the local disk
-> TarPath -- ^ Path to use for the tar Entry in the archive
-> IO Entry
-> These SplitError TarPath -- ^ Path to use for the tar Entry in the archive
-> IO [Entry]
packFileEntry filepath tarpath = do
mtime <- getModTime filepath
perms <- getPermissions filepath
file <- openBinaryFile filepath ReadMode
size <- hFileSize file
content <- BS.hGetContents file
return (simpleEntry tarpath (NormalFile content (fromIntegral size))) {
entryPermissions = if executable perms then executableFilePermissions
else ordinaryFilePermissions,
entryTime = mtime
}
let entry tp = (simpleEntry tp (NormalFile content (fromIntegral size))) {
entryPermissions = if executable perms then executableFilePermissions
else ordinaryFilePermissions,
entryTime = mtime
}

case tarpath of
This e -> fail $ show e
That tp -> return [entry tp]
These _ tp -> do
let lEntry = longLinkEntry filepath
return [lEntry, entry tp]

-- | Construct a tar 'Entry' based on a local directory (but not its contents).
--
-- The only attribute of the directory that is used is its modification time.
-- Directory ownership and detailed permissions are not preserved.
--
packDirectoryEntry :: FilePath -- ^ Full path to find the file on the local disk
-> TarPath -- ^ Path to use for the tar Entry in the archive
-> IO Entry
-> These SplitError TarPath -- ^ Path to use for the tar Entry in the archive
-> IO [Entry]
packDirectoryEntry filepath tarpath = do
mtime <- getModTime filepath
return (directoryEntry tarpath) {
let dEntry tp = (directoryEntry tp) {
entryTime = mtime
}

case tarpath of
This e -> fail $ show e
That tp -> return [dEntry tp]
These _ tp -> do
let lEntry = longLinkEntry filepath
return [lEntry, dEntry tp]

-- | This is a utility function, much like 'getDirectoryContents'. The
-- difference is that it includes the contents of subdirectories.
--
Expand Down
51 changes: 37 additions & 14 deletions Codec/Archive/Tar/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ module Codec.Archive.Tar.Types (
Format(..),

simpleEntry,
longLinkEntry,
fileEntry,
directoryEntry,

Expand All @@ -37,6 +38,7 @@ module Codec.Archive.Tar.Types (
directoryPermissions,

TarPath(..),
SplitError(..),
toTarPath,
fromTarPath,
fromTarPathToPosixPath,
Expand All @@ -63,6 +65,7 @@ module Codec.Archive.Tar.Types (
import Data.Int (Int64)
import Data.Monoid (Monoid(..))
import Data.Semigroup as Sem
import Data.These
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BS.Char8
import qualified Data.ByteString.Lazy as LBS
Expand All @@ -84,7 +87,6 @@ import Control.Applicative ((<$>), (<*>), pure)
import Data.Word (Word16)
#endif


type FileSize = Int64
-- | The number of seconds since the UNIX epoch
type EpochTime = Int64
Expand Down Expand Up @@ -241,6 +243,22 @@ fileEntry :: TarPath -> LBS.ByteString -> Entry
fileEntry name fileContent =
simpleEntry name (NormalFile fileContent (LBS.length fileContent))


-- | Gnu entry for when a filepath is too long to be in entryTarPath.
-- Gnu tar uses OtherEntryType 'L' then as the first Entry and puts path
-- in the entryContent. The Next entry will then be the "original"
-- entry with the entryTarPath truncated.
longLinkEntry :: FilePath -> Entry
longLinkEntry tarpath = Entry {
entryTarPath = TarPath (BS.Char8.pack "././@LongLink") BS.empty,
entryContent = OtherEntryType 'L' (LBS.fromStrict $ BS.Char8.pack tarpath) (fromIntegral $ length tarpath),
entryPermissions = ordinaryFilePermissions,
entryOwnership = Ownership "" "" 0 0,
entryTime = 0,
entryFormat = GnuFormat
}


-- | A tar 'Entry' for a directory.
--
-- Entry fields such as file permissions and ownership have default values.
Expand Down Expand Up @@ -354,7 +372,7 @@ fromTarPathToWindowsPath (TarPath namebs prefixbs) = adjustDirectory $
--
toTarPath :: Bool -- ^ Is the path for a directory? This is needed because for
-- directories a 'TarPath' must always use a trailing @\/@.
-> FilePath -> Either String TarPath
-> FilePath -> These SplitError TarPath
toTarPath isDir = splitLongPath
. addTrailingSep
. FilePath.Posix.joinPath
Expand All @@ -363,24 +381,29 @@ toTarPath isDir = splitLongPath
addTrailingSep | isDir = FilePath.Posix.addTrailingPathSeparator
| otherwise = id

data SplitError = FileNameEmpty
| FileNameTooLong
deriving Show


-- | Take a sanitised path, split on directory separators and try to pack it
-- into the 155 + 100 tar file name format.
--
-- The strategy is this: take the name-directory components in reverse order
-- and try to fit as many components into the 100 long name area as possible.
-- If all the remaining components fit in the 155 name area then we win.
--
splitLongPath :: FilePath -> Either String TarPath
splitLongPath :: FilePath -> These SplitError TarPath
splitLongPath path =
case packName nameMax (reverse (FilePath.Posix.splitPath path)) of
Left err -> Left err
Right (name, []) -> Right $! TarPath (BS.Char8.pack name)
BS.empty
Left FileNameTooLong -> These FileNameTooLong $ TarPath (BS.Char8.pack $ take 100 path) BS.empty
Left e -> This e
Right (name, []) -> That $! TarPath (BS.Char8.pack name) BS.empty
Right (name, first:rest) -> case packName prefixMax remainder of
Left err -> Left err
Right (_ , (_:_)) -> Left "File name too long (cannot split)"
Right (prefix, []) -> Right $! TarPath (BS.Char8.pack name)
(BS.Char8.pack prefix)
Left FileNameTooLong -> These FileNameTooLong $ TarPath (BS.Char8.pack $ take 100 path) BS.empty
Left e -> This e
Right (_ , (_:_)) -> These FileNameTooLong $ TarPath (BS.Char8.pack $ take 100 path) BS.empty
Right (prefix, []) -> That $! TarPath (BS.Char8.pack name) (BS.Char8.pack prefix)
where
-- drop the '/' between the name and prefix:
remainder = init first : rest
Expand All @@ -390,9 +413,9 @@ splitLongPath path =
nameMax = 100
prefixMax = 155

packName _ [] = Left "File name empty"
packName _ [] = Left FileNameEmpty
packName maxLen (c:cs)
| n > maxLen = Left "File name too long"
| n > maxLen = Left FileNameTooLong
| otherwise = Right (packName' maxLen n [c] cs)
where n = length c

Expand Down Expand Up @@ -578,13 +601,13 @@ instance Arbitrary Entry where
| perms' <- shrinkIntegral perms ]

instance Arbitrary TarPath where
arbitrary = either error id
arbitrary = these (error . show) id (flip const)
. toTarPath False
. FilePath.Posix.joinPath
<$> listOf1ToN (255 `div` 5)
(elements (map (replicate 4) "abcd"))

shrink = map (either error id . toTarPath False)
shrink = map (these (error . show) id (flip const) . toTarPath False)
. map FilePath.Posix.joinPath
. filter (not . null)
. shrinkList shrinkNothing
Expand Down
17 changes: 16 additions & 1 deletion Codec/Archive/Tar/Unpack.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ module Codec.Archive.Tar.Unpack (
import Codec.Archive.Tar.Types
import Codec.Archive.Tar.Check

import qualified Data.ByteString.Char8 as BS.Char8
import qualified Data.ByteString.Lazy as BS
import System.FilePath
( (</>) )
Expand Down Expand Up @@ -80,7 +81,21 @@ unpack baseDir entries = unpackEntries [] (checkSecurity entries)
>> unpackEntries links es
HardLink link -> (unpackEntries $! saveLink path link links) es
SymbolicLink link -> (unpackEntries $! saveLink path link links) es
_ -> unpackEntries links es --ignore other file types
OtherEntryType 'L' fn _ ->
case es of
(Next entry' es') -> case entryContent entry' of
NormalFile file _ -> extractFile (BS.Char8.unpack . BS.toStrict $ fn) file mtime
>> unpackEntries links es'
Directory -> extractDir (BS.Char8.unpack . BS.toStrict $ fn) mtime
>> unpackEntries links es'
HardLink link -> (unpackEntries $! saveLink path link links) es'
SymbolicLink link -> (unpackEntries $! saveLink path link links) es'
OtherEntryType 'L' _ _ -> throwIO $ userError "Two subsequent OtherEntryType 'L'"
_ -> unpackEntries links es'
(Fail err) -> either throwIO throwIO err
Done -> throwIO $ userError "././@LongLink without a subsequent entry"
_ -> unpackEntries links es --ignore other file types

where
path = entryPath entry
mtime = entryTime entry
Expand Down
9 changes: 6 additions & 3 deletions tar.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ library
filepath < 1.5,
array < 0.6,
containers >= 0.2 && < 0.6,
deepseq >= 1.1 && < 1.5
deepseq >= 1.1 && < 1.5,
these >= 1.0.1

if flag(old-time)
build-depends: directory < 1.2, old-time < 1.2
Expand Down Expand Up @@ -92,7 +93,8 @@ test-suite properties
bytestring-handle,
QuickCheck == 2.*,
tasty >= 0.10 && <0.12,
tasty-quickcheck == 0.8.*
tasty-quickcheck == 0.8.*,
these >= 1.0.1

if flag(old-time)
build-depends: directory < 1.2, old-time
Expand Down Expand Up @@ -147,7 +149,8 @@ benchmark bench
containers,
deepseq,
time,
criterion >= 1.0
criterion >= 1.0,
these >= 1.0.1

if !impl(ghc >= 8.0)
build-depends: semigroups == 0.18.*
Expand Down

0 comments on commit 452f667

Please sign in to comment.