Skip to content

Commit

Permalink
Check that filenames are ASCII instead of silent corruption (see #6)
Browse files Browse the repository at this point in the history
  • Loading branch information
Bodigrim committed Nov 18, 2023
1 parent 0ed8145 commit 423e6af
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 5 deletions.
3 changes: 2 additions & 1 deletion Codec/Archive/Tar/Index/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ import qualified Codec.Archive.Tar.Index.StringTable as StringTable
import Codec.Archive.Tar.Index.StringTable (StringTable, StringTableBuilder)
import qualified Codec.Archive.Tar.Index.IntTrie as IntTrie
import Codec.Archive.Tar.Index.IntTrie (IntTrie, IntTrieBuilder)
import Codec.Archive.Tar.PackAscii

import qualified System.FilePath.Posix as FilePath
import Data.Monoid (Monoid(..))
Expand Down Expand Up @@ -174,7 +175,7 @@ toComponentIds table =
lookupComponents []
. filter (/= BS.Char8.singleton '.')
. splitDirectories
. BS.Char8.pack
. packAscii
where
lookupComponents cs' [] = Just (reverse cs')
lookupComponents cs' (c:cs) = case StringTable.lookup table c of
Expand Down
14 changes: 14 additions & 0 deletions Codec/Archive/Tar/PackAscii.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
module Codec.Archive.Tar.PackAscii
( packAscii
) where

import qualified Data.ByteString.Char8 as BS.Char8
import Data.Char
import GHC.Stack

-- | We should really migrate to 'OsPath' from 'filepath',
-- but for now let's not corrupt data silently.
packAscii :: HasCallStack => FilePath -> BS.Char8.ByteString
packAscii xs
| all isAscii xs = BS.Char8.pack xs
| otherwise = error $ "packAscii: only ASCII filenames are supported, but got " ++ xs
10 changes: 6 additions & 4 deletions Codec/Archive/Tar/Types.hs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ import qualified System.FilePath.Windows as FilePath.Windows
import System.Posix.Types
( FileMode )

import Codec.Archive.Tar.PackAscii

type FileSize = Int64
-- | The number of seconds since the UNIX epoch
type EpochTime = Int64
Expand Down Expand Up @@ -357,14 +359,14 @@ splitLongPath :: FilePath -> Either String TarPath
splitLongPath path =
case packName nameMax (reverse (FilePath.Posix.splitPath path)) of
Left err -> Left err
Right (name, []) -> Right $! TarPath (BS.Char8.pack name)
Right (name, []) -> Right $! TarPath (packAscii name)
BS.empty
Right (name, first:rest) -> case packName prefixMax remainder of
Left err -> Left err
Right (_ , (_:_)) -> Left $ "Filename " ++ path ++
" too long (cannot split)"
Right (prefix, []) -> Right $! TarPath (BS.Char8.pack name)
(BS.Char8.pack prefix)
Right (prefix, []) -> Right $! TarPath (packAscii name)
(packAscii prefix)
where
-- drop the '/' between the name and prefix:
remainder = init first : rest
Expand Down Expand Up @@ -399,7 +401,7 @@ instance NFData LinkTarget where
-- characters.
--
toLinkTarget :: FilePath -> Maybe LinkTarget
toLinkTarget path | length path <= 100 = Just $! LinkTarget (BS.Char8.pack path)
toLinkTarget path | length path <= 100 = Just $! LinkTarget (packAscii path)
| otherwise = Nothing

-- | Convert a tar 'LinkTarget' to a native 'FilePath'.
Expand Down
1 change: 1 addition & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ See also http://pvp.haskell.org/faq
* Speed up `fromTarPathToPosixPath`
* Set permissions on extracted files
* Handle > 8 GB files
* Prohibit non-ASCII file names instead of silent corruption

0.5.1.1 Herbert Valerio Riedel <[email protected]> August 2019

Expand Down
1 change: 1 addition & 0 deletions tar.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ library tar-internal
Codec.Archive.Tar.Read
Codec.Archive.Tar.Write
Codec.Archive.Tar.Pack
Codec.Archive.Tar.PackAscii
Codec.Archive.Tar.Unpack
Codec.Archive.Tar.Index.StringTable
Codec.Archive.Tar.Index.IntTrie
Expand Down

0 comments on commit 423e6af

Please sign in to comment.