-
Notifications
You must be signed in to change notification settings - Fork 37
/
Types.hs
530 lines (466 loc) · 18.7 KB
/
Types.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
{-# LANGUAGE CPP, GeneralizedNewtypeDeriving, BangPatterns, DeriveTraversable #-}
-----------------------------------------------------------------------------
-- |
-- Module : Codec.Archive.Tar.Types
-- Copyright : (c) 2007 Bjorn Bringert,
-- 2008 Andrea Vezzosi,
-- 2008-2009 Duncan Coutts
-- 2011 Max Bolingbroke
-- License : BSD3
--
-- Maintainer : [email protected]
-- Portability : portable
--
-- Types to represent the content of @.tar@ archives.
--
-----------------------------------------------------------------------------
module Codec.Archive.Tar.Types (
Entry(..),
entryPath,
EntryContent(..),
FileSize,
Permissions,
Ownership(..),
EpochTime,
TypeCode,
DevMajor,
DevMinor,
Format(..),
simpleEntry,
fileEntry,
directoryEntry,
ordinaryFilePermissions,
executableFilePermissions,
directoryPermissions,
TarPath(..),
toTarPath,
fromTarPath,
fromTarPathToPosixPath,
fromTarPathToWindowsPath,
LinkTarget(..),
toLinkTarget,
fromLinkTarget,
fromLinkTargetToPosixPath,
fromLinkTargetToWindowsPath,
Entries(..),
mapEntries,
mapEntriesNoFail,
foldEntries,
foldlEntries,
unfoldEntries,
) where
import Data.Int (Int64)
import Data.Monoid (Monoid(..))
import Data.Semigroup as Sem
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BS.Char8
import qualified Data.ByteString.Lazy as LBS
import Control.DeepSeq
import qualified System.FilePath as FilePath.Native
( joinPath, splitDirectories, addTrailingPathSeparator )
import qualified System.FilePath.Posix as FilePath.Posix
( joinPath, splitPath, splitDirectories, hasTrailingPathSeparator
, addTrailingPathSeparator )
import qualified System.FilePath.Windows as FilePath.Windows
( joinPath, addTrailingPathSeparator )
import System.Posix.Types
( FileMode )
type FileSize = Int64
-- | The number of seconds since the UNIX epoch
type EpochTime = Int64
type DevMajor = Int
type DevMinor = Int
type TypeCode = Char
type Permissions = FileMode
-- | Tar archive entry.
--
data Entry = Entry {
-- | The path of the file or directory within the archive. This is in a
-- tar-specific form. Use 'entryPath' to get a native 'FilePath'.
entryTarPath :: {-# UNPACK #-} !TarPath,
-- | The real content of the entry. For 'NormalFile' this includes the
-- file data. An entry usually contains a 'NormalFile' or a 'Directory'.
entryContent :: !EntryContent,
-- | File permissions (Unix style file mode).
entryPermissions :: {-# UNPACK #-} !Permissions,
-- | The user and group to which this file belongs.
entryOwnership :: {-# UNPACK #-} !Ownership,
-- | The time the file was last modified.
entryTime :: {-# UNPACK #-} !EpochTime,
-- | The tar format the archive is using.
entryFormat :: !Format
}
deriving (Eq, Show)
-- | Native 'FilePath' of the file or directory within the archive.
--
entryPath :: Entry -> FilePath
entryPath = fromTarPath . entryTarPath
-- | The content of a tar archive entry, which depends on the type of entry.
--
-- Portable archives should contain only 'NormalFile' and 'Directory'.
--
data EntryContent = NormalFile LBS.ByteString {-# UNPACK #-} !FileSize
| Directory
| SymbolicLink !LinkTarget
| HardLink !LinkTarget
| CharacterDevice {-# UNPACK #-} !DevMajor
{-# UNPACK #-} !DevMinor
| BlockDevice {-# UNPACK #-} !DevMajor
{-# UNPACK #-} !DevMinor
| NamedPipe
| OtherEntryType {-# UNPACK #-} !TypeCode LBS.ByteString
{-# UNPACK #-} !FileSize
deriving (Eq, Ord, Show)
data Ownership = Ownership {
-- | The owner user name. Should be set to @\"\"@ if unknown.
ownerName :: String,
-- | The owner group name. Should be set to @\"\"@ if unknown.
groupName :: String,
-- | Numeric owner user id. Should be set to @0@ if unknown.
ownerId :: {-# UNPACK #-} !Int,
-- | Numeric owner group id. Should be set to @0@ if unknown.
groupId :: {-# UNPACK #-} !Int
}
deriving (Eq, Ord, Show)
-- | There have been a number of extensions to the tar file format over the
-- years. They all share the basic entry fields and put more meta-data in
-- different extended headers.
--
data Format =
-- | This is the classic Unix V7 tar format. It does not support owner and
-- group names, just numeric Ids. It also does not support device numbers.
V7Format
-- | The \"USTAR\" format is an extension of the classic V7 format. It was
-- later standardised by POSIX. It has some restrictions but is the most
-- portable format.
--
| UstarFormat
-- | The GNU tar implementation also extends the classic V7 format, though
-- in a slightly different way from the USTAR format. In general for new
-- archives the standard USTAR/POSIX should be used.
--
| GnuFormat
deriving (Eq, Ord, Show)
instance NFData Entry where
rnf (Entry _ c _ _ _ _) = rnf c
instance NFData EntryContent where
rnf x = case x of
NormalFile c _ -> rnf c
OtherEntryType _ c _ -> rnf c
_ -> seq x ()
instance NFData Ownership where
rnf (Ownership o g _ _) = rnf o `seq` rnf g
-- | @rw-r--r--@ for normal files
ordinaryFilePermissions :: Permissions
ordinaryFilePermissions = 0o0644
-- | @rwxr-xr-x@ for executable files
executableFilePermissions :: Permissions
executableFilePermissions = 0o0755
-- | @rwxr-xr-x@ for directories
directoryPermissions :: Permissions
directoryPermissions = 0o0755
-- | An 'Entry' with all default values except for the file name and type. It
-- uses the portable USTAR/POSIX format (see 'UstarHeader').
--
-- You can use this as a basis and override specific fields, eg:
--
-- > (emptyEntry name HardLink) { linkTarget = target }
--
simpleEntry :: TarPath -> EntryContent -> Entry
simpleEntry tarpath content = Entry {
entryTarPath = tarpath,
entryContent = content,
entryPermissions = case content of
Directory -> directoryPermissions
_ -> ordinaryFilePermissions,
entryOwnership = Ownership "" "" 0 0,
entryTime = 0,
entryFormat = UstarFormat
}
-- | A tar 'Entry' for a file.
--
-- Entry fields such as file permissions and ownership have default values.
--
-- You can use this as a basis and override specific fields. For example if you
-- need an executable file you could use:
--
-- > (fileEntry name content) { fileMode = executableFileMode }
--
fileEntry :: TarPath -> LBS.ByteString -> Entry
fileEntry name fileContent =
simpleEntry name (NormalFile fileContent (LBS.length fileContent))
-- | A tar 'Entry' for a directory.
--
-- Entry fields such as file permissions and ownership have default values.
--
directoryEntry :: TarPath -> Entry
directoryEntry name = simpleEntry name Directory
--
-- * Tar paths
--
-- | The classic tar format allowed just 100 characters for the file name. The
-- USTAR format extended this with an extra 155 characters, however it uses a
-- complex method of splitting the name between the two sections.
--
-- Instead of just putting any overflow into the extended area, it uses the
-- extended area as a prefix. The aggravating insane bit however is that the
-- prefix (if any) must only contain a directory prefix. That is the split
-- between the two areas must be on a directory separator boundary. So there is
-- no simple calculation to work out if a file name is too long. Instead we
-- have to try to find a valid split that makes the name fit in the two areas.
--
-- The rationale presumably was to make it a bit more compatible with old tar
-- programs that only understand the classic format. A classic tar would be
-- able to extract the file name and possibly some dir prefix, but not the
-- full dir prefix. So the files would end up in the wrong place, but that's
-- probably better than ending up with the wrong names too.
--
-- So it's understandable but rather annoying.
--
-- * Tar paths use Posix format (ie @\'/\'@ directory separators), irrespective
-- of the local path conventions.
--
-- * The directory separator between the prefix and name is /not/ stored.
--
data TarPath = TarPath {-# UNPACK #-} !BS.ByteString -- path name, 100 characters max.
{-# UNPACK #-} !BS.ByteString -- path prefix, 155 characters max.
deriving (Eq, Ord)
instance NFData TarPath where
rnf (TarPath _ _) = () -- fully strict by construction
instance Show TarPath where
show = show . fromTarPath
-- | Convert a 'TarPath' to a native 'FilePath'.
--
-- The native 'FilePath' will use the native directory separator but it is not
-- otherwise checked for validity or sanity. In particular:
--
-- * The tar path may be invalid as a native path, eg the file name @\"nul\"@
-- is not valid on Windows.
--
-- * The tar path may be an absolute path or may contain @\"..\"@ components.
-- For security reasons this should not usually be allowed, but it is your
-- responsibility to check for these conditions (eg using 'checkSecurity').
--
fromTarPath :: TarPath -> FilePath
fromTarPath (TarPath namebs prefixbs) = adjustDirectory $
FilePath.Native.joinPath $ FilePath.Posix.splitDirectories prefix
++ FilePath.Posix.splitDirectories name
where
name = BS.Char8.unpack namebs
prefix = BS.Char8.unpack prefixbs
adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name
= FilePath.Native.addTrailingPathSeparator
| otherwise = id
-- | Convert a 'TarPath' to a Unix\/Posix 'FilePath'.
--
-- The difference compared to 'fromTarPath' is that it always returns a Unix
-- style path irrespective of the current operating system.
--
-- This is useful to check how a 'TarPath' would be interpreted on a specific
-- operating system, eg to perform portability checks.
--
fromTarPathToPosixPath :: TarPath -> FilePath
fromTarPathToPosixPath (TarPath namebs prefixbs) = adjustDirectory $
FilePath.Posix.joinPath $ FilePath.Posix.splitDirectories prefix
++ FilePath.Posix.splitDirectories name
where
name = BS.Char8.unpack namebs
prefix = BS.Char8.unpack prefixbs
adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name
= FilePath.Posix.addTrailingPathSeparator
| otherwise = id
-- | Convert a 'TarPath' to a Windows 'FilePath'.
--
-- The only difference compared to 'fromTarPath' is that it always returns a
-- Windows style path irrespective of the current operating system.
--
-- This is useful to check how a 'TarPath' would be interpreted on a specific
-- operating system, eg to perform portability checks.
--
fromTarPathToWindowsPath :: TarPath -> FilePath
fromTarPathToWindowsPath (TarPath namebs prefixbs) = adjustDirectory $
FilePath.Windows.joinPath $ FilePath.Posix.splitDirectories prefix
++ FilePath.Posix.splitDirectories name
where
name = BS.Char8.unpack namebs
prefix = BS.Char8.unpack prefixbs
adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name
= FilePath.Windows.addTrailingPathSeparator
| otherwise = id
-- | Convert a native 'FilePath' to a 'TarPath'.
--
-- The conversion may fail if the 'FilePath' is too long. See 'TarPath' for a
-- description of the problem with splitting long 'FilePath's.
--
toTarPath :: Bool -- ^ Is the path for a directory? This is needed because for
-- directories a 'TarPath' must always use a trailing @\/@.
-> FilePath -> Either String TarPath
toTarPath isDir = splitLongPath
. addTrailingSep
. FilePath.Posix.joinPath
. FilePath.Native.splitDirectories
where
addTrailingSep | isDir = FilePath.Posix.addTrailingPathSeparator
| otherwise = id
-- | Take a sanitised path, split on directory separators and try to pack it
-- into the 155 + 100 tar file name format.
--
-- The strategy is this: take the name-directory components in reverse order
-- and try to fit as many components into the 100 long name area as possible.
-- If all the remaining components fit in the 155 name area then we win.
--
splitLongPath :: FilePath -> Either String TarPath
splitLongPath path =
case packName nameMax (reverse (FilePath.Posix.splitPath path)) of
Left err -> Left err
Right (name, []) -> Right $! TarPath (BS.Char8.pack name)
BS.empty
Right (name, first:rest) -> case packName prefixMax remainder of
Left err -> Left err
Right (_ , (_:_)) -> Left "File name too long (cannot split)"
Right (prefix, []) -> Right $! TarPath (BS.Char8.pack name)
(BS.Char8.pack prefix)
where
-- drop the '/' between the name and prefix:
remainder = init first : rest
where
nameMax, prefixMax :: Int
nameMax = 100
prefixMax = 155
packName _ [] = Left "File name empty"
packName maxLen (c:cs)
| n > maxLen = Left "File name too long"
| otherwise = Right (packName' maxLen n [c] cs)
where n = length c
packName' maxLen n ok (c:cs)
| n' <= maxLen = packName' maxLen n' (c:ok) cs
where n' = n + length c
packName' _ _ ok cs = (FilePath.Posix.joinPath ok, cs)
-- | The tar format allows just 100 ASCII characters for the 'SymbolicLink' and
-- 'HardLink' entry types.
--
newtype LinkTarget = LinkTarget BS.ByteString
deriving (Eq, Ord, Show)
instance NFData LinkTarget where
rnf (LinkTarget bs) = rnf bs
-- | Convert a native 'FilePath' to a tar 'LinkTarget'. This may fail if the
-- string is longer than 100 characters or if it contains non-portable
-- characters.
--
toLinkTarget :: FilePath -> Maybe LinkTarget
toLinkTarget path | length path <= 100 = Just $! LinkTarget (BS.Char8.pack path)
| otherwise = Nothing
-- | Convert a tar 'LinkTarget' to a native 'FilePath'.
--
fromLinkTarget :: LinkTarget -> FilePath
fromLinkTarget (LinkTarget pathbs) = adjustDirectory $
FilePath.Native.joinPath $ FilePath.Posix.splitDirectories path
where
path = BS.Char8.unpack pathbs
adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path
= FilePath.Native.addTrailingPathSeparator
| otherwise = id
-- | Convert a tar 'LinkTarget' to a Unix/Posix 'FilePath'.
--
fromLinkTargetToPosixPath :: LinkTarget -> FilePath
fromLinkTargetToPosixPath (LinkTarget pathbs) = adjustDirectory $
FilePath.Posix.joinPath $ FilePath.Posix.splitDirectories path
where
path = BS.Char8.unpack pathbs
adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path
= FilePath.Native.addTrailingPathSeparator
| otherwise = id
-- | Convert a tar 'LinkTarget' to a Windows 'FilePath'.
--
fromLinkTargetToWindowsPath :: LinkTarget -> FilePath
fromLinkTargetToWindowsPath (LinkTarget pathbs) = adjustDirectory $
FilePath.Windows.joinPath $ FilePath.Posix.splitDirectories path
where
path = BS.Char8.unpack pathbs
adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path
= FilePath.Windows.addTrailingPathSeparator
| otherwise = id
--
-- * Entries type
--
-- | A tar archive is a sequence of entries.
--
-- The point of this type as opposed to just using a list is that it makes the
-- failure case explicit. We need this because the sequence of entries we get
-- from reading a tarball can include errors.
--
-- It is a concrete data type so you can manipulate it directly but it is often
-- clearer to use the provided functions for mapping, folding and unfolding.
--
-- Converting from a list can be done with just @foldr Next Done@. Converting
-- back into a list can be done with 'foldEntries' however in that case you
-- must be prepared to handle the 'Fail' case inherent in the 'Entries' type.
--
-- The 'Monoid' instance lets you concatenate archives or append entries to an
-- archive.
--
data Entries e = Next Entry (Entries e)
| Done
| Fail e
deriving (Eq, Show, Functor, Foldable, Traversable)
infixr 5 `Next`
-- | This is like the standard 'unfoldr' function on lists, but for 'Entries'.
-- It includes failure as an extra possibility that the stepper function may
-- return.
--
-- It can be used to generate 'Entries' from some other type. For example it is
-- used internally to lazily unfold entries from a 'LBS.ByteString'.
--
unfoldEntries :: (a -> Either e (Maybe (Entry, a))) -> a -> Entries e
unfoldEntries f = unfold
where
unfold x = case f x of
Left err -> Fail err
Right Nothing -> Done
Right (Just (e, x')) -> Next e (unfold x')
-- | This is like the standard 'foldr' function on lists, but for 'Entries'.
-- Compared to 'foldr' it takes an extra function to account for the
-- possibility of failure.
--
-- This is used to consume a sequence of entries. For example it could be used
-- to scan a tarball for problems or to collect an index of the contents.
--
foldEntries :: (Entry -> a -> a) -> a -> (e -> a) -> Entries e -> a
foldEntries next done fail' = fold
where
fold (Next e es) = next e (fold es)
fold Done = done
fold (Fail err) = fail' err
-- | A 'foldl'-like function on Entries. It either returns the final
-- accumulator result, or the failure along with the intermediate accumulator
-- value.
--
foldlEntries :: (a -> Entry -> a) -> a -> Entries e -> Either (e, a) a
foldlEntries f z = go z
where
go !acc (Next e es) = go (f acc e) es
go !acc Done = Right acc
go !acc (Fail err) = Left (err, acc)
-- | This is like the standard 'map' function on lists, but for 'Entries'. It
-- includes failure as a extra possible outcome of the mapping function.
--
-- If your mapping function cannot fail it may be more convenient to use
-- 'mapEntriesNoFail'
mapEntries :: (Entry -> Either e' Entry) -> Entries e -> Entries (Either e e')
mapEntries f =
foldEntries (\entry rest -> either (Fail . Right) (flip Next rest) (f entry)) Done (Fail . Left)
-- | Like 'mapEntries' but the mapping function itself cannot fail.
--
mapEntriesNoFail :: (Entry -> Entry) -> Entries e -> Entries e
mapEntriesNoFail f =
foldEntries (\entry -> Next (f entry)) Done Fail
-- | @since 0.5.1.0
instance Sem.Semigroup (Entries e) where
a <> b = foldEntries Next b Fail a
instance Monoid (Entries e) where
mempty = Done
mappend = (Sem.<>)
instance NFData e => NFData (Entries e) where
rnf (Next e es) = rnf e `seq` rnf es
rnf Done = ()
rnf (Fail e) = rnf e