Skip to content

Commit

Permalink
Merge pull request #6523 from ThomasWaldmann/pax-borg-item-master
Browse files Browse the repository at this point in the history
import/export-tar: --tar-format=BORG: roundtrip ALL item metadata
  • Loading branch information
ThomasWaldmann authored Apr 9, 2022
2 parents 1b95950 + e8069a8 commit 28fa9e0
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 41 deletions.
46 changes: 40 additions & 6 deletions docs/usage/tar.rst
Original file line number Diff line number Diff line change
@@ -1,23 +1,57 @@
.. include:: import-tar.rst.inc

.. include:: export-tar.rst.inc

.. include:: import-tar.rst.inc

Examples
~~~~~~~~
::

# export as uncompressed tar
$ borg export-tar /path/to/repo::Monday Monday.tar

# exclude some types, compress using gzip
# import an uncompressed tar
$ borg import-tar /path/to/repo::Monday Monday.tar

# exclude some file types, compress using gzip
$ borg export-tar /path/to/repo::Monday Monday.tar.gz --exclude '*.so'

# use higher compression level with gzip
$ borg export-tar --tar-filter="gzip -9" testrepo::linux Monday.tar.gz
$ borg export-tar --tar-filter="gzip -9" repo::Monday Monday.tar.gz

# export a tar, but instead of storing it on disk,
# upload it to a remote site using curl.
# copy an archive from repoA to repoB
$ borg export-tar --tar-format=BORG repoA::archive - | borg import-tar repoB::archive -

# export a tar, but instead of storing it on disk, upload it to a remote site using curl
$ borg export-tar /path/to/repo::Monday - | curl --data-binary @- https://somewhere/to/POST

# remote extraction via "tarpipe"
$ borg export-tar /path/to/repo::Monday - | ssh somewhere "cd extracted; tar x"

Archives transfer script
~~~~~~~~~~~~~~~~~~~~~~~~

Outputs a script that copies all archives from repo1 to repo2:

::

borg list --format='{archive} {time:%Y-%m-%dT%H:%M:%S}{LF}' repo1 | while read -r A T
do
echo "borg export-tar --tar-format=BORG repo1::$A - | borg import-tar --timestamp=$T repo2::$A -"
done

Kept:

- archive name, archive timestamp
- archive contents (all items with metadata and data)

Lost:

- some archive metadata (like the original commandline, execution time, etc.)

Please note:

- all data goes over that pipe, again and again for every archive
- the pipe is dumb, there is no data or transfer time reduction there due to deduplication
- maybe add compression
- pipe over ssh for remote transfer
- no special sparse file support
35 changes: 21 additions & 14 deletions src/borg/archive.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import json
import os
import socket
Expand Down Expand Up @@ -1454,20 +1455,26 @@ def __init__(self, *, cache, key,

@contextmanager
def create_helper(self, tarinfo, status=None, type=None):
def s_to_ns(s):
return safe_ns(int(float(s) * 1e9))

item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None,
mtime=s_to_ns(tarinfo.mtime))
if tarinfo.pax_headers:
ph = tarinfo.pax_headers
# note: for mtime this is a bit redundant as it is already done by tarfile module,
# but we just do it in our way to be consistent for sure.
for name in 'atime', 'ctime', 'mtime':
if name in ph:
ns = s_to_ns(ph[name])
setattr(item, name, ns)
ph = tarinfo.pax_headers
if ph and 'BORG.item.version' in ph:
assert ph['BORG.item.version'] == '1'
meta_bin = base64.b64decode(ph['BORG.item.meta'])
meta_dict = msgpack.unpackb(meta_bin, object_hook=StableDict)
item = Item(internal_dict=meta_dict)
else:
def s_to_ns(s):
return safe_ns(int(float(s) * 1e9))

item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None,
mtime=s_to_ns(tarinfo.mtime))
if ph:
# note: for mtime this is a bit redundant as it is already done by tarfile module,
# but we just do it in our way to be consistent for sure.
for name in 'atime', 'ctime', 'mtime':
if name in ph:
ns = s_to_ns(ph[name])
setattr(item, name, ns)
yield item, status
# if we get here, "with"-block worked ok without error/exception, the item was processed ok...
self.add_item(item, stats=self.stats)
Expand Down
65 changes: 44 additions & 21 deletions src/borg/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

try:
import argparse
import base64
import collections
import configparser
import faulthandler
Expand Down Expand Up @@ -1147,7 +1148,7 @@ def peek_and_store_hardlink_masters(item, matched):

# The | (pipe) symbol instructs tarfile to use a streaming mode of operation
# where it never seeks on the passed fileobj.
tar_format = dict(GNU=tarfile.GNU_FORMAT, PAX=tarfile.PAX_FORMAT)[args.tar_format]
tar_format = dict(GNU=tarfile.GNU_FORMAT, PAX=tarfile.PAX_FORMAT, BORG=tarfile.PAX_FORMAT)[args.tar_format]
tar = tarfile.open(fileobj=tarstream, mode='w|', format=tar_format)

if progress:
Expand Down Expand Up @@ -1240,22 +1241,37 @@ def item_to_tarinfo(item, original_path):
return None, stream
return tarinfo, stream

def item_to_paxheaders(item):
def item_to_paxheaders(format, item):
"""
Transform (parts of) a Borg *item* into a pax_headers dict.
"""
# PAX format
# ----------
# When using the PAX (POSIX) format, we can support some things that aren't possible
# with classic tar formats, including GNU tar, such as:
# - atime, ctime (DONE)
# - possibly Linux capabilities, security.* xattrs (TODO)
# - various additions supported by GNU tar in POSIX mode (TODO)
#
# BORG format
# -----------
# This is based on PAX, but additionally adds BORG.* pax headers.
# In addition to the standard tar / PAX metadata and data, it transfers
# ALL borg item metadata in a BORG specific way.
#
ph = {}
# note: for mtime this is a bit redundant as it is already done by tarfile module,
# but we just do it in our way to be consistent for sure.
for name in 'atime', 'ctime', 'mtime':
if hasattr(item, name):
ns = getattr(item, name)
ph[name] = str(ns / 1e9)
if format == 'BORG': # BORG format additions
ph['BORG.item.version'] = '1'
# BORG.item.meta - just serialize all metadata we have:
meta_bin = msgpack.packb(item.as_dict())
meta_text = base64.b64encode(meta_bin).decode()
ph['BORG.item.meta'] = meta_text
return ph

for item in archive.iter_items(filter, partial_extract=partial_extract,
Expand All @@ -1265,8 +1281,8 @@ def item_to_paxheaders(item):
item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
tarinfo, stream = item_to_tarinfo(item, orig_path)
if tarinfo:
if args.tar_format == 'PAX':
tarinfo.pax_headers = item_to_paxheaders(item)
if args.tar_format in ('BORG', 'PAX'):
tarinfo.pax_headers = item_to_paxheaders(args.tar_format, item)
if output_list:
logging.getLogger('borg.output.list').info(remove_surrogates(orig_path))
tar.addfile(tarinfo, stream)
Expand Down Expand Up @@ -4067,15 +4083,18 @@ def define_borg_mount(parser):
read the uncompressed tar stream from stdin and write a compressed/filtered
tar stream to stdout.
Depending on the ```-tar-format``option, the generated tarball uses this format:
Depending on the ``--tar-format`` option, these formats are created:
- PAX: POSIX.1-2001 (pax) format
- GNU: GNU tar format
export-tar is a lossy conversion:
BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported.
Timestamp resolution is limited to whole seconds, not the nanosecond resolution
otherwise supported by Borg.
+--------------+---------------------------+----------------------------+
| --tar-format | Specification | Metadata |
+--------------+---------------------------+----------------------------+
| BORG | BORG specific, like PAX | all as supported by borg |
+--------------+---------------------------+----------------------------+
| PAX | POSIX.1-2001 (pax) format | GNU + atime/ctime/mtime ns |
+--------------+---------------------------+----------------------------+
| GNU | GNU tar format | mtime s, no atime/ctime, |
| | | no ACLs/xattrs/bsdflags |
+--------------+---------------------------+----------------------------+
A ``--sparse`` option (as found in borg extract) is not supported.
Expand All @@ -4099,8 +4118,8 @@ def define_borg_mount(parser):
subparser.add_argument('--list', dest='output_list', action='store_true',
help='output verbose list of items (files, dirs, ...)')
subparser.add_argument('--tar-format', metavar='FMT', dest='tar_format', default='GNU',
choices=('PAX', 'GNU'),
help='select tar format: PAX or GNU')
choices=('BORG', 'PAX', 'GNU'),
help='select tar format: BORG, PAX or GNU')
subparser.add_argument('location', metavar='ARCHIVE',
type=location_validator(archive=True),
help='archive to export')
Expand Down Expand Up @@ -4974,15 +4993,19 @@ def define_borg_mount(parser):
Most documentation of borg create applies. Note that this command does not
support excluding files.
import-tar is a lossy conversion:
BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported.
Timestamp resolution is limited to whole seconds, not the nanosecond resolution
otherwise supported by Borg.
A ``--sparse`` option (as found in borg create) is not supported.
import-tar reads POSIX.1-1988 (ustar), POSIX.1-2001 (pax), GNU tar, UNIX V7 tar
and SunOS tar with extended attributes.
About tar formats and metadata conservation or loss, please see ``borg export-tar``.
import-tar reads these tar formats:
- BORG: borg specific (PAX-based)
- PAX: POSIX.1-2001
- GNU: GNU tar
- POSIX.1-1988 (ustar)
- UNIX V7 tar
- SunOS tar with extended attributes
""")
subparser = subparsers.add_parser('import-tar', parents=[common_parser], add_help=False,
description=self.do_import_tar.__doc__,
Expand Down
10 changes: 10 additions & 0 deletions src/borg/testsuite/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -3499,6 +3499,16 @@ def test_import_tar_gz(self, tar_format='GNU'):
self.cmd('extract', self.repository_location + '::dst')
self.assert_dirs_equal('input', 'output/input', ignore_ns=True, ignore_xattrs=True)

# Round-trip test for the BORG tar format:
# create archive -> export-tar --tar-format=BORG -> import-tar -> extract -> compare directories.
def test_roundtrip_pax_borg(self):
# set up the input files and an unencrypted repository holding them as archive 'src'
self.create_test_files()
self.cmd('init', '--encryption=none', self.repository_location)
self.cmd('create', self.repository_location + '::src', 'input')
# export 'src' to simple.tar, explicitly selecting the BORG tar format
self.cmd('export-tar', self.repository_location + '::src', 'simple.tar', '--tar-format=BORG')
# import that tarball into a second archive 'dst' of the same repository
self.cmd('import-tar', self.repository_location + '::dst', 'simple.tar')
# extract 'dst' within changedir(self.output_path)
# (presumably switches the working directory there, so files land below output/ - confirm)
with changedir(self.output_path):
self.cmd('extract', self.repository_location + '::dst')
# compare the original input with the extracted tree; note that no ignore_* arguments
# are passed to assert_dirs_equal here
self.assert_dirs_equal('input', 'output/input')

# derived from test_extract_xattrs_errors()
@pytest.mark.skipif(not xattr.XATTR_FAKEROOT, reason='xattr not supported on this system or on this version of'
'fakeroot')
Expand Down

0 comments on commit 28fa9e0

Please sign in to comment.