backup/restore: fix import of volumes with small or unaligned size
Migrate from resize() followed by unspecified-size import_data() to the
newer method import_data_with_size(), fixing two bugs.

1. When restoring a volume with a size that's smaller than the default,
   the resize step didn't attempt to shrink (and not all storage drivers
   would have supported it anyway), which caused the import step to fail
   with the message "not enough data":

Fixes QubesOS/qubes-issues#7176

2. When restoring a volume with a size that's not divisible by 4 MiB
   (e.g. backed up from file-reflink), LVM Thin implicitly rounds the
   resize up to the next multiple, which again caused the import step
   to fail in the same way:

https://forum.qubes-os.org/t/backup-didnt-restore-private-volume/17874
(cherry picked from commit ba9b24d)
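
To illustrate the fix, here is a minimal sketch (not the actual qubesadmin code; `volume` stands in for the Volume API and the sizes are invented):

    # Old flow: grow-only resize followed by an unspecified-size import.
    def restore_volume_old(volume, stream, backup_size):
        # never shrinks a volume that is larger than backup_size, and
        # LVM Thin silently rounds the new size up to the next 4 MiB
        # multiple
        if volume.size < backup_size:
            volume.resize(backup_size)
        # import_data() then expects enough data to fill volume.size,
        # so a shorter stream fails with "not enough data"
        volume.import_data(stream)

    # New flow: tell the storage driver the exact size up front.
    def restore_volume_new(volume, stream, backup_size):
        volume.import_data_with_size(stream, backup_size)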
rustybird authored and marmarek committed Jun 22, 2023
1 parent ea9c773 commit 5feeb01
Showing 2 changed files with 36 additions and 62 deletions.
82 changes: 34 additions & 48 deletions qubesadmin/backup/restore.py
@@ -26,6 +26,7 @@
 import functools
 import getpass
 import grp
+import inspect
 import logging
 import multiprocessing
 from multiprocessing import Queue, Process
@@ -339,12 +340,6 @@ def __init__(self, queue, base_dir, passphrase, encrypted,
         :py:obj:`QUEUE_ERROR` to end data processing (either cleanly,
         or forcefully).
-
-        Handlers are given as a map filename -> (data_func, size_func),
-        where data_func is called with file-like object to process,
-        and size_func is called with file size as argument. Note that
-        data_func and size_func may be called simultaneusly, in a different
-        processes.
 
         :param multiprocessing.Queue queue: a queue with filenames to
         process; those files needs to be given as full path, inside *base_dir*
         :param str base_dir: directory where all files to process live
Expand All @@ -369,11 +364,12 @@ def __init__(self, queue, base_dir, passphrase, encrypted,
self.base_dir = base_dir
#: passphrase to decrypt/authenticate data
self.passphrase = passphrase
#: handlers for files; it should be dict filename -> (data_function,
# size_function),
# where data_function will get file-like object as the only argument and
# might be called in a separate process (multiprocessing.Process),
# and size_function will get file size (when known) in bytes
#: handlers for files; it should be dict filename -> data_function
# where data_function (which might be called in a separate
# multiprocessing.Process) will get a file-like object as the first
# argument. If the data_function signature accepts a keyword-only
# argument named file_size, it will also get the size in bytes when
# known.
self.handlers = handlers
#: is the backup encrypted?
self.encrypted = encrypted
@@ -459,16 +455,18 @@ def handle_dir(self, dirname):
         :param dirname: directory path to handle (relative to backup root),
         without trailing slash
         '''
-        for fname, (data_func, size_func) in self.handlers.items():
+        for fname, data_func in self.handlers.items():
             if not fname.startswith(dirname + '/'):
                 continue
             if not os.path.exists(fname):
                 # for example firewall.xml
                 continue
-            if size_func is not None:
-                size_func(os.path.getsize(fname))
             with open(fname, 'rb') as input_file:
-                data_func(input_file)
+                data_func_kwargs = {}
+                if 'file_size' in inspect.getfullargspec(data_func).kwonlyargs:
+                    data_func_kwargs['file_size'] = \
+                        os.stat(input_file.fileno()).st_size
+                data_func(input_file, **data_func_kwargs)
             os.unlink(fname)
         shutil.rmtree(dirname)
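
The handler contract change above (one callable instead of a (data_func, size_func) pair) hinges on signature introspection. A minimal sketch of the same dispatch, with invented handler names:

    import functools
    import inspect

    def firewall_handler(stream):
        '''Size-agnostic handler: takes only the stream.'''
        stream.read()

    def volume_handler(volume_name, stream, *, file_size=None):
        '''Size-aware handler: declares a keyword-only file_size.'''
        print(volume_name, file_size)

    def dispatch(handler, stream, size):
        kwargs = {}
        # getfullargspec sees through functools.partial, so handlers
        # pre-bound with partial still expose their file_size argument
        if 'file_size' in inspect.getfullargspec(handler).kwonlyargs:
            kwargs['file_size'] = size
        handler(stream, **kwargs)

    # dispatch(functools.partial(volume_handler, 'private'), stream, 1234)
    # calls volume_handler('private', stream, file_size=1234), while
    # dispatch(firewall_handler, stream, 1234) calls firewall_handler(stream)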

@@ -512,8 +510,7 @@ def cleanup_tar2(self, wait=True, terminate=False):
         self.tar2_current_file = None
         self.tar2_process = None
 
-    def _data_import_wrapper(self, close_fds, data_func, size_func,
-                             tar2_process):
+    def _data_import_wrapper(self, close_fds, data_func, tar2_process):
         '''Close not needed file descriptors, handle output size reported
         by tar (if needed) then call data_func(tar2_process.stdout).
@@ -533,7 +530,8 @@ def _data_import_wrapper(self, close_fds, data_func,
         # not read data from tar's stdout at this point, it will
         # hang if it tries to output file content before
         # reporting its size on stderr first
-        if size_func:
+        data_func_kwargs = {}
+        if 'file_size' in inspect.getfullargspec(data_func).kwonlyargs:
             # process lines on stderr until we get file size
             # search for first file size reported by tar -
             # this is used only when extracting single-file archive, so don't
@@ -553,13 +551,12 @@ def _data_import_wrapper(self, close_fds, data_func,
                 else:
                     match = _tar_file_size_re.match(line)
                     if match:
-                        file_size = int(match.groups()[0])
-                        size_func(file_size)
+                        data_func_kwargs['file_size'] = int(match.groups()[0])
                         break
                     self.log.warning(
                         'unexpected tar output (no file size report): %s', line)
 
-        return data_func(tar2_process.stdout)
+        return data_func(tar2_process.stdout, **data_func_kwargs)
 
     def feed_tar2(self, filename, input_pipe):
         '''Feed data from *filename* to *input_pipe*
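
The file size consumed above comes from parsing the extractor's stderr. The real pattern lives in _tar_file_size_re and depends on the backup's tar invocation, so the line format below is only a hypothetical stand-in to show the control flow:

    import re

    # hypothetical format; the real _tar_file_size_re matches whatever
    # the backup's tar process reports on stderr
    _size_re = re.compile(r'^SIZE (\d+)$')

    def first_reported_size(stderr_lines):
        for line in stderr_lines:
            match = _size_re.match(line)
            if match:
                return int(match.groups()[0])
        return None  # no size report: caller imports without a size

    # first_reported_size(['noise', 'SIZE 2097152']) == 2097152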
@@ -705,11 +702,11 @@ def __run__(self):
 
             if inner_name in self.handlers:
                 assert redirect_stdout is subprocess.PIPE
-                data_func, size_func = self.handlers[inner_name]
+                data_func = self.handlers[inner_name]
                 self.import_process = multiprocessing.Process(
                     target=self._data_import_wrapper,
                     args=([input_pipe.fileno()],
-                          data_func, size_func, self.tar2_process))
+                          data_func, self.tar2_process))
 
                 self.import_process.start()
                 self.tar2_process.stdout.close()
@@ -1361,9 +1358,7 @@ def _process_qubes_xml(self):
 
         qubes_xml_path = os.path.join(self.tmpdir, 'qubes-restored.xml')
         handlers = {
-            'qubes.xml': (
-                functools.partial(self._save_qubes_xml, qubes_xml_path),
-                None)
+            'qubes.xml': functools.partial(self._save_qubes_xml, qubes_xml_path)
         }
         extract_proc = self._start_inner_extraction_worker(queue, handlers)
         extract_proc.join()
@@ -1866,22 +1861,16 @@ def _handle_appmenus_list(self, vm, stream):
             self.log.error(
                 'Failed to set application list for %s: %s', vm.name, e)
 
-    def _handle_volume_data(self, vm, volume, stream):
+    def _handle_volume_data(self, vm, volume, stream, *, file_size=None):
         '''Wrap volume data import with logging'''
         try:
-            volume.import_data(stream)
-        except Exception as err:  # pylint: disable=broad-except
-            self.log.error('Failed to restore volume %s of VM %s: %s',
-                volume.name, vm.name, err)
-
-    def _handle_volume_size(self, vm, volume, size):
-        '''Wrap volume resize with logging'''
-        try:
-            if volume.size < size:
-                volume.resize(size)
+            if file_size is None:
+                volume.import_data(stream)
+            else:
+                volume.import_data_with_size(stream, file_size)
         except Exception as err:  # pylint: disable=broad-except
-            self.log.error('Failed to resize volume %s of VM %s to %d: %s',
-                volume.name, vm.name, size, err)
+            self.log.error('Failed to restore volume %s (size %s) of VM %s: %s',
+                volume.name, file_size, vm.name, err)
 
     def check_disk_space(self):
         """
@@ -1935,7 +1924,7 @@ def restore_do(self, restore_info):
                 if self.options.verify_only:
                     continue
 
-                handlers[vm_info.subdir] = (self._handle_dom0, None)
+                handlers[vm_info.subdir] = self._handle_dom0
             else:
                 vms_size += int(vm_info.size)
                 vms_dirs.append(vm_info.subdir)
@@ -1948,16 +1937,13 @@ def restore_do(self, restore_info):
                     continue
                 data_func = functools.partial(
                     self._handle_volume_data, vm, volume)
-                size_func = functools.partial(
-                    self._handle_volume_size, vm, volume)
                 img_path = os.path.join(vm_info.subdir, name + '.img')
-                handlers[img_path] = (data_func, size_func)
-            handlers[os.path.join(vm_info.subdir, 'firewall.xml')] = (
-                functools.partial(vm_info.vm.handle_firewall_xml, vm),
-                None)
+                handlers[img_path] = data_func
+            handlers[os.path.join(vm_info.subdir, 'firewall.xml')] = \
+                functools.partial(vm_info.vm.handle_firewall_xml, vm)
             handlers[os.path.join(vm_info.subdir,
-                'whitelisted-appmenus.list')] = (
-                functools.partial(self._handle_appmenus_list, vm), None)
+                'whitelisted-appmenus.list')] = \
+                functools.partial(self._handle_appmenus_list, vm)
         try:
             self._restore_vm_data(vms_dirs=vms_dirs, vms_size=vms_size,
                 handlers=handlers)
16 changes: 2 additions & 14 deletions qubesadmin/tests/backup/backupcompatibility.py
@@ -1392,11 +1392,7 @@ def setup_expected_calls(self, parsed_qubes_xml, templates_map=None):
                 b'internal=True\n' \
                 b'revisions_to_keep=3\n'
             self.app.expected_calls[
-                (name, 'admin.vm.volume.Resize', 'root',
-                 str(vm.get('root_size', 2097664)).encode())] = \
-                b'0\0'
-            self.app.expected_calls[
-                (name, 'admin.vm.volume.Import', 'root',
+                (name, 'admin.vm.volume.ImportWithSize', 'root',
                  name.encode() + b'/root')] = \
                 b'0\0'
 
@@ -1412,10 +1408,7 @@ def setup_expected_calls(self, parsed_qubes_xml, templates_map=None):
                 b'save_on_stop=True\n' \
                 b'revisions_to_keep=3\n'
             self.app.expected_calls[
-                (name, 'admin.vm.volume.Resize', 'private', b'2097152')] = \
-                b'0\0'
-            self.app.expected_calls[
-                (name, 'admin.vm.volume.Import', 'private',
+                (name, 'admin.vm.volume.ImportWithSize', 'private',
                  name.encode() + b'/private')] = \
                 b'0\0'
             self.app.expected_calls[
@@ -2043,11 +2036,6 @@ def test_300_r4_no_space(self):
         self.app.expected_calls[
             ('test-work', 'admin.vm.firewall.Set', None,
              firewall_data.encode())] = b'0\0'
-        del self.app.expected_calls[
-            ('test-work', 'admin.vm.volume.Resize', 'private', b'2097152')]
-        self.app.expected_calls[
-            ('test-work', 'admin.vm.volume.Resize', 'private', b'2147483648')] = \
-            b'0\0'
 
         qubesd_calls_queue = multiprocessing.Queue()
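
These test changes mirror the Admin API traffic: Volume.import_data_with_size() issues a single admin.vm.volume.ImportWithSize call where the old code needed admin.vm.volume.Resize plus admin.vm.volume.Import. A sketch of the caller side (qube name and file path invented):

    import os
    import qubesadmin

    app = qubesadmin.Qubes()
    volume = app.domains['test-work'].volumes['private']
    with open('private.img', 'rb') as stream:
        # sends admin.vm.volume.ImportWithSize with the exact byte count,
        # letting the storage driver allocate precisely that much space
        volume.import_data_with_size(stream, os.path.getsize('private.img'))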
