From 29f85a3c53fc06d061861633ea38e2e970d839a6 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 19 Jan 2023 12:21:31 -0800 Subject: [PATCH] [versions-test] Work around bug in dictionary builder for older versions Older versions of zstandard have a bug in the dictionary builder that can cause dictionary building to fail. The process still exits 0, but the dictionary is not created. For reference, the bug is that it creates a dictionary that starts with the zstd dictionary magic, in the process of writing the dictionary header, but the header isn't fully written yet, and zstd fails compressions in this case, because the dictionary is malformed. We fixed this later on by trying to load the dictionary as a zstd dictionary, but if that fails we fall back to content only (by default). The fix is to: 1. Make the dictionary deterministic by sorting the input files. Previously the bug would only sometimes occur, when the input files were in a particular order. 2. If dictionary creation fails, fall back to the `head` dictionary. --- tests/test-zstd-versions.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/test-zstd-versions.py b/tests/test-zstd-versions.py index 88b0578ebb3..d6784d61567 100755 --- a/tests/test-zstd-versions.py +++ b/tests/test-zstd-versions.py @@ -85,18 +85,23 @@ def get_git_tags(): return tags -def create_dict(tag, dict_source_path): +def create_dict(tag, dict_source_path, fallback_tag=None): dict_name = 'dict.' + tag if not os.path.isfile(dict_name): cFiles = glob.glob(dict_source_path + "/*.c") hFiles = glob.glob(dict_source_path + "/*.h") + # Ensure the dictionary builder is deterministic + files = sorted(cFiles + hFiles) if tag == 'v0.5.0': - result = execute('./dictBuilder.' + tag + ' ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name, print_output=False, param_shell=True) + result = execute('./dictBuilder.' 
+ tag + ' ' + ' '.join(files) + ' -o ' + dict_name, print_output=False, param_shell=True) else: - result = execute('./zstd.' + tag + ' -f --train ' + ' '.join(cFiles) + ' ' + ' '.join(hFiles) + ' -o ' + dict_name, print_output=False, param_shell=True) - if result == 0: + result = execute('./zstd.' + tag + ' -f --train ' + ' '.join(files) + ' -o ' + dict_name, print_output=False, param_shell=True) + if result == 0 and os.path.isfile(dict_name): print(dict_name + ' created') - assert os.path.isfile(dict_name) + elif fallback_tag is not None: + fallback_dict_name = 'dict.' + fallback_tag + print('creating dictionary ' + dict_name + ' failed, falling back to ' + fallback_dict_name) + shutil.copy(fallback_dict_name, dict_name) else: raise RuntimeError('ERROR: creating of ' + dict_name + ' failed') else: @@ -272,10 +277,11 @@ def decompress_dict(tag): print('Compress test.dat by all released zstd') print('-----------------------------------------------') + create_dict(head, dict_source_path) for tag in tags: print(tag) if tag >= 'v0.5.0': - create_dict(tag, dict_source_path) + create_dict(tag, dict_source_path, head) dict_compress_sample(tag, test_dat) remove_duplicates() decompress_dict(tag)