From 35a787eeb3d915de47830d177dd6565da974e5bd Mon Sep 17 00:00:00 2001
From: ziadhany
Date: Tue, 27 Feb 2024 17:16:17 +0200
Subject: [PATCH] Fix disk storage structure

Signed-off-by: ziadhany
---
 vulnerabilities/management/commands/export.py | 29 ++++++++++---------
 vulnerabilities/tests/test_export.py          | 14 ++-------
 2 files changed, 18 insertions(+), 25 deletions(-)

diff --git a/vulnerabilities/management/commands/export.py b/vulnerabilities/management/commands/export.py
index b78fce936..11703a954 100644
--- a/vulnerabilities/management/commands/export.py
+++ b/vulnerabilities/management/commands/export.py
@@ -8,6 +8,7 @@
 #
 import logging
 import os
+from hashlib import sha512
 from pathlib import Path
 
 import saneyaml
@@ -39,14 +40,13 @@ def handle(self, *args, **options):
     def export_data(self, git_path):
         """
         export vulnerablecode data
-        by run `python manage.py export /path/vulnerablecode-data`
+        by running `python manage.py export /path/vulnerablecode-data`
         """
         self.stdout.write("Exporting vulnerablecode data")
 
         ecosystems = [pkg.type for pkg in Package.objects.distinct("type")]
 
         for ecosystem in ecosystems:
-            version_files = {}  # {"version path": "data" }
             package_files = {}  # {"package path": "data" }
             vul_files = {}  # {"vulnerability path": "data" }
 
@@ -63,16 +63,10 @@ def export_data(self, git_path):
 
                 # ./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/vulnerabilities.yml
                 pkg_filepath = (
-                    f"./aboutcode-packages-ed5/{purl.type}/{purl.namespace}/{purl.name}"
+                    f"./aboutcode-packages-{get_purl_hash(purl_without_version)}/{purl.type}/{purl.namespace}/{purl.name}"
                     f"/versions/vulnerabilities.yml"
                 )
 
-                # ./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/1.2.3/vulnerabilities.yml
-                version_filepath = (
-                    f"./aboutcode-packages-ed5/{purl.type}/{purl.namespace}/{purl.name}/versions/"
-                    f"{purl.version}/vulnerabilities.yml"
-                )
-
                 package_data = {
                     "purl": str(purl),
                     "affected_by_vulnerabilities": [
@@ -88,13 +82,12 @@ def export_data(self, git_path):
                         "package": str(purl_without_version),
                         "versions": [package_data],
                     }
-                version_files[version_filepath] = package_data
 
                 for vul in purl.vulnerabilities.all():
                     vulnerability_id = vul.vulnerability_id
-                    # ./aboutcode-vulnerabilities-1223/3434/VCID-1223-3434-34343/VCID-1223-3434-34343.yml
+                    # ./aboutcode-vulnerabilities-12/34/VCID-1223-3434-34343/VCID-1223-3434-34343.yml
                     vul_filepath = (
-                        f"./aboutcode-vulnerabilities-{vulnerability_id[5:9]}/{vulnerability_id[10:14]}"
+                        f"./aboutcode-vulnerabilities-{vulnerability_id[5:7]}/{vulnerability_id[10:12]}"
                         f"/{vulnerability_id}/{vulnerability_id}.yml"
                     )
                     vul_files[vul_filepath] = {
@@ -108,7 +101,7 @@ def export_data(self, git_path):
                         ],
                     }
 
-            for items in [package_files, version_files, vul_files]:
+            for items in [package_files, vul_files]:
                 for filepath, data in items.items():
                     create_file(filepath, git_path, data)
 
@@ -128,3 +121,13 @@ def create_file(filepath, git_path, data):
     data = saneyaml.dump(data)
     with open(filepath, encoding="utf-8", mode="w") as f:
         f.write(data)
+
+
+def get_purl_hash(purl: PackageURL, length: int = 3) -> str:
+    """
+    Return a short lower cased hash of a purl.
+    https://github.com/nexB/purldb/pull/235/files#diff-a1fd023bd42d73f56019d540f38be711255403547add15108540d70f9948dd40R154
+    """
+    purl_bytes = str(purl).encode("utf-8")
+    short_hash = sha512(purl_bytes).hexdigest()[:length]
+    return short_hash.lower()
diff --git a/vulnerabilities/tests/test_export.py b/vulnerabilities/tests/test_export.py
index 67af22e61..b2c46ed31 100644
--- a/vulnerabilities/tests/test_export.py
+++ b/vulnerabilities/tests/test_export.py
@@ -118,27 +118,17 @@ def test_export_data(
             },
         ],
     }
-    expected_version = {
-        "purl": "pkg:generic/nginx/test@2",
-        "affected_by_vulnerabilities": ["VCID-pst6-b358-aaap"],
-        "fixing_vulnerabilities": [],
-    }
 
     call_command("export", tmp_path, stdout=StringIO())
 
     vul_filepath = os.path.join(
         tmp_path,
-        "./aboutcode-vulnerabilities-pst6/b358/VCID-pst6-b358-aaap/VCID-pst6-b358-aaap.yml",
+        "./aboutcode-vulnerabilities-ps/b3/VCID-pst6-b358-aaap/VCID-pst6-b358-aaap.yml",
     )
     pck_filepath = os.path.join(
         tmp_path,
-        "./aboutcode-packages-ed5/generic/nginx/test/versions/vulnerabilities.yml",
-    )
-    version_filepath = os.path.join(
-        tmp_path,
-        "./aboutcode-packages-ed5/generic/nginx/test/versions/2/vulnerabilities.yml",
+        "./aboutcode-packages-9e6/generic/nginx/test/versions/vulnerabilities.yml",
     )
 
     assert Path(vul_filepath).read_text() == saneyaml.dump(expected_vul)
     assert Path(pck_filepath).read_text() == saneyaml.dump(expected_pkg)
-    assert Path(version_filepath).read_text() == saneyaml.dump(expected_version)