From 027983af3859eee1f1a979e39d7a4c5575998cf1 Mon Sep 17 00:00:00 2001 From: ziadhany Date: Sat, 17 Feb 2024 13:32:10 +0200 Subject: [PATCH] Redefine the disk storage structure Signed-off-by: ziadhany --- vulnerabilities/management/commands/export.py | 109 +++++++++--------- vulnerabilities/tests/test_export.py | 46 ++++---- 2 files changed, 77 insertions(+), 78 deletions(-) diff --git a/vulnerabilities/management/commands/export.py b/vulnerabilities/management/commands/export.py index 8c8af5778..b78fce936 100644 --- a/vulnerabilities/management/commands/export.py +++ b/vulnerabilities/management/commands/export.py @@ -7,6 +7,7 @@ # See https://aboutcode.org for more information about nexB OSS projects. # import logging +import os from pathlib import Path import saneyaml @@ -45,6 +46,7 @@ def export_data(self, git_path): ecosystems = [pkg.type for pkg in Package.objects.distinct("type")] for ecosystem in ecosystems: + version_files = {} # {"version path": "data" } package_files = {} # {"package path": "data" } vul_files = {} # {"vulnerability path": "data" } @@ -53,73 +55,76 @@ def export_data(self, git_path): .prefetch_related("vulnerabilities") .paginated() ): - purl_without_version = PackageURL( type=purl.type, namespace=purl.namespace, name=purl.name, ) - package_dir = create_sub_paths(git_path, purl.type, purl.namespace, purl.name) - filename = f"{purl.type}-{purl.namespace}-{purl.name}.yml".replace("/", " ") - package_dir_file = package_dir.joinpath(filename) - if package_dir_file in package_files: - package_data = { - "purl": str(purl), - "affected_by_vulnerabilities": [ - vuln.vulnerability_id for vuln in purl.affected_by - ], - "fixing_vulnerabilities": [vuln.vulnerability_id for vuln in purl.fixing], - } - package_files[package_dir_file]["versions"].append(package_data) + + # ./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/vulnerabilities.yml + pkg_filepath = ( + f"./aboutcode-packages-ed5/{purl.type}/{purl.namespace}/{purl.name}" + f"/versions/vulnerabilities.yml" + ) + + # ./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/1.2.3/vulnerabilities.yml + version_filepath = ( + f"./aboutcode-packages-ed5/{purl.type}/{purl.namespace}/{purl.name}/versions/" + f"{purl.version}/vulnerabilities.yml" + ) + + package_data = { + "purl": str(purl), + "affected_by_vulnerabilities": [ + vuln.vulnerability_id for vuln in purl.affected_by + ], + "fixing_vulnerabilities": [vuln.vulnerability_id for vuln in purl.fixing], + } + + if pkg_filepath in package_files: + package_files[pkg_filepath]["versions"].append(package_data) else: - package_files[package_dir_file] = { + package_files[pkg_filepath] = { "package": str(purl_without_version), - "versions": [ - { - "purl": str(purl), - "affected_by_vulnerabilities": [ - vuln.vulnerability_id for vuln in purl.affected_by - ], - "fixing_vulnerabilities": [ - vuln.vulnerability_id for vuln in purl.fixing - ], - } - ], + "versions": [package_data], } + version_files[version_filepath] = package_data for vul in purl.vulnerabilities.all(): - vul_filepath = package_dir.joinpath(f"{vul.vulnerability_id}.yml") - vul_files[vul_filepath] = saneyaml.dump( - { - "vulnerability_id": vul.vulnerability_id, - "aliases": [alias.alias for alias in vul.get_aliases], - "summary": vul.summary, - "severities": [severity for severity in vul.severities.values()], - "references": [ref for ref in vul.references.values()], - "weaknesses": [ - "CWE-" + str(weakness["cwe_id"]) - for weakness in vul.weaknesses.values() - ], - } + vulnerability_id = vul.vulnerability_id + # ./aboutcode-vulnerabilities-1223/3434/VCID-1223-3434-34343/VCID-1223-3434-34343.yml + vul_filepath = ( + f"./aboutcode-vulnerabilities-{vulnerability_id[5:9]}/{vulnerability_id[10:14]}" + f"/{vulnerability_id}/{vulnerability_id}.yml" ) + vul_files[vul_filepath] = { + "vulnerability_id": vul.vulnerability_id, + "aliases": [alias.alias for alias in vul.get_aliases], + "summary": vul.summary, + "severities": [severity for severity in vul.severities.values()], + "references": [ref for ref in vul.references.values()], + "weaknesses": [ + "CWE-" + str(weakness["cwe_id"]) for weakness in vul.weaknesses.values() + ], + } - for k, v in package_files.items(): - data = saneyaml.dump(v) - with open(k, encoding="utf-8", mode="w") as f: - f.write(data) + for items in [package_files, version_files, vul_files]: + for filepath, data in items.items(): + create_file(filepath, git_path, data) - for k, v in vul_files.items(): - with open(k, encoding="utf-8", mode="w") as f: - f.write(v) self.stdout.write(f"Successfully exported {ecosystem} data") -def create_sub_paths(git_path, purl_type, purl_namespace, purl_name): +def create_file(filepath, git_path, data): """ - create the directories if it doesn't exist : `path/purl_type/purl_namespace/purl_name` + Check if the directories exist if it doesn't exist create a new one then Create the file + ./aboutcode-vulnerabilities-1223/3434/VCID-1223-3434-34343/VCID-1223-3434-34343.yml + ./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/vulnerabilities.yml + ./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/1.2.3/vulnerabilities.yml """ - ecosystem_dir = git_path.joinpath(purl_type) - namespace_dir = ecosystem_dir.joinpath(purl_namespace) - package_dir = namespace_dir.joinpath(purl_name) - package_dir.mkdir(parents=True, exist_ok=True) - return package_dir + filepath = git_path.joinpath(filepath) + dirname = os.path.dirname(filepath) + os.makedirs(dirname, exist_ok=True) + data = saneyaml.dump(data) + with open(filepath, encoding="utf-8", mode="w") as f: + f.write(data) diff --git a/vulnerabilities/tests/test_export.py b/vulnerabilities/tests/test_export.py index e7689dc0c..67af22e61 100644 --- a/vulnerabilities/tests/test_export.py +++ b/vulnerabilities/tests/test_export.py @@ -8,7 +8,6 @@ from django.core.management import call_command from django.core.management.base import CommandError -from vulnerabilities.management.commands.export import create_sub_paths from vulnerabilities.models import Alias from vulnerabilities.models import Package from vulnerabilities.models import PackageRelatedVulnerability @@ -19,27 +18,6 @@ from vulnerabilities.models import Weakness -@pytest.mark.parametrize( - "purl_type,purl_namespace,purl_name", - [ - ("generic", "", "nginx"), - ("github", "package-url", "purl-spec"), - ("pypi", "", "django"), - ], -) -def test_create_sub_paths(tmp_path, purl_type, purl_namespace, purl_name): - create_sub_paths(tmp_path, purl_type, purl_namespace, purl_name) - - ecosystem_dir = os.path.join(tmp_path, purl_type) - assert os.path.isdir(ecosystem_dir) - - namespace_dir = os.path.join(ecosystem_dir, purl_namespace) - assert os.path.isdir(namespace_dir) - - name_dir = os.path.join(namespace_dir, purl_name) - assert os.path.isdir(name_dir) - - @pytest.fixture def package(db): return Package.objects.create( @@ -109,7 +87,7 @@ def test_bad_path_fail_error(self): @pytest.mark.django_db -def test_write_vul_data( +def test_export_data( tmp_path, package_related_vulnerability, vulnerability_reference, vulnerability_severity ): expected_vul = { @@ -140,11 +118,27 @@ def test_write_vul_data( }, ], } + expected_version = { + "purl": "pkg:generic/nginx/test@2", + "affected_by_vulnerabilities": ["VCID-pst6-b358-aaap"], + "fixing_vulnerabilities": [], + } call_command("export", tmp_path, stdout=StringIO()) - # path: type/namespace/name - vul_filepath = os.path.join(tmp_path, "generic/nginx/test/VCID-pst6-b358-aaap.yml") - pck_filepath = os.path.join(tmp_path, "generic/nginx/test/generic-nginx-test.yml") + + vul_filepath = os.path.join( + tmp_path, + "./aboutcode-vulnerabilities-pst6/b358/VCID-pst6-b358-aaap/VCID-pst6-b358-aaap.yml", + ) + pck_filepath = os.path.join( + tmp_path, + "./aboutcode-packages-ed5/generic/nginx/test/versions/vulnerabilities.yml", + ) + version_filepath = os.path.join( + tmp_path, + "./aboutcode-packages-ed5/generic/nginx/test/versions/2/vulnerabilities.yml", + ) assert Path(vul_filepath).read_text() == saneyaml.dump(expected_vul) assert Path(pck_filepath).read_text() == saneyaml.dump(expected_pkg) + assert Path(version_filepath).read_text() == saneyaml.dump(expected_version)