Skip to content

Commit

Permalink
Redefine the disk storage structure
Browse files Browse the repository at this point in the history
Signed-off-by: ziadhany <[email protected]>
  • Loading branch information
ziadhany committed Feb 17, 2024
1 parent 49ff11d commit 027983a
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 78 deletions.
109 changes: 57 additions & 52 deletions vulnerabilities/management/commands/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#
import logging
import os
from pathlib import Path

import saneyaml
Expand Down Expand Up @@ -45,6 +46,7 @@ def export_data(self, git_path):
ecosystems = [pkg.type for pkg in Package.objects.distinct("type")]

for ecosystem in ecosystems:
version_files = {} # {"version path": "data" }
package_files = {} # {"package path": "data" }
vul_files = {} # {"vulnerability path": "data" }

Expand All @@ -53,73 +55,76 @@ def export_data(self, git_path):
.prefetch_related("vulnerabilities")
.paginated()
):

purl_without_version = PackageURL(
type=purl.type,
namespace=purl.namespace,
name=purl.name,
)
package_dir = create_sub_paths(git_path, purl.type, purl.namespace, purl.name)
filename = f"{purl.type}-{purl.namespace}-{purl.name}.yml".replace("/", " ")
package_dir_file = package_dir.joinpath(filename)
if package_dir_file in package_files:
package_data = {
"purl": str(purl),
"affected_by_vulnerabilities": [
vuln.vulnerability_id for vuln in purl.affected_by
],
"fixing_vulnerabilities": [vuln.vulnerability_id for vuln in purl.fixing],
}
package_files[package_dir_file]["versions"].append(package_data)

# ./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/vulnerabilities.yml
pkg_filepath = (
f"./aboutcode-packages-ed5/{purl.type}/{purl.namespace}/{purl.name}"
f"/versions/vulnerabilities.yml"
)

# ./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/1.2.3/vulnerabilities.yml
version_filepath = (
f"./aboutcode-packages-ed5/{purl.type}/{purl.namespace}/{purl.name}/versions/"
f"{purl.version}/vulnerabilities.yml"
)

package_data = {
"purl": str(purl),
"affected_by_vulnerabilities": [
vuln.vulnerability_id for vuln in purl.affected_by
],
"fixing_vulnerabilities": [vuln.vulnerability_id for vuln in purl.fixing],
}

if pkg_filepath in package_files:
package_files[pkg_filepath]["versions"].append(package_data)
else:
package_files[package_dir_file] = {
package_files[pkg_filepath] = {
"package": str(purl_without_version),
"versions": [
{
"purl": str(purl),
"affected_by_vulnerabilities": [
vuln.vulnerability_id for vuln in purl.affected_by
],
"fixing_vulnerabilities": [
vuln.vulnerability_id for vuln in purl.fixing
],
}
],
"versions": [package_data],
}
version_files[version_filepath] = package_data

for vul in purl.vulnerabilities.all():
vul_filepath = package_dir.joinpath(f"{vul.vulnerability_id}.yml")
vul_files[vul_filepath] = saneyaml.dump(
{
"vulnerability_id": vul.vulnerability_id,
"aliases": [alias.alias for alias in vul.get_aliases],
"summary": vul.summary,
"severities": [severity for severity in vul.severities.values()],
"references": [ref for ref in vul.references.values()],
"weaknesses": [
"CWE-" + str(weakness["cwe_id"])
for weakness in vul.weaknesses.values()
],
}
vulnerability_id = vul.vulnerability_id
# ./aboutcode-vulnerabilities-1223/3434/VCID-1223-3434-34343/VCID-1223-3434-34343.yml
vul_filepath = (
f"./aboutcode-vulnerabilities-{vulnerability_id[5:9]}/{vulnerability_id[10:14]}"
f"/{vulnerability_id}/{vulnerability_id}.yml"
)
vul_files[vul_filepath] = {
"vulnerability_id": vul.vulnerability_id,
"aliases": [alias.alias for alias in vul.get_aliases],
"summary": vul.summary,
"severities": [severity for severity in vul.severities.values()],
"references": [ref for ref in vul.references.values()],
"weaknesses": [
"CWE-" + str(weakness["cwe_id"]) for weakness in vul.weaknesses.values()
],
}

for k, v in package_files.items():
data = saneyaml.dump(v)
with open(k, encoding="utf-8", mode="w") as f:
f.write(data)
for items in [package_files, version_files, vul_files]:
for filepath, data in items.items():
create_file(filepath, git_path, data)

for k, v in vul_files.items():
with open(k, encoding="utf-8", mode="w") as f:
f.write(v)
self.stdout.write(f"Successfully exported {ecosystem} data")


def create_sub_paths(git_path, purl_type, purl_namespace, purl_name):
def create_file(filepath, git_path, data):
"""
Create the directories if they don't exist: `path/purl_type/purl_namespace/purl_name`
Create the parent directories of the file if they do not already exist, then write the file. Example paths:
./aboutcode-vulnerabilities-1223/3434/VCID-1223-3434-34343/VCID-1223-3434-34343.yml
./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/vulnerabilities.yml
./aboutcode-packages-ed5/maven/org.apache.log4j/log4j-core/versions/1.2.3/vulnerabilities.yml
"""
ecosystem_dir = git_path.joinpath(purl_type)
namespace_dir = ecosystem_dir.joinpath(purl_namespace)
package_dir = namespace_dir.joinpath(purl_name)
package_dir.mkdir(parents=True, exist_ok=True)
return package_dir
filepath = git_path.joinpath(filepath)
dirname = os.path.dirname(filepath)
os.makedirs(dirname, exist_ok=True)
data = saneyaml.dump(data)
with open(filepath, encoding="utf-8", mode="w") as f:
f.write(data)
46 changes: 20 additions & 26 deletions vulnerabilities/tests/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from django.core.management import call_command
from django.core.management.base import CommandError

from vulnerabilities.management.commands.export import create_sub_paths
from vulnerabilities.models import Alias
from vulnerabilities.models import Package
from vulnerabilities.models import PackageRelatedVulnerability
Expand All @@ -19,27 +18,6 @@
from vulnerabilities.models import Weakness


@pytest.mark.parametrize(
"purl_type,purl_namespace,purl_name",
[
("generic", "", "nginx"),
("github", "package-url", "purl-spec"),
("pypi", "", "django"),
],
)
def test_create_sub_paths(tmp_path, purl_type, purl_namespace, purl_name):
create_sub_paths(tmp_path, purl_type, purl_namespace, purl_name)

ecosystem_dir = os.path.join(tmp_path, purl_type)
assert os.path.isdir(ecosystem_dir)

namespace_dir = os.path.join(ecosystem_dir, purl_namespace)
assert os.path.isdir(namespace_dir)

name_dir = os.path.join(namespace_dir, purl_name)
assert os.path.isdir(name_dir)


@pytest.fixture
def package(db):
return Package.objects.create(
Expand Down Expand Up @@ -109,7 +87,7 @@ def test_bad_path_fail_error(self):


@pytest.mark.django_db
def test_write_vul_data(
def test_export_data(
tmp_path, package_related_vulnerability, vulnerability_reference, vulnerability_severity
):
expected_vul = {
Expand Down Expand Up @@ -140,11 +118,27 @@ def test_write_vul_data(
},
],
}
expected_version = {
"purl": "pkg:generic/nginx/test@2",
"affected_by_vulnerabilities": ["VCID-pst6-b358-aaap"],
"fixing_vulnerabilities": [],
}

call_command("export", tmp_path, stdout=StringIO())
# path: type/namespace/name
vul_filepath = os.path.join(tmp_path, "generic/nginx/test/VCID-pst6-b358-aaap.yml")
pck_filepath = os.path.join(tmp_path, "generic/nginx/test/generic-nginx-test.yml")

vul_filepath = os.path.join(
tmp_path,
"./aboutcode-vulnerabilities-pst6/b358/VCID-pst6-b358-aaap/VCID-pst6-b358-aaap.yml",
)
pck_filepath = os.path.join(
tmp_path,
"./aboutcode-packages-ed5/generic/nginx/test/versions/vulnerabilities.yml",
)
version_filepath = os.path.join(
tmp_path,
"./aboutcode-packages-ed5/generic/nginx/test/versions/2/vulnerabilities.yml",
)

assert Path(vul_filepath).read_text() == saneyaml.dump(expected_vul)
assert Path(pck_filepath).read_text() == saneyaml.dump(expected_pkg)
assert Path(version_filepath).read_text() == saneyaml.dump(expected_version)

0 comments on commit 027983a

Please sign in to comment.