Skip to content

Commit

Permalink
Merge branch 'main' into 1228-fixed-affected-version-matching
Browse files Browse the repository at this point in the history
  • Loading branch information
TG1999 authored Nov 15, 2023
2 parents 94cd01b + a114deb commit e42d095
Show file tree
Hide file tree
Showing 52 changed files with 2,084 additions and 1,018 deletions.
20 changes: 18 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,14 @@ coverage.xml
*.log
local_settings.py

# Sphinx documentation
docs/_build/
# Sphinx
docs/_build
docs/bin
docs/build
docs/include
docs/Lib
doc/pyvenv.cfg
pyvenv.cfg

# PyBuilder
target/
Expand Down Expand Up @@ -103,3 +109,13 @@ Pipfile
*.bak
/.cache/
/tmp/

# pyenv
/.python-version
/man/
/.pytest_cache/
lib64
tcl

# Ignore Jupyter Notebook related temp files
.ipynb_checkpoints/
19 changes: 16 additions & 3 deletions .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,25 @@
# Required
version: 2

# Build in latest ubuntu/python
build:
os: ubuntu-22.04
tools:
python: "3.11"

# Build PDF & ePub
formats:
- epub
- pdf

# Where the Sphinx conf.py file is located
sphinx:
configuration: docs/source/conf.py

# Setting the doc build requirements
# Setting the python version and doc build requirements
python:
version: "3.7"
install:
- requirements: docs/requirements.txt
- method: pip
path: .
extra_requirements:
- dev
25 changes: 25 additions & 0 deletions apache-2.0.LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,28 @@
of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
128 changes: 78 additions & 50 deletions vulnerabilities/import_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,69 +245,97 @@ def create_valid_vulnerability_reference(url, reference_id=None):
return reference


def get_or_create_vulnerability_and_aliases(alias_names, vulnerability_id=None, summary=None):
def get_or_create_vulnerability_and_aliases(
aliases: List[str], vulnerability_id=None, summary=None
):
"""
Get or create vulnerability and aliases such that all existing and new
aliases point to the same vulnerability
"""
existing_vulns = set()
alias_names = set(alias_names)
new_alias_names = set()
for alias_name in alias_names:
try:
alias = Alias.objects.get(alias=alias_name)
existing_vulns.add(alias.vulnerability)
except Alias.DoesNotExist:
new_alias_names.add(alias_name)

# If given set of aliases point to different vulnerabilities in the
# database, request is malformed
# TODO: It is possible that all those vulnerabilities are actually
# the same at data level, figure out a way to merge them
if len(existing_vulns) > 1:
logger.warning(
f"Given aliases {alias_names} already exist and do not point "
f"to a single vulnerability. Cannot improve. Skipped."
)
return
aliases = set(alias.strip() for alias in aliases if alias and alias.strip())
new_alias_names, existing_vulns = get_vulns_for_aliases_and_get_new_aliases(aliases)

# All aliases must point to the same vulnerability
vulnerability = None
if existing_vulns:
if len(existing_vulns) != 1:
vcids = ", ".join(v.vulnerability_id for v in existing_vulns)
logger.error(
f"Cannot create vulnerability. "
f"Aliases {aliases} already exist and point "
f"to multiple vulnerabilities {vcids}."
)
return
else:
vulnerability = existing_vulns.pop()

existing_alias_vuln = existing_vulns.pop() if existing_vulns else None

if (
existing_alias_vuln
and vulnerability_id
and existing_alias_vuln.vulnerability_id != vulnerability_id
):
logger.warning(
f"Given aliases {alias_names!r} already exist and point to existing"
f"vulnerability {existing_alias_vuln}. Unable to create Vulnerability "
f"with vulnerability_id {vulnerability_id}. Skipped"
)
return
if vulnerability_id and vulnerability.vulnerability_id != vulnerability_id:
logger.error(
f"Cannot create vulnerability. "
f"Aliases {aliases} already exist and point to a different "
f"vulnerability {vulnerability} than the requested "
f"vulnerability {vulnerability_id}."
)
return

if existing_alias_vuln:
vulnerability = existing_alias_vuln
elif vulnerability_id:
if vulnerability_id and not vulnerability:
try:
vulnerability = Vulnerability.objects.get(vulnerability_id=vulnerability_id)
except Vulnerability.DoesNotExist:
logger.warning(
f"Given vulnerability_id: {vulnerability_id} does not exist in the database"
)
logger.error(f"Cannot get requested vulnerability {vulnerability_id}.")
return
if vulnerability:
# TODO: We should keep multiple summaries, one for each advisory
# if summary and summary != vulnerability.summary:
# logger.warning(
# f"Inconsistent summary for {vulnerability.vulnerability_id}. "
# f"Existing: {vulnerability.summary!r}, provided: {summary!r}"
# )
associate_vulnerability_with_aliases(vulnerability=vulnerability, aliases=new_alias_names)
else:
vulnerability = Vulnerability(summary=summary)
vulnerability.save()
try:
vulnerability = create_vulnerability_and_add_aliases(
aliases=new_alias_names, summary=summary
)
except Exception as e:
logger.error(
f"Cannot create vulnerability with summary {summary!r} and {new_alias_names!r} {e!r}.\n{traceback_format_exc()}."
)
return

return vulnerability


def get_vulns_for_aliases_and_get_new_aliases(aliases):
    """
    Split ``aliases`` according to whether an ``Alias`` row already exists.

    Return a two-tuple of sets ``(new_aliases, existing_vulns)``:
    ``new_aliases`` holds the given aliases for which no ``Alias`` row was
    found, and ``existing_vulns`` holds the vulnerabilities referenced by the
    ``Alias`` rows that were found.
    """
    # Start from every requested alias and strike out the ones the query returns.
    unseen_aliases = set(aliases)
    found_vulns = set()
    known_aliases = Alias.objects.filter(alias__in=aliases)
    for known_alias in known_aliases:
        found_vulns.add(known_alias.vulnerability)
        unseen_aliases.remove(known_alias.alias)
    return unseen_aliases, found_vulns

if summary and summary != vulnerability.summary:
logger.warning(
f"Inconsistent summary for {vulnerability!r}. "
f"Existing: {vulnerability.summary}, provided: {summary}"
)

for alias_name in new_alias_names:
@transaction.atomic
def create_vulnerability_and_add_aliases(aliases, summary):
    """
    Create and return a new ``Vulnerability`` with the given ``summary`` and
    attach every name in ``aliases`` to it.

    Raise an ``Exception`` when the saved vulnerability ends up with no alias
    at all. The function is wrapped in ``transaction.atomic``.
    """
    new_vulnerability = Vulnerability(summary=summary)
    new_vulnerability.save()
    associate_vulnerability_with_aliases(aliases, new_vulnerability)

    alias_count = new_vulnerability.aliases.count()
    if alias_count == 0:
        raise Exception(
            f"Vulnerability {new_vulnerability.vcid} must have one or more aliases"
        )
    return new_vulnerability


def associate_vulnerability_with_aliases(aliases, vulnerability):
    """
    Save one ``Alias`` row pointing at ``vulnerability`` for each name in
    ``aliases`` and log every alias that was added.
    """
    for name in aliases:
        Alias(alias=name, vulnerability=vulnerability).save()
        logger.info(f"New alias for {vulnerability!r}: {name}")

return vulnerability
2 changes: 2 additions & 0 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from vulnerabilities.importers import npm
from vulnerabilities.importers import nvd
from vulnerabilities.importers import openssl
from vulnerabilities.importers import oss_fuzz
from vulnerabilities.importers import postgresql
from vulnerabilities.importers import project_kb_msr2019
from vulnerabilities.importers import pypa
Expand Down Expand Up @@ -65,6 +66,7 @@
ubuntu_usn.UbuntuUSNImporter,
fireeye.FireyeImporter,
apache_kafka.ApacheKafkaImporter,
oss_fuzz.OSSFuzzImporter,
]

IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY}
32 changes: 31 additions & 1 deletion vulnerabilities/importers/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from typing import Iterable
from typing import Optional

from cwe2.database import Database
from dateutil import parser as dateparser
from packageurl import PackageURL
from univers.version_range import RANGE_CLASS_BY_SCHEMES
Expand All @@ -24,11 +25,11 @@
from vulnerabilities.importer import Reference
from vulnerabilities.importer import VulnerabilitySeverity
from vulnerabilities.utils import dedupe
from vulnerabilities.utils import get_cwe_id
from vulnerabilities.utils import get_item

logger = logging.getLogger(__name__)


PACKAGE_TYPE_BY_GITHUB_ECOSYSTEM = {
"MAVEN": "maven",
"NUGET": "nuget",
Expand Down Expand Up @@ -63,6 +64,11 @@
url
}
severity
cwes(first: 10){
nodes {
cweId
}
}
publishedAt
}
firstPatchedVersion{
Expand Down Expand Up @@ -227,10 +233,34 @@ def process_response(resp: dict, package_type: str) -> Iterable[AdvisoryData]:
else:
logger.error(f"Unknown identifier type {identifier_type!r} and value {value!r}")

weaknesses = get_cwes_from_github_advisory(advisory)

yield AdvisoryData(
aliases=sorted(dedupe(aliases)),
summary=summary,
references=references,
affected_packages=affected_packages,
date_published=date_published,
weaknesses=weaknesses,
)


def get_cwes_from_github_advisory(advisory) -> [int]:
    """
    Return a list of integer CWE ids extracted from a GitHub ``advisory``.

    The CWE entries are looked up with ``get_item(advisory, "cwes", "nodes")``,
    for example ``[{"cweId": "CWE-522"}]``. Each ``cweId`` string is converted
    with ``get_cwe_id`` (``"CWE-522"`` -> ``522``) and then checked against the
    CWE ``Database``. Entries without a ``cweId``, and ids that cannot be
    converted or that the database lookup rejects, are skipped; the rejected
    ones are logged as errors.
    """
    weaknesses = []
    db = Database()
    cwe_list = get_item(advisory, "cwes", "nodes") or []
    for cwe_item in cwe_list:
        cwe_string = get_item(cwe_item, "cweId")
        if not cwe_string:
            continue
        try:
            # The conversion is kept inside the guarded region so that one
            # malformed id is logged and skipped instead of aborting the
            # processing of the whole advisory.
            # NOTE(review): assumes get_cwe_id raises on malformed input - confirm.
            cwe_id = get_cwe_id(cwe_string)
            db.get(cwe_id)
        except Exception:
            # Broad on purpose: the exception types raised by the CWE database
            # lookup are not visible from here.
            logger.error(f"Invalid CWE id: {cwe_string!r}")
            continue
        weaknesses.append(cwe_id)
    return weaknesses
8 changes: 6 additions & 2 deletions vulnerabilities/importers/gitlab.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@
from vulnerabilities.importer import Importer
from vulnerabilities.importer import Reference
from vulnerabilities.utils import build_description
from vulnerabilities.utils import get_cwe_id

logger = logging.getLogger(__name__)


PURL_TYPE_BY_GITLAB_SCHEME = {
"conan": "conan",
"gem": "gem",
Expand All @@ -44,7 +44,6 @@
"pypi": "pypi",
}


GITLAB_SCHEME_BY_PURL_TYPE = {v: k for k, v in PURL_TYPE_BY_GITLAB_SCHEME.items()}


Expand Down Expand Up @@ -186,6 +185,10 @@ def parse_gitlab_advisory(file):
summary = build_description(gitlab_advisory.get("title"), gitlab_advisory.get("description"))
urls = gitlab_advisory.get("urls")
references = [Reference.from_url(u) for u in urls]

cwe_ids = gitlab_advisory.get("cwe_ids") or []
cwe_list = list(map(get_cwe_id, cwe_ids))

date_published = dateparser.parse(gitlab_advisory.get("pubdate"))
date_published = date_published.replace(tzinfo=pytz.UTC)
package_slug = gitlab_advisory.get("package_slug")
Expand Down Expand Up @@ -251,4 +254,5 @@ def parse_gitlab_advisory(file):
references=references,
date_published=date_published,
affected_packages=affected_packages,
weaknesses=cwe_list,
)
37 changes: 37 additions & 0 deletions vulnerabilities/importers/oss_fuzz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
import logging
from pathlib import Path
from typing import Iterable

import saneyaml

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import Importer
from vulnerabilities.importers.osv import parse_advisory_data

logger = logging.getLogger(__name__)


class OSSFuzzImporter(Importer):
    """
    Importer that yields advisories parsed from the YAML files of the
    ``google/oss-fuzz-vulns`` git repository.
    """

    license_url = "https://github.com/google/oss-fuzz-vulns/blob/main/LICENSE"
    spdx_license_expression = "CC-BY-4.0"
    url = "git+https://github.com/google/oss-fuzz-vulns"

    def advisory_data(self) -> Iterable[AdvisoryData]:
        """
        Clone the repository at ``self.url`` and yield the result of
        ``parse_advisory_data`` for every ``*.yaml`` file found recursively
        under its ``vulns`` directory.

        The VCS response is deleted when iteration ends or fails, provided
        ``self.vcs_response`` is set.
        """
        try:
            self.clone(repo_url=self.url)
            path = Path(self.vcs_response.dest_dir) / "vulns"
            for file in path.glob("**/*.yaml"):
                # Decode explicitly as UTF-8 rather than with the platform's
                # locale-dependent default encoding.
                with open(file, encoding="utf-8") as f:
                    yaml_data = saneyaml.load(f.read())
                # Yield after the file is closed so no handle stays open
                # while the consumer processes the advisory.
                yield parse_advisory_data(yaml_data, supported_ecosystem="oss-fuzz")
        finally:
            if self.vcs_response:
                self.vcs_response.delete()
Loading

0 comments on commit e42d095

Please sign in to comment.