Get rid of added_advisory and batches
useless after a530627

Signed-off-by: Hritik Vijay <[email protected]>
Hritik14 committed Aug 12, 2021
1 parent b6ef11d · commit c4aaa10
Showing 5 changed files with 38 additions and 80 deletions.
30 changes: 1 addition & 29 deletions vulnerabilities/data_source.py
@@ -78,8 +78,6 @@ def normalized(self):
class Advisory:
"""
This data class expresses the contract between data sources and the import runner.
Data sources are expected to be usable as context managers and generators, yielding batches of
Advisory sequences.
NB: There are two representations for package URLs that are commonly used by code consuming this
data class: PackageURL objects and strings. As a convention, the former is referred to in
@@ -131,22 +129,18 @@ class DataSource(ContextManager):

def __init__(
self,
batch_size: int,
last_run_date: Optional[datetime] = None,
cutoff_date: Optional[datetime] = None,
config: Optional[Mapping[str, Any]] = None,
):
"""
Create a DataSource instance.
:param batch_size: Maximum number of records to return from added_advisories() and
updated_advisories()
:param last_run_date: Optional timestamp when this data source was last inspected
:param cutoff_date: Optional timestamp, records older than this will be ignored
:param config: Optional dictionary with subclass-specific configuration
"""
config = config or {}
self.batch_size = batch_size
try:
self.config = self.__class__.CONFIG_CLASS(**config)
# These really should be declared in DataSourceConfiguration above but that would
@@ -194,16 +188,9 @@ def validate_configuration(self) -> None:
"""
pass

def added_advisories(self) -> Set[Advisory]:
"""
Subclasses yield batch_size sized batches of Advisory objects that have been added to the
data source since the last run or self.cutoff_date.
"""
return set()

def updated_advisories(self) -> Set[Advisory]:
"""
Subclasses yield batch_size sized batches of Advisory objects that have been modified since
Subclasses return Advisory objects that have been modified since
the last run or self.cutoff_date.
NOTE: Data sources that do not enable detection of changes to existing records vs added
@@ -218,21 +205,6 @@ def error(self, msg: str) -> None:
"""
raise InvalidConfigurationError(f"{type(self).__name__}: {msg}")

def batch_advisories(self, advisories: List[Advisory]) -> Set[Advisory]:
"""
Yield batches of the passed in list of advisories.
"""

# TODO make this less cryptic and efficient

advisories = advisories[:]
# copy the list as we are mutating it in the loop below

while advisories:
b, advisories = advisories[: self.batch_size], advisories[self.batch_size :]
yield b


@dataclasses.dataclass
class GitDataSourceConfiguration(DataSourceConfiguration):
repository_url: str
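With batching gone, the data source contract boils down to a single method: updated_advisories() returns one flat collection of Advisory objects. A minimal sketch of a subclass under the new contract; the feed-fetching helpers are hypothetical, and only DataSource and Advisory come from vulnerabilities/data_source.py:

    from typing import Set

    from vulnerabilities.data_source import Advisory, DataSource


    class ExampleDataSource(DataSource):
        """Hypothetical data source illustrating the post-commit contract."""

        def updated_advisories(self) -> Set[Advisory]:
            # Gather everything modified since the last run or
            # self.cutoff_date and return it in one flat set; the import
            # runner consumes this set directly, so there is no batch_size
            # and no added_advisories() counterpart anymore.
            raw_records = self._fetch_feed()  # hypothetical helper
            return {self._to_advisory(r) for r in raw_records}  # hypothetical helper

Since the declared return type is Set[Advisory], whatever the parsing helper builds has to stay hashable.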
72 changes: 34 additions & 38 deletions vulnerabilities/import_runner.py
@@ -26,6 +26,7 @@
import logging
from itertools import chain
from typing import Tuple
from typing import Set

from django.db import transaction

@@ -68,9 +69,8 @@ class ImportRunner:
- All update and select operations must use indexed columns.
"""

def __init__(self, importer: models.Importer, batch_size: int):
def __init__(self, importer: models.Importer):
self.importer = importer
self.batch_size = batch_size

def run(self, cutoff_date: datetime.datetime = None) -> None:
"""
@@ -84,9 +84,10 @@ def run(self, cutoff_date: datetime.datetime = None) -> None:
from all Linux distributions that package this kernel version.
"""
logger.info(f"Starting import for {self.importer.name}.")
data_source = self.importer.make_data_source(self.batch_size, cutoff_date=cutoff_date)
data_source = self.importer.make_data_source(cutoff_date=cutoff_date)
with data_source:
process_advisories(data_source)
advisories = data_source.updated_advisories()
process_advisories(advisories)
self.importer.last_run = datetime.datetime.now(tz=datetime.timezone.utc)
self.importer.data_source_cfg = dataclasses.asdict(data_source.config)
self.importer.save()
@@ -108,46 +109,41 @@ def get_vuln_pkg_refs(vulnerability, package):


@transaction.atomic
def process_advisories(data_source: DataSource) -> None:
def process_advisories(advisories: Set[Advisory]) -> None:
bulk_create_vuln_pkg_refs = set()
# Treat updated_advisories and added_advisories as same. Eventually
# we want to refactor all data sources to provide advisories via a
# single method.
advisory_batches = chain(data_source.updated_advisories(), data_source.added_advisories())
for batch in advisory_batches:
for advisory in batch:
vuln, vuln_created = _get_or_create_vulnerability(advisory)
for vuln_ref in advisory.references:
ref, _ = models.VulnerabilityReference.objects.get_or_create(
vulnerability=vuln, reference_id=vuln_ref.reference_id, url=vuln_ref.url
for advisory in advisories:
vuln, vuln_created = _get_or_create_vulnerability(advisory)
for vuln_ref in advisory.references:
ref, _ = models.VulnerabilityReference.objects.get_or_create(
vulnerability=vuln, reference_id=vuln_ref.reference_id, url=vuln_ref.url
)

for score in vuln_ref.severities:
models.VulnerabilitySeverity.objects.update_or_create(
vulnerability=vuln,
scoring_system=score.system.identifier,
reference=ref,
defaults={"value": str(score.value)},
)

for score in vuln_ref.severities:
models.VulnerabilitySeverity.objects.update_or_create(
vulnerability=vuln,
scoring_system=score.system.identifier,
reference=ref,
defaults={"value": str(score.value)},
)

for aff_pkg_with_patched_pkg in advisory.affected_packages:
vulnerable_package, _ = _get_or_create_package(
aff_pkg_with_patched_pkg.vulnerable_package
for aff_pkg_with_patched_pkg in advisory.affected_packages:
vulnerable_package, _ = _get_or_create_package(
aff_pkg_with_patched_pkg.vulnerable_package
)
patched_package = None
if aff_pkg_with_patched_pkg.patched_package:
patched_package, _ = _get_or_create_package(
aff_pkg_with_patched_pkg.patched_package
)
patched_package = None
if aff_pkg_with_patched_pkg.patched_package:
patched_package, _ = _get_or_create_package(
aff_pkg_with_patched_pkg.patched_package
)

prv, _ = models.PackageRelatedVulnerability.objects.get_or_create(
vulnerability=vuln,
package=vulnerable_package,
)
prv, _ = models.PackageRelatedVulnerability.objects.get_or_create(
vulnerability=vuln,
package=vulnerable_package,
)

if patched_package:
prv.patched_package = patched_package
prv.save()
if patched_package:
prv.patched_package = patched_package
prv.save()

models.PackageRelatedVulnerability.objects.bulk_create(
[i.to_model_object() for i in bulk_create_vuln_pkg_refs]
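The runner is now a straight fetch-then-process pipeline. A sketch of the resulting call flow, using only names from the diff above (the importer lookup is illustrative):

    from vulnerabilities.import_runner import ImportRunner, process_advisories
    from vulnerabilities.models import Importer

    importer = Importer.objects.get(name="nginx")  # illustrative lookup

    # The runner no longer takes a batch_size:
    ImportRunner(importer).run(cutoff_date=None)

    # Internally, run() now reduces to roughly this; process_advisories()
    # receives the whole set and saves it inside a single
    # @transaction.atomic block:
    data_source = importer.make_data_source(cutoff_date=None)
    with data_source:
        process_advisories(data_source.updated_advisories())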
2 changes: 1 addition & 1 deletion vulnerabilities/importers/nginx.py
@@ -68,7 +68,7 @@ def updated_advisories(self):
self.set_api()
data = requests.get(self.url).content
advisories.extend(self.to_advisories(data))
return self.batch_advisories(advisories)
return advisories

def to_advisories(self, data):
advisories = []
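The nginx importer shows the per-source effect: the parsed advisories are returned as-is instead of being routed through the deleted batch_advisories() generator. The full method, reconstructed; lines outside the visible hunk, such as the advisories initialisation, are assumptions:

    # Inside the nginx data source class (requests is imported at module level):
    def updated_advisories(self):
        advisories = []  # assumed; initialised just above the visible hunk
        self.set_api()
        data = requests.get(self.url).content
        advisories.extend(self.to_advisories(data))
        # Previously: return self.batch_advisories(advisories)
        return advisories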
10 changes: 1 addition & 9 deletions vulnerabilities/management/commands/import.py
@@ -53,20 +53,13 @@ def add_arguments(self, parser):
)
parser.add_argument("sources", nargs="*", help="Data sources from which to import")

parser.add_argument(
"--batch_size", help="The batch size to be used for bulk inserting data"
)

def handle(self, *args, **options):
# load_importers() seeds the DB with Importers
load_importers()
if options["list"]:
self.list_sources()
return

if options["batch_size"]:
self.batch_size = options["batch_size"]

if options["all"]:
self._import_data(Importer.objects.all(), options["cutoff_date"])
return
@@ -105,9 +98,8 @@ def _import_data(self, importers, cutoff_date):

for importer in importers:
self.stdout.write(f"Importing data from {importer.name}")
batch_size = int(getattr(self, "batch_size", 10))
try:
ImportRunner(importer, batch_size).run(cutoff_date=cutoff_date)
ImportRunner(importer).run(cutoff_date=cutoff_date)
self.stdout.write(
self.style.SUCCESS(f"Successfully imported data from {importer.name}")
)
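Nothing replaces --batch_size on the command line; the knob simply disappears. A sketch of invoking the command programmatically, with option and argument names inferred from handle() above and "nginx" as an illustrative source:

    from django.core.management import call_command

    # Import from every known data source:
    call_command("import", all=True)

    # Or only from the listed sources:
    call_command("import", "nginx")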
4 changes: 1 addition & 3 deletions vulnerabilities/models.py
@@ -235,18 +235,16 @@ class Importer(models.Model):
help_text="Implementation-specific configuration for the data source",
)

def make_data_source(self, batch_size: int, cutoff_date: datetime = None) -> DataSource:
def make_data_source(self, cutoff_date: datetime = None) -> DataSource:
"""
Return a configured and ready-to-use instance of this importer's data source implementation.
batch_size - max. number of records to return on each iteration
cutoff_date - optional timestamp of the oldest data to include in the import
"""
importers_module = importlib.import_module("vulnerabilities.importers")
klass = getattr(importers_module, self.data_source)

ds = klass(
batch_size,
last_run_date=self.last_run,
cutoff_date=cutoff_date,
config=self.data_source_cfg,
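Because make_data_source() resolves the class by name from vulnerabilities.importers, dropping batch_size here is what lets every concrete importer's __init__ shed the parameter too. Expanded, the factory now does roughly the following; the class name and argument values are illustrative:

    import importlib

    importers_module = importlib.import_module("vulnerabilities.importers")
    klass = getattr(importers_module, "NginxDataSource")  # assumed class name

    # batch_size is gone; only the three keyword arguments remain:
    data_source = klass(
        last_run_date=None,
        cutoff_date=None,
        config={},  # subclass-specific configuration mapping
    )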
