Skip to content

Commit

Permalink
Paginate default improver and add interrupt for import and improve process
Browse files Browse the repository at this point in the history

Signed-off-by: Tushar Goel <[email protected]>
  • Loading branch information
TG1999 committed Jul 21, 2023
1 parent ff95e09 commit 0655260
Show file tree
Hide file tree
Showing 7 changed files with 62 additions and 39 deletions.
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,10 @@ pytest-django==4.5.2
python-dateutil==2.8.2
python-dotenv==0.20.0
pytz==2022.1
PyYAML==5.4.1
PyYAML==6.0.1
requests==2.27.1
restructuredtext-lint==1.4.0
saneyaml==0.5.2
saneyaml==0.6.0
semantic-version==2.9.0
six==1.16.0
smmap==5.0.0
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ install_requires =

# file and data formats
binaryornot>=0.4.4
saneyaml>=0.5.2
saneyaml>=0.6.0
beautifulsoup4>=4.9.3
python-dateutil>=2.8.1
toml>=0.10.2
Expand Down
12 changes: 8 additions & 4 deletions vulnerabilities/improve_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,14 @@ def run(self) -> None:
improver = self.improver_class()
logger.info(f"Running improver: {improver.qualified_name}")
for advisory in improver.interesting_advisories:
inferences = improver.get_inferences(advisory_data=advisory.to_advisory_data())
process_inferences(
inferences=inferences, advisory=advisory, improver_name=improver.qualified_name
)
logger.info(f"Processing advisory: {advisory!r}")
try:
inferences = improver.get_inferences(advisory_data=advisory.to_advisory_data())
process_inferences(
inferences=inferences, advisory=advisory, improver_name=improver.qualified_name
)
except Exception as e:
logger.info(f"Failed to process advisory: {advisory!r} with error {e!r}")
logger.info("Finished improving using %s.", self.improver_class.qualified_name)


Expand Down
13 changes: 7 additions & 6 deletions vulnerabilities/improvers/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ class DefaultImprover(Improver):

@property
def interesting_advisories(self) -> QuerySet:
return Advisory.objects.all()
for advisory in Advisory.objects.all().paginated():
yield advisory

def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]:
if not advisory_data:
Expand Down Expand Up @@ -94,11 +95,11 @@ def get_exact_purls(affected_package: AffectedPackage) -> Tuple[List[PackageURL]
>>> assert expected == got
"""

vr = affected_package.affected_version_range
# We need ``if c`` below because univers returns None as version
# in case of vers:nginx/*
# TODO: Revisit after https://github.com/nexB/univers/issues/33
try:
vr = affected_package.affected_version_range
# We need ``if c`` below because univers returns None as version
# in case of vers:nginx/*
# TODO: Revisit after https://github.com/nexB/univers/issues/33
affected_purls = []
fixed_versions = []
if vr:
Expand All @@ -120,5 +121,5 @@ def get_exact_purls(affected_package: AffectedPackage) -> Tuple[List[PackageURL]
]
return affected_purls, fixed_purls
except Exception as e:
logger.error(f"Failed to get exact purls for {affected_package} {e}")
logger.error(f"Failed to get exact purls for: {affected_package!r} with error: {e!r}")
return [], []
25 changes: 12 additions & 13 deletions vulnerabilities/management/commands/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,18 @@ def add_arguments(self, parser):
parser.add_argument("sources", nargs="*", help="Fully qualified importer name to run")

def handle(self, *args, **options):
if options["list"]:
self.list_sources()
return

if options["all"]:
self.import_data(importers=IMPORTERS_REGISTRY.values())
return

sources = options["sources"]
if not sources:
raise CommandError('Please provide at least one importer to run or use "--all".')

self.import_data(validate_importers(sources))
try:
if options["list"]:
self.list_sources()
elif options["all"]:
self.import_data(importers=IMPORTERS_REGISTRY.values())
else:
sources = options["sources"]
if not sources:
raise CommandError('Please provide at least one importer to run or use "--all".')
self.import_data(validate_importers(sources))
except KeyboardInterrupt:
raise CommandError("Keyboard interrupt received. Stopping...")

def list_sources(self):
self.stdout.write("Vulnerability data can be imported from the following importers:")
Expand Down
25 changes: 12 additions & 13 deletions vulnerabilities/management/commands/improve.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,18 @@ def add_arguments(self, parser):
parser.add_argument("sources", nargs="*", help="Fully qualified improver name to run")

def handle(self, *args, **options):
if options["list"]:
self.list_sources()
return

if options["all"]:
self.improve_data(IMPROVERS_REGISTRY.values())
return

sources = options["sources"]
if not sources:
raise CommandError('Please provide at least one improver to run or use "--all".')

self.improve_data(validate_improvers(sources))
try:
if options["list"]:
self.list_sources()
elif options["all"]:
self.improve_data(IMPROVERS_REGISTRY.values())
else:
sources = options["sources"]
if not sources:
raise CommandError('Please provide at least one improver to run or use "--all".')
self.improve_data(validate_improvers(sources))
except KeyboardInterrupt:
raise CommandError("Keyboard interrupt received. Stopping...")

def list_sources(self):
improvers = list(IMPROVERS_REGISTRY)
Expand Down
20 changes: 20 additions & 0 deletions vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from django.contrib.auth.models import UserManager
from django.core import exceptions
from django.core.exceptions import ValidationError
from django.core.paginator import Paginator
from django.core.validators import MaxValueValidator
from django.core.validators import MinValueValidator
from django.db import models
Expand Down Expand Up @@ -53,6 +54,20 @@ def get_or_none(self, *args, **kwargs):
with suppress(self.model.DoesNotExist, ValidationError):
return self.get(*args, **kwargs)

def paginated(self, per_page=5000):
    """
    Iterate over a (large) QuerySet by chunks of ``per_page`` items.

    This technique is essential for preventing memory issues when iterating
    over a large number of objects: each page is fetched with its own
    query instead of loading every row at once.

    An unordered QuerySet is ordered by primary key before being paginated.
    Without a stable ordering the database may return rows in a different
    order for each page query, so objects could be skipped or yielded twice
    (Django's Paginator warns about this with UnorderedObjectListWarning).
    A QuerySet that is already ordered, or already sliced, is used as-is.

    See these links for inspiration:
    https://nextlinklabs.com/resources/insights/django-big-data-iteration
    https://stackoverflow.com/questions/4222176/why-is-iterating-through-a-large-django-queryset-consuming-massive-amounts-of-me/
    """
    queryset = self
    # A sliced QuerySet cannot be reordered (Django raises TypeError), so it
    # is left untouched to keep the previous behavior for that case.
    if not self.ordered and not self.query.is_sliced:
        queryset = self.order_by("pk")

    paginator = Paginator(queryset, per_page=per_page)
    for page_number in paginator.page_range:
        # ``yield from`` avoids a loop variable named ``object``, which
        # shadowed the builtin in the previous implementation.
        yield from paginator.page(page_number).object_list


class VulnerabilityQuerySet(BaseQuerySet):
def with_cpes(self):
Expand Down Expand Up @@ -770,6 +785,10 @@ def url(self):
return f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json"


class AdvisoryQuerySet(BaseQuerySet):
    """
    QuerySet for advisories.

    Defines no methods of its own; everything comes from ``BaseQuerySet``.
    """


class Advisory(models.Model):
"""
An advisory represents data directly obtained from upstream transformed
Expand Down Expand Up @@ -809,6 +828,7 @@ class Advisory(models.Model):
"module name importing the advisory. Eg:"
"vulnerabilities.importers.nginx.NginxImporter",
)
objects = AdvisoryQuerySet.as_manager()

class Meta:
unique_together = ["aliases", "unique_content_id", "date_published"]
Expand Down

0 comments on commit 0655260

Please sign in to comment.