-
Notifications
You must be signed in to change notification settings - Fork 201
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement improver registry and management command
Improvers look strikingly similar to importers, with some enhancements (e.g., they don't use an importer_yielder alternative; see issue 501). Overview: Improvers maintain a contract (like Advisory) with improve_runner, which is named Inference. The Inference class embeds an advisory and a confidence score for that advisory. It is the job of an improver to fetch the data to improve from the database (probably using some helper functions), then use whatever means necessary to improve that data sample and return Inferences. Do note that Inferences which have already been "imported" by importers will be totally discarded as redundant. Also, in case of two inferences on the same data point, the one with the highest confidence will be taken into the database. Food for thought: Pssst... Probably the Inference class is useless and the Advisory class could itself carry that confidence score, but then the importers would have to state that whatever they import has 100% confidence, which is susceptible to typos and to some importers not mentioning their confidence at all, thus zeroing out the confidence. Anyway, importers and improvers should be different and separated. If not, then we could totally discard the idea of improvers and embed everything in an importer with a confidence score. But then, where goes the idea of modularity and keeping things simple? Also, data coming from an "import"er should always be absolutely correct. This will also ensure that if downstream doesn't want any "improved" data, then they don't get our guesses. The whole point of separating importers and improvers is that running improvers can be totally optional and based on downstream taste. Signed-off-by: Hritik Vijay <[email protected]>
- Loading branch information
Showing
4 changed files
with
170 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import dataclasses | ||
import logging | ||
from vulnerabilities.data_source import Advisory | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
class OverConfidenceError(ValueError):
    """Signal that a confidence score is above the permitted maximum."""
|
||
class UnderConfidenceError(ValueError):
    """Signal that a confidence score is below the permitted minimum."""
|
||
# Highest confidence score an Inference may carry; anything greater is rejected
# with OverConfidenceError when the Inference is constructed.
MAX_CONFIDENCE = 100
|
||
@dataclasses.dataclass(order=True)
class Inference:
    """
    This data class expresses the contract between data improvers and the improve runner.

    Attributes:
        advisory: the advisory this inference is about.
        confidence: confidence score for the advisory; it must lie between 0 and
            MAX_CONFIDENCE, both inclusive.

    Raises:
        OverConfidenceError: if ``confidence`` is greater than MAX_CONFIDENCE.
        UnderConfidenceError: if ``confidence`` is less than 0.
    """

    # NOTE(review): with order=True, instances compare field by field in declaration
    # order, so ``advisory`` is compared before ``confidence`` - confirm this is intended.
    advisory: Advisory
    confidence: int

    def __post_init__(self):
        # Validate at construction time and report the offending value so that a
        # misbehaving improver can be diagnosed from the error alone.
        if self.confidence > MAX_CONFIDENCE:
            raise OverConfidenceError(
                f"Confidence {self.confidence!r} is greater than the maximum of {MAX_CONFIDENCE}"
            )

        if self.confidence < 0:
            raise UnderConfidenceError(f"Confidence {self.confidence!r} is less than 0")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
from datetime import datetime | ||
import dataclasses | ||
import logging | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
class ImproveRunner:
    """
    Runs one improver over data that a datasource has already imported.

    The improver produces inferences, which may be derived from multiple factors.
    As originally noted, every inference carries a confidence score whose threshold
    could be tuned in user settings (.env file).
    """

    def __init__(self, improver):
        """Remember the improver that ``run`` will execute."""
        self.improver = improver

    def run(self) -> None:
        """
        Ask the improver for its updated inferences and hand them to
        ``process_inferences``, logging before and after.
        """
        module_name = self.improver.__module__
        logger.info("Improving using %s.", module_name)
        process_inferences(self.improver.updated_inferences())
        logger.info("Finished improving using %s.", module_name)
|
||
|
||
def process_inferences(inferences):
    """
    Placeholder for handling the inferences produced by an improver.

    Not implemented yet: the argument is ignored and None is returned.
    """
    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Registry of available improvers. Entries are looked up by their ``__module__``
# attribute (see ``class_name``); the list starts out empty.
IMPROVER_REGISTRY = []
|
||
def class_name(module_name: str):
    """
    Return the first entry of IMPROVER_REGISTRY whose ``__module__`` equals
    ``module_name``.

    Raises:
        AttributeError: if no registered improver matches ``module_name``.
    """
    for improver in IMPROVER_REGISTRY:
        if improver.__module__ == module_name:
            return improver

    # Keep AttributeError (callers catch it) but say which name was not found.
    raise AttributeError(f"No improver registered for module {module_name!r}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
# | ||
# Copyright (c) nexB Inc. and others. All rights reserved. | ||
# http://nexb.com and https://github.com/nexB/vulnerablecode/ | ||
# The VulnerableCode software is licensed under the Apache License version 2.0. | ||
# Data generated with VulnerableCode require an acknowledgment. | ||
# | ||
# You may not use this software except in compliance with the License. | ||
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software distributed | ||
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
# CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations under the License. | ||
# | ||
# When you publish or redistribute any data created with VulnerableCode or any VulnerableCode | ||
# derivative work, you must accompany this data with the following acknowledgment: | ||
# | ||
# Generated with VulnerableCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES | ||
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from | ||
# VulnerableCode should be considered or used as legal advice. Consult an Attorney | ||
# for any legal advice. | ||
# VulnerableCode is a free software code scanning tool from nexB Inc. and others. | ||
# Visit https://github.com/nexB/vulnerablecode/ for support and download. | ||
|
||
from datetime import datetime | ||
import traceback | ||
|
||
from django.core.management.base import BaseCommand | ||
from django.core.management.base import CommandError | ||
|
||
from vulnerabilities.models import Importer | ||
from vulnerabilities.import_runner import ImportRunner | ||
from vulnerabilities.importer_yielder import load_importers | ||
from vulnerabilities.improvers import IMPROVER_REGISTRY | ||
from vulnerabilities.improvers import class_name | ||
from vulnerabilities.improve_runner import ImproveRunner | ||
|
||
|
||
class Command(BaseCommand):
    """
    Management command that lists the registered improvers or runs them.

    Improvers are identified on the command line by their module name, which is
    what ``--list`` prints.
    """

    help = "Improve imported vulnerability data"

    def add_arguments(self, parser):
        """Declare the ``--list`` and ``--all`` flags and the positional ``sources``."""
        parser.add_argument(
            "--list",
            action="store_true",
            help="List available data inferences",
        )
        parser.add_argument(
            "--all", action="store_true", help="Improve data from all available inferences"
        )

        # The help text used to read "Data sources from which to import", copied from
        # the import command; these positionals are improver module names.
        parser.add_argument(
            "sources", nargs="*", help="Module names of the improvers to improve data with"
        )

    def handle(self, *args, **options):
        """
        Dispatch on the parsed options: ``--list`` wins over ``--all``, which wins
        over explicitly named sources.

        Raises:
            CommandError: if neither flag nor any source is given, if a source is
                unknown, or if at least one improver fails.
        """
        if options["list"]:
            self.list_sources()
            return

        if options["all"]:
            self.improve_data(IMPROVER_REGISTRY)
            return

        sources = options["sources"]
        if not sources:
            raise CommandError(
                'Please provide at least one data inference to improve from or use "--all".'
            )

        self.improve_data(valid_sources(sources))

    def list_sources(self):
        """Write the module names of all registered improvers to stdout."""
        improvers = [improver.__module__ for improver in IMPROVER_REGISTRY]
        self.stdout.write("Vulnerability data can be improved from the following sources:")
        self.stdout.write(", ".join(improvers))

    def improve_data(self, improvers):
        """
        Run every improver in ``improvers`` through ImproveRunner.

        A failing improver does not stop the others: its traceback is printed, its
        module name is recorded, and a CommandError naming all failures is raised
        once every improver has been attempted.
        """
        failed_improvers = []

        for improver in improvers:
            self.stdout.write(f"Improving data using {improver.__module__}")
            try:
                ImproveRunner(improver).run()
                self.stdout.write(
                    self.style.SUCCESS(f"Successfully improved data using {improver.__module__}")
                )
            except Exception:
                # Deliberately broad: this is the top-level boundary and the remaining
                # improvers should still get their turn.
                failed_improvers.append(improver.__module__)
                traceback.print_exc()
                self.stdout.write(
                    self.style.ERROR(f"Failed to run improver {improver.__module__}. Continuing...")
                )

        if failed_improvers:
            raise CommandError(f"{len(failed_improvers)} failed!: {','.join(failed_improvers)}")
|
||
|
||
def valid_sources(sources):
    """
    Resolve each name in ``sources`` to an improver through ``class_name``.

    Every name is attempted before anything is reported, so a single CommandError
    lists all names that could not be resolved. When all names resolve, the
    improvers are returned in the order the names were given.
    """
    resolved = []
    unknown_sources = []
    for name in sources:
        try:
            match = class_name(name)
        except AttributeError:
            unknown_sources.append(name)
        else:
            resolved.append(match)

    if unknown_sources:
        raise CommandError(f"Unknown sources: {unknown_sources}")

    return resolved
|
||
|