From 3eaa2ab3854decc1d4be62a13871bf609cdcd890 Mon Sep 17 00:00:00 2001 From: John Andersen Date: Fri, 10 Jun 2022 05:32:50 -0700 Subject: [PATCH 1/2] scanners: Add data flow based scanning For multi-language support. Currently supports Python via inclusion of shouldi flows. Below is the command used for testing: .. code-block:: console $ nodemon -e py --exec 'clear; rm /tmp/tmp.ahAs38AoDJ/THREATS.md; cve-bin-tool --update never --format md --output-file /tmp/tmp.ahAs38AoDJ/THREATS.md /tmp/tmp.ahAs38AoDJ; test 1' Signed-off-by: John Andersen --- cve_bin_tool/scanners/__init__.py | 0 cve_bin_tool/scanners/dataflow.py | 77 +++++++++++++++++++++++++++++++ doc/DATA_FLOW_SCANNER.rst | 45 ++++++++++++++++++ setup.py | 1 + 4 files changed, 123 insertions(+) create mode 100644 cve_bin_tool/scanners/__init__.py create mode 100644 cve_bin_tool/scanners/dataflow.py create mode 100644 doc/DATA_FLOW_SCANNER.rst diff --git a/cve_bin_tool/scanners/__init__.py b/cve_bin_tool/scanners/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cve_bin_tool/scanners/dataflow.py b/cve_bin_tool/scanners/dataflow.py new file mode 100644 index 0000000000..63daebe833 --- /dev/null +++ b/cve_bin_tool/scanners/dataflow.py @@ -0,0 +1,77 @@ +""" +See doc/DATA_FLOW_SCANNER.rst for more information +""" +import sys +import asyncio +import pathlib +import platform +from typing import Dict, NewType + +import dffml + +import dffml_feature_git.feature.definitions +import dffml_feature_git.feature.operations + + +DirectoryToScan = NewType("DirectoryToScan", pathlib.Path) +ScanResults = NewType("ScanResults", dict) +InputOfUnknownType = NewType("InputOfUnknownType", str) + + +@dffml.op( + inputs={ + "repo": dffml_feature_git.feature.definitions.git_repository, + }, + outputs={ + "result": DirectoryToScan, + }, +) +async def repo_to_directory(repo): + return {"result": repo.directory} + + +@dffml.op +async def scan_directory( + directory: DirectoryToScan, +) -> ScanResults: + pass + + 
+@dffml.op +async def scan_input_of_unknown_type( + arg: InputOfUnknownType, +) -> ScanResults: + """Placeholder: scanning an input of unknown type is not implemented yet.""" + + +COLLECTOR_DATAFLOW = dffml.DataFlow( + *dffml.opimp_in(dffml_feature_git.feature.operations), + *dffml.opimp_in(sys.modules[__name__]), +) + +# CVEBinToolDataFlow = dffml.SystemContext( +# upstream=COLLECTOR_DATAFLOW, +# ) +# scanner = CVEBinToolDataFlow.deployment() + + +async def main(): + # async for results in scanner(): + async for ctx, results in dffml.run( + COLLECTOR_DATAFLOW, + { + arg: [ + dffml.Input( + value=arg, + definition=dffml_feature_git.feature.definitions.URL, + # definition=InputOfUnknownType, + ), + ] + for arg in sys.argv[1:] + }, + ): + print(ctx, results) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/doc/DATA_FLOW_SCANNER.rst b/doc/DATA_FLOW_SCANNER.rst new file mode 100644 index 0000000000..a963e2e58e --- /dev/null +++ b/doc/DATA_FLOW_SCANNER.rst @@ -0,0 +1,45 @@ +Data Flow Based Scanner +####################### + +Implement multi language support in CVE Bin Tool via introduction of data flows +to handle scanning. We'll then extend via overlays to add functionality such as +shouldi where appropriate. + +References: + +- https://intel.github.io/dffml/shouldi.html +- https://intel.github.io/dffml/examples/shouldi.html +- https://intel.github.io/dffml/examples/dataflows.html + +.. note:: + + Tested against development version of DFFML + 9ddcdfd6f8de743f87d41b74d53fde2c182861c7 + + +Install +******* + +Install with extra + +.. code-block:: console + + $ python -m pip install cve-bin-tool[dataflow] + +Examples +******** + +Scan files as usual + +.. code-block:: console + :test: + + $ python -m cve_bin_tool.scanners.dataflow . + +Scan a git repo. Currently runs ``shoudli`` scanning. (In future we can add +overlays to run the build then scan). + +.. 
code-block:: console + :test: + + $ python -m cve_bin_tool.scanners.dataflow https://github.com/intel/cve-bin-tool diff --git a/setup.py b/setup.py index 2cfd8021cc..791d7ccf60 100644 --- a/setup.py +++ b/setup.py @@ -47,6 +47,7 @@ install_requires=requirements, extras_require={ "PDF": ["reportlab"], + "dataflow": ["dffml", "dffml-feature-git"], }, packages=find_packages( exclude=["locales", "presentation"], From 7cb90d6009d047dfc08dead28110f2314d8c016a Mon Sep 17 00:00:00 2001 From: John Andersen Date: Fri, 10 Jun 2022 09:53:23 -0700 Subject: [PATCH 2/2] output engine: threats: Generate THREATS.md file Signed-off-by: John Andersen --- cve_bin_tool/cli.py | 2 +- cve_bin_tool/output_engine/__init__.py | 17 +++ cve_bin_tool/output_engine/threats.py | 143 +++++++++++++++++++++++++ doc/DATA_FLOW_SCANNER.rst | 56 +++++++--- 4 files changed, 202 insertions(+), 16 deletions(-) create mode 100644 cve_bin_tool/output_engine/threats.py diff --git a/cve_bin_tool/cli.py b/cve_bin_tool/cli.py index a92d3e39a2..ddd1a2b154 100644 --- a/cve_bin_tool/cli.py +++ b/cve_bin_tool/cli.py @@ -186,7 +186,7 @@ def main(argv=None): "-f", "--format", action="store", - choices=["csv", "json", "console", "html", "pdf"], + choices=["csv", "json", "console", "html", "pdf", "md"], help="update output format (default: console)", default="console", ) diff --git a/cve_bin_tool/output_engine/__init__.py b/cve_bin_tool/output_engine/__init__.py index 26ce074a29..8a03dded7f 100644 --- a/cve_bin_tool/output_engine/__init__.py +++ b/cve_bin_tool/output_engine/__init__.py @@ -17,6 +17,7 @@ from ..version import VERSION from .console import output_console from .html import output_html +from .threats import output_threats from .util import ( add_extension_if_not, format_output, @@ -390,6 +391,22 @@ def output_cves(self, outfile, output_type="console"): self.logger, outfile, ) + elif output_type == "md": + if self.filename.endswith("THREATS.md"): + output_threats( + self.all_cve_data, + self.scanned_dir, + 
self.filename, + self.themes_dir, + self.total_files, + self.products_with_cve, + self.products_without_cve, + self.merge_report, + self.logger, + outfile, + ) + else: + raise NotImplementedError(f"Only generating THREATS.md currently supported. Attempted output to {self.filename!r}.") else: # console, or anything else that is unrecognised output_console( self.all_cve_data, diff --git a/cve_bin_tool/output_engine/threats.py b/cve_bin_tool/output_engine/threats.py new file mode 100644 index 0000000000..3d76d49710 --- /dev/null +++ b/cve_bin_tool/output_engine/threats.py @@ -0,0 +1,143 @@ +# Copyright (C) 2021 Intel Corporation +# SPDX-License-Identifier: GPL-3.0-or-later + +import os +from typing import Dict, List, Union + + +from ..merge import MergeReports + +from ..log import LOGGER +from ..util import CVEData, ProductInfo + + +def output_threats( + all_cve_data: Dict[ProductInfo, CVEData], + scanned_dir: str, + filename: str, + theme_dir: str, + total_files: int, + products_with_cve: int, + products_without_cve: int, + merge_report: Union[None, MergeReports], + logger: LOGGER, + outfile, +): + """Write a THREATS.md threat model (mermaid diagram and JSON export of the collector data flow) to outfile.""" + from pprint import pformat + logger.debug("%s", pformat(locals())) + + import textwrap + outfile.write( + textwrap.dedent( + """ + # Threat Model + """ + ) + ) + + # ------------------ BEGIN MERMAID OUTPUT ------------------ + outfile.write( + textwrap.dedent( + """ + + ```mermaid + """ + ) + ) + + # Write out the mermaid diagram + import sys + import asyncio + import contextlib + import dffml + import dffml.cli.dataflow + + + # TODO Check if dataflow extra is installed. Build dataflows from scan + # results. Generate mermaid diagrams from flows. 
+ import cve_bin_tool.scanners.dataflow + + # The overlaid keyword arguments of fields within to be created + field_modifications = { + "dataflow": {"default_factory": lambda: cve_bin_tool.scanners.dataflow.COLLECTOR_DATAFLOW}, + "simple": {"default": True}, + "stages": {"default_factory": lambda: [dffml.Stage.PROCESSING.value]}, + } + # Create a derived class + DiagramForMyDataFlow = dffml.cli.dataflow.Diagram.subclass( + "DiagramForMyDataFlow", field_modifications, + ) + logger.debug("%s", DiagramForMyDataFlow) + # + logger.debug("%s", DiagramForMyDataFlow.CONFIG) + # + with contextlib.redirect_stdout(outfile): + asyncio.run(DiagramForMyDataFlow._main()) + + + outfile.write( + textwrap.dedent( + """ + ``` + """ + ) + ) + # ------------------ END MERMAID OUTPUT ------------------ + + # ------------------ BEGIN OPEN ARCHITECTURE OUTPUT ------------------ + outfile.write( + textwrap.dedent( + """ + ```json + """ + ) + ) + + # Write out the Open Architecture (JSON) export of the data flow + import sys + import asyncio + import contextlib + import dffml + import dffml.cli.dataflow + import dffml.service.dev + + + # import dffml_config_yaml.configloader  # unused; only needed for the commented-out YAML config loader below + + + # TODO Check if dataflow extra is installed. Build dataflows from scan + # results. Generate mermaid diagrams from flows. 
+ import cve_bin_tool.scanners.dataflow + + # The overlaid keyword arguments of fields within to be created + field_modifications = { + "export": {"default_factory": lambda: "cve_bin_tool.scanners.dataflow:COLLECTOR_DATAFLOW"}, + # "configloader": {"default_factory": lambda: dffml_config_yaml.configloader.YamlConfigLoader}, + "configloader": {"default_factory": lambda: dffml.JSONConfigLoader}, + } + + # Create a derived class + ExportForMyDataFlow = dffml.service.dev.Export.subclass( + "ExportForMyDataFlow", field_modifications, + ) + logger.debug("%s", ExportForMyDataFlow) + # + logger.debug("%s", ExportForMyDataFlow.CONFIG) + # + import io + a_out = io.StringIO() + a_out.buffer = io.BytesIO() + with contextlib.redirect_stdout(a_out): + asyncio.run(ExportForMyDataFlow._main()) + + import json + outfile.write(json.dumps(json.loads(a_out.buffer.getvalue().decode()), indent=4)) + outfile.write( + textwrap.dedent( + """ + ``` + """ + ) + ) + # ------------------ END OPEN ARCHITECTURE OUTPUT ------------------ diff --git a/doc/DATA_FLOW_SCANNER.rst b/doc/DATA_FLOW_SCANNER.rst index a963e2e58e..1e3270ced9 100644 --- a/doc/DATA_FLOW_SCANNER.rst +++ b/doc/DATA_FLOW_SCANNER.rst @@ -7,39 +7,65 @@ shouldi were appropriate. References: -- https://intel.github.io/dffml/shouldi.html -- https://intel.github.io/dffml/examples/shouldi.html -- https://intel.github.io/dffml/examples/dataflows.html +- shouldi + + - https://intel.github.io/dffml/shouldi.html + - https://intel.github.io/dffml/examples/shouldi.html + - https://intel.github.io/dffml/examples/dataflows.html + +- Alice/Open Architecture + + - https://github.com/intel/dffml/discussions/1369 .. note:: Tested against development version of DFFML 9ddcdfd6f8de743f87d41b74d53fde2c182861c7 - Install ******* -Install with extra +Install with extra ``dataflow`` to enable data flow based scanner. + + +.. 
code-block:: console + + $ python -m pip install cve-bin-tool[dataflow] + +Scan with Data Flow Orchestrator +******************************** + +Scan files as usual (alice is a short name for the Open Architecture format). + +.. code-block:: console + + $ cve-bin-tool --scanner dataflow . + +Scan for Python Dependencies +**************************** + +Enable scanning of Python dependencies. We overlay the ``shouldi`` scanning +flows to have them attempt to scan each directory which we come across while +scanning recursively. .. code-block:: console - $ python -m pip install cve-bin-tool[dataflow] + $ cve-bin-tool --scanner dataflow --overlays cve_bin_tool.overlays.shouldi:OVERLAY -- . -Examples -******** +Output To Open Architecture Format +********************************** -Scan files as usual +When running the scan we can output to the Open Architecture format (aka Alice). .. code-block:: console - :test: - $ python -m cve_bin_tool.scanners.dataflow . + $ cve-bin-tool --scanner dataflow --format alice --output-file scan.alice . + +Supplement Threat Model with Scan Data +************************************** -Scan a git repo. Currently runs ``shoudli`` scanning. (In future we can add -overlays to run the build then scan). +Feed a saved Open Architecture scan (``scan.alice``) back in to generate a ``THREATS.md`` supplemented with the scan data. .. code-block:: console - :test: - $ python -m cve_bin_tool.scanners.dataflow https://github.com/intel/cve-bin-tool