diff --git a/cve_bin_tool/cli.py b/cve_bin_tool/cli.py
index a92d3e39a2..ddd1a2b154 100644
--- a/cve_bin_tool/cli.py
+++ b/cve_bin_tool/cli.py
@@ -186,7 +186,7 @@ def main(argv=None):
         "-f",
         "--format",
         action="store",
-        choices=["csv", "json", "console", "html", "pdf"],
+        choices=["csv", "json", "console", "html", "pdf", "md"],
         help="update output format (default: console)",
         default="console",
     )
diff --git a/cve_bin_tool/output_engine/__init__.py b/cve_bin_tool/output_engine/__init__.py
index 26ce074a29..8a03dded7f 100644
--- a/cve_bin_tool/output_engine/__init__.py
+++ b/cve_bin_tool/output_engine/__init__.py
@@ -17,6 +17,7 @@
 from ..version import VERSION
 from .console import output_console
 from .html import output_html
+from .threats import output_threats
 from .util import (
     add_extension_if_not,
     format_output,
@@ -390,6 +391,22 @@ def output_cves(self, outfile, output_type="console"):
                 self.logger,
                 outfile,
             )
+        elif output_type == "md":
+            if self.filename.endswith("THREATS.md"):
+                output_threats(
+                    self.all_cve_data,
+                    self.scanned_dir,
+                    self.filename,
+                    self.themes_dir,
+                    self.total_files,
+                    self.products_with_cve,
+                    self.products_without_cve,
+                    self.merge_report,
+                    self.logger,
+                    outfile,
+                )
+            else:
+                raise NotImplementedError(f"Only generating THREATS.md currently supported. Attempted output to {self.filename!r}.")
         else:  # console, or anything else that is unrecognised
             output_console(
                 self.all_cve_data,
diff --git a/cve_bin_tool/output_engine/threats.py b/cve_bin_tool/output_engine/threats.py
new file mode 100644
index 0000000000..3d76d49710
--- /dev/null
+++ b/cve_bin_tool/output_engine/threats.py
@@ -0,0 +1,101 @@
+# Copyright (C) 2021 Intel Corporation
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+import os
+from typing import Dict, List, Union
+
+from ..log import LOGGER
+from ..merge import MergeReports
+from ..util import CVEData, ProductInfo
+
+
+def output_threats(
+    all_cve_data: Dict[ProductInfo, CVEData],
+    scanned_dir: str,
+    filename: str,
+    theme_dir: str,
+    total_files: int,
+    products_with_cve: int,
+    products_without_cve: int,
+    merge_report: Union[None, MergeReports],
+    logger: LOGGER,
+    outfile,
+):
+    """Write a THREATS.md report to ``outfile``.
+
+    The report is a ``# Threat Model`` heading followed by two fenced code
+    blocks built from ``cve_bin_tool.scanners.dataflow.COLLECTOR_DATAFLOW``:
+    a ``mermaid`` diagram of the data flow and a ``json`` export of it.
+
+    ``outfile`` must be a writable text stream. The other parameters are
+    accepted but not used yet.
+    """
+    # dffml is only installed with the optional ``dataflow`` extra, so it is
+    # imported here rather than at module level: the output engine imports
+    # this module unconditionally.
+    # TODO Check if dataflow extra is installed. Build dataflows from scan
+    # results. Generate mermaid diagrams from flows.
+    import asyncio
+    import contextlib
+    import io
+    import json
+
+    import dffml
+    import dffml.cli.dataflow
+    import dffml.service.dev
+
+    import cve_bin_tool.scanners.dataflow
+
+    outfile.write("\n# Threat Model\n")
+
+    # ------------------ BEGIN MERMAID OUTPUT ------------------
+    outfile.write("\n\n```mermaid\n")
+
+    # Keyword arguments overlaid onto the fields of the derived config so the
+    # command runs against COLLECTOR_DATAFLOW.
+    field_modifications = {
+        "dataflow": {
+            "default_factory": lambda: cve_bin_tool.scanners.dataflow.COLLECTOR_DATAFLOW
+        },
+        "simple": {"default": True},
+        "stages": {"default_factory": lambda: [dffml.Stage.PROCESSING.value]},
+    }
+    # Create a derived class
+    DiagramForMyDataFlow = dffml.cli.dataflow.Diagram.subclass(
+        "DiagramForMyDataFlow",
+        field_modifications,
+    )
+    # Whatever the diagram command prints to stdout goes into the report.
+    with contextlib.redirect_stdout(outfile):
+        asyncio.run(DiagramForMyDataFlow._main())
+
+    outfile.write("\n```\n")
+    # ------------------ END MERMAID OUTPUT ------------------
+
+    # ------------------ BEGIN OPEN ARCHITECTURE OUTPUT ------------------
+    outfile.write("\n```json\n")
+
+    field_modifications = {
+        "export": {
+            "default_factory": lambda: "cve_bin_tool.scanners.dataflow:COLLECTOR_DATAFLOW"
+        },
+        "configloader": {"default_factory": lambda: dffml.JSONConfigLoader},
+    }
+    # Create a derived class
+    ExportForMyDataFlow = dffml.service.dev.Export.subclass(
+        "ExportForMyDataFlow",
+        field_modifications,
+    )
+    # The export is collected from the stream's ``buffer`` attribute as bytes
+    # (presumably Export writes to ``sys.stdout.buffer`` - confirm). StringIO
+    # has no ``buffer``, so one is attached before redirecting stdout.
+    captured = io.StringIO()
+    captured.buffer = io.BytesIO()
+    with contextlib.redirect_stdout(captured):
+        asyncio.run(ExportForMyDataFlow._main())
+
+    # Round-trip through json to re-indent the export.
+    exported = json.loads(captured.buffer.getvalue().decode())
+    outfile.write(json.dumps(exported, indent=4))
+    outfile.write("\n```\n")
+    # ------------------ END OPEN ARCHITECTURE OUTPUT ------------------
diff --git a/cve_bin_tool/scanners/__init__.py b/cve_bin_tool/scanners/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/cve_bin_tool/scanners/dataflow.py b/cve_bin_tool/scanners/dataflow.py
new file mode 100644
index 0000000000..63daebe833
--- /dev/null
+++ b/cve_bin_tool/scanners/dataflow.py
@@ -0,0 +1,82 @@
+"""
+See doc/DATA_FLOW_SCANNER.rst for more information
+"""
+import sys
+import asyncio
+import pathlib
+import platform
+from typing import Dict, NewType
+
+import dffml
+
+import dffml_feature_git.feature.definitions
+import dffml_feature_git.feature.operations
+
+
+DirectoryToScan = NewType("DirectoryToScan", pathlib.Path)
+ScanResults = NewType("ScanResults", dict)
+InputOfUnknownType = NewType("InputOfUnknownType", str)
+
+
+@dffml.op(
+    inputs={
+        "repo": dffml_feature_git.feature.definitions.git_repository,
+    },
+    outputs={
+        "result": DirectoryToScan,
+    },
+)
+async def repo_to_directory(repo):
+    """Expose the directory of a git repository as a DirectoryToScan."""
+    return {"result": repo.directory}
+
+
+@dffml.op
+async def scan_directory(
+    directory: DirectoryToScan,
+) -> ScanResults:
+    """Placeholder: scanning a directory is not implemented yet."""
+
+
+# Named differently from scan_directory: a second ``def scan_directory`` would
+# rebind the module attribute, and COLLECTOR_DATAFLOW below is collected from
+# this module object, so the directory operation would be lost.
+@dffml.op
+async def scan_input_of_unknown_type(
+    arg: InputOfUnknownType,
+) -> ScanResults:
+    """Placeholder: scanning an input of unknown type is not implemented yet."""
+
+
+COLLECTOR_DATAFLOW = dffml.DataFlow(
+    *dffml.opimp_in(dffml_feature_git.feature.operations),
+    *dffml.opimp_in(sys.modules[__name__]),
+)
+
+# CVEBinToolDataFlow = dffml.SystemContext(
+#     upstream=COLLECTOR_DATAFLOW,
+# )
+# scanner = CVEBinToolDataFlow.deployment()
+
+
+async def main():
+    """Run COLLECTOR_DATAFLOW with each command line argument as a URL input."""
+    # async for results in scanner():
+    async for _ctx, results in dffml.run(
+        COLLECTOR_DATAFLOW,
+        {
+            arg: [
+                dffml.Input(
+                    value=arg,
+                    definition=dffml_feature_git.feature.definitions.URL,
+                    # definition=InputOfUnknownType,
+                ),
+            ]
+            for arg in sys.argv[1:]
+        },
+    ):
+        print(_ctx, results)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/doc/DATA_FLOW_SCANNER.rst b/doc/DATA_FLOW_SCANNER.rst
new file mode 100644
index 0000000000..1e3270ced9
--- /dev/null
+++ b/doc/DATA_FLOW_SCANNER.rst
@@ -0,0 +1,71 @@
+Data Flow Based Scanner
+#######################
+
+Implement multi-language support in CVE Bin Tool via introduction of data flows
+to handle scanning. We'll then extend via overlays to add functionality such as
+shouldi where appropriate.
+
+References:
+
+- shouldi
+
+  - https://intel.github.io/dffml/shouldi.html
+  - https://intel.github.io/dffml/examples/shouldi.html
+  - https://intel.github.io/dffml/examples/dataflows.html
+
+- Alice/Open Architecture
+
+  - https://github.com/intel/dffml/discussions/1369
+
+.. note::
+
+    Tested against development version of DFFML
+    9ddcdfd6f8de743f87d41b74d53fde2c182861c7
+
+Install
+*******
+
+Install with extra ``dataflow`` to enable data flow based scanner.
+
+
+.. code-block:: console
+
+    $ python -m pip install cve-bin-tool[dataflow]
+
+Scan with Data Flow Orchestrator
+********************************
+
+Scan files as usual (alice is a short name for the Open Architecture format).
+
+.. code-block:: console
+
+    $ cve-bin-tool --scanner dataflow .
+
+Scan for Python Dependencies
+****************************
+
+Enable scanning of python dependencies. We overlay the ``shouldi`` scanning
+flows to have them attempt to scan each directory which we come across while
+scanning recursively.
+
+.. code-block:: console
+
+    $ cve-bin-tool --scanner dataflow --overlays cve_bin_tool.overlays.shouldi:OVERLAY -- .
+
+Output To Open Architecture Format
+**********************************
+
+When running the scan we can output to the Open Architecture format (aka Alice).
+
+.. code-block:: console
+
+    $ cve-bin-tool --scanner dataflow --format alice --output-file scan.alice .
+
+Supplement Threat Model with Scan Data
+**************************************
+
+Use a saved scan as input to write the threat model to ``THREATS.md``.
+
+.. code-block:: console
+
+    $ cve-bin-tool --input-file scan.alice --format md --output-file THREATS.md
diff --git a/setup.py b/setup.py
index 2cfd8021cc..791d7ccf60 100644
--- a/setup.py
+++ b/setup.py
@@ -47,6 +47,7 @@
     install_requires=requirements,
     extras_require={
         "PDF": ["reportlab"],
+        "dataflow": ["dffml", "dffml-feature-git"],
     },
     packages=find_packages(
         exclude=["locales", "presentation"],