Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scanners: Add data flow based scanning for multi language support #1698

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cve_bin_tool/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def main(argv=None):
"-f",
"--format",
action="store",
choices=["csv", "json", "console", "html", "pdf"],
choices=["csv", "json", "console", "html", "pdf", "md"],
help="update output format (default: console)",
default="console",
)
Expand Down
17 changes: 17 additions & 0 deletions cve_bin_tool/output_engine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from ..version import VERSION
from .console import output_console
from .html import output_html
from .threats import output_threats
from .util import (
add_extension_if_not,
format_output,
Expand Down Expand Up @@ -390,6 +391,22 @@ def output_cves(self, outfile, output_type="console"):
self.logger,
outfile,
)
elif output_type == "md":
if self.filename.endswith("THREATS.md"):
output_threats(
self.all_cve_data,
self.scanned_dir,
self.filename,
self.themes_dir,
self.total_files,
self.products_with_cve,
self.products_without_cve,
self.merge_report,
self.logger,
outfile,
)
else:
raise NotImplementedError("Only generating THREATS.md currently supported. Attempted output to {self.filename!r}.")
else: # console, or anything else that is unrecognised
output_console(
self.all_cve_data,
Expand Down
143 changes: 143 additions & 0 deletions cve_bin_tool/output_engine/threats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# Copyright (C) 2021 Intel Corporation
# SPDX-License-Identifier: GPL-3.0-or-later

import os
from typing import Dict, List, Union


from ..merge import MergeReports

from ..log import LOGGER
from ..util import CVEData, ProductInfo


def _write_mermaid_diagram(outfile):
    """Write a mermaid diagram of ``COLLECTOR_DATAFLOW`` to ``outfile``."""
    import asyncio
    import contextlib

    import dffml
    import dffml.cli.dataflow

    import cve_bin_tool.scanners.dataflow

    # Derive a diagram command by overriding the defaults of its config
    # fields, instead of passing command line arguments.
    diagram_command = dffml.cli.dataflow.Diagram.subclass(
        "DiagramForMyDataFlow",
        {
            "dataflow": {
                "default_factory": lambda: cve_bin_tool.scanners.dataflow.COLLECTOR_DATAFLOW
            },
            "simple": {"default": True},
            "stages": {"default_factory": lambda: [dffml.Stage.PROCESSING.value]},
        },
    )
    # Whatever the command prints to stdout is routed into the report.
    with contextlib.redirect_stdout(outfile):
        asyncio.run(diagram_command._main())


def _write_open_architecture_export(outfile):
    """Write ``COLLECTOR_DATAFLOW``, exported as indented JSON, to ``outfile``."""
    import asyncio
    import contextlib
    import io
    import json

    import dffml
    import dffml.service.dev

    # Derive an export command by overriding the defaults of its config
    # fields, instead of passing command line arguments.
    export_command = dffml.service.dev.Export.subclass(
        "ExportForMyDataFlow",
        {
            "export": {
                "default_factory": lambda: "cve_bin_tool.scanners.dataflow:COLLECTOR_DATAFLOW"
            },
            "configloader": {"default_factory": lambda: dffml.JSONConfigLoader},
        },
    )
    # The export is read back from ``stdout.buffer`` as bytes, so the
    # replacement stdout is given a bytes ``buffer`` attribute of its own.
    # NOTE(review): presumably the export command writes to
    # ``sys.stdout.buffer`` - confirm against the DFFML sources.
    captured = io.StringIO()
    captured.buffer = io.BytesIO()
    with contextlib.redirect_stdout(captured):
        asyncio.run(export_command._main())

    # Round trip through ``json`` so the report holds indented output.
    exported = json.loads(captured.buffer.getvalue().decode())
    outfile.write(json.dumps(exported, indent=4))


def output_threats(
    all_cve_data: Dict[ProductInfo, CVEData],
    scanned_dir: str,
    filename: str,
    theme_dir: str,
    total_files: int,
    products_with_cve: int,
    products_without_cve: int,
    merge_report: Union[None, MergeReports],
    logger: LOGGER,
    outfile,
):
    """Write a THREATS.md style report to ``outfile``.

    The report is made of a ``# Threat Model`` heading, a fenced ``mermaid``
    block with a diagram of ``COLLECTOR_DATAFLOW`` and a fenced ``json`` block
    with the exported form of that same data flow.

    Every argument except ``outfile`` is currently unused. They are accepted
    so the signature lines up with the positional call made for the ``md``
    output type.

    Args:
        all_cve_data: CVE data per product (unused).
        scanned_dir: Directory that was scanned (unused).
        filename: Name of the report file (unused).
        theme_dir: Theme directory (unused).
        total_files: Number of files scanned (unused).
        products_with_cve: Number of products with CVEs (unused).
        products_without_cve: Number of products without CVEs (unused).
        merge_report: Merged report, or ``None`` (unused).
        logger: Logger (unused).
        outfile: Writable text stream that receives the report.

    Raises:
        ImportError: If the optional data flow dependencies are missing.
    """
    # TODO Build dataflows from scan results. Generate mermaid diagrams from
    # those flows rather than from the static COLLECTOR_DATAFLOW.

    # Import the optional dependencies before writing anything, so a missing
    # ``dataflow`` extra does not leave a half written report behind.
    try:
        import dffml  # noqa: F401
        import dffml.cli.dataflow  # noqa: F401
        import dffml.service.dev  # noqa: F401

        import cve_bin_tool.scanners.dataflow  # noqa: F401
    except ImportError as error:
        raise ImportError(
            "Generating THREATS.md requires the optional data flow "
            "dependencies, install them with: "
            "python -m pip install cve-bin-tool[dataflow]"
        ) from error

    outfile.write("\n# Threat Model\n")

    # ------------------ BEGIN MERMAID OUTPUT ------------------
    outfile.write("\n\n```mermaid\n")
    _write_mermaid_diagram(outfile)
    outfile.write("\n```\n")
    # ------------------ END MERMAID OUTPUT ------------------

    # ------------------ BEGIN OPEN ARCHITECTURE OUTPUT ------------------
    outfile.write("\n```json\n")
    _write_open_architecture_export(outfile)
    outfile.write("\n```\n")
    # ------------------ END OPEN ARCHITECTURE OUTPUT ------------------
Empty file.
77 changes: 77 additions & 0 deletions cve_bin_tool/scanners/dataflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""
See doc/DATA_FLOW_SCANNER.rst for more information
"""
import sys
import asyncio
import pathlib
import platform
from typing import Dict, NewType

import dffml

import dffml_feature_git.feature.definitions
import dffml_feature_git.feature.operations


# Path based type: the output of ``repo_to_directory`` and the input of the
# first ``scan_directory`` operation declared below.
DirectoryToScan = NewType("DirectoryToScan", pathlib.Path)
# Dict based type: the declared return type of the ``scan_directory``
# operations.
ScanResults = NewType("ScanResults", dict)
# String based type: the input of the second ``scan_directory`` operation.
InputOfUnknownType = NewType("InputOfUnknownType", str)


@dffml.op(
    inputs={"repo": dffml_feature_git.feature.definitions.git_repository},
    outputs={"result": DirectoryToScan},
)
async def repo_to_directory(repo):
    """Expose the directory of a git repository as a ``DirectoryToScan``.

    Returns a dict whose ``result`` key holds ``repo.directory``.
    """
    directory = repo.directory
    return {"result": directory}


# NOTE(review): another function named ``scan_directory`` is defined further
# down in this module. That later definition rebinds the module level name, so
# this one is no longer reachable as ``scan_directory`` once the module has
# been imported. Confirm which of the two is intended and rename the other.
@dffml.op
async def scan_directory(
    directory: DirectoryToScan,
) -> ScanResults:
    """Placeholder operation taking a ``DirectoryToScan``.

    Not implemented yet: the body is empty, so ``None`` is returned.
    """
    pass


# NOTE(review): this reuses the name of the ``scan_directory`` function defined
# just above, which takes a ``DirectoryToScan``. Being the later definition,
# this is the one the module level name ``scan_directory`` ends up bound to.
# Confirm which of the two is intended and rename the other.
@dffml.op
async def scan_directory(
    arg: InputOfUnknownType,
) -> ScanResults:
    """Placeholder operation taking an ``InputOfUnknownType``.

    Not implemented yet: the body is empty, so ``None`` is returned.
    """
    pass


# Data flow built from what ``dffml.opimp_in`` yields for the
# ``dffml_feature_git`` operations module and for this module itself.
# NOTE(review): presumably ``opimp_in`` collects the operation implementations
# found in the given module, in which case this statement has to stay below
# the ``@dffml.op`` definitions above - confirm against the DFFML docs.
COLLECTOR_DATAFLOW = dffml.DataFlow(
    *dffml.opimp_in(dffml_feature_git.feature.operations),
    *dffml.opimp_in(sys.modules[__name__]),
)

# CVEBinToolDataFlow = dffml.SystemContext(
# upstream=COLLECTOR_DATAFLOW,
# )
# scanner = CVEBinToolDataFlow.deployment()


async def main():
    """Run ``COLLECTOR_DATAFLOW`` for the command line arguments, print results.

    The inputs handed to ``dffml.run`` are grouped in a dict keyed by each
    argument of ``sys.argv[1:]``. Every key maps to a single input that
    carries the argument as its value, with the git feature ``URL``
    definition (``InputOfUnknownType`` is not used here).
    """
    inputs_by_argument = {}
    for target in sys.argv[1:]:
        inputs_by_argument[target] = [
            dffml.Input(
                value=target,
                definition=dffml_feature_git.feature.definitions.URL,
            ),
        ]

    async for context, results in dffml.run(COLLECTOR_DATAFLOW, inputs_by_argument):
        print(context, results)


# Script entry point: only when this module is executed directly, run the
# ``main`` coroutine to completion via ``asyncio.run``.
if __name__ == "__main__":
    asyncio.run(main())
71 changes: 71 additions & 0 deletions doc/DATA_FLOW_SCANNER.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
Data Flow Based Scanner
#######################

Implement multi language support in CVE Bin Tool via introduction of data flows
to handle scanning. We'll then extend via overlays to add functionality such as
shouldi where appropriate.

References:

- shouldi

- https://intel.github.io/dffml/shouldi.html
- https://intel.github.io/dffml/examples/shouldi.html
- https://intel.github.io/dffml/examples/dataflows.html

- Alice/Open Architecture

- https://github.com/intel/dffml/discussions/1369

.. note::

Tested against development version of DFFML
9ddcdfd6f8de743f87d41b74d53fde2c182861c7

Install
*******

Install with extra ``dataflow`` to enable data flow based scanner.


.. code-block:: console

$ python -m pip install cve-bin-tool[dataflow]

Scan with Data Flow Orchestrator
********************************

Scan files as usual (alice is a shortname for the Open Architecture format).

.. code-block:: console

$ cve-bin-tool --scanner dataflow .

Scan for Python Dependencies
****************************

Enable scanning of Python dependencies. We overlay the ``shouldi`` scanning
flows to have them attempt to scan each directory which we come across while
scanning recursively.

.. code-block:: console

$ cve-bin-tool --scanner dataflow --overlays cve_bin_tool.overlays.shouldi:OVERLAY -- .

Output To Open Architecture Format
**********************************

When running the scan we can output to the Open Architecture format (aka Alice).

.. code-block:: console

$ cve-bin-tool --scanner dataflow --format alice --output-file scan.alice .

Supplement Threat Model with Scan Data
**************************************

Once a scan has been saved in the Open Architecture format (aka Alice), we can
pass it back in as the input file and generate a ``THREATS.md`` threat model from it.

.. code-block:: console

$ cve-bin-tool --input-file scan.alice --format md --output-file THREATS.md
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
install_requires=requirements,
extras_require={
"PDF": ["reportlab"],
"dataflow": ["dffml", "dffml-feature-git"],
},
packages=find_packages(
exclude=["locales", "presentation"],
Expand Down