External tools support (#254)
* Ext scan support

* Added test

* Update readme
Prabhu Subramanian authored Dec 11, 2020
1 parent 044bc16 commit 4553b19
Showing 9 changed files with 596 additions and 60 deletions.
README.md: 32 additions & 31 deletions
@@ -20,37 +20,38 @@

## Bundled tools

| Programming Language | Tools |
| -------------------- | ----------------------------------- |
| ansible | ansible-lint |
| apex | pmd |
| arm | checkov |
| aws | checkov |
| bash | shellcheck |
| bom | cdxgen |
| credscan | gitleaks |
| depscan | dep-scan |
| go | gosec, staticcheck |
| groovy | find-sec-bugs |
| java | cdxgen, gradle, find-sec-bugs, pmd |
| jsp | pmd, find-sec-bugs |
| json | jq, jsondiff, jsonschema |
| kotlin | detekt, find-sec-bugs |
| scala | find-sec-bugs |
| kubernetes | checkov, kubesec, kube-score |
| nodejs | cdxgen, yarn, rush |
| php | psalm, phpstan (ide only) |
| plsql | pmd |
| python | cfg-scan (\*), bandit, cdxgen |
| ruby | dep-scan |
| rust | cdxgen |
| serverless | checkov |
| terraform | checkov, tfsec |
| Visual Force (vf) | pmd |
| Apache Velocity (vm) | pmd |
| yaml | yamllint |

(\*) - Deep analyzer for Python is a built-in feature
| Programming Language | Tools |
| -------------------- | ---------------------------------- |
| ansible | ansible-lint |
| apex | pmd |
| arm | checkov |
| aws | checkov |
| bash | shellcheck |
| bom | cdxgen |
| credscan | gitleaks |
| depscan | dep-scan |
| go | gosec, staticcheck |
| groovy | find-sec-bugs |
| java | cdxgen, gradle, find-sec-bugs, pmd |
| jsp | pmd, find-sec-bugs |
| json | jq, jsondiff, jsonschema |
| kotlin | detekt, find-sec-bugs |
| scala | find-sec-bugs |
| kubernetes | checkov, kubesec, kube-score |
| nodejs | cdxgen, yarn, rush |
| php | psalm, phpstan (ide only) |
| plsql | pmd |
| python | cfg-scan (1), bandit, cdxgen |
| ruby | brakeman (2), dep-scan |
| rust | cdxgen |
| serverless | checkov |
| terraform | checkov, tfsec |
| Visual Force (vf) | pmd |
| Apache Velocity (vm) | pmd |
| yaml | yamllint |

(1) - Deep analyzer for Python is a built-in feature
(2) - Brakeman is not bundled with scan. Run brakeman separately under an appropriate license and export the report in JSON format using `-o reports/source-ruby-report.json`
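
For example, you might run brakeman yourself with the same flags scan's `ruby-ide` configuration uses and write the report to the expected location, e.g. `brakeman --skip-libs --no-exit-on-warn --no-exit-on-error -w 2 --ignore-protected -o reports/source-ruby-report.json`; scan then converts the JSON report to SARIF and includes it in the summary.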

## Bundled languages/runtime

lib/analysis.py: 6 additions & 1 deletion
@@ -183,7 +183,12 @@ def summary(sarif_files, depscan_files=None, aggregate_file=None, override_rules
report_summary[tool_name].pop("total", None)
else:
for aresult in results:
sev = aresult["properties"]["issue_severity"].lower()
if aresult.get("properties"):
sev = aresult["properties"]["issue_severity"].lower()
else:
sev = config.get("exttool_default_severity").get(
tool_name.lower(), "medium"
)
report_summary[tool_name][sev] += 1
# Compare against the build break rule to determine status
tool_rules = config.get("build_break_rules").get(tool_name, {})
lib/config.py: 17 additions & 0 deletions
@@ -525,6 +525,19 @@ def set(configName, value):
],
},
"puppet": ["puppet-lint", "--error-level", "all", "--json", "%(src)s"],
"ruby-ide": {
"source-ruby": [
"brakeman",
"--skip-libs",
"--no-exit-on-warn",
"--no-exit-on-error",
"-w",
"2",
"--ignore-protected",
"-o",
"%(report_fname_prefix)s.json",
]
},
"scala": {
"audit-scala": [
"java",
@@ -758,6 +771,7 @@ def set(configName, value):
"cpg": "ShiftLeft NextGen Analyzer",
"inspect": "ShiftLeft NextGen Analyzer",
"ng-sast": "ShiftLeft NextGen Analyzer",
"source-ruby": "Ruby Source Analyzer",
"empty-scan": "Empty Scan Ignore",
}

@@ -1272,6 +1286,9 @@ def __hash__(self):
"snyk-bot",
]

# Default severity for external tools in case the SARIF result is missing a severity
exttool_default_severity = {"brakeman": "medium"}


def reload():
# Load any .sastscanrc file from the root
lib/convert.py: 3 additions & 1 deletion
@@ -306,6 +306,9 @@ def extract_from_file(
issues += rd.get("results", {}).get("failed_checks")
else:
issues = report_data.get("results", {}).get("failed_checks")
elif tool_name == "source-ruby":
issues = report_data.get("warnings", [])
issues += report_data.get("errors", [])
elif isinstance(report_data, list):
issues = report_data
else:
@@ -446,7 +449,6 @@ def report(
repo_details = find_repo_details(working_dir)
log_uuid = str(uuid.uuid4())
run_uuid = config.get("run_uuid")

# Populate metrics
metrics = {
"total": 0,
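For reference, a minimal sketch of the Brakeman JSON report that the new `extract_from_file` branch consumes (the key names follow Brakeman's JSON output; the values are illustrative):

```python
# Abridged Brakeman JSON report: warnings and errors are merged into one
# issue list, so scan errors flow through the same conversion as findings.
brakeman_report = {
    "warnings": [
        {
            "warning_type": "SQL Injection",
            "check_name": "SQL",
            "message": "Possible SQL injection",
            "file": "app/models/account.rb",
            "line": 42,
            "confidence": "High",
        }
    ],
    "errors": [],
}
issues = brakeman_report.get("warnings", [])
issues += brakeman_report.get("errors", [])
```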
lib/inspect.py: 18 additions & 13 deletions
@@ -367,7 +367,7 @@ def convert_sarif(app_name, repo_context, sarif_files, findings_fname):
if not short_desc:
short_desc = result.get("message", {}).get("text")
ngsev = convert_severity(
result.get("properties", {})["issue_severity"]
result.get("properties", {}).get("issue_severity", "medium")
)
# Populate tags
tags = []
@@ -405,9 +405,11 @@ def convert_sarif(app_name, repo_context, sarif_files, findings_fname):
lineno = location.get("physicalLocation", {})["region"][
"startLine"
]
end_lineno = location.get("physicalLocation", {})[
"contextRegion"
]["endLine"]
end_lineno = (
location.get("physicalLocation", {})
.get("contextRegion", {})
.get("endLine")
)
finding = {
"app": app_name,
"type": "extscan",
@@ -419,9 +421,10 @@
filename,
lineno,
end_lineno,
location.get("physicalLocation", {})["region"][
"snippet"
]["text"],
location.get("physicalLocation", {})
.get("region", {})
.get("snippet", {})
.get("text", ""),
short_desc,
),
),
@@ -437,12 +440,14 @@
"lineNumber": lineno,
"ruleId": rule_id,
"ruleName": rule.get("name"),
"contextText": location.get("physicalLocation", {})[
"region"
]["snippet"]["text"],
"snippetText": location.get("physicalLocation", {})[
"contextRegion"
]["snippet"]["text"],
"contextText": location.get("physicalLocation", {})
.get("region", {})
.get("snippet", {})
.get("text", ""),
"snippetText": location.get("physicalLocation", {})
.get("contextRegion", {})
.get("snippet", {})
.get("text", ""),
},
"tags": tags,
}
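The defensive `.get()` chains above guard against optional SARIF fields. A minimal sketch of a result as an external tool might emit it (per SARIF 2.1.0, `properties`, `contextRegion`, and `snippet` may all be absent):

```python
result = {
    "message": {"text": "Possible SQL injection"},
    # No "properties" bag here, so issue_severity falls back to "medium".
    "locations": [
        {
            "physicalLocation": {
                "artifactLocation": {"uri": "app/models/account.rb"},
                "region": {"startLine": 42, "snippet": {"text": "Account.where(...)"}},
                # No "contextRegion", so end_lineno and snippetText degrade gracefully.
            }
        }
    ],
}
end_lineno = (
    result["locations"][0]["physicalLocation"]
    .get("contextRegion", {})
    .get("endLine")
)  # None instead of a KeyError
```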
lib/issue.py: 16 additions & 7 deletions
@@ -128,7 +128,6 @@ def get_code(self, max_lines=config.get("CODE_SNIPPET_MAX_LINES"), tabbed=False)
tmplt = "%i\t%s" if tabbed else "%i %s"
for line in moves.xrange(lmin, lmax):
text = linecache.getline(self.fname, line)

if isinstance(text, bytes):
text = text.decode("utf-8")

@@ -240,6 +239,8 @@ def norm_severity(self, severity):

def find_severity(self, data):
severity = constants.SEVERITY_DEFAULT
if "confidence" in data:
severity = data["confidence"].upper()
if "issue_severity" in data or "priority" in data:
sev = data.get("issue_severity", data.get("priority"))
severity = sev
@@ -290,6 +291,8 @@ def get_test_id(self, data):
test_id = data["test_id"]
if "rule_id" in data:
test_id = data["rule_id"]
if "check_name" in data:
test_id = data["check_name"]
if "check_id" in data:
test_id = data["check_id"]
if "tag" in data:
@@ -323,7 +326,7 @@ def from_dict(self, data, with_code=True):
:param data: Data dictionary from the tools
:param with_code: Boolean indicating if code snippet should get added
"""
if "code" in data:
if "code" in data and data.get("code"):
if str(data["code"]).isdigit():
self.test_id = str(data["code"])
elif len(data.get("code").split()) > 1:
@@ -336,9 +339,13 @@
self.fname = data["filename"]
if "fileName" in data:
self.fname = data["fileName"]
if "location" in data and "filename" in data["location"]:
if (
"location" in data
and data.get("location")
and "filename" in data["location"]
):
self.fname = data["location"]["filename"]
if "location" in data and "file" in data["location"]:
if "location" in data and data.get("location") and "file" in data["location"]:
self.fname = data["location"]["file"]
if "file" in data:
self.fname = data["file"]
@@ -348,13 +355,15 @@
self.fname = data["file_path"]
self.severity = self.find_severity(data)
if "issue_confidence" in data:
self.confidence = data["issue_confidence"]
self.confidence = data["issue_confidence"].upper()
if "confidence" in data:
self.confidence = data["confidence"]
self.confidence = data["confidence"].upper()
if "issue_text" in data:
self.text = data["issue_text"]
if "title" in data:
self.text = data["title"]
if "warning_type" in data:
self.test = data["warning_type"]
if "commitMessage" in data and "commit" in data:
if data.get("commitMessage") == "***STAGED CHANGES***":
self.text = "Credential in plaintext?\n\nRule: {}, Secret: {}".format(
Expand Down Expand Up @@ -400,7 +409,7 @@ def from_dict(self, data, with_code=True):
self.test = data["message"].replace("\\", " \\ ")
else:
self.test = data["type"]
if "check_name" in data:
if "check_name" in data and "check_id" in data:
self.text = data["check_name"]
self.severity = "HIGH"
self.confidence = "HIGH"
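Taken together, these branches let a Brakeman-style warning populate an issue without special casing. A sketch of the mapping (the keys are Brakeman's; the comments point at the `get_test_id`, `from_dict`, and `find_severity` branches above):

```python
warning = {
    "check_name": "SQL",              # -> test_id via get_test_id
    "warning_type": "SQL Injection",  # -> test via from_dict
    "confidence": "High",             # -> severity and confidence, upper-cased to "HIGH"
    "file": "app/models/account.rb",  # -> fname
    "message": "Possible SQL injection",
    "line": 42,
}
```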
scan: 32 additions & 7 deletions
@@ -186,7 +186,7 @@ def scan_project_types(
if dfn:
pool.apply_async(dfn, (src, reports_dir, convert, repo_context))
else:
x_scan(type_str)
x_scan(type_str, src, reports_dir, convert, repo_context)
except Exception as e:
LOG.debug(e)
LOG.warning(
@@ -227,13 +227,38 @@ def scan(type_list, src, reports_dir, convert, scan_mode, repo_context):
pool.join()


def x_scan(type_str):
"""Default placeholder scan method for missing scanners"""
LOG.info(
"Is there any open-source scanner for {}? Please let us know :thumbsup:".format(
type_str
)
def x_scan(type_str, src, reports_dir, convert, repo_context):
"""
Fallback scan method for project types without a bundled scanner; reuses or converts an existing external report when available
Args:
type_str Project type
src Project dir
reports_dir Directory for output reports
convert Boolean to enable normalisation of reports json
repo_context Repo context
"""
report_fname = utils.get_report_file(
f"source-{type_str}", reports_dir, convert, ext_name="json"
)
crep_fname = utils.get_report_file(
f"source-{type_str}", reports_dir, convert, ext_name="sarif"
)
# If there is an existing report available simply use it
if os.path.exists(crep_fname):
LOG.info(f"Found an existing SARIF report at {crep_fname} :thumbsup:")
elif os.path.exists(report_fname) and convert:
convertLib.convert_file(
f"source-{type_str}",
[],
src,
report_fname,
crep_fname,
)
else:
LOG.info(
f"Is there any open-source scanner for {type_str}? Please let us know :thumbsup:"
)


def python_scan(src, reports_dir, convert, repo_context):
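The net effect of the reworked `x_scan` is a three-way precedence. A compact sketch (the report file names here are assumptions based on the README note; `utils.get_report_file` decides the real names):

```python
import os

def pick_action(reports_dir: str, type_str: str) -> str:
    """Sketch of x_scan's precedence for an external tool such as brakeman."""
    sarif_report = os.path.join(reports_dir, f"source-{type_str}-report.sarif")
    json_report = os.path.join(reports_dir, f"source-{type_str}-report.json")
    if os.path.exists(sarif_report):
        return "reuse"    # an existing SARIF report is used as-is
    if os.path.exists(json_report):
        return "convert"  # the JSON report is normalised into SARIF
    return "suggest"      # nothing found; scan asks for scanner suggestions
```

Checking for SARIF first means a vendor-supplied SARIF file always wins over re-converting the JSON report.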
(Diffs for the remaining two changed files did not load.)