Skip to content

Commit

Permalink
Semgrep SCA - add findings published date and fix status (cartography…
Browse files Browse the repository at this point in the history
…-cncf#1281)

The triage information in the response from
https://semgrep.dev/api/docs#/SupplyChainService/SupplyChainService_ListVulns2
contains a status of "NEW", "CLOSED", or "IGNORED". This field can be
used to determine whether a finding has been fixed.
This change also adds the "announcedAt" date, which records when the
CVE/GHSA advisory was published.
  • Loading branch information
heryxpc authored and tmsteere committed Aug 8, 2024
1 parent aa07474 commit 4939979
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 5 deletions.
22 changes: 17 additions & 5 deletions cartography/intel/semgrep/findings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Dict
from typing import List
from typing import Tuple
from urllib.error import HTTPError

import neo4j
import requests
Expand All @@ -20,6 +21,7 @@
logger = logging.getLogger(__name__)
stat_handler = get_stats_client(__name__)
_TIMEOUT = (60, 60)
_MAX_RETRIES = 3


@timeit
Expand Down Expand Up @@ -57,6 +59,7 @@ def get_sca_vulns(semgrep_app_token: str, deployment_id: str) -> List[Dict[str,
has_more = True
cursor: Dict[str, str] = {}
page = 1
retries = 0
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {semgrep_app_token}",
Expand All @@ -78,16 +81,23 @@ def get_sca_vulns(semgrep_app_token: str, deployment_id: str) -> List[Dict[str,
"issueOffset": cursor["issueOffset"],
},
})

response = requests.post(sca_url, json=request_data, headers=headers, timeout=_TIMEOUT)
response.raise_for_status()
data = response.json()
try:
response = requests.post(sca_url, json=request_data, headers=headers, timeout=_TIMEOUT)
response.raise_for_status()
data = response.json()
except HTTPError as e:
logger.warning(f"Failed to retrieve Semgrep SCA vulns for page {page}. Retrying...")
retries += 1
if retries >= _MAX_RETRIES:
raise e
continue
vulns = data["vulns"]
cursor = data.get("cursor")
has_more = data.get("hasMore", False)
all_vulns.extend(vulns)
if page % 10 == 0:
logger.info(f"Processed {page} pages of Semgrep SCA vulnerabilities so far.")
all_vulns.extend(vulns)
retries = 0

return all_vulns

Expand Down Expand Up @@ -128,6 +138,8 @@ def transform_sca_vulns(raw_vulns: List[Dict[str, Any]]) -> Tuple[List[Dict[str,
if vuln["advisory"].get("references", {}).get("urls", []):
sca_vuln["ref_urls"] = vuln["advisory"].get("references", {}).get("urls", [])
sca_vuln["openedAt"] = vuln.get("openedAt", None)
sca_vuln["announcedAt"] = vuln.get("announcedAt", None)
sca_vuln["fixStatus"] = vuln["triage"]["status"]
for usage in vuln.get("usages", []):
usage_dict = {}
usage_dict["SCA_ID"] = sca_vuln["id"]
Expand Down
2 changes: 2 additions & 0 deletions cartography/models/semgrep/findings.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ class SemgrepSCAFindingNodeProperties(CartographyNodeProperties):
dependency_file: PropertyRef = PropertyRef('dependencyFileLocation_path', extra_index=True)
dependency_file_url: PropertyRef = PropertyRef('dependencyFileLocation_url', extra_index=True)
scan_time: PropertyRef = PropertyRef('openedAt')
published_time: PropertyRef = PropertyRef('announcedAt')
fix_status: PropertyRef = PropertyRef('fixStatus')


@dataclass(frozen=True)
Expand Down

0 comments on commit 4939979

Please sign in to comment.