Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

platform(sca): add new sca package scan #3802

Merged
merged 14 commits into from
Nov 7, 2022
8 changes: 6 additions & 2 deletions checkov/common/bridgecrew/platform_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@
from checkov.common.bridgecrew.platform_key import read_key, persist_key, bridgecrew_file
from checkov.common.bridgecrew.wrapper import reduce_scan_reports, persist_checks_results, \
enrich_and_persist_checks_metadata, checkov_results_prefix, persist_run_metadata, _put_json_object
from checkov.common.models.consts import SUPPORTED_FILE_EXTENSIONS, SUPPORTED_FILES
from checkov.common.models.consts import SUPPORTED_FILE_EXTENSIONS, SUPPORTED_FILES, SUPPORTED_PACKAGE_FILES
from checkov.common.bridgecrew.check_type import CheckType
from checkov.common.runners.base_runner import filter_ignored_paths
from checkov.common.typing import _CicdDetails
from checkov.common.util.consts import PRISMA_PLATFORM, BRIDGECREW_PLATFORM
from checkov.common.util.consts import PRISMA_PLATFORM, BRIDGECREW_PLATFORM, CHECKOV_RUN_SCA_PACKAGE_SCAN_V2
from checkov.common.util.data_structures_utils import merge_dicts
from checkov.common.util.http_utils import normalize_prisma_url, get_auth_header, get_default_get_headers, \
get_user_agent_header, get_default_post_headers, get_prisma_get_headers, get_prisma_auth_header, \
Expand Down Expand Up @@ -332,6 +332,8 @@ def persist_repository(
for f in files:
f_name = os.path.basename(f)
_, file_extension = os.path.splitext(f)
if CHECKOV_RUN_SCA_PACKAGE_SCAN_V2 and file_extension in SUPPORTED_PACKAGE_FILES:
continue
if file_extension in SUPPORTED_FILE_EXTENSIONS or f_name in SUPPORTED_FILES:
files_to_persist.append(FileToPersist(f, os.path.relpath(f, root_dir)))
else:
Expand All @@ -342,6 +344,8 @@ def persist_repository(
filter_ignored_paths(root_path, f_names, excluded_paths)
for file_path in f_names:
_, file_extension = os.path.splitext(file_path)
if CHECKOV_RUN_SCA_PACKAGE_SCAN_V2 and file_extension in SUPPORTED_PACKAGE_FILES:
continue
if file_extension in SUPPORTED_FILE_EXTENSIONS or file_path in SUPPORTED_FILES:
full_file_path = os.path.join(root_path, file_path)
relative_file_path = os.path.relpath(full_file_path, root_dir)
Expand Down
3 changes: 3 additions & 0 deletions checkov/common/util/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,6 @@
BRIDGECREW_PLATFORM = 'Bridgecrew'

# Size limit for IaC files; defaults to 50 MB unless CHECKOV_MAX_IAC_FILE_SIZE is set
MAX_IAC_FILE_SIZE = int(os.environ.get('CHECKOV_MAX_IAC_FILE_SIZE', '50_000_000'))

# Feature flag: enabled only when CHECKOV_RUN_SCA_PACKAGE_SCAN_V2 equals 'true' (compared case-insensitively)
CHECKOV_RUN_SCA_PACKAGE_SCAN_V2 = os.environ.get('CHECKOV_RUN_SCA_PACKAGE_SCAN_V2', '').lower() == 'true'
6 changes: 4 additions & 2 deletions checkov/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from checkov.common.util import prompt
from checkov.common.util.banner import banner as checkov_banner
from checkov.common.util.config_utils import get_default_config_paths
from checkov.common.util.consts import DEFAULT_EXTERNAL_MODULES_DIR
from checkov.common.util.consts import DEFAULT_EXTERNAL_MODULES_DIR, CHECKOV_RUN_SCA_PACKAGE_SCAN_V2
from checkov.common.util.docs_generator import print_checks
from checkov.common.util.ext_argument_parser import ExtArgumentParser
from checkov.common.util.runner_dependency_handler import RunnerDependencyHandler
Expand All @@ -55,6 +55,7 @@
from checkov.runner_filter import RunnerFilter
from checkov.sca_image.runner import Runner as sca_image_runner
from checkov.sca_package.runner import Runner as sca_package_runner
from checkov.sca_package_2.runner import Runner as sca_package_runner_2
from checkov.secrets.runner import Runner as secrets_runner
from checkov.serverless.runner import Runner as sls_runner
from checkov.terraform.plan_runner import Runner as tf_plan_runner
Expand All @@ -75,6 +76,7 @@
logger = logging.getLogger(__name__)
# values of every attribute declared on CheckType whose name does not start with a double underscore
checkov_runners = [
    attr_value
    for attr_name, attr_value in vars(CheckType).items()
    if not attr_name.startswith("__")
]


DEFAULT_RUNNERS = (
tf_graph_runner(),
cfn_runner(),
Expand All @@ -93,14 +95,14 @@
bitbucket_configuration_runner(),
bitbucket_pipelines_runner(),
kustomize_runner(),
sca_package_runner(),
github_actions_runner(),
bicep_runner(),
openapi_runner(),
sca_image_runner(),
argo_workflows_runner(),
circleci_pipelines_runner(),
azure_pipelines_runner(),
sca_package_runner_2() if CHECKOV_RUN_SCA_PACKAGE_SCAN_V2 else sca_package_runner()
)


Expand Down
Empty file.
152 changes: 152 additions & 0 deletions checkov/sca_package_2/runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
from __future__ import annotations

import logging
import os
from pathlib import Path
from typing import Sequence, Any, List

from checkov.common.sca.commons import should_run_scan
from checkov.common.sca.output import add_to_report_sca_data
from checkov.common.typing import _LicenseStatus
from checkov.common.bridgecrew.platform_integration import bc_integration, FileToPersist
from checkov.common.models.consts import SUPPORTED_PACKAGE_FILES
from checkov.common.output.report import Report
from checkov.common.bridgecrew.check_type import CheckType
from checkov.common.runners.base_runner import BaseRunner, ignored_directories
from checkov.runner_filter import RunnerFilter
from checkov.sca_package_2.scanner import Scanner


class Runner(BaseRunner[None]):
    """SCA package runner: persists supported package files, scans them and builds a report."""

    check_type = CheckType.SCA_PACKAGE  # noqa: CCE003  # a static attribute

    def __init__(self, report_type: str = check_type) -> None:
        """Register the supported package file names and reset the per-run state.

        :param report_type: report type forwarded to ``add_to_report_sca_data``, defaults to ``check_type``
        """
        super().__init__(file_names=SUPPORTED_PACKAGE_FILES)
        self.report_type = report_type
        self._code_repo_path: Path | None = None
        self._check_class: str | None = None

    def prepare_and_scan(
        self,
        root_folder: str | Path | None,
        files: list[str] | None = None,
        runner_filter: RunnerFilter | None = None,
        excluded_file_names: set[str] | None = None,
    ) -> Sequence[dict[str, Any]] | None:
        """Persist the scannable package files and return the result of ``Scanner.scan()``.

        :param root_folder: folder to search for package files, may be empty
        :param files: explicit file paths to consider
        :param runner_filter: filter holding the selected checks and excluded paths
        :param excluded_file_names: file names to leave out of the folder search
        :return: the scan results, or ``None`` when the scan is skipped or no package file was found
        """
        if not runner_filter:
            runner_filter = RunnerFilter()
        if not excluded_file_names:
            excluded_file_names = set()

        # nothing to do, if flag '--check' was used without a CVE check ID or the license policies
        if not should_run_scan(runner_filter.checks):
            return None

        if not bc_integration.bc_api_key:
            logging.info("The --bc-api-key flag needs to be set to run SCA package scanning")
            return None

        logging.info("SCA package scanning searching for scannable files")

        repo_path: Path | None = None
        if root_folder:
            repo_path = Path(root_folder)
        self._code_repo_path = repo_path

        # the default ignored directories are always skipped, user supplied ones come on top
        skipped_paths = set(ignored_directories)
        if runner_filter.excluded_paths:
            skipped_paths.update(runner_filter.excluded_paths)

        persisted_files = self.upload_scannable_files(
            root_path=repo_path,
            files=files,
            excluded_paths=skipped_paths,
            excluded_file_names=excluded_file_names,
        )
        if not persisted_files:
            # no packages found
            return None

        scanner = Scanner(self.pbar, root_folder)
        self._check_class = f"{scanner.__module__}.{scanner.__class__.__qualname__}"
        return scanner.scan()

    def run(
        self,
        root_folder: str | Path | None,
        external_checks_dir: list[str] | None = None,
        files: list[str] | None = None,
        runner_filter: RunnerFilter | None = None,
        collect_skip_comments: bool = True,
    ) -> Report | list[Report]:
        """Run the scan and add every non-empty result to a report.

        :param root_folder: folder to search for package files
        :param external_checks_dir: not used by this runner
        :param files: explicit file paths to consider
        :param runner_filter: filter forwarded to the scan and to the report creation
        :param collect_skip_comments: not used by this runner
        :return: the report, left empty when ``prepare_and_scan`` returns ``None``
        """
        if not runner_filter:
            runner_filter = RunnerFilter()
        if not runner_filter.show_progress_bar:
            self.pbar.turn_off_progress_bar()

        report = Report(self.check_type)

        scan_results = self.prepare_and_scan(root_folder, files, runner_filter)
        if scan_results is None:
            return report

        for scan_result in scan_results:
            if not scan_result:
                continue

            package_path = Path(scan_result["repository"])
            repo_path = self._code_repo_path
            if repo_path:
                try:
                    package_path = package_path.relative_to(repo_path)
                except ValueError:
                    # the path is not located under the repo path, keep it unchanged
                    # (Path.is_relative_to() was implemented in Python 3.9)
                    pass

            found_vulnerabilities = scan_result.get("vulnerabilities") or []
            found_packages = scan_result.get("packages") or []
            license_statuses = [
                _LicenseStatus(
                    package_name=status_entry["packageName"],
                    package_version=status_entry["packageVersion"],
                    policy=status_entry["policy"],
                    license=status_entry["license"],
                    status=status_entry["status"],
                )
                for status_entry in scan_result.get("license_statuses") or []
            ]

            scanned_file_path = str(package_path)
            add_to_report_sca_data(
                report=report,
                check_class=self._check_class,
                scanned_file_path=scanned_file_path,
                # same path with the first occurrence of the anchor (drive and root) removed
                rootless_file_path=scanned_file_path.replace(package_path.anchor, "", 1),
                runner_filter=runner_filter,
                vulnerabilities=found_vulnerabilities,
                packages=found_packages,
                license_statuses=license_statuses,
                report_type=self.report_type,
            )

        return report

    def upload_scannable_files(
        self,
        root_path: Path | None,
        files: list[str] | None,
        excluded_paths: set[str],
        excluded_file_names: set[str] | None = None,
    ) -> List[FileToPersist]:
        """Collect the supported package files and pass them to ``bc_integration.persist_files``.

        :param root_path: folder searched recursively for supported package files
        :param files: explicit file paths; missing ones are logged and skipped
        :param excluded_paths: path parts that exclude a file found under ``root_path``
        :param excluded_file_names: file names to leave out of the ``root_path`` search
        :return: all files handed over for persisting
        """
        skipped_names = excluded_file_names or set()
        collected: List[FileToPersist] = []

        if root_path:
            for candidate in root_path.glob("**/*"):
                candidate_name = candidate.name
                if candidate_name not in SUPPORTED_PACKAGE_FILES:
                    continue
                if any(part in candidate.parts for part in excluded_paths):
                    continue
                if candidate_name in skipped_names:
                    continue
                candidate_str = str(candidate)
                collected.append(FileToPersist(candidate_str, os.path.relpath(candidate_str, root_path)))

        if files:
            # directory part of the longest common string prefix serves as base for the relative paths
            common_dir = os.path.dirname(os.path.commonprefix(files))
            for file in files:
                explicit_path = Path(file)
                if not explicit_path.exists():
                    logging.warning(f"File {explicit_path} doesn't exist")
                    continue
                if explicit_path.name in SUPPORTED_PACKAGE_FILES:
                    collected.append(FileToPersist(file, os.path.relpath(file, common_dir)))

        logging.info(f"{len(collected)} sca package files found.")
        bc_integration.persist_files(collected)
        return collected
33 changes: 33 additions & 0 deletions checkov/sca_package_2/scanner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from __future__ import annotations

from collections.abc import Sequence
from pathlib import Path
from typing import Any

from checkov.common.bridgecrew.platform_integration import bc_integration

from checkov.common.util.tqdm_utils import ProgressBar

# NOTE(review): not referenced in the visible code; presumably the wait between scan result polls - unit to be confirmed
SLEEP_DURATION = 2
ayajbara marked this conversation as resolved.
Show resolved Hide resolved
# NOTE(review): not referenced in the visible code; presumably the upper bound for the polling wait - unit to be confirmed
MAX_SLEEP_DURATION = 60


class Scanner:
    """Skeleton of the SCA package scanner; the scan methods do not contain any logic yet."""

    def __init__(self, pbar: ProgressBar | None = None, root_folder: str | Path | None = None) -> None:
        """Store the API URL, the progress bar and the root folder.

        :param pbar: progress bar to use; without one a turned off progress bar is created
        :param root_folder: folder the scan relates to, stored unchanged
        """
        self._base_url = bc_integration.api_url
        if not pbar:
            # fall back to a silent progress bar
            pbar = ProgressBar('')
            pbar.turn_off_progress_bar()
        self.pbar = pbar
        self.root_folder = root_folder

    def scan(self) -> Sequence[dict[str, Any]]:
        """Run SCA package scan and poll scan results.

        Placeholder without logic, it currently returns ``None``.
        """

    def run_scan(self) -> dict[str, Any]:
        """Placeholder without logic, it currently returns ``None``."""

    def poll_scan_result(self) -> dict[str, Any]:
        """Placeholder without logic, it currently returns ``None``."""