diff --git a/checkov/common/bridgecrew/platform_integration.py b/checkov/common/bridgecrew/platform_integration.py
index 4082f925c80..919311b29cf 100644
--- a/checkov/common/bridgecrew/platform_integration.py
+++ b/checkov/common/bridgecrew/platform_integration.py
@@ -30,11 +30,11 @@
 from checkov.common.bridgecrew.platform_key import read_key, persist_key, bridgecrew_file
 from checkov.common.bridgecrew.wrapper import reduce_scan_reports, persist_checks_results, \
     enrich_and_persist_checks_metadata, checkov_results_prefix, persist_run_metadata, _put_json_object
-from checkov.common.models.consts import SUPPORTED_FILE_EXTENSIONS, SUPPORTED_FILES
+from checkov.common.models.consts import SUPPORTED_FILE_EXTENSIONS, SUPPORTED_FILES, SUPPORTED_PACKAGE_FILES
 from checkov.common.bridgecrew.check_type import CheckType
 from checkov.common.runners.base_runner import filter_ignored_paths
 from checkov.common.typing import _CicdDetails
-from checkov.common.util.consts import PRISMA_PLATFORM, BRIDGECREW_PLATFORM
+from checkov.common.util.consts import PRISMA_PLATFORM, BRIDGECREW_PLATFORM, CHECKOV_RUN_SCA_PACKAGE_SCAN_V2
 from checkov.common.util.data_structures_utils import merge_dicts
 from checkov.common.util.http_utils import normalize_prisma_url, get_auth_header, get_default_get_headers, \
     get_user_agent_header, get_default_post_headers, get_prisma_get_headers, get_prisma_auth_header, \
@@ -332,6 +332,9 @@ def persist_repository(
             for f in files:
                 f_name = os.path.basename(f)
                 _, file_extension = os.path.splitext(f)
+                # package files are persisted by the SCA package runner v2 itself (upload_scannable_files)
+                if CHECKOV_RUN_SCA_PACKAGE_SCAN_V2 and f_name in SUPPORTED_PACKAGE_FILES:
+                    continue
                 if file_extension in SUPPORTED_FILE_EXTENSIONS or f_name in SUPPORTED_FILES:
                     files_to_persist.append(FileToPersist(f, os.path.relpath(f, root_dir)))
         else:
@@ -342,6 +345,9 @@ def persist_repository(
                 filter_ignored_paths(root_path, f_names, excluded_paths)
                 for file_path in f_names:
                     _, file_extension = os.path.splitext(file_path)
+                    # package files are persisted by the SCA package runner v2 itself (upload_scannable_files)
+                    if CHECKOV_RUN_SCA_PACKAGE_SCAN_V2 and file_path in SUPPORTED_PACKAGE_FILES:
+                        continue
                     if file_extension in SUPPORTED_FILE_EXTENSIONS or file_path in SUPPORTED_FILES:
                         full_file_path = os.path.join(root_path, file_path)
                         relative_file_path = os.path.relpath(full_file_path, root_dir)
diff --git a/checkov/common/util/consts.py b/checkov/common/util/consts.py
index 751dc0f0c40..867509d7e25 100644
--- a/checkov/common/util/consts.py
+++ b/checkov/common/util/consts.py
@@ -24,3 +24,7 @@
 BRIDGECREW_PLATFORM = 'Bridgecrew'
 
 MAX_IAC_FILE_SIZE = int(os.getenv('CHECKOV_MAX_IAC_FILE_SIZE', '50_000_000'))  # 50 MB is default limit
+
+
+# feature flag: set the env var to 'true' to run the SCA package runner v2 instead of the current one
+CHECKOV_RUN_SCA_PACKAGE_SCAN_V2 = os.getenv('CHECKOV_RUN_SCA_PACKAGE_SCAN_V2', '').lower() == 'true'
diff --git a/checkov/main.py b/checkov/main.py
index 527fc275355..ce8758fecea 100755
--- a/checkov/main.py
+++ b/checkov/main.py
@@ -38,7 +38,7 @@
 from checkov.common.util import prompt
 from checkov.common.util.banner import banner as checkov_banner
 from checkov.common.util.config_utils import get_default_config_paths
-from checkov.common.util.consts import DEFAULT_EXTERNAL_MODULES_DIR
+from checkov.common.util.consts import DEFAULT_EXTERNAL_MODULES_DIR, CHECKOV_RUN_SCA_PACKAGE_SCAN_V2
 from checkov.common.util.docs_generator import print_checks
 from checkov.common.util.ext_argument_parser import ExtArgumentParser
 from checkov.common.util.runner_dependency_handler import RunnerDependencyHandler
@@ -55,6 +55,7 @@
 from checkov.runner_filter import RunnerFilter
 from checkov.sca_image.runner import Runner as sca_image_runner
 from checkov.sca_package.runner import Runner as sca_package_runner
+from checkov.sca_package_2.runner import Runner as sca_package_runner_2
 from checkov.secrets.runner import Runner as secrets_runner
 from checkov.serverless.runner import Runner as sls_runner
 from checkov.terraform.plan_runner import Runner as tf_plan_runner
@@ -75,6 +76,7 @@
 logger = logging.getLogger(__name__)
 checkov_runners = [value for attr, value in CheckType.__dict__.items() if not attr.startswith("__")]
 
+
 DEFAULT_RUNNERS = (
     tf_graph_runner(),
     cfn_runner(),
@@ -93,7 +95,6 @@
     bitbucket_configuration_runner(),
     bitbucket_pipelines_runner(),
     kustomize_runner(),
-    sca_package_runner(),
     github_actions_runner(),
     bicep_runner(),
     openapi_runner(),
@@ -101,6 +102,7 @@
     argo_workflows_runner(),
     circleci_pipelines_runner(),
     azure_pipelines_runner(),
+    sca_package_runner_2() if CHECKOV_RUN_SCA_PACKAGE_SCAN_V2 else sca_package_runner()
 )
 
 
diff --git a/checkov/sca_package_2/__init__.py b/checkov/sca_package_2/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/checkov/sca_package_2/runner.py b/checkov/sca_package_2/runner.py
new file mode 100644
index 00000000000..8b0c2d917dd
--- /dev/null
+++ b/checkov/sca_package_2/runner.py
@@ -0,0 +1,156 @@
+from __future__ import annotations
+
+import logging
+import os
+from pathlib import Path
+from typing import Sequence, Any, List
+
+from checkov.common.sca.commons import should_run_scan
+from checkov.common.sca.output import add_to_report_sca_data
+from checkov.common.typing import _LicenseStatus
+from checkov.common.bridgecrew.platform_integration import bc_integration, FileToPersist
+from checkov.common.models.consts import SUPPORTED_PACKAGE_FILES
+from checkov.common.output.report import Report
+from checkov.common.bridgecrew.check_type import CheckType
+from checkov.common.runners.base_runner import BaseRunner, ignored_directories
+from checkov.runner_filter import RunnerFilter
+from checkov.sca_package_2.scanner import Scanner
+
+
+class Runner(BaseRunner[None]):
+    """SCA package runner v2: persists the package files itself and delegates the scan to Scanner."""
+
+    check_type = CheckType.SCA_PACKAGE  # noqa: CCE003  # a static attribute
+
+    def __init__(self, report_type: str = check_type) -> None:
+        super().__init__(file_names=SUPPORTED_PACKAGE_FILES)
+        self._check_class: str | None = None
+        self._code_repo_path: Path | None = None
+        self.report_type = report_type
+
+    def prepare_and_scan(
+        self,
+        root_folder: str | Path | None,
+        files: list[str] | None = None,
+        runner_filter: RunnerFilter | None = None,
+        excluded_file_names: set[str] | None = None,
+    ) -> Sequence[dict[str, Any]] | None:
+        """Upload the scannable package files and run the scan; return None if the scan is skipped."""
+        runner_filter = runner_filter or RunnerFilter()
+        excluded_file_names = excluded_file_names or set()
+
+        # skip complete run, if flag '--check' was used without a CVE check ID or the license policies
+        if not should_run_scan(runner_filter.checks):
+            return None
+
+        if not bc_integration.bc_api_key:
+            logging.info("The --bc-api-key flag needs to be set to run SCA package scanning")
+            return None
+
+        logging.info("SCA package scanning searching for scannable files")
+
+        self._code_repo_path = Path(root_folder) if root_folder else None
+
+        excluded_paths = {*ignored_directories}
+        if runner_filter.excluded_paths:
+            excluded_paths.update(runner_filter.excluded_paths)
+
+        if not self.upload_scannable_files(
+            root_path=self._code_repo_path,
+            files=files,
+            excluded_paths=excluded_paths,
+            excluded_file_names=excluded_file_names,
+        ):
+            # no packages found
+            return None
+
+        scanner = Scanner(self.pbar, root_folder)
+        self._check_class = f"{scanner.__module__}.{scanner.__class__.__qualname__}"
+        scan_results = scanner.scan()
+
+        # logging.info(f"SCA package scanning successfully scanned {len(scan_results)} files")
+        return scan_results
+
+    def run(
+        self,
+        root_folder: str | Path | None,
+        external_checks_dir: list[str] | None = None,
+        files: list[str] | None = None,
+        runner_filter: RunnerFilter | None = None,
+        collect_skip_comments: bool = True,
+    ) -> Report | list[Report]:
+        """Run the SCA package scan and collect the results of every scanned file into one report."""
+        runner_filter = runner_filter or RunnerFilter()
+        if not runner_filter.show_progress_bar:
+            self.pbar.turn_off_progress_bar()
+
+        report = Report(self.check_type)
+
+        scan_results = self.prepare_and_scan(root_folder, files, runner_filter)
+        if scan_results is None:
+            return report
+
+        for result in scan_results:
+            if not result:
+                continue
+            package_file_path = Path(result["repository"])
+            if self._code_repo_path:
+                try:
+                    package_file_path = package_file_path.relative_to(self._code_repo_path)
+                except ValueError:
+                    # Path.is_relative_to() was implemented in Python 3.9
+                    pass
+
+            vulnerabilities = result.get("vulnerabilities") or []
+            packages = result.get("packages") or []
+
+            license_statuses = [_LicenseStatus(package_name=elm["packageName"], package_version=elm["packageVersion"],
+                                               policy=elm["policy"], license=elm["license"], status=elm["status"])
+                                for elm in result.get("license_statuses") or []]
+
+            rootless_file_path = str(package_file_path).replace(package_file_path.anchor, "", 1)
+            add_to_report_sca_data(
+                report=report,
+                check_class=self._check_class,
+                scanned_file_path=str(package_file_path),
+                rootless_file_path=rootless_file_path,
+                runner_filter=runner_filter,
+                vulnerabilities=vulnerabilities,
+                packages=packages,
+                license_statuses=license_statuses,
+                report_type=self.report_type,
+            )
+
+        return report
+
+    def upload_scannable_files(
+        self,
+        root_path: Path | None,
+        files: list[str] | None,
+        excluded_paths: set[str],
+        excluded_file_names: set[str] | None = None,
+    ) -> List[FileToPersist]:
+        """Collect supported package files from 'root_path' and 'files' and persist them via bc_integration."""
+        excluded_file_names = excluded_file_names or set()
+        package_files_to_persist: List[FileToPersist] = []
+        if root_path:
+            for file_path in root_path.glob("**/*"):
+                if file_path.name in SUPPORTED_PACKAGE_FILES and not any(
+                        p in file_path.parts for p in excluded_paths) and file_path.name not in excluded_file_names:
+                    file_path_str = str(file_path)
+                    package_files_to_persist.append(
+                        FileToPersist(file_path_str, os.path.relpath(file_path_str, root_path)))
+
+        if files:
+            root_folder = os.path.split(os.path.commonprefix(files))[0]
+            for file in files:
+                file_path = Path(file)
+                if not file_path.exists():
+                    logging.warning(f"File {file_path} doesn't exist")
+                    continue
+                if file_path.name in SUPPORTED_PACKAGE_FILES:
+                    package_files_to_persist.append(FileToPersist(file, os.path.relpath(file, root_folder)))
+
+        logging.info(f"{len(package_files_to_persist)} sca package files found.")
+        bc_integration.persist_files(package_files_to_persist)
+        return package_files_to_persist
diff --git a/checkov/sca_package_2/scanner.py b/checkov/sca_package_2/scanner.py
new file mode 100644
index 00000000000..68c86f302af
--- /dev/null
+++ b/checkov/sca_package_2/scanner.py
@@ -0,0 +1,35 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from pathlib import Path
+from typing import Any
+
+from checkov.common.bridgecrew.platform_integration import bc_integration
+
+from checkov.common.util.tqdm_utils import ProgressBar
+
+SLEEP_DURATION = 2
+MAX_SLEEP_DURATION = 60
+
+
+class Scanner:
+    """Skeleton of the v2 SCA package scanner; the scan methods are placeholders without an implementation."""
+
+    def __init__(self, pbar: ProgressBar | None = None, root_folder: str | Path | None = None) -> None:
+        self._base_url = bc_integration.api_url
+        if pbar:
+            self.pbar = pbar
+        else:
+            self.pbar = ProgressBar('')
+            self.pbar.turn_off_progress_bar()
+        self.root_folder = root_folder
+
+    def scan(self) -> Sequence[dict[str, Any]]:
+        """run SCA package scan and poll scan results"""
+        pass
+
+    def run_scan(self) -> dict[str, Any]:
+        pass
+
+    def poll_scan_result(self) -> dict[str, Any]:
+        pass