diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 19c4d62fb2b..fb2100a4a43 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -251,6 +251,7 @@ jobs: steps: - uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3 - uses: sigstore/cosign-installer@9becc617647dfa20ae7b1151972e9b3a2c338a2b # v2 + - uses: docker/setup-buildx-action@8c0edbc76e98fa90f69d9a2c020dcb50019dc325 # v2 # needed for self-hosted builds - name: Get release version id: versions run: | @@ -269,12 +270,20 @@ jobs: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Build and push Docker image - id: docker_push + - name: Build and export image to Docker + # buildx changes the driver to 'docker-container' which doesn't expose the image to the host, + # so it is built and loaded to Docker and in the next step pushed to the registry uses: docker/build-push-action@c56af957549030174b10d6867f20e78cfd7debc5 # v3 with: context: . no-cache: true + load: true + tags: ${{ env.DH_IMAGE_NAME }}:latest + - name: Push Docker image + id: docker_push + uses: docker/build-push-action@c56af957549030174b10d6867f20e78cfd7debc5 # v3 + with: + context: . push: true tags: | ${{ env.DH_IMAGE_NAME }}:latest diff --git a/checkov/secrets/parsers/__init__.py b/checkov/secrets/parsers/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/checkov/secrets/parsers/json/__init__.py b/checkov/secrets/parsers/json/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/checkov/common/parsers/json/multiline_parser.py b/checkov/secrets/parsers/json/multiline_parser.py similarity index 96% rename from checkov/common/parsers/json/multiline_parser.py rename to checkov/secrets/parsers/json/multiline_parser.py index 060f038acc1..8a0459270da 100644 --- a/checkov/common/parsers/json/multiline_parser.py +++ b/checkov/secrets/parsers/json/multiline_parser.py @@ -7,7 +7,7 @@ if TYPE_CHECKING: from detect_secrets.util.code_snippet import CodeSnippet -from checkov.common.parsers.multiline_parser import BaseMultiLineParser +from checkov.secrets.parsers.multiline_parser import BaseMultiLineParser START_OBJ_END_OF_LINE = r'({\s*}?\s*,?\s*$)' diff --git a/checkov/common/parsers/multiline_parser.py b/checkov/secrets/parsers/multiline_parser.py similarity index 99% rename from checkov/common/parsers/multiline_parser.py rename to checkov/secrets/parsers/multiline_parser.py index afc656f8937..48a39e79ab4 100644 --- a/checkov/common/parsers/multiline_parser.py +++ b/checkov/secrets/parsers/multiline_parser.py @@ -9,7 +9,6 @@ class BaseMultiLineParser(ABC): - def get_lines_from_same_object( self, search_range: range, diff --git a/checkov/secrets/parsers/single_line_parser.py b/checkov/secrets/parsers/single_line_parser.py new file mode 100644 index 00000000000..3180e74a710 --- /dev/null +++ b/checkov/secrets/parsers/single_line_parser.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Any, TYPE_CHECKING + + +if TYPE_CHECKING: + from detect_secrets.core.potential_secret import PotentialSecret + from detect_secrets.plugins.high_entropy_strings import Base64HighEntropyString, HexHighEntropyString + from detect_secrets.util.code_snippet import CodeSnippet + + +class BaseSingleLineParser(ABC): + def detect_secret( + self, + scanners: tuple[Base64HighEntropyString, HexHighEntropyString], + filename: str, + raw_context: CodeSnippet | None, + line: str, + line_number: int = 0, + **kwargs: Any, + ) -> set[PotentialSecret]: + for entropy_scanner in scanners: + matches = entropy_scanner.analyze_line(filename, line, line_number, **kwargs) + if matches: + if raw_context and self.ignore_secret(raw_context=raw_context): + return set() + + return matches + return set() + + @abstractmethod + def ignore_secret(self, raw_context: CodeSnippet) -> bool: + """Check for false-positive secrets by leveraging the context""" + + pass diff --git a/checkov/secrets/parsers/terraform/__init__.py b/checkov/secrets/parsers/terraform/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/checkov/secrets/parsers/terraform/multiline_parser.py b/checkov/secrets/parsers/terraform/multiline_parser.py new file mode 100644 index 00000000000..cb6ce62a41e --- /dev/null +++ b/checkov/secrets/parsers/terraform/multiline_parser.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +import re +from typing import TYPE_CHECKING + +from checkov.secrets.parsers.multiline_parser import BaseMultiLineParser + +if TYPE_CHECKING: + from detect_secrets.util.code_snippet import CodeSnippet + +START_OBJ = re.compile(r"^\s*\w*\s*=?\s*{\s*$") +END_OBJ = re.compile(r"^\s*}\s*$") +COMMENT_PREFIX = re.compile(r"^[\s]*(#|//)") + + +class TerraformMultiLineParser(BaseMultiLineParser): + def consecutive_lines_in_same_object( + self, + raw_context: CodeSnippet | None, + other_line_idx: int, + ) -> bool: + return bool(raw_context and 0 <= other_line_idx < len(raw_context.lines)) + + @staticmethod + def is_object_start(line: str) -> bool: + return bool(re.match(START_OBJ, line)) + + @staticmethod + def is_object_end(line: str) -> bool: + return bool(re.match(END_OBJ, line)) + + @staticmethod + def is_line_comment(line: str) -> bool: + return bool(re.match(COMMENT_PREFIX, line)) + + +terraform_multiline_parser = TerraformMultiLineParser() diff --git a/checkov/secrets/parsers/terraform/single_line_parser.py b/checkov/secrets/parsers/terraform/single_line_parser.py new file mode 100644 index 00000000000..3a58d62fcbb --- /dev/null +++ b/checkov/secrets/parsers/terraform/single_line_parser.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from checkov.secrets.parsers.single_line_parser import BaseSingleLineParser + +if TYPE_CHECKING: + from detect_secrets.util.code_snippet import CodeSnippet + + +class TerraformSingleLineParser(BaseSingleLineParser): + def ignore_secret(self, raw_context: CodeSnippet) -> bool: + return self.ignore_terraform_data_block(raw_context=raw_context) + + def ignore_terraform_data_block(self, raw_context: CodeSnippet) -> bool: + """Check for a possible data block usage""" + + # search backwards to find a possible 'data' block + for line_index in range(raw_context.target_index - 1, -1, -1): + if raw_context.lines[line_index].lstrip().startswith('data "'): + # a data block is typically used to get remote information, + # therefore can retrieve a secret, but has not a hardcoded secret + return True + + return False + + +terraform_single_line_parser = TerraformSingleLineParser() diff --git a/checkov/secrets/parsers/yaml/__init__.py b/checkov/secrets/parsers/yaml/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/checkov/common/parsers/yaml/multiline_parser.py b/checkov/secrets/parsers/yaml/multiline_parser.py similarity index 95% rename from checkov/common/parsers/yaml/multiline_parser.py rename to checkov/secrets/parsers/yaml/multiline_parser.py index 4202df0d771..af059a51b14 100644 --- a/checkov/common/parsers/yaml/multiline_parser.py +++ b/checkov/secrets/parsers/yaml/multiline_parser.py @@ -6,7 +6,7 @@ if TYPE_CHECKING: from detect_secrets.util.code_snippet import CodeSnippet -from checkov.common.parsers.multiline_parser import BaseMultiLineParser +from checkov.secrets.parsers.multiline_parser import BaseMultiLineParser INDENTATION_PATTERN = re.compile(r'(^\s*(?:-?\s+)?)') COMMENT_PREFIX = re.compile(r'^[\s]*(#|\/\/)') diff --git a/checkov/secrets/plugins/entropy_keyword_combinator.py b/checkov/secrets/plugins/entropy_keyword_combinator.py index e14116da4dd..f26ed92e970 100644 --- a/checkov/secrets/plugins/entropy_keyword_combinator.py +++ b/checkov/secrets/plugins/entropy_keyword_combinator.py @@ -20,12 +20,14 @@ from detect_secrets.util.filetype import FileType from detect_secrets.util.filetype import determine_file_type +from checkov.secrets.parsers.terraform.multiline_parser import terraform_multiline_parser +from checkov.secrets.parsers.terraform.single_line_parser import terraform_single_line_parser from checkov.secrets.runner import SOURCE_CODE_EXTENSION -from checkov.common.parsers.multiline_parser import BaseMultiLineParser -from checkov.common.parsers.yaml.multiline_parser import yml_multiline_parser -from checkov.common.parsers.json.multiline_parser import json_multiline_parser +from checkov.secrets.parsers.yaml.multiline_parser import yml_multiline_parser +from checkov.secrets.parsers.json.multiline_parser import json_multiline_parser if TYPE_CHECKING: + from checkov.secrets.parsers.multiline_parser import BaseMultiLineParser from detect_secrets.core.potential_secret import PotentialSecret from detect_secrets.util.code_snippet import CodeSnippet @@ -93,6 +95,30 @@ flags=re.IGNORECASE, ) +FOLLOWED_BY_EQUAL_VALUE_KEYWORD_REGEX = re.compile( + # e.g. var = MY_PASSWORD_123 + r'{whitespace}({key})?={whitespace}({quote}?){words}{denylist}({closing})?(\3)'.format( + key=KEY, + whitespace=OPTIONAL_WHITESPACE, + quote=QUOTE, + words=AFFIX_REGEX, + denylist=DENY_LIST_REGEX2, + closing=CLOSING, + ), + flags=re.IGNORECASE, +) + +FOLLOWED_BY_EQUAL_VALUE_SECRET_REGEX = re.compile( + # e.g. var = Zmlyc3Rfc2VjcmV0X2hlcmVfd2hvYV9tdWx0aWxsaW5lX3Nob3VsZF93b3JrXzE== + r'{whitespace}({key})?={whitespace}({quote}?)({secret})(\3)'.format( + key=KEY, + whitespace=OPTIONAL_WHITESPACE, + quote=QUOTE, + secret=SECRET, + ), + flags=re.IGNORECASE, +) + # if the current regex is not enough, can add more regexes to check YML_PAIR_VALUE_KEYWORD_REGEX_TO_GROUP = { @@ -111,20 +137,42 @@ QUOTES_REQUIRED_FOLLOWED_BY_COLON_VALUE_SECRET_REGEX: 4, } +TERRAFORM_PAIR_VALUE_KEYWORD_REGEX_TO_GROUP = { + FOLLOWED_BY_EQUAL_VALUE_KEYWORD_REGEX: 4, +} + +TERRAFORM_PAIR_VALUE_SECRET_REGEX_TO_GROUP = { + FOLLOWED_BY_EQUAL_VALUE_SECRET_REGEX: 4, +} REGEX_VALUE_KEYWORD_BY_FILETYPE = { FileType.YAML: YML_PAIR_VALUE_KEYWORD_REGEX_TO_GROUP, FileType.JSON: JSON_PAIR_VALUE_KEYWORD_REGEX_TO_GROUP, + FileType.TERRAFORM: TERRAFORM_PAIR_VALUE_KEYWORD_REGEX_TO_GROUP, } REGEX_VALUE_SECRET_BY_FILETYPE = { FileType.YAML: YML_PAIR_VALUE_SECRET_REGEX_TO_GROUP, FileType.JSON: JSON_PAIR_VALUE_SECRET_REGEX_TO_GROUP, + FileType.TERRAFORM: TERRAFORM_PAIR_VALUE_SECRET_REGEX_TO_GROUP, +} + +SINGLE_LINE_PARSER = { + FileType.TERRAFORM: terraform_single_line_parser, } MULTILINE_PARSERS = { - FileType.YAML: yml_multiline_parser, - FileType.JSON: json_multiline_parser, + FileType.YAML: ( + (FileType.YAML, yml_multiline_parser), + ), + FileType.JSON: ( + (FileType.JSON, json_multiline_parser), + ), + FileType.TERRAFORM: ( + (FileType.TERRAFORM, terraform_multiline_parser), + (FileType.JSON, json_multiline_parser), + (FileType.YAML, yml_multiline_parser), + ), } @@ -149,15 +197,29 @@ def analyze_line( raw_context: CodeSnippet | None = None, **kwargs: Any, ) -> set[PotentialSecret]: + if len(line) > MAX_LINE_LENGTH: + # to keep good performance we skip long lines + return set() + is_iac = f".{filename.split('.')[-1]}" not in SOURCE_CODE_EXTENSION - filetype = determine_file_type(filename) - multiline_parser = MULTILINE_PARSERS.get(filetype) - - if len(line) <= MAX_LINE_LENGTH: - if is_iac: - # classic key-value pair - keyword_on_key = self.keyword_scanner.analyze_line(filename, line, line_number, **kwargs) - if keyword_on_key: + if is_iac: + filetype = determine_file_type(filename) + single_line_parser = SINGLE_LINE_PARSER.get(filetype) + multiline_parsers = MULTILINE_PARSERS.get(filetype) + + # classic key-value pair + keyword_on_key = self.keyword_scanner.analyze_line(filename, line, line_number, **kwargs) + if keyword_on_key: + if single_line_parser: + return single_line_parser.detect_secret( + scanners=self.high_entropy_scanners_iac, + filename=filename, + raw_context=raw_context, + line=line, + line_number=line_number, + kwargs=kwargs + ) + else: return self.detect_secret( scanners=self.high_entropy_scanners_iac, filename=filename, @@ -166,12 +228,16 @@ def analyze_line( kwargs=kwargs ) - # not so classic key-value pair, from multiline, that is only in an array format. - # The scan is one-way backwards, so no duplicates expected. - elif multiline_parser: - value_keyword_regex_to_group = REGEX_VALUE_KEYWORD_BY_FILETYPE.get(filetype) - secret_keyword_regex_to_group = REGEX_VALUE_SECRET_BY_FILETYPE.get(filetype) - return self.analyze_multiline( + # not so classic key-value pair, from multiline, that is only in an array format. + # The scan searches forwards and backwards for a potential secret pair, so no duplicates expected. + elif multiline_parsers: + # iterate over multiple parser and their related file type. + # this is needed for file types, which embed other file type parser, ex Terraform with heredoc + for parser_file_type, multiline_parser in multiline_parsers: + value_keyword_regex_to_group = REGEX_VALUE_KEYWORD_BY_FILETYPE.get(parser_file_type) + secret_keyword_regex_to_group = REGEX_VALUE_SECRET_BY_FILETYPE.get(parser_file_type) + + potential_secrets = self.analyze_multiline( filename=filename, line=line, multiline_parser=multiline_parser, @@ -182,14 +248,19 @@ def analyze_line( secret_pattern=secret_keyword_regex_to_group, kwargs=kwargs ) - else: - return self.detect_secret( - scanners=self.high_entropy_scanners, - filename=filename, - line=line, - line_number=line_number, - kwargs=kwargs - ) + + if potential_secrets: + # return a possible secret, otherwise check with next parser + return potential_secrets + else: + return self.detect_secret( + scanners=self.high_entropy_scanners, + filename=filename, + line=line, + line_number=line_number, + kwargs=kwargs + ) + return set() def analyze_multiline( diff --git a/tests/secrets/terraform_multiline/cfn_heredoc.tf b/tests/secrets/terraform_multiline/cfn_heredoc.tf new file mode 100644 index 00000000000..3a37e128a4c --- /dev/null +++ b/tests/secrets/terraform_multiline/cfn_heredoc.tf @@ -0,0 +1,51 @@ +resource "aws_cloudformation_stack" "lambda" { + name = "lambda" + + parameters = { + VPCCidr = "10.0.0.0/16" + } + + template_body = <