From 89ddd3f5353a72064b0c99100d8ff572ba2ad199 Mon Sep 17 00:00:00 2001 From: marynaKK Date: Thu, 3 Nov 2022 10:22:34 +0200 Subject: [PATCH 1/9] added changes to runner --- .../plugins/entropy_keyword_combinator.py | 214 ++++++++++++++++-- 1 file changed, 195 insertions(+), 19 deletions(-) diff --git a/checkov/secrets/plugins/entropy_keyword_combinator.py b/checkov/secrets/plugins/entropy_keyword_combinator.py index 9a493df59ed..f4d6f2be6c6 100644 --- a/checkov/secrets/plugins/entropy_keyword_combinator.py +++ b/checkov/secrets/plugins/entropy_keyword_combinator.py @@ -1,9 +1,26 @@ from __future__ import annotations -from detect_secrets.plugins.high_entropy_strings import Base64HighEntropyString, HexHighEntropyString +import logging +import json +import re +from typing import Generator +from typing import Any +from typing import TYPE_CHECKING +from typing import Optional +from typing import Pattern + +from detect_secrets.plugins.high_entropy_strings import Base64HighEntropyString +from detect_secrets.plugins.high_entropy_strings import HexHighEntropyString from detect_secrets.plugins.keyword import KeywordDetector +from detect_secrets.plugins.keyword import DENYLIST +from detect_secrets.plugins.keyword import AFFIX_REGEX +from detect_secrets.plugins.keyword import CLOSING +from detect_secrets.plugins.keyword import OPTIONAL_WHITESPACE +from detect_secrets.plugins.keyword import QUOTE +from detect_secrets.plugins.keyword import SECRET from detect_secrets.plugins.base import BasePlugin -from typing import Generator, Any, TYPE_CHECKING +from detect_secrets.util.filetype import FileType +from detect_secrets.util.filetype import determine_file_type from checkov.secrets.runner import SOURCE_CODE_EXTENSION @@ -15,6 +32,61 @@ ENTROPY_KEYWORD_COMBINATOR_LIMIT = 3 ENTROPY_KEYWORD_LIMIT = 4.5 +INDENTATION_PATTERN = re.compile(r'(^[\r\t\f\v ]*(?:-?[\r\t\f\v ]+)?)') +COMMENT_PREFIX = re.compile(r'^[\r\t\f\v ]*#') + +DENY_LIST_REGEX = r'|'.join(DENYLIST) +# Support for suffix after keyword i.e. password_secure = "value" +DENY_LIST_REGEX2 = r'({denylist}){suffix}'.format( + denylist=DENY_LIST_REGEX, + suffix=AFFIX_REGEX, +) + +KEY = r'{words}({closing})?'.format( + words=AFFIX_REGEX, + closing=CLOSING, +) + +FOLLOWED_BY_COLON_VALUE_KEYWORD_REGEX = re.compile( + # e.g. var: MY_PASSWORD_123 + r'{whitespace}({key})?:{whitespace}({quote}?){words}{denylist}({closing})?(\3)'.format( + key=KEY, + whitespace=OPTIONAL_WHITESPACE, + quote=QUOTE, + words=AFFIX_REGEX, + denylist=DENY_LIST_REGEX2, + closing=CLOSING, + ), + flags=re.IGNORECASE, +) + +FOLLOWED_BY_COLON_VALUE_SECRET_REGEX = re.compile( + # e.g. var: Zmlyc3Rfc2VjcmV0X2hlcmVfd2hvYV9tdWx0aWxsaW5lX3Nob3VsZF93b3JrXzE== + r'{whitespace}({key})?:{whitespace}({quote}?)({secret})(\3)'.format( + key=KEY, + whitespace=OPTIONAL_WHITESPACE, + quote=QUOTE, + secret=SECRET, + ), + flags=re.IGNORECASE, +) + +PAIR_VALUE_KEYWORD_REGEX_TO_GROUP = { + FOLLOWED_BY_COLON_VALUE_KEYWORD_REGEX: 4, +} + +PAIR_VALUE_SECRET_REGEX_TO_GROUP = { + FOLLOWED_BY_COLON_VALUE_SECRET_REGEX: 4, +} + +REGEX_VALUE_KEYWORD_BY_FILETYPE = { + FileType.YAML: PAIR_VALUE_KEYWORD_REGEX_TO_GROUP, +} + +REGEX_VALUE_SECRET_BY_FILETYPE = { + FileType.YAML: PAIR_VALUE_SECRET_REGEX_TO_GROUP, +} + class EntropyKeywordCombinator(BasePlugin): secret_type = "" # nosec # noqa: CCE003 # a static attribute @@ -25,6 +97,9 @@ def __init__(self, limit: float = ENTROPY_KEYWORD_LIMIT) -> None: self.high_entropy_scanners = (Base64HighEntropyString(limit=limit), HexHighEntropyString(limit=limit)) self.keyword_scanner = KeywordDetector() + def analyze_string(self, string: str) -> Generator[str, None, None]: + raise NotImplementedError() + def analyze_line( self, filename: str, @@ -34,26 +109,127 @@ def analyze_line( raw_context: CodeSnippet | None = None, **kwargs: Any, ) -> set[PotentialSecret]: - """ - This method first runs the keyword plugin. If it finds a match - it runs the entropy scanners, and if - one of the entropy scanners find a match (on a line which was already matched by keyword plugin) - it is returned. - for source code files run and merge the two plugins. - """ is_iac = f".{filename.split('.')[-1]}" not in SOURCE_CODE_EXTENSION + filetype = determine_file_type(filename) + value_keyword_regex_to_group = REGEX_VALUE_KEYWORD_BY_FILETYPE.get(filetype, FileType.YAML) + secret_keyword_regex_to_group = REGEX_VALUE_SECRET_BY_FILETYPE.get(filetype, FileType.YAML) + if len(line) <= MAX_LINE_LENGTH: if is_iac: - keyword_matches = self.keyword_scanner.analyze_line(filename, line, line_number, **kwargs) - if keyword_matches: - for entropy_scanner in self.high_entropy_scanners_iac: - matches = entropy_scanner.analyze_line(filename, line, line_number, **kwargs) - if matches: - return matches + return self.analyze_iac_line( + filename=filename, + line=line, + line_number=line_number, + context=context, + raw_context=raw_context, + value_pattern=value_keyword_regex_to_group, + secret_pattern=secret_keyword_regex_to_group, + kwargs=kwargs + ) else: - for entropy_scanner in self.high_entropy_scanners: - matches = entropy_scanner.analyze_line(filename, line, line_number, **kwargs) - if matches: - return matches + return self.detect_secret(self.high_entropy_scanners, filename, line, line_number, **kwargs) + return set() + + def analyze_iac_line( + self, + filename: str, + line: str, + line_number: int = 0, + context: CodeSnippet | None = None, + raw_context: CodeSnippet | None = None, + value_pattern: Optional[dict[Pattern, int]] = None, + secret_pattern: Optional[dict[Pattern, int]] = None, + **kwargs: Any, + ) -> set[PotentialSecret]: + secrets = set() + + # classic key-value pair + keyword_on_key = self.keyword_scanner.analyze_line(filename, line, line_number, **kwargs) + if keyword_on_key: + return self.detect_secret(self.high_entropy_scanners_iac, filename, line, line_number, **kwargs) + + # not so classic key-value pair, from multiline, that is only in an array format. + # The scan is one-way backwards, so no duplicates expected. + + elif context is not None and raw_context is not None: + i = context.target_index + if self.is_object_start(raw_context=raw_context, idx=i): + return secrets + + value_secret = self.extract_from_string(pattern=secret_pattern, string=context.lines[i]) + secret_adjust = self.format_reducing_noice_secret(value_secret) + entropy_on_value = self.detect_secret(self.high_entropy_scanners, filename, secret_adjust, line_number, **kwargs) + + if entropy_on_value: + possible_keywords: set = set() + forward_range = range(context.target_index - 1, -1, -1) + backwards_range = range(context.target_index + 1, len(context.lines)) + possible_keywords |= self.get_lines_from_same_object(forward_range, context, raw_context) + possible_keywords |= self.get_lines_from_same_object(backwards_range, context, raw_context) + + for other_value in possible_keywords: + if self.extract_from_string(pattern=value_pattern, string=other_value): + secrets |= entropy_on_value + if secrets: + break + return secrets + + def get_lines_from_same_object(self, search_range, context, raw_context): + possible_keywords = set() + for j in search_range: + line = context.lines[j] + if self.lines_in_same_object(raw_context=raw_context, idx=j) \ + and not self.line_is_comment(line): + possible_keywords.add(raw_context.lines[j]) + if self.is_object_start(raw_context=raw_context, idx=j): + return possible_keywords + return possible_keywords + + @staticmethod + def format_reducing_noice_secret(string): + return json.dumps(string) + + def lines_in_same_object(self, raw_context, idx): + return idx >= 0 \ + and self.lines_same_indentation(raw_context.lines[idx], raw_context.lines[idx+1]) + + @staticmethod + def is_object_start(raw_context, idx) -> bool: + return '-' in re.match(INDENTATION_PATTERN, raw_context.lines[idx]).groups()[0] + + @staticmethod + def line_is_comment(line): + if re.match(COMMENT_PREFIX, line): + return True + return False + + @staticmethod + def extract_from_string(pattern, string) -> str: + for value_regex, group_number in pattern.items(): + match = value_regex.search(string) + if match: + return match.group(group_number) + return '' + + @staticmethod + def lines_same_indentation(line1, line2) -> bool: + indent1 = len(re.match(INDENTATION_PATTERN, line1).groups()[0]) + indent2 = len(re.match(INDENTATION_PATTERN, line2).groups()[0]) + if indent1 == indent2: + return True + return False + + @staticmethod + def detect_secret( + scanners: tuple, + filename: str, + line: str, + line_number: int = 0, + **kwargs: Any, + ) -> set[PotentialSecret]: + for entropy_scanner in scanners: + matches = entropy_scanner.analyze_line(filename, line, line_number, **kwargs) + if matches: + return matches return set() - def analyze_string(self, string: str) -> Generator[str, None, None]: - raise NotImplementedError() From 9fbb699fee7e543a71de9f154941e06bcbf2169b Mon Sep 17 00:00:00 2001 From: marynaKK Date: Thu, 3 Nov 2022 10:22:45 +0200 Subject: [PATCH 2/9] added tests --- tests/secrets/test_plugin.py | 212 +++++++++++++++++++++++++++++++++++ 1 file changed, 212 insertions(+) diff --git a/tests/secrets/test_plugin.py b/tests/secrets/test_plugin.py index ce894eec2f7..ab738e806e1 100644 --- a/tests/secrets/test_plugin.py +++ b/tests/secrets/test_plugin.py @@ -1,7 +1,12 @@ import os import unittest +from detect_secrets.util.code_snippet import CodeSnippet +from detect_secrets.util.filetype import FileType + from checkov.secrets.plugins.entropy_keyword_combinator import EntropyKeywordCombinator +from checkov.secrets.plugins.entropy_keyword_combinator import REGEX_VALUE_KEYWORD_BY_FILETYPE +from checkov.secrets.plugins.entropy_keyword_combinator import REGEX_VALUE_SECRET_BY_FILETYPE class TestCombinatorPlugin(unittest.TestCase): @@ -44,4 +49,211 @@ def test_no_false_positive_yml_1(self): result = self.plugin.analyze_line(file_name, line, i) self.assertEqual(0, len(result)) + def test_are_lines_same_indentation_yml(self): + current_dir = os.path.dirname(os.path.realpath(__file__)) + file_name = "secret.yml" + valid_file_path = current_dir + f"/resources/cfn/{file_name}" + + result = {0: True, 1: False, 2: False, 3: False, 4: False, 5: True, 6: False, 7: True, + 8: True, 9: True, 10: True, 11: False, 12: False, 13: False, 14: False, 15: False, + 16: True, 17: False, 18: False, 19: True, 20: True, 21: True} + with open(file=valid_file_path) as f: + lines = f.readlines() + # assert len(result) == len(lines)-1 + for i in range(len(lines)-1): + result[i] = self.plugin.lines_same_indentation(lines[i], lines[i+1]) + + assert result + + def test_lines_in_same_object_yml(self): + pass + + def test_is_object_start_yml(self): + pass + + def test_line_is_comment_yml(self): + examples = [ + (True, "# comment"), + (True, " # also comment"), + (False, "var: not a comment # comment"), + (False, " - var: a"), + (False, "var: "), + ] + + for ans, line in examples: + assert ans == self.plugin.line_is_comment(line) + + def test_keyword_in_value_pair_yml(self): + # first line is keyword, next line (underneath) is password + context = CodeSnippet( + snippet=[ + 'name: "TEST_SOMETHING"', + 'value: "not-a-real-password"', + 'name: "TEST_PASSWORD_1"', + 'value: "Zmlyc3Rfc2VjcmV0X2hlcmVfd2hvYV9tdWx0aWxsaW5lX3Nob3VsZF93b3JrXzE=="', + 'name: "TEST_PASSWORD_2"', + 'value: "Zmlyc3Rfc2VjcmV0MjIyMjIyX2hlcmVfd2hvYV9tdWx0aWxsaW5lX3Nob3VsZF93b3JrXzI"', + 'name: "TEST_PASSWORD_3"', + 'value: "Z2FlYnJzZGhqa2p1aGdmZHN3cXdnaHluanVraWxvaWtqdWh5Z3RyZmVkd3NlcnR5dWk4bw"', + 'name: "TEST_PASSWORD_4"', + 'value: "Z2FlYnJzZGhqa2p1aGdmZHN3cXdnaHluanVraWxvaWtqdWh5Z3RyZmVkd3NlcnR5dWk4bw"', + 'name: "TEST_PASSWORD_BASE64_LONG_1"' + ], + start_line=112, + target_index=5 + ) + raw_context = CodeSnippet( + snippet=[ + ' - name: TEST_SOMETHING\n', + ' value: not-a-real-password\n', + ' - name: TEST_PASSWORD_1\n', + ' value: Zmlyc3Rfc2VjcmV0X2hlcmVfd2hvYV9tdWx0aWxsaW5lX3Nob3VsZF93b3JrXzE==\n', + ' - name: TEST_PASSWORD_2\n', + ' value: Zmlyc3Rfc2VjcmV0MjIyMjIyX2hlcmVfd2hvYV9tdWx0aWxsaW5lX3Nob3VsZF93b3JrXzI\n', + ' - name: TEST_PASSWORD_3\n', + ' value: Z2FlYnJzZGhqa2p1aGdmZHN3cXdnaHluanVraWxvaWtqdWh5Z3RyZmVkd3NlcnR5dWk4bw\n', + ' - name: TEST_PASSWORD_4\n', + ' value: Z2FlYnJzZGhqa2p1aGdmZHN3cXdnaHluanVraWxvaWtqdWh5Z3RyZmVkd3NlcnR5dWk4bw\n', + ' - name: TEST_PASSWORD_LONG\n' + ], + start_line=112, + target_index=5 + ) + res = self.plugin.analyze_line( + filename="test.yml", + line='value: "Zmlyc3Rfc2VjcmV0MjIyMjIyX2hlcmVfd2hvYV9tdWx0aWxsaW5lX3Nob3VsZF93b3JrXzI"', + line_number=118, + context=context, + raw_context=raw_context + ) + expected_secret_value = 'Zmlyc3Rfc2VjcmV0MjIyMjIyX2hlcmVfd2hvYV9tdWx0aWxsaW5lX3Nob3VsZF93b3JrXzI' + assert res + assert expected_secret_value == res.pop().secret_value + + def test_keyword_in_value_pair_yml2(self): + # first line is password, next line underneath is keyword + context = CodeSnippet( + snippet=[ + '', + 'name: "SOME_NAME"', + 'value: "some_value"', + 'value: "Zo5Zhexnf9TUggdn+zBKGEkmUUvuKzVN+/fKPaMBA4zVyef4irH5H5YfwoC4IqAX0DNoMD12yIF67nIdIMg13atW4WM33eNMfXlE"', + 'name: "TEST_PASSWORD_1"', + 'name1: "TEST_PASSWORD_2"', + 'value1: "1Vab3xejyUlh89P6tUJNXgO4t07DzmomF4tPBwTbwt+sjXHg3G0MPMRpH/I2ho4gS5H3AKJkvJZj87V7/Qnp/rHdbMVYK1F0BX35"', + 'name: "TEST_PASSWORD_3"', + 'value: "PtpfIZR+zZGPUWUYvLojqylVeEg63CBYN0FpGJ4yuH+9YxZZe8Uq7drEoTSfL64kElPEnVJk+H7SZr+wBoxN5qDWsbDmmUS2H76h"', + 'name: "TEST_PASSWORD_4"' + ], + start_line=0, + target_index=4 + ) + raw_context = CodeSnippet( + snippet=[ + 'spec:\n', + ' - name: SOME_NAME\n', + ' value: some_value\n', + ' - value: Zo5Zhexnf9TUggdn+zBKGEkmUUvuKzVN+/fKPaMBA4zVyef4irH5H5YfwoC4IqAX0DNoMD12yIF67nIdIMg13atW4WM33eNMfXlE\n', + ' name: TEST_PASSWORD_1\n', + ' - name1: TEST_PASSWORD_2\n', + ' value1: 1Vab3xejyUlh89P6tUJNXgO4t07DzmomF4tPBwTbwt+sjXHg3G0MPMRpH/I2ho4gS5H3AKJkvJZj87V7/Qnp/rHdbMVYK1F0BX35\n', + ' - name: TEST_PASSWORD_3\n', + ' value: PtpfIZR+zZGPUWUYvLojqylVeEg63CBYN0FpGJ4yuH+9YxZZe8Uq7drEoTSfL64kElPEnVJk+H7SZr+wBoxN5qDWsbDmmUS2H76h\n', + ' - name: TEST_PASSWORD_4\n' + ], + start_line=0, + target_index=4 + ) + res = self.plugin.analyze_line( + filename="test.yml", + line='name: "TEST_PASSWORD_1"', + line_number=5, + context=context, + raw_context=raw_context + ) + + expected_secret_value = 'Zo5Zhexnf9TUggdn+zBKGEkmUUvuKzVN+/fKPaMBA4zVyef4irH5H5YfwoC4IqAX0DNoMD12yIF67nIdIMg13atW4WM33eNMfXlE\\n' + assert res + assert expected_secret_value == res.pop().secret_value + + def test_keyword_in_value_pair_long_password_yml(self): + # first line is keyword, next line (underneath) is a long multiline password + context = CodeSnippet( + snippet=[ + 'name: "TEST_PASSWORD_3"', + 'value: "PtpfIZR+zZGPUWUYvLojqylVeEg63CBYN0FpGJ4yuH+9YxZZe8Uq7drEoTSfL64kElPEnVJk+H7SZr+wBoxN5qDWsbDmmUS2H76h"', + 'name: "TEST_PASSWORD_4"', + 'value: "emDJTiv6H/hP6I8Tmr5+kUdpBIQDrXMwFO7AkmbwROf3rM6uNToJlIJW7H5ApfPmSGU0oWBwflV6Cd9pPu5nEvgxt4YMHZ0SQ85z"', + 'name: "TEST_PASSWORD_LONG_1"', + 'value: "m9+1ONt6FdpnByhlaKDwZ/jjA5gaPzrKY9q5G8cr6kjn092ogigwEOGGryjDqq/NkX1DnKGGG7iduJUJ48+Rv0tgpdVAxwLQuiszRnssmi2ck/Zf1iDFlNQtiE8rvXE6OTCsb6mrpyItLOVnEwsRSpggyRa3KLSuiguiZsK5KyXQ6BsiAclpLvz6QFBQoQkZNxownQrqgLwVwkK1gW0/EEm0m1ylz20ZeLgYO6tRSvKDW0lrgAI7g60F7/eJGv1UqQlxK58T+7u1UX/K11Q69e9jJE+LkQ932eY37U70oVbBVchHwSFKUoffernEaG9XP1tyEpIptPqVpcS2BMpktoR1p1yyWuxC5GsPc2RlPQzEbs3n5lPPnC/uEVu7/cJENSw5+9DzigiHYPz1Cq/p5HedIl5ysn2U2VFgHWekGBYin6ytfmF2Sx+hYqeRd6RcxyU434CXspWQqc330sp9q7vwPQHNecBrvG2Iy7mqVSvaJDnkZ8AN"', + 'name: "TEST_PASSWORD_no_password"', + 'value: "RandomP@ssw0rd"' + ], + start_line=7, + target_index=5 + ) + raw_context = CodeSnippet( + snippet=[ + ' - name: TEST_PASSWORD_3\n', + ' value: PtpfIZR+zZGPUWUYvLojqylVeEg63CBYN0FpGJ4yuH+9YxZZe8Uq7drEoTSfL64kElPEnVJk+H7SZr+wBoxN5qDWsbDmmUS2H76h\n', + ' - name: TEST_PASSWORD_4\n', + ' value: emDJTiv6H/hP6I8Tmr5+kUdpBIQDrXMwFO7AkmbwROf3rM6uNToJlIJW7H5ApfPmSGU0oWBwflV6Cd9pPu5nEvgxt4YMHZ0SQ85z\n', + ' - name: TEST_PASSWORD_LONG_1\n', + ' value: m9+1ONt6FdpnByhlaKDwZ/jjA5gaPzrKY9q5G8cr6kjn092ogigwEOGGryjDqq/NkX1DnKGGG7iduJUJ48+Rv0tgpdVAxwLQuiszRnssmi2ck/Zf1iDFlNQtiE8rvXE6OTCsb6mrpyItLOVnEwsRSpggyRa3KLSuiguiZsK5KyXQ6BsiAclpLvz6QFBQoQkZNxownQrqgLwVwkK1gW0/EEm0m1ylz20ZeLgYO6tRSvKDW0lrgAI7g60F7/eJGv1UqQlxK58T+7u1UX/K11Q69e9jJE+LkQ932eY37U70oVbBVchHwSFKUoffernEaG9XP1tyEpIptPqVpcS2BMpktoR1p1yyWuxC5GsPc2RlPQzEbs3n5lPPnC/uEVu7/cJENSw5+9DzigiHYPz1Cq/p5HedIl5ysn2U2VFgHWekGBYin6ytfmF2Sx+hYqeRd6RcxyU434CXspWQqc330sp9q7vwPQHNecBrvG2Iy7mqVSvaJDnkZ8AN\n', + ' - name: TEST_PASSWORD_no_password\n', + ' value: RandomP@ssw0rd\n' + ], + start_line=112, + target_index=5 + ) + res = self.plugin.analyze_line( + filename="test.yml", + line='value: "Zmlyc3Rfc2VjcmV0MjIyMjIyX2hlcmVfd2hvYV9tdWx0aWxsaW5lX3Nob3VsZF93b3JrXzI"', + line_number=118, + context=context, + raw_context=raw_context + ) + expected_secret_value = 'm9+1ONt6FdpnByhlaKDwZ/jjA5gaPzrKY9q5G8cr6kjn092ogigwEOGGryjDqq/NkX1DnKGGG7iduJUJ48+Rv0tgpdVAxwLQuiszRnssmi2ck/Zf1iDFlNQtiE8rvXE6OTCsb6mrpyItLOVnEwsRSpggyRa3KLSuiguiZsK5KyXQ6BsiAclpLvz6QFBQoQkZNxownQrqgLwVwkK1gW0/EEm0m1ylz20ZeLgYO6tRSvKDW0lrgAI7g60F7/eJGv1UqQlxK58T+7u1UX/K11Q69e9jJE+LkQ932eY37U70oVbBVchHwSFKUoffernEaG9XP1tyEpIptPqVpcS2BMpktoR1p1yyWuxC5GsPc2RlPQzEbs3n5lPPnC/uEVu7/cJENSw5+9DzigiHYPz1Cq/p5HedIl5ysn2U2VFgHWekGBYin6ytfmF2Sx+hYqeRd6RcxyU434CXspWQqc330sp9q7vwPQHNecBrvG2Iy7mqVSvaJDnkZ8AN' + assert res + assert expected_secret_value == res.pop().secret_value + + def test_regex_keyword_in_value_yml(self): + # the regex only finds the relevant part from the keyword that matches, + # the whole keyword is not found by the current regex. + + examples = [ + # (line, keyword) + ( + " - name: TEST_PASSWORD_1\n", + "PASSWORD", + ), + ( + " name: TEST_PASSWORD_1\n", + "PASSWORD", + ) + ] + + keyword_value_regex_to_group = REGEX_VALUE_KEYWORD_BY_FILETYPE.get(FileType.YAML) + value_regex, group_number = keyword_value_regex_to_group.popitem() + for line, secret in examples: + match = value_regex.search(line).group(group_number) + assert match == secret + + def test_regex_secret_in_value_yml(self): + examples = [ + # (line, secret) + ( + " - value: Zmlyc3Rfc2VjcmV0X2hlcmVfd2hvYV9tdWx0aWxsaW5lX3Nob3VsZF93b3JrXzE==\n", + "Zmlyc3Rfc2VjcmV0X2hlcmVfd2hvYV9tdWx0aWxsaW5lX3Nob3VsZF93b3JrXzE==\n", + ), + ( + " value: Zmlyc3Rfc2VjcmV0X2hlcmVfd2hvYV9tdWx0aWxsaW5lX3Nob3VsZF93b3JrXzE==\n", + "Zmlyc3Rfc2VjcmV0X2hlcmVfd2hvYV9tdWx0aWxsaW5lX3Nob3VsZF93b3JrXzE==\n", + ), + ] + secret_value_regex_to_group = REGEX_VALUE_SECRET_BY_FILETYPE.get(FileType.YAML) + value_regex, group_number = secret_value_regex_to_group.popitem() + for line, secret in examples: + match = value_regex.search(line).group(group_number) + assert match == secret From fd45e8a5f833d6b359c9e2715a759843cf04e3e3 Mon Sep 17 00:00:00 2001 From: marynaKK Date: Thu, 3 Nov 2022 10:26:55 +0200 Subject: [PATCH 3/9] added test file --- .../resources/cfn/test-multiline-secrets.yml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 tests/secrets/resources/cfn/test-multiline-secrets.yml diff --git a/tests/secrets/resources/cfn/test-multiline-secrets.yml b/tests/secrets/resources/cfn/test-multiline-secrets.yml new file mode 100644 index 00000000000..40d9b818582 --- /dev/null +++ b/tests/secrets/resources/cfn/test-multiline-secrets.yml @@ -0,0 +1,19 @@ +spec: + - name: SOME_NAME + value: some_value + - value: Zo5Zhexnf9TUggdn+zBKGEkmUUvuKzVN+/fKPaMBA4zVyef4irH5H5YfwoC4IqAX0DNoMD12yIF67nIdIMg13atW4WM33eNMfXlE + name: TEST_PASSWORD_1 + - name1: TEST_PASSWORD_2 + value1: 1Vab3xejyUlh89P6tUJNXgO4t07DzmomF4tPBwTbwt+sjXHg3G0MPMRpH/I2ho4gS5H3AKJkvJZj87V7/Qnp/rHdbMVYK1F0BX35 + - name: TEST_PASSWORD_3 + value: PtpfIZR+zZGPUWUYvLojqylVeEg63CBYN0FpGJ4yuH+9YxZZe8Uq7drEoTSfL64kElPEnVJk+H7SZr+wBoxN5qDWsbDmmUS2H76h + - name: TEST_PASSWORD_4 + value: emDJTiv6H/hP6I8Tmr5+kUdpBIQDrXMwFO7AkmbwROf3rM6uNToJlIJW7H5ApfPmSGU0oWBwflV6Cd9pPu5nEvgxt4YMHZ0SQ85z + - name: TEST_PASSWORD_LONG_1 + value: m9+1ONt6FdpnByhlaKDwZ/jjA5gaPzrKY9q5G8cr6kjn092ogigwEOGGryjDqq/NkX1DnKGGG7iduJUJ48+Rv0tgpdVAxwLQuiszRnssmi2ck + /Zf1iDFlNQtiE8rvXE6OTCsb6mrpyItLOVnEwsRSpggyRa3KLSuiguiZsK5KyXQ6BsiAclpLvz6QFBQoQkZNxownQrqgLwVwkK1gW0/EEm0m1ylz + 20ZeLgYO6tRSvKDW0lrgAI7g60F7/eJGv1UqQlxK58T+7u1UX/K11Q69e9jJE+LkQ932eY37U70oVbBVchHwSFKUoffernEaG9XP1tyEpIptPqVpcS + 2BMpktoR1p1yyWuxC5GsPc2RlPQzEbs3n5lPPnC/uEVu7/cJENSw5+9DzigiHYPz1Cq/p5HedIl5ysn2U2VFgHWekGBYin6ytfmF2Sx+hYqeRd6Rcxy + U434CXspWQqc330sp9q7vwPQHNecBrvG2Iy7mqVSvaJDnkZ8AN + - name: TEST_PASSWORD_no_password + value: RandomP@ssw0rd From f8cbffedf438d01995c86929b850359ed1810fb6 Mon Sep 17 00:00:00 2001 From: marynaKK Date: Thu, 3 Nov 2022 10:52:54 +0200 Subject: [PATCH 4/9] fixed bug + testing --- .../plugins/entropy_keyword_combinator.py | 2 - .../resources/cfn/test-multiline-secrets.yml | 8 +-- tests/secrets/test_plugin.py | 65 +++++++++++-------- 3 files changed, 40 insertions(+), 35 deletions(-) diff --git a/checkov/secrets/plugins/entropy_keyword_combinator.py b/checkov/secrets/plugins/entropy_keyword_combinator.py index f4d6f2be6c6..d4fbd8c83c7 100644 --- a/checkov/secrets/plugins/entropy_keyword_combinator.py +++ b/checkov/secrets/plugins/entropy_keyword_combinator.py @@ -153,8 +153,6 @@ def analyze_iac_line( elif context is not None and raw_context is not None: i = context.target_index - if self.is_object_start(raw_context=raw_context, idx=i): - return secrets value_secret = self.extract_from_string(pattern=secret_pattern, string=context.lines[i]) secret_adjust = self.format_reducing_noice_secret(value_secret) diff --git a/tests/secrets/resources/cfn/test-multiline-secrets.yml b/tests/secrets/resources/cfn/test-multiline-secrets.yml index 40d9b818582..50900358144 100644 --- a/tests/secrets/resources/cfn/test-multiline-secrets.yml +++ b/tests/secrets/resources/cfn/test-multiline-secrets.yml @@ -3,17 +3,13 @@ spec: value: some_value - value: Zo5Zhexnf9TUggdn+zBKGEkmUUvuKzVN+/fKPaMBA4zVyef4irH5H5YfwoC4IqAX0DNoMD12yIF67nIdIMg13atW4WM33eNMfXlE name: TEST_PASSWORD_1 - - name1: TEST_PASSWORD_2 + name1: TEST_PASSWORD_2 value1: 1Vab3xejyUlh89P6tUJNXgO4t07DzmomF4tPBwTbwt+sjXHg3G0MPMRpH/I2ho4gS5H3AKJkvJZj87V7/Qnp/rHdbMVYK1F0BX35 - name: TEST_PASSWORD_3 value: PtpfIZR+zZGPUWUYvLojqylVeEg63CBYN0FpGJ4yuH+9YxZZe8Uq7drEoTSfL64kElPEnVJk+H7SZr+wBoxN5qDWsbDmmUS2H76h - name: TEST_PASSWORD_4 value: emDJTiv6H/hP6I8Tmr5+kUdpBIQDrXMwFO7AkmbwROf3rM6uNToJlIJW7H5ApfPmSGU0oWBwflV6Cd9pPu5nEvgxt4YMHZ0SQ85z - name: TEST_PASSWORD_LONG_1 - value: m9+1ONt6FdpnByhlaKDwZ/jjA5gaPzrKY9q5G8cr6kjn092ogigwEOGGryjDqq/NkX1DnKGGG7iduJUJ48+Rv0tgpdVAxwLQuiszRnssmi2ck - /Zf1iDFlNQtiE8rvXE6OTCsb6mrpyItLOVnEwsRSpggyRa3KLSuiguiZsK5KyXQ6BsiAclpLvz6QFBQoQkZNxownQrqgLwVwkK1gW0/EEm0m1ylz - 20ZeLgYO6tRSvKDW0lrgAI7g60F7/eJGv1UqQlxK58T+7u1UX/K11Q69e9jJE+LkQ932eY37U70oVbBVchHwSFKUoffernEaG9XP1tyEpIptPqVpcS - 2BMpktoR1p1yyWuxC5GsPc2RlPQzEbs3n5lPPnC/uEVu7/cJENSw5+9DzigiHYPz1Cq/p5HedIl5ysn2U2VFgHWekGBYin6ytfmF2Sx+hYqeRd6Rcxy - U434CXspWQqc330sp9q7vwPQHNecBrvG2Iy7mqVSvaJDnkZ8AN + value: m9+1ONt6FdpnByhlaKDwZ/jjA5gaPzrKY9q5G8cr6kjn092ogigwEOGGryjDqq/NkX1DnKGGG7iduJUJ48+Rv0tgpdVAxwLQuiszRnssmi2ck/Zf1iDFlNQtiE8rvXE6OTCsb6mrpyItLOVnEwsRSpggyRa3KLSuiguiZsK5KyXQ6BsiAclpLvz6QFBQoQkZNxownQrqgLwVwkK1gW0/EEm0m1ylz20ZeLgYO6tRSvKDW0lrgAI7g60F7/eJGv1UqQlxK58T+7u1UX/K11Q69e9jJE+LkQ932eY37U70oVbBVchHwSFKUoffernEaG9XP1tyEpIptPqVpcS2BMpktoR1p1yyWuxC5GsPc2RlPQzEbs3n5lPPnC/uEVu7/cJENSw5+9DzigiHYPz1Cq/p5HedIl5ysn2U2VFgHWekGBYin6ytfmF2Sx+hYqeRd6RcxyU434CXspWQqc330sp9q7vwPQHNecBrvG2Iy7mqVSvaJDnkZ8AN - name: TEST_PASSWORD_no_password value: RandomP@ssw0rd diff --git a/tests/secrets/test_plugin.py b/tests/secrets/test_plugin.py index ab738e806e1..7cf3120e6d8 100644 --- a/tests/secrets/test_plugin.py +++ b/tests/secrets/test_plugin.py @@ -1,12 +1,15 @@ +import logging import os import unittest from detect_secrets.util.code_snippet import CodeSnippet from detect_secrets.util.filetype import FileType +from checkov.runner_filter import RunnerFilter from checkov.secrets.plugins.entropy_keyword_combinator import EntropyKeywordCombinator from checkov.secrets.plugins.entropy_keyword_combinator import REGEX_VALUE_KEYWORD_BY_FILETYPE from checkov.secrets.plugins.entropy_keyword_combinator import REGEX_VALUE_SECRET_BY_FILETYPE +from checkov.secrets.runner import Runner class TestCombinatorPlugin(unittest.TestCase): @@ -65,12 +68,6 @@ def test_are_lines_same_indentation_yml(self): assert result - def test_lines_in_same_object_yml(self): - pass - - def test_is_object_start_yml(self): - pass - def test_line_is_comment_yml(self): examples = [ (True, "# comment"), @@ -135,44 +132,46 @@ def test_keyword_in_value_pair_yml2(self): context = CodeSnippet( snippet=[ '', - 'name: "SOME_NAME"', - 'value: "some_value"', + '', + '', + '', + '', 'value: "Zo5Zhexnf9TUggdn+zBKGEkmUUvuKzVN+/fKPaMBA4zVyef4irH5H5YfwoC4IqAX0DNoMD12yIF67nIdIMg13atW4WM33eNMfXlE"', 'name: "TEST_PASSWORD_1"', 'name1: "TEST_PASSWORD_2"', 'value1: "1Vab3xejyUlh89P6tUJNXgO4t07DzmomF4tPBwTbwt+sjXHg3G0MPMRpH/I2ho4gS5H3AKJkvJZj87V7/Qnp/rHdbMVYK1F0BX35"', 'name: "TEST_PASSWORD_3"', - 'value: "PtpfIZR+zZGPUWUYvLojqylVeEg63CBYN0FpGJ4yuH+9YxZZe8Uq7drEoTSfL64kElPEnVJk+H7SZr+wBoxN5qDWsbDmmUS2H76h"', - 'name: "TEST_PASSWORD_4"' + 'value: "PtpfIZR+zZGPUWUYvLojqylVeEg63CBYN0FpGJ4yuH+9YxZZe8Uq7drEoTSfL64kElPEnVJk+H7SZr+wBoxN5qDWsbDmmUS2H76h"' ], - start_line=0, - target_index=4 + start_line=5, + target_index=5 ) raw_context = CodeSnippet( snippet=[ - 'spec:\n', - ' - name: SOME_NAME\n', - ' value: some_value\n', - ' - value: Zo5Zhexnf9TUggdn+zBKGEkmUUvuKzVN+/fKPaMBA4zVyef4irH5H5YfwoC4IqAX0DNoMD12yIF67nIdIMg13atW4WM33eNMfXlE\n', - ' name: TEST_PASSWORD_1\n', - ' - name1: TEST_PASSWORD_2\n', - ' value1: 1Vab3xejyUlh89P6tUJNXgO4t07DzmomF4tPBwTbwt+sjXHg3G0MPMRpH/I2ho4gS5H3AKJkvJZj87V7/Qnp/rHdbMVYK1F0BX35\n', - ' - name: TEST_PASSWORD_3\n', - ' value: PtpfIZR+zZGPUWUYvLojqylVeEg63CBYN0FpGJ4yuH+9YxZZe8Uq7drEoTSfL64kElPEnVJk+H7SZr+wBoxN5qDWsbDmmUS2H76h\n', - ' - name: TEST_PASSWORD_4\n' + '#\n', + '#\n', + 'spec:\n', + ' - name: SOME_NAME\n', + ' value: some_value\n', + ' value: Zo5Zhexnf9TUggdn+zBKGEkmUUvuKzVN+/fKPaMBA4zVyef4irH5H5YfwoC4IqAX0DNoMD12yIF67nIdIMg13atW4WM33eNMfXlE\n', + ' name: TEST_PASSWORD_1\n', + ' - name1: TEST_PASSWORD_2\n', + ' value1: 1Vab3xejyUlh89P6tUJNXgO4t07DzmomF4tPBwTbwt+sjXHg3G0MPMRpH/I2ho4gS5H3AKJkvJZj87V7/Qnp/rHdbMVYK1F0BX35\n', + ' name: TEST_PASSWORD_3\n', + ' value: PtpfIZR+zZGPUWUYvLojqylVeEg63CBYN0FpGJ4yuH+9YxZZe8Uq7drEoTSfL64kElPEnVJk+H7SZr+wBoxN5qDWsbDmmUS2H76h\n' ], - start_line=0, - target_index=4 + start_line=5, + target_index=5 ) res = self.plugin.analyze_line( filename="test.yml", - line='name: "TEST_PASSWORD_1"', - line_number=5, + line='value: "Zo5Zhexnf9TUggdn+zBKGEkmUUvuKzVN+/fKPaMBA4zVyef4irH5H5YfwoC4IqAX0DNoMD12yIF67nIdIMg13atW4WM33eNMfXlE"', + line_number=11, context=context, raw_context=raw_context ) - expected_secret_value = 'Zo5Zhexnf9TUggdn+zBKGEkmUUvuKzVN+/fKPaMBA4zVyef4irH5H5YfwoC4IqAX0DNoMD12yIF67nIdIMg13atW4WM33eNMfXlE\\n' + expected_secret_value = 'Zo5Zhexnf9TUggdn+zBKGEkmUUvuKzVN+/fKPaMBA4zVyef4irH5H5YfwoC4IqAX0DNoMD12yIF67nIdIMg13atW4WM33eNMfXlE' assert res assert expected_secret_value == res.pop().secret_value @@ -217,6 +216,18 @@ def test_keyword_in_value_pair_long_password_yml(self): assert res assert expected_secret_value == res.pop().secret_value + def test_multiline_keyword_password_report(self): + current_dir = os.path.dirname(os.path.realpath(__file__)) + file_name = "test-multiline-secrets.yml" + valid_file_path = current_dir + f"/resources/cfn/{file_name}" + + runner = Runner() + report = runner.run(root_folder=None, files=[valid_file_path], runner_filter=RunnerFilter(framework=['secrets'])) + self.assertEqual(len(report.failed_checks), 5) + self.assertEqual(report.parsing_errors, []) + self.assertEqual(report.passed_checks, []) + self.assertEqual(report.skipped_checks, []) + def test_regex_keyword_in_value_yml(self): # the regex only finds the relevant part from the keyword that matches, # the whole keyword is not found by the current regex. From 55946302209531b3ca2decf555f5b2d6ce2a9ea4 Mon Sep 17 00:00:00 2001 From: marynaKK Date: Thu, 3 Nov 2022 11:45:24 +0200 Subject: [PATCH 5/9] modified tests + 1st fix lint/mypy --- .../plugins/entropy_keyword_combinator.py | 70 +++++++++++++------ tests/secrets/test_plugin.py | 2 +- .../test-multiline-secrets.yml | 0 3 files changed, 49 insertions(+), 23 deletions(-) rename tests/secrets/{resources/cfn => yml_multiline}/test-multiline-secrets.yml (100%) diff --git a/checkov/secrets/plugins/entropy_keyword_combinator.py b/checkov/secrets/plugins/entropy_keyword_combinator.py index d4fbd8c83c7..575872611bf 100644 --- a/checkov/secrets/plugins/entropy_keyword_combinator.py +++ b/checkov/secrets/plugins/entropy_keyword_combinator.py @@ -1,12 +1,11 @@ from __future__ import annotations -import logging import json import re from typing import Generator +from typing import Optional from typing import Any from typing import TYPE_CHECKING -from typing import Optional from typing import Pattern from detect_secrets.plugins.high_entropy_strings import Base64HighEntropyString @@ -111,8 +110,8 @@ def analyze_line( ) -> set[PotentialSecret]: is_iac = f".{filename.split('.')[-1]}" not in SOURCE_CODE_EXTENSION filetype = determine_file_type(filename) - value_keyword_regex_to_group = REGEX_VALUE_KEYWORD_BY_FILETYPE.get(filetype, FileType.YAML) - secret_keyword_regex_to_group = REGEX_VALUE_SECRET_BY_FILETYPE.get(filetype, FileType.YAML) + value_keyword_regex_to_group = REGEX_VALUE_KEYWORD_BY_FILETYPE.get(filetype, None) + secret_keyword_regex_to_group = REGEX_VALUE_SECRET_BY_FILETYPE.get(filetype, None) if len(line) <= MAX_LINE_LENGTH: if is_iac: @@ -137,8 +136,8 @@ def analyze_iac_line( line_number: int = 0, context: CodeSnippet | None = None, raw_context: CodeSnippet | None = None, - value_pattern: Optional[dict[Pattern, int]] = None, - secret_pattern: Optional[dict[Pattern, int]] = None, + value_pattern: Optional[dict[Pattern[str], int]] = None, + secret_pattern: Optional[dict[Pattern[str], int]] = None, **kwargs: Any, ) -> set[PotentialSecret]: secrets = set() @@ -159,7 +158,7 @@ def analyze_iac_line( entropy_on_value = self.detect_secret(self.high_entropy_scanners, filename, secret_adjust, line_number, **kwargs) if entropy_on_value: - possible_keywords: set = set() + possible_keywords: set[str] = set() forward_range = range(context.target_index - 1, -1, -1) backwards_range = range(context.target_index + 1, len(context.lines)) possible_keywords |= self.get_lines_from_same_object(forward_range, context, raw_context) @@ -172,8 +171,15 @@ def analyze_iac_line( break return secrets - def get_lines_from_same_object(self, search_range, context, raw_context): - possible_keywords = set() + def get_lines_from_same_object( + self, + search_range: range, + context: CodeSnippet | None, + raw_context: CodeSnippet | None + ) -> set[str]: + possible_keywords: set[str] = set() + if not context or not raw_context: + return possible_keywords for j in search_range: line = context.lines[j] if self.lines_in_same_object(raw_context=raw_context, idx=j) \ @@ -184,25 +190,40 @@ def get_lines_from_same_object(self, search_range, context, raw_context): return possible_keywords @staticmethod - def format_reducing_noice_secret(string): + def format_reducing_noice_secret(string: str) -> str: return json.dumps(string) - def lines_in_same_object(self, raw_context, idx): - return idx >= 0 \ - and self.lines_same_indentation(raw_context.lines[idx], raw_context.lines[idx+1]) + def lines_in_same_object( + self, + raw_context: CodeSnippet | None, + idx: int + ) -> bool: + if not raw_context: + return False # could not know + return idx >= 0 and self.lines_same_indentation(raw_context.lines[idx], raw_context.lines[idx + 1]) @staticmethod - def is_object_start(raw_context, idx) -> bool: - return '-' in re.match(INDENTATION_PATTERN, raw_context.lines[idx]).groups()[0] + def is_object_start( + raw_context: CodeSnippet | None, + idx: int + ) -> bool: + if not raw_context: + return False # could not know + match = re.match(INDENTATION_PATTERN, raw_context.lines[idx]) + if match: + return '-' in match.groups()[0] + return False @staticmethod - def line_is_comment(line): + def line_is_comment(line: str) -> bool: if re.match(COMMENT_PREFIX, line): return True return False @staticmethod - def extract_from_string(pattern, string) -> str: + def extract_from_string(pattern: Optional[dict[Pattern[str], int]], string: str) -> str: + if not pattern: + return '' for value_regex, group_number in pattern.items(): match = value_regex.search(string) if match: @@ -210,16 +231,22 @@ def extract_from_string(pattern, string) -> str: return '' @staticmethod - def lines_same_indentation(line1, line2) -> bool: - indent1 = len(re.match(INDENTATION_PATTERN, line1).groups()[0]) - indent2 = len(re.match(INDENTATION_PATTERN, line2).groups()[0]) + def lines_same_indentation(line1: str, line2: str) -> bool: + match1 = re.match(INDENTATION_PATTERN, line1) + match2 = re.match(INDENTATION_PATTERN, line2) + if not match1 and not match2: + return True + if not match1 or not match2: + return False + indent1 = len(match1.groups()[0]) + indent2 = len(match2.groups()[0]) if indent1 == indent2: return True return False @staticmethod def detect_secret( - scanners: tuple, + scanners: tuple[Base64HighEntropyString, HexHighEntropyString], filename: str, line: str, line_number: int = 0, @@ -230,4 +257,3 @@ def detect_secret( if matches: return matches return set() - diff --git a/tests/secrets/test_plugin.py b/tests/secrets/test_plugin.py index 7cf3120e6d8..24b49d587cd 100644 --- a/tests/secrets/test_plugin.py +++ b/tests/secrets/test_plugin.py @@ -219,7 +219,7 @@ def test_keyword_in_value_pair_long_password_yml(self): def test_multiline_keyword_password_report(self): current_dir = os.path.dirname(os.path.realpath(__file__)) file_name = "test-multiline-secrets.yml" - valid_file_path = current_dir + f"/resources/cfn/{file_name}" + valid_file_path = current_dir + f"/yml_multiline/{file_name}" runner = Runner() report = runner.run(root_folder=None, files=[valid_file_path], runner_filter=RunnerFilter(framework=['secrets'])) diff --git a/tests/secrets/resources/cfn/test-multiline-secrets.yml b/tests/secrets/yml_multiline/test-multiline-secrets.yml similarity index 100% rename from tests/secrets/resources/cfn/test-multiline-secrets.yml rename to tests/secrets/yml_multiline/test-multiline-secrets.yml From b0e293408c2b0d9be6fc1178a9e19fe7c069d4cf Mon Sep 17 00:00:00 2001 From: marynaKK Date: Thu, 3 Nov 2022 11:55:32 +0200 Subject: [PATCH 6/9] changed to Nimrod suggestions --- .../plugins/entropy_keyword_combinator.py | 49 +++++++++---------- tests/secrets/test_plugin.py | 6 ++- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/checkov/secrets/plugins/entropy_keyword_combinator.py b/checkov/secrets/plugins/entropy_keyword_combinator.py index 575872611bf..c92f458cb16 100644 --- a/checkov/secrets/plugins/entropy_keyword_combinator.py +++ b/checkov/secrets/plugins/entropy_keyword_combinator.py @@ -31,8 +31,8 @@ ENTROPY_KEYWORD_COMBINATOR_LIMIT = 3 ENTROPY_KEYWORD_LIMIT = 4.5 -INDENTATION_PATTERN = re.compile(r'(^[\r\t\f\v ]*(?:-?[\r\t\f\v ]+)?)') -COMMENT_PREFIX = re.compile(r'^[\r\t\f\v ]*#') +INDENTATION_PATTERN = re.compile(r'(^\s*(?:-?\s+)?)') +COMMENT_PREFIX = re.compile(r'^[\s]*(#|\/\/)') DENY_LIST_REGEX = r'|'.join(DENYLIST) # Support for suffix after keyword i.e. password_secure = "value" @@ -115,21 +115,29 @@ def analyze_line( if len(line) <= MAX_LINE_LENGTH: if is_iac: - return self.analyze_iac_line( - filename=filename, - line=line, - line_number=line_number, - context=context, - raw_context=raw_context, - value_pattern=value_keyword_regex_to_group, - secret_pattern=secret_keyword_regex_to_group, - kwargs=kwargs - ) + # classic key-value pair + keyword_on_key = self.keyword_scanner.analyze_line(filename, line, line_number, **kwargs) + if keyword_on_key: + return self.detect_secret(self.high_entropy_scanners_iac, filename, line, line_number, **kwargs) + + # not so classic key-value pair, from multiline, that is only in an array format. + # The scan is one-way backwards, so no duplicates expected. + elif filetype == filetype.YAML: + return self.analyze_iac_line_yml( + filename=filename, + line=line, + line_number=line_number, + context=context, + raw_context=raw_context, + value_pattern=value_keyword_regex_to_group, + secret_pattern=secret_keyword_regex_to_group, + kwargs=kwargs + ) else: return self.detect_secret(self.high_entropy_scanners, filename, line, line_number, **kwargs) return set() - def analyze_iac_line( + def analyze_iac_line_yml( self, filename: str, line: str, @@ -141,20 +149,11 @@ def analyze_iac_line( **kwargs: Any, ) -> set[PotentialSecret]: secrets = set() - - # classic key-value pair - keyword_on_key = self.keyword_scanner.analyze_line(filename, line, line_number, **kwargs) - if keyword_on_key: - return self.detect_secret(self.high_entropy_scanners_iac, filename, line, line_number, **kwargs) - - # not so classic key-value pair, from multiline, that is only in an array format. - # The scan is one-way backwards, so no duplicates expected. - - elif context is not None and raw_context is not None: + if context is not None and raw_context is not None: i = context.target_index value_secret = self.extract_from_string(pattern=secret_pattern, string=context.lines[i]) - secret_adjust = self.format_reducing_noice_secret(value_secret) + secret_adjust = self.format_reducing_noise_secret(value_secret) entropy_on_value = self.detect_secret(self.high_entropy_scanners, filename, secret_adjust, line_number, **kwargs) if entropy_on_value: @@ -190,7 +189,7 @@ def get_lines_from_same_object( return possible_keywords @staticmethod - def format_reducing_noice_secret(string: str) -> str: + def format_reducing_noise_secret(string: str) -> str: return json.dumps(string) def lines_in_same_object( diff --git a/tests/secrets/test_plugin.py b/tests/secrets/test_plugin.py index 24b49d587cd..ddaedf36285 100644 --- a/tests/secrets/test_plugin.py +++ b/tests/secrets/test_plugin.py @@ -72,7 +72,11 @@ def test_line_is_comment_yml(self): examples = [ (True, "# comment"), (True, " # also comment"), - (False, "var: not a comment # comment"), + (True, "// nice comment here"), + (True, "//and nice comment here2"), + (True, " // commenting with checkov and having fun"), + (False, "var: a //this is not a comment"), + (False, "var: not a comment # comment"), (False, " - var: a"), (False, "var: "), ] From 9eb764658492c8e3ff9ebf214b10ee1611681d9c Mon Sep 17 00:00:00 2001 From: marynaKK Date: Thu, 3 Nov 2022 12:00:41 +0200 Subject: [PATCH 7/9] fix typo --- .../plugins/entropy_keyword_combinator.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/checkov/secrets/plugins/entropy_keyword_combinator.py b/checkov/secrets/plugins/entropy_keyword_combinator.py index c92f458cb16..e6557304495 100644 --- a/checkov/secrets/plugins/entropy_keyword_combinator.py +++ b/checkov/secrets/plugins/entropy_keyword_combinator.py @@ -122,17 +122,17 @@ def analyze_line( # not so classic key-value pair, from multiline, that is only in an array format. # The scan is one-way backwards, so no duplicates expected. - elif filetype == filetype.YAML: + elif filetype == FileType.YAML: return self.analyze_iac_line_yml( - filename=filename, - line=line, - line_number=line_number, - context=context, - raw_context=raw_context, - value_pattern=value_keyword_regex_to_group, - secret_pattern=secret_keyword_regex_to_group, - kwargs=kwargs - ) + filename=filename, + line=line, + line_number=line_number, + context=context, + raw_context=raw_context, + value_pattern=value_keyword_regex_to_group, + secret_pattern=secret_keyword_regex_to_group, + kwargs=kwargs + ) else: return self.detect_secret(self.high_entropy_scanners, filename, line, line_number, **kwargs) return set() From e2cfe054ed1267b7a3bfa0b841adf9ef3030ce80 Mon Sep 17 00:00:00 2001 From: marynaKK Date: Thu, 3 Nov 2022 13:03:12 +0200 Subject: [PATCH 8/9] fix bug access idx in context --- checkov/secrets/plugins/entropy_keyword_combinator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/checkov/secrets/plugins/entropy_keyword_combinator.py b/checkov/secrets/plugins/entropy_keyword_combinator.py index e6557304495..cc4b7132124 100644 --- a/checkov/secrets/plugins/entropy_keyword_combinator.py +++ b/checkov/secrets/plugins/entropy_keyword_combinator.py @@ -199,7 +199,8 @@ def lines_in_same_object( ) -> bool: if not raw_context: return False # could not know - return idx >= 0 and self.lines_same_indentation(raw_context.lines[idx], raw_context.lines[idx + 1]) + return 0 <= idx < len(raw_context.lines) and 0 <= idx + 1 < len(raw_context.lines)\ + and self.lines_same_indentation(raw_context.lines[idx], raw_context.lines[idx + 1]) @staticmethod def is_object_start( From 1021fa87dc27696a13e162d08fd485ff7bd8d23d Mon Sep 17 00:00:00 2001 From: marynaKK Date: Thu, 3 Nov 2022 13:15:49 +0200 Subject: [PATCH 9/9] applied Anton suggestions --- .../plugins/entropy_keyword_combinator.py | 38 +++++++++++++------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/checkov/secrets/plugins/entropy_keyword_combinator.py b/checkov/secrets/plugins/entropy_keyword_combinator.py index cc4b7132124..af10ac8ccb9 100644 --- a/checkov/secrets/plugins/entropy_keyword_combinator.py +++ b/checkov/secrets/plugins/entropy_keyword_combinator.py @@ -3,7 +3,6 @@ import json import re from typing import Generator -from typing import Optional from typing import Any from typing import TYPE_CHECKING from typing import Pattern @@ -97,7 +96,7 @@ def __init__(self, limit: float = ENTROPY_KEYWORD_LIMIT) -> None: self.keyword_scanner = KeywordDetector() def analyze_string(self, string: str) -> Generator[str, None, None]: - raise NotImplementedError() + pass def analyze_line( self, @@ -118,7 +117,13 @@ def analyze_line( # classic key-value pair keyword_on_key = self.keyword_scanner.analyze_line(filename, line, line_number, **kwargs) if keyword_on_key: - return self.detect_secret(self.high_entropy_scanners_iac, filename, line, line_number, **kwargs) + return self.detect_secret( + scanners=self.high_entropy_scanners_iac, + filename=filename, + line=line, + line_number=line_number, + kwargs=kwargs + ) # not so classic key-value pair, from multiline, that is only in an array format. # The scan is one-way backwards, so no duplicates expected. @@ -134,7 +139,13 @@ def analyze_line( kwargs=kwargs ) else: - return self.detect_secret(self.high_entropy_scanners, filename, line, line_number, **kwargs) + return self.detect_secret( + scanners=self.high_entropy_scanners, + filename=filename, + line=line, + line_number=line_number, + kwargs=kwargs + ) return set() def analyze_iac_line_yml( @@ -144,17 +155,21 @@ def analyze_iac_line_yml( line_number: int = 0, context: CodeSnippet | None = None, raw_context: CodeSnippet | None = None, - value_pattern: Optional[dict[Pattern[str], int]] = None, - secret_pattern: Optional[dict[Pattern[str], int]] = None, + value_pattern: dict[Pattern[str], int] | None = None, + secret_pattern: dict[Pattern[str], int] | None = None, **kwargs: Any, ) -> set[PotentialSecret]: secrets = set() if context is not None and raw_context is not None: - i = context.target_index - - value_secret = self.extract_from_string(pattern=secret_pattern, string=context.lines[i]) + value_secret = self.extract_from_string(pattern=secret_pattern, string=context.target_line) secret_adjust = self.format_reducing_noise_secret(value_secret) - entropy_on_value = self.detect_secret(self.high_entropy_scanners, filename, secret_adjust, line_number, **kwargs) + entropy_on_value = self.detect_secret( + scanners=self.high_entropy_scanners, + filename=filename, + line=secret_adjust, + line_number=line_number, + kwargs=kwargs + ) if entropy_on_value: possible_keywords: set[str] = set() @@ -166,7 +181,6 @@ def analyze_iac_line_yml( for other_value in possible_keywords: if self.extract_from_string(pattern=value_pattern, string=other_value): secrets |= entropy_on_value - if secrets: break return secrets @@ -221,7 +235,7 @@ def line_is_comment(line: str) -> bool: return False @staticmethod - def extract_from_string(pattern: Optional[dict[Pattern[str], int]], string: str) -> str: + def extract_from_string(pattern: dict[Pattern[str], int] | None, string: str) -> str: if not pattern: return '' for value_regex, group_number in pattern.items():