Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(secrets): Add Terraform multiline secrets handling #3907

Merged
merged 6 commits into from
Nov 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ jobs:
steps:
- uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3
- uses: sigstore/cosign-installer@9becc617647dfa20ae7b1151972e9b3a2c338a2b # v2
- uses: docker/setup-buildx-action@8c0edbc76e98fa90f69d9a2c020dcb50019dc325 # v2 # needed for self-hosted builds
- name: Get release version
id: versions
run: |
Expand All @@ -269,12 +270,20 @@ jobs:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push Docker image
id: docker_push
- name: Build and export image to Docker
# buildx changes the driver to 'docker-container' which doesn't expose the image to the host,
# so it is built and loaded to Docker and in the next step pushed to the registry
uses: docker/build-push-action@c56af957549030174b10d6867f20e78cfd7debc5 # v3
with:
context: .
no-cache: true
load: true
tags: ${{ env.DH_IMAGE_NAME }}:latest
- name: Push Docker image
id: docker_push
uses: docker/build-push-action@c56af957549030174b10d6867f20e78cfd7debc5 # v3
with:
context: .
push: true
tags: |
${{ env.DH_IMAGE_NAME }}:latest
Expand Down
Empty file.
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
if TYPE_CHECKING:
from detect_secrets.util.code_snippet import CodeSnippet

from checkov.common.parsers.multiline_parser import BaseMultiLineParser
from checkov.secrets.parsers.multiline_parser import BaseMultiLineParser


START_OBJ_END_OF_LINE = r'({\s*}?\s*,?\s*$)'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@


class BaseMultiLineParser(ABC):

def get_lines_from_same_object(
self,
search_range: range,
Expand Down
36 changes: 36 additions & 0 deletions checkov/secrets/parsers/single_line_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any, TYPE_CHECKING


if TYPE_CHECKING:
from detect_secrets.core.potential_secret import PotentialSecret
from detect_secrets.plugins.high_entropy_strings import Base64HighEntropyString, HexHighEntropyString
from detect_secrets.util.code_snippet import CodeSnippet


class BaseSingleLineParser(ABC):
    """Base class for single-line secret detection with a false-positive hook.

    Subclasses implement ``ignore_secret`` to decide, from the surrounding
    code snippet, whether a detected match should be discarded.
    """

    def detect_secret(
        self,
        scanners: tuple[Base64HighEntropyString, HexHighEntropyString],
        filename: str,
        raw_context: CodeSnippet | None,
        line: str,
        line_number: int = 0,
        **kwargs: Any,
    ) -> set[PotentialSecret]:
        """Return the matches of the first scanner that reports any.

        :param scanners: entropy scanners, tried in the given order
        :param filename: name of the file the line belongs to
        :param raw_context: snippet around the line, or ``None`` if unavailable
        :param line: content of the line to analyze
        :param line_number: number of the line, forwarded to the scanners
        :param kwargs: extra arguments forwarded unchanged to each scanner
        :return: the scanner's matches, or an empty set when nothing matched
                 or the match was classified as a false positive
        """

        for scanner in scanners:
            found = scanner.analyze_line(filename, line, line_number, **kwargs)
            if not found:
                # nothing reported by this scanner, try the next one
                continue

            # the context check is only possible when a snippet was supplied
            is_false_positive = bool(raw_context) and self.ignore_secret(raw_context=raw_context)
            return set() if is_false_positive else found

        return set()

    @abstractmethod
    def ignore_secret(self, raw_context: CodeSnippet) -> bool:
        """Return True if the context shows the match is a false positive."""

        pass
Empty file.
37 changes: 37 additions & 0 deletions checkov/secrets/parsers/terraform/multiline_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from __future__ import annotations

import re
from typing import TYPE_CHECKING

from checkov.secrets.parsers.multiline_parser import BaseMultiLineParser

if TYPE_CHECKING:
from detect_secrets.util.code_snippet import CodeSnippet

# line that ends with an opening brace, optionally preceded by a single word
# and/or an equal sign, e.g. `tags = {` or a bare `{`
START_OBJ = re.compile(r"^\s*\w*\s*=?\s*{\s*$")
# line that consists only of a closing brace
END_OBJ = re.compile(r"^\s*}\s*$")
# line whose first non-whitespace characters are `#` or `//`
COMMENT_PREFIX = re.compile(r"^[\s]*(#|//)")


class TerraformMultiLineParser(BaseMultiLineParser):
    """Terraform flavoured implementation of the multiline parser hooks."""

    def consecutive_lines_in_same_object(
        self,
        raw_context: CodeSnippet | None,
        other_line_idx: int,
    ) -> bool:
        """Return True if ``other_line_idx`` is a valid index into the snippet lines."""

        if not raw_context:
            # without a snippet there is no other line to look at
            return False

        return 0 <= other_line_idx < len(raw_context.lines)

    @staticmethod
    def is_object_start(line: str) -> bool:
        """Return True if the line opens an object, see ``START_OBJ``."""

        return START_OBJ.match(line) is not None

    @staticmethod
    def is_object_end(line: str) -> bool:
        """Return True if the line only closes an object, see ``END_OBJ``."""

        return END_OBJ.match(line) is not None

    @staticmethod
    def is_line_comment(line: str) -> bool:
        """Return True if the line starts with a comment marker, see ``COMMENT_PREFIX``."""

        return COMMENT_PREFIX.match(line) is not None


terraform_multiline_parser = TerraformMultiLineParser()
28 changes: 28 additions & 0 deletions checkov/secrets/parsers/terraform/single_line_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from checkov.secrets.parsers.single_line_parser import BaseSingleLineParser

if TYPE_CHECKING:
from detect_secrets.util.code_snippet import CodeSnippet


class TerraformSingleLineParser(BaseSingleLineParser):
    """Single-line secret parser with Terraform specific false-positive checks."""

    # headers of top-level blocks other than 'data'; hitting one of them while
    # searching backwards means the target line lives in that block instead
    _NON_DATA_BLOCK_PREFIXES = (
        'resource "',
        'module "',
        'provider "',
        'variable "',
        'output "',
        'locals {',
    )

    def ignore_secret(self, raw_context: CodeSnippet) -> bool:
        """Return True if the potential secret should be treated as a false positive."""

        return self.ignore_terraform_data_block(raw_context=raw_context)

    def ignore_terraform_data_block(self, raw_context: CodeSnippet) -> bool:
        """Check for a possible data block usage.

        Walks backwards from the line before the target line and looks for the
        nearest top-level block header.

        :param raw_context: snippet holding the target line and the lines around it
        :return: True if the nearest block header found is a 'data' block,
                 False if another block header or no header at all is found
        """

        # search backwards to find a possible 'data' block
        for line_index in range(raw_context.target_index - 1, -1, -1):
            stripped_line = raw_context.lines[line_index].lstrip()

            if stripped_line.startswith('data "'):
                # a data block is typically used to fetch remote information,
                # so it can retrieve a secret, but does not contain a hardcoded one
                return True

            if stripped_line.startswith(self._NON_DATA_BLOCK_PREFIXES):
                # the target line belongs to a different block; a data block
                # further up in the snippet must not suppress this finding
                return False

        return False


terraform_single_line_parser = TerraformSingleLineParser()
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
if TYPE_CHECKING:
from detect_secrets.util.code_snippet import CodeSnippet

from checkov.common.parsers.multiline_parser import BaseMultiLineParser
from checkov.secrets.parsers.multiline_parser import BaseMultiLineParser

INDENTATION_PATTERN = re.compile(r'(^\s*(?:-?\s+)?)')
COMMENT_PREFIX = re.compile(r'^[\s]*(#|\/\/)')
Expand Down
125 changes: 98 additions & 27 deletions checkov/secrets/plugins/entropy_keyword_combinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@
from detect_secrets.util.filetype import FileType
from detect_secrets.util.filetype import determine_file_type

from checkov.secrets.parsers.terraform.multiline_parser import terraform_multiline_parser
from checkov.secrets.parsers.terraform.single_line_parser import terraform_single_line_parser
from checkov.secrets.runner import SOURCE_CODE_EXTENSION
from checkov.common.parsers.multiline_parser import BaseMultiLineParser
from checkov.common.parsers.yaml.multiline_parser import yml_multiline_parser
from checkov.common.parsers.json.multiline_parser import json_multiline_parser
from checkov.secrets.parsers.yaml.multiline_parser import yml_multiline_parser
from checkov.secrets.parsers.json.multiline_parser import json_multiline_parser

if TYPE_CHECKING:
from checkov.secrets.parsers.multiline_parser import BaseMultiLineParser
from detect_secrets.core.potential_secret import PotentialSecret
from detect_secrets.util.code_snippet import CodeSnippet

Expand Down Expand Up @@ -93,6 +95,30 @@
flags=re.IGNORECASE,
)

# Matches an optional key followed by '=' and a (possibly quoted) value that
# contains a deny-listed keyword, e.g. var = MY_PASSWORD_123
# NOTE(review): the trailing (\3) back-references an earlier capture group,
# presumably the opening quote - confirm against the groups defined inside KEY.
FOLLOWED_BY_EQUAL_VALUE_KEYWORD_REGEX = re.compile(
    rf'{OPTIONAL_WHITESPACE}({KEY})?={OPTIONAL_WHITESPACE}'
    rf'({QUOTE}?){AFFIX_REGEX}{DENY_LIST_REGEX2}({CLOSING})?(\3)',
    flags=re.IGNORECASE,
)

# Matches an optional key followed by '=' and a (possibly quoted) secret value,
# e.g. var = Zmlyc3Rfc2VjcmV0X2hlcmVfd2hvYV9tdWx0aWxsaW5lX3Nob3VsZF93b3JrXzE==
FOLLOWED_BY_EQUAL_VALUE_SECRET_REGEX = re.compile(
    rf'{OPTIONAL_WHITESPACE}({KEY})?={OPTIONAL_WHITESPACE}'
    rf'({QUOTE}?)({SECRET})(\3)',
    flags=re.IGNORECASE,
)

# if the current regex is not enough, can add more regexes to check

YML_PAIR_VALUE_KEYWORD_REGEX_TO_GROUP = {
Expand All @@ -111,20 +137,42 @@
QUOTES_REQUIRED_FOLLOWED_BY_COLON_VALUE_SECRET_REGEX: 4,
}

# Terraform: regex for a keyword on the value side of an assignment, mapped to an int.
# NOTE(review): judging by the "_TO_GROUP" name the int is presumably the index of
# the capture group holding the value - confirm against the consuming code.
TERRAFORM_PAIR_VALUE_KEYWORD_REGEX_TO_GROUP = {
FOLLOWED_BY_EQUAL_VALUE_KEYWORD_REGEX: 4,
}

# Terraform: regex for a secret on the value side of an assignment, mapped to an int
# (same presumed meaning as above).
TERRAFORM_PAIR_VALUE_SECRET_REGEX_TO_GROUP = {
FOLLOWED_BY_EQUAL_VALUE_SECRET_REGEX: 4,
}

# Keyword regex mappings per file type; analyze_line looks these up by the
# file type that is paired with each multiline parser.
REGEX_VALUE_KEYWORD_BY_FILETYPE = {
FileType.YAML: YML_PAIR_VALUE_KEYWORD_REGEX_TO_GROUP,
FileType.JSON: JSON_PAIR_VALUE_KEYWORD_REGEX_TO_GROUP,
FileType.TERRAFORM: TERRAFORM_PAIR_VALUE_KEYWORD_REGEX_TO_GROUP,
}

# Secret regex mappings per file type; looked up the same way as the keyword mappings.
REGEX_VALUE_SECRET_BY_FILETYPE = {
FileType.YAML: YML_PAIR_VALUE_SECRET_REGEX_TO_GROUP,
FileType.JSON: JSON_PAIR_VALUE_SECRET_REGEX_TO_GROUP,
FileType.TERRAFORM: TERRAFORM_PAIR_VALUE_SECRET_REGEX_TO_GROUP,
}

# File types with a dedicated single-line parser; analyze_line falls back to
# its own detect_secret for file types that are not listed here.
SINGLE_LINE_PARSER = {
FileType.TERRAFORM: terraform_single_line_parser,
}

# Multiline parsers to try per file type, in the given order.
# Each entry pairs the file type, whose value regexes are looked up for the
# analysis, with the parser to use. Terraform additionally tries the JSON and
# YAML parsers, because a Terraform file can embed those formats, ex. via heredoc.
MULTILINE_PARSERS = {
    FileType.YAML: (
        (FileType.YAML, yml_multiline_parser),
    ),
    FileType.JSON: (
        (FileType.JSON, json_multiline_parser),
    ),
    FileType.TERRAFORM: (
        (FileType.TERRAFORM, terraform_multiline_parser),
        (FileType.JSON, json_multiline_parser),
        (FileType.YAML, yml_multiline_parser),
    ),
}


Expand All @@ -149,15 +197,29 @@ def analyze_line(
raw_context: CodeSnippet | None = None,
**kwargs: Any,
) -> set[PotentialSecret]:
if len(line) > MAX_LINE_LENGTH:
# to keep good performance we skip long lines
return set()

is_iac = f".{filename.split('.')[-1]}" not in SOURCE_CODE_EXTENSION
filetype = determine_file_type(filename)
multiline_parser = MULTILINE_PARSERS.get(filetype)

if len(line) <= MAX_LINE_LENGTH:
if is_iac:
# classic key-value pair
keyword_on_key = self.keyword_scanner.analyze_line(filename, line, line_number, **kwargs)
if keyword_on_key:
if is_iac:
filetype = determine_file_type(filename)
single_line_parser = SINGLE_LINE_PARSER.get(filetype)
multiline_parsers = MULTILINE_PARSERS.get(filetype)

# classic key-value pair
keyword_on_key = self.keyword_scanner.analyze_line(filename, line, line_number, **kwargs)
if keyword_on_key:
if single_line_parser:
return single_line_parser.detect_secret(
scanners=self.high_entropy_scanners_iac,
filename=filename,
raw_context=raw_context,
line=line,
line_number=line_number,
kwargs=kwargs
)
else:
return self.detect_secret(
scanners=self.high_entropy_scanners_iac,
filename=filename,
Expand All @@ -166,12 +228,16 @@ def analyze_line(
kwargs=kwargs
)

# not so classic key-value pair, from multiline, that is only in an array format.
# The scan is one-way backwards, so no duplicates expected.
elif multiline_parser:
value_keyword_regex_to_group = REGEX_VALUE_KEYWORD_BY_FILETYPE.get(filetype)
secret_keyword_regex_to_group = REGEX_VALUE_SECRET_BY_FILETYPE.get(filetype)
return self.analyze_multiline(
# not so classic key-value pair, from multiline, that is only in an array format.
# The scan searches forwards and backwards for a potential secret pair, so no duplicates expected.
elif multiline_parsers:
# iterate over multiple parser and their related file type.
# this is needed for file types, which embed other file type parser, ex Terraform with heredoc
for parser_file_type, multiline_parser in multiline_parsers:
value_keyword_regex_to_group = REGEX_VALUE_KEYWORD_BY_FILETYPE.get(parser_file_type)
secret_keyword_regex_to_group = REGEX_VALUE_SECRET_BY_FILETYPE.get(parser_file_type)

potential_secrets = self.analyze_multiline(
filename=filename,
line=line,
multiline_parser=multiline_parser,
Expand All @@ -182,14 +248,19 @@ def analyze_line(
secret_pattern=secret_keyword_regex_to_group,
kwargs=kwargs
)
else:
return self.detect_secret(
scanners=self.high_entropy_scanners,
filename=filename,
line=line,
line_number=line_number,
kwargs=kwargs
)

if potential_secrets:
# return a possible secret, otherwise check with next parser
return potential_secrets
else:
return self.detect_secret(
scanners=self.high_entropy_scanners,
filename=filename,
line=line,
line_number=line_number,
kwargs=kwargs
)

return set()

def analyze_multiline(
Expand Down
51 changes: 51 additions & 0 deletions tests/secrets/terraform_multiline/cfn_heredoc.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
resource "aws_cloudformation_stack" "lambda" {
name = "lambda"

parameters = {
VPCCidr = "10.0.0.0/16"
}

template_body = <<STACK
AWSTemplateFormatVersion: '2010-09-09'
Description: VPC function.
Resources:
Function:
Type: AWS::Lambda::Function
Properties:
Handler: index.handler
Role: arn:aws:iam::123456789012:role/lambda-role
Code:
S3Bucket: my-bucket
S3Key: function.zip
Runtime: nodejs12.x
Timeout: 5
TracingConfig:
Mode: Active
VpcConfig:
SecurityGroupIds:
- sg-085912345678492fb
SubnetIds:
- subnet-071f712345678e7c8
- subnet-07fd123456788a036
Tags:
- Key: "SOME_NAME"
Value: "some_value"
# name1 & value1 are not valid arguments
- Value: "Zo5Zhexnf9TUggdn+zBKGEkmUUvuKzVN+/fKPaMBA4zVyef4irH5H5YfwoC4IqAX0DNoMD12yIF67nIdIMg13atW4WM33eNMfXlE"
Key: "TEST_PASSWORD_1"
Key1: "TEST_PASSWORD_2"
Value1: "1Vab3xejyUlh89P6tUJNXgO4t07DzmomF4tPBwTbwt+sjXHg3G0MPMRpH/I2ho4gS5H3AKJkvJZj87V7/Qnp/rHdbMVYK1F0BX35"
- Key: "TEST_PASSWORD_3"
# comment 1
# comment 2
# comment 3
Value: "PtpfIZR+zZGPUWUYvLojqylVeEg63CBYN0FpGJ4yuH+9YxZZe8Uq7drEoTSfL64kElPEnVJk+H7SZr+wBoxN5qDWsbDmmUS2H76h"
- Value: "emDJTiv6H/hP6I8Tmr5+kUdpBIQDrXMwFO7AkmbwROf3rM6uNToJlIJW7H5ApfPmSGU0oWBwflV6Cd9pPu5nEvgxt4YMHZ0SQ85z"
# comment 1
Key: "TEST_PASSWORD_4"
- Key: "TEST_PASSWORD_LONG_1"
Value: "m9+1ONt6FdpnByhlaKDwZ/jjA5gaPzrKY9q5G8cr6kjn092ogigwEOGGryjDqq/NkX1DnKGGG7iduJUJ48+Rv0tgpdVAxwLQuiszRnssmi2ck/Zf1iDFlNQtiE8rvXE6OTCsb6mrpyItLOVnEwsRSpggyRa3KLSuiguiZsK5KyXQ6BsiAclpLvz6QFBQoQkZNxownQrqgLwVwkK1gW0/EEm0m1ylz20ZeLgYO6tRSvKDW0lrgAI7g60F7/eJGv1UqQlxK58T+7u1UX/K11Q69e9jJE+LkQ932eY37U70oVbBVchHwSFKUoffernEaG9XP1tyEpIptPqVpcS2BMpktoR1p1yyWuxC5GsPc2RlPQzEbs3n5lPPnC/uEVu7/cJENSw5+9DzigiHYPz1Cq/p5HedIl5ysn2U2VFgHWekGBYin6ytfmF2Sx+hYqeRd6RcxyU434CXspWQqc330sp9q7vwPQHNecBrvG2Iy7mqVSvaJDnkZ8AN"
- Key: "TEST_PASSWORD_no_password"
Value: "RandomP@ssw0rd"
STACK
}
3 changes: 3 additions & 0 deletions tests/secrets/terraform_multiline/data.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
data "google_secret_manager_secret_version" "secret" {
secret = "somesecretid"
}
Loading