Skip to content

Commit

Permalink
feat(secrets): Add Terraform multiline secrets handling (#3907)
Browse files Browse the repository at this point in the history
* add multiline secret support in Terraform

* support jsonencode and heredoc and ignore data blocks

* fix PR comments

* create a dedicated single line parser class

* add buildx step to build the image on self-hosted runners
  • Loading branch information
gruebel authored Nov 22, 2022
1 parent 57a2102 commit 2ee3c09
Show file tree
Hide file tree
Showing 22 changed files with 597 additions and 41 deletions.
13 changes: 11 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ jobs:
steps:
- uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3
- uses: sigstore/cosign-installer@9becc617647dfa20ae7b1151972e9b3a2c338a2b # v2
- uses: docker/setup-buildx-action@8c0edbc76e98fa90f69d9a2c020dcb50019dc325 # v2 # needed for self-hosted builds
- name: Get release version
id: versions
run: |
Expand All @@ -269,12 +270,20 @@ jobs:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push Docker image
id: docker_push
- name: Build and export image to Docker
# buildx changes the driver to 'docker-container' which doesn't expose the image to the host,
# so it is built and loaded to Docker and in the next step pushed to the registry
uses: docker/build-push-action@c56af957549030174b10d6867f20e78cfd7debc5 # v3
with:
context: .
no-cache: true
load: true
tags: ${{ env.DH_IMAGE_NAME }}:latest
- name: Push Docker image
id: docker_push
uses: docker/build-push-action@c56af957549030174b10d6867f20e78cfd7debc5 # v3
with:
context: .
push: true
tags: |
${{ env.DH_IMAGE_NAME }}:latest
Expand Down
Empty file.
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
if TYPE_CHECKING:
from detect_secrets.util.code_snippet import CodeSnippet

from checkov.common.parsers.multiline_parser import BaseMultiLineParser
from checkov.secrets.parsers.multiline_parser import BaseMultiLineParser


START_OBJ_END_OF_LINE = r'({\s*}?\s*,?\s*$)'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@


class BaseMultiLineParser(ABC):

def get_lines_from_same_object(
self,
search_range: range,
Expand Down
36 changes: 36 additions & 0 deletions checkov/secrets/parsers/single_line_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any, TYPE_CHECKING


if TYPE_CHECKING:
from detect_secrets.core.potential_secret import PotentialSecret
from detect_secrets.plugins.high_entropy_strings import Base64HighEntropyString, HexHighEntropyString
from detect_secrets.util.code_snippet import CodeSnippet


class BaseSingleLineParser(ABC):
    """Base class for file-type specific handling of secrets found on a single line.

    Subclasses supply :meth:`ignore_secret` to veto a match based on the
    surrounding code snippet.
    """

    def detect_secret(
        self,
        scanners: tuple[Base64HighEntropyString, HexHighEntropyString],
        filename: str,
        raw_context: CodeSnippet | None,
        line: str,
        line_number: int = 0,
        **kwargs: Any,
    ) -> set[PotentialSecret]:
        """Run the entropy scanners over ``line`` and return the first non-empty result.

        Scanners are tried in order and scanning stops at the first one that
        reports a match. If a context snippet is available and
        :meth:`ignore_secret` flags it, an empty set is returned instead of
        the match. An empty set is also returned when no scanner matches.
        """
        for scanner in scanners:
            found = scanner.analyze_line(filename, line, line_number, **kwargs)
            if not found:
                # nothing reported by this scanner, try the next one
                continue

            # the context check only runs when a snippet was actually provided
            if raw_context and self.ignore_secret(raw_context=raw_context):
                return set()
            return found

        return set()

    @abstractmethod
    def ignore_secret(self, raw_context: CodeSnippet) -> bool:
        """Decide from the surrounding snippet whether a match is a false positive."""
Empty file.
37 changes: 37 additions & 0 deletions checkov/secrets/parsers/terraform/multiline_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from __future__ import annotations

import re
from typing import TYPE_CHECKING

from checkov.secrets.parsers.multiline_parser import BaseMultiLineParser

if TYPE_CHECKING:
from detect_secrets.util.code_snippet import CodeSnippet

# Line that opens an object: optional word characters, an optional '=', then a lone '{' up to end of line.
START_OBJ = re.compile(r"^\s*\w*\s*=?\s*{\s*$")
# Line that contains nothing but a closing '}' (surrounding whitespace allowed).
END_OBJ = re.compile(r"^\s*}\s*$")
# Line whose first non-whitespace characters are '#' or '//'.
COMMENT_PREFIX = re.compile(r"^[\s]*(#|//)")


class TerraformMultiLineParser(BaseMultiLineParser):
    """Multi-line parser hooks for Terraform files, driven by the module-level regexes."""

    def consecutive_lines_in_same_object(
        self,
        raw_context: CodeSnippet | None,
        other_line_idx: int,
    ) -> bool:
        """Return True when ``other_line_idx`` is a valid index into the snippet's lines."""
        if not raw_context:
            # without a snippet there is nothing to index into
            return False

        line_count = len(raw_context.lines)
        return 0 <= other_line_idx < line_count

    @staticmethod
    def is_object_start(line: str) -> bool:
        """Return True when ``line`` matches ``START_OBJ``."""
        return START_OBJ.match(line) is not None

    @staticmethod
    def is_object_end(line: str) -> bool:
        """Return True when ``line`` matches ``END_OBJ``."""
        return END_OBJ.match(line) is not None

    @staticmethod
    def is_line_comment(line: str) -> bool:
        """Return True when ``line`` matches ``COMMENT_PREFIX``."""
        return COMMENT_PREFIX.match(line) is not None


# module-level instance, created once at import time so other modules can import it directly
terraform_multiline_parser = TerraformMultiLineParser()
28 changes: 28 additions & 0 deletions checkov/secrets/parsers/terraform/single_line_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from checkov.secrets.parsers.single_line_parser import BaseSingleLineParser

if TYPE_CHECKING:
from detect_secrets.util.code_snippet import CodeSnippet


class TerraformSingleLineParser(BaseSingleLineParser):
    """Single-line secret handling for Terraform files.

    A match is dropped when the target line belongs to a ``data`` block.
    """

    # Headers of labelled top-level blocks other than ``data``. Reaching one of
    # these while walking backwards means the target line belongs to that block,
    # so an even earlier ``data`` block must not be considered.
    _OTHER_BLOCK_PREFIXES = ('resource "', 'module "', 'provider "', 'variable "', 'output "')

    def ignore_secret(self, raw_context: CodeSnippet) -> bool:
        """Return True when the match in ``raw_context`` should be treated as a false positive."""
        return self.ignore_terraform_data_block(raw_context=raw_context)

    def ignore_terraform_data_block(self, raw_context: CodeSnippet) -> bool:
        """Check whether the target line sits inside a ``data`` block.

        The lines before ``raw_context.target_index`` are walked backwards until
        the nearest block header is found. Returns True if that header starts a
        ``data`` block. Returns False if it starts another labelled top-level
        block, or if no header is visible in the snippet.
        """

        # search backwards for the header of the block enclosing the target line
        for line_index in range(raw_context.target_index - 1, -1, -1):
            stripped = raw_context.lines[line_index].lstrip()
            if stripped.startswith('data "'):
                # a data block is typically used to get remote information,
                # so it can retrieve a secret but does not contain a hardcoded one
                return True
            if stripped.startswith(self._OTHER_BLOCK_PREFIXES):
                # the nearest enclosing block is not a data block; a data block
                # further up in the snippet is unrelated to the target line
                return False

        return False


# module-level instance, created once at import time so other modules can import it directly
terraform_single_line_parser = TerraformSingleLineParser()
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
if TYPE_CHECKING:
from detect_secrets.util.code_snippet import CodeSnippet

from checkov.common.parsers.multiline_parser import BaseMultiLineParser
from checkov.secrets.parsers.multiline_parser import BaseMultiLineParser

INDENTATION_PATTERN = re.compile(r'(^\s*(?:-?\s+)?)')
COMMENT_PREFIX = re.compile(r'^[\s]*(#|\/\/)')
Expand Down
125 changes: 98 additions & 27 deletions checkov/secrets/plugins/entropy_keyword_combinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@
from detect_secrets.util.filetype import FileType
from detect_secrets.util.filetype import determine_file_type

from checkov.secrets.parsers.terraform.multiline_parser import terraform_multiline_parser
from checkov.secrets.parsers.terraform.single_line_parser import terraform_single_line_parser
from checkov.secrets.runner import SOURCE_CODE_EXTENSION
from checkov.common.parsers.multiline_parser import BaseMultiLineParser
from checkov.common.parsers.yaml.multiline_parser import yml_multiline_parser
from checkov.common.parsers.json.multiline_parser import json_multiline_parser
from checkov.secrets.parsers.yaml.multiline_parser import yml_multiline_parser
from checkov.secrets.parsers.json.multiline_parser import json_multiline_parser

if TYPE_CHECKING:
from checkov.secrets.parsers.multiline_parser import BaseMultiLineParser
from detect_secrets.core.potential_secret import PotentialSecret
from detect_secrets.util.code_snippet import CodeSnippet

Expand Down Expand Up @@ -93,6 +95,30 @@
flags=re.IGNORECASE,
)

# Case-insensitive pattern for an `=` assignment with an optional key in front, whose value is
# built from AFFIX_REGEX and DENY_LIST_REGEX2 and may be wrapped in quotes.
# NOTE(review): the `\3` back-reference presumably pairs the closing quote with the opening one;
# the group numbering depends on the sub-patterns defined outside this view - confirm.
FOLLOWED_BY_EQUAL_VALUE_KEYWORD_REGEX = re.compile(
# e.g. var = MY_PASSWORD_123
r'{whitespace}({key})?={whitespace}({quote}?){words}{denylist}({closing})?(\3)'.format(
key=KEY,
whitespace=OPTIONAL_WHITESPACE,
quote=QUOTE,
words=AFFIX_REGEX,
denylist=DENY_LIST_REGEX2,
closing=CLOSING,
),
flags=re.IGNORECASE,
)

# Case-insensitive pattern for an `=` assignment with an optional key in front, whose value is
# matched by SECRET and may be wrapped in quotes.
# NOTE(review): the `\3` back-reference presumably pairs the closing quote with the opening one;
# the group numbering depends on the sub-patterns defined outside this view - confirm.
FOLLOWED_BY_EQUAL_VALUE_SECRET_REGEX = re.compile(
# e.g. var = Zmlyc3Rfc2VjcmV0X2hlcmVfd2hvYV9tdWx0aWxsaW5lX3Nob3VsZF93b3JrXzE==
r'{whitespace}({key})?={whitespace}({quote}?)({secret})(\3)'.format(
key=KEY,
whitespace=OPTIONAL_WHITESPACE,
quote=QUOTE,
secret=SECRET,
),
flags=re.IGNORECASE,
)

# if the current regex is not enough, can add more regexes to check

YML_PAIR_VALUE_KEYWORD_REGEX_TO_GROUP = {
Expand All @@ -111,20 +137,42 @@
QUOTES_REQUIRED_FOLLOWED_BY_COLON_VALUE_SECRET_REGEX: 4,
}

TERRAFORM_PAIR_VALUE_KEYWORD_REGEX_TO_GROUP = {
FOLLOWED_BY_EQUAL_VALUE_KEYWORD_REGEX: 4,
}

TERRAFORM_PAIR_VALUE_SECRET_REGEX_TO_GROUP = {
FOLLOWED_BY_EQUAL_VALUE_SECRET_REGEX: 4,
}

REGEX_VALUE_KEYWORD_BY_FILETYPE = {
FileType.YAML: YML_PAIR_VALUE_KEYWORD_REGEX_TO_GROUP,
FileType.JSON: JSON_PAIR_VALUE_KEYWORD_REGEX_TO_GROUP,
FileType.TERRAFORM: TERRAFORM_PAIR_VALUE_KEYWORD_REGEX_TO_GROUP,
}

REGEX_VALUE_SECRET_BY_FILETYPE = {
FileType.YAML: YML_PAIR_VALUE_SECRET_REGEX_TO_GROUP,
FileType.JSON: JSON_PAIR_VALUE_SECRET_REGEX_TO_GROUP,
FileType.TERRAFORM: TERRAFORM_PAIR_VALUE_SECRET_REGEX_TO_GROUP,
}

SINGLE_LINE_PARSER = {
FileType.TERRAFORM: terraform_single_line_parser,
}

MULTILINE_PARSERS = {
FileType.YAML: yml_multiline_parser,
FileType.JSON: json_multiline_parser,
FileType.YAML: (
(FileType.YAML, yml_multiline_parser),
),
FileType.JSON: (
(FileType.JSON, json_multiline_parser),
),
FileType.TERRAFORM: (
(FileType.TERRAFORM, terraform_multiline_parser),
(FileType.JSON, json_multiline_parser),
(FileType.YAML, yml_multiline_parser),
),
}


Expand All @@ -149,15 +197,29 @@ def analyze_line(
raw_context: CodeSnippet | None = None,
**kwargs: Any,
) -> set[PotentialSecret]:
if len(line) > MAX_LINE_LENGTH:
# to keep good performance we skip long lines
return set()

is_iac = f".{filename.split('.')[-1]}" not in SOURCE_CODE_EXTENSION
filetype = determine_file_type(filename)
multiline_parser = MULTILINE_PARSERS.get(filetype)

if len(line) <= MAX_LINE_LENGTH:
if is_iac:
# classic key-value pair
keyword_on_key = self.keyword_scanner.analyze_line(filename, line, line_number, **kwargs)
if keyword_on_key:
if is_iac:
filetype = determine_file_type(filename)
single_line_parser = SINGLE_LINE_PARSER.get(filetype)
multiline_parsers = MULTILINE_PARSERS.get(filetype)

# classic key-value pair
keyword_on_key = self.keyword_scanner.analyze_line(filename, line, line_number, **kwargs)
if keyword_on_key:
if single_line_parser:
return single_line_parser.detect_secret(
scanners=self.high_entropy_scanners_iac,
filename=filename,
raw_context=raw_context,
line=line,
line_number=line_number,
kwargs=kwargs
)
else:
return self.detect_secret(
scanners=self.high_entropy_scanners_iac,
filename=filename,
Expand All @@ -166,12 +228,16 @@ def analyze_line(
kwargs=kwargs
)

# not so classic key-value pair, from multiline, that is only in an array format.
# The scan is one-way backwards, so no duplicates expected.
elif multiline_parser:
value_keyword_regex_to_group = REGEX_VALUE_KEYWORD_BY_FILETYPE.get(filetype)
secret_keyword_regex_to_group = REGEX_VALUE_SECRET_BY_FILETYPE.get(filetype)
return self.analyze_multiline(
# not so classic key-value pair, from multiline, that is only in an array format.
# The scan searches forwards and backwards for a potential secret pair, so no duplicates expected.
elif multiline_parsers:
# iterate over the parsers and their related file types.
# this is needed for file types that can embed other formats, e.g. Terraform with a heredoc
for parser_file_type, multiline_parser in multiline_parsers:
value_keyword_regex_to_group = REGEX_VALUE_KEYWORD_BY_FILETYPE.get(parser_file_type)
secret_keyword_regex_to_group = REGEX_VALUE_SECRET_BY_FILETYPE.get(parser_file_type)

potential_secrets = self.analyze_multiline(
filename=filename,
line=line,
multiline_parser=multiline_parser,
Expand All @@ -182,14 +248,19 @@ def analyze_line(
secret_pattern=secret_keyword_regex_to_group,
kwargs=kwargs
)
else:
return self.detect_secret(
scanners=self.high_entropy_scanners,
filename=filename,
line=line,
line_number=line_number,
kwargs=kwargs
)

if potential_secrets:
# return a possible secret, otherwise check with next parser
return potential_secrets
else:
return self.detect_secret(
scanners=self.high_entropy_scanners,
filename=filename,
line=line,
line_number=line_number,
kwargs=kwargs
)

return set()

def analyze_multiline(
Expand Down
51 changes: 51 additions & 0 deletions tests/secrets/terraform_multiline/cfn_heredoc.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
resource "aws_cloudformation_stack" "lambda" {
name = "lambda"

parameters = {
VPCCidr = "10.0.0.0/16"
}

template_body = <<STACK
AWSTemplateFormatVersion: '2010-09-09'
Description: VPC function.
Resources:
Function:
Type: AWS::Lambda::Function
Properties:
Handler: index.handler
Role: arn:aws:iam::123456789012:role/lambda-role
Code:
S3Bucket: my-bucket
S3Key: function.zip
Runtime: nodejs12.x
Timeout: 5
TracingConfig:
Mode: Active
VpcConfig:
SecurityGroupIds:
- sg-085912345678492fb
SubnetIds:
- subnet-071f712345678e7c8
- subnet-07fd123456788a036
Tags:
- Key: "SOME_NAME"
Value: "some_value"
# name1 & value1 are not valid arguments
- Value: "Zo5Zhexnf9TUggdn+zBKGEkmUUvuKzVN+/fKPaMBA4zVyef4irH5H5YfwoC4IqAX0DNoMD12yIF67nIdIMg13atW4WM33eNMfXlE"
Key: "TEST_PASSWORD_1"
Key1: "TEST_PASSWORD_2"
Value1: "1Vab3xejyUlh89P6tUJNXgO4t07DzmomF4tPBwTbwt+sjXHg3G0MPMRpH/I2ho4gS5H3AKJkvJZj87V7/Qnp/rHdbMVYK1F0BX35"
- Key: "TEST_PASSWORD_3"
# comment 1
# comment 2
# comment 3
Value: "PtpfIZR+zZGPUWUYvLojqylVeEg63CBYN0FpGJ4yuH+9YxZZe8Uq7drEoTSfL64kElPEnVJk+H7SZr+wBoxN5qDWsbDmmUS2H76h"
- Value: "emDJTiv6H/hP6I8Tmr5+kUdpBIQDrXMwFO7AkmbwROf3rM6uNToJlIJW7H5ApfPmSGU0oWBwflV6Cd9pPu5nEvgxt4YMHZ0SQ85z"
# comment 1
Key: "TEST_PASSWORD_4"
- Key: "TEST_PASSWORD_LONG_1"
Value: "m9+1ONt6FdpnByhlaKDwZ/jjA5gaPzrKY9q5G8cr6kjn092ogigwEOGGryjDqq/NkX1DnKGGG7iduJUJ48+Rv0tgpdVAxwLQuiszRnssmi2ck/Zf1iDFlNQtiE8rvXE6OTCsb6mrpyItLOVnEwsRSpggyRa3KLSuiguiZsK5KyXQ6BsiAclpLvz6QFBQoQkZNxownQrqgLwVwkK1gW0/EEm0m1ylz20ZeLgYO6tRSvKDW0lrgAI7g60F7/eJGv1UqQlxK58T+7u1UX/K11Q69e9jJE+LkQ932eY37U70oVbBVchHwSFKUoffernEaG9XP1tyEpIptPqVpcS2BMpktoR1p1yyWuxC5GsPc2RlPQzEbs3n5lPPnC/uEVu7/cJENSw5+9DzigiHYPz1Cq/p5HedIl5ysn2U2VFgHWekGBYin6ytfmF2Sx+hYqeRd6RcxyU434CXspWQqc330sp9q7vwPQHNecBrvG2Iy7mqVSvaJDnkZ8AN"
- Key: "TEST_PASSWORD_no_password"
Value: "RandomP@ssw0rd"
STACK
}
3 changes: 3 additions & 0 deletions tests/secrets/terraform_multiline/data.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
data "google_secret_manager_secret_version" "secret" {
secret = "somesecretid"
}
Loading

0 comments on commit 2ee3c09

Please sign in to comment.