Skip to content

Commit

Permalink
Merge pull request trufflesecurity#154 from dxa4481/paths
Browse files Browse the repository at this point in the history
Paths
  • Loading branch information
dxa4481 authored May 6, 2019
2 parents a4c69fa + be24d1d commit 48ffdd3
Show file tree
Hide file tree
Showing 6 changed files with 206 additions and 14 deletions.
44 changes: 42 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ Searches through git repositories for secrets, digging deep into commit history
## NEW
truffleHog previously functioned by running entropy checks on git diffs. This functionality still exists, but high-signal regex checks have been added, and the ability to suppress entropy checking has also been added.

These features help cut down on noise and make the tool easier to shove into a devops pipeline.


```
truffleHog --regex --entropy=False https://github.com/dxa4481/truffleHog.git
Expand All @@ -17,6 +15,34 @@ or
truffleHog file:///user/dxa4481/codeprojects/truffleHog/
```

With the `--include_paths` and `--exclude_paths` options, it is also possible to limit scanning to a subset of objects in the Git history by defining regular expressions (one per line) in a file to match the targeted object paths. To illustrate, see the example include and exclude files below:

_include-patterns.txt:_
```ini
src/
# lines beginning with "#" are treated as comments and are ignored
gradle/
# regexes are matched from the start of the path (so "src/" above matches
# everything under src/), and can use python's regex syntax for
# case-insensitive matching and other advanced options
(?i).*\.(properties|conf|ini|txt|y(a)?ml)$
(.*/)?id_[rd]sa$
```

_exclude-patterns.txt:_
```ini
(.*/)?\.classpath$
.*\.jmx$
(.*/)?test/(.*/)?resources/
```

These filter files could then be applied by:
```bash
trufflehog --include_paths include-patterns.txt --exclude_paths exclude-patterns.txt file://path/to/my/repo.git
```
With these filters, issues found in files under the root-level `src` directory would be reported, unless the file is named with a `.classpath` or `.jmx` extension or is located in a matching test resources directory such as `src/test/dev/resources/`. Additional usage information is provided when calling `trufflehog` with the `-h` or `--help` options.

These features help cut down on noise and make the tool easier to shove into a devops pipeline.

![Example](https://i.imgur.com/YAXndLD.png)

## Install
Expand Down Expand Up @@ -65,6 +91,20 @@ optional arguments:
--max_depth MAX_DEPTH
The max commit depth to go back when searching for
secrets
-i INCLUDE_PATHS_FILE, --include_paths INCLUDE_PATHS_FILE
File with regular expressions (one per line), at least
one of which must match a Git object path in order for
it to be scanned; lines starting with "#" are treated
as comments and are ignored. If empty or not provided
(default), all Git object paths are included unless
otherwise excluded via the --exclude_paths option.
-x EXCLUDE_PATHS_FILE, --exclude_paths EXCLUDE_PATHS_FILE
File with regular expressions (one per line), none of
which may match a Git object path in order for it to
be scanned; lines starting with "#" are treated as
comments and are ignored. If empty or not provided
(default), no Git object paths are excluded unless
effectively excluded via the --include_paths option.
```

## Wishlist
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ GitPython==2.1.1
unittest2==1.1.0
pytest-cov==2.5.1
codecov==2.0.15
truffleHogRegexes==0.0.4
truffleHogRegexes==0.0.7
40 changes: 37 additions & 3 deletions scripts/searchOrg.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,49 @@
"""
import requests
from truffleHog import truffleHog
import re
from json import loads, dumps

# Maps a human-readable finding name to the regular expression that detects
# it. The table is handed to truffleHog.find_strings() as `custom_regexes`.
#
# Case-insensitive words are spelled as [xX] pairs rather than "[x|X]": inside
# a character class "|" is a literal pipe, not alternation, so the latter form
# would also accept strings such as "h|roku". Literal dots in host names are
# escaped for the same reason (an unescaped "." matches any character).
rules = {
    "Slack Token": "(xox[pboa]-[0-9]{12}-[0-9]{12}-[0-9]{12}-[a-z0-9]{32})",
    "RSA private key": "-----BEGIN RSA PRIVATE KEY-----",
    "SSH (OPENSSH) private key": "-----BEGIN OPENSSH PRIVATE KEY-----",
    "SSH (DSA) private key": "-----BEGIN DSA PRIVATE KEY-----",
    "SSH (EC) private key": "-----BEGIN EC PRIVATE KEY-----",
    "PGP private key block": "-----BEGIN PGP PRIVATE KEY BLOCK-----",
    "Facebook Oauth": "[fF][aA][cC][eE][bB][oO][oO][kK].{0,30}['\"\\s][0-9a-f]{32}['\"\\s]",
    "Twitter Oauth": "[tT][wW][iI][tT][tT][eE][rR].{0,30}['\"\\s][0-9a-zA-Z]{35,44}['\"\\s]",
    "GitHub": "[gG][iI][tT][hH][uU][bB].{0,30}['\"\\s][0-9a-zA-Z]{35,40}['\"\\s]",
    "Google Oauth": "(\"client_secret\":\"[a-zA-Z0-9-_]{24}\")",
    "AWS API Key": "AKIA[0-9A-Z]{16}",
    "Heroku API Key": "[hH][eE][rR][oO][kK][uU].{0,30}[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}",
    "Generic Secret": "[sS][eE][cC][rR][eE][tT].{0,30}['\"\\s][0-9a-zA-Z]{32,45}['\"\\s]",
    "Generic API Key": "[aA][pP][iI][_]?[kK][eE][yY].{0,30}['\"\\s][0-9a-zA-Z]{32,45}['\"\\s]",
    "Slack Webhook": "https://hooks\\.slack\\.com/services/T[a-zA-Z0-9_]{8}/B[a-zA-Z0-9_]{8}/[a-zA-Z0-9_]{24}",
    "Google (GCP) Service-account": "\"type\": \"service_account\"",
    "Twilio API Key": "SK[a-z0-9]{32}",
    "Password in URL": "[a-zA-Z]{3,10}://[^/\\s:@]{3,20}:[^/\\s:@]{3,20}@.{1,100}[\"'\\s]",
}

# Replace every pattern string with its compiled form, once, at import time.
# Only values are rebound (no keys added or removed), so mutating the dict
# while iterating over it is safe.
for key in rules:
    rules[key] = re.compile(rules[key])

def get_org_repos(orgname, page):
    """Scan every non-fork repository of a GitHub account for secrets.

    Walks the paginated ``/users/<orgname>/repos`` listing beginning at
    ``page`` and runs ``truffleHog.find_strings`` (regex checks only, using
    the module-level ``rules``, entropy checks disabled) on each repository
    that is not a fork. Each reported issue is printed as indented JSON with
    two GitHub links added and its ``diff``/``printDiff`` fields truncated to
    200 characters.

    Args:
        orgname: GitHub user or organisation name.
        page: Number of the first results page to fetch.

    Returns:
        None, once an empty page is reached.

    Raises:
        requests.HTTPError: if GitHub answers with an error status (for
            example when the API rate limit is exhausted).
    """
    # Loop over pages instead of recursing once per page, so a large account
    # cannot run into the interpreter's recursion limit.
    while True:
        response = requests.get(url='https://api.github.com/users/' + orgname + '/repos?page={}'.format(page))
        # An error payload is a JSON object, not a list of repositories; fail
        # loudly here rather than with an obscure TypeError further down.
        response.raise_for_status()
        repos = response.json()
        if not repos:
            return None
        for item in repos:
            # NOTE(review): the previous condition also tested a name
            # `reached` that is not defined anywhere in the visible file and
            # would raise NameError on the first non-fork repository; it is
            # dropped here. Confirm no resume flag was intended.
            if item['fork']:
                continue
            print('searching ' + item["html_url"])
            results = truffleHog.find_strings(item["html_url"], do_regex=True, custom_regexes=rules, do_entropy=False, max_depth=100000)
            for issue in results["foundIssues"]:
                # Each entry is opened and parsed as a JSON file; use a
                # context manager so the handle is closed promptly.
                with open(issue) as issue_file:
                    d = loads(issue_file.read())
                d['github_url'] = "{}/blob/{}/{}".format(item["html_url"], d['commitHash'], d['path'])
                d['github_commit_url'] = "{}/commit/{}".format(item["html_url"], d['commitHash'])
                # Keep the printed report readable: cap both diff renderings.
                d['diff'] = d['diff'][0:200]
                d['printDiff'] = d['printDiff'][0:200]
                print(dumps(d, indent=4))
        page += 1
# Script entry point: start a scan for each account from results page 1.
get_org_repos("twitter", 1)
get_org_repos("square", 1)
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='truffleHog',
version='2.0.92',
version='2.0.98',
description='Searches through git repositories for high entropy strings, digging deep into commit history.',
url='https://github.com/dxa4481/truffleHog',
author='Dylan Ayrey',
Expand All @@ -11,7 +11,7 @@
packages = ['truffleHog'],
install_requires=[
'GitPython == 2.1.1',
'truffleHogRegexes == 0.0.4'
'truffleHogRegexes == 0.0.7'
],
entry_points = {
'console_scripts': ['trufflehog = truffleHog.truffleHog:main'],
Expand Down
64 changes: 64 additions & 0 deletions test_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import sys
import json
import io
import re
from collections import namedtuple
from truffleHog import truffleHog
from mock import patch
from mock import MagicMock
Expand Down Expand Up @@ -66,6 +68,68 @@ def test_branch(self, rmtree_mock, repo_const_mock, clone_git_repo):
repo_const_mock.return_value = repo
truffleHog.find_strings("test_repo", branch="testbranch")
repo.remotes.origin.fetch.assert_called_once_with("testbranch")
def test_path_included(self):
    """Exercise truffleHog.path_included with include/exclude pattern sets.

    Uses lightweight stand-ins for diff blobs (only ``a_path``/``b_path``)
    covering unchanged, new, deleted, renamed and moved files, and checks
    the default behaviour, include-only, exclude-only and combined filters.
    """
    Blob = namedtuple('Blob', ('a_path', 'b_path'))
    blobs = {
        'file-root-dir': Blob('file', 'file'),
        'file-sub-dir': Blob('sub-dir/file', 'sub-dir/file'),
        'new-file-root-dir': Blob(None, 'new-file'),
        'new-file-sub-dir': Blob(None, 'sub-dir/new-file'),
        'deleted-file-root-dir': Blob('deleted-file', None),
        'deleted-file-sub-dir': Blob('sub-dir/deleted-file', None),
        'renamed-file-root-dir': Blob('file', 'renamed-file'),
        'renamed-file-sub-dir': Blob('sub-dir/file', 'sub-dir/renamed-file'),
        'moved-file-root-dir-to-sub-dir': Blob('moved-file', 'sub-dir/moved-file'),
        'moved-file-sub-dir-to-root-dir': Blob('sub-dir/moved-file', 'moved-file'),
        'moved-file-sub-dir-to-sub-dir': Blob('sub-dir/moved-file', 'moved/moved-file'),
    }

    # Every distinct path that appears on either side of any blob.
    src_paths = set(b.a_path for b in blobs.values() if b.a_path is not None)
    dest_paths = set(b.b_path for b in blobs.values() if b.b_path is not None)
    all_paths = src_paths.union(dest_paths)

    all_paths_patterns = [re.compile(re.escape(p)) for p in all_paths]
    overlap_patterns = [re.compile(r'sub-dir/.*'), re.compile(r'moved/'), re.compile(r'[^/]*file$')]
    sub_dirs_patterns = [re.compile(r'.+/.+')]
    deleted_paths_patterns = [re.compile(r'(.*/)?deleted-file$')]

    # Include/exclude pairings that must always leave the blob excluded.
    overlapping_pairs = (
        (all_paths_patterns, all_paths_patterns),
        (overlap_patterns, all_paths_patterns),
        (all_paths_patterns, overlap_patterns),
    )
    overlap_message = '{} should be excluded with overlapping patterns: \n\tinclude: {}\n\texclude: {}'

    for name, blob in blobs.items():
        self.assertTrue(
            truffleHog.path_included(blob),
            '{} should be included by default'.format(blob))
        self.assertTrue(
            truffleHog.path_included(blob, include_patterns=all_paths_patterns),
            '{} should be included with include_patterns: {}'.format(blob, all_paths_patterns))
        self.assertFalse(
            truffleHog.path_included(blob, exclude_patterns=all_paths_patterns),
            '{} should be excluded with exclude_patterns: {}'.format(blob, all_paths_patterns))

        for included, excluded in overlapping_pairs:
            self.assertFalse(
                truffleHog.path_included(blob,
                                         include_patterns=included,
                                         exclude_patterns=excluded),
                overlap_message.format(blob, included, excluded))

        # Prefer the destination path; fall back to the source for deletions.
        path = blob.b_path or blob.a_path
        in_sub_dir = '/' in path
        included_by_sub_dirs = truffleHog.path_included(blob, include_patterns=sub_dirs_patterns)
        if in_sub_dir:
            self.assertTrue(
                included_by_sub_dirs,
                '{}: inclusion should include sub directory paths: {}'.format(blob, sub_dirs_patterns))
            self.assertFalse(
                truffleHog.path_included(blob, exclude_patterns=sub_dirs_patterns),
                '{}: exclusion should exclude sub directory paths: {}'.format(blob, sub_dirs_patterns))
        else:
            self.assertFalse(
                included_by_sub_dirs,
                '{}: inclusion should exclude root directory paths: {}'.format(blob, sub_dirs_patterns))
            self.assertTrue(
                truffleHog.path_included(blob, exclude_patterns=sub_dirs_patterns),
                '{}: exclusion should include root directory paths: {}'.format(blob, sub_dirs_patterns))

        if not name.startswith('deleted-file-'):
            continue
        self.assertTrue(
            truffleHog.path_included(blob, include_patterns=deleted_paths_patterns),
            '{}: inclusion should match deleted paths: {}'.format(blob, deleted_paths_patterns))
        self.assertFalse(
            truffleHog.path_included(blob, exclude_patterns=deleted_paths_patterns),
            '{}: exclusion should match deleted paths: {}'.format(blob, deleted_paths_patterns))



@patch('truffleHog.truffleHog.clone_git_repo')
@patch('truffleHog.truffleHog.Repo')
Expand Down
Loading

0 comments on commit 48ffdd3

Please sign in to comment.