Commit

Merge pull request #68 from cgeopapa/master
Save output to path
opsdisk authored Jun 4, 2022
2 parents 68ff778 + bd8c73a commit 1797384
Showing 3 changed files with 60 additions and 32 deletions.
26 changes: 17 additions & 9 deletions README.md
@@ -9,9 +9,9 @@ There are 2 parts. The first is `ghdb_scraper.py` that retrieves the latest Goo
 `pagodo.py` that leverages the information gathered by `ghdb_scraper.py`.
 
 The core Google search library now uses the more flexible [yagooglesearch](https://github.com/opsdisk/yagooglesearch)
-instead of [googlesearch](https://github.com/MarioVilas/googlesearch). Check out the
-[yagooglesearch README](https://github.com/opsdisk/yagooglesearch/blob/master/README.md) for a more in-depth explanation
-of the library differences and capabilities.
+instead of [googlesearch](https://github.com/MarioVilas/googlesearch). Check out the [yagooglesearch
+README](https://github.com/opsdisk/yagooglesearch/blob/master/README.md) for a more in-depth explanation of the library
+differences and capabilities.
 
 This version of `pagodo` also has native HTTP(S) and SOCKS5 proxy support, so no more wrapping it in a tool
 like `proxychains4` if you need proxy support. You can specify multiple proxies to use in a round-robin fashion by
@@ -53,9 +53,9 @@ pip install -r requirements.txt
 
 ## ghdb_scraper.py
 
-To start off, `pagodo.py` needs a list of all the current Google dorks. The repo contains a `dorks/` directory with
-the current dorks when the `ghdb_scraper.py` was last run. It's advised to run `ghdb_scraper.py` to get the freshest
-data before running `pagodo.py`. The `dorks/` directory contains:
+To start off, `pagodo.py` needs a list of all the current Google dorks. The repo contains a `dorks/` directory with the
+current dorks from when `ghdb_scraper.py` was last run. It's advised to run `ghdb_scraper.py` to get the freshest data
+before running `pagodo.py`. The `dorks/` directory contains:
 
 * the `all_google_dorks.txt` file, which contains all the Google dorks, one per line
 * the `all_google_dorks.json` file, which is the JSON response from GHDB
@@ -165,8 +165,8 @@ pg = pagodo.Pagodo(
     google_dorks_file="dorks.txt",
     domain="github.com",
     max_search_result_urls_to_return_per_dork=3,
-    save_pagodo_results_to_json_file=True,
-    save_urls_to_file=True,
+    save_pagodo_results_to_json_file=None,  # None = Auto-generate file name, otherwise pass a string for path and filename.
+    save_urls_to_file=None,  # None = Auto-generate file name, otherwise pass a string for path and filename.
     verbosity=5,
 )
 pagodo_results_dict = pg.go()
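A note on the new interface (not part of the diff): each save parameter is now effectively tri-state. A minimal sketch of the distinction, with illustrative values:

```python
import pagodo

pg = pagodo.Pagodo(
    google_dorks_file="dorks.txt",
    max_search_result_urls_to_return_per_dork=3,
    save_pagodo_results_to_json_file="my_results.json",  # A string: save the JSON to exactly this path.
    save_urls_to_file=None,  # None: save URLs to an auto-generated, timestamped file name.
    # False (the CLI default when -o/-s are omitted) disables that output entirely.
    verbosity=5,
)
pagodo_results_dict = pg.go()
```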
@@ -209,6 +209,14 @@ between each different Google dork search.
 results at a time, so if you pick `-m 500`, 5 separate search queries will have to be made for each Google dork search,
 which will increase the amount of time to complete.
 
+### Save Output
+
+`-o [optional/path/to/results.json]` - Save output to a JSON file. If you do not specify a file name, a timestamped one
+will be generated.
+
+`-s [optional/path/to/results.txt]` - Save URLs to a text file. If you do not specify a file name, a timestamped one
+will be generated.
+
 ## Google is blocking me!
 
 Performing 7300+ search requests to Google as fast as possible will simply not work. Google will rightfully detect it
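A note on consuming the `-o` output: pagodo nests per-dork results under a `dorks` key (see `pagodo_results_dict` in the `pagodo.py` changes below). A minimal sketch, with an illustrative file name:

```python
import json

# Illustrative file name; -o with no argument generates a timestamped one.
with open("pagodo_results_2022_06_04.json", encoding="utf-8") as fh:
    results = json.load(fh)

for dork, data in results["dorks"].items():
    if data["urls_size"]:
        print(f"{dork}: {data['urls_size']} URL(s)")
```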
@@ -260,7 +268,7 @@ Throw `proxychains4` in front of the `pagodo.py` script and each *request* looku
 thus source from a different IP).
 
 ```bash
-proxychains4 python pagodo.py -g dorks/all_google_dorks.txt -o -s
+proxychains4 python pagodo.py -g dorks/all_google_dorks.txt -o [optional/path/to/results.json] -s [optional/path/to/results.txt]
 ```
 
 Note that this may not appear natural to Google if you:
62 changes: 41 additions & 21 deletions pagodo.py
@@ -16,7 +16,7 @@
 
 # Custom Python libraries.
 
-__version__ = "2.2.1"
+__version__ = "2.3.1"
 
 # Logging
 ROOT_LOGGER = logging.getLogger("pagodo")
@@ -42,9 +42,9 @@ def __init__(
         google_dorks_file,
         domain="",
         max_search_result_urls_to_return_per_dork=100,
-        save_pagodo_results_to_json_file=False,
+        save_pagodo_results_to_json_file=None,  # None = Auto-generate file name, otherwise pass a string for path and filename.
         proxies="",
-        save_urls_to_file=False,
+        save_urls_to_file=None,  # None = Auto-generate file name, otherwise pass a string for path and filename.
         minimum_delay_between_dork_searches_in_seconds=37,
         maximum_delay_between_dork_searches_in_seconds=60,
         disable_verify_ssl=False,
@@ -79,7 +79,7 @@ def __init__(
         # All passed parameters look good, assign to the class object.
         self.google_dorks_file = google_dorks_file
         self.google_dorks = []
-        with open(google_dorks_file, "r") as fh:
+        with open(google_dorks_file, "r", encoding="utf-8") as fh:
             for line in fh.read().splitlines():
                 if line.strip():
                     self.google_dorks.append(line)
@@ -122,6 +122,14 @@ def __init__(
         self.total_urls_found = 0
         self.proxy_rotation_index = 0
 
+        # -o with no filename. Desire to save results, don't care about the file name.
+        if self.save_pagodo_results_to_json_file is None:
+            self.save_pagodo_results_to_json_file = f"{self.base_file_name}.json"
+
+        # -s with no filename. Desire to save results, don't care about the file name.
+        if self.save_urls_to_file is None:
+            self.save_urls_to_file = f"{self.base_file_name}.txt"
+
         # Assign log level.
         ROOT_LOGGER.setLevel((6 - self.verbosity) * 10)
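The two `None` checks added above are the heart of the new tri-state behavior. A standalone sketch of the same idea (function and names illustrative, not from this commit; the real `base_file_name` format may differ):

```python
from datetime import datetime

def resolve_save_target(value, extension):
    """False -> saving disabled; None -> auto-generated, timestamped name; str -> used verbatim."""
    if value is None:
        return f"pagodo_results_{datetime.now():%Y_%m_%d_%H_%M_%S}.{extension}"
    return value  # Either False (disabled) or an explicit path string.

print(resolve_save_target(None, "json"))         # e.g. pagodo_results_2022_06_04_12_00_00.json
print(resolve_save_target("loot/r.txt", "txt"))  # loot/r.txt
print(resolve_save_target(False, "txt"))         # False -> caller skips saving
```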

@@ -146,13 +154,10 @@ def go(self):
 
         for dork in self.google_dorks:
 
-            # fmt: off
             self.pagodo_results_dict["dorks"][dork] = {
                 "urls_size": 0,
                 "urls": [],
-
             }
-            # fmt: on
 
             try:
                 dork = dork.strip()
@@ -241,18 +246,16 @@
 
                 # Save URLs with valid results to an .txt file.
                 if self.save_urls_to_file:
-                    with open(f"{self.base_file_name}.txt", "a") as fh:
+                    with open(self.save_urls_to_file, "a") as fh:
                         fh.write(f"# {dork}\n")
                         for url in dork_urls_list:
                             fh.write(f"{url}\n")
                         fh.write("#" * 50 + "\n")
 
-                # fmt: off
                 self.pagodo_results_dict["dorks"][dork] = {
                     "urls_size": dork_urls_list_size,
                     "urls": dork_urls_list,
                 }
-                # fmt: on
 
             # No Google dork results found.
             else:
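The text file written above groups URLs under a `# <dork>` header and closes each block with a line of 50 `#` characters. A small sketch of reading it back (file name illustrative, not from this commit):

```python
# Parse the per-dork URL blocks written by save_urls_to_file.
results = {}
current_dork = None

with open("pagodo_results_2022_06_04.txt", encoding="utf-8") as fh:  # Illustrative file name.
    for line in fh:
        line = line.rstrip("\n")
        if line.startswith("# "):
            current_dork = line[2:]  # A "# <dork>" header opens a block.
            results[current_dork] = []
        elif line.startswith("#"):
            current_dork = None  # The 50-character "#" separator closes the block.
        elif line and current_dork is not None:
            results[current_dork].append(line)  # URL lines belong to the current dork.

print(sum(len(urls) for urls in results.values()), "URLs parsed")
```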
@@ -290,15 +293,27 @@
 
         # Save pagodo_results_dict to a .json file.
         if self.save_pagodo_results_to_json_file:
-            with open(f"{self.base_file_name}.json", "w") as fh:
+            with open(self.save_pagodo_results_to_json_file, "w") as fh:
                 json.dump(self.pagodo_results_dict, fh, indent=4)
 
         return self.pagodo_results_dict
 
 
+# http://stackoverflow.com/questions/3853722/python-argparse-how-to-insert-newline-in-the-help-text
+class SmartFormatter(argparse.HelpFormatter):
+    def _split_lines(self, text, width):
+        if text.startswith("R|"):
+            return text[2:].splitlines()
+        # This is the RawTextHelpFormatter._split_lines
+        return argparse.HelpFormatter._split_lines(self, text, width)
+
+
 if __name__ == "__main__":
 
-    parser = argparse.ArgumentParser(description=f"pagodo - Passive Google Dork v{__version__}")
+    parser = argparse.ArgumentParser(
+        description=f"pagodo - Passive Google Dork v{__version__}",
+        formatter_class=SmartFormatter,
+    )
     parser.add_argument(
         "-g", dest="google_dorks_file", action="store", required=True, help="File containing Google dorks, 1 per line."
     )
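`SmartFormatter` keeps argparse's default help wrapping unless a help string opts in with the `R|` prefix, in which case its embedded newlines are preserved. A self-contained demo (the `-x` flag is illustrative):

```python
import argparse

class SmartFormatter(argparse.HelpFormatter):
    def _split_lines(self, text, width):
        # "R|" opts this help string out of argparse's re-wrapping.
        if text.startswith("R|"):
            return text[2:].splitlines()
        return argparse.HelpFormatter._split_lines(self, text, width)

parser = argparse.ArgumentParser(formatter_class=SmartFormatter)
parser.add_argument("-x", help="R|line one\nline two stays on its own line")
parser.print_help()
```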
@@ -360,22 +375,27 @@
     )
     parser.add_argument(
         "-o",
+        nargs="?",
+        metavar="JSON_FILE",
         dest="save_pagodo_results_to_json_file",
-        action="store_true",
         required=False,
+        action="store",
+        default=False,
-        help=(
-            "Save JSON dictionary to pagodo_results_<TIMESTAMP>.json file. Contains more information than "
-            "pagodo_results_<TIMESTAMP>.txt"
-        ),
+        help="R|Save URL dork data to a JSON file. Contains more information than the .txt version.\n"
+        "no -o = Do not save dork data to a JSON file\n"
+        "-o = Save dork data to pagodo_results_<TIMESTAMP>.json\n"
+        "-o JSON_FILE = Save dork data to JSON_FILE",
     )
     parser.add_argument(
         "-s",
+        nargs="?",
+        metavar="URL_FILE",
         dest="save_urls_to_file",
-        action="store_true",
         required=False,
+        action="store",
+        default=False,
-        help="Save any URLS found for a dork to the pagodo_results_<TIMESTAMP>.txt file.",
+        help="R|Save URL dork data to a text file.\n"
+        "no -s = Do not save dork data to a file\n"
+        "-s = Save dork data to pagodo_results_<TIMESTAMP>.txt\n"
+        "-s URL_FILE = Save dork data to URL_FILE",
     )
     parser.add_argument(
         "-v",
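With `nargs="?"`, `action="store"`, and `default=False`, each flag yields a three-way value: `False` when absent, `None` when given without an argument (later swapped for an auto-generated name in `__init__`), and the supplied string otherwise. A minimal demonstration (flag and dest are illustrative):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("-o", nargs="?", metavar="JSON_FILE", dest="save_json", action="store", default=False)

print(parser.parse_args([]).save_json)                   # False: flag absent, saving disabled.
print(parser.parse_args(["-o"]).save_json)               # None: flag without a value, auto-generate name.
print(parser.parse_args(["-o", "loot.json"]).save_json)  # 'loot.json': explicit path.
```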
4 changes: 2 additions & 2 deletions requirements.txt
@@ -1,3 +1,3 @@
-beautifulsoup4==4.10.0
-yagooglesearch==1.6.0
+beautifulsoup4==4.11.1
 requests==2.27.1
+yagooglesearch==1.6.0
