diff --git a/README.md b/README.md index 3887974..3a7df06 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,9 @@ There are 2 parts. The first is `ghdb_scraper.py` that retrieves the latest Goo `pagodo.py` that leverages the information gathered by `ghdb_scraper.py`. The core Google search library now uses the more flexible [yagooglesearch](https://github.com/opsdisk/yagooglesearch) -instead of [googlesearch](https://github.com/MarioVilas/googlesearch). Check out the -[yagooglesearch README](https://github.com/opsdisk/yagooglesearch/blob/master/README.md) for a more in-depth explanation -of the library differences and capabilities. +instead of [googlesearch](https://github.com/MarioVilas/googlesearch). Check out the [yagooglesearch +README](https://github.com/opsdisk/yagooglesearch/blob/master/README.md) for a more in-depth explanation of the library +differences and capabilities. This version of `pagodo` also supports native HTTP(S) and SOCKS5 application support, so no more wrapping it in a tool like `proxychains4` if you need proxy support. You can specify multiple proxies to use in a round-robin fashion by @@ -53,9 +53,9 @@ pip install -r requirements.txt ## ghdb_scraper.py -To start off, `pagodo.py` needs a list of all the current Google dorks. The repo contains a `dorks/` directory with -the current dorks when the `ghdb_scraper.py` was last run. It's advised to run `ghdb_scraper.py` to get the freshest -data before running `pagodo.py`. The `dorks/` directory contains: +To start off, `pagodo.py` needs a list of all the current Google dorks. The repo contains a `dorks/` directory with the +current dorks when the `ghdb_scraper.py` was last run. It's advised to run `ghdb_scraper.py` to get the freshest data +before running `pagodo.py`. 
The `dorks/` directory contains: * the `all_google_dorks.txt` file which contains all the Google dorks, one per line * the `all_google_dorks.json` file which is the JSON response from GHDB @@ -165,8 +165,8 @@ pg = pagodo.Pagodo( google_dorks_file="dorks.txt", domain="github.com", max_search_result_urls_to_return_per_dork=3, - save_pagodo_results_to_json_file=True, - save_urls_to_file=True, + save_pagodo_results_to_json_file=None, # None = Auto-generate file name, otherwise pass a string for path and filename. + save_urls_to_file=None, # None = Auto-generate file name, otherwise pass a string for path and filename. verbosity=5, ) pagodo_results_dict = pg.go() @@ -209,6 +209,14 @@ between each different Google dork search. results at a time, so if you pick `-m 500`, 5 separate search queries will have to be made for each Google dork search, which will increase the amount of time to complete. +### Save Output + +`-o [optional/path/to/results.json]` - Save output to a JSON file. If you do not specify a filename, a datetimestamped +one will be generated. + +`-s [optional/path/to/results.txt]` - Save URLs to a text file. If you do not specify a filename, a datetimestamped one +will be generated. + ## Google is blocking me! Performing 7300+ search requests to Google as fast as possible will simply not work. Google will rightfully detect it @@ -260,7 +268,7 @@ Throw `proxychains4` in front of the `pagodo.py` script and each *request* looku thus source from a different IP). ```bash -proxychains4 python pagodo.py -g dorks/all_google_dorks.txt -o -s +proxychains4 python pagodo.py -g dorks/all_google_dorks.txt -o [optional/path/to/results.json] -s [optional/path/to/results.txt] ``` Note that this may not appear natural to Google if you: diff --git a/pagodo.py b/pagodo.py index fdf9f9a..4bf7ebb 100644 --- a/pagodo.py +++ b/pagodo.py @@ -16,7 +16,7 @@ # Custom Python libraries. 
-__version__ = "2.2.1" +__version__ = "2.3.1" # Logging ROOT_LOGGER = logging.getLogger("pagodo") @@ -42,9 +42,9 @@ def __init__( google_dorks_file, domain="", max_search_result_urls_to_return_per_dork=100, - save_pagodo_results_to_json_file=False, + save_pagodo_results_to_json_file=None, # None = Auto-generate file name, otherwise pass a string for path and filename. proxies="", - save_urls_to_file=False, + save_urls_to_file=None, # None = Auto-generate file name, otherwise pass a string for path and filename. minimum_delay_between_dork_searches_in_seconds=37, maximum_delay_between_dork_searches_in_seconds=60, disable_verify_ssl=False, @@ -79,7 +79,7 @@ def __init__( # All passed paramters look good, assign to the class object. self.google_dorks_file = google_dorks_file self.google_dorks = [] - with open(google_dorks_file, "r") as fh: + with open(google_dorks_file, "r", encoding="utf-8") as fh: for line in fh.read().splitlines(): if line.strip(): self.google_dorks.append(line) @@ -122,6 +122,14 @@ def __init__( self.total_urls_found = 0 self.proxy_rotation_index = 0 + # -o with no filename. Desire to save results, don't care about the file name. + if self.save_pagodo_results_to_json_file is None: + self.save_pagodo_results_to_json_file = f"{self.base_file_name}.json" + + # -s with no filename. Desire to save results, don't care about the file name. + if self.save_urls_to_file is None: + self.save_urls_to_file = f"{self.base_file_name}.txt" + # Assign log level. ROOT_LOGGER.setLevel((6 - self.verbosity) * 10) @@ -146,13 +154,10 @@ def go(self): for dork in self.google_dorks: - # fmt: off self.pagodo_results_dict["dorks"][dork] = { "urls_size": 0, "urls": [], - } - # fmt: on try: dork = dork.strip() @@ -241,18 +246,16 @@ def go(self): # Save URLs with valid results to an .txt file. 
if self.save_urls_to_file: - with open(f"{self.base_file_name}.txt", "a") as fh: + with open(self.save_urls_to_file, "a") as fh: fh.write(f"# {dork}\n") for url in dork_urls_list: fh.write(f"{url}\n") fh.write("#" * 50 + "\n") - # fmt: off self.pagodo_results_dict["dorks"][dork] = { "urls_size": dork_urls_list_size, "urls": dork_urls_list, } - # fmt: on # No Google dork results found. else: @@ -290,15 +293,27 @@ def go(self): # Save pagodo_results_dict to a .json file. if self.save_pagodo_results_to_json_file: - with open(f"{self.base_file_name}.json", "w") as fh: + with open(self.save_pagodo_results_to_json_file, "w") as fh: json.dump(self.pagodo_results_dict, fh, indent=4) return self.pagodo_results_dict +# http://stackoverflow.com/questions/3853722/python-argparse-how-to-insert-newline-in-the-help-text +class SmartFormatter(argparse.HelpFormatter): + def _split_lines(self, text, width): + if text.startswith("R|"): + return text[2:].splitlines() + # No "R|" prefix: fall back to the default HelpFormatter._split_lines wrapping. + return argparse.HelpFormatter._split_lines(self, text, width) + + if __name__ == "__main__": - parser = argparse.ArgumentParser(description=f"pagodo - Passive Google Dork v{__version__}") + parser = argparse.ArgumentParser( + description=f"pagodo - Passive Google Dork v{__version__}", + formatter_class=SmartFormatter, + ) parser.add_argument( "-g", dest="google_dorks_file", action="store", required=True, help="File containing Google dorks, 1 per line." ) @@ -360,22 +375,27 @@ def go(self): ) parser.add_argument( "-o", + nargs="?", + metavar="JSON_FILE", dest="save_pagodo_results_to_json_file", - action="store_true", - required=False, + action="store", default=False, - help=( - "Save JSON dictionary to pagodo_results_.json file. Contains more information than " - "pagodo_results_.txt" - ), + help="R|Save URL dork data to a JSON file. 
Contains more information than .txt version\n" + "no -o = Do not save dork data to a JSON file\n" + "-o = Save dork data to pagodo_results_.json\n" + "-o JSON_FILE = Save dork data to JSON_FILE", ) parser.add_argument( "-s", + nargs="?", + metavar="URL_FILE", dest="save_urls_to_file", - action="store_true", - required=False, + action="store", default=False, - help="Save any URLS found for a dork to the pagodo_results_.txt file.", + help="R|Save URL dork data to a text file.\n" + "no -s = Do not save dork data to a file\n" + "-s = Save dork data to pagodo_results_.txt\n" + "-s URL_FILE = Save dork data to URL_FILE", ) parser.add_argument( "-v", diff --git a/requirements.txt b/requirements.txt index 7c4707c..f708f91 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -beautifulsoup4==4.10.0 -yagooglesearch==1.6.0 +beautifulsoup4==4.11.1 requests==2.27.1 +yagooglesearch==1.6.0