
Commit

Merge remote-tracking branch 'origin/develop' into develop
Nekmo committed Dec 14, 2020
2 parents 98a904c + edd9315 commit ff1b823
Showing 4 changed files with 53 additions and 12 deletions.
5 changes: 3 additions & 2 deletions dirhunt/crawler.py
@@ -33,7 +33,8 @@ class Crawler(ThreadPoolExecutor):
 
     def __init__(self, max_workers=None, interesting_extensions=None, interesting_files=None, std=None,
                  progress_enabled=True, timeout=10, depth=3, not_follow_subdomains=False, exclude_sources=(),
-                 not_allow_redirects=False, proxies=None, delay=0, limit=1000, to_file=None, user_agent=None):
+                 not_allow_redirects=False, proxies=None, delay=0, limit=1000, to_file=None, user_agent=None,
+                 cookies=None, headers=None):
         if not max_workers and not delay:
             max_workers = (multiprocessing.cpu_count() or 1) * 5
         elif not max_workers and delay:
@@ -44,7 +45,7 @@ def __init__(self, max_workers=None, interesting_extensions=None, interesting_fi
         self.index_of_processors = []
         self.proxies = proxies
         self.delay = delay
-        self.sessions = Sessions(proxies, delay, user_agent)
+        self.sessions = Sessions(proxies, delay, user_agent, cookies, headers)
         self.processing = {}
         self.processed = {}
         self.add_lock = Lock()
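
A minimal sketch of calling the extended constructor. The cookie and header values below are invented for illustration, and every other argument keeps its default; both dicts are simply forwarded to the Sessions pool:

    from dirhunt.crawler import Crawler

    # Invented values; cookies and headers are plain dicts that the
    # crawler passes through to Sessions(proxies, delay, user_agent, ...).
    crawler = Crawler(
        cookies={'session': 'secret'},
        headers={'X-Server': 'prod'},
    )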
25 changes: 18 additions & 7 deletions dirhunt/management.py
@@ -8,7 +8,7 @@
 
 import sys
 
-from click import BadOptionUsage, Path
+from click import BadOptionUsage, Path, BadParameter
 
 from dirhunt.crawler import Crawler
 from dirhunt.exceptions import DirHuntError, catch, IncompatibleVersionError
@@ -20,8 +20,8 @@
 
 init(autoreset=True)
 
-STATUS_CODES = lrange(100, 102+1) + lrange(200, 208+1) + [226] + lrange(300, 308+1) + lrange(400, 418+1) + \
-               lrange(421, 426+1) + [428, 429, 431, 451] + lrange(500, 511+1)
+STATUS_CODES = lrange(100, 102 + 1) + lrange(200, 208 + 1) + [226] + lrange(300, 308 + 1) + lrange(400, 418 + 1) + \
+               lrange(421, 426 + 1) + [428, 429, 431, 451] + lrange(500, 511 + 1)
 INTERESTING_EXTS = ['php', 'zip', 'sh', 'asp', 'csv', 'log']
 INTERESTING_FILES = ['access_log', 'error_log', 'error', 'logs', 'dump']
 STDOUT_FLAGS = ['blank', 'not_found.fake', 'html']
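
The hunk above only adds spaces around the ``+`` operators. For reference, a sketch of what the constant expands to, assuming lrange(a, b) is simply list(range(a, b)) (the real helper is defined elsewhere in dirhunt):

    # Assumed behaviour of dirhunt's lrange helper: a list-returning
    # range, so the additions above concatenate into one flat list.
    def lrange(start, stop):
        return list(range(start, stop))

    codes = lrange(100, 102 + 1) + lrange(200, 208 + 1) + [226]
    print(codes)  # [100, 101, 102, 200, 201, 202, 203, 204, 205, 206, 207, 208, 226]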
@@ -57,6 +57,13 @@ def comma_separated_files(ctx, param, value):
     return items
 
 
+def key_value(ctx, param, values):
+    items = [item.split(':', 1) for item in values]
+    if any(filter(lambda x: len(x) < 2, items)):
+        raise BadParameter('Expect a value in the key:value format', ctx, param)
+    return {x[0].strip(): x[1].strip() for x in items}
+
+
 def status_code_range(start, end):
     return list(filter(lambda x: start <= x <= end, STATUS_CODES))

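What the new callback produces, traced by hand with invented option values; note that split(':', 1) splits only on the first colon, so colons inside the value survive:

    # Hand-traced behaviour of key_value's parsing, with invented inputs.
    values = ('Authorization: token foo', 'X-Server: prod')
    items = [item.split(':', 1) for item in values]        # [['Authorization', ' token foo'], ...]
    result = {x[0].strip(): x[1].strip() for x in items}
    print(result)  # {'Authorization': 'token foo', 'X-Server': 'prod'}
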
@@ -126,12 +133,16 @@ def flags_range(flags):
 @click.option('--not-allow-redirects', is_flag=True, help='Redirectors will not be followed')
 @click.option('--limit', type=int, default=1000, help='Max number of pages processed to search for directories.')
 @click.option('--to-file', type=Path(writable=True), default=None, help='Create a report file in JSON.')
-@click.option('--user-agent', type=str, default=None, help='User agent to use. By default a random browser.')
+@click.option('-u', '--user-agent', type=str, default=None, help='User agent to use. By default a random browser.')
+@click.option('-c', '--cookie', 'cookies', callback=key_value, multiple=True,
+              help='Add a cookie to requests in the cookie_name:value format.')
+@click.option('-h', '--header', 'headers', callback=key_value, multiple=True,
+              help='Add a header to requests in the header:value format.')
 @click.option('--version', is_flag=True, callback=print_version,
               expose_value=False, is_eager=True)
 def hunt(urls, threads, exclude_flags, include_flags, interesting_extensions, interesting_files, stdout_flags,
          progress_enabled, timeout, max_depth, not_follow_subdomains, exclude_sources, proxies, delay,
-         not_allow_redirects, limit, to_file, user_agent):
+         not_allow_redirects, limit, to_file, user_agent, cookies, headers):
     """Find web directories without bruteforce
     """
     if exclude_flags and include_flags:
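
A self-contained sketch of the option wiring above: a throwaway Click command (not dirhunt's real CLI) reusing the same callback pattern, exercised with click.testing.CliRunner. With multiple=True, Click hands the callback a tuple of raw strings, and the callback's return value replaces it:

    import click
    from click.testing import CliRunner

    def key_value(ctx, param, values):
        items = [item.split(':', 1) for item in values]
        if any(len(x) < 2 for x in items):
            raise click.BadParameter('Expect a value in the key:value format', ctx, param)
        return {x[0].strip(): x[1].strip() for x in items}

    @click.command()
    @click.option('-c', '--cookie', 'cookies', callback=key_value, multiple=True)
    def cmd(cookies):
        click.echo(cookies)

    result = CliRunner().invoke(cmd, ['-c', 'session:secret', '-c', 'user:123'])
    print(result.output)  # {'session': 'secret', 'user': '123'}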
@@ -150,7 +161,7 @@ def hunt(urls, threads, exclude_flags, include_flags, interesting_extensions, in
                       progress_enabled=progress_enabled, timeout=timeout, depth=max_depth,
                       not_follow_subdomains=not_follow_subdomains, exclude_sources=exclude_sources,
                       not_allow_redirects=not_allow_redirects, proxies=proxies, delay=delay, limit=limit,
-                      to_file=to_file, user_agent=user_agent)
+                      to_file=to_file, user_agent=user_agent, cookies=cookies, headers=headers)
     if os.path.exists(crawler.get_resume_file()):
         click.echo('Resuming the previous program execution...')
         try:
@@ -159,7 +170,7 @@ def hunt(urls, threads, exclude_flags, include_flags, interesting_extensions, in
             click.echo(e)
     crawler.add_init_urls(*urls)
     while True:
-        choice = catch_keyboard_interrupt_choices(crawler.print_results, ['abort', 'continue', 'results'], 'a')\
+        choice = catch_keyboard_interrupt_choices(crawler.print_results, ['abort', 'continue', 'results'], 'a') \
            (set(exclude_flags), set(include_flags))
         if choice == 'a':
             crawler.close(True)
10 changes: 7 additions & 3 deletions dirhunt/sessions.py
@@ -78,14 +78,16 @@ def __getitem__(self, item):
 
 
 class Session(object):
-    def __init__(self, sessions, proxy, user_agent=None):
+    def __init__(self, sessions, proxy, user_agent=None, cookies=None, headers=None):
         self.sessions = sessions
         self.proxy_name = proxy
         self.proxy = normalize_proxy(self.proxy_name, sessions)
         self.session = requests.Session()
         self.session.headers = {
             'User-Agent': user_agent or get_random_user_agent(),
         }
+        self.session.cookies.update(cookies or {})
+        self.session.headers.update(headers or {})
         adapter = HTTPAdapter(pool_connections=POOL_CONNECTIONS, pool_maxsize=POOL_CONNECTIONS)
         self.session.mount('http://', adapter)
         self.session.mount('https://', adapter)
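
A minimal, standalone illustration of the pattern this change relies on: cookies and headers stored on a requests.Session are sent with every request that session makes. The values are invented, and httpbin.org is just a public echo service used here for demonstration:

    import requests

    session = requests.Session()
    session.headers.update({'User-Agent': 'ExampleAgent/1.0', 'X-Server': 'prod'})
    session.cookies.update({'session': 'secret'})
    response = session.get('https://httpbin.org/headers')
    print(response.json())  # echoed request headers include X-Server and the Cookie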
@@ -124,11 +126,13 @@ def get(self, url, **kwargs):
 
 
 class Sessions(object):
-    def __init__(self, proxies=None, delay=0, user_agent=None):
+    def __init__(self, proxies=None, delay=0, user_agent=None, cookies=None, headers=None):
         self.availables = Queue()
         self.proxies_lists = RandomProxies()
         self.delay = delay
         self.user_agent = user_agent
+        self.cookies = cookies or {}
+        self.headers = headers or {}
         self.sessions = self.create_sessions(proxies or [None])
         for session in self.sessions:
             self.availables.put(session)
@@ -140,7 +144,7 @@ def add_available(self, session):
         self.availables.put(session)
 
     def create_sessions(self, proxies):
-        return [Session(self, proxy, self.user_agent) for proxy in proxies]
+        return [Session(self, proxy, self.user_agent, self.cookies, self.headers) for proxy in proxies]
 
     def get_random_session(self):
         return random.choice(self.sessions)
25 changes: 25 additions & 0 deletions docs/usage.rst
@@ -434,6 +434,31 @@ Example for **interesting files** (``-f``)::
 
 It is necessary to put the complete path to the file, or the relative path using ``./``. Each value of the files must be
 separated by newlines.
 
+Custom headers
+--------------
+To add custom HTTP headers to requests you can use the ``--header`` parameter.
+
+.. code::
+
+    $ dirhunt <url> --header <Field name>:<Field value>
+
+This parameter can be used more than once, for example::
+
+    $ dirhunt http://domain1/blog/ --header "Authorization: token foo" --header "X-Server: prod"
+
+
+Custom cookies
+--------------
+To add custom cookies to requests you can use the ``--cookie`` parameter.
+
+.. code::
+
+    $ dirhunt <url> --cookie <Cookie name>:<Cookie value>
+
+This parameter can be used more than once, for example::
+
+    $ dirhunt http://domain1/blog/ --cookie "session: secret" --cookie "user: 123"
+
+
 Progress bar
 ------------
