Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Headless crawler #310

Merged
merged 40 commits into from
Sep 20, 2022
Merged
Show file tree
Hide file tree
Changes from 38 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
2b8683a
wip: headless crawler
devl00p Jul 21, 2022
e3073c2
missing dependency
devl00p Jul 22, 2022
a603c23
fix most tests
devl00p Jul 23, 2022
b9ac5eb
fix more tests
devl00p Jul 23, 2022
2f8ca4d
remove our asyncmock as we removed python3.7 support and 3.8 has buil…
devl00p Jul 23, 2022
1ba7112
fix annoying warning in the mod_xxe test (... not awaited)
devl00p Jul 23, 2022
25c48d1
improving stop of the headless crawler
devl00p Jul 23, 2022
ddc7fcd
manage several headless modes, cli option
devl00p Jul 24, 2022
49182f0
Loading cookies inside intercepting explorer.
devl00p Jul 25, 2022
c8d9564
fixing beginner level error in CrawlerConfiguration (:sweating:) + style
devl00p Jul 26, 2022
6130465
ignore intercepted CONNECT requests + increase delay before reading t…
devl00p Jul 28, 2022
2f82d73
add --wait option for headless mode + force Request objects in "start…
devl00p Jul 29, 2022
174d779
fix test and style
devl00p Jul 29, 2022
8cb0ee2
fix style (again)
devl00p Jul 29, 2022
f583709
extract more urls
devl00p Aug 4, 2022
703762b
integrate exclusions for headless (had to do it in both crawler and m…
devl00p Aug 6, 2022
55d16b3
fix on urls with fragments
devl00p Aug 8, 2022
8fd2404
need two separate exclusion lists in intercepting_explorer.py
devl00p Aug 8, 2022
305c73f
Use link_depth in headless crawler. Real values won't appear in outpu…
devl00p Aug 8, 2022
2001dce
brings some limits into the intercepting explorer
devl00p Aug 10, 2022
520ae44
lock pyasn1 version
devl00p Aug 11, 2022
c587aaf
prevent downloading files from the headless browser by checking the m…
devl00p Aug 23, 2022
202a4bc
fix setup.py
devl00p Aug 23, 2022
4c91696
style
devl00p Aug 23, 2022
38c2d04
prevent out of scope redirections
devl00p Aug 24, 2022
a2e20a4
click on some buttons
devl00p Aug 25, 2022
88b168f
Use a headless browser to detect technologies a website is using + re…
devl00p Aug 30, 2022
37065d7
fix some tests
devl00p Aug 31, 2022
9808582
Fix one wappalyzer related test, figured out the "find implied softwa…
devl00p Aug 31, 2022
e29af3a
fix more wappalyzer tests
devl00p Sep 1, 2022
a065c45
figured out reason behind last test failures (i18n)
devl00p Sep 1, 2022
07ff825
refactoring
devl00p Sep 2, 2022
d14bcf2
pin aiohttp version / use headless browser in mod_wapp only if --head…
devl00p Sep 5, 2022
d0b5760
fix tests for mod_wapp
devl00p Sep 5, 2022
9a4b36a
check geckodriver presence before activating headless mode
devl00p Sep 9, 2022
01d9360
headless mode: allows Firefox to connect to 127.0.0.1 using the proxy
devl00p Sep 12, 2022
a2161d9
headless mode: remove the buttons that are added by firefox when read…
devl00p Sep 13, 2022
4f317eb
catch asyncio.TimeoutError due to arsenic issue
devl00p Sep 14, 2022
772ef37
remove content-disposition header when set (intercepting mode)
devl00p Sep 15, 2022
dfb8df7
put back use of HTTP redirection urls + raise usage error if auth-typ…
devl00p Sep 18, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ sslyze = "==5.0.1"
humanize = "==3.13.1"
mitmproxy = "==8.0.0"
h11 = "==0.12"
arsenic = "==21.8"
pyasn1 = "==0.4.8"
aiohttp = "==3.8.1"

[requires]
python_version = "3.8"
739 changes: 541 additions & 198 deletions Pipfile.lock

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,15 @@ def run_tests(self):
"sqlalchemy>=1.4.26",
"aiocache==0.11.1",
"aiosqlite==0.17.0",
"aiohttp==3.8.1",
"loguru>=0.5.3",
"dnspython==2.1.0",
"httpcore>=0.15.0",
"mitmproxy==8.0.0",
"h11==0.12",
"pyasn1==0.4.8"
"pyasn1==0.4.8",
"arsenic==21.8",
"pyasn1==0.4.8",
],
extras_require={
"NTLM": ["httpx-ntlm"],
Expand Down
8 changes: 0 additions & 8 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
from unittest.mock import MagicMock


class AsyncMock(MagicMock):
async def __call__(self, *args, **kwargs):
return super(AsyncMock, self).__call__(*args, **kwargs)


class AsyncIterator:
def __init__(self, seq):
self.iter = iter(seq)
Expand Down
6 changes: 3 additions & 3 deletions tests/attack/test_mod_backup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from asyncio import Event
from unittest.mock import AsyncMock

import httpx
import respx
Expand All @@ -8,7 +9,6 @@
from wapitiCore.net.crawler import AsyncCrawler
from wapitiCore.net.crawler_configuration import CrawlerConfiguration
from wapitiCore.attack.mod_backup import ModuleBackup
from tests import AsyncMock


@pytest.mark.asyncio
Expand All @@ -35,7 +35,7 @@ async def test_whole_stuff():
async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
options = {"timeout": 10, "level": 2}

module = ModuleBackup(crawler, persister, options, Event())
module = ModuleBackup(crawler, persister, options, Event(), crawler_configuration)
module.do_get = True
await module.attack(request, response)

Expand Down Expand Up @@ -66,6 +66,6 @@ async def test_false_positive():
async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
options = {"timeout": 10, "level": 2}

module = ModuleBackup(crawler, persister, options, Event())
module = ModuleBackup(crawler, persister, options, Event(), crawler_configuration)
module.do_get = True
assert not await module.must_attack(request, response)
2 changes: 1 addition & 1 deletion tests/attack/test_mod_buster.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ async def test_whole_stuff():
"wapitiCore.attack.mod_buster.ModuleBuster.payloads",
[("nawak", Flags()), ("admin", Flags()), ("config.inc", Flags()), ("authconfig.php", Flags())]
):
module = ModuleBuster(crawler, persister, options, Event())
module = ModuleBuster(crawler, persister, options, Event(), crawler_configuration)
module.do_get = True
await module.attack(request, None)

Expand Down
4 changes: 2 additions & 2 deletions tests/attack/test_mod_cookieflags.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import asyncio
import re
from unittest.mock import AsyncMock

import httpx
import respx
Expand All @@ -9,7 +10,6 @@
from wapitiCore.net.crawler import AsyncCrawler
from wapitiCore.net.crawler_configuration import CrawlerConfiguration
from wapitiCore.attack.mod_cookieflags import ModuleCookieflags
from tests import AsyncMock


@pytest.mark.asyncio
Expand All @@ -35,7 +35,7 @@ async def test_cookieflags():
await crawler.async_send(request) # Put cookies in our crawler object
options = {"timeout": 10, "level": 2}

module = ModuleCookieflags(crawler, persister, options, asyncio.Event())
module = ModuleCookieflags(crawler, persister, options, asyncio.Event(), crawler_configuration)
await module.attack(request)

cookie_flags = []
Expand Down
4 changes: 2 additions & 2 deletions tests/attack/test_mod_crlf.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from asyncio import Event
from unittest.mock import AsyncMock

import respx
import pytest
Expand All @@ -9,7 +10,6 @@
from wapitiCore.net.crawler_configuration import CrawlerConfiguration
from wapitiCore.language.vulnerability import _
from wapitiCore.attack.mod_crlf import ModuleCrlf
from tests import AsyncMock


@pytest.mark.asyncio
Expand All @@ -30,7 +30,7 @@ async def test_whole_stuff():
async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
options = {"timeout": 10, "level": 2}

module = ModuleCrlf(crawler, persister, options, Event())
module = ModuleCrlf(crawler, persister, options, Event(), crawler_configuration)
module.do_get = True
await module.attack(request)

Expand Down
4 changes: 2 additions & 2 deletions tests/attack/test_mod_csrf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import sys
from time import sleep
from asyncio import Event
from unittest.mock import AsyncMock

import httpx
import pytest
Expand All @@ -12,7 +13,6 @@
from wapitiCore.net.crawler_configuration import CrawlerConfiguration
from wapitiCore.attack.mod_csrf import ModuleCsrf
from wapitiCore.language.vulnerability import _
from tests import AsyncMock


@pytest.fixture(autouse=True)
Expand Down Expand Up @@ -68,7 +68,7 @@ async def test_csrf_cases():
async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
options = {"timeout": 10, "level": 1}

module = ModuleCsrf(crawler, persister, options, Event())
module = ModuleCsrf(crawler, persister, options, Event(), crawler_configuration)
module.do_post = True
for request, response in all_requests:
if await module.must_attack(request, response):
Expand Down
30 changes: 22 additions & 8 deletions tests/attack/test_mod_drupal_enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
from os.path import join as path_join
from asyncio import Event
from unittest.mock import AsyncMock

import httpx
import respx
Expand All @@ -12,7 +13,6 @@
from wapitiCore.net.crawler import AsyncCrawler
from wapitiCore.attack.mod_drupal_enum import ModuleDrupalEnum
from wapitiCore.language.vulnerability import _
from tests import AsyncMock


# Test no Drupal detected
Expand Down Expand Up @@ -42,7 +42,7 @@ async def test_no_drupal():
async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
options = {"timeout": 10, "level": 2, "tasks": 20}

module = ModuleDrupalEnum(crawler, persister, options, Event())
module = ModuleDrupalEnum(crawler, persister, options, Event(), crawler_configuration)

await module.attack(request)

Expand All @@ -61,7 +61,11 @@ async def test_version_detected():
data = changelog.read()

# Response to tell that Drupal is used
respx.get("http://perdu.com/core/misc/drupal.js").mock(return_value=httpx.Response(200, headers={"Content-Type": "application/javascript"}))
respx.get("http://perdu.com/core/misc/drupal.js").mock(
return_value=httpx.Response(
200,
headers={"Content-Type": "application/javascript"})
)

# Response for changelog.txt
respx.get("http://perdu.com/CHANGELOG.txt").mock(return_value=httpx.Response(200, text=data))
Expand All @@ -77,7 +81,7 @@ async def test_version_detected():
async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
options = {"timeout": 10, "level": 2, "tasks": 20}

module = ModuleDrupalEnum(crawler, persister, options, Event())
module = ModuleDrupalEnum(crawler, persister, options, Event(), crawler_configuration)

await module.attack(request)

Expand Down Expand Up @@ -106,7 +110,12 @@ async def test_multi_versions_detected():
data = maintainers.read()

# Response to tell that Drupal is used
respx.get("http://perdu.com/core/misc/drupal.js").mock(return_value=httpx.Response(200, headers={"Content-Type": "application/javascript"}))
respx.get("http://perdu.com/core/misc/drupal.js").mock(
return_value=httpx.Response(
200,
headers={"Content-Type": "application/javascript"}
)
)

# Response for maintainers.txt
respx.get("http://perdu.com/core/MAINTAINERS.txt").mock(return_value=httpx.Response(200, text=data))
Expand All @@ -122,7 +131,7 @@ async def test_multi_versions_detected():
async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
options = {"timeout": 10, "level": 2, "tasks": 20}

module = ModuleDrupalEnum(crawler, persister, options, Event())
module = ModuleDrupalEnum(crawler, persister, options, Event(), crawler_configuration)

await module.attack(request)

Expand All @@ -147,7 +156,12 @@ async def test_version_not_detected():
data = changelog.read()

# Response to tell that Drupal is used
respx.get("http://perdu.com/misc/drupal.js").mock(return_value=httpx.Response(200, headers={"Content-Type": "application/javascript"}))
respx.get("http://perdu.com/misc/drupal.js").mock(
return_value=httpx.Response(
200,
headers={"Content-Type": "application/javascript"}
)
)

# Response for edited changelog.txt
respx.get("http://perdu.com/CHANGELOG.txt").mock(return_value=httpx.Response(200, text=data))
Expand All @@ -163,7 +177,7 @@ async def test_version_not_detected():
async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
options = {"timeout": 10, "level": 2, "tasks": 20}

module = ModuleDrupalEnum(crawler, persister, options, Event())
module = ModuleDrupalEnum(crawler, persister, options, Event(), crawler_configuration)

await module.attack(request)

Expand Down
8 changes: 4 additions & 4 deletions tests/attack/test_mod_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import sys
from time import sleep
from asyncio import Event
from unittest.mock import AsyncMock

import pytest
import respx
Expand All @@ -13,7 +14,6 @@
from wapitiCore.language.vulnerability import _
from wapitiCore.net.crawler import AsyncCrawler
from wapitiCore.attack.mod_exec import ModuleExec
from tests import AsyncMock


@pytest.fixture(autouse=True)
Expand Down Expand Up @@ -57,7 +57,7 @@ async def test_whole_stuff():
async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
options = {"timeout": 10, "level": 2}

module = ModuleExec(crawler, persister, options, Event())
module = ModuleExec(crawler, persister, options, Event(), crawler_configuration)
module.do_post = True
for request in all_requests:
await module.attack(request)
Expand Down Expand Up @@ -85,7 +85,7 @@ async def test_detection():
async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
options = {"timeout": 10, "level": 1}

module = ModuleExec(crawler, persister, options, Event())
module = ModuleExec(crawler, persister, options, Event(), crawler_configuration)
await module.attack(request)

assert persister.add_payload.call_count == 1
Expand Down Expand Up @@ -114,7 +114,7 @@ def timeout_callback(http_request):
async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
options = {"timeout": 1, "level": 1}

module = ModuleExec(crawler, persister, options, Event())
module = ModuleExec(crawler, persister, options, Event(), crawler_configuration)
module.do_post = False

payloads_until_sleep = 0
Expand Down
8 changes: 4 additions & 4 deletions tests/attack/test_mod_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import sys
from time import sleep
from asyncio import Event
from unittest.mock import AsyncMock

import pytest

Expand All @@ -11,7 +12,6 @@
from wapitiCore.language.vulnerability import _
from wapitiCore.net.crawler import AsyncCrawler
from wapitiCore.attack.mod_file import ModuleFile, has_prefix_or_suffix, find_warning_message, FileWarning
from tests import AsyncMock


@pytest.fixture(autouse=True)
Expand All @@ -36,7 +36,7 @@ async def test_inclusion_detection():
async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
options = {"timeout": 10, "level": 2}

module = ModuleFile(crawler, persister, options, Event())
module = ModuleFile(crawler, persister, options, Event(), crawler_configuration)
module.do_post = False
await module.attack(request)

Expand All @@ -56,7 +56,7 @@ async def test_warning_false_positive():
async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
options = {"timeout": 10, "level": 2}

module = ModuleFile(crawler, persister, options, Event())
module = ModuleFile(crawler, persister, options, Event(), crawler_configuration)
module.do_post = False
await module.attack(request)

Expand Down Expand Up @@ -85,7 +85,7 @@ async def test_no_crash():
async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
options = {"timeout": 10, "level": 2}

module = ModuleFile(crawler, persister, options, Event())
module = ModuleFile(crawler, persister, options, Event(), crawler_configuration)
module.do_post = False
for request in all_requests:
await module.attack(request)
Expand Down
4 changes: 2 additions & 2 deletions tests/attack/test_mod_htaccess.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from asyncio import Event
from unittest.mock import AsyncMock

import httpx
import respx
Expand All @@ -9,7 +10,6 @@
from wapitiCore.net.crawler import AsyncCrawler
from wapitiCore.language.vulnerability import _
from wapitiCore.attack.mod_htaccess import ModuleHtaccess
from tests import AsyncMock


@pytest.mark.asyncio
Expand Down Expand Up @@ -47,7 +47,7 @@ async def test_whole_stuff():
async with AsyncCrawler.with_configuration(crawler_configuration) as crawler:
options = {"timeout": 10, "level": 2}

module = ModuleHtaccess(crawler, persister, options, Event())
module = ModuleHtaccess(crawler, persister, options, Event(), crawler_configuration)
module.do_get = True
for request, response in all_requests:
if await module.must_attack(request, response):
Expand Down
Loading