Merge pull request #2117 from blacklanternsecurity/fix-web-spider
Fix web spider bug
TheTechromancer authored Dec 29, 2024
2 parents 53dada0 + fd0df0d commit 556d026
Showing 2 changed files with 42 additions and 11 deletions.
24 changes: 19 additions & 5 deletions bbot/core/event/base.py
@@ -1232,11 +1232,25 @@ def sanitize_data(self, data):
         return data
 
     def add_tag(self, tag):
-        host_same_as_parent = self.parent and self.host == self.parent.host
-        if tag == "spider-danger" and host_same_as_parent and "spider-danger" not in self.tags:
-            # increment the web spider distance
-            if self.type == "URL_UNVERIFIED":
-                self.web_spider_distance += 1
+        self_url = getattr(self, "parsed_url", "")
+        self_host = getattr(self, "host", "")
+        # autoincrement web spider distance if the "spider-danger" tag is added
+        if tag == "spider-danger" and "spider-danger" not in self.tags and self_url and self_host:
+            parent_hosts_and_urls = set()
+            for p in self.get_parents():
+                # URL_UNVERIFIED events don't count because they haven't been visited yet
+                if p.type == "URL_UNVERIFIED":
+                    continue
+                url = getattr(p, "parsed_url", "")
+                parent_hosts_and_urls.add((p.host, url))
+            # if there's a URL anywhere in our parent chain that's different from ours but shares our host, we're in dAnGeR
+            dangerous_parent = any(
+                p_host == self.host and p_url != self_url for p_host, p_url in parent_hosts_and_urls
+            )
+            if dangerous_parent:
+                # increment the web spider distance
+                if self.type == "URL_UNVERIFIED":
+                    self.web_spider_distance += 1
             if self.is_spider_max:
                 self.add_tag("spider-max")
         super().add_tag(tag)
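
In plain terms: the old check compared an event only against its immediate parent, so the spider-danger logic keyed off a single hop. The new check walks the entire parent chain, skips URL_UNVERIFIED parents (they haven't actually been visited), and treats the event as spider-dangerous only when some visited parent shares its host but points at a different URL. A minimal standalone sketch of that check, using a simplified stand-in Event class for illustration (not bbot's real event model):

from dataclasses import dataclass
from typing import Optional


@dataclass
class Event:
    # simplified stand-in for a bbot event (illustration only)
    type: str
    host: str
    parsed_url: str
    parent: Optional["Event"] = None

    def get_parents(self):
        # walk the full parent chain
        p = self.parent
        while p is not None:
            yield p
            p = p.parent


def is_spider_dangerous(event: Event) -> bool:
    # only parents that have actually been visited count
    visited = {
        (p.host, p.parsed_url)
        for p in event.get_parents()
        if p.type != "URL_UNVERIFIED"
    }
    # dangerous: a visited parent shares our host but is a different URL
    return any(host == event.host and url != event.parsed_url for host, url in visited)


root = Event("URL", "www.evilcorp.com", "http://www.evilcorp.com/test1")
link = Event("URL_UNVERIFIED", "www.evilcorp.com", "http://www.evilcorp.com/test2", parent=root)
assert is_spider_dangerous(link)  # visited same-host parent, different URL

Only when this check passes does a URL_UNVERIFIED event's web_spider_distance increment, which in turn can trip is_spider_max and add the spider-max tag.
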
29 changes: 23 additions & 6 deletions bbot/test/test_step_1/test_events.py
@@ -808,6 +808,8 @@ async def handle_event(self, event):
 async def test_event_web_spider_distance(bbot_scanner):
     # make sure web spider distance inheritance works as intended
     # and we don't have any runaway situations with SOCIAL events + URLs
+
+    # URL_UNVERIFIED events should not increment web spider distance
     scan = bbot_scanner(config={"web": {"spider_distance": 1}})
     url_event_1 = scan.make_event("http://www.evilcorp.com/test1", "URL_UNVERIFIED", parent=scan.root_event)
     assert url_event_1.web_spider_distance == 0
@@ -816,9 +818,24 @@ async def test_event_web_spider_distance(bbot_scanner):
     url_event_3 = scan.make_event(
         "http://www.evilcorp.com/test3", "URL_UNVERIFIED", parent=url_event_2, tags=["spider-danger"]
     )
     assert url_event_3.web_spider_distance == 0
     assert "spider-danger" in url_event_3.tags
     assert "spider-max" not in url_event_3.tags
+
+    # URL events should increment web spider distance
+    scan = bbot_scanner(config={"web": {"spider_distance": 1}})
+    url_event_1 = scan.make_event("http://www.evilcorp.com/test1", "URL", parent=scan.root_event, tags="status-200")
+    assert url_event_1.web_spider_distance == 0
+    url_event_2 = scan.make_event("http://www.evilcorp.com/test2", "URL", parent=url_event_1, tags="status-200")
+    assert url_event_2.web_spider_distance == 0
+    url_event_3 = scan.make_event(
+        "http://www.evilcorp.com/test3", "URL_UNVERIFIED", parent=url_event_2, tags=["spider-danger"]
+    )
+    assert url_event_3.web_spider_distance == 1
+    assert "spider-danger" in url_event_3.tags
+    assert "spider-max" not in url_event_3.tags
+
+    # SOCIAL events should inherit spider distance
     social_event = scan.make_event(
         {"platform": "github", "url": "http://www.evilcorp.com/test4"}, "SOCIAL", parent=url_event_3
     )
@@ -846,17 +863,17 @@ async def test_event_web_spider_distance(bbot_scanner):
     url_event_2 = scan.make_event(
         "http://www.evilcorp.com", "URL_UNVERIFIED", parent=scan.root_event, tags="spider-danger"
     )
+    # spider distance shouldn't increment because it's not the same host
     assert url_event_2.web_spider_distance == 0
     assert "spider-danger" in url_event_2.tags
     assert "spider-max" not in url_event_2.tags
+    url_event_2b = scan.make_event("http://www.evilcorp.com", "URL", parent=url_event_2, tags="status-200")
+    assert url_event_2b.web_spider_distance == 0
+    assert "spider-danger" in url_event_2b.tags
+    assert "spider-max" not in url_event_2b.tags
     url_event_3 = scan.make_event(
-        "http://www.evilcorp.com/3", "URL_UNVERIFIED", parent=url_event_2, tags="spider-danger"
+        "http://www.evilcorp.com/3", "URL_UNVERIFIED", parent=url_event_2b, tags="spider-danger"
     )
     assert url_event_3.web_spider_distance == 1
     assert "spider-danger" in url_event_3.tags
     assert "spider-max" not in url_event_3.tags
-    url_event_4 = scan.make_event("http://evilcorp.com", "URL_UNVERIFIED", parent=url_event_3)
+    url_event_4 = scan.make_event("http://evilcorp.com", "URL", parent=url_event_3, tags="status-200")
     assert url_event_4.web_spider_distance == 0
     assert "spider-danger" not in url_event_4.tags
     assert "spider-max" not in url_event_4.tags
