WebWorm.py
"""WebWorm: a command-line tool to scrape and download files from a website."""

import argparse
import sys

from Scraper import WebScraper
from TechDetector import detect_tech

# ANSI escape codes for colored terminal output
RED = "\033[91m"
GREEN = "\033[92m"
BLUE = "\033[94m"
YELLOW = "\033[93m"
RESET = "\033[0m"


def print_banner():
    banner = f"""
{YELLOW}__        __   _  __        __
\ \      / /__| |_\ \      / /__  _ __ _ __ ___
 \ \ /\ / / _ \ '_ \ \ /\ / / _ \| '__| '_ ` _ \\
  \ V  V /  __/ |_) \ V  V / (_) | |  | | | | | |
   \_/\_/ \___|_.__/ \_/\_/ \___/|_|  |_| |_| |_|{RESET}
    """
    print(banner)


def main():
    print_banner()
    parser = argparse.ArgumentParser(
        description=f"{GREEN}WebWorm: A tool to scrape and download files from a website.{RESET}"
    )
    parser.add_argument(
        "-e",
        "--extensions",
        type=str,
        help=f'{YELLOW}Comma-separated list of file extensions to scrape (e.g., "jpg,png,docx"). If not specified, all files will be scraped.{RESET}',
    )
    parser.add_argument(
        "-d",
        "--depth",
        type=int,
        default=1,
        help=f"{YELLOW}The maximum depth to crawl the website. Default is 1.{RESET}",
    )
    parser.add_argument(
        "url",
        type=str,
        help=f"{YELLOW}The URL of the website to scrape.{RESET}",
    )
    parser.add_argument(
        "-t",
        "--tech",
        action="store_true",
        help=f"{YELLOW}Detect technologies used on the website.{RESET}",
    )
    args = parser.parse_args()
    # Basic URL validation; "http" also covers "https" as a prefix.
    if not args.url.startswith("http"):
        print(f"{RED}Error: URL must start with 'http' or 'https'.{RESET}")
        sys.exit(1)

    # Technology detection is a standalone mode: report and exit.
    if args.tech:
        techs = detect_tech(args.url)
        print(f"{BLUE}{techs}{RESET}")
        sys.exit(0)

    if args.depth < 1:
        print(f"{RED}Error: Depth must be greater than 0.{RESET}")
        sys.exit(1)

    # Normalize extensions so each entry carries a leading dot, trimming any
    # stray whitespace, e.g. "jpg, .png" -> [".jpg", ".png"].
    extensions = []
    if args.extensions:
        extensions = [
            "." + ext if not ext.startswith(".") else ext
            for ext in map(str.strip, args.extensions.split(","))
        ]

    if extensions:
        print(
            f"{GREEN}Scraping for files with extensions: {', '.join(extensions)}{RESET}"
        )
    else:
        print(f"{GREEN}Scraping for all files.{RESET}")
    print(f"{GREEN}Maximum crawl depth: {args.depth}{RESET}")

    scraper = WebScraper(args.url, args.depth, extensions)
    scraper.start_scraping()


if __name__ == "__main__":
    main()
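
# Example invocations (illustrative sketches based on the argument parser
# above; they assume the Scraper and TechDetector modules imported at the top
# are present alongside this script):
#
#   python WebWorm.py https://example.com
#   python WebWorm.py https://example.com -d 2 -e "jpg,png,docx"
#   python WebWorm.py https://example.com --tech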