diff --git a/crawlers/website_crawler.py b/crawlers/website_crawler.py index c7c58f4..845d8ef 100644 --- a/crawlers/website_crawler.py +++ b/crawlers/website_crawler.py @@ -79,7 +79,6 @@ def crawl(self) -> None: pos_regex=self.pos_regex, neg_regex=self.neg_regex, indexer=self.indexer, visited=set(), verbose=self.indexer.verbose) urls = clean_urls(urls_set, keep_query_params) - urls = list(set(urls_set)) else: logging.info(f"Unknown pages_source: {self.cfg.website_crawler.pages_source}") return