diff --git a/artemis/module_base.py b/artemis/module_base.py index cc0d77741..43296d026 100644 --- a/artemis/module_base.py +++ b/artemis/module_base.py @@ -20,7 +20,7 @@ from artemis.db import DB from artemis.domains import is_domain from artemis.redis_cache import RedisCache -from artemis.resolvers import lookup +from artemis.resolvers import NoAnswer, ResolutionException, lookup from artemis.resource_lock import FailedToAcquireLockException, ResourceLock from artemis.retrying_resolver import setup_retrying_resolver from artemis.task_utils import ( @@ -123,6 +123,52 @@ def add_task(self, current_task: Task, new_task: Task) -> None: else: self.log.info("Task is not a new task, not adding: %s", new_task) + def add_task_if_domain_exists(self, current_task: Task, new_task: Task) -> None: + """ + Add a new task if the domain in the task payload exists. + + Args: + current_task (Task): The current task being processed. + new_task (Task): The new task to potentially add. + """ + domain = new_task.payload.get("domain") + if not domain: + self.log.info("No domain found in new task payload - adding it, as it might be an IP task") + self.add_task(current_task, new_task) + return + + if self.check_domain_exists(domain): + self.add_task(current_task, new_task) + else: + self.log.info("Skipping invalid domain: %s", domain) + + def check_domain_exists(self, domain: str) -> bool: + """ + Check if a domain exists by looking up its NS and A records. + + Args: + domain (str): The domain to check. + + Returns: + bool: True if the domain exists, False otherwise. + """ + try: + # Check for NS records + try: + ns_records = lookup(domain, "NS") + if ns_records: + return True + except NoAnswer: + # No NS records, continue to check A records + pass + + # Check for A records + a_records = lookup(domain, "A") + return len(a_records) > 0 # returns true if found + + except ResolutionException: + return False + def loop(self) -> None: """ Differs from the original karton implementation: consumes the tasks in random order, so that diff --git a/artemis/modules/classifier.py b/artemis/modules/classifier.py index 4c1206423..9e3d9d51a 100644 --- a/artemis/modules/classifier.py +++ b/artemis/modules/classifier.py @@ -241,7 +241,7 @@ def run(self, current_task: Task) -> None: }, ) - self.add_task(current_task, new_task) + self.add_task_if_domain_exists(current_task, new_task) if __name__ == "__main__": diff --git a/artemis/modules/subdomain_enumeration.py b/artemis/modules/subdomain_enumeration.py index 78ea9c0be..913db073c 100644 --- a/artemis/modules/subdomain_enumeration.py +++ b/artemis/modules/subdomain_enumeration.py @@ -178,13 +178,14 @@ def run(self, current_task: Task) -> None: # We save the task as soon as we have results from a single tool so that other kartons can do something. for subdomain in valid_subdomains_from_tool: - task = Task( - {"type": TaskType.DOMAIN}, - payload={ - "domain": subdomain, - }, - ) - self.add_task(current_task, task) + if subdomain != domain: # ensure we are not adding the parent domain again + task = Task( + {"type": TaskType.DOMAIN}, + payload={ + "domain": subdomain, + }, + ) + self.add_task_if_domain_exists(current_task, task) valid_subdomains.update(valid_subdomains_from_tool) diff --git a/docker-compose.yaml b/docker-compose.yaml index eec603f13..909cdf120 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -94,7 +94,6 @@ services: restart: always volumes: ["./docker/karton.ini:/etc/karton/karton.ini", "${DOCKER_COMPOSE_ADDITIONAL_SHARED_DIRECTORY:-./shared}:/shared/"] - karton-dashboard: depends_on: [karton-system, karton-logger] env_file: .env @@ -158,7 +157,6 @@ services: restart: always volumes: ["./docker/karton.ini:/etc/karton/karton.ini", "${DOCKER_COMPOSE_ADDITIONAL_SHARED_DIRECTORY:-./shared}:/shared/"] - karton-http_service_to_url: <<: *artemis-build-or-image command: "python3 -m artemis.modules.http_service_to_url"