From 133e2efd8109e4945fa91d2d794e31ec6c2ab2ae Mon Sep 17 00:00:00 2001 From: kiblik <5609770+kiblik@users.noreply.github.com> Date: Tue, 12 Nov 2024 15:34:13 +0100 Subject: [PATCH] feat(DD_DEDUPLICATION_ALGORITHM_PER_PARSER + DD_HASHCODE_FIELDS_PER_SCANNER): Add checker of values --- docs/content/en/usage/features.md | 8 ++++---- dojo/settings/.settings.dist.py.sha256sum | 2 +- dojo/settings/settings.dist.py | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/docs/content/en/usage/features.md b/docs/content/en/usage/features.md index 5f99f34023f..7fad563b138 100644 --- a/docs/content/en/usage/features.md +++ b/docs/content/en/usage/features.md @@ -244,7 +244,7 @@ The environment variable will override the settings in `settings.dist.py`, repla The available algorithms are: -DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL +DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL (value for `DD_DEDUPLICATION_ALGORITHM_PER_PARSER`: `unique_id_from_tool`) : The deduplication occurs based on finding.unique_id_from_tool which is a unique technical id existing in the source tool. Few scanners populate this @@ -266,12 +266,12 @@ DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL able to recognise that findings found in previous scans are actually the same as the new findings. -DEDUPE_ALGO_HASH_CODE +DEDUPE_ALGO_HASH_CODE (value for `DD_DEDUPLICATION_ALGORITHM_PER_PARSER`: `hash_code`) : The deduplication occurs based on finding.hash_code. The hash_code itself is configurable for each scanner in parameter `HASHCODE_FIELDS_PER_SCANNER`. -DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE +DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE (value for `DD_DEDUPLICATION_ALGORITHM_PER_PARSER`: `unique_id_from_tool_or_hash_code`) : A finding is a duplicate with another if they have the same unique_id_from_tool OR the same hash_code. @@ -284,7 +284,7 @@ DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE cross-parser deduplication -DEDUPE_ALGO_LEGACY +DEDUPE_ALGO_LEGACY (value for `DD_DEDUPLICATION_ALGORITHM_PER_PARSER`: `legacy`) : This is algorithm that was in place before the configuration per parser was made possible, and also the default one for backward compatibility reasons. diff --git a/dojo/settings/.settings.dist.py.sha256sum b/dojo/settings/.settings.dist.py.sha256sum index 59acc056a4b..ac4b313fa4c 100644 --- a/dojo/settings/.settings.dist.py.sha256sum +++ b/dojo/settings/.settings.dist.py.sha256sum @@ -1 +1 @@ -58e2f6cb0ed2c041fe2741d955b72cb7540bfb0923f489d6324717fcf00039da +4ecb73239a728a885aef8a93747fad416d2fc68666aa8f6223a9993d67f705f1 diff --git a/dojo/settings/settings.dist.py b/dojo/settings/settings.dist.py index 2571d99b0cf..6744b2fee61 100644 --- a/dojo/settings/settings.dist.py +++ b/dojo/settings/settings.dist.py @@ -1296,6 +1296,12 @@ def saml2_attrib_map_format(dict): if len(env("DD_HASHCODE_FIELDS_PER_SCANNER")) > 0: env_hashcode_fields_per_scanner = json.loads(env("DD_HASHCODE_FIELDS_PER_SCANNER")) for key, value in env_hashcode_fields_per_scanner.items(): + if not isinstance(value, list): + msg = f"Fields definition '{value}' for hashcode calculation of '{key}' is not valid. It needs to be list of strings but it is {type(value)}." + raise TypeError(msg) + if not all(isinstance(field, str) for field in value): + msg = f"Fields for hashcode calculation for {key} are not valid. It needs to be list of strings. Some of fields are not string." + raise AttributeError(msg) if key in HASHCODE_FIELDS_PER_SCANNER: logger.info(f"Replacing {key} with value {value} (previously set to {HASHCODE_FIELDS_PER_SCANNER[key]}) from env var DD_HASHCODE_FIELDS_PER_SCANNER") HASHCODE_FIELDS_PER_SCANNER[key] = value @@ -1377,6 +1383,13 @@ def saml2_attrib_map_format(dict): # Makes it possible to deduplicate on a technical id (same parser) and also on some functional fields (cross-parsers deduplication) DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE = "unique_id_from_tool_or_hash_code" +DEDUPE_ALGOS = [ + DEDUPE_ALGO_LEGACY, + DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL, + DEDUPE_ALGO_HASH_CODE, + DEDUPE_ALGO_UNIQUE_ID_FROM_TOOL_OR_HASH_CODE, +] + # Allows to deduplicate with endpoints if endpoints is not included in the hashcode. # Possible values are: scheme, host, port, path, query, fragment, userinfo, and user. For a details description see https://hyperlink.readthedocs.io/en/latest/api.html#attributes. # Example: @@ -1526,6 +1539,9 @@ def saml2_attrib_map_format(dict): if len(env("DD_DEDUPLICATION_ALGORITHM_PER_PARSER")) > 0: env_dedup_algorithm_per_parser = json.loads(env("DD_DEDUPLICATION_ALGORITHM_PER_PARSER")) for key, value in env_dedup_algorithm_per_parser.items(): + if value not in DEDUPE_ALGOS: + msg = f"DEDUP algorithm '{value}' for '{key}' is not valid. Use one of following values: {', '.join(DEDUPE_ALGOS)}" + raise AttributeError(msg) if key in DEDUPLICATION_ALGORITHM_PER_PARSER: logger.info(f"Replacing {key} with value {value} (previously set to {DEDUPLICATION_ALGORITHM_PER_PARSER[key]}) from env var DD_DEDUPLICATION_ALGORITHM_PER_PARSER") DEDUPLICATION_ALGORITHM_PER_PARSER[key] = value