From 0e706a1db1998d01df23dfcd4ec555b8d8140360 Mon Sep 17 00:00:00 2001 From: Donny Peeters Date: Thu, 8 Aug 2024 16:43:47 +0200 Subject: [PATCH 01/17] First setup Signed-off-by: Donny Peeters --- boefjes/boefjes/dependencies/plugins.py | 7 ++++++- boefjes/boefjes/local_repository.py | 3 ++- boefjes/boefjes/models.py | 15 ++++++++++++++- boefjes/boefjes/plugins/models.py | 13 +++++++++++++ boefjes/boefjes/sql/db_models.py | 1 + boefjes/boefjes/sql/plugin_storage.py | 2 ++ 6 files changed, 38 insertions(+), 3 deletions(-) diff --git a/boefjes/boefjes/dependencies/plugins.py b/boefjes/boefjes/dependencies/plugins.py index c6f922ce9d4..ccb3187c0e2 100644 --- a/boefjes/boefjes/dependencies/plugins.py +++ b/boefjes/boefjes/dependencies/plugins.py @@ -158,7 +158,12 @@ def delete_settings(self, organisation_id: str, plugin_id: str): self.set_enabled_by_id(plugin_id, organisation_id, False) def schema(self, plugin_id: str) -> dict | None: - return self.local_repo.schema(plugin_id) + try: + boefje = self.plugin_storage.boefje_by_id(plugin_id) + + return boefje.schema + except PluginNotFound: + return self.local_repo.schema(plugin_id) def cover(self, plugin_id: str) -> Path: try: diff --git a/boefjes/boefjes/local_repository.py b/boefjes/boefjes/local_repository.py index 4e53054fe43..b58567b3023 100644 --- a/boefjes/boefjes/local_repository.py +++ b/boefjes/boefjes/local_repository.py @@ -14,6 +14,7 @@ BoefjeResource, ModuleException, NormalizerResource, + SCHEMA_FILE, ) logger = structlog.get_logger(__name__) @@ -52,7 +53,7 @@ def schema(self, id_: str) -> dict | None: if id_ not in boefjes: return None - path = boefjes[id_].path / "schema.json" + path = boefjes[id_].path / SCHEMA_FILE if not path.exists(): logger.debug("Did not find schema for boefje %s", boefjes[id_]) diff --git a/boefjes/boefjes/models.py b/boefjes/boefjes/models.py index 69d8cc2c637..266eb776523 100644 --- a/boefjes/boefjes/models.py +++ b/boefjes/boefjes/models.py @@ -2,7 +2,8 @@ from enum import Enum from typing import Literal -from pydantic import BaseModel, Field +from jsonschema.validators import Draft202012Validator +from pydantic import BaseModel, Field, field_validator class Organisation(BaseModel): @@ -29,10 +30,22 @@ class Boefje(Plugin): scan_level: int = 1 consumes: set[str] = Field(default_factory=set) produces: set[str] = Field(default_factory=set) + schema: dict = Field(default_factory=dict) runnable_hash: str | None = None oci_image: str | None = None oci_arguments: list[str] = Field(default_factory=list) + @field_validator("schema") + @classmethod + def json_schema_valid(cls, schema: dict) -> dict: + val = Draft202012Validator({}) + val.check_schema(schema) + + return schema + + class Config: + validate_assignment = True + class Normalizer(Plugin): type: Literal["normalizer"] = "normalizer" diff --git a/boefjes/boefjes/plugins/models.py b/boefjes/boefjes/plugins/models.py index 5a2669ffbc4..91af854f576 100644 --- a/boefjes/boefjes/plugins/models.py +++ b/boefjes/boefjes/plugins/models.py @@ -1,15 +1,20 @@ import hashlib +import json from enum import Enum from importlib import import_module from inspect import isfunction, signature +from json import JSONDecodeError from pathlib import Path from typing import Protocol +from jsonschema.exceptions import SchemaError + from boefjes.models import Boefje, Normalizer BOEFJES_DIR = Path(__file__).parent BOEFJE_DEFINITION_FILE = "boefje.json" +SCHEMA_FILE = "schema.json" NORMALIZER_DEFINITION_FILE = "normalizer.json" ENTRYPOINT_BOEFJES = "main.py" ENTRYPOINT_NORMALIZERS = "normalize.py" @@ -62,6 +67,14 @@ def __init__(self, path: Path, package: str): if (path / ENTRYPOINT_BOEFJES).exists(): self.module = get_runnable_module_from_package(package, ENTRYPOINT_BOEFJES, parameter_count=1) + if (path / SCHEMA_FILE).exists(): + try: + self.boefje.schema = json.load((path / SCHEMA_FILE).open()) + except JSONDecodeError as e: + raise ModuleException("Invalid schema file") from e + except SchemaError as e: + raise ModuleException("Invalid schema") from e + class NormalizerResource: """Represents a Normalizer package that we can run. Throws a ModuleException if any validation fails.""" diff --git a/boefjes/boefjes/sql/db_models.py b/boefjes/boefjes/sql/db_models.py index 1ecd0cc0086..e8e9740f8f4 100644 --- a/boefjes/boefjes/sql/db_models.py +++ b/boefjes/boefjes/sql/db_models.py @@ -76,6 +76,7 @@ class BoefjeInDB(SQL_BASE): consumes = Column(types.ARRAY(types.String(length=128)), default=lambda: [], nullable=False) produces = Column(types.ARRAY(types.String(length=128)), default=lambda: [], nullable=False) environment_keys = Column(types.ARRAY(types.String(length=128)), default=lambda: [], nullable=False) + schema = Column(types.JSON(), nullable=True) # Image specifications oci_image = Column(types.String(length=256), nullable=True) diff --git a/boefjes/boefjes/sql/plugin_storage.py b/boefjes/boefjes/sql/plugin_storage.py index 2dddd0ad822..c9bcffbaa2f 100644 --- a/boefjes/boefjes/sql/plugin_storage.py +++ b/boefjes/boefjes/sql/plugin_storage.py @@ -110,6 +110,7 @@ def to_boefje_in_db(boefje: Boefje) -> BoefjeInDB: scan_level=str(boefje.scan_level), consumes=boefje.consumes, produces=boefje.produces, + schema=boefje.schema, environment_keys=boefje.environment_keys, oci_image=boefje.oci_image, oci_arguments=boefje.oci_arguments, @@ -142,6 +143,7 @@ def to_boefje(boefje_in_db: BoefjeInDB) -> Boefje: scan_level=int(boefje_in_db.scan_level), consumes=boefje_in_db.consumes, produces=boefje_in_db.produces, + schema=boefje_in_db.schema, environment_keys=boefje_in_db.environment_keys, oci_image=boefje_in_db.oci_image, oci_arguments=boefje_in_db.oci_arguments, From f5005430d1f9e6b2169f37d8151cb868292d13b4 Mon Sep 17 00:00:00 2001 From: Donny Peeters Date: Thu, 8 Aug 2024 16:45:05 +0200 Subject: [PATCH 02/17] Make schema matching for enabling optional from the API perspective Signed-off-by: Donny Peeters --- boefjes/boefjes/dependencies/plugins.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/boefjes/boefjes/dependencies/plugins.py b/boefjes/boefjes/dependencies/plugins.py index c6f922ce9d4..7600457141d 100644 --- a/boefjes/boefjes/dependencies/plugins.py +++ b/boefjes/boefjes/dependencies/plugins.py @@ -150,12 +150,7 @@ def _put_normalizer(self, normalizer_id: str) -> None: def delete_settings(self, organisation_id: str, plugin_id: str): self.config_storage.delete(organisation_id, plugin_id) - try: - self._assert_settings_match_schema({}, organisation_id, plugin_id) - except SettingsNotConformingToSchema: - logger.warning("Making sure %s is disabled for %s because settings are deleted", plugin_id, organisation_id) - - self.set_enabled_by_id(plugin_id, organisation_id, False) + # We don't check the schema anymore because we can provide entries through the global environment as well def schema(self, plugin_id: str) -> dict | None: return self.local_repo.schema(plugin_id) @@ -179,9 +174,7 @@ def description(self, plugin_id: str, organisation_id: str) -> str: return "" def set_enabled_by_id(self, plugin_id: str, organisation_id: str, enabled: bool): - if enabled: - all_settings = self.get_all_settings(organisation_id, plugin_id) - self._assert_settings_match_schema(all_settings, organisation_id, plugin_id) + # We don't check the schema anymore because we can provide entries through the global environment as well try: self._put_boefje(plugin_id) From e2db672cf94a9a82f828cc6f01955936d6b2b2e3 Mon Sep 17 00:00:00 2001 From: Donny Peeters Date: Thu, 8 Aug 2024 17:05:58 +0200 Subject: [PATCH 03/17] Add migration and set the schema for local plugins Signed-off-by: Donny Peeters --- boefjes/boefjes/local_repository.py | 2 +- ..._introduce_schema_field_to_boefje_model.py | 53 +++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py diff --git a/boefjes/boefjes/local_repository.py b/boefjes/boefjes/local_repository.py index b58567b3023..1370c6a8779 100644 --- a/boefjes/boefjes/local_repository.py +++ b/boefjes/boefjes/local_repository.py @@ -11,10 +11,10 @@ BOEFJES_DIR, ENTRYPOINT_NORMALIZERS, NORMALIZER_DEFINITION_FILE, + SCHEMA_FILE, BoefjeResource, ModuleException, NormalizerResource, - SCHEMA_FILE, ) logger = structlog.get_logger(__name__) diff --git a/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py b/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py new file mode 100644 index 00000000000..a2f208ca2da --- /dev/null +++ b/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py @@ -0,0 +1,53 @@ +"""Introduce schema field to Boefje model + +Revision ID: 5be152459a7b +Revises: f9de6eb7824b +Create Date: 2024-08-08 14:47:12.582017 + +""" +import logging + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.orm import sessionmaker + +from boefjes.local_repository import get_local_repository +from boefjes.sql.plugin_storage import create_plugin_storage +from boefjes.storage.interfaces import PluginNotFound + +# revision identifiers, used by Alembic. +revision = '5be152459a7b' +down_revision = 'f9de6eb7824b' +branch_labels = None +depends_on = None + +logger = logging.getLogger(__name__) + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('boefje', sa.Column('schema', sa.JSON(), nullable=True)) + + local_repo = get_local_repository() + connection = op.get_bind() + session = sessionmaker(bind=connection)() + + with create_plugin_storage(session) as storage: + for plugin in local_repo.get_all(): + if schema := local_repo.schema(plugin.id): + try: + # This way we avoid the safeguard that updating static boefjes is not allowed + instance = storage._db_boefje_instance_by_id(plugin.id) + instance.schema = schema + storage.session.add(instance) + except PluginNotFound: + logger.exception("Could not set schema for plugin %s", plugin.id) + continue + + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('boefje', 'schema') + # ### end Alembic commands ### From d0877dd45119cfd510ae8db88208aa20e7b258d3 Mon Sep 17 00:00:00 2001 From: Donny Peeters Date: Sat, 10 Aug 2024 17:38:41 +0200 Subject: [PATCH 04/17] Fix migration tests --- ..._introduce_schema_field_to_boefje_model.py | 15 +- ...de6eb7824b_introduce_boefjeconfig_model.py | 199 ++++++++++++------ .../test_remove_repository_migration.py | 7 +- ...est_settings_to_boefje_config_migration.py | 10 +- 4 files changed, 147 insertions(+), 84 deletions(-) diff --git a/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py b/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py index a2f208ca2da..f38f5963298 100644 --- a/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py +++ b/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py @@ -5,10 +5,11 @@ Create Date: 2024-08-08 14:47:12.582017 """ + import logging -from alembic import op import sqlalchemy as sa +from alembic import op from sqlalchemy.orm import sessionmaker from boefjes.local_repository import get_local_repository @@ -16,8 +17,8 @@ from boefjes.storage.interfaces import PluginNotFound # revision identifiers, used by Alembic. -revision = '5be152459a7b' -down_revision = 'f9de6eb7824b' +revision = "5be152459a7b" +down_revision = "f9de6eb7824b" branch_labels = None depends_on = None @@ -26,11 +27,10 @@ def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.add_column('boefje', sa.Column('schema', sa.JSON(), nullable=True)) + op.add_column("boefje", sa.Column("schema", sa.JSON(), nullable=True)) local_repo = get_local_repository() - connection = op.get_bind() - session = sessionmaker(bind=connection)() + session = sessionmaker(bind=op.get_bind())() with create_plugin_storage(session) as storage: for plugin in local_repo.get_all(): @@ -44,10 +44,11 @@ def upgrade() -> None: logger.exception("Could not set schema for plugin %s", plugin.id) continue + session.close() # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.drop_column('boefje', 'schema') + op.drop_column("boefje", "schema") # ### end Alembic commands ### diff --git a/boefjes/boefjes/migrations/versions/f9de6eb7824b_introduce_boefjeconfig_model.py b/boefjes/boefjes/migrations/versions/f9de6eb7824b_introduce_boefjeconfig_model.py index ffa35930368..40a44d504d3 100644 --- a/boefjes/boefjes/migrations/versions/f9de6eb7824b_introduce_boefjeconfig_model.py +++ b/boefjes/boefjes/migrations/versions/f9de6eb7824b_introduce_boefjeconfig_model.py @@ -10,11 +10,12 @@ import sqlalchemy as sa from alembic import op -from sqlalchemy.orm import sessionmaker +from psycopg2._json import Json +from psycopg2.extensions import register_adapter +from psycopg2.extras import execute_values from boefjes.local_repository import get_local_repository -from boefjes.sql.plugin_storage import create_plugin_storage -from boefjes.storage.interfaces import PluginNotFound +from boefjes.models import Boefje, Normalizer # revision identifiers, used by Alembic. revision = "f9de6eb7824b" @@ -57,72 +58,136 @@ def upgrade() -> None: op.add_column("boefje", sa.Column("static", sa.Boolean(), server_default="false", nullable=False)) op.add_column("normalizer", sa.Column("static", sa.Boolean(), server_default="false", nullable=False)) + register_adapter(dict, Json) + local_plugins = {plugin.id: plugin for plugin in get_local_repository().get_all()} connection = op.get_bind() - session = sessionmaker(bind=connection)() - - with create_plugin_storage(session) as storage: - # Get unique plugin_ids from the settings table for boefjes that do not exist yet in the database - query = """ - SELECT DISTINCT s.plugin_id FROM settings s left join boefje b on b.plugin_id = s.plugin_id - where b.plugin_id IS NULL - """ - for plugin_id_output in op.get_bind().execute(query).fetchall(): - plugin_id = plugin_id_output[0] - if plugin_id not in local_plugins: - raise ValueError(f"Invalid plugin id found: {plugin_id}") - - # Since settings are boefje-only at this moment - if local_plugins[plugin_id].type != "boefje": - raise ValueError(f"Settings for normalizer or bit found: {plugin_id}. Remove these entries first.") - - try: - storage.boefje_by_id(plugin_id) - continue # The Boefje already exists - except PluginNotFound: - pass # The raw query bypasses the session "cache", so this just checks for duplicates - - storage.create_boefje(local_plugins[plugin_id]) # type: ignore - - query = """ - SELECT DISTINCT p.plugin_id FROM plugin_state p left join boefje b on b.plugin_id = p.plugin_id - where b.plugin_id IS NULL - """ - - for plugin_id_output in op.get_bind().execute(query).fetchall(): - plugin_id = plugin_id_output[0] - if plugin_id not in local_plugins: - logger.warning("Unknown plugin id found: %s. You might have to re-enable the plugin!", plugin_id) - continue - - try: - storage.boefje_by_id(plugin_id) - continue # The Boefje already exists - except PluginNotFound: - pass # The raw query bypasses the session "cache", so this just checks for duplicates - - if local_plugins[plugin_id].type == "boefje": - storage.create_boefje(local_plugins[plugin_id]) # type: ignore - - query = """ - SELECT DISTINCT p.plugin_id FROM plugin_state p left join normalizer n on n.plugin_id = p.plugin_id - where n.plugin_id IS NULL - """ - - for plugin_id_output in op.get_bind().execute(query).fetchall(): - plugin_id = plugin_id_output[0] - if plugin_id not in local_plugins: - logger.warning("Unknown plugin id found: %s. You might have to re-enable the plugin!", plugin_id) - continue - - try: - storage.normalizer_by_id(plugin_id) - continue # The Normalizer already exists - except PluginNotFound: - pass # The raw query bypasses the session "cache", so this just checks for duplicates - - if local_plugins[plugin_id].type == "normalizer": - storage.create_normalizer(local_plugins[plugin_id]) # type: ignore + + # Get unique plugin_ids from the settings table for boefjes that do not exist yet in the database + query = """ + SELECT DISTINCT s.plugin_id FROM settings s left join boefje b on b.plugin_id = s.plugin_id + where b.plugin_id IS NULL + """ # noqa: S608 + + to_insert: list[Boefje] = [] + + for plugin_id_output in connection.execute(query).fetchall(): + plugin_id = plugin_id_output[0] + if plugin_id not in local_plugins: + raise ValueError(f"Invalid plugin id found: {plugin_id}") + + # Since settings are boefje-only at this moment + if local_plugins[plugin_id].type != "boefje": + raise ValueError(f"Settings for normalizer or bit found: {plugin_id}. Remove these entries first.") + + res = connection.execute(f"SELECT id FROM boefje where plugin_id = '{plugin_id}'") # noqa: S608 + if res.fetchone() is not None: + continue # The Boefje already exists + + if local_plugins[plugin_id].type == "boefje": + to_insert.append(local_plugins[plugin_id]) + + entries = [ + ( + boefje.id, + boefje.name, + boefje.description, + str(boefje.scan_level), + list(boefje.consumes), + list(boefje.produces), + boefje.environment_keys, + boefje.oci_image, + boefje.oci_arguments, + boefje.version, + ) + for boefje in to_insert + ] + query = """INSERT INTO boefje (plugin_id, name, description, scan_level, consumes, produces, environment_keys, + oci_image, oci_arguments, version) values %s""" + + with connection.begin(): + cursor = connection.connection.cursor() + execute_values(cursor, query, entries) + + to_insert = [] + + query = """ + SELECT DISTINCT p.plugin_id FROM plugin_state p left join boefje b on b.plugin_id = p.plugin_id + where b.plugin_id IS NULL + """ + + for plugin_id_output in connection.execute(query).fetchall(): + plugin_id = plugin_id_output[0] + if plugin_id not in local_plugins: + logger.warning("Unknown plugin id found: %s. You might have to re-enable the plugin!", plugin_id) + continue + + res = connection.execute(f"SELECT id FROM boefje where plugin_id = '{plugin_id}'") # noqa: S608 + if res.fetchone() is not None: + continue # The Boefje already exists + + if local_plugins[plugin_id].type == "boefje": + to_insert.append(local_plugins[plugin_id]) + + entries = [ + ( + boefje.id, + boefje.name, + boefje.description, + str(boefje.scan_level), + list(boefje.consumes), + list(boefje.produces), + boefje.environment_keys, + boefje.oci_image, + boefje.oci_arguments, + boefje.version, + ) + for boefje in to_insert + ] + query = """INSERT INTO boefje (plugin_id, name, description, scan_level, consumes, produces, environment_keys, + oci_image, oci_arguments, version) values %s""" # noqa: S608 + + with connection.begin(): + cursor = connection.connection.cursor() + execute_values(cursor, query, entries) + + normalizers_to_insert: list[Normalizer] = [] + query = """ + SELECT DISTINCT p.plugin_id FROM plugin_state p left join normalizer n on n.plugin_id = p.plugin_id + where n.plugin_id IS NULL + """ # noqa: S608 + + for plugin_id_output in connection.execute(query).fetchall(): + plugin_id = plugin_id_output[0] + if plugin_id not in local_plugins: + logger.warning("Unknown plugin id found: %s. You might have to re-enable the plugin!", plugin_id) + continue + + res = connection.execute(f"SELECT id FROM normalizer where plugin_id = '{plugin_id}'") # noqa: S608 + if res.fetchone() is not None: + continue # The Normalizer already exists + + if local_plugins[plugin_id].type == "normalizer": + normalizers_to_insert.append(local_plugins[plugin_id]) + + normalizer_entries = [ + ( + normalizer.id, + normalizer.name, + normalizer.description, + normalizer.consumes, + normalizer.produces, + normalizer.environment_keys, + normalizer.version, + ) + for normalizer in normalizers_to_insert + ] + query = """INSERT INTO normalizer (plugin_id, name, description, consumes, produces, environment_keys, version) + values %s""" # noqa: S608 + + with connection.begin(): + cursor = connection.connection.cursor() + execute_values(cursor, query, normalizer_entries) with connection.begin(): connection.execute(""" diff --git a/boefjes/tests/integration/test_remove_repository_migration.py b/boefjes/tests/integration/test_remove_repository_migration.py index dd2ee7c8034..33379d42714 100644 --- a/boefjes/tests/integration/test_remove_repository_migration.py +++ b/boefjes/tests/integration/test_remove_repository_migration.py @@ -27,6 +27,8 @@ def setUp(self) -> None: storage.create(Organisation(id="dev1", name="Test 1 ")) storage.create(Organisation(id="dev2", name="Test 2 ")) + session.close() + entries = [(1, "LOCAL", "Repository Local", "https://local.com/")] query = f"INSERT INTO repository (pk, id, name, base_url) values {','.join(map(str, entries))}" # noqa: S608 self.engine.execute(text(query)) @@ -44,7 +46,6 @@ def setUp(self) -> None: f"INSERT INTO organisation_repository (repository_pk, organisation_pk) values {','.join(map(str, entries))}" # noqa: S608 ) self.engine.execute(text(query)) - session.close() def test_fail_on_non_unique(self): session = sessionmaker(bind=self.engine)() @@ -76,8 +77,6 @@ def test_fail_on_non_unique(self): session.close() def test_downgrade(self): - session = sessionmaker(bind=self.engine)() - self.engine.execute(text("DELETE FROM plugin_state WHERE id = 2")) # Fix unique constraint fails alembic.config.main(argv=["--config", "/app/boefjes/boefjes/alembic.ini", "upgrade", "7c88b9cd96aa"]) alembic.config.main(argv=["--config", "/app/boefjes/boefjes/alembic.ini", "downgrade", "-1"]) @@ -88,8 +87,6 @@ def test_downgrade(self): (1, "LOCAL", "Local Plugin Repository", "http://dev/null") ] - session.close() - def tearDown(self) -> None: self.engine.execute(text("DELETE FROM plugin_state")) # Fix unique constraint fails diff --git a/boefjes/tests/integration/test_settings_to_boefje_config_migration.py b/boefjes/tests/integration/test_settings_to_boefje_config_migration.py index 35e7670f614..261a965240c 100644 --- a/boefjes/tests/integration/test_settings_to_boefje_config_migration.py +++ b/boefjes/tests/integration/test_settings_to_boefje_config_migration.py @@ -10,7 +10,6 @@ from boefjes.sql.config_storage import SQLConfigStorage, create_encrypter from boefjes.sql.db import SQL_BASE, get_engine from boefjes.sql.organisation_storage import SQLOrganisationStorage -from boefjes.sql.plugin_storage import SQLPluginStorage @skipIf(os.environ.get("CI") != "1", "Needs a CI database.") @@ -29,6 +28,8 @@ def setUp(self) -> None: storage.create(Organisation(id="dev1", name="Test 1 ")) storage.create(Organisation(id="dev2", name="Test 2 ")) + session.close() + encrypter = create_encrypter() entries = [ (1, encrypter.encode('{"key1": "val1"}'), "dns-records", 1), @@ -44,8 +45,6 @@ def setUp(self) -> None: ) self.engine.execute(text(query)) - session.close() - def test_fail_on_wrong_plugin_ids(self): session = sessionmaker(bind=self.engine)() @@ -73,16 +72,17 @@ def test_fail_on_wrong_plugin_ids(self): alembic.config.main(argv=["--config", "/app/boefjes/boefjes/alembic.ini", "upgrade", "f9de6eb7824b"]) - assert SQLPluginStorage(session, settings).boefje_by_id("dns-records").id == "dns-records" + assert self.engine.execute(text("SELECT id FROM boefje WHERE plugin_id = 'dns-records'")).fetchall() == [(2,)] config_storage = SQLConfigStorage(session, encrypter) assert config_storage.get_all_settings("dev1", "dns-records") == {"key1": "val1"} - assert config_storage.get_all_settings("dev2", "dns-records") == {"key1": "val1", "key2": "val2"} assert config_storage.get_all_settings("dev1", "nmap-udp") == {} + assert config_storage.get_all_settings("dev2", "dns-records") == {"key1": "val1", "key2": "val2"} assert config_storage.is_enabled_by_id("dns-records", "dev1") assert config_storage.is_enabled_by_id("nmap-udp", "dev1") + session.commit() session.close() def test_downgrade(self): From 622db58e8cbf9beeb6639980ae66d5774b17c208 Mon Sep 17 00:00:00 2001 From: Donny Peeters Date: Mon, 12 Aug 2024 10:37:10 +0200 Subject: [PATCH 05/17] Test the migration Signed-off-by: Donny Peeters --- ..._introduce_schema_field_to_boefje_model.py | 3 +- .../test_migration_add_schema_field.py | 163 ++++++++++++++++++ 2 files changed, 165 insertions(+), 1 deletion(-) create mode 100644 boefjes/tests/integration/test_migration_add_schema_field.py diff --git a/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py b/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py index f38f5963298..c462c72d0b5 100644 --- a/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py +++ b/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py @@ -40,8 +40,9 @@ def upgrade() -> None: instance = storage._db_boefje_instance_by_id(plugin.id) instance.schema = schema storage.session.add(instance) + logger.info("Updated database entry for plugin %s", plugin.id) except PluginNotFound: - logger.exception("Could not set schema for plugin %s", plugin.id) + logger.info("No database entry for plugin %s", plugin.id) continue session.close() diff --git a/boefjes/tests/integration/test_migration_add_schema_field.py b/boefjes/tests/integration/test_migration_add_schema_field.py new file mode 100644 index 00000000000..3834af44fa5 --- /dev/null +++ b/boefjes/tests/integration/test_migration_add_schema_field.py @@ -0,0 +1,163 @@ +import os +from unittest import TestCase, skipIf + +import alembic.config +from psycopg2.extras import execute_values +from sqlalchemy.orm import sessionmaker + +from boefjes.local_repository import get_local_repository +from boefjes.sql.db import SQL_BASE, get_engine + + +@skipIf(os.environ.get("CI") != "1", "Needs a CI database.") +class TestSettingsToBoefjeConfig(TestCase): + def setUp(self) -> None: + self.engine = get_engine() + + # To reset autoincrement ids + alembic.config.main(argv=["--config", "/app/boefjes/boefjes/alembic.ini", "downgrade", "base"]) + # Set state to revision 6f99834a4a5a + alembic.config.main(argv=["--config", "/app/boefjes/boefjes/alembic.ini", "upgrade", "f9de6eb7824b"]) + + dns_records = get_local_repository().by_id("dns-records") + nmap_udp = get_local_repository().by_id("nmap-udp") + + entries = [ + ( + boefje.id, + boefje.name, + boefje.description, + str(boefje.scan_level), + list(boefje.consumes), + list(boefje.produces), + boefje.environment_keys, + boefje.oci_image, + boefje.oci_arguments, + boefje.version, + ) + for boefje in [dns_records, nmap_udp] + ] + + connection = self.engine.connect() + query = """INSERT INTO boefje (plugin_id, name, description, scan_level, consumes, produces, environment_keys, + oci_image, oci_arguments, version) values %s""" + + with connection.begin(): + execute_values(connection.connection.cursor(), query, entries) + + def test_fail_on_wrong_plugin_ids(self): + assert self.engine.execute("SELECT * from boefje").fetchall() == [ + ( + 1, + "dns-records", + None, + "DnsRecords", + "Fetch the DNS record(s) of a hostname", + "1", + ["Hostname"], + ["boefje/dns-records"], + ["RECORD_TYPES", "REMOTE_NS"], + None, + [], + None, + False, + ), + ( + 2, + "nmap-udp", + None, + "Nmap UDP", + "Defaults to top 250 UDP ports. Includes service detection.", + "2", + ["IPAddressV6", "IPAddressV4"], + ["boefje/nmap-udp"], + ["TOP_PORTS_UDP"], + "ghcr.io/minvws/openkat/nmap:latest", + ["--open", "-T4", "-Pn", "-r", "-v10", "-sV", "-sU"], + None, + False, + ), + ] + + alembic.config.main(argv=["--config", "/app/boefjes/boefjes/alembic.ini", "upgrade", "5be152459a7b"]) + + schema_dns = { + "title": "Arguments", + "type": "object", + "properties": { + "RECORD_TYPES": { + "title": "RECORD_TYPES", + "type": "string", + "description": "List of comma separated DNS record types to query for.", + "default": "A,AAAA,CAA,CERT,RP,SRV,TXT,MX,NS,CNAME,DNAME", + }, + "REMOTE_NS": { + "title": "REMOTE_NS", + "maxLength": 45, + "type": "string", + "description": "The IP address of the DNS resolver you want to use.", + "default": "1.1.1.1", + }, + }, + } + + schema_udp = { + "title": "Arguments", + "type": "object", + "properties": { + "TOP_PORTS_UDP": { + "title": "TOP_PORTS_UDP", + "type": "integer", + "minimum": 1, + "maximum": 65535, + "description": "Scan TOP_PORTS_UDP most common ports. Defaults to 250.", + } + }, + "required": [], + } + + assert self.engine.execute("SELECT * from boefje").fetchall() == [ + ( + 1, + "dns-records", + None, + "DnsRecords", + "Fetch the DNS record(s) of a hostname", + "1", + ["Hostname"], + ["boefje/dns-records"], + ["RECORD_TYPES", "REMOTE_NS"], + None, + [], + None, + False, + schema_dns, + ), + ( + 2, + "nmap-udp", + None, + "Nmap UDP", + "Defaults to top 250 UDP ports. Includes service detection.", + "2", + ["IPAddressV6", "IPAddressV4"], + ["boefje/nmap-udp"], + ["TOP_PORTS_UDP"], + "ghcr.io/minvws/openkat/nmap:latest", + ["--open", "-T4", "-Pn", "-r", "-v10", "-sV", "-sU"], + None, + False, + schema_udp, + ), + ] + + def tearDown(self) -> None: + alembic.config.main(argv=["--config", "/app/boefjes/boefjes/alembic.ini", "upgrade", "head"]) + + session = sessionmaker(bind=get_engine())() + + for table in SQL_BASE.metadata.tables: + session.execute(f"DELETE FROM {table} CASCADE") # noqa: S608 + + session.commit() + session.close() From a2664ff4e558363ce5f3e7a032b1813ff41db697 Mon Sep 17 00:00:00 2001 From: Donny Peeters Date: Mon, 12 Aug 2024 11:55:58 +0200 Subject: [PATCH 06/17] Add tests for API logic and fix some edge cases Signed-off-by: Donny Peeters --- boefjes/boefjes/katalogus/plugins.py | 15 +++- boefjes/boefjes/katalogus/root.py | 9 +++ boefjes/boefjes/models.py | 9 +-- boefjes/tests/integration/test_api.py | 57 +++++++++++--- .../test_migration_add_schema_field.py | 77 ++++++++++--------- 5 files changed, 114 insertions(+), 53 deletions(-) diff --git a/boefjes/boefjes/katalogus/plugins.py b/boefjes/boefjes/katalogus/plugins.py index 0928280af4a..134243ad963 100644 --- a/boefjes/boefjes/katalogus/plugins.py +++ b/boefjes/boefjes/katalogus/plugins.py @@ -3,7 +3,8 @@ from fastapi import APIRouter, Body, Depends, HTTPException, status from fastapi.responses import FileResponse, JSONResponse, Response -from pydantic import BaseModel, Field +from jsonschema.validators import Draft202012Validator +from pydantic import BaseModel, Field, field_validator from boefjes.dependencies.plugins import ( PluginService, @@ -90,6 +91,8 @@ def get_plugin( @router.post("/plugins", status_code=status.HTTP_201_CREATED) def add_plugin(plugin: PluginType, plugin_service: PluginService = Depends(get_plugin_service)): with plugin_service as service: + plugin.static = False # Creation through the API implies that these cannot be static + if plugin.type == "boefje": return service.create_boefje(plugin) @@ -124,9 +127,19 @@ class BoefjeIn(BaseModel): scan_level: int = 1 consumes: set[str] = Field(default_factory=set) produces: set[str] = Field(default_factory=set) + schema: dict | None = None oci_image: str | None = None oci_arguments: list[str] = Field(default_factory=list) + @field_validator("schema") + @classmethod + def json_schema_valid(cls, schema: dict | None) -> dict | None: + if schema is not None: + Draft202012Validator.check_schema(schema) + return schema + + return None + @router.patch("/boefjes/{boefje_id}", status_code=status.HTTP_204_NO_CONTENT) def update_boefje( diff --git a/boefjes/boefjes/katalogus/root.py b/boefjes/boefjes/katalogus/root.py index 5c62cea744b..8aa0c1683c5 100644 --- a/boefjes/boefjes/katalogus/root.py +++ b/boefjes/boefjes/katalogus/root.py @@ -5,6 +5,7 @@ import structlog from fastapi import APIRouter, FastAPI, Request, status from fastapi.responses import JSONResponse, RedirectResponse +from jsonschema.exceptions import SchemaError from opentelemetry import trace from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor @@ -96,6 +97,14 @@ def storage_error_handler(request: Request, exc: StorageError): ) +@app.exception_handler(SchemaError) +def schema_error_handler(request: Request, exc: StorageError): + return JSONResponse( + status_code=status.HTTP_400_BAD_REQUEST, + content={"message": "Invalid jsonschema provided"}, + ) + + class ServiceHealth(BaseModel): service: str healthy: bool = False diff --git a/boefjes/boefjes/models.py b/boefjes/boefjes/models.py index 266eb776523..4881b26008a 100644 --- a/boefjes/boefjes/models.py +++ b/boefjes/boefjes/models.py @@ -30,7 +30,7 @@ class Boefje(Plugin): scan_level: int = 1 consumes: set[str] = Field(default_factory=set) produces: set[str] = Field(default_factory=set) - schema: dict = Field(default_factory=dict) + schema: dict | None = None runnable_hash: str | None = None oci_image: str | None = None oci_arguments: list[str] = Field(default_factory=list) @@ -38,10 +38,9 @@ class Boefje(Plugin): @field_validator("schema") @classmethod def json_schema_valid(cls, schema: dict) -> dict: - val = Draft202012Validator({}) - val.check_schema(schema) - - return schema + if schema is not None: + Draft202012Validator.check_schema(schema) + return schema class Config: validate_assignment = True diff --git a/boefjes/tests/integration/test_api.py b/boefjes/tests/integration/test_api.py index 9df55695c16..2cc553d0a28 100644 --- a/boefjes/tests/integration/test_api.py +++ b/boefjes/tests/integration/test_api.py @@ -60,7 +60,7 @@ def test_cannot_add_plugin_reserved_id(self): self.assertEqual(response.status_code, 400) self.assertEqual(response.json(), {"message": "Plugin id 'dns-records' is already used"}) - normalizer = Normalizer(id="kat_nmap_normalize", name="My test normalizer", static=False) + normalizer = Normalizer(id="kat_nmap_normalize", name="My test normalizer") response = self.client.post(f"/v1/organisations/{self.org.id}/plugins", content=normalizer.json()) self.assertEqual(response.status_code, 400) self.assertEqual(response.json(), {"message": "Plugin id 'kat_nmap_normalize' is already used"}) @@ -115,8 +115,8 @@ def test_delete_normalizer(self): self.assertEqual(response.status_code, 404) def test_update_plugins(self): - normalizer = Normalizer(id="norm_id", name="My test normalizer", static=False) - boefje = Boefje(id="test_plugin", name="My test boefje", description="123", static=False) + normalizer = Normalizer(id="norm_id", name="My test normalizer") + boefje = Boefje(id="test_plugin", name="My test boefje", description="123") self.client.post(f"/v1/organisations/{self.org.id}/plugins", content=boefje.json()) self.client.patch(f"/v1/organisations/{self.org.id}/boefjes/{boefje.id}", json={"description": "4"}) @@ -126,6 +126,50 @@ def test_update_plugins(self): self.assertEqual(response.json()["description"], "4") self.assertTrue(response.json()["enabled"]) + self.client.post(f"/v1/organisations/{self.org.id}/plugins", content=normalizer.json()) + self.client.patch(f"/v1/organisations/{self.org.id}/normalizers/{normalizer.id}", json={"version": "v1.2"}) + + response = self.client.get(f"/v1/organisations/{self.org.id}/plugins/{normalizer.id}") + self.assertEqual(response.json()["version"], "v1.2") + + def test_cannot_create_boefje_with_invalid_schema(self): + boefje = Boefje(id="test_plugin", name="My test boefje", description="123").model_dump(mode="json") + boefje["schema"] = {"$schema": 3} + + r = self.client.post(f"/v1/organisations/{self.org.id}/plugins", json=boefje) + self.assertEqual(r.status_code, 400) + + def test_update_boefje_schema(self): + boefje = Boefje(id="test_plugin", name="My test boefje", description="123") + self.client.post(f"/v1/organisations/{self.org.id}/plugins", content=boefje.json()) + + r = self.client.patch(f"/v1/organisations/{self.org.id}/boefjes/{boefje.id}", json={"schema": {"$schema": 3}}) + self.assertEqual(r.status_code, 400) + + valid_schema = { + "title": "Arguments", + "type": "object", + "properties": { + "MY_KEY": { + "title": "MY_KEY", + "type": "integer", + } + }, + "required": [], + } + r = self.client.patch(f"/v1/organisations/{self.org.id}/boefjes/{boefje.id}", json={"schema": valid_schema}) + self.assertEqual(r.status_code, 204) + + schema = self.client.get(f"/v1/organisations/{self.org.id}/plugins/{boefje.id}/schema.json").json() + assert schema == valid_schema + + api_boefje = self.client.get(f"/v1/organisations/{self.org.id}/plugins/{boefje.id}").json() + assert api_boefje["schema"] == valid_schema + + r = self.client.patch(f"/v1/organisations/{self.org.id}/boefjes/dns-records", json={"schema": valid_schema}) + self.assertEqual(r.status_code, 404) + + def test_cannot_update_static_plugins(self): r = self.client.patch(f"/v1/organisations/{self.org.id}/boefjes/dns-records", json={"id": "4", "version": "s"}) self.assertEqual(r.status_code, 404) r = self.client.patch(f"/v1/organisations/{self.org.id}/boefjes/dns-records", json={"name": "Overwrite name"}) @@ -136,13 +180,6 @@ def test_update_plugins(self): self.assertIsNone(response.json()["version"]) self.assertEqual(response.json()["id"], "dns-records") - self.client.post(f"/v1/organisations/{self.org.id}/plugins", content=normalizer.json()) - self.client.patch(f"/v1/organisations/{self.org.id}/normalizers/{normalizer.id}", json={"version": "v1.2"}) - - response = self.client.get(f"/v1/organisations/{self.org.id}/plugins/{normalizer.id}") - self.assertEqual(response.json()["version"], "v1.2") - - def test_cannot_update_static_plugins(self): self.client.patch(f"/v1/organisations/{self.org.id}/plugins/dns-records", json={"enabled": True}) response = self.client.get(f"/v1/organisations/{self.org.id}/plugins/dns-records") self.assertTrue(response.json()["enabled"]) diff --git a/boefjes/tests/integration/test_migration_add_schema_field.py b/boefjes/tests/integration/test_migration_add_schema_field.py index 3834af44fa5..26fa13f0a70 100644 --- a/boefjes/tests/integration/test_migration_add_schema_field.py +++ b/boefjes/tests/integration/test_migration_add_schema_field.py @@ -28,8 +28,8 @@ def setUp(self) -> None: boefje.name, boefje.description, str(boefje.scan_level), - list(boefje.consumes), - list(boefje.produces), + list(sorted(boefje.consumes)), + list(sorted(boefje.produces)), boefje.environment_keys, boefje.oci_image, boefje.oci_arguments, @@ -69,7 +69,7 @@ def test_fail_on_wrong_plugin_ids(self): "Nmap UDP", "Defaults to top 250 UDP ports. Includes service detection.", "2", - ["IPAddressV6", "IPAddressV4"], + ["IPAddressV4", "IPAddressV6"], ["boefje/nmap-udp"], ["TOP_PORTS_UDP"], "ghcr.io/minvws/openkat/nmap:latest", @@ -116,40 +116,43 @@ def test_fail_on_wrong_plugin_ids(self): "required": [], } - assert self.engine.execute("SELECT * from boefje").fetchall() == [ - ( - 1, - "dns-records", - None, - "DnsRecords", - "Fetch the DNS record(s) of a hostname", - "1", - ["Hostname"], - ["boefje/dns-records"], - ["RECORD_TYPES", "REMOTE_NS"], - None, - [], - None, - False, - schema_dns, - ), - ( - 2, - "nmap-udp", - None, - "Nmap UDP", - "Defaults to top 250 UDP ports. Includes service detection.", - "2", - ["IPAddressV6", "IPAddressV4"], - ["boefje/nmap-udp"], - ["TOP_PORTS_UDP"], - "ghcr.io/minvws/openkat/nmap:latest", - ["--open", "-T4", "-Pn", "-r", "-v10", "-sV", "-sU"], - None, - False, - schema_udp, - ), - ] + self.assertListEqual( + self.engine.execute("SELECT * from boefje").fetchall(), + [ + ( + 1, + "dns-records", + None, + "DnsRecords", + "Fetch the DNS record(s) of a hostname", + "1", + ["Hostname"], + ["boefje/dns-records"], + ["RECORD_TYPES", "REMOTE_NS"], + None, + [], + None, + False, + schema_dns, + ), + ( + 2, + "nmap-udp", + None, + "Nmap UDP", + "Defaults to top 250 UDP ports. Includes service detection.", + "2", + ["IPAddressV4", "IPAddressV6"], + ["boefje/nmap-udp"], + ["TOP_PORTS_UDP"], + "ghcr.io/minvws/openkat/nmap:latest", + ["--open", "-T4", "-Pn", "-r", "-v10", "-sV", "-sU"], + None, + False, + schema_udp, + ), + ], + ) def tearDown(self) -> None: alembic.config.main(argv=["--config", "/app/boefjes/boefjes/alembic.ini", "upgrade", "head"]) From 6254c5d0d19af4191a402188b590f8e5b59acda0 Mon Sep 17 00:00:00 2001 From: Donny Peeters Date: Mon, 19 Aug 2024 21:28:37 +0200 Subject: [PATCH 07/17] Remove organisation argument and update the unit tests --- boefjes/boefjes/api.py | 3 +- boefjes/boefjes/dependencies/plugins.py | 6 ++-- boefjes/boefjes/job_handler.py | 31 +++++++++---------- boefjes/boefjes/storage/interfaces.py | 7 ++--- .../tests/katalogus/test_plugin_service.py | 30 ++++-------------- 5 files changed, 27 insertions(+), 50 deletions(-) diff --git a/boefjes/boefjes/api.py b/boefjes/boefjes/api.py index 72ac9ccc96c..e21d2fe486f 100644 --- a/boefjes/boefjes/api.py +++ b/boefjes/boefjes/api.py @@ -151,8 +151,7 @@ def get_task(task_id, scheduler_client): def create_boefje_meta(task, local_repository): boefje = task.data.boefje boefje_resource = local_repository.by_id(boefje.id) - env_keys = boefje_resource.environment_keys - environment = get_environment_settings(task.data, env_keys) if env_keys else {} + environment = get_environment_settings(task.data) organization = task.data.organization input_ooi = task.data.input_ooi diff --git a/boefjes/boefjes/dependencies/plugins.py b/boefjes/boefjes/dependencies/plugins.py index 879c689fc3d..c0324a0fc55 100644 --- a/boefjes/boefjes/dependencies/plugins.py +++ b/boefjes/boefjes/dependencies/plugins.py @@ -98,7 +98,7 @@ def clone_settings_to_organisation(self, from_organisation: str, to_organisation self.set_enabled_by_id(plugin_id, to_organisation, enabled=True) def upsert_settings(self, settings: dict, organisation_id: str, plugin_id: str): - self._assert_settings_match_schema(settings, organisation_id, plugin_id) + self._assert_settings_match_schema(settings, plugin_id) self._put_boefje(plugin_id) return self.config_storage.upsert(organisation_id, plugin_id, settings=settings) @@ -188,14 +188,14 @@ def set_enabled_by_id(self, plugin_id: str, organisation_id: str, enabled: bool) self.config_storage.upsert(organisation_id, plugin_id, enabled=enabled) - def _assert_settings_match_schema(self, all_settings: dict, organisation_id: str, plugin_id: str): + def _assert_settings_match_schema(self, all_settings: dict, plugin_id: str): schema = self.schema(plugin_id) if schema: # No schema means that there is nothing to assert try: validate(instance=all_settings, schema=schema) except ValidationError as e: - raise SettingsNotConformingToSchema(organisation_id, plugin_id, e.message) from e + raise SettingsNotConformingToSchema(plugin_id, e.message) from e def _set_plugin_enabled(self, plugin: PluginType, organisation_id: str) -> PluginType: with contextlib.suppress(KeyError, NotFound): diff --git a/boefjes/boefjes/job_handler.py b/boefjes/boefjes/job_handler.py index a7e0d4feca1..f086446aacb 100644 --- a/boefjes/boefjes/job_handler.py +++ b/boefjes/boefjes/job_handler.py @@ -35,7 +35,7 @@ def get_octopoes_api_connector(org_code: str) -> OctopoesAPIConnector: return OctopoesAPIConnector(str(settings.octopoes_api), org_code) -def get_environment_settings(boefje_meta: BoefjeMeta, environment_keys: list[str]) -> dict[str, str]: +def get_environment_settings(boefje_meta: BoefjeMeta) -> dict[str, str]: try: katalogus_api = str(settings.katalogus_api).rstrip("/") response = httpx.get( @@ -43,22 +43,23 @@ def get_environment_settings(boefje_meta: BoefjeMeta, environment_keys: list[str timeout=30, ) response.raise_for_status() - environment = response.json() - - # Add prefixed BOEFJE_* global environment variables - for key, value in os.environ.items(): - if key.startswith("BOEFJE_"): - katalogus_key = key.split("BOEFJE_", 1)[1] - # Only pass the environment variable if it is not explicitly set through the katalogus, - # if and only if they are defined in boefje.json - if katalogus_key in environment_keys and katalogus_key not in environment: - environment[katalogus_key] = value - - return {k: str(v) for k, v in environment.items() if k in environment_keys} except HTTPError: logger.exception("Error getting environment settings") raise + settings_from_katalogus = response.json() + new_env = {} + + for key, value in os.environ.items(): + if key.startswith("BOEFJE_"): + env_key = key.split("BOEFJE_", 1)[1] + new_env[env_key] = value + + for key, value in settings_from_katalogus.items: + new_env[key] = value + + return new_env + class BoefjeHandler(Handler): def __init__( @@ -97,10 +98,8 @@ def handle(self, boefje_meta: BoefjeMeta) -> None: boefje_meta.arguments["input"] = ooi.serialize() - env_keys = boefje_resource.environment_keys - boefje_meta.runnable_hash = boefje_resource.runnable_hash - boefje_meta.environment = get_environment_settings(boefje_meta, env_keys) if env_keys else {} + boefje_meta.environment = get_environment_settings(boefje_meta) mime_types = _default_mime_types(boefje_meta.boefje) diff --git a/boefjes/boefjes/storage/interfaces.py b/boefjes/boefjes/storage/interfaces.py index ff19ca7c9f8..3f3d58e4a63 100644 --- a/boefjes/boefjes/storage/interfaces.py +++ b/boefjes/boefjes/storage/interfaces.py @@ -11,11 +11,8 @@ def __init__(self, message: str): class SettingsNotConformingToSchema(StorageError): - def __init__(self, organisation_id: str, plugin_id: str, validation_error: str): - super().__init__( - f"Settings for organisation {organisation_id} and plugin {plugin_id} are not conform the plugin schema: " - f"{validation_error}" - ) + def __init__(self, plugin_id: str, validation_error: str): + super().__init__(f"Settings for plugin {plugin_id} are not conform the plugin schema: {validation_error}") class NotFound(StorageError): diff --git a/boefjes/tests/katalogus/test_plugin_service.py b/boefjes/tests/katalogus/test_plugin_service.py index a86b4aa6fbf..acb892eedee 100644 --- a/boefjes/tests/katalogus/test_plugin_service.py +++ b/boefjes/tests/katalogus/test_plugin_service.py @@ -55,27 +55,13 @@ def test_update_by_id(self): def test_update_by_id_bad_schema(self): plugin_id = "kat_test" - with self.assertRaises(SettingsNotConformingToSchema) as ctx: - self.service.set_enabled_by_id(plugin_id, self.organisation, True) - - msg = ( - "Settings for organisation test and plugin kat_test are not conform the plugin schema: 'api_key' is a " - "required property" - ) - self.assertEqual(ctx.exception.message, msg) - self.service.config_storage.upsert(self.organisation, plugin_id, {"api_key": 128 * "a"}) self.service.set_enabled_by_id(plugin_id, self.organisation, True) - value = 129 * "a" - self.service.config_storage.upsert(self.organisation, plugin_id, {"api_key": 129 * "a"}) with self.assertRaises(SettingsNotConformingToSchema) as ctx: - self.service.set_enabled_by_id(plugin_id, self.organisation, True) + self.service.upsert_settings({"api_key": 129 * "a"}, self.organisation, plugin_id) - msg = ( - f"Settings for organisation test and plugin kat_test are not conform the plugin schema: " - f"'{value}' is too long" - ) + msg = f"Settings for plugin kat_test are not conform the plugin schema: '{129 * 'a'}' is too long" self.assertEqual(ctx.exception.message, msg) def test_get_schema(self): @@ -93,7 +79,7 @@ def test_get_schema(self): schema = self.service.schema("kat_test_normalize") self.assertIsNone(schema) - def test_removing_mandatory_setting_disables_plugin(self): + def test_removing_mandatory_setting_does_not_disable_plugin_anymore(self): plugin_id = "kat_test" self.service.config_storage.upsert(self.organisation, plugin_id, {"api_key": 128 * "a"}) @@ -105,20 +91,17 @@ def test_removing_mandatory_setting_disables_plugin(self): self.service.delete_settings(self.organisation, plugin_id) plugin = self.service.by_plugin_id(plugin_id, self.organisation) - self.assertFalse(plugin.enabled) + self.assertTrue(plugin.enabled) def test_adding_integer_settings_within_given_constraints(self): plugin_id = "kat_test_2" - self.service.config_storage.upsert(self.organisation, plugin_id, {"api_key": "24"}) - with self.assertRaises(SettingsNotConformingToSchema) as ctx: - self.service.set_enabled_by_id(plugin_id, self.organisation, True) + self.service.upsert_settings({"api_key": "24"}, self.organisation, plugin_id) self.assertIn("'24' is not of type 'integer'", ctx.exception.message) - self.service.config_storage.upsert(self.organisation, plugin_id, {"api_key": 24}) - + self.service.upsert_settings({"api_key": 24}, self.organisation, plugin_id) self.service.set_enabled_by_id(plugin_id, self.organisation, True) plugin = self.service.by_plugin_id(plugin_id, self.organisation) @@ -155,7 +138,6 @@ def test_clone_many_settings(self): all_settings_1 = {"api_key": "123"} self.service.upsert_settings(all_settings_1, self.organisation, plugin_id_1) - self.service.clone_settings_to_organisation(self.organisation, "org2") all_settings_for_new_org = self.service.get_all_settings("org2", plugin_id_1) From da5cdd768c5aad3e9a507fc79bb1acce12a5e57e Mon Sep 17 00:00:00 2001 From: Donny Peeters Date: Tue, 20 Aug 2024 08:29:06 +0200 Subject: [PATCH 08/17] Phase out the environment keys field --- boefjes/boefjes/katalogus/plugins.py | 2 -- .../f9de6eb7824b_introduce_boefjeconfig_model.py | 6 +++--- boefjes/boefjes/models.py | 1 - boefjes/boefjes/plugins/kat_binaryedge/boefje.json | 3 --- boefjes/boefjes/plugins/kat_censys/boefje.json | 4 ---- .../boefjes/plugins/kat_cve_finding_types/boefje.json | 3 --- boefjes/boefjes/plugins/kat_dns/boefje.json | 4 ---- boefjes/boefjes/plugins/kat_external_db/boefje.json | 7 ------- boefjes/boefjes/plugins/kat_leakix/boefje.json | 3 --- boefjes/boefjes/plugins/kat_log4shell/boefje.json | 3 --- boefjes/boefjes/plugins/kat_masscan/boefje.json | 4 ---- boefjes/boefjes/plugins/kat_maxmind_geoip/boefje.json | 6 +----- boefjes/boefjes/plugins/kat_nmap_ip_range/boefje.json | 6 ------ boefjes/boefjes/plugins/kat_nmap_ports/boefje.json | 3 --- boefjes/boefjes/plugins/kat_nmap_tcp/boefje.json | 3 --- boefjes/boefjes/plugins/kat_nmap_udp/boefje.json | 3 --- boefjes/boefjes/plugins/kat_shodan/boefje.json | 3 --- .../boefjes/plugins/kat_testssl_sh_ciphers/boefje.json | 5 +---- boefjes/boefjes/plugins/kat_webpage_analysis/boefje.json | 3 --- boefjes/boefjes/plugins/kat_wpscan/boefje.json | 3 --- boefjes/boefjes/sql/db_models.py | 2 -- boefjes/boefjes/sql/plugin_storage.py | 4 ---- .../tests/integration/test_migration_add_schema_field.py | 6 +++--- boefjes/tests/integration/test_sql_repositories.py | 2 -- .../katalogus/boefjes_test_dir/kat_test/boefje.json | 1 - .../boefjes_test_dir/kat_test/kat_test_2/boefje.json | 1 - .../boefjes_test_dir/kat_test/kat_test_4/boefje.json | 1 - boefjes/tests/modules/dummy_boefje/boefje.json | 3 +-- .../tests/modules/dummy_boefje_environment/boefje.json | 3 +-- .../dummy_boefje_environment_with_pycache/boefje.json | 3 +-- .../modules/dummy_boefje_invalid_signature/boefje.json | 3 +-- .../tests/modules/dummy_boefje_missing_run/boefje.json | 3 +-- .../modules/dummy_boefje_runtime_exception/boefje.json | 3 +-- .../tests/modules/dummy_oci_boefje_no_main/boefje.json | 1 - boefjes/tests/test_tasks.py | 6 +++--- .../development_tutorial/creating_a_boefje.md | 2 -- docs/source/introduction/makeyourown.rst | 3 +-- mula/scheduler/models/plugin.py | 1 - rocky/katalogus/client.py | 1 - rocky/tests/conftest.py | 1 - rocky/tests/stubs/katalogus_boefjes.json | 9 --------- rocky/tests/stubs/katalogus_normalizers.json | 5 ----- rocky/tests/test_indemnification.py | 1 - 43 files changed, 18 insertions(+), 122 deletions(-) diff --git a/boefjes/boefjes/katalogus/plugins.py b/boefjes/boefjes/katalogus/plugins.py index 134243ad963..6ecc4b031db 100644 --- a/boefjes/boefjes/katalogus/plugins.py +++ b/boefjes/boefjes/katalogus/plugins.py @@ -123,7 +123,6 @@ class BoefjeIn(BaseModel): version: str | None = None created: datetime.datetime | None = None description: str | None = None - environment_keys: list[str] = Field(default_factory=list) scan_level: int = 1 consumes: set[str] = Field(default_factory=set) produces: set[str] = Field(default_factory=set) @@ -167,7 +166,6 @@ class NormalizerIn(BaseModel): version: str | None = None created: datetime.datetime | None = None description: str | None = None - environment_keys: list[str] = Field(default_factory=list) consumes: list[str] = Field(default_factory=list) # mime types (and/ or boefjes) produces: list[str] = Field(default_factory=list) # oois diff --git a/boefjes/boefjes/migrations/versions/f9de6eb7824b_introduce_boefjeconfig_model.py b/boefjes/boefjes/migrations/versions/f9de6eb7824b_introduce_boefjeconfig_model.py index 40a44d504d3..d46f360b703 100644 --- a/boefjes/boefjes/migrations/versions/f9de6eb7824b_introduce_boefjeconfig_model.py +++ b/boefjes/boefjes/migrations/versions/f9de6eb7824b_introduce_boefjeconfig_model.py @@ -95,7 +95,7 @@ def upgrade() -> None: str(boefje.scan_level), list(boefje.consumes), list(boefje.produces), - boefje.environment_keys, + ["TEST_KEY"], boefje.oci_image, boefje.oci_arguments, boefje.version, @@ -137,7 +137,7 @@ def upgrade() -> None: str(boefje.scan_level), list(boefje.consumes), list(boefje.produces), - boefje.environment_keys, + ["TEST_KEY"], boefje.oci_image, boefje.oci_arguments, boefje.version, @@ -177,7 +177,7 @@ def upgrade() -> None: normalizer.description, normalizer.consumes, normalizer.produces, - normalizer.environment_keys, + ["TEST_KEY"], normalizer.version, ) for normalizer in normalizers_to_insert diff --git a/boefjes/boefjes/models.py b/boefjes/boefjes/models.py index 4881b26008a..028d1a1c9a6 100644 --- a/boefjes/boefjes/models.py +++ b/boefjes/boefjes/models.py @@ -17,7 +17,6 @@ class Plugin(BaseModel): version: str | None = None created: datetime.datetime | None = None description: str | None = None - environment_keys: list[str] = Field(default_factory=list) enabled: bool = False static: bool = True # We need to differentiate between local and remote plugins to know which ones can be deleted diff --git a/boefjes/boefjes/plugins/kat_binaryedge/boefje.json b/boefjes/boefjes/plugins/kat_binaryedge/boefje.json index 004015e0570..efa23aa2e3f 100644 --- a/boefjes/boefjes/plugins/kat_binaryedge/boefje.json +++ b/boefjes/boefjes/plugins/kat_binaryedge/boefje.json @@ -6,8 +6,5 @@ "IPAddressV4", "IPAddressV6" ], - "environment_keys": [ - "BINARYEDGE_API" - ], "scan_level": 2 } diff --git a/boefjes/boefjes/plugins/kat_censys/boefje.json b/boefjes/boefjes/plugins/kat_censys/boefje.json index e8c15547c76..afacecb16ec 100644 --- a/boefjes/boefjes/plugins/kat_censys/boefje.json +++ b/boefjes/boefjes/plugins/kat_censys/boefje.json @@ -6,9 +6,5 @@ "IPAddressV4", "IPAddressV6" ], - "environment_keys": [ - "CENSYS_API_ID", - "CENSYS_API_SECRET" - ], "scan_level": 1 } diff --git a/boefjes/boefjes/plugins/kat_cve_finding_types/boefje.json b/boefjes/boefjes/plugins/kat_cve_finding_types/boefje.json index 2b390197290..b78b1585037 100644 --- a/boefjes/boefjes/plugins/kat_cve_finding_types/boefje.json +++ b/boefjes/boefjes/plugins/kat_cve_finding_types/boefje.json @@ -5,9 +5,6 @@ "consumes": [ "CVEFindingType" ], - "environment_keys": [ - "CVEAPI_URL" - ], "scan_level": 0, "enabled": true } diff --git a/boefjes/boefjes/plugins/kat_dns/boefje.json b/boefjes/boefjes/plugins/kat_dns/boefje.json index 76c36ae1775..336ca088380 100644 --- a/boefjes/boefjes/plugins/kat_dns/boefje.json +++ b/boefjes/boefjes/plugins/kat_dns/boefje.json @@ -5,9 +5,5 @@ "consumes": [ "Hostname" ], - "environment_keys": [ - "RECORD_TYPES", - "REMOTE_NS" - ], "scan_level": 1 } diff --git a/boefjes/boefjes/plugins/kat_external_db/boefje.json b/boefjes/boefjes/plugins/kat_external_db/boefje.json index 1f27e7f9d2e..7f51766f762 100644 --- a/boefjes/boefjes/plugins/kat_external_db/boefje.json +++ b/boefjes/boefjes/plugins/kat_external_db/boefje.json @@ -5,12 +5,5 @@ "consumes": [ "Network" ], - "environment_keys": [ - "DB_URL", - "DB_ACCESS_TOKEN", - "DB_ORGANIZATION_IDENTIFIER", - "DB_ENDPOINT_FORMAT", - "REQUESTS_CA_BUNDLE" - ], "scan_level": 0 } diff --git a/boefjes/boefjes/plugins/kat_leakix/boefje.json b/boefjes/boefjes/plugins/kat_leakix/boefje.json index 1410c1ff0e5..d285e41ee14 100644 --- a/boefjes/boefjes/plugins/kat_leakix/boefje.json +++ b/boefjes/boefjes/plugins/kat_leakix/boefje.json @@ -7,8 +7,5 @@ "IPAddressV6", "Hostname" ], - "environment_keys": [ - "LEAKIX_API" - ], "scan_level": 1 } diff --git a/boefjes/boefjes/plugins/kat_log4shell/boefje.json b/boefjes/boefjes/plugins/kat_log4shell/boefje.json index 6b2909ae7c7..77e9a312672 100644 --- a/boefjes/boefjes/plugins/kat_log4shell/boefje.json +++ b/boefjes/boefjes/plugins/kat_log4shell/boefje.json @@ -5,8 +5,5 @@ "consumes": [ "Hostname" ], - "environment_keys": [ - "REPLY_FQDN" - ], "scan_level": 4 } diff --git a/boefjes/boefjes/plugins/kat_masscan/boefje.json b/boefjes/boefjes/plugins/kat_masscan/boefje.json index b0681d7ba2a..fbdc03a6093 100644 --- a/boefjes/boefjes/plugins/kat_masscan/boefje.json +++ b/boefjes/boefjes/plugins/kat_masscan/boefje.json @@ -5,9 +5,5 @@ "consumes": [ "IPV4NetBlock" ], - "environment_keys": [ - "PORTS", - "MAX_RATE" - ], "scan_level": 2 } diff --git a/boefjes/boefjes/plugins/kat_maxmind_geoip/boefje.json b/boefjes/boefjes/plugins/kat_maxmind_geoip/boefje.json index ba6e2e43c02..678914b662a 100644 --- a/boefjes/boefjes/plugins/kat_maxmind_geoip/boefje.json +++ b/boefjes/boefjes/plugins/kat_maxmind_geoip/boefje.json @@ -6,9 +6,5 @@ "IPAddressV4", "IPAddressV6" ], - "scan_level": 1, - "environment_keys": [ - "MAXMIND_USER_ID", - "MAXMIND_LICENCE_KEY" - ] + "scan_level": 1 } diff --git a/boefjes/boefjes/plugins/kat_nmap_ip_range/boefje.json b/boefjes/boefjes/plugins/kat_nmap_ip_range/boefje.json index 4b4dc1a2d06..fe3baf38eb7 100644 --- a/boefjes/boefjes/plugins/kat_nmap_ip_range/boefje.json +++ b/boefjes/boefjes/plugins/kat_nmap_ip_range/boefje.json @@ -6,11 +6,5 @@ "IPV6NetBlock", "IPV4NetBlock" ], - "environment_keys": [ - "TOP_PORTS_TCP", - "TOP_PORTS_UDP", - "MIN_VLSM_IPV4", - "MIN_VLSM_IPV6" - ], "scan_level": 2 } diff --git a/boefjes/boefjes/plugins/kat_nmap_ports/boefje.json b/boefjes/boefjes/plugins/kat_nmap_ports/boefje.json index 506b6ffd478..87fdb592a06 100644 --- a/boefjes/boefjes/plugins/kat_nmap_ports/boefje.json +++ b/boefjes/boefjes/plugins/kat_nmap_ports/boefje.json @@ -6,8 +6,5 @@ "IPAddressV4", "IPAddressV6" ], - "environment_keys": [ - "PORTS" - ], "scan_level": 2 } diff --git a/boefjes/boefjes/plugins/kat_nmap_tcp/boefje.json b/boefjes/boefjes/plugins/kat_nmap_tcp/boefje.json index f3d46684aeb..2ab5f018393 100644 --- a/boefjes/boefjes/plugins/kat_nmap_tcp/boefje.json +++ b/boefjes/boefjes/plugins/kat_nmap_tcp/boefje.json @@ -6,9 +6,6 @@ "IPAddressV4", "IPAddressV6" ], - "environment_keys": [ - "TOP_PORTS" - ], "scan_level": 2, "oci_image": "ghcr.io/minvws/openkat/nmap:latest", "oci_arguments": [ diff --git a/boefjes/boefjes/plugins/kat_nmap_udp/boefje.json b/boefjes/boefjes/plugins/kat_nmap_udp/boefje.json index f9839e53e37..671ff93fe2a 100644 --- a/boefjes/boefjes/plugins/kat_nmap_udp/boefje.json +++ b/boefjes/boefjes/plugins/kat_nmap_udp/boefje.json @@ -6,9 +6,6 @@ "IPAddressV4", "IPAddressV6" ], - "environment_keys": [ - "TOP_PORTS_UDP" - ], "scan_level": 2, "oci_image": "ghcr.io/minvws/openkat/nmap:latest", "oci_arguments": [ diff --git a/boefjes/boefjes/plugins/kat_shodan/boefje.json b/boefjes/boefjes/plugins/kat_shodan/boefje.json index ad1e1c3787e..c1309e7e1ac 100644 --- a/boefjes/boefjes/plugins/kat_shodan/boefje.json +++ b/boefjes/boefjes/plugins/kat_shodan/boefje.json @@ -6,8 +6,5 @@ "IPAddressV4", "IPAddressV6" ], - "environment_keys": [ - "SHODAN_API" - ], "scan_level": 1 } diff --git a/boefjes/boefjes/plugins/kat_testssl_sh_ciphers/boefje.json b/boefjes/boefjes/plugins/kat_testssl_sh_ciphers/boefje.json index 91a7d6f9994..92626c95337 100644 --- a/boefjes/boefjes/plugins/kat_testssl_sh_ciphers/boefje.json +++ b/boefjes/boefjes/plugins/kat_testssl_sh_ciphers/boefje.json @@ -5,8 +5,5 @@ "consumes": [ "IPService" ], - "scan_level": 2, - "environment_keys": [ - "TIMEOUT" - ] + "scan_level": 2 } diff --git a/boefjes/boefjes/plugins/kat_webpage_analysis/boefje.json b/boefjes/boefjes/plugins/kat_webpage_analysis/boefje.json index c6c12b6e73c..e284ad6f61b 100644 --- a/boefjes/boefjes/plugins/kat_webpage_analysis/boefje.json +++ b/boefjes/boefjes/plugins/kat_webpage_analysis/boefje.json @@ -141,8 +141,5 @@ "video/x-msvideo", "video/x-sgi-movie" ], - "environment_keys": [ - "USERAGENT" - ], "scan_level": 2 } diff --git a/boefjes/boefjes/plugins/kat_wpscan/boefje.json b/boefjes/boefjes/plugins/kat_wpscan/boefje.json index 83658bb2d56..9c1d0b343da 100644 --- a/boefjes/boefjes/plugins/kat_wpscan/boefje.json +++ b/boefjes/boefjes/plugins/kat_wpscan/boefje.json @@ -5,8 +5,5 @@ "consumes": [ "SoftwareInstance" ], - "environment_keys": [ - "WP_SCAN_API" - ], "scan_level": 2 } diff --git a/boefjes/boefjes/sql/db_models.py b/boefjes/boefjes/sql/db_models.py index e8e9740f8f4..f9b32a9a9ea 100644 --- a/boefjes/boefjes/sql/db_models.py +++ b/boefjes/boefjes/sql/db_models.py @@ -75,7 +75,6 @@ class BoefjeInDB(SQL_BASE): # Job specifications consumes = Column(types.ARRAY(types.String(length=128)), default=lambda: [], nullable=False) produces = Column(types.ARRAY(types.String(length=128)), default=lambda: [], nullable=False) - environment_keys = Column(types.ARRAY(types.String(length=128)), default=lambda: [], nullable=False) schema = Column(types.JSON(), nullable=True) # Image specifications @@ -99,5 +98,4 @@ class NormalizerInDB(SQL_BASE): # Job specifications consumes = Column(types.ARRAY(types.String(length=128)), default=lambda: [], nullable=False) produces = Column(types.ARRAY(types.String(length=128)), default=lambda: [], nullable=False) - environment_keys = Column(types.ARRAY(types.String(length=128)), default=lambda: [], nullable=False) version = Column(types.String(length=16), nullable=True) diff --git a/boefjes/boefjes/sql/plugin_storage.py b/boefjes/boefjes/sql/plugin_storage.py index c9bcffbaa2f..263d035ab76 100644 --- a/boefjes/boefjes/sql/plugin_storage.py +++ b/boefjes/boefjes/sql/plugin_storage.py @@ -111,7 +111,6 @@ def to_boefje_in_db(boefje: Boefje) -> BoefjeInDB: consumes=boefje.consumes, produces=boefje.produces, schema=boefje.schema, - environment_keys=boefje.environment_keys, oci_image=boefje.oci_image, oci_arguments=boefje.oci_arguments, version=boefje.version, @@ -127,7 +126,6 @@ def to_normalizer_in_db(normalizer: Normalizer) -> NormalizerInDB: description=normalizer.description, consumes=normalizer.consumes, produces=normalizer.produces, - environment_keys=normalizer.environment_keys, version=normalizer.version, static=normalizer.static, ) @@ -144,7 +142,6 @@ def to_boefje(boefje_in_db: BoefjeInDB) -> Boefje: consumes=boefje_in_db.consumes, produces=boefje_in_db.produces, schema=boefje_in_db.schema, - environment_keys=boefje_in_db.environment_keys, oci_image=boefje_in_db.oci_image, oci_arguments=boefje_in_db.oci_arguments, version=boefje_in_db.version, @@ -161,7 +158,6 @@ def to_normalizer(normalizer_in_db: NormalizerInDB) -> Normalizer: description=normalizer_in_db.description, consumes=normalizer_in_db.consumes, produces=normalizer_in_db.produces, - environment_keys=normalizer_in_db.environment_keys, version=normalizer_in_db.version, static=normalizer_in_db.static, ) diff --git a/boefjes/tests/integration/test_migration_add_schema_field.py b/boefjes/tests/integration/test_migration_add_schema_field.py index 26fa13f0a70..6b1cacb1a4f 100644 --- a/boefjes/tests/integration/test_migration_add_schema_field.py +++ b/boefjes/tests/integration/test_migration_add_schema_field.py @@ -30,7 +30,7 @@ def setUp(self) -> None: str(boefje.scan_level), list(sorted(boefje.consumes)), list(sorted(boefje.produces)), - boefje.environment_keys, + ["RECORD_TYPES", "REMOTE_NS"], boefje.oci_image, boefje.oci_arguments, boefje.version, @@ -71,7 +71,7 @@ def test_fail_on_wrong_plugin_ids(self): "2", ["IPAddressV4", "IPAddressV6"], ["boefje/nmap-udp"], - ["TOP_PORTS_UDP"], + ["RECORD_TYPES", "REMOTE_NS"], "ghcr.io/minvws/openkat/nmap:latest", ["--open", "-T4", "-Pn", "-r", "-v10", "-sV", "-sU"], None, @@ -144,7 +144,7 @@ def test_fail_on_wrong_plugin_ids(self): "2", ["IPAddressV4", "IPAddressV6"], ["boefje/nmap-udp"], - ["TOP_PORTS_UDP"], + ["RECORD_TYPES", "REMOTE_NS"], "ghcr.io/minvws/openkat/nmap:latest", ["--open", "-T4", "-Pn", "-r", "-v10", "-sV", "-sU"], None, diff --git a/boefjes/tests/integration/test_sql_repositories.py b/boefjes/tests/integration/test_sql_repositories.py index ad4f804ad10..4dbe5d5c991 100644 --- a/boefjes/tests/integration/test_sql_repositories.py +++ b/boefjes/tests/integration/test_sql_repositories.py @@ -194,7 +194,6 @@ def test_rich_boefje_storage(self): version="v1.09", created=datetime.datetime(2010, 10, 10, 10, 10, 10, tzinfo=datetime.UTC), description="My Boefje", - environment_keys=["api_key", "TOKEN"], scan_level=4, consumes=["Internet"], produces=[ @@ -244,7 +243,6 @@ def test_rich_normalizer_storage(self): version="v1.19", created=datetime.datetime(2010, 10, 10, 10, 10, 10, tzinfo=datetime.UTC), description="My Normalizer", - environment_keys=["api_key", "TOKEN"], scan_level=4, consumes=["Internet"], produces=[ diff --git a/boefjes/tests/katalogus/boefjes_test_dir/kat_test/boefje.json b/boefjes/tests/katalogus/boefjes_test_dir/kat_test/boefje.json index 518d9804e4b..321d8541364 100644 --- a/boefjes/tests/katalogus/boefjes_test_dir/kat_test/boefje.json +++ b/boefjes/tests/katalogus/boefjes_test_dir/kat_test/boefje.json @@ -5,6 +5,5 @@ "consumes": [ "DNSZone" ], - "environment_keys": [], "scan_level": 1 } diff --git a/boefjes/tests/katalogus/boefjes_test_dir/kat_test/kat_test_2/boefje.json b/boefjes/tests/katalogus/boefjes_test_dir/kat_test/kat_test_2/boefje.json index 07c01db4d21..67dc80c70bf 100644 --- a/boefjes/tests/katalogus/boefjes_test_dir/kat_test/kat_test_2/boefje.json +++ b/boefjes/tests/katalogus/boefjes_test_dir/kat_test/kat_test_2/boefje.json @@ -8,6 +8,5 @@ "produces": [ "text/html" ], - "environment_keys": [], "scan_level": 1 } diff --git a/boefjes/tests/katalogus/boefjes_test_dir/kat_test/kat_test_4/boefje.json b/boefjes/tests/katalogus/boefjes_test_dir/kat_test/kat_test_4/boefje.json index b00b312a437..d419ffd073f 100644 --- a/boefjes/tests/katalogus/boefjes_test_dir/kat_test/kat_test_4/boefje.json +++ b/boefjes/tests/katalogus/boefjes_test_dir/kat_test/kat_test_4/boefje.json @@ -8,6 +8,5 @@ "produces": [ "text/html" ], - "environment_keys": [], "scan_level": 1 } diff --git a/boefjes/tests/modules/dummy_boefje/boefje.json b/boefjes/tests/modules/dummy_boefje/boefje.json index 791539e798f..81b94057368 100644 --- a/boefjes/tests/modules/dummy_boefje/boefje.json +++ b/boefjes/tests/modules/dummy_boefje/boefje.json @@ -3,6 +3,5 @@ "name": "dummy", "description": "", "consumes": [], - "produces": [], - "environment_keys": [] + "produces": [] } diff --git a/boefjes/tests/modules/dummy_boefje_environment/boefje.json b/boefjes/tests/modules/dummy_boefje_environment/boefje.json index 838cef1dae7..2ad7fdddefa 100644 --- a/boefjes/tests/modules/dummy_boefje_environment/boefje.json +++ b/boefjes/tests/modules/dummy_boefje_environment/boefje.json @@ -3,6 +3,5 @@ "name": "dummy", "description": "", "consumes": [], - "produces": [], - "environment_keys": [] + "produces": [] } diff --git a/boefjes/tests/modules/dummy_boefje_environment_with_pycache/boefje.json b/boefjes/tests/modules/dummy_boefje_environment_with_pycache/boefje.json index 28f7090b2f0..009f2116c0f 100644 --- a/boefjes/tests/modules/dummy_boefje_environment_with_pycache/boefje.json +++ b/boefjes/tests/modules/dummy_boefje_environment_with_pycache/boefje.json @@ -3,6 +3,5 @@ "name": "dummy", "description": "", "consumes": [], - "produces": [], - "environment_keys": [] + "produces": [] } diff --git a/boefjes/tests/modules/dummy_boefje_invalid_signature/boefje.json b/boefjes/tests/modules/dummy_boefje_invalid_signature/boefje.json index 47f325f09e9..3fca5d08ce7 100644 --- a/boefjes/tests/modules/dummy_boefje_invalid_signature/boefje.json +++ b/boefjes/tests/modules/dummy_boefje_invalid_signature/boefje.json @@ -3,6 +3,5 @@ "name": "dummy", "description": "", "consumes": [], - "produces": [], - "environment_keys": [] + "produces": [] } diff --git a/boefjes/tests/modules/dummy_boefje_missing_run/boefje.json b/boefjes/tests/modules/dummy_boefje_missing_run/boefje.json index 838cef1dae7..2ad7fdddefa 100644 --- a/boefjes/tests/modules/dummy_boefje_missing_run/boefje.json +++ b/boefjes/tests/modules/dummy_boefje_missing_run/boefje.json @@ -3,6 +3,5 @@ "name": "dummy", "description": "", "consumes": [], - "produces": [], - "environment_keys": [] + "produces": [] } diff --git a/boefjes/tests/modules/dummy_boefje_runtime_exception/boefje.json b/boefjes/tests/modules/dummy_boefje_runtime_exception/boefje.json index 197a6aa43f0..e7dcc24f271 100644 --- a/boefjes/tests/modules/dummy_boefje_runtime_exception/boefje.json +++ b/boefjes/tests/modules/dummy_boefje_runtime_exception/boefje.json @@ -3,6 +3,5 @@ "name": "dummy", "description": "", "consumes": [], - "produces": [], - "environment_keys": [] + "produces": [] } diff --git a/boefjes/tests/modules/dummy_oci_boefje_no_main/boefje.json b/boefjes/tests/modules/dummy_oci_boefje_no_main/boefje.json index 2ef96a5e2da..6e8c1bc69b3 100644 --- a/boefjes/tests/modules/dummy_oci_boefje_no_main/boefje.json +++ b/boefjes/tests/modules/dummy_oci_boefje_no_main/boefje.json @@ -4,6 +4,5 @@ "description": "", "consumes": [], "produces": [], - "environment_keys": [], "oci_image": "openkat/test" } diff --git a/boefjes/tests/test_tasks.py b/boefjes/tests/test_tasks.py index f0cbf5fac56..f9c319897a1 100644 --- a/boefjes/tests/test_tasks.py +++ b/boefjes/tests/test_tasks.py @@ -198,6 +198,6 @@ def test_correct_local_runner_hash(self) -> None: assert Path(path / "__pycache__/pytest__init__.cpython-311.pyc").is_file() assert Path(path / "__pycache__/pytest_main.cpython-311.pyc").is_file() - assert boefje_resource_1.runnable_hash == "4bae5e869bd17759bf750bf357fdee1eedff5768d407248b8ddcb63d0abdee19" - assert boefje_resource_2.runnable_hash == "e0c46fb915778b06f69cd5934b2157733cef84d67fc89c563c5bbd965ad52949" - assert boefje_resource_3.runnable_hash == "0185c90d3d1a4dc1490ec918374f84e8a480101f98db14d434638147dd82c626" + assert boefje_resource_1.runnable_hash == "7450ebc13f6856df925e90cd57f2769468a39723f18ba835749982b484564ec9" + assert boefje_resource_2.runnable_hash == "874e154b572a0315cfe4329bd3b756bf9cad77f6a87bb9b9b9bb6296f1d4b520" + assert boefje_resource_3.runnable_hash == "70c0b0ad3b2e70fd79e52dcf043096a50ed69db1359df0011499e66ab1510bbe" diff --git a/docs/source/developer_documentation/development_tutorial/creating_a_boefje.md b/docs/source/developer_documentation/development_tutorial/creating_a_boefje.md index 719c885c7de..791150a657d 100644 --- a/docs/source/developer_documentation/development_tutorial/creating_a_boefje.md +++ b/docs/source/developer_documentation/development_tutorial/creating_a_boefje.md @@ -28,7 +28,6 @@ This file contains information about our boefje. For example, this file contains "name": "Hello Katty", "description": "A simple boefje that can say hello", "consumes": ["IPAddressV4", "IPAddressV6"], - "environment_keys": ["MESSAGE", "NUMBER"], "scan_level": 0, "oci_image": "openkat/hello-katty" } @@ -38,7 +37,6 @@ This file contains information about our boefje. For example, this file contains - **`name`**: A name to display in the KAT-alogus. - **`description`**: A description in the KAT-alogus. - **`consumes`**: A list of OOI types that trigger the boefje to run. Whenever one of these OOIs gets added, this boefje will run with that OOI. In our case, we will run our boefje whenever a new IPAddressV4 or IPAddressV6 gets added. -- **`environment_keys`**: A list of inputs provided by the user. More information about these inputs can be found in `schema.json`. OpenKAT also provides some environment variables. - **`scan_level`**: A scan level that decides how intrusively this boefje will scan the provided OOIs. Since we will not make any external requests our boefje will have a scan level of 0. - **`oci_image`**: The name of the docker image that is provided inside `boefjes/Makefile` diff --git a/docs/source/introduction/makeyourown.rst b/docs/source/introduction/makeyourown.rst index 226b2dfcb5c..2a6bf7c3cc0 100644 --- a/docs/source/introduction/makeyourown.rst +++ b/docs/source/introduction/makeyourown.rst @@ -99,7 +99,6 @@ An example: "IPPort", "CVEFindingType" ], - "environment_keys": ["SHODAN_API"], "scan_level": 1 } @@ -111,7 +110,7 @@ Using the template as a base, you can create a boefje.json for your own boefje. NOTE: If your boefje needs object-types that do not exist, you will need to create those. This will be described later in the document. -The boefje also uses variables from the web interface, like the Shodan the API key. There are more possibilities, you can be creative with this and let the end user bring settings from the web interface. Use *environment_keys* for this. The schema.json file defines the metadata for these fields. +The boefje also uses variables from the web interface, like the Shodan the API key. There are more possibilities, you can be creative with this and let the end user bring settings from the web interface. Use environment variables or KATalogus settings for this. The schema.json file defines the metadata for these fields. schema.json diff --git a/mula/scheduler/models/plugin.py b/mula/scheduler/models/plugin.py index f51c4952972..a30908af8a3 100644 --- a/mula/scheduler/models/plugin.py +++ b/mula/scheduler/models/plugin.py @@ -12,7 +12,6 @@ class Plugin(BaseModel): authors: list[str] | None = None created: datetime.datetime | None = None description: str | None = None - environment_keys: list[str] | None = None related: list[str] | None = None scan_level: int | None = None consumes: str | list[str] diff --git a/rocky/katalogus/client.py b/rocky/katalogus/client.py index 9c293a670ba..8bb9f7fc463 100644 --- a/rocky/katalogus/client.py +++ b/rocky/katalogus/client.py @@ -24,7 +24,6 @@ class Plugin(BaseModel): authors: str | None = None created: str | None = None description: str | None = None - environment_keys: list[str] | None = None related: list[str] = Field(default_factory=list) enabled: bool type: str diff --git a/rocky/tests/conftest.py b/rocky/tests/conftest.py index a00690c900e..30d388e58a8 100644 --- a/rocky/tests/conftest.py +++ b/rocky/tests/conftest.py @@ -1775,7 +1775,6 @@ def boefje_dns_records(): authors=None, created=None, description="Fetch the DNS record(s) of a hostname", - environment_keys=None, related=[], enabled=True, type="boefje", diff --git a/rocky/tests/stubs/katalogus_boefjes.json b/rocky/tests/stubs/katalogus_boefjes.json index 40c79a6cb1b..d7d6ba63991 100644 --- a/rocky/tests/stubs/katalogus_boefjes.json +++ b/rocky/tests/stubs/katalogus_boefjes.json @@ -5,9 +5,6 @@ "version": null, "created": null, "description": "Use BinaryEdge to find open ports with vulnerabilities that are found on that port", - "environment_keys": [ - "BINARYEDGE_API" - ], "enabled": true, "type": "boefje", "scan_level": 2, @@ -33,7 +30,6 @@ "version": null, "created": null, "description": "Scan SSL certificates of websites", - "environment_keys": [], "enabled": false, "type": "boefje", "scan_level": 1, @@ -51,7 +47,6 @@ "version": null, "created": null, "description": "Scan SSL/TLS versions of websites", - "environment_keys": [], "enabled": false, "type": "boefje", "scan_level": 2, @@ -70,9 +65,6 @@ "version": null, "created": null, "description": "Scan wordpress sites", - "environment_keys": [ - "WP_SCAN_API" - ], "enabled": false, "type": "boefje", "scan_level": 2, @@ -91,7 +83,6 @@ "version": null, "created": null, "description": null, - "environment_keys": [], "enabled": true, "type": "normalizer", "consumes": [ diff --git a/rocky/tests/stubs/katalogus_normalizers.json b/rocky/tests/stubs/katalogus_normalizers.json index 182b5bbb549..dfbca8cb726 100644 --- a/rocky/tests/stubs/katalogus_normalizers.json +++ b/rocky/tests/stubs/katalogus_normalizers.json @@ -5,7 +5,6 @@ "version": null, "created": null, "description": null, - "environment_keys": [], "enabled": true, "type": "normalizer", "consumes": [ @@ -22,7 +21,6 @@ "version": null, "created": null, "description": null, - "environment_keys": [], "enabled": true, "type": "normalizer", "consumes": [ @@ -42,7 +40,6 @@ "version": null, "created": null, "description": null, - "environment_keys": [], "enabled": true, "type": "normalizer", "consumes": [ @@ -59,7 +56,6 @@ "version": null, "created": null, "description": null, - "environment_keys": [], "enabled": true, "type": "normalizer", "consumes": [ @@ -83,7 +79,6 @@ "version": null, "created": null, "description": null, - "environment_keys": [], "enabled": true, "type": "normalizer", "consumes": [ diff --git a/rocky/tests/test_indemnification.py b/rocky/tests/test_indemnification.py index 785679ca7aa..e6d628b323e 100644 --- a/rocky/tests/test_indemnification.py +++ b/rocky/tests/test_indemnification.py @@ -14,7 +14,6 @@ def test_update_clearance_level(rf, client_member, httpx_mock): "authors": None, "created": None, "description": "Use BinaryEdge to find open ports with vulnerabilities that are found on that port", - "environment_keys": ["BINARYEDGE_API"], "related": None, "enabled": True, "type": "boefje", From cb0a1b8585f290f6c815b90f6e34b637ab070b47 Mon Sep 17 00:00:00 2001 From: Donny Peeters Date: Tue, 20 Aug 2024 08:32:37 +0200 Subject: [PATCH 09/17] Add missing migration file --- ...fc302b852_remove_environment_keys_field.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 boefjes/boefjes/migrations/versions/870fc302b852_remove_environment_keys_field.py diff --git a/boefjes/boefjes/migrations/versions/870fc302b852_remove_environment_keys_field.py b/boefjes/boefjes/migrations/versions/870fc302b852_remove_environment_keys_field.py new file mode 100644 index 00000000000..200a1255d05 --- /dev/null +++ b/boefjes/boefjes/migrations/versions/870fc302b852_remove_environment_keys_field.py @@ -0,0 +1,30 @@ +"""Remove environment keys field + +Revision ID: 870fc302b852 +Revises: 5be152459a7b +Create Date: 2024-08-20 06:08:20.943924 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '870fc302b852' +down_revision = '5be152459a7b' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('boefje', 'environment_keys') + op.drop_column('normalizer', 'environment_keys') + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('normalizer', sa.Column('environment_keys', postgresql.ARRAY(sa.VARCHAR(length=128)), autoincrement=False, nullable=False)) + op.add_column('boefje', sa.Column('environment_keys', postgresql.ARRAY(sa.VARCHAR(length=128)), autoincrement=False, nullable=False)) + # ### end Alembic commands ### From a224b131b1d2f521839659b3bfe50cf899c71a47 Mon Sep 17 00:00:00 2001 From: Donny Peeters Date: Tue, 20 Aug 2024 15:38:41 +0200 Subject: [PATCH 10/17] Validate the final environment against the schema before running the boefje. --- boefjes/boefjes/api.py | 2 +- boefjes/boefjes/job_handler.py | 13 ++++++++++-- ...fc302b852_remove_environment_keys_field.py | 21 ++++++++++++------- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/boefjes/boefjes/api.py b/boefjes/boefjes/api.py index e21d2fe486f..22542898dfd 100644 --- a/boefjes/boefjes/api.py +++ b/boefjes/boefjes/api.py @@ -151,7 +151,7 @@ def get_task(task_id, scheduler_client): def create_boefje_meta(task, local_repository): boefje = task.data.boefje boefje_resource = local_repository.by_id(boefje.id) - environment = get_environment_settings(task.data) + environment = get_environment_settings(task.data, boefje_resource.schema) organization = task.data.organization input_ooi = task.data.input_ooi diff --git a/boefjes/boefjes/job_handler.py b/boefjes/boefjes/job_handler.py index f086446aacb..7f08078319a 100644 --- a/boefjes/boefjes/job_handler.py +++ b/boefjes/boefjes/job_handler.py @@ -7,6 +7,8 @@ import httpx import structlog from httpx import HTTPError +from jsonschema.exceptions import ValidationError +from jsonschema.validators import validate from boefjes.clients.bytes_client import BytesAPIClient from boefjes.config import settings @@ -15,6 +17,7 @@ from boefjes.local_repository import LocalPluginRepository from boefjes.plugins.models import _default_mime_types from boefjes.runtime_interfaces import BoefjeJobRunner, Handler, NormalizerJobRunner +from boefjes.storage.interfaces import SettingsNotConformingToSchema from octopoes.api.models import Affirmation, Declaration, Observation from octopoes.connector.octopoes import OctopoesAPIConnector from octopoes.models import Reference, ScanLevel @@ -35,7 +38,7 @@ def get_octopoes_api_connector(org_code: str) -> OctopoesAPIConnector: return OctopoesAPIConnector(str(settings.octopoes_api), org_code) -def get_environment_settings(boefje_meta: BoefjeMeta) -> dict[str, str]: +def get_environment_settings(boefje_meta: BoefjeMeta, schema: dict | None = None) -> dict[str, str]: try: katalogus_api = str(settings.katalogus_api).rstrip("/") response = httpx.get( @@ -58,6 +61,12 @@ def get_environment_settings(boefje_meta: BoefjeMeta) -> dict[str, str]: for key, value in settings_from_katalogus.items: new_env[key] = value + if schema is not None: + try: + validate(instance=new_env, schema=schema) + except ValidationError as e: + raise SettingsNotConformingToSchema(boefje_meta.boefje.id, e.message) from e + return new_env @@ -99,7 +108,7 @@ def handle(self, boefje_meta: BoefjeMeta) -> None: boefje_meta.arguments["input"] = ooi.serialize() boefje_meta.runnable_hash = boefje_resource.runnable_hash - boefje_meta.environment = get_environment_settings(boefje_meta) + boefje_meta.environment = get_environment_settings(boefje_meta, boefje_resource.schema) mime_types = _default_mime_types(boefje_meta.boefje) diff --git a/boefjes/boefjes/migrations/versions/870fc302b852_remove_environment_keys_field.py b/boefjes/boefjes/migrations/versions/870fc302b852_remove_environment_keys_field.py index 200a1255d05..7bdfbd9e024 100644 --- a/boefjes/boefjes/migrations/versions/870fc302b852_remove_environment_keys_field.py +++ b/boefjes/boefjes/migrations/versions/870fc302b852_remove_environment_keys_field.py @@ -5,26 +5,33 @@ Create Date: 2024-08-20 06:08:20.943924 """ -from alembic import op + import sqlalchemy as sa +from alembic import op from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. -revision = '870fc302b852' -down_revision = '5be152459a7b' +revision = "870fc302b852" +down_revision = "5be152459a7b" branch_labels = None depends_on = None def upgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.drop_column('boefje', 'environment_keys') - op.drop_column('normalizer', 'environment_keys') + op.drop_column("boefje", "environment_keys") + op.drop_column("normalizer", "environment_keys") # ### end Alembic commands ### def downgrade() -> None: # ### commands auto generated by Alembic - please adjust! ### - op.add_column('normalizer', sa.Column('environment_keys', postgresql.ARRAY(sa.VARCHAR(length=128)), autoincrement=False, nullable=False)) - op.add_column('boefje', sa.Column('environment_keys', postgresql.ARRAY(sa.VARCHAR(length=128)), autoincrement=False, nullable=False)) + op.add_column( + "normalizer", + sa.Column("environment_keys", postgresql.ARRAY(sa.VARCHAR(length=128)), autoincrement=False, nullable=False), + ) + op.add_column( + "boefje", + sa.Column("environment_keys", postgresql.ARRAY(sa.VARCHAR(length=128)), autoincrement=False, nullable=False), + ) # ### end Alembic commands ### From 2f2c5515ed744e6c2cb8a2ba257bc49c81ff354e Mon Sep 17 00:00:00 2001 From: Donny Peeters Date: Tue, 20 Aug 2024 16:35:51 +0200 Subject: [PATCH 11/17] Add more logging to migration script --- boefjes/boefjes/local_repository.py | 2 +- ...be152459a7b_introduce_schema_field_to_boefje_model.py | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/boefjes/boefjes/local_repository.py b/boefjes/boefjes/local_repository.py index 1370c6a8779..a29fb92ba35 100644 --- a/boefjes/boefjes/local_repository.py +++ b/boefjes/boefjes/local_repository.py @@ -56,7 +56,7 @@ def schema(self, id_: str) -> dict | None: path = boefjes[id_].path / SCHEMA_FILE if not path.exists(): - logger.debug("Did not find schema for boefje %s", boefjes[id_]) + logger.debug("Did not find schema for boefje %s", id_) return None return json.loads(path.read_text()) diff --git a/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py b/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py index c462c72d0b5..2cd63145aa5 100644 --- a/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py +++ b/boefjes/boefjes/migrations/versions/5be152459a7b_introduce_schema_field_to_boefje_model.py @@ -33,8 +33,13 @@ def upgrade() -> None: session = sessionmaker(bind=op.get_bind())() with create_plugin_storage(session) as storage: + plugins = local_repo.get_all() + logger.info("Found %s plugins", len(plugins)) + for plugin in local_repo.get_all(): - if schema := local_repo.schema(plugin.id): + schema = local_repo.schema(plugin.id) + + if schema: try: # This way we avoid the safeguard that updating static boefjes is not allowed instance = storage._db_boefje_instance_by_id(plugin.id) @@ -44,6 +49,8 @@ def upgrade() -> None: except PluginNotFound: logger.info("No database entry for plugin %s", plugin.id) continue + else: + logger.info("No schema present for plugin %s", plugin.id) session.close() # ### end Alembic commands ### From 78759634afb8b99bacd9a8121dc8715996b23554 Mon Sep 17 00:00:00 2001 From: Donny Peeters Date: Tue, 27 Aug 2024 11:29:52 +0200 Subject: [PATCH 12/17] Add comment about importance of the schema enforcement --- boefjes/boefjes/job_handler.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/boefjes/boefjes/job_handler.py b/boefjes/boefjes/job_handler.py index 7f08078319a..92b6e481f60 100644 --- a/boefjes/boefjes/job_handler.py +++ b/boefjes/boefjes/job_handler.py @@ -61,6 +61,9 @@ def get_environment_settings(boefje_meta: BoefjeMeta, schema: dict | None = None for key, value in settings_from_katalogus.items: new_env[key] = value + # The schema, besides dictating that a boefje cannot run if it is not matched, also provides an extra safeguard: + # it is possible to inject code if arguments are passed that "escape" the call to a tool. Hence, we should enforce + # the schema somewhere and make the schema as strict as possible. if schema is not None: try: validate(instance=new_env, schema=schema) From f8d13886fafca9525ee9ae473df7b6bab708a335 Mon Sep 17 00:00:00 2001 From: Donny Peeters Date: Thu, 29 Aug 2024 10:46:29 +0200 Subject: [PATCH 13/17] Use dict comprehension --- boefjes/boefjes/job_handler.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/boefjes/boefjes/job_handler.py b/boefjes/boefjes/job_handler.py index 92b6e481f60..e3748c58a91 100644 --- a/boefjes/boefjes/job_handler.py +++ b/boefjes/boefjes/job_handler.py @@ -51,12 +51,7 @@ def get_environment_settings(boefje_meta: BoefjeMeta, schema: dict | None = None raise settings_from_katalogus = response.json() - new_env = {} - - for key, value in os.environ.items(): - if key.startswith("BOEFJE_"): - env_key = key.split("BOEFJE_", 1)[1] - new_env[env_key] = value + new_env = {key.split("BOEFJE_", 1)[1]: value for key, value in os.environ.items() if key.startswith("BOEFJE_")} for key, value in settings_from_katalogus.items: new_env[key] = value From fff81f346e9f121d3ddf07d21545c55eeb772c23 Mon Sep 17 00:00:00 2001 From: Donny Peeters Date: Thu, 29 Aug 2024 15:02:33 +0200 Subject: [PATCH 14/17] Add tests for getting the environment from the KATalogus. Only add environment keys from the schema. --- boefjes/.ci/docker-compose.yml | 14 +++++-- .../.ci/wiremock/mappings/organisations.json | 15 -------- boefjes/boefjes/dependencies/plugins.py | 6 +-- boefjes/boefjes/job_handler.py | 13 +++++-- boefjes/tests/conftest.py | 38 +++++++++++++++++-- boefjes/tests/integration/test_bench.py | 4 +- .../tests/integration/test_get_environment.py | 19 ++++++++++ 7 files changed, 79 insertions(+), 30 deletions(-) delete mode 100644 boefjes/.ci/wiremock/mappings/organisations.json create mode 100644 boefjes/tests/integration/test_get_environment.py diff --git a/boefjes/.ci/docker-compose.yml b/boefjes/.ci/docker-compose.yml index 407d1678336..f3dc4ecc244 100644 --- a/boefjes/.ci/docker-compose.yml +++ b/boefjes/.ci/docker-compose.yml @@ -8,6 +8,7 @@ services: command: sh -c 'python -m pytest -v tests/integration' depends_on: - ci_katalogus-db + - ci_katalogus env_file: - .ci/.env.test volumes: @@ -103,8 +104,15 @@ services: hard: 262144 ci_katalogus: - image: "docker.io/wiremock/wiremock:2.34.0" - volumes: - - .ci/wiremock:/home/wiremock + build: + context: .. + dockerfile: boefjes/Dockerfile + args: + - ENVIRONMENT=dev + command: uvicorn boefjes.katalogus.root:app --host 0.0.0.0 --port 8080 + depends_on: + - ci_katalogus-db env_file: - .ci/.env.test + volumes: + - .:/app/boefjes diff --git a/boefjes/.ci/wiremock/mappings/organisations.json b/boefjes/.ci/wiremock/mappings/organisations.json deleted file mode 100644 index 13124e6222f..00000000000 --- a/boefjes/.ci/wiremock/mappings/organisations.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "request": { - "method": "GET", - "url": "/v1/organisations" - }, - "response": { - "status": 200, - "jsonBody": { - "_dev": { - "id": "_dev", - "name": "Development Organisation" - } - } - } -} diff --git a/boefjes/boefjes/dependencies/plugins.py b/boefjes/boefjes/dependencies/plugins.py index c0324a0fc55..080ff6d7da7 100644 --- a/boefjes/boefjes/dependencies/plugins.py +++ b/boefjes/boefjes/dependencies/plugins.py @@ -122,14 +122,14 @@ def _put_boefje(self, boefje_id: str) -> None: try: self.plugin_storage.boefje_by_id(boefje_id) - except PluginNotFound: + except PluginNotFound as e: try: plugin = self.local_repo.by_id(boefje_id) except KeyError: - raise + raise e if plugin.type != "boefje": - raise + raise e self.plugin_storage.create_boefje(plugin) def _put_normalizer(self, normalizer_id: str) -> None: diff --git a/boefjes/boefjes/job_handler.py b/boefjes/boefjes/job_handler.py index e3748c58a91..8b0d979d256 100644 --- a/boefjes/boefjes/job_handler.py +++ b/boefjes/boefjes/job_handler.py @@ -50,11 +50,18 @@ def get_environment_settings(boefje_meta: BoefjeMeta, schema: dict | None = None logger.exception("Error getting environment settings") raise + allowed_keys = schema.get("properties", []) if schema else [] + new_env = { + key.split("BOEFJE_", 1)[1]: value + for key, value in os.environ.items() + if key.startswith("BOEFJE_") and key in allowed_keys + } + settings_from_katalogus = response.json() - new_env = {key.split("BOEFJE_", 1)[1]: value for key, value in os.environ.items() if key.startswith("BOEFJE_")} - for key, value in settings_from_katalogus.items: - new_env[key] = value + for key, value in settings_from_katalogus.items(): + if key in allowed_keys: + new_env[key] = value # The schema, besides dictating that a boefje cannot run if it is not matched, also provides an extra safeguard: # it is possible to inject code if arguments are passed that "escape" the call to a tool. Hence, we should enforce diff --git a/boefjes/tests/conftest.py b/boefjes/tests/conftest.py index d94dc074344..079185699ed 100644 --- a/boefjes/tests/conftest.py +++ b/boefjes/tests/conftest.py @@ -16,12 +16,16 @@ from boefjes.clients.bytes_client import BytesAPIClient from boefjes.clients.scheduler_client import Queue, SchedulerClientInterface, Task, TaskStatus from boefjes.config import Settings, settings +from boefjes.dependencies.plugins import PluginService from boefjes.job_handler import bytes_api_client from boefjes.job_models import BoefjeMeta, NormalizerMeta +from boefjes.local_repository import get_local_repository from boefjes.models import Organisation from boefjes.runtime_interfaces import Handler, WorkerManager +from boefjes.sql.config_storage import SQLConfigStorage, create_encrypter from boefjes.sql.db import SQL_BASE, get_engine from boefjes.sql.organisation_storage import SQLOrganisationStorage +from boefjes.sql.plugin_storage import SQLPluginStorage from octopoes.api.models import Declaration, Observation from octopoes.connector.octopoes import OctopoesAPIConnector from octopoes.models import OOI @@ -142,21 +146,47 @@ def api(tmp_path): @pytest.fixture -def organisation_repository(): +def session(): engine = get_engine() session = sessionmaker(bind=engine)() - yield SQLOrganisationStorage(session, settings) + yield session session.execute(";".join([f"TRUNCATE TABLE {t} CASCADE" for t in SQL_BASE.metadata.tables])) + session.commit() session.close() @pytest.fixture -def organisation(organisation_repository) -> Organisation: +def organisation_storage(session): + return SQLOrganisationStorage(session, settings) + + +@pytest.fixture +def config_storage(session): + return SQLConfigStorage(session, create_encrypter()) + + +@pytest.fixture +def plugin_storage(session): + return SQLPluginStorage(session, settings) + + +@pytest.fixture +def local_repo(): + return get_local_repository() + + +@pytest.fixture +def plugin_service(plugin_storage, config_storage, local_repo): + return PluginService(plugin_storage, config_storage, local_repo) + + +@pytest.fixture +def organisation(organisation_storage) -> Organisation: organisation = Organisation(id="test", name="Test org") - with organisation_repository as repo: + with organisation_storage as repo: repo.create(organisation) return organisation diff --git a/boefjes/tests/integration/test_bench.py b/boefjes/tests/integration/test_bench.py index a77d5732092..14123016904 100644 --- a/boefjes/tests/integration/test_bench.py +++ b/boefjes/tests/integration/test_bench.py @@ -19,7 +19,7 @@ def test_migration( octopoes_api_connector: OctopoesAPIConnector, bytes_client: BytesAPIClient, - organisation_repository: SQLOrganisationStorage, + organisation_storage: SQLOrganisationStorage, valid_time, ): octopoes_api_connector.session._timeout.connect = 60 @@ -87,7 +87,7 @@ def test_migration( bytes_client.save_normalizer_meta(normalizer_meta) total_oois = octopoes_api_connector.list_objects(set(), valid_time).count - total_processed, total_failed = upgrade(organisation_repository, valid_time) + total_processed, total_failed = upgrade(organisation_storage, valid_time) assert total_processed == len(hostname_range) assert total_failed == 0 diff --git a/boefjes/tests/integration/test_get_environment.py b/boefjes/tests/integration/test_get_environment.py new file mode 100644 index 00000000000..39d98fa77b2 --- /dev/null +++ b/boefjes/tests/integration/test_get_environment.py @@ -0,0 +1,19 @@ +from boefjes.dependencies.plugins import PluginService +from boefjes.job_handler import get_environment_settings +from boefjes.models import Organisation +from tests.loading import get_boefje_meta + + +def test_environment_builds_up_correctly(plugin_service: PluginService, organisation: Organisation): + plugin_id = "dns-records" + schema = plugin_service.schema(plugin_id) + environment = get_environment_settings(get_boefje_meta(boefje_id=plugin_id), schema) + + assert environment == {} + + with plugin_service: + plugin_service.upsert_settings({"RECORD_TYPES": "CNAME,AAAA", "WRONG": "3"}, organisation.id, plugin_id) + + environment = get_environment_settings(get_boefje_meta(boefje_id=plugin_id), schema) + + assert environment == {"RECORD_TYPES": "CNAME,AAAA"} From 716bbb9e2146973005c85681afb1e1f93c03faff Mon Sep 17 00:00:00 2001 From: Donny Peeters Date: Thu, 29 Aug 2024 15:42:07 +0200 Subject: [PATCH 15/17] skip integration test when unit testing --- boefjes/tests/integration/test_get_environment.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/boefjes/tests/integration/test_get_environment.py b/boefjes/tests/integration/test_get_environment.py index 39d98fa77b2..54e24d95d46 100644 --- a/boefjes/tests/integration/test_get_environment.py +++ b/boefjes/tests/integration/test_get_environment.py @@ -1,9 +1,17 @@ +import os + +import pytest + from boefjes.dependencies.plugins import PluginService from boefjes.job_handler import get_environment_settings from boefjes.models import Organisation from tests.loading import get_boefje_meta +if os.environ.get("CI") != "1": + pytest.skip() + + def test_environment_builds_up_correctly(plugin_service: PluginService, organisation: Organisation): plugin_id = "dns-records" schema = plugin_service.schema(plugin_id) From 0c92934c4784db48ff8070d928dbe522d7d2701f Mon Sep 17 00:00:00 2001 From: Donny Peeters Date: Thu, 5 Sep 2024 09:30:48 +0200 Subject: [PATCH 16/17] Fix tests and style Signed-off-by: Donny Peeters --- boefjes/pyproject.toml | 1 + boefjes/tests/integration/test_get_environment.py | 7 +------ 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/boefjes/pyproject.toml b/boefjes/pyproject.toml index 31c11e2cf98..b1ca215be31 100644 --- a/boefjes/pyproject.toml +++ b/boefjes/pyproject.toml @@ -87,6 +87,7 @@ line-length = 120 transform-concats = true [tool.pytest.ini_options] +markers = ["slow: marks tests as slow"] addopts = "-m 'not slow'" env = [ "D:KATALOGUS_DB_URI=postgresql://postgres:postgres@ci_katalogus-db:5432/ci_katalogus", diff --git a/boefjes/tests/integration/test_get_environment.py b/boefjes/tests/integration/test_get_environment.py index 54e24d95d46..5aa209c0493 100644 --- a/boefjes/tests/integration/test_get_environment.py +++ b/boefjes/tests/integration/test_get_environment.py @@ -1,5 +1,3 @@ -import os - import pytest from boefjes.dependencies.plugins import PluginService @@ -8,10 +6,7 @@ from tests.loading import get_boefje_meta -if os.environ.get("CI") != "1": - pytest.skip() - - +@pytest.mark.skipif("os.environ.get('CI') != '1'") def test_environment_builds_up_correctly(plugin_service: PluginService, organisation: Organisation): plugin_id = "dns-records" schema = plugin_service.schema(plugin_id) From 762687d145c1f15466c4b3169e437d7e8c66664a Mon Sep 17 00:00:00 2001 From: Donny Peeters Date: Thu, 5 Sep 2024 10:49:26 +0200 Subject: [PATCH 17/17] Documentation update for settings and environment variables for boefjes. Signed-off-by: Donny Peeters --- .../source/developer_documentation/boefjes.md | 4 +- .../development_tutorial/creating_a_boefje.md | 23 ++++++++++- docs/source/introduction/makeyourown.rst | 40 ++++++++++++++----- 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/docs/source/developer_documentation/boefjes.md b/docs/source/developer_documentation/boefjes.md index 553dfddd74e..2a8c4737138 100644 --- a/docs/source/developer_documentation/boefjes.md +++ b/docs/source/developer_documentation/boefjes.md @@ -51,13 +51,13 @@ although this would be more complicated. ## Environment variables By design, Boefjes do not have access to the host system's environment variables. -If a Boefje requires access to an environment variable (e.g. `HTTP_PROXY` or `USER_AGENT`), it should note as such in its `boefje.json` manifest. +If a Boefje requires access to an environment variable (e.g. `HTTP_PROXY` or `USER_AGENT`), it should note as such in its `schema.json`. The system-wide variables can be set as environment variable to the boefjes runner by prefixing it with `BOEFJE_`. This is to prevent a Boefje from accessing variables it should not have access to, such as secrets. To illustrate: if `BOEFJE_HTTP_PROXY=https://proxy:8080` environment variable is configured, the Boefje can access it as `HTTP_PROXY`. This feature can also be used to set default values for KAT-alogus settings. For example, configuring the `BOEFJE_TOP_PORTS` environment variable will set the default value for the `TOP_PORTS` setting (used by the nmap Boefje). -This default value can be overridden by setting any value for `TOP_PORTS` in the KAT-alogus. +This default value can be overridden per organisation by setting any value for `TOP_PORTS` in the KAT-alogus. ## Technical Design diff --git a/docs/source/developer_documentation/development_tutorial/creating_a_boefje.md b/docs/source/developer_documentation/development_tutorial/creating_a_boefje.md index d7d92790eda..65e4d750f18 100644 --- a/docs/source/developer_documentation/development_tutorial/creating_a_boefje.md +++ b/docs/source/developer_documentation/development_tutorial/creating_a_boefje.md @@ -50,8 +50,9 @@ This file contains a description of the boefje to explain to the user what this ## `schema.json` -This JSON is used as the basis for a form for the user. When the user enables this boefje they can get the option to give extra information. For example, it can contain an API key that the script requires. -This is an example of a `schema.json` file: +To allow the user to pass information to a boefje runtime, add a schema.json file to the folder where your boefje is located. +This can be used, for example, to add an API key that the script requires. +It must conform to the https://json-schema.org/ standard, for example: ```json { @@ -76,6 +77,24 @@ This is an example of a `schema.json` file: } ``` +This JSON defines which additional environment variables can be set for the boefje. +There are two ways to do this. +Firstly, using this schema as an example, you could set the `BOEFJE_MESSAGE` environment variable in the boefje runtime. +Prepending the key with `BOEFJE_` provides an extra safeguard. +Note that setting an environment variable means this configuration is applied to _all_ organisations. +Secondly, if you want to avoid setting environment variables or configure it for just one organisation, +it is also possible to set the API key through the KAT-alogus. +Navigate to the boefje detail page of Shodan to find the schema as a form. +These values take precedence over the environment variables. +This is also a way to test whether the schema is properly understood for your boefje. +If encryption has been set up for the KATalogus, all keys provided through this form are stored encrypted in the database. + +Although the Shodan boefje defines an API key, the schema could contain anything your boefje needs. +However, OpenKAT currently officially only supports "string" and "integer" properties that are one level deep. +Because keys may be passed through environment variables, +schema validation does not happen right away when settings are added or boefjes enabled. +Schema validation happens right before spawning a boefje, meaning your tasks will fail if is missing a required variable. + - `title`: This should always contain a string containing 'Arguments'. - `type`: This should always contain a string containing 'object'. - `description`: A description of the boefje explaining in short what it can do. This will both be displayed inside the KAT-alogus and on the boefje's page. diff --git a/docs/source/introduction/makeyourown.rst b/docs/source/introduction/makeyourown.rst index 2a6bf7c3cc0..04d9979b694 100644 --- a/docs/source/introduction/makeyourown.rst +++ b/docs/source/introduction/makeyourown.rst @@ -15,7 +15,7 @@ What types of plugins are available? There are three types of plugins, deployed by OpenKAT to collect information, translate it into objects for the data model and then analyze it. Boefjes gather facts, Whiskers structure the information for the data model and Bits determine what you want to think about it; they are the business rules. Each action is cut into the smallest possible pieces. -- Boefjes gather factual information, such as by calling an external scanning tool like nmap or using a database like shodan. +- Boefjes gather factual information, such as by calling an external scanning tool like nmap or using a database like Shodan. - Whiskers analyze the information and turn it into objects for the data model in Octopoes. @@ -51,7 +51,7 @@ To make a finding about a CVE to a software version, you need multiple objects: Existing boefjes ================ -The existing boefjes can be viewed via the KATalog in OpenKAT and are on `GitHUB in the boefjes repository. `_ +The existing boefjes can be viewed via the KATalog in OpenKAT and are on `GitHub in the boefjes repository. `_ Object-types, classes and objects. ---------------------------------- @@ -65,7 +65,7 @@ When we talk about objects, we usually mean instance of such a class, or a 'reco Example: the boefje for shodan ------------------------------ -The boefje calling shodan gives a good first impression of its capabilities. The boefje includes the following files. +The boefje calling Shodan gives a good first impression of its capabilities. The boefje includes the following files. - __init.py__, which remains empty - boefje.json, containing the normalizers and object-types in the data model @@ -75,7 +75,7 @@ The boefje calling shodan gives a good first impression of its capabilities. The - normalize.py, the normalizer (whiskers) - normalizer.json, which accepts and supplies the normalizer - requirements.txt, with the requirements for this boefje -- schema.json, settings for the web interface +- schema.json, settings for the web interface formatted as JSON Schema boefje.json *********** @@ -116,9 +116,9 @@ The boefje also uses variables from the web interface, like the Shodan the API k schema.json *********** -To allow the user to add information through the web interface, add the schema.json file to the folder where your boefje is located. This json is used as the basis for a form for the user. In this case, it can contain an API key, but it can also be something else that your boefje responds to. This Schema must conform to the https://json-schema.org/ standard. - -Currently, however, OpenKAT only understands fairly shallow structures. For example, not all field types are supported, nor does OpenKAT understand references. You can test whether your Schema is neatly understood by checking the settings form in Rocky's KAT catalog for your boefje. +To allow the user to pass information to a boefje runtime, add a schema.json file to the folder where your boefje is located. +This can be used, for example, to add an API key that the script requires. +It must conform to the https://json-schema.org/ standard, see for example the schema.json for Shodan: .. code-block:: json @@ -138,6 +138,24 @@ Currently, however, OpenKAT only understands fairly shallow structures. For exam ] } +This JSON defines which additional environment variables can be set for the boefje. +There are two ways to do this. +Firstly, using the Shodan schema as an example, you could set the ``BOEFJE_SHODAN_API`` environment variable in the boefje runtime. +Prepending the key with ``BOEFJE_`` provides an extra safeguard. +Note that setting an environment variable means this configuration is applied to _all_ organisations. +Secondly, if you want to avoid setting environment variables or configure it for just one organisation, +it is also possible to set the API key through the KAT-alogus. +Navigate to the boefje detail page of Shodan to find the schema as a form. +These values take precedence over the environment variables. +This is also a way to test whether the schema is properly understood for your boefje. +If encryption has been set up for the KATalogus, all keys provided through this form are stored encrypted in the database. + +Although the Shodan boefje defines an API key, the schema could contain anything your boefje needs. +However, OpenKAT currently only officially supports "string" and "integer" properties that are one level deep. +Because keys may be passed through environment variables, +schema validation does not happen right away when settings are added or boefjes enabled. +Schema validation happens right before spawning a boefje, meaning your tasks will fail if is missing a required variable. + main.py ******* @@ -189,8 +207,8 @@ normalizer.json The normalizers translate the output of a boefje into objects that fit the data model. Each normalizer defines what input it accepts and what object-types it provides. -In the case of the shodan normalizer, -it involves the entire output of the shodan boefje (created based on IP address), +In the case of the Shodan normalizer, +it involves the entire output of the Shodan boefje (created based on IP address), where findings and ports come out. The `normalizer.json` defines these: .. code-block:: json @@ -294,7 +312,7 @@ If you want to add an object-type, you need to know with which other object-type Adding object-types to the data model requires an addition in octopus. Here, an object-type can be added if it is connected to other object-types. Visually this is well understood using the `Graph explorer `_. The actual code is `in the Octopoes repo `_. -As with the boefje for shodan, here we again use the example from the functional documentation. A description of an object-type in the data model, in this case an IPPort, looks like this: +As with the boefje for Shodan, here we again use the example from the functional documentation. A description of an object-type in the data model, in this case an IPPort, looks like this: .. code-block:: python @@ -520,4 +538,4 @@ There are a number of ways to add your new boefje to OpenKAT. - Do a commit of your code, after review it can be included - Add an image server in the KAT catalog config file ``*`` -``*`` If you want to add an image server, join the ongoing project to standardize and describe it. The idea is to add an image server in the KAT catalog config file that has artifacts from your boefjes and normalizers as outputted by the Github CI. +``*`` If you want to add an image server, join the ongoing project to standardize and describe it. The idea is to add an image server in the KAT catalog config file that has artifacts from your boefjes and normalizers as outputted by the GitHub CI.