From b89633958b0c8efc1a80153a11a88379c64cfa51 Mon Sep 17 00:00:00 2001 From: Rich Piazza Date: Mon, 25 Mar 2024 19:53:25 -0400 Subject: [PATCH] remove database connection, start on insert statements --- stix2/datastore/relational_db/__init__.py | 35 -- stix2/datastore/relational_db/add_method.py | 21 ++ .../datastore/relational_db/input_creation.py | 270 ++++++++++++++ .../postgres_database_connection.py | 17 - .../datastore/relational_db/relational_db.py | 61 ++-- .../relational_db/relational_db_testing.py | 5 +- .../datastore/relational_db/table_creation.py | 333 +++++++++--------- stix2/datastore/relational_db/utils.py | 35 ++ stix2/v21/base.py | 2 +- 9 files changed, 527 insertions(+), 252 deletions(-) delete mode 100644 stix2/datastore/relational_db/__init__.py create mode 100644 stix2/datastore/relational_db/add_method.py create mode 100644 stix2/datastore/relational_db/input_creation.py delete mode 100644 stix2/datastore/relational_db/postgres_database_connection.py create mode 100644 stix2/datastore/relational_db/utils.py diff --git a/stix2/datastore/relational_db/__init__.py b/stix2/datastore/relational_db/__init__.py deleted file mode 100644 index 1adb8f10..00000000 --- a/stix2/datastore/relational_db/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -from abc import abstractmethod - - -class DatabaseConnection(): - def __init__(self): - pass - - @abstractmethod - def execute(self, sql_statement, bindings): - """ - - Args: - sql_statement: the statement to execute - bindings: a dictionary where the keys are the column names and the values are the data to be - inserted into that column of the table - - Returns: - - """ - pass - - @abstractmethod - def create_insert_statement(self, table_name, bindings, **kwargs): - """ - - Args: - table_name: the name of the table to be inserted into - bindings: a dictionary where the keys are the column names and the values are the data to be - inserted into that column of the table - **kwargs: other specific arguments - - Returns: - - """ - pass diff --git a/stix2/datastore/relational_db/add_method.py b/stix2/datastore/relational_db/add_method.py new file mode 100644 index 00000000..ad08a2e0 --- /dev/null +++ b/stix2/datastore/relational_db/add_method.py @@ -0,0 +1,21 @@ +# _ALLOWABLE_CLASSES = get_all_subclasses(_STIXBase21) +# +# +# _ALLOWABLE_CLASSES.extend(get_all_subclasses(Property)) + + +def create_real_method_name(name, klass_name): + # if klass_name not in _ALLOWABLE_CLASSES: + # raise NameError + # split_up_klass_name = re.findall('[A-Z][^A-Z]*', klass_name) + # split_up_klass_name.remove("Type") + return name + "_" + "_".join([x.lower() for x in klass_name]) + + +def add_method(cls): + def decorator(fn): + method_name = fn.__name__ + fn.__name__ = create_real_method_name(fn.__name__, cls.__name__) + setattr(cls, method_name, fn) + return fn + return decorator diff --git a/stix2/datastore/relational_db/input_creation.py b/stix2/datastore/relational_db/input_creation.py new file mode 100644 index 00000000..ded5cbf5 --- /dev/null +++ b/stix2/datastore/relational_db/input_creation.py @@ -0,0 +1,270 @@ +from collections import OrderedDict + +from sqlalchemy import ( + TIMESTAMP, + CheckConstraint, + Column, + ForeignKey, + Table, + Text, + create_engine, + insert, +) + + +from stix2.properties import ( + DictionaryProperty, EmbeddedObjectProperty, EnumProperty, ExtensionsProperty, FloatProperty, + IntegerProperty, ListProperty, ReferenceProperty, StringProperty) + +from stix2.datastore.relational_db.utils import SCO_COMMON_PROPERTIES, 
SDO_COMMON_PROPERTIES, canonicalize_table_name + + +def single_value(p): + return not(isinstance(p, (EmbeddedObjectProperty, + ListProperty, + DictionaryProperty))) + + +def table_property(prop, name, core_properties): + if isinstance(prop, ListProperty) and name not in core_properties: + contained_property = prop.contained + return not isinstance(contained_property, (StringProperty, IntegerProperty, FloatProperty)) + elif isinstance(prop, DictionaryProperty) and name not in core_properties: + return True + else: + return False + + +def embedded_object_list_property(prop, name, core_properties): + if isinstance(prop, ListProperty) and name not in core_properties: + contained_property = prop.contained + return isinstance(contained_property, EmbeddedObjectProperty) + else: + return False + + +def array_property(prop, name, core_properties): + if isinstance(prop, ListProperty) and name not in core_properties: + contained_property = prop.contained + return isinstance(contained_property, (StringProperty, IntegerProperty, FloatProperty, EnumProperty)) + else: + return False + + +def derive_column_name(prop): + contained_property = prop.contained + if isinstance(contained_property, ReferenceProperty): + return "ref_id" + elif isinstance(contained_property, StringProperty): + return "value" + + +def generate_insert_for_array_in_table(table, property_name, values, prop, foreign_key_value): + + bindings = { + "id": foreign_key_value + } + + for idx, item in enumerate(values): + item_binding_name = f"item{idx}" + + bindings[item_binding_name] = item + + return [insert(table).values(bindings)] + + +def generate_single_values(stix_object, properties, core_properties=[]): + bindings = OrderedDict() + for name, prop in properties.items(): + if (single_value(prop) and (name == 'id' or name not in core_properties) or + array_property(prop, name, core_properties)): + if name in stix_object and name != "type": + bindings[name] = stix_object[name] if not array_property(prop, name, core_properties) else "{" + ",".join( + ['"' + x + '"' for x in stix_object[name]]) + "}" + return bindings + + +def generate_insert_for_embedded_object(type_name, item, foreign_key_value): + bindings = generate_single_values(item, item._properties) + bindings["id"] = foreign_key_value + sql = f"INSERT INTO {canonicalize_table_name(type_name, item._type)}" \ + f" ({','.join(bindings.keys())})" \ + f" VALUES ({','.join(values)}, %(id)s )" + + print("sql:", sql) + print("embedded:", bindings) + return [(sql, bindings)] + + +def generate_insert_for_dictionary(item, dictionary_table, foreign_key_value, value_types): + bindings = {"id": foreign_key_value} + + for idx, (name, value) in enumerate(item.items()): + name_binding = f"name{idx}" + if len(value_types) == 1: + value_binding = f"value{idx}" + elif isinstance(value, int): + value_binding = f"integer_value{idx}" + else: + value_binding = f"string_value{idx}" + + bindings[name_binding] = name + bindings[value_binding] = value + + return [insert(dictionary_table).values(bindings)] + + +def generate_insert_for_embedded_objects(type_name, values, foreign_key_value): + sql_bindings_tuples = list() + for item in values: + sql_bindings_tuples.extend(generate_insert_for_embedded_object(type_name, item, foreign_key_value)) + return sql_bindings_tuples + + +def generate_insert_for_hashes(hashes, hashes_table, foreign_key_value): + bindings = {"id": foreign_key_value} + + for idx, (hash_name, hash_value) in enumerate(hashes.items()): + hash_name_binding_name = "hash_name" + str(idx) + 
hash_value_binding_name = "hash_value" + str(idx) + + bindings[hash_name_binding_name] = hash_name + bindings[hash_value_binding_name] = hash_value + + return [insert(hashes_table).values(bindings)] + + +def generate_insert_for_external_references(data_sink, stix_object): + insert_statements = list() + object_table = data_sink.tables_dictionary["common.external_references"] + for er in stix_object["external_references"]: + bindings = {"id": stix_object["id"]} + for prop in ["source_name", "description", "url", "external_id"]: + if prop in er: + bindings[prop] = er[prop] + er_insert_statement = insert(object_table).values(bindings) + insert_statements.append(er_insert_statement) + + if "hashes" in er: + hashes_table = data_sink.tables_dictionary[canonicalize_table_name("external_references_hashes", "sdo")] + insert_statements.extend(generate_insert_for_hashes(er["hashes"], + hashes_table, + stix_object["id"])) + + return insert_statements + + +def generate_insert_for_granular_markings(data_sink, stix_object, granular_markings_table): + granular_markings = stix_object["granular_markings"] + bindings = { + "id": stix_object["id"] + } + for idx, granular_marking in enumerate(granular_markings): + lang_binding_name = f"lang{idx}" + marking_ref_binding_name = f"marking_ref{idx}" + selectors_binding_name = f"selectors{idx}" + + bindings[lang_binding_name] = granular_marking.get("lang") + bindings[marking_ref_binding_name] = granular_marking.get("marking_ref") + bindings[selectors_binding_name] = granular_marking.get("selectors") + + return [insert(granular_markings_table).values(bindings)] + + +def generate_insert_for_extensions(extensions, foreign_key_value, type_name, core_properties): + sql_bindings_tuples = list() + for name, ex in extensions.items(): + sql_bindings_tuples.extend(generate_insert_for_subtype_extension(name, + ex, + foreign_key_value, + type_name, + core_properties)) + return sql_bindings_tuples + + +def generate_insert_for_core(data_sink, stix_object, core_properties, schema_name): + if schema_name == "sdo": + core_table = data_sink.tables_dictionary["common.core_sdo"] + else: + core_table = data_sink.tables_dictionary["common.core_sco"] + insert_statements = list() + core_bindings = {} + + for prop_name, value in stix_object.items(): + + if prop_name in core_properties: + # stored in separate tables, skip here + if prop_name not in {"object_marking_refs", "granular_markings", "external_references", "type"}: + core_bindings[prop_name] = value + + core_insert_statement = insert(core_table).values(core_bindings) + insert_statements.append(core_insert_statement) + + if "object_marking_refs" in stix_object: + if schema_name == "sdo": + object_markings_ref_table = data_sink.tables_dictionary["common.object_marking_refs_sdo"] + else: + object_markings_ref_table = data_sink.tables_dictionary["common.object_marking_refs_sco"] + insert_statements.extend(generate_insert_for_array_in_table(data_sink, stix_object, object_markings_ref_table)) + + # Granular markings + if "granular_markings" in stix_object: + if schema_name == "sdo": + granular_marking_table = data_sink.tables_dictionary["common.granular_marking_sdo"] + else: + granular_marking_table = data_sink.tables_dictionary["common.granular_marking_sco"] + granular_input_statements = generate_insert_for_granular_markings(data_sink, + stix_object.granular_markings, + granular_marking_table) + insert_statements.extend(granular_input_statements) + + + return insert_statements + + +def generate_insert_for_object(data_sink, stix_object, 
schema_name, foreign_key_value=None): + insert_statements = list() + stix_id = stix_object["id"] + if schema_name == "sco": + core_properties = SCO_COMMON_PROPERTIES + else: + core_properties = SDO_COMMON_PROPERTIES + type_name = stix_object["type"] + table_name = canonicalize_table_name(type_name, schema_name) + object_table = data_sink.tables_dictionary[table_name] + properties = stix_object._properties + insert_statements.extend(generate_insert_for_core(data_sink, stix_object, core_properties, schema_name)) + + bindings = generate_single_values(stix_object, properties, core_properties) + object_insert_statement = insert(object_table).values(bindings) + insert_statements.append(object_insert_statement) + + for name, prop in stix_object._properties.items(): + if isinstance(prop, DictionaryProperty) and not name == "extensions": + dictionary_table_name = canonicalize_table_name(type_name + "_" + name, schema_name) + dictionary_table = data_sink.tables_dictionary[dictionary_table_name] + insert_statements.extend(generate_insert_for_dictionary(stix_object[name], dictionary_table, stix_id)) + + if "external_references" in stix_object: + insert_statements.extend(generate_insert_for_external_references(data_sink, stix_object, "sdo")) + + if "extensions" in stix_object: + for ex in stix_object["extensions"]: + insert_statements.extend(generate_insert_for_object(data_sink, ex, schema_name, stix_id)) + for name, prop in properties.items(): + if table_property(prop, name, core_properties): + if name in stix_object: + if embedded_object_list_property(prop, name, core_properties): + insert_statements.extend(generate_insert_for_embedded_objects(name, + stix_object[name], + stix_object["id"])) + elif isinstance(prop, ExtensionsProperty): + pass + else: + insert_statements.extend(generate_insert_for_array_in_table(stix_object["type"], + name, + stix_object[name], + properties[name], + stix_object["id"] )) + return insert_statements + diff --git a/stix2/datastore/relational_db/postgres_database_connection.py b/stix2/datastore/relational_db/postgres_database_connection.py deleted file mode 100644 index 25265897..00000000 --- a/stix2/datastore/relational_db/postgres_database_connection.py +++ /dev/null @@ -1,17 +0,0 @@ -import postgres -from sqlalchemy import create_engine - -from stix2.datastore.relational_db import DatabaseConnection - - -class PostgresDatabaseConnection(DatabaseConnection): - - def __init__(self, host, dbname, user): - self.db = postgres.Postgres(url=f"host={host} dbname={dbname} user={user}") - self.engine = create_engine(f"postgresql://{host}/{dbname}", max_identifier_length=200) - - def execute(self, sql_statement, bindings): - self.db.run(sql_statement, parameters=bindings) - - def create_insert_statement(self, table_name, bindings, **kwargs): - return f"INSERT INTO {table_name} ({','.join(bindings.keys())}) VALUES ({','.join(kwargs['values'])})" diff --git a/stix2/datastore/relational_db/relational_db.py b/stix2/datastore/relational_db/relational_db.py index ae31d495..30bc6a04 100644 --- a/stix2/datastore/relational_db/relational_db.py +++ b/stix2/datastore/relational_db/relational_db.py @@ -1,13 +1,13 @@ -from sqlalchemy import MetaData +from sqlalchemy import MetaData, create_engine from sqlalchemy.schema import CreateTable from stix2.base import _STIXBase from stix2.datastore import DataSink -from stix2.datastore.relational_db.table_creation import ( - create_core_tables, generate_object_table, -) +from stix2.datastore.relational_db.table_creation import create_core_tables, 
generate_object_table +from stix2.datastore.relational_db.input_creation import generate_insert_for_object + from stix2.parsing import parse -from stix2.v21.base import _DomainObject, _Extension, _Observable, _RelationshipObject +from stix2.v21.base import (_DomainObject, _Extension, _Observable, _RelationshipObject,) def _get_all_subclasses(cls): @@ -19,11 +19,7 @@ def _get_all_subclasses(cls): return all_subclasses -def insert_object(store, stix_obj, is_sdo): - pass - - -def _add(store, stix_data, allow_custom=True, version=None): +def _add(store, stix_data, allow_custom=True, version="2.1"): """Add STIX objects to MemoryStore/Sink. Adds STIX objects to an in-memory dictionary for fast lookup. @@ -57,7 +53,7 @@ def _add(store, stix_data, allow_custom=True, version=None): else: stix_obj = parse(stix_data, allow_custom, version) - insert_object(store, stix_obj, isinstance(stix_obj, _Observable)) + store.insert_object(stix_obj) class RelationalDBSink(DataSink): @@ -85,13 +81,18 @@ class RelationalDBSink(DataSink): """ def __init__( - self, database_connection, allow_custom=True, version=None, - instantiate_database=False, + self, database_connection_url, allow_custom=True, version=None, + instantiate_database=True, ): super(RelationalDBSink, self).__init__() self.allow_custom = allow_custom self.metadata = MetaData() - self.database_connection = database_connection + self.database_connection = create_engine(database_connection_url) + + self.tables = self._create_table_objects() + self.tables_dictionary = dict() + for t in self.tables: + self.tables_dictionary[t.name] = t if instantiate_database: self._instantiate_database() @@ -99,30 +100,38 @@ def __init__( def _create_table_objects(self): tables = create_core_tables(self.metadata) for stix_class in _get_all_subclasses(_DomainObject): - new_tables = generate_object_table(stix_class, self.metadata, True) + new_tables = generate_object_table(stix_class, self.metadata, "sdo") tables.extend(new_tables) for stix_class in _get_all_subclasses(_RelationshipObject): - new_tables = generate_object_table(stix_class, self.metadata, True) + new_tables = generate_object_table(stix_class, self.metadata, "sro") tables.extend(new_tables) for stix_class in _get_all_subclasses(_Observable): - tables.extend(generate_object_table(stix_class, self.metadata, False)) + tables.extend(generate_object_table(stix_class, self.metadata, "sco")) for stix_class in _get_all_subclasses(_Extension): - if hasattr(stix_class, "_applies_to"): - is_sdo = stix_class._applies_to == "sdo" - else: - is_sdo = False - tables.extend(generate_object_table(stix_class, self.metadata, is_sdo, is_extension=True)) + if stix_class.extension_type not in ["new-sdo", "new-sco", "new-sro"]: + if hasattr(stix_class, "_applies_to"): + schema_name = stix_class._applies_to + else: + schema_name = "sco" + tables.extend(generate_object_table(stix_class, self.metadata, schema_name, is_extension=True)) return tables def _instantiate_database(self): - self._create_table_objects() self.metadata.create_all(self.database_connection.engine) def generate_stix_schema(self): - tables = self._create_table_objects() - for t in tables: + for t in self.tables: print(CreateTable(t).compile(self.database_connection.engine)) def add(self, stix_data, version=None): - _add(self, stix_data, self.allow_custom, version) + _add(self, stix_data) add.__doc__ = _add.__doc__ + + def insert_object(self, stix_object): + schema_name = "sdo" if "created" in stix_object else "sco" + with self.database_connection.begin() as trans: + 
statements = generate_insert_for_object(self, stix_object, schema_name) + for stmt in statements: + print("executing: ", stmt) + trans.execute(stmt) + trans.commit() diff --git a/stix2/datastore/relational_db/relational_db_testing.py b/stix2/datastore/relational_db/relational_db_testing.py index 6859070f..695f735b 100644 --- a/stix2/datastore/relational_db/relational_db_testing.py +++ b/stix2/datastore/relational_db/relational_db_testing.py @@ -3,9 +3,6 @@ import pytz import stix2 -from stix2.datastore.relational_db.postgres_database_connection import ( - PostgresDatabaseConnection, -) from stix2.datastore.relational_db.relational_db import RelationalDBSink directory_stix_object = stix2.Directory( @@ -97,7 +94,7 @@ def file_example_with_PDFExt_Object(): def main(): - store = RelationalDBSink(PostgresDatabaseConnection("localhost", "stix-data-sink", "rpiazza")) + store = RelationalDBSink("postgresql://localhost/stix-data-sink") store.generate_stix_schema() diff --git a/stix2/datastore/relational_db/table_creation.py b/stix2/datastore/relational_db/table_creation.py index 5a7a872b..7b935fd1 100644 --- a/stix2/datastore/relational_db/table_creation.py +++ b/stix2/datastore/relational_db/table_creation.py @@ -5,6 +5,7 @@ Integer, LargeBinary, Table, Text, ) +from stix2.datastore.relational_db.add_method import add_method from stix2.properties import ( BinaryProperty, BooleanProperty, DictionaryProperty, EmbeddedObjectProperty, EnumProperty, ExtensionsProperty, FloatProperty, @@ -14,43 +15,7 @@ ) from stix2.v21.common import KillChainPhase -# Helps us know which data goes in core, and which in a type-specific table. -SCO_COMMON_PROPERTIES = { - "id", - # "type", - "spec_version", - "object_marking_refs", - "granular_markings", - "defanged", -} - - -# Helps us know which data goes in core, and which in a type-specific table. -SDO_COMMON_PROPERTIES = { - "id", - # "type", - "spec_version", - "object_marking_refs", - "granular_markings", - "defanged", - "created", - "modified", - "created_by_ref", - "revoked", - "labels", - "confidence", - "lang", - "external_references", -} - - -def canonicalize_table_name(table_name, is_sdo): - if is_sdo: - full_name = ("sdo" if is_sdo else "sco") + "." 
+ table_name - else: - full_name = table_name - return full_name.replace("-", "_") - +from stix2.datastore.relational_db.utils import SCO_COMMON_PROPERTIES, SDO_COMMON_PROPERTIES, canonicalize_table_name def aux_table_property(prop, name, core_properties): if isinstance(prop, ListProperty) and name not in core_properties: @@ -70,6 +35,61 @@ def derive_column_name(prop): return "value" +def create_object_markings_refs_table(metadata, sco_or_sdo): + return create_ref_table(metadata, + {"marking_definition"}, + "common.object_marking_refs_" + sco_or_sdo, + "common.core_" + sco_or_sdo + ".id", + 0) + + +def create_ref_table(metadata, specifics, table_name, foreign_key_name, auth_type=0): + columns = list() + columns.append( + Column( + "id", + Text, + ForeignKey( + foreign_key_name, + ondelete="CASCADE", + ), + nullable=False, + ), + ) + columns.append(ref_column("ref_id", specifics, auth_type)) + return Table(table_name, metadata, *columns) + + +def create_hashes_table(name, metadata, schema_name, table_name): + columns = list() + columns.append( + Column( + "id", + Text, + ForeignKey( + canonicalize_table_name(table_name, schema_name) + ".id", + ondelete="CASCADE", + ), + nullable=False, + ), + ) + columns.append( + Column( + "hash_name", + Text, + nullable=False, + ), + ) + columns.append( + Column( + "hash_value", + Text, + nullable=False, + ), + ) + return Table(canonicalize_table_name(table_name + "_" + name, schema_name), metadata, *columns) + + def create_granular_markings_table(metadata, sco_or_sdo): return Table( "common.granular_marking_" + sco_or_sdo, @@ -102,7 +122,30 @@ def create_granular_markings_table(metadata, sco_or_sdo): ) -def create_core_table(metadata, sco_or_sdo): +def create_external_references_tables(metadata): + columns = [ + Column( + "id", + Text, + ForeignKey("common.core_sdo" + ".id", ondelete="CASCADE"), + CheckConstraint( + "id ~ '^[a-z][a-z0-9-]+[a-z0-9]--[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$'", + # noqa: E131 + ), + primary_key=True + ), + Column("source_name", Text), + Column("description", Text), + Column("url", Text), + Column("external_id", Text), + ] + return [ + Table("common.external_references", metadata, *columns), + # create_hashes_table("hashes", metadata, "common", "external_references") + ] + + +def create_core_table(metadata, schema_name): columns = [ Column( "id", @@ -116,7 +159,7 @@ def create_core_table(metadata, sco_or_sdo): Column("spec_version", Text, default="2.1"), Column("object_marking_ref", ARRAY(Text)), ] - if sco_or_sdo == "sdo": + if schema_name == "sdo": sdo_columns = [ Column( "created_by_ref", @@ -136,35 +179,12 @@ def create_core_table(metadata, sco_or_sdo): else: columns.append(Column("defanged", Boolean, default=False)), return Table( - "common.core_" + sco_or_sdo, + "common.core_" + schema_name, metadata, *columns ) -# _ALLOWABLE_CLASSES = get_all_subclasses(_STIXBase21) -# -# -# _ALLOWABLE_CLASSES.extend(get_all_subclasses(Property)) - - -def create_real_method_name(name, klass_name): - # if klass_name not in _ALLOWABLE_CLASSES: - # raise NameError - # split_up_klass_name = re.findall('[A-Z][^A-Z]*', klass_name) - # split_up_klass_name.remove("Type") - return name + "_" + "_".join([x.lower() for x in klass_name]) - - -def add_method(cls): - def decorator(fn): - method_name = fn.__name__ - fn.__name__ = create_real_method_name(fn.__name__, cls.__name__) - setattr(cls, method_name, fn) - return fn - return decorator - - @add_method(KillChainPhase) def 
determine_sql_type(self): return None @@ -252,14 +272,14 @@ def generate_table_information(self, name, **kwargs): # noqa: F811 @add_method(IDProperty) def generate_table_information(self, name, **kwargs): # noqa: F811 - foreign_key_column = "common.core_sdo.id" if kwargs.get("is_sdo") else "common.core_sco.id" + foreign_key_column = "common.core_sdo.id" if kwargs.get("schema") else "common.core_sco.id" table_name = kwargs.get("table_name") return Column( name, Text, ForeignKey(foreign_key_column, ondelete="CASCADE"), CheckConstraint( - f"{name} ~ '^{table_name}--[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$'", # noqa: E131 + f"{name} ~ '^{table_name}" + "--[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$'", # noqa: E131 ), primary_key=True, nullable=not (self.required), @@ -292,15 +312,14 @@ def generate_table_information(self, name, **kwargs): # noqa: F811 @add_method(DictionaryProperty) -def generate_table_information(self, name, metadata, is_sdo, table_name, is_extension=False, **kwargs): # noqa: F811 +def generate_table_information(self, name, metadata, schema_name, table_name, is_extension=False, **kwargs): # noqa: F811 columns = list() columns.append( Column( "id", - Integer if is_extension else Text, - ForeignKey(canonicalize_table_name(table_name, is_sdo) + ".id", ondelete="CASCADE"), - primary_key=True, + Text, + ForeignKey(canonicalize_table_name(table_name, schema_name) + ".id", ondelete="CASCADE"), ), ) columns.append( @@ -340,39 +359,12 @@ def generate_table_information(self, name, metadata, is_sdo, table_name, is_exte Integer, ), ) - return [Table(canonicalize_table_name(table_name + "_" + name, is_sdo), metadata, *columns)] + return [Table(canonicalize_table_name(table_name + "_" + name, schema_name), metadata, *columns)] @add_method(HashesProperty) -def generate_table_information(self, name, metadata, is_sdo, table_name, is_extension=False, **kwargs): # noqa: F811 - - columns = list() - columns.append( - Column( - "id", - Integer if is_extension else Text, - ForeignKey( - canonicalize_table_name(table_name, is_sdo) + ".id", - ondelete="CASCADE", - ), - primary_key=True, - ), - ) - columns.append( - Column( - "hash_name", - Text, - nullable=False, - ), - ) - columns.append( - Column( - "hash_value", - Text, - nullable=False, - ), - ) - return [Table(canonicalize_table_name(table_name + "_" + name, is_sdo), metadata, *columns)] +def generate_table_information(self, name, metadata, schema_name, table_name, is_extension=False, **kwargs): # noqa: F811 + return [create_hashes_table(name, metadata, schema_name, table_name)] @add_method(HexProperty) @@ -391,14 +383,14 @@ def generate_table_information(self, name, **kwargs): # noqa: F811 @add_method(ExtensionsProperty) -def generate_table_information(self, name, metadata, is_sdo, table_name, **kwargs): # noqa: F811 +def generate_table_information(self, name, metadata, schema_name, table_name, **kwargs): # noqa: F811 columns = list() columns.append( Column( "id", Text, - ForeignKey(canonicalize_table_name(table_name, is_sdo) + ".id", ondelete="CASCADE"), - primary_key=True, + ForeignKey(canonicalize_table_name(table_name, schema_name) + ".id", ondelete="CASCADE"), + nullable=False, ), ) columns.append( @@ -408,26 +400,24 @@ def generate_table_information(self, name, metadata, is_sdo, table_name, **kwarg nullable=False, ), ) - columns.append( - Column( - "ext_table_id", - Integer, - nullable=False, - ), - ) - return 
[Table(canonicalize_table_name(table_name + "_" + name, is_sdo), metadata, *columns)] + return [Table(canonicalize_table_name(table_name + "_" + name, schema_name), metadata, *columns)] -def ref_column(name, specifics): +def ref_column(name, specifics, auth_type=0): if specifics: - allowed_types = "|".join(specifics) - return Column( - name, - Text, - CheckConstraint( - f"{name} ~ '^({allowed_types})--[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$'", # noqa: E131 - ), - ) + types = "|".join(specifics) + if auth_type == 0: + constraint = \ + CheckConstraint( + f"{name} ~ '^({types})" + "--[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$'", # noqa: E131 + ) + else: + constraint = \ + CheckConstraint( + f"(NOT({name} ~ '^({types})') AND ({name} ~" + "'--[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[1-5][0-9a-fA-F]{3}-[89abAB][0-9a-fA-F]{3}-[0-9a-fA-F]{12}$')", + # noqa: E131 + ) + return Column(name, Text, constraint) else: return Column( name, @@ -438,12 +428,12 @@ def ref_column(name, specifics): @add_method(ReferenceProperty) def generate_table_information(self, name, **kwargs): # noqa: F811 - return ref_column(name, self.specifics) + return ref_column(name, self.specifics, self.auth_type) @add_method(EmbeddedObjectProperty) -def generate_table_information(self, name, metadata, is_sdo, table_name, is_extension=False, is_list=False, **kwargs): # noqa: F811 - return generate_object_table(self.type, metadata, is_sdo, table_name, is_extension, True, is_list) +def generate_table_information(self, name, metadata, schema_name, table_name, is_extension=False, is_list=False, **kwargs): # noqa: F811 + return generate_object_table(self.type, metadata, schema_name, table_name, is_extension, True, is_list) @add_method(ObjectReferenceProperty) @@ -452,54 +442,51 @@ def generate_table_information(self, name, **kwargs): # noqa: F811 raise ValueError(f"Property {name} in {table_name} is of type ObjectReferenceProperty, which is for STIX 2.0 only") +def sub_objects(prop_class): + for name, prop in prop_class.type._properties.items(): + if isinstance(prop, (HashesProperty, EmbeddedObjectProperty)): + return True + return False + + @add_method(ListProperty) -def generate_table_information(self, name, metadata, is_sdo, table_name, **kwargs): # noqa: F811 +def generate_table_information(self, name, metadata, schema_name, table_name, **kwargs): # noqa: F811 is_extension = kwargs.get('is_extension') tables = list() if isinstance(self.contained, ReferenceProperty): - columns = list() - columns.append( - Column( - "id", - Integer if is_extension else Text, - ForeignKey( - canonicalize_table_name(table_name, is_sdo) + ".id", - ondelete="CASCADE", - ), - primary_key=True, - ), - ) - columns.append(ref_column("ref_id", self.contained.specifics)) - return [Table(canonicalize_table_name(table_name + "_" + name, is_sdo), metadata, *columns)] + return [create_ref_table(metadata, + self.contained.specifics, + canonicalize_table_name(table_name + "_" + name, schema_name), + canonicalize_table_name(table_name, schema_name) + ".id", )] elif isinstance(self.contained, EmbeddedObjectProperty): columns = list() columns.append( Column( "id", - Integer if is_extension else Text, + Text, ForeignKey( - canonicalize_table_name(table_name, is_sdo) + ".id", + canonicalize_table_name(table_name, schema_name) + ".id", ondelete="CASCADE", ), - primary_key=True, ), ) columns.append( Column( "ref_id", - Integer if is_extension else Text, + Integer, + primary_key=True, 
nullable=False, ), ) - tables.append(Table(canonicalize_table_name(table_name + "_" + name, is_sdo), metadata, *columns)) + tables.append(Table(canonicalize_table_name(table_name + "_" + name, schema_name), metadata, *columns)) tables.extend( self.contained.generate_table_information( name, metadata, - False, - canonicalize_table_name(table_name + "_" + name, None), + schema_name, + canonicalize_table_name(table_name + "_" + name, None), # if sub_table_needed else canonicalize_table_name(table_name, None), is_extension, - is_list=True + is_list=True, ), ) return tables @@ -514,70 +501,78 @@ def generate_table_information(self, name, metadata, is_sdo, table_name, **kwarg ) -def generate_object_table(stix_object_class, metadata, is_sdo, foreign_key_name=None, is_extension=False, is_embedded_object=False, is_list=False): +def generate_object_table( + stix_object_class, metadata, schema_name, foreign_key_name=None, + is_extension=False, is_embedded_object=False, is_list=False, +): properties = stix_object_class._properties if hasattr(stix_object_class, "_type"): table_name = stix_object_class._type else: table_name = stix_object_class.__name__ - core_properties = SDO_COMMON_PROPERTIES if is_sdo else SCO_COMMON_PROPERTIES + if table_name.startswith("extension-definition"): + table_name = table_name[0:30] + core_properties = SDO_COMMON_PROPERTIES if schema_name else SCO_COMMON_PROPERTIES columns = list() tables = list() for name, prop in properties.items(): if name == 'id' or name not in core_properties: - col = prop.generate_table_information(name, - metadata=metadata, - is_sdo=is_sdo, - table_name=table_name, - is_extension=is_extension, - is_embedded_object=is_embedded_object, - is_list=is_list) + col = prop.generate_table_information( + name, + metadata=metadata, + schema_name=schema_name, + table_name=table_name, + is_extension=is_extension, + is_embedded_object=is_embedded_object, + is_list=is_list, + ) if col is not None and isinstance(col, Column): columns.append(col) if col is not None and isinstance(col, list): tables.extend(col) - if (is_extension and not is_embedded_object) or (is_extension and is_embedded_object and is_list): + if (is_extension and not is_embedded_object): # or (is_extension and is_embedded_object and is_list): columns.append( Column( "id", - Integer, + Text, # no Foreign Key because it could be for different tables primary_key=True, ), ) if foreign_key_name: - if is_extension and is_embedded_object and is_list: + if is_extension or (is_embedded_object and is_list): column = Column( - "ref_id", - Integer, + "id", + Integer if (is_embedded_object and is_list) else Text, ForeignKey( - canonicalize_table_name(foreign_key_name, is_sdo) + ".ref_id", + canonicalize_table_name(foreign_key_name, schema_name) + (".ref_id" if (is_embedded_object and is_list) else ".id"), ondelete="CASCADE", ), - primary_key=True, ) else: column = Column( "id", Text, ForeignKey( - canonicalize_table_name(foreign_key_name, is_sdo) + ".id", + canonicalize_table_name(foreign_key_name, schema_name) + ".id", ondelete="CASCADE", ), - primary_key=True, ) columns.append(column) - return [Table(canonicalize_table_name(table_name, is_sdo), metadata, *columns)] - else: - all_tables = [Table(canonicalize_table_name(table_name, is_sdo), metadata, *columns)] - all_tables.extend(tables) - return all_tables + + all_tables = [Table(canonicalize_table_name(table_name, schema_name), metadata, *columns)] + all_tables.extend(tables) + return all_tables def create_core_tables(metadata): - return [ + tables = [ 
create_core_table(metadata, "sdo"), create_granular_markings_table(metadata, "sdo"), create_core_table(metadata, "sco"), create_granular_markings_table(metadata, "sco"), + create_object_markings_refs_table(metadata, "sdo"), + create_object_markings_refs_table(metadata, "sco") ] + tables.extend(create_external_references_tables(metadata)) + return tables diff --git a/stix2/datastore/relational_db/utils.py b/stix2/datastore/relational_db/utils.py new file mode 100644 index 00000000..ffa90eaa --- /dev/null +++ b/stix2/datastore/relational_db/utils.py @@ -0,0 +1,35 @@ +# Helps us know which data goes in core, and which in a type-specific table. +SCO_COMMON_PROPERTIES = { + "id", + "type", + "spec_version", + "object_marking_refs", + "granular_markings", + "defanged" +} + +# Helps us know which data goes in core, and which in a type-specific table. +SDO_COMMON_PROPERTIES = { + "id", + "type", + "spec_version", + "object_marking_refs", + "granular_markings", + "defanged", + "created", + "modified", + "created_by_ref", + "revoked", + "labels", + "confidence", + "lang", + "external_references" +} + + +def canonicalize_table_name(table_name, schema_name): + if schema_name: + full_name = schema_name + "." + table_name + else: + full_name = table_name + return full_name.replace("-", "_") \ No newline at end of file diff --git a/stix2/v21/base.py b/stix2/v21/base.py index c6025a0b..5f14f1bb 100644 --- a/stix2/v21/base.py +++ b/stix2/v21/base.py @@ -28,12 +28,12 @@ def __init__(self, **kwargs): class _Extension(_Extension, _STIXBase21): extension_type = None + def __init__(self, applies_to="sco", **kwargs): super(_Extension, self).__init__(**kwargs) self._applies_to = applies_to - class _DomainObject(_DomainObject, _STIXBase21): pass
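
Notes for reviewers (illustrative sketches only; the names, URLs, and stand-in objects below are examples, not part of the patch):

With the DatabaseConnection wrapper gone, RelationalDBSink is constructed directly from a SQLAlchemy connection URL, as in the updated relational_db_testing.py. A usage sketch, assuming a reachable PostgreSQL database at the example URL:

    import stix2
    from stix2.datastore.relational_db.relational_db import RelationalDBSink

    # instantiate_database now defaults to True, so the tables are created here
    sink = RelationalDBSink("postgresql://localhost/stix-data-sink")
    sink.generate_stix_schema()   # prints the generated CREATE TABLE statements

    # example object; any parseable STIX 2.1 object works with sink.add()
    identity = stix2.Identity(name="ACME Corp", identity_class="organization")
    sink.add(identity)            # builds and executes the INSERT statements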
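
Table naming moves from the is_sdo boolean to a schema_name string ("sdo", "sco", or "sro"; the common tables keep a literal "common." prefix), and canonicalize_table_name in the new utils.py prefixes the schema and normalizes hyphens:

    from stix2.datastore.relational_db.utils import canonicalize_table_name

    canonicalize_table_name("attack-pattern", "sdo")   # -> "sdo.attack_pattern"
    canonicalize_table_name("file", "sco")             # -> "sco.file"
    canonicalize_table_name("kill_chain_phase", None)  # -> "kill_chain_phase"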
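
input_creation.py builds SQLAlchemy Core insert() constructs instead of hand-assembled INSERT strings, and insert_object executes them inside a single engine.begin() block. A minimal, self-contained sketch of that pattern, using an in-memory SQLite engine and a simplified core_sdo table as stand-ins for the real schema:

    from sqlalchemy import Column, MetaData, Table, Text, create_engine, insert

    metadata = MetaData()
    core_sdo = Table(
        "core_sdo", metadata,
        Column("id", Text, primary_key=True),
        Column("spec_version", Text),
    )

    engine = create_engine("sqlite://")   # stand-in for the PostgreSQL engine
    metadata.create_all(engine)

    stmt = insert(core_sdo).values({"id": "identity--0001", "spec_version": "2.1"})
    with engine.begin() as conn:          # commits automatically on successful exit
        conn.execute(stmt)

Because engine.begin() already commits when the block exits without error, the explicit trans.commit() in insert_object is likely redundant.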
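
For list properties whose members are simple values, generate_single_values stores a PostgreSQL array literal in the parent row rather than creating a child table; the string it builds looks like this:

    values = ["malicious-activity", "benign"]
    "{" + ",".join(['"' + x + '"' for x in values]) + "}"
    # -> '{"malicious-activity","benign"}'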
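
The add_method decorator moved into add_method.py attaches a per-class generate_table_information implementation to each stix2 Property class. A minimal sketch of the pattern, with a hypothetical Demo class standing in for the real Property hierarchy:

    def add_method(cls):
        def decorator(fn):
            setattr(cls, fn.__name__, fn)   # attach under the original name
            return fn
        return decorator


    class Demo:                             # hypothetical stand-in class
        pass


    @add_method(Demo)
    def generate_table_information(self, name, **kwargs):
        return f"column spec for {name}"


    print(Demo().generate_table_information("value"))  # -> column spec for value

One small observation: with the re.findall call commented out, create_real_method_name iterates the individual characters of klass_name (it is a string), so the derived __name__ joins lower-cased characters with underscores (e.g. "k_i_l_l_c_h_a_i_n_p_h_a_s_e" for KillChainPhase). Dispatch still works because setattr uses the saved method_name, but the rename may not do what was intended.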