From ef3b950311fdfe57548a92823b9a4c49ed034320 Mon Sep 17 00:00:00 2001 From: cgu Date: Tue, 15 Dec 2020 15:07:11 -0500 Subject: [PATCH] Add correct cascade behavior for sqlalchemy --- ...9e3e18bcc_add_more_cascade_relationship.py | 38 +++++++++++ datahub/server/app/db.py | 8 ++- datahub/server/models/admin.py | 2 +- datahub/server/models/datadoc.py | 14 +++- datahub/server/models/metastore.py | 66 ++++++++++++++----- datahub/server/models/query_execution.py | 29 ++++++-- datahub/server/models/schedule.py | 2 +- datahub/server/models/tag.py | 4 +- datahub/server/models/user.py | 4 +- 9 files changed, 135 insertions(+), 32 deletions(-) create mode 100644 datahub/migrations/versions/b8a9e3e18bcc_add_more_cascade_relationship.py diff --git a/datahub/migrations/versions/b8a9e3e18bcc_add_more_cascade_relationship.py b/datahub/migrations/versions/b8a9e3e18bcc_add_more_cascade_relationship.py new file mode 100644 index 000000000..9d447e938 --- /dev/null +++ b/datahub/migrations/versions/b8a9e3e18bcc_add_more_cascade_relationship.py @@ -0,0 +1,38 @@ +"""add more cascade relationship + +Revision ID: b8a9e3e18bcc +Revises: 7d24ddb3edb9 +Create Date: 2020-12-15 19:54:55.906997 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'b8a9e3e18bcc' +down_revision = '7d24ddb3edb9' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_constraint('data_doc_editor_ibfk_1', 'data_doc_editor', type_='foreignkey') + op.create_foreign_key(None, 'data_doc_editor', 'data_doc', ['data_doc_id'], ['id'], ondelete='CASCADE') + op.drop_constraint('job_metadata_metastore_fk', 'data_job_metadata', type_='foreignkey') + op.create_foreign_key('job_metadata_metastore_fk', 'data_job_metadata', 'query_metastore', ['metastore_id'], ['id'], ondelete='CASCADE') + op.drop_constraint('data_schema_ibfk_1', 'data_schema', type_='foreignkey') + op.create_foreign_key(None, 'data_schema', 'query_metastore', ['metastore_id'], ['id'], ondelete='CASCADE') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_constraint(None, 'data_schema', type_='foreignkey') + op.create_foreign_key('data_schema_ibfk_1', 'data_schema', 'query_metastore', ['metastore_id'], ['id']) + op.drop_constraint('job_metadata_metastore_fk', 'data_job_metadata', type_='foreignkey') + op.create_foreign_key('job_metadata_metastore_fk', 'data_job_metadata', 'query_metastore', ['metastore_id'], ['id']) + op.drop_constraint(None, 'data_doc_editor', type_='foreignkey') + op.create_foreign_key('data_doc_editor_ibfk_1', 'data_doc_editor', 'data_doc', ['data_doc_id'], ['id']) + # ### end Alembic commands ### diff --git a/datahub/server/app/db.py b/datahub/server/app/db.py index 566e3c1ea..809d1599a 100644 --- a/datahub/server/app/db.py +++ b/datahub/server/app/db.py @@ -46,6 +46,13 @@ def get_db_engine( def connect(dbapi_connection, connection_record): connection_record.info["pid"] = os.getpid() + if conn_string.startswith("sqlite"): + # Sqlite DB requires foreign keys to be turned on manually + # to ensure on delete cascade works + cursor = dbapi_connection.cursor() + cursor.execute("PRAGMA foreign_keys=ON") + cursor.close() + @event.listens_for(__engine, "checkout") def checkout(dbapi_connection, connection_record, connection_proxy): pid = os.getpid() @@ -66,7 +73,6 @@ def checkout(dbapi_connection, connection_record, connection_proxy): def get_session(scopefunc=None): """Create a global bound scoped_session - Returns: [type] -- [description] """ diff --git a/datahub/server/models/admin.py b/datahub/server/models/admin.py index b425ba77f..f39ec0fa4 100644 --- a/datahub/server/models/admin.py +++ b/datahub/server/models/admin.py @@ -201,4 +201,4 @@ class AdminAuditLog(CRUDMixin, Base): op = sql.Column(sql.Enum(AdminOperation), nullable=False) log = sql.Column(sql.String(length=description_length)) - relationship("User", uselist=False) + user = relationship("User", uselist=False) diff --git a/datahub/server/models/datadoc.py b/datahub/server/models/datadoc.py index 95dafb9b8..1959c436a 100644 --- a/datahub/server/models/datadoc.py +++ b/datahub/server/models/datadoc.py @@ -43,7 +43,9 @@ class DataDoc(Base, CRUDMixin): owner = relationship("User", uselist=False) environment = relationship( - "Environment", uselist=False, backref=backref("data_docs"), + "Environment", + uselist=False, + backref=backref("data_docs", cascade="all, delete", passive_deletes=True), ) def to_dict(self, with_cells=False): @@ -131,7 +133,9 @@ class DataDocEditor(Base): ) id = sql.Column(sql.Integer, primary_key=True, autoincrement=True) - data_doc_id = sql.Column(sql.Integer, sql.ForeignKey("data_doc.id")) + data_doc_id = sql.Column( + sql.Integer, sql.ForeignKey("data_doc.id", ondelete="CASCADE") + ) uid = sql.Column(sql.Integer, sql.ForeignKey("user.id", ondelete="CASCADE")) read = sql.Column(sql.Boolean, default=False, nullable=False) @@ -139,7 +143,11 @@ class DataDocEditor(Base): user = relationship("User", uselist=False) - data_doc = relationship("DataDoc", uselist=False, backref=backref("editors")) + data_doc = relationship( + "DataDoc", + uselist=False, + backref=backref("editors", cascade="all, delete", passive_deletes=True), + ) def to_dict(self): return { diff --git a/datahub/server/models/metastore.py b/datahub/server/models/metastore.py index 5603f7cf9..807aedb78 100644 --- a/datahub/server/models/metastore.py +++ b/datahub/server/models/metastore.py @@ -51,16 +51,19 @@ class TableLineage(Base): table = relationship( "DataTable", - backref=backref("table_lineage", cascade="all, delete"), + backref=backref("table_lineage", cascade="all, delete", passive_deletes=True), foreign_keys=[table_id], ) parent_table = relationship( "DataTable", - backref=backref("parent_table_lineage", cascade="all, delete"), + backref=backref( + "parent_table_lineage", cascade="all, delete", passive_deletes=True + ), foreign_keys=[parent_table_id], ) job_metadata = relationship( - "DataJobMetadata", backref=backref("table_lineage", passive_deletes=True) + "DataJobMetadata", + backref=backref("table_lineage", cascade="all, delete", passive_deletes=True), ) def to_dict(self, include_table=False): @@ -95,10 +98,13 @@ class DataJobMetadata(Base): metastore_id = sql.Column( sql.Integer, - sql.ForeignKey("query_metastore.id", name="job_metadata_metastore_fk"), + sql.ForeignKey( + "query_metastore.id", name="job_metadata_metastore_fk", ondelete="CASCADE" + ), ) metastore = relationship( - "QueryMetastore", backref=backref("job_metadata", passive_deletes=True) + "QueryMetastore", + backref=backref("job_metadata", cascade="all, delete", passive_deletes=True), ) def to_dict(self): @@ -125,10 +131,17 @@ class DataSchema(Base): table_count = sql.Column(sql.Integer) description = sql.Column(sql.Text(length=mediumtext_length)) - metastore_id = sql.Column(sql.Integer, sql.ForeignKey("query_metastore.id")) - metastore = relationship("QueryMetastore", backref="schemas") + metastore_id = sql.Column( + sql.Integer, sql.ForeignKey("query_metastore.id", ondelete="CASCADE") + ) + metastore = relationship( + "QueryMetastore", + backref=backref("schemas", cascade="all, delete", passive_deletes=True), + ) - tables = relationship("DataTable", backref="data_schema") + tables = relationship( + "DataTable", backref="data_schema", cascade="all, delete", passive_deletes=True + ) def to_dict(self, include_metastore=False, include_table=False): schema_dict = { @@ -172,13 +185,24 @@ class DataTable(Base, CRUDMixin): sql.Integer, sql.ForeignKey("data_schema.id", ondelete="CASCADE") ) golden = sql.Column(sql.Boolean, default=False) - boost_score = sql.Column(sql.Numeric, default=1) + boost_score = sql.Column(sql.Numeric, default=1, nullable=False) information = relationship( - "DataTableInformation", uselist=False, backref="data_table" + "DataTableInformation", + uselist=False, + backref="data_table", + cascade="all, delete", + passive_deletes=True, + ) + columns = relationship( + "DataTableColumn", + backref="data_table", + cascade="all, delete", + passive_deletes=True, + ) + ownership = relationship( + "DataTableOwnership", uselist=False, cascade="all, delete", passive_deletes=True ) - columns = relationship("DataTableColumn", backref="data_table") - ownership = relationship("DataTableOwnership", uselist=False) def to_dict( self, include_schema=False, include_column=False, include_warnings=False, @@ -319,12 +343,16 @@ class DataTableQueryExecution(Base, CRUDMixin): table = relationship( "DataTable", - backref=backref("table_query_execution", cascade="all, delete"), + backref=backref( + "table_query_execution", cascade="all, delete", passive_deletes=True + ), foreign_keys=[table_id], ) query_execution = relationship( "QueryExecution", - backref=backref("table_query_execution", cascade="all, delete"), + backref=backref( + "table_query_execution", cascade="all, delete", passive_deletes=True + ), foreign_keys=[query_execution_id], ) @@ -346,7 +374,7 @@ class DataTableWarning(Base, CRUDMixin): table = relationship( "DataTable", - backref=backref("warnings", cascade="all, delete"), + backref=backref("warnings", cascade="all, delete", passive_deletes=True), foreign_keys=[table_id], ) @@ -364,7 +392,9 @@ class DataTableStatistics(Base, CRUDMixin): table = relationship( "DataTable", - backref=backref("table_statistics", cascade="all, delete"), + backref=backref( + "table_statistics", cascade="all, delete", passive_deletes=True + ), foreign_keys=[table_id], ) @@ -384,6 +414,8 @@ class DataTableColumnStatistics(Base, CRUDMixin): column = relationship( "DataTableColumn", - backref=backref("table_statistics", cascade="all, delete"), + backref=backref( + "table_statistics", cascade="all, delete", passive_deletes=True + ), foreign_keys=[column_id], ) diff --git a/datahub/server/models/query_execution.py b/datahub/server/models/query_execution.py index 3611487ab..152b7f414 100644 --- a/datahub/server/models/query_execution.py +++ b/datahub/server/models/query_execution.py @@ -36,12 +36,29 @@ class QueryExecution(Base): uid = sql.Column(sql.Integer, sql.ForeignKey("user.id", ondelete="CASCADE")) owner = relationship("User", uselist=False) - engine = relationship("QueryEngine", uselist=False) - statement_executions = relationship("StatementExecution", backref="query_execution") + engine = relationship( + "QueryEngine", + uselist=False, + backref=backref("executions", cascade="all, delete", passive_deletes=True), + ) + statement_executions = relationship( + "StatementExecution", + backref="query_execution", + cascade="all, delete", + passive_deletes=True, + ) notifications = relationship( - "QueryExecutionNotification", backref="query_execution" + "QueryExecutionNotification", + backref="query_execution", + cascade="all, delete", + passive_deletes=True, + ) + error = relationship( + "QueryExecutionError", + uselist=False, + cascade="all, delete", + passive_deletes=True, ) - error = relationship("QueryExecutionError", uselist=False) @with_formatted_date def to_dict(self, with_statement=True): @@ -188,5 +205,7 @@ class QueryExecutionViewer(CRUDMixin, Base): creator = relationship("User", foreign_keys="QueryExecutionViewer.created_by") created_at = sql.Column(sql.DateTime, default=now, nullable=False) query_execution = relationship( - "QueryExecution", uselist=False, backref=backref("viewers") + "QueryExecution", + uselist=False, + backref=backref("viewers", cascade="all, delete", passive_deletes=True), ) diff --git a/datahub/server/models/schedule.py b/datahub/server/models/schedule.py index 4ea4a5c7f..cc1413fdd 100644 --- a/datahub/server/models/schedule.py +++ b/datahub/server/models/schedule.py @@ -117,6 +117,6 @@ class TaskRunRecord(CRUDMixin, TruncateString("error_message"), db.Base): task = relationship( "TaskSchedule", - backref=backref("task_run_record", cascade="all, delete"), + backref=backref("task_run_record", cascade="all, delete", passive_deletes=True), foreign_keys=[name], ) diff --git a/datahub/server/models/tag.py b/datahub/server/models/tag.py index 014c62b3c..0eaab4495 100644 --- a/datahub/server/models/tag.py +++ b/datahub/server/models/tag.py @@ -46,11 +46,11 @@ class TagItem(CRUDMixin, Base): tag = relationship( "Tag", - backref=backref("tag_item", cascade="all, delete"), + backref=backref("tag_item", cascade="all, delete", passive_deletes=True), foreign_keys=[tag_name], ) table = relationship( "DataTable", - backref=backref("tags", cascade="all, delete"), + backref=backref("tags", cascade="all, delete", passive_deletes=True), foreign_keys=[table_id], ) diff --git a/datahub/server/models/user.py b/datahub/server/models/user.py index 09267f4f8..5b0666ce3 100644 --- a/datahub/server/models/user.py +++ b/datahub/server/models/user.py @@ -34,8 +34,8 @@ class User(CRUDMixin, Base): properties = sql.Column(sql.JSON, default={}) - settings = relationship("UserSetting") - roles = relationship("UserRole") + settings = relationship("UserSetting", cascade="all, delete", passive_deletes=True) + roles = relationship("UserRole", cascade="all, delete", passive_deletes=True) @hybrid_property def password(self):