From d7e982466b186645cfefaa8fd2fc55b1fd53e6bf Mon Sep 17 00:00:00 2001 From: Mayuri N Date: Tue, 19 Nov 2024 20:59:32 +0530 Subject: [PATCH 01/13] feat(ingest/mssql): include stored procedure lineage --- .../ingestion/source/sql/mssql/source.py | 43 +++++++++++- .../sql/mssql/stored_procedure_lineage.py | 68 +++++++++++++++++++ .../integration/sql_server/docker-compose.yml | 2 +- 3 files changed, 110 insertions(+), 3 deletions(-) create mode 100644 metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py index c19b22a8622ca..51a74b8ed8d3d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py @@ -24,6 +24,8 @@ platform_name, support_status, ) +from datahub.ingestion.api.source import StructuredLogLevel +from datahub.ingestion.api.source_helpers import auto_workunit from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.sql.mssql.job_models import ( JobStep, @@ -36,6 +38,9 @@ ProcedureParameter, StoredProcedure, ) +from datahub.ingestion.source.sql.mssql.stored_procedure_lineage import ( + add_procedure_to_aggregator, +) from datahub.ingestion.source.sql.sql_common import ( SQLAlchemySource, SqlWorkUnit, @@ -51,6 +56,7 @@ StringTypeClass, UnionTypeClass, ) +from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator logger: logging.Logger = logging.getLogger(__name__) @@ -99,6 +105,10 @@ class SQLServerConfig(BasicSQLAlchemyConfig): default=False, description="Enable to convert the SQL Server assets urns to lowercase", ) + include_lineage: bool = Field( + default=True, + description="Enable lineage extraction for views and stored procedures", + ) @pydantic.validator("uri_args") def passwords_match(cls, v, values, **kwargs): @@ -161,6 +171,17 
@@ def __init__(self, config: SQLServerConfig, ctx: PipelineContext): self.current_database = None self.table_descriptions: Dict[str, str] = {} self.column_descriptions: Dict[str, str] = {} + self.sql_aggregator = SqlParsingAggregator( + platform=self.platform, + env=self.config.env, + schema_resolver=self.schema_resolver, + graph=ctx.graph, + generate_lineage=self.config.include_lineage, + generate_queries=True, + generate_usage_statistics=False, + generate_operations=False, + generate_query_usage_statistics=False, + ) if self.config.include_descriptions: for inspector in self.get_inspectors(): db_name: str = self.get_db_name(inspector) @@ -429,8 +450,22 @@ def loop_stored_procedures( # noqa: C901 ) if procedure_definition: data_job.add_property("definition", procedure_definition) - if sql_config.include_stored_procedures_code and procedure_code: - data_job.add_property("code", procedure_code) + if procedure_code: + if self.config.include_lineage: + with self.report.report_exc( + message="Failed to parse stored procedure lineage", + context=procedure.full_name, + level=StructuredLogLevel.WARN, + ): + add_procedure_to_aggregator( + aggregator=self.sql_aggregator, + procedure_code=procedure_code, + default_db=db_name, + default_schema=schema, + procedure_job_urn=data_job.urn, + ) + if self.config.include_stored_procedures_code: + data_job.add_property("code", procedure_code) procedure_inputs = self._get_procedure_inputs(conn, procedure) properties = self._get_procedure_properties(conn, procedure) data_job.add_property( @@ -664,3 +699,7 @@ def get_identifier( if self.config.convert_urns_to_lowercase else qualified_table_name ) + + def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: + yield from super().get_workunits_internal() + yield from auto_workunit(self.sql_aggregator.gen_metadata()) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py 
b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py new file mode 100644 index 0000000000000..dd7ef944f4803 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py @@ -0,0 +1,68 @@ +import re +from typing import Iterable + +from datahub.sql_parsing.sql_parsing_aggregator import ( + ObservedQuery, + SqlParsingAggregator, +) + + +def split_procedure_into_queries(sql: str) -> Iterable[str]: + # cleanup syntax + sql = sql.replace("= @RunID", "= 'hardcoded-run-id'") + sql = sql.replace(".Union.", ".[Union].") + + # split on statements + sql = sql.replace(",\n\n", ",\n") + sql = sql.replace("GO", "\n\n") + sql = re.sub(r"\ndeclare", "\n\ndeclare", sql, flags=re.IGNORECASE) + sql = re.sub(r"\ncreate\s+index", "\n\ncreate index", sql, flags=re.IGNORECASE) + + sqls = sql.split("\n\n") + + for i, chunk in enumerate(sqls): + chunk = chunk.strip().rstrip(";") + "\n;" + + # If there's an "IF" statement with a begin and end, just take the inner part. + # if ( + # i == 12 + # and file.stem == "Portland.Tribeca.sprDW_LiquidityTermDeposits-sanitised" + # ): + # breakpoint() + chunk = re.sub( + r"IF.*BEGIN(.*)END", + r"\1", + chunk, + flags=re.DOTALL | re.MULTILINE | re.IGNORECASE, + ) + + # Remove all lines starting with `IF`, `WHILE`, `BEGIN`, or `END`. + chunk = re.sub(r"^IF.*$", "", chunk, flags=re.MULTILINE | re.IGNORECASE) + chunk = re.sub(r"^BEGIN.*$", "", chunk, flags=re.MULTILINE | re.IGNORECASE) + chunk = re.sub(r"^WHILE.*$", "", chunk, flags=re.MULTILINE | re.IGNORECASE) + chunk = re.sub(r"^END$", "", chunk, flags=re.MULTILINE | re.IGNORECASE) + chunk = re.sub(r"^END.*--.*$", "", chunk, flags=re.MULTILINE) + + chunk = chunk.strip() + if not chunk or chunk == ";": + continue + + yield chunk + + +# Is procedure handling generic enough to be added to SqlParsingAggregator? 
+def add_procedure_to_aggregator( + *, + aggregator: SqlParsingAggregator, + procedure_code: str, + default_db: str, + default_schema: str, + procedure_job_urn: str, +) -> None: + for query in split_procedure_into_queries(procedure_code): + aggregator.add_observed_query( + observed=ObservedQuery( + default_db=default_db, default_schema=default_schema, query=query + ) + ) + # TODO: finalize and use data job urn as required diff --git a/metadata-ingestion/tests/integration/sql_server/docker-compose.yml b/metadata-ingestion/tests/integration/sql_server/docker-compose.yml index 1046321e4f720..aed70503903c0 100644 --- a/metadata-ingestion/tests/integration/sql_server/docker-compose.yml +++ b/metadata-ingestion/tests/integration/sql_server/docker-compose.yml @@ -1,7 +1,7 @@ version: "3" services: testsqlserver: - image: "mcr.microsoft.com/mssql/server:latest" + image: "mcr.microsoft.com/mssql/server:2022-latest" platform: linux/amd64 container_name: "testsqlserver" environment: From 12002c3992cc2602fc37d43e800a42b02a198f57 Mon Sep 17 00:00:00 2001 From: Mayuri N Date: Thu, 21 Nov 2024 18:52:49 +0530 Subject: [PATCH 02/13] lineage via job and tests --- .../ingestion/source/sql/mssql/job_models.py | 1 + .../ingestion/source/sql/mssql/source.py | 137 +- .../sql/mssql/stored_procedure_lineage.py | 75 +- .../ingestion/source/sql/sql_common.py | 9 +- .../src/datahub/sql_parsing/datajob.py | 57 + .../datahub/sql_parsing/split_statements.py | 176 ++ .../sql_parsing/sql_parsing_aggregator.py | 1 - .../golden_mces_mssql_no_db_to_file.json | 1026 ++++-- .../golden_mces_mssql_no_db_with_filter.json | 484 ++- .../golden_mces_mssql_to_file.json | 484 ++- ...golden_mces_mssql_with_lower_case_urn.json | 2778 +++++++++++++++-- .../procedures/DemoData.Foo.NewProc.json | 21 + .../procedures/DemoData.Foo.NewProc.sql | 29 + .../integration/sql_server/setup/setup.sql | 38 +- .../mssql_with_lower_case_urn.yml | 1 - .../integration/sql_server/test_sql_server.py | 61 + 16 files changed, 4450 
insertions(+), 928 deletions(-) create mode 100644 metadata-ingestion/src/datahub/sql_parsing/datajob.py create mode 100644 metadata-ingestion/src/datahub/sql_parsing/split_statements.py create mode 100644 metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json create mode 100644 metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py index 21e7fad334331..5107a4e38f64d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py @@ -101,6 +101,7 @@ class StoredProcedure: flow: Union[MSSQLJob, MSSQLProceduresContainer] type: str = "STORED_PROCEDURE" source: str = "mssql" + code: Optional[str] = None @property def full_type(self) -> str: diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py index 51a74b8ed8d3d..2bdf265067376 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py @@ -39,7 +39,7 @@ StoredProcedure, ) from datahub.ingestion.source.sql.mssql.stored_procedure_lineage import ( - add_procedure_to_aggregator, + generate_procedure_lineage, ) from datahub.ingestion.source.sql.sql_common import ( SQLAlchemySource, @@ -107,7 +107,7 @@ class SQLServerConfig(BasicSQLAlchemyConfig): ) include_lineage: bool = Field( default=True, - description="Enable lineage extraction for views and stored procedures", + description="Enable lineage extraction for stored procedures", ) @pydantic.validator("uri_args") @@ -171,17 +171,6 @@ def __init__(self, config: SQLServerConfig, ctx: PipelineContext): self.current_database = None self.table_descriptions: 
Dict[str, str] = {} self.column_descriptions: Dict[str, str] = {} - self.sql_aggregator = SqlParsingAggregator( - platform=self.platform, - env=self.config.env, - schema_resolver=self.schema_resolver, - graph=ctx.graph, - generate_lineage=self.config.include_lineage, - generate_queries=True, - generate_usage_statistics=False, - generate_operations=False, - generate_query_usage_statistics=False, - ) if self.config.include_descriptions: for inspector in self.get_inspectors(): db_name: str = self.get_db_name(inspector) @@ -191,6 +180,20 @@ def __init__(self, config: SQLServerConfig, ctx: PipelineContext): self._populate_table_descriptions(conn, db_name) self._populate_column_descriptions(conn, db_name) + def new_sql_aggregator(self) -> SqlParsingAggregator: + return SqlParsingAggregator( + platform=self.platform, + env=self.config.env, + schema_resolver=self.schema_resolver, + graph=self.ctx.graph, + generate_lineage=self.config.include_lineage, + generate_queries=False, + generate_usage_statistics=False, + generate_operations=False, + generate_query_subject_fields=False, + generate_query_usage_statistics=False, + ) + @staticmethod def _add_output_converters(conn: Connection) -> None: def handle_sql_variant_as_string(value): @@ -433,51 +436,57 @@ def loop_stored_procedures( # noqa: C901 if procedures: yield from self.construct_flow_workunits(data_flow=data_flow) for procedure in procedures: - upstream = self._get_procedure_upstream(conn, procedure) - downstream = self._get_procedure_downstream(conn, procedure) - data_job = MSSQLDataJob( - entity=procedure, - ) - # TODO: because of this upstream and downstream are more dependencies, - # can't be used as DataJobInputOutput. - # Should be reorganized into lineage. 
- data_job.add_property("procedure_depends_on", str(upstream.as_property)) - data_job.add_property( - "depending_on_procedure", str(downstream.as_property) - ) - procedure_definition, procedure_code = self._get_procedure_code( - conn, procedure - ) - if procedure_definition: - data_job.add_property("definition", procedure_definition) - if procedure_code: - if self.config.include_lineage: - with self.report.report_exc( - message="Failed to parse stored procedure lineage", - context=procedure.full_name, - level=StructuredLogLevel.WARN, - ): - add_procedure_to_aggregator( - aggregator=self.sql_aggregator, - procedure_code=procedure_code, - default_db=db_name, - default_schema=schema, - procedure_job_urn=data_job.urn, - ) - if self.config.include_stored_procedures_code: - data_job.add_property("code", procedure_code) - procedure_inputs = self._get_procedure_inputs(conn, procedure) - properties = self._get_procedure_properties(conn, procedure) - data_job.add_property( - "input parameters", str([param.name for param in procedure_inputs]) - ) - for param in procedure_inputs: - data_job.add_property( - f"parameter {param.name}", str(param.properties) + yield from self._process_stored_procedure(conn, procedure) + + def _process_stored_procedure( + self, conn: Connection, procedure: StoredProcedure + ) -> Iterable[MetadataWorkUnit]: + upstream = self._get_procedure_upstream(conn, procedure) + downstream = self._get_procedure_downstream(conn, procedure) + data_job = MSSQLDataJob( + entity=procedure, + ) + # TODO: because of this upstream and downstream are more dependencies, + # can't be used as DataJobInputOutput. + # Should be reorganized into lineage. 
+ data_job.add_property("procedure_depends_on", str(upstream.as_property)) + data_job.add_property("depending_on_procedure", str(downstream.as_property)) + procedure_definition, procedure_code = self._get_procedure_code(conn, procedure) + procedure.code = procedure_code + if procedure_definition: + data_job.add_property("definition", procedure_definition) + if procedure_code and self.config.include_stored_procedures_code: + data_job.add_property("code", procedure_code) + procedure_inputs = self._get_procedure_inputs(conn, procedure) + properties = self._get_procedure_properties(conn, procedure) + data_job.add_property( + "input parameters", str([param.name for param in procedure_inputs]) + ) + for param in procedure_inputs: + data_job.add_property(f"parameter {param.name}", str(param.properties)) + for property_name, property_value in properties.items(): + data_job.add_property(property_name, str(property_value)) + if self.config.include_lineage: + with self.report.report_exc( + message="Failed to parse stored procedure lineage", + context=procedure.full_name, + level=StructuredLogLevel.WARN, + ): + aggregator = self.new_sql_aggregator() + yield from auto_workunit( + generate_procedure_lineage( + aggregator=aggregator, + procedure=procedure, + procedure_job_urn=data_job.urn, ) - for property_name, property_value in properties.items(): - data_job.add_property(property_name, str(property_value)) - yield from self.construct_job_workunits(data_job) + ) + if aggregator.report.num_observed_queries_failed: + raise + yield from self.construct_job_workunits( + data_job, + # For stored procedure lineage is ingested above + include_lineage=False, + ) @staticmethod def _get_procedure_downstream( @@ -637,16 +646,18 @@ def _get_stored_procedures( def construct_job_workunits( self, data_job: MSSQLDataJob, + include_lineage: bool = True, ) -> Iterable[MetadataWorkUnit]: yield MetadataChangeProposalWrapper( entityUrn=data_job.urn, aspect=data_job.as_datajob_info_aspect, 
).as_workunit() - yield MetadataChangeProposalWrapper( - entityUrn=data_job.urn, - aspect=data_job.as_datajob_input_output_aspect, - ).as_workunit() + if include_lineage: + yield MetadataChangeProposalWrapper( + entityUrn=data_job.urn, + aspect=data_job.as_datajob_input_output_aspect, + ).as_workunit() # TODO: Add SubType when it appear def construct_flow_workunits( @@ -699,7 +710,3 @@ def get_identifier( if self.config.convert_urns_to_lowercase else qualified_table_name ) - - def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit]]: - yield from super().get_workunits_internal() - yield from auto_workunit(self.sql_aggregator.gen_metadata()) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py index dd7ef944f4803..5894b2d5fb6fa 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py @@ -1,68 +1,35 @@ -import re from typing import Iterable +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.source.sql.mssql.job_models import StoredProcedure +from datahub.sql_parsing.datajob import to_job_lineage +from datahub.sql_parsing.split_statements import split_statements from datahub.sql_parsing.sql_parsing_aggregator import ( ObservedQuery, SqlParsingAggregator, ) -def split_procedure_into_queries(sql: str) -> Iterable[str]: - # cleanup syntax - sql = sql.replace("= @RunID", "= 'hardcoded-run-id'") - sql = sql.replace(".Union.", ".[Union].") - - # split on statements - sql = sql.replace(",\n\n", ",\n") - sql = sql.replace("GO", "\n\n") - sql = re.sub(r"\ndeclare", "\n\ndeclare", sql, flags=re.IGNORECASE) - sql = re.sub(r"\ncreate\s+index", "\n\ncreate index", sql, flags=re.IGNORECASE) - - sqls = sql.split("\n\n") - - for i, chunk in enumerate(sqls): - 
chunk = chunk.strip().rstrip(";") + "\n;" - - # If there's an "IF" statement with a begin and end, just take the inner part. - # if ( - # i == 12 - # and file.stem == "Portland.Tribeca.sprDW_LiquidityTermDeposits-sanitised" - # ): - # breakpoint() - chunk = re.sub( - r"IF.*BEGIN(.*)END", - r"\1", - chunk, - flags=re.DOTALL | re.MULTILINE | re.IGNORECASE, - ) - - # Remove all lines starting with `IF`, `WHILE`, `BEGIN`, or `END`. - chunk = re.sub(r"^IF.*$", "", chunk, flags=re.MULTILINE | re.IGNORECASE) - chunk = re.sub(r"^BEGIN.*$", "", chunk, flags=re.MULTILINE | re.IGNORECASE) - chunk = re.sub(r"^WHILE.*$", "", chunk, flags=re.MULTILINE | re.IGNORECASE) - chunk = re.sub(r"^END$", "", chunk, flags=re.MULTILINE | re.IGNORECASE) - chunk = re.sub(r"^END.*--.*$", "", chunk, flags=re.MULTILINE) - - chunk = chunk.strip() - if not chunk or chunk == ";": - continue - - yield chunk - - # Is procedure handling generic enough to be added to SqlParsingAggregator? -def add_procedure_to_aggregator( +def generate_procedure_lineage( *, aggregator: SqlParsingAggregator, - procedure_code: str, - default_db: str, - default_schema: str, + procedure: StoredProcedure, procedure_job_urn: str, -) -> None: - for query in split_procedure_into_queries(procedure_code): - aggregator.add_observed_query( - observed=ObservedQuery( - default_db=default_db, default_schema=default_schema, query=query +) -> Iterable[MetadataChangeProposalWrapper]: + if procedure.code: + for query in split_statements(procedure.code): + print(query) + aggregator.add_observed_query( + observed=ObservedQuery( + default_db=procedure.db, + default_schema=procedure.schema, + query=query, + ) ) + mcps = list(aggregator.gen_metadata()) + yield from to_job_lineage( + job_urn=procedure_job_urn, + mcps=mcps, + ignore_extra_mcps=True, ) - # TODO: finalize and use data job urn as required diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py 
b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 238fd88f1c950..aae3b3a48accb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -830,7 +830,7 @@ def _process_table( self._classify(dataset_name, schema, table, data_reader, schema_metadata) dataset_snapshot.aspects.append(schema_metadata) - if self.config.include_view_lineage: + if self._save_schema_to_resolver(): self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata) db_name = self.get_db_name(inspector) @@ -1125,7 +1125,7 @@ def _process_view( columns, canonical_schema=schema_fields, ) - if self.config.include_view_lineage: + if self._save_schema_to_resolver(): self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata) description, properties, _ = self.get_table_properties(inspector, schema, view) try: @@ -1189,6 +1189,11 @@ def _process_view( domain_registry=self.domain_registry, ) + def _save_schema_to_resolver(self): + return self.config.include_view_lineage or ( + hasattr(self.config, "include_lineage") and self.config.include_lineage + ) + def _run_sql_parser( self, view_identifier: str, query: str, schema_resolver: SchemaResolver ) -> Optional[SqlParsingResult]: diff --git a/metadata-ingestion/src/datahub/sql_parsing/datajob.py b/metadata-ingestion/src/datahub/sql_parsing/datajob.py new file mode 100644 index 0000000000000..7b638521f8491 --- /dev/null +++ b/metadata-ingestion/src/datahub/sql_parsing/datajob.py @@ -0,0 +1,57 @@ +import logging +from typing import Iterable, List + +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata._schema_classes import FineGrainedLineageClass +from datahub.metadata.schema_classes import ( + DataJobInputOutputClass, + UpstreamLineageClass, +) + +logger = logging.getLogger(__name__) + + +def to_job_lineage( + job_urn: str, + mcps: Iterable[MetadataChangeProposalWrapper], + 
ignore_extra_mcps: bool = True, +) -> Iterable[MetadataChangeProposalWrapper]: + inputDatasets: List[str] = [] + outputDatasets: List[str] = [] + fineGrainedLineages: List[FineGrainedLineageClass] = [] + for mcp in mcps: + + # TODO: Simple write operations without lineage as outputDatasets + + upstream_lineage = mcp.as_workunit().get_aspect_of_type(UpstreamLineageClass) + if upstream_lineage is not None: + + if mcp.entityUrn and mcp.entityUrn not in outputDatasets: + outputDatasets.append(mcp.entityUrn) + + for upstream in upstream_lineage.upstreams: + if upstream.dataset not in inputDatasets: + inputDatasets.append(upstream.dataset) + + if upstream_lineage.fineGrainedLineages: + for fineGrainedLineage in upstream_lineage.fineGrainedLineages: + fineGrainedLineages.append(fineGrainedLineage) + + else: + if ignore_extra_mcps: + logger.warning( + f"Ignoring mcp {mcp.entityUrn}-{mcp.aspectName} with no lineage" + ) + else: + yield mcp + + if inputDatasets or outputDatasets: + # we have job lineage + yield MetadataChangeProposalWrapper( + entityUrn=job_urn, + aspect=DataJobInputOutputClass( + inputDatasets=inputDatasets, + outputDatasets=outputDatasets, + fineGrainedLineages=fineGrainedLineages, + ), + ) diff --git a/metadata-ingestion/src/datahub/sql_parsing/split_statements.py b/metadata-ingestion/src/datahub/sql_parsing/split_statements.py new file mode 100644 index 0000000000000..ec7f5c20adba6 --- /dev/null +++ b/metadata-ingestion/src/datahub/sql_parsing/split_statements.py @@ -0,0 +1,176 @@ +import re +from enum import Enum +from typing import Generator, List + +CONTROL_FLOW_KEYWORDS = [ + "GO", + r"BEGIN\w+TRY", + r"BEGIN\w+CATCH", + "BEGIN", + r"END\w+TRY", + r"END\w+CATCH", + "END", +] + +FORCE_NEW_STATEMENT_KEYWORDS = [ + # SELECT is used inside queries as well, so we can't include it here. 
+ "INSERT", + "UPDATE", + "DELETE", + "MERGE", +] + + +class ParserState(Enum): + NORMAL = 1 + STRING = 2 + COMMENT = 3 + MULTILINE_COMMENT = 4 + + +def is_keyword_at_position(sql: str, pos: int, keyword: str) -> bool: + """ + Check if a keyword exists at the given position using regex word boundaries. + """ + if pos + len(keyword) > len(sql): + return False + + # If we're not at a word boundary, we can't generate a keyword. + if pos > 0 and not ( + bool(re.match(r"\w\W", sql[pos - 1 : pos + 1])) + or bool(re.match(r"\W\w", sql[pos - 1 : pos + 1])) + ): + return False + + pattern = rf"^{re.escape(keyword)}\b" + match = re.match(pattern, sql[pos:], re.IGNORECASE) + return bool(match) + + +def look_ahead_for_keywords( + sql: str, pos: int, keywords: List[str] +) -> tuple[bool, str, int]: + """ + Look ahead for SQL keywords at the current position. + """ + + for keyword in keywords: + if is_keyword_at_position(sql, pos, keyword): + return True, keyword, len(keyword) + return False, "", 0 + + +def split_statements(sql: str) -> Generator[str, None, None]: + """ + Split T-SQL code into individual statements, handling various SQL constructs. 
+ """ + if not sql or not sql.strip(): + return + + current_statement: List[str] = [] + state = ParserState.NORMAL + i = 0 + + def yield_if_complete() -> Generator[str, None, None]: + statement = "".join(current_statement).strip() + if statement: + yield statement + current_statement.clear() + + while i < len(sql): + c = sql[i] + next_char = sql[i + 1] if i < len(sql) - 1 else "\0" + + if state == ParserState.NORMAL: + if c == "'": + state = ParserState.STRING + current_statement.append(c) + elif c == "-" and next_char == "-": + state = ParserState.COMMENT + current_statement.append(c) + current_statement.append(next_char) + i += 1 + elif c == "/" and next_char == "*": + state = ParserState.MULTILINE_COMMENT + current_statement.append(c) + current_statement.append(next_char) + i += 1 + else: + is_control_keyword, keyword, keyword_len = look_ahead_for_keywords( + sql, i, keywords=CONTROL_FLOW_KEYWORDS + ) + if is_control_keyword: + # Yield current statement if any + yield from yield_if_complete() + # Yield keyword as its own statement + yield keyword + i += keyword_len + continue + + ( + is_force_new_statement_keyword, + keyword, + keyword_len, + ) = look_ahead_for_keywords( + sql, i, keywords=FORCE_NEW_STATEMENT_KEYWORDS + ) + if is_force_new_statement_keyword: + # Force termination of current statement + yield from yield_if_complete() + + current_statement.append(keyword) + i += keyword_len + continue + + elif c == ";": + yield from yield_if_complete() + else: + current_statement.append(c) + + elif state == ParserState.STRING: + current_statement.append(c) + if c == "'" and next_char == "'": + current_statement.append(next_char) + i += 1 + elif c == "'": + state = ParserState.NORMAL + + elif state == ParserState.COMMENT: + current_statement.append(c) + if c == "\n": + state = ParserState.NORMAL + + elif state == ParserState.MULTILINE_COMMENT: + current_statement.append(c) + if c == "*" and next_char == "/": + current_statement.append(next_char) + i += 1 + state = 
ParserState.NORMAL + + i += 1 + + # Handle the last statement + statement = "".join(current_statement).strip() + if statement: + yield statement + + +# Example usage and test +if __name__ == "__main__": + test_sql = """ + CREATE TABLE Users (Id INT); + -- Comment here + INSERT INTO Users VALUES (1); + BEGIN + UPDATE Users SET Id = 2; + /* Multi-line + comment */ + DELETE FROM /* inline DELETE comment */ Users; + END + GO + SELECT * FROM Users + """ + + print("Statements found:") + for i, statement in enumerate(split_statements(test_sql), 1): + print(f"\n{i}. {statement}") diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index e8a0369597d53..aaa0c5e2fca12 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -762,7 +762,6 @@ def add_observed_query( This assumes that queries come in order of increasing timestamps. """ - self.report.num_observed_queries += 1 # All queries with no session ID are assumed to be part of the same session. 
diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json index 558548bfc7a69..94171fb4c8f32 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "ba144ff3-f6f8-4a61-a8a5-5cf1ed172738", + "job_id": "166cbe7f-971b-47a3-b5f4-ee66ee9344ef", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-09-16 15:59:53.077000", - "date_modified": "2024-09-16 15:59:53.217000", + "date_created": "2024-11-21 13:00:59.213000", + "date_modified": "2024-11-21 13:00:59.350000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -154,6 +154,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", @@ -231,10 +247,15 @@ "entityType": "container", "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -245,17 
+266,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", + "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -341,10 +357,15 @@ "entityType": "container", "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -355,17 +376,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", + "entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -451,10 +467,15 @@ "entityType": "container", "entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -465,17 
+486,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", + "entityUrn": "urn:li:container:e6b69ac2a511e798a89a4186881f70b8", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -561,10 +577,15 @@ "entityType": "container", "entityUrn": "urn:li:container:e6b69ac2a511e798a89a4186881f70b8", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -575,17 +596,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:e6b69ac2a511e798a89a4186881f70b8", + "entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -671,10 +687,15 @@ "entityType": "container", "entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -685,17 
+706,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", + "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -781,10 +797,15 @@ "entityType": "container", "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -795,17 +816,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", + "entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -891,10 +907,15 @@ "entityType": "container", "entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -905,17 
+926,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", + "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -1001,10 +1017,15 @@ "entityType": "container", "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -1015,17 +1036,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", + "entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -1111,10 +1127,15 @@ "entityType": "container", "entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ 
-1125,17 +1146,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", + "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -1217,22 +1233,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", @@ -1386,6 +1386,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", @@ -1459,22 +1475,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"mssql-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", @@ -1498,7 +1498,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1515,7 +1515,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1525,14 +1525,13 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": {}, - "name": "Items", - "description": "Description for table Items of schema Foo.", + "name": "age_dist", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "DemoData.Foo.Items", + "schemaName": "DemoData.Foo.age_dist", "platform": "urn:li:dataPlatform:mssql", "version": 0, "created": { @@ -1551,7 +1550,7 @@ }, "fields": [ { - "fieldPath": "ID", + "fieldPath": "Age", "nullable": true, "type": { "type": { @@ -1563,14 +1562,147 @@ "isPartOfKey": false }, { - "fieldPath": "ItemName", + "fieldPath": "Count", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + 
"changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + }, + { + "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "Items", + "description": "Description for table Items of schema Foo.", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "DemoData.Foo.Items", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": 
"urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "ID", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ItemName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false } @@ -1973,8 +2105,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-09-16 15:59:53.010000", - "date_modified": "2024-09-16 15:59:53.010000" + "date_created": "2024-11-21 13:00:59.083000", + "date_modified": "2024-11-21 13:00:59.083000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -1991,14 +2123,62 @@ }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", "aspectName": "dataJobInputOutput", "aspect": { "json": { - "inputDatasets": [], - "outputDatasets": [], - "inputDatajobs": [] + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" + ], + "fineGrainedLineages": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + 
"changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", + "depending_on_procedure": "{}", + "code": "\nCREATE PROCEDURE [Foo].[NewProc]\nAS\nBEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- \nCreate and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n \n \n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\nEND\n", + "input parameters": "[]", + "date_created": "2024-11-21 13:00:59.087000", + "date_modified": "2024-11-21 13:00:59.087000" + }, + "externalUrl": "", + "name": "DemoData.Foo.NewProc", + "type": { + "string": "MSSQL_STORED_PROCEDURE" + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -2084,10 +2264,15 @@ "entityType": "container", "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } 
}, "systemMetadata": { @@ -2098,17 +2283,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", + "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -2194,10 +2374,15 @@ "entityType": "container", "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -2208,17 +2393,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", + "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -2300,22 +2480,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - 
} -}, { "entityType": "container", "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", @@ -2425,6 +2589,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", @@ -2502,10 +2682,15 @@ "entityType": "container", "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] } }, "systemMetadata": { @@ -2516,17 +2701,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", - "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" - } - ] + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" } }, "systemMetadata": { @@ -2612,10 +2792,15 @@ "entityType": "container", "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + "path": [ + { + "id": 
"urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] } }, "systemMetadata": { @@ -2626,17 +2811,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", - "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" - } - ] + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" } }, "systemMetadata": { @@ -2722,10 +2902,15 @@ "entityType": "container", "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] } }, "systemMetadata": { @@ -2736,17 +2921,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", - "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" - } - ] + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" } }, "systemMetadata": { @@ -2832,10 +3012,15 @@ "entityType": "container", "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + "path": [ + { + 
"id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] } }, "systemMetadata": { @@ -2846,17 +3031,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", - "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" - } - ] + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" } }, "systemMetadata": { @@ -2942,10 +3122,15 @@ "entityType": "container", "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] } }, "systemMetadata": { @@ -2956,17 +3141,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", - "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" - } - ] + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" } }, "systemMetadata": { @@ -3052,10 +3232,15 @@ "entityType": "container", "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + "path": [ + { 
+ "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] } }, "systemMetadata": { @@ -3066,17 +3251,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", - "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" - } - ] + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" } }, "systemMetadata": { @@ -3162,10 +3342,15 @@ "entityType": "container", "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] } }, "systemMetadata": { @@ -3176,17 +3361,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", - "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" - } - ] + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" } }, "systemMetadata": { @@ -3272,10 +3452,15 @@ "entityType": "container", "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + "path": [ + 
{ + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] } }, "systemMetadata": { @@ -3286,17 +3471,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", - "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" - } - ] + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" } }, "systemMetadata": { @@ -3382,10 +3562,15 @@ "entityType": "container", "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] } }, "systemMetadata": { @@ -3396,17 +3581,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", - "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" - } - ] + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" } }, "systemMetadata": { @@ -3488,22 +3668,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": 
"urn:li:container:0a12bec9e9271b0db039923a770d75e5" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", @@ -3669,6 +3833,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", @@ -3742,22 +3922,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", @@ -4079,6 +4243,175 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + 
"aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", + "is_view": "True" + }, + "name": "View1", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "NewData.FooNew.View1", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "LastName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "FirstName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "changeType": "UPSERT", + "aspectName": 
"viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "urn": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", @@ -4156,10 +4489,15 @@ "entityType": "container", "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] } }, "systemMetadata": { @@ -4170,17 +4508,12 @@ }, { "entityType": "container", - "entityUrn": 
"urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", - "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" - } - ] + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" } }, "systemMetadata": { @@ -4266,10 +4599,15 @@ "entityType": "container", "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] } }, "systemMetadata": { @@ -4280,17 +4618,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", - "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" - } - ] + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" } }, "systemMetadata": { @@ -4376,10 +4709,15 @@ "entityType": "container", "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] } }, "systemMetadata": { @@ -4389,16 +4727,20 @@ } }, { - "entityType": "container", - 
"entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "upstreamLineage", "aspect": { "json": { - "path": [ + "upstreams": [ { - "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", - "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "type": "VIEW" } ] } @@ -4441,6 +4783,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", @@ -4472,5 +4830,21 @@ "runId": "mssql-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json index 161d40ea91d91..949eb32547ec5 100644 --- 
a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "ba144ff3-f6f8-4a61-a8a5-5cf1ed172738", + "job_id": "166cbe7f-971b-47a3-b5f4-ee66ee9344ef", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-09-16 15:59:53.077000", - "date_modified": "2024-09-16 15:59:53.217000", + "date_created": "2024-11-21 13:00:59.213000", + "date_modified": "2024-11-21 13:00:59.350000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -154,6 +154,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", @@ -231,10 +247,15 @@ "entityType": "container", "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -245,17 +266,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", + "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": 
"container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -341,10 +357,15 @@ "entityType": "container", "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -355,17 +376,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", + "entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -451,10 +467,15 @@ "entityType": "container", "entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -465,17 +486,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", + "entityUrn": "urn:li:container:e6b69ac2a511e798a89a4186881f70b8", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": 
"container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -561,10 +577,15 @@ "entityType": "container", "entityUrn": "urn:li:container:e6b69ac2a511e798a89a4186881f70b8", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -575,17 +596,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:e6b69ac2a511e798a89a4186881f70b8", + "entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -671,10 +687,15 @@ "entityType": "container", "entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -685,17 +706,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", + "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": 
"container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -781,10 +797,15 @@ "entityType": "container", "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -795,17 +816,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", + "entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -891,10 +907,15 @@ "entityType": "container", "entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -905,17 +926,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", + "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": 
"container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -1001,10 +1017,15 @@ "entityType": "container", "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -1015,17 +1036,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", + "entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -1111,10 +1127,15 @@ "entityType": "container", "entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -1125,17 +1146,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", + "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + 
"aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -1217,22 +1233,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", @@ -1386,6 +1386,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", @@ -1463,10 +1479,31 @@ "entityType": "container", "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", "aspectName": 
"container", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" } }, "systemMetadata": { @@ -1476,8 +1513,99 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "age_dist", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "DemoData.Foo.age_dist", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Count", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + 
"lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1486,6 +1614,10 @@ { "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + }, + { + "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" } ] } @@ -1973,8 +2105,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-09-16 15:59:53.010000", - "date_modified": "2024-09-16 15:59:53.010000" + "date_created": "2024-11-21 13:00:59.083000", + "date_modified": "2024-11-21 13:00:59.083000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -1991,14 +2123,62 @@ }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", "aspectName": "dataJobInputOutput", "aspect": { "json": { - "inputDatasets": [], - "outputDatasets": [], - "inputDatajobs": [] + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" + ], + "fineGrainedLineages": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": 
{ + "json": { + "customProperties": { + "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", + "depending_on_procedure": "{}", + "code": "\nCREATE PROCEDURE [Foo].[NewProc]\nAS\nBEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- \nCreate and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n \n \n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\nEND\n", + "input parameters": "[]", + "date_created": "2024-11-21 13:00:59.087000", + "date_modified": "2024-11-21 13:00:59.087000" + }, + "externalUrl": "", + "name": "DemoData.Foo.NewProc", + "type": { + "string": "MSSQL_STORED_PROCEDURE" + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -2084,10 +2264,15 @@ "entityType": "container", "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -2098,17 +2283,12 @@ }, { "entityType": 
"container", - "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", + "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -2194,10 +2374,15 @@ "entityType": "container", "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -2208,17 +2393,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", + "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -2304,10 +2484,15 @@ "entityType": "container", "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -2317,18 +2502,13 @@ } }, { - 
"entityType": "container", - "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "removed": false } }, "systemMetadata": { @@ -2339,7 +2519,7 @@ }, { "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", + "entityUrn": "urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2354,8 +2534,8 @@ } }, { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD)", + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json index 161d40ea91d91..949eb32547ec5 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "ba144ff3-f6f8-4a61-a8a5-5cf1ed172738", + "job_id": "166cbe7f-971b-47a3-b5f4-ee66ee9344ef", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-09-16 15:59:53.077000", - "date_modified": "2024-09-16 15:59:53.217000", + "date_created": "2024-11-21 13:00:59.213000", + "date_modified": "2024-11-21 13:00:59.350000", "step_id": "1", "step_name": "Set 
database to read only", "subsystem": "TSQL", @@ -154,6 +154,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", @@ -231,10 +247,15 @@ "entityType": "container", "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -245,17 +266,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", + "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -341,10 +357,15 @@ "entityType": "container", "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": 
"urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -355,17 +376,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", + "entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -451,10 +467,15 @@ "entityType": "container", "entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -465,17 +486,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", + "entityUrn": "urn:li:container:e6b69ac2a511e798a89a4186881f70b8", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -561,10 +577,15 @@ "entityType": "container", "entityUrn": "urn:li:container:e6b69ac2a511e798a89a4186881f70b8", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": 
"urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -575,17 +596,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:e6b69ac2a511e798a89a4186881f70b8", + "entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -671,10 +687,15 @@ "entityType": "container", "entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -685,17 +706,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", + "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -781,10 +797,15 @@ "entityType": "container", "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": 
"urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -795,17 +816,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", + "entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -891,10 +907,15 @@ "entityType": "container", "entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -905,17 +926,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", + "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -1001,10 +1017,15 @@ "entityType": "container", "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": 
"urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -1015,17 +1036,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", + "entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -1111,10 +1127,15 @@ "entityType": "container", "entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -1125,17 +1146,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", + "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -1217,22 +1233,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - 
"runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", @@ -1386,6 +1386,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", @@ -1463,10 +1479,31 @@ "entityType": "container", "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", "aspectName": "container", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" } }, "systemMetadata": { @@ -1476,8 +1513,99 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + 
"customProperties": {}, + "name": "age_dist", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "DemoData.Foo.age_dist", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Count", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1486,6 +1614,10 @@ { "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + }, + { + "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" } ] } @@ -1973,8 +2105,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT 
@ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-09-16 15:59:53.010000", - "date_modified": "2024-09-16 15:59:53.010000" + "date_created": "2024-11-21 13:00:59.083000", + "date_modified": "2024-11-21 13:00:59.083000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -1991,14 +2123,62 @@ }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", "aspectName": "dataJobInputOutput", "aspect": { "json": { - "inputDatasets": [], - "outputDatasets": [], - "inputDatajobs": [] + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" + ], + "fineGrainedLineages": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", + "depending_on_procedure": "{}", + "code": "\nCREATE PROCEDURE [Foo].[NewProc]\nAS\nBEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- \nCreate and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n 
-- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n \n \n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\nEND\n", + "input parameters": "[]", + "date_created": "2024-11-21 13:00:59.087000", + "date_modified": "2024-11-21 13:00:59.087000" + }, + "externalUrl": "", + "name": "DemoData.Foo.NewProc", + "type": { + "string": "MSSQL_STORED_PROCEDURE" + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -2084,10 +2264,15 @@ "entityType": "container", "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -2098,17 +2283,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", + "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -2194,10 +2374,15 @@ "entityType": "container", "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", "changeType": "UPSERT", - "aspectName": "container", + 
"aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -2208,17 +2393,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", + "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -2304,10 +2484,15 @@ "entityType": "container", "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -2317,18 +2502,13 @@ } }, { - "entityType": "container", - "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "status", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "removed": false } }, "systemMetadata": { @@ -2339,7 +2519,7 @@ }, { "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", + "entityUrn": "urn:li:dataFlow:(mssql,Weekly 
Demo Data Backup,PROD)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2354,8 +2534,8 @@ } }, { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD)", + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json index 29124f6fc156c..e855efcb0df6a 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "ba144ff3-f6f8-4a61-a8a5-5cf1ed172738", + "job_id": "166cbe7f-971b-47a3-b5f4-ee66ee9344ef", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-09-16 15:59:53.077000", - "date_modified": "2024-09-16 15:59:53.217000", + "date_created": "2024-11-21 13:00:59.213000", + "date_modified": "2024-11-21 13:00:59.350000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -154,6 +154,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", @@ -231,10 +247,15 @@ "entityType": "container", 
"entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -245,17 +266,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:7da983a1581c33cce8a106587b150f02", + "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -341,10 +357,15 @@ "entityType": "container", "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -355,17 +376,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:671f67227a05c22c9fa97c27abc56820", + "entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -451,10 +467,15 @@ "entityType": "container", 
"entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -465,17 +486,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:830660638ee785d5352ca300835af7ec", + "entityUrn": "urn:li:container:e6b69ac2a511e798a89a4186881f70b8", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -561,10 +577,15 @@ "entityType": "container", "entityUrn": "urn:li:container:e6b69ac2a511e798a89a4186881f70b8", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -575,17 +596,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:e6b69ac2a511e798a89a4186881f70b8", + "entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -671,10 +687,15 @@ "entityType": "container", 
"entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -685,17 +706,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:a5b29b900882d27c0d5fb0d5ccac92a5", + "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -781,10 +797,15 @@ "entityType": "container", "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -795,17 +816,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:b6baf19c5f148fba3d3385151a8c672f", + "entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -891,10 +907,15 @@ "entityType": "container", 
"entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -905,17 +926,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:ee19bd6cf8db0a0d086fbe78f7539bf7", + "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -1001,10 +1017,15 @@ "entityType": "container", "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -1015,17 +1036,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:6514a64e5b04f103c9c1dd0ebe3d8b47", + "entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -1111,10 +1127,15 @@ "entityType": "container", 
"entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -1125,17 +1146,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:fd80008628a03642d6e747c460a90619", + "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -1217,22 +1233,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:61332a50b978d8ca7245ddb34565d7b1", @@ -1386,6 +1386,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": 
"urn:li:container:046d11ae7c0bc9bde45993041ac011c9", @@ -1459,22 +1475,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "container", - "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "container", "entityUrn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", @@ -1498,7 +1498,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1515,7 +1515,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.Status": { @@ -1525,14 +1525,13 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": {}, - "name": "Items", - "description": "Description for table Items of schema Foo.", + "name": "age_dist", "tags": [] } }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "demodata.foo.items", + "schemaName": "demodata.foo.age_dist", "platform": "urn:li:dataPlatform:mssql", "version": 0, "created": { @@ -1551,7 +1550,7 @@ }, "fields": [ { - "fieldPath": "ID", + "fieldPath": "Age", "nullable": true, "type": { "type": { @@ -1563,14 +1562,147 @@ "isPartOfKey": false }, { - "fieldPath": "ItemName", + "fieldPath": "Count", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + 
"com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + }, + { + "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { 
+ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "Items", + "description": "Description for table Items of schema Foo.", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "demodata.foo.items", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "ID", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ItemName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", "recursive": false, "isPartOfKey": false } @@ -1973,8 +2105,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-09-16 15:59:53.010000", - "date_modified": "2024-09-16 15:59:53.010000" + "date_created": "2024-11-21 13:00:59.083000", + "date_modified": "2024-11-21 13:00:59.083000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -1991,14 +2123,62 @@ }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", "aspectName": "dataJobInputOutput", "aspect": { "json": { - "inputDatasets": [], - "outputDatasets": [], - "inputDatajobs": [] + "inputDatasets": [ + 
"urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" + ], + "fineGrainedLineages": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", + "depending_on_procedure": "{}", + "code": "\nCREATE PROCEDURE [Foo].[NewProc]\nAS\nBEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- \nCreate and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n \n \n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\nEND\n", + "input parameters": "[]", + "date_created": "2024-11-21 13:00:59.087000", + "date_modified": "2024-11-21 13:00:59.087000" + }, + "externalUrl": "", + "name": "DemoData.Foo.NewProc", + "type": { + "string": "MSSQL_STORED_PROCEDURE" + } + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": 
"urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -2084,10 +2264,15 @@ "entityType": "container", "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -2098,17 +2283,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:250ce23f940485303fa5e5d4f5194975", + "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -2194,10 +2374,15 @@ "entityType": "container", "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -2208,17 +2393,12 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:f84e3b6c61876e1625f9112cbc0e988f", + "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + 
"container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" } }, "systemMetadata": { @@ -2304,10 +2484,15 @@ "entityType": "container", "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "browsePathsV2", "aspect": { "json": { - "container": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + } + ] } }, "systemMetadata": { @@ -2318,17 +2503,18 @@ }, { "entityType": "container", - "entityUrn": "urn:li:container:d730a6ecf30bbb41cac5df5c0014168d", + "entityUrn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "containerProperties", "aspect": { "json": { - "path": [ - { - "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", - "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" - } - ] + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData" + }, + "name": "NewData", + "env": "PROD" } }, "systemMetadata": { @@ -2338,8 +2524,8 @@ } }, { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2354,8 +2540,2252 @@ } }, { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_accessadmin" + }, + "name": "db_accessadmin", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", 
+ "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_backupoperator" + }, + "name": "db_backupoperator", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + 
"changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { 
+ "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_datareader" + }, + "name": "db_datareader", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": 
"urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_datawriter" + }, + "name": "db_datawriter", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": 
"urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_ddladmin" + }, + "name": "db_ddladmin", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { 
+ "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_denydatareader" + }, + "name": "db_denydatareader", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": 
"urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_denydatawriter" + }, + "name": "db_denydatawriter", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "status", 
+ "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + 
"database": "NewData", + "schema": "db_owner" + }, + "name": "db_owner", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + 
"systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_securityadmin" + }, + "name": "db_securityadmin", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, 
+ "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "dbo" + }, + "name": "dbo", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.dbo.productsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:269d0067d130eda0399a534fc787054c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.dbo.productsnew,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "ProductsNew", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "newdata.dbo.productsnew", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "ID", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ProductName", + "nullable": 
true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Price", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "MONEY", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.dbo.productsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.dbo.productsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:269d0067d130eda0399a534fc787054c", + "urn": "urn:li:container:269d0067d130eda0399a534fc787054c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "FooNew" + }, + "name": "FooNew", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.itemsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.itemsnew,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "ItemsNew", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "newdata.foonew.itemsnew", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "ID", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ItemName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Price", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "SMALLMONEY", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.itemsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.itemsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "urn": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "PersonsNew", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "newdata.foonew.personsnew", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + 
"created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "ID", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "LastName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "FirstName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": 
"urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "urn": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", + "is_view": "True" + }, + "name": "View1", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "newdata.foonew.view1", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "LastName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + 
"isPartOfKey": false + }, + { + "fieldPath": "FirstName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "urn": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "guest" + }, + "name": "guest", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": 
"browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "INFORMATION_SCHEMA" + }, + "name": "INFORMATION_SCHEMA", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", 
+ "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "sys" + }, + "name": "sys", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + 
"changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "type": "VIEW" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataFlow", + 
"entityUrn": "urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json new file mode 100644 index 0000000000000..357d00e18cd9c --- /dev/null +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json @@ -0,0 +1,21 @@ +[ +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" + ], + "fineGrainedLineages": [] + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql b/metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql new file mode 100644 index 0000000000000..c46673cb4f055 --- /dev/null +++ b/metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql @@ -0,0 +1,29 @@ +CREATE PROCEDURE [Foo].[NewProc] @ID INT + AS + BEGIN + --insert into items table from salesreason 
table + insert into Foo.Items (ID, ItemName) + SELECT TempID, Name + FROM Foo.SalesReason; + + + IF OBJECT_ID('Foo.age_dist', 'U') IS NULL + + BEGIN + -- Create and populate if table doesn't exist + SELECT Age, COUNT(*) as Count + INTO Foo.age_dist + FROM Foo.Persons + GROUP BY Age + END + ELSE + BEGIN + -- Update existing table + TRUNCATE TABLE Foo.age_dist; + + INSERT INTO Foo.age_dist (Age, Count) + SELECT Age, COUNT(*) as Count + FROM Foo.Persons + GROUP BY Age + END + END \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql index 77ecabc5a3fff..1a19386fb587d 100644 --- a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql +++ b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql @@ -14,6 +14,12 @@ CREATE TABLE FooNew.PersonsNew ( FirstName varchar(255), Age int ); +GO +CREATE VIEW FooNew.View1 AS +SELECT LastName, FirstName +FROM FooNew.PersonsNew +WHERE Age > 18 +GO CREATE DATABASE DemoData; GO @@ -50,6 +56,36 @@ AS SELECT @ID AS ThatDB; GO +CREATE PROCEDURE [Foo].[NewProc] +AS +BEGIN + --insert into items table from salesreason table + insert into Foo.Items (ID, ItemName) + SELECT TempID, Name + FROM Foo.SalesReason; + + + IF OBJECT_ID('Foo.age_dist', 'U') IS NULL + BEGIN + -- Create and populate if table doesn't exist + SELECT Age, COUNT(*) as Count + INTO Foo.age_dist + FROM Foo.Persons + GROUP BY Age + END + ELSE + BEGIN + -- Update existing table + TRUNCATE TABLE Foo.age_dist; + + INSERT INTO Foo.age_dist (Age, Count) + SELECT Age, COUNT(*) as Count + FROM Foo.Persons + GROUP BY Age + END +END +GO +EXEC Foo.NewProc GO EXEC sys.sp_addextendedproperty @name = N'MS_Description', @@ -91,4 +127,4 @@ EXEC sp_attach_schedule GO EXEC dbo.sp_add_jobserver @job_name = N'Weekly Demo Data Backup' -GO +GO \ No newline at end of file diff --git 
a/metadata-ingestion/tests/integration/sql_server/source_files/mssql_with_lower_case_urn.yml b/metadata-ingestion/tests/integration/sql_server/source_files/mssql_with_lower_case_urn.yml index ff1179034833f..94128810f026b 100644 --- a/metadata-ingestion/tests/integration/sql_server/source_files/mssql_with_lower_case_urn.yml +++ b/metadata-ingestion/tests/integration/sql_server/source_files/mssql_with_lower_case_urn.yml @@ -5,7 +5,6 @@ source: config: username: sa password: test!Password - database: DemoData host_port: localhost:21433 convert_urns_to_lowercase: true # use_odbc: True diff --git a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py index 1f418ffbd32ea..1316584ff4c4b 100644 --- a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py +++ b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py @@ -1,9 +1,15 @@ import os import subprocess import time +from pathlib import Path import pytest +from datahub.ingestion.source.sql.mssql.job_models import StoredProcedure +from datahub.ingestion.source.sql.mssql.stored_procedure_lineage import ( + generate_procedure_lineage, +) +from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator from tests.test_helpers import mce_helpers from tests.test_helpers.click_helpers import run_datahub_cmd from tests.test_helpers.docker_helpers import cleanup_image, wait_for_port @@ -57,3 +63,58 @@ def test_mssql_ingest(mssql_runner, pytestconfig, tmp_path, mock_time, config_fi r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['date_modified'\]", ], ) + + +PROCEDURE_SQLS_FOLDER = "./tests/integration/sql_server/procedures" +procedure_sqls = os.listdir(PROCEDURE_SQLS_FOLDER) + + +@pytest.mark.parametrize("procedure_sql_file", procedure_sqls) +@pytest.mark.integration +def test_stored_procedure_split(pytestconfig, procedure_sql_file): + + sql_file_path = 
Path(f"{PROCEDURE_SQLS_FOLDER}/{procedure_sql_file}").resolve() + procedure_code = Path(sql_file_path).read_text() + + RESOURCE_DIR = ( + pytestconfig.rootpath / "tests/integration/sql_server/golden_files/procedures/" + ) + + # Procedure file is named as {db}.{schema}.{procedure_name}.sql + splits = procedure_sql_file.split(".") + db = splits[0] + schema = splits[1] + name = splits[2] + + procedure = StoredProcedure( + db=db, + schema=schema, + name=name, + flow=None, # type: ignore # flow is not used in this test + code=procedure_code, + ) + data_job_urn = f"urn:li:dataJob:(urn:li:dataFlow:(mssql,{db}.{schema}.stored_procedures,PROD),{name})" + + aggregator = SqlParsingAggregator( + platform="mssql", + generate_lineage=True, + generate_queries=False, + generate_usage_statistics=False, + generate_operations=False, + generate_query_subject_fields=False, + generate_query_usage_statistics=False, + ) + + mcps = list( + generate_procedure_lineage( + aggregator=aggregator, + procedure=procedure, + procedure_job_urn=data_job_urn, + ) + ) + mce_helpers.check_goldens_stream( + pytestconfig, + outputs=mcps, + golden_path=RESOURCE_DIR + / Path(procedure_sql_file).name.replace(".sql", ".json"), + ) From 1b81994c7367cfd8299a2271cad48f4345c8abe8 Mon Sep 17 00:00:00 2001 From: Mayuri N Date: Thu, 21 Nov 2024 19:25:20 +0530 Subject: [PATCH 03/13] minor updates --- .../golden_mces_mssql_no_db_to_file.json | 18 ++++--- .../golden_mces_mssql_no_db_with_filter.json | 18 ++++--- .../golden_mces_mssql_to_file.json | 18 ++++--- ...golden_mces_mssql_with_lower_case_urn.json | 18 ++++--- .../procedures/DemoData.Foo.NewProc.sql | 2 +- .../integration/sql_server/setup/setup.sql | 50 ++++++++++--------- .../integration/sql_server/test_sql_server.py | 2 +- 7 files changed, 68 insertions(+), 58 deletions(-) diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json index
94171fb4c8f32..d22e65d453925 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "166cbe7f-971b-47a3-b5f4-ee66ee9344ef", + "job_id": "f09bf076-da0c-407c-b92e-c72928c5cc46", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-21 13:00:59.213000", - "date_modified": "2024-11-21 13:00:59.350000", + "date_created": "2024-11-21 13:50:08.497000", + "date_modified": "2024-11-21 13:50:08.623000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2105,8 +2105,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-21 13:00:59.083000", - "date_modified": "2024-11-21 13:00:59.083000" + "date_created": "2024-11-21 13:50:08.383000", + "date_modified": "2024-11-21 13:50:08.383000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2129,9 +2129,11 @@ "aspect": { "json": { "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" ], "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" ], "fineGrainedLineages": [] @@ -2153,10 +2155,10 @@ "customProperties": { "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", "depending_on_procedure": "{}", - "code": "\nCREATE PROCEDURE [Foo].[NewProc]\nAS\nBEGIN\n --insert into items table from salesreason table\n 
insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- \nCreate and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n \n \n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\nEND\n", + "code": "\nCREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') I\nS NULL\n\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n \n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n END\n", "input parameters": "[]", - "date_created": "2024-11-21 13:00:59.087000", - "date_modified": "2024-11-21 13:00:59.087000" + "date_created": "2024-11-21 13:50:08.387000", + "date_modified": "2024-11-21 13:50:08.387000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json index 949eb32547ec5..e9cacf0d4a432 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "166cbe7f-971b-47a3-b5f4-ee66ee9344ef", + "job_id": "f09bf076-da0c-407c-b92e-c72928c5cc46", 
"job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-21 13:00:59.213000", - "date_modified": "2024-11-21 13:00:59.350000", + "date_created": "2024-11-21 13:50:08.497000", + "date_modified": "2024-11-21 13:50:08.623000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2105,8 +2105,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-21 13:00:59.083000", - "date_modified": "2024-11-21 13:00:59.083000" + "date_created": "2024-11-21 13:50:08.383000", + "date_modified": "2024-11-21 13:50:08.383000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2129,9 +2129,11 @@ "aspect": { "json": { "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" ], "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" ], "fineGrainedLineages": [] @@ -2153,10 +2155,10 @@ "customProperties": { "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", "depending_on_procedure": "{}", - "code": "\nCREATE PROCEDURE [Foo].[NewProc]\nAS\nBEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- \nCreate and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n \n \n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, 
COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\nEND\n", + "code": "\nCREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') I\nS NULL\n\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n \n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n END\n", "input parameters": "[]", - "date_created": "2024-11-21 13:00:59.087000", - "date_modified": "2024-11-21 13:00:59.087000" + "date_created": "2024-11-21 13:50:08.387000", + "date_modified": "2024-11-21 13:50:08.387000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json index 949eb32547ec5..e9cacf0d4a432 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "166cbe7f-971b-47a3-b5f4-ee66ee9344ef", + "job_id": "f09bf076-da0c-407c-b92e-c72928c5cc46", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-21 13:00:59.213000", - "date_modified": "2024-11-21 13:00:59.350000", + "date_created": "2024-11-21 13:50:08.497000", + "date_modified": "2024-11-21 13:50:08.623000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2105,8 +2105,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID 
AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-21 13:00:59.083000", - "date_modified": "2024-11-21 13:00:59.083000" + "date_created": "2024-11-21 13:50:08.383000", + "date_modified": "2024-11-21 13:50:08.383000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2129,9 +2129,11 @@ "aspect": { "json": { "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" ], "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" ], "fineGrainedLineages": [] @@ -2153,10 +2155,10 @@ "customProperties": { "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", "depending_on_procedure": "{}", - "code": "\nCREATE PROCEDURE [Foo].[NewProc]\nAS\nBEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- \nCreate and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n \n \n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\nEND\n", + "code": "\nCREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') I\nS NULL\n\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n 
ELSE\n BEGIN\n \n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n END\n", "input parameters": "[]", - "date_created": "2024-11-21 13:00:59.087000", - "date_modified": "2024-11-21 13:00:59.087000" + "date_created": "2024-11-21 13:50:08.387000", + "date_modified": "2024-11-21 13:50:08.387000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json index e855efcb0df6a..fa3da0e6a3a91 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "166cbe7f-971b-47a3-b5f4-ee66ee9344ef", + "job_id": "f09bf076-da0c-407c-b92e-c72928c5cc46", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-21 13:00:59.213000", - "date_modified": "2024-11-21 13:00:59.350000", + "date_created": "2024-11-21 13:50:08.497000", + "date_modified": "2024-11-21 13:50:08.623000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2105,8 +2105,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-21 13:00:59.083000", - "date_modified": "2024-11-21 13:00:59.083000" + "date_created": "2024-11-21 13:50:08.383000", + "date_modified": "2024-11-21 13:50:08.383000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2129,9 +2129,11 @@ "aspect": { "json": { "inputDatasets": [ + 
"urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" ], "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" ], "fineGrainedLineages": [] @@ -2153,10 +2155,10 @@ "customProperties": { "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", "depending_on_procedure": "{}", - "code": "\nCREATE PROCEDURE [Foo].[NewProc]\nAS\nBEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- \nCreate and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n \n \n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\nEND\n", + "code": "\nCREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') I\nS NULL\n\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n \n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n END\n", "input parameters": "[]", - "date_created": "2024-11-21 13:00:59.087000", - "date_modified": "2024-11-21 13:00:59.087000" + "date_created": "2024-11-21 13:50:08.387000", + "date_modified": "2024-11-21 
13:50:08.387000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", diff --git a/metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql b/metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql index c46673cb4f055..f8ba7c7d068a9 100644 --- a/metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql +++ b/metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql @@ -1,4 +1,4 @@ -CREATE PROCEDURE [Foo].[NewProc] @ID INT +CREATE PROCEDURE [Foo].[NewProc] AS BEGIN --insert into items table from salesreason table diff --git a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql index 1a19386fb587d..8e6746684d142 100644 --- a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql +++ b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql @@ -57,34 +57,36 @@ AS GO CREATE PROCEDURE [Foo].[NewProc] -AS -BEGIN - --insert into items table from salesreason table - insert into Foo.Items (ID, ItemName) - SELECT TempID, Name - FROM Foo.SalesReason; + AS + BEGIN + --insert into items table from salesreason table + insert into Foo.Items (ID, ItemName) + SELECT TempID, Name + FROM Foo.SalesReason; - IF OBJECT_ID('Foo.age_dist', 'U') IS NULL - BEGIN - -- Create and populate if table doesn't exist - SELECT Age, COUNT(*) as Count - INTO Foo.age_dist - FROM Foo.Persons - GROUP BY Age - END - ELSE - BEGIN - -- Update existing table - TRUNCATE TABLE Foo.age_dist; - - INSERT INTO Foo.age_dist (Age, Count) - SELECT Age, COUNT(*) as Count - FROM Foo.Persons - GROUP BY Age + IF OBJECT_ID('Foo.age_dist', 'U') IS NULL + + BEGIN + -- Create and populate if table doesn't exist + SELECT Age, COUNT(*) as Count + INTO Foo.age_dist + FROM Foo.Persons + GROUP BY Age + END + ELSE + BEGIN + -- Update existing table + TRUNCATE TABLE Foo.age_dist; + + INSERT INTO Foo.age_dist (Age, Count) + SELECT 
Age, COUNT(*) as Count + FROM Foo.Persons + GROUP BY Age + END END -END GO + EXEC Foo.NewProc GO EXEC sys.sp_addextendedproperty diff --git a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py index 1316584ff4c4b..994c47ac06be6 100644 --- a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py +++ b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py @@ -71,7 +71,7 @@ def test_mssql_ingest(mssql_runner, pytestconfig, tmp_path, mock_time, config_fi @pytest.mark.parametrize("procedure_sql_file", procedure_sqls) @pytest.mark.integration -def test_stored_procedure_split(pytestconfig, procedure_sql_file): +def test_stored_procedure_lineage(pytestconfig, procedure_sql_file): sql_file_path = Path(f"{PROCEDURE_SQLS_FOLDER}/{procedure_sql_file}").resolve() procedure_code = Path(sql_file_path).read_text() From 5d3d6623073146ae183e2ece95cac8cb9ca424ac Mon Sep 17 00:00:00 2001 From: Mayuri N Date: Thu, 21 Nov 2024 19:49:41 +0530 Subject: [PATCH 04/13] add new test for stored procedure --- .../sql/mssql/stored_procedure_lineage.py | 1 - .../procedures/demodata.foo.proc2.json | 55 +++++++++++++++++++ .../procedures/demodata.foo.proc2.sql | 29 ++++++++++ 3 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 metadata-ingestion/tests/integration/sql_server/golden_files/procedures/demodata.foo.proc2.json create mode 100644 metadata-ingestion/tests/integration/sql_server/procedures/demodata.foo.proc2.sql diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py index 5894b2d5fb6fa..3287b022ab3a8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py @@ -19,7 +19,6 @@ def generate_procedure_lineage( 
) -> Iterable[MetadataChangeProposalWrapper]: if procedure.code: for query in split_statements(procedure.code): - print(query) aggregator.add_observed_query( observed=ObservedQuery( default_db=procedure.db, diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/demodata.foo.proc2.json b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/demodata.foo.proc2.json new file mode 100644 index 0000000000000..49d5ac94cd23e --- /dev/null +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/demodata.foo.proc2.json @@ -0,0 +1,55 @@ +[ +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,demodata.foo.stored_procedures,PROD),proc2)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD),age)" + ], + "confidenceScore": 0.2 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),tempid)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),tempid)" + ], + "confidenceScore": 0.2 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" + ], + "confidenceScore": 0.2 + } + ] + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/procedures/demodata.foo.proc2.sql b/metadata-ingestion/tests/integration/sql_server/procedures/demodata.foo.proc2.sql new file mode 100644 index 0000000000000..d11caff9f0aa9 --- /dev/null +++ b/metadata-ingestion/tests/integration/sql_server/procedures/demodata.foo.proc2.sql @@ -0,0 +1,29 @@ +CREATE PROCEDURE [foo].[proc2] + AS + BEGIN + --insert into items table from salesreason table + insert into foo.items (id, itemame) + SELECT tempid, name + FROM foo.salesreason; + + + IF OBJECT_ID('foo.age_dist', 'U') IS NULL + + BEGIN + -- Create and populate if table doesn't exist + SELECT age, COUNT(*) as count + INTO foo.age_dist + FROM foo.persons + GROUP BY age + END + ELSE + BEGIN + -- Update existing table + TRUNCATE TABLE foo.age_dist; + + INSERT INTO foo.age_dist (age, count) + SELECT age, COUNT(*) as count + FROM foo.persons + GROUP BY age + END + END \ No newline at end of file From ac010032a9b612e8f406d493ad75d7202ee7ccd8 Mon Sep 17 00:00:00 2001 From: Mayuri N Date: Thu, 21 Nov 2024 19:58:57 +0530 Subject: [PATCH 05/13] fix lint --- metadata-ingestion/src/datahub/sql_parsing/datajob.py | 2 +- .../src/datahub/sql_parsing/split_statements.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/sql_parsing/datajob.py b/metadata-ingestion/src/datahub/sql_parsing/datajob.py index 7b638521f8491..781a44983d7e0 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/datajob.py +++ b/metadata-ingestion/src/datahub/sql_parsing/datajob.py @@ -2,9 +2,9 @@ from typing import Iterable, List from datahub.emitter.mcp import 
MetadataChangeProposalWrapper -from datahub.metadata._schema_classes import FineGrainedLineageClass from datahub.metadata.schema_classes import ( DataJobInputOutputClass, + FineGrainedLineageClass, UpstreamLineageClass, ) diff --git a/metadata-ingestion/src/datahub/sql_parsing/split_statements.py b/metadata-ingestion/src/datahub/sql_parsing/split_statements.py index ec7f5c20adba6..5f8e3526bd409 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/split_statements.py +++ b/metadata-ingestion/src/datahub/sql_parsing/split_statements.py @@ -1,6 +1,6 @@ import re from enum import Enum -from typing import Generator, List +from typing import Generator, List, Tuple CONTROL_FLOW_KEYWORDS = [ "GO", @@ -49,7 +49,7 @@ def is_keyword_at_position(sql: str, pos: int, keyword: str) -> bool: def look_ahead_for_keywords( sql: str, pos: int, keywords: List[str] -) -> tuple[bool, str, int]: +) -> Tuple[bool, str, int]: """ Look ahead for SQL keywords at the current position. """ From 397748e136ef64504049453892f7a37040f09ad5 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 21 Nov 2024 12:30:15 -0800 Subject: [PATCH 06/13] tweak tests --- .../tests/integration/sql_server/setup/setup.sql | 6 ++++++ .../integration/sql_server/test_sql_server.py | 15 ++++++--------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql index 8e6746684d142..d31bd9876f004 100644 --- a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql +++ b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql @@ -1,3 +1,4 @@ +DROP DATABASE IF EXISTS NewData; CREATE DATABASE NewData; GO USE NewData; @@ -21,6 +22,7 @@ FROM FooNew.PersonsNew WHERE Age > 18 GO +DROP DATABASE IF EXISTS DemoData; CREATE DATABASE DemoData; GO USE DemoData; @@ -51,11 +53,15 @@ CREATE TABLE Foo.SalesReason ) ; GO +DROP PROCEDURE IF EXISTS [Foo].[Proc.With.SpecialChar]; +GO CREATE 
PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT AS SELECT @ID AS ThatDB; GO +DROP PROCEDURE IF EXISTS [Foo].[NewProc]; +GO CREATE PROCEDURE [Foo].[NewProc] AS BEGIN diff --git a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py index 994c47ac06be6..d3b266a9d29cf 100644 --- a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py +++ b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py @@ -1,4 +1,5 @@ import os +import pathlib import subprocess import time from pathlib import Path @@ -65,21 +66,17 @@ def test_mssql_ingest(mssql_runner, pytestconfig, tmp_path, mock_time, config_fi ) -PROCEDURE_SQLS_FOLDER = "./tests/integration/sql_server/procedures" -procedure_sqls = os.listdir(PROCEDURE_SQLS_FOLDER) +PROCEDURE_SQLS_DIR = pathlib.Path(__file__).parent / "procedures" +PROCEDURES_GOLDEN_DIR = pathlib.Path(__file__).parent / "golden_files/procedures/" +procedure_sqls = [sql_file.name for sql_file in PROCEDURE_SQLS_DIR.iterdir()] @pytest.mark.parametrize("procedure_sql_file", procedure_sqls) @pytest.mark.integration def test_stored_procedure_lineage(pytestconfig, procedure_sql_file): - - sql_file_path = Path(f"{PROCEDURE_SQLS_FOLDER}/{procedure_sql_file}").resolve() + sql_file_path = Path(f"{PROCEDURE_SQLS_DIR}/{procedure_sql_file}").resolve() procedure_code = Path(sql_file_path).read_text() - RESOURCE_DIR = ( - pytestconfig.rootpath / "tests/integration/sql_server/golden_files/procedures/" - ) - # Procedure file is named as <db>.<schema>.<procedure_name>
splits = procedure_sql_file.split(".") db = splits[0] @@ -115,6 +112,6 @@ def test_stored_procedure_lineage(pytestconfig, procedure_sql_file): mce_helpers.check_goldens_stream( pytestconfig, outputs=mcps, - golden_path=RESOURCE_DIR + golden_path=PROCEDURES_GOLDEN_DIR / Path(procedure_sql_file).name.replace(".sql", ".json"), ) From dda71cd02110443672cf2649826935198fe3aac9 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 21 Nov 2024 17:17:31 -0800 Subject: [PATCH 07/13] minor cleanups --- .../sql/mssql/stored_procedure_lineage.py | 65 ++++++++++++++----- .../src/datahub/sql_parsing/datajob.py | 45 ++++++------- .../src/datahub/sql_parsing/query_types.py | 11 +++- .../datahub/sql_parsing/split_statements.py | 11 ++-- 4 files changed, 85 insertions(+), 47 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py index 3287b022ab3a8..c86acace8cf1e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py @@ -1,8 +1,10 @@ -from typing import Iterable +from typing import Iterable, Optional from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.source.sql.mssql.job_models import StoredProcedure -from datahub.sql_parsing.datajob import to_job_lineage +from datahub.metadata.schema_classes import DataJobInputOutputClass +from datahub.sql_parsing.datajob import to_datajob_input_output +from datahub.sql_parsing.schema_resolver import SchemaResolver from datahub.sql_parsing.split_statements import split_statements from datahub.sql_parsing.sql_parsing_aggregator import ( ObservedQuery, @@ -10,6 +12,41 @@ ) +def parse_procedure_code( + *, + schema_resolver: SchemaResolver, + default_db: Optional[str], + default_schema: Optional[str], + code: str, +) -> 
Optional[DataJobInputOutputClass]: + aggregator = SqlParsingAggregator( + platform=schema_resolver.platform, + env=schema_resolver.env, + schema_resolver=schema_resolver, + generate_lineage=True, + generate_queries=False, + generate_usage_statistics=False, + generate_operations=False, + generate_query_subject_fields=False, + generate_query_usage_statistics=False, + ) + for query in split_statements(code): + # TODO: We should take into account `USE x` statements. + aggregator.add_observed_query( + observed=ObservedQuery( + default_db=default_db, + default_schema=default_schema, + query=query, + ) + ) + + mcps = list(aggregator.gen_metadata()) + return to_datajob_input_output( + mcps=mcps, + ignore_extra_mcps=True, + ) + + # Is procedure handling generic enough to be added to SqlParsingAggregator? def generate_procedure_lineage( *, @@ -18,17 +55,15 @@ def generate_procedure_lineage( procedure_job_urn: str, ) -> Iterable[MetadataChangeProposalWrapper]: if procedure.code: - for query in split_statements(procedure.code): - aggregator.add_observed_query( - observed=ObservedQuery( - default_db=procedure.db, - default_schema=procedure.schema, - query=query, - ) - ) - mcps = list(aggregator.gen_metadata()) - yield from to_job_lineage( - job_urn=procedure_job_urn, - mcps=mcps, - ignore_extra_mcps=True, + datajob_input_output = parse_procedure_code( + schema_resolver=aggregator._schema_resolver, + default_db=procedure.db, + default_schema=procedure.schema, + code=procedure.code, ) + + if datajob_input_output: + yield MetadataChangeProposalWrapper( + entityUrn=procedure_job_urn, + aspect=datajob_input_output, + ) diff --git a/metadata-ingestion/src/datahub/sql_parsing/datajob.py b/metadata-ingestion/src/datahub/sql_parsing/datajob.py index 781a44983d7e0..215b207c3dcf5 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/datajob.py +++ b/metadata-ingestion/src/datahub/sql_parsing/datajob.py @@ -1,5 +1,5 @@ import logging -from typing import Iterable, List +from typing import 
Iterable, List, Optional from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.metadata.schema_classes import ( @@ -11,21 +11,17 @@ logger = logging.getLogger(__name__) -def to_job_lineage( - job_urn: str, - mcps: Iterable[MetadataChangeProposalWrapper], - ignore_extra_mcps: bool = True, -) -> Iterable[MetadataChangeProposalWrapper]: +def to_datajob_input_output( + *, mcps: Iterable[MetadataChangeProposalWrapper], ignore_extra_mcps: bool = True +) -> Optional[DataJobInputOutputClass]: inputDatasets: List[str] = [] outputDatasets: List[str] = [] fineGrainedLineages: List[FineGrainedLineageClass] = [] for mcp in mcps: - - # TODO: Simple write operations without lineage as outputDatasets + # TODO: Represent simple write operations without lineage as outputDatasets. upstream_lineage = mcp.as_workunit().get_aspect_of_type(UpstreamLineageClass) if upstream_lineage is not None: - if mcp.entityUrn and mcp.entityUrn not in outputDatasets: outputDatasets.append(mcp.entityUrn) @@ -37,21 +33,18 @@ def to_job_lineage( for fineGrainedLineage in upstream_lineage.fineGrainedLineages: fineGrainedLineages.append(fineGrainedLineage) + elif ignore_extra_mcps: + pass else: - if ignore_extra_mcps: - logger.warning( - f"Ignoring mcp {mcp.entityUrn}-{mcp.aspectName} with no lineage" - ) - else: - yield mcp - - if inputDatasets or outputDatasets: - # we have job lineage - yield MetadataChangeProposalWrapper( - entityUrn=job_urn, - aspect=DataJobInputOutputClass( - inputDatasets=inputDatasets, - outputDatasets=outputDatasets, - fineGrainedLineages=fineGrainedLineages, - ), - ) + raise ValueError( + f"Expected an upstreamLineage aspect, got {mcp.aspectName} for {mcp.entityUrn}" + ) + + if not inputDatasets and not outputDatasets: + return None + + return DataJobInputOutputClass( + inputDatasets=inputDatasets, + outputDatasets=outputDatasets, + fineGrainedLineages=fineGrainedLineages, + ) diff --git a/metadata-ingestion/src/datahub/sql_parsing/query_types.py 
b/metadata-ingestion/src/datahub/sql_parsing/query_types.py index 2acad19418c11..802fb3e993f42 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/query_types.py +++ b/metadata-ingestion/src/datahub/sql_parsing/query_types.py @@ -14,7 +14,16 @@ def _is_temp_table(table: sqlglot.exp.Table, dialect: sqlglot.Dialect) -> bool: identifier: sqlglot.exp.Identifier = table.this return identifier.args.get("temporary") or ( - is_dialect_instance(dialect, "redshift") and identifier.name.startswith("#") + # These dialects use # as a prefix for temp tables. + is_dialect_instance( + dialect, + [ + "redshift", + "mssql", + # sybase is another one, but we don't support that dialect yet. + ], + ) + and identifier.name.startswith("#") ) diff --git a/metadata-ingestion/src/datahub/sql_parsing/split_statements.py b/metadata-ingestion/src/datahub/sql_parsing/split_statements.py index 5f8e3526bd409..64caa6fffdf15 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/split_statements.py +++ b/metadata-ingestion/src/datahub/sql_parsing/split_statements.py @@ -28,7 +28,7 @@ class ParserState(Enum): MULTILINE_COMMENT = 4 -def is_keyword_at_position(sql: str, pos: int, keyword: str) -> bool: +def _is_keyword_at_position(sql: str, pos: int, keyword: str) -> bool: """ Check if a keyword exists at the given position using regex word boundaries. 
""" @@ -47,7 +47,7 @@ def is_keyword_at_position(sql: str, pos: int, keyword: str) -> bool: return bool(match) -def look_ahead_for_keywords( +def _look_ahead_for_keywords( sql: str, pos: int, keywords: List[str] ) -> Tuple[bool, str, int]: """ @@ -55,7 +55,7 @@ def look_ahead_for_keywords( """ for keyword in keywords: - if is_keyword_at_position(sql, pos, keyword): + if _is_keyword_at_position(sql, pos, keyword): return True, keyword, len(keyword) return False, "", 0 @@ -96,7 +96,7 @@ def yield_if_complete() -> Generator[str, None, None]: current_statement.append(next_char) i += 1 else: - is_control_keyword, keyword, keyword_len = look_ahead_for_keywords( + is_control_keyword, keyword, keyword_len = _look_ahead_for_keywords( sql, i, keywords=CONTROL_FLOW_KEYWORDS ) if is_control_keyword: @@ -111,7 +111,7 @@ def yield_if_complete() -> Generator[str, None, None]: is_force_new_statement_keyword, keyword, keyword_len, - ) = look_ahead_for_keywords( + ) = _look_ahead_for_keywords( sql, i, keywords=FORCE_NEW_STATEMENT_KEYWORDS ) if is_force_new_statement_keyword: @@ -157,6 +157,7 @@ def yield_if_complete() -> Generator[str, None, None]: # Example usage and test if __name__ == "__main__": + # TODO: Move this to a test. 
test_sql = """ CREATE TABLE Users (Id INT); -- Comment here From 85b6acdd397f48a5676ad3512296503b95213e00 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 21 Nov 2024 17:25:23 -0800 Subject: [PATCH 08/13] make more cll work --- .../golden_mces_mssql_no_db_to_file.json | 452 ++- .../golden_mces_mssql_no_db_with_filter.json | 234 +- .../golden_mces_mssql_to_file.json | 234 +- ...golden_mces_mssql_with_lower_case_urn.json | 2634 ++++++++++++++++- .../procedures/DemoData.Foo.NewProc.json | 36 +- 5 files changed, 3490 insertions(+), 100 deletions(-) diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json index 4302c41140dc6..225c0db8b9218 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "01afcab8-187c-459f-828e-727196a1832d", + "job_id": "cd483357-fcfc-4369-b80e-171f41298a64", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-21 21:01:26.550000", - "date_modified": "2024-11-21 21:01:26.690000", + "date_created": "2024-11-22 01:18:57.383000", + "date_modified": "2024-11-22 01:18:57.690000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1496,6 +1496,138 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" 
+ } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "age_dist", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "DemoData.Foo.age_dist", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Count", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + 
"id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + }, + { + "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", @@ -2150,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-21 21:01:26.483000", - "date_modified": "2024-11-21 21:01:26.483000" + "date_created": "2024-11-22 01:18:57.203000", + "date_modified": "2024-11-22 01:18:57.203000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2168,14 +2300,82 @@ }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", "aspectName": "dataJobInputOutput", "aspect": { "json": { - "inputDatasets": [], - "outputDatasets": [], - "inputDatajobs": [] + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD),age)" + ], + "confidenceScore": 0.2 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),tempid)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),tempid)" + ], + "confidenceScore": 0.2 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" + ], + "confidenceScore": 0.2 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", + "depending_on_procedure": "{}", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS\n NULL\n\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n \n-- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n 
END\n", + "input parameters": "[]", + "date_created": "2024-11-22 01:18:57.207000", + "date_modified": "2024-11-22 01:18:57.207000" + }, + "externalUrl": "", + "name": "DemoData.Foo.NewProc", + "type": { + "string": "MSSQL_STORED_PROCEDURE" + } } }, "systemMetadata": { @@ -4256,6 +4456,159 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", + "is_view": "True" + }, + "name": "View1", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "NewData.FooNew.View1", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "LastName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "FirstName", + 
"nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,NewData.FooNew.View1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "urn": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", @@ -4611,6 +4964,55 @@ "lastRunId": "no-run-id-provided" 
} }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),firstname)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),firstname)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),lastname)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),lastname)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataFlow", "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", @@ -4643,6 +5045,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", @@ -4690,5 +5108,21 @@ "runId": 
"mssql-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json index 0a50556edc638..690cc14d52e4b 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "01afcab8-187c-459f-828e-727196a1832d", + "job_id": "cd483357-fcfc-4369-b80e-171f41298a64", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-21 21:01:26.550000", - "date_modified": "2024-11-21 21:01:26.690000", + "date_created": "2024-11-22 01:18:57.383000", + "date_modified": "2024-11-22 01:18:57.690000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1496,6 +1496,138 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": 
"urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "age_dist", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "DemoData.Foo.age_dist", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Count", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": 
"urn:li:container:b275b7c099ce32f3faf1817cb054b100" + }, + { + "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", @@ -2150,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-21 21:01:26.483000", - "date_modified": "2024-11-21 21:01:26.483000" + "date_created": "2024-11-22 01:18:57.203000", + "date_modified": "2024-11-22 01:18:57.203000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2168,14 +2300,82 @@ }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", "aspectName": "dataJobInputOutput", "aspect": { "json": { - "inputDatasets": [], - "outputDatasets": [], - "inputDatajobs": [] + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD),age)" + ], + 
"confidenceScore": 0.2 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),tempid)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),tempid)" + ], + "confidenceScore": 0.2 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" + ], + "confidenceScore": 0.2 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", + "depending_on_procedure": "{}", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS\n NULL\n\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n \n-- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n END\n", + "input parameters": "[]", + "date_created": "2024-11-22 01:18:57.207000", + "date_modified": 
"2024-11-22 01:18:57.207000" + }, + "externalUrl": "", + "name": "DemoData.Foo.NewProc", + "type": { + "string": "MSSQL_STORED_PROCEDURE" + } } }, "systemMetadata": { @@ -2571,6 +2771,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json index 0a50556edc638..690cc14d52e4b 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "01afcab8-187c-459f-828e-727196a1832d", + "job_id": "cd483357-fcfc-4369-b80e-171f41298a64", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-21 21:01:26.550000", - "date_modified": "2024-11-21 21:01:26.690000", + "date_created": "2024-11-22 01:18:57.383000", + "date_modified": "2024-11-22 01:18:57.690000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -1496,6 +1496,138 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": 
"urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "age_dist", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "DemoData.Foo.age_dist", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Count", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + }, + { + "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)", @@ -2150,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-21 21:01:26.483000", - "date_modified": "2024-11-21 21:01:26.483000" + "date_created": "2024-11-22 01:18:57.203000", + "date_modified": "2024-11-22 01:18:57.203000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2168,14 +2300,82 @@ }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", "aspectName": "dataJobInputOutput", "aspect": { "json": { - "inputDatasets": [], - "outputDatasets": [], - "inputDatajobs": [] + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD),age)" + ], + "confidenceScore": 0.2 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),tempid)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),tempid)" + ], + "confidenceScore": 0.2 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" + ], + "confidenceScore": 0.2 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", + "depending_on_procedure": "{}", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS\n NULL\n\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n \n-- Update existing 
table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n END\n", + "input parameters": "[]", + "date_created": "2024-11-22 01:18:57.207000", + "date_modified": "2024-11-22 01:18:57.207000" + }, + "externalUrl": "", + "name": "DemoData.Foo.NewProc", + "type": { + "string": "MSSQL_STORED_PROCEDURE" + } } }, "systemMetadata": { @@ -2571,6 +2771,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json index 0279a94084ce5..cfd358d44829a 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "01afcab8-187c-459f-828e-727196a1832d", + "job_id": "cd483357-fcfc-4369-b80e-171f41298a64", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-21 21:01:26.550000", - "date_modified": "2024-11-21 21:01:26.690000", + "date_created": "2024-11-22 01:18:57.383000", + "date_modified": "2024-11-22 01:18:57.690000", "step_id": "1", "step_name": "Set database to read only", "subsystem": 
"TSQL", @@ -1496,6 +1496,138 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "age_dist", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "demodata.foo.age_dist", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Count", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + 
"changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:b275b7c099ce32f3faf1817cb054b100", + "urn": "urn:li:container:b275b7c099ce32f3faf1817cb054b100" + }, + { + "id": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9", + "urn": "urn:li:container:046d11ae7c0bc9bde45993041ac011c9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", @@ -2150,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-21 21:01:26.483000", - "date_modified": "2024-11-21 21:01:26.483000" + "date_created": "2024-11-22 01:18:57.203000", + "date_modified": "2024-11-22 01:18:57.203000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2168,14 +2300,82 @@ }, { "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),Proc.With.SpecialChar)", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", "aspectName": "dataJobInputOutput", "aspect": { "json": { - "inputDatasets": [], - "outputDatasets": [], - "inputDatajobs": [] + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", + 
"urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),Age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD),Age)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),TempID)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),tempid)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),Name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" + ], + "confidenceScore": 0.9 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", + "depending_on_procedure": "{}", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason 
table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS\n NULL\n\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n \n-- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n END\n", + "input parameters": "[]", + "date_created": "2024-11-22 01:18:57.207000", + "date_modified": "2024-11-22 01:18:57.207000" + }, + "externalUrl": "", + "name": "DemoData.Foo.NewProc", + "type": { + "string": "MSSQL_STORED_PROCEDURE" + } } }, "systemMetadata": { @@ -2515,68 +2715,19 @@ } }, { - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", "changeType": "UPSERT", - "aspectName": "upstreamLineage", + "aspectName": "containerProperties", "aspect": { "json": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", - "type": "VIEW" - } - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),Age)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),Age)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),FirstName)" - ], - "downstreamType": "FIELD", - "downstreams": [ - 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),FirstName)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),ID)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),ID)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),LastName)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),LastName)" - ], - "confidenceScore": 1.0 - } - ] + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData" + }, + "name": "NewData", + "env": "PROD" } }, "systemMetadata": { @@ -2586,8 +2737,8 @@ } }, { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -2602,8 +2753,2347 @@ } }, { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD)", + "entityType": "container", + "entityUrn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" + ] + } + }, + 
"systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_accessadmin" + }, + "name": "db_accessadmin", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:8b7691fec458d7383d5bc4e213831375", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_backupoperator" + }, + "name": "db_backupoperator", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": 
"mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:523d13eddd725607ec835a2459b05c9c", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_datareader" + }, + "name": "db_datareader", + "env": "PROD" + } + }, + "systemMetadata": 
{ + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:29bd421b2225a415df9c750e77404c66", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_datawriter" + }, + "name": "db_datawriter", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a3c02df4bcc7280a89f539b793b04197", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_ddladmin" + }, + "name": "db_ddladmin", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": 
"urn:li:container:c3b5d1cdc69a7d8faf0e1981e89b89d1", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_denydatareader" + }, + "name": "db_denydatareader", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:2b937d85ae7545dc769766008a332f42", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_denydatawriter" + }, + "name": "db_denydatawriter", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:a399d8bb765028abb9e55ae39846ca5e", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_owner" + }, + "name": "db_owner", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + 
"runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:457efe38f0aec2af9ad681cf1b43b1cb", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + 
"entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "db_securityadmin" + }, + "name": "db_securityadmin", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:1d87783ffe7e82210365dff4ca8ee7d1", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", 
+ "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "dbo" + }, + "name": "dbo", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:269d0067d130eda0399a534fc787054c", + "changeType": "UPSERT", + 
"aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.dbo.productsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:269d0067d130eda0399a534fc787054c" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.dbo.productsnew,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "ProductsNew", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "newdata.dbo.productsnew", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "ID", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ProductName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "NVARCHAR(max) COLLATE 
SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Price", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "MONEY", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.dbo.productsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.dbo.productsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:269d0067d130eda0399a534fc787054c", + "urn": "urn:li:container:269d0067d130eda0399a534fc787054c" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + 
"customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "FooNew" + }, + "name": "FooNew", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.itemsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + 
"container": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.itemsnew,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "ItemsNew", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "newdata.foonew.itemsnew", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "ID", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "ItemName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "NVARCHAR(max) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Price", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "SMALLMONEY", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.itemsnew,PROD)", + 
"changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.itemsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "urn": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "PersonsNew", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "newdata.foonew.personsnew", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + 
"platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "ID", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "LastName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "FirstName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "Age", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "urn": 
"urn:li:container:f721da08adde46586c0f113287cb60d1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", + "is_view": "True" + }, + "name": "View1", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "newdata.foonew.view1", + "platform": "urn:li:dataPlatform:mssql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "LastName", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "FirstName", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(255) 
COLLATE SQL_Latin1_General_CP1_CI_AS", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "CREATE VIEW FooNew.View1 AS\nSELECT LastName, FirstName\nFROM FooNew.PersonsNew\nWHERE Age > 18\n", + "viewLanguage": "SQL" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + }, + { + "id": "urn:li:container:f721da08adde46586c0f113287cb60d1", + "urn": "urn:li:container:f721da08adde46586c0f113287cb60d1" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + 
"lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "guest" + }, + "name": "guest", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:f3cb304e29e178d0615ed5ee6aa4ad58", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 
1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "INFORMATION_SCHEMA" + }, + "name": "INFORMATION_SCHEMA", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + 
} +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:752bb2abafeb2dae8f4adc7ffd547780", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mssql", + "env": "PROD", + "database": "NewData", + "schema": "sys" + }, + "name": "sys", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:mssql" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Schema" + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:46b713e3c7754c51649899f0f284ce34", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:0a12bec9e9271b0db039923a770d75e5", + "urn": "urn:li:container:0a12bec9e9271b0db039923a770d75e5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),Age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),Age)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),FirstName)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),FirstName)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": 
"FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),ID)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),ID)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),LastName)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.personsview,PROD),LastName)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),FirstName)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),FirstName)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),LastName)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.view1,PROD),LastName)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 
1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(mssql,Weekly Demo Data Backup,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json index 357d00e18cd9c..346a1e49c838b 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json @@ -14,7 +14,41 @@ "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" ], - "fineGrainedLineages": [] + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD),age)" + ], + "confidenceScore": 0.2 + }, + { + "upstreamType": "FIELD_SET", 
+ "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),tempid)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),tempid)" + ], + "confidenceScore": 0.2 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" + ], + "confidenceScore": 0.2 + } + ] } } } From 33858623c9c4826d2315eb0d0cc89b9d2a93259b Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 21 Nov 2024 17:43:02 -0800 Subject: [PATCH 09/13] remove aggregator code --- .../ingestion/source/sql/mssql/source.py | 22 ++------------- .../sql/mssql/stored_procedure_lineage.py | 15 +++++++++-- .../integration/sql_server/test_sql_server.py | 27 ++++++++----------- 3 files changed, 26 insertions(+), 38 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py index 2bdf265067376..feddba6e8c66f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py @@ -56,7 +56,6 @@ StringTypeClass, UnionTypeClass, ) -from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator logger: logging.Logger = logging.getLogger(__name__) @@ -180,20 +179,6 @@ def __init__(self, config: SQLServerConfig, ctx: PipelineContext): self._populate_table_descriptions(conn, db_name) self._populate_column_descriptions(conn, db_name) - def new_sql_aggregator(self) -> SqlParsingAggregator: - return SqlParsingAggregator( - platform=self.platform, - env=self.config.env, - schema_resolver=self.schema_resolver, - graph=self.ctx.graph, - 
generate_lineage=self.config.include_lineage, - generate_queries=False, - generate_usage_statistics=False, - generate_operations=False, - generate_query_subject_fields=False, - generate_query_usage_statistics=False, - ) - @staticmethod def _add_output_converters(conn: Connection) -> None: def handle_sql_variant_as_string(value): @@ -398,7 +383,7 @@ def loop_jobs( def loop_job_steps( self, job: MSSQLJob, job_steps: Dict[str, Any] ) -> Iterable[MetadataWorkUnit]: - for step_id, step_data in job_steps.items(): + for _step_id, step_data in job_steps.items(): step = JobStep( job_name=job.formatted_name, step_name=step_data["step_name"], @@ -472,16 +457,13 @@ def _process_stored_procedure( context=procedure.full_name, level=StructuredLogLevel.WARN, ): - aggregator = self.new_sql_aggregator() yield from auto_workunit( generate_procedure_lineage( - aggregator=aggregator, + schema_resolver=self.schema_resolver, procedure=procedure, procedure_job_urn=data_job.urn, ) ) - if aggregator.report.num_observed_queries_failed: - raise yield from self.construct_job_workunits( data_job, # For stored procedure lineage is ingested above diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py index c86acace8cf1e..10b915a1f72fc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py @@ -1,3 +1,4 @@ +import logging from typing import Iterable, Optional from datahub.emitter.mcp import MetadataChangeProposalWrapper @@ -11,6 +12,8 @@ SqlParsingAggregator, ) +logger = logging.getLogger(__name__) + def parse_procedure_code( *, @@ -18,6 +21,7 @@ def parse_procedure_code( default_db: Optional[str], default_schema: Optional[str], code: str, + raise_: bool = False, ) -> Optional[DataJobInputOutputClass]: aggregator = 
SqlParsingAggregator( platform=schema_resolver.platform, @@ -39,6 +43,11 @@ def parse_procedure_code( query=query, ) ) + if aggregator.report.num_observed_queries_failed and raise_: + logger.info(aggregator.report.as_string()) + raise ValueError( + f"Failed to parse {aggregator.report.num_observed_queries_failed} queries." + ) mcps = list(aggregator.gen_metadata()) return to_datajob_input_output( @@ -50,16 +59,18 @@ def parse_procedure_code( # Is procedure handling generic enough to be added to SqlParsingAggregator? def generate_procedure_lineage( *, - aggregator: SqlParsingAggregator, + schema_resolver: SchemaResolver, procedure: StoredProcedure, procedure_job_urn: str, + raise_: bool = False, ) -> Iterable[MetadataChangeProposalWrapper]: if procedure.code: datajob_input_output = parse_procedure_code( - schema_resolver=aggregator._schema_resolver, + schema_resolver=schema_resolver, default_db=procedure.db, default_schema=procedure.schema, code=procedure.code, + raise_=raise_, ) if datajob_input_output: diff --git a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py index d3b266a9d29cf..67897deeedff3 100644 --- a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py +++ b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py @@ -10,7 +10,7 @@ from datahub.ingestion.source.sql.mssql.stored_procedure_lineage import ( generate_procedure_lineage, ) -from datahub.sql_parsing.sql_parsing_aggregator import SqlParsingAggregator +from datahub.sql_parsing.schema_resolver import SchemaResolver from tests.test_helpers import mce_helpers from tests.test_helpers.click_helpers import run_datahub_cmd from tests.test_helpers.docker_helpers import cleanup_image, wait_for_port @@ -73,9 +73,11 @@ def test_mssql_ingest(mssql_runner, pytestconfig, tmp_path, mock_time, config_fi @pytest.mark.parametrize("procedure_sql_file", procedure_sqls) @pytest.mark.integration -def 
test_stored_procedure_lineage(pytestconfig, procedure_sql_file): - sql_file_path = Path(f"{PROCEDURE_SQLS_DIR}/{procedure_sql_file}").resolve() - procedure_code = Path(sql_file_path).read_text() +def test_stored_procedure_lineage( + pytestconfig: pytest.Config, procedure_sql_file: str +) -> None: + sql_file_path = PROCEDURE_SQLS_DIR / procedure_sql_file + procedure_code = sql_file_path.read_text() # Procedure file is named as .. splits = procedure_sql_file.split(".") @@ -92,19 +94,11 @@ def test_stored_procedure_lineage(pytestconfig, procedure_sql_file): ) data_job_urn = f"urn:li:dataJob:(urn:li:dataFlow:(mssql,{db}.{schema}.stored_procedures,PROD),{name})" - aggregator = SqlParsingAggregator( - platform="mssql", - generate_lineage=True, - generate_queries=False, - generate_usage_statistics=False, - generate_operations=False, - generate_query_subject_fields=False, - generate_query_usage_statistics=False, - ) + schema_resolver = SchemaResolver(platform="mssql") mcps = list( generate_procedure_lineage( - aggregator=aggregator, + schema_resolver=schema_resolver, procedure=procedure, procedure_job_urn=data_job_urn, ) @@ -112,6 +106,7 @@ def test_stored_procedure_lineage(pytestconfig, procedure_sql_file): mce_helpers.check_goldens_stream( pytestconfig, outputs=mcps, - golden_path=PROCEDURES_GOLDEN_DIR - / Path(procedure_sql_file).name.replace(".sql", ".json"), + golden_path=( + PROCEDURES_GOLDEN_DIR / Path(procedure_sql_file).with_suffix(".json") + ), ) From bf81203ac4871a89acc91e510cfa799858ec48c2 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 21 Nov 2024 18:16:55 -0800 Subject: [PATCH 10/13] fix bug in split statements --- .../datahub/sql_parsing/split_statements.py | 38 +++++--------- .../unit/sql_parsing/test_split_statements.py | 51 +++++++++++++++++++ 2 files changed, 63 insertions(+), 26 deletions(-) create mode 100644 metadata-ingestion/tests/unit/sql_parsing/test_split_statements.py diff --git 
a/metadata-ingestion/src/datahub/sql_parsing/split_statements.py b/metadata-ingestion/src/datahub/sql_parsing/split_statements.py index 64caa6fffdf15..42dda4e62158b 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/split_statements.py +++ b/metadata-ingestion/src/datahub/sql_parsing/split_statements.py @@ -12,6 +12,8 @@ "END", ] +# There's an exception to this rule, which is when the statement +# is preceeded by a CTE. FORCE_NEW_STATEMENT_KEYWORDS = [ # SELECT is used inside queries as well, so we can't include it here. "INSERT", @@ -77,6 +79,7 @@ def yield_if_complete() -> Generator[str, None, None]: yield statement current_statement.clear() + prev_real_char = "\0" # the most recent non-whitespace, non-comment character while i < len(sql): c = sql[i] next_char = sql[i + 1] if i < len(sql) - 1 else "\0" @@ -85,6 +88,7 @@ def yield_if_complete() -> Generator[str, None, None]: if c == "'": state = ParserState.STRING current_statement.append(c) + prev_real_char = c elif c == "-" and next_char == "-": state = ParserState.COMMENT current_statement.append(c) @@ -96,6 +100,10 @@ def yield_if_complete() -> Generator[str, None, None]: current_statement.append(next_char) i += 1 else: + most_recent_real_char = prev_real_char + if not c.isspace(): + prev_real_char = c + is_control_keyword, keyword, keyword_len = _look_ahead_for_keywords( sql, i, keywords=CONTROL_FLOW_KEYWORDS ) @@ -114,7 +122,9 @@ def yield_if_complete() -> Generator[str, None, None]: ) = _look_ahead_for_keywords( sql, i, keywords=FORCE_NEW_STATEMENT_KEYWORDS ) - if is_force_new_statement_keyword: + if ( + is_force_new_statement_keyword and most_recent_real_char != ")" + ): # usually we'd have a close paren that closes a CTE # Force termination of current statement yield from yield_if_complete() @@ -150,28 +160,4 @@ def yield_if_complete() -> Generator[str, None, None]: i += 1 # Handle the last statement - statement = "".join(current_statement).strip() - if statement: - yield statement - - -# Example 
usage and test -if __name__ == "__main__": - # TODO: Move this to a test. - test_sql = """ - CREATE TABLE Users (Id INT); - -- Comment here - INSERT INTO Users VALUES (1); - BEGIN - UPDATE Users SET Id = 2; - /* Multi-line - comment */ - DELETE FROM /* inline DELETE comment */ Users; - END - GO - SELECT * FROM Users - """ - - print("Statements found:") - for i, statement in enumerate(split_statements(test_sql), 1): - print(f"\n{i}. {statement}") + yield from yield_if_complete() diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_split_statements.py b/metadata-ingestion/tests/unit/sql_parsing/test_split_statements.py new file mode 100644 index 0000000000000..06e0e84ede554 --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/test_split_statements.py @@ -0,0 +1,51 @@ +from datahub.sql_parsing.split_statements import split_statements + + +def test_split_statements_complex() -> None: + test_sql = """ + CREATE TABLE Users (Id INT); + -- Comment here + INSERT INTO Users VALUES (1); + BEGIN + UPDATE Users SET Id = 2; + /* Multi-line + comment */ + DELETE FROM /* inline DELETE comment */ Users; + END + GO + SELECT * FROM Users + """ + + statements = [statement.strip() for statement in split_statements(test_sql)] + assert statements == [ + "CREATE TABLE Users (Id INT)", + "-- Comment here", + "INSERT INTO Users VALUES (1)", + "BEGIN", + "UPDATE Users SET Id = 2", + "/* Multi-line\n comment */", + "DELETE FROM /* inline DELETE comment */ Users", + "END", + "GO", + "SELECT * FROM Users", + ] + + +def test_split_statements_cte() -> None: + # SQL example from https://stackoverflow.com/a/11562724 + test_sql = """\ +WITH T AS +( SELECT InvoiceNumber, + DocTotal, + SUM(Sale + VAT) OVER(PARTITION BY InvoiceNumber) AS NewDocTotal + FROM PEDI_InvoiceDetail +) +-- comment +/* multi-line +comment */ +UPDATE T +SET DocTotal = NewDocTotal""" + statements = [statement.strip() for statement in split_statements(test_sql)] + assert statements == [ + test_sql, + ] From 
43aabe483587dd34d3764be990932e38e855557a Mon Sep 17 00:00:00 2001 From: Mayuri N Date: Fri, 22 Nov 2024 16:13:41 +0530 Subject: [PATCH 11/13] add is_temp_table check --- .../ingestion/source/sql/mssql/source.py | 73 ++++++++-- .../sql/mssql/stored_procedure_lineage.py | 6 +- .../ingestion/source/sql/sql_common.py | 3 + .../source/state/entity_removal_state.py | 6 +- .../golden_mces_mssql_no_db_to_file.json | 72 +--------- .../golden_mces_mssql_no_db_with_filter.json | 72 +--------- .../golden_mces_mssql_to_file.json | 72 +--------- ...golden_mces_mssql_with_lower_case_urn.json | 130 +++++++++--------- 8 files changed, 155 insertions(+), 279 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py index feddba6e8c66f..140f4706ec26e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py @@ -56,6 +56,7 @@ StringTypeClass, UnionTypeClass, ) +from datahub.utilities.file_backed_collections import FileBackedList logger: logging.Logger = logging.getLogger(__name__) @@ -170,6 +171,7 @@ def __init__(self, config: SQLServerConfig, ctx: PipelineContext): self.current_database = None self.table_descriptions: Dict[str, str] = {} self.column_descriptions: Dict[str, str] = {} + self.stored_procedures: FileBackedList[StoredProcedure] = FileBackedList() if self.config.include_descriptions: for inspector in self.get_inspectors(): db_name: str = self.get_db_name(inspector) @@ -452,21 +454,11 @@ def _process_stored_procedure( for property_name, property_value in properties.items(): data_job.add_property(property_name, str(property_value)) if self.config.include_lineage: - with self.report.report_exc( - message="Failed to parse stored procedure lineage", - context=procedure.full_name, - level=StructuredLogLevel.WARN, - ): - yield from auto_workunit( - generate_procedure_lineage( - 
schema_resolver=self.schema_resolver, - procedure=procedure, - procedure_job_urn=data_job.urn, - ) - ) + # These will be used to construct lineage + self.stored_procedures.append(procedure) yield from self.construct_job_workunits( data_job, - # For stored procedure lineage is ingested above + # For stored procedure lineage is ingested later include_lineage=False, ) @@ -692,3 +684,58 @@ def get_identifier( if self.config.convert_urns_to_lowercase else qualified_table_name ) + + def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: + yield from super().get_workunits_internal() + + # This is done at the end so that we will have access to tables + # from all databases in schema_resolver and discovered_tables + for procedure in self.stored_procedures: + with self.report.report_exc( + message="Failed to parse stored procedure lineage", + context=procedure.full_name, + level=StructuredLogLevel.WARN, + ): + yield from auto_workunit( + generate_procedure_lineage( + schema_resolver=self.schema_resolver, + procedure=procedure, + procedure_job_urn=MSSQLDataJob(entity=procedure).urn, + is_temp_table=self.is_temp_table, + ) + ) + + def is_temp_table(self, name: str) -> bool: + try: + parts = name.split(".") + table_name = parts[-1] + schema_name = parts[-2] + db_name = parts[-3] + + if table_name.startswith("#"): + return True + + # This is also a temp table if + # 1. this name would be allowed by the dataset patterns, and + # 2. we have a list of discovered tables, and + # 3. 
it's not in the discovered tables list + if ( + self.config.database_pattern.allowed(db_name) + and self.config.schema_pattern.allowed(schema_name) + and self.config.table_pattern.allowed(name) + and self.standardize_identifier_case(name) + not in self.discovered_datasets + ): + logger.debug(f"inferred as temp table {name}") + return True + + except Exception: + logger.warning(f"Error parsing table name {name} ") + return False + + def standardize_identifier_case(self, table_ref_str: str) -> str: + return ( + table_ref_str.lower() + if self.config.convert_urns_to_lowercase + else table_ref_str + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py index 10b915a1f72fc..b979a270a5528 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/stored_procedure_lineage.py @@ -1,5 +1,5 @@ import logging -from typing import Iterable, Optional +from typing import Callable, Iterable, Optional from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.source.sql.mssql.job_models import StoredProcedure @@ -21,6 +21,7 @@ def parse_procedure_code( default_db: Optional[str], default_schema: Optional[str], code: str, + is_temp_table: Callable[[str], bool], raise_: bool = False, ) -> Optional[DataJobInputOutputClass]: aggregator = SqlParsingAggregator( @@ -33,6 +34,7 @@ def parse_procedure_code( generate_operations=False, generate_query_subject_fields=False, generate_query_usage_statistics=False, + is_temp_table=is_temp_table, ) for query in split_statements(code): # TODO: We should take into account `USE x` statements. 
@@ -62,6 +64,7 @@ def generate_procedure_lineage( schema_resolver: SchemaResolver, procedure: StoredProcedure, procedure_job_urn: str, + is_temp_table: Callable[[str], bool] = lambda _: False, raise_: bool = False, ) -> Iterable[MetadataChangeProposalWrapper]: if procedure.code: @@ -70,6 +73,7 @@ def generate_procedure_lineage( default_db=procedure.db, default_schema=procedure.schema, code=procedure.code, + is_temp_table=is_temp_table, raise_=raise_, ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index c3a0217ec70ea..ae6116326da33 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -392,6 +392,7 @@ def __init__(self, config: SQLCommonConfig, ctx: PipelineContext, platform: str) platform_instance=self.config.platform_instance, env=self.config.env, ) + self.discovered_datasets: Set[str] = set() self._view_definition_cache: MutableMapping[str, str] if self.config.use_file_backed_cache: self._view_definition_cache = FileBackedDict[str]() @@ -833,6 +834,7 @@ def _process_table( dataset_snapshot.aspects.append(schema_metadata) if self._save_schema_to_resolver(): self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata) + self.discovered_datasets.add(dataset_name) db_name = self.get_db_name(inspector) yield from self.add_table_to_schema_container( @@ -1128,6 +1130,7 @@ def _process_view( ) if self._save_schema_to_resolver(): self.schema_resolver.add_schema_metadata(dataset_urn, schema_metadata) + self.discovered_datasets.add(dataset_name) description, properties, _ = self.get_table_properties(inspector, schema, view) try: view_definition = inspector.get_view_definition(view, schema) diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py b/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py index 
2b10ca1fa57ed..318395d4e66b2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py @@ -146,11 +146,7 @@ def urn_count(self) -> int: def compute_percent_entities_changed( new_entities: List[str], old_entities: List[str] ) -> float: - ( - overlap_count, - old_count, - _, - ) = _get_entity_overlap_and_cardinalities( + (overlap_count, old_count, _,) = _get_entity_overlap_and_cardinalities( new_entities=new_entities, old_entities=old_entities ) diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json index 225c0db8b9218..eee23fa38f3fe 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "cd483357-fcfc-4369-b80e-171f41298a64", + "job_id": "d51c2aac-aecc-4f9b-9f0b-601140fef79b", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-22 01:18:57.383000", - "date_modified": "2024-11-22 01:18:57.690000", + "date_created": "2024-11-22 10:38:26.833000", + "date_modified": "2024-11-22 10:38:26.970000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-22 01:18:57.203000", - "date_modified": "2024-11-22 01:18:57.203000" + "date_created": "2024-11-22 10:38:26.733000", + "date_modified": "2024-11-22 10:38:26.733000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ 
-2298,64 +2298,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", - "changeType": "UPSERT", - "aspectName": "dataJobInputOutput", - "aspect": { - "json": { - "inputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" - ], - "outputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),age)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD),age)" - ], - "confidenceScore": 0.2 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),tempid)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),tempid)" - ], - "confidenceScore": 0.2 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),name)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" - ], - "confidenceScore": 0.2 - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", @@ -2368,8 +2310,8 @@ "depending_on_procedure": "{}", 
"code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS\n NULL\n\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n \n-- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n END\n", "input parameters": "[]", - "date_created": "2024-11-22 01:18:57.207000", - "date_modified": "2024-11-22 01:18:57.207000" + "date_created": "2024-11-22 10:38:26.737000", + "date_modified": "2024-11-22 10:38:26.737000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json index 690cc14d52e4b..e8af20c601edf 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "cd483357-fcfc-4369-b80e-171f41298a64", + "job_id": "d51c2aac-aecc-4f9b-9f0b-601140fef79b", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-22 01:18:57.383000", - "date_modified": "2024-11-22 01:18:57.690000", + "date_created": "2024-11-22 10:38:26.833000", + "date_modified": "2024-11-22 10:38:26.970000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input 
parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-22 01:18:57.203000", - "date_modified": "2024-11-22 01:18:57.203000" + "date_created": "2024-11-22 10:38:26.733000", + "date_modified": "2024-11-22 10:38:26.733000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2298,64 +2298,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", - "changeType": "UPSERT", - "aspectName": "dataJobInputOutput", - "aspect": { - "json": { - "inputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" - ], - "outputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),age)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD),age)" - ], - "confidenceScore": 0.2 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),tempid)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),tempid)" - ], - "confidenceScore": 0.2 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),name)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" - ], - "confidenceScore": 0.2 - 
} - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", @@ -2368,8 +2310,8 @@ "depending_on_procedure": "{}", "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS\n NULL\n\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n \n-- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n END\n", "input parameters": "[]", - "date_created": "2024-11-22 01:18:57.207000", - "date_modified": "2024-11-22 01:18:57.207000" + "date_created": "2024-11-22 10:38:26.737000", + "date_modified": "2024-11-22 10:38:26.737000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json index 690cc14d52e4b..e8af20c601edf 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "cd483357-fcfc-4369-b80e-171f41298a64", + "job_id": "d51c2aac-aecc-4f9b-9f0b-601140fef79b", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-22 01:18:57.383000", - "date_modified": "2024-11-22 01:18:57.690000", + "date_created": "2024-11-22 
10:38:26.833000", + "date_modified": "2024-11-22 10:38:26.970000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-22 01:18:57.203000", - "date_modified": "2024-11-22 01:18:57.203000" + "date_created": "2024-11-22 10:38:26.733000", + "date_modified": "2024-11-22 10:38:26.733000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2298,64 +2298,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", - "changeType": "UPSERT", - "aspectName": "dataJobInputOutput", - "aspect": { - "json": { - "inputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" - ], - "outputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),age)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD),age)" - ], - "confidenceScore": 0.2 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),tempid)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),tempid)" - ], - "confidenceScore": 0.2 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),name)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" - ], - "confidenceScore": 0.2 - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", @@ -2368,8 +2310,8 @@ "depending_on_procedure": "{}", "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS\n NULL\n\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n \n-- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n END\n", "input parameters": "[]", - "date_created": "2024-11-22 01:18:57.207000", - "date_modified": "2024-11-22 01:18:57.207000" + "date_created": "2024-11-22 10:38:26.737000", + "date_modified": "2024-11-22 10:38:26.737000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json index cfd358d44829a..6b2a6517ed823 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json @@ -113,11 +113,11 @@ "aspect": { "json": { 
"customProperties": { - "job_id": "cd483357-fcfc-4369-b80e-171f41298a64", + "job_id": "7b9e639a-f3e9-4921-9841-c046034c08c7", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-22 01:18:57.383000", - "date_modified": "2024-11-22 01:18:57.690000", + "date_created": "2024-11-22 09:34:52.980000", + "date_modified": "2024-11-22 09:34:53.717000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-22 01:18:57.203000", - "date_modified": "2024-11-22 01:18:57.203000" + "date_created": "2024-11-22 09:34:52.597000", + "date_modified": "2024-11-22 09:34:52.597000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2298,64 +2298,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", - "changeType": "UPSERT", - "aspectName": "dataJobInputOutput", - "aspect": { - "json": { - "inputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" - ], - "outputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" - ], - "fineGrainedLineages": [ - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),Age)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD),Age)" - ], - "confidenceScore": 0.9 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),TempID)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),tempid)" - ], - "confidenceScore": 0.9 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),Name)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" - ], - "confidenceScore": 0.9 - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "mssql-test", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", @@ -2368,8 +2310,8 @@ "depending_on_procedure": "{}", "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS\n NULL\n\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n \n-- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n END\n", "input parameters": "[]", - "date_created": "2024-11-22 01:18:57.207000", - "date_modified": "2024-11-22 01:18:57.207000" + "date_created": "2024-11-22 09:34:52.610000", + "date_modified": "2024-11-22 09:34:52.610000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", @@ -5059,6 +5001,64 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataJob", + "entityUrn": 
"urn:li:dataJob:(urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD),NewProc)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),Age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD),Age)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),TempID)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),tempid)" + ], + "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD),Name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" + ], + "confidenceScore": 0.9 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "mssql-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataFlow", "entityUrn": "urn:li:dataFlow:(mssql,DemoData.Foo.stored_procedures,PROD)", From 1fae14d3238c8c6087137137c5b999fb9802a15b Mon Sep 17 00:00:00 2001 From: Mayuri N Date: Fri, 22 Nov 2024 18:25:40 +0530 Subject: [PATCH 12/13] testcase for via temp table lineage 
- fix issue in _get_procedure_code method remove unnecessary newline separator --- .../ingestion/source/sql/mssql/source.py | 4 +-- .../source/state/entity_removal_state.py | 6 +++- .../golden_mces_mssql_no_db_to_file.json | 16 ++++----- .../golden_mces_mssql_no_db_with_filter.json | 16 ++++----- .../golden_mces_mssql_to_file.json | 16 ++++----- ...golden_mces_mssql_with_lower_case_urn.json | 33 +++++++++++++------ .../procedures/DemoData.Foo.NewProc.json | 6 ++-- .../procedures/demodata.foo.proc2.json | 6 ++-- .../procedures/DemoData.Foo.NewProc.sql | 8 +++++ .../procedures/demodata.foo.proc2.sql | 7 ++++ .../integration/sql_server/setup/setup.sql | 11 ++++++- .../integration/sql_server/test_sql_server.py | 1 + 12 files changed, 88 insertions(+), 42 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py index 140f4706ec26e..9ab9c76c30417 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mssql/source.py @@ -564,8 +564,8 @@ def _get_procedure_code( code_list.append(row["Text"]) if code_slice_text in re.sub(" +", " ", row["Text"].lower()).strip(): code_slice_index = index - definition = "\n".join(code_list[:code_slice_index]) - code = "\n".join(code_list[code_slice_index:]) + definition = "".join(code_list[:code_slice_index]) + code = "".join(code_list[code_slice_index:]) except ResourceClosedError: logger.warning( "Connection was closed from procedure '%s'", diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py b/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py index 318395d4e66b2..2b10ca1fa57ed 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py @@ -146,7 +146,11 @@ def urn_count(self) 
-> int: def compute_percent_entities_changed( new_entities: List[str], old_entities: List[str] ) -> float: - (overlap_count, old_count, _,) = _get_entity_overlap_and_cardinalities( + ( + overlap_count, + old_count, + _, + ) = _get_entity_overlap_and_cardinalities( new_entities=new_entities, old_entities=old_entities ) diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json index eee23fa38f3fe..f010cbd0dc7a1 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "d51c2aac-aecc-4f9b-9f0b-601140fef79b", + "job_id": "400f2a31-7ec0-4feb-9079-9d921d18d3a6", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-22 10:38:26.833000", - "date_modified": "2024-11-22 10:38:26.970000", + "date_created": "2024-11-22 12:47:39.113000", + "date_modified": "2024-11-22 12:47:39.267000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-22 10:38:26.733000", - "date_modified": "2024-11-22 10:38:26.733000" + "date_created": "2024-11-22 12:47:38.993000", + "date_modified": "2024-11-22 12:47:38.993000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2308,10 +2308,10 @@ "customProperties": { "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", 
"depending_on_procedure": "{}", - "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS\n NULL\n\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n \n-- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n END\n", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age \n INTO #TEMPTABLE \n FROM NewData.FooNew.PersonsNew;\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID;\n\n END\n", "input parameters": "[]", - "date_created": "2024-11-22 10:38:26.737000", - "date_modified": "2024-11-22 10:38:26.737000" + "date_created": "2024-11-22 12:47:38.997000", + "date_modified": "2024-11-22 12:47:38.997000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json index e8af20c601edf..4417cafabde31 100644 --- 
a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "d51c2aac-aecc-4f9b-9f0b-601140fef79b", + "job_id": "400f2a31-7ec0-4feb-9079-9d921d18d3a6", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-22 10:38:26.833000", - "date_modified": "2024-11-22 10:38:26.970000", + "date_created": "2024-11-22 12:47:39.113000", + "date_modified": "2024-11-22 12:47:39.267000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-22 10:38:26.733000", - "date_modified": "2024-11-22 10:38:26.733000" + "date_created": "2024-11-22 12:47:38.993000", + "date_modified": "2024-11-22 12:47:38.993000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2308,10 +2308,10 @@ "customProperties": { "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", "depending_on_procedure": "{}", - "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS\n NULL\n\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n \n-- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM 
Foo.Persons\n GROUP BY Age\n END\n END\n", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age \n INTO #TEMPTABLE \n FROM NewData.FooNew.PersonsNew;\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID;\n\n END\n", "input parameters": "[]", - "date_created": "2024-11-22 10:38:26.737000", - "date_modified": "2024-11-22 10:38:26.737000" + "date_created": "2024-11-22 12:47:38.997000", + "date_modified": "2024-11-22 12:47:38.997000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json index e8af20c601edf..4417cafabde31 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "d51c2aac-aecc-4f9b-9f0b-601140fef79b", + "job_id": "400f2a31-7ec0-4feb-9079-9d921d18d3a6", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-22 10:38:26.833000", - "date_modified": "2024-11-22 10:38:26.970000", + "date_created": "2024-11-22 12:47:39.113000", + "date_modified": "2024-11-22 12:47:39.267000", "step_id": "1", 
"step_name": "Set database to read only", "subsystem": "TSQL", @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-22 10:38:26.733000", - "date_modified": "2024-11-22 10:38:26.733000" + "date_created": "2024-11-22 12:47:38.993000", + "date_modified": "2024-11-22 12:47:38.993000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2308,10 +2308,10 @@ "customProperties": { "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", "depending_on_procedure": "{}", - "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS\n NULL\n\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n \n-- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n END\n", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age \n INTO #TEMPTABLE \n FROM 
NewData.FooNew.PersonsNew;\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID;\n\n END\n", "input parameters": "[]", - "date_created": "2024-11-22 10:38:26.737000", - "date_modified": "2024-11-22 10:38:26.737000" + "date_created": "2024-11-22 12:47:38.997000", + "date_modified": "2024-11-22 12:47:38.997000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json index 6b2a6517ed823..163607c641c0a 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "7b9e639a-f3e9-4921-9841-c046034c08c7", + "job_id": "400f2a31-7ec0-4feb-9079-9d921d18d3a6", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-22 09:34:52.980000", - "date_modified": "2024-11-22 09:34:53.717000", + "date_created": "2024-11-22 12:47:39.113000", + "date_modified": "2024-11-22 12:47:39.267000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-22 09:34:52.597000", - "date_modified": "2024-11-22 09:34:52.597000" + "date_created": "2024-11-22 12:47:38.993000", + "date_modified": "2024-11-22 12:47:38.993000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2308,10 +2308,10 @@ "customProperties": { "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 
'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", "depending_on_procedure": "{}", - "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS\n NULL\n\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n \n-- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n END\n", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age \n INTO #TEMPTABLE \n FROM NewData.FooNew.PersonsNew;\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID;\n\n END\n", "input parameters": "[]", - "date_created": "2024-11-22 09:34:52.610000", - "date_modified": "2024-11-22 09:34:52.610000" + "date_created": "2024-11-22 12:47:38.997000", + "date_modified": "2024-11-22 12:47:38.997000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", @@ -5010,11 +5010,13 @@ "json": { "inputDatasets": [ "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" + 
"urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)" ], "outputDatasets": [ "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)" ], "fineGrainedLineages": [ { @@ -5049,6 +5051,17 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD),name)" ], "confidenceScore": 0.9 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD),Age)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD),Age)" + ], + "confidenceScore": 0.35 } ] } diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json index 346a1e49c838b..609e3a6f42945 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/DemoData.Foo.NewProc.json @@ -8,11 +8,13 @@ "json": { "inputDatasets": [ "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)" ], "outputDatasets": [ "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" + 
"urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)" ], "fineGrainedLineages": [ { diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/demodata.foo.proc2.json b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/demodata.foo.proc2.json index 49d5ac94cd23e..8ebd1c065ebf9 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/demodata.foo.proc2.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/procedures/demodata.foo.proc2.json @@ -8,11 +8,13 @@ "json": { "inputDatasets": [ "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.salesreason,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,newdata.foonew.personsnew,PROD)" ], "outputDatasets": [ "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.age_dist,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)" + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.items,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:mssql,demodata.foo.persons,PROD)" ], "fineGrainedLineages": [ { diff --git a/metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql b/metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql index f8ba7c7d068a9..52a8d1327653b 100644 --- a/metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql +++ b/metadata-ingestion/tests/integration/sql_server/procedures/DemoData.Foo.NewProc.sql @@ -26,4 +26,12 @@ CREATE PROCEDURE [Foo].[NewProc] FROM Foo.Persons GROUP BY Age END + + SELECT * INTO #TempTable FROM NewData.FooNew.PersonsNew + + UPDATE DemoData.Foo.Persons + SET Age = t.Age + FROM DemoData.Foo.Persons p + JOIN #TempTable t ON p.ID = t.ID + END \ No 
newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/procedures/demodata.foo.proc2.sql b/metadata-ingestion/tests/integration/sql_server/procedures/demodata.foo.proc2.sql index d11caff9f0aa9..69194a8d2c546 100644 --- a/metadata-ingestion/tests/integration/sql_server/procedures/demodata.foo.proc2.sql +++ b/metadata-ingestion/tests/integration/sql_server/procedures/demodata.foo.proc2.sql @@ -26,4 +26,11 @@ CREATE PROCEDURE [foo].[proc2] FROM foo.persons GROUP BY age END + + SELECT * INTO #temptable FROM newdata.foonew.personsnew + + UPDATE demodata.foo.persons + SET age = t.age + FROM demodata.foo.persons p + JOIN #temptable t ON p.ID = t.ID END \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql index 625d0c2355742..30497a3427bea 100644 --- a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql +++ b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql @@ -74,7 +74,6 @@ CREATE PROCEDURE [Foo].[NewProc] IF OBJECT_ID('Foo.age_dist', 'U') IS NULL - BEGIN -- Create and populate if table doesn't exist SELECT Age, COUNT(*) as Count @@ -92,6 +91,16 @@ CREATE PROCEDURE [Foo].[NewProc] FROM Foo.Persons GROUP BY Age END + + SELECT ID, Age + INTO #TEMPTABLE + FROM NewData.FooNew.PersonsNew; + + UPDATE DemoData.Foo.Persons + SET Age = t.Age + FROM DemoData.Foo.Persons p + JOIN #TEMPTABLE t ON p.ID = t.ID; + END GO diff --git a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py index 67897deeedff3..b969f77b4c3c1 100644 --- a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py +++ b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py @@ -101,6 +101,7 @@ def test_stored_procedure_lineage( schema_resolver=schema_resolver, procedure=procedure, procedure_job_urn=data_job_urn, + is_temp_table=lambda 
name: "temp" in name.lower(), ) ) mce_helpers.check_goldens_stream( From 8449f01732d028f11b9b016b28993f3eb0cd82ec Mon Sep 17 00:00:00 2001 From: Mayuri N Date: Fri, 22 Nov 2024 18:30:10 +0530 Subject: [PATCH 13/13] revert test changes --- .../golden_mces_mssql_no_db_to_file.json | 16 ++++++++-------- .../golden_mces_mssql_no_db_with_filter.json | 16 ++++++++-------- .../golden_files/golden_mces_mssql_to_file.json | 16 ++++++++-------- .../golden_mces_mssql_with_lower_case_urn.json | 16 ++++++++-------- .../tests/integration/sql_server/setup/setup.sql | 6 ++---- 5 files changed, 34 insertions(+), 36 deletions(-) diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json index f010cbd0dc7a1..54821347fd28b 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "400f2a31-7ec0-4feb-9079-9d921d18d3a6", + "job_id": "4130c37d-146c-43da-a671-dd9a413a44b3", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-22 12:47:39.113000", - "date_modified": "2024-11-22 12:47:39.267000", + "date_created": "2024-11-22 12:58:03.260000", + "date_modified": "2024-11-22 12:58:03.440000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-22 12:47:38.993000", - "date_modified": "2024-11-22 12:47:38.993000" + "date_created": "2024-11-22 12:58:03.137000", + "date_modified": "2024-11-22 12:58:03.137000" }, 
"externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2308,10 +2308,10 @@ "customProperties": { "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", "depending_on_procedure": "{}", - "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age \n INTO #TEMPTABLE \n FROM NewData.FooNew.PersonsNew;\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID;\n\n END\n", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", "input parameters": "[]", - "date_created": "2024-11-22 12:47:38.997000", - "date_modified": "2024-11-22 12:47:38.997000" + 
"date_created": "2024-11-22 12:58:03.140000", + "date_modified": "2024-11-22 12:58:03.140000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json index 4417cafabde31..3836e587ef8cf 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_no_db_with_filter.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "400f2a31-7ec0-4feb-9079-9d921d18d3a6", + "job_id": "4130c37d-146c-43da-a671-dd9a413a44b3", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-22 12:47:39.113000", - "date_modified": "2024-11-22 12:47:39.267000", + "date_created": "2024-11-22 12:58:03.260000", + "date_modified": "2024-11-22 12:58:03.440000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-22 12:47:38.993000", - "date_modified": "2024-11-22 12:47:38.993000" + "date_created": "2024-11-22 12:58:03.137000", + "date_modified": "2024-11-22 12:58:03.137000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2308,10 +2308,10 @@ "customProperties": { "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", "depending_on_procedure": "{}", - "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, 
ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age \n INTO #TEMPTABLE \n FROM NewData.FooNew.PersonsNew;\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID;\n\n END\n", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", "input parameters": "[]", - "date_created": "2024-11-22 12:47:38.997000", - "date_modified": "2024-11-22 12:47:38.997000" + "date_created": "2024-11-22 12:58:03.140000", + "date_modified": "2024-11-22 12:58:03.140000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json index 4417cafabde31..3836e587ef8cf 100644 --- 
a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_to_file.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "400f2a31-7ec0-4feb-9079-9d921d18d3a6", + "job_id": "4130c37d-146c-43da-a671-dd9a413a44b3", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - "date_created": "2024-11-22 12:47:39.113000", - "date_modified": "2024-11-22 12:47:39.267000", + "date_created": "2024-11-22 12:58:03.260000", + "date_modified": "2024-11-22 12:58:03.440000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-22 12:47:38.993000", - "date_modified": "2024-11-22 12:47:38.993000" + "date_created": "2024-11-22 12:58:03.137000", + "date_modified": "2024-11-22 12:58:03.137000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2308,10 +2308,10 @@ "customProperties": { "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", "depending_on_procedure": "{}", - "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n 
END\n\n SELECT ID, Age \n INTO #TEMPTABLE \n FROM NewData.FooNew.PersonsNew;\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID;\n\n END\n", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", "input parameters": "[]", - "date_created": "2024-11-22 12:47:38.997000", - "date_modified": "2024-11-22 12:47:38.997000" + "date_created": "2024-11-22 12:58:03.140000", + "date_modified": "2024-11-22 12:58:03.140000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", diff --git a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json index 163607c641c0a..ebcadcc11dcbf 100644 --- a/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json +++ b/metadata-ingestion/tests/integration/sql_server/golden_files/golden_mces_mssql_with_lower_case_urn.json @@ -113,11 +113,11 @@ "aspect": { "json": { "customProperties": { - "job_id": "400f2a31-7ec0-4feb-9079-9d921d18d3a6", + "job_id": "4130c37d-146c-43da-a671-dd9a413a44b3", "job_name": "Weekly Demo Data Backup", "description": "No description available.", - 
"date_created": "2024-11-22 12:47:39.113000", - "date_modified": "2024-11-22 12:47:39.267000", + "date_created": "2024-11-22 12:58:03.260000", + "date_modified": "2024-11-22 12:58:03.440000", "step_id": "1", "step_name": "Set database to read only", "subsystem": "TSQL", @@ -2282,8 +2282,8 @@ "code": "CREATE PROCEDURE [Foo].[Proc.With.SpecialChar] @ID INT\nAS\n SELECT @ID AS ThatDB;\n", "input parameters": "['@ID']", "parameter @ID": "{'type': 'int'}", - "date_created": "2024-11-22 12:47:38.993000", - "date_modified": "2024-11-22 12:47:38.993000" + "date_created": "2024-11-22 12:58:03.137000", + "date_modified": "2024-11-22 12:58:03.137000" }, "externalUrl": "", "name": "DemoData.Foo.Proc.With.SpecialChar", @@ -2308,10 +2308,10 @@ "customProperties": { "procedure_depends_on": "{'DemoData.Foo.age_dist': 'USER_TABLE', 'DemoData.Foo.Items': 'USER_TABLE', 'DemoData.Foo.Persons': 'USER_TABLE', 'DemoData.Foo.SalesReason': 'USER_TABLE'}", "depending_on_procedure": "{}", - "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age \n INTO #TEMPTABLE \n FROM NewData.FooNew.PersonsNew;\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID;\n\n END\n", + "code": "CREATE PROCEDURE [Foo].[NewProc]\n AS\n BEGIN\n --insert into items table from salesreason table\n insert into Foo.Items (ID, ItemName)\n SELECT TempID, Name\n FROM Foo.SalesReason;\n\n\n IF OBJECT_ID('Foo.age_dist', 'U') IS NULL\n 
BEGIN\n -- Create and populate if table doesn't exist\n SELECT Age, COUNT(*) as Count\n INTO Foo.age_dist\n FROM Foo.Persons\n GROUP BY Age\n END\n ELSE\n BEGIN\n -- Update existing table\n TRUNCATE TABLE Foo.age_dist;\n\n INSERT INTO Foo.age_dist (Age, Count)\n SELECT Age, COUNT(*) as Count\n FROM Foo.Persons\n GROUP BY Age\n END\n\n SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew\n \n UPDATE DemoData.Foo.Persons\n SET Age = t.Age\n FROM DemoData.Foo.Persons p\n JOIN #TEMPTABLE t ON p.ID = t.ID\n\n END\n", "input parameters": "[]", - "date_created": "2024-11-22 12:47:38.997000", - "date_modified": "2024-11-22 12:47:38.997000" + "date_created": "2024-11-22 12:58:03.140000", + "date_modified": "2024-11-22 12:58:03.140000" }, "externalUrl": "", "name": "DemoData.Foo.NewProc", diff --git a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql index 30497a3427bea..0c3c7ee2fd29e 100644 --- a/metadata-ingestion/tests/integration/sql_server/setup/setup.sql +++ b/metadata-ingestion/tests/integration/sql_server/setup/setup.sql @@ -92,14 +92,12 @@ CREATE PROCEDURE [Foo].[NewProc] GROUP BY Age END - SELECT ID, Age - INTO #TEMPTABLE - FROM NewData.FooNew.PersonsNew; + SELECT ID, Age INTO #TEMPTABLE FROM NewData.FooNew.PersonsNew UPDATE DemoData.Foo.Persons SET Age = t.Age FROM DemoData.Foo.Persons p - JOIN #TEMPTABLE t ON p.ID = t.ID; + JOIN #TEMPTABLE t ON p.ID = t.ID END GO