Skip to content

Commit

Permalink
fix(ingest/metabase): Fix for query template expressions and invalid …
Browse files Browse the repository at this point in the history
…URNs for Text Cards (#10381)

Co-authored-by: Harshal Sheth <[email protected]>
  • Loading branch information
pulsar256 and hsheth2 authored May 24, 2024
1 parent c308a6f commit 1c1450e
Show file tree
Hide file tree
Showing 4 changed files with 219 additions and 50 deletions.
35 changes: 28 additions & 7 deletions metadata-ingestion/src/datahub/ingestion/source/metabase.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import logging
import re
from dataclasses import dataclass
from datetime import datetime, timezone
from functools import lru_cache
Expand Down Expand Up @@ -309,9 +310,10 @@ def construct_dashboard_from_api_data(
chart_urns = []
cards_data = dashboard_details.get("dashcards", {})
for card_info in cards_data:
chart_urn = builder.make_chart_urn(
self.platform, card_info.get("card").get("id", "")
)
card_id = card_info.get("card").get("id", "")
if not card_id:
continue # most likely a virtual card without an id (text or heading), not relevant.
chart_urn = builder.make_chart_urn(self.platform, card_id)
chart_urns.append(chart_urn)

dashboard_info_class = DashboardInfoClass(
Expand Down Expand Up @@ -592,11 +594,12 @@ def get_datasource_urn(
)
]
else:
raw_query = (
raw_query_stripped = self.strip_template_expressions(
card_details.get("dataset_query", {}).get("native", {}).get("query", "")
)

result = create_lineage_sql_parsed_result(
query=raw_query,
query=raw_query_stripped,
default_db=database_name,
default_schema=database_schema or self.config.default_schema,
platform=platform,
Expand All @@ -606,17 +609,35 @@ def get_datasource_urn(
)
if result.debug_info.table_error:
logger.info(
f"Failed to parse lineage from query {raw_query}: "
f"Failed to parse lineage from query {raw_query_stripped}: "
f"{result.debug_info.table_error}"
)
self.report.report_warning(
key="metabase-query",
reason=f"Unable to retrieve lineage from query: {raw_query}",
reason=f"Unable to retrieve lineage from query: {raw_query_stripped}",
)
return result.in_tables

return None

@staticmethod
def strip_template_expressions(raw_query: str) -> str:
    """
    Workaround for Metabase native queries containing the most commonly used
    template expressions, so the remaining text can be parsed as plain SQL.

    - strip optional clauses "[[ .... ]]" (including their content)
    - replace every remaining "{{ variable }}" tag with the literal "1"

    Both patterns are matched non-greedily and across line breaks, because
    optional clauses in hand-written queries frequently span several lines.

    reference: https://www.metabase.com/docs/latest/questions/native-editor/sql-parameters

    :param raw_query: native query text as stored on the card; None or an
        empty string is tolerated.
    :return: the query with template expressions removed/replaced, or "" when
        there is no query text.
    """
    if not raw_query:
        # A missing/null query has nothing to patch; avoid a TypeError in re.sub.
        return ""

    # drop [[ WHERE {{FILTER}} ]] -- DOTALL lets "." cross newlines so that
    # multi-line optional clauses are removed as a whole.
    query_patched = re.sub(r"\[\[.+?\]\]", " ", raw_query, flags=re.DOTALL)

    # replace {{FILTER}} with 1 (optional clauses are removed first, so only
    # tags outside of them are left at this point)
    return re.sub(r"\{\{.+?\}\}", "1", query_patched, flags=re.DOTALL)

@lru_cache(maxsize=None)
def get_source_table_from_id(
self, table_id: Union[int, str]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,37 @@
"archived": false,
"collection_position": null,
"dashcards": [
{
"size_x": 24,
"dashboard_tab_id": null,
"series": [],
"action_id": null,
"collection_authority_level": null,
"card": {
"query_average_duration": null
},
"updated_at": "2024-05-18T18:22:19.900158Z",
"col": 0,
"id": 2,
"parameter_mappings": [],
"card_id": null,
"entity_id": "woqw4RKYx7rlOgqp-_UPc",
"visualization_settings": {
"dashcard.background": false,
"virtual_card": {
"name": null,
"dataset_query": {},
"display": "heading",
"visualization_settings": {},
"archived": false
},
"text": "This is a virtual card and should be ignored by the ingestion."
},
"size_y": 1,
"dashboard_id": 1,
"created_at": "2024-05-18T18:22:19.900158Z",
"row": 0
},
{
"size_x": 12,
"dashboard_tab_id": null,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,37 @@
"archived": false,
"collection_position": null,
"dashcards": [
{
"size_x": 24,
"dashboard_tab_id": null,
"series": [],
"action_id": null,
"collection_authority_level": null,
"card": {
"query_average_duration": null
},
"updated_at": "2024-05-18T18:22:19.900158Z",
"col": 0,
"id": 2,
"parameter_mappings": [],
"card_id": null,
"entity_id": "woqw4RKYx7rlOgqp-_UPc",
"visualization_settings": {
"dashcard.background": false,
"virtual_card": {
"name": null,
"dataset_query": {},
"display": "heading",
"visualization_settings": {},
"archived": false
},
"text": "This is a virtual card and should be ignored by the ingestion."
},
"size_y": 1,
"dashboard_id": 1,
"created_at": "2024-05-18T18:22:19.900158Z",
"row": 0
},
{
"size_x": 12,
"dashboard_tab_id": null,
Expand Down
Loading

0 comments on commit 1c1450e

Please sign in to comment.