Skip to content

Commit

Permalink
fix(ingestion/tableau): restructure the tableau graphql datasource qu…
Browse files Browse the repository at this point in the history
…ery (#11230)

Co-authored-by: Harshal Sheth <[email protected]>
  • Loading branch information
sid-acryl and hsheth2 authored Sep 9, 2024
1 parent 5467481 commit 3150d90
Show file tree
Hide file tree
Showing 20 changed files with 6,725 additions and 3,120 deletions.
2 changes: 1 addition & 1 deletion metadata-ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -722,7 +722,7 @@
"snowflake-summary = datahub.ingestion.source.snowflake.snowflake_summary:SnowflakeSummarySource",
"snowflake-queries = datahub.ingestion.source.snowflake.snowflake_queries:SnowflakeQueriesSource",
"superset = datahub.ingestion.source.superset:SupersetSource",
"tableau = datahub.ingestion.source.tableau:TableauSource",
"tableau = datahub.ingestion.source.tableau.tableau:TableauSource",
"openapi = datahub.ingestion.source.openapi:OpenApiSource",
"metabase = datahub.ingestion.source.metabase:MetabaseSource",
"teradata = datahub.ingestion.source.sql.teradata:TeradataSource",
Expand Down
Empty file.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import datahub.emitter.mce_builder as builder
from datahub.configuration.common import ConfigModel
from datahub.ingestion.source import tableau_constant as c
from datahub.ingestion.source.tableau import tableau_constant as c
from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
DatasetLineageType,
FineGrainedLineage,
Expand Down Expand Up @@ -223,19 +223,19 @@ class MetadataQueryException(Exception):
description
isHidden
folderName
upstreamFields {
name
datasource {
id
}
}
upstreamColumns {
name
table {
__typename
id
}
}
# upstreamFields {
# name
# datasource {
# id
# }
# }
# upstreamColumns {
# name
# table {
# __typename
# id
# }
# }
... on ColumnField {
dataCategory
role
Expand Down Expand Up @@ -336,6 +336,26 @@ class MetadataQueryException(Exception):
}
"""


# GraphQL selection set that requests only an item's id together with its
# upstream lineage: the upstream fields (name plus the id of the owning
# datasource) and the upstream columns (name plus the owning table's
# __typename and id).
# NOTE(review): the same upstreamFields/upstreamColumns selections appear
# commented out in the larger datasource queries in this file, so this looks
# like a dedicated query meant to fetch that lineage separately — confirm
# against the caller.
datasource_upstream_fields_graphql_query = """
{
id
upstreamFields {
name
datasource {
id
}
}
upstreamColumns {
name
table {
__typename
id
}
}
}
"""

published_datasource_graphql_query = """
{
__typename
Expand Down Expand Up @@ -368,19 +388,19 @@ class MetadataQueryException(Exception):
description
isHidden
folderName
upstreamFields {
name
datasource {
id
}
}
upstreamColumns {
name
table {
__typename
id
}
}
# upstreamFields {
# name
# datasource {
# id
# }
# }
# upstreamColumns {
# name
# table {
# __typename
# id
# }
# }
... on ColumnField {
dataCategory
role
Expand Down Expand Up @@ -910,40 +930,46 @@ def make_filter(filter_dict: dict) -> str:
return filter


def query_metadata(
def query_metadata_cursor_based_pagination(
server: Server,
main_query: str,
connection_name: str,
first: int,
offset: int,
after: Optional[str],
qry_filter: str = "",
) -> dict:
query = """{{
{connection_name} (first:{first}, offset:{offset}, filter:{{{filter}}})
{{
nodes {main_query}
pageInfo {{
hasNextPage
endCursor
query = f"""
query GetItems(
$first: Int,
$after: String
) {{
{connection_name} ( first: $first, after: $after, filter:{{ {qry_filter} }})
{{
nodes {main_query}
pageInfo {{
hasNextPage
endCursor
}}
}}
totalCount
}}
}}""".format(
connection_name=connection_name,
first=first,
offset=offset,
filter=qry_filter,
main_query=main_query,
}}""" # {{ is to escape { character of f-string

result = server.metadata.query(
query=query,
variables={
"first": first,
"after": after,
},
)
return server.metadata.query(query)

return result


def get_filter_pages(query_filter: dict, page_size: int) -> List[dict]:
filter_pages = [query_filter]
# If this is primary id filter so we can use divide this query list into
# If this is a primary id filter, we can divide this query list into
# multiple requests each with smaller filter list (of order page_size).
# It is observed in the past that if list of primary ids grow beyond
# a few ten thousands then tableau server responds with empty response
# It is observed in the past that if a list of primary ids grows beyond
# a few ten thousand, then tableau server responds with empty response
# causing below error:
# tableauserverclient.server.endpoint.exceptions.NonXMLResponseError: b''
if (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
EXTRACT_LAST_INCREMENTAL_UPDATE_TIME = "extractLastIncrementalUpdateTime"
EXTRACT_LAST_UPDATE_TIME = "extractLastUpdateTime"
PUBLISHED_DATA_SOURCES_CONNECTION = "publishedDatasourcesConnection"
FIELDS_CONNECTION = "fieldsConnection"
DATA_SOURCE_FIELDS = "datasourceFields"
SHEETS_CONNECTION = "sheetsConnection"
CREATED_AT = "createdAt"
Expand Down
Loading

0 comments on commit 3150d90

Please sign in to comment.