diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/unity_catalog.tsx b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/unity_catalog.tsx index 3b5565ab1abd43..6df3a077eb95f7 100644 --- a/datahub-web-react/src/app/ingest/source/builder/RecipeForm/unity_catalog.tsx +++ b/datahub-web-react/src/app/ingest/source/builder/RecipeForm/unity_catalog.tsx @@ -94,7 +94,7 @@ export const UNITY_CATALOG_ALLOW: RecipeField = { label: 'Allow Patterns', tooltip: 'Only include specific Catalogs by providing the name of a Catalog, or a Regular Expression (REGEX) to include specific Catalogs. If not provided, all Catalogs will be included.', - placeholder: 'my_catalog', + placeholder: 'metastore.my_catalog', type: FieldType.LIST, buttonLabel: 'Add pattern', fieldPath: catalogAllowFieldPath, @@ -106,11 +106,11 @@ export const UNITY_CATALOG_ALLOW: RecipeField = { const catalogDenyFieldPath = 'source.config.catalog_pattern.deny'; export const UNITY_CATALOG_DENY: RecipeField = { - name: 'catalog_pattern.allow', - label: 'Allow Patterns', + name: 'catalog_pattern.deny', + label: 'Deny Patterns', tooltip: 'Exclude specific Catalogs by providing the name of a Catalog, or a Regular Expression (REGEX) to exclude specific Catalogs. If not provided, all Catalogs will be included. Deny patterns always take precedence over Allow patterns.', - placeholder: 'my_catalog', + placeholder: 'metastore.my_catalog', type: FieldType.LIST, buttonLabel: 'Add pattern', fieldPath: catalogDenyFieldPath, @@ -120,9 +120,9 @@ export const UNITY_CATALOG_DENY: RecipeField = { setListValuesOnRecipe(recipe, values, catalogDenyFieldPath), }; -const tableAllowFieldPath = 'source.config.metastore_id_pattern.allow'; +const tableAllowFieldPath = 'source.config.table_pattern.allow'; export const UNITY_TABLE_ALLOW: RecipeField = { - name: 'catalog_pattern.allow', + name: 'table_pattern.allow', label: 'Allow Patterns', tooltip: 'Only include specific Tables by providing the fully-qualified name of a Table, or a Regular Expression (REGEX) to include specific Tables. If not provided, all Tables will be included.', @@ -136,10 +136,10 @@ export const UNITY_TABLE_ALLOW: RecipeField = { setListValuesOnRecipe(recipe, values, tableAllowFieldPath), }; -const tableDenyFieldPath = 'source.config.metastore_id_pattern.deny'; +const tableDenyFieldPath = 'source.config.table_pattern.deny'; export const UNITY_TABLE_DENY: RecipeField = { - name: 'catalog_pattern.allow', - label: 'Allow Patterns', + name: 'table_pattern.deny', + label: 'Deny Patterns', tooltip: 'Exclude specific Tables by providing the fully-qualified name of a Table, or a Regular Expression (REGEX) to exclude specific Tables. If not provided, all Tables will be included. Deny patterns always take precedence over Allow patterns.', placeholder: 'catalog.schema.table', diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 77eb73b5d5d033..ddc001f7e3b6d2 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -6,18 +6,27 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Breaking Changes +- #7900: The `catalog_pattern` and `schema_pattern` options of the Unity Catalog source now match against the fully qualified name of the catalog/schema instead of just the name. Unless you're using regex `^` in your patterns, this should not affect you. + +### Potential Downtime + +### Deprecations + +### Other notable Changes + +## 0.10.2 + +### Breaking Changes + - #7016 Add `add_database_name_to_urn` flag to Oracle source which ensure that Dataset urns have the DB name as a prefix to prevent collision (.e.g. {database}.{schema}.{table}). ONLY breaking if you set this flag to true, otherwise behavior remains the same. - The Airflow plugin no longer includes the DataHub Kafka emitter by default. Use `pip install acryl-datahub-airflow-plugin[datahub-kafka]` for Kafka support. - The Airflow lineage backend no longer includes the DataHub Kafka emitter by default. Use `pip install acryl-datahub[airflow,datahub-kafka]` for Kafka support. - Java SDK PatchBuilders have been modified in a backwards incompatible way to align more with the Python SDK and support more use cases. Any application utilizing the Java SDK for patch building may be affected on upgrading this dependency. -### Potential Downtime - ### Deprecations -- The docker image and script for updating from Elasticsearch 6 to 7 is no longer being maintained and will be removed from the `/contrib` section of -the repository. Please refer to older releases if needed. -### Other notable Changes +- The docker image and script for updating from Elasticsearch 6 to 7 is no longer being maintained and will be removed from the `/contrib` section of + the repository. Please refer to older releases if needed. ## 0.10.0 diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py index 08fe889f09af6e..fb106cba86c628 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/config.py @@ -29,17 +29,17 @@ class UnityCatalogSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigM catalog_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), - description="Regex patterns for catalogs to filter in ingestion. Specify regex to match the catalog name", + description="Regex patterns for catalogs to filter in ingestion. Specify regex to match the full `metastore.catalog` name.", ) schema_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), - description="Regex patterns for schemas to filter in ingestion. Specify regex to only match the schema name. e.g. to match all tables in schema analytics, use the regex 'analytics'", + description="Regex patterns for schemas to filter in ingestion. Specify regex to the full `metastore.catalog.schema` name. e.g. to match all tables in schema analytics, use the regex `^mymetastore\\.mycatalog\\.analytics$`.", ) table_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), - description="Regex patterns for tables to filter in ingestion. Specify regex to match the entire table name in catalog.schema.table format. e.g. to match all tables starting with customer in Customer catalog and public schema, use the regex 'Customer.public.customer.*'", + description="Regex patterns for tables to filter in ingestion. Specify regex to match the entire table name in `catalog.schema.table` format. e.g. to match all tables starting with customer in Customer catalog and public schema, use the regex `Customer\\.public\\.customer.*`.", ) domain: Dict[str, AllowDenyPattern] = Field( default=dict(), diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index ff767d07dda347..8a91ff5359d75f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -222,7 +222,7 @@ def process_catalogs( self, metastore: proxy.Metastore ) -> Iterable[MetadataWorkUnit]: for catalog in self.unity_catalog_api_proxy.catalogs(metastore=metastore): - if not self.config.catalog_pattern.allowed(catalog.name): + if not self.config.catalog_pattern.allowed(catalog.id): self.report.catalogs.dropped(catalog.id) continue @@ -233,7 +233,7 @@ def process_catalogs( def process_schemas(self, catalog: proxy.Catalog) -> Iterable[MetadataWorkUnit]: for schema in self.unity_catalog_api_proxy.schemas(catalog=catalog): - if not self.config.schema_pattern.allowed(schema.name): + if not self.config.schema_pattern.allowed(schema.id): self.report.schemas.dropped(schema.id) continue