Skip to content

Commit

Permalink
fix(ingest/unity): use fully qualified catalog/schema patterns (#7900)
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 authored May 2, 2023
1 parent 8a7aeac commit bf86235
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ export const UNITY_CATALOG_ALLOW: RecipeField = {
label: 'Allow Patterns',
tooltip:
'Only include specific Catalogs by providing the name of a Catalog, or a Regular Expression (REGEX) to include specific Catalogs. If not provided, all Catalogs will be included.',
placeholder: 'my_catalog',
placeholder: 'metastore.my_catalog',
type: FieldType.LIST,
buttonLabel: 'Add pattern',
fieldPath: catalogAllowFieldPath,
Expand All @@ -106,11 +106,11 @@ export const UNITY_CATALOG_ALLOW: RecipeField = {

const catalogDenyFieldPath = 'source.config.catalog_pattern.deny';
export const UNITY_CATALOG_DENY: RecipeField = {
name: 'catalog_pattern.allow',
label: 'Allow Patterns',
name: 'catalog_pattern.deny',
label: 'Deny Patterns',
tooltip:
'Exclude specific Catalogs by providing the name of a Catalog, or a Regular Expression (REGEX) to exclude specific Catalogs. If not provided, all Catalogs will be included. Deny patterns always take precedence over Allow patterns.',
placeholder: 'my_catalog',
placeholder: 'metastore.my_catalog',
type: FieldType.LIST,
buttonLabel: 'Add pattern',
fieldPath: catalogDenyFieldPath,
Expand All @@ -120,9 +120,9 @@ export const UNITY_CATALOG_DENY: RecipeField = {
setListValuesOnRecipe(recipe, values, catalogDenyFieldPath),
};

const tableAllowFieldPath = 'source.config.metastore_id_pattern.allow';
const tableAllowFieldPath = 'source.config.table_pattern.allow';
export const UNITY_TABLE_ALLOW: RecipeField = {
name: 'catalog_pattern.allow',
name: 'table_pattern.allow',
label: 'Allow Patterns',
tooltip:
'Only include specific Tables by providing the fully-qualified name of a Table, or a Regular Expression (REGEX) to include specific Tables. If not provided, all Tables will be included.',
Expand All @@ -136,10 +136,10 @@ export const UNITY_TABLE_ALLOW: RecipeField = {
setListValuesOnRecipe(recipe, values, tableAllowFieldPath),
};

const tableDenyFieldPath = 'source.config.metastore_id_pattern.deny';
const tableDenyFieldPath = 'source.config.table_pattern.deny';
export const UNITY_TABLE_DENY: RecipeField = {
name: 'catalog_pattern.allow',
label: 'Allow Patterns',
name: 'table_pattern.deny',
label: 'Deny Patterns',
tooltip:
'Exclude specific Tables by providing the fully-qualified name of a Table, or a Regular Expression (REGEX) to exclude specific Tables. If not provided, all Tables will be included. Deny patterns always take precedence over Allow patterns.',
placeholder: 'catalog.schema.table',
Expand Down
19 changes: 14 additions & 5 deletions docs/how/updating-datahub.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,27 @@ This file documents any backwards-incompatible changes in DataHub and assists pe

### Breaking Changes

- #7900: The `catalog_pattern` and `schema_pattern` options of the Unity Catalog source now match against the fully qualified name of the catalog/schema instead of just the name. Unless you're using regex `^` in your patterns, this should not affect you.

### Potential Downtime

### Deprecations

### Other notable Changes

## 0.10.2

### Breaking Changes

- #7016 Add `add_database_name_to_urn` flag to Oracle source which ensures that Dataset urns have the DB name as a prefix to prevent collision (e.g. {database}.{schema}.{table}). ONLY breaking if you set this flag to true, otherwise behavior remains the same.
- The Airflow plugin no longer includes the DataHub Kafka emitter by default. Use `pip install acryl-datahub-airflow-plugin[datahub-kafka]` for Kafka support.
- The Airflow lineage backend no longer includes the DataHub Kafka emitter by default. Use `pip install acryl-datahub[airflow,datahub-kafka]` for Kafka support.
- Java SDK PatchBuilders have been modified in a backwards incompatible way to align more with the Python SDK and support more use cases. Any application utilizing the Java SDK for patch building may be affected on upgrading this dependency.

### Potential Downtime

### Deprecations
- The docker image and script for updating from Elasticsearch 6 to 7 are no longer being maintained and will be removed from the `/contrib` section of
the repository. Please refer to older releases if needed.

### Other notable Changes
- The docker image and script for updating from Elasticsearch 6 to 7 are no longer being maintained and will be removed from the `/contrib` section of
the repository. Please refer to older releases if needed.

## 0.10.0

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,17 @@ class UnityCatalogSourceConfig(

catalog_pattern: AllowDenyPattern = Field(
default=AllowDenyPattern.allow_all(),
description="Regex patterns for catalogs to filter in ingestion. Specify regex to match the catalog name",
description="Regex patterns for catalogs to filter in ingestion. Specify regex to match the full `metastore.catalog` name.",
)

schema_pattern: AllowDenyPattern = Field(
default=AllowDenyPattern.allow_all(),
description="Regex patterns for schemas to filter in ingestion. Specify regex to only match the schema name. e.g. to match all tables in schema analytics, use the regex 'analytics'",
description="Regex patterns for schemas to filter in ingestion. Specify regex to the full `metastore.catalog.schema` name. e.g. to match all tables in schema analytics, use the regex `^mymetastore\\.mycatalog\\.analytics$`.",
)

table_pattern: AllowDenyPattern = Field(
default=AllowDenyPattern.allow_all(),
description="Regex patterns for tables to filter in ingestion. Specify regex to match the entire table name in catalog.schema.table format. e.g. to match all tables starting with customer in Customer catalog and public schema, use the regex 'Customer.public.customer.*'",
description="Regex patterns for tables to filter in ingestion. Specify regex to match the entire table name in `catalog.schema.table` format. e.g. to match all tables starting with customer in Customer catalog and public schema, use the regex `Customer\\.public\\.customer.*`.",
)
domain: Dict[str, AllowDenyPattern] = Field(
default=dict(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def process_catalogs(
self, metastore: proxy.Metastore
) -> Iterable[MetadataWorkUnit]:
for catalog in self.unity_catalog_api_proxy.catalogs(metastore=metastore):
if not self.config.catalog_pattern.allowed(catalog.name):
if not self.config.catalog_pattern.allowed(catalog.id):
self.report.catalogs.dropped(catalog.id)
continue

Expand All @@ -248,7 +248,7 @@ def process_catalogs(

def process_schemas(self, catalog: proxy.Catalog) -> Iterable[MetadataWorkUnit]:
for schema in self.unity_catalog_api_proxy.schemas(catalog=catalog):
if not self.config.schema_pattern.allowed(schema.name):
if not self.config.schema_pattern.allowed(schema.id):
self.report.schemas.dropped(schema.id)
continue

Expand Down

0 comments on commit bf86235

Please sign in to comment.