Skip to content

Commit

Permalink
Formatting with Black
Browse files Browse the repository at this point in the history
  • Loading branch information
joelmataKPN committed Jul 1, 2024
1 parent af3e161 commit c7f59d3
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 21 deletions.
14 changes: 3 additions & 11 deletions metadata-ingestion/src/datahub/ingestion/source/abs/source.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,15 +540,6 @@ def abs_browser(
matches = re.finditer(r"{\s*\w+\s*}", path_spec.include, re.MULTILINE)
matches_list = list(matches)
if matches_list and path_spec.sample_files:
# TODO refactor for abs
# Replace the templates in the path_spec include with stars, because later we want to resolve all the stars
# to actual directories.
# For example:
# "s3://my-test-bucket/*/{dept}/*/{table}/*/*.*" -> "s3://my-test-bucket/*/*/*/{table}/*/*.*"
# We only keep the last template as a marker to know the point until which we need to resolve the path.
# After the marker we can safely get sample files for sampling because it is not used in the
# table name, so we don't need all the files.
# This speeds up processing, but we won't be able to get a precise modification date/size/number of files.
max_start: int = -1
include: str = path_spec.include
max_match: str = ""
Expand Down Expand Up @@ -580,8 +571,9 @@ def abs_browser(
)
logger.info(f"Getting files from folder: {dir_to_process}")
dir_to_process = dir_to_process.rstrip("\\")
for obj in (
container_client.list_blobs(name_starts_with=f"{dir_to_process}", results_per_page=PAGE_SIZE)
for obj in container_client.list_blobs(
name_starts_with=f"{dir_to_process}",
results_per_page=PAGE_SIZE,
):
abs_path = self.create_abs_path(obj.name)
logger.debug(f"Sampling file: {abs_path}")
Expand Down
12 changes: 2 additions & 10 deletions metadata-ingestion/src/datahub/ingestion/source/azure/abs_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,6 @@
from datahub.ingestion.source.azure.azure_common import AzureConnectionConfig
from datahub.metadata.schema_classes import GlobalTagsClass, TagAssociationClass

# TODO
# Azure blob storage URIs:
# https://<storage-account>.<type>.core.windows.net/containername/
# where type is in [blob, web, dfs, file, queue, table]
# unknown:
# - what types are supported by the DSH
# - what types are supported by Datahub
# https://learn.microsoft.com/en-us/azure/storage/common/storage-account-overview#types-of-storage-accounts

# Matches the URL prefix of an Azure Blob Storage endpoint: an http or https
# scheme, a storage-account label of 3-24 lowercase letters/digits, the
# ".blob.core.windows.net" host suffix, and the trailing slash. The whole
# prefix is captured as group 1.
ABS_PREFIXES_REGEX = re.compile(
r"(http[s]?://[a-z0-9]{3,24}\.blob\.core\.windows\.net/)"
)
Expand Down Expand Up @@ -180,6 +171,7 @@ def create_properties(
prefix=f"{prefix}_{key}",
custom_properties=custom_properties,
resource_name=resource_name,
json_properties=json_properties
)
else:
custom_properties = add_property(
Expand Down Expand Up @@ -259,7 +251,7 @@ def list_folders(

this_dict = {}
for blob in blob_list:
blob_name = blob.name[:blob.name.rfind("/")+1]
blob_name = blob.name[: blob.name.rfind("/") + 1]
folder_structure_arr = blob_name.split("/")

folder_name = ""
Expand Down

0 comments on commit c7f59d3

Please sign in to comment.