Skip to content

Commit

Permalink
Improve error handling for assess_workflows task (#3255)
Browse files Browse the repository at this point in the history
This pull request includes several changes to improve error handling and
logging in the `databricks/labs/ucx/source_code` module. The most
important changes include adding a new error type, handling specific
errors in temporary file operations, and updating log levels for various
error messages.

### Error Handling:

*
[`src/databricks/labs/ucx/source_code/jobs.py`](diffhunk://#diff-e9b8a0fc1055c1e7d799d63ee6c440b3db47fdbcb8dccd5880f1a72db5df5837L20-R20):
Added `DatabricksError` to the list of imported errors and used it to
handle exceptions in the `_temporary_copy` method. This change ensures
that specific Databricks-related errors are caught and re-raised as
`InvalidPath` errors.
[[1]](diffhunk://#diff-e9b8a0fc1055c1e7d799d63ee6c440b3db47fdbcb8dccd5880f1a72db5df5837L20-R20)
[[2]](diffhunk://#diff-e9b8a0fc1055c1e7d799d63ee6c440b3db47fdbcb8dccd5880f1a72db5df5837R168-R176)

### Logging Improvements:

*
[`src/databricks/labs/ucx/source_code/known.py`](diffhunk://#diff-ed49b49a4bffc221bd19b77b68ce27482cda9417398b40cc1205e16d4d463022L169-R169):
Changed the log level from `error` to `warning` for recursion errors in
the `_analyze_dist_info` method.
*
[`src/databricks/labs/ucx/source_code/linters/files.py`](diffhunk://#diff-5dae8c130e55d05b5dfeb64c78ee0f128c7609e530b7a91f359f0008c91bf3a9L231-R231):
Changed the log level from `error` to `warning` for Unicode decode
errors in the `_apply_file_fix` method.
*
[`src/databricks/labs/ucx/source_code/linters/from_table.py`](diffhunk://#diff-3a3ae81870927af560ab2e91f35d8b4230d28bd5f45c099f51ac6c4a633d9301L100-R100):
Changed the log level from `error` to `warning` for schema determination
errors in the `apply` method.
*
[`src/databricks/labs/ucx/source_code/redash.py`](diffhunk://#diff-d5de4bcdd34eb78885392169c22e798351cf010da05ae546a28cf536f9c648d5L52-R52):
Changed the log level from `error` to `warning` for errors when listing
dashboards in the `_list_dashboards` method.
*
[`src/databricks/labs/ucx/source_code/sql/sql_parser.py`](diffhunk://#diff-dc63f875bba70c85a8e1c6f34089b0a53779b7ef68a983d497b0c923e3ce5bb3L55-R55):
Changed the log level from `error` to `warning` for schema determination
errors in the `_collect_table_info` method.
  • Loading branch information
nfx authored Nov 12, 2024
1 parent 75fe330 commit 1e9d334
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 11 deletions.
16 changes: 10 additions & 6 deletions src/databricks/labs/ucx/source_code/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from databricks.labs.blueprint.paths import DBFSPath
from databricks.labs.lsql.backends import SqlBackend
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import NotFound, ResourceDoesNotExist, BadRequest, InvalidParameterValue
from databricks.sdk.errors import NotFound, ResourceDoesNotExist, BadRequest, InvalidParameterValue, DatabricksError
from databricks.sdk.service import compute, jobs
from databricks.sdk.service.compute import DataSecurityMode
from databricks.sdk.service.jobs import Source
Expand Down Expand Up @@ -165,11 +165,15 @@ def _as_path(self, path: str) -> Path:
@classmethod
@contextmanager
def _temporary_copy(cls, path: Path) -> Generator[Path, None, None]:
with tempfile.TemporaryDirectory() as directory:
temporary_path = Path(directory) / path.name
with path.open("rb") as src, temporary_path.open("wb") as dst:
shutil.copyfileobj(src, dst)
yield temporary_path
try:
with tempfile.TemporaryDirectory() as directory:
temporary_path = Path(directory) / path.name
with path.open("rb") as src, temporary_path.open("wb") as dst:
shutil.copyfileobj(src, dst)
yield temporary_path
except DatabricksError as e:
# Cover cases like `ResourceDoesNotExist: Path (/Volumes/...-py3-none-any.whl) doesn't exist.`
raise InvalidPath(f"Cannot load file: {path}") from e

def _register_library(self, graph: DependencyGraph, library: compute.Library) -> Iterable[DependencyProblem]:
if library.pypi:
Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/source_code/known.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def _analyze_dist_info(cls, dist_info_folder, known_distributions, library_root)
try:
cls._analyze_file(known_distributions, library_root, dist_info, module_path)
except RecursionError:
logger.error(f"Recursion error in {module_path}")
logger.warning(f"Recursion error in {module_path}")
continue

@classmethod
Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/source_code/linters/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def _apply_file_fix(self, path: Path):
try:
code = f.read()
except UnicodeDecodeError as e:
logger.error(f"Could not decode file {path}: {e}")
logger.warning(f"Could not decode file {path}: {e}")
return False
applied = False
# Lint the code and apply fixes
Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/source_code/linters/from_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def apply(self, code: str) -> str:
for old_table in self._dependent_tables(statement):
src_schema = old_table.db if old_table.db else self._session_state.schema
if not src_schema:
logger.error(f"Could not determine schema for table {old_table.name}")
logger.warning(f"Could not determine schema for table {old_table.name}")
continue
dst = self._index.get(src_schema, old_table.name)
if not dst:
Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/source_code/redash.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def _list_dashboards(self, dashboard_id: str | None) -> list[Dashboard]:
return list(self._ws.dashboards.list())
return [self._ws.dashboards.get(dashboard_id)]
except DatabricksError as e:
logger.error(f"Cannot list dashboards: {e}")
logger.warning(f"Cannot list dashboards: {e}")
return []

def _fix_query(self, query: LegacyQuery) -> None:
Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/source_code/sql/sql_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def _collect_table_info(
# Sqlglot uses db instead of schema, watch out for that
src_schema = table.db if table.db else session_state.schema
if not src_schema:
logger.error(f"Could not determine schema for table {table.name}")
logger.warning(f"Could not determine schema for table {table.name}")
return None
return UsedTable(
catalog_name=catalog_name,
Expand Down

0 comments on commit 1e9d334

Please sign in to comment.