diff --git a/pyiceberg/catalog/__init__.py b/pyiceberg/catalog/__init__.py index 61e2bdd8f3..4cb6c2ff4a 100644 --- a/pyiceberg/catalog/__init__.py +++ b/pyiceberg/catalog/__init__.py @@ -75,7 +75,16 @@ LOCATION = "location" EXTERNAL_TABLE = "EXTERNAL_TABLE" -TABLE_METADATA_FILE_NAME_REGEX = re.compile(r"""(\d+)-.*\.json""", re.X) +TABLE_METADATA_FILE_NAME_REGEX = re.compile( + r""" + (\d+) # version number + - # separator + ([\w-]{36}) # UUID (36 characters, including hyphens) + (?:\.\w+)? # optional codec name + \.metadata\.json # file extension + """, + re.X, +) class CatalogType(Enum): @@ -592,7 +601,7 @@ def _write_metadata(metadata: TableMetadata, io: FileIO, metadata_path: str) -> @staticmethod def _get_metadata_location(location: str, new_version: int = 0) -> str: if new_version < 0: - raise ValueError(f"Table metadata version: {new_version} cannot be negative") + raise ValueError(f"Table metadata version: `{new_version}` must be a non-negative integer") version_str = f"{new_version:05d}" return f"{location}/metadata/{version_str}-{uuid.uuid4()}.metadata.json" @@ -615,6 +624,10 @@ def _parse_metadata_version(metadata_location: str) -> int: """ file_name = metadata_location.split("/")[-1] if file_name_match := TABLE_METADATA_FILE_NAME_REGEX.fullmatch(file_name): + try: + uuid.UUID(file_name_match.group(2)) + except ValueError: + return -1 return int(file_name_match.group(1)) else: return -1 diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py index f118d450fc..6cf9462b71 100644 --- a/pyiceberg/catalog/glue.py +++ b/pyiceberg/catalog/glue.py @@ -207,10 +207,10 @@ def _update_glue_table(self, database_name: str, table_name: str, table_input: T VersionId=version_id, ) except self.glue.exceptions.EntityNotFoundException as e: - raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name}") from e + raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name} (Glue table version {version_id})") from e except self.glue.exceptions.ConcurrentModificationException as e: raise CommitFailedException( - f"Cannot commit {database_name}.{table_name} because Glue detected concurrent update" + f"Cannot commit {database_name}.{table_name} because Glue detected concurrent update to table version {version_id}" ) from e def _get_glue_table(self, database_name: str, table_name: str) -> TableTypeDef: @@ -299,7 +299,7 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons current_glue_table = self._get_glue_table(database_name=database_name, table_name=table_name) glue_table_version_id = current_glue_table.get("VersionId") - if glue_table_version_id is None: + if not glue_table_version_id: raise CommitFailedException(f"Cannot commit {database_name}.{table_name} because Glue table version id is missing") current_table = self._convert_glue_to_iceberg(glue_table=current_glue_table) base_metadata = current_table.metadata diff --git a/tests/conftest.py b/tests/conftest.py index 64ace326ec..e54f1f54d6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1586,7 +1586,7 @@ def fixture_aws_credentials() -> Generator[None, None, None]: os.environ.pop("AWS_DEFAULT_REGION") -MOTO_SERVER = ThreadedMotoServer(port=5000) +MOTO_SERVER = ThreadedMotoServer(ip_address="localhost", port=5000) def pytest_sessionfinish(