Skip to content

Commit

Permalink
feat(python): add DeltaTable.is_deltatable static method (#2662)
Browse files Browse the repository at this point in the history
This adds a static method is_deltatable(path, opts) to the
DeltaTable class, which returns True if delta-rs is able to load
a DeltaTable instance from the specified path and False otherwise.

Additionally, this also adds documentation of the usage with
examples for the DeltaTable.is_deltatable() method.
  • Loading branch information
omkar-foss committed Jul 29, 2024
1 parent 13af7cb commit 5c5432e
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 3 deletions.
27 changes: 27 additions & 0 deletions docs/usage/loading-table.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,33 @@ For AWS Glue catalog, use AWS environment variables to authenticate.
{'id': [5, 7, 9, 5, 6, 7, 8, 9]}
```

## Verify Table Existence

You can check whether or not a Delta table exists at a particular path by using
the `DeltaTable.is_deltatable()` method.

```python
from deltalake import DeltaTable

table_path = "<path/to/valid/table>"
DeltaTable.is_deltatable(table_path)
# True

invalid_table_path = "<path/to/nonexistent/table>"
DeltaTable.is_deltatable(invalid_table_path)
# False

bucket_table_path = "<path/to/valid/table/in/bucket>"
storage_options = {
"AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID",
"AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY",
...
}
DeltaTable.is_deltatable(bucket_table_path, storage_options)
# True
```


## Custom Storage Backends

While delta always needs its internal storage backend to work and be
Expand Down
25 changes: 22 additions & 3 deletions python/deltalake/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
from deltalake._internal import create_deltalake as _create_deltalake
from deltalake._util import encode_partition_value
from deltalake.data_catalog import DataCatalog
from deltalake.exceptions import DeltaProtocolError
from deltalake.exceptions import DeltaProtocolError, TableNotFoundError
from deltalake.fs import DeltaStorageHandler
from deltalake.schema import Schema as DeltaSchema

Expand Down Expand Up @@ -186,7 +186,7 @@ def __init__(

if compression_level is not None and compression is None:
raise ValueError(
"""Providing a compression level without the compression type is not possible,
"""Providing a compression level without the compression type is not possible,
please provide the compression as well."""
)
if isinstance(compression, str):
Expand Down Expand Up @@ -359,6 +359,25 @@ def from_data_catalog(
table_uri=table_uri, version=version, log_buffer_size=log_buffer_size
)

@staticmethod
def is_deltatable(
    table_uri: Union[str, Path], storage_options: Optional[Dict[str, str]] = None
) -> bool:
    """
    Check whether a Delta table can be loaded from the given location.

    Args:
        table_uri: the path of the DeltaTable
        storage_options: a dictionary of the options to use for the
            storage backend

    Returns:
        True if a `DeltaTable` could be constructed for `table_uri`, False if
        construction raised `TableNotFoundError`. Any other exception raised
        while loading propagates to the caller.
    """
    try:
        DeltaTable(table_uri, storage_options=storage_options)
    except TableNotFoundError:
        return False
    else:
        # Construction succeeded, so a table was found at table_uri.
        return True

@classmethod
def create(
cls,
Expand Down Expand Up @@ -1816,7 +1835,7 @@ def add_constraint(
"""
if len(constraints.keys()) > 1:
raise ValueError(
"""add_constraints is limited to a single constraint addition at once for now.
"""add_constraints is limited to a single constraint addition at once for now.
Please execute add_constraints multiple times with each time a different constraint."""
)

Expand Down
26 changes: 26 additions & 0 deletions python/docs/source/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,32 @@ For Databricks Unity Catalog authentication, use environment variables:
.. _`azure options`: https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html#variants
.. _`gcs options`: https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html#variants

Verify Table Existence
~~~~~~~~~~~~~~~~~~~~~~

You can check whether or not a Delta table exists at a particular path by using
the :meth:`DeltaTable.is_deltatable()` method.

.. code-block:: python

    from deltalake import DeltaTable

    table_path = "<path/to/valid/table>"
    DeltaTable.is_deltatable(table_path)
    # True

    invalid_table_path = "<path/to/nonexistent/table>"
    DeltaTable.is_deltatable(invalid_table_path)
    # False

    bucket_table_path = "<path/to/valid/table/in/bucket>"
    storage_options = {
        "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY",
        ...
    }
    DeltaTable.is_deltatable(bucket_table_path, storage_options)
    # True

Custom Storage Backends
~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
26 changes: 26 additions & 0 deletions python/tests/test_table_read.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from datetime import date, datetime, timezone
from pathlib import Path
from random import random
from threading import Barrier, Thread
from types import SimpleNamespace
from typing import Any, List, Tuple
Expand Down Expand Up @@ -798,3 +799,28 @@ def test_read_table_last_checkpoint_not_updated():
dt = DeltaTable("../crates/test/tests/data/table_failed_last_checkpoint_update")

assert dt.version() == 3


def test_is_deltatable_valid_path():
    """The `simple_table` fixture path is reported as a Delta table."""
    found = DeltaTable.is_deltatable("../crates/test/tests/data/simple_table")
    assert found


def test_is_deltatable_invalid_path():
    """A path that does not hold a table is reported as not a Delta table."""
    # A random suffix is appended so the path is not expected to match any
    # existing table directory.
    suffix = int(random() * 10000)
    missing_path = "../crates/test/tests/data/simple_table_invalid_%s" % suffix
    found = DeltaTable.is_deltatable(missing_path)
    assert not found


def test_is_deltatable_with_storage_opts():
    """`is_deltatable` accepts storage options alongside the table path."""
    # Placeholder credentials/settings; the table itself is a local fixture.
    opts = {
        "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY",
        "AWS_ALLOW_HTTP": "true",
        "AWS_S3_ALLOW_UNSAFE_RENAME": "true",
        "AWS_S3_LOCKING_PROVIDER": "dynamodb",
        "DELTA_DYNAMO_TABLE_NAME": "custom_table_name",
    }
    fixture_path = "../crates/test/tests/data/simple_table"
    found = DeltaTable.is_deltatable(fixture_path, storage_options=opts)
    assert found

0 comments on commit 5c5432e

Please sign in to comment.