Skip to content

Commit

Permalink
Allow metadata for write_deltalake (#587)
Browse files Browse the repository at this point in the history
* Enable write_deltalake to accept name, description, and configuration metadata

* Add round trip metadata test and silence clippy too many arguments

* Add mypy ignore call-arg for too many arguments

* update docstring for new parameters
  • Loading branch information
PadenZach authored Apr 17, 2022
1 parent ccc354b commit 54da787
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 5 deletions.
19 changes: 18 additions & 1 deletion python/deltalake/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ def write_deltalake(
partition_by: Optional[List[str]] = None,
filesystem: Optional[pa_fs.FileSystem] = None,
mode: Literal["error", "append", "overwrite", "ignore"] = "error",
name: Optional[str] = None,
description: Optional[str] = None,
configuration: Optional[Mapping[str, Optional[str]]] = None,
) -> None:
"""Write to a Delta Lake table (Experimental)
Expand All @@ -53,6 +56,8 @@ def write_deltalake(
to write to an existing table with a higher min_writer_version, this
function will throw DeltaTableProtocolError.
Note that this function does NOT register this table in a data catalog.
:param table_or_uri: URI of a table or a DeltaTable object.
:param data: Data to write. If passing iterable, the schema must also be given.
:param schema: Optional schema to write.
Expand All @@ -64,6 +69,9 @@ def write_deltalake(
already exists. If 'append', will add new data. If 'overwrite', will
replace table with new data. If 'ignore', will not write anything if
table already exists.
:param name: User-provided identifier for this table.
:param description: User-provided description for this table.
:param configuration: A map containing configuration options for the metadata action.
"""
if isinstance(data, pd.DataFrame):
data = pa.Table.from_pandas(data)
Expand Down Expand Up @@ -147,7 +155,16 @@ def visitor(written_file: Any) -> None:
)

if table is None:
_write_new_deltalake(table_uri, schema, add_actions, mode, partition_by or [])
_write_new_deltalake( # type: ignore[call-arg]
table_uri,
schema,
add_actions,
mode,
partition_by or [],
name,
description,
configuration,
)
else:
table._table.create_write_transaction(
add_actions,
Expand Down
12 changes: 8 additions & 4 deletions python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -514,12 +514,16 @@ impl From<&PyAddAction> for action::Add {
}

#[pyfunction]
#[allow(clippy::too_many_arguments)]
fn write_new_deltalake(
table_uri: String,
schema: ArrowSchema,
add_actions: Vec<PyAddAction>,
_mode: &str,
partition_by: Vec<String>,
name: Option<String>,
description: Option<String>,
configuration: Option<HashMap<String, Option<String>>>,
) -> PyResult<()> {
let mut table = deltalake::DeltaTable::new(
&table_uri,
Expand All @@ -529,12 +533,12 @@ fn write_new_deltalake(
.map_err(PyDeltaTableError::from_raw)?;

let metadata = DeltaTableMetaData::new(
None,
None,
None,
name,
description,
None, // Format
(&schema).try_into()?,
partition_by,
HashMap::new(),
configuration.unwrap_or_default(),
);

let fut = table.create(
Expand Down
18 changes: 18 additions & 0 deletions python/tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,24 @@ def test_roundtrip_basic(tmp_path: pathlib.Path, sample_data: pa.Table):
assert table == sample_data


def test_roundtrip_metadata(tmp_path: pathlib.Path, sample_data: pa.Table):
write_deltalake(
str(tmp_path),
sample_data,
name="test_name",
description="test_desc",
configuration={"configTest": "foobar"},
)

delta_table = DeltaTable(str(tmp_path))

metadata = delta_table.metadata()

assert metadata.name == "test_name"
assert metadata.description == "test_desc"
assert metadata.configuration == {"configTest": "foobar"}


@pytest.mark.parametrize(
"column",
[
Expand Down

0 comments on commit 54da787

Please sign in to comment.