From 525cb53c5be45dd7a210c82bc0fe48dc8fde429d Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Sun, 14 Jan 2024 16:12:36 -0500 Subject: [PATCH 1/4] docs: add usage guide for check constraints --- docs/usage/constraints.md | 32 ++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + 2 files changed, 33 insertions(+) create mode 100644 docs/usage/constraints.md diff --git a/docs/usage/constraints.md b/docs/usage/constraints.md new file mode 100644 index 0000000000..f5720bb51b --- /dev/null +++ b/docs/usage/constraints.md @@ -0,0 +1,32 @@ +# Adding a Constraint to a table + +Check constraints are a way to enforce that only data that meets the constraint is allowed to be added to the table. + +## Add the Constraint + +```python +from deltalake import DeltaTable +dt = DeltaTable("../rust/tests/data/simple_table") + +# Check the schema before hand +print(dt.schema()) +# Add the constraint to the table. +dt.alter.add_constraint({"id_gt_0": "id > 0"}) +``` + +After you have added the constraint to the table attempting to append data to the table that violates the constraint +will instead throw an error. + +## Verify the constraint by trying to add some data + +```python +from deltalake import write_deltalake +import pandas as pd + +df = pd.DataFrame({'id': [-1]}) +write_deltalake(dt, df, mode='append', engine='rust') +# _internal.DeltaProtocolError: Invariant violations: ["Check or Invariant (id > 0) violated by value in row: [-1]"] +``` + +Note: ensure you use the `engine='rust'` parameter when writing to the table as this feature is not supported in the +default pyarrow writer. 
\ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 4e713d73ec..50f7e2a3d1 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -52,6 +52,7 @@ nav: - Creating a table: usage/create-delta-lake-table.md - Loading a table: usage/loading-table.md - Append/overwrite tables: usage/appending-overwriting-delta-lake-table.md + - Adding a constraint: usage/constraints.md - Examining a table: usage/examining-table.md - Querying a table: usage/querying-delta-tables.md - Managing a table: usage/managing-tables.md From 3ed9f0627a267f5de0e051d597880f658387d824 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Sun, 14 Jan 2024 16:42:47 -0500 Subject: [PATCH 2/4] docs: add usage guide for check constraints --- docs/src/python/check_constraints.py | 20 ++++++++++++++++++++ docs/src/rust/check_constraints.rs | 23 +++++++++++++++++++++++ docs/usage/constraints.md | 19 ++----------------- 3 files changed, 45 insertions(+), 17 deletions(-) create mode 100644 docs/src/python/check_constraints.py create mode 100644 docs/src/rust/check_constraints.rs diff --git a/docs/src/python/check_constraints.py b/docs/src/python/check_constraints.py new file mode 100644 index 0000000000..308353b806 --- /dev/null +++ b/docs/src/python/check_constraints.py @@ -0,0 +1,20 @@ +def add_constraint(): + # --8<-- [start:add_constraint] + from deltalake import DeltaTable + dt = DeltaTable("../rust/tests/data/simple_table") + + # Check the schema before hand + print(dt.schema()) + # Add the constraint to the table. 
+ dt.alter.add_constraint({"id_gt_0": "id > 0"}) + # --8<-- [end:add_constraint] + +def add_data(): + # --8<-- [start:add_data] + from deltalake import write_deltalake + import pandas as pd + + df = pd.DataFrame({'id': [-1]}) + write_deltalake(dt, df, mode='append', engine='rust') + # _internal.DeltaProtocolError: Invariant violations: ["Check or Invariant (id > 0) violated by value in row: [-1]"] + # --8<-- [end:add_data] \ No newline at end of file diff --git a/docs/src/rust/check_constraints.rs b/docs/src/rust/check_constraints.rs new file mode 100644 index 0000000000..fbc2cf18d6 --- /dev/null +++ b/docs/src/rust/check_constraints.rs @@ -0,0 +1,23 @@ +use std::sync::Arc; + +#[tokio::main] +async fn main() -> Result<(), Box<dyn std::error::Error>> { + + // --8<-- [start:add_constraint] + let table = deltalake::open_table("../rust/tests/data/simple_table").await?; + let ops = DeltaOps(table); + ops.with_constraint("id_gt_0", "id > 0").await?; + // --8<-- [end:add_constraint] + + // --8<-- [start:add_data] + let table = deltalake::open_table("../rust/tests/data/simple_table").await?; + let schema = table.get_state().arrow_schema()?; + let invalid_values: Vec<Arc<dyn Array>> = vec![ + Arc::new(Int32Array::from(vec![-10])) + ]; + let batch = RecordBatch::try_new(schema, invalid_values)?; + table.write(vec![batch]).await?; + // --8<-- [end:add_data] + + Ok(()) +} \ No newline at end of file diff --git a/docs/usage/constraints.md b/docs/usage/constraints.md index f5720bb51b..3e8bc8122c 100644 --- a/docs/usage/constraints.md +++ b/docs/usage/constraints.md @@ -4,29 +4,14 @@ Check constraints are a way to enforce that only data that meets the constraint ## Add the Constraint -```python -from deltalake import DeltaTable -dt = DeltaTable("../rust/tests/data/simple_table") - -# Check the schema before hand -print(dt.schema()) -# Add the constraint to the table. 
-dt.alter.add_constraint({"id_gt_0": "id > 0"}) -``` +{{ code_example('check_constraints', 'add_constraint', ['DeltaTable']) }} After you have added the constraint to the table attempting to append data to the table that violates the constraint will instead throw an error. ## Verify the constraint by trying to add some data -```python -from deltalake import write_deltalake -import pandas as pd - -df = pd.DataFrame({'id': [-1]}) -write_deltalake(dt, df, mode='append', engine='rust') -# _internal.DeltaProtocolError: Invariant violations: ["Check or Invariant (id > 0) violated by value in row: [-1]"] -``` +{{ code_example('check_constraints', 'add_data', []) }} Note: ensure you use the `engine='rust'` parameter when writing to the table as this feature is not supported in the default pyarrow writer. \ No newline at end of file From 38e36748499522f3be0c0558d3ac01bef4215767 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Mon, 15 Jan 2024 12:16:02 -0500 Subject: [PATCH 3/4] docs: add usage guide for check constraints --- docs/src/python/check_constraints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/python/check_constraints.py b/docs/src/python/check_constraints.py index 308353b806..589162f22d 100644 --- a/docs/src/python/check_constraints.py +++ b/docs/src/python/check_constraints.py @@ -17,4 +17,4 @@ def add_data(): df = pd.DataFrame({'id': [-1]}) write_deltalake(dt, df, mode='append', engine='rust') # _internal.DeltaProtocolError: Invariant violations: ["Check or Invariant (id > 0) violated by value in row: [-1]"] - # --8<-- [end:add_data] \ No newline at end of file + # --8<-- [end:add_data] From 84bd9f515a519c6a6aaab74a71dcbb5737fc6592 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Mon, 15 Jan 2024 12:29:24 -0500 Subject: [PATCH 4/4] docs: add usage guide for check constraints --- docs/src/python/check_constraints.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/src/python/check_constraints.py 
b/docs/src/python/check_constraints.py index 589162f22d..16fb8bf374 100644 --- a/docs/src/python/check_constraints.py +++ b/docs/src/python/check_constraints.py @@ -1,6 +1,7 @@ def add_constraint(): # --8<-- [start:add_constraint] from deltalake import DeltaTable + dt = DeltaTable("../rust/tests/data/simple_table") # Check the schema before hand @@ -9,12 +10,13 @@ def add_constraint(): dt.alter.add_constraint({"id_gt_0": "id > 0"}) # --8<-- [end:add_constraint] + def add_data(): # --8<-- [start:add_data] from deltalake import write_deltalake import pandas as pd - df = pd.DataFrame({'id': [-1]}) - write_deltalake(dt, df, mode='append', engine='rust') + df = pd.DataFrame({"id": [-1]}) + write_deltalake(dt, df, mode="append", engine="rust") # _internal.DeltaProtocolError: Invariant violations: ["Check or Invariant (id > 0) violated by value in row: [-1]"] # --8<-- [end:add_data]