diff --git a/crates/integrations/datafusion/src/table.rs b/crates/integrations/datafusion/src/table.rs index f12d41eec..2797e12d6 100644 --- a/crates/integrations/datafusion/src/table.rs +++ b/crates/integrations/datafusion/src/table.rs @@ -110,3 +110,57 @@ impl TableProvider for IcebergTableProvider { Ok(filter_support) } } + +#[cfg(test)] +mod tests { + use datafusion::common::Column; + use datafusion::prelude::SessionContext; + use iceberg::io::FileIO; + use iceberg::table::{StaticTable, Table}; + use iceberg::TableIdent; + + use super::*; + + async fn get_test_table_from_metadata_file() -> Table { + let metadata_file_name = "TableMetadataV2Valid.json"; + let metadata_file_path = format!( + "{}/tests/test_data/{}", + env!("CARGO_MANIFEST_DIR"), + metadata_file_name + ); + let file_io = FileIO::from_path(&metadata_file_path) + .unwrap() + .build() + .unwrap(); + let static_identifier = TableIdent::from_strs(["static_ns", "static_table"]).unwrap(); + let static_table = + StaticTable::from_metadata_file(&metadata_file_path, static_identifier, file_io) + .await + .unwrap(); + static_table.into_table() + } + + #[tokio::test] + async fn test_try_new_from_table() { + let table = get_test_table_from_metadata_file().await; + let table_provider = IcebergTableProvider::try_new_from_table(table.clone()) + .await + .unwrap(); + let ctx = SessionContext::new(); + ctx.register_table("mytable", Arc::new(table_provider)) + .unwrap(); + let df = ctx.sql("SELECT * FROM mytable").await.unwrap(); + let df_schema = df.schema(); + let df_columns = df_schema.fields(); + assert_eq!(df_columns.len(), 3); + let x_column = df_columns.first().unwrap(); + let column_data = format!( + "{:?}:{:?}", + x_column.name(), + x_column.data_type().to_string() + ); + assert_eq!(column_data, "\"x\":\"Int64\""); + let has_column = df_schema.has_column(&Column::from_name("z")); + assert!(has_column); + } +} diff --git a/crates/integrations/datafusion/tests/test_data/TableMetadataV2Valid.json b/crates/integrations/datafusion/tests/test_data/TableMetadataV2Valid.json new file mode 100644 index 000000000..0dc89de58 --- /dev/null +++ b/crates/integrations/datafusion/tests/test_data/TableMetadataV2Valid.json @@ -0,0 +1,122 @@ +{ + "format-version": 2, + "table-uuid": "9c12d441-03fe-4693-9a96-a0705ddf69c1", + "location": "s3://bucket/test/location", + "last-sequence-number": 34, + "last-updated-ms": 1602638573590, + "last-column-id": 3, + "current-schema-id": 1, + "schemas": [ + { + "type": "struct", + "schema-id": 0, + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + } + ] + }, + { + "type": "struct", + "schema-id": 1, + "identifier-field-ids": [ + 1, + 2 + ], + "fields": [ + { + "id": 1, + "name": "x", + "required": true, + "type": "long" + }, + { + "id": 2, + "name": "y", + "required": true, + "type": "long", + "doc": "comment" + }, + { + "id": 3, + "name": "z", + "required": true, + "type": "long" + } + ] + } + ], + "default-spec-id": 0, + "partition-specs": [ + { + "spec-id": 0, + "fields": [ + { + "name": "x", + "transform": "identity", + "source-id": 1, + "field-id": 1000 + } + ] + } + ], + "last-partition-id": 1000, + "default-sort-order-id": 3, + "sort-orders": [ + { + "order-id": 3, + "fields": [ + { + "transform": "identity", + "source-id": 2, + "direction": "asc", + "null-order": "nulls-first" + }, + { + "transform": "bucket[4]", + "source-id": 3, + "direction": "desc", + "null-order": "nulls-last" + } + ] + } + ], + "properties": {}, + "current-snapshot-id": 3055729675574597004, + "snapshots": [ + { + "snapshot-id": 3051729675574597004, + "timestamp-ms": 1515100955770, + "sequence-number": 0, + "summary": { + "operation": "append" + }, + "manifest-list": "s3://a/b/1.avro" + }, + { + "snapshot-id": 3055729675574597004, + "parent-snapshot-id": 3051729675574597004, + "timestamp-ms": 1555100955770, + "sequence-number": 1, + "summary": { + "operation": "append" + }, + "manifest-list": "s3://a/b/2.avro", + "schema-id": 1 + } + ], + "snapshot-log": [ + { + "snapshot-id": 3051729675574597004, + "timestamp-ms": 1515100955770 + }, + { + "snapshot-id": 3055729675574597004, + "timestamp-ms": 1555100955770 + } + ], + "metadata-log": [] +} \ No newline at end of file