-
Notifications
You must be signed in to change notification settings - Fork 598
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(iceberg): support iceberg sink create table #18362
Changes from 17 commits
edcc896
83c48d6
2e90242
4cf7c5f
24e9127
0e34c39
c28f476
4b1cdc2
4c289a3
43e828c
77d58d2
dc3b266
6d9125a
77f47c0
2c40ed1
a0516c2
777c182
e7a2faf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,7 +18,7 @@ use std::collections::HashMap; | |
|
||
use async_trait::async_trait; | ||
use iceberg::io::{FileIO, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY}; | ||
use iceberg::spec::TableMetadata; | ||
use iceberg::spec::{TableMetadata, TableMetadataBuilder}; | ||
use iceberg::table::Table; | ||
use iceberg::{ | ||
Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, | ||
|
@@ -218,18 +218,54 @@ impl Catalog for StorageCatalog { | |
/// Create a new table inside the namespace. | ||
async fn create_table( | ||
&self, | ||
_namespace: &NamespaceIdent, | ||
_creation: TableCreation, | ||
namespace: &NamespaceIdent, | ||
creation: TableCreation, | ||
) -> iceberg::Result<Table> { | ||
todo!() | ||
let table_ident = TableIdent::new(namespace.clone(), creation.name.clone()); | ||
let table_path = { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You can ignore this comment; I'm going to propose another PR to refactor it. |
||
let mut names = table_ident.namespace.clone().inner(); | ||
names.push(table_ident.name.to_string()); | ||
if self.warehouse.ends_with('/') { | ||
format!("{}{}", self.warehouse, names.join("/")) | ||
} else { | ||
format!("{}/{}", self.warehouse, names.join("/")) | ||
} | ||
}; | ||
|
||
// Create the metadata directory | ||
let metadata_path = format!("{table_path}/metadata"); | ||
|
||
// Create the initial table metadata | ||
let table_metadata = TableMetadataBuilder::from_table_creation(creation)?.build()?; | ||
|
||
// Write the initial metadata file | ||
let metadata_file_path = format!("{metadata_path}/v1.metadata.json"); | ||
let metadata_json = serde_json::to_string(&table_metadata)?; | ||
let output = self.file_io.new_output(&metadata_file_path)?; | ||
output.write(metadata_json.into()).await?; | ||
|
||
// Write the version hint file | ||
let version_hint_path = format!("{table_path}/metadata/version-hint.text"); | ||
let version_hint_output = self.file_io.new_output(&version_hint_path)?; | ||
version_hint_output.write("1".into()).await?; | ||
|
||
Ok(Table::builder() | ||
.metadata(table_metadata) | ||
.identifier(table_ident) | ||
.file_io(self.file_io.clone()) | ||
.build()) | ||
} | ||
|
||
/// Load table from the catalog. | ||
async fn load_table(&self, table: &TableIdent) -> iceberg::Result<Table> { | ||
let table_path = { | ||
let mut names = table.namespace.clone().inner(); | ||
names.push(table.name.to_string()); | ||
format!("{}/{}", self.warehouse, names.join("/")) | ||
if self.warehouse.ends_with('/') { | ||
format!("{}{}", self.warehouse, names.join("/")) | ||
} else { | ||
format!("{}/{}", self.warehouse, names.join("/")) | ||
} | ||
}; | ||
let path = if self.is_version_hint_exist(&table_path).await? { | ||
let version_hint = self.read_version_hint(&table_path).await?; | ||
|
@@ -262,8 +298,23 @@ impl Catalog for StorageCatalog { | |
} | ||
|
||
/// Check if a table exists in the catalog. | ||
async fn table_exists(&self, _table: &TableIdent) -> iceberg::Result<bool> { | ||
todo!() | ||
async fn table_exists(&self, table: &TableIdent) -> iceberg::Result<bool> { | ||
let table_path = { | ||
let mut names = table.namespace.clone().inner(); | ||
names.push(table.name.to_string()); | ||
if self.warehouse.ends_with('/') { | ||
format!("{}{}", self.warehouse, names.join("/")) | ||
} else { | ||
format!("{}/{}", self.warehouse, names.join("/")) | ||
} | ||
}; | ||
let metadata_path = format!("{table_path}/metadata/version-hint.text"); | ||
self.file_io.is_exist(&metadata_path).await.map_err(|err| { | ||
Error::new( | ||
ErrorKind::Unexpected, | ||
format!("Failed to check if table exists: {}", err.as_report()), | ||
) | ||
}) | ||
} | ||
|
||
/// Rename a table in the catalog. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -342,6 +342,10 @@ pub async fn get_partition_compute_info( | |
async fn get_partition_compute_info_for_iceberg( | ||
iceberg_config: &IcebergConfig, | ||
) -> Result<Option<PartitionComputeInfo>> { | ||
// TODO: check table if exists | ||
if iceberg_config.create_table_if_not_exists { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why can we do this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Because this is only an optimization, so we can always skip it. See https://github.com/risingwavelabs/rfcs/blob/532d50c7f19ef940582390a17412e633e069449e/rfcs/0077-iceberg-sink-shuffle.md |
||
return Ok(None); | ||
} | ||
let table = iceberg_config.load_table().await?; | ||
let Some(partition_spec) = table.current_table_metadata().current_partition_spec().ok() else { | ||
return Ok(None); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Could use a separate function to handle building the table path.