From bb03f284b1f17ef57887bf6b6d52fad54fe14289 Mon Sep 17 00:00:00 2001 From: Renjie Liu Date: Wed, 2 Aug 2023 18:47:40 +0800 Subject: [PATCH] Fix clippy --- crates/iceberg/Cargo.toml | 1 + crates/iceberg/src/spec/datatypes.rs | 74 ++++++++++++++++-- crates/iceberg/src/spec/mod.rs | 1 + crates/iceberg/src/spec/schema.rs | 111 +++++++++++++++++++++++++++ 4 files changed, 179 insertions(+), 8 deletions(-) create mode 100644 crates/iceberg/src/spec/schema.rs diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml index 496cba497..72c10c7cb 100644 --- a/crates/iceberg/Cargo.toml +++ b/crates/iceberg/Cargo.toml @@ -33,6 +33,7 @@ serde_bytes = "0.11.8" serde_json = "^1.0" serde_derive = "^1.0" anyhow = "1.0.72" +once_cell = "1" [dev-dependencies] pretty_assertions = "1.4.0" diff --git a/crates/iceberg/src/spec/datatypes.rs b/crates/iceberg/src/spec/datatypes.rs index 4d0b83113..653c5cf71 100644 --- a/crates/iceberg/src/spec/datatypes.rs +++ b/crates/iceberg/src/spec/datatypes.rs @@ -18,6 +18,7 @@ /*! * Data Types */ +use std::cell::OnceCell; use std::{collections::HashMap, fmt, ops::Index}; use serde::{ @@ -197,7 +198,7 @@ pub struct StructType { fields: Vec, /// Lookup for index by field id #[serde(skip_serializing)] - id_lookup: HashMap, + id_lookup: OnceCell>, } impl<'de> Deserialize<'de> for StructType { @@ -252,12 +253,23 @@ impl<'de> Deserialize<'de> for StructType { impl StructType { /// Creates a struct type with the given fields. pub fn new(fields: Vec) -> Self { - let id_lookup = HashMap::from_iter(fields.iter().enumerate().map(|(i, x)| (x.id, i))); - Self { fields, id_lookup } + Self { + fields, + id_lookup: OnceCell::default(), + } } - /// Get structfield with certain id + /// Get struct field with certain id pub fn field_by_id(&self, id: i32) -> Option<&StructField> { - self.fields.get(*self.id_lookup.get(&id)?) 
+ self.field_id_to_index(id).map(|idx| &self.fields[idx]) + } + + fn field_id_to_index(&self, field_id: i32) -> Option { + self.id_lookup + .get_or_init(|| { + HashMap::from_iter(self.fields.iter().enumerate().map(|(i, x)| (x.id, i))) + }) + .get(&field_id) + .copied() } } @@ -305,6 +317,52 @@ pub struct StructField { pub write_default: Option, } +impl StructField { + /// Construct a required field. + pub fn required(id: i32, name: impl ToString, field_type: Type) -> Self { + Self { + id, + name: name.to_string(), + required: true, + field_type, + doc: None, + initial_default: None, + write_default: None, + } + } + + /// Construct an optional field. + pub fn optional(id: i32, name: impl ToString, field_type: Type) -> Self { + Self { + id, + name: name.to_string(), + required: false, + field_type, + doc: None, + initial_default: None, + write_default: None, + } + } + + /// Set the field's doc. + pub fn with_doc(mut self, doc: impl ToString) -> Self { + self.doc = Some(doc.to_string()); + self + } + + /// Set the field's initial default value. + pub fn with_initial_default(mut self, value: impl ToString) -> Self { + self.initial_default = Some(value.to_string()); + self + } + + /// Set the field's write default value. 
+ pub fn with_write_default(mut self, value: impl ToString) -> Self { + self.write_default = Some(value.to_string()); + self + } +} + impl fmt::Display for StructField { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}: ", self.id)?; @@ -402,7 +460,7 @@ mod tests { initial_default: None, write_default: None, }], - id_lookup: HashMap::from([(1, 0)]), + id_lookup: HashMap::from([(1, 0)]).into(), }), ) } @@ -435,7 +493,7 @@ mod tests { initial_default: None, write_default: None, }], - id_lookup: HashMap::from([(1, 0)]), + id_lookup: HashMap::from([(1, 0)]).into(), }), ) } @@ -486,7 +544,7 @@ mod tests { write_default: None, }, ], - id_lookup: HashMap::from([(1, 0), (2, 1)]), + id_lookup: HashMap::from([(1, 0), (2, 1)]).into(), }), ) } diff --git a/crates/iceberg/src/spec/mod.rs b/crates/iceberg/src/spec/mod.rs index bae9e2ab5..3b998a773 100644 --- a/crates/iceberg/src/spec/mod.rs +++ b/crates/iceberg/src/spec/mod.rs @@ -18,3 +18,4 @@ //! Spec for Iceberg. pub mod datatypes; +pub mod schema; diff --git a/crates/iceberg/src/spec/schema.rs b/crates/iceberg/src/spec/schema.rs new file mode 100644 index 000000000..4a53dc29c --- /dev/null +++ b/crates/iceberg/src/spec/schema.rs @@ -0,0 +1,111 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +//! This module defines schema in iceberg. + +use crate::spec::datatypes::{StructField, StructType}; + +const DEFAULT_SCHEMA_ID: i32 = 0; + +/// Defines schema in iceberg. +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Schema { + r#struct: StructType, + schema_id: i32, + highest_field_id: i32, +} + +/// Schema builder. +pub struct SchemaBuilder { + schema_id: i32, + fields: Vec, +} + +impl SchemaBuilder { + /// Add fields to the schema builder. + pub fn with_fields(mut self, fields: impl IntoIterator) -> Self { + self.fields.extend(fields.into_iter()); + self + } + + /// Set schema id. + pub fn with_schema_id(mut self, schema_id: i32) -> Self { + self.schema_id = schema_id; + self + } + + /// Builds the schema. + pub fn build(self) -> Schema { + let highest_field_id = self.fields.iter().map(|f| f.id).max().unwrap_or(0); + Schema { + r#struct: StructType::new(self.fields), + schema_id: self.schema_id, + highest_field_id, + } + } +} + +impl Schema { + /// Create a schema builder. + pub fn builder() -> SchemaBuilder { + SchemaBuilder { + schema_id: DEFAULT_SCHEMA_ID, + fields: vec![], + } + } + + /// Get field by field id. + pub fn field_by_id(&self, field_id: i32) -> Option<&StructField> { + self.r#struct.field_by_id(field_id) + } + + /// Returns the highest field id. + #[inline] + pub fn highest_field_id(&self) -> i32 { + self.highest_field_id + } + + /// Returns the schema id. 
+ #[inline] + pub fn schema_id(&self) -> i32 { + self.schema_id + } +} + +#[cfg(test)] +mod tests { + use crate::spec::datatypes::{PrimitiveType, StructField, Type}; + use crate::spec::schema::Schema; + + #[test] + fn test_construct_schema() { + let field1 = StructField::required(1, "f1", Type::Primitive(PrimitiveType::Boolean)); + let field2 = StructField::optional(2, "f2", Type::Primitive(PrimitiveType::Int)); + + let schema = Schema::builder() + .with_fields(vec![field1.clone()]) + .with_fields(vec![field2.clone()]) + .with_schema_id(3) + .build(); + + assert_eq!(3, schema.schema_id()); + assert_eq!(2, schema.highest_field_id()); + assert_eq!(Some(&field1), schema.field_by_id(1)); + assert_eq!(Some(&field2), schema.field_by_id(2)); + assert_eq!(None, schema.field_by_id(3)); + } +}