Skip to content

Commit

Permalink
Fix clippy
Browse files Browse the repository at this point in the history
  • Loading branch information
liurenjie1024 committed Aug 2, 2023
1 parent 5776a66 commit bb03f28
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 8 deletions.
1 change: 1 addition & 0 deletions crates/iceberg/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ serde_bytes = "0.11.8"
serde_json = "^1.0"
serde_derive = "^1.0"
anyhow = "1.0.72"
once_cell = "1"

[dev-dependencies]
pretty_assertions = "1.4.0"
74 changes: 66 additions & 8 deletions crates/iceberg/src/spec/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
/*!
* Data Types
*/
use std::cell::OnceCell;
use std::{collections::HashMap, fmt, ops::Index};

use serde::{
Expand Down Expand Up @@ -197,7 +198,7 @@ pub struct StructType {
fields: Vec<StructField>,
/// Lookup for index by field id
#[serde(skip_serializing)]
id_lookup: HashMap<i32, usize>,
id_lookup: OnceCell<HashMap<i32, usize>>,
}

impl<'de> Deserialize<'de> for StructType {
Expand Down Expand Up @@ -252,12 +253,23 @@ impl<'de> Deserialize<'de> for StructType {
impl StructType {
/// Creates a struct type with the given fields.
pub fn new(fields: Vec<StructField>) -> Self {
let id_lookup = HashMap::from_iter(fields.iter().enumerate().map(|(i, x)| (x.id, i)));
Self { fields, id_lookup }
Self {
fields,
id_lookup: OnceCell::default(),
}
}
/// Get structfield with certain id
/// Get struct field with certain id
pub fn field_by_id(&self, id: i32) -> Option<&StructField> {
self.fields.get(*self.id_lookup.get(&id)?)
self.field_id_to_index(id).map(|idx| &self.fields[idx])
}

fn field_id_to_index(&self, field_id: i32) -> Option<usize> {
self.id_lookup
.get_or_init(|| {
HashMap::from_iter(self.fields.iter().enumerate().map(|(i, x)| (x.id, i)))
})
.get(&field_id)
.copied()
}
}

Expand Down Expand Up @@ -305,6 +317,52 @@ pub struct StructField {
pub write_default: Option<String>,
}

impl StructField {
    /// Construct a required field.
    ///
    /// `doc`, `initial_default` and `write_default` start out unset; use the
    /// `with_*` methods to fill them in.
    pub fn required(id: i32, name: impl ToString, field_type: Type) -> Self {
        Self::with_requiredness(id, name.to_string(), true, field_type)
    }

    /// Construct an optional field.
    ///
    /// `doc`, `initial_default` and `write_default` start out unset; use the
    /// `with_*` methods to fill them in.
    pub fn optional(id: i32, name: impl ToString, field_type: Type) -> Self {
        Self::with_requiredness(id, name.to_string(), false, field_type)
    }

    /// Shared constructor behind [`StructField::required`] and
    /// [`StructField::optional`], so both always initialise the same fields.
    fn with_requiredness(id: i32, name: String, required: bool, field_type: Type) -> Self {
        Self {
            id,
            name,
            required,
            field_type,
            doc: None,
            initial_default: None,
            write_default: None,
        }
    }

    /// Set the field's doc.
    pub fn with_doc(mut self, doc: impl ToString) -> Self {
        self.doc = Some(doc.to_string());
        self
    }

    /// Set the field's initial default value.
    pub fn with_initial_default(mut self, value: impl ToString) -> Self {
        self.initial_default = Some(value.to_string());
        self
    }

    /// Set the field's write default value.
    pub fn with_write_default(mut self, value: impl ToString) -> Self {
        self.write_default = Some(value.to_string());
        self
    }
}

impl fmt::Display for StructField {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}: ", self.id)?;
Expand Down Expand Up @@ -402,7 +460,7 @@ mod tests {
initial_default: None,
write_default: None,
}],
id_lookup: HashMap::from([(1, 0)]),
id_lookup: HashMap::from([(1, 0)]).into(),
}),
)
}
Expand Down Expand Up @@ -435,7 +493,7 @@ mod tests {
initial_default: None,
write_default: None,
}],
id_lookup: HashMap::from([(1, 0)]),
id_lookup: HashMap::from([(1, 0)]).into(),
}),
)
}
Expand Down Expand Up @@ -486,7 +544,7 @@ mod tests {
write_default: None,
},
],
id_lookup: HashMap::from([(1, 0), (2, 1)]),
id_lookup: HashMap::from([(1, 0), (2, 1)]).into(),
}),
)
}
Expand Down
1 change: 1 addition & 0 deletions crates/iceberg/src/spec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@
//! Spec for Iceberg.

pub mod datatypes;
pub mod schema;
111 changes: 111 additions & 0 deletions crates/iceberg/src/spec/schema.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! This module defines schema in iceberg.

use crate::spec::datatypes::{StructField, StructType};

const DEFAULT_SCHEMA_ID: i32 = 0;

/// Defines schema in iceberg.
///
/// Instances are created through [`Schema::builder`]; the fields are private
/// and exposed via accessor methods.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Schema {
/// Struct type holding the schema's fields; lookups by field id are delegated to it.
r#struct: StructType,
/// Identifier of this schema (`DEFAULT_SCHEMA_ID` unless set on the builder).
schema_id: i32,
/// Largest `id` among the fields supplied to the builder, or 0 when there are none.
highest_field_id: i32,
}

/// Schema builder.
///
/// Obtained from `Schema::builder()`; collects a schema id and a list of
/// fields, then produces a `Schema` via `build`.
#[derive(Debug)]
pub struct SchemaBuilder {
    /// Schema id that the built schema will carry.
    schema_id: i32,
    /// Fields collected so far, in the order they were added.
    fields: Vec<StructField>,
}

impl SchemaBuilder {
    /// Add fields to the schema builder.
    ///
    /// May be called more than once; each call appends to the fields added
    /// by earlier calls.
    pub fn with_fields(mut self, fields: impl IntoIterator<Item = StructField>) -> Self {
        // `Vec::extend` takes any `IntoIterator`, so an explicit `.into_iter()` is redundant.
        self.fields.extend(fields);
        self
    }

    /// Set schema id, replacing any previously set value.
    pub fn with_schema_id(mut self, schema_id: i32) -> Self {
        self.schema_id = schema_id;
        self
    }

    /// Builds the schema.
    ///
    /// The schema's highest field id is the largest `id` among the collected
    /// fields, or `0` when no fields were added.
    pub fn build(self) -> Schema {
        // Computed before `self.fields` is moved into the struct type.
        let highest_field_id = self.fields.iter().map(|f| f.id).max().unwrap_or(0);
        Schema {
            r#struct: StructType::new(self.fields),
            schema_id: self.schema_id,
            highest_field_id,
        }
    }
}

impl Schema {
    /// Start building a schema with the default schema id and no fields.
    pub fn builder() -> SchemaBuilder {
        let fields = Vec::new();
        SchemaBuilder {
            schema_id: DEFAULT_SCHEMA_ID,
            fields,
        }
    }

    /// Look up a field by its id in the underlying struct type.
    pub fn field_by_id(&self, field_id: i32) -> Option<&StructField> {
        let Self {
            r#struct: struct_type,
            ..
        } = self;
        struct_type.field_by_id(field_id)
    }

    /// Returns the highest field id recorded when the schema was built.
    #[inline]
    pub fn highest_field_id(&self) -> i32 {
        let Self {
            highest_field_id, ..
        } = *self;
        highest_field_id
    }

    /// Returns the id of this schema.
    #[inline]
    pub fn schema_id(&self) -> i32 {
        let Self { schema_id, .. } = *self;
        schema_id
    }
}

#[cfg(test)]
mod tests {
    use crate::spec::datatypes::{PrimitiveType, StructField, Type};
    use crate::spec::schema::Schema;

    /// Builds a two-field schema through chained builder calls and checks the
    /// schema id, the highest field id, and lookups by field id.
    #[test]
    fn test_construct_schema() {
        let bool_field = StructField::required(1, "f1", Type::Primitive(PrimitiveType::Boolean));
        let int_field = StructField::optional(2, "f2", Type::Primitive(PrimitiveType::Int));

        // Fields are added in two separate calls to exercise accumulation.
        let builder = Schema::builder()
            .with_fields(vec![bool_field.clone()])
            .with_fields(vec![int_field.clone()]);
        let schema = builder.with_schema_id(3).build();

        assert_eq!(3, schema.schema_id());
        assert_eq!(2, schema.highest_field_id());

        // Known ids resolve to the matching field; an unknown id yields `None`.
        assert_eq!(Some(&bool_field), schema.field_by_id(1));
        assert_eq!(Some(&int_field), schema.field_by_id(2));
        assert_eq!(None, schema.field_by_id(3));
    }
}

0 comments on commit bb03f28

Please sign in to comment.