diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 1e63c51acb..93a4528b60 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -17,6 +17,7 @@ [workspace] members = [ + "fury-core", "fury", "fury-derive", "tests" diff --git a/rust/fury-core/Cargo.toml b/rust/fury-core/Cargo.toml new file mode 100644 index 0000000000..3be240ff7d --- /dev/null +++ b/rust/fury-core/Cargo.toml @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "fury-core" +version.workspace = true +edition.workspace = true +rust-version.workspace = true + +[dependencies] +proc-macro2 = { default-features = false, version = "1.0" } +syn = { default-features = false, version = "2.0", features = ["full", "fold"] } +quote = { default-features = false, version = "1.0" } +lazy_static = { version = "1.4" } +byteorder = { version = "1.4" } +chrono = "0.4" +thiserror = { default-features = false, version = "1.0" } + + +[[bench]] +name = "simd_bench" +harness = false + + +[dev-dependencies] +criterion = "0.5.1" +rand = "0.8.5" \ No newline at end of file diff --git a/rust/fury/benches/simd_bench.rs b/rust/fury-core/benches/simd_bench.rs similarity index 100% rename from rust/fury/benches/simd_bench.rs rename to rust/fury-core/benches/simd_bench.rs diff --git a/rust/fury/src/buffer.rs b/rust/fury-core/src/buffer.rs similarity index 100% rename from rust/fury/src/buffer.rs rename to rust/fury-core/src/buffer.rs diff --git a/rust/fury/src/error.rs b/rust/fury-core/src/error.rs similarity index 66% rename from rust/fury/src/error.rs rename to rust/fury-core/src/error.rs index 73cc9105d9..717d18bff6 100644 --- a/rust/fury/src/error.rs +++ b/rust/fury-core/src/error.rs @@ -47,8 +47,29 @@ pub enum Error { TagType(u8), #[error("Only Xlang supported; receive: {language:?}")] - UnsupportLanguage { language: Language }, + UnsupportedLanguage { language: Language }, #[error("Unsupported Language Code; receive: {code:?}")] - UnsupportLanguageCode { code: u8 }, + UnsupportedLanguageCode { code: u8 }, + + #[error("encoded_data cannot be empty")] + EncodedDataEmpty, + + #[error("Long meta string than 32767 is not allowed")] + LengthExceed, + + #[error("Non-ASCII characters in meta string are not allowed")] + OnlyAllowASCII, + + #[error("Unsupported character for LOWER_SPECIAL encoding: {ch:?}")] + UnsupportedLowerSpecialCharacter { ch: char }, + + #[error("Unsupported character for LOWER_UPPER_DIGIT_SPECIAL encoding: {ch:?}")] + UnsupportedLowerUpperDigitSpecialCharacter { ch: char }, + + #[error("Invalid character value for LOWER_SPECIAL decoding: {value:?}")] + InvalidLowerSpecialValue { value: u8 }, + + #[error("Invalid character value for LOWER_UPPER_DIGIT_SPECIAL decoding: {value:?}")] + InvalidLowerUpperDigitSpecialValue { value: u8 }, } diff --git a/rust/fury-core/src/fury.rs b/rust/fury-core/src/fury.rs new file mode 100644 index 0000000000..34a1532e76 --- /dev/null +++ b/rust/fury-core/src/fury.rs @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::buffer::{Reader, Writer}; +use crate::error::Error; +use crate::read_state::ReadState; +use crate::serializer::Serializer; +use crate::types::Mode; +use crate::write_state::WriteState; + +pub struct Fury { + mode: Mode, +} + +impl Default for Fury { + fn default() -> Self { + Fury { + mode: Mode::Compatible, + } + } +} + +impl Fury { + pub fn mode(&mut self, mode: Mode) { + self.mode = mode; + } + + pub fn get_mode(&self) -> &Mode { + &self.mode + } + + pub fn deserialize(&self, bf: &[u8]) -> Result { + let reader = Reader::new(bf); + let mut deserializer = ReadState::new(self, reader); + deserializer.head()?; + ::deserialize(&mut deserializer) + } + + pub fn serialize(&self, record: &T) -> Vec { + let mut writer = Writer::default(); + let mut serializer = WriteState::new(self, &mut writer); + serializer.head::(); + ::serialize(record, &mut serializer); + writer.dump() + } +} diff --git a/rust/fury-core/src/internal/bool.rs b/rust/fury-core/src/internal/bool.rs new file mode 100644 index 0000000000..edeebbd807 --- /dev/null +++ b/rust/fury-core/src/internal/bool.rs @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use crate::read_state::ReadState; +use crate::serializer::Serializer; +use crate::types::FieldType; +use crate::write_state::WriteState; +use std::mem; + +impl Serializer for bool { + fn reserved_space() -> usize { + mem::size_of::() + } + + fn write(&self, serializer: &mut WriteState) { + serializer.writer.u8(if *self { 1 } else { 0 }); + } + + fn read(deserializer: &mut ReadState) -> Result { + Ok(deserializer.reader.u8() == 1) + } + + fn ty() -> FieldType { + FieldType::BOOL + } +} diff --git a/rust/fury-core/src/internal/datetime.rs b/rust/fury-core/src/internal/datetime.rs new file mode 100644 index 0000000000..8852a0779a --- /dev/null +++ b/rust/fury-core/src/internal/datetime.rs @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use crate::read_state::ReadState; +use crate::serializer::Serializer; +use crate::types::{FieldType, FuryGeneralList}; +use crate::write_state::WriteState; +use chrono::{DateTime, Days, NaiveDate, NaiveDateTime}; +use std::mem; + +impl Serializer for NaiveDateTime { + fn read(deserializer: &mut ReadState) -> Result { + let timestamp = deserializer.reader.u64(); + let ret = DateTime::from_timestamp_millis(timestamp as i64).map(|dt| dt.naive_utc()); + match ret { + Some(r) => Ok(r), + None => Err(Error::NaiveDateTime), + } + } + + fn write(&self, serializer: &mut WriteState) { + serializer + .writer + .u64(self.and_utc().timestamp_millis() as u64); + } + + fn reserved_space() -> usize { + mem::size_of::() + } + + fn ty() -> FieldType { + FieldType::TIMESTAMP + } +} + +impl FuryGeneralList for NaiveDateTime {} + +lazy_static::lazy_static!( + static ref EPOCH: NaiveDate = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); +); + +impl Serializer for NaiveDate { + fn write(&self, serializer: &mut WriteState) { + let days_since_epoch = self.signed_duration_since(*EPOCH).num_days(); + serializer.writer.u64(days_since_epoch as u64); + } + + fn reserved_space() -> usize { + mem::size_of::() + } + + fn read(serializer: &mut ReadState) -> Result { + let days = serializer.reader.u64(); + match EPOCH.checked_add_days(Days::new(days)) { + Some(value) => Ok(value), + None => Err(Error::NaiveDate), + } + } + + fn ty() -> FieldType { + FieldType::DATE + } +} + +impl FuryGeneralList for NaiveDate {} diff --git a/rust/fury-core/src/internal/list.rs b/rust/fury-core/src/internal/list.rs new file mode 100644 index 0000000000..491e377b28 --- /dev/null +++ b/rust/fury-core/src/internal/list.rs @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use crate::read_state::ReadState; +use crate::serializer::Serializer; +use crate::types::{FieldType, FuryGeneralList, SIZE_OF_REF_AND_TYPE}; +use crate::write_state::WriteState; +use std::mem; + +impl Serializer for Vec +where + T: Serializer + FuryGeneralList, +{ + fn write(&self, serializer: &mut WriteState) { + serializer.writer.var_int32(self.len() as i32); + serializer + .writer + .reserve((::reserved_space() + SIZE_OF_REF_AND_TYPE) * self.len()); + for item in self.iter() { + item.serialize(serializer); + } + } + + fn read(deserializer: &mut ReadState) -> Result { + // length + let len = deserializer.reader.var_int32(); + // value + let mut result = Vec::new(); + for _ in 0..len { + result.push(T::deserialize(deserializer)?); + } + Ok(result) + } + + fn reserved_space() -> usize { + // size of the vec + mem::size_of::() + } + + fn ty() -> FieldType { + FieldType::ARRAY + } +} + +impl FuryGeneralList for Vec where T: Serializer {} diff --git a/rust/fury-core/src/internal/map.rs b/rust/fury-core/src/internal/map.rs new file mode 100644 index 0000000000..791da12194 --- /dev/null +++ b/rust/fury-core/src/internal/map.rs @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use crate::read_state::ReadState; +use crate::serializer::Serializer; +use crate::types::{FieldType, FuryGeneralList, SIZE_OF_REF_AND_TYPE}; +use crate::write_state::WriteState; +use std::collections::HashMap; +use std::mem; + +impl Serializer for HashMap { + fn write(&self, serializer: &mut WriteState) { + // length + serializer.writer.var_int32(self.len() as i32); + + let reserved_space = (::reserved_space() + SIZE_OF_REF_AND_TYPE) + * self.len() + + (::reserved_space() + SIZE_OF_REF_AND_TYPE) * self.len(); + serializer.writer.reserve(reserved_space); + + // key-value + for i in self.iter() { + i.0.serialize(serializer); + i.1.serialize(serializer); + } + } + + fn read(deserializer: &mut ReadState) -> Result { + // length + let len = deserializer.reader.var_int32(); + let mut result = HashMap::new(); + // key-value + for _ in 0..len { + result.insert( + ::deserialize(deserializer)?, + ::deserialize(deserializer)?, + ); + } + Ok(result) + } + + fn reserved_space() -> usize { + mem::size_of::() + } + + fn ty() -> FieldType { + FieldType::MAP + } +} + +impl FuryGeneralList for HashMap {} diff --git a/rust/fury/src/meta/mod.rs b/rust/fury-core/src/internal/mod.rs similarity index 88% rename from rust/fury/src/meta/mod.rs rename to rust/fury-core/src/internal/mod.rs index 02871e8257..4b4db5d5d2 100644 --- a/rust/fury/src/meta/mod.rs +++ b/rust/fury-core/src/internal/mod.rs @@ -15,6 +15,12 @@ // specific language governing permissions and limitations // under the License. -mod meta_string; -mod string_util; -pub use meta_string::{Encoding, MetaStringDecoder, MetaStringEncoder}; +mod bool; +mod datetime; +mod list; +mod map; +mod number; +mod option; +mod primitive_list; +mod set; +mod string; diff --git a/rust/fury-core/src/internal/number.rs b/rust/fury-core/src/internal/number.rs new file mode 100644 index 0000000000..ef4f50832b --- /dev/null +++ b/rust/fury-core/src/internal/number.rs @@ -0,0 +1,65 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use crate::read_state::ReadState; +use crate::serializer::Serializer; +use crate::types::{FieldType, FuryGeneralList}; +use crate::write_state::WriteState; + +#[allow(dead_code)] +fn to_u8_slice(slice: &[T]) -> &[u8] { + let byte_len = std::mem::size_of_val(slice); + unsafe { std::slice::from_raw_parts(slice.as_ptr().cast::(), byte_len) } +} + +macro_rules! impl_num_serializer { + ($name: ident, $ty:tt, $field_type: expr) => { + impl Serializer for $ty { + fn write(&self, serializer: &mut WriteState) { + serializer.writer.$name(*self); + } + + fn read(deserializer: &mut ReadState) -> Result { + Ok(deserializer.reader.$name()) + } + + fn reserved_space() -> usize { + std::mem::size_of::<$ty>() + } + + fn ty() -> FieldType { + $field_type + } + } + }; +} +impl FuryGeneralList for i8 {} +impl FuryGeneralList for u16 {} +impl FuryGeneralList for u32 {} +impl FuryGeneralList for u64 {} + +impl_num_serializer!(i8, i8, FieldType::INT8); +impl_num_serializer!(u8, u8, FieldType::UINT8); +impl_num_serializer!(i16, i16, FieldType::INT16); +impl_num_serializer!(u16, u16, FieldType::UINT16); +impl_num_serializer!(i32, i32, FieldType::INT32); +impl_num_serializer!(u32, u32, FieldType::UINT32); +impl_num_serializer!(u64, u64, FieldType::UINT64); +impl_num_serializer!(i64, i64, FieldType::INT64); +impl_num_serializer!(f32, f32, FieldType::FLOAT); +impl_num_serializer!(f64, f64, FieldType::DOUBLE); diff --git a/rust/fury-core/src/internal/option.rs b/rust/fury-core/src/internal/option.rs new file mode 100644 index 0000000000..d4f5b64e96 --- /dev/null +++ b/rust/fury-core/src/internal/option.rs @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use crate::read_state::ReadState; +use crate::serializer::Serializer; +use crate::types::{FieldType, FuryGeneralList, RefFlag}; +use crate::write_state::WriteState; + +impl Serializer for Option { + fn read(deserializer: &mut ReadState) -> Result { + Ok(Some(T::read(deserializer)?)) + } + + fn deserialize(deserializer: &mut ReadState) -> Result { + // ref flag + let ref_flag = deserializer.reader.i8(); + + if ref_flag == (RefFlag::NotNullValue as i8) || ref_flag == (RefFlag::RefValue as i8) { + // type_id + let type_id = deserializer.reader.i16(); + + if type_id != T::ty() as i16 { + Err(Error::FieldType { + expected: T::ty(), + actial: type_id, + }) + } else { + Ok(Some(T::read(deserializer)?)) + } + } else if ref_flag == (RefFlag::Null as i8) { + Ok(None) + } else if ref_flag == (RefFlag::Ref as i8) { + Err(Error::Ref) + } else { + Err(Error::BadRefFlag) + } + } + + fn write(&self, serializer: &mut WriteState) { + if let Some(v) = self { + T::write(v, serializer) + } else { + unreachable!("write should be call by serialize") + } + } + + fn serialize(&self, serializer: &mut WriteState) { + match self { + Some(v) => { + // ref flag + serializer.writer.i8(RefFlag::NotNullValue as i8); + // type + serializer.writer.i16(T::ty() as i16); + + v.write(serializer); + } + None => { + serializer.writer.i8(RefFlag::Null as i8); + } + } + } + + fn reserved_space() -> usize { + std::mem::size_of::() + } + + fn ty() -> FieldType { + T::ty() + } +} + +impl FuryGeneralList for Option {} diff --git a/rust/fury-core/src/internal/primitive_list.rs b/rust/fury-core/src/internal/primitive_list.rs new file mode 100644 index 0000000000..f8ab7c3a23 --- /dev/null +++ b/rust/fury-core/src/internal/primitive_list.rs @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use crate::read_state::ReadState; +use crate::serializer::Serializer; +use crate::types::FieldType; +use crate::write_state::WriteState; +use std::mem; + +pub fn to_u8_slice(slice: &[T]) -> &[u8] { + let byte_len = std::mem::size_of_val(slice); + unsafe { std::slice::from_raw_parts(slice.as_ptr().cast::(), byte_len) } +} + +fn from_u8_slice(slice: &[u8]) -> Vec { + let byte_len = slice.len() / mem::size_of::(); + unsafe { std::slice::from_raw_parts(slice.as_ptr().cast::(), byte_len) }.to_vec() +} + +macro_rules! impl_primitive_vec { + ($name: ident, $ty:tt, $field_type: expr) => { + impl Serializer for Vec<$ty> { + fn write(&self, serializer: &mut WriteState) { + serializer.writer.var_int32(self.len() as i32); + serializer + .writer + .reserve(self.len() * mem::size_of::<$ty>()); + serializer.writer.bytes(to_u8_slice(self)); + } + + fn read(deserializer: &mut ReadState) -> Result { + // length + let len = (deserializer.reader.var_int32() as usize) * mem::size_of::<$ty>(); + Ok(from_u8_slice::<$ty>( + deserializer.reader.bytes(len as usize), + )) + } + + fn reserved_space() -> usize { + mem::size_of::() + } + + fn ty() -> FieldType { + $field_type + } + } + }; +} + +impl Serializer for Vec { + fn write(&self, serializer: &mut WriteState) { + serializer.writer.var_int32(self.len() as i32); + serializer.writer.bytes(to_u8_slice(self)); + } + + fn reserved_space() -> usize { + mem::size_of::() + } + + fn ty() -> FieldType { + FieldType::FuryPrimitiveBoolArray + } + + fn read(deserializer: &mut ReadState) -> Result { + let size = deserializer.reader.var_int32(); + let bytes = deserializer.reader.bytes(size as usize).to_vec(); + Ok(unsafe { mem::transmute::, Vec>(bytes) }) + } +} + +impl_primitive_vec!(u8, u8, FieldType::BINARY); +impl_primitive_vec!(i16, i16, FieldType::FuryPrimitiveShortArray); +impl_primitive_vec!(i32, i32, FieldType::FuryPrimitiveIntArray); +impl_primitive_vec!(i64, i64, FieldType::FuryPrimitiveLongArray); +impl_primitive_vec!(f32, f32, FieldType::FuryPrimitiveFloatArray); +impl_primitive_vec!(f64, f64, FieldType::FuryPrimitiveDoubleArray); diff --git a/rust/fury-core/src/internal/set.rs b/rust/fury-core/src/internal/set.rs new file mode 100644 index 0000000000..8989e6a2db --- /dev/null +++ b/rust/fury-core/src/internal/set.rs @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use crate::read_state::ReadState; +use crate::serializer::Serializer; +use crate::types::{FieldType, FuryGeneralList, SIZE_OF_REF_AND_TYPE}; +use crate::write_state::WriteState; +use std::collections::HashSet; +use std::mem; + +impl Serializer for HashSet { + fn write(&self, serializer: &mut WriteState) { + // length + serializer.writer.i32(self.len() as i32); + + let reserved_space = + (::reserved_space() + SIZE_OF_REF_AND_TYPE) * self.len(); + serializer.writer.reserve(reserved_space); + + // key-value + for i in self.iter() { + i.serialize(serializer); + } + } + + fn read(deserializer: &mut ReadState) -> Result { + // length + let len = deserializer.reader.var_int32(); + let mut result = HashSet::new(); + // key-value + for _ in 0..len { + result.insert(::deserialize(deserializer)?); + } + Ok(result) + } + + fn reserved_space() -> usize { + mem::size_of::() + } + + fn ty() -> FieldType { + FieldType::FurySet + } +} + +impl FuryGeneralList for HashSet {} diff --git a/rust/fury-core/src/internal/string.rs b/rust/fury-core/src/internal/string.rs new file mode 100644 index 0000000000..fc473b1422 --- /dev/null +++ b/rust/fury-core/src/internal/string.rs @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use crate::read_state::ReadState; +use crate::serializer::Serializer; +use crate::types::{FieldType, FuryGeneralList}; +use crate::write_state::WriteState; +use std::mem; + +impl Serializer for String { + fn reserved_space() -> usize { + mem::size_of::() + } + + fn write(&self, serializer: &mut WriteState) { + serializer.writer.var_int32(self.len() as i32); + serializer.writer.bytes(self.as_bytes()); + } + + fn read(deserializer: &mut ReadState) -> Result { + let len = deserializer.reader.var_int32(); + Ok(deserializer.reader.string(len as usize)) + } + + fn ty() -> FieldType { + FieldType::STRING + } +} + +impl FuryGeneralList for String {} diff --git a/rust/fury-core/src/lib.rs b/rust/fury-core/src/lib.rs new file mode 100644 index 0000000000..18e85d970a --- /dev/null +++ b/rust/fury-core/src/lib.rs @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +pub mod buffer; +pub mod error; +pub mod fury; +pub mod internal; +pub mod meta; +pub mod read_state; +pub mod row; +pub mod serializer; +pub mod types; +pub mod util; +pub mod write_state; diff --git a/rust/fury-core/src/meta/meta_store.rs b/rust/fury-core/src/meta/meta_store.rs new file mode 100644 index 0000000000..5cc4149d9b --- /dev/null +++ b/rust/fury-core/src/meta/meta_store.rs @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::buffer::{Reader, Writer}; +use crate::error::Error; +use crate::meta::TypeMeta; +use std::collections::HashMap; + +#[allow(dead_code)] +pub struct MetaReaderStore { + reading_type_defs: Vec, +} + +#[allow(dead_code)] +impl MetaReaderStore { + fn new() -> MetaReaderStore { + MetaReaderStore { + reading_type_defs: Vec::new(), + } + } + + fn get(&self, index: usize) -> &TypeMeta { + unsafe { self.reading_type_defs.get_unchecked(index) } + } + + fn from_bytes(reader: &mut Reader) -> MetaReaderStore { + let meta_size = reader.var_int32(); + let mut reading_type_defs = Vec::::with_capacity(meta_size as usize); + for _ in 0..meta_size { + reading_type_defs.push(TypeMeta::from_bytes(reader)); + } + MetaReaderStore { reading_type_defs } + } +} + +#[derive(Default)] +pub struct MetaWriterStore<'a> { + writing_type_defs: Vec<&'a [u8]>, + index_map: HashMap, +} + +#[allow(dead_code)] +impl<'a> MetaWriterStore<'a> { + pub fn push<'b: 'a>(&mut self, type_id: u32, type_meta_bytes: &'b [u8]) -> usize { + match self.index_map.get(&type_id) { + None => { + let index = self.writing_type_defs.len(); + self.writing_type_defs.push(type_meta_bytes); + self.index_map.insert(type_id, index); + index + } + Some(index) => *index, + } + } + + fn to_bytes(&self, writer: &mut Writer) -> Result<(), Error> { + for item in self.writing_type_defs.iter() { + writer.bytes(item) + } + Ok(()) + } + + fn reset(&mut self) { + self.writing_type_defs.clear(); + } +} diff --git a/rust/fury/src/meta/meta_string.rs b/rust/fury-core/src/meta/meta_string.rs similarity index 94% rename from rust/fury/src/meta/meta_string.rs rename to rust/fury-core/src/meta/meta_string.rs index 627433c9fd..282b1ab549 100644 --- a/rust/fury/src/meta/meta_string.rs +++ b/rust/fury-core/src/meta/meta_string.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use crate::error::Error; use crate::meta::string_util; #[derive(Debug, PartialEq)] @@ -26,30 +27,6 @@ pub enum Encoding { AllToLowerSpecial = 0x04, } -#[derive(thiserror::Error, Debug)] -pub enum Error { - #[error("encoded_data cannot be empty")] - EncodedDataEmpty, - - #[error("Long meta string than 32767 is not allowed")] - LengthExceed, - - #[error("Non-ASCII characters in meta string are not allowed")] - OnlyAllowASCII, - - #[error("Unsupported character for LOWER_SPECIAL encoding: {ch:?}")] - UnsupportedLowerSpecialCharacter { ch: char }, - - #[error("Unsupported character for LOWER_UPPER_DIGIT_SPECIAL encoding: {ch:?}")] - UnsupportedLowerUpperDigitSpecialCharacter { ch: char }, - - #[error("Invalid character value for LOWER_SPECIAL decoding: {value:?}")] - InvalidLowerSpecialValue { value: u8 }, - - #[error("Invalid character value for LOWER_UPPER_DIGIT_SPECIAL decoding: {value:?}")] - InvalidLowerUpperDigitSpecialValue { value: u8 }, -} - #[derive(Debug, PartialEq)] pub struct MetaString { pub original: String, diff --git a/rust/fury-core/src/meta/mod.rs b/rust/fury-core/src/meta/mod.rs new file mode 100644 index 0000000000..58a81ea03e --- /dev/null +++ b/rust/fury-core/src/meta/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +mod meta_store; +mod meta_string; +mod string_util; +mod type_meta; + +pub use meta_store::{MetaReaderStore, MetaWriterStore}; +pub use meta_string::{Encoding, MetaString, MetaStringDecoder, MetaStringEncoder}; +pub use type_meta::{FieldInfo, TypeMeta}; diff --git a/rust/fury/src/meta/string_util.rs b/rust/fury-core/src/meta/string_util.rs similarity index 97% rename from rust/fury/src/meta/string_util.rs rename to rust/fury-core/src/meta/string_util.rs index ea8659110b..bd65beff8b 100644 --- a/rust/fury/src/meta/string_util.rs +++ b/rust/fury-core/src/meta/string_util.rs @@ -25,14 +25,14 @@ use std::arch::x86_64::*; use std::arch::x86_64::*; #[cfg(target_arch = "x86_64")] -pub(crate) const MIN_DIM_SIZE_AVX: usize = 32; +pub const MIN_DIM_SIZE_AVX: usize = 32; #[cfg(any( target_arch = "x86", target_arch = "x86_64", all(target_arch = "aarch64", target_feature = "neon") ))] -pub(crate) const MIN_DIM_SIZE_SIMD: usize = 16; +pub const MIN_DIM_SIZE_SIMD: usize = 16; #[cfg(target_arch = "x86_64")] unsafe fn is_latin_avx(s: &str) -> bool { @@ -108,7 +108,7 @@ fn is_latin_standard(s: &str) -> bool { s.bytes().all(|b| b.is_ascii()) } -pub(crate) fn is_latin(s: &str) -> bool { +pub fn is_latin(s: &str) -> bool { #[cfg(target_arch = "x86_64")] { if is_x86_feature_detected!("avx") diff --git a/rust/fury-core/src/meta/type_meta.rs b/rust/fury-core/src/meta/type_meta.rs new file mode 100644 index 0000000000..99ac37bd90 --- /dev/null +++ b/rust/fury-core/src/meta/type_meta.rs @@ -0,0 +1,113 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use super::meta_string::MetaStringEncoder; +use crate::buffer::{Reader, Writer}; +use crate::error::Error; +use crate::types::FieldType; + +//todo backward/forward compatibility +#[allow(dead_code)] +pub struct FieldInfo { + tag_id: u32, + field_name: String, + field_type: FieldType, +} + +impl FieldInfo { + pub fn new(field_name: &str, field_type: FieldType) -> FieldInfo { + FieldInfo { + field_name: field_name.to_string(), + field_type, + tag_id: 0, + } + } + + fn to_bytes(&self) -> Result, Error> { + let mut writer = Writer::default(); + let meta_string = MetaStringEncoder::new().encode(&self.field_name)?; + let mut header = 1 << 2; + let encoded = meta_string.bytes.as_slice(); + let size = (encoded.len() - 1) as u32; + header |= (meta_string.encoding as u8) << 3; + let big_size = size >= 7; + if big_size { + header |= 0b11100000; + writer.u8(header); + writer.var_int32((size - 7) as i32); + } else { + header |= (size << 5) as u8; + writer.u8(header); + } + writer.bytes(encoded); + Ok(writer.dump()) + } +} + +struct TypeMetaLayer { + type_id: u32, + field_info: Vec, +} + +impl TypeMetaLayer { + fn to_bytes(&self) -> Result, Error> { + let mut writer = Writer::default(); + writer.var_int32(self.field_info.len() as i32); + writer.var_int32(self.type_id as i32); + for field in self.field_info.iter() { + writer.bytes(field.to_bytes()?.as_slice()); + } + Ok(writer.dump()) + } +} + +pub struct TypeMeta { + hash: u64, + layers: Vec, +} + +impl TypeMeta { + pub fn from_fields(type_id: u32, field_info: Vec) -> TypeMeta { + TypeMeta { + hash: 0, + layers: vec![TypeMetaLayer { + type_id, + field_info, + }], + } + } + + pub fn to_bytes(&self) -> Result, Error> { + let mut writer = Writer::default(); + writer.u64((self.hash << 4) | (self.layers.len() as u64 & 0b1111)); + for layer in self.layers.iter() { + writer.bytes(layer.to_bytes()?.as_slice()); + } + Ok(writer.dump()) + } + + pub fn read_hash_from_bytes(reader: &mut Reader) -> u64 { + reader.u64() >> 7 + } + + pub fn from_bytes(_reader: &mut Reader) -> TypeMeta { + TypeMeta { + hash: 0, + layers: Vec::new(), + } + } +} diff --git a/rust/fury-core/src/read_state.rs b/rust/fury-core/src/read_state.rs new file mode 100644 index 0000000000..eaffdaa444 --- /dev/null +++ b/rust/fury-core/src/read_state.rs @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use super::buffer::Reader; +use super::types::Language; +use crate::error::Error; +use crate::fury::Fury; + +pub struct ReadState<'de, 'bf: 'de> { + pub reader: Reader<'bf>, + pub tags: Vec<&'de str>, + pub fury: &'de Fury, +} + +impl<'de, 'bf: 'de> ReadState<'de, 'bf> { + pub fn new(fury: &'de Fury, reader: Reader<'bf>) -> ReadState<'de, 'bf> { + ReadState { + reader, + tags: Vec::new(), + fury, + } + } + + pub fn get_fury(&self) -> &Fury { + self.fury + } + + pub fn head(&mut self) -> Result<(), Error> { + let _bitmap = self.reader.u8(); + let _language: Language = self.reader.u8().try_into()?; + self.reader.skip(8); // native offset and size + Ok(()) + } + + pub fn read_tag(&mut self) -> Result<&str, Error> { + const USESTRINGVALUE: u8 = 0; + const USESTRINGID: u8 = 1; + let tag_type = self.reader.u8(); + if tag_type == USESTRINGID { + Ok(self.tags[self.reader.i16() as usize]) + } else if tag_type == USESTRINGVALUE { + self.reader.skip(8); // todo tag hash + let len = self.reader.i16(); + let tag: &str = + unsafe { std::str::from_utf8_unchecked(self.reader.bytes(len as usize)) }; + self.tags.push(tag); + Ok(tag) + } else { + Err(Error::TagType(tag_type)) + } + } +} diff --git a/rust/fury/src/row/bit_util.rs b/rust/fury-core/src/row/bit_util.rs similarity index 100% rename from rust/fury/src/row/bit_util.rs rename to rust/fury-core/src/row/bit_util.rs diff --git a/rust/fury/src/row/mod.rs b/rust/fury-core/src/row/mod.rs similarity index 100% rename from rust/fury/src/row/mod.rs rename to rust/fury-core/src/row/mod.rs diff --git a/rust/fury/src/row/reader.rs b/rust/fury-core/src/row/reader.rs similarity index 100% rename from rust/fury/src/row/reader.rs rename to rust/fury-core/src/row/reader.rs diff --git a/rust/fury/src/row/row.rs b/rust/fury-core/src/row/row.rs similarity index 99% rename from rust/fury/src/row/row.rs rename to rust/fury-core/src/row/row.rs index be0f6591dc..baeb33f5cf 100644 --- a/rust/fury/src/row/row.rs +++ b/rust/fury-core/src/row/row.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use crate::{buffer::Writer, Error}; +use crate::{buffer::Writer, error::Error}; use byteorder::{ByteOrder, LittleEndian}; use chrono::{DateTime, Days, NaiveDate, NaiveDateTime}; use std::collections::BTreeMap; diff --git a/rust/fury/src/row/writer.rs b/rust/fury-core/src/row/writer.rs similarity index 100% rename from rust/fury/src/row/writer.rs rename to rust/fury-core/src/row/writer.rs diff --git a/rust/fury-core/src/serializer.rs b/rust/fury-core/src/serializer.rs new file mode 100644 index 0000000000..a5d4f8a76c --- /dev/null +++ b/rust/fury-core/src/serializer.rs @@ -0,0 +1,86 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use crate::read_state::ReadState; +use crate::types::{FieldType, RefFlag}; +use crate::write_state::WriteState; + +pub trait Serializer +where + Self: Sized, +{ + /// The fixed memory size of the Type. + /// Avoid the memory check, which would hurt performance. + fn reserved_space() -> usize; + + /// Write the data into the buffer. + fn write(&self, serializer: &mut WriteState); + + /// Entry point of the serialization. + /// + /// Step 1: write the type flag and type flag into the buffer. + /// Step 2: invoke the write function to write the Rust object. + fn serialize(&self, serializer: &mut WriteState) { + // ref flag + serializer.writer.i8(RefFlag::NotNullValue as i8); + // type + serializer.writer.i16(Self::ty() as i16); + self.write(serializer); + } + + fn read(deserializer: &mut ReadState) -> Result; + + fn deserialize(deserializer: &mut ReadState) -> Result { + // ref flag + let ref_flag = deserializer.reader.i8(); + + if ref_flag == (RefFlag::NotNullValue as i8) || ref_flag == (RefFlag::RefValue as i8) { + // type_id + let type_id = deserializer.reader.i16(); + let ty = Self::ty(); + if type_id != ty as i16 { + Err(Error::FieldType { + expected: ty, + actial: type_id, + }) + } else { + Ok(Self::read(deserializer)?) + } + } else if ref_flag == (RefFlag::Null as i8) { + Err(Error::Null) + } else if ref_flag == (RefFlag::Ref as i8) { + Err(Error::Ref) + } else { + Err(Error::BadRefFlag) + } + } + + fn ty() -> FieldType; + + fn hash() -> u32 { + 0 + } + + fn tag() -> &'static str { + "" + } + + fn type_def() -> &'static [u8] { + &[] + } +} diff --git a/rust/fury/src/types.rs b/rust/fury-core/src/types.rs similarity index 60% rename from rust/fury/src/types.rs rename to rust/fury-core/src/types.rs index 7c4647d709..47f6cf575b 100644 --- a/rust/fury/src/types.rs +++ b/rust/fury-core/src/types.rs @@ -15,125 +15,8 @@ // specific language governing permissions and limitations // under the License. -use std::{ - collections::{HashMap, HashSet}, - mem, -}; - -use chrono::{NaiveDate, NaiveDateTime}; - -use crate::Error; - -pub trait FuryMeta { - fn ty() -> FieldType; - - fn vec_ty() -> FieldType { - FieldType::ARRAY - } - - fn hash() -> u32 { - 0 - } - - fn tag() -> &'static str { - "" - } - - fn is_vec() -> bool { - false - } -} - -macro_rules! impl_number_meta { - ($expr: expr, $tt: tt) => { - impl FuryMeta for $tt { - fn ty() -> FieldType { - $expr - } - } - }; -} - -macro_rules! impl_primitive_array_meta { - ($ty: expr, $vec_ty: expr, $tt: tt) => { - impl FuryMeta for $tt { - fn ty() -> FieldType { - $ty - } - - fn vec_ty() -> FieldType { - $vec_ty - } - } - }; -} - -impl FuryMeta for HashMap { - fn ty() -> FieldType { - FieldType::MAP - } -} - -impl FuryMeta for HashSet { - fn ty() -> FieldType { - FieldType::FurySet - } -} - -impl FuryMeta for u8 { - fn ty() -> FieldType { - FieldType::UINT8 - } - fn vec_ty() -> FieldType { - FieldType::BINARY - } -} - -impl FuryMeta for NaiveDateTime { - fn ty() -> FieldType { - FieldType::TIMESTAMP - } -} - -impl FuryMeta for NaiveDate { - fn ty() -> FieldType { - FieldType::DATE - } -} - -impl_number_meta!(FieldType::UINT16, u16); -impl_number_meta!(FieldType::UINT32, u32); -impl_number_meta!(FieldType::UINT64, u64); -impl_number_meta!(FieldType::INT8, i8); - -// special type array -impl_primitive_array_meta!(FieldType::BOOL, FieldType::FuryPrimitiveBoolArray, bool); -impl_primitive_array_meta!(FieldType::INT16, FieldType::FuryPrimitiveShortArray, i16); -impl_primitive_array_meta!(FieldType::INT32, FieldType::FuryPrimitiveIntArray, i32); -impl_primitive_array_meta!(FieldType::INT64, FieldType::FuryPrimitiveLongArray, i64); -impl_primitive_array_meta!(FieldType::FLOAT, FieldType::FuryPrimitiveFloatArray, f32); -impl_primitive_array_meta!(FieldType::DOUBLE, FieldType::FuryPrimitiveDoubleArray, f64); -impl_primitive_array_meta!(FieldType::STRING, FieldType::FuryStringArray, String); - -impl FuryMeta for Vec { - fn ty() -> FieldType { - FieldType::ARRAY - } - - fn is_vec() -> bool { - true - } -} - -impl FuryMeta for Option { - fn vec_ty() -> FieldType { - T::vec_ty() - } - - fn ty() -> FieldType { - T::ty() - } -} +use crate::error::Error; +use std::mem; #[allow(dead_code)] pub enum StringFlag { @@ -182,6 +65,8 @@ pub enum FieldType { FuryStringArray = 264, } +pub trait FuryGeneralList {} + const MAX_UNT32: u64 = (1 << 31) - 1; // todo: struct hash @@ -219,10 +104,10 @@ pub fn compute_field_hash(hash: u32, id: i16) -> u32 { new_hash as u32 } -pub fn compute_struct_hash(props: Vec<(&str, FieldType, &str)>) -> u32 { +pub fn compute_struct_hash(props: Vec<(&str, FieldType)>) -> u32 { let mut hash = 17; props.iter().for_each(|prop| { - let (_name, ty, _tag) = prop; + let (_name, ty) = prop; hash = match ty { FieldType::ARRAY | FieldType::MAP => compute_field_hash(hash, *ty as i16), _ => hash, @@ -255,6 +140,11 @@ pub enum Language { Rust = 6, } +pub enum Mode { + SchemaConsistent, + Compatible, +} + impl TryFrom for Language { type Error = Error; @@ -267,7 +157,7 @@ impl TryFrom for Language { 4 => Ok(Language::Go), 5 => Ok(Language::Javascript), 6 => Ok(Language::Rust), - _ => Err(Error::UnsupportLanguageCode { code: num }), + _ => Err(Error::UnsupportedLanguageCode { code: num }), } } } diff --git a/rust/fury/src/util.rs b/rust/fury-core/src/util.rs similarity index 100% rename from rust/fury/src/util.rs rename to rust/fury-core/src/util.rs diff --git a/rust/fury-core/src/write_state.rs b/rust/fury-core/src/write_state.rs new file mode 100644 index 0000000000..9a7560008e --- /dev/null +++ b/rust/fury-core/src/write_state.rs @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use super::buffer::Writer; +use super::types::{config_flags, Language, SIZE_OF_REF_AND_TYPE}; +use crate::fury::Fury; +use crate::meta::MetaWriterStore; +use crate::serializer::Serializer; + +pub struct WriteState<'se> { + pub writer: &'se mut Writer, + pub tags: Vec<&'static str>, + fury: &'se Fury, + meta_store: MetaWriterStore<'se>, +} + +impl<'se> WriteState<'se> { + pub fn new(fury: &'se Fury, writer: &'se mut Writer) -> WriteState<'se> { + WriteState { + writer, + tags: Vec::new(), + fury, + meta_store: MetaWriterStore::default(), + } + } + + pub fn push_meta(&mut self, type_id: u32, type_def: &'static [u8]) { + self.meta_store.push(type_id, type_def); + } + + pub fn get_fury(&self) -> &Fury { + self.fury + } + + pub fn write_tag(&mut self, tag: &'static str) { + const USESTRINGVALUE: u8 = 0; + const USESTRINGID: u8 = 1; + + let mayby_idx = self.tags.iter().position(|x| *x == tag); + match mayby_idx { + Some(idx) => { + self.writer.u8(USESTRINGID); + self.writer.i16(idx as i16); + } + None => { + self.writer.u8(USESTRINGVALUE); + self.writer.skip(8); // todo tag hash + self.writer.i16(tag.len() as i16); + self.writer.bytes(tag.as_bytes()); + } + }; + } + + pub fn head(&mut self) -> &Self { + const HEAD_SIZE: usize = 10; + self.writer + .reserve(::reserved_space() + SIZE_OF_REF_AND_TYPE + HEAD_SIZE); + + let mut bitmap = 0; + bitmap |= config_flags::IS_LITTLE_ENDIAN_FLAG; + bitmap |= config_flags::IS_CROSS_LANGUAGE_FLAG; + self.writer.u8(bitmap); + self.writer.u8(Language::Rust as u8); + self.writer.skip(4); // native offset + self.writer.skip(4); // native size + self + } +} diff --git a/rust/fury/tests/de.rs b/rust/fury-core/tests/de.rs similarity index 100% rename from rust/fury/tests/de.rs rename to rust/fury-core/tests/de.rs diff --git a/rust/fury-derive/Cargo.toml b/rust/fury-derive/Cargo.toml index ced3734351..1707ee9565 100644 --- a/rust/fury-derive/Cargo.toml +++ b/rust/fury-derive/Cargo.toml @@ -26,6 +26,7 @@ proc-macro = true [dependencies] proc-macro2 = { default-features = false, version = "1.0" } +fury-core = { path = "../fury-core"} syn = { default-features = false, version = "2.0", features = [ "parsing", "proc-macro", diff --git a/rust/fury-derive/src/fury_meta.rs b/rust/fury-derive/src/fury_meta.rs index ed251d2811..475109983b 100644 --- a/rust/fury-derive/src/fury_meta.rs +++ b/rust/fury-derive/src/fury_meta.rs @@ -25,7 +25,7 @@ pub fn sorted_fields(fields: &Fields) -> Vec<&Field> { fields } -pub fn derive_fury_meta(ast: &syn::DeriveInput, tag: String) -> TokenStream { +pub fn derive_serializer(ast: &syn::DeriveInput, tag: &String) -> TokenStream { let name = &ast.ident; let fields = match &ast.data { syn::Data::Struct(s) => sorted_fields(&s.fields), @@ -33,53 +33,23 @@ pub fn derive_fury_meta(ast: &syn::DeriveInput, tag: String) -> TokenStream { panic!("only struct be supported") } }; - let props = fields.iter().map(|field| { + + let name_hash: proc_macro2::Ident = + syn::Ident::new(&format!("HASH_{}", name).to_uppercase(), name.span()); + + let field_infos = fields.iter().map(|field| { let ty = &field.ty; let name = format!("{}", field.ident.as_ref().expect("should be field name")); quote! { - (#name, <#ty as fury::__derive::FuryMeta>::ty(), <#ty as fury::__derive::FuryMeta>::tag()) + fury_core::meta::FieldInfo::new(#name, <#ty as fury_core::serializer::Serializer>::ty()) } }); - let name_hash_static: proc_macro2::Ident = - syn::Ident::new(&format!("HASH_{}", name).to_uppercase(), name.span()); - - let gen = quote! { - - lazy_static::lazy_static! { - static ref #name_hash_static: u32 = fury::__derive::compute_struct_hash(vec![#(#props),*]); - } - - impl fury::__derive::FuryMeta for #name { - fn tag() -> &'static str { - #tag - } - - fn hash() -> u32 { - *(#name_hash_static) - } - - fn ty() -> fury::__derive::FieldType { - fury::__derive::FieldType::FuryTypeTag - } - } - }; - gen.into() -} - -pub fn derive_serialize(ast: &syn::DeriveInput) -> TokenStream { - let name = &ast.ident; - let fields = match &ast.data { - syn::Data::Struct(s) => sorted_fields(&s.fields), - _ => { - panic!("only struct be supported") - } - }; let accessor_exprs = fields.iter().map(|field| { let ty = &field.ty; let ident = &field.ident; quote! { - <#ty as fury::__derive::Serialize>::serialize(&self.#ident, serializer); + <#ty as fury_core::serializer::Serializer>::serialize(&self.#ident, serializer); } }); @@ -87,65 +57,84 @@ pub fn derive_serialize(ast: &syn::DeriveInput) -> TokenStream { let ty = &field.ty; // each field have one byte ref tag and two byte type id quote! { - <#ty as fury::__derive::Serialize>::reserved_space() + fury::__derive::SIZE_OF_REF_AND_TYPE + <#ty as fury_core::serializer::Serializer>::reserved_space() + fury_core::types::SIZE_OF_REF_AND_TYPE } }); let tag_bytelen = format!("{}", name).len(); - let gen = quote! { - impl fury::__derive::Serialize for #name { - fn write(&self, serializer: &mut fury::__derive::SerializerState) { - // write tag string - serializer.write_tag(<#name as fury::__derive::FuryMeta>::tag()); - // write tag hash - serializer.writer.u32(<#name as fury::__derive::FuryMeta>::hash()); - // write fields - #(#accessor_exprs)* - } - - fn reserved_space() -> usize { - // struct have four byte hash - #tag_bytelen + 4 + #(#reserved_size_exprs)+* - } - } - }; - gen.into() -} - -pub fn derive_deserilize(ast: &syn::DeriveInput) -> TokenStream { - let name = &ast.ident; - let fields = match &ast.data { - syn::Data::Struct(s) => sorted_fields(&s.fields), - _ => { - panic!("only struct be supported") - } - }; - let exprs = fields.iter().map(|field| { let ty = &field.ty; let ident = &field.ident; quote! { - #ident: <#ty as fury::__derive::Deserialize>::deserialize(deserializer)? + #ident: <#ty as fury_core::serializer::Serializer>::deserialize(state)? + } + }); + + let props = fields.iter().map(|field| { + let ty = &field.ty; + let name = format!("{}", field.ident.as_ref().expect("should be field name")); + quote! { + (#name, <#ty as fury_core::serializer::Serializer>::ty()) } }); let gen = quote! { - impl<'de> fury::__derive::Deserialize for #name { - fn read(deserializer: &mut fury::__derive::DeserializerState) -> Result { + impl fury_core::types::FuryGeneralList for #name {} + impl fury_core::serializer::Serializer for #name { + fn tag() -> &'static str { + #tag + } + + fn hash() -> u32 { + lazy_static::lazy_static! { + static ref #name_hash: u32 = fury_core::types::compute_struct_hash(vec![#(#props),*]); + } + *(#name_hash) + } + + fn type_def() -> &'static [u8] { + lazy_static::lazy_static! { + static ref type_definition: Vec = fury_core::meta::TypeMeta::from_fields( + 0, + vec![#(#field_infos),*] + ).to_bytes().unwrap(); + } + type_definition.as_slice() + } + + fn ty() -> fury_core::types::FieldType { + fury_core::types::FieldType::FuryTypeTag + } + + fn write(&self, serializer: &mut fury_core::write_state::WriteState) { + // write tag string + serializer.write_tag(<#name as fury_core::serializer::Serializer>::tag()); + // write tag hash + serializer.writer.u32(<#name as fury_core::serializer::Serializer>::hash()); + // write fields + #(#accessor_exprs)* + } + + fn read(state: &mut fury_core::read_state::ReadState) -> Result { // read tag string - deserializer.read_tag()?; + state.read_tag()?; // read tag hash - let hash = deserializer.reader.u32(); - let expected = <#name as fury::__derive::FuryMeta>::hash(); + let hash = state.reader.u32(); + let expected = <#name as fury_core::serializer::Serializer>::hash(); if(hash != expected) { - Err(fury::__derive::Error::StructHash{ expected, actial: hash }) + Err(fury_core::error::Error::StructHash{ expected, actial: hash }) } else { Ok(Self { #(#exprs),* }) } } + + fn reserved_space() -> usize { + // struct have four byte hash + #tag_bytelen + 4 + #(#reserved_size_exprs)+* + } } }; gen.into() diff --git a/rust/fury-derive/src/fury_row.rs b/rust/fury-derive/src/fury_row.rs index 70304fc747..20b55aea75 100644 --- a/rust/fury-derive/src/fury_row.rs +++ b/rust/fury-derive/src/fury_row.rs @@ -35,7 +35,7 @@ pub fn derive_row(ast: &syn::DeriveInput) -> TokenStream { quote! { let mut callback_info = struct_writer.write_start(#index); - <#ty as fury::__derive::Row<'a>>::write(&v.#ident, struct_writer.get_writer()); + <#ty as fury_core::row::Row<'a>>::write(&v.#ident, struct_writer.get_writer()); struct_writer.write_end(callback_info); } }); @@ -46,9 +46,9 @@ pub fn derive_row(ast: &syn::DeriveInput) -> TokenStream { let getter_name: proc_macro2::Ident = syn::Ident::new(&format!("{}", ident), ident.span()); quote! { - pub fn #getter_name(&self) -> <#ty as fury::__derive::Row<'a>>::ReadResult { + pub fn #getter_name(&self) -> <#ty as fury_core::row::Row<'a>>::ReadResult { let bytes = self.struct_data.get_field_bytes(#index); - <#ty as fury::__derive::Row<'a>>::cast(bytes) + <#ty as fury_core::row::Row<'a>>::cast(bytes) } } }); @@ -60,24 +60,24 @@ pub fn derive_row(ast: &syn::DeriveInput) -> TokenStream { let gen = quote! { struct #getter<'a> { - struct_data: fury::__derive::StructViewer<'a> + struct_data: fury_core::row::StructViewer<'a> } impl<'a> #getter<'a> { #(#getter_exprs)* } - impl<'a> fury::__derive::Row<'a> for #name { + impl<'a> fury_core::row::Row<'a> for #name { type ReadResult = #getter<'a>; - fn write(v: &Self, writer: &mut fury::__derive::Writer) { - let mut struct_writer = fury::__derive::StructWriter::new(#num_fields, writer); + fn write(v: &Self, writer: &mut fury_core::buffer::Writer) { + let mut struct_writer = fury_core::row::StructWriter::new(#num_fields, writer); #(#write_exprs);*; } fn cast(bytes: &'a [u8]) -> Self::ReadResult { - #getter{ struct_data: fury::__derive::StructViewer::new(bytes, #num_fields) } + #getter{ struct_data: fury_core::row::StructViewer::new(bytes, #num_fields) } } } }; diff --git a/rust/fury-derive/src/lib.rs b/rust/fury-derive/src/lib.rs index e18ca21052..a72e7a426e 100644 --- a/rust/fury-derive/src/lib.rs +++ b/rust/fury-derive/src/lib.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use fury_meta::{derive_deserilize, derive_fury_meta, derive_serialize}; +use fury_meta::derive_serializer; use fury_row::derive_row; use proc_macro::TokenStream; use syn::{parse_macro_input, DeriveInput}; @@ -38,12 +38,7 @@ pub fn proc_macro_derive_fury_meta(input: proc_macro::TokenStream) -> TokenStrea panic!("tag should be string") } }; - let mut token_stream = derive_fury_meta(&input, tag); - // append serialize impl - token_stream.extend(derive_serialize(&input)); - // append deserialize impl - token_stream.extend(derive_deserilize(&input)); - token_stream + derive_serializer(&input, &tag) } #[proc_macro_derive(FuryRow)] diff --git a/rust/fury/Cargo.toml b/rust/fury/Cargo.toml index bba04afb73..1335f7013b 100644 --- a/rust/fury/Cargo.toml +++ b/rust/fury/Cargo.toml @@ -15,28 +15,18 @@ # specific language governing permissions and limitations # under the License. + [package] name = "fury" version.workspace = true -edition.workspace = true rust-version.workspace = true +license.workspace = true +readme.workspace = true +repository.workspace = true +edition.workspace = true +keywords.workspace = true +categories.workspace = true [dependencies] -proc-macro2 = { default-features = false, version = "1.0" } -syn = { default-features = false, version = "2.0", features = ["full", "fold"] } -quote = { default-features = false, version = "1.0" } -fury-derive = { path = "../fury-derive" } -lazy_static = { version = "1.4" } -byteorder = { version = "1.4" } -chrono = "0.4" -thiserror = { default-features = false, version = "1.0" } - - -[[bench]] -name = "simd_bench" -harness = false - - -[dev-dependencies] -criterion = "0.5.1" -rand = "0.8.5" \ No newline at end of file +fury-core = { path = "../fury-core"} +fury-derive = { path = "../fury-derive"} diff --git a/rust/fury/src/deserializer.rs b/rust/fury/src/deserializer.rs deleted file mode 100644 index 0ca8f34329..0000000000 --- a/rust/fury/src/deserializer.rs +++ /dev/null @@ -1,269 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use super::buffer::Reader; -use super::types::Language; -use crate::{ - error::Error, - types::{FuryMeta, RefFlag}, -}; -use chrono::{DateTime, Days, NaiveDate, NaiveDateTime, TimeZone, Utc}; -use std::{ - collections::{HashMap, HashSet}, - mem, -}; - -fn from_u8_slice(slice: &[u8]) -> Vec { - let byte_len = slice.len() / mem::size_of::(); - unsafe { std::slice::from_raw_parts(slice.as_ptr().cast::(), byte_len) }.to_vec() -} - -pub trait Deserialize -where - Self: Sized + FuryMeta, -{ - fn read(deserializer: &mut DeserializerState) -> Result; - - fn read_vec(deserializer: &mut DeserializerState) -> Result, Error> { - // length - let len = deserializer.reader.var_int32(); - // value - let mut result = Vec::new(); - for _ in 0..len { - result.push(Self::deserialize(deserializer)?); - } - Ok(result) - } - - fn deserialize(deserializer: &mut DeserializerState) -> Result { - // ref flag - let ref_flag = deserializer.reader.i8(); - - if ref_flag == (RefFlag::NotNullValue as i8) || ref_flag == (RefFlag::RefValue as i8) { - // type_id - let type_id = deserializer.reader.i16(); - let ty = if Self::is_vec() { - Self::vec_ty() - } else { - Self::ty() - }; - if type_id != ty as i16 { - Err(Error::FieldType { - expected: ty, - actial: type_id, - }) - } else { - Ok(Self::read(deserializer)?) - } - } else if ref_flag == (RefFlag::Null as i8) { - Err(Error::Null) - } else if ref_flag == (RefFlag::Ref as i8) { - Err(Error::Ref) - } else { - Err(Error::BadRefFlag) - } - } -} - -macro_rules! impl_num_deserialize { - ($name: ident, $ty:tt) => { - impl Deserialize for $ty { - fn read(deserializer: &mut DeserializerState) -> Result { - Ok(deserializer.reader.$name()) - } - } - }; -} - -macro_rules! impl_num_deserialize_and_pritimive_vec { - ($name: ident, $ty:tt) => { - impl Deserialize for $ty { - fn read(deserializer: &mut DeserializerState) -> Result { - Ok(deserializer.reader.$name()) - } - - fn read_vec(deserializer: &mut DeserializerState) -> Result, Error> { - // length - let len = (deserializer.reader.var_int32() as usize) * mem::size_of::<$ty>(); - Ok(from_u8_slice::<$ty>( - deserializer.reader.bytes(len as usize), - )) - } - } - }; -} - -impl_num_deserialize!(u16, u16); -impl_num_deserialize!(u32, u32); -impl_num_deserialize!(u64, u64); -impl_num_deserialize!(i8, i8); - -impl_num_deserialize_and_pritimive_vec!(u8, u8); -impl_num_deserialize_and_pritimive_vec!(i16, i16); -impl_num_deserialize_and_pritimive_vec!(i32, i32); -impl_num_deserialize_and_pritimive_vec!(i64, i64); -impl_num_deserialize_and_pritimive_vec!(f32, f32); -impl_num_deserialize_and_pritimive_vec!(f64, f64); - -impl Deserialize for String { - fn read(deserializer: &mut DeserializerState) -> Result { - let len = deserializer.reader.var_int32(); - Ok(deserializer.reader.string(len as usize)) - } -} - -impl Deserialize for bool { - fn read(deserializer: &mut DeserializerState) -> Result { - Ok(deserializer.reader.u8() == 1) - } -} - -impl Deserialize for HashMap { - fn read(deserializer: &mut DeserializerState) -> Result { - // length - let len = deserializer.reader.var_int32(); - let mut result = HashMap::new(); - // key-value - for _ in 0..len { - result.insert( - ::deserialize(deserializer)?, - ::deserialize(deserializer)?, - ); - } - Ok(result) - } -} - -impl Deserialize for HashSet { - fn read(deserializer: &mut DeserializerState) -> Result { - // length - let len = deserializer.reader.var_int32(); - let mut result = HashSet::new(); - // key-value - for _ in 0..len { - result.insert(::deserialize(deserializer)?); - } - Ok(result) - } -} - -impl Deserialize for NaiveDateTime { - fn read(deserializer: &mut DeserializerState) -> Result { - let timestamp = deserializer.reader.u64(); - let ret = DateTime::from_timestamp_millis(timestamp as i64).map(|dt| dt.naive_utc()); - match ret { - Some(r) => Ok(r), - None => Err(Error::NaiveDateTime), - } - } -} - -impl Deserialize for Vec { - fn read(deserializer: &mut DeserializerState) -> Result { - T::read_vec(deserializer) - } -} - -impl Deserialize for Option { - fn read(deserializer: &mut DeserializerState) -> Result { - Ok(Some(T::read(deserializer)?)) - } - - fn deserialize(deserializer: &mut DeserializerState) -> Result { - // ref flag - let ref_flag = deserializer.reader.i8(); - - if ref_flag == (RefFlag::NotNullValue as i8) || ref_flag == (RefFlag::RefValue as i8) { - // type_id - let type_id = deserializer.reader.i16(); - - if type_id != ::ty() as i16 { - Err(Error::FieldType { - expected: ::ty(), - actial: type_id, - }) - } else { - Ok(Self::read(deserializer)?) - } - } else if ref_flag == (RefFlag::Null as i8) { - Ok(None) - } else if ref_flag == (RefFlag::Ref as i8) { - Err(Error::Ref) - } else { - Err(Error::BadRefFlag) - } - } -} - -lazy_static::lazy_static!( - static ref EPOCH: DateTime = Utc.with_ymd_and_hms(1970, 1, 1, 0, 0, 0).unwrap(); -); - -impl Deserialize for NaiveDate { - fn read(serializer: &mut DeserializerState) -> Result { - let days = serializer.reader.u64(); - match EPOCH.checked_add_days(Days::new(days)) { - Some(value) => Ok(value.date_naive()), - None => Err(Error::NaiveDate), - } - } -} -pub struct DeserializerState<'de, 'bf: 'de> { - pub reader: Reader<'bf>, - pub tags: Vec<&'de str>, -} - -impl<'de, 'bf: 'de> DeserializerState<'de, 'bf> { - fn new(reader: Reader<'bf>) -> DeserializerState<'de, 'bf> { - DeserializerState { - reader, - tags: Vec::new(), - } - } - - fn head(&mut self) -> Result<(), Error> { - let _bitmap = self.reader.u8(); - let _language: Language = self.reader.u8().try_into()?; - self.reader.skip(8); // native offset and size - Ok(()) - } - - pub fn read_tag(&mut self) -> Result<&str, Error> { - const USESTRINGVALUE: u8 = 0; - const USESTRINGID: u8 = 1; - let tag_type = self.reader.u8(); - if tag_type == USESTRINGID { - Ok(self.tags[self.reader.i16() as usize]) - } else if tag_type == USESTRINGVALUE { - self.reader.skip(8); // todo tag hash - let len = self.reader.i16(); - let tag: &str = - unsafe { std::str::from_utf8_unchecked(self.reader.bytes(len as usize)) }; - self.tags.push(tag); - Ok(tag) - } else { - Err(Error::TagType(tag_type)) - } - } -} - -pub fn from_buffer(bf: &[u8]) -> Result { - let reader = Reader::new(bf); - let mut deserializer = DeserializerState::new(reader); - deserializer.head()?; - ::deserialize(&mut deserializer) -} diff --git a/rust/fury/src/lib.rs b/rust/fury/src/lib.rs index 248de27dcd..b1b8a60d96 100644 --- a/rust/fury/src/lib.rs +++ b/rust/fury/src/lib.rs @@ -15,28 +15,4 @@ // specific language governing permissions and limitations // under the License. -mod buffer; -mod deserializer; -mod error; -mod meta; -mod row; -mod serializer; -mod types; -mod util; - -pub use deserializer::from_buffer; -pub use error::Error; -pub use fury_derive::*; -pub use meta::{Encoding, MetaStringDecoder, MetaStringEncoder}; -pub use row::{from_row, to_row}; -pub use serializer::to_buffer; -pub use util::to_utf8; - -pub mod __derive { - pub use crate::buffer::{Reader, Writer}; - pub use crate::deserializer::{Deserialize, DeserializerState}; - pub use crate::row::{ArrayViewer, ArrayWriter, Row, StructViewer, StructWriter}; - pub use crate::serializer::{Serialize, SerializerState}; - pub use crate::types::{compute_struct_hash, FieldType, FuryMeta, SIZE_OF_REF_AND_TYPE}; - pub use crate::Error; -} +pub use fury_core::{error::Error, fury::Fury, row::from_row, row::to_row}; diff --git a/rust/fury/src/serializer.rs b/rust/fury/src/serializer.rs deleted file mode 100644 index 3c253ce2f8..0000000000 --- a/rust/fury/src/serializer.rs +++ /dev/null @@ -1,365 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Implement the serializer of internal types in fury protocol -//! -//! Serializeable type should implements the Serialize trait. -//! This file include all the internal type implements which describe in fury protocol, -//! But custom type which serializable is not here, custom type implement the Serialize trait via fury-derive. -//! # Examples: -//! ``` -//! -//! use fury::Fury; -//! #[derive(Fury)] -//! #[tag("example.foo2")] -//! struct Animal { -//! category: String, -//! } -//! -//! ``` -//! fury::Fury would expand the code and automatic implements the Serialize trait - -use super::buffer::Writer; -use super::types::{config_flags, FuryMeta, Language, RefFlag, SIZE_OF_REF_AND_TYPE}; -use chrono::{NaiveDate, NaiveDateTime}; -use std::collections::{HashMap, HashSet}; -use std::mem; - -/// Convert a typed slice to a u8 slice. -/// Usually used to convert a typed array like Vec to &[u8], which can be easily written to a buffer. -fn to_u8_slice(slice: &[T]) -> &[u8] { - let byte_len = std::mem::size_of_val(slice); - unsafe { std::slice::from_raw_parts(slice.as_ptr().cast::(), byte_len) } -} - -/// Types that implement the Serialize trait can be serialized to Fury. -/// -/// 1. Normal situation: -/// The order of function calls is reserved_space -> serialize -> write. -/// a. reserved_space is used to allocate the fixed memory space, which can avoid the cost of the memory check. -/// However, dynamic types like strings should allocate the size separately before being written to the buffer. -/// b. serialize is used to serialize the data into the buffer. The first step is to write the object head, -/// which includes one byte reference flag and two byte type flag. -/// The second step is to call the write function, which is used to write the Rust object. -/// c. write is used to write the Rust object into the buffer. -/// 2. Vec situation: -/// If the object is in a Vec, the call order is reserved_space -> serialize -> write -> write_vec. -/// The write_vec function is used to write the elements of the Vec. But why can't we just loop through the elements and write each element one by one? -/// This is because Fury includes some primitive types like FuryPrimitiveBoolArray which do not include the head of the elements, -/// but other Vecs do. So the write_vec function is necessary to handle the differences. Primitive arrays can overwrite the function. -pub trait Serialize -where - Self: Sized + FuryMeta, -{ - /// This function is invoked when the data is in a Vec. - /// - /// The default implementation invokes the serialize function, which includes the type head. Some types like primitive arrays can overwrite it. - /// Step 1: write the length of the Vec into the buffer. - /// Step 2: reserve the fixed size of all the elements. - /// Step 3: loop through the Vec and invoke the serialize function of each item. - fn write_vec(value: &[Self], serializer: &mut SerializerState) { - serializer.writer.var_int32(value.len() as i32); - serializer - .writer - .reserve((::reserved_space() + SIZE_OF_REF_AND_TYPE) * value.len()); - for item in value.iter() { - item.serialize(serializer); - } - } - - /// The fixed memory size of the Type. - /// Avoid the memory check, which would hurt performance. - fn reserved_space() -> usize; - - /// Write the data into the buffer. - fn write(&self, serializer: &mut SerializerState); - - /// Entry point of the serialization. - /// - /// Step 1: write the type flag and type flag into the buffer. - /// Step 2: invoke the write function to write the Rust object. - fn serialize(&self, serializer: &mut SerializerState) { - // ref flag - serializer.writer.i8(RefFlag::NotNullValue as i8); - // type - serializer.writer.i16(if Self::is_vec() { - Self::vec_ty() - } else { - Self::ty() - } as i16); - self.write(serializer); - } -} - -/// Implement Serialize for all the number types. -/// -/// Serializing number types is similar, the only difference is the fixed size. -macro_rules! impl_num_serialize { - ($name: ident, $ty:tt) => { - impl Serialize for $ty { - fn write(&self, serializer: &mut SerializerState) { - serializer.writer.$name(*self); - } - - fn reserved_space() -> usize { - mem::size_of::<$ty>() - } - } - }; -} - -/// Implement Serialize for all the number types, but overwrite the primitive type Vec, which does not include the object head. -macro_rules! impl_num_serialize_and_pritimive_vec { - ($name: ident, $ty:tt) => { - impl Serialize for $ty { - fn write(&self, serializer: &mut SerializerState) { - serializer.writer.$name(*self); - } - - fn write_vec(value: &[Self], serializer: &mut SerializerState) { - serializer.writer.var_int32(value.len() as i32); - serializer.writer.bytes(to_u8_slice(value)); - } - - fn reserved_space() -> usize { - mem::size_of::<$ty>() - } - } - }; -} - -impl_num_serialize!(u16, u16); -impl_num_serialize!(u32, u32); -impl_num_serialize!(u64, u64); -impl_num_serialize!(i8, i8); - -impl_num_serialize_and_pritimive_vec!(u8, u8); -impl_num_serialize_and_pritimive_vec!(i16, i16); -impl_num_serialize_and_pritimive_vec!(i32, i32); -impl_num_serialize_and_pritimive_vec!(i64, i64); -impl_num_serialize_and_pritimive_vec!(f32, f32); -impl_num_serialize_and_pritimive_vec!(f64, f64); - -/// The implement of String Type -impl Serialize for String { - fn write(&self, serializer: &mut SerializerState) { - serializer.writer.var_int32(self.len() as i32); - serializer.writer.bytes(self.as_bytes()); - } - - fn write_vec(value: &[Self], serializer: &mut SerializerState) { - serializer.writer.var_int32(value.len() as i32); - serializer - .writer - .reserve((::reserved_space()) * value.len()); - - for x in value.iter() { - x.write(serializer); - } - } - - fn reserved_space() -> usize { - mem::size_of::() - } -} - -impl Serialize for bool { - fn write(&self, serializer: &mut SerializerState) { - serializer.writer.u8(if *self { 1 } else { 0 }); - } - - fn write_vec(value: &[Self], serializer: &mut SerializerState) { - serializer.writer.var_int32(value.len() as i32); - serializer.writer.bytes(to_u8_slice(value)); - } - - fn reserved_space() -> usize { - mem::size_of::() - } -} - -impl Serialize for HashMap { - fn write(&self, serializer: &mut SerializerState) { - // length - serializer.writer.var_int32(self.len() as i32); - - let reserved_space = (::reserved_space() + SIZE_OF_REF_AND_TYPE) - * self.len() - + (::reserved_space() + SIZE_OF_REF_AND_TYPE) * self.len(); - serializer.writer.reserve(reserved_space); - - // key-value - for i in self.iter() { - i.0.serialize(serializer); - i.1.serialize(serializer); - } - } - - fn reserved_space() -> usize { - mem::size_of::() - } -} - -impl Serialize for HashSet { - fn write(&self, serializer: &mut SerializerState) { - // length - serializer.writer.i32(self.len() as i32); - - let reserved_space = - (::reserved_space() + SIZE_OF_REF_AND_TYPE) * self.len(); - serializer.writer.reserve(reserved_space); - - // key-value - for i in self.iter() { - i.serialize(serializer); - } - } - - fn reserved_space() -> usize { - mem::size_of::() - } -} - -impl Serialize for NaiveDateTime { - fn write(&self, serializer: &mut SerializerState) { - serializer - .writer - .u64(self.and_utc().timestamp_millis() as u64); - } - - fn reserved_space() -> usize { - mem::size_of::() - } -} - -lazy_static::lazy_static!( - static ref EPOCH: NaiveDate = NaiveDate::from_ymd_opt(1970, 1, 1).unwrap(); -); - -impl Serialize for NaiveDate { - fn write(&self, serializer: &mut SerializerState) { - let days_since_epoch = self.signed_duration_since(*EPOCH).num_days(); - serializer.writer.u64(days_since_epoch as u64); - } - - fn reserved_space() -> usize { - mem::size_of::() - } -} - -impl Serialize for Vec -where - T: Serialize, -{ - fn write(&self, serializer: &mut SerializerState) { - T::write_vec(self, serializer); - } - - fn reserved_space() -> usize { - // size of the vec - mem::size_of::() - } -} - -impl Serialize for Option -where - T: Serialize, -{ - fn write(&self, serializer: &mut SerializerState) { - if let Some(v) = self { - T::write(v, serializer) - } else { - unreachable!("write should be call by serialize") - } - } - - fn serialize(&self, serializer: &mut SerializerState) { - match self { - Some(v) => { - // ref flag - serializer.writer.i8(RefFlag::NotNullValue as i8); - // type - serializer.writer.i16(::ty() as i16); - - v.write(serializer); - } - None => { - serializer.writer.i8(RefFlag::Null as i8); - } - } - } - - fn reserved_space() -> usize { - mem::size_of::() - } -} - -pub struct SerializerState<'se> { - pub writer: &'se mut Writer, - pub tags: Vec<&'static str>, -} - -impl<'de> SerializerState<'de> { - fn new(writer: &mut Writer) -> SerializerState { - SerializerState { - writer, - tags: Vec::new(), - } - } - - pub fn write_tag(&mut self, tag: &'static str) { - const USESTRINGVALUE: u8 = 0; - const USESTRINGID: u8 = 1; - - let mayby_idx = self.tags.iter().position(|x| *x == tag); - match mayby_idx { - Some(idx) => { - self.writer.u8(USESTRINGID); - self.writer.i16(idx as i16); - } - None => { - self.writer.u8(USESTRINGVALUE); - self.writer.skip(8); // todo tag hash - self.writer.i16(tag.len() as i16); - self.writer.bytes(tag.as_bytes()); - } - }; - } - - fn head(&mut self) -> &Self { - const HEAD_SIZE: usize = 10; - self.writer - .reserve(::reserved_space() + SIZE_OF_REF_AND_TYPE + HEAD_SIZE); - - let mut bitmap = 0; - bitmap |= config_flags::IS_LITTLE_ENDIAN_FLAG; - bitmap |= config_flags::IS_CROSS_LANGUAGE_FLAG; - self.writer.u8(bitmap); - self.writer.u8(Language::Rust as u8); - self.writer.skip(4); // native offset - self.writer.skip(4); // native size - self - } -} - -pub fn to_buffer(record: &T) -> Vec { - let mut writer = Writer::default(); - let mut serializer = SerializerState::new(&mut writer); - serializer.head::(); - ::serialize(record, &mut serializer); - writer.dump() -} diff --git a/rust/tests/Cargo.toml b/rust/tests/Cargo.toml index 340411c1ee..07a8afa76e 100644 --- a/rust/tests/Cargo.toml +++ b/rust/tests/Cargo.toml @@ -23,7 +23,7 @@ rust-version.workspace = true publish = false [dependencies] -fury = { path = "../fury" } +fury-core = { path = "../fury-core" } fury-derive = { path = "../fury-derive" } chrono = "0.4" diff --git a/rust/tests/tests/test_complex_struct.rs b/rust/tests/tests/test_complex_struct.rs index b114db4220..fddf5eb563 100644 --- a/rust/tests/tests/test_complex_struct.rs +++ b/rust/tests/tests/test_complex_struct.rs @@ -16,7 +16,8 @@ // under the License. use chrono::{DateTime, NaiveDate, NaiveDateTime}; -use fury::{from_buffer, to_buffer, Fury}; +use fury_core::fury::Fury; +use fury_derive::Fury; use std::collections::HashMap; #[test] @@ -64,55 +65,12 @@ fn complex_struct() { c5: 2.0, c6: 4.0, }; - - let bin: Vec = to_buffer(&person); - let obj: Person = from_buffer(&bin).expect("should success"); + let fury = Fury::default(); + let bin: Vec = fury.serialize(&person); + let obj: Person = fury.deserialize(&bin).expect("should success"); assert_eq!(person, obj); } -#[test] -fn decode_py_struct() { - #[derive(Fury, Debug, PartialEq)] - #[tag("example.foo2")] - struct Animal { - category: String, - } - - #[derive(Fury, Debug, PartialEq)] - #[tag("example.ComplexObject")] - struct Person { - f1: String, - f2: HashMap, - f3: i8, - f4: i16, - f5: i32, - f6: i64, - f7: f32, - f8: f64, - f9: Vec, - f10: HashMap, - } - - let bin = [ - 134, 0, 179, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 81, 159, 160, 124, 69, 240, 2, 120, 21, 0, - 101, 120, 97, 109, 112, 108, 101, 46, 67, 111, 109, 112, 108, 101, 120, 79, 98, 106, 101, - 99, 116, 71, 168, 32, 21, 0, 13, 0, 3, 115, 116, 114, 0, 30, 0, 2, 255, 7, 0, 1, 0, 0, 0, - 255, 12, 0, 85, 85, 85, 85, 85, 85, 213, 63, 255, 7, 0, 100, 0, 0, 0, 255, 12, 0, 146, 36, - 73, 146, 36, 73, 210, 63, 0, 30, 0, 2, 0, 13, 0, 2, 107, 49, 255, 3, 0, 255, 0, 13, 0, 2, - 107, 50, 255, 3, 0, 2, 255, 3, 0, 127, 255, 5, 0, 255, 127, 255, 7, 0, 255, 255, 255, 127, - 255, 9, 0, 255, 255, 255, 255, 255, 255, 255, 127, 255, 11, 0, 0, 0, 0, 63, 255, 12, 0, 85, - 85, 85, 85, 85, 85, 229, 63, 0, 25, 0, 2, 255, 5, 0, 1, 0, 255, 5, 0, 2, 0, 134, 2, 98, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 81, 159, 160, 124, 69, 240, 2, 120, 21, 0, 101, 120, 97, 109, - 112, 108, 101, 46, 67, 111, 109, 112, 108, 101, 120, 79, 98, 106, 101, 99, 116, 71, 168, - 32, 21, 253, 253, 253, 255, 3, 0, 0, 255, 5, 0, 0, 0, 255, 7, 0, 0, 0, 0, 0, 255, 9, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 255, 11, 0, 171, 170, 170, 62, 255, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 253, - ]; - - let obj: Person = from_buffer(&bin).expect("should some"); - print!("{:?}", obj); -} - #[test] fn encode_to_obin() { #[derive(Fury, Debug, PartialEq)] @@ -134,8 +92,8 @@ fn encode_to_obin() { f8: f64, f10: HashMap, } - - let bin: Vec = to_buffer(&Person { + let fury = Fury::default(); + let bin: Vec = fury.serialize(&Person { f1: "Hello".to_string(), f2: HashMap::from([("hello1".to_string(), 1), ("hello2".to_string(), 2)]), f3: 1, diff --git a/rust/tests/tests/test_meta_string.rs b/rust/tests/tests/test_meta_string.rs index a1065f39be..d90f8a81e2 100644 --- a/rust/tests/tests/test_meta_string.rs +++ b/rust/tests/tests/test_meta_string.rs @@ -17,7 +17,7 @@ use std::iter; -use fury::{Encoding, MetaStringDecoder, MetaStringEncoder}; +use fury_core::meta::{Encoding, MetaStringDecoder, MetaStringEncoder}; #[test] fn test_encode_meta_string_lower_special() { diff --git a/rust/tests/tests/test_row.rs b/rust/tests/tests/test_row.rs index c37ac01b84..d5bce9c071 100644 --- a/rust/tests/tests/test_row.rs +++ b/rust/tests/tests/test_row.rs @@ -17,7 +17,8 @@ use std::collections::BTreeMap; -use fury::{from_row, to_row, FuryRow}; +use fury_core::row::{from_row, to_row}; +use fury_derive::FuryRow; #[test] fn row() { diff --git a/rust/tests/tests/test_util.rs b/rust/tests/tests/test_util.rs index a37705c9e1..85a7124673 100644 --- a/rust/tests/tests/test_util.rs +++ b/rust/tests/tests/test_util.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use fury::to_utf8; +use fury_core::util::to_utf8; #[test] fn test_to_utf8() {