From 08b6a0b8c2c3a7a70db273fd4beaac18c55ad950 Mon Sep 17 00:00:00 2001 From: Mees Delzenne Date: Thu, 15 Aug 2024 16:11:03 +0200 Subject: [PATCH 1/8] Remove bincode --- Cargo.toml | 2 +- src/error.rs | 37 +++- src/format.rs | 3 + src/implementations/bound.rs | 25 +-- src/implementations/duration.rs | 45 ++-- src/implementations/mod.rs | 17 ++ src/implementations/path.rs | 25 +-- src/implementations/primitives.rs | 352 +++++++++++++++++++++++++++--- src/implementations/result.rs | 21 +- src/implementations/string.rs | 77 +++++-- src/implementations/tuple.rs | 177 ++++++--------- 11 files changed, 545 insertions(+), 236 deletions(-) create mode 100644 src/format.rs diff --git a/Cargo.toml b/Cargo.toml index 36a955e..75a21d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,6 @@ members = ["derive"] default = [] [dependencies] -bincode = "1.3.3" chrono = { version = "0.4.26", features = ["serde"], optional = true } derive = { version = "0.8.0", package = "revision-derive", path = "derive" } geo = { version = "0.26.0", features = ["use-serde"], optional = true } @@ -35,6 +34,7 @@ uuid = { version = "1.4.1", optional = true } [dev-dependencies] rand = "0.8.5" criterion = "0.5.1" +bincode = "1.3.3" [[bench]] name = "roaring" diff --git a/src/error.rs b/src/error.rs index 5980411..dcbae92 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,11 +1,24 @@ +use std::{io, str::Utf8Error}; use thiserror::Error; /// An error which occurs when revisioned serialization / deserialization fails. -#[derive(Error, Debug, PartialEq)] +#[derive(Error, Debug)] pub enum Error { /// An IO error occured. - Io(i32), - /// Generic serialization error. + Io(io::Error), + /// Tried to deserialize a boolean value with an invalid byte value. + InvalidBoolValue(u8), + /// Deserialization encountered integer encoding which is not suported. + InvalidIntegerEncoding, + /// Deserialization encountered an integer with a value which did not fit the target type.. + IntegerOverflow, + /// Path contains invalid utf-8 characters + InvalidPath, + /// Invalid character encoding + InvalidCharEncoding, + /// Error parsing a string + Utf8Error(Utf8Error), + /// Failed to serialize character. Serialize(String), /// Generic deserialization error. Deserialize(String), @@ -17,6 +30,24 @@ impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::result::Result<(), std::fmt::Error> { match self { Self::Io(e) => write!(f, "An IO error occured: {}", e), + Self::InvalidBoolValue(_) => { + write!(f, "Tried to deserialize a boolean value with an invalid byte value.") + } + Self::InvalidIntegerEncoding => { + write!(f, "Encountered invalid integer encoding.") + } + Self::IntegerOverflow => { + write!(f, "Encountered integer which doesn't fit the target integer type during deserialization.") + } + Self::InvalidPath => { + write!(f, "Path contained invalid UTF-8 characters.") + } + Self::InvalidCharEncoding => { + write!(f, "Invalid character encoding.") + } + Self::Utf8Error(x) => { + write!(f, "Invalid UTF-8 characters in string: {x}") + } Self::Serialize(e) => write!(f, "A serialization error occured: {}", e), Self::Deserialize(e) => write!(f, "A deserialization error occured: {}", e), Self::Conversion(e) => write!(f, "A user generated conversion error occured: {}", e), diff --git a/src/format.rs b/src/format.rs new file mode 100644 index 0000000..07e6ed7 --- /dev/null +++ b/src/format.rs @@ -0,0 +1,3 @@ +pub trait Num { + const BITS: usize; +} diff --git a/src/implementations/bound.rs b/src/implementations/bound.rs index 2ab065c..8ed5f5d 100644 --- a/src/implementations/bound.rs +++ b/src/implementations/bound.rs @@ -1,28 +1,18 @@ use super::super::Error; use super::super::Revisioned; -use bincode::Options; use std::ops::Bound; impl Revisioned for Bound { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - let opts = bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .reject_trailing_bytes(); match *self { - Bound::Unbounded => opts - .serialize_into(writer, &0u32) - .map_err(|ref err| Error::Serialize(format!("{:?}", err))), + Bound::Unbounded => 0u32.serialize_revisioned(writer), Bound::Included(ref value) => { - opts.serialize_into(&mut *writer, &1u32) - .map_err(|ref err| Error::Serialize(format!("{:?}", err)))?; + 1u32.serialize_revisioned(writer)?; value.serialize_revisioned(writer) } Bound::Excluded(ref value) => { - opts.serialize_into(&mut *writer, &2u32) - .map_err(|ref err| Error::Serialize(format!("{:?}", err)))?; + 2u32.serialize_revisioned(writer)?; value.serialize_revisioned(writer) } } @@ -30,14 +20,7 @@ impl Revisioned for Bound { #[inline] fn deserialize_revisioned(reader: &mut R) -> Result { - let opts = bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .reject_trailing_bytes(); - let variant: u32 = opts - .deserialize_from(&mut *reader) - .map_err(|ref err| Error::Deserialize(format!("{:?}", err)))?; + let variant = u32::deserialize_revisioned(reader)?; match variant { 0 => Ok(Bound::Unbounded), 1 => Ok(Bound::Included( diff --git a/src/implementations/duration.rs b/src/implementations/duration.rs index edc7ea5..f160b87 100644 --- a/src/implementations/duration.rs +++ b/src/implementations/duration.rs @@ -1,29 +1,19 @@ use super::super::Error; use super::super::Revisioned; -use bincode::Options; use std::time::Duration; impl Revisioned for Duration { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .reject_trailing_bytes() - .serialize_into(writer, self) - .map_err(|ref err| Error::Serialize(format!("{:?}", err))) + self.as_secs().serialize_revisioned(writer)?; + self.subsec_nanos().serialize_revisioned(writer) } #[inline] fn deserialize_revisioned(reader: &mut R) -> Result { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .reject_trailing_bytes() - .deserialize_from(reader) - .map_err(|ref err| Error::Deserialize(format!("{:?}", err))) + let secs = u64::deserialize_revisioned(reader)?; + let nanos = u32::deserialize_revisioned(reader)?; + Ok(Duration::new(secs, nanos)) } fn revision() -> u16 { @@ -33,9 +23,9 @@ impl Revisioned for Duration { #[cfg(test)] mod tests { - use super::Duration; use super::Revisioned; + use bincode::Options as _; #[test] fn test_string() { @@ -46,4 +36,27 @@ mod tests { let out = ::deserialize_revisioned(&mut mem.as_slice()).unwrap(); assert_eq!(val, out); } + + #[test] + fn bincode_compat() { + fn assert_compat(d: Duration) { + let bincode = bincode::options() + .with_no_limit() + .with_little_endian() + .with_varint_encoding() + .reject_trailing_bytes() + .serialize(&d) + .unwrap(); + + let mut revision = Vec::new(); + d.serialize_revisioned(&mut revision).unwrap(); + + assert_eq!(revision, bincode) + } + + assert_compat(Duration::ZERO); + assert_compat(Duration::MAX); + assert_compat(Duration::new(u64::MAX, 0)); + assert_compat(Duration::new(0, 999_999_999)); + } } diff --git a/src/implementations/mod.rs b/src/implementations/mod.rs index b3c60c9..c790c6c 100644 --- a/src/implementations/mod.rs +++ b/src/implementations/mod.rs @@ -1,3 +1,7 @@ +use std::io; + +use crate::Error; + pub mod arrays; pub mod bound; pub mod boxes; @@ -20,3 +24,16 @@ pub mod tuple; pub mod uuid; pub mod vecs; pub mod wrapping; + +pub fn unexpected_eof() -> Error { + Error::Io(io::Error::new(io::ErrorKind::UnexpectedEof, "")) +} + +pub fn read_buffer(reader: &mut R) -> Result<[u8; COUNT], Error> { + let mut buffer = [0u8; COUNT]; + let count = reader.read(&mut buffer).map_err(Error::Io)?; + if count != COUNT { + return Err(unexpected_eof()); + } + Ok(buffer) +} diff --git a/src/implementations/path.rs b/src/implementations/path.rs index 6ee21af..afe9ae6 100644 --- a/src/implementations/path.rs +++ b/src/implementations/path.rs @@ -2,29 +2,24 @@ use std::path::PathBuf; use super::super::Error; use super::super::Revisioned; -use bincode::Options; impl Revisioned for PathBuf { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .reject_trailing_bytes() - .serialize_into(writer, self) - .map_err(|ref err| Error::Serialize(format!("{:?}", err))) + match self.to_str() { + Some(s) => { + (s.len() as u64).serialize_revisioned(writer)?; + writer.write_all(s.as_bytes()).map_err(Error::Io)?; + Ok(()) + } + None => Err(Error::InvalidPath), + } } #[inline] fn deserialize_revisioned(reader: &mut R) -> Result { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .reject_trailing_bytes() - .deserialize_from(reader) - .map_err(|ref err| Error::Deserialize(format!("{:?}", err))) + let s = String::deserialize_revisioned(reader)?; + Ok(PathBuf::from(s)) } fn revision() -> u16 { diff --git a/src/implementations/primitives.rs b/src/implementations/primitives.rs index 8870b54..ba46842 100644 --- a/src/implementations/primitives.rs +++ b/src/implementations/primitives.rs @@ -1,19 +1,200 @@ -use super::super::Error; +use std::{io, u64}; + use super::super::Revisioned; -use bincode::Options; +use super::read_buffer; +use crate::Error; + +/// zigzag encode a 64bit integer +fn zigzag_64(v: i64) -> u64 { + (v >> (i64::BITS - 1)) as u64 ^ ((v as u64) << 1) +} + +/// undo zigzag encoding +fn gazgiz_64(v: u64) -> i64 { + (v >> 1) as i64 ^ -((v & 1) as i64) +} + +/// zigzag encode a 128bit integer +fn zigzag_128(v: i128) -> u128 { + (v >> (i128::BITS - 1)) as u128 ^ ((v as u128) << 1) +} + +/// undo zigzag encoding +fn gazgiz_128(v: u128) -> i128 { + (v >> 1) as i128 ^ -((v & 1) as i128) +} + +fn encode_u64(writer: &mut W, i: u64) -> Result<(), Error> +where + W: io::Write, +{ + if i < 251 { + writer.write_all(&[i as u8]).map_err(Error::Io)?; + } else if i < (1 << 16) { + let bytes = (i as u16).to_le_bytes(); + writer.write_all(&[251, bytes[0], bytes[1]]).map_err(Error::Io)?; + } else if i < (1 << 32) { + let bytes = (i as u32).to_le_bytes(); + writer.write_all(&[252, bytes[0], bytes[1], bytes[2], bytes[3]]).map_err(Error::Io)?; + } else { + let bytes = i.to_le_bytes(); + writer + .write_all(&[ + 253, bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], + ]) + .map_err(Error::Io)?; + } + + Ok(()) +} + +fn encode_u128(writer: &mut W, i: u128) -> Result<(), Error> +where + W: io::Write, +{ + if i < 251 { + writer.write_all(&[i as u8]).map_err(Error::Io)?; + } else if i < (1 << 16) { + let bytes = (i as u16).to_le_bytes(); + writer.write_all(&[251, bytes[0], bytes[1]]).map_err(Error::Io)?; + } else if i < (1 << 32) { + let bytes = (i as u32).to_le_bytes(); + writer.write_all(&[252, bytes[0], bytes[1], bytes[2], bytes[3]]).map_err(Error::Io)?; + } else if i < (1 << 64) { + let bytes = (i as u64).to_le_bytes(); + writer + .write_all(&[ + 253, bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], + ]) + .map_err(Error::Io)?; + } else { + let bytes = i.to_le_bytes(); + let bytes = [ + 254, bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], + bytes[8], bytes[9], bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15], + ]; + writer.write_all(&bytes).map_err(Error::Io)?; + } + + Ok(()) +} +fn decode_u64(reader: &mut R) -> Result +where + R: io::Read, +{ + let b = read_buffer::<1, _>(reader)?; + let v = match b[0] { + 251 => { + let b = read_buffer::<2, _>(reader)?; + u16::from_le_bytes(b) as u64 + } + 252 => { + let b = read_buffer::<4, _>(reader)?; + u32::from_le_bytes(b) as u64 + } + 253 => { + let b = read_buffer::<8, _>(reader)?; + u64::from_le_bytes(b) + } + 254 => return Err(Error::IntegerOverflow), + 255 => return Err(Error::InvalidIntegerEncoding), + x => x as u64, + }; + Ok(v) +} -macro_rules! impl_revisioned { +fn decode_u128(reader: &mut R) -> Result +where + R: io::Read, +{ + let b = read_buffer::<1, _>(reader)?; + let v = match b[0] { + 251 => { + let b = read_buffer::<2, _>(reader)?; + u16::from_le_bytes(b) as u128 + } + 252 => { + let b = read_buffer::<4, _>(reader)?; + u32::from_le_bytes(b) as u128 + } + 253 => { + let b = read_buffer::<8, _>(reader)?; + u64::from_le_bytes(b) as u128 + } + 254 => { + let b = read_buffer::<16, _>(reader)?; + u128::from_le_bytes(b) + } + 255 => return Err(Error::InvalidIntegerEncoding), + x => x as u128, + }; + Ok(v) +} + +impl Revisioned for bool { + fn revision() -> u16 { + 1 + } + + fn serialize_revisioned(&self, w: &mut W) -> Result<(), Error> { + let v = *self as u8; + w.write(&[v]).map_err(Error::Io)?; + Ok(()) + } + + fn deserialize_revisioned(r: &mut R) -> Result + where + Self: Sized, + { + let buffer = read_buffer::<1, _>(r)?; + match buffer[0] { + 0 => Ok(false), + 1 => Ok(true), + x => Err(Error::InvalidBoolValue(x)), + } + } +} + +impl Revisioned for usize { + fn revision() -> u16 { + 1 + } + + fn serialize_revisioned(&self, w: &mut W) -> Result<(), Error> { + ((*self) as u64).serialize_revisioned(w) + } + + fn deserialize_revisioned(r: &mut R) -> Result + where + Self: Sized, + { + u64::deserialize_revisioned(r).map(|x| x as usize) + } +} + +impl Revisioned for isize { + fn revision() -> u16 { + 1 + } + + fn serialize_revisioned(&self, w: &mut W) -> Result<(), Error> { + ((*self) as i64).serialize_revisioned(w) + } + + fn deserialize_revisioned(r: &mut R) -> Result + where + Self: Sized, + { + i64::deserialize_revisioned(r).map(|x| x as isize) + } +} + +macro_rules! impl_revisioned_int { ($ty:ident) => { impl Revisioned for $ty { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .reject_trailing_bytes() - .serialize_into(writer, self) - .map_err(|ref err| Error::Serialize(format!("{:?}", err))) + encode_u64(writer, (*self) as u64) } #[inline] @@ -21,13 +202,7 @@ macro_rules! impl_revisioned { where Self: Sized, { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .reject_trailing_bytes() - .deserialize_from(reader) - .map_err(|ref err| Error::Deserialize(format!("{:?}", err))) + decode_u64(reader).map(|x| x as $ty) } fn revision() -> u16 { @@ -37,28 +212,139 @@ macro_rules! impl_revisioned { }; } -impl_revisioned!(bool); -impl_revisioned!(isize); -impl_revisioned!(i8); -impl_revisioned!(i16); -impl_revisioned!(i32); -impl_revisioned!(i64); -impl_revisioned!(i128); -impl_revisioned!(usize); -impl_revisioned!(u8); -impl_revisioned!(u16); -impl_revisioned!(u32); -impl_revisioned!(u64); -impl_revisioned!(u128); -impl_revisioned!(f32); -impl_revisioned!(f64); -impl_revisioned!(char); +macro_rules! impl_revisioned_signed_int { + ($ty:ident) => { + impl Revisioned for $ty { + #[inline] + fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { + encode_u64(writer, zigzag_64((*self) as i64)) + } + + #[inline] + fn deserialize_revisioned(reader: &mut R) -> Result + where + Self: Sized, + { + decode_u64(reader).map(|x| gazgiz_64(x) as $ty) + } + + fn revision() -> u16 { + 1 + } + } + }; +} + +impl_revisioned_int!(u8); +impl_revisioned_int!(u16); +impl_revisioned_int!(u32); +impl_revisioned_int!(u64); + +impl_revisioned_signed_int!(i8); +impl_revisioned_signed_int!(i16); +impl_revisioned_signed_int!(i32); +impl_revisioned_signed_int!(i64); + +impl Revisioned for i128 { + fn revision() -> u16 { + 1 + } + + fn serialize_revisioned(&self, w: &mut W) -> Result<(), Error> { + encode_u128(w, zigzag_128(*self)) + } + + fn deserialize_revisioned(r: &mut R) -> Result + where + Self: Sized, + { + decode_u128(r).map(|x| gazgiz_128(x)) + } +} + +impl Revisioned for u128 { + fn revision() -> u16 { + 1 + } + + fn serialize_revisioned(&self, w: &mut W) -> Result<(), Error> { + encode_u128(w, *self) + } + + fn deserialize_revisioned(r: &mut R) -> Result + where + Self: Sized, + { + decode_u128(r) + } +} + +impl Revisioned for f32 { + fn revision() -> u16 { + 1 + } + + fn serialize_revisioned(&self, w: &mut W) -> Result<(), Error> { + let bytes = self.to_le_bytes(); + w.write_all(&bytes).map_err(Error::Io) + } + + fn deserialize_revisioned(r: &mut R) -> Result + where + Self: Sized, + { + let b = read_buffer::<4, _>(r)?; + Ok(f32::from_le_bytes(b)) + } +} + +impl Revisioned for f64 { + fn revision() -> u16 { + 1 + } + + fn serialize_revisioned(&self, w: &mut W) -> Result<(), Error> { + let bytes = self.to_le_bytes(); + w.write_all(&bytes).map_err(Error::Io) + } + + fn deserialize_revisioned(r: &mut R) -> Result + where + Self: Sized, + { + let b = read_buffer::<8, _>(r)?; + Ok(f64::from_le_bytes(b)) + } +} #[cfg(test)] mod tests { + use std::u64; + + use crate::implementations::primitives::{gazgiz_64, zigzag_64}; use super::Revisioned; + #[test] + fn test_zigzag() { + assert_eq!(zigzag_64(0), 0); + assert_eq!(zigzag_64(1), 2); + assert_eq!(zigzag_64(-1), 1); + + assert_eq!(zigzag_64(i64::MIN), u64::MAX); + assert_eq!(zigzag_64(i64::MAX), u64::MAX - 1); + } + + #[test] + fn test_gazgiz() { + assert_eq!(gazgiz_64(0), 0); + assert_eq!(gazgiz_64(1), -1); + assert_eq!(gazgiz_64(2), 1); + + assert_eq!(gazgiz_64(u64::MAX), i64::MIN); + assert_eq!(gazgiz_64(u64::MAX - 1), i64::MAX); + } + #[test] fn test_bool() { let val = true; diff --git a/src/implementations/result.rs b/src/implementations/result.rs index 2c86e6b..169b093 100644 --- a/src/implementations/result.rs +++ b/src/implementations/result.rs @@ -1,24 +1,16 @@ use super::super::Error; use super::super::Revisioned; -use bincode::Options; impl Revisioned for Result { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - let opts = bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .reject_trailing_bytes(); match self { Ok(v) => { - opts.serialize_into(&mut *writer, &0u32) - .map_err(|ref err| Error::Serialize(format!("{:?}", err)))?; + 0u32.serialize_revisioned(writer)?; v.serialize_revisioned(writer) } Err(e) => { - opts.serialize_into(&mut *writer, &1u32) - .map_err(|ref err| Error::Serialize(format!("{:?}", err)))?; + 1u32.serialize_revisioned(writer)?; e.serialize_revisioned(writer) } } @@ -26,14 +18,7 @@ impl Revisioned for Result { #[inline] fn deserialize_revisioned(reader: &mut R) -> Result { - let opts = bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .reject_trailing_bytes(); - let variant: u32 = opts - .deserialize_from(&mut *reader) - .map_err(|ref err| Error::Deserialize(format!("{:?}", err)))?; + let variant = u32::deserialize_revisioned(reader)?; match variant { 0 => Ok(Ok(T::deserialize_revisioned(reader) .map_err(|ref err| Error::Deserialize(format!("{:?}", err)))?)), diff --git a/src/implementations/string.rs b/src/implementations/string.rs index 3f2df9a..20f66d0 100644 --- a/src/implementations/string.rs +++ b/src/implementations/string.rs @@ -1,35 +1,78 @@ -use super::super::Error; -use super::super::Revisioned; -use bincode::Options; +use core::str; + +use crate::{Error, Revisioned}; impl Revisioned for String { + fn revision() -> u16 { + 1 + } + #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .reject_trailing_bytes() - .serialize_into(writer, self) - .map_err(|ref err| Error::Serialize(format!("{:?}", err))) + (self.len() as u64).serialize_revisioned(writer)?; + writer.write_all(self.as_bytes()).map_err(Error::Io) } #[inline] fn deserialize_revisioned(reader: &mut R) -> Result { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .reject_trailing_bytes() - .deserialize_from(reader) - .map_err(|ref err| Error::Deserialize(format!("{:?}", err))) + let len: usize = + u64::deserialize_revisioned(reader)?.try_into().map_err(|_| Error::IntegerOverflow)?; + let slice = vec![0u8; len]; + + String::from_utf8(slice).map_err(|x| Error::Utf8Error(x.utf8_error())) } +} +impl Revisioned for char { fn revision() -> u16 { 1 } + + fn serialize_revisioned(&self, w: &mut W) -> Result<(), Error> { + let buffer = &mut [0u8; 4]; + w.write_all(self.encode_utf8(buffer).as_bytes()).map_err(Error::Io) + } + + fn deserialize_revisioned(r: &mut R) -> Result + where + Self: Sized, + { + let mut buffer = [0u8; 4]; + r.read_exact(&mut buffer[..1]).map_err(Error::Io)?; + + let len = CHAR_LENGTH[buffer[0] as usize]; + if len == 0 { + return Err(Error::InvalidCharEncoding); + } + + r.read_exact(&mut buffer[1..(len as usize)]).map_err(Error::Io)?; + + str::from_utf8(&buffer[..(len as usize)]) + .map_err(|_| Error::InvalidCharEncoding) + .map(|x| x.chars().next().unwrap()) + } } +static CHAR_LENGTH: [u8; 256] = const { + let mut r = [0u8; 256]; + let mut i = 0; + while i < 256 { + if i & 0b1000_0000 == 0 { + r[i] = 1; + } else if i & 0b1110_000 == 0b1100_0000 { + r[i] = 2; + } else if i & 0b1111_000 == 0b1110_0000 { + r[i] = 3; + } else if i & 0b1111_100 == 0b1111_0000 { + r[i] = 4; + } + + i += 1; + } + + r +}; + #[cfg(test)] mod tests { diff --git a/src/implementations/tuple.rs b/src/implementations/tuple.rs index 79f8975..ac3dc57 100644 --- a/src/implementations/tuple.rs +++ b/src/implementations/tuple.rs @@ -1,120 +1,73 @@ use super::super::Error; use super::super::Revisioned; -impl Revisioned for (A, B) -where - A: Revisioned, - B: Revisioned, -{ - #[inline] - fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - self.0.serialize_revisioned(writer)?; - self.1.serialize_revisioned(writer)?; - Ok(()) - } - - #[inline] - fn deserialize_revisioned(reader: &mut R) -> Result { - Ok((A::deserialize_revisioned(reader)?, B::deserialize_revisioned(reader)?)) - } - - fn revision() -> u16 { - 1 - } +macro_rules! impl_tuple { + ($($n:ident),*$(,)?) => { + impl_tuple!{@marker $($n,)*} + }; + + ($($n:ident,)* @marker $head:ident, $($tail:ident,)*) => { + impl<$($n),*> Revisioned for ($($n,)*) + where $($n: Revisioned),* + { + fn revision() -> u16{ + 1 + } + + #[inline] + #[allow(non_snake_case)] + fn serialize_revisioned(&self, _writer: &mut W) -> Result<(), Error> { + let ($(ref $n,)*) = *self; + $( + $n.serialize_revisioned(_writer)?; + )* + Ok(()) + } + + #[inline] + #[allow(non_snake_case)] + fn deserialize_revisioned(_reader: &mut R) -> Result { + $( + let $n = Revisioned::deserialize_revisioned(_reader)?; + )* + Ok(($($n,)*)) + } + } + + impl_tuple!{$($n,)* $head, @marker $($tail,)*} + + }; + ($($n:ident,)* @marker) => { + impl<$($n),*> Revisioned for ($($n),*) + where $($n: Revisioned),* + { + fn revision() -> u16{ + 1 + } + + #[inline] + #[allow(non_snake_case)] + fn serialize_revisioned(&self, _writer: &mut W) -> Result<(), Error> { + let ($(ref $n),*) = self; + $( + $n.serialize_revisioned(_writer)?; + )* + Ok(()) + } + + #[inline] + #[allow(non_snake_case)] + fn deserialize_revisioned(_reader: &mut R) -> Result { + $( + let $n = Revisioned::deserialize_revisioned(_reader)?; + )* + Ok(($($n),*)) + } + } + }; } -impl Revisioned for (A, B, C) -where - A: Revisioned, - B: Revisioned, - C: Revisioned, -{ - #[inline] - fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - self.0.serialize_revisioned(writer)?; - self.1.serialize_revisioned(writer)?; - self.2.serialize_revisioned(writer)?; - Ok(()) - } - - #[inline] - fn deserialize_revisioned(reader: &mut R) -> Result { - Ok(( - A::deserialize_revisioned(reader)?, - B::deserialize_revisioned(reader)?, - C::deserialize_revisioned(reader)?, - )) - } - - fn revision() -> u16 { - 1 - } -} - -impl Revisioned for (A, B, C, D) -where - A: Revisioned, - B: Revisioned, - C: Revisioned, - D: Revisioned, -{ - #[inline] - fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - self.0.serialize_revisioned(writer)?; - self.1.serialize_revisioned(writer)?; - self.2.serialize_revisioned(writer)?; - self.3.serialize_revisioned(writer)?; - Ok(()) - } - - #[inline] - fn deserialize_revisioned(reader: &mut R) -> Result { - Ok(( - A::deserialize_revisioned(reader)?, - B::deserialize_revisioned(reader)?, - C::deserialize_revisioned(reader)?, - D::deserialize_revisioned(reader)?, - )) - } - - fn revision() -> u16 { - 1 - } -} - -impl Revisioned for (A, B, C, D, E) -where - A: Revisioned, - B: Revisioned, - C: Revisioned, - D: Revisioned, - E: Revisioned, -{ - #[inline] - fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - self.0.serialize_revisioned(writer)?; - self.1.serialize_revisioned(writer)?; - self.2.serialize_revisioned(writer)?; - self.3.serialize_revisioned(writer)?; - self.4.serialize_revisioned(writer)?; - Ok(()) - } - - #[inline] - fn deserialize_revisioned(reader: &mut R) -> Result { - Ok(( - A::deserialize_revisioned(reader)?, - B::deserialize_revisioned(reader)?, - C::deserialize_revisioned(reader)?, - D::deserialize_revisioned(reader)?, - E::deserialize_revisioned(reader)?, - )) - } - - fn revision() -> u16 { - 1 - } -} +impl_tuple! { A,B,C,D,E,F } #[cfg(test)] mod tests { From baf97fce28dcd09adf82756408f7882079e8578e Mon Sep 17 00:00:00 2001 From: Mees Delzenne Date: Thu, 15 Aug 2024 20:47:44 +0200 Subject: [PATCH 2/8] Add more tests --- Cargo.toml | 3 +- src/error.rs | 13 +- src/format.rs | 3 - src/implementations/duration.rs | 26 +--- src/implementations/geo.rs | 192 +++++++++++++++++------------- src/implementations/mod.rs | 22 ++++ src/implementations/path.rs | 10 +- src/implementations/primitives.rs | 92 +++++++++++++- src/implementations/regex.rs | 19 +-- src/implementations/string.rs | 64 ++++++++-- src/implementations/tuple.rs | 6 + src/implementations/vecs.rs | 12 ++ 12 files changed, 318 insertions(+), 144 deletions(-) delete mode 100644 src/format.rs diff --git a/Cargo.toml b/Cargo.toml index 75a21d2..63240bd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,14 +27,13 @@ ordered-float = { version = "3", optional = true } regex = { version = "1.9.3", optional = true } roaring = { version = "0.10.2", features = ["serde"], optional = true } rust_decimal = { version = "1.31.0", optional = true } -serde = "1.0.183" -thiserror = "1.0.44" uuid = { version = "1.4.1", optional = true } [dev-dependencies] rand = "0.8.5" criterion = "0.5.1" bincode = "1.3.3" +serde = "1.0.183" [[bench]] name = "roaring" diff --git a/src/error.rs b/src/error.rs index dcbae92..e978281 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,8 +1,7 @@ use std::{io, str::Utf8Error}; -use thiserror::Error; /// An error which occurs when revisioned serialization / deserialization fails. -#[derive(Error, Debug)] +#[derive(Debug)] pub enum Error { /// An IO error occured. Io(io::Error), @@ -26,6 +25,16 @@ pub enum Error { Conversion(String), } +impl std::error::Error for Error { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Error::Io(ref x) => Some(x), + Error::Utf8Error(ref x) => Some(x), + _ => None, + } + } +} + impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::result::Result<(), std::fmt::Error> { match self { diff --git a/src/format.rs b/src/format.rs deleted file mode 100644 index 07e6ed7..0000000 --- a/src/format.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub trait Num { - const BITS: usize; -} diff --git a/src/implementations/duration.rs b/src/implementations/duration.rs index f160b87..6105d23 100644 --- a/src/implementations/duration.rs +++ b/src/implementations/duration.rs @@ -23,9 +23,10 @@ impl Revisioned for Duration { #[cfg(test)] mod tests { + use crate::implementations::assert_bincode_compat; + use super::Duration; use super::Revisioned; - use bincode::Options as _; #[test] fn test_string() { @@ -39,24 +40,9 @@ mod tests { #[test] fn bincode_compat() { - fn assert_compat(d: Duration) { - let bincode = bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .reject_trailing_bytes() - .serialize(&d) - .unwrap(); - - let mut revision = Vec::new(); - d.serialize_revisioned(&mut revision).unwrap(); - - assert_eq!(revision, bincode) - } - - assert_compat(Duration::ZERO); - assert_compat(Duration::MAX); - assert_compat(Duration::new(u64::MAX, 0)); - assert_compat(Duration::new(0, 999_999_999)); + assert_bincode_compat(&Duration::ZERO); + assert_bincode_compat(&Duration::MAX); + assert_bincode_compat(&Duration::new(u64::MAX, 0)); + assert_bincode_compat(&Duration::new(0, 999_999_999)); } } diff --git a/src/implementations/geo.rs b/src/implementations/geo.rs index 404984b..bc5d969 100644 --- a/src/implementations/geo.rs +++ b/src/implementations/geo.rs @@ -2,29 +2,39 @@ use super::super::Error; use super::super::Revisioned; -use bincode::Options; +use super::vecs::serialize_slice; + +impl Revisioned for geo::Coord { + #[inline] + fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { + self.x.serialize_revisioned(writer)?; + self.y.serialize_revisioned(writer) + } + + #[inline] + fn deserialize_revisioned(reader: &mut R) -> Result { + let x = f64::deserialize_revisioned(reader)?; + let y = f64::deserialize_revisioned(reader)?; + Ok(Self { + x, + y, + }) + } + + fn revision() -> u16 { + 1 + } +} impl Revisioned for geo::Point { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .reject_trailing_bytes() - .serialize_into(writer, self) - .map_err(|ref err| Error::Serialize(format!("{:?}", err))) + self.0.serialize_revisioned(writer) } #[inline] fn deserialize_revisioned(reader: &mut R) -> Result { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .allow_trailing_bytes() - .deserialize_from(reader) - .map_err(|ref err| Error::Deserialize(format!("{:?}", err))) + Ok(Self(Revisioned::deserialize_revisioned(reader)?)) } fn revision() -> u16 { @@ -35,24 +45,12 @@ impl Revisioned for geo::Point { impl Revisioned for geo::LineString { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .allow_trailing_bytes() - .serialize_into(writer, self) - .map_err(|ref err| Error::Serialize(format!("{:?}", err))) + self.0.serialize_revisioned(writer) } #[inline] fn deserialize_revisioned(reader: &mut R) -> Result { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .allow_trailing_bytes() - .deserialize_from(reader) - .map_err(|ref err| Error::Deserialize(format!("{:?}", err))) + Ok(Self(Revisioned::deserialize_revisioned(reader)?)) } fn revision() -> u16 { @@ -63,24 +61,16 @@ impl Revisioned for geo::LineString { impl Revisioned for geo::Polygon { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .allow_trailing_bytes() - .serialize_into(writer, self) - .map_err(|ref err| Error::Serialize(format!("{:?}", err))) + self.exterior().serialize_revisioned(writer)?; + serialize_slice(self.interiors(), writer) } #[inline] fn deserialize_revisioned(reader: &mut R) -> Result { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .allow_trailing_bytes() - .deserialize_from(reader) - .map_err(|ref err| Error::Deserialize(format!("{:?}", err))) + Ok(Self::new( + Revisioned::deserialize_revisioned(reader)?, + Revisioned::deserialize_revisioned(reader)?, + )) } fn revision() -> u16 { @@ -91,24 +81,12 @@ impl Revisioned for geo::Polygon { impl Revisioned for geo::MultiPoint { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .allow_trailing_bytes() - .serialize_into(writer, self) - .map_err(|ref err| Error::Serialize(format!("{:?}", err))) + self.0.serialize_revisioned(writer) } #[inline] fn deserialize_revisioned(reader: &mut R) -> Result { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .allow_trailing_bytes() - .deserialize_from(reader) - .map_err(|ref err| Error::Deserialize(format!("{:?}", err))) + Ok(Self(Revisioned::deserialize_revisioned(reader)?)) } fn revision() -> u16 { @@ -119,24 +97,12 @@ impl Revisioned for geo::MultiPoint { impl Revisioned for geo::MultiLineString { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .allow_trailing_bytes() - .serialize_into(writer, self) - .map_err(|ref err| Error::Serialize(format!("{:?}", err))) + self.0.serialize_revisioned(writer) } #[inline] fn deserialize_revisioned(reader: &mut R) -> Result { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .allow_trailing_bytes() - .deserialize_from(reader) - .map_err(|ref err| Error::Deserialize(format!("{:?}", err))) + Ok(Self(Revisioned::deserialize_revisioned(reader)?)) } fn revision() -> u16 { @@ -147,27 +113,83 @@ impl Revisioned for geo::MultiLineString { impl Revisioned for geo::MultiPolygon { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .allow_trailing_bytes() - .serialize_into(writer, self) - .map_err(|ref err| Error::Serialize(format!("{:?}", err))) + self.0.serialize_revisioned(writer) } #[inline] fn deserialize_revisioned(reader: &mut R) -> Result { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .allow_trailing_bytes() - .deserialize_from(reader) - .map_err(|ref err| Error::Deserialize(format!("{:?}", err))) + Ok(Self(Revisioned::deserialize_revisioned(reader)?)) } fn revision() -> u16 { 1 } } + +#[cfg(test)] +mod test { + use std::cell::Cell; + + use geo::{Coord, LineString, MultiLineString, MultiPoint, MultiPolygon, Point, Polygon}; + + use crate::implementations::assert_bincode_compat; + + pub struct Rng(pub Cell); + + impl Rng { + pub fn next(&self) -> u64 { + let mut x = self.0.get(); + x ^= x << 13; + x ^= x >> 7; + x ^= x << 17; + self.0.set(x); + x + } + + pub fn next_f64(&self) -> f64 { + f64::from_bits(self.next()) + } + + pub fn next_point(&self) -> Point { + Point::new(self.next_f64(), self.next_f64()) + } + + pub fn next_points(&self, len: usize) -> Vec { + (0..len).map(|_| self.next_point()).collect() + } + + pub fn next_coords(&self, len: usize) -> Vec { + (0..len).map(|_| self.next_point().0).collect() + } + } + + #[test] + fn compat() { + let rng = Rng(Cell::new(0x1fb931de31)); + + let point_a = rng.next_point(); + let point_b = rng.next_point(); + assert_bincode_compat(&point_a); + assert_bincode_compat(&point_b); + + let line_string = LineString(rng.next_coords(10)); + assert_bincode_compat(&line_string); + + let create_multi_line = + || (0..10).map(|_| LineString(rng.next_coords(10))).collect::>(); + + let create_polygon = || Polygon::new(LineString(rng.next_coords(10)), create_multi_line()); + + let polygon = create_polygon(); + assert_bincode_compat(&polygon); + + let multi_point = MultiPoint(rng.next_points(10)); + assert_bincode_compat(&multi_point); + + let multi_line = MultiLineString(create_multi_line()); + assert_bincode_compat(&multi_line); + + let multi_polygon = MultiPolygon((0..10).map(|_| create_polygon()).collect()); + assert_bincode_compat(&multi_polygon); + } +} diff --git a/src/implementations/mod.rs b/src/implementations/mod.rs index c790c6c..604f5c0 100644 --- a/src/implementations/mod.rs +++ b/src/implementations/mod.rs @@ -37,3 +37,25 @@ pub fn read_buffer(reader: &mut R) -> Result<[u } Ok(buffer) } + +#[cfg(test)] +#[track_caller] +pub fn assert_bincode_compat(v: &T) +where + T: serde::Serialize + crate::Revisioned, +{ + use bincode::Options; + + let bincode = bincode::options() + .with_no_limit() + .with_little_endian() + .with_varint_encoding() + .reject_trailing_bytes() + .serialize(&v) + .unwrap(); + + let mut revision = Vec::new(); + v.serialize_revisioned(&mut revision).unwrap(); + + assert_eq!(revision, bincode) +} diff --git a/src/implementations/path.rs b/src/implementations/path.rs index afe9ae6..86d727b 100644 --- a/src/implementations/path.rs +++ b/src/implementations/path.rs @@ -2,16 +2,13 @@ use std::path::PathBuf; use super::super::Error; use super::super::Revisioned; +use super::string::serialize_str; impl Revisioned for PathBuf { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { match self.to_str() { - Some(s) => { - (s.len() as u64).serialize_revisioned(writer)?; - writer.write_all(s.as_bytes()).map_err(Error::Io)?; - Ok(()) - } + Some(s) => serialize_str(writer, s), None => Err(Error::InvalidPath), } } @@ -32,11 +29,14 @@ mod tests { use std::path::PathBuf; + use crate::implementations::assert_bincode_compat; + use super::Revisioned; #[test] fn test_pathbuf() { let val = PathBuf::from("/test/path/to/file.txt"); + assert_bincode_compat(&val); let mut mem: Vec = vec![]; val.serialize_revisioned(&mut mem).unwrap(); assert_eq!(mem.len(), 23); diff --git a/src/implementations/primitives.rs b/src/implementations/primitives.rs index ba46842..8e36603 100644 --- a/src/implementations/primitives.rs +++ b/src/implementations/primitives.rs @@ -189,6 +189,44 @@ impl Revisioned for isize { } } +impl Revisioned for u8 { + #[inline] + fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { + writer.write_all(&[*self]).map_err(Error::Io) + } + + #[inline] + fn deserialize_revisioned(reader: &mut R) -> Result + where + Self: Sized, + { + Ok(read_buffer::<1, _>(reader)?[0]) + } + + fn revision() -> u16 { + 1 + } +} + +impl Revisioned for i8 { + #[inline] + fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { + writer.write_all(&[*self as u8]).map_err(Error::Io) + } + + #[inline] + fn deserialize_revisioned(reader: &mut R) -> Result + where + Self: Sized, + { + Ok(read_buffer::<1, _>(reader)?[0] as i8) + } + + fn revision() -> u16 { + 1 + } +} + macro_rules! impl_revisioned_int { ($ty:ident) => { impl Revisioned for $ty { @@ -235,12 +273,10 @@ macro_rules! impl_revisioned_signed_int { }; } -impl_revisioned_int!(u8); impl_revisioned_int!(u16); impl_revisioned_int!(u32); impl_revisioned_int!(u64); -impl_revisioned_signed_int!(i8); impl_revisioned_signed_int!(i16); impl_revisioned_signed_int!(i32); impl_revisioned_signed_int!(i64); @@ -319,9 +355,13 @@ impl Revisioned for f64 { #[cfg(test)] mod tests { + use core::{f32, f64}; use std::u64; - use crate::implementations::primitives::{gazgiz_64, zigzag_64}; + use crate::implementations::{ + assert_bincode_compat, + primitives::{gazgiz_64, zigzag_64}, + }; use super::Revisioned; @@ -504,4 +544,50 @@ mod tests { let out = ::deserialize_revisioned(&mut mem.as_slice()).unwrap(); assert_eq!(val, out); } + + macro_rules! test_integer_compat { + ($n:ident,$ty:ident) => { + #[test] + fn $n() { + let zero: $ty = 0; + assert_bincode_compat(&zero); + assert_bincode_compat(&$ty::MAX); + assert_bincode_compat(&$ty::MIN); + } + }; + } + + test_integer_compat!(compat_i8, i8); + test_integer_compat!(compat_u8, u8); + test_integer_compat!(compat_i16, i16); + test_integer_compat!(compat_u16, u16); + test_integer_compat!(compat_i32, i32); + test_integer_compat!(compat_u32, u32); + test_integer_compat!(compat_i64, i64); + test_integer_compat!(compat_u64, u64); + test_integer_compat!(compat_i128, i128); + test_integer_compat!(compat_u128, u128); + + #[test] + fn compat_f64() { + assert_bincode_compat(&0f64); + assert_bincode_compat(&f64::MAX); + assert_bincode_compat(&f64::MIN); + assert_bincode_compat(&f64::EPSILON); + assert_bincode_compat(&f64::INFINITY); + assert_bincode_compat(&f64::NEG_INFINITY); + assert_bincode_compat(&f64::NAN); + } + + #[test] + fn compat_f32() { + assert_bincode_compat(&0f32); + assert_bincode_compat(&f32::MAX); + assert_bincode_compat(&f32::MIN); + assert_bincode_compat(&f32::EPSILON); + assert_bincode_compat(&f32::INFINITY); + assert_bincode_compat(&f32::NEG_INFINITY); + assert_bincode_compat(&f32::MIN_POSITIVE); + assert_bincode_compat(&f32::NAN); + } } diff --git a/src/implementations/regex.rs b/src/implementations/regex.rs index 5a91d8d..4ddaf7e 100644 --- a/src/implementations/regex.rs +++ b/src/implementations/regex.rs @@ -2,31 +2,18 @@ use super::super::Error; use super::super::Revisioned; -use bincode::Options; +use super::string::serialize_str; use regex::Regex; -use std::borrow::Cow; impl Revisioned for Regex { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .reject_trailing_bytes() - .serialize_into(writer, self.as_str()) - .map_err(|ref err| Error::Serialize(format!("{:?}", err))) + serialize_str(writer, self.as_str()) } #[inline] fn deserialize_revisioned(reader: &mut R) -> Result { - let s: Cow = bincode::options() - .with_no_limit() - .with_little_endian() - .with_varint_encoding() - .reject_trailing_bytes() - .deserialize_from(reader) - .map_err(|ref err| Error::Deserialize(format!("{:?}", err)))?; + let s = String::deserialize_revisioned(reader)?; s.parse().map_err(|_| Error::Deserialize("invalid regex".to_string())) } diff --git a/src/implementations/string.rs b/src/implementations/string.rs index 20f66d0..c121c6c 100644 --- a/src/implementations/string.rs +++ b/src/implementations/string.rs @@ -2,6 +2,11 @@ use core::str; use crate::{Error, Revisioned}; +pub(crate) fn serialize_str(writer: &mut W, str: &str) -> Result<(), Error> { + (str.len() as u64).serialize_revisioned(writer)?; + writer.write_all(str.as_bytes()).map_err(Error::Io) +} + impl Revisioned for String { fn revision() -> u16 { 1 @@ -9,16 +14,15 @@ impl Revisioned for String { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - (self.len() as u64).serialize_revisioned(writer)?; - writer.write_all(self.as_bytes()).map_err(Error::Io) + serialize_str(writer, self) } #[inline] fn deserialize_revisioned(reader: &mut R) -> Result { let len: usize = u64::deserialize_revisioned(reader)?.try_into().map_err(|_| Error::IntegerOverflow)?; - let slice = vec![0u8; len]; - + let mut slice = vec![0u8; len]; + reader.read_exact(&mut slice).map_err(Error::Io)?; String::from_utf8(slice).map_err(|x| Error::Utf8Error(x.utf8_error())) } } @@ -40,7 +44,10 @@ impl Revisioned for char { let mut buffer = [0u8; 4]; r.read_exact(&mut buffer[..1]).map_err(Error::Io)?; - let len = CHAR_LENGTH[buffer[0] as usize]; + dbg!(buffer[0]); + println!("{:b}", buffer[0]); + + let len = dbg!(CHAR_LENGTH[buffer[0] as usize]); if len == 0 { return Err(Error::InvalidCharEncoding); } @@ -59,11 +66,11 @@ static CHAR_LENGTH: [u8; 256] = const { while i < 256 { if i & 0b1000_0000 == 0 { r[i] = 1; - } else if i & 0b1110_000 == 0b1100_0000 { + } else if i & 0b1110_0000 == 0b1100_0000 { r[i] = 2; - } else if i & 0b1111_000 == 0b1110_0000 { + } else if i & 0b1111_0000 == 0b1110_0000 { r[i] = 3; - } else if i & 0b1111_100 == 0b1111_0000 { + } else if i & 0b1111_1000 == 0b1111_0000 { r[i] = 4; } @@ -76,6 +83,10 @@ static CHAR_LENGTH: [u8; 256] = const { #[cfg(test)] mod tests { + use std::char; + + use crate::implementations::assert_bincode_compat; + use super::Revisioned; #[test] @@ -87,4 +98,41 @@ mod tests { let out = ::deserialize_revisioned(&mut mem.as_slice()).unwrap(); assert_eq!(val, out); } + + #[test] + fn test_char() { + let char = '𐃌'; + let mut mem = Vec::new(); + char.serialize_revisioned(&mut mem).unwrap(); + let out = Revisioned::deserialize_revisioned(&mut mem.as_slice()).unwrap(); + assert_eq!(char, out); + } + + #[test] + fn bincode_compat_char() { + assert_bincode_compat(&char::MAX); + assert_bincode_compat(&'\0'); + assert_bincode_compat(&'z'); + assert_bincode_compat(&'0'); + // in the 0x7F - 0x07FF range + assert_bincode_compat(&'ʘ'); + // in the 0x7FF - 0xFFFF range + assert_bincode_compat(&'ꚸ'); + // in the 0xFFFF - 0x10FFFF range + assert_bincode_compat(&'𐃌'); + } + + #[test] + fn bincode_compat_string() { + assert_bincode_compat(&char::MAX.to_string()); + assert_bincode_compat(&'\0'.to_string()); + assert_bincode_compat(&'z'.to_string()); + assert_bincode_compat(&'0'.to_string()); + // in the 0x7F - 0x07FF range + assert_bincode_compat(&'ʘ'.to_string()); + // in the 0x7FF - 0xFFFF range + assert_bincode_compat(&'ꚸ'.to_string()); + // in the 0xFFFF - 0x10FFFF range + assert_bincode_compat(&'𐃌'.to_string()); + } } diff --git a/src/implementations/tuple.rs b/src/implementations/tuple.rs index ac3dc57..d103ad4 100644 --- a/src/implementations/tuple.rs +++ b/src/implementations/tuple.rs @@ -72,11 +72,14 @@ impl_tuple! { A,B,C,D,E,F } #[cfg(test)] mod tests { + use crate::implementations::assert_bincode_compat; + use super::Revisioned; #[test] fn test_tuple_2() { let val = (String::from("test"), true); + assert_bincode_compat(&val); let mut mem: Vec = vec![]; val.serialize_revisioned(&mut mem).unwrap(); assert_eq!(mem.len(), 6); @@ -88,6 +91,7 @@ mod tests { #[test] fn test_tuple_3() { let val = (String::from("test"), true, 1419247293847192847.13947134978139487); + assert_bincode_compat(&val); let mut mem: Vec = vec![]; val.serialize_revisioned(&mut mem).unwrap(); assert_eq!(mem.len(), 14); @@ -99,6 +103,7 @@ mod tests { #[test] fn test_tuple_4() { let val = (String::from("test"), true, 1419247293847192847.13947134978139487, Some('t')); + assert_bincode_compat(&val); let mut mem: Vec = vec![]; val.serialize_revisioned(&mut mem).unwrap(); assert_eq!(mem.len(), 16); @@ -118,6 +123,7 @@ mod tests { Some('t'), vec![4u8, 19u8, 133u8], ); + assert_bincode_compat(&val); let mut mem: Vec = vec![]; val.serialize_revisioned(&mut mem).unwrap(); assert_eq!(mem.len(), 20); diff --git a/src/implementations/vecs.rs b/src/implementations/vecs.rs index e3eb61d..7491356 100644 --- a/src/implementations/vecs.rs +++ b/src/implementations/vecs.rs @@ -1,6 +1,18 @@ use super::super::Error; use super::super::Revisioned; +pub(crate) fn serialize_slice(v: &[T], writer: &mut W) -> Result<(), Error> +where + W: std::io::Write, + T: Revisioned, +{ + v.len().serialize_revisioned(writer)?; + for v in v { + v.serialize_revisioned(writer)?; + } + Ok(()) +} + impl Revisioned for Vec where T: Revisioned, From 1900b53b605003614dd640db9c287c92a1f3ace7 Mon Sep 17 00:00:00 2001 From: Mees Delzenne Date: Thu, 15 Aug 2024 20:50:49 +0200 Subject: [PATCH 3/8] Fix integer overflow handling --- src/implementations/primitives.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/implementations/primitives.rs b/src/implementations/primitives.rs index 8e36603..4a4a999 100644 --- a/src/implementations/primitives.rs +++ b/src/implementations/primitives.rs @@ -240,7 +240,7 @@ macro_rules! impl_revisioned_int { where Self: Sized, { - decode_u64(reader).map(|x| x as $ty) + decode_u64(reader).and_then(|x| x.try_into().map_err(|_| Error::IntegerOverflow)) } fn revision() -> u16 { @@ -263,7 +263,8 @@ macro_rules! impl_revisioned_signed_int { where Self: Sized, { - decode_u64(reader).map(|x| gazgiz_64(x) as $ty) + decode_u64(reader) + .and_then(|x| gazgiz_64(x).try_into().map_err(|_| Error::IntegerOverflow)) } fn revision() -> u16 { From f39831bce940cb726583675b34c1a93bc02eb148 Mon Sep 17 00:00:00 2001 From: Mees Delzenne Date: Thu, 15 Aug 2024 20:54:44 +0200 Subject: [PATCH 4/8] Fix unused function --- src/implementations/path.rs | 4 ++-- src/implementations/string.rs | 14 ++++---------- src/implementations/vecs.rs | 6 +----- 3 files changed, 7 insertions(+), 17 deletions(-) diff --git a/src/implementations/path.rs b/src/implementations/path.rs index 86d727b..60a69a7 100644 --- a/src/implementations/path.rs +++ b/src/implementations/path.rs @@ -2,13 +2,13 @@ use std::path::PathBuf; use super::super::Error; use super::super::Revisioned; -use super::string::serialize_str; +use super::vecs::serialize_slice; impl Revisioned for PathBuf { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { match self.to_str() { - Some(s) => serialize_str(writer, s), + Some(s) => serialize_slice(s.as_bytes(), writer), None => Err(Error::InvalidPath), } } diff --git a/src/implementations/string.rs b/src/implementations/string.rs index c121c6c..686c510 100644 --- a/src/implementations/string.rs +++ b/src/implementations/string.rs @@ -2,10 +2,7 @@ use core::str; use crate::{Error, Revisioned}; -pub(crate) fn serialize_str(writer: &mut W, str: &str) -> Result<(), Error> { - (str.len() as u64).serialize_revisioned(writer)?; - writer.write_all(str.as_bytes()).map_err(Error::Io) -} +use super::vecs::serialize_slice; impl Revisioned for String { fn revision() -> u16 { @@ -14,16 +11,13 @@ impl Revisioned for String { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - serialize_str(writer, self) + serialize_slice(self.as_bytes(), writer) } #[inline] fn deserialize_revisioned(reader: &mut R) -> Result { - let len: usize = - u64::deserialize_revisioned(reader)?.try_into().map_err(|_| Error::IntegerOverflow)?; - let mut slice = vec![0u8; len]; - reader.read_exact(&mut slice).map_err(Error::Io)?; - String::from_utf8(slice).map_err(|x| Error::Utf8Error(x.utf8_error())) + let bytes = Vec::::deserialize_revisioned(reader)?; + String::from_utf8(bytes).map_err(|x| Error::Utf8Error(x.utf8_error())) } } diff --git a/src/implementations/vecs.rs b/src/implementations/vecs.rs index 7491356..606bfa5 100644 --- a/src/implementations/vecs.rs +++ b/src/implementations/vecs.rs @@ -19,11 +19,7 @@ where { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - self.len().serialize_revisioned(writer)?; - for v in self { - v.serialize_revisioned(writer)?; - } - Ok(()) + serialize_slice(self.as_slice(), writer) } #[inline] From 2bea82430c30ba26aa905430623aad0a311ac9e2 Mon Sep 17 00:00:00 2001 From: Mees Delzenne Date: Thu, 15 Aug 2024 21:03:18 +0200 Subject: [PATCH 5/8] Fix features --- Cargo.toml | 1 + src/implementations/decimal.rs | 29 ++++------------------------- src/implementations/regex.rs | 4 ++-- src/implementations/uuid.rs | 9 ++++----- 4 files changed, 11 insertions(+), 32 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 63240bd..a1f8371 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ members = ["derive"] [features] default = [] + [dependencies] chrono = { version = "0.4.26", features = ["serde"], optional = true } derive = { version = "0.8.0", package = "revision-derive", path = "derive" } diff --git a/src/implementations/decimal.rs b/src/implementations/decimal.rs index e7e131e..7354260 100644 --- a/src/implementations/decimal.rs +++ b/src/implementations/decimal.rs @@ -7,35 +7,14 @@ use rust_decimal::Decimal; impl Revisioned for Decimal { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - writer - .write_all(self.serialize().as_slice()) - .map_err(|e| Error::Io(e.raw_os_error().unwrap_or(0))) + writer.write_all(self.serialize().as_slice()).map_err(Error::Io) } #[inline] fn deserialize_revisioned(reader: &mut R) -> Result { - let mut v = vec![0u8; 16]; - reader - .read_exact(v.as_mut_slice()) - .map_err(|e| Error::Io(e.raw_os_error().unwrap_or(0)))?; - Ok(Decimal::deserialize([ - v.remove(0), - v.remove(0), - v.remove(0), - v.remove(0), - v.remove(0), - v.remove(0), - v.remove(0), - v.remove(0), - v.remove(0), - v.remove(0), - v.remove(0), - v.remove(0), - v.remove(0), - v.remove(0), - v.remove(0), - v.remove(0), - ])) + let mut b = [0u8; 16]; + reader.read_exact(&mut b).map_err(Error::Io)?; + Ok(Decimal::deserialize(b)) } fn revision() -> u16 { diff --git a/src/implementations/regex.rs b/src/implementations/regex.rs index 4ddaf7e..a48d683 100644 --- a/src/implementations/regex.rs +++ b/src/implementations/regex.rs @@ -2,13 +2,13 @@ use super::super::Error; use super::super::Revisioned; -use super::string::serialize_str; +use super::vecs::serialize_slice; use regex::Regex; impl Revisioned for Regex { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - serialize_str(writer, self.as_str()) + serialize_slice(self.as_str().as_bytes(), writer) } #[inline] diff --git a/src/implementations/uuid.rs b/src/implementations/uuid.rs index 7d9f279..2a6fd99 100644 --- a/src/implementations/uuid.rs +++ b/src/implementations/uuid.rs @@ -2,20 +2,19 @@ use super::super::Error; use super::super::Revisioned; +use super::vecs::serialize_slice; use uuid::Uuid; impl Revisioned for Uuid { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - writer.write_all(self.as_bytes()).map_err(|e| Error::Io(e.raw_os_error().unwrap_or(0))) + serialize_slice(self.as_bytes(), writer) } #[inline] fn deserialize_revisioned(reader: &mut R) -> Result { - let mut v = vec![0u8; 16]; - reader - .read_exact(v.as_mut_slice()) - .map_err(|e| Error::Io(e.raw_os_error().unwrap_or(0)))?; + let mut v = [0u8; 16]; + reader.read_exact(&mut v).map_err(Error::Io)?; Uuid::from_slice(&v).map_err(|_| Error::Deserialize("invalid uuid".to_string())) } From c08748952570ca2836ebdb4249c89e9df2c83c9c Mon Sep 17 00:00:00 2001 From: Mees Delzenne Date: Thu, 15 Aug 2024 21:07:20 +0200 Subject: [PATCH 6/8] Fix clippy + uuid --- src/implementations/primitives.rs | 4 ++-- src/implementations/uuid.rs | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/implementations/primitives.rs b/src/implementations/primitives.rs index 4a4a999..2c432e4 100644 --- a/src/implementations/primitives.rs +++ b/src/implementations/primitives.rs @@ -1,4 +1,4 @@ -use std::{io, u64}; +use std::io; use super::super::Revisioned; use super::read_buffer; @@ -295,7 +295,7 @@ impl Revisioned for i128 { where Self: Sized, { - decode_u128(r).map(|x| gazgiz_128(x)) + decode_u128(r).map(gazgiz_128) } } diff --git a/src/implementations/uuid.rs b/src/implementations/uuid.rs index 2a6fd99..e7ac79e 100644 --- a/src/implementations/uuid.rs +++ b/src/implementations/uuid.rs @@ -2,13 +2,12 @@ use super::super::Error; use super::super::Revisioned; -use super::vecs::serialize_slice; use uuid::Uuid; impl Revisioned for Uuid { #[inline] fn serialize_revisioned(&self, writer: &mut W) -> Result<(), Error> { - serialize_slice(self.as_bytes(), writer) + writer.write_all(self.as_bytes()).map_err(Error::Io) } #[inline] From 08ebda6e658038c232564270be30fd5fbcd94b52 Mon Sep 17 00:00:00 2001 From: Mees Delzenne Date: Thu, 15 Aug 2024 21:09:48 +0200 Subject: [PATCH 7/8] Fix clippy again --- src/implementations/primitives.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/implementations/primitives.rs b/src/implementations/primitives.rs index 2c432e4..be095f3 100644 --- a/src/implementations/primitives.rs +++ b/src/implementations/primitives.rs @@ -356,9 +356,6 @@ impl Revisioned for f64 { #[cfg(test)] mod tests { - use core::{f32, f64}; - use std::u64; - use crate::implementations::{ assert_bincode_compat, primitives::{gazgiz_64, zigzag_64}, From d16da063faac90a4afc13eadfa926cc2cf6811c6 Mon Sep 17 00:00:00 2001 From: Mees Delzenne Date: Thu, 15 Aug 2024 21:13:02 +0200 Subject: [PATCH 8/8] Cleanup some organization --- src/implementations/mod.rs | 17 ----------------- src/implementations/primitives.rs | 7 ++++++- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/src/implementations/mod.rs b/src/implementations/mod.rs index 604f5c0..3e0839f 100644 --- a/src/implementations/mod.rs +++ b/src/implementations/mod.rs @@ -1,7 +1,3 @@ -use std::io; - -use crate::Error; - pub mod arrays; pub mod bound; pub mod boxes; @@ -25,19 +21,6 @@ pub mod uuid; pub mod vecs; pub mod wrapping; -pub fn unexpected_eof() -> Error { - Error::Io(io::Error::new(io::ErrorKind::UnexpectedEof, "")) -} - -pub fn read_buffer(reader: &mut R) -> Result<[u8; COUNT], Error> { - let mut buffer = [0u8; COUNT]; - let count = reader.read(&mut buffer).map_err(Error::Io)?; - if count != COUNT { - return Err(unexpected_eof()); - } - Ok(buffer) -} - #[cfg(test)] #[track_caller] pub fn assert_bincode_compat(v: &T) diff --git a/src/implementations/primitives.rs b/src/implementations/primitives.rs index be095f3..e81eb0b 100644 --- a/src/implementations/primitives.rs +++ b/src/implementations/primitives.rs @@ -1,9 +1,14 @@ use std::io; use super::super::Revisioned; -use super::read_buffer; use crate::Error; +pub fn read_buffer(reader: &mut R) -> Result<[u8; COUNT], Error> { + let mut buffer = [0u8; COUNT]; + reader.read_exact(&mut buffer).map_err(Error::Io)?; + Ok(buffer) +} + /// zigzag encode a 64bit integer fn zigzag_64(v: i64) -> u64 { (v >> (i64::BITS - 1)) as u64 ^ ((v as u64) << 1)