From e3b724cb8e785b939520788ea4643310a650cd93 Mon Sep 17 00:00:00 2001 From: Tim Evans Date: Wed, 29 May 2024 14:47:58 +1200 Subject: [PATCH] Increment to beta version, updated dependencies and the code that used them, fixed minor grammar errors. --- Cargo.toml | 29 ++++++++++++++++------------- README.md | 4 ++-- docs/index.md | 2 +- docs/start.md | 2 +- omf-c/Cargo.toml | 2 +- omf.schema.json | 2 +- src/date_time.rs | 15 ++++++++------- src/file/image.rs | 8 ++++---- src/file/zip_container.rs | 6 +++--- src/pqarray/array_type.rs | 8 ++++---- src/pqarray/read.rs | 16 +++++++++------- src/pqarray/source.rs | 10 ---------- src/schema.rs | 2 +- src/version.rs | 2 +- tests/conversion_tests.rs | 2 +- 15 files changed, 53 insertions(+), 57 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 24993fb..48e47be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "omf" -version = "0.1.0-alpha.2" +version = "0.1.0-beta.1" description = "File reader and writer for Open Mining Format." 
authors = ["Tim Evans "] license = "MIT" @@ -34,15 +34,18 @@ omf1 = ["parquet"] members = ["omf-c"] [workspace.dependencies] -bytes = "1.4.0" -cbindgen = { version = "0.24.5", default-features = false } -chrono = { version = "0.4.30", default-features = false, features = ["serde"] } -flate2 = "1.0.27" -image = { version = "0.24.7", default-features = false, features = ["png", "jpeg"] } -parquet = { version = "46.0.0", default-features = false, features = ["flate2"] } -regex = "1.9.3" -schemars = { version = "0.8.12", features = ["chrono"] } -serde = { version = "1.0.188", features = ["derive"] } -serde_json = { version = "1.0.107", features = ["float_roundtrip"] } -thiserror = "1.0.47" -zip = { version = "0.6.6", default-features = false } +bytes = "1" +cbindgen = { version = "0.26", default-features = false } +chrono = { version = "0.4", default-features = false, features = ["serde"] } +flate2 = "1.0" +image = { version = "0.25", default-features = false, features = [ + "png", + "jpeg", +] } +parquet = { version = "51", default-features = false, features = ["flate2"] } +regex = "1" +schemars = { version = "0.8", features = ["chrono"] } +serde = { version = "1", features = ["derive"] } +serde_json = { version = "1", features = ["float_roundtrip"] } +thiserror = "1" +zip = { version = "2", default-features = false } diff --git a/README.md b/README.md index 2305aeb..bd5924b 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,9 @@ A library for reading and writing files in Open Mining Format 2.0. Also supports translating OMF 1 files to OMF 2. -OMF file version: 2.0-alpha.2 +OMF file version: 2.0-beta.1 -Crate version: 0.1.0-alpha.2 +Crate version: 0.1.0-beta.1 **Warning:** this is pre-release code. 
diff --git a/docs/index.md b/docs/index.md index dd776ee..2f7a539 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,6 +1,6 @@ # Home -Version 0.1.0-alpha.1 +Version 0.1.0-beta.1 Specification and library for Open Mining Format version 2, a standard for mining data interchange backed by the diff --git a/docs/start.md b/docs/start.md index 4cd2180..ef70856 100644 --- a/docs/start.md +++ b/docs/start.md @@ -19,7 +19,7 @@ and contains details for linking them together into rich objects. Images may use PNG or JPEG encoding, while arrays use Apache Parquet encoding. > WARNING: -> When OMF files, beware of "zip bombs" where data is maliciously crafted to expand to an +> When reading OMF files, beware of "zip bombs" where data is maliciously crafted to expand to an > excessive size when decompressed, leading to a potential denial of service attack. > Use the limits provided by the C and Rust APIs, and check sizes before allocating memory. diff --git a/omf-c/Cargo.toml b/omf-c/Cargo.toml index bd52ce0..ff77c74 100644 --- a/omf-c/Cargo.toml +++ b/omf-c/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "omfc" -version = "0.1.0-alpha.2" +version = "0.1.0-beta.1" description = "C bindings for `omf`." authors = ["Tim Evans "] license = "MIT" diff --git a/omf.schema.json b/omf.schema.json index 835d7d9..1697e12 100644 --- a/omf.schema.json +++ b/omf.schema.json @@ -1,7 +1,7 @@ { "$schema": "https://json-schema.org/draft/2019-09/schema", "$id": "https://github.com/gmggroup/omf-rust/blob/main/omf.schema.json", - "title": "Open Mining Format 2.0-alpha.2", + "title": "Open Mining Format 2.0-beta.1", "type": "object", "required": [ "date" diff --git a/src/date_time.rs b/src/date_time.rs index 67be039..1a12ba5 100644 --- a/src/date_time.rs +++ b/src/date_time.rs @@ -1,6 +1,6 @@ //! Utility functions for date and date-time conversion. 
-use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, TimeZone, Utc}; +use chrono::{DateTime, Duration, NaiveDate, TimeZone, Utc}; /// Convert a date to the number of days since the epoch. pub fn date_to_f64(date: NaiveDate) -> f64 { @@ -29,8 +29,8 @@ pub fn date_time_to_i64(date_time: DateTime) -> i64 { /// Convert a number of days since the epoch back to a date. pub fn i64_to_date(value: i64) -> NaiveDate { - NaiveDate::default() - .checked_add_signed(Duration::days(value)) + Duration::try_days(value) + .and_then(|d| NaiveDate::default().checked_add_signed(d)) .unwrap_or(if value < 0 { NaiveDate::MIN } else { @@ -51,8 +51,8 @@ pub fn i64_to_date_time(value: i64) -> DateTime { /// Convert a number of milliseconds since the epoch back to a date. pub fn i64_milli_to_date_time(value: i64) -> DateTime { - DateTime::::default() - .checked_add_signed(Duration::milliseconds(value)) + Duration::try_milliseconds(value) + .and_then(|d| DateTime::::default().checked_add_signed(d)) .unwrap_or(if value < 0 { DateTime::::MIN_UTC } else { @@ -76,7 +76,8 @@ pub fn utc_now() -> DateTime { let now = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .expect("valid system time"); - let naive = NaiveDateTime::from_timestamp_opt(now.as_secs() as i64, now.subsec_nanos()) - .expect("valid timestamp"); + let naive = DateTime::from_timestamp(now.as_secs() as i64, now.subsec_nanos()) + .expect("valid timestamp") + .naive_utc(); Utc.from_utc_datetime(&naive) } diff --git a/src/file/image.rs b/src/file/image.rs index 46539ec..e773168 100644 --- a/src/file/image.rs +++ b/src/file/image.rs @@ -33,7 +33,7 @@ impl Writer { image: &image::DynamicImage, ) -> Result, Error> { let mut bytes = Vec::new(); - image.write_to(&mut Cursor::new(&mut bytes), image::ImageOutputFormat::Png)?; + image.write_to(&mut Cursor::new(&mut bytes), image::ImageFormat::Png)?; self.image_bytes(&bytes) } @@ -53,10 +53,10 @@ impl Writer { quality: u8, ) -> Result, Error> { let mut bytes = Vec::new(); 
- image.write_to( + image.write_with_encoder(image::codecs::jpeg::JpegEncoder::new_with_quality( &mut Cursor::new(&mut bytes), - image::ImageOutputFormat::Jpeg(quality.clamp(1, 100)), - )?; + quality.clamp(1, 100), + ))?; self.image_bytes(&bytes) } } diff --git a/src/file/zip_container.rs b/src/file/zip_container.rs index 62489d6..6c93abc 100644 --- a/src/file/zip_container.rs +++ b/src/file/zip_container.rs @@ -6,7 +6,7 @@ use std::{ use zip::{ read::{ZipArchive, ZipFile}, - write::{FileOptions, ZipWriter}, + write::{FullFileOptions, ZipWriter}, }; use crate::{error::Error, FORMAT_NAME}; @@ -54,8 +54,8 @@ impl Builder { FileType::Jpeg => format!("{}{JPEG_EXT}", self.id()), }; self.zip_writer.start_file( - &name, - FileOptions::default() + name.clone(), + FullFileOptions::default() .large_file(true) .compression_method(zip::CompressionMethod::Stored), )?; diff --git a/src/pqarray/array_type.rs b/src/pqarray/array_type.rs index bed36dd..27f57ae 100644 --- a/src/pqarray/array_type.rs +++ b/src/pqarray/array_type.rs @@ -249,9 +249,9 @@ mod tests { #[test] fn date_overflow() { let min = NaiveDate::from_parquet(i32::MIN, &None); - assert_eq!(min.to_string(), "-262144-01-01"); + assert_eq!(min.to_string(), "-262143-01-01"); let max = NaiveDate::from_parquet(i32::MAX, &None); - assert_eq!(max.to_string(), "+262143-12-31"); + assert_eq!(max.to_string(), "+262142-12-31"); } #[test] @@ -281,8 +281,8 @@ mod tests { #[test] fn date_time_overflow() { let min = DateTime::::from_parquet(i64::MIN, &DATE_TIME_MILLI); - assert_eq!(min.to_string(), "-262144-01-01 00:00:00 UTC"); + assert_eq!(min.to_string(), "-262143-01-01 00:00:00 UTC"); let max = DateTime::::from_parquet(i64::MAX, &DATE_TIME_MILLI); - assert_eq!(max.to_string(), "+262143-12-31 23:59:59.999999999 UTC"); + assert_eq!(max.to_string(), "+262142-12-31 23:59:59.999999999 UTC"); } } diff --git a/src/pqarray/read.rs b/src/pqarray/read.rs index 0840dda..6de778c 100644 --- a/src/pqarray/read.rs +++ b/src/pqarray/read.rs @@ 
-96,12 +96,14 @@ impl Counter { /// Reads a block of data from a column reader. fn read_column_chunk( column: &mut ColumnReaderImpl, - values: &mut [D::T], - def_levels: Option<&mut [i16]>, + values: &mut Vec, + mut def_levels: Option<&mut Vec>, ) -> Result<(usize, usize), Error> { - let mut max_records = values.len(); - if let Some(d) = &def_levels { - max_records = max_records.max(d.len()); + values.clear(); + let mut max_records = values.capacity(); + if let Some(d) = &mut def_levels { + d.clear(); + max_records = max_records.max(d.capacity()); } let (n_val, n_def, _n_rep) = column.read_records(max_records, def_levels, None, values)?; Ok((n_val, n_def)) @@ -193,8 +195,8 @@ impl GroupValues

for NullableGroupValues

{ len: 0, index: Counter::new(), value_index: Counter::new(), - values: vec![Default::default(); CHUNK_SIZE], - def_levels: vec![0; CHUNK_SIZE], + values: Vec::with_capacity(CHUNK_SIZE), + def_levels: Vec::with_capacity(CHUNK_SIZE), logical_type, } } diff --git a/src/pqarray/source.rs b/src/pqarray/source.rs index 73ecb89..eeef2d8 100644 --- a/src/pqarray/source.rs +++ b/src/pqarray/source.rs @@ -27,8 +27,6 @@ pub trait Source { fn buffer(&mut self, size: usize) -> usize; /// Write data from the last `buffer()` call. fn write(&mut self, row_group: &mut dyn RowGrouper) -> Result<(), ParquetError>; - /// Size hint from the underlying iterator. - fn size_hint(&self) -> (usize, Option); } fn single_type(name: &str, nullable: bool) -> Type { @@ -108,10 +106,6 @@ impl> Source for RowSource { fn write(&mut self, row_group: &mut dyn RowGrouper) -> Result<(), ParquetError> { R::write_buffer(&self.buffer, row_group, &self.def_levels[..self.count]) } - - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } } pub struct NullableRowSource>> { @@ -182,10 +176,6 @@ impl>> Source for NullableRowSource< &self.def_levels[..self.def_levels.len()], ) } - - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } } impl PqArrayRow for [P; N] { diff --git a/src/schema.rs b/src/schema.rs index d07e015..a298e56 100644 --- a/src/schema.rs +++ b/src/schema.rs @@ -81,7 +81,7 @@ impl Visitor for TweakSchema { // Change references to the generics Array_for_* to just Array. if let Some(r) = schema.reference.as_mut() { if r.starts_with("#/definitions/Array_for_") { - *r = "#/definitions/Array".to_owned(); + "#/definitions/Array".clone_into(r); } } // Then delegate to default implementation to visit any subschemas. diff --git a/src/version.rs b/src/version.rs index c7a4dde..f739245 100644 --- a/src/version.rs +++ b/src/version.rs @@ -20,7 +20,7 @@ pub const FORMAT_VERSION_MINOR: u32 = 0; /// /// This will always be `None` in release versions of the crate. 
Pre-release formats /// may contain experimental changes so can't be opened in by release versions. -pub const FORMAT_VERSION_PRERELEASE: Option<&str> = Some("alpha.2"); +pub const FORMAT_VERSION_PRERELEASE: Option<&str> = Some("beta.1"); /// Returns a string containing the file format version that this crate produces. pub fn format_version() -> String { diff --git a/tests/conversion_tests.rs b/tests/conversion_tests.rs index 3c6eb49..4a878f8 100644 --- a/tests/conversion_tests.rs +++ b/tests/conversion_tests.rs @@ -22,7 +22,7 @@ fn convert_omf1() { assert!(metadata.starts_with( r#"{ "OMF1 conversion": { - "by": "omf 0.1.0-alpha.2", + "by": "omf 0.1.0-beta.1", "from": "OMF-v0.9.0", "on": "# ));