From e3b724cb8e785b939520788ea4643310a650cd93 Mon Sep 17 00:00:00 2001
From: Tim Evans <tim.evans@aranzgeo.com>
Date: Wed, 29 May 2024 14:47:58 +1200
Subject: [PATCH] Increment to beta version, updated dependencies and the code
 that used them, fixed minor grammar errors.

---
 Cargo.toml                | 29 ++++++++++++++++-------------
 README.md                 |  4 ++--
 docs/index.md             |  2 +-
 docs/start.md             |  2 +-
 omf-c/Cargo.toml          |  2 +-
 omf.schema.json           |  2 +-
 src/date_time.rs          | 15 ++++++++-------
 src/file/image.rs         |  8 ++++----
 src/file/zip_container.rs |  6 +++---
 src/pqarray/array_type.rs |  8 ++++----
 src/pqarray/read.rs       | 16 +++++++++-------
 src/pqarray/source.rs     | 10 ----------
 src/schema.rs             |  2 +-
 src/version.rs            |  2 +-
 tests/conversion_tests.rs |  2 +-
 15 files changed, 53 insertions(+), 57 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 24993fb..48e47be 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "omf"
-version = "0.1.0-alpha.2"
+version = "0.1.0-beta.1"
 description = "File reader and writer for Open Mining Format."
 authors = ["Tim Evans <tim.evans@seequent.com>"]
 license = "MIT"
@@ -34,15 +34,18 @@ omf1 = ["parquet"]
 members = ["omf-c"]
 
 [workspace.dependencies]
-bytes = "1.4.0"
-cbindgen = { version = "0.24.5", default-features = false }
-chrono = { version = "0.4.30", default-features = false, features = ["serde"] }
-flate2 = "1.0.27"
-image = { version = "0.24.7", default-features = false, features = ["png", "jpeg"]  }
-parquet = { version = "46.0.0", default-features = false, features = ["flate2"] }
-regex = "1.9.3"
-schemars = { version = "0.8.12", features = ["chrono"] }
-serde = { version = "1.0.188", features = ["derive"] }
-serde_json = { version = "1.0.107", features = ["float_roundtrip"] }
-thiserror = "1.0.47"
-zip = { version = "0.6.6", default-features = false }
+bytes = "1"
+cbindgen = { version = "0.26", default-features = false }
+chrono = { version = "0.4", default-features = false, features = ["serde"] }
+flate2 = "1.0"
+image = { version = "0.25", default-features = false, features = [
+    "png",
+    "jpeg",
+] }
+parquet = { version = "51", default-features = false, features = ["flate2"] }
+regex = "1"
+schemars = { version = "0.8", features = ["chrono"] }
+serde = { version = "1", features = ["derive"] }
+serde_json = { version = "1", features = ["float_roundtrip"] }
+thiserror = "1"
+zip = { version = "2", default-features = false }
diff --git a/README.md b/README.md
index 2305aeb..bd5924b 100644
--- a/README.md
+++ b/README.md
@@ -6,9 +6,9 @@
 A library for reading and writing files in Open Mining Format 2.0.
 Also supports translating OMF 1 files to OMF 2.
 
-OMF file version: 2.0-alpha.2
+OMF file version: 2.0-beta.1
 
-Crate version: 0.1.0-alpha.2
+Crate version: 0.1.0-beta.1
 
 **Warning:** this is pre-release code.
 
diff --git a/docs/index.md b/docs/index.md
index dd776ee..2f7a539 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,6 +1,6 @@
 # Home
 
-Version 0.1.0-alpha.1
+Version 0.1.0-beta.1
 
 Specification and library for Open Mining Format version 2,
 a standard for mining data interchange backed by the
diff --git a/docs/start.md b/docs/start.md
index 4cd2180..ef70856 100644
--- a/docs/start.md
+++ b/docs/start.md
@@ -19,7 +19,7 @@ and contains details for linking them together into rich objects.
 Images may use PNG or JPEG encoding, while arrays use Apache Parquet encoding.
 
 > WARNING: 
-> When OMF files, beware of "zip bombs" where data is maliciously crafted to expand to an
+> When reading OMF files, beware of "zip bombs" where data is maliciously crafted to expand to an
 > excessive size when decompressed, leading to a potential denial of service attack.
 > Use the limits provided by the C and Rust APIs, and check sizes before allocating memory.
 
diff --git a/omf-c/Cargo.toml b/omf-c/Cargo.toml
index bd52ce0..ff77c74 100644
--- a/omf-c/Cargo.toml
+++ b/omf-c/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "omfc"
-version = "0.1.0-alpha.2"
+version = "0.1.0-beta.1"
 description = "C bindings for `omf`."
 authors = ["Tim Evans <tim.evans@seequent.com>"]
 license = "MIT"
diff --git a/omf.schema.json b/omf.schema.json
index 835d7d9..1697e12 100644
--- a/omf.schema.json
+++ b/omf.schema.json
@@ -1,7 +1,7 @@
 {
   "$schema": "https://json-schema.org/draft/2019-09/schema",
   "$id": "https://github.com/gmggroup/omf-rust/blob/main/omf.schema.json",
-  "title": "Open Mining Format 2.0-alpha.2",
+  "title": "Open Mining Format 2.0-beta.1",
   "type": "object",
   "required": [
     "date"
diff --git a/src/date_time.rs b/src/date_time.rs
index 67be039..1a12ba5 100644
--- a/src/date_time.rs
+++ b/src/date_time.rs
@@ -1,6 +1,6 @@
 //! Utility functions for date and date-time conversion.
 
-use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, TimeZone, Utc};
+use chrono::{DateTime, Duration, NaiveDate, TimeZone, Utc};
 
 /// Convert a date to the number of days since the epoch.
 pub fn date_to_f64(date: NaiveDate) -> f64 {
@@ -29,8 +29,8 @@ pub fn date_time_to_i64(date_time: DateTime<Utc>) -> i64 {
 
 /// Convert a number of days since the epoch back to a date.
 pub fn i64_to_date(value: i64) -> NaiveDate {
-    NaiveDate::default()
-        .checked_add_signed(Duration::days(value))
+    Duration::try_days(value)
+        .and_then(|d| NaiveDate::default().checked_add_signed(d))
         .unwrap_or(if value < 0 {
             NaiveDate::MIN
         } else {
@@ -51,8 +51,8 @@ pub fn i64_to_date_time(value: i64) -> DateTime<Utc> {
 
 /// Convert a number of milliseconds since the epoch back to a date.
 pub fn i64_milli_to_date_time(value: i64) -> DateTime<Utc> {
-    DateTime::<Utc>::default()
-        .checked_add_signed(Duration::milliseconds(value))
+    Duration::try_milliseconds(value)
+        .and_then(|d| DateTime::<Utc>::default().checked_add_signed(d))
         .unwrap_or(if value < 0 {
             DateTime::<Utc>::MIN_UTC
         } else {
@@ -76,7 +76,8 @@ pub fn utc_now() -> DateTime<Utc> {
     let now = std::time::SystemTime::now()
         .duration_since(std::time::UNIX_EPOCH)
         .expect("valid system time");
-    let naive = NaiveDateTime::from_timestamp_opt(now.as_secs() as i64, now.subsec_nanos())
-        .expect("valid timestamp");
+    let naive = DateTime::from_timestamp(now.as_secs() as i64, now.subsec_nanos())
+        .expect("valid timestamp")
+        .naive_utc();
     Utc.from_utc_datetime(&naive)
 }
diff --git a/src/file/image.rs b/src/file/image.rs
index 46539ec..e773168 100644
--- a/src/file/image.rs
+++ b/src/file/image.rs
@@ -33,7 +33,7 @@ impl Writer {
         image: &image::DynamicImage,
     ) -> Result<Array<array_type::Image>, Error> {
         let mut bytes = Vec::new();
-        image.write_to(&mut Cursor::new(&mut bytes), image::ImageOutputFormat::Png)?;
+        image.write_to(&mut Cursor::new(&mut bytes), image::ImageFormat::Png)?;
         self.image_bytes(&bytes)
     }
 
@@ -53,10 +53,10 @@ impl Writer {
         quality: u8,
     ) -> Result<Array<array_type::Image>, Error> {
         let mut bytes = Vec::new();
-        image.write_to(
+        image.write_with_encoder(image::codecs::jpeg::JpegEncoder::new_with_quality(
             &mut Cursor::new(&mut bytes),
-            image::ImageOutputFormat::Jpeg(quality.clamp(1, 100)),
-        )?;
+            quality.clamp(1, 100),
+        ))?;
         self.image_bytes(&bytes)
     }
 }
diff --git a/src/file/zip_container.rs b/src/file/zip_container.rs
index 62489d6..6c93abc 100644
--- a/src/file/zip_container.rs
+++ b/src/file/zip_container.rs
@@ -6,7 +6,7 @@ use std::{
 
 use zip::{
     read::{ZipArchive, ZipFile},
-    write::{FileOptions, ZipWriter},
+    write::{FullFileOptions, ZipWriter},
 };
 
 use crate::{error::Error, FORMAT_NAME};
@@ -54,8 +54,8 @@ impl Builder {
             FileType::Jpeg => format!("{}{JPEG_EXT}", self.id()),
         };
         self.zip_writer.start_file(
-            &name,
-            FileOptions::default()
+            name.clone(),
+            FullFileOptions::default()
                 .large_file(true)
                 .compression_method(zip::CompressionMethod::Stored),
         )?;
diff --git a/src/pqarray/array_type.rs b/src/pqarray/array_type.rs
index bed36dd..27f57ae 100644
--- a/src/pqarray/array_type.rs
+++ b/src/pqarray/array_type.rs
@@ -249,9 +249,9 @@ mod tests {
     #[test]
     fn date_overflow() {
         let min = NaiveDate::from_parquet(i32::MIN, &None);
-        assert_eq!(min.to_string(), "-262144-01-01");
+        assert_eq!(min.to_string(), "-262143-01-01");
         let max = NaiveDate::from_parquet(i32::MAX, &None);
-        assert_eq!(max.to_string(), "+262143-12-31");
+        assert_eq!(max.to_string(), "+262142-12-31");
     }
 
     #[test]
@@ -281,8 +281,8 @@ mod tests {
     #[test]
     fn date_time_overflow() {
         let min = DateTime::<Utc>::from_parquet(i64::MIN, &DATE_TIME_MILLI);
-        assert_eq!(min.to_string(), "-262144-01-01 00:00:00 UTC");
+        assert_eq!(min.to_string(), "-262143-01-01 00:00:00 UTC");
         let max = DateTime::<Utc>::from_parquet(i64::MAX, &DATE_TIME_MILLI);
-        assert_eq!(max.to_string(), "+262143-12-31 23:59:59.999999999 UTC");
+        assert_eq!(max.to_string(), "+262142-12-31 23:59:59.999999999 UTC");
     }
 }
diff --git a/src/pqarray/read.rs b/src/pqarray/read.rs
index 0840dda..6de778c 100644
--- a/src/pqarray/read.rs
+++ b/src/pqarray/read.rs
@@ -96,12 +96,14 @@ impl Counter {
 /// Reads a block of data from a column reader.
 fn read_column_chunk<D: DataType>(
     column: &mut ColumnReaderImpl<D>,
-    values: &mut [D::T],
-    def_levels: Option<&mut [i16]>,
+    values: &mut Vec<D::T>,
+    mut def_levels: Option<&mut Vec<i16>>,
 ) -> Result<(usize, usize), Error> {
-    let mut max_records = values.len();
-    if let Some(d) = &def_levels {
-        max_records = max_records.max(d.len());
+    values.clear();
+    let mut max_records = values.capacity();
+    if let Some(d) = &mut def_levels {
+        d.clear();
+        max_records = max_records.max(d.capacity());
     }
     let (n_val, n_def, _n_rep) = column.read_records(max_records, def_levels, None, values)?;
     Ok((n_val, n_def))
@@ -193,8 +195,8 @@ impl<P: PqArrayType> GroupValues<P> for NullableGroupValues<P> {
             len: 0,
             index: Counter::new(),
             value_index: Counter::new(),
-            values: vec![Default::default(); CHUNK_SIZE],
-            def_levels: vec![0; CHUNK_SIZE],
+            values: Vec::with_capacity(CHUNK_SIZE),
+            def_levels: Vec::with_capacity(CHUNK_SIZE),
             logical_type,
         }
     }
diff --git a/src/pqarray/source.rs b/src/pqarray/source.rs
index 73ecb89..eeef2d8 100644
--- a/src/pqarray/source.rs
+++ b/src/pqarray/source.rs
@@ -27,8 +27,6 @@ pub trait Source {
     fn buffer(&mut self, size: usize) -> usize;
     /// Write data from the last `buffer()` call.
     fn write(&mut self, row_group: &mut dyn RowGrouper) -> Result<(), ParquetError>;
-    /// Size hint from the underlying iterator.
-    fn size_hint(&self) -> (usize, Option<usize>);
 }
 
 fn single_type<P: PqArrayType>(name: &str, nullable: bool) -> Type {
@@ -108,10 +106,6 @@ impl<R: PqArrayRow, I: Iterator<Item = R>> Source for RowSource<R, I> {
     fn write(&mut self, row_group: &mut dyn RowGrouper) -> Result<(), ParquetError> {
         R::write_buffer(&self.buffer, row_group, &self.def_levels[..self.count])
     }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        self.iter.size_hint()
-    }
 }
 
 pub struct NullableRowSource<R: PqArrayRow, I: Iterator<Item = Option<R>>> {
@@ -182,10 +176,6 @@ impl<R: PqArrayRow, I: Iterator<Item = Option<R>>> Source for NullableRowSource<
             &self.def_levels[..self.def_levels.len()],
         )
     }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        self.iter.size_hint()
-    }
 }
 
 impl<P: PqArrayType, const N: usize> PqArrayRow for [P; N] {
diff --git a/src/schema.rs b/src/schema.rs
index d07e015..a298e56 100644
--- a/src/schema.rs
+++ b/src/schema.rs
@@ -81,7 +81,7 @@ impl Visitor for TweakSchema {
         // Change references to the generics Array_for_* to just Array.
         if let Some(r) = schema.reference.as_mut() {
             if r.starts_with("#/definitions/Array_for_") {
-                *r = "#/definitions/Array".to_owned();
+                "#/definitions/Array".clone_into(r);
             }
         }
         // Then delegate to default implementation to visit any subschemas.
diff --git a/src/version.rs b/src/version.rs
index c7a4dde..f739245 100644
--- a/src/version.rs
+++ b/src/version.rs
@@ -20,7 +20,7 @@ pub const FORMAT_VERSION_MINOR: u32 = 0;
 ///
 /// This will always be `None` in release versions of the crate. Pre-release formats
 /// may contain experimental changes so can't be opened in by release versions.
-pub const FORMAT_VERSION_PRERELEASE: Option<&str> = Some("alpha.2");
+pub const FORMAT_VERSION_PRERELEASE: Option<&str> = Some("beta.1");
 
 /// Returns a string containing the file format version that this crate produces.
 pub fn format_version() -> String {
diff --git a/tests/conversion_tests.rs b/tests/conversion_tests.rs
index 3c6eb49..4a878f8 100644
--- a/tests/conversion_tests.rs
+++ b/tests/conversion_tests.rs
@@ -22,7 +22,7 @@ fn convert_omf1() {
     assert!(metadata.starts_with(
         r#"{
   "OMF1 conversion": {
-    "by": "omf 0.1.0-alpha.2",
+    "by": "omf 0.1.0-beta.1",
     "from": "OMF-v0.9.0",
     "on": "#
     ));