diff --git a/parquet_integration/write_parquet.py b/parquet_integration/write_parquet.py index bf2e39cb678..6c10f8b5f00 100644 --- a/parquet_integration/write_parquet.py +++ b/parquet_integration/write_parquet.py @@ -32,6 +32,7 @@ def case_basic_nullable(size=1): pa.field("timestamp_us", pa.timestamp("us")), pa.field("timestamp_s", pa.timestamp("s")), pa.field("emoji", pa.utf8()), + pa.field("timestamp_s_utc", pa.timestamp("s", "UTC")), ] schema = pa.schema(fields) @@ -50,6 +51,7 @@ def case_basic_nullable(size=1): "timestamp_us": int64 * size, "timestamp_s": int64 * size, "emoji": emoji * size, + "timestamp_s_utc": int64 * size, }, schema, f"basic_nullable_{size*10}.parquet", diff --git a/src/io/parquet/read/deserialize/mod.rs b/src/io/parquet/read/deserialize/mod.rs index fe870d904d2..4e8a6de31c3 100644 --- a/src/io/parquet/read/deserialize/mod.rs +++ b/src/io/parquet/read/deserialize/mod.rs @@ -79,7 +79,7 @@ pub fn page_iter_to_arrays<'a, I: 'a + DataPages>( primitive::Iter::new(pages, data_type, chunk_size, |x: i32| x as i32), )), - Timestamp(time_unit, None) => { + Timestamp(time_unit, _) => { let time_unit = *time_unit; return timestamp( pages, @@ -150,9 +150,12 @@ pub fn page_iter_to_arrays<'a, I: 'a + DataPages>( }, // INT64 - Int64 | Date64 | Time64(_) | Duration(_) | Timestamp(_, _) => dyn_iter(iden( - primitive::Iter::new(pages, data_type, chunk_size, |x: i64| x as i64), - )), + Int64 | Date64 | Time64(_) | Duration(_) => dyn_iter(iden(primitive::Iter::new( + pages, + data_type, + chunk_size, + |x: i64| x as i64, + ))), UInt64 => dyn_iter(iden(primitive::Iter::new( pages, data_type, @@ -444,7 +447,7 @@ fn dict_read<'a, K: DictionaryKey, I: 'a + DataPages>( }), ), - Timestamp(time_unit, None) => { + Timestamp(time_unit, _) => { let time_unit = *time_unit; return timestamp_dict::( iter, @@ -456,7 +459,7 @@ fn dict_read<'a, K: DictionaryKey, I: 'a + DataPages>( ); } - Int64 | Date64 | Time64(_) | Duration(_) | Timestamp(_, _) => dyn_iter( + Int64 | Date64 | Time64(_) | Duration(_) => dyn_iter( primitive::DictIter::::new(iter, data_type, chunk_size, |x: i64| x), ), Float32 => dyn_iter(primitive::DictIter::::new( diff --git a/tests/it/io/parquet/mod.rs b/tests/it/io/parquet/mod.rs index d5446a8c8b9..8eb009204b2 100644 --- a/tests/it/io/parquet/mod.rs +++ b/tests/it/io/parquet/mod.rs @@ -330,6 +330,12 @@ pub fn pyarrow_nullable(column: usize) -> Box { 11 => Box::new( PrimitiveArray::::from(i64_values).to(DataType::Timestamp(TimeUnit::Second, None)), ), + 13 => Box::new( + PrimitiveArray::::from(i64_values).to(DataType::Timestamp( + TimeUnit::Second, + Some("UTC".to_string()), + )), + ), _ => unreachable!(), } } @@ -415,6 +421,13 @@ pub fn pyarrow_nullable_statistics(column: usize) -> Option> min_value: Some(0), max_value: Some(9), }), + 13 => Box::new(PrimitiveStatistics:: { + data_type: DataType::Timestamp(TimeUnit::Second, Some("UTC".to_string())), + distinct_count: None, + null_count: Some(3), + min_value: Some(0), + max_value: Some(9), + }), _ => unreachable!(), }) } diff --git a/tests/it/io/parquet/read.rs b/tests/it/io/parquet/read.rs index 166becd593f..90d61f0a97c 100644 --- a/tests/it/io/parquet/read.rs +++ b/tests/it/io/parquet/read.rs @@ -355,6 +355,11 @@ fn v1_timestamp_s_nullable_dict() -> Result<()> { test_pyarrow_integration(11, 1, "basic", true, false, None) } +#[test] +fn v1_timestamp_s_utc_nullable() -> Result<()> { + test_pyarrow_integration(13, 1, "basic", false, false, None) +} + #[test] fn v2_decimal_26_required() -> Result<()> { test_pyarrow_integration(8, 2, "basic", false, true, None)