Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Make read_num_bytes a function instead of a macro #2364

Merged
merged 2 commits into from
Aug 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions parquet/src/column/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use crate::column::reader::decoder::{
use crate::data_type::*;
use crate::errors::{ParquetError, Result};
use crate::schema::types::ColumnDescPtr;
use crate::util::bit_util::{ceil, num_required_bits};
use crate::util::bit_util::{ceil, num_required_bits, read_num_bytes};
use crate::util::memory::ByteBufferPtr;

pub(crate) mod decoder;
Expand Down Expand Up @@ -520,7 +520,7 @@ fn parse_v1_level(
match encoding {
Encoding::RLE => {
let i32_size = std::mem::size_of::<i32>();
let data_size = read_num_bytes!(i32, i32_size, buf.as_ref()) as usize;
let data_size = read_num_bytes::<i32>(i32_size, buf.as_ref()) as usize;
Ok((i32_size + data_size, buf.range(i32_size, data_size)))
}
Encoding::BIT_PACKED => {
Expand Down
25 changes: 5 additions & 20 deletions parquet/src/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ impl AsBytes for str {

pub(crate) mod private {
use crate::encodings::decoding::PlainDecoderDetails;
use crate::util::bit_util::{BitReader, BitWriter};
use crate::util::bit_util::{read_num_bytes, BitReader, BitWriter};
use crate::util::memory::ByteBufferPtr;

use crate::basic::Type;
Expand Down Expand Up @@ -907,21 +907,6 @@ pub(crate) mod private {
}
}

// TODO - Why does macro importing fail?
/// Reads `$size` of bytes from `$src`, and reinterprets them as type `$ty`, in
/// little-endian order. `$ty` must implement the `Default` trait. Otherwise this won't
/// compile.
/// This is copied and modified from byteorder crate.
macro_rules! read_num_bytes {
($ty:ty, $size:expr, $src:expr) => {{
assert!($size <= $src.len());
let mut buffer =
<$ty as $crate::util::bit_util::FromBytes>::Buffer::default();
buffer.as_mut()[..$size].copy_from_slice(&$src[..$size]);
<$ty>::from_ne_bytes(buffer)
}};
}

impl ParquetValueType for super::ByteArray {
const PHYSICAL_TYPE: Type = Type::BYTE_ARRAY;

Expand Down Expand Up @@ -961,17 +946,17 @@ pub(crate) mod private {
.as_mut()
.expect("set_data should have been called");
let num_values = std::cmp::min(buffer.len(), decoder.num_values);
for i in 0..num_values {
for val_array in buffer.iter_mut().take(num_values) {
let len: usize =
read_num_bytes!(u32, 4, data.start_from(decoder.start).as_ref())
read_num_bytes::<u32>(4, data.start_from(decoder.start).as_ref())
as usize;
decoder.start += std::mem::size_of::<u32>();

if data.len() < decoder.start + len {
return Err(eof_err!("Not enough bytes to decode"));
}

let val: &mut Self = buffer[i].as_mut_any().downcast_mut().unwrap();
let val: &mut Self = val_array.as_mut_any().downcast_mut().unwrap();

val.set_data(data.range(decoder.start, len));
decoder.start += len;
Expand All @@ -990,7 +975,7 @@ pub(crate) mod private {

for _ in 0..num_values {
let len: usize =
read_num_bytes!(u32, 4, data.start_from(decoder.start).as_ref())
read_num_bytes::<u32>(4, data.start_from(decoder.start).as_ref())
as usize;
decoder.start += std::mem::size_of::<u32>() + len;
}
Expand Down
2 changes: 1 addition & 1 deletion parquet/src/encodings/decoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ impl<T: DataType> Decoder<T> for RleValueDecoder<T> {

// We still need to remove prefix of i32 from the stream.
const I32_SIZE: usize = mem::size_of::<i32>();
let data_size = read_num_bytes!(i32, I32_SIZE, data.as_ref()) as usize;
let data_size = bit_util::read_num_bytes::<i32>(I32_SIZE, data.as_ref()) as usize;
self.decoder = RleDecoder::new(1);
self.decoder.set_data(data.range(I32_SIZE, data_size));
self.values_left = num_values;
Expand Down
4 changes: 2 additions & 2 deletions parquet/src/encodings/levels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use crate::basic::Encoding;
use crate::data_type::AsBytes;
use crate::errors::Result;
use crate::util::{
bit_util::{ceil, num_required_bits, BitReader, BitWriter},
bit_util::{ceil, num_required_bits, read_num_bytes, BitReader, BitWriter},
memory::ByteBufferPtr,
};

Expand Down Expand Up @@ -190,7 +190,7 @@ impl LevelDecoder {
LevelDecoder::Rle(ref mut num_values, ref mut decoder) => {
*num_values = Some(num_buffered_values);
let i32_size = mem::size_of::<i32>();
let data_size = read_num_bytes!(i32, i32_size, data.as_ref()) as usize;
let data_size = read_num_bytes::<i32>(i32_size, data.as_ref()) as usize;
decoder.set_data(data.range(i32_size, data_size));
i32_size + data_size
}
Expand Down
28 changes: 14 additions & 14 deletions parquet/src/util/bit_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,17 +88,17 @@ impl FromBytes for bool {

from_le_bytes! { u8, u16, u32, u64, i8, i16, i32, i64, f32, f64 }

/// Reads `$size` of bytes from `$src`, and reinterprets them as type `$ty`, in
/// little-endian order. `$ty` must implement the `Default` trait. Otherwise this won't
/// compile.
/// Reads `size` of bytes from `src`, and reinterprets them as type `ty`, in
/// little-endian order.
/// This is copied and modified from byteorder crate.
macro_rules! read_num_bytes {
($ty:ty, $size:expr, $src:expr) => {{
assert!($size <= $src.len());
let mut buffer = <$ty as $crate::util::bit_util::FromBytes>::Buffer::default();
buffer.as_mut()[..$size].copy_from_slice(&$src[..$size]);
<$ty>::from_ne_bytes(buffer)
}};
pub(crate) fn read_num_bytes<T>(size: usize, src: &[u8]) -> T
where
T: FromBytes,
{
assert!(size <= src.len());
let mut buffer = <T as FromBytes>::Buffer::default();
buffer.as_mut()[..size].copy_from_slice(&src[..size]);
<T>::from_ne_bytes(buffer)
}

/// Converts value `val` of type `T` to a byte vector, by reading `num_bytes` from `val`.
Expand Down Expand Up @@ -383,7 +383,7 @@ impl BitReader {
pub fn new(buffer: ByteBufferPtr) -> Self {
let total_bytes = buffer.len();
let num_bytes = cmp::min(8, total_bytes);
let buffered_values = read_num_bytes!(u64, num_bytes, buffer.as_ref());
let buffered_values = read_num_bytes::<u64>(num_bytes, buffer.as_ref());
BitReader {
buffer,
buffered_values,
Expand All @@ -397,7 +397,7 @@ impl BitReader {
self.buffer = buffer;
self.total_bytes = self.buffer.len();
let num_bytes = cmp::min(8, self.total_bytes);
self.buffered_values = read_num_bytes!(u64, num_bytes, self.buffer.as_ref());
self.buffered_values = read_num_bytes::<u64>(num_bytes, self.buffer.as_ref());
self.byte_offset = 0;
self.bit_offset = 0;
}
Expand Down Expand Up @@ -666,7 +666,7 @@ impl BitReader {

// Advance byte_offset to next unread byte and read num_bytes
self.byte_offset += bytes_read;
let v = read_num_bytes!(T, num_bytes, self.buffer.data()[self.byte_offset..]);
let v = read_num_bytes::<T>(num_bytes, &self.buffer.data()[self.byte_offset..]);
self.byte_offset += num_bytes;

// Reset buffered_values
Expand Down Expand Up @@ -717,7 +717,7 @@ impl BitReader {
fn reload_buffer_values(&mut self) {
let bytes_to_read = cmp::min(self.total_bytes - self.byte_offset, 8);
self.buffered_values =
read_num_bytes!(u64, bytes_to_read, self.buffer.data()[self.byte_offset..]);
read_num_bytes::<u64>(bytes_to_read, &self.buffer.data()[self.byte_offset..]);
}
}

Expand Down