diff --git a/Cargo.lock b/Cargo.lock index c780cafb..6965a3e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1798,7 +1798,7 @@ checksum = "33ea5043e58958ee56f3e15a90aee535795cd7dfd319846288d93c5b57d85cbe" [[package]] name = "orc-rust" version = "0.3.1" -source = "git+https://github.com/harveyyue/datafusion-orc.git?rev=507600707a08c4bf7e2605a7ea40a5941456779d#507600707a08c4bf7e2605a7ea40a5941456779d" +source = "git+https://github.com/harveyyue/datafusion-orc.git?rev=f0ff4bcffa762b62e8c57ed4c2f6e1a9547b4abb#f0ff4bcffa762b62e8c57ed4c2f6e1a9547b4abb" dependencies = [ "arrow", "async-trait", diff --git a/Cargo.toml b/Cargo.toml index 7e9417c2..72286281 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -72,7 +72,7 @@ datafusion-expr = { git = "https://github.com/harveyyue/datafusion.git", rev = " datafusion-execution = { git = "https://github.com/harveyyue/datafusion.git", rev = "d33877f8fbc7c57de946dc6081b2b357eedd0df9"} datafusion-optimizer = { git = "https://github.com/harveyyue/datafusion.git", rev = "d33877f8fbc7c57de946dc6081b2b357eedd0df9"} datafusion-physical-expr = { git = "https://github.com/harveyyue/datafusion.git", rev = "d33877f8fbc7c57de946dc6081b2b357eedd0df9"} -orc-rust = { git = "https://github.com/harveyyue/datafusion-orc.git", rev = "507600707a08c4bf7e2605a7ea40a5941456779d"} +orc-rust = { git = "https://github.com/harveyyue/datafusion-orc.git", rev = "f0ff4bcffa762b62e8c57ed4c2f6e1a9547b4abb"} # arrow: branch=v50-blaze arrow = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "7471d70f7ae6edd5d4da82b7d966a8ede720e499"} diff --git a/native-engine/datafusion-ext-plans/src/orc_exec.rs b/native-engine/datafusion-ext-plans/src/orc_exec.rs index bb8b2086..dfca0d9d 100644 --- a/native-engine/datafusion-ext-plans/src/orc_exec.rs +++ b/native-engine/datafusion-ext-plans/src/orc_exec.rs @@ -221,9 +221,13 @@ impl FileOpener for OrcOpener { let schema_adapter = SchemaAdapter::new(projected_schema); Ok(Box::pin(async move { - let builder = ArrowReaderBuilder::try_new_async(reader) + let mut builder = ArrowReaderBuilder::try_new_async(reader) .await .map_err(ArrowError::from)?; + if let Some(range) = file_meta.range.clone() { + let range = range.start as usize..range.end as usize; + builder = builder.with_file_byte_range(range); + } let file_schema = builder.schema(); let (schema_mapping, adapted_projections) = schema_adapter.map_schema(&file_schema)?; // Offset by 1 since index 0 is the root