Skip to content

Commit

Permalink
Re-export Arrow and Parquet crates from DataFusion (#39)
Browse files Browse the repository at this point in the history
* Re-export Arrow and Parquet crates

* Switch benchmarks crate to use DF-exported Arrow and Parquet deps

* Switch datafusion-examples crate to use DF-exported Arrow dep
  • Loading branch information
returnString authored Apr 24, 2021
1 parent 3cb83fe commit 9ba214a
Show file tree
Hide file tree
Showing 13 changed files with 40 additions and 38 deletions.
2 changes: 0 additions & 2 deletions benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ simd = ["datafusion/simd"]
snmalloc = ["snmalloc-rs"]

[dependencies]
arrow = { git = "https://github.com/apache/arrow-rs", rev = "c3fe3bab9905739fdda75301dab07a18c91731bd" }
parquet = { git = "https://github.com/apache/arrow-rs", rev = "c3fe3bab9905739fdda75301dab07a18c91731bd" }
datafusion = { path = "../datafusion" }
ballista = { path = "../ballista/rust/client" }
structopt = { version = "0.3", default-features = false }
Expand Down
5 changes: 3 additions & 2 deletions benchmarks/src/bin/nyctaxi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ use std::path::PathBuf;
use std::process;
use std::time::Instant;

use arrow::datatypes::{DataType, Field, Schema};
use arrow::util::pretty;
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::util::pretty;

use datafusion::error::Result;
use datafusion::execution::context::{ExecutionConfig, ExecutionContext};

Expand Down
25 changes: 13 additions & 12 deletions benchmarks/src/bin/tpch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,21 @@ use std::{

use futures::StreamExt;

use arrow::datatypes::{DataType, Field, Schema};
use arrow::util::pretty;
use ballista::context::BallistaContext;

use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::arrow::util::pretty;

use datafusion::datasource::parquet::ParquetTable;
use datafusion::datasource::{CsvFile, MemTable, TableProvider};
use datafusion::error::{DataFusionError, Result};
use datafusion::logical_plan::LogicalPlan;
use datafusion::physical_plan::collect;
use datafusion::prelude::*;
use parquet::basic::Compression;
use parquet::file::properties::WriterProperties;

use datafusion::parquet::basic::Compression;
use datafusion::parquet::file::properties::WriterProperties;
use structopt::StructOpt;

#[cfg(feature = "snmalloc")]
Expand Down Expand Up @@ -149,9 +153,7 @@ async fn main() -> Result<()> {
}
}

async fn benchmark_datafusion(
opt: BenchmarkOpt,
) -> Result<Vec<arrow::record_batch::RecordBatch>> {
async fn benchmark_datafusion(opt: BenchmarkOpt) -> Result<Vec<RecordBatch>> {
println!("Running benchmarks with the following options: {:?}", opt);
let config = ExecutionConfig::new()
.with_concurrency(opt.concurrency)
Expand Down Expand Up @@ -186,7 +188,7 @@ async fn benchmark_datafusion(

let mut millis = vec![];
// run benchmark
let mut result: Vec<arrow::record_batch::RecordBatch> = Vec::with_capacity(1);
let mut result: Vec<RecordBatch> = Vec::with_capacity(1);
for i in 0..opt.iterations {
let start = Instant::now();
let plan = create_logical_plan(&mut ctx, opt.query)?;
Expand Down Expand Up @@ -299,7 +301,7 @@ async fn execute_query(
ctx: &mut ExecutionContext,
plan: &LogicalPlan,
debug: bool,
) -> Result<Vec<arrow::record_batch::RecordBatch>> {
) -> Result<Vec<RecordBatch>> {
if debug {
println!("Logical plan:\n{:?}", plan);
}
Expand Down Expand Up @@ -523,9 +525,8 @@ mod tests {
use std::env;
use std::sync::Arc;

use arrow::array::*;
use arrow::record_batch::RecordBatch;
use arrow::util::display::array_value_to_string;
use datafusion::arrow::array::*;
use datafusion::arrow::util::display::array_value_to_string;

use datafusion::logical_plan::Expr;
use datafusion::logical_plan::Expr::Cast;
Expand Down
1 change: 0 additions & 1 deletion datafusion-examples/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ publish = false


[dev-dependencies]
arrow = { git = "https://github.com/apache/arrow-rs", rev = "c3fe3bab9905739fdda75301dab07a18c91731bd" }
arrow-flight = { git = "https://github.com/apache/arrow-rs", rev = "c3fe3bab9905739fdda75301dab07a18c91731bd" }
datafusion = { path = "../datafusion" }
prost = "0.7"
Expand Down
4 changes: 2 additions & 2 deletions datafusion-examples/examples/csv_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use arrow::util::pretty;
use datafusion::arrow::util::pretty;

use datafusion::error::Result;
use datafusion::prelude::*;
Expand All @@ -27,7 +27,7 @@ async fn main() -> Result<()> {
// create local execution context
let mut ctx = ExecutionContext::new();

let testdata = arrow::util::test_util::arrow_test_data();
let testdata = datafusion::arrow::util::test_util::arrow_test_data();

// register csv file with the execution context
ctx.register_csv(
Expand Down
4 changes: 2 additions & 2 deletions datafusion-examples/examples/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use arrow::util::pretty;
use datafusion::arrow::util::pretty;

use datafusion::error::Result;
use datafusion::prelude::*;
Expand All @@ -27,7 +27,7 @@ async fn main() -> Result<()> {
// create local execution context
let mut ctx = ExecutionContext::new();

let testdata = arrow::util::test_util::parquet_test_data();
let testdata = datafusion::arrow::util::test_util::parquet_test_data();

let filename = &format!("{}/alltypes_plain.parquet", testdata);

Expand Down
8 changes: 4 additions & 4 deletions datafusion-examples/examples/dataframe_in_memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@

use std::sync::Arc;

use arrow::array::{Int32Array, StringArray};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
use arrow::util::pretty;
use datafusion::arrow::array::{Int32Array, StringArray};
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::arrow::util::pretty;

use datafusion::datasource::MemTable;
use datafusion::error::Result;
Expand Down
6 changes: 3 additions & 3 deletions datafusion-examples/examples/flight_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
use std::convert::TryFrom;
use std::sync::Arc;

use arrow::datatypes::Schema;
use arrow::util::pretty;
use datafusion::arrow::datatypes::Schema;
use datafusion::arrow::util::pretty;

use arrow_flight::flight_descriptor;
use arrow_flight::flight_service_client::FlightServiceClient;
Expand All @@ -31,7 +31,7 @@ use arrow_flight::{FlightDescriptor, Ticket};
/// This example is run along-side the example `flight_server`.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let testdata = arrow::util::test_util::parquet_test_data();
let testdata = datafusion::arrow::util::test_util::parquet_test_data();

// Create Flight client
let mut client = FlightServiceClient::connect("http://localhost:50051").await?;
Expand Down
6 changes: 3 additions & 3 deletions datafusion-examples/examples/flight_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ impl FlightService for FlightServiceImpl {

let table = ParquetTable::try_new(&request.path[0], num_cpus::get()).unwrap();

let options = arrow::ipc::writer::IpcWriteOptions::default();
let options = datafusion::arrow::ipc::writer::IpcWriteOptions::default();
let schema_result = arrow_flight::utils::flight_schema_from_arrow_schema(
table.schema().as_ref(),
&options,
Expand All @@ -87,7 +87,7 @@ impl FlightService for FlightServiceImpl {
// create local execution context
let mut ctx = ExecutionContext::new();

let testdata = arrow::util::test_util::parquet_test_data();
let testdata = datafusion::arrow::util::test_util::parquet_test_data();

// register parquet file with the execution context
ctx.register_parquet(
Expand All @@ -106,7 +106,7 @@ impl FlightService for FlightServiceImpl {
}

// add an initial FlightData message that sends schema
let options = arrow::ipc::writer::IpcWriteOptions::default();
let options = datafusion::arrow::ipc::writer::IpcWriteOptions::default();
let schema_flight_data =
arrow_flight::utils::flight_data_from_arrow_schema(
&df.schema().clone().into(),
Expand Down
4 changes: 2 additions & 2 deletions datafusion-examples/examples/parquet_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use arrow::util::pretty;
use datafusion::arrow::util::pretty;

use datafusion::error::Result;
use datafusion::prelude::*;
Expand All @@ -27,7 +27,7 @@ async fn main() -> Result<()> {
// create local execution context
let mut ctx = ExecutionContext::new();

let testdata = arrow::util::test_util::parquet_test_data();
let testdata = datafusion::arrow::util::test_util::parquet_test_data();

// register parquet file with the execution context
ctx.register_parquet(
Expand Down
4 changes: 2 additions & 2 deletions datafusion-examples/examples/simple_udaf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

/// In this example we will declare a single-type, single return type UDAF that computes the geometric mean.
/// The geometric mean is described here: https://en.wikipedia.org/wiki/Geometric_mean
use arrow::{
use datafusion::arrow::{
array::Float32Array, array::Float64Array, datatypes::DataType,
record_batch::RecordBatch,
};
Expand All @@ -28,7 +28,7 @@ use std::sync::Arc;

// create local execution context with an in-memory table
fn create_context() -> Result<ExecutionContext> {
use arrow::datatypes::{Field, Schema};
use datafusion::arrow::datatypes::{Field, Schema};
use datafusion::datasource::MemTable;
// define a schema.
let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Float32, false)]));
Expand Down
4 changes: 2 additions & 2 deletions datafusion-examples/examples/simple_udf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use arrow::{
use datafusion::arrow::{
array::{ArrayRef, Float32Array, Float64Array},
datatypes::DataType,
record_batch::RecordBatch,
Expand All @@ -28,7 +28,7 @@ use std::sync::Arc;

// create local execution context with an in-memory table
fn create_context() -> Result<ExecutionContext> {
use arrow::datatypes::{Field, Schema};
use datafusion::arrow::datatypes::{Field, Schema};
use datafusion::datasource::MemTable;
// define a schema.
let schema = Arc::new(Schema::new(vec![
Expand Down
5 changes: 4 additions & 1 deletion datafusion/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,6 @@
//!
//! you can find examples of each of them in examples section.
extern crate arrow;
extern crate sqlparser;

pub mod catalog;
Expand All @@ -200,6 +199,10 @@ pub mod scalar;
pub mod sql;
pub mod variable;

// re-export dependencies from arrow-rs to minimise version maintenance for crate users
pub use arrow;
pub use parquet;

#[cfg(test)]
pub mod test;

Expand Down

0 comments on commit 9ba214a

Please sign in to comment.