Skip to content

Commit

Permalink
[split/1] split datafusion-common module (#1751)
Browse files Browse the repository at this point in the history
* split datafusion-common module

* pyarrow

* Update datafusion-common/README.md

Co-authored-by: Andy Grove <[email protected]>

* Update datafusion/Cargo.toml

* include publishing

Co-authored-by: Andy Grove <[email protected]>
  • Loading branch information
jimexist and andygrove authored Feb 7, 2022
1 parent 40c29e5 commit fe46a1e
Show file tree
Hide file tree
Showing 8 changed files with 302 additions and 191 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
[workspace]
members = [
"datafusion",
"datafusion-common",
"datafusion-cli",
"datafusion-examples",
"benchmarks",
Expand Down
44 changes: 44 additions & 0 deletions datafusion-common/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

[package]
name = "datafusion-common"
description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model"
version = "6.0.0"
homepage = "https://github.com/apache/arrow-datafusion"
repository = "https://github.com/apache/arrow-datafusion"
readme = "README.md"
authors = ["Apache Arrow <[email protected]>"]
license = "Apache-2.0"
keywords = [ "arrow", "query", "sql" ]
edition = "2021"
rust-version = "1.58"

[lib]
name = "datafusion_common"
path = "src/lib.rs"

[features]
avro = ["avro-rs"]
pyarrow = ["pyo3"]

[dependencies]
arrow = { version = "8.0.0", features = ["prettyprint"] }
parquet = { version = "8.0.0", features = ["arrow"] }
avro-rs = { version = "0.13", features = ["snappy"], optional = true }
pyo3 = { version = "0.15", optional = true }
sqlparser = "0.13"
24 changes: 24 additions & 0 deletions datafusion-common/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<!---
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# DataFusion Common

This is an internal module for the most fundamental types of [DataFusion][df].

[df]: https://crates.io/crates/datafusion
209 changes: 209 additions & 0 deletions datafusion-common/src/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! DataFusion error types
use std::error;
use std::fmt::{Display, Formatter};
use std::io;
use std::result;

use arrow::error::ArrowError;
#[cfg(feature = "avro")]
use avro_rs::Error as AvroError;
use parquet::errors::ParquetError;
#[cfg(feature = "pyarrow")]
use pyo3::exceptions::PyException;
#[cfg(feature = "pyarrow")]
use pyo3::prelude::PyErr;
use sqlparser::parser::ParserError;

/// Result type for operations that could result in an [DataFusionError]
pub type Result<T> = result::Result<T, DataFusionError>;

/// Error type for generic operations that could result in DataFusionError::External
pub type GenericError = Box<dyn error::Error + Send + Sync>;

/// DataFusion error
#[derive(Debug)]
pub enum DataFusionError {
/// Error returned by arrow.
ArrowError(ArrowError),
/// Wraps an error from the Parquet crate
ParquetError(ParquetError),
/// Wraps an error from the Avro crate
#[cfg(feature = "avro")]
AvroError(AvroError),
/// Error associated to I/O operations and associated traits.
IoError(io::Error),
/// Error returned when SQL is syntactically incorrect.
SQL(ParserError),
/// Error returned on a branch that we know it is possible
/// but to which we still have no implementation for.
/// Often, these errors are tracked in our issue tracker.
NotImplemented(String),
/// Error returned as a consequence of an error in DataFusion.
/// This error should not happen in normal usage of DataFusion.
// DataFusions has internal invariants that we are unable to ask the compiler to check for us.
// This error is raised when one of those invariants is not verified during execution.
Internal(String),
/// This error happens whenever a plan is not valid. Examples include
/// impossible casts, schema inference not possible and non-unique column names.
Plan(String),
/// Error returned during execution of the query.
/// Examples include files not found, errors in parsing certain types.
Execution(String),
/// This error is thrown when a consumer cannot acquire memory from the Memory Manager
/// we can just cancel the execution of the partition.
ResourcesExhausted(String),
/// Errors originating from outside DataFusion's core codebase.
/// For example, a custom S3Error from the crate datafusion-objectstore-s3
External(GenericError),
}

impl From<io::Error> for DataFusionError {
fn from(e: io::Error) -> Self {
DataFusionError::IoError(e)
}
}

impl From<ArrowError> for DataFusionError {
fn from(e: ArrowError) -> Self {
DataFusionError::ArrowError(e)
}
}

#[cfg(feature = "pyarrow")]
impl From<DataFusionError> for PyErr {
fn from(err: DataFusionError) -> PyErr {
PyException::new_err(err.to_string())
}
}

impl From<DataFusionError> for ArrowError {
fn from(e: DataFusionError) -> Self {
match e {
DataFusionError::ArrowError(e) => e,
DataFusionError::External(e) => ArrowError::ExternalError(e),
other => ArrowError::ExternalError(Box::new(other)),
}
}
}

impl From<ParquetError> for DataFusionError {
fn from(e: ParquetError) -> Self {
DataFusionError::ParquetError(e)
}
}

#[cfg(feature = "avro")]
impl From<AvroError> for DataFusionError {
fn from(e: AvroError) -> Self {
DataFusionError::AvroError(e)
}
}

impl From<ParserError> for DataFusionError {
fn from(e: ParserError) -> Self {
DataFusionError::SQL(e)
}
}

impl From<GenericError> for DataFusionError {
fn from(err: GenericError) -> Self {
DataFusionError::External(err)
}
}

impl Display for DataFusionError {
fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
match *self {
DataFusionError::ArrowError(ref desc) => write!(f, "Arrow error: {}", desc),
DataFusionError::ParquetError(ref desc) => {
write!(f, "Parquet error: {}", desc)
}
#[cfg(feature = "avro")]
DataFusionError::AvroError(ref desc) => {
write!(f, "Avro error: {}", desc)
}
DataFusionError::IoError(ref desc) => write!(f, "IO error: {}", desc),
DataFusionError::SQL(ref desc) => {
write!(f, "SQL error: {:?}", desc)
}
DataFusionError::NotImplemented(ref desc) => {
write!(f, "This feature is not implemented: {}", desc)
}
DataFusionError::Internal(ref desc) => {
write!(f, "Internal error: {}. This was likely caused by a bug in DataFusion's \
code and we would welcome that you file an bug report in our issue tracker", desc)
}
DataFusionError::Plan(ref desc) => {
write!(f, "Error during planning: {}", desc)
}
DataFusionError::Execution(ref desc) => {
write!(f, "Execution error: {}", desc)
}
DataFusionError::ResourcesExhausted(ref desc) => {
write!(f, "Resources exhausted: {}", desc)
}
DataFusionError::External(ref desc) => {
write!(f, "External error: {}", desc)
}
}
}
}

impl error::Error for DataFusionError {}

#[cfg(test)]
mod test {
use crate::error::DataFusionError;
use arrow::error::ArrowError;

#[test]
fn arrow_error_to_datafusion() {
let res = return_arrow_error().unwrap_err();
assert_eq!(
res.to_string(),
"External error: Error during planning: foo"
);
}

#[test]
fn datafusion_error_to_arrow() {
let res = return_datafusion_error().unwrap_err();
assert_eq!(res.to_string(), "Arrow error: Schema error: bar");
}

/// Model what happens when implementing SendableRecrordBatchStream:
/// DataFusion code needs to return an ArrowError
#[allow(clippy::try_err)]
fn return_arrow_error() -> arrow::error::Result<()> {
// Expect the '?' to work
let _foo = Err(DataFusionError::Plan("foo".to_string()))?;
Ok(())
}

/// Model what happens when using arrow kernels in DataFusion
/// code: need to turn an ArrowError into a DataFusionError
#[allow(clippy::try_err)]
fn return_datafusion_error() -> crate::error::Result<()> {
// Expect the '?' to work
let _bar = Err(ArrowError::SchemaError("bar".to_string()))?;
Ok(())
}
}
20 changes: 20 additions & 0 deletions datafusion-common/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

mod error;

pub use error::{DataFusionError, Result};
5 changes: 3 additions & 2 deletions datafusion/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,14 @@ simd = ["arrow/simd"]
crypto_expressions = ["md-5", "sha2", "blake2", "blake3"]
regex_expressions = ["regex"]
unicode_expressions = ["unicode-segmentation"]
pyarrow = ["pyo3", "arrow/pyarrow"]
pyarrow = ["pyo3", "arrow/pyarrow", "datafusion-common/pyarrow"]
# Used for testing ONLY: causes all values to hash to the same value (test for collisions)
force_hash_collisions = []
# Used to enable the avro format
avro = ["avro-rs", "num-traits"]
avro = ["avro-rs", "num-traits", "datafusion-common/avro"]

[dependencies]
datafusion-common = { path = "../datafusion-common", version = "6.0.0" }
ahash = { version = "0.7", default-features = false }
hashbrown = { version = "0.12", features = ["raw"] }
arrow = { version = "8.0.0", features = ["prettyprint"] }
Expand Down
Loading

0 comments on commit fe46a1e

Please sign in to comment.