-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[split/1] split datafusion-common module (#1751)
* split datafusion-common module * pyarrow * Update datafusion-common/README.md Co-authored-by: Andy Grove <[email protected]> * Update datafusion/Cargo.toml * include publishing Co-authored-by: Andy Grove <[email protected]>
- Loading branch information
Showing
8 changed files
with
302 additions
and
191 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
[package] | ||
name = "datafusion-common" | ||
description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model" | ||
version = "6.0.0" | ||
homepage = "https://github.com/apache/arrow-datafusion" | ||
repository = "https://github.com/apache/arrow-datafusion" | ||
readme = "README.md" | ||
authors = ["Apache Arrow <[email protected]>"] | ||
license = "Apache-2.0" | ||
keywords = [ "arrow", "query", "sql" ] | ||
edition = "2021" | ||
rust-version = "1.58" | ||
|
||
[lib] | ||
name = "datafusion_common" | ||
path = "src/lib.rs" | ||
|
||
[features] | ||
avro = ["avro-rs"] | ||
pyarrow = ["pyo3"] | ||
|
||
[dependencies] | ||
arrow = { version = "8.0.0", features = ["prettyprint"] } | ||
parquet = { version = "8.0.0", features = ["arrow"] } | ||
avro-rs = { version = "0.13", features = ["snappy"], optional = true } | ||
pyo3 = { version = "0.15", optional = true } | ||
sqlparser = "0.13" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
<!--- | ||
Licensed to the Apache Software Foundation (ASF) under one | ||
or more contributor license agreements. See the NOTICE file | ||
distributed with this work for additional information | ||
regarding copyright ownership. The ASF licenses this file | ||
to you under the Apache License, Version 2.0 (the | ||
"License"); you may not use this file except in compliance | ||
with the License. You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, | ||
software distributed under the License is distributed on an | ||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
KIND, either express or implied. See the License for the | ||
specific language governing permissions and limitations | ||
under the License. | ||
--> | ||
|
||
# DataFusion Common | ||
|
||
This is an internal module for the most fundamental types of [DataFusion][df]. | ||
|
||
[df]: https://crates.io/crates/datafusion |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,209 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
//! DataFusion error types | ||
use std::error; | ||
use std::fmt::{Display, Formatter}; | ||
use std::io; | ||
use std::result; | ||
|
||
use arrow::error::ArrowError; | ||
#[cfg(feature = "avro")] | ||
use avro_rs::Error as AvroError; | ||
use parquet::errors::ParquetError; | ||
#[cfg(feature = "pyarrow")] | ||
use pyo3::exceptions::PyException; | ||
#[cfg(feature = "pyarrow")] | ||
use pyo3::prelude::PyErr; | ||
use sqlparser::parser::ParserError; | ||
|
||
/// Result type for operations that could result in an [DataFusionError] | ||
pub type Result<T> = result::Result<T, DataFusionError>; | ||
|
||
/// Error type for generic operations that could result in DataFusionError::External | ||
pub type GenericError = Box<dyn error::Error + Send + Sync>; | ||
|
||
/// DataFusion error | ||
#[derive(Debug)] | ||
pub enum DataFusionError { | ||
/// Error returned by arrow. | ||
ArrowError(ArrowError), | ||
/// Wraps an error from the Parquet crate | ||
ParquetError(ParquetError), | ||
/// Wraps an error from the Avro crate | ||
#[cfg(feature = "avro")] | ||
AvroError(AvroError), | ||
/// Error associated to I/O operations and associated traits. | ||
IoError(io::Error), | ||
/// Error returned when SQL is syntactically incorrect. | ||
SQL(ParserError), | ||
/// Error returned on a branch that we know it is possible | ||
/// but to which we still have no implementation for. | ||
/// Often, these errors are tracked in our issue tracker. | ||
NotImplemented(String), | ||
/// Error returned as a consequence of an error in DataFusion. | ||
/// This error should not happen in normal usage of DataFusion. | ||
// DataFusions has internal invariants that we are unable to ask the compiler to check for us. | ||
// This error is raised when one of those invariants is not verified during execution. | ||
Internal(String), | ||
/// This error happens whenever a plan is not valid. Examples include | ||
/// impossible casts, schema inference not possible and non-unique column names. | ||
Plan(String), | ||
/// Error returned during execution of the query. | ||
/// Examples include files not found, errors in parsing certain types. | ||
Execution(String), | ||
/// This error is thrown when a consumer cannot acquire memory from the Memory Manager | ||
/// we can just cancel the execution of the partition. | ||
ResourcesExhausted(String), | ||
/// Errors originating from outside DataFusion's core codebase. | ||
/// For example, a custom S3Error from the crate datafusion-objectstore-s3 | ||
External(GenericError), | ||
} | ||
|
||
impl From<io::Error> for DataFusionError { | ||
fn from(e: io::Error) -> Self { | ||
DataFusionError::IoError(e) | ||
} | ||
} | ||
|
||
impl From<ArrowError> for DataFusionError { | ||
fn from(e: ArrowError) -> Self { | ||
DataFusionError::ArrowError(e) | ||
} | ||
} | ||
|
||
#[cfg(feature = "pyarrow")] | ||
impl From<DataFusionError> for PyErr { | ||
fn from(err: DataFusionError) -> PyErr { | ||
PyException::new_err(err.to_string()) | ||
} | ||
} | ||
|
||
impl From<DataFusionError> for ArrowError { | ||
fn from(e: DataFusionError) -> Self { | ||
match e { | ||
DataFusionError::ArrowError(e) => e, | ||
DataFusionError::External(e) => ArrowError::ExternalError(e), | ||
other => ArrowError::ExternalError(Box::new(other)), | ||
} | ||
} | ||
} | ||
|
||
impl From<ParquetError> for DataFusionError { | ||
fn from(e: ParquetError) -> Self { | ||
DataFusionError::ParquetError(e) | ||
} | ||
} | ||
|
||
#[cfg(feature = "avro")] | ||
impl From<AvroError> for DataFusionError { | ||
fn from(e: AvroError) -> Self { | ||
DataFusionError::AvroError(e) | ||
} | ||
} | ||
|
||
impl From<ParserError> for DataFusionError { | ||
fn from(e: ParserError) -> Self { | ||
DataFusionError::SQL(e) | ||
} | ||
} | ||
|
||
impl From<GenericError> for DataFusionError { | ||
fn from(err: GenericError) -> Self { | ||
DataFusionError::External(err) | ||
} | ||
} | ||
|
||
impl Display for DataFusionError { | ||
fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { | ||
match *self { | ||
DataFusionError::ArrowError(ref desc) => write!(f, "Arrow error: {}", desc), | ||
DataFusionError::ParquetError(ref desc) => { | ||
write!(f, "Parquet error: {}", desc) | ||
} | ||
#[cfg(feature = "avro")] | ||
DataFusionError::AvroError(ref desc) => { | ||
write!(f, "Avro error: {}", desc) | ||
} | ||
DataFusionError::IoError(ref desc) => write!(f, "IO error: {}", desc), | ||
DataFusionError::SQL(ref desc) => { | ||
write!(f, "SQL error: {:?}", desc) | ||
} | ||
DataFusionError::NotImplemented(ref desc) => { | ||
write!(f, "This feature is not implemented: {}", desc) | ||
} | ||
DataFusionError::Internal(ref desc) => { | ||
write!(f, "Internal error: {}. This was likely caused by a bug in DataFusion's \ | ||
code and we would welcome that you file an bug report in our issue tracker", desc) | ||
} | ||
DataFusionError::Plan(ref desc) => { | ||
write!(f, "Error during planning: {}", desc) | ||
} | ||
DataFusionError::Execution(ref desc) => { | ||
write!(f, "Execution error: {}", desc) | ||
} | ||
DataFusionError::ResourcesExhausted(ref desc) => { | ||
write!(f, "Resources exhausted: {}", desc) | ||
} | ||
DataFusionError::External(ref desc) => { | ||
write!(f, "External error: {}", desc) | ||
} | ||
} | ||
} | ||
} | ||
|
||
impl error::Error for DataFusionError {} | ||
|
||
#[cfg(test)] | ||
mod test { | ||
use crate::error::DataFusionError; | ||
use arrow::error::ArrowError; | ||
|
||
#[test] | ||
fn arrow_error_to_datafusion() { | ||
let res = return_arrow_error().unwrap_err(); | ||
assert_eq!( | ||
res.to_string(), | ||
"External error: Error during planning: foo" | ||
); | ||
} | ||
|
||
#[test] | ||
fn datafusion_error_to_arrow() { | ||
let res = return_datafusion_error().unwrap_err(); | ||
assert_eq!(res.to_string(), "Arrow error: Schema error: bar"); | ||
} | ||
|
||
/// Model what happens when implementing SendableRecrordBatchStream: | ||
/// DataFusion code needs to return an ArrowError | ||
#[allow(clippy::try_err)] | ||
fn return_arrow_error() -> arrow::error::Result<()> { | ||
// Expect the '?' to work | ||
let _foo = Err(DataFusionError::Plan("foo".to_string()))?; | ||
Ok(()) | ||
} | ||
|
||
/// Model what happens when using arrow kernels in DataFusion | ||
/// code: need to turn an ArrowError into a DataFusionError | ||
#[allow(clippy::try_err)] | ||
fn return_datafusion_error() -> crate::error::Result<()> { | ||
// Expect the '?' to work | ||
let _bar = Err(ArrowError::SchemaError("bar".to_string()))?; | ||
Ok(()) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
mod error; | ||
|
||
pub use error::{DataFusionError, Result}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.