Skip to content

Commit

Permalink
Merge 5270354 into 85d5363
Browse files Browse the repository at this point in the history
  • Loading branch information
Brent Gardner authored Aug 8, 2022
2 parents 85d5363 + 5270354 commit a96911d
Show file tree
Hide file tree
Showing 66 changed files with 613 additions and 453 deletions.
76 changes: 73 additions & 3 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,16 @@ jobs:
- uses: actions/checkout@v2
with:
submodules: true
- name: Install protobuf compiler
shell: bash
run: |
mkdir -p $HOME/d/protoc
cd $HOME/d/protoc
export PROTO_ZIP="protoc-21.4-linux-x86_64.zip"
curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP
unzip $PROTO_ZIP
export PATH=$PATH:$HOME/d/protoc/bin
protoc --version
- name: Cache Cargo
uses: actions/cache@v3
with:
Expand All @@ -94,6 +104,7 @@ jobs:
rust-version: ${{ matrix.rust }}
- name: Run tests
run: |
export PATH=$PATH:$HOME/d/protoc/bin
cargo test --features avro,jit,scheduler,json
# test datafusion-sql examples
cargo run --example sql
Expand Down Expand Up @@ -159,17 +170,65 @@ jobs:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres

windows-and-macos:
name: Test on ${{ matrix.os }} Rust ${{ matrix.rust }}
windows:
name: Test on Windows Rust ${{ matrix.rust }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [windows-latest, macos-latest]
os: [windows-latest]
rust: [stable]
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Install protobuf compiler
shell: bash
run: |
mkdir -p $HOME/d/protoc
cd $HOME/d/protoc
export PROTO_ZIP="protoc-21.4-win64.zip"
curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP
unzip $PROTO_ZIP
export PATH=$PATH:$HOME/d/protoc/bin
protoc.exe --version
# TODO: this won't cache anything, which is expensive. Setup this action
# with a OS-dependent path.
- name: Setup Rust toolchain
run: |
rustup toolchain install ${{ matrix.rust }}
rustup default ${{ matrix.rust }}
rustup component add rustfmt
- name: Run tests
shell: bash
run: |
export PATH=$PATH:$HOME/d/protoc/bin
cargo test
env:
# do not produce debug symbols to keep memory usage down
RUSTFLAGS: "-C debuginfo=0"

macos:
name: Test on MacOS Rust ${{ matrix.rust }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [macos-latest]
rust: [stable]
steps:
- uses: actions/checkout@v2
with:
submodules: true
- name: Install protobuf compiler
shell: bash
run: |
mkdir -p $HOME/d/protoc
cd $HOME/d/protoc
export PROTO_ZIP="protoc-21.4-osx-x86_64.zip"
curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP
unzip $PROTO_ZIP
echo "$HOME/d/protoc/bin" >> $GITHUB_PATH
export PATH=$PATH:$HOME/d/protoc/bin
protoc --version
# TODO: this won't cache anything, which is expensive. Setup this action
# with a OS-dependent path.
- name: Setup Rust toolchain
Expand Down Expand Up @@ -250,6 +309,16 @@ jobs:
- uses: actions/checkout@v2
with:
submodules: true
- name: Install protobuf compiler
shell: bash
run: |
mkdir -p $HOME/d/protoc
cd $HOME/d/protoc
export PROTO_ZIP="protoc-21.4-linux-x86_64.zip"
curl -LO https://github.com/protocolbuffers/protobuf/releases/download/v21.4/$PROTO_ZIP
unzip $PROTO_ZIP
export PATH=$PATH:$HOME/d/protoc/bin
protoc --version
- name: Setup Rust toolchain
run: |
rustup toolchain install ${{ matrix.rust }}
Expand All @@ -263,6 +332,7 @@ jobs:
key: cargo-coverage-cache3-
- name: Run coverage
run: |
export PATH=$PATH:$HOME/d/protoc/bin
rustup toolchain install stable
rustup default stable
cargo install --version 0.20.1 cargo-tarpaulin
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ simd = ["datafusion/simd"]
snmalloc = ["snmalloc-rs"]

[dependencies]
datafusion = { path = "../datafusion/core" }
datafusion = { path = "../datafusion/core", features = [], optional = false }
env_logger = "0.9"
futures = "0.3"
mimalloc = { version = "0.1", optional = true, default-features = false }
Expand Down
4 changes: 2 additions & 2 deletions datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ rust-version = "1.59"
readme = "README.md"

[dependencies]
arrow = { version = "19.0.0" }
arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "30c94dbf1c422f81f8520b9956e96ab7b53c3f47", features = [], optional = false }
clap = { version = "3", features = ["derive", "cargo"] }
datafusion = { path = "../datafusion/core", version = "10.0.0" }
datafusion = { path = "../datafusion/core", features = [], optional = false }
dirs = "4.0.0"
env_logger = "0.9"
mimalloc = { version = "0.1", default-features = false }
Expand Down
8 changes: 4 additions & 4 deletions datafusion-examples/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ path = "examples/avro_sql.rs"
required-features = ["datafusion/avro"]

[dev-dependencies]
arrow-flight = { version = "19.0.0" }
arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "30c94dbf1c422f81f8520b9956e96ab7b53c3f47", features = [], optional = false }
async-trait = "0.1.41"
datafusion = { path = "../datafusion/core" }
datafusion = { path = "../datafusion/core", features = [], optional = false }
futures = "0.3"
num_cpus = "1.13.0"
prost = "0.10"
prost = "0.11.0"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = "1.0.82"
tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] }
tonic = "0.7"
tonic = "0.8"
7 changes: 4 additions & 3 deletions datafusion/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,12 @@ pyarrow = ["pyo3"]

[dependencies]
apache-avro = { version = "0.14", features = ["snappy"], optional = true }
arrow = { version = "19.0.0", features = ["prettyprint"] }
arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "30c94dbf1c422f81f8520b9956e96ab7b53c3f47", features = ["prettyprint"], optional = false }
avro-rs = { version = "0.13", features = ["snappy"], optional = true }
cranelift-module = { version = "0.86.1", optional = true }
object_store = { version = "0.3", optional = true }
object_store = { git = "https://github.com/apache/arrow-rs.git", rev = "30c94dbf1c422f81f8520b9956e96ab7b53c3f47", features = [], optional = true }
ordered-float = "3.0"
parquet = { version = "19.0.0", features = ["arrow"], optional = true }
parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "30c94dbf1c422f81f8520b9956e96ab7b53c3f47", features = ["arrow"], optional = true }
pyo3 = { version = "0.16", optional = true }
serde_json = "1.0"
sqlparser = "0.20"
22 changes: 11 additions & 11 deletions datafusion/common/src/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use arrow::{
IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType, TimeUnit,
TimestampMicrosecondType, TimestampMillisecondType, TimestampNanosecondType,
TimestampSecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
DECIMAL_MAX_PRECISION,
DECIMAL128_MAX_PRECISION,
},
util::decimal::{BasicDecimal, Decimal128},
};
Expand Down Expand Up @@ -611,7 +611,7 @@ impl ScalarValue {
scale: usize,
) -> Result<Self> {
// make sure the precision and scale is valid
if precision <= DECIMAL_MAX_PRECISION && scale <= precision {
if precision <= DECIMAL128_MAX_PRECISION && scale <= precision {
return Ok(ScalarValue::Decimal128(Some(value), precision, scale));
}
Err(DataFusionError::Internal(format!(
Expand Down Expand Up @@ -654,7 +654,7 @@ impl ScalarValue {
ScalarValue::Int32(_) => DataType::Int32,
ScalarValue::Int64(_) => DataType::Int64,
ScalarValue::Decimal128(_, precision, scale) => {
DataType::Decimal(*precision, *scale)
DataType::Decimal128(*precision, *scale)
}
ScalarValue::TimestampSecond(_, tz_opt) => {
DataType::Timestamp(TimeUnit::Second, tz_opt.clone())
Expand Down Expand Up @@ -935,7 +935,7 @@ impl ScalarValue {
}

let array: ArrayRef = match &data_type {
DataType::Decimal(precision, scale) => {
DataType::Decimal128(precision, scale) => {
let decimal_array =
ScalarValue::iter_to_decimal_array(scalars, precision, scale)?;
Arc::new(decimal_array)
Expand Down Expand Up @@ -1448,7 +1448,7 @@ impl ScalarValue {

Ok(match array.data_type() {
DataType::Null => ScalarValue::Null,
DataType::Decimal(precision, scale) => {
DataType::Decimal128(precision, scale) => {
ScalarValue::get_decimal_value_from_array(array, index, precision, scale)
}
DataType::Boolean => typed_cast!(array, index, BooleanArray, Boolean),
Expand Down Expand Up @@ -1899,7 +1899,7 @@ impl TryFrom<&DataType> for ScalarValue {
DataType::UInt16 => ScalarValue::UInt16(None),
DataType::UInt32 => ScalarValue::UInt32(None),
DataType::UInt64 => ScalarValue::UInt64(None),
DataType::Decimal(precision, scale) => {
DataType::Decimal128(precision, scale) => {
ScalarValue::Decimal128(None, *precision, *scale)
}
DataType::Utf8 => ScalarValue::Utf8(None),
Expand Down Expand Up @@ -2145,7 +2145,7 @@ mod tests {
#[test]
fn scalar_decimal_test() {
let decimal_value = ScalarValue::Decimal128(Some(123), 10, 1);
assert_eq!(DataType::Decimal(10, 1), decimal_value.get_datatype());
assert_eq!(DataType::Decimal128(10, 1), decimal_value.get_datatype());
let try_into_value: i128 = decimal_value.clone().try_into().unwrap();
assert_eq!(123_i128, try_into_value);
assert!(!decimal_value.is_null());
Expand All @@ -2163,14 +2163,14 @@ mod tests {
let array = decimal_value.to_array();
let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap();
assert_eq!(1, array.len());
assert_eq!(DataType::Decimal(10, 1), array.data_type().clone());
assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
assert_eq!(123i128, array.value(0).as_i128());

// decimal scalar to array with size
let array = decimal_value.to_array_of_size(10);
let array_decimal = array.as_any().downcast_ref::<Decimal128Array>().unwrap();
assert_eq!(10, array.len());
assert_eq!(DataType::Decimal(10, 1), array.data_type().clone());
assert_eq!(DataType::Decimal128(10, 1), array.data_type().clone());
assert_eq!(123i128, array_decimal.value(0).as_i128());
assert_eq!(123i128, array_decimal.value(9).as_i128());
// test eq array
Expand Down Expand Up @@ -2208,7 +2208,7 @@ mod tests {
// convert the vec to decimal array and check the result
let array = ScalarValue::iter_to_array(decimal_vec.into_iter()).unwrap();
assert_eq!(3, array.len());
assert_eq!(DataType::Decimal(10, 2), array.data_type().clone());
assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());

let decimal_vec = vec![
ScalarValue::Decimal128(Some(1), 10, 2),
Expand All @@ -2218,7 +2218,7 @@ mod tests {
];
let array = ScalarValue::iter_to_array(decimal_vec.into_iter()).unwrap();
assert_eq!(4, array.len());
assert_eq!(DataType::Decimal(10, 2), array.data_type().clone());
assert_eq!(DataType::Decimal128(10, 2), array.data_type().clone());

assert!(ScalarValue::try_new_decimal128(1, 10, 2)
.unwrap()
Expand Down
22 changes: 11 additions & 11 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,17 @@ unicode_expressions = ["datafusion-physical-expr/regex_expressions", "datafusion
[dependencies]
ahash = { version = "0.7", default-features = false }
apache-avro = { version = "0.14", optional = true }
arrow = { version = "19.0.0", features = ["prettyprint"] }
arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "30c94dbf1c422f81f8520b9956e96ab7b53c3f47", features = ["prettyprint"], optional = false }
async-trait = "0.1.41"
bytes = "1.1"
chrono = { version = "0.4", default-features = false }
datafusion-common = { path = "../common", version = "10.0.0", features = ["parquet", "object_store"] }
datafusion-expr = { path = "../expr", version = "10.0.0" }
datafusion-jit = { path = "../jit", version = "10.0.0", optional = true }
datafusion-optimizer = { path = "../optimizer", version = "10.0.0" }
datafusion-physical-expr = { path = "../physical-expr", version = "10.0.0" }
datafusion-row = { path = "../row", version = "10.0.0" }
datafusion-sql = { path = "../sql", version = "10.0.0" }
datafusion-common = { path = "../common", features = ["parquet", "object_store"], optional = false }
datafusion-expr = { path = "../expr", features = [], optional = false }
datafusion-jit = { path = "../jit", features = [], optional = true }
datafusion-optimizer = { path = "../optimizer", features = [], optional = false }
datafusion-physical-expr = { path = "../physical-expr", features = [], optional = false }
datafusion-row = { path = "../row", features = [], optional = false }
datafusion-sql = { path = "../sql", features = [], optional = false }
futures = "0.3"
glob = "0.3.0"
hashbrown = { version = "0.12", features = ["raw"] }
Expand All @@ -75,10 +75,10 @@ lazy_static = { version = "^1.4.0" }
log = "^0.4"
num-traits = { version = "0.2", optional = true }
num_cpus = "1.13.0"
object_store = "0.3.0"
object_store = { git = "https://github.com/apache/arrow-rs.git", rev = "30c94dbf1c422f81f8520b9956e96ab7b53c3f47", features = [], optional = false }
ordered-float = "3.0"
parking_lot = "0.12"
parquet = { version = "19.0.0", features = ["arrow", "async"] }
parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "30c94dbf1c422f81f8520b9956e96ab7b53c3f47", features = ["arrow", "async"], optional = false }
paste = "^1.0"
pin-project-lite = "^0.2.7"
pyo3 = { version = "0.16", optional = true }
Expand All @@ -98,7 +98,7 @@ csv = "1.1.6"
ctor = "0.1.22"
doc-comment = "0.3"
env_logger = "0.9"
fuzz-utils = { path = "fuzz-utils" }
fuzz-utils = { path = "fuzz-utils", features = [], optional = false }

[[bench]]
harness = false
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/fuzz-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,6 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = { version = "19.0.0", features = ["prettyprint"] }
arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "30c94dbf1c422f81f8520b9956e96ab7b53c3f47", features = ["prettyprint"], optional = false }
env_logger = "0.9.0"
rand = "0.8"
10 changes: 4 additions & 6 deletions datafusion/core/src/avro_to_arrow/arrow_array_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,10 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> {
"Failed to parse avro value: {:?}",
e
))),
other => {
return Err(ArrowError::ParseError(format!(
"Row needs to be of type object, got: {:?}",
other
)))
}
other => Err(ArrowError::ParseError(format!(
"Row needs to be of type object, got: {:?}",
other
))),
})
.collect::<ArrowResult<Vec<Vec<(String, Value)>>>>()?;
if rows.is_empty() {
Expand Down
4 changes: 2 additions & 2 deletions datafusion/core/src/avro_to_arrow/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ fn schema_to_field_with_props(
AvroSchema::Fixed { size, .. } => DataType::FixedSizeBinary(*size as i32),
AvroSchema::Decimal {
precision, scale, ..
} => DataType::Decimal(*precision, *scale),
} => DataType::Decimal128(*precision, *scale),
AvroSchema::Uuid => DataType::FixedSizeBinary(16),
AvroSchema::Date => DataType::Date32,
AvroSchema::TimeMillis => DataType::Time32(TimeUnit::Millisecond),
Expand Down Expand Up @@ -217,7 +217,7 @@ fn default_field_name(dt: &DataType) -> &str {
DataType::Union(_, _, _) => "union",
DataType::Dictionary(_, _) => "map",
DataType::Map(_, _) => unimplemented!("Map support not implemented"),
DataType::Decimal(_, _) => "decimal",
DataType::Decimal128(_, _) => "decimal",
DataType::Decimal256(_, _) => "decimal",
}
}
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/src/catalog/information_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@ impl InformationSchemaColumnsBuilder {
Float32 => (Some(24), Some(2), None),
// Numbers from postgres `double` type
Float64 => (Some(24), Some(2), None),
Decimal(precision, scale) => {
Decimal128(precision, scale) => {
(Some(*precision as u64), Some(10), Some(*scale as u64))
}
_ => (None, None, None),
Expand Down
8 changes: 6 additions & 2 deletions datafusion/core/src/datasource/file_format/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,17 +231,21 @@ mod tests {
projection: Option<Vec<usize>>,
limit: Option<usize>,
) -> Result<Arc<dyn ExecutionPlan>> {
let store_root = env!("CARGO_MANIFEST_DIR");
let filename = "tests/jsons/2.json";
let format = JsonFormat::default();
scan_format(&format, ".", filename, projection, limit).await
scan_format(&format, store_root, filename, projection, limit).await
}

#[tokio::test]
async fn infer_schema_with_limit() {
let store = Arc::new(LocalFileSystem::new()) as _;
let filename = "tests/jsons/schema_infer_limit.json";
let format = JsonFormat::default().with_schema_infer_max_rec(Some(3));

let store_root = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
let filename = store_root.join("tests/jsons/schema_infer_limit.json");
let filename = filename.to_str().expect("Unable to get path!");

let file_schema = format
.infer_schema(&store, &[local_unpartitioned_file(filename)])
.await
Expand Down
Loading

0 comments on commit a96911d

Please sign in to comment.