Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature gate csv functionality #312

Merged
merged 5 commits on May 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ jobs:
cd arrow
# re-run tests on arrow workspace with additional features
cargo test --features=prettyprint
# run test on arrow with minimal set of features
cargo test --no-default-features
cargo run --example builders
cargo run --example dynamic_types
cargo run --example read_csv
Expand Down
5 changes: 3 additions & 2 deletions arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ serde_derive = "1.0"
serde_json = { version = "1.0", features = ["preserve_order"] }
indexmap = "1.6"
rand = "0.7"
csv = "1.1"
num = "0.4"
csv_crate = { version = "1.1", optional = true, package="csv" }
regex = "1.3"
lazy_static = "1.4"
packed_simd = { version = "0.3.4", optional = true, package = "packed_simd_2" }
Expand All @@ -54,8 +54,9 @@ lexical-core = "^0.7"
multiversion = "0.6.1"

[features]
default = []
default = ["csv"]
avx512 = []
csv = ["csv_crate"]
simd = ["packed_simd"]
prettyprint = ["prettytable-rs"]
# this is only intended to be used in single-threaded programs: it verifies that
Expand Down
64 changes: 34 additions & 30 deletions arrow/benches/csv_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,45 +21,49 @@ extern crate criterion;
use criterion::*;

use arrow::array::*;
#[cfg(feature = "csv")]
use arrow::csv;
use arrow::datatypes::*;
use arrow::record_batch::RecordBatch;
use std::fs::File;
use std::sync::Arc;

// NOTE(review): this is a capture of a diff view without +/- markers. Each group of
// statements appears twice: first the removed (ungated) lines, then the added lines
// that sit inside the `#[cfg(feature = "csv")]` block.
// NOTE(review): in the gated version the whole body is compiled out when the `csv`
// feature is disabled, so the benchmark runs an empty function and the unconditional
// imports (`arrow::array::*`, `File`, `Arc`, ...) go unused. Declaring
// `required-features = ["csv"]` for this bench target in Cargo.toml may be a cleaner
// alternative — confirm.
/// Benchmark body: builds one `RecordBatch` with three rows and four columns
/// (Utf8, Float64, UInt32, Boolean) and writes it eleven times to
/// `target/bench_write_csv.csv` through `csv::Writer`.
///
/// # Panics
///
/// Panics (via `unwrap`) if the record batch cannot be constructed, the output
/// file cannot be created, or any write fails.
fn record_batches_to_csv() {
let schema = Schema::new(vec![
Field::new("c1", DataType::Utf8, false),
Field::new("c2", DataType::Float64, true),
Field::new("c3", DataType::UInt32, false),
Field::new("c3", DataType::Boolean, true),
]);
#[cfg(feature = "csv")]
{
let schema = Schema::new(vec![
Field::new("c1", DataType::Utf8, false),
Field::new("c2", DataType::Float64, true),
Field::new("c3", DataType::UInt32, false),
// NOTE(review): this fourth field reuses the name "c3" from the UInt32 field
// above, while the matching array below is bound as `c4`; presumably the name
// was meant to be "c4" — confirm.
Field::new("c3", DataType::Boolean, true),
]);

let c1 = StringArray::from(vec![
"Lorem ipsum dolor sit amet",
"consectetur adipiscing elit",
"sed do eiusmod tempor",
]);
let c2 = PrimitiveArray::<Float64Type>::from(vec![
Some(123.564532),
None,
Some(-556132.25),
]);
let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);
let c1 = StringArray::from(vec![
"Lorem ipsum dolor sit amet",
"consectetur adipiscing elit",
"sed do eiusmod tempor",
]);
let c2 = PrimitiveArray::<Float64Type>::from(vec![
Some(123.564532),
None,
Some(-556132.25),
]);
let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);

let b = RecordBatch::try_new(
Arc::new(schema),
vec![Arc::new(c1), Arc::new(c2), Arc::new(c3), Arc::new(c4)],
)
.unwrap();
let file = File::create("target/bench_write_csv.csv").unwrap();
let mut writer = csv::Writer::new(file);
let batches = vec![&b, &b, &b, &b, &b, &b, &b, &b, &b, &b, &b];
#[allow(clippy::unit_arg)]
criterion::black_box(for batch in batches {
writer.write(batch).unwrap()
});
let b = RecordBatch::try_new(
Arc::new(schema),
vec![Arc::new(c1), Arc::new(c2), Arc::new(c3), Arc::new(c4)],
)
.unwrap();
// The relative path means the file lands under `target/` of whatever directory
// the benchmark is launched from.
let file = File::create("target/bench_write_csv.csv").unwrap();
let mut writer = csv::Writer::new(file);
// The same batch is referenced eleven times, so eleven writes go to one file.
let batches = vec![&b, &b, &b, &b, &b, &b, &b, &b, &b, &b, &b];
// The `for` loop evaluates to `()`, and that unit value is what is handed to
// `black_box`; the attribute silences clippy's `unit_arg` lint for this call.
#[allow(clippy::unit_arg)]
criterion::black_box(for batch in batches {
writer.write(batch).unwrap()
});
}
}

fn criterion_benchmark(c: &mut Criterion) {
Expand Down
27 changes: 16 additions & 11 deletions arrow/examples/read_csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,29 @@ extern crate arrow;
use std::fs::File;
use std::sync::Arc;

#[cfg(feature = "csv")]
use arrow::csv;
use arrow::datatypes::{DataType, Field, Schema};
#[cfg(feature = "prettyprint")]
use arrow::util::pretty::print_batches;

// NOTE(review): this is a capture of a diff view without +/- markers. Removed
// (ungated) lines and added lines (inside the `#[cfg(feature = "csv")]` block) are
// interleaved, so statements appear twice and the braces do not balance as plain text.
// NOTE(review): in the gated version `main` is empty when the `csv` feature is off,
// which leaves the unconditional `File`, `Arc` and `datatypes` imports unused.
// Declaring `required-features = ["csv"]` for this example in Cargo.toml may be a
// cleaner alternative — confirm.
/// Example: reads the first record batch from `test/data/uk_cities.csv` using an
/// explicit three-field schema (`city`: Utf8, `lat` and `lng`: Float64) and prints
/// it when the `prettyprint` feature is enabled.
///
/// # Panics
///
/// Panics (via `unwrap`) if the file cannot be opened, the reader yields no batch,
/// the first batch is an error, or printing fails.
fn main() {
let schema = Schema::new(vec![
Field::new("city", DataType::Utf8, false),
Field::new("lat", DataType::Float64, false),
Field::new("lng", DataType::Float64, false),
]);
#[cfg(feature = "csv")]
{
let schema = Schema::new(vec![
Field::new("city", DataType::Utf8, false),
Field::new("lat", DataType::Float64, false),
Field::new("lng", DataType::Float64, false),
]);

let file = File::open("test/data/uk_cities.csv").unwrap();
// Relative path: the example must be run from a directory that contains `test/data`.
let file = File::open("test/data/uk_cities.csv").unwrap();

let mut csv = csv::Reader::new(file, Arc::new(schema), false, None, 1024, None, None);
let _batch = csv.next().unwrap().unwrap();
#[cfg(feature = "prettyprint")]
{
print_batches(&[_batch]).unwrap();
// NOTE(review): the positional arguments are presumably has_header = false and
// batch_size = 1024, with the `None`s leaving optional settings unset — confirm
// against the `csv::Reader::new` signature.
let mut csv =
csv::Reader::new(file, Arc::new(schema), false, None, 1024, None, None);
// First `unwrap` panics if the reader yields nothing, the second if the batch
// is an `Err`. The leading underscore avoids an unused-variable warning when
// `prettyprint` is off and the batch is never printed.
let _batch = csv.next().unwrap().unwrap();
#[cfg(feature = "prettyprint")]
{
print_batches(&[_batch]).unwrap();
}
}
}
20 changes: 12 additions & 8 deletions arrow/examples/read_csv_infer_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,24 @@

extern crate arrow;

#[cfg(feature = "csv")]
use arrow::csv;
#[cfg(feature = "prettyprint")]
use arrow::util::pretty::print_batches;
use std::fs::File;

// NOTE(review): this is a capture of a diff view without +/- markers. Removed
// (ungated) lines and added lines (inside the `#[cfg(feature = "csv")]` block) are
// interleaved, so statements appear twice.
// NOTE(review): in the gated version `main` is empty when the `csv` feature is off,
// which leaves the unconditional `std::fs::File` import unused. Declaring
// `required-features = ["csv"]` for this example in Cargo.toml may be a cleaner
// alternative — confirm.
/// Example: opens `test/data/uk_cities_with_headers.csv`, builds a CSV reader with
/// `csv::ReaderBuilder` (header row enabled, schema inference requested), reads the
/// first record batch and prints it when the `prettyprint` feature is enabled.
///
/// # Panics
///
/// Panics (via `unwrap`) if the file cannot be opened, the reader cannot be built,
/// no batch is produced, the first batch is an error, or printing fails.
fn main() {
let file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
let builder = csv::ReaderBuilder::new()
.has_header(true)
.infer_schema(Some(100));
let mut csv = builder.build(file).unwrap();
let _batch = csv.next().unwrap().unwrap();
#[cfg(feature = "prettyprint")]
#[cfg(feature = "csv")]
{
print_batches(&[_batch]).unwrap();
// Relative path: the example must be run from a directory that contains `test/data`.
let file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
// NOTE(review): `Some(100)` presumably caps the number of records sampled for
// schema inference — confirm against `ReaderBuilder::infer_schema`.
let builder = csv::ReaderBuilder::new()
.has_header(true)
.infer_schema(Some(100));
let mut csv = builder.build(file).unwrap();
// The leading underscore avoids an unused-variable warning when `prettyprint`
// is off and the batch is never printed.
let _batch = csv.next().unwrap().unwrap();
#[cfg(feature = "prettyprint")]
{
print_batches(&[_batch]).unwrap();
}
}
}
4 changes: 1 addition & 3 deletions arrow/src/csv/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,14 @@ use std::fs::File;
use std::io::{Read, Seek, SeekFrom};
use std::sync::Arc;

use csv as csv_crate;

use crate::array::{
ArrayRef, BooleanArray, DictionaryArray, PrimitiveArray, StringArray,
};
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::record_batch::RecordBatch;

use self::csv_crate::{ByteRecord, StringRecord};
use csv_crate::{ByteRecord, StringRecord};

lazy_static! {
static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d+\.\d+)$").unwrap();
Expand Down
2 changes: 0 additions & 2 deletions arrow/src/csv/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,6 @@
//! }
//! ```

use csv as csv_crate;

use std::io::Write;

use crate::datatypes::*;
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
use std::fmt::{Debug, Display, Formatter};
use std::io::Write;

use csv as csv_crate;
use std::error::Error;

/// Many different operations in the `arrow` crate return this error type.
Expand Down Expand Up @@ -59,6 +58,7 @@ impl From<::std::io::Error> for ArrowError {
}
}

#[cfg(feature = "csv")]
impl From<csv_crate::Error> for ArrowError {
fn from(error: csv_crate::Error) -> Self {
match error.kind() {
Expand Down
1 change: 1 addition & 0 deletions arrow/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ pub mod bitmap;
pub mod buffer;
mod bytes;
pub mod compute;
#[cfg(feature = "csv")]
pub mod csv;
pub mod datatypes;
pub mod error;
Expand Down
3 changes: 3 additions & 0 deletions arrow/src/util/string_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
//! Example:
//!
//! ```
//! #[cfg(feature = "csv")]
//! {
//! use arrow::array::*;
//! use arrow::csv;
//! use arrow::datatypes::*;
Expand Down Expand Up @@ -58,6 +60,7 @@
//! let sw = StringWriter::new();
//! let mut writer = csv::Writer::new(sw);
//! writer.write(&batch).unwrap();
//! }
//! ```

use std::io::{Error, ErrorKind, Result, Write};
Expand Down