Skip to content

Commit

Permalink
feature gate csv functionality (#312)
Browse files (browse the repository at this point in the history)
* feature gate csv functionality

* mock read_csv example

* clippy

* mock read_csv_infer_schema example

* add tests of --no-default-features to CI
  • Loading branch information
ritchie46 authored May 21, 2021
1 parent f042191 commit dde86b9
Show file tree
Hide file tree
Showing 10 changed files with 73 additions and 57 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ jobs:
cd arrow
# re-run tests on arrow workspace with additional features
cargo test --features=prettyprint
# run tests on arrow with a minimal set of features
cargo test --no-default-features
cargo run --example builders
cargo run --example dynamic_types
cargo run --example read_csv
Expand Down
5 changes: 3 additions & 2 deletions arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ serde_derive = "1.0"
serde_json = { version = "1.0", features = ["preserve_order"] }
indexmap = "1.6"
rand = "0.7"
csv = "1.1"
num = "0.4"
csv_crate = { version = "1.1", optional = true, package="csv" }
regex = "1.3"
lazy_static = "1.4"
packed_simd = { version = "0.3.4", optional = true, package = "packed_simd_2" }
Expand All @@ -54,8 +54,9 @@ lexical-core = "^0.7"
multiversion = "0.6.1"

[features]
default = []
default = ["csv"]
avx512 = []
csv = ["csv_crate"]
simd = ["packed_simd"]
prettyprint = ["prettytable-rs"]
# this is only intended to be used in single-threaded programs: it verifies that
Expand Down
64 changes: 34 additions & 30 deletions arrow/benches/csv_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,45 +21,49 @@ extern crate criterion;
use criterion::*;

use arrow::array::*;
#[cfg(feature = "csv")]
use arrow::csv;
use arrow::datatypes::*;
use arrow::record_batch::RecordBatch;
use std::fs::File;
use std::sync::Arc;

fn record_batches_to_csv() {
let schema = Schema::new(vec![
Field::new("c1", DataType::Utf8, false),
Field::new("c2", DataType::Float64, true),
Field::new("c3", DataType::UInt32, false),
Field::new("c3", DataType::Boolean, true),
]);
#[cfg(feature = "csv")]
{
let schema = Schema::new(vec![
Field::new("c1", DataType::Utf8, false),
Field::new("c2", DataType::Float64, true),
Field::new("c3", DataType::UInt32, false),
Field::new("c3", DataType::Boolean, true),
]);

let c1 = StringArray::from(vec![
"Lorem ipsum dolor sit amet",
"consectetur adipiscing elit",
"sed do eiusmod tempor",
]);
let c2 = PrimitiveArray::<Float64Type>::from(vec![
Some(123.564532),
None,
Some(-556132.25),
]);
let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);
let c1 = StringArray::from(vec![
"Lorem ipsum dolor sit amet",
"consectetur adipiscing elit",
"sed do eiusmod tempor",
]);
let c2 = PrimitiveArray::<Float64Type>::from(vec![
Some(123.564532),
None,
Some(-556132.25),
]);
let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);

let b = RecordBatch::try_new(
Arc::new(schema),
vec![Arc::new(c1), Arc::new(c2), Arc::new(c3), Arc::new(c4)],
)
.unwrap();
let file = File::create("target/bench_write_csv.csv").unwrap();
let mut writer = csv::Writer::new(file);
let batches = vec![&b, &b, &b, &b, &b, &b, &b, &b, &b, &b, &b];
#[allow(clippy::unit_arg)]
criterion::black_box(for batch in batches {
writer.write(batch).unwrap()
});
let b = RecordBatch::try_new(
Arc::new(schema),
vec![Arc::new(c1), Arc::new(c2), Arc::new(c3), Arc::new(c4)],
)
.unwrap();
let file = File::create("target/bench_write_csv.csv").unwrap();
let mut writer = csv::Writer::new(file);
let batches = vec![&b, &b, &b, &b, &b, &b, &b, &b, &b, &b, &b];
#[allow(clippy::unit_arg)]
criterion::black_box(for batch in batches {
writer.write(batch).unwrap()
});
}
}

fn criterion_benchmark(c: &mut Criterion) {
Expand Down
27 changes: 16 additions & 11 deletions arrow/examples/read_csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,24 +20,29 @@ extern crate arrow;
use std::fs::File;
use std::sync::Arc;

#[cfg(feature = "csv")]
use arrow::csv;
use arrow::datatypes::{DataType, Field, Schema};
#[cfg(feature = "prettyprint")]
use arrow::util::pretty::print_batches;

fn main() {
let schema = Schema::new(vec![
Field::new("city", DataType::Utf8, false),
Field::new("lat", DataType::Float64, false),
Field::new("lng", DataType::Float64, false),
]);
#[cfg(feature = "csv")]
{
let schema = Schema::new(vec![
Field::new("city", DataType::Utf8, false),
Field::new("lat", DataType::Float64, false),
Field::new("lng", DataType::Float64, false),
]);

let file = File::open("test/data/uk_cities.csv").unwrap();
let file = File::open("test/data/uk_cities.csv").unwrap();

let mut csv = csv::Reader::new(file, Arc::new(schema), false, None, 1024, None, None);
let _batch = csv.next().unwrap().unwrap();
#[cfg(feature = "prettyprint")]
{
print_batches(&[_batch]).unwrap();
let mut csv =
csv::Reader::new(file, Arc::new(schema), false, None, 1024, None, None);
let _batch = csv.next().unwrap().unwrap();
#[cfg(feature = "prettyprint")]
{
print_batches(&[_batch]).unwrap();
}
}
}
20 changes: 12 additions & 8 deletions arrow/examples/read_csv_infer_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,24 @@

extern crate arrow;

#[cfg(feature = "csv")]
use arrow::csv;
#[cfg(feature = "prettyprint")]
use arrow::util::pretty::print_batches;
use std::fs::File;

fn main() {
let file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
let builder = csv::ReaderBuilder::new()
.has_header(true)
.infer_schema(Some(100));
let mut csv = builder.build(file).unwrap();
let _batch = csv.next().unwrap().unwrap();
#[cfg(feature = "prettyprint")]
#[cfg(feature = "csv")]
{
print_batches(&[_batch]).unwrap();
let file = File::open("test/data/uk_cities_with_headers.csv").unwrap();
let builder = csv::ReaderBuilder::new()
.has_header(true)
.infer_schema(Some(100));
let mut csv = builder.build(file).unwrap();
let _batch = csv.next().unwrap().unwrap();
#[cfg(feature = "prettyprint")]
{
print_batches(&[_batch]).unwrap();
}
}
}
4 changes: 1 addition & 3 deletions arrow/src/csv/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,14 @@ use std::fs::File;
use std::io::{Read, Seek, SeekFrom};
use std::sync::Arc;

use csv as csv_crate;

use crate::array::{
ArrayRef, BooleanArray, DictionaryArray, PrimitiveArray, StringArray,
};
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::record_batch::RecordBatch;

use self::csv_crate::{ByteRecord, StringRecord};
use csv_crate::{ByteRecord, StringRecord};

lazy_static! {
static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d+\.\d+)$").unwrap();
Expand Down
2 changes: 0 additions & 2 deletions arrow/src/csv/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,6 @@
//! }
//! ```
use csv as csv_crate;

use std::io::Write;

use crate::datatypes::*;
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
use std::fmt::{Debug, Display, Formatter};
use std::io::Write;

use csv as csv_crate;
use std::error::Error;

/// Many different operations in the `arrow` crate return this error type.
Expand Down Expand Up @@ -59,6 +58,7 @@ impl From<::std::io::Error> for ArrowError {
}
}

#[cfg(feature = "csv")]
impl From<csv_crate::Error> for ArrowError {
fn from(error: csv_crate::Error) -> Self {
match error.kind() {
Expand Down
1 change: 1 addition & 0 deletions arrow/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ pub mod bitmap;
pub mod buffer;
mod bytes;
pub mod compute;
#[cfg(feature = "csv")]
pub mod csv;
pub mod datatypes;
pub mod error;
Expand Down
3 changes: 3 additions & 0 deletions arrow/src/util/string_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
//! Example:
//!
//! ```
//! #[cfg(feature = "csv")]
//! {
//! use arrow::array::*;
//! use arrow::csv;
//! use arrow::datatypes::*;
Expand Down Expand Up @@ -58,6 +60,7 @@
//! let sw = StringWriter::new();
//! let mut writer = csv::Writer::new(sw);
//! writer.write(&batch).unwrap();
//! }
//! ```
use std::io::{Error, ErrorKind, Result, Write};
Expand Down

0 comments on commit dde86b9

Please sign in to comment.