Skip to content

Commit

Permalink
Update DECIMAL_RE to allow scientific notation in auto inferred schemas (#1216)
Browse files Browse the repository at this point in the history

* Update DECIMAL_RE to allow scientific notation in auto inferred schemas

* Fixed format lint
  • Loading branch information
pjmore authored Jan 22, 2022
1 parent 0377aae commit e72875e
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 4 deletions.
8 changes: 5 additions & 3 deletions arrow/src/csv/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ use std::ops::Neg;
lazy_static! {
static ref PARSE_DECIMAL_RE: Regex =
Regex::new(r"^-?(\d+\.?\d*|\d*\.?\d+)$").unwrap();
static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d*\.\d+|\d+\.\d*)$").unwrap();
static ref DECIMAL_RE: Regex =
Regex::new(r"^-?((\d*\.\d+|\d+\.\d*)([eE]-?\d+)?|\d+([eE]-?\d+))$").unwrap();
static ref INTEGER_RE: Regex = Regex::new(r"^-?(\d+)$").unwrap();
static ref BOOLEAN_RE: Regex = RegexBuilder::new(r"^(true)$|^(false)$")
.case_insensitive(true)
Expand Down Expand Up @@ -1570,7 +1571,7 @@ mod tests {
let mut csv = builder.build(file).unwrap();
let batch = csv.next().unwrap().unwrap();

assert_eq!(5, batch.num_rows());
assert_eq!(7, batch.num_rows());
assert_eq!(6, batch.num_columns());

let schema = batch.schema();
Expand Down Expand Up @@ -1872,6 +1873,7 @@ mod tests {
writeln!(csv1, "c1,c2,c3")?;
writeln!(csv1, "1,\"foo\",0.5")?;
writeln!(csv1, "3,\"bar\",1")?;
writeln!(csv1, "3,\"bar\",2e-06")?;
// reading csv2 will set c2 to optional
writeln!(csv2, "c1,c2,c3,c4")?;
writeln!(csv2, "10,,3.14,true")?;
Expand All @@ -1887,7 +1889,7 @@ mod tests {
csv4.path().to_str().unwrap().to_string(),
],
b',',
Some(3), // only csv1 and csv2 should be read
Some(4), // only csv1 and csv2 should be read
true,
)?;

Expand Down
4 changes: 3 additions & 1 deletion arrow/test/data/various_types.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ c_int|c_float|c_string|c_bool|c_date|c_datetime
2|2.2|"2.22"|true|2020-11-08|2020-11-08T01:00:00
3||"3.33"|true|1969-12-31|1969-11-08T02:00:00
4|4.4||false||
5|6.6|""|false|1990-01-01|1990-01-01T03:00:00
5|6.6|""|false|1990-01-01|1990-01-01T03:00:00
4|4e6||false||
4|4.0e-6||false||

0 comments on commit e72875e

Please sign in to comment.