Skip to content

Commit

Permalink
Merge pull request #176 from jqnatividad/stats-date-types
Browse files Browse the repository at this point in the history
`stats`: add `DateTime` data type (RFC3339 format)
  • Loading branch information
jqnatividad authored Feb 28, 2022
2 parents de25f09 + af90edd commit cbe7a59
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 7 deletions.
30 changes: 23 additions & 7 deletions src/cmd/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use crate::CliResult;
use dateparser::DateTimeUtc;
use serde::Deserialize;

use self::FieldType::{TDate, TFloat, TInteger, TNull, TString, TUnknown};
use self::FieldType::{TDate, TDateTime, TFloat, TInteger, TNull, TString, TUnknown};

static USAGE: &str = "
Computes basic statistics on CSV data.
Expand Down Expand Up @@ -398,7 +398,7 @@ impl Stats {
});
}
}
TDate => {}
_ => {}
}
}

Expand Down Expand Up @@ -546,6 +546,7 @@ pub enum FieldType {
TFloat,
TInteger,
TDate,
TDateTime,
}

impl FieldType {
Expand All @@ -563,8 +564,15 @@ impl FieldType {
if string.parse::<f64>().is_ok() {
return TFloat;
}
if string.parse::<DateTimeUtc>().is_ok() {
return TDate;
if let Ok(parsed_date) = string.parse::<DateTimeUtc>() {
let rfc3339_date_str = parsed_date.0.to_string();
let datelen = rfc3339_date_str.len();

if datelen >= 17 {
return TDateTime;
} else {
return TDate;
}
}
TString
}
Expand All @@ -585,19 +593,25 @@ impl Commute for FieldType {
(TFloat, TFloat) => TFloat,
(TInteger, TInteger) => TInteger,
(TDate, TDate) => TDate,
(TDateTime, TDateTime) => TDateTime,
// Null does not impact the type.
(TNull, any) | (any, TNull) => any,
// There's no way to get around an unknown.
(TUnknown, _) | (_, TUnknown) => TUnknown,
// A Date merged with a DateTime widens to DateTime.
(TDate, TDateTime) | (TDateTime, TDate) => TDateTime,
// Integers can degrade to floats.
(TFloat, TInteger) | (TInteger, TFloat) => TFloat,
// Dates/DateTimes in unixtime format can degrade to ints/floats.
(TInteger, TDate) | (TDate, TInteger) => TInteger,
(TFloat, TDate) | (TDate, TFloat) => TFloat,
(TInteger, TDateTime) | (TDateTime, TInteger) => TInteger,
(TFloat, TDateTime) | (TDateTime, TFloat) => TFloat,
// Numbers/dates can degrade to unicode Strings.
(TString, TFloat) | (TFloat, TString) => TString,
(TString, TInteger) | (TInteger, TString) => TString,
(TString, TDate) | (TDate, TString) => TString,
(TString, TDateTime) | (TDateTime, TString) => TString,
};
}
}
Expand All @@ -620,6 +634,7 @@ impl fmt::Display for FieldType {
TFloat => write!(f, "Float"),
TInteger => write!(f, "Integer"),
TDate => write!(f, "Date"),
TDateTime => write!(f, "DateTime"),
}
}
}
Expand All @@ -633,6 +648,7 @@ impl fmt::Debug for FieldType {
TFloat => write!(f, "Float"),
TInteger => write!(f, "Integer"),
TDate => write!(f, "Date"),
TDateTime => write!(f, "DateTime"),
}
}
}
Expand Down Expand Up @@ -677,7 +693,7 @@ impl TypedSum {

fn show(&self, typ: FieldType) -> Option<String> {
match typ {
TNull | TString | TUnknown | TDate => None,
TNull | TString | TUnknown | TDate | TDateTime => None,
TInteger => Some(self.integer.to_string()),
TFloat => Some(self.float.unwrap_or(0.0).to_string()),
}
Expand Down Expand Up @@ -732,7 +748,7 @@ impl TypedMinMax {
self.integers.add(n);
self.floats.add(n as f64);
}
TDate => {
TDate | TDateTime => {
let n = str::from_utf8(&*sample)
.ok()
.and_then(|s| dateparser::parse(s).ok())
Expand Down Expand Up @@ -761,7 +777,7 @@ impl TypedMinMax {
}
_ => None,
},
TDate => match (self.dates.min(), self.dates.max()) {
TDate | TDateTime => match (self.dates.min(), self.dates.max()) {
(Some(min), Some(max)) => Some((min.to_string(), max.to_string())),
_ => None,
},
Expand Down
18 changes: 18 additions & 0 deletions tests/test_stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,12 @@ stats_tests!(stats_infer_int, "type", &["1"], "Integer");
stats_tests!(stats_infer_float, "type", &["1.2"], "Float");
stats_tests!(stats_infer_null, "type", &[""], "NULL");
stats_tests!(stats_infer_date, "type", &["1968-06-27"], "Date");
stats_tests!(
stats_infer_datetime,
"type",
&["1968-06-27 12:30:01"],
"DateTime"
);
stats_tests!(stats_infer_string_null, "type", &["a", ""], "String");
stats_tests!(stats_infer_int_null, "type", &["1", ""], "Integer");
stats_tests!(stats_infer_float_null, "type", &["1.2", ""], "Float");
Expand All @@ -170,6 +176,12 @@ stats_tests!(
&["June 27, 1968", ""],
"Date"
);
stats_tests!(
stats_infer_datetime_null,
"type",
&["June 27, 1968 12:30:00 UTC", ""],
"DateTime"
);
stats_tests!(stats_infer_null_string, "type", &["", "a"], "String");
stats_tests!(stats_infer_null_int, "type", &["", "1"], "Integer");
stats_tests!(stats_infer_null_float, "type", &["", "1.2"], "Float");
Expand All @@ -179,6 +191,12 @@ stats_tests!(
&["", "September 17, 2012 at 10:09am PST"],
"Date"
);
stats_tests!(
stats_infer_date_datetime,
"type",
&["September 11, 2001", "September 17, 2012 at 10:09am PST"],
"DateTime"
);
stats_tests!(stats_infer_int_string, "type", &["1", "a"], "String");
stats_tests!(stats_infer_string_int, "type", &["a", "1"], "String");
stats_tests!(stats_infer_int_float, "type", &["1", "1.2"], "Float");
Expand Down

0 comments on commit cbe7a59

Please sign in to comment.