Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean slate with Winnow #61

Draft
wants to merge 15 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 10 additions & 39 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ repository = "https://github.com/uutils/parse_datetime"
readme = "README.md"

[dependencies]
regex = "1.9"
chrono = { version="0.4", default-features=false, features=["std", "alloc", "clock"] }
winnow = "0.5.34"
76 changes: 76 additions & 0 deletions src/items/combined.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

//! Parse an ISO 8601 date and time item
//!
//! The GNU docs state:
//!
//! > The ISO 8601 date and time of day extended format consists of an ISO 8601
//! > date, a ‘T’ character separator, and an ISO 8601 time of day. This format
//! > is also recognized if the ‘T’ is replaced by a space.
//! >
//! > In this format, the time of day should use 24-hour notation. Fractional
//! > seconds are allowed, with either comma or period preceding the fraction.
//! > ISO 8601 fractional minutes and hours are not supported. Typically, hosts
//! > support nanosecond timestamp resolution; excess precision is silently discarded.

use winnow::{combinator::alt, seq, PResult, Parser};

use crate::items::space;

use super::{
date::{self, Date},
s,
time::{self, Time},
};

/// An ISO 8601 date combined with a time of day, as produced by [`parse`].
#[derive(Debug, Clone, PartialEq)]
pub struct DateTime {
    /// Calendar date part of the item.
    date: Date,
    /// Time-of-day part of the item.
    time: Time,
}

pub fn parse(input: &mut &str) -> PResult<DateTime> {
seq!(DateTime {
date: date::iso,
// Note: the `T` is lowercased by the main parse function
_: alt((s('t').void(), (' ', space).void())),
time: time::iso,
})
.parse_next(input)
}

#[cfg(test)]
mod tests {
    use super::{parse, DateTime};
    use crate::items::{date::Date, time::Time};

    /// Every supported separator spelling must produce the same date and time.
    #[test]
    fn some_date() {
        let expected = Some(DateTime {
            date: Date {
                day: 10,
                month: 10,
                year: Some(2022),
            },
            time: Time {
                hour: 10,
                minute: 10,
                second: 55.0,
                offset: None,
            },
        });

        let inputs = [
            "2022-10-10t10:10:55",
            "2022-10-10 10:10:55",
            "2022-10-10 t 10:10:55",
            "2022-10-10 10:10:55",
            "2022-10-10 (A comment!) t 10:10:55",
            "2022-10-10 (A comment!) 10:10:55",
        ];

        for input in inputs {
            // The parser advances the slice it is given, so hand it a copy and
            // keep `input` intact for the failure message.
            let mut remaining = input;
            assert_eq!(
                parse(&mut remaining).ok(),
                expected,
                "Failed string: {input}"
            );
        }
    }
}
232 changes: 232 additions & 0 deletions src/items/date.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

//! Parse a date item (without time component)
//!
//! The GNU docs say:
//!
//! > A calendar date item specifies a day of the year. It is specified
//! > differently, depending on whether the month is specified numerically
//! > or literally.
//! >
//! > ...
//! >
//! > For numeric months, the ISO 8601 format ‘year-month-day’ is allowed,
//! > where year is any positive number, month is a number between 01 and
//! > 12, and day is a number between 01 and 31. A leading zero must be
//! > present if a number is less than ten. If year is 68 or smaller, then
//! > 2000 is added to it; otherwise, if year is less than 100, then 1900
//! > is added to it. The construct ‘month/day/year’, popular in the United
//! > States, is accepted. Also ‘month/day’, omitting the year.
//! >
//! > Literal months may be spelled out in full: ‘January’, ‘February’,
//! > ‘March’, ‘April’, ‘May’, ‘June’, ‘July’, ‘August’, ‘September’,
//! > ‘October’, ‘November’ or ‘December’. Literal months may be
//! > abbreviated to their first three letters, possibly followed by an
//! > abbreviating dot. It is also permitted to write ‘Sept’ instead of
//! > ‘September’.

use winnow::{
ascii::{alpha1, dec_uint},
combinator::{alt, opt, preceded},
seq,
token::take,
PResult, Parser,
};

use super::s;
use crate::ParseDateTimeError;

/// A calendar date whose year may be absent.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Date {
    /// Day of the month.
    pub day: u32,
    /// Month number.
    pub month: u32,
    /// Year, or `None` when the parsed text did not contain one.
    pub year: Option<u32>,
}

/// Parse a date in any of the supported notations.
///
/// The alternatives are tried in order: [`iso`], `us`, `literal1`, then
/// `literal2`; the first one that succeeds wins.
pub fn parse(input: &mut &str) -> PResult<Date> {
    let mut any_notation = alt((iso, us, literal1, literal2));
    any_notation.parse_next(input)
}

/// Parse `YYYY-MM-DD` or `YY-MM-DD`
///
/// This is also used by [`combined`](super::combined).
pub fn iso(input: &mut &str) -> PResult<Date> {
seq!(Date {
year: year.map(Some),
_: s('-'),
month: month,
_: s('-'),
day: day,
})
.parse_next(input)
}

/// Parse `MM/DD/YYYY`, `MM/DD/YY` or `MM/DD`
fn us(input: &mut &str) -> PResult<Date> {
seq!(Date {
month: month,
_: s('/'),
day: day,
year: opt(preceded(s('/'), year)),
})
.parse_next(input)
}

/// Parse `14 November 2022`, `14 Nov 2022`, "14nov2022", "14-nov-2022", "14-nov2022", "14nov-2022"
fn literal1(input: &mut &str) -> PResult<Date> {
seq!(Date {
day: day,
_: opt(s('-')),
month: literal_month,
year: opt(preceded(opt(s('-')), year)),
})
.parse_next(input)
}

/// Parse `November 14, 2022` and `Nov 14, 2022`
fn literal2(input: &mut &str) -> PResult<Date> {
seq!(Date {
month: literal_month,
day: day,
// FIXME: GNU requires _some_ space between the day and the year,
// probably to distinguish with floats.
year: opt(preceded(s(","), year)),
})
.parse_next(input)
}

/// Parse a year written with four, three or two digits.
///
/// The widths are tried from longest to shortest. Four- and three-digit
/// years are returned unchanged. A two-digit year is expanded as described
/// by the GNU docs: 2000 is added when it is 68 or smaller, otherwise 1900.
///
/// Only slices consisting entirely of ASCII digits are accepted. Converting
/// the slice with `str::parse` alone is not enough, because the integer
/// parser also accepts a leading `+`, which would let `+22` or `+5` through
/// as a year.
fn year(input: &mut &str) -> PResult<u32> {
    s(alt((
        take(4usize).verify_map(year_digits),
        take(3usize).verify_map(year_digits),
        take(2usize).verify_map(year_digits).map(|short: u32| {
            if short <= 68 {
                short + 2000
            } else {
                short + 1900
            }
        }),
    )))
    .parse_next(input)
}

/// Convert `text` to a number, but only if every character is an ASCII digit.
fn year_digits(text: &str) -> Option<u32> {
    if text.bytes().all(|byte| byte.is_ascii_digit()) {
        text.parse().ok()
    } else {
        None
    }
}

/// Parse a numeric month and reject values outside `1..=12`.
fn month(input: &mut &str) -> PResult<u32> {
    s(dec_uint)
        .try_map(|value: u32| match value {
            1..=12 => Ok(value),
            _ => Err(ParseDateTimeError::InvalidInput),
        })
        .parse_next(input)
}

/// Parse a numeric day of the month and reject values outside `1..=31`.
///
/// The upper bound is not adjusted for the length of any particular month.
fn day(input: &mut &str) -> PResult<u32> {
    s(dec_uint)
        .try_map(|value: u32| match value {
            1..=31 => Ok(value),
            _ => Err(ParseDateTimeError::InvalidInput),
        })
        .parse_next(input)
}

/// Parse a month name and return its number (1 for January, 12 for December).
///
/// Full names and three-letter abbreviations are recognised, plus `sept`
/// for September. Only lowercase spellings are matched here.
// NOTE(review): presumably the input is lowercased before it reaches this
// parser, which is what would make the overall parse case-insensitive —
// confirm against the caller.
fn literal_month(input: &mut &str) -> PResult<u32> {
    const MONTH_NAMES: &[(&str, u32)] = &[
        ("january", 1),
        ("jan", 1),
        ("february", 2),
        ("feb", 2),
        ("march", 3),
        ("mar", 3),
        ("april", 4),
        ("apr", 4),
        ("may", 5),
        ("june", 6),
        ("jun", 6),
        ("july", 7),
        ("jul", 7),
        ("august", 8),
        ("aug", 8),
        ("september", 9),
        ("sep", 9),
        ("sept", 9),
        ("october", 10),
        ("oct", 10),
        ("november", 11),
        ("nov", 11),
        ("december", 12),
        ("dec", 12),
    ];

    s(alpha1)
        .verify_map(|word: &str| {
            MONTH_NAMES
                .iter()
                .find(|(name, _)| *name == word)
                .map(|&(_, number)| number)
        })
        .parse_next(input)
}

#[cfg(test)]
mod tests {
    use super::{parse, Date};

    // Test cases from the GNU docs:
    //
    // ```
    // 2022-11-14     # ISO 8601.
    // 22-11-14       # Assume 19xx for 69 through 99,
    //                # 20xx for 00 through 68 (not recommended).
    // 11/14/2022     # Common U.S. writing.
    // 14 November 2022
    // 14 Nov 2022    # Three-letter abbreviations always allowed.
    // November 14, 2022
    // 14-nov-2022
    // 14nov2022
    // ```

    /// All notations that include a year must agree on 14 November 2022.
    #[test]
    fn with_year() {
        let expected = Date {
            year: Some(2022),
            month: 11,
            day: 14,
        };

        let inputs = [
            "2022-11-14",
            "2022 - 11 - 14",
            "22-11-14",
            "2022---11----14",
            "22(comment 1)-11(comment 2)-14",
            "11/14/2022",
            "11--/14--/2022",
            "11(comment 1)/(comment 2)14(comment 3)/(comment 4)2022",
            "11 / 14 / 2022",
            "11/14/22",
            "14 november 2022",
            "14 nov 2022",
            "november 14, 2022",
            "november 14 , 2022",
            "nov 14, 2022",
            "14-nov-2022",
            "14nov2022",
            "14nov 2022",
        ];

        for input in inputs {
            // The parser advances the slice it is given; keep `input` intact
            // for the failure message.
            let mut remaining = input;
            assert_eq!(
                parse(&mut remaining).unwrap(),
                expected,
                "Format string: {input}"
            );
        }
    }

    /// Notations without a year must yield `year: None`.
    #[test]
    fn no_year() {
        let expected = Date {
            year: None,
            month: 11,
            day: 14,
        };

        let inputs = [
            "11/14",
            "14 november",
            "14 nov",
            "14(comment!)nov",
            "november 14",
            "november(comment!)14",
            "nov 14",
            "14-nov",
            "14nov",
            "14(comment????)nov",
        ];

        for input in inputs {
            let mut remaining = input;
            assert_eq!(parse(&mut remaining).unwrap(), expected);
        }
    }
}
Loading