From 2167646e01ff04dcb53d32849be482a2278a2061 Mon Sep 17 00:00:00 2001 From: Philip Chimento Date: Thu, 18 Aug 2022 17:54:43 -0700 Subject: [PATCH] Normative: Align ISO 8601 grammar with annotations from IXDTF The IETF SEDATE Working Group Internet-Draft, "Date and Time on the Internet: Timestamps with additional information" has reached agreement on all the open issues. This implements the conclusions of that draft RFC, which defines a date-time format called Internet Extended Date-Time Format (abbreviated IXDTF). IXDTF defines a grammar and semantics for annotations that can be appended to RFC 3339 strings. We were already using these annotations informally in Temporal for time zones and calendars. The main things that have to change are that annotations can have a "critical" flag ("!") which in Temporal has no effect on time zone and calendar annotations; and that multiple annotations are possible, where unknown ones are ignored unless they are marked critical. See: #1450 --- polyfill/lib/ecmascript.mjs | 34 +++++++---- polyfill/lib/regex.mjs | 11 ++-- polyfill/test/validStrings.mjs | 52 ++++++++++++----- spec/abstractops.html | 104 ++++++++++++++++++++++----------- 4 files changed, 133 insertions(+), 68 deletions(-) diff --git a/polyfill/lib/ecmascript.mjs b/polyfill/lib/ecmascript.mjs index 1bd5aa7c2c..248f841d7e 100644 --- a/polyfill/lib/ecmascript.mjs +++ b/polyfill/lib/ecmascript.mjs @@ -328,7 +328,15 @@ export const ES = ObjectAssign({}, ES2020, { if (offset === '-00:00') offset = '+00:00'; } const ianaName = match[19]; - const calendar = match[20]; + const annotations = match[20]; + let calendar; + for (const [, critical, key, value] of annotations.matchAll(PARSE.annotation)) { + if (key === 'u-ca') { + if (calendar === undefined) calendar = value; + } else if (critical === '!') { + throw new RangeError(`Unrecognized annotation: !${key}=${value}`); + } + } ES.RejectDateTime(year, month, day, hour, minute, second, millisecond, microsecond, nanosecond); 
return { year, @@ -365,7 +373,7 @@ export const ES = ObjectAssign({}, ES2020, { }, ParseTemporalTimeString: (isoString) => { const match = PARSE.time.exec(isoString); - let hour, minute, second, millisecond, microsecond, nanosecond, calendar; + let hour, minute, second, millisecond, microsecond, nanosecond, annotations, calendar; if (match) { hour = ES.ToInteger(match[1]); minute = ES.ToInteger(match[2] || match[5]); @@ -375,7 +383,14 @@ export const ES = ObjectAssign({}, ES2020, { millisecond = ES.ToInteger(fraction.slice(0, 3)); microsecond = ES.ToInteger(fraction.slice(3, 6)); nanosecond = ES.ToInteger(fraction.slice(6, 9)); - calendar = match[15]; + annotations = match[15]; + for (const [, critical, key, value] of annotations.matchAll(PARSE.annotation)) { + if (key === 'u-ca') { + if (calendar === undefined) calendar = value; + } else if (critical === '!') { + throw new RangeError(`Unrecognized annotation: !${key}=${value}`); + } + } } else { let z, hasTime; ({ hasTime, hour, minute, second, millisecond, microsecond, nanosecond, calendar, z } = @@ -388,18 +403,17 @@ export const ES = ObjectAssign({}, ES2020, { return { hour, minute, second, millisecond, microsecond, nanosecond, calendar }; } // Reject strings that are ambiguous with PlainMonthDay or PlainYearMonth. - // The calendar suffix is `[u-ca=${calendar}]`, i.e. calendar plus 7 characters, - // and must be stripped so presence of a calendar doesn't result in interpretation - // of otherwise ambiguous input as a time. - const isoStringWithoutCalendar = calendar - ? ES.Call(StringPrototypeSlice, isoString, [0, -(calendar.length + 7)]) + // The annotations must be stripped so presence of a calendar doesn't result + // in interpretation of otherwise ambiguous input as a time. + const isoStringWithoutAnnotations = annotations + ? 
ES.Call(StringPrototypeSlice, isoString, [0, -annotations.length]) : isoString; try { - const { month, day } = ES.ParseTemporalMonthDayString(isoStringWithoutCalendar); + const { month, day } = ES.ParseTemporalMonthDayString(isoStringWithoutAnnotations); ES.RejectISODate(1972, month, day); } catch { try { - const { year, month } = ES.ParseTemporalYearMonthString(isoStringWithoutCalendar); + const { year, month } = ES.ParseTemporalYearMonthString(isoStringWithoutAnnotations); ES.RejectISODate(year, month, 1); } catch { return { hour, minute, second, millisecond, microsecond, nanosecond, calendar }; diff --git a/polyfill/lib/regex.mjs b/polyfill/lib/regex.mjs index 615fa0a1df..33af5fc61f 100644 --- a/polyfill/lib/regex.mjs +++ b/polyfill/lib/regex.mjs @@ -15,9 +15,6 @@ export const timeZoneID = new RegExp( ')' ); -const calComponent = /[A-Za-z0-9]{3,8}/; -export const calendarID = new RegExp(`(?:${calComponent.source}(?:-${calComponent.source})*)`); - const yearpart = /(?:[+\u2212-]\d{6}|\d{4})/; const monthpart = /(?:0[1-9]|1[0-2])/; const daypart = /(?:0[1-9]|[12]\d|3[01])/; @@ -26,15 +23,15 @@ export const datesplit = new RegExp( ); const timesplit = /(\d{2})(?::(\d{2})(?::(\d{2})(?:[.,](\d{1,9}))?)?|(\d{2})(?:(\d{2})(?:[.,](\d{1,9}))?)?)?/; export const offset = /([+\u2212-])([01][0-9]|2[0-3])(?::?([0-5][0-9])(?::?([0-5][0-9])(?:[.,](\d{1,9}))?)?)?/; -const zonesplit = new RegExp(`(?:([zZ])|(?:${offset.source})?)(?:\\[(${timeZoneID.source})\\])?`); -const calendar = new RegExp(`\\[u-ca=(${calendarID.source})\\]`); +const zonesplit = new RegExp(`(?:([zZ])|(?:${offset.source})?)(?:\\[!?(${timeZoneID.source})\\])?`); +export const annotation = /\[(!)?([a-z_][a-z0-9_-]*)=([A-Za-z0-9]+(?:-[A-Za-z0-9]+)*)\]/g; export const zoneddatetime = new RegExp( - `^${datesplit.source}(?:(?:T|\\s+)${timesplit.source})?${zonesplit.source}(?:${calendar.source})?$`, + `^${datesplit.source}(?:(?:T|\\s+)${timesplit.source})?${zonesplit.source}((?:${annotation.source})*)$`, 'i' ); 
-export const time = new RegExp(`^T?${timesplit.source}(?:${zonesplit.source})?(?:${calendar.source})?$`, 'i'); +export const time = new RegExp(`^T?${timesplit.source}(?:${zonesplit.source})?((?:${annotation.source})*)$`, 'i'); // The short forms of YearMonth and MonthDay are only for the ISO calendar. // Non-ISO calendar YearMonth and MonthDay have to parse as a Temporal.PlainDate, diff --git a/polyfill/test/validStrings.mjs b/polyfill/test/validStrings.mjs index f3a9c3b661..f9b31d3146 100644 --- a/polyfill/test/validStrings.mjs +++ b/polyfill/test/validStrings.mjs @@ -116,6 +116,12 @@ class CharacterClass extends Choice { function character(str) { return new CharacterClass(str); } +function lcalpha() { + return new CharacterClass('abcdefghijklmnopqrstuvwxyz'); +} +function alpha() { + return new CharacterClass('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'); +} function digit() { return new CharacterClass('0123456789'); } @@ -152,6 +158,9 @@ class ZeroOrMore { return retval; } } +function zeroOrMore(production) { + return new ZeroOrMore(production); +} function oneOrMore(production) { return seq(production, new ZeroOrMore(production)); } @@ -196,6 +205,7 @@ const yearsDesignator = character('Yy'); const utcDesignator = withCode(character('Zz'), (data) => { data.z = 'Z'; }); +const annotationCriticalFlag = character('!'); const timeFractionalPart = between(1, 9, digit()); const fraction = seq(decimalSeparator, timeFractionalPart); @@ -266,7 +276,7 @@ const timeZoneIdentifier = withCode( choice(timeZoneUTCOffsetName, timeZoneIANAName), (data, result) => (data.ianaName = result) ); -const timeZoneBracketedAnnotation = seq('[', timeZoneIdentifier, ']'); +const timeZoneBracketedAnnotation = seq('[', [annotationCriticalFlag], timeZoneIdentifier, ']'); const timeZoneOffsetRequired = withCode(seq(timeZoneUTCOffset, [timeZoneBracketedAnnotation]), (data) => { if (!('offset' in data)) data.offset = undefined; }); @@ -274,8 +284,18 @@ const timeZoneNameRequired = 
withCode(seq([timeZoneUTCOffset], timeZoneBracketed if (!('offset' in data)) data.offset = undefined; }); const timeZone = choice(timeZoneOffsetRequired, timeZoneNameRequired); -const calendarName = withCode(choice(...calendarNames), (data, result) => (data.calendar = result)); -const calendar = seq('[u-ca=', calendarName, ']'); +const aKeyLeadingChar = choice(lcalpha(), character('_')); +const aKeyChar = choice(lcalpha(), digit(), character('_-')); +const aValChar = choice(alpha(), digit()); +const annotationKey = seq(aKeyLeadingChar, zeroOrMore(aKeyChar)); +const annotationValueComponent = oneOrMore(aValChar); +const annotationValue = seq(annotationValueComponent, zeroOrMore(seq('-', annotationValueComponent))); +const annotation = seq('[', /*[annotationCriticalFlag],*/ annotationKey, '=', annotationValue, ']'); +const calendarName = withCode(choice(...calendarNames), (data, result) => { + if (!data.calendar) data.calendar = result; +}); +const calendarAnnotation = seq('[', [annotationCriticalFlag], 'u-ca=', calendarName, ']'); +const annotations = oneOrMore(choice(calendarAnnotation, annotation)); const timeSpec = seq( timeHour, choice([':', timeMinute, [':', timeSecond, [timeFraction]]], seq(timeMinute, [timeSecond, [timeFraction]])) @@ -300,12 +320,12 @@ const date = withSyntaxConstraints( validateDayOfMonth ); const dateTime = seq(date, [timeSpecSeparator], [timeZone]); -const calendarDateTime = seq(dateTime, [calendar]); -const calendarDateTimeTimeRequired = seq(date, timeSpecSeparator, [timeZone], [calendar]); -const calendarTime = choice( - seq(timeDesignator, timeSpec, [timeZone], [calendar]), - seq(timeSpecWithOptionalTimeZoneNotAmbiguous, [calendar]) +const annotatedTime = choice( + seq(timeDesignator, timeSpec, [timeZone], [annotations]), + seq(timeSpecWithOptionalTimeZoneNotAmbiguous, [annotations]) ); +const annotatedDateTime = seq(dateTime, [annotations]); +const annotatedDateTimeTimeRequired = seq(date, timeSpecSeparator, [timeZone], 
[annotations]); const durationFractionalPart = withCode(between(1, 9, digit()), (data, result) => { const fraction = result.padEnd(9, '0'); @@ -359,19 +379,19 @@ const duration = seq( choice(durationDate, durationTime) ); -const instant = seq(date, [timeSpecSeparator], timeZoneOffsetRequired, [calendar]); -const zonedDateTime = seq(date, [timeSpecSeparator], timeZoneNameRequired, [calendar]); +const instant = seq(date, [timeSpecSeparator], timeZoneOffsetRequired, [annotations]); +const zonedDateTime = seq(date, [timeSpecSeparator], timeZoneNameRequired, [annotations]); // goal elements const goals = { Instant: instant, - Date: calendarDateTime, - DateTime: calendarDateTime, + Date: annotatedDateTime, + DateTime: annotatedDateTime, Duration: duration, - MonthDay: choice(dateSpecMonthDay, calendarDateTime), - Time: choice(calendarTime, calendarDateTimeTimeRequired), - TimeZone: choice(timeZoneIdentifier, seq(date, [timeSpecSeparator], timeZone, [calendar])), - YearMonth: choice(dateSpecYearMonth, calendarDateTime), + MonthDay: choice(dateSpecMonthDay, annotatedDateTime), + Time: choice(annotatedTime, annotatedDateTimeTimeRequired), + TimeZone: choice(timeZoneIdentifier, seq(date, [timeSpecSeparator], timeZone, [annotations])), + YearMonth: choice(dateSpecYearMonth, annotatedDateTime), ZonedDateTime: zonedDateTime }; diff --git a/spec/abstractops.html b/spec/abstractops.html index eafdf8f33e..3e8e5c771b 100644 --- a/spec/abstractops.html +++ b/spec/abstractops.html @@ -826,8 +826,12 @@

ISO 8601 grammar

  • Fractional parts may have 1 through 9 decimal places.
  • In time representations, only seconds are allowed to have a fractional part.
  • In duration representations, only hours, minutes, and seconds are allowed to have a fractional part.
  • -
  • The time zone may be given by a suffixed IANA time zone name in square brackets, instead of or in addition to a UTC offset.
  • -
  • The calendar may be given by a suffixed BCP 47 key in square brackets.
  • +
  • Any number of conforming suffixes in square brackets are allowed.
  • +
  • + Time zone and BCP 47 calendar suffixes are the only recognized ones. + Others are ignored, unless they are marked with a *!*, in which case they are rejected. +
  • +
  • A time zone suffix may be given instead of or in addition to a UTC offset.
  • A space may be used to separate the date and time in a combined date / time representation, but not in a duration.
  • Alphabetic designators may be in lower or upper case.
  • Period or comma may be used as the decimal separator.
  • @@ -852,6 +856,10 @@

    ISO 8601 grammar

    `a` `b` `c` `d` `e` `f` `g` `h` `i` `j` `k` `l` `m` `n` `o` `p` `q` `r` `s` `t` `u` `v` `w` `x` `y` `z` + LowercaseAlpha : one of + `a` `b` `c` `d` `e` `f` `g` `h` `i` `j` `k` `l` `m` + `n` `o` `p` `q` `r` `s` `t` `u` `v` `w` `x` `y` `z` + ASCIISign : one of `+` `-` @@ -921,6 +929,9 @@

    ISO 8601 grammar

    UTCDesignator : one of `Z` `z` + AnnotationCriticalFlag : + `!` + DateFourDigitYear : DecimalDigit DecimalDigit DecimalDigit DecimalDigit @@ -1060,7 +1071,7 @@

    ISO 8601 grammar

    TimeZoneUTCOffsetName TimeZoneBracketedAnnotation : - `[` TimeZoneIdentifier `]` + `[` AnnotationCriticalFlag? TimeZoneIdentifier `]` TimeZoneOffsetRequired : TimeZoneUTCOffset TimeZoneBracketedAnnotation? @@ -1072,22 +1083,40 @@

    ISO 8601 grammar

    TimeZoneUTCOffset TimeZoneBracketedAnnotation? TimeZoneBracketedAnnotation - CalChar : + AKeyLeadingChar : + LowercaseAlpha + `_` + + AKeyChar : + AKeyLeadingChar + DecimalDigit + `-` + + AValChar : Alpha DecimalDigit - CalendarNameComponent : - CalChar CalChar CalChar CalChar? CalChar? CalChar? CalChar? CalChar? + AnnotationKeyTail : + AKeyChar AnnotationKeyTail? + + AnnotationKey : + AKeyLeadingChar AnnotationKeyTail? - CalendarNameTail : - CalendarNameComponent - CalendarNameComponent `-` CalendarNameTail + AnnotationValueComponent : + AValChar AnnotationValueComponent? - CalendarName : - CalendarNameTail + AnnotationValueTail : + AnnotationValueComponent + AnnotationValueComponent `-` AnnotationValueTail - Calendar : - `[u-ca=` CalendarName `]` + AnnotationValue : + AnnotationValueTail + + Annotation : + `[` AnnotationCriticalFlag? AnnotationKey `=` AnnotationValue `]` + + Annotations : + Annotation Annotations? TimeSpec : TimeHour @@ -1105,15 +1134,15 @@

    ISO 8601 grammar

    DateTime : Date TimeSpecSeparator? TimeZone? - CalendarTime : - TimeDesignator TimeSpec TimeZone? Calendar? - TimeSpecWithOptionalTimeZoneNotAmbiguous Calendar? + AnnotatedTime : + TimeDesignator TimeSpec TimeZone? Annotations? + TimeSpecWithOptionalTimeZoneNotAmbiguous Annotations? - CalendarDateTime: - DateTime Calendar? + AnnotatedDateTime: + DateTime Annotations? - CalendarDateTimeTimeRequired : - Date TimeSpecSeparator TimeZone? Calendar? + AnnotatedDateTimeTimeRequired : + Date TimeSpecSeparator TimeZone? Annotations? DurationWholeSeconds : DecimalDigits[~Sep] @@ -1186,34 +1215,34 @@

    ISO 8601 grammar

    Sign? DurationDesignator DurationTime TemporalInstantString : - Date TimeSpecSeparator? TimeZoneOffsetRequired Calendar? + Date TimeSpecSeparator? TimeZoneOffsetRequired Annotations? TemporalDateTimeString : - CalendarDateTime + AnnotatedDateTime TemporalDurationString : Duration TemporalMonthDayString : DateSpecMonthDay - CalendarDateTime + AnnotatedDateTime TemporalTimeString : - CalendarTime - CalendarDateTimeTimeRequired + AnnotatedTime + AnnotatedDateTimeTimeRequired TemporalYearMonthString : DateSpecYearMonth - CalendarDateTime + AnnotatedDateTime TemporalZonedDateTimeString : - Date TimeSpecSeparator? TimeZoneNameRequired Calendar? + Date TimeSpecSeparator? TimeZoneNameRequired Annotations? TemporalCalendarString : CalendarName TemporalInstantString - CalendarDateTime - CalendarTime + AnnotatedDateTime + AnnotatedTime DateSpecYearMonth DateSpecMonthDay @@ -1240,7 +1269,7 @@

    1. For each nonterminal _goal_ of « |TemporalDateTimeString|, |TemporalInstantString|, |TemporalMonthDayString|, |TemporalTimeString|, |TemporalYearMonthString|, |TemporalZonedDateTimeString| », do 1. If _parseResult_ is not a Parse Node, set _parseResult_ to ParseText(StringToCodePoints(_isoString_), _goal_). 1. If _parseResult_ is not a Parse Node, throw a *RangeError* exception. - 1. Let each of _year_, _month_, _day_, _hour_, _minute_, _second_, _fSeconds_, and _calendar_ be the source text matched by the respective |DateYear|, |DateMonth|, |DateDay|, |TimeHour|, |TimeMinute|, |TimeSecond|, |TimeFraction|, and |CalendarName| Parse Node contained within _parseResult_, or an empty sequence of code points if not present. + 1. Let each of _year_, _month_, _day_, _hour_, _minute_, _second_, and _fSeconds_ be the source text matched by the respective |DateYear|, |DateMonth|, |DateDay|, |TimeHour|, |TimeMinute|, |TimeSecond|, and |TimeFraction| Parse Node contained within _parseResult_, or an empty sequence of code points if not present. 1. If the first code point of _year_ is U+2212 (MINUS SIGN), replace the first code point with U+002D (HYPHEN-MINUS). 1. Let _yearMV_ be ! ToIntegerOrInfinity(CodePointsToString(_year_)). 1. If _month_ is empty, then @@ -1281,10 +1310,15 @@

    1. If _parseResult_ contains a |TimeZoneNumericUTCOffset| Parse Node, then 1. Let _offset_ be the source text matched by the |TimeZoneNumericUTCOffset| Parse Node contained within _parseResult_. 1. Set _timeZoneResult_.[[OffsetString]] to CodePointsToString(_offset_). - 1. If _calendar_ is empty, then - 1. Let _calendarVal_ be *undefined*. - 1. Else, - 1. Let _calendarVal_ be CodePointsToString(_calendar_). + 1. Let _calendar_ be *undefined*. + 1. For each |Annotation| Parse Node _annotation_ contained within _parseResult_, do + 1. Let _key_ be the source text matched by the |AnnotationKey| Parse Node contained within _annotation_. + 1. If CodePointsToString(_key_) is *"u-ca"*, then + 1. If _calendar_ is *undefined*, then + 1. Let _value_ be the source text matched by the |AnnotationValue| Parse Node contained within _annotation_. + 1. Set _calendar_ to CodePointsToString(_value_). + 1. Else, + 1. If _annotation_ contains an |AnnotationCriticalFlag| Parse Node, throw a *RangeError* exception. 1. Return the Record { [[Year]]: _yearMV_, [[Month]]: _monthMV_, @@ -1296,7 +1330,7 @@

    [[Microsecond]]: _microsecondMV_, [[Nanosecond]]: _nanosecondMV_, [[TimeZone]]: _timeZoneResult_, - [[Calendar]]: _calendarVal_, + [[Calendar]]: _calendar_ }.