diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/DateTimeDefinitions.cs index 6976bf1dd5..bfcd4ceeed 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Chinese/DateTimeDefinitions.cs @@ -60,8 +60,8 @@ public static class DateTimeDefinitions public static readonly string SpecialDate = $@"(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})年)?(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})月)?{DateDayRegexInCJK}"; public const string DateUnitRegex = @"(?年|个月|周|週|日|天)"; public const string BeforeRegex = @"以前|之前|前"; - public const string AfterRegex = @"以后|以後|之后|之後|后|後"; - public const string TimePeriodLeftRegex = @"^[.]"; + public const string AfterRegex = @"以后|以後|之后|之後|后|後|还剩"; + public const string TimePeriodLeftRegex = @"还剩"; public static readonly string DateRegexList1 = $@"({LunarRegex}(\s*))?((({SimpleYearRegex}|{DateYearInCJKRegex})年)(\s*))?{MonthRegex}(\s*){DateDayRegexInCJK}((\s*|,|,){WeekDayRegex})?"; public static readonly string DateRegexList2 = $@"((({SimpleYearRegex}|{DateYearInCJKRegex})年)(\s*))?({LunarRegex}(\s*))?{MonthRegex}(\s*){DateDayRegexInCJK}((\s*|,|,){WeekDayRegex})?"; public static readonly string DateRegexList3 = $@"((({SimpleYearRegex}|{DateYearInCJKRegex})年)(\s*))?({LunarRegex}(\s*))?{MonthRegex}(\s*)({DayRegexNumInCJK}|{DayRegex})((\s*|,|,){WeekDayRegex})?"; diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/DateTimeDefinitions.cs index f9b127ca8f..7a622c197b 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/DateTimeDefinitions.cs @@ -28,10 +28,10 @@ public static class DateTimeDefinitions public const string ArticleRegex = @"\b(de|het|een)\b"; public const 
string ApostrofRegex = @"(’|‘|'|ʼ)"; public static readonly string ApostrofsRegex = $@"({ApostrofRegex}\s*s)"; - public const string RelativeRegex = @"\b(?((dit|deze|volgende?|(aan)?komende?|aanstaande?|huidige?|vorige?|verleden|voorgaande?|laatste|afgelopen|(op\s+)?de|het)\b)|gister(en)?)"; - public const string StrictRelativeRegex = @"\b(?((dit|deze|volgende?|(aan)?komende?|aanstaande?|huidige?|vorige?|verleden|voorgaande?|laatste|afgelopen)\b)|gister(en)?)"; + public const string RelativeRegex = @"\b(?((dit|deze|(erop)?volgende?|(aan)?komende?|aanstaande?|huidige?|vorige?|verleden|voorgaande?|laatste|afgelopen|(op\s+)?de|het)\b)|gister(en)?)"; + public const string StrictRelativeRegex = @"\b(?((dit|deze|(erop)?volgende?|(aan)?komende?|aanstaande?|huidige?|vorige?|verleden|voorgaande?|laatste|afgelopen)\b)|gister(en)?)"; public const string UpcomingPrefixRegex = @"((deze\s+)?((aan)?komende?|aanstaande?))"; - public static readonly string NextPrefixRegex = $@"\b(volgende?|eerstvolgende|{UpcomingPrefixRegex})\b"; + public static readonly string NextPrefixRegex = $@"\b((erop)?volgende?|eerstvolgende|{UpcomingPrefixRegex})\b"; public const string AfterNextSuffixRegex = @"\b((na\s+(afloop\s+van\s+)?((de|het)\s+)?volgende?)|over)\b"; public const string PastPrefixRegex = @"((deze\s+)?(verleden|afgelopen))\b"; public static readonly string PreviousPrefixRegex = $@"((voorgaand[e]|vorige?|verleden|laatste|{PastPrefixRegex})\b|gister(en)?)"; @@ -41,10 +41,10 @@ public static class DateTimeDefinitions public const string ReferencePrefixRegex = @"(dezelfde|hetzelfde|dat(zelfde)?|die|overeenkomstige)\b"; public const string FutureSuffixRegex = @"\b(((in\s+de\s+)?toekomst)|daarna|over|na)\b"; public const string PastSuffixRegex = @"^\b$"; - public const string DayRegex = @"(de\s*)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:ste|de|e)?(?=\b|t)"; + public const string DayRegex = @"(de\s*)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:\s*(ste|de|e))?(?=\b|t)"; public static readonly string WrittenDayRegex 
= $@"(?({WrittenOneToNineRegex})|({WrittenElevenToNineteenRegex})|(({WrittenOneToNineRegex}(en|ën))?twintig)|(((één|een)(en|ën))?dertig))"; public static readonly string WrittenCardinalDayRegex = $@"(?<=((de\s+)|\b))(?(éérste|eerste|tweede|derde|vierde|vijfde|zesde|zevende|achtste|negende|tiende|{WrittenElevenToNineteenRegex}de|({WrittenOneToNineRegex}(en|ën))?twintigste|((één|een)(en|ën))?dertigste))"; - public const string ImplicitDayRegex = @"(de\s*)?(?(3[0-1]|[0-2]?\d)(ste|e|de))\b"; + public const string ImplicitDayRegex = @"(de\s*)?(?(3[0-1]|[0-2]?\d)(\s*(ste|de|e)))\b"; public const string MonthNumRegex = @"\b(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)\b"; public const string WrittenOneToNineRegex = @"(één|een|twee|drie|vier|vijf|zes|zeven|acht|negen)"; public const string WrittenElevenToNineteenRegex = @"(elf|elven|twaalf|dertien|veertien|vijftien|zestien|zeventien|achttien|negentien)"; @@ -62,7 +62,7 @@ public static class DateTimeDefinitions public static readonly string AmPmDescRegex = $@"(:?{BaseDateTime.BaseAmPmDescRegex})"; public static readonly string DescRegex = $@"(:?(:?({OclockRegex}\s+)?(?({AmPmDescRegex}|{AmDescRegex}|{PmDescRegex}|{SpecialDescRegex}))\.?)|{OclockRegex})"; public static readonly string PmRegex = $@"(?({ApostrofsRegex}|des)\s+(\bmiddags|avonds|nachts)|((in|tegen|op|om|met)\s+(de\s+)?)(((na)?middag|avond|(midder)?nacht|lunchtijd))|dag)"; - public static readonly string PmRegexFull = $@"(?(({ApostrofsRegex}|des)\s+(\bmiddags|avonds|nachts)|((in|tegen|op|om|met)\s+(de\s+)?)?(((na)?middag|avond|(midder)?nacht|lunchtijd))))"; + public static readonly string PmRegexFull = $@"(?(({ApostrofsRegex}|des)\s+(\bmiddags|avonds|nachts)|((in|tegen|op|om|met)\s+(de\s+)?)?(((na)?middag|(?(({ApostrofsRegex}|des)\s+(ochtends|morgens)|((in|tegen|op)\s+de)(\s+(ochtend|morgen))|(?<=gisteren|morgen|vandaag|(maan|dins|woens|donder|vrij|zater|zon)dag)(ochtend|morgen)|^?ochtend))"; public static readonly string FullDescRegex = 
$@"({DescRegex}|{AmRegex}|{PmRegexFull})"; public static readonly string TwoDigitYearRegex = $@"\b(?([0-24-9]\d))(?!(\s*(([:\.]\d)|keer|uurs?|{AmDescRegex}|{PmDescRegex})))\b"; @@ -72,7 +72,7 @@ public static class DateTimeDefinitions public static readonly string RelativeMonthRegex = $@"(?((van\s+)?(de\s+)?)?{RelativeRegex}\s+maand)\b"; public const string WrittenMonthRegex = @"(((de\s+)?maand\s+)?(?januari|februari|maart|april|mei|juni|juli|augustus|september|oktober|november|december|jan|feb|mar|mrt|apr|jun|jul|aug|sep|sept|oct|okt|nov|dec))"; public static readonly string MonthSuffixRegex = $@"(?((in|van|tijdens|sinds|tot|op)\s+)?({RelativeMonthRegex}|{WrittenMonthRegex}))"; - public const string DateUnitRegex = @"(?(eeuw|maand)(?en)?|jaar|(?jaren|weken)|jr|decennia|mnd|week|(?(werk))?dag(?en)?|dgn)\b"; + public const string DateUnitRegex = @"(?(eeuw|maand|weekend)(?en)?|jaar|(?jaren|weken)|jr|decennia|mnd|week|(?we[er]k)?dag(?en)?|dgn)\b"; public const string DateTokenPrefix = @"op "; public const string TimeTokenPrefix = @"om "; public const string TokenBeforeDate = @"op "; @@ -91,7 +91,7 @@ public static class DateTimeDefinitions public static readonly string BetweenRegex = $@"\b(tussen\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b"; public static readonly string RelativeYearRegex = $@"({YearRegex}|(?volgende?|komende?|aanstaande?|aankomende?|huidige?|vorige?|afgelopen|dit)\s+jaar)"; public static readonly string MonthWithYear = $@"\b(({WrittenMonthRegex}(\.)?(\s*)[/\\\-\.,]?(\s+(van|over|in))?(\s*){RelativeYearRegex})|({RelativeYearRegex}(\s*),?(\s*){WrittenMonthRegex}))\b"; - public static readonly string OneWordPeriodRegex = 
$@"\b((((de\s+)?maand\s+(van\s+)?)?({StrictRelativeRegex}\s+)?(?januari|februari|maart|april|mei|juni|juli|augustus|september|oktober|november|december|jan\.?|feb\.?|mar\.?|mrt\.?|apr\.?|jun\.?|jul\.?|aug\.?|sep\.?|sept\.?|oct\.?|okt\.?|nov\.?|dec\.?))|(maand|jaar)\s+tot(\s+op)?\s+heden|(({RelativeRegex}\s+)(mijn\s+)?(weekend|week|maand|jaar(?!\s+hoger dan))|({RelativeRegex}\s+)?(mijn\s+)(weekend|week|maand|jaar))(?!((\s+van)?\s+\d+|\s+tot(\s+op)?\s+heden|nu))(\s+{AfterNextSuffixRegex})?)\b"; + public static readonly string OneWordPeriodRegex = $@"\b((((de\s+)?maand\s+(van\s+)?)?({StrictRelativeRegex}\s+)?(?januari|februari|maart|april|mei|juni|juli|augustus|september|oktober|november|december|jan\.?|feb\.?|mar\.?|mrt\.?|apr\.?|jun\.?|jul\.?|aug\.?|sep\.?|sept\.?|oct\.?|okt\.?|nov\.?|dec\.?))|(maand|jaar)\s+tot(\s+op)?\s+heden|(({RelativeRegex}\s+)(mijn\s+)?(weekend|(?werkweek)|week|maand|jaar(?!\s+hoger dan))|({RelativeRegex}\s+)?(mijn\s+)(weekend|(?werkweek)|week|maand|jaar))(?!((\s+van)?\s+\d+|\s+tot(\s+op)?\s+heden|nu))(\s+{AfterNextSuffixRegex})?)\b"; public static readonly string MonthNumWithYear = $@"\b(({BaseDateTime.FourDigitYearRegex}(\s*)[/\-\.](\s*){MonthNumRegex})|({MonthNumRegex}(\s*)[/\-](\s*){BaseDateTime.FourDigitYearRegex}))\b"; public static readonly string WeekOfMonthRegex = $@"\b(?(de\s+)?(?eerste|tweede|derde|vierde|vijfde|1e|1ste|2e|2de|3e|3de|4e|4de|5e|5de|laatste)\s+week\s+{MonthSuffixRegex}(\s+{BaseDateTime.FourDigitYearRegex}|{RelativeRegex}\s+year)?)\b"; public static readonly string WeekOfYearRegex = $@"(\b(?(de\s+)?(?eerste|tweede|derde|vierde|vijfde|1e|1ste|2e|2de|3e|3de|4e|4de|5e|5de|laatste)\s+week(\s+van)?\s+({YearRegex}|{RelativeRegex}\s+jaar))\b)|(\b({YearRegex}|{RelativeRegex}\s+jaar)\s(?(de\s+)?(?eerste|tweede|derde|vierde|vijfde|1e|1ste|2e|2de|3e|3de|4e|4de|5e|5de|laatste)\s+week)\b)"; @@ -107,29 +107,29 @@ public static class DateTimeDefinitions public static readonly string HalfYearBackRegex = 
$@"(het\s+)?(H(?[1-2])|({HalfYearTermRegex}))(\s+van|\s*,\s*)?\s+({YearRegex})"; public static readonly string HalfYearRelativeRegex = $@"(het\s+)?{HalfYearTermRegex}(\s+van|\s*,\s*)?\s+({RelativeRegex}\s+jaar)"; public static readonly string AllHalfYearRegex = $@"({HalfYearFrontRegex})|({HalfYearBackRegex})|({HalfYearRelativeRegex})"; - public const string EarlyPrefixRegex = @"\b(?((?eerder)|vroeg(er)?|begin(nend)?|start(end)?)(\s+(in|op|van)(\s+de)?)?)\b"; - public const string MidPrefixRegex = @"\b(?(mid(den|-)?|halverwege|op\s+de\s+helft|half)(\s+(in|op|van)(\s+de)?)?)"; - public const string LaterPrefixRegex = @"\b(?(laat|(?later)|aan\s+het\s+einde?(\s+van(\s+de)?)?|eind(igend)?|afsluitend)(\s+(in|op|van)(\s+de)?)?)\b"; + public const string EarlyPrefixRegex = @"\b(?((?eerder)|vroeg(er)?|((de|het)\s+)?(begin(nend)?|start(end)?))(\s+(in|op|van)(\s+de)?)?)\b"; + public const string MidPrefixRegex = @"\b(?(het\s+)?(mid(den|-)?|halverwege|op\s+de\s+helft|half)(\s+(in|op|van)(\s+de)?)?)"; + public const string LaterPrefixRegex = @"\b(?(laat|(?later)|(aan\s+)?het\s+einde?(\s+van(\s+de)?)?|eind(e|igend)?|afsluitend)(\s+(in|op|van)(\s+de)?)?)\b"; public static readonly string PrefixPeriodRegex = $@"({EarlyPrefixRegex}|{MidPrefixRegex}|{LaterPrefixRegex})"; public static readonly string PrefixDayRegex = $@"\b(((?eerder|vroeg(er)?|begin|start)|(?midden|halverwege|op\s+de\s+helft)|in\s+de|(?laat|later))(\s+(in|op|van))?(\s+de\s+dag)?$)|^\s*(((?eerder|vroeg(er)?|begin|start)|(?midden|halverwege|op\s+de\s+helft)|in\s+de|(?laat|later))(\s+(in|op|van))(\s+de\s+dag))\b"; public const string SeasonDescRegex = @"(?lente|voorjaar|zomer|herfst|najaar|winter)"; public static readonly string SeasonRegex = $@"\b(?({PrefixPeriodRegex}(\s+)?)?({ArticleRegex}\s+)?({RelativeRegex}\s+)?{SeasonDescRegex}((\s+(in|van)|\s*,\s*)?\s+({YearRegex}|({ArticleRegex}\s+)?({RelativeRegex}\s+)?jaar))?)\b"; public const string WhichWeekRegex = @"\b(week)(\s*)(?5[0-3]|[1-4]\d|0?[1-9])\b"; - public 
const string WeekOfRegex = @"(de\s+)?(week)(\s+van)(\s+de|het)?"; + public const string WeekOfRegex = @"(de\s+)?(week)\s+(van(\s+(de|het))?|(beginnend|die\s+begint|startend|aanvangend)(\s+op)?)"; public const string MonthOfRegex = @"(maand)(\s*)(van)"; public const string MonthRegex = @"\b(?(januari|februari|maart|april|mei|juni|juli|augustus|september|oktober|november|december)\b|(jan|feb|mar|mrt|apr|jun|jul|aug|sept|sep|oct|okt|nov|dec)(?:\.|\b))"; public static readonly string DateYearRegex = $@"(?{BaseDateTime.FourDigitYearRegex}|{TwoDigitYearRegex})"; public static readonly string YearSuffix = $@"((,|\s*van)?\s*({DateYearRegex}|{FullTextYearRegex}))"; public static readonly string OnRegex = $@"(?<=\bop\s+)({DayRegex})\b(?!(\.|:)\d+)"; - public const string RelaxedOnRegex = @"\b(?<=op\s+)(?:de\s+)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:(ste|de|e))?\b(?!(\.|:)\d+)"; + public const string RelaxedOnRegex = @"\b(?<=op\s+)(?:de\s+)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:\s*(ste|de|e))?\b(?!(\.|:)\d+)"; public const string PrefixWeekDayRegex = @"(\s*((,?\s*op)|[-—–]))"; public static readonly string ThisRegex = $@"\b((deze(\s+week{PrefixWeekDayRegex}?)?\s*){WeekDayRegex})|({WeekDayRegex}((\s+van)?\s*deze\s+week))\b"; - public static readonly string LastDateRegex = $@"\b({PreviousPrefixRegex}(\s*week{PrefixWeekDayRegex}?)?\s+{WeekDayRegex})|({WeekDayRegex}(\s+vorige\s+week))\b"; + public static readonly string LastDateRegex = $@"\b({PreviousPrefixRegex}(\s*week{PrefixWeekDayRegex}?)?\s+{WeekDayRegex})|({WeekDayRegex}(\s+van)?(\s+vorige\s+week))\b"; public const string WeekDayForNextDateRegex = @"\b(?((ma|di(ns)?|wo(e(ns)?)?|do|vr(ij)?|za(t)?|zo)(\.|\b))|((?:maan(?!den)|dins|woens|donder|vrij|zater|zon)(dag)?))"; public static readonly string NextDateRegex1 = $@"\b({NextPrefixRegex}(\s*week{PrefixWeekDayRegex}?)?\s+{WeekDayForNextDateRegex}|(op\s+)?{WeekDayForNextDateRegex}\s+((van\s+)?(de\s+)?{NextPrefixRegex})\s*week|(op\s+)?{NextPrefixRegex}\s*week\s+{WeekDayForNextDateRegex})"; 
public static readonly string NextDateRegex2 = $@"\b({NextPrefixRegex}(\s*week(\s*,?\s*op)?)?\s+{WeekDayRegex}|(op\s+)?{WeekDayRegex}\s+((van\s+)?(de\s+)?{NextPrefixRegex})\s*week|(op\s+)?{NextPrefixRegex}\s*week\s+{WeekDayRegex})"; public static readonly string NextDateRegex = $@"({NextDateRegex1}|{NextDateRegex2})"; - public static readonly string SpecialDayRegex = $@"\b(eergisteren|overmorgen|(de\s+)?dag\s+na\s+morgen|(de\s+)?dag\s+(ervoor|erna)|((de\s+)?({RelativeRegex}|mijn)\s+dag)\b|gisteren|(deze\s+)?morgen|vandaag|morgen(middag))(?!s\b)"; + public static readonly string SpecialDayRegex = $@"\b(eergisteren|overmorgen|(de\s+)?dag\s+na\s+morgen|(de\s+)?dag\s+(ervoor|erna)|((de\s+)?({StrictRelativeRegex}|mijn)\s+dag)\b|(de\s+dag(?!\s+van))|gisteren|(deze\s+)?morgen|vandaag|morgen(middag))(?!s\b)"; public static readonly string SpecialDayWithNumRegex = $@"\b((?{WrittenNumRegex})\s+dag(en)?\s+(gerekend\s+)?(vanaf\s+)(?gisteren|morgen|vandaag))\b"; public static readonly string RelativeDayRegex = $@"\b(((de\s+)?{RelativeRegex}\s+dag))\b"; public const string SetWeekDayRegex = @"\b(?op\s+({ArticleRegex}\s+)?)?(?morgen|ochtend|middag|avond|nacht|zondag|maandag|dinsdag|woensdag|donderdag|vrijdag|zaterdag)((?e)n)\b"; @@ -137,9 +137,9 @@ public static class DateTimeDefinitions public static readonly string RelativeWeekDayRegex = $@"\b({WrittenNumRegex}\s+{WeekDayRegex}\s+(vanaf\s+nu|later))\b"; public static readonly string SpecialDate = $@"(?=\b(op\s+)(de\s+)?){DayRegex}\b"; public const string DatePreposition = @"\b(op(\s+de)?)"; - public static readonly string DateExtractorYearTermRegex = $@"(\s+|\s*[,./-]\s*){DateYearRegex}"; - public static readonly string DateExtractor1 = $@"\b({WeekDayRegex}\s*[,-]?\s*)?(({MonthRegex}(\.)?\s*[/\\.,-]?\s*{DayRegex}(?!\s*{MonthRegex}))|(\({MonthRegex}\s*[-.]\s*{DayRegex}\))|({DayRegex}(\.)?\s*[/\\.,-]?\s*{MonthRegex}))(\s*\(\s*{WeekDayRegex}\s*\))?({DateExtractorYearTermRegex}(?!\s*{MonthRegex})\b)?"; - public static readonly 
string DateExtractor3 = $@"\b({WeekDayRegex}(\s+|\s*,\s*)?(de\s+)?)?(({DayRegex}(\s*dag|\.)?)((\s+|\s*[,/-]\s*|\s+van\s+)?{MonthRegex})((\.)?(\s+|\s*[,/-]\s*|\s+in\s+)?{DateYearRegex})?|{BaseDateTime.FourDigitYearRegex}\s*[,./-]?\s*(de\s*)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:ste|de|e)?(\s*dag|\.)?\s*[,./-]?\s*{MonthRegex})\b"; + public static readonly string DateExtractorYearTermRegex = $@"(\s+(van\s+)?|\s*[,./-]\s*){DateYearRegex}"; + public static readonly string DateExtractor1 = $@"\b({WeekDayRegex}\s*[,-]?\s*)?(({MonthRegex}(\.)?\s*[/\\.,-]?\s*{DayRegex}(?!\s*({MonthRegex}|\-\s*\d{{2}}\b)))|(\({MonthRegex}\s*[-.]\s*{DayRegex}\))|({DayRegex}(\.)?\s*[/\\.,-]?\s*{MonthRegex}))(\s*\(\s*{WeekDayRegex}\s*\))?({DateExtractorYearTermRegex}(?!\s*{MonthRegex})\b)?"; + public static readonly string DateExtractor3 = $@"\b({WeekDayRegex}(\s+|\s*,\s*)?(de\s+)?)?(({DayRegex}(\s*dag|\.)?)((\s+|\s*[,/-]\s*|\s+van\s+)?{MonthRegex})((\.)?(\s+|\s*[,/-]\s*|\s+in\s+)?{DateYearRegex})?|{BaseDateTime.FourDigitYearRegex}\s*[,./-]?\s*(de\s*)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:\s*(ste|de|e))?(\s*dag|\.)?\s*[,./-]?\s*{MonthRegex})\b"; public static readonly string DateExtractor4 = $@"\b{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}[\.]?\s*[/\\\-]\s*{ApostrofRegex}?{DateYearRegex}"; public static readonly string DateExtractor5 = $@"\b{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+)"; public static readonly string DateExtractor6 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({WeekDayRegex}\s+)?{MonthNumRegex}[\.]{DayRegex}(?!([%]|\s*{FullDescRegex}))\b|(?<={DatePreposition}\s+){MonthNumRegex}[\-\.]{DayRegex}(?!([%]|\s*{FullDescRegex}))\b"; @@ -148,7 +148,7 @@ public static class DateTimeDefinitions public static readonly string DateExtractor8 = $@"\b((?<=(^|{DatePreposition}\s+)){WeekDayRegex}\s+)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:ste|de|e)?|{DayRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{MonthRegex})"; + public 
static readonly string DateExtractorA = $@"\b({WeekDayRegex}\s+)?({BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DayRegex}(?!\s*[/\\\-\.]\s*\d+)|{MonthRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*(de\s*)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:\s*(ste|de|e))?|{DayRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{MonthRegex})"; public static readonly string OfMonth = $@"(^\s*((van|in)\s+)?)({MonthRegex})"; public static readonly string MonthEnd = $@"{MonthRegex}(\s+de\s*)?$"; public static readonly string WeekDayEnd = $@"(deze\s+)?{WeekDayRegex}\s*,?\s*$"; @@ -220,7 +220,8 @@ public static class DateTimeDefinitions public static readonly string DateTimeSpecificTimeOfDayRegex = $@"\b(({RelativeRegex}\s+{DateTimeTimeOfDayRegex})|van(nacht|avond|middag|ochtend|morgen))\b"; public static readonly string TimeOfTodayAfterRegex = $@"^\s*(,\s*)?((in\s+de)|(op\s+de))?{DateTimeSpecificTimeOfDayRegex}"; public static readonly string TimeOfTodayBeforeRegex = $@"{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(om|rond|tegen|op\s+de|op))?\s*$"; - public static readonly string SimpleTimeOfTodayAfterRegex = $@"\b({HourNumRegex}|{BaseDateTime.HourRegex})(\s*({OclockRegex}|u))?\s*(,\s*)?((in|op)\s+de\s+)?{DateTimeSpecificTimeOfDayRegex}"; + public const string NonTimeContextTokens = @"\b(gebouw)"; + public static readonly string SimpleTimeOfTodayAfterRegex = $@"(?ochtend|(na)?middag|avond|nacht))\b"; public static readonly string PeriodSpecificTimeOfDayRegex = $@"\b(({StrictRelativeRegex}(\s+)?{PeriodTimeOfDayRegex})\b|\bvan(nacht|avond|(na)?middag|ochtend))\b"; public static readonly string PeriodTimeOfDayWithDateRegex = $@"(({TimeOfDayRegex}(\s+(om|rond|van|tegen|op(\s+de)?))?))\b"; - public static readonly string PeriodTimeOfDayWithDateRegexWithAnchors = $@"((({TimeOfDayRegex}(\s+(om|rond|van|tegen|op(\s+de)?))?))(?=({MiddlePauseRegex})?\s*$)|(?<=^\s*({MiddlePauseRegex})?){TimeOfDayRegex})"; + public 
static readonly string PeriodTimeOfDayWithDateRegexWithAnchors = $@"((({TimeOfDayRegex}(\s+(om|rond|van|tegen|op(\s+de)?))?))(?=({MiddlePauseRegex})?\s*$)|(?<=^\s*({MiddlePauseRegex})?)(?!{MealTimeRegex}){TimeOfDayRegex})"; public const string LessThanRegex = @"\b((binnen\s+)?minder\s+dan)\b"; public const string MoreThanRegex = @"\b((meer|langer)\s+dan|ruim)\b"; - public static readonly string DurationUnitRegex = $@"(?{DateUnitRegex}|(min\.|sec\.)|((?halfuur)|(?kwartier\s+uur)|(?kwartier)|uur|uren|u|minuten|minuut|mins|min|m|secondes|seconden|secs|sec|s|nacht(en)?)\b)(\s+lang\b)?"; + public static readonly string DurationUnitRegex = $@"(?{DateUnitRegex}|(min\.|sec\.)|((?halfuur)|(?kwartier\s+uur)|(?kwartier)|uur|uren|u|minuten|minuut|m(ins?)?|seconde[ns]?|s(ecs?)?|nacht(en)?)\b)(\s+lang\b)?"; public const string SuffixAndRegex = @"(?\s*(en|ën)(\s*een)?\s*(?hal(f|ve)|kwart|kwartier)|(?(een\s+)?kwartier))"; - public const string PeriodicRegex = @"\b(?dagelijkse?|(drie)?maandelijkse?|wekelijkse?|twee-?wekelijkse?|jaarlijkse?|kwartaal)\b"; - public static readonly string EachUnitRegex = $@"(?((iedere|elke|eenmaal per)(?\s+andere)?\s*{DurationUnitRegex})|(({DurationUnitRegex}|{WeekDayRegex})\s+om(\s+de)?(?\s+andere)?\s*(week|{DurationUnitRegex})))"; + public const string PeriodicRegex = @"\b(?dagelijkse?|(drie)?maandelijkse?|wekelijkse?|twee-?wekelijkse?|(half)?jaarlijkse?|kwartaal)\b"; + public static readonly string EachUnitRegex = $@"(?((iedere?|elke?|eenmaal per)(?\s+andere)?\s*({DurationUnitRegex}|(?weekend(en)?))|({DurationUnitRegex}|{WeekDayRegex})\s+om(\s+de)?(?\s+andere)?\s*(week|{DurationUnitRegex})))"; public const string EachPrefixRegex = @"\b(?(iedere|elke|eenmaal per)\s*$)"; - public const string SetEachRegex = @"\b(?(iedere|elke|om de)\s*(?\s+andere)?\s*(week)?)"; + public static readonly string SetEachRegex = $@"\b(?(iedere|elke|om\s+de)\s*(?\s+andere)?\s*(week\s*(?={WeekDayRegex}))?)"; public const string SetLastRegex = 
@"(?volgende?|komende|aankomende|aanstaande|deze|huidige|aanstaande|vorige?|verleden|laatste)"; public const string EachDayRegex = @"^\s*(iedere|elke)\s*dag\b"; public const string BeforeEachDayRegex = @"(iedere|elke)\s*dag\s*"; @@ -247,8 +248,8 @@ public static class DateTimeDefinitions public const string AllRegex = @"\b(?((de|het|een)\s+)?((ge)?hele|volledige|ganse|heel|volledig|volle)(\s+|-)(?jaar|maand|week|dag))\b"; public const string HalfRegex = @"(((een)\s*)|\b)(?(half|halve)\s+(?jaar|maand|week|dag|uur|halfuur)|(?halfuur))\b"; public const string ConjunctionRegex = @"\b((en(\s+voor)?)|plus)\b"; - public static readonly string HolidayList1 = $@"(?goede vrijdag|pasen|((eerste|tweede)\s+)?paasdag|paas(zondag|maandag)|kerst|kerstavond|kerstmis|thanksgiving|halloween|(islamitisch\s+)?nieuwjaar|oud en nieuw|oud & nieuw|pinksteren|oude?jaar|oude?jaarsavond|silvester|silvesteravond|sinterklaas|sinterklaasfeest|sinterklaasavond|pakjesavond|eid al(-|\s+)fitr|eid al(-|\s+)adha)"; - public static readonly string HolidayList2 = $@"(?black friday|cyber monday|nationale dodenherdenking|nationale herdenking|dodenherdenking|dag van de leraar|dag van de leerkracht(en)?|dag van de arbeid|feest van de arbeid|yuandan|valentijn|sint-maartensfeest|sint-maarten|driekoningen|keti(\s+|-)?koti|ramadan|suikerfeest|offerfeest|allerheiligen|allerheiligenavond|franse nationale feestdag|bestorming van de bastille)"; + public static readonly string HolidayList1 = $@"(?goede vrijdag|pasen|((eerste|tweede)\s+)?paasdag|paas(zondag|maandag)|kerst(avond|mis)?|thanksgiving|halloween|(islamitisch\s+)?nieuwjaar|oud en nieuw|oud & nieuw|pinksteren|oude?jaar|oude?jaarsavond|silvester|silvesteravond|sinterklaas|sinterklaasfeest|sinterklaasavond|pakjesavond|eid al(-|\s+)fitr|eid al(-|\s+)adha|juneteenth|vrijheidsdag|jubilee\s+day)"; + public static readonly string HolidayList2 = $@"(?black friday|cyber monday|nationale dodenherdenking|nationale 
herdenking|dodenherdenking|dag\s+van\s+de\s+(leraar|leerkracht(en)?|arbeid|aarde)|feest\s+van\s+de\s+arbeid|yuandan|valentijn|sint-maartensfeest|sint-maarten|driekoningen|keti(\s+|-)?koti|ramadan|suikerfeest|offerfeest|allerheiligen|allerheiligenavond|franse nationale feestdag|bestorming van de bastille)"; public static readonly string HolidayList3 = $@"(?(martin luther king|mlk|dankzeggings|valentijns|nieuwjaars|(eerste|1e|tweede|2e)\s+paas|prinsjes|konings|koninginne|bevrijdings|hemelvaarts|(eerste|1e|tweede|2e)\s+kerst|vader|moeder|meisjes|(amerikaanse|us\s+)?onafhankelijk(heid)?s|(nederlandse\s+)?veteranen|boomplant|(nationale\s+)?boomfeest)dag)"; public static readonly string HolidayRegex = $@"\b(({StrictRelativeRegex}\s+({HolidayList1}|{HolidayList2}|{HolidayList3}))|(({HolidayList1}|{HolidayList2}|{HolidayList3})(\s+(van\s+)?({YearRegex}|{RelativeRegex}\s+jaar))?))\b"; public static readonly string AMTimeRegex = $@"(?{ApostrofsRegex}\s*(morgens|ochtends)|in\s+de\s+(morgen|ochtend))"; @@ -260,12 +261,12 @@ public static class DateTimeDefinitions public const string AsapTimeRegex = @"\b(zo\s+snel\s+mogelijk|zsm)\b"; public const string InclusiveModPrepositions = @"(?((in|tegen|tijdens|op|om)\s+of\s+)|(\s+of\s+(in|tegen|tijdens|op)))"; public static readonly string AfterRegex = $@"(\b{InclusiveModPrepositions}?((na(\s+afloop\s+van)?|(?>=)|>)"; - public static readonly string BeforeRegex = $@"(\b(?(al\s+)?zo\s+laat\s+als)){InclusiveModPrepositions}?\b\s*)|(?)((?<=)|<)"; + public static readonly string BeforeRegex = $@"(\b(?(al\s+)?zo\s+laat\s+als)){InclusiveModPrepositions}?\b\s*)|(?)((?<=)|<)"; public const string SinceRegex = @"(\b(sinds|na\s+of\s+gelijk\s+aan|(startend|beginnend)\s+(vanaf|op|met)|(al\s+)?zo\s+vroeg\s+als|(elk|ieder)\s+moment\s+vanaf|een\s+tijdstip\s+vanaf)\b\s*)|(?=)"; public const string AroundRegex = @"(\b(rond(om)?|ongeveer(\s+om)?)\s*\b)"; public const string AgoRegex = @"\b(geleden|(voor|eerder\s+dan)\s+(?gisteren|vandaag))\b"; public 
const string LaterRegex = @"\b(later|vanaf\s+nu|(vanaf|na|sedert)\s+(?morgen|vandaag))\b"; - public const string BeforeAfterRegex = @"^[.]"; + public const string BeforeAfterRegex = @"\b(gerekend\s+)?((?voor(dat)?)|(?van(af)?|na))\b"; public static readonly string ModPrefixRegex = $@"\b({RelativeRegex}|{AroundRegex}|{BeforeRegex}|{AfterRegex}|{SinceRegex})\b"; public static readonly string ModSuffixRegex = $@"\b({AgoRegex}|{LaterRegex}|{BeforeAfterRegex}|{FutureSuffixRegex}|{PastSuffixRegex})\b"; public const string InConnectorRegex = @"\b(in|over|na)(\s+de)?\b"; @@ -287,18 +288,18 @@ public static class DateTimeDefinitions public const string SingleAmbiguousTermsRegex = @"^(de\s+)?(dag|week|maand|jaar)$"; public const string UnspecificDatePeriodRegex = @"^(week|weekend|maand|jaar)$"; public const string PrepositionSuffixRegex = @"\b((op|in)(\s+de)?|om|rond(om)?|van|tot)$"; - public const string FlexibleDayRegex = @"(?([A-Za-zë]+\s)?[A-Za-zë\d]+?(ste|de|e))"; + public const string FlexibleDayRegex = @"(?([A-Za-zë]+\s+)?[A-Za-zë\d]+?\s*(ste|de|e))"; public static readonly string ForTheRegex = $@"\b((((?<=voor\s+)de\s+{FlexibleDayRegex})|((?<=op\s+)de\s+{FlexibleDayRegex}(?<=(ste|de|e))))(?(\s+(tussen|binnen|terug|tegen|aan|uit|mee|bij|vol|uit|aan|op|in|na|af)\s*)?(\s+(ge\w\w\w+|\w\w\w+en)\s*)?(,|\.|!|\?|$)))"; public static readonly string WeekDayAndDayOfMonthRegex = $@"\b{WeekDayRegex}\s+(de\s+{FlexibleDayRegex})\b"; public static readonly string WeekDayAndDayRegex = $@"\b{WeekDayRegex}\s+{DayRegex}(?!([-]|:\d+|\.\d+|(\s+({AmDescRegex}|{PmDescRegex}|{OclockRegex}))))\b"; public const string RestOfDateRegex = @"\brest\s+(van\s+)?((de|het|mijn|dit|deze|(de\s+)?huidige)\s+)?(?week|maand|jaar|decennium)\b"; public const string RestOfDateTimeRegex = @"\brest\s+(van\s+)?((de|het|mijn|dit|deze|(de\s+)?huidige)\s+)?(?vandaag|dag)\b"; - public const string MealTimeRegex = @"\b((tijdens\s+de\s+)?(?lunch)|((om|tegen)\s+)?(?lunchtijd))\b"; + public const string MealTimeRegex = 
@"\b((((tijdens\s+)?de|het)\s+)?(?ontbijt|lunch|avondeten)|((om|tegen|tijdens)\s+)?(?lunchtijd))\b"; public const string AmbiguousRangeModifierPrefix = @"(voor)"; - public static readonly string PotentialAmbiguousRangeRegex = $@"\b{AmbiguousRangeModifierPrefix}(.+\b(boven|later|groter|erna|daarna|hoger|(?{BaseDateTime.RangeConnectorSymbolRegex}))\b)"; + public static readonly string PotentialAmbiguousRangeRegex = $@"\b{AmbiguousRangeModifierPrefix}(?!\s+het\s+(einde?|begin(nen)?))(.+\b(boven|later|groter|erna|daarna|hoger|(?{BaseDateTime.RangeConnectorSymbolRegex}))\b)"; public static readonly string NumberEndingPattern = $@"^(\s+((?vergadering|afspraak|conferentie|telefoontje|skype-gesprek)\s+)?(om|naar)\s+(?{PeriodHourNumRegex}|{HourRegex})((\.)?$|(\.,|,|!|\?)))"; public const string OneOnOneRegex = @"\b(1\s*:\s*1)|(één\s+(op\s)één|één\s*-\s*één|één\s*:\s*één)\b"; - public static readonly string LaterEarlyPeriodRegex = $@"\b({PrefixPeriodRegex})\s*\b\s*(?{OneWordPeriodRegex})\b"; + public static readonly string LaterEarlyPeriodRegex = $@"\b({PrefixPeriodRegex})\s*\b\s*(?{OneWordPeriodRegex}|(?{BaseDateTime.FourDigitYearRegex}))\b"; public static readonly string WeekWithWeekDayRangeRegex = $@"\b((?({NextPrefixRegex}|{PreviousPrefixRegex}|deze)\s+week)((\s+tussen\s+{WeekDayRegex}\s+en\s+{WeekDayRegex})|(\s+van\s+{WeekDayRegex}\s+tot\s+{WeekDayRegex})))\b"; public const string GeneralEndingRegex = @"^\s*((\.,)|\.|,|!|\?)?\s*$"; public const string MiddlePauseRegex = @"\s*(,)\s*"; @@ -315,7 +316,9 @@ public static class DateTimeDefinitions public const string SuffixAfterRegex = @"\b(((bij)\s)?(of|en)\s+(boven|later|groter|erna|daarna|hoger)(?!\s+dan))\b"; public const string DateAfterRegex = @"\b((of|en)\s+(hoger|later|groter)(?!\s+dan))\b"; public static readonly string YearPeriodRegex = $@"((((van(af)?|tijdens|gedurende|in)\s+)?{YearRegex}\s*({TillRegex})\s*{YearRegex})|(((tussen)\s+){YearRegex}\s*({RangeConnectorRegex})\s*{YearRegex}))"; - public static readonly 
string ComplexDatePeriodRegex = $@"(((van(af)?|tijdens|gedurende|in(\s+de)?)\s+)?(?.+)\s*({TillRegex})\s*(?.+)|((tussen)\s+)(?.+)\s*({RangeConnectorRegex})\s*(?.+))"; + public const string StartMiddleEndRegex = @"\b((?(((de|het)\s+)?(start|begin)\s+van\s+)?)(?((het\s+)?midden\s+van\s+)?)(?((het\s+)?einde?\s+van\s+)?))"; + public static readonly string ComplexDatePeriodRegex = $@"(((van(af)?|tijdens|gedurende|in(\s+de)?)\s+)?{StartMiddleEndRegex}(?.+)\s*({TillRegex})\s*{StartMiddleEndRegex}(?.+)|((tussen)\s+){StartMiddleEndRegex}(?.+)\s*({RangeConnectorRegex})\s*{StartMiddleEndRegex}(?.+)|(?{WrittenMonthRegex})\s+(?{WrittenMonthRegex}(\s+|\s*,\s*){YearRegex}))"; + public static readonly string ComplexTillRegex = $@"({TillRegex}|{WrittenMonthRegex})"; public static readonly Dictionary UnitMap = new Dictionary { { @"millennium", @"1000Y" }, @@ -331,8 +334,14 @@ public static class DateTimeDefinitions { @"mnd", @"MON" }, { @"weken", @"W" }, { @"week", @"W" }, + { @"weekend", @"WE" }, + { @"weekenden", @"WE" }, { @"dagen", @"D" }, { @"dag", @"D" }, + { @"werkdagen", @"D" }, + { @"werkdag", @"D" }, + { @"weekdagen", @"D" }, + { @"weekdag", @"D" }, { @"vandaag", @"D" }, { @"dgn", @"D" }, { @"nachten", @"D" }, @@ -366,6 +375,8 @@ public static class DateTimeDefinitions { @"mnd", 2592000 }, { @"weken", 604800 }, { @"week", 604800 }, + { @"weekenden", 172800 }, + { @"weekend", 172800 }, { @"dagen", 86400 }, { @"dag", 86400 }, { @"vandaag", 86400 }, @@ -374,6 +385,8 @@ public static class DateTimeDefinitions { @"nacht", 86400 }, { @"werkdagen", 86400 }, { @"werkdag", 86400 }, + { @"weekdagen", 86400 }, + { @"weekdag", 86400 }, { @"uren", 3600 }, { @"uur", 3600 }, { @"u", 3600 }, @@ -772,7 +785,9 @@ public static class DateTimeDefinitions { @"martinlutherking", new string[] { @"martinlutherkingday", @"martinlutherkingjrday", @"martinlutherkingdag", @"mlkdag" } }, { @"usindependenceday", new string[] { @"amerikaanseonafhankelijkheidsdag", @"usonafhankelijkheidsdag" } }, { 
@"blackfriday", new string[] { @"blackfriday" } }, - { @"cybermonday", new string[] { @"cybermonday" } } + { @"cybermonday", new string[] { @"cybermonday" } }, + { @"earthday", new string[] { @"dagvandeaarde" } }, + { @"juneteenth", new string[] { @"jubileeday", @"juneteenth", @"vrijheidsdag" } } }; public static readonly Dictionary WrittenDecades = new Dictionary { @@ -844,7 +859,7 @@ public static class DateTimeDefinitions public static readonly Dictionary AmbiguityFiltersDict = new Dictionary { { @"^\d{4}$", @"(\d\.\d{4}|\d{4}\.\d)" }, - { @"\b(lunch)$", @"(? AmbiguityTimeFiltersDict = new Dictionary @@ -869,6 +884,19 @@ public static class DateTimeDefinitions @"avond", @"avonden" }; + public static readonly IList MealtimeBreakfastTermList = new List + { + @"ontbijt" + }; + public static readonly IList MealtimeLunchTermList = new List + { + @"lunch", + @"lunchtijd" + }; + public static readonly IList MealtimeDinnerTermList = new List + { + @"avondeten" + }; public static readonly IList DaytimeTermList = new List { @"dag", @@ -921,6 +949,8 @@ public static class DateTimeDefinitions @"deze", @"volgend", @"volgende", + @"eropvolgend", + @"eropvolgende", @"dit", @"die" }; @@ -964,11 +994,13 @@ public static class DateTimeDefinitions @"jaar tot op heden", @"vanaf vorig jaareinde" }; - public const string DayTypeRegex = @"^(dag(elijkse?)?)$"; + public const string DayTypeRegex = @"^((we[er]k)?dag(en|elijkse?)?)$"; public const string WeekTypeRegex = @"^(wekelijkse?|week)$"; + public const string WeekendTypeRegex = @"^(weekend(en)?)$"; public const string BiWeekTypeRegex = @"^(tweewekelijkse?)$"; public const string MonthTypeRegex = @"^(maand(elijkse?)?)$"; public const string QuarterTypeRegex = @"^(kwartaal|driemaandelijkse?)$"; public const string YearTypeRegex = @"^(elk\s+jaar|jaar(lijkse?)?)$"; + public const string SemiYearTypeRegex = @"^(halfjaar(lijkse?)?)$"; } } \ No newline at end of file diff --git 
a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumbersDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumbersDefinitions.cs index ff0c0eb141..e3649efac8 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumbersDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Dutch/NumbersDefinitions.cs @@ -50,7 +50,7 @@ public static class NumbersDefinitions public static readonly string SuffixRoundNumberOrdinalRegex = $@"(({AllIntRegex}\s*){RoundNumberOrdinalRegex})"; public static readonly string AllOrdinalNumberRegex = $@"(?:{SuffixBasicOrdinalRegex}|{SuffixRoundNumberOrdinalRegex})"; public static readonly string AllOrdinalRegex = $@"(?:{AllOrdinalNumberRegex}|{RelativeOrdinalRegex})"; - public const string OrdinalSuffixRegex = @"(?<=\b)((\d*(1e|2e|3e|4e|5e|6e|7e|8e|9e|0e))|(1ste|2de|3de|4de|5de|6de|7de|8ste|9de|0de)|([0-9]*1[0-9]de)|([0-9]*[2-9][0-9]ste)|([0-9]*[0](1ste|2de|3de|4de|5de|6de|7de|8ste|9de|0de)))(?=\b)"; + public const string OrdinalSuffixRegex = @"(?<=\b)((\d+\s*e)|[18]\s*ste|[092-7]\s*de|([0-9]*1[0-9]\s*de)|([0-9]*[2-9][0-9]\s*ste)|([0-9]*[0]([18]\s*ste|[092-7]\s*de)))(?=\b)"; public const string OrdinalNumericRegex = @"(?<=\b)(\d{1,3}(\s*.\s*\d{3})*\s*e)(?=\b)"; public static readonly string OrdinalRoundNumberRegex = $@"(?(?:3[0-1]|[1-2]\d|0?[1-9])(?:th|nd|rd|st)?)[\.]?(\s+|\s*[-,/]\s*|\s+of\s+){MonthRegex}[\.]?)\b"; public static readonly string DateExtractor4 = $@"\b{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}[\.]?\s*[/\\\-]\s*{DateYearRegex}"; public static readonly string DateExtractor5 = $@"\b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+)"; @@ -196,7 +196,7 @@ public static class DateTimeDefinitions public const string MealTimeRegex = @"\b(at\s+)?(?breakfast|brunch|lunch(\s*time)?|dinner(\s*time)?|supper)\b"; public static readonly string UnspecificTimePeriodRegex = $@"({MealTimeRegex})"; public static readonly string 
TimeOfDayRegex = $@"\b(?((((in\s+the\s+){LaterEarlyRegex}?(morning|afternoon|night(-?time)?|evening)s)|((in\s+the\s+)?{LaterEarlyRegex}?(in(\s+the)?\s+)?(morning|afternoon|night(-?time)?|evening)))|{MealTimeRegex}|(((in\s+(the)?\s+)?)(daytime|business\s+hours?))))\b"; - public static readonly string SpecificTimeOfDayRegex = $@"\b(({StrictRelativeRegex}\s+{TimeOfDayRegex})\b|\btoni(ght|te))s?\b"; + public static readonly string SpecificTimeOfDayRegex = $@"\b(({StrictRelativeRegex}\s+{TimeOfDayRegex})\b|\b(?toni(ght|te)))s?\b"; public static readonly string TimeFollowedUnit = $@"^\s*{TimeUnitRegex}"; public static readonly string TimeNumberCombinedWithUnit = $@"\b(?\d+(\.\d*)?){TimeUnitRegex}"; public static readonly string[] BusinessHourSplitStrings = { @"business", @"hour" }; @@ -213,8 +213,8 @@ public static class DateTimeDefinitions public const string SpecificEndOfRegex = @"(the\s+)?end of(\s+the)?\s*$"; public const string UnspecificEndOfRegex = @"\b(the\s+)?(eod|(end\s+of\s+day))\b"; public const string UnspecificEndOfRangeRegex = @"\b(eoy)\b"; - public static readonly string PeriodTimeOfDayRegex = $@"\b((in\s+(the)?\s+)?{LaterEarlyRegex}?(this\s+)?{DateTimeTimeOfDayRegex})\b"; - public static readonly string PeriodSpecificTimeOfDayRegex = $@"\b({LaterEarlyRegex}?this\s+{DateTimeTimeOfDayRegex}|({StrictRelativeRegex}\s+{PeriodTimeOfDayRegex})\b|\btoni(ght|te))\b"; + public static readonly string PeriodTimeOfDayRegex = $@"\b((in\s+(the)?\s+)?{LaterEarlyRegex}?((this\s+)?{DateTimeTimeOfDayRegex}|(?(?tonight))))\b"; + public static readonly string PeriodSpecificTimeOfDayRegex = $@"\b({LaterEarlyRegex}?this\s+{DateTimeTimeOfDayRegex}|({StrictRelativeRegex}\s+{PeriodTimeOfDayRegex})\b|\b(?toni(ght|te)))\b"; public static readonly string PeriodTimeOfDayWithDateRegex = $@"\b(({PeriodTimeOfDayRegex}(\s+(on|of))?))\b"; public const string LessThanRegex = @"\b(less\s+than)\b"; public const string MoreThanRegex = @"\b(more\s+than)\b"; diff --git 
a/.NET/Microsoft.Recognizers.Definitions.Common/German/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/German/DateTimeDefinitions.cs index 914b598148..a3850cd681 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/German/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/German/DateTimeDefinitions.cs @@ -96,7 +96,7 @@ public static class DateTimeDefinitions public static readonly string RelativeWeekDayRegex = $@"\b({WrittenNumRegex}\s+{WeekDayRegex}e\s+(von\s+jetzt|später))\b"; public static readonly string SpecialDate = $@"(?=\b(an( dem)?|am)\s+){DayRegex}\b"; public static readonly string DateExtractor1 = $@"\b(({WeekDayRegex})(\s+|\s*,\s*))?({DayRegex}\s*[/\\.,\- ]\s*{MonthRegex}(\s*[/\\.,\- ]\s*{DateYearRegex})?|{BaseDateTime.FourDigitYearRegex}\s*[/\\.,\- ]\s*{DayRegex}\s*[/\\.,\- ]\s*{MonthRegex})\b"; - public static readonly string DateExtractor2 = $@"\b({MonthRegex}\s*[/\\.,\- ]\s*{DayRegex}(\s*[/\\.,\- ]\s*{DateYearRegex})?)\b"; + public static readonly string DateExtractor2 = $@"\b({MonthRegex}\s*[/\\.,\- ]\s*{DayRegex}(?!\s*\-\s*\d{{2}}\b)(\s*[/\\.,\- ]\s*{DateYearRegex})?)\b"; public static readonly string DateExtractor3 = $@"\b({DayRegex}{MonthRegex})"; public static readonly string DateExtractor4 = $@"\b({DayRegex}\s*{MonthNumRegex}\s*{DateYearRegex})\b"; public static readonly string DateExtractor5 = $@"\b(({WeekDayRegex})(\s+|\s*,\s*))?({DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex})\b(?!\s*[/\\\-\.]\s*\d+)"; diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Italian/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Italian/DateTimeDefinitions.cs index 121fc64ee9..4776fafd45 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Italian/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Italian/DateTimeDefinitions.cs @@ -111,7 +111,7 @@ public static class DateTimeDefinitions public static readonly 
string WeekDayOfMonthRegex = $@"(?((la|il)\s+)?(?prim[ao]|second[ao]|terz[ao]|quart[ao]|quint[ao]|ultim[ao])\s+{WeekDayRegex}\s+{MonthSuffixRegex})"; public static readonly string RelativeWeekDayRegex = $@"\b({WrittenNumRegex}\s+{WeekDayRegex}\s+(da\s+ora|dopo))\b"; public static readonly string SpecialDate = $@"(?<=\b(il|l'|al(l')?)\s*){DayRegex}\b"; - public static readonly string DateExtractor1 = $@"\b((quest[oa]\s+)?{WeekDayRegex}\s*[,-]?\s*)?(({MonthRegex}(\.)?\s*[/\\.,-]?\s*{DayRegex})|(\({MonthRegex}\s*[-.]\s*{DayRegex}\))|({DayRegex}\s*[/\\.,-]?\s*{MonthRegex}(\.)?))(\s*\(\s*{WeekDayRegex}\s*\))?"; + public static readonly string DateExtractor1 = $@"\b((quest[oa]\s+)?{WeekDayRegex}\s*[,-]?\s*)?(({MonthRegex}(\.)?\s*[/\\.,-]?\s*{DayRegex}(?!\s*\-\s*\d{{2}}\b))|(\({MonthRegex}\s*[-.]\s*{DayRegex}\))|({DayRegex}\s*[/\\.,-]?\s*{MonthRegex}(\.)?))(\s*\(\s*{WeekDayRegex}\s*\))?"; public static readonly string DateExtractor2 = $@"({DateExtractor1}(\s+|\s*[\-/,.]\s*|\s+del\s+)({DateYearRegex}))\b"; public static readonly string DateExtractor3 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?({DayRegex}(\.)?(\s*[/,.\- ]\s*|\s+di\s+){MonthRegex}(\.)?(\s*[/,.\- ]\s*{DateYearRegex})?|{BaseDateTime.FourDigitYearRegex}\s*[/,.\- ]\s*{DayRegex}\s*[/,.\- ]\s*{MonthRegex})\b"; public static readonly string DateExtractor4 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?((il|l')\s*)?{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}(\.)?\s*[/\\\-]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+)"; diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/DateTimeDefinitions.cs index 887e8d940f..976510139c 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/DateTimeDefinitions.cs @@ -25,9 +25,10 @@ public static class DateTimeDefinitions public const string MonthRegex = 
@"(?(正|一|二|三|四|五|六|七|八|九|十|十一|十二|0?[1-9]|1[0-2])\s*(か月(?!で)|月間?))"; public const string MonthRegexForPeriod = @"(?正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|(0?[1-9]|1[0-2])か?月)(?=\b|t|まで|から)?"; public const string MonthNumRegexForPeriod = @"(?0?[1-9]|1[0-2])(?=\b|t|まで|から)?"; - public const string DayRegex = @"(?[0-2]?[1-9]|[1-3]0|31)([日目]間?)?"; - public const string DayRegexForPeriod = @"(?3[01]|[0-2]?\d|(三十一?|(一|二)?十?[一二三四五六七八九]))(\s*日目?)?(?=\b|t|まで|から)?"; - public const string DateDayRegexInCJK = @"(?初一|((二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|二十一|三十一|十二|十三|十四|十五|十六|十七|十八|十一|十|二十|廿(?!日市市)|三十|一|二|三|四|五|六|七|八|九)|3[01]|[0-2]?\d)(\s*日|号))目?"; + public const string DayRegex = @"(?[0-2]?[1-9]|[1-3]0|31)((日|目)(?!かかる|待つ|泊まる|経つ)間?)?"; + public const string DayRegexForPeriod = @"(?3[01]|[0-2]?\d|(三十一?|(一|二)?十?[一二三四五六七八九]))((\s*日(?!かかる|待つ|泊まる|経つ))目?)?(?=\b|t|まで|から)?"; + public const string DayNumberRegex = @"(二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|二十一|三十一|十二|十三|十四|十五|十六|十七|十八|十一|十|二十|廿(?!日市市)|三十|一|二|三|四|五|六|七|八|九)"; + public static readonly string DateDayRegexInCJK = $@"(?初一|({DayNumberRegex}|3[01]|[0-2]?\d)(\s*日|号)(?!かかる|待つ|泊まる|経つ))目?"; public const string DayRegexNumInCJK = @"(?一|十一|二十一|三十一|三十|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|一|十一|十|二十一|二十|廿(?!日市市)|三十一|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|十|二十|廿|卅)"; public const string MonthNumRegex = @"(?0?[1-9]|1[0-2])"; public const string TwoNumYear = @"50"; @@ -52,7 +53,7 @@ public static class DateTimeDefinitions public const string SpecialMonthRegex = @"(先月|来月|今月|前月|再来月|昨月|先々月|ぜんげつ|(せん)?せんげつ|さくげつ|らいげつ|こんげつ)"; public const string SpecialYearRegex = @"(ことし|さ?らいねん|きょねん|さくねん)"; public const string SpecialDayRegex = @"((いっ)?さくじつ|おとつい|最近|前天|后天|明日から二日((?今日)から(?1日半)(の間)?)|((?今日)から(?2日半)(の間)?)|昨日の2日前|昨日から4日|今日から二日|今日から4日|昨日から2日間|昨天|明天|今天|今日|明日|一?昨?昨日|一昨日|大后天|大前天|後天|大後天|きょう|あす|あした|きのう|明々後日|(弥)?明後日|この日|前日|二日前|おととい|し?あさって|私の一日|この間|次の日|その日|最後の日)"; - public const string 
SpecialDayWithNumRegex = @"((いっ)?さくじつ|おとつい|最近|前天|后天|昨天|明天|今天|今日?|明日|一?昨?昨日|一昨日|大后天|大前天|後天|大後天|きょう|あす|あした|きのう|明々後日|(弥)?明後日|この日|前日|二日前|おととい|し?あさって|私の一日|この間|次の日|その日)(から|の)?([\d十一二三四五六七八九]*|数)(日|月|週|個)間?(先|後|前|以内)?(の(?日曜日?|月曜日?|火曜日?|水曜日?|木曜日?|金曜日?|土曜日?))?"; + public const string SpecialDayWithNumRegex = @"((いっ)?さくじつ|おとつい|最近|前天|后天|昨天|明天|今天|今日?|明日|一?昨?昨日|一昨日|大后天|大前天|後天|大後天|きょう|あす|あした|きのう|明々後日|(弥)?明後日|この日|前日|二日前|おととい|し?あさって|私の一日|この間|次の日|その日)(から|の)?([\d十一二三四五六七八九]*|数)(日|月|週(間で)?|個)間?(先|後|前)?(の(?日曜日?|月曜日?|火曜日?|水曜日?|木曜日?|金曜日?|土曜日?))?"; public static readonly string WeekDayOfMonthRegex = $@"((({SpecialMonthRegex}|{MonthRegex}|{MonthNumRegex}|((这个|这一个|这|这一|本|今|上个|上一个|上|上一|去|下个|下一个|下|下一|明)月))(的|の)?\s*)?(第|最)?(?([初一二三四五])|最後|最終|([1-5])|最后一)(个|の|\s)*{WeekDayRegex})"; public static readonly string WeekDayAndDayRegex = $@"({DayRegexForPeriod}(の|的)?(\s|,)*{WeekDayRegex})"; public const string ThisPrefixRegex = @"这个|这一个|这|这一|本|今|こ"; @@ -60,13 +61,13 @@ public static class DateTimeDefinitions public const string NextPrefixRegex = @"下个|下一个|下|下一|明|次|再?来|向こう|これから(の)?|翌|向こう"; public static readonly string RelativeRegex = $@"(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex}))"; public static readonly string SpecialDate = $@"(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})年)?(の|的)?(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})(の|的)?月)?(の|的)?{DateDayRegexInCJK}"; - public const string DateUnitRegex = @"(?年|个月|月|周|時間?|(?営業)日|(?年|个月|月|周|(?営業)日|(?(二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|二十二|二十三|二十一|十一|三十一|十二|十三|十四|十五|十六|十七|十八|十九|十|二十|三十|一|十|二|三|四|五|六|七|八|九|3[0-1]|[1-2]\d|0?[1-9])日|初一|三十|(一|十一|二十一|三十一|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|一|十一|十|二十一|二十|三十一|三十|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|十|二十|三十|3[0-1]|[1-2]\d|0?[1-9])号|一|十一|二十一|三十一|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|一|十一|十|二十一|二十|三十一|三十|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|十|二十|三十|廿(?!日市市)|卅)目?"; public const 
string DatePeriodThisRegex = @"今|这个|这一个|这|这一|本"; public const string DatePeriodLastRegex = @"この|上个|上一个|上|上一|前|去|最後|最終|過去|先|昨"; - public const string DatePeriodNextRegex = @"(?再来)|下个|下一个|下|下一|最初|来|向こう|これから(の)?|翌|今後|次(の)?"; + public const string DatePeriodNextRegex = @"(?再来|以降)|下个|下一个|下|下一|最初|来|向こう|これから(の)?|翌|今後|次(の)?"; public const string DateRangePrepositions = @"((こ|私の|その|この|これらの|それらの)\s*)?"; public static readonly string RelativeMonthRegex = $@"(?({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex})\s*月)"; public const string HalfYearRegex = @"((?の?(上|前)半期)|(?の?(下|后)半期))"; @@ -115,7 +116,7 @@ public static class DateTimeDefinitions public const string MonthToMonthSuffixRequired = @"^[.]"; public static readonly string DayToDay = $@"({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})?(({SpecialMonthRegex}|{MonthRegex})の?)?(({SpecialDayRegex}|{DayRegex}|{WeekDayRegex})から(({SpecialMonthRegex}|{MonthRegex})の?)?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})?((今月|来月|{MonthRegex})の?)?({SpecialDayRegex}|{DayRegex}|{WeekDayRegex})(までの間|まで|の間))|{SpecialDayRegex}"; public static readonly string FirstLastOfYearRegex = $@"(({DatePeriodYearInCJKRegex}|{YearRegex}|(?再来年|翌年|来年|今年|去年))的?)((?前)|(?(最后|最後|最終)))"; - public static readonly string ComplexDatePeriodRegex = $@"({DateRangePrepositions})(?(第{ZeroToNineIntegerRegexCJK}+|第\d+|{DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex}|(({YearNumRegex}|{DayRegexForPeriod}|{DateDayRegexInCJK}|{MonthRegex}|{MonthNumRegex})(?!((\d+)?(分|秒|時))))|{RelativeRegex}).+)(から)(?.+)(までの間|まで|の間)"; + public static readonly string ComplexDatePeriodRegex = 
$@"({DateRangePrepositions})(?(第{ZeroToNineIntegerRegexCJK}+|第\d+|{DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex}|(({YearNumRegex}|{DayRegexForPeriod}|{DateDayRegexInCJK}|{MonthRegex}|{MonthNumRegex})(?!((\d+)?(分|秒|時))))|{RelativeRegex}).+)(から)(?.+)(までの間|(?(この|時前|(?((?以内に)|後に|向こう|后|次の|今後|今日の午後|これからの|(?春|夏|秋|冬)(天|季)?(の)?((?半ば)|(?初め|のはじめ)|(?終わり(ごろ)?|末|下旬))?"; @@ -134,12 +135,13 @@ public static class DateTimeDefinitions public const string TomorrowRegex = @"(?明日の?(午前|午後|中|夜|朝)?)"; public const string YesterdayRegex = @"(?昨日の?(午前|午後|中|夜|朝)?)"; public const string TodayRegex = @"(?(今朝の?|今朝の午前|今晩|今晚|今早|今晨|明晚|明早|明晨|昨晚|今夜|昨夜)(的|在)?)"; + public const string FromNowRegex = @"((?今)から)"; public static readonly string SpecialDayHourRegex = $@"((?{TimeHourCJKRegex}|{TimeHourNumRegex})(時間?|(:00)))"; public static readonly string SpecialDayMinuteRegex = $@"((?{TimeMinuteCJKRegex}|{TimeMinuteNumRegex})分間?)"; public static readonly string SpecialDaySecondRegex = $@"((?{TimeSecondCJKRegex}|{TimeSecondNumRegex})秒間?)"; - public const string SpecialDayModRegex = @"((?過ぎに)|(?ごろ)|(?で|の?うちに)|(?弱|たらず)|(?以上))"; + public const string SpecialDayModRegex = @"((?過ぎに|以降)|(?ごろ)|(?で|の?うちに)|(?弱|たらず)|(?以上))"; public static readonly string SpecialDayEndOfRegex = $@"((?明日の終わり|({WeekDayRegex}の?終わり))|(?日の終わり|一日の終わり|その日の終わり))"; - public static readonly string TimeOfSpecialDayRegex = $@"(({SpecialDayEndOfRegex}|{WeekDayRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex})(\d日)?(と)?(({SpecialDayHourRegex}{SpecialDayMinuteRegex}?{SpecialDaySecondRegex}?)|({SpecialDayMinuteRegex}{SpecialDaySecondRegex}?)){SpecialDayModRegex}?)|(({SpecialDayHourRegex}(?で|の?うちに)))|(({SpecialDayEndOfRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex}){SpecialDayModRegex}?)|({WeekDayRegex}(\d日)?(と)?{SpecialDayModRegex})"; + public static readonly string TimeOfSpecialDayRegex = 
$@"(({SpecialDayEndOfRegex}|{WeekDayRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex})(\d日)?(と)?(({SpecialDayHourRegex}{SpecialDayMinuteRegex}?{SpecialDaySecondRegex}?)|({SpecialDayMinuteRegex}{SpecialDaySecondRegex}?)){SpecialDayModRegex}?)|(({SpecialDayHourRegex}(?で|の?うちに)))|(({SpecialDayEndOfRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex}){SpecialDayModRegex}?)|({WeekDayRegex}(\d日)?(と)?{SpecialDayModRegex})|({FromNowRegex}\d+(分|時|秒)後)"; public const string NowTimeRegex = @"(现在|今)"; public const string RecentlyTimeRegex = @"(刚刚才?|刚才)"; public const string AsapTimeRegex = @"(出来る限り早く|立刻|马上)"; @@ -166,7 +168,7 @@ public static class DateTimeDefinitions public const string PlusFourDayRegex = @"今日から4日"; public const string DurationAllRegex = @"(まる(ひと)?)"; public const string DurationHalfRegex = @"^[.]"; - public const string DurationRelativeDurationUnitRegex = @"(?数ヶ|数)|(?前|昨日)|(?後|明日)|(?もう)"; + public const string DurationRelativeDurationUnitRegex = @"(?数ヶ|数)|(?(?以内)|(?後|明日)|(?(?(?(?数(?((か|ヶ)?(時|月|日|週|年|周|週|週|秒|分|営業日|年)間?))(たらず|以上)?)"; @@ -199,7 +201,7 @@ public static class DateTimeDefinitions @"个月", @"年" }; - public static readonly string DurationUnitRegex = $@"(?{DateUnitRegex}|分|秒|時間|まる(ひと)?|もう|数|以上|たらず)"; + public static readonly string DurationUnitRegex = $@"(?年|个月|月|周|時間?|(?営業)日|天|週間?|星期|个星期|か月|(?[と]?|,)\s*$"; public const string ConnectorRegex = @"^\s*[,-]\s*$"; public static readonly string LunarHolidayRegex = $@"(({YearRegex}|{DatePeriodYearInCJKRegex}|(?明年|今年|去年|来年))(的)?)?(?除夕|春节|旧暦の正月初一|中秋(節|节)?|元宵(节|節)|端午(节|の節句)?|重(阳节|陽節))"; @@ -212,7 +214,7 @@ public static class DateTimeDefinitions public const string SetLastRegex = @"(?last|this|next)"; public const string SetEachDayRegex = @"(毎|各|毎一)(天|日)\s*$"; public const string SetEachDateUnitRegex = @"(毎)(年|月|週)\s*$"; - public const string TimeHourNumRegex = @"([0-1]?\d|2[0-4])"; + public const string TimeHourNumRegex = @"(?过半|半)"; public const string TimeQuarterRegex = @"(?[一两二三四1-4])\s*(刻钟|刻)"; 
public static readonly string TimeCJKTimeRegex = $@"{TimeHourRegex}({TimeQuarterRegex}|{TimeHalfRegex}|((((过|又)?{TimeMinuteRegex})({TimeSecondRegex})?)|({TimeSecondRegex})))?"; - public static readonly string TimeDigitTimeRegex = $@"(?{TimeHourNumRegex}):(?{TimeMinuteNumRegex})(:(?{TimeSecondNumRegex}))?(am|pm)?"; + public static readonly string TimeDigitTimeRegex = $@"(?{TimeHourNumRegex}):(?{TimeMinuteNumRegex})(:(?{TimeSecondNumRegex}))?({AmPmDescRegex})?"; public static readonly string TimeDayDescRegex = $@"(?(正午|夜中|午前半ば|(昼食時)|真昼)|((?<=({TimeDigitTimeRegex}|{TimeCJKTimeRegex})(の)?)(早朝(に)?|午後(に)?|晚|晩|(深)?夜(に)?|未明|午前(中)?|日中|白昼|(未|早)?朝(に)?|昼前に|昼すぎに|夕方前に|夕方に|営業時間内に|昼))|((早朝(に)?|午後(に)?|晚|晩|(深)?夜(に)?|未明|午前(中)?|日中|白昼|(未|早)?朝(に)?|昼前に|昼すぎに|夕方前に|夕方に|営業時間内に|昼)(?=(の)?({TimeDigitTimeRegex}|{TimeCJKTimeRegex}))))"; public const string TimeApproximateDescPreffixRegex = @"(ぐらい|おそらく|多分|ほとんど|まもなく|昨日の|昨日|来週の|来週|昼食時|昼食|真)"; public const string TimeApproximateDescSuffixRegex = @"(ごろに|ごろ|過ぎに|過ぎ|丁度に|丁度|きっかりに|きっかり|を過ぎた頃に|を過ぎた頃|ちょっと前に|ちょっと前|近くに|近く|昼食時|昼食|ぐらい|時かっきり|頃|かっきり|以降|まで(の間)?|の間|間で?|間以内)"; @@ -247,9 +249,9 @@ public static class DateTimeDefinitions public const string FromToRegex = @"^[.]"; public const string AmbiguousRangeModifierPrefix = @"^[.]"; public const string ReferenceDatePeriodRegex = @"(同じ|その)(?月|週末|年|週)"; - public const string ParserConfigurationBefore = @"(之前|以前|前)"; - public const string ParserConfigurationAfter = @"(之后|之後|以后|以後|后|後)"; - public const string ParserConfigurationUntil = @"(直到|直至|截至|截止(到)?)"; + public const string ParserConfigurationBefore = @"(またはその前|またはそれ以前|之前|以前|前)"; + public const string ParserConfigurationAfter = @"(またはそれ以降|之后|之後|以后|以後|后|後|以降)"; + public const string ParserConfigurationUntil = @"(まで|直到|直至|截至|截止(到)?)"; public const string ParserConfigurationSincePrefix = @"(自从|自|自打|打)"; public const string ParserConfigurationSinceSuffix = @"(以来|开始)"; public const string ParserConfigurationLastWeekDayRegex = @"最后一个"; @@ -648,12 +650,17 @@ public 
static class DateTimeDefinitions public const string DateTimePeriodNIRegex = @"(半夜|夜间|深夜|夜)"; public static readonly Dictionary AmbiguityFiltersDict = new Dictionary { - { @"早", @"(? AmbiguityDatePeriodFiltersDict = new Dictionary { { @"^年$", @"年" } }; + public static readonly Dictionary AmbiguityDurationFiltersDict = new Dictionary + { + { @"月に", @"月に" } + }; public static readonly Dictionary DurationUnitValueMap = new Dictionary { { @"Y", 31536000 }, @@ -682,7 +689,7 @@ public static class DateTimeDefinitions { @"キング牧師記念日", @"-01-WXX-1-3" } }; public const string MergedBeforeRegex = @"(前|之前)$"; - public const string MergedAfterRegex = @"(后|後|之后|之後)$"; + public const string MergedAfterRegex = @"(后|後|之后|之後|以降)$"; public static readonly Dictionary TimeNumberDictionary = new Dictionary { { '零', 0 }, diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/DateTimeDefinitions.cs index 3bf7c08adc..24844183d6 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Portuguese/DateTimeDefinitions.cs @@ -107,7 +107,7 @@ public static class DateTimeDefinitions public static readonly string DateYearRegex = $@"(?{YearRegex}|{TwoDigitYearRegex})"; public static readonly string DateExtractor1 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{DayRegex}((\s*(de)|[/\\\.\- ])\s*)?{MonthRegex}\b"; public static readonly string DateExtractor2 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?({DayRegex}(\s*([/\.\-]|de)?\s*{MonthRegex}|\s+de\s+{MonthNumRegex})(\s*([,./-]|de|\s+)\s*){DateYearRegex}|{BaseDateTime.FourDigitYearRegex}\s*[/\.\- ]\s*{DayRegex}\s*[/\.\- ]\s*{MonthRegex})\b"; - public static readonly string DateExtractor3 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{MonthRegex}(\s*[/\.\- ]\s*|\s+de\s+){DayRegex}((\s*[/\.\- ]\s*|\s+de\s+){DateYearRegex})?\b"; + public static readonly string DateExtractor3 = 
$@"\b({WeekDayRegex}(\s+|\s*,\s*))?{MonthRegex}(\s*[/\.\- ]\s*|\s+de\s+){DayRegex}(?!\s*\-\s*\d{{2}}\b)((\s*[/\.\- ]\s*|\s+de\s+){DateYearRegex})?\b"; public static readonly string DateExtractor4 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}\s*[/\\\-]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+)"; public static readonly string DateExtractor5 = $@"\b({WeekDayRegex}(\s+|\s*,\s*))?{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+)"; public static readonly string DateExtractor6 = $@"(?<=\b(em|no|o)\s+){MonthNumRegex}[\-\.]{DayRegex}{BaseDateTime.CheckDecimalRegex}\b"; diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/DateTimeDefinitions.cs index 41ee1675cb..0067d4560e 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Spanish/DateTimeDefinitions.cs @@ -118,7 +118,7 @@ public static class DateTimeDefinitions public static readonly string DateYearRegex = $@"(?{YearRegex}|(?y|con)"; + public const string MultiplierRegex = @"\s*\b(((mil\s+)?mi|bi|cuatri|quinti|sexti|septi)ll[oó]n|mil)(es)?\b"; public static readonly Dictionary CurrencyPrefixList = new Dictionary { { @"Dobra", @"db|std" }, - { @"Dólar", @"$" }, + { @"Dólar", @"$|dólar|dólares|dolar|dolares" }, { @"Dólar estadounidense", @"us$|u$d|usd" }, { @"Dólar del Caribe Oriental", @"ec$|xcd" }, { @"Dólar australiano", @"a$|aud" }, diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_English.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_English.cs index f40eb647ee..504942bad7 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_English.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/DateTime/TestDateTime_English.cs @@ -257,5 +257,13 @@ public void 
DateTimeModelExperimentalMode(TestModel testSpec) { TestDateTimeAlt(testSpec); } + + [NetCoreTestDataSource] + [TestMethod] + public void DateTimeModelTasksMode(TestModel testSpec) + { + TestDateTimeAlt(testSpec); + } + } } diff --git a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestHelpers.cs b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestHelpers.cs index c94a73566f..5d6380c715 100644 --- a/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestHelpers.cs +++ b/.NET/Microsoft.Recognizers.Text.DataDrivenTests/TestHelpers.cs @@ -53,6 +53,7 @@ public enum Models DateTimeExtendedTypes, DateTimeComplexCalendar, DateTimeExperimentalMode, + DateTimeTasksMode, PhoneNumber, IpAddress, Mention, @@ -121,6 +122,7 @@ public static class TestContextExtensions { Models.DateTimeExtendedTypes, (test, culture) => DateTimeRecognizer.RecognizeDateTime(test.Input, culture, DateTimeOptions.ExtendedTypes, refTime: test.GetReferenceDateTime(), fallbackToDefaultCulture: false) }, { Models.DateTimeComplexCalendar, (test, culture) => DateTimeRecognizer.RecognizeDateTime(test.Input, culture, DateTimeOptions.ExtendedTypes | DateTimeOptions.CalendarMode | DateTimeOptions.EnablePreview, refTime: test.GetReferenceDateTime(), fallbackToDefaultCulture: false) }, { Models.DateTimeExperimentalMode, (test, culture) => DateTimeRecognizer.RecognizeDateTime(test.Input, culture, DateTimeOptions.ExperimentalMode, refTime: test.GetReferenceDateTime(), fallbackToDefaultCulture: false) }, + { Models.DateTimeTasksMode, (test, culture) => DateTimeRecognizer.RecognizeDateTime(test.Input, culture, options: DateTimeOptions.TasksMode, refTime: test.GetReferenceDateTime(), fallbackToDefaultCulture: false) }, { Models.PhoneNumber, (test, culture) => SequenceRecognizer.RecognizePhoneNumber(test.Input, culture, fallbackToDefaultCulture: false) }, { Models.IpAddress, (test, culture) => SequenceRecognizer.RecognizeIpAddress(test.Input, culture, fallbackToDefaultCulture: false) }, { Models.Mention, (test, 
culture) => SequenceRecognizer.RecognizeMention(test.Input, culture, fallbackToDefaultCulture: false) }, diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicMergedExtractorConfiguration.cs index 046d08ed72..0f1c5779af 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Arabic/Extractors/ArabicMergedExtractorConfiguration.cs @@ -52,6 +52,9 @@ public class ArabicMergedExtractorConfiguration : BaseDateTimeOptionsConfigurati public static readonly Regex FailFastRegex = new Regex(DateTimeDefinitions.FailFastRegex, RegexFlags | RegexOptions.Compiled); + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + public static readonly Regex[] TermFilterRegexes = { // one on one @@ -162,6 +165,8 @@ public ArabicMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => UnspecificTimePeriodRegex; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + Regex IMergedExtractorConfiguration.FailFastRegex => FailFastRegex; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDurationExtractorConfiguration.cs index 28830913b8..426568b03e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDurationExtractorConfiguration.cs @@ -56,6 +56,8 @@ public ChineseDurationExtractorConfiguration(IDateTimeOptionsConfiguration confi public Dictionary UnitValueMap { get; } + public Dictionary 
AmbiguityDurationFiltersDict => null; + Regex ICJKDurationExtractorConfiguration.DurationUnitRegex => DurationUnitRegex; Regex ICJKDurationExtractorConfiguration.DurationConnectorRegex => DurationConnectorRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs index fd7f13f9dc..29c9469f23 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs @@ -53,6 +53,7 @@ public static class Constants // AmPm time representation for time parser public const string Comment_AmPm = "ampm"; + public const string Comment_Am = "am"; // Prefix early/late for time parser public const string Comment_Early = "early"; @@ -128,6 +129,9 @@ public static class Constants // Hours is a half mid-day-duration public const int HalfMidDayDurationHourCount = 2; + // Minutes in an hour + public const int HourMinuteCount = 60; + // Char length of four digits year, e.g., 2018 public const int FourDigitsYearLength = 4; @@ -260,6 +264,7 @@ public static class Constants public const string TimexHour = "H"; public const string TimexMinute = "M"; public const string TimexSecond = "S"; + public const string TimexNow = "PRESENT_REF"; public const char TimexFuzzy = 'X'; public const string TimexFuzzyYear = "XXXX"; public const string TimexFuzzyMonth = "XX"; @@ -267,6 +272,7 @@ public static class Constants public const string TimexFuzzyDay = "XX"; public const string DateTimexConnector = "-"; public const string TimeTimexConnector = ":"; + public const string TimexSeparator = ","; public const string GeneralPeriodPrefix = "P"; public const string TimeTimexPrefix = "T"; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeOptions.cs b/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeOptions.cs index 19b56a2682..0e068a7628 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeOptions.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/DateTimeOptions.cs @@ -38,6 +38,11 @@ public enum DateTimeOptions /// NoProtoCache = 16, + /// + /// TasksMode, specific functionality that changes default behaviour for business reasons. + /// + TasksMode = 1048576, // 2 ^20 + /// /// FailFast, mode that aborts extraction/tagging quickly for non-entity cases. May be removed later. /// diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDatePeriodExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDatePeriodExtractorConfiguration.cs index efea3e76dc..9b59a6831a 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDatePeriodExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchDatePeriodExtractorConfiguration.cs @@ -16,7 +16,7 @@ public class DutchDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfigur { // Base regexes public static readonly Regex TillRegex = - new Regex(DateTimeDefinitions.TillRegex, RegexFlags); + new Regex(DateTimeDefinitions.ComplexTillRegex, RegexFlags); public static readonly Regex RangeConnectorRegex = new Regex(DateTimeDefinitions.RangeConnectorRegex, RegexFlags); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchMergedExtractorConfiguration.cs index df3c719f42..7094a1e48f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Extractors/DutchMergedExtractorConfiguration.cs @@ -52,6 +52,9 @@ public class DutchMergedExtractorConfiguration : BaseDateTimeOptionsConfiguratio public static readonly Regex PotentialAmbiguousRangeRegex = new Regex(DateTimeDefinitions.PotentialAmbiguousRangeRegex, RegexFlags); + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, 
RegexFlags); + public static readonly Regex[] TermFilterRegexes = { // one on one @@ -152,6 +155,8 @@ public DutchMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + public Regex FailFastRegex { get; } = null; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchHolidayParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchHolidayParserConfiguration.cs index d868d96b3f..caf1dedabe 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchHolidayParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchHolidayParserConfiguration.cs @@ -120,6 +120,8 @@ protected override IDictionary> InitHolidayFuncs() { "sacrifice", Sacrifice }, { "eidalfitr", EidAlFitr }, { "islamicnewyear", IslamicNewYear }, + { "earthday", EarthDay }, + { "juneteenth", Juneteenth }, }; } @@ -215,6 +217,10 @@ protected override IDictionary> InitHolidayFuncs() private static DateObject KetiKoti(int year) => new DateObject(year, 7, 1); + private static DateObject EarthDay(int year) => new DateObject(year, 4, 22); + + private static DateObject Juneteenth(int year) => new DateObject(year, 6, 19); + private static DateObject Ramadan(int year) => HolidayFunctions.IslamicHoliday(year, HolidayFunctions.IslamicHolidayType.Ramadan); private static DateObject Sacrifice(int year) => HolidayFunctions.IslamicHoliday(year, HolidayFunctions.IslamicHolidayType.Sacrifice); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchSetParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchSetParserConfiguration.cs index 0dfa4b365a..2c7435d257 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchSetParserConfiguration.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchSetParserConfiguration.cs @@ -31,6 +31,12 @@ public class DutchSetParserConfiguration : BaseDateTimeOptionsConfiguration, ISe private static readonly Regex YearTypeRegex = new Regex(DateTimeDefinitions.YearTypeRegex, RegexFlags); + private static readonly Regex SemiYearTypeRegex = + new Regex(DateTimeDefinitions.SemiYearTypeRegex, RegexFlags); + + private static readonly Regex WeekendTypeRegex = + new Regex(DateTimeDefinitions.WeekendTypeRegex, RegexFlags); + public DutchSetParserConfiguration(ICommonDateTimeParserConfiguration config) : base(config) { @@ -125,9 +131,17 @@ public bool GetMatchedDailyTimex(string text, out string timex) { timex = "P1Y"; } + else if (SemiYearTypeRegex.IsMatch(trimmedText)) + { + timex = "P0.5Y"; + } else if (QuarterTypeRegex.IsMatch(trimmedText)) { timex = "P3M"; + } + else if (WeekendTypeRegex.IsMatch(trimmedText)) + { + timex = "XXXX-WXX-WE"; } else { diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchTimePeriodParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchTimePeriodParserConfiguration.cs index ce00be927f..6cb18a17e4 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchTimePeriodParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Dutch/Parsers/DutchTimePeriodParserConfiguration.cs @@ -99,6 +99,18 @@ public bool GetMatchedTimeRange(string text, out string timex, out int beginHour { timeOfDay = Constants.BusinessHour; } + else if (DateTimeDefinitions.MealtimeBreakfastTermList.Any(o => trimmedText.Contains(o))) + { + timeOfDay = Constants.MealtimeBreakfast; + } + else if (DateTimeDefinitions.MealtimeLunchTermList.Any(o => trimmedText.Contains(o))) + { + timeOfDay = Constants.MealtimeLunch; + } + else if (DateTimeDefinitions.MealtimeDinnerTermList.Any(o => trimmedText.Contains(o))) + { + timeOfDay = Constants.MealtimeDinner; + } else { timex = null; diff --git 
a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishMergedExtractorConfiguration.cs index d55211b500..f9b3725c1c 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishMergedExtractorConfiguration.cs @@ -53,6 +53,10 @@ public class EnglishMergedExtractorConfiguration : BaseDateTimeOptionsConfigurat public static readonly Regex FailFastRegex = new Regex(DateTimeDefinitions.FailFastRegex, RegexFlags | RegexOptions.Compiled); + // Used to skip only year references in a text in TasksMode + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + public static readonly Regex[] TermFilterRegexes = { // one on one @@ -164,6 +168,8 @@ public EnglishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => UnspecificTimePeriodRegex; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + Regex IMergedExtractorConfiguration.FailFastRegex => FailFastRegex; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseMergedDateTimeExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseMergedDateTimeExtractor.cs index 81d40c14f2..1c366268f9 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseMergedDateTimeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseMergedDateTimeExtractor.cs @@ -223,6 +223,14 @@ private void AddTo(List dst, List src, string text } } + if ((config.Options & DateTimeOptions.TasksMode) != 0) + { + if (ShouldSkipOnlyYear(result)) + { + continue; + } + } + var isFound = false; var overlapIndexes = new List(); var firstIndex = -1; @@ -270,6 +278,15 @@ 
private bool ShouldSkipFromToMerge(ExtractResult er) return config.FromToRegex.IsMatch(er.Text); } + /*Under TasksMode: Should not treat a four-digit number as a daterange if the input text does not include a month or year reference. + It should not treat 2005 as a daterange in statements like "Milk 2005." + (The year 2005 should be treated as a number only.) + */ + private bool ShouldSkipOnlyYear(ExtractResult er) + { + return config.YearRegex.Match(er.Text).Value == er.Text; + } + private List FilterUnspecificDatePeriod(List ers) { ers.RemoveAll(o => this.config.UnspecificDatePeriodRegex.IsMatch(o.Text)); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDurationExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDurationExtractor.cs index ef6c0118e8..cb15619251 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDurationExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDurationExtractor.cs @@ -2,7 +2,8 @@ // Licensed under the MIT License. 
using System.Collections.Generic; - +using System.Linq; +using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -54,9 +55,32 @@ public List Extract(string source, DateObject referenceTime) res = MergeMultipleDuration(source, res); } + res = FilterAmbiguity(res, source); + return res; } + private List FilterAmbiguity(List extractResults, string text) + { + if (this.config.AmbiguityDurationFiltersDict != null) + { + foreach (var regex in this.config.AmbiguityDurationFiltersDict) + { + foreach (var extractResult in extractResults) + { + if (regex.Key.IsMatch(text)) + { + var matches = regex.Value.Matches(text).Cast(); + extractResults = extractResults.Where(er => !matches.Any(m => m.Index < er.Start + er.Length && m.Index + m.Length > er.Start)) + .ToList(); + } + } + } + } + + return extractResults; + } + private List MergeMultipleDuration(string text, List extractorResults) { if (extractorResults.Count <= 1) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKMergedDateTimeExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKMergedDateTimeExtractor.cs index aa3748747b..98354748f3 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKMergedDateTimeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKMergedDateTimeExtractor.cs @@ -129,6 +129,17 @@ private void AddMod(List ers, string text) er.Metadata = AssignModMetadata(er.Metadata); } + match = this.config.UntilRegex.MatchBegin(afterStr, trim: true); + + if (match.Success) + { + var modLength = match.Index + match.Length; + er.Length += modLength; + er.Text = text.Substring(er.Start ?? 0, er.Length ?? 
0); + + er.Metadata = AssignModMetadata(er.Metadata); + } + match = this.config.SincePrefixRegex.MatchEnd(beforeStr, trim: true); if (match.Success && AmbiguousRangeChecker(beforeStr, text, er)) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDurationExtractorConfiguration.cs index d176c140c4..d16a2a1d5f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDurationExtractorConfiguration.cs @@ -32,5 +32,7 @@ public interface ICJKDurationExtractorConfiguration : IDateTimeOptionsConfigurat Dictionary UnitValueMap { get; } + Dictionary AmbiguityDurationFiltersDict { get; } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IMergedExtractorConfiguration.cs index 459ba64a17..51d25085e4 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/IMergedExtractorConfiguration.cs @@ -67,6 +67,8 @@ public interface IMergedExtractorConfiguration : IDateTimeOptionsConfiguration // Regex to act as umbrella for key terms so that sentences that clearly don't have entities can be rejected quickly Regex FailFastRegex { get; } + Regex YearRegex { get; } + StringMatcher SuperfluousWordMatcher { get; } Dictionary AmbiguityFiltersDict { get; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchMergedExtractorConfiguration.cs index c74bc649ab..e7f3f61577 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchMergedExtractorConfiguration.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/French/Extractors/FrenchMergedExtractorConfiguration.cs @@ -57,6 +57,9 @@ public class FrenchMergedExtractorConfiguration : BaseDateTimeOptionsConfigurati public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher(); + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public FrenchMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) @@ -141,6 +144,8 @@ public FrenchMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + public Regex FailFastRegex { get; } = null; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanMergedExtractorConfiguration.cs index bac36e832c..1e2f527e42 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/German/Extractors/GermanMergedExtractorConfiguration.cs @@ -49,6 +49,9 @@ public class GermanMergedExtractorConfiguration : BaseDateTimeOptionsConfigurati public static readonly Regex UnspecificDatePeriodRegex = new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags); + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher(); public static readonly Regex[] TermFilterRegexes = @@ -142,6 +145,8 @@ public GermanMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => 
null; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + public Regex FailFastRegex { get; } = null; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiMergedExtractorConfiguration.cs index 5f86449ee1..485ef52087 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Hindi/Extractors/HindiMergedExtractorConfiguration.cs @@ -51,6 +51,9 @@ public class HindiMergedExtractorConfiguration : BaseDateTimeOptionsConfiguratio public static readonly Regex FailFastRegex = new Regex(DateTimeDefinitions.FailFastRegex, RegexFlags | RegexOptions.Compiled); + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + public static readonly Regex[] TermFilterRegexes = { // one on one @@ -141,6 +144,8 @@ public HindiMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + Regex IMergedExtractorConfiguration.FailFastRegex => FailFastRegex; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianMergedExtractorConfiguration.cs index 376a17e790..e53a2241ad 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Italian/Extractors/ItalianMergedExtractorConfiguration.cs @@ -56,6 +56,9 @@ public class ItalianMergedExtractorConfiguration : BaseDateTimeOptionsConfigurat public static readonly 
StringMatcher SuperfluousWordMatcher = new StringMatcher(); + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public ItalianMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) @@ -131,6 +134,8 @@ public ItalianMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + public Regex FailFastRegex { get; } = null; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDurationExtractorConfiguration.cs index 6b600ce73d..382edd096b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDurationExtractorConfiguration.cs @@ -6,8 +6,8 @@ using System.Globalization; using System.Linq; using System.Text.RegularExpressions; - using Microsoft.Recognizers.Definitions.Japanese; +using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.NumberWithUnit; using Microsoft.Recognizers.Text.NumberWithUnit.Japanese; @@ -48,6 +48,8 @@ public JapaneseDurationExtractorConfiguration(IDateTimeOptionsConfiguration conf UnitMap = DateTimeDefinitions.ParserConfigurationUnitMap.ToDictionary(k => k.Key, k => k.Value); UnitValueMap = DateTimeDefinitions.DurationUnitValueMap; + AmbiguityDurationFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityDurationFiltersDict); + } public IExtractor InternalExtractor { get; } @@ -56,6 +58,8 @@ public 
JapaneseDurationExtractorConfiguration(IDateTimeOptionsConfiguration conf public Dictionary UnitValueMap { get; } + public Dictionary AmbiguityDurationFiltersDict { get; } + Regex ICJKDurationExtractorConfiguration.DurationUnitRegex => DurationUnitRegex; Regex ICJKDurationExtractorConfiguration.DurationConnectorRegex => DurationConnectorRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDurationExtractorConfiguration.cs index 7dfd383b7b..b4007d88e4 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDurationExtractorConfiguration.cs @@ -56,6 +56,8 @@ public KoreanDurationExtractorConfiguration(IDateTimeOptionsConfiguration config public Dictionary UnitValueMap { get; } + public Dictionary AmbiguityDurationFiltersDict => null; + Regex ICJKDurationExtractorConfiguration.DurationUnitRegex => DurationUnitRegex; Regex ICJKDurationExtractorConfiguration.DurationConnectorRegex => DurationConnectorRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.xml b/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.xml index 7ef470bb62..11f4e6ef09 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.xml +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Microsoft.Recognizers.Text.DateTime.xml @@ -84,6 +84,11 @@ NoProtoCache + + + NoProtoCache + + FailFast, mode that aborts extraction/tagging quickly for non-entity cases. May be removed later. 
diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimePeriodParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimePeriodParser.cs index 12ba3a3b65..4e67bfb830 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimePeriodParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDateTimePeriodParser.cs @@ -1097,55 +1097,58 @@ private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject refe DateObject pastBegin = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue, pastEnd = (DateObject)((DateTimeResolutionResult)pr2.Value).PastValue; - if (bothHaveDates) + // If one side contains "ampm" while the other doesn't, shift the time appropriately + var ampmStr1 = ((DateTimeResolutionResult)pr1.Value).Comment; + var ampmStr2 = ((DateTimeResolutionResult)pr2.Value).Comment; + if (ampmStr1 is Constants.Comment_AmPm ^ ampmStr2 is Constants.Comment_AmPm) { - if (futureBegin > futureEnd) + if (futureBegin > futureEnd && futureBegin.Date == futureEnd.Date) { - futureBegin = pastBegin; + futureEnd = futureEnd.AddHours(Constants.HalfDayHourCount); } - if (pastEnd < pastBegin) + if (pastBegin > pastEnd && pastBegin.Date == pastEnd.Date) { - pastEnd = futureEnd; + pastEnd = pastEnd.AddHours(Constants.HalfDayHourCount); } } + var leftTimex = pr1.TimexStr; + var rightTimex = pr2.TimexStr; + if (bothHaveDates) { - var duration = futureEnd - futureBegin; - var durationStr = Convert.ToInt32(duration.TotalHours) != 0 ? 
$"{Convert.ToInt32(duration.TotalHours)}H" : - $"{Convert.ToInt32(duration.TotalMinutes)}M"; - ret.Timex = $"({pr1.TimexStr},{pr2.TimexStr},PT{durationStr})"; + if (futureBegin > futureEnd) + { + futureBegin = pastBegin; + } - // Do nothing + if (pastEnd < pastBegin) + { + pastEnd = futureEnd; + } } else if (beginHasDate) { futureEnd = DateObject.MinValue.SafeCreateFromValue( futureBegin.Year, futureBegin.Month, futureBegin.Day, futureEnd.Hour, futureEnd.Minute, futureEnd.Second); - pastEnd = DateObject.MinValue.SafeCreateFromValue( pastBegin.Year, pastBegin.Month, pastBegin.Day, pastEnd.Hour, pastEnd.Minute, pastEnd.Second); - var dateStr = pr1.TimexStr.Split('T')[0]; - var durationStr = DateTimeFormatUtil.LuisTimeSpan(futureEnd - futureBegin); - ret.Timex = $"({pr1.TimexStr},{dateStr + pr2.TimexStr},{durationStr})"; + rightTimex = TimexUtility.CombineDateTimeTimex(pr2.TimexStr, pr1.TimexStr, futureEnd); } else if (endHasDate) { futureBegin = DateObject.MinValue.SafeCreateFromValue( futureEnd.Year, futureEnd.Month, futureEnd.Day, futureBegin.Hour, futureBegin.Minute, futureBegin.Second); - pastBegin = DateObject.MinValue.SafeCreateFromValue( pastEnd.Year, pastEnd.Month, pastEnd.Day, pastBegin.Hour, pastBegin.Minute, pastBegin.Second); - var dateStr = pr2.TimexStr.Split('T')[0]; - var durationStr = DateTimeFormatUtil.LuisTimeSpan(pastEnd - pastBegin); - ret.Timex = $"({dateStr + pr1.TimexStr},{pr2.TimexStr},{durationStr})"; + leftTimex = TimexUtility.CombineDateTimeTimex(pr1.TimexStr, pr2.TimexStr, pastBegin); } - var ampmStr1 = ((DateTimeResolutionResult)pr1.Value).Comment; - var ampmStr2 = ((DateTimeResolutionResult)pr2.Value).Comment; + ret.Timex = TimexUtility.GenerateDateTimePeriodTimex(leftTimex, rightTimex, futureEnd - futureBegin); + if (!string.IsNullOrEmpty(ampmStr1) && ampmStr1.EndsWith(Constants.Comment_AmPm, StringComparison.Ordinal) && !string.IsNullOrEmpty(ampmStr2) && ampmStr2.EndsWith(Constants.Comment_AmPm, StringComparison.Ordinal)) { diff --git 
a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateParser.cs index 4a20d4d351..5d0461d1ad 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateParser.cs @@ -318,8 +318,9 @@ protected DateTimeResolutionResult ParseImplicitDate(string text, DateObject ref // handle "明日から3週間" (3 weeks from tomorrow) var durationResult = this.config.DurationExtractor.Extract(text, referenceDate); var unitMatch = this.config.DurationRelativeDurationUnitRegex.Match(text); + var isWithin = this.config.DurationRelativeDurationUnitRegex.MatchEnd(text, trim: true).Groups[Constants.WithinGroupName].Success; - if (exactMatch.Success && unitMatch.Success && (durationResult.Count > 0) && + if ((exactMatch.Success || isWithin) && unitMatch.Success && (durationResult.Count > 0) && string.IsNullOrEmpty(unitMatch.Groups["few"].Value)) { var pr = this.config.DurationParser.Parse(durationResult[0], referenceDate); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimeParser.cs index ec11874279..40e97756fd 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimeParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimeParser.cs @@ -158,6 +158,14 @@ private DateTimeResolutionResult MergeDateAndTime(string text, DateObject refere var pastDate = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue; var time = (DateObject)((DateTimeResolutionResult)pr2.Value).FutureValue; + // handle cases with time like 25時 which resolve to the next day + var timexHour = TimexUtility.ParseHourFromTimeTimex(pr2.TimexStr); + if (timexHour > Constants.DayHourCount) + { + futureDate = futureDate.AddDays(1); + pastDate = pastDate.AddDays(1); + } + var hour = time.Hour; var 
min = time.Minute; var sec = time.Second; @@ -178,7 +186,6 @@ private DateTimeResolutionResult MergeDateAndTime(string text, DateObject refere timeStr = timeStr.Substring(0, timeStr.Length - 4); } - timeStr = "T" + hour.ToString("D2", CultureInfo.InvariantCulture) + timeStr.Substring(3); ret.Timex = pr1.TimexStr + timeStr; var val = (DateTimeResolutionResult)pr2.Value; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimePeriodParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimePeriodParser.cs index 3bd7bcfef4..08d6704ef4 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimePeriodParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimePeriodParser.cs @@ -126,15 +126,31 @@ private DateTimeResolutionResult MergeDateAndTimePeriod(string text, DateObject var futureDate = (DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue; var pastDate = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue; + // handle cases with time like 25時 which resolve to the next day + var swiftDay = 0; + var timexHours = TimexUtility.ParseHoursFromTimePeriodTimex(pr2.TimexStr); + if (timexHours.Item1 > Constants.DayHourCount) + { + pastDate = pastDate.AddDays(1); + futureDate = futureDate.AddDays(1); + } + else if (timexHours.Item2 > Constants.DayHourCount) + { + swiftDay++; + } + + var pastDateAlt = pastDate.AddDays(swiftDay); + var futureDateAlt = futureDate.AddDays(swiftDay); + ret.FutureValue = new Tuple( DateObject.MinValue.SafeCreateFromValue(futureDate.Year, futureDate.Month, futureDate.Day, beginTime.Hour, beginTime.Minute, beginTime.Second), - DateObject.MinValue.SafeCreateFromValue(futureDate.Year, futureDate.Month, futureDate.Day, endTime.Hour, endTime.Minute, endTime.Second)); + DateObject.MinValue.SafeCreateFromValue(futureDateAlt.Year, futureDateAlt.Month, futureDateAlt.Day, endTime.Hour, endTime.Minute, endTime.Second)); ret.PastValue = new Tuple( 
DateObject.MinValue.SafeCreateFromValue(pastDate.Year, pastDate.Month, pastDate.Day, beginTime.Hour, beginTime.Minute, beginTime.Second), - DateObject.MinValue.SafeCreateFromValue(pastDate.Year, pastDate.Month, pastDate.Day, endTime.Hour, endTime.Minute, endTime.Second)); + DateObject.MinValue.SafeCreateFromValue(pastDateAlt.Year, pastDateAlt.Month, pastDateAlt.Day, endTime.Hour, endTime.Minute, endTime.Second)); ret.Timex = TimexUtility.GenerateSplitDateTimePeriodTimex(pr1.TimexStr, pr2.TimexStr); ret.Success = !string.IsNullOrEmpty(ret.Timex); @@ -216,19 +232,13 @@ private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject refe DateObject futureBegin = (DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue, futureEnd = (DateObject)((DateTimeResolutionResult)pr2.Value).FutureValue; - DateObject pastBegin = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue, - pastEnd = (DateObject)((DateTimeResolutionResult)pr2.Value).PastValue; + DateObject pastBegin = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue; if (futureBegin > futureEnd) { futureBegin = pastBegin; } - if (pastEnd < pastBegin) - { - pastEnd = futureEnd; - } - if (bothHaveDates) { rightTime = DateObject.MinValue.SafeCreateFromValue(futureEnd.Year, futureEnd.Month, futureEnd.Day); @@ -236,22 +246,10 @@ private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject refe } else if (beginHasDate) { - // TODO: Handle "明天下午两点到五点" - futureEnd = DateObject.MinValue.SafeCreateFromValue( - futureBegin.Year, futureBegin.Month, futureBegin.Day, futureEnd.Hour, futureEnd.Minute, futureEnd.Second); - pastEnd = DateObject.MinValue.SafeCreateFromValue( - pastBegin.Year, pastBegin.Month, pastBegin.Day, pastEnd.Hour, pastEnd.Minute, pastEnd.Second); - leftTime = DateObject.MinValue.SafeCreateFromValue(futureBegin.Year, futureBegin.Month, futureBegin.Day); } else if (endHasDate) { - // TODO: Handle "明天下午两点到五点" - futureBegin = DateObject.MinValue.SafeCreateFromValue( - 
futureEnd.Year, futureEnd.Month, futureEnd.Day, futureBegin.Hour, futureBegin.Minute, futureBegin.Second); - pastBegin = DateObject.MinValue.SafeCreateFromValue( - pastEnd.Year, pastEnd.Month, pastEnd.Day, pastBegin.Hour, pastBegin.Minute, pastBegin.Second); - rightTime = DateObject.MinValue.SafeCreateFromValue(futureEnd.Year, futureEnd.Month, futureEnd.Day); } @@ -260,27 +258,18 @@ private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject refe var leftResultTime = (DateObject)leftResult.FutureValue; var rightResultTime = (DateObject)rightResult.FutureValue; - int day = referenceTime.Day, - month = referenceTime.Month, - year = referenceTime.Year; - // check if the right time is smaller than the left time, if yes, add one day int hour = leftResultTime.Hour > 0 ? leftResultTime.Hour : 0, min = leftResultTime.Minute > 0 ? leftResultTime.Minute : 0, second = leftResultTime.Second > 0 ? leftResultTime.Second : 0; - leftTime = leftTime.AddHours(hour); - leftTime = leftTime.AddMinutes(min); - leftTime = leftTime.AddSeconds(second); - DateObject.MinValue.SafeCreateFromValue(year, month, day, hour, min, second); + leftTime = leftTime.AddHours(hour).AddMinutes(min).AddSeconds(second); hour = rightResultTime.Hour > 0 ? rightResultTime.Hour : 0; min = rightResultTime.Minute > 0 ? rightResultTime.Minute : 0; second = rightResultTime.Second > 0 ? 
rightResultTime.Second : 0; - rightTime = rightTime.AddHours(hour); - rightTime = rightTime.AddMinutes(min); - rightTime = rightTime.AddSeconds(second); + rightTime = rightTime.AddHours(hour).AddMinutes(min).AddSeconds(second); // the right side time contains "ampm", while the left side doesn't if (rightResult.Comment is Constants.Comment_AmPm && @@ -296,23 +285,18 @@ private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject refe ret.FutureValue = ret.PastValue = new Tuple(leftTime, rightTime); - var leftTimex = string.Empty; - var rightTimex = string.Empty; - - // "X" is timex token for not determined time - if (!pr1.TimexStr.Contains("X") && !pr2.TimexStr.Contains("X")) + var leftTimex = pr1.TimexStr; + var rightTimex = pr2.TimexStr; + if (beginHasDate) { - leftTimex = DateTimeFormatUtil.LuisDateTime(leftTime); - rightTimex = DateTimeFormatUtil.LuisDateTime(rightTime); + rightTimex = DateTimeFormatUtil.LuisDateShortTime(rightTime, pr2.TimexStr); } - else + else if (endHasDate) { - leftTimex = pr1.TimexStr; - rightTimex = pr2.TimexStr; + leftTimex = DateTimeFormatUtil.LuisDateShortTime(leftTime, pr1.TimexStr); } - ret.Timex = $"({leftTimex},{rightTimex},PT{Convert.ToInt32((rightTime - leftTime).TotalHours)}H)"; - + ret.Timex = TimexUtility.GenerateDateTimePeriodTimex(leftTimex, rightTimex, rightTime - leftTime); ret.Success = true; return ret; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseMergedExtractorConfiguration.cs index e363955268..d4b3638fb3 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Portuguese/Extractors/PortugueseMergedExtractorConfiguration.cs @@ -53,6 +53,9 @@ public class PortugueseMergedExtractorConfiguration : BaseDateTimeOptionsConfigu public static readonly 
StringMatcher SuperfluousWordMatcher = new StringMatcher(); + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + public static readonly Regex[] TermFilterRegexes = System.Array.Empty(); private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; @@ -139,6 +142,8 @@ public PortugueseMergedExtractorConfiguration(IDateTimeOptionsConfiguration conf Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + public Regex FailFastRegex { get; } = null; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishMergedExtractorConfiguration.cs index c705b7925d..7f821a0520 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Spanish/Extractors/SpanishMergedExtractorConfiguration.cs @@ -51,6 +51,8 @@ public class SpanishMergedExtractorConfiguration : BaseDateTimeOptionsConfigurat public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher(); + public static readonly Regex YearRegex = new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture; public SpanishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) @@ -146,6 +148,8 @@ public SpanishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + public Regex FailFastRegex { get; } = null; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; diff --git 
a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishMergedExtractorConfiguration.cs index 7fe38173b8..d63cd8abed 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Swedish/Extractors/SwedishMergedExtractorConfiguration.cs @@ -53,6 +53,9 @@ public class SwedishMergedExtractorConfiguration : BaseDateTimeOptionsConfigurat public static readonly Regex FailFastRegex = new Regex(DateTimeDefinitions.FailFastRegex, RegexFlags | RegexOptions.Compiled); + public static readonly Regex YearRegex = + new Regex(DateTimeDefinitions.YearRegex, RegexFlags); + public static readonly Regex[] TermFilterRegexes = { // one on one @@ -164,6 +167,8 @@ public SwedishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => UnspecificTimePeriodRegex; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + Regex IMergedExtractorConfiguration.FailFastRegex => FailFastRegex; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishMergedExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishMergedExtractorConfiguration.cs index d211fca018..f96c982a85 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishMergedExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Turkish/Extractors/TurkishMergedExtractorConfiguration.cs @@ -51,6 +51,9 @@ public class TurkishMergedExtractorConfiguration : BaseDateTimeOptionsConfigurat public static readonly Regex FailFastRegex = new Regex(DateTimeDefinitions.FailFastRegex, RegexFlags | RegexOptions.Compiled); + public static readonly Regex YearRegex = + new 
Regex(DateTimeDefinitions.YearRegex, RegexFlags); + public static readonly Regex[] TermFilterRegexes = { // one on one @@ -141,6 +144,8 @@ public TurkishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config) Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null; + Regex IMergedExtractorConfiguration.YearRegex => YearRegex; + Regex IMergedExtractorConfiguration.FailFastRegex => FailFastRegex; IEnumerable IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateTimeFormatUtil.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateTimeFormatUtil.cs index 3d473200c3..c0a4c40415 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateTimeFormatUtil.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/DateTimeFormatUtil.cs @@ -157,14 +157,24 @@ public static string LuisDateTime(DateObject time) return $"{LuisDate(time)}{Constants.TimeTimexPrefix}{LuisTime(time.Hour, time.Minute, time.Second)}"; } - // Only handle TimeSpan which is less than one day + // If a timex is given and it contains minutes and seconds, the result also includes minutes and seconds. + // Otherwise the result does not include minutes and seconds if they are zero. + public static string LuisDateShortTime(DateObject time, string timex = null) + { + var hasMin = timex != null ? timex.Contains(Constants.TimeTimexConnector) : false; + var hasSec = timex != null ? 
timex.Split(Constants.TimeTimexConnector[0]).Length > 2 : false; + + return $"{LuisDate(time)}{FormatShortTime(time, hasMin, hasSec)}"; + } + + // Also handle TimeSpans which are more than one day public static string LuisTimeSpan(System.TimeSpan timeSpan) { var timexBuilder = new StringBuilder($"{Constants.GeneralPeriodPrefix}{Constants.TimeTimexPrefix}"); - if (timeSpan.Hours > 0) + if (timeSpan.Days > 0 || timeSpan.Hours > 0) { - timexBuilder.Append($"{timeSpan.Hours}H"); + timexBuilder.Append($"{(timeSpan.Days * Constants.DayHourCount) + timeSpan.Hours}H"); } if (timeSpan.Minutes > 0) @@ -190,6 +200,15 @@ public static string FormatTime(DateObject time) return string.Join(Constants.TimeTimexConnector, time.Hour.ToString("D2", CultureInfo.InvariantCulture), time.Minute.ToString("D2", CultureInfo.InvariantCulture), time.Second.ToString("D2", CultureInfo.InvariantCulture)); } + // Does not return minutes and seconds if they are zero + public static string FormatShortTime(DateObject time, bool keepMin = false, bool keepSec = false) + { + int hour = time.Hour, + min = (keepMin || time.Minute > 0) ? time.Minute : Constants.InvalidMinute, + sec = (keepSec || time.Second > 0) ? time.Second : Constants.InvalidSecond; + return ShortTime(hour, min, sec); + } + public static string FormatDateTime(DateObject datetime) { return $"{FormatDate(datetime)} {FormatTime(datetime)}"; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeFunctions.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeFunctions.cs index 254aa04bd2..9229558848 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeFunctions.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeFunctions.cs @@ -79,12 +79,12 @@ public DateTimeResolutionResult PackTimeResult(DateTimeExtra extra, Ti noDesc = false; } - int hour = timeResult.Hour > 0 ? timeResult.Hour % Constants.DayHourCount : 0, + // Hours > 24 (e.g. 
25時 which resolves to the next day) are kept unnormalized in the timex + // to avoid ambiguity in other entities. For example, "on the 30th at 25" is resolved to + // "XXXX-XX-30T25" because with "XXXX-XX-30+1T01" it is not known if the day should be "31" or "01". + int hour = timeResult.Hour > 0 && timeResult.Hour != Constants.DayHourCount ? timeResult.Hour : 0, min = timeResult.Minute > 0 ? timeResult.Minute : 0, - second = timeResult.Second > 0 ? timeResult.Second : 0, - day = referenceTime.Day, - month = referenceTime.Month, - year = referenceTime.Year; + second = timeResult.Second > 0 ? timeResult.Second : 0; var dateTimeResult = new DateTimeResolutionResult(); @@ -109,6 +109,18 @@ public DateTimeResolutionResult PackTimeResult(DateTimeExtra extra, Ti build.Append(":" + second.ToString("D2", CultureInfo.InvariantCulture)); } + // handle cases with time like 25時 (the hour is normalized in the past/future values) + if (timeResult.Hour > Constants.DayHourCount) + { + hour = timeResult.Hour - Constants.DayHourCount; + referenceTime = referenceTime.AddDays(1); + if (noDesc) + { + dateTimeResult.Comment = Constants.Comment_Am; + noDesc = false; + } + } + if (noDesc && hour <= Constants.HalfDayHourCount) { // build.Append("ampm"); @@ -117,6 +129,7 @@ public DateTimeResolutionResult PackTimeResult(DateTimeExtra extra, Ti dateTimeResult.Timex = build.ToString(); + int day = referenceTime.Day, month = referenceTime.Month, year = referenceTime.Year; dateTimeResult.FutureValue = dateTimeResult.PastValue = DateObject.MinValue.SafeCreateFromValue(year, month, day, hour, min, second); dateTimeResult.Success = true; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimePeriodFunctions.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimePeriodFunctions.cs index 00791e96d1..dc04613235 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimePeriodFunctions.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimePeriodFunctions.cs @@ 
-91,31 +91,50 @@ public static DateTimeResolutionResult Handle(IDateTimeParser timeParser, DateTi int day = refTime.Day, month = refTime.Month, - year = refTime.Year; + year = refTime.Year, + rightSwiftDay = 0, + leftSwiftDay = 0; // determine if the right side time is smaller than the left side, if yes, add one day int hour = leftResult.Hour > 0 ? leftResult.Hour : 0, min = leftResult.Minute > 0 ? leftResult.Minute : 0, second = leftResult.Second > 0 ? leftResult.Second : 0; + // handle cases with time like 25時 which resolve to the next day + if (hour > Constants.DayHourCount) + { + hour -= Constants.DayHourCount; + leftSwiftDay++; + } + var leftTime = DateObject.MinValue.SafeCreateFromValue(year, month, day, hour, min, second); hour = rightResult.Hour > 0 ? rightResult.Hour : 0; min = rightResult.Minute > 0 ? rightResult.Minute : 0; second = rightResult.Second > 0 ? rightResult.Second : 0; + // handle cases with time like 25時 which resolve to the next day + if (hour > Constants.DayHourCount) + { + hour -= Constants.DayHourCount; + rightSwiftDay++; + } + var rightTime = DateObject.MinValue.SafeCreateFromValue(year, month, day, hour, min, second); - if (rightTime.Hour < leftTime.Hour) + if (rightResult.Hour < leftResult.Hour) { rightTime = rightTime.AddDays(1); } - ret.FutureValue = ret.PastValue = new Tuple(leftTime, rightTime); - var leftTimex = BuildTimex(leftResult); var rightTimex = BuildTimex(rightResult); ret.Timex = $"({leftTimex},{rightTimex},{BuildSpan(leftResult, rightResult)})"; + + leftTime = leftTime.AddDays(leftSwiftDay); + rightTime = rightTime.AddDays(rightSwiftDay); + + ret.FutureValue = ret.PastValue = new Tuple(leftTime, rightTime); return ret; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimexUtility.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimexUtility.cs index dcb85b5d6a..c39ee26430 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimexUtility.cs +++ 
b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimexUtility.cs @@ -458,11 +458,23 @@ public static string GenerateWeekTimex(int weekNum) return $"W{weekNum.ToString("D2", CultureInfo.InvariantCulture)}"; } + public static string CombineDateTimeTimex(string timeTimex1, string dateTimeTimex2, DateObject dateTime1) + { + return dateTimeTimex2.Equals(Constants.TimexNow, StringComparison.Ordinal) ? DateTimeFormatUtil.LuisDateShortTime(dateTime1) : + dateTimeTimex2.Split(Constants.TimeTimexPrefix[0])[0] + timeTimex1; + } + public static string GenerateDateTimePeriodTimex(string beginTimex, string endTimex, string durationTimex) { return $"({beginTimex},{endTimex},{durationTimex})"; } + public static string GenerateDateTimePeriodTimex(string beginTimex, string endTimex, TimeSpan duration) + { + var durationTimex = DateTimeFormatUtil.LuisTimeSpan(duration); + return GenerateDateTimePeriodTimex(beginTimex, endTimex, durationTimex); + } + public static string GenerateDateTimePeriodTimex(DateObject beginDateTime, DateObject endDateTime, string durationTimex) { return GenerateDateTimePeriodTimex(DateTimeFormatUtil.LuisDateTime(beginDateTime), @@ -569,6 +581,30 @@ public static float ParseNumberFromDurationTimex(string timex) return float.Parse(numberStr); } + public static int ParseHourFromTimeTimex(string timex) + { + var start = timex.IndexOf(Constants.TimeTimexPrefix) + 1; + var end = timex.IndexOf(Constants.TimeTimexConnector); + end = end > 0 ? 
end : timex.Length; + var hourStr = timex.Substring(start, end - start); + int.TryParse(hourStr, out int hour); + + return hour; + } + + public static Tuple ParseHoursFromTimePeriodTimex(string timex) + { + int hour1 = 0, hour2 = 0; + var timeList = timex.Split(Constants.TimexSeparator[0]); + if (timeList.Length > 2) + { + hour1 = ParseHourFromTimeTimex(timeList[0]); + hour2 = ParseHourFromTimeTimex(timeList[1]); + } + + return new Tuple(hour1, hour2); + } + private static bool IsTimeDurationTimex(string timex) { return timex.StartsWith($"{Constants.GeneralPeriodPrefix}{Constants.TimeTimexPrefix}", StringComparison.Ordinal); diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/CurrencyExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/CurrencyExtractorConfiguration.cs index 2ee0c48be9..25308851d8 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/CurrencyExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Extractors/CurrencyExtractorConfiguration.cs @@ -13,9 +13,6 @@ namespace Microsoft.Recognizers.Text.NumberWithUnit.English { public class CurrencyExtractorConfiguration : EnglishNumberWithUnitExtractorConfiguration { - public static readonly ImmutableDictionary CurrencySuffixList = - NumbersWithUnitDefinitions.CurrencySuffixList.ToImmutableDictionary(); - // CurrencyNameToIsoCodeMap dictionary (excluding fake and unofficial Iso codes starting with underscore) public static readonly Dictionary IsoCodeDict = NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) @@ -26,16 +23,31 @@ public class CurrencyExtractorConfiguration : EnglishNumberWithUnitExtractorConf NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) .ToDictionary(x => x.Key, x => x.Value.ToLower(CultureInfo.InvariantCulture) + "$"); + // 
CurrencyNameToIsoCodeMap preceded by 'M' symbol (e.g. 'MUSD') + public static readonly Dictionary IsoCodeWithMutiplierDict = + NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.Where(x => !x.Value.StartsWith("_", StringComparison.Ordinal)) + .ToDictionary(x => x.Key, x => "m" + x.Value.ToLower(CultureInfo.InvariantCulture)); + // Merge IsoCodeDict and IsoCodeWithSymbolDict - public static readonly Dictionary IsoCodeCombinedDict = IsoCodeDict.Concat(IsoCodeWithSymbolDict) + public static readonly Dictionary IsoCodeCombinedDictWithSymbol = IsoCodeDict.Concat(IsoCodeWithSymbolDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + + // Merge IsoCodeDict and IsoCodeWithMutiplierDict + public static readonly Dictionary IsoCodeCombinedDict = IsoCodeCombinedDictWithSymbol.Concat(IsoCodeWithMutiplierDict) .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); // Merge IsoCodeCombinedDict with CurrencyPrefixList (excluding fake and unofficial Iso codes starting with underscore) public static readonly Dictionary CurrencyPrefixDict = NumbersWithUnitDefinitions.CurrencyPrefixList.Concat(IsoCodeCombinedDict) .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + // Merge IsoCodeCombinedDict with CurrencySuffixList (excluding fake and unofficial Iso codes starting with underscore) + public static readonly Dictionary CurrencySuffixDict = NumbersWithUnitDefinitions.CurrencySuffixList.Concat(IsoCodeCombinedDict) + .GroupBy(x => x.Key).ToDictionary(x => x.Key, y => y.Count() > 1 ? 
string.Join("|", new string[] { y.First().Value, y.Last().Value }) : y.First().Value); + public static readonly ImmutableDictionary CurrencyPrefixList = CurrencyPrefixDict.ToImmutableDictionary(); + public static readonly ImmutableDictionary CurrencySuffixList = CurrencySuffixDict.ToImmutableDictionary(); + public static readonly ImmutableDictionary FractionalUnitNameToCodeMap = NumbersWithUnitDefinitions.FractionalUnitNameToCodeMap.ToImmutableDictionary(); diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/CurrencyParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/CurrencyParserConfiguration.cs index d92a288c4b..89f58e806a 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/CurrencyParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/English/Parsers/CurrencyParserConfiguration.cs @@ -3,6 +3,7 @@ using System.Collections.Immutable; using System.Globalization; +using System.Linq; using Microsoft.Recognizers.Definitions.English; namespace Microsoft.Recognizers.Text.NumberWithUnit.English @@ -20,6 +21,7 @@ public CurrencyParserConfiguration(CultureInfo ci) this.BindDictionary(CurrencyExtractorConfiguration.CurrencySuffixList); this.BindDictionary(CurrencyExtractorConfiguration.CurrencyPrefixList); this.CurrencyNameToIsoCodeMap = NumbersWithUnitDefinitions.CurrencyNameToIsoCodeMap.ToImmutableDictionary(); + this.MultiplierIsoCodeList = CurrencyExtractorConfiguration.IsoCodeWithMutiplierDict.Values.ToList(); this.CurrencyFractionCodeList = NumbersWithUnitDefinitions.FractionalUnitNameToCodeMap.ToImmutableDictionary(); } } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/BaseCurrencyParser.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/BaseCurrencyParser.cs index 217704ec33..60228ba1c1 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/BaseCurrencyParser.cs +++ 
b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/BaseCurrencyParser.cs @@ -39,6 +39,22 @@ public ParseResult Parse(ExtractResult extResult) pr = numberWithUnitParser.Parse(extResult); var value = pr.Value as UnitValue; + // Parse unit like "MUSD" that resolves to "1 million USD" + if (extResult.Data is ExtractResult) + { + var data = (ExtractResult)extResult.Data; + var unitStr = extResult.Text.Replace(data.Text, string.Empty).Trim(); + if (this.Config.MultiplierIsoCodeList.Contains(unitStr) && float.TryParse(value?.Number, out var number)) + { + value.Number = (number * 1000000).ToString("G15", CultureInfo.InvariantCulture); + pr.Value = new UnitValue + { + Unit = value?.Unit, + Number = value?.Number, + }; + } + } + Config.CurrencyNameToIsoCodeMap.TryGetValue(value?.Unit, out var mainUnitIsoCode); if (string.IsNullOrEmpty(mainUnitIsoCode) || mainUnitIsoCode.StartsWith(Constants.FAKE_ISO_CODE_PREFIX, StringComparison.Ordinal)) { diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/INumberWithUnitParserConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/INumberWithUnitParserConfiguration.cs index 6ef19b06f7..c12f3d65c2 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/INumberWithUnitParserConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Parsers/INumberWithUnitParserConfiguration.cs @@ -41,6 +41,7 @@ protected BaseNumberWithUnitParserConfiguration(CultureInfo ci) this.CurrencyFractionMapping = BaseCurrency.CurrencyFractionMapping.ToImmutableDictionary(); this.NonStandardFractionalSubunits = BaseCurrency.NonStandardFractionalSubunits.ToImmutableDictionary(); this.CurrencyNameToIsoCodeMap = new Dictionary(); + this.MultiplierIsoCodeList = new List(); this.CurrencyFractionCodeList = new Dictionary(); this.TypeList = new Dictionary(); this.CheckFirstSuffix = false; @@ -66,6 +67,8 @@ protected BaseNumberWithUnitParserConfiguration(CultureInfo ci) public IDictionary 
CurrencyNameToIsoCodeMap { get; set; } + public List MultiplierIsoCodeList { get; set; } + public IDictionary CurrencyFractionCodeList { get; set; } public abstract IDictionary TypeList { get; set; } diff --git a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs index 394ebc3e0a..e8f68fd2ee 100644 --- a/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.NumberWithUnit/Spanish/Extractors/SpanishNumberWithUnitExtractorConfiguration.cs @@ -26,6 +26,9 @@ public abstract class SpanishNumberWithUnitExtractorConfiguration : INumberWithU private static readonly Regex NonUnitsRegex = new Regex(BaseUnits.PmNonUnitRegex, RegexFlags); + private static readonly Regex NumberMultiplierRegex = + new Regex(NumbersWithUnitDefinitions.MultiplierRegex, RegexFlags); + protected SpanishNumberWithUnitExtractorConfiguration(CultureInfo ci) { this.CultureInfo = ci; @@ -59,7 +62,7 @@ protected SpanishNumberWithUnitExtractorConfiguration(CultureInfo ci) public virtual Regex AmbiguousUnitNumberMultiplierRegex => null; - public Regex MultiplierRegex => null; + public Regex MultiplierRegex => NumberMultiplierRegex; public Dictionary AmbiguityFiltersDict { get; } = null; diff --git a/Java/libraries/resource-generator/pom.xml b/Java/libraries/resource-generator/pom.xml index 326112f8e5..22dc80a32f 100644 --- a/Java/libraries/resource-generator/pom.xml +++ b/Java/libraries/resource-generator/pom.xml @@ -24,7 +24,7 @@ com.fasterxml.jackson.core jackson-databind - 2.10.0.pr1 + 2.12.6.1 org.yaml diff --git a/Java/samples/simple-console/pom.xml b/Java/samples/simple-console/pom.xml index c278be988d..990cbe566e 100644 --- a/Java/samples/simple-console/pom.xml +++ b/Java/samples/simple-console/pom.xml @@ -59,7 +59,7 @@ 
com.fasterxml.jackson.core jackson-databind - 2.10.0.pr1 + 2.12.6.1 com.microsoft.recognizers.text.datetime diff --git a/JavaScript/packages/recognizers-date-time/src/dateTime/baseDateTimePeriod.ts b/JavaScript/packages/recognizers-date-time/src/dateTime/baseDateTimePeriod.ts index 5c6cf141ad..163799cce9 100644 --- a/JavaScript/packages/recognizers-date-time/src/dateTime/baseDateTimePeriod.ts +++ b/JavaScript/packages/recognizers-date-time/src/dateTime/baseDateTimePeriod.ts @@ -571,19 +571,19 @@ export class BaseDateTimePeriodParser implements IDateTimeParser { if (pastEnd < pastBegin) { pastEnd = futureEnd; } - result.timex = `(${prs.begin.timexStr},${prs.end.timexStr},PT${DateUtils.totalHours(futureEnd, futureBegin)}H)`; + result.timex = `(${prs.begin.timexStr},${prs.end.timexStr},${DateTimeFormatUtil.luisTimeSpan(futureBegin, futureEnd)})`; } else if (beginHasDate) { futureEnd = DateUtils.safeCreateFromMinValue(futureBegin.getFullYear(), futureBegin.getMonth(), futureBegin.getDate(), futureEnd.getHours(), futureEnd.getMinutes(), futureEnd.getSeconds()); pastEnd = DateUtils.safeCreateFromMinValue(pastBegin.getFullYear(), pastBegin.getMonth(), pastBegin.getDate(), pastEnd.getHours(), pastEnd.getMinutes(), pastEnd.getSeconds()); let dateStr = prs.begin.timexStr.split('T').pop(); - result.timex = `(${prs.begin.timexStr},${dateStr}${prs.end.timexStr},PT${DateUtils.totalHours(futureEnd, futureBegin)}H)`; + result.timex = `(${prs.begin.timexStr},${dateStr}${prs.end.timexStr},${DateTimeFormatUtil.luisTimeSpan(futureBegin, futureEnd)})`; } else if (endHasDate) { futureBegin = DateUtils.safeCreateFromMinValue(futureEnd.getFullYear(), futureEnd.getMonth(), futureEnd.getDate(), futureBegin.getHours(), futureBegin.getMinutes(), futureBegin.getSeconds()); pastBegin = DateUtils.safeCreateFromMinValue(pastEnd.getFullYear(), pastEnd.getMonth(), pastEnd.getDate(), pastBegin.getHours(), pastBegin.getMinutes(), pastBegin.getSeconds()); let dateStr = prs.end.timexStr.split('T')[0]; 
- result.timex = `(${dateStr}${prs.begin.timexStr},${prs.end.timexStr},PT${DateUtils.totalHours(futureEnd, futureBegin)}H)`; + result.timex = `(${dateStr}${prs.begin.timexStr},${prs.end.timexStr},${DateTimeFormatUtil.luisTimeSpan(futureBegin, futureEnd)})`; } if (!StringUtility.isNullOrEmpty(begin.comment) && begin.comment.endsWith('ampm') && !StringUtility.isNullOrEmpty(end.comment) && end.comment.endsWith('ampm')) { result.comment = 'ampm'; diff --git a/JavaScript/packages/recognizers-date-time/src/dateTime/baseMerged.ts b/JavaScript/packages/recognizers-date-time/src/dateTime/baseMerged.ts index 6dc5439a68..68a6aac7e2 100644 --- a/JavaScript/packages/recognizers-date-time/src/dateTime/baseMerged.ts +++ b/JavaScript/packages/recognizers-date-time/src/dateTime/baseMerged.ts @@ -223,7 +223,7 @@ export class BaseMergedExtractor implements IDateTimeExtractor { }); } - private tryMergeModifierToken(er:ExtractResult, regex: RegExp, source: string, potentialAmbiguity:boolean = false): boolean { + protected tryMergeModifierToken(er:ExtractResult, regex: RegExp, source: string, potentialAmbiguity:boolean = false): boolean { let beforeStr = source.substr(0, er.start).toLowerCase(); // Avoid adding mod for ambiguity cases, such as "from" in "from ... to ..." 
should not add mod @@ -250,7 +250,7 @@ export class BaseMergedExtractor implements IDateTimeExtractor { return false; } - private assignModMetadata(metadata: MetaData): MetaData { + protected assignModMetadata(metadata: MetaData): MetaData { if (metadata === undefined || metadata === null) { metadata = new MetaData(); @@ -262,7 +262,7 @@ export class BaseMergedExtractor implements IDateTimeExtractor { return metadata } - private hasTokenIndex(source: string, regex: RegExp): { matched: boolean, index: number } { + protected hasTokenIndex(source: string, regex: RegExp): { matched: boolean, index: number } { // This part is different from C# because no Regex RightToLeft option in JS let result = { matched: false, index: -1 }; let matchResult = RegExpUtility.getMatches(regex, source); diff --git a/JavaScript/packages/recognizers-date-time/src/dateTime/chinese/dateTimeConfiguration.ts b/JavaScript/packages/recognizers-date-time/src/dateTime/chinese/dateTimeConfiguration.ts index b1f636a285..32efb5658b 100644 --- a/JavaScript/packages/recognizers-date-time/src/dateTime/chinese/dateTimeConfiguration.ts +++ b/JavaScript/packages/recognizers-date-time/src/dateTime/chinese/dateTimeConfiguration.ts @@ -246,11 +246,16 @@ class ChineseDateTimeParserConfiguration implements IDateTimeParserConfiguration export class ChineseDateTimeParser extends BaseDateTimeParser { private readonly durationExtractor: ChineseDurationExtractor; private readonly integerExtractor: BaseNumberExtractor + private readonly lunarRegex: RegExp; + private readonly lunarHolidayRegex: RegExp; + constructor(dmyDateFormat: boolean) { let config = new ChineseDateTimeParserConfiguration(dmyDateFormat); super(config); this.durationExtractor = new ChineseDurationExtractor(); this.integerExtractor = new ChineseIntegerExtractor(); + this.lunarRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.LunarRegex); + this.lunarHolidayRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.LunarHolidayRegex); } parse(er: 
ExtractResult, refTime?: Date): DateTimeParseResult { @@ -276,6 +281,7 @@ export class ChineseDateTimeParser extends BaseDateTimeParser { innerResult.futureResolution[TimeTypeConstants.DATETIME] = DateTimeFormatUtil.formatDateTime(innerResult.futureValue); innerResult.pastResolution = {}; innerResult.pastResolution[TimeTypeConstants.DATETIME] = DateTimeFormatUtil.formatDateTime(innerResult.pastValue); + innerResult.isLunar = this.isLunarCalendar(er.text); value = innerResult; } } @@ -289,6 +295,17 @@ export class ChineseDateTimeParser extends BaseDateTimeParser { return ret; } + // parse if lunar contains + private isLunarCalendar(text: string): boolean { + let trimmedText = text.trim(); + if (RegExpUtility.getMatches(this.lunarRegex, text).length || RegExpUtility.getMatches(this.lunarHolidayRegex, text).length) + { + return true; + } + + return false; + } + // merge a Date entity and a Time entity protected mergeDateAndTime(text: string, referenceTime: Date): DateTimeResolutionResult { let ret = new DateTimeResolutionResult(); diff --git a/JavaScript/packages/recognizers-date-time/src/dateTime/chinese/dateTimePeriodConfiguration.ts b/JavaScript/packages/recognizers-date-time/src/dateTime/chinese/dateTimePeriodConfiguration.ts index 4d8932b6ac..c9ddf8e74e 100644 --- a/JavaScript/packages/recognizers-date-time/src/dateTime/chinese/dateTimePeriodConfiguration.ts +++ b/JavaScript/packages/recognizers-date-time/src/dateTime/chinese/dateTimePeriodConfiguration.ts @@ -556,12 +556,17 @@ export class ChineseDateTimePeriodParser extends BaseDateTimePeriodParser { result.futureValue = [leftTime, rightTime]; result.pastValue = [leftTime, rightTime]; - let hasFuzzyTimex = prs.begin.timexStr.includes('X') || prs.end.timexStr.includes('X'); - let leftTimex = hasFuzzyTimex ? prs.begin.timexStr : DateTimeFormatUtil.luisDateTime(leftTime); - let rightTimex = hasFuzzyTimex ? 
prs.end.timexStr : DateTimeFormatUtil.luisDateTime(rightTime); - let hoursBetween = DateUtils.totalHours(rightTime, leftTime); + let leftTimex = prs.begin.timexStr; + let rightTimex = prs.end.timexStr; + if (beginHasDate) { + rightTimex = DateTimeFormatUtil.luisDateShortTime(rightTime, rightTimex); + } + else if (endHasDate) { + leftTimex = DateTimeFormatUtil.luisDateShortTime(leftTime, leftTimex); + } + let durationTimex = DateTimeFormatUtil.luisTimeSpan(leftTime, rightTime); - result.timex = `(${leftTimex},${rightTimex},PT${hoursBetween}H)`; + result.timex = `(${leftTimex},${rightTimex},${durationTimex})`; result.success = true; return result; diff --git a/JavaScript/packages/recognizers-date-time/src/dateTime/chinese/mergedConfiguration.ts b/JavaScript/packages/recognizers-date-time/src/dateTime/chinese/mergedConfiguration.ts index e0727f7870..37e47c884c 100644 --- a/JavaScript/packages/recognizers-date-time/src/dateTime/chinese/mergedConfiguration.ts +++ b/JavaScript/packages/recognizers-date-time/src/dateTime/chinese/mergedConfiguration.ts @@ -11,7 +11,7 @@ import { BaseTimePeriodExtractor, BaseTimePeriodParser } from "../baseTimePeriod import { BaseDateTimeExtractor, BaseDateTimeParser } from "../baseDateTime"; import { BaseDateTimePeriodExtractor, BaseDateTimePeriodParser } from "../baseDateTimePeriod"; import { BaseDurationExtractor, BaseDurationParser } from "../baseDuration"; -import { ExtractResult, RegExpUtility } from "@microsoft/recognizers-text"; +import { ExtractResult, RegExpUtility, MetaData } from "@microsoft/recognizers-text"; import { BaseNumberExtractor } from "@microsoft/recognizers-text-number"; import { ChineseDateTime } from "../../resources/chineseDateTime"; import { ChineseDurationExtractor, ChineseDurationParser } from "./durationConfiguration"; @@ -41,8 +41,8 @@ class ChineseMergedExtractorConfiguration implements IMergedExtractorConfigurati readonly setExtractor: BaseSetExtractor readonly integerExtractor: BaseNumberExtractor readonly 
afterRegex: RegExp - readonly sinceRegex: RegExp readonly beforeRegex: RegExp + readonly sinceRegex: RegExp readonly fromToRegex: RegExp readonly singleAmbiguousMonthRegex: RegExp readonly prepositionSuffixRegex: RegExp @@ -63,16 +63,22 @@ class ChineseMergedExtractorConfiguration implements IMergedExtractorConfigurati this.setExtractor = new ChineseSetExtractor(dmyDateFormat); this.holidayExtractor = new BaseHolidayExtractor(new ChineseHolidayExtractorConfiguration()); this.durationExtractor = new ChineseDurationExtractor(); + this.beforeRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.ParserConfigurationBefore); + this.afterRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.ParserConfigurationAfter); } } export class ChineseMergedExtractor extends BaseMergedExtractor { private readonly dayOfMonthRegex: RegExp; + private readonly sincePrefixRegex: RegExp + private readonly sinceSuffixRegex: RegExp constructor(options: DateTimeOptions, dmyDateFormat: boolean = false) { let config = new ChineseMergedExtractorConfiguration(dmyDateFormat); super(config, options); this.dayOfMonthRegex = RegExpUtility.getSafeRegExp(`^\\d{1,2}号`, 'gi'); + this.sincePrefixRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.ParserConfigurationSincePrefix); + this.sinceSuffixRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.ParserConfigurationSinceSuffix); } extract(source: string, refDate: Date): ExtractResult[] { @@ -94,6 +100,8 @@ export class ChineseMergedExtractor extends BaseMergedExtractor { result = this.checkBlackList(result, source); + this.addMod(result, source); + result = result.sort((a, b) => a.start - b.start); return result; } @@ -157,6 +165,77 @@ export class ChineseMergedExtractor extends BaseMergedExtractor { return true; }); } + + protected addMod(ers: ExtractResult[], source: string) { + let lastEnd = 0; + ers.forEach(er => { + let success = this.tryMergeModifierToken(er, this.config.beforeRegex, source); + if (!success) { + success = 
this.tryMergeModifierToken(er, this.config.afterRegex, source); + } + + if (!success) { + // SinceRegex in English contains the term "from" which is potentially ambiguous with ranges in the form "from X to Y" + success = this.tryMergeModifierToken(er, this.sincePrefixRegex, source, true, true); + success = this.tryMergeModifierToken(er, this.sinceSuffixRegex, source, false, true); + } + }); + } + + protected tryMergeModifierToken(er:ExtractResult, regex: RegExp, source: string, isPrefix: boolean = false, potentialAmbiguity:boolean = false): boolean { + let subStr = isPrefix ? source.substr(0, er.start) : source.substr(er.start + er.length); + + // Avoid adding mod for ambiguity cases, such as "from" in "from ... to ..." should not add mod + if (potentialAmbiguity && this.config.ambiguousRangeModifierPrefix && + RegExpUtility.isMatch(this.config.ambiguousRangeModifierPrefix, subStr)) { + let matches = RegExpUtility.getMatches(this.config.potentialAmbiguousRangeRegex, source); + if (matches.find(m => m.index < er.start + er.length && m.index + m.length > er.start)) { + return false + } + } + + let token = this.hasTokenIndex(subStr.trim(), regex, isPrefix); + if (token.matched) { + let modLength = isPrefix ? subStr.length - token.index : token.index; + er.length += modLength; + er.start -= isPrefix ? 
modLength : 0; + er.text = source.substr(er.start, er.length); + + er.metaData = this.assignModMetadata(er.metaData); + + return true; + } + + return false; + } + + protected assignModMetadata(metadata: MetaData): MetaData { + + if (metadata === undefined || metadata === null) { + metadata = new MetaData(); + metadata.HasMod = true; + } else { + metadata.HasMod = true; + } + + return metadata + } + + protected hasTokenIndex(source: string, regex: RegExp, isPrefix: boolean = false): { matched: boolean, index: number } { + // This part is different from C# because no Regex RightToLeft option in JS + let result = { matched: false, index: -1 }; + let matchResult = RegExpUtility.getMatches(regex, source); + let index = isPrefix ? matchResult.length - 1 : 0; + let match = matchResult.length > 0 ? matchResult[index]: false; + if (match) { + let leftStr = isPrefix ? source.substr(match.index + match.length).trim() : source.substr(0, match.index).trim() + if (!leftStr.length) { + result.matched = true; + result.index = match.index + (isPrefix ? 
0 : match.length); + } + } + return result; + } } class ChineseMergedParserConfiguration implements IMergedParserConfiguration { @@ -174,9 +253,8 @@ class ChineseMergedParserConfiguration implements IMergedParserConfiguration { readonly setParser: BaseSetParser; constructor(dmyDateFormat: boolean) { - this.beforeRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.MergedBeforeRegex); - this.afterRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.MergedAfterRegex); - this.sinceRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.MergedAfterRegex); + this.beforeRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.ParserConfigurationBefore); + this.afterRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.ParserConfigurationAfter); this.dateParser = new ChineseDateParser(dmyDateFormat); this.holidayParser = new ChineseHolidayParser(); @@ -265,9 +343,14 @@ export class ChineseMergedParser extends BaseMergedParser { } export class ChineseFullMergedParser extends BaseMergedParser { + private readonly sincePrefixRegex: RegExp; + private readonly sinceSuffixRegex: RegExp; + constructor(dmyDateFormat: boolean = false) { let config = new ChineseMergedParserConfiguration(dmyDateFormat); super(config, 0); + this.sincePrefixRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.ParserConfigurationSincePrefix); + this.sinceSuffixRegex = RegExpUtility.getSafeRegExp(ChineseDateTime.ParserConfigurationSinceSuffix); } parse(er: ExtractResult, refTime?: Date): DateTimeParseResult | null { @@ -277,22 +360,43 @@ export class ChineseFullMergedParser extends BaseMergedParser { // push, save teh MOD string let hasBefore = false; let hasAfter = false; - let modStr = ""; - let beforeMatch = RegExpUtility.getMatches(this.config.beforeRegex, er.text).pop(); - let afterMatch = RegExpUtility.getMatches(this.config.afterRegex, er.text).pop(); - if (beforeMatch && !this.isDurationWithAgoAndLater(er)) { - hasBefore = true; - er.start += beforeMatch.length; - er.length -= beforeMatch.length; - er.text = 
er.text.substring(beforeMatch.length); - modStr = beforeMatch.value; - } - else if (afterMatch && !this.isDurationWithAgoAndLater(er)) { - hasAfter = true; - er.start += afterMatch.length; - er.length -= afterMatch.length; - er.text = er.text.substring(afterMatch.length); - modStr = afterMatch.value; + let hasSince = false; + let modStr = '', modStrPrefix = '', modStrSuffix = ''; + let erLength = er.length; + + if (er.metaData !== null && er.metaData !== undefined && er.metaData.HasMod) { + let beforeMatch = RegExpUtility.getMatches(this.config.beforeRegex, er.text).shift(); + let afterMatch = RegExpUtility.getMatches(this.config.afterRegex, er.text).shift(); + let sincePrefixMatch = RegExpUtility.getMatches(this.sincePrefixRegex, er.text).shift(); + let sinceSuffixMatch = RegExpUtility.getMatches(this.sinceSuffixRegex, er.text).shift(); + if (beforeMatch && beforeMatch.index + beforeMatch.length === erLength) { + hasBefore = true; + er.length -= beforeMatch.length; + er.text = er.text.substring(0, er.length); + modStr = beforeMatch.value; + } + else if (afterMatch && afterMatch.index + afterMatch.length === erLength) { + hasAfter = true; + er.length -= afterMatch.length; + er.text = er.text.substring(0, er.length); + modStr = afterMatch.value; + } + else { + if (sincePrefixMatch && sincePrefixMatch.index === 0) { + hasSince = true; + er.start += sincePrefixMatch.length; + er.length -= sincePrefixMatch.length; + er.text = er.text.substring(sincePrefixMatch.length); + modStrPrefix = sincePrefixMatch.value; + } + + if (sinceSuffixMatch && sinceSuffixMatch.index + sinceSuffixMatch.length === erLength) { + hasSince = true; + er.length -= sinceSuffixMatch.length; + er.text = er.text.substring(0, er.length); + modStrSuffix = sinceSuffixMatch.value; + } + } } if (er.type === Constants.SYS_DATETIME_DATE) { @@ -329,29 +433,37 @@ export class ChineseFullMergedParser extends BaseMergedParser { // pop, restore the MOD string if (hasBefore && pr.value !== null) { pr.length += 
modStr.length; - pr.start -= modStr.length; - pr.text = modStr + pr.text; + pr.text = pr.text + modStr; let val = pr.value; - val.mod = TimeTypeConstants.beforeMod; + val.mod = this.combineMod(val.mod, TimeTypeConstants.beforeMod); pr.value = val; } if (hasAfter && pr.value !== null) { pr.length += modStr.length; - pr.start -= modStr.length; - pr.text = modStr + pr.text; + pr.text = pr.text + modStr; let val = pr.value; - val.mod = TimeTypeConstants.afterMod; + val.mod = this.combineMod(val.mod, TimeTypeConstants.afterMod); pr.value = val; } - pr.value = this.dateTimeResolution(pr, hasBefore, hasAfter); - pr.type = `${this.parserTypeName}.${this.determineDateTimeType(er.type, hasBefore || hasAfter)}`; + if (hasSince && pr.value !== null) { + pr.length += modStrPrefix.length + modStrSuffix.length; + pr.start -= modStrPrefix.length; + pr.text = modStrPrefix + pr.text + modStrSuffix; + let val = pr.value; + val.mod = this.combineMod(val.mod, TimeTypeConstants.sinceMod); + pr.value = val; + } + + let hasRangeChangingMod = hasBefore || hasAfter || hasSince; + pr.value = this.dateTimeResolution(pr, hasRangeChangingMod); + pr.type = `${this.parserTypeName}.${this.determineDateTimeType(er.type, hasRangeChangingMod)}`; return pr; } - protected dateTimeResolution(slot: DateTimeParseResult, hasBefore: boolean, hasAfter: boolean, hasSince: boolean = false): { [s: string]: StringMap[]; } { + protected dateTimeResolution(slot: DateTimeParseResult, hasRangeChangingMod: boolean): { [s: string]: StringMap[]; } { if (!slot) { return null; } @@ -360,8 +472,9 @@ export class ChineseFullMergedParser extends BaseMergedParser { let resolutions = new Array(); let type = slot.type; - let outputType = this.determineDateTimeType(type, hasBefore || hasAfter); + let outputType = this.determineDateTimeType(type, hasRangeChangingMod); let timex = slot.timexStr; + let sourceEntity = this.determineSourceEntityType(type, outputType, hasRangeChangingMod); let value: DateTimeResolutionResult = 
slot.value; if (!value) { @@ -410,10 +523,6 @@ export class ChineseFullMergedParser extends BaseMergedParser { } } - if (isLunar) { - this.addResolutionFieldsAny(result, Constants.IsLunarKey, isLunar); - } - result.forEach((value, key) => { if (this.isObject(value)) { // is "StringMap" @@ -422,6 +531,10 @@ export class ChineseFullMergedParser extends BaseMergedParser { this.addResolutionFields(newValues, Constants.TimexKey, timex); this.addResolutionFields(newValues, Constants.ModKey, mod); this.addResolutionFields(newValues, Constants.TypeKey, outputType); + this.addResolutionFields(newValues, Constants.SourceEntity, sourceEntity); + if (isLunar) { + this.addResolutionFields(newValues, Constants.IsLunarKey, "True"); + } Object.keys(value).forEach((innerKey) => { newValues[innerKey] = value[innerKey]; diff --git a/JavaScript/packages/recognizers-date-time/src/dateTime/utilities.ts b/JavaScript/packages/recognizers-date-time/src/dateTime/utilities.ts index f8a9b96837..a171507a4a 100644 --- a/JavaScript/packages/recognizers-date-time/src/dateTime/utilities.ts +++ b/JavaScript/packages/recognizers-date-time/src/dateTime/utilities.ts @@ -260,6 +260,12 @@ export class DateTimeFormatUtil { return `${DateTimeFormatUtil.luisDateFromDate(time)}T${DateTimeFormatUtil.luisTimeFromDate(time)}`; } + public static luisDateShortTime(time: Date, timex: string = null): string { + let hasMin = timex != null ? timex.includes(Constants.TimeTimexConnector) : false; + let hasSec = timex != null ? 
timex.split(Constants.TimeTimexConnector).length > 2 : false; + return `${DateTimeFormatUtil.luisDateFromDate(time)}${DateTimeFormatUtil.formatShortTime(time, hasMin, hasSec)}`; + } + public static formatDate(date: Date): string { return [DateTimeFormatUtil.toString(date.getFullYear(), 4), DateTimeFormatUtil.toString(date.getMonth() + 1, 2), @@ -276,6 +282,13 @@ export class DateTimeFormatUtil { return `${DateTimeFormatUtil.formatDate(datetime)} ${DateTimeFormatUtil.formatTime(datetime)}`; } + public static formatShortTime(time: Date, hasMin: Boolean = false, hasSec: Boolean = false): string { + let hour = time.getHours(); + let min = hasMin || time.getMinutes() > 0 ? time.getMinutes() : -1; + let sec = hasSec || time.getSeconds() > 0 ? time.getSeconds() : -1; + return DateTimeFormatUtil.shortTime(hour, min, sec); + } + public static shortTime(hour: number, minute: number, second: number): string { if (minute < 0 && second < 0) { return `T${DateTimeFormatUtil.toString(hour, 2)}`; diff --git a/Patterns/Chinese/Chinese-DateTime.yaml b/Patterns/Chinese/Chinese-DateTime.yaml index 9a2e2ea2c1..864ac256f1 100644 --- a/Patterns/Chinese/Chinese-DateTime.yaml +++ b/Patterns/Chinese/Chinese-DateTime.yaml @@ -93,9 +93,9 @@ DateUnitRegex: !simpleRegex BeforeRegex: !simpleRegex def: 以前|之前|前 AfterRegex: !simpleRegex - def: 以后|以後|之后|之後|后|後 + def: 以后|以後|之后|之後|后|後|还剩 TimePeriodLeftRegex: !simpleRegex - def: ^[.] + def: 还剩 # (农历)?(2016年)?一月三日(星期三)? DateRegexList1: !nestedRegex def: ({LunarRegex}(\s*))?((({SimpleYearRegex}|{DateYearInCJKRegex})年)(\s*))?{MonthRegex}(\s*){DateDayRegexInCJK}((\s*|,|,){WeekDayRegex})? 
diff --git a/Patterns/Dutch/Dutch-DateTime.yaml b/Patterns/Dutch/Dutch-DateTime.yaml index c4beefd666..520d9f9000 100644 --- a/Patterns/Dutch/Dutch-DateTime.yaml +++ b/Patterns/Dutch/Dutch-DateTime.yaml @@ -17,13 +17,13 @@ ApostrofsRegex: !nestedRegex def: ({ApostrofRegex}\s*s) references: [ ApostrofRegex ] RelativeRegex: !simpleRegex - def: \b(?((dit|deze|volgende?|(aan)?komende?|aanstaande?|huidige?|vorige?|verleden|voorgaande?|laatste|afgelopen|(op\s+)?de|het)\b)|gister(en)?) + def: \b(?((dit|deze|(erop)?volgende?|(aan)?komende?|aanstaande?|huidige?|vorige?|verleden|voorgaande?|laatste|afgelopen|(op\s+)?de|het)\b)|gister(en)?) StrictRelativeRegex: !simpleRegex - def: \b(?((dit|deze|volgende?|(aan)?komende?|aanstaande?|huidige?|vorige?|verleden|voorgaande?|laatste|afgelopen)\b)|gister(en)?) + def: \b(?((dit|deze|(erop)?volgende?|(aan)?komende?|aanstaande?|huidige?|vorige?|verleden|voorgaande?|laatste|afgelopen)\b)|gister(en)?) UpcomingPrefixRegex: !simpleRegex def: ((deze\s+)?((aan)?komende?|aanstaande?)) NextPrefixRegex: !nestedRegex - def: \b(volgende?|eerstvolgende|{UpcomingPrefixRegex})\b + def: \b((erop)?volgende?|eerstvolgende|{UpcomingPrefixRegex})\b references: [ UpcomingPrefixRegex ] AfterNextSuffixRegex: !simpleRegex def: \b((na\s+(afloop\s+van\s+)?((de|het)\s+)?volgende?)|over)\b @@ -45,7 +45,7 @@ FutureSuffixRegex: !simpleRegex PastSuffixRegex: !simpleRegex def: ^\b$ DayRegex: !simpleRegex - def: (de\s*)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:ste|de|e)?(?=\b|t) + def: (de\s*)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:\s*(ste|de|e))?(?=\b|t) # 1-31 written WrittenDayRegex: !nestedRegex def: (?({WrittenOneToNineRegex})|({WrittenElevenToNineteenRegex})|(({WrittenOneToNineRegex}(en|ën))?twintig)|(((één|een)(en|ën))?dertig)) @@ -54,7 +54,7 @@ WrittenCardinalDayRegex: !nestedRegex def: 
(?<=((de\s+)|\b))(?(éérste|eerste|tweede|derde|vierde|vijfde|zesde|zevende|achtste|negende|tiende|{WrittenElevenToNineteenRegex}de|({WrittenOneToNineRegex}(en|ën))?twintigste|((één|een)(en|ën))?dertigste)) references: [ WrittenOneToNineRegex, WrittenElevenToNineteenRegex ] ImplicitDayRegex: !simpleRegex - def: (de\s*)?(?(3[0-1]|[0-2]?\d)(ste|e|de))\b + def: (de\s*)?(?(3[0-1]|[0-2]?\d)(\s*(ste|de|e)))\b MonthNumRegex: !simpleRegex def: \b(?01|02|03|04|05|06|07|08|09|10|11|12|1|2|3|4|5|6|7|8|9)\b WrittenOneToNineRegex: !simpleRegex @@ -100,7 +100,7 @@ PmRegex: !nestedRegex def: (?({ApostrofsRegex}|des)\s+(\bmiddags|avonds|nachts)|((in|tegen|op|om|met)\s+(de\s+)?)(((na)?middag|avond|(midder)?nacht|lunchtijd))|dag) references: [ ApostrofsRegex ] PmRegexFull: !nestedRegex - def: (?(({ApostrofsRegex}|des)\s+(\bmiddags|avonds|nachts)|((in|tegen|op|om|met)\s+(de\s+)?)?(((na)?middag|avond|(midder)?nacht|lunchtijd)))) + def: (?(({ApostrofsRegex}|des)\s+(\bmiddags|avonds|nachts)|((in|tegen|op|om|met)\s+(de\s+)?)?(((na)?middag|(?(({ApostrofsRegex}|des)\s+(ochtends|morgens)|((in|tegen|op)\s+de)(\s+(ochtend|morgen))|(?<=gisteren|morgen|vandaag|(maan|dins|woens|donder|vrij|zater|zon)dag)(ochtend|morgen)|^?ochtend)) @@ -129,7 +129,7 @@ MonthSuffixRegex: !nestedRegex def: (?((in|van|tijdens|sinds|tot|op)\s+)?({RelativeMonthRegex}|{WrittenMonthRegex})) references: [ RelativeMonthRegex, WrittenMonthRegex ] DateUnitRegex: !simpleRegex - def: (?(eeuw|maand)(?en)?|jaar|(?jaren|weken)|jr|decennia|mnd|week|(?(werk))?dag(?en)?|dgn)\b + def: (?(eeuw|maand|weekend)(?en)?|jaar|(?jaren|weken)|jr|decennia|mnd|week|(?we[er]k)?dag(?en)?|dgn)\b DateTokenPrefix: 'op ' TimeTokenPrefix: 'om ' TokenBeforeDate: 'op ' @@ -169,7 +169,7 @@ MonthWithYear: !nestedRegex def: \b(({WrittenMonthRegex}(\.)?(\s*)[/\\\-\.,]?(\s+(van|over|in))?(\s*){RelativeYearRegex})|({RelativeYearRegex}(\s*),?(\s*){WrittenMonthRegex}))\b references: [ WrittenMonthRegex, RelativeYearRegex ] OneWordPeriodRegex: !nestedRegex - def: 
\b((((de\s+)?maand\s+(van\s+)?)?({StrictRelativeRegex}\s+)?(?januari|februari|maart|april|mei|juni|juli|augustus|september|oktober|november|december|jan\.?|feb\.?|mar\.?|mrt\.?|apr\.?|jun\.?|jul\.?|aug\.?|sep\.?|sept\.?|oct\.?|okt\.?|nov\.?|dec\.?))|(maand|jaar)\s+tot(\s+op)?\s+heden|(({RelativeRegex}\s+)(mijn\s+)?(weekend|week|maand|jaar(?!\s+hoger dan))|({RelativeRegex}\s+)?(mijn\s+)(weekend|week|maand|jaar))(?!((\s+van)?\s+\d+|\s+tot(\s+op)?\s+heden|nu))(\s+{AfterNextSuffixRegex})?)\b + def: \b((((de\s+)?maand\s+(van\s+)?)?({StrictRelativeRegex}\s+)?(?januari|februari|maart|april|mei|juni|juli|augustus|september|oktober|november|december|jan\.?|feb\.?|mar\.?|mrt\.?|apr\.?|jun\.?|jul\.?|aug\.?|sep\.?|sept\.?|oct\.?|okt\.?|nov\.?|dec\.?))|(maand|jaar)\s+tot(\s+op)?\s+heden|(({RelativeRegex}\s+)(mijn\s+)?(weekend|(?werkweek)|week|maand|jaar(?!\s+hoger dan))|({RelativeRegex}\s+)?(mijn\s+)(weekend|(?werkweek)|week|maand|jaar))(?!((\s+van)?\s+\d+|\s+tot(\s+op)?\s+heden|nu))(\s+{AfterNextSuffixRegex})?)\b references: [ StrictRelativeRegex, RelativeRegex, AfterNextSuffixRegex ] MonthNumWithYear: !nestedRegex def: \b(({BaseDateTime.FourDigitYearRegex}(\s*)[/\-\.](\s*){MonthNumRegex})|({MonthNumRegex}(\s*)[/\-](\s*){BaseDateTime.FourDigitYearRegex}))\b @@ -216,11 +216,11 @@ AllHalfYearRegex: !nestedRegex def: ({HalfYearFrontRegex})|({HalfYearBackRegex})|({HalfYearRelativeRegex}) references: [ HalfYearFrontRegex, HalfYearBackRegex, HalfYearRelativeRegex ] EarlyPrefixRegex: !simpleRegex - def: \b(?((?eerder)|vroeg(er)?|begin(nend)?|start(end)?)(\s+(in|op|van)(\s+de)?)?)\b + def: \b(?((?eerder)|vroeg(er)?|((de|het)\s+)?(begin(nend)?|start(end)?))(\s+(in|op|van)(\s+de)?)?)\b MidPrefixRegex: !simpleRegex - def: \b(?(mid(den|-)?|halverwege|op\s+de\s+helft|half)(\s+(in|op|van)(\s+de)?)?) + def: \b(?(het\s+)?(mid(den|-)?|halverwege|op\s+de\s+helft|half)(\s+(in|op|van)(\s+de)?)?) 
LaterPrefixRegex: !simpleRegex - def: \b(?(laat|(?later)|aan\s+het\s+einde?(\s+van(\s+de)?)?|eind(igend)?|afsluitend)(\s+(in|op|van)(\s+de)?)?)\b + def: \b(?(laat|(?later)|(aan\s+)?het\s+einde?(\s+van(\s+de)?)?|eind(e|igend)?|afsluitend)(\s+(in|op|van)(\s+de)?)?)\b PrefixPeriodRegex: !nestedRegex def: ({EarlyPrefixRegex}|{MidPrefixRegex}|{LaterPrefixRegex}) references: [EarlyPrefixRegex, MidPrefixRegex, LaterPrefixRegex] @@ -235,7 +235,7 @@ SeasonRegex: !nestedRegex WhichWeekRegex: !simpleRegex def: \b(week)(\s*)(?5[0-3]|[1-4]\d|0?[1-9])\b WeekOfRegex: !simpleRegex - def: (de\s+)?(week)(\s+van)(\s+de|het)? + def: (de\s+)?(week)\s+(van(\s+(de|het))?|(beginnend|die\s+begint|startend|aanvangend)(\s+op)?) MonthOfRegex: !simpleRegex def: (maand)(\s*)(van) MonthRegex: !simpleRegex @@ -253,14 +253,14 @@ OnRegex: !nestedRegex references: [ DayRegex ] # Ordinals are incomplete RelaxedOnRegex: !simpleRegex - def: \b(?<=op\s+)(?:de\s+)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:(ste|de|e))?\b(?!(\.|:)\d+) + def: \b(?<=op\s+)(?:de\s+)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:\s*(ste|de|e))?\b(?!(\.|:)\d+) PrefixWeekDayRegex: !simpleRegex def: (\s*((,?\s*op)|[-—–])) ThisRegex: !nestedRegex def: \b((deze(\s+week{PrefixWeekDayRegex}?)?\s*){WeekDayRegex})|({WeekDayRegex}((\s+van)?\s*deze\s+week))\b references: [ WeekDayRegex, PrefixWeekDayRegex ] LastDateRegex: !nestedRegex - def: \b({PreviousPrefixRegex}(\s*week{PrefixWeekDayRegex}?)?\s+{WeekDayRegex})|({WeekDayRegex}(\s+vorige\s+week))\b + def: \b({PreviousPrefixRegex}(\s*week{PrefixWeekDayRegex}?)?\s+{WeekDayRegex})|({WeekDayRegex}(\s+van)?(\s+vorige\s+week))\b references: [ PreviousPrefixRegex, WeekDayRegex, PrefixWeekDayRegex ] WeekDayForNextDateRegex: !simpleRegex def: \b(?((ma|di(ns)?|wo(e(ns)?)?|do|vr(ij)?|za(t)?|zo)(\.|\b))|((?:maan(?!den)|dins|woens|donder|vrij|zater|zon)(dag)?)) @@ -274,8 +274,8 @@ NextDateRegex: !nestedRegex def: ({NextDateRegex1}|{NextDateRegex2}) references: [ NextDateRegex1, NextDateRegex2 ] SpecialDayRegex: 
!nestedRegex - def: \b(eergisteren|overmorgen|(de\s+)?dag\s+na\s+morgen|(de\s+)?dag\s+(ervoor|erna)|((de\s+)?({RelativeRegex}|mijn)\s+dag)\b|gisteren|(deze\s+)?morgen|vandaag|morgen(middag))(?!s\b) - references: [ RelativeRegex ] + def: \b(eergisteren|overmorgen|(de\s+)?dag\s+na\s+morgen|(de\s+)?dag\s+(ervoor|erna)|((de\s+)?({StrictRelativeRegex}|mijn)\s+dag)\b|(de\s+dag(?!\s+van))|gisteren|(deze\s+)?morgen|vandaag|morgen(middag))(?!s\b) + references: [ StrictRelativeRegex ] SpecialDayWithNumRegex: !nestedRegex def: \b((?{WrittenNumRegex})\s+dag(en)?\s+(gerekend\s+)?(vanaf\s+)(?gisteren|morgen|vandaag))\b references: [ WrittenNumRegex ] @@ -296,16 +296,16 @@ SpecialDate: !nestedRegex DatePreposition: !simpleRegex def: \b(op(\s+de)?) DateExtractorYearTermRegex: !nestedRegex - def: (\s+|\s*[,./-]\s*){DateYearRegex} + def: (\s+(van\s+)?|\s*[,./-]\s*){DateYearRegex} references: [ DateYearRegex ] # Maandag, Mei 2 DateExtractor1: !nestedRegex - def: \b({WeekDayRegex}\s*[,-]?\s*)?(({MonthRegex}(\.)?\s*[/\\.,-]?\s*{DayRegex}(?!\s*{MonthRegex}))|(\({MonthRegex}\s*[-.]\s*{DayRegex}\))|({DayRegex}(\.)?\s*[/\\.,-]?\s*{MonthRegex}))(\s*\(\s*{WeekDayRegex}\s*\))?({DateExtractorYearTermRegex}(?!\s*{MonthRegex})\b)? + def: \b({WeekDayRegex}\s*[,-]?\s*)?(({MonthRegex}(\.)?\s*[/\\.,-]?\s*{DayRegex}(?!\s*({MonthRegex}|\-\s*\d{2}\b)))|(\({MonthRegex}\s*[-.]\s*{DayRegex}\))|({DayRegex}(\.)?\s*[/\\.,-]?\s*{MonthRegex}))(\s*\(\s*{WeekDayRegex}\s*\))?({DateExtractorYearTermRegex}(?!\s*{MonthRegex})\b)? references: [ WeekDayRegex, MonthRegex, DayRegex, DateExtractorYearTermRegex ] # Maandag 2 Mei # TODO: add ... van 2019? 
DateExtractor3: !nestedRegex - def: \b({WeekDayRegex}(\s+|\s*,\s*)?(de\s+)?)?(({DayRegex}(\s*dag|\.)?)((\s+|\s*[,/-]\s*|\s+van\s+)?{MonthRegex})((\.)?(\s+|\s*[,/-]\s*|\s+in\s+)?{DateYearRegex})?|{BaseDateTime.FourDigitYearRegex}\s*[,./-]?\s*(de\s*)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:ste|de|e)?(\s*dag|\.)?\s*[,./-]?\s*{MonthRegex})\b + def: \b({WeekDayRegex}(\s+|\s*,\s*)?(de\s+)?)?(({DayRegex}(\s*dag|\.)?)((\s+|\s*[,/-]\s*|\s+van\s+)?{MonthRegex})((\.)?(\s+|\s*[,/-]\s*|\s+in\s+)?{DateYearRegex})?|{BaseDateTime.FourDigitYearRegex}\s*[,./-]?\s*(de\s*)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:\s*(ste|de|e))?(\s*dag|\.)?\s*[,./-]?\s*{MonthRegex})\b references: [ WeekDayRegex, DayRegex, MonthRegex, DateYearRegex, BaseDateTime.FourDigitYearRegex ] # 05/02/2019 # The final lookahead in DateExtractor4|5|A avoids extracting as date "10/1-11" from an input like "10/1-11/2/2017" @@ -338,7 +338,7 @@ DateExtractor9S: !nestedRegex def: \b(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:ste|de|e)?|{DayRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{MonthRegex}) + def: \b({WeekDayRegex}\s+)?({BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DayRegex}(?!\s*[/\\\-\.]\s*\d+)|{MonthRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*(de\s*)?(?(?:3[0-1]|[1-2]\d|0?[1-9]))(?:\s*(ste|de|e))?|{DayRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{MonthRegex}) references: [ BaseDateTime.FourDigitYearRegex, MonthNumRegex, MonthRegex, DayRegex, WeekDayRegex ] OfMonth: !nestedRegex def: (^\s*((van|in)\s+)?)({MonthRegex}) @@ -532,9 +532,11 @@ TimeOfTodayAfterRegex: !nestedRegex TimeOfTodayBeforeRegex: !nestedRegex def: '{DateTimeSpecificTimeOfDayRegex}(\s*,)?(\s+(om|rond|tegen|op\s+de|op))?\s*$' references: [ DateTimeSpecificTimeOfDayRegex ] +NonTimeContextTokens: !simpleRegex + def: \b(gebouw) SimpleTimeOfTodayAfterRegex: !nestedRegex - def: 
\b({HourNumRegex}|{BaseDateTime.HourRegex})(\s*({OclockRegex}|u))?\s*(,\s*)?((in|op)\s+de\s+)?{DateTimeSpecificTimeOfDayRegex} - references: [ HourNumRegex, BaseDateTime.HourRegex, DateTimeSpecificTimeOfDayRegex, OclockRegex ] + def: (?{DateUnitRegex}|(min\.|sec\.)|((?halfuur)|(?kwartier\s+uur)|(?kwartier)|uur|uren|u|minuten|minuut|mins|min|m|secondes|seconden|secs|sec|s|nacht(en)?)\b)(\s+lang\b)? + def: (?{DateUnitRegex}|(min\.|sec\.)|((?halfuur)|(?kwartier\s+uur)|(?kwartier)|uur|uren|u|minuten|minuut|m(ins?)?|seconde[ns]?|s(ecs?)?|nacht(en)?)\b)(\s+lang\b)? references: [ DateUnitRegex ] SuffixAndRegex: !simpleRegex def: (?\s*(en|ën)(\s*een)?\s*(?hal(f|ve)|kwart|kwartier)|(?(een\s+)?kwartier)) PeriodicRegex: !simpleRegex - def: \b(?dagelijkse?|(drie)?maandelijkse?|wekelijkse?|twee-?wekelijkse?|jaarlijkse?|kwartaal)\b + def: \b(?dagelijkse?|(drie)?maandelijkse?|wekelijkse?|twee-?wekelijkse?|(half)?jaarlijkse?|kwartaal)\b EachUnitRegex: !nestedRegex - def: (?((iedere|elke|eenmaal per)(?\s+andere)?\s*{DurationUnitRegex})|(({DurationUnitRegex}|{WeekDayRegex})\s+om(\s+de)?(?\s+andere)?\s*(week|{DurationUnitRegex}))) + def: (?((iedere?|elke?|eenmaal per)(?\s+andere)?\s*({DurationUnitRegex}|(?weekend(en)?))|({DurationUnitRegex}|{WeekDayRegex})\s+om(\s+de)?(?\s+andere)?\s*(week|{DurationUnitRegex}))) references: [ DurationUnitRegex, WeekDayRegex ] EachPrefixRegex: !simpleRegex def: \b(?(iedere|elke|eenmaal per)\s*$) -SetEachRegex: !simpleRegex - def: \b(?(iedere|elke|om de)\s*(?\s+andere)?\s*(week)?) +SetEachRegex: !nestedRegex + def: \b(?(iedere|elke|om\s+de)\s*(?\s+andere)?\s*(week\s*(?={WeekDayRegex}))?) + references: [ WeekDayRegex ] SetLastRegex: !simpleRegex def: (?volgende?|komende|aankomende|aanstaande|deze|huidige|aanstaande|vorige?|verleden|laatste) # This regex is used to extract Set patterns like "3pm every day" where "every day" follows the time. 
@@ -600,10 +603,10 @@ HalfRegex: !simpleRegex ConjunctionRegex: !simpleRegex def: \b((en(\s+voor)?)|plus)\b HolidayList1: !nestedRegex - def: (?goede vrijdag|pasen|((eerste|tweede)\s+)?paasdag|paas(zondag|maandag)|kerst|kerstavond|kerstmis|thanksgiving|halloween|(islamitisch\s+)?nieuwjaar|oud en nieuw|oud & nieuw|pinksteren|oude?jaar|oude?jaarsavond|silvester|silvesteravond|sinterklaas|sinterklaasfeest|sinterklaasavond|pakjesavond|eid al(-|\s+)fitr|eid al(-|\s+)adha) + def: (?goede vrijdag|pasen|((eerste|tweede)\s+)?paasdag|paas(zondag|maandag)|kerst(avond|mis)?|thanksgiving|halloween|(islamitisch\s+)?nieuwjaar|oud en nieuw|oud & nieuw|pinksteren|oude?jaar|oude?jaarsavond|silvester|silvesteravond|sinterklaas|sinterklaasfeest|sinterklaasavond|pakjesavond|eid al(-|\s+)fitr|eid al(-|\s+)adha|juneteenth|vrijheidsdag|jubilee\s+day) references: [ YearRegex, RelativeRegex ] HolidayList2: !nestedRegex - def: (?black friday|cyber monday|nationale dodenherdenking|nationale herdenking|dodenherdenking|dag van de leraar|dag van de leerkracht(en)?|dag van de arbeid|feest van de arbeid|yuandan|valentijn|sint-maartensfeest|sint-maarten|driekoningen|keti(\s+|-)?koti|ramadan|suikerfeest|offerfeest|allerheiligen|allerheiligenavond|franse nationale feestdag|bestorming van de bastille) + def: (?black friday|cyber monday|nationale dodenherdenking|nationale herdenking|dodenherdenking|dag\s+van\s+de\s+(leraar|leerkracht(en)?|arbeid|aarde)|feest\s+van\s+de\s+arbeid|yuandan|valentijn|sint-maartensfeest|sint-maarten|driekoningen|keti(\s+|-)?koti|ramadan|suikerfeest|offerfeest|allerheiligen|allerheiligenavond|franse nationale feestdag|bestorming van de bastille) references: [ YearRegex, RelativeRegex ] HolidayList3: !nestedRegex # -dag suffix def: (?(martin luther 
king|mlk|dankzeggings|valentijns|nieuwjaars|(eerste|1e|tweede|2e)\s+paas|prinsjes|konings|koninginne|bevrijdings|hemelvaarts|(eerste|1e|tweede|2e)\s+kerst|vader|moeder|meisjes|(amerikaanse|us\s+)?onafhankelijk(heid)?s|(nederlandse\s+)?veteranen|boomplant|(nationale\s+)?boomfeest)dag) @@ -634,7 +637,7 @@ AfterRegex: !nestedRegex def: (\b{InclusiveModPrepositions}?((na(\s+afloop\s+van)?|(?>=)|>) references: [ InclusiveModPrepositions ] BeforeRegex: !nestedRegex - def: (\b(?(al\s+)?zo\s+laat\s+als)){InclusiveModPrepositions}?\b\s*)|(?)((?<=)|<) + def: (\b(?(al\s+)?zo\s+laat\s+als)){InclusiveModPrepositions}?\b\s*)|(?)((?<=)|<) references: [ InclusiveModPrepositions ] SinceRegex: !simpleRegex def: (\b(sinds|na\s+of\s+gelijk\s+aan|(startend|beginnend)\s+(vanaf|op|met)|(al\s+)?zo\s+vroeg\s+als|(elk|ieder)\s+moment\s+vanaf|een\s+tijdstip\s+vanaf)\b\s*)|(?=) @@ -645,7 +648,7 @@ AgoRegex: !simpleRegex LaterRegex: !simpleRegex def: \b(later|vanaf\s+nu|(vanaf|na|sedert)\s+(?morgen|vandaag))\b BeforeAfterRegex: !simpleRegex - def: ^[.] 
+ def: \b(gerekend\s+)?((?voor(dat)?)|(?van(af)?|na))\b ModPrefixRegex: !nestedRegex def: \b({RelativeRegex}|{AroundRegex}|{BeforeRegex}|{AfterRegex}|{SinceRegex})\b references: [RelativeRegex, AroundRegex, BeforeRegex, AfterRegex, SinceRegex ] @@ -703,7 +706,7 @@ UnspecificDatePeriodRegex: !simpleRegex PrepositionSuffixRegex: !simpleRegex def: \b((op|in)(\s+de)?|om|rond(om)?|van|tot)$ FlexibleDayRegex: !simpleRegex - def: (?([A-Za-zë]+\s)?[A-Za-zë\d]+?(ste|de|e)) + def: (?([A-Za-zë]+\s+)?[A-Za-zë\d]+?\s*(ste|de|e)) ForTheRegex: !nestedRegex def: \b((((?<=voor\s+)de\s+{FlexibleDayRegex})|((?<=op\s+)de\s+{FlexibleDayRegex}(?<=(ste|de|e))))(?(\s+(tussen|binnen|terug|tegen|aan|uit|mee|bij|vol|uit|aan|op|in|na|af)\s*)?(\s+(ge\w\w\w+|\w\w\w+en)\s*)?(,|\.|!|\?|$))) references: [ FlexibleDayRegex ] @@ -718,11 +721,11 @@ RestOfDateRegex: !simpleRegex RestOfDateTimeRegex: !simpleRegex def: \brest\s+(van\s+)?((de|het|mijn|dit|deze|(de\s+)?huidige)\s+)?(?vandaag|dag)\b MealTimeRegex: !simpleRegex - def: \b((tijdens\s+de\s+)?(?lunch)|((om|tegen)\s+)?(?lunchtijd))\b + def: \b((((tijdens\s+)?de|het)\s+)?(?ontbijt|lunch|avondeten)|((om|tegen|tijdens)\s+)?(?lunchtijd))\b AmbiguousRangeModifierPrefix: !simpleRegex def: (voor) PotentialAmbiguousRangeRegex: !nestedRegex - def: \b{AmbiguousRangeModifierPrefix}(.+\b(boven|later|groter|erna|daarna|hoger|(?{BaseDateTime.RangeConnectorSymbolRegex}))\b) + def: \b{AmbiguousRangeModifierPrefix}(?!\s+het\s+(einde?|begin(nen)?))(.+\b(boven|later|groter|erna|daarna|hoger|(?{BaseDateTime.RangeConnectorSymbolRegex}))\b) references: [ AmbiguousRangeModifierPrefix, BaseDateTime.RangeConnectorSymbolRegex, DateUnitRegex ] NumberEndingPattern: !nestedRegex def: ^(\s+((?vergadering|afspraak|conferentie|telefoontje|skype-gesprek)\s+)?(om|naar)\s+(?{PeriodHourNumRegex}|{HourRegex})((\.)?$|(\.,|,|!|\?))) @@ -730,8 +733,8 @@ NumberEndingPattern: !nestedRegex OneOnOneRegex: !simpleRegex def: \b(1\s*:\s*1)|(één\s+(op\s)één|één\s*-\s*één|één\s*:\s*één)\b 
LaterEarlyPeriodRegex: !nestedRegex - def: \b({PrefixPeriodRegex})\s*\b\s*(?{OneWordPeriodRegex})\b - references: [PrefixPeriodRegex, OneWordPeriodRegex] + def: \b({PrefixPeriodRegex})\s*\b\s*(?{OneWordPeriodRegex}|(?{BaseDateTime.FourDigitYearRegex}))\b + references: [PrefixPeriodRegex, OneWordPeriodRegex, BaseDateTime.FourDigitYearRegex] WeekWithWeekDayRangeRegex: !nestedRegex def: \b((?({NextPrefixRegex}|{PreviousPrefixRegex}|deze)\s+week)((\s+tussen\s+{WeekDayRegex}\s+en\s+{WeekDayRegex})|(\s+van\s+{WeekDayRegex}\s+tot\s+{WeekDayRegex})))\b references: [NextPrefixRegex, PreviousPrefixRegex, WeekDayRegex] @@ -771,9 +774,15 @@ DateAfterRegex: !simpleRegex YearPeriodRegex: !nestedRegex def: ((((van(af)?|tijdens|gedurende|in)\s+)?{YearRegex}\s*({TillRegex})\s*{YearRegex})|(((tussen)\s+){YearRegex}\s*({RangeConnectorRegex})\s*{YearRegex})) references: [ YearRegex, TillRegex, RangeConnectorRegex ] +StartMiddleEndRegex: !simpleRegex + def: \b((?(((de|het)\s+)?(start|begin)\s+van\s+)?)(?((het\s+)?midden\s+van\s+)?)(?((het\s+)?einde?\s+van\s+)?)) ComplexDatePeriodRegex: !nestedRegex - def: (((van(af)?|tijdens|gedurende|in(\s+de)?)\s+)?(?.+)\s*({TillRegex})\s*(?.+)|((tussen)\s+)(?.+)\s*({RangeConnectorRegex})\s*(?.+)) - references: [ YearRegex, TillRegex, RangeConnectorRegex ] + def: (((van(af)?|tijdens|gedurende|in(\s+de)?)\s+)?{StartMiddleEndRegex}(?.+)\s*({TillRegex})\s*{StartMiddleEndRegex}(?.+)|((tussen)\s+){StartMiddleEndRegex}(?.+)\s*({RangeConnectorRegex})\s*{StartMiddleEndRegex}(?.+)|(?{WrittenMonthRegex})\s+(?{WrittenMonthRegex}(\s+|\s*,\s*){YearRegex})) + references: [ YearRegex, TillRegex, RangeConnectorRegex, WrittenMonthRegex, StartMiddleEndRegex ] +#To handle cases like "jan feb 2017" +ComplexTillRegex: !nestedRegex + def: ({TillRegex}|{WrittenMonthRegex}) + references: [TillRegex, WrittenMonthRegex] UnitMap: !dictionary types: [ string, string ] entries: @@ -790,8 +799,14 @@ UnitMap: !dictionary mnd: MON weken: W week: W + weekend: WE + weekenden: WE 
dagen: D dag: D + werkdagen: D + werkdag: D + weekdagen: D + weekdag: D vandaag: D dgn: D nachten: D @@ -825,6 +840,8 @@ UnitValueMap: !dictionary mnd: 2592000 weken: 604800 week: 604800 + weekenden: 172800 + weekend: 172800 dagen: 86400 dag: 86400 vandaag: 86400 @@ -833,6 +850,8 @@ UnitValueMap: !dictionary nacht: 86400 werkdagen: 86400 werkdag: 86400 + weekdagen: 86400 + weekdag: 86400 uren: 3600 uur: 3600 u: 3600 @@ -1237,6 +1256,8 @@ HolidayNames: !dictionary usindependenceday: [ amerikaanseonafhankelijkheidsdag, usonafhankelijkheidsdag ] blackfriday: [ blackfriday ] cybermonday: [ cybermonday ] + earthday: [ dagvandeaarde ] + juneteenth: [jubileeday, juneteenth, vrijheidsdag] WrittenDecades: !dictionary types: [ string, int ] entries: @@ -1311,7 +1332,7 @@ AmbiguityFiltersDict: !dictionary types: [ string, string ] entries: '^\d{4}$': '(\d\.\d{4}|\d{4}\.\d)' - '\b(lunch)$': '(?(?:3[0-1]|[1-2]\d|0?[1-9])(?:th|nd|rd|st)?)[\.]?(\s+|\s*[-,/]\s*|\s+of\s+){MonthRegex}[\.]?)\b @@ -459,7 +459,7 @@ TimeOfDayRegex: !nestedRegex def: \b(?((((in\s+the\s+){LaterEarlyRegex}?(morning|afternoon|night(-?time)?|evening)s)|((in\s+the\s+)?{LaterEarlyRegex}?(in(\s+the)?\s+)?(morning|afternoon|night(-?time)?|evening)))|{MealTimeRegex}|(((in\s+(the)?\s+)?)(daytime|business\s+hours?))))\b references: [ LaterEarlyRegex, MealTimeRegex ] SpecificTimeOfDayRegex: !nestedRegex - def: \b(({StrictRelativeRegex}\s+{TimeOfDayRegex})\b|\btoni(ght|te))s?\b + def: \b(({StrictRelativeRegex}\s+{TimeOfDayRegex})\b|\b(?toni(ght|te)))s?\b references: [ TimeOfDayRegex, StrictRelativeRegex ] TimeFollowedUnit: !nestedRegex def: ^\s*{TimeUnitRegex} @@ -502,10 +502,10 @@ UnspecificEndOfRegex: !simpleRegex UnspecificEndOfRangeRegex: !simpleRegex def: \b(eoy)\b PeriodTimeOfDayRegex: !nestedRegex - def: \b((in\s+(the)?\s+)?{LaterEarlyRegex}?(this\s+)?{DateTimeTimeOfDayRegex})\b + def: \b((in\s+(the)?\s+)?{LaterEarlyRegex}?((this\s+)?{DateTimeTimeOfDayRegex}|(?(?tonight))))\b references: [ 
DateTimeTimeOfDayRegex, LaterEarlyRegex ] PeriodSpecificTimeOfDayRegex: !nestedRegex - def: \b({LaterEarlyRegex}?this\s+{DateTimeTimeOfDayRegex}|({StrictRelativeRegex}\s+{PeriodTimeOfDayRegex})\b|\btoni(ght|te))\b + def: \b({LaterEarlyRegex}?this\s+{DateTimeTimeOfDayRegex}|({StrictRelativeRegex}\s+{PeriodTimeOfDayRegex})\b|\b(?toni(ght|te)))\b references: [ PeriodTimeOfDayRegex, StrictRelativeRegex, DateTimeTimeOfDayRegex, LaterEarlyRegex ] PeriodTimeOfDayWithDateRegex: !nestedRegex def: \b(({PeriodTimeOfDayRegex}(\s+(on|of))?))\b diff --git a/Patterns/German/German-DateTime.yaml b/Patterns/German/German-DateTime.yaml index c65aa51a14..358f4ec5e4 100644 --- a/Patterns/German/German-DateTime.yaml +++ b/Patterns/German/German-DateTime.yaml @@ -194,7 +194,7 @@ DateExtractor1: !nestedRegex def: \b(({WeekDayRegex})(\s+|\s*,\s*))?({DayRegex}\s*[/\\.,\- ]\s*{MonthRegex}(\s*[/\\.,\- ]\s*{DateYearRegex})?|{BaseDateTime.FourDigitYearRegex}\s*[/\\.,\- ]\s*{DayRegex}\s*[/\\.,\- ]\s*{MonthRegex})\b references: [ WeekDayRegex, MonthRegex, DayRegex, DateYearRegex, BaseDateTime.FourDigitYearRegex ] DateExtractor2: !nestedRegex - def: \b({MonthRegex}\s*[/\\.,\- ]\s*{DayRegex}(\s*[/\\.,\- ]\s*{DateYearRegex})?)\b + def: \b({MonthRegex}\s*[/\\.,\- ]\s*{DayRegex}(?!\s*\-\s*\d{2}\b)(\s*[/\\.,\- ]\s*{DateYearRegex})?)\b references: [ WeekDayRegex, MonthRegex, DayRegex, DateYearRegex ] DateExtractor3: !nestedRegex def: \b({DayRegex}{MonthRegex}) diff --git a/Patterns/Italian/Italian-DateTime.yaml b/Patterns/Italian/Italian-DateTime.yaml index 84fcd53572..75aa070d83 100644 --- a/Patterns/Italian/Italian-DateTime.yaml +++ b/Patterns/Italian/Italian-DateTime.yaml @@ -232,7 +232,7 @@ SpecialDate: !nestedRegex def: (?<=\b(il|l'|al(l')?)\s*){DayRegex}\b references: [ DayRegex ] DateExtractor1: !nestedRegex - def: 
\b((quest[oa]\s+)?{WeekDayRegex}\s*[,-]?\s*)?(({MonthRegex}(\.)?\s*[/\\.,-]?\s*{DayRegex})|(\({MonthRegex}\s*[-.]\s*{DayRegex}\))|({DayRegex}\s*[/\\.,-]?\s*{MonthRegex}(\.)?))(\s*\(\s*{WeekDayRegex}\s*\))? + def: \b((quest[oa]\s+)?{WeekDayRegex}\s*[,-]?\s*)?(({MonthRegex}(\.)?\s*[/\\.,-]?\s*{DayRegex}(?!\s*\-\s*\d{2}\b))|(\({MonthRegex}\s*[-.]\s*{DayRegex}\))|({DayRegex}\s*[/\\.,-]?\s*{MonthRegex}(\.)?))(\s*\(\s*{WeekDayRegex}\s*\))? references: [ WeekDayRegex, MonthRegex, DayRegex ] DateExtractor2: !nestedRegex def: ({DateExtractor1}(\s+|\s*[\-/,.]\s*|\s+del\s+)({DateYearRegex}))\b diff --git a/Patterns/Japanese/Japanese-DateTime.yaml b/Patterns/Japanese/Japanese-DateTime.yaml index 39d135cd05..9d77d1d8c4 100644 --- a/Patterns/Japanese/Japanese-DateTime.yaml +++ b/Patterns/Japanese/Japanese-DateTime.yaml @@ -9,11 +9,14 @@ MonthRegexForPeriod: !simpleRegex MonthNumRegexForPeriod: !simpleRegex def: (?0?[1-9]|1[0-2])(?=\b|t|まで|から)? DayRegex: !simpleRegex - def: (?[0-2]?[1-9]|[1-3]0|31)([日目]間?)? + def: (?[0-2]?[1-9]|[1-3]0|31)((日|目)(?!かかる|待つ|泊まる|経つ)間?)? DayRegexForPeriod: !simpleRegex - def: (?3[01]|[0-2]?\d|(三十一?|(一|二)?十?[一二三四五六七八九]))(\s*日目?)?(?=\b|t|まで|から)? -DateDayRegexInCJK: !simpleRegex - def: (?初一|((二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|二十一|三十一|十二|十三|十四|十五|十六|十七|十八|十一|十|二十|廿(?!日市市)|三十|一|二|三|四|五|六|七|八|九)|3[01]|[0-2]?\d)(\s*日|号))目? + def: (?3[01]|[0-2]?\d|(三十一?|(一|二)?十?[一二三四五六七八九]))((\s*日(?!かかる|待つ|泊まる|経つ))目?)?(?=\b|t|まで|から)? +DayNumberRegex: !simpleRegex + def: (二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|二十一|三十一|十二|十三|十四|十五|十六|十七|十八|十一|十|二十|廿(?!日市市)|三十|一|二|三|四|五|六|七|八|九) +DateDayRegexInCJK: !nestedRegex + def: (?初一|({DayNumberRegex}|3[01]|[0-2]?\d)(\s*日|号)(?!かかる|待つ|泊まる|経つ))目? 
+ references: [DayNumberRegex] DayRegexNumInCJK: !simpleRegex def: (?一|十一|二十一|三十一|三十|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|一|十一|十|二十一|二十|廿(?!日市市)|三十一|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|十|二十|廿|卅) MonthNumRegex: !simpleRegex @@ -69,7 +72,7 @@ SpecialYearRegex: !simpleRegex SpecialDayRegex: !simpleRegex def: ((いっ)?さくじつ|おとつい|最近|前天|后天|明日から二日((?今日)から(?1日半)(の間)?)|((?今日)から(?2日半)(の間)?)|昨日の2日前|昨日から4日|今日から二日|今日から4日|昨日から2日間|昨天|明天|今天|今日|明日|一?昨?昨日|一昨日|大后天|大前天|後天|大後天|きょう|あす|あした|きのう|明々後日|(弥)?明後日|この日|前日|二日前|おととい|し?あさって|私の一日|この間|次の日|その日|最後の日) SpecialDayWithNumRegex: !simpleRegex - def: ((いっ)?さくじつ|おとつい|最近|前天|后天|昨天|明天|今天|今日?|明日|一?昨?昨日|一昨日|大后天|大前天|後天|大後天|きょう|あす|あした|きのう|明々後日|(弥)?明後日|この日|前日|二日前|おととい|し?あさって|私の一日|この間|次の日|その日)(から|の)?([\d十一二三四五六七八九]*|数)(日|月|週|個)間?(先|後|前|以内)?(の(?日曜日?|月曜日?|火曜日?|水曜日?|木曜日?|金曜日?|土曜日?))? + def: ((いっ)?さくじつ|おとつい|最近|前天|后天|昨天|明天|今天|今日?|明日|一?昨?昨日|一昨日|大后天|大前天|後天|大後天|きょう|あす|あした|きのう|明々後日|(弥)?明後日|この日|前日|二日前|おととい|し?あさって|私の一日|この間|次の日|その日)(から|の)?([\d十一二三四五六七八九]*|数)(日|月|週(間で)?|個)間?(先|後|前)?(の(?日曜日?|月曜日?|火曜日?|水曜日?|木曜日?|金曜日?|土曜日?))? WeekDayOfMonthRegex: !nestedRegex def: ((({SpecialMonthRegex}|{MonthRegex}|{MonthNumRegex}|((这个|这一个|这|这一|本|今|上个|上一个|上|上一|去|下个|下一个|下|下一|明)月))(的|の)?\s*)?(第|最)?(?([初一二三四五])|最後|最終|([1-5])|最后一)(个|の|\s)*{WeekDayRegex}) references: [SpecialMonthRegex, MonthRegex, MonthNumRegex, WeekDayRegex] @@ -89,11 +92,11 @@ SpecialDate: !nestedRegex def: (?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})年)?(の|的)?(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})(の|的)?月)?(の|的)?{DateDayRegexInCJK} references: [ThisPrefixRegex, LastPrefixRegex, NextPrefixRegex, DateDayRegexInCJK] DateUnitRegex: !simpleRegex - def: (?年|个月|月|周|時間?|(?営業)日|(?年|个月|月|周|(?営業)日|(?再来)|下个|下一个|下|下一|最初|来|向こう|これから(の)?|翌|今後|次(の)? + def: (?再来|以降)|下个|下一个|下|下一|最初|来|向こう|これから(の)?|翌|今後|次(の)? DateRangePrepositions: !simpleRegex def: ((こ|私の|その|この|これらの|それらの)\s*)? 
RelativeMonthRegex: !nestedRegex @@ -251,7 +254,7 @@ FirstLastOfYearRegex: !nestedRegex def: (({DatePeriodYearInCJKRegex}|{YearRegex}|(?再来年|翌年|来年|今年|去年))的?)((?前)|(?(最后|最後|最終))) references: [YearRegex,DatePeriodYearInCJKRegex] ComplexDatePeriodRegex: !nestedRegex - def: ({DateRangePrepositions})(?(第{ZeroToNineIntegerRegexCJK}+|第\d+|{DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex}|(({YearNumRegex}|{DayRegexForPeriod}|{DateDayRegexInCJK}|{MonthRegex}|{MonthNumRegex})(?!((\d+)?(分|秒|時))))|{RelativeRegex}).+)(から)(?.+)(までの間|まで|の間) + def: ({DateRangePrepositions})(?(第{ZeroToNineIntegerRegexCJK}+|第\d+|{DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex}|(({YearNumRegex}|{DayRegexForPeriod}|{DateDayRegexInCJK}|{MonthRegex}|{MonthNumRegex})(?!((\d+)?(分|秒|時))))|{RelativeRegex}).+)(から)(?.+)(までの間|(?(この|時前|(?昨日の?(午前|午後|中|夜|朝)?) TodayRegex: !simpleRegex def: (?(今朝の?|今朝の午前|今晩|今晚|今早|今晨|明晚|明早|明晨|昨晚|今夜|昨夜)(的|在)?) +FromNowRegex: !simpleRegex + def: ((?今)から) SpecialDayHourRegex: !nestedRegex def: ((?{TimeHourCJKRegex}|{TimeHourNumRegex})(時間?|(:00))) references: [TimeHourCJKRegex, TimeHourNumRegex] @@ -305,13 +310,13 @@ SpecialDaySecondRegex: !nestedRegex def: ((?{TimeSecondCJKRegex}|{TimeSecondNumRegex})秒間?) 
references: [TimeSecondCJKRegex, TimeSecondNumRegex] SpecialDayModRegex: !simpleRegex - def: ((?過ぎに)|(?ごろ)|(?で|の?うちに)|(?弱|たらず)|(?以上)) + def: ((?過ぎに|以降)|(?ごろ)|(?で|の?うちに)|(?弱|たらず)|(?以上)) SpecialDayEndOfRegex: !nestedRegex def: ((?明日の終わり|({WeekDayRegex}の?終わり))|(?日の終わり|一日の終わり|その日の終わり)) references: [WeekDayRegex] TimeOfSpecialDayRegex: !nestedRegex - def: (({SpecialDayEndOfRegex}|{WeekDayRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex})(\d日)?(と)?(({SpecialDayHourRegex}{SpecialDayMinuteRegex}?{SpecialDaySecondRegex}?)|({SpecialDayMinuteRegex}{SpecialDaySecondRegex}?)){SpecialDayModRegex}?)|(({SpecialDayHourRegex}(?で|の?うちに)))|(({SpecialDayEndOfRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex}){SpecialDayModRegex}?)|({WeekDayRegex}(\d日)?(と)?{SpecialDayModRegex}) - references: [ TomorrowRegex, YesterdayRegex, TodayRegex, WeekDayRegex, SpecialDayEndOfRegex, SpecialDayHourRegex, SpecialDayMinuteRegex, SpecialDaySecondRegex, SpecialDayModRegex ] + def: (({SpecialDayEndOfRegex}|{WeekDayRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex})(\d日)?(と)?(({SpecialDayHourRegex}{SpecialDayMinuteRegex}?{SpecialDaySecondRegex}?)|({SpecialDayMinuteRegex}{SpecialDaySecondRegex}?)){SpecialDayModRegex}?)|(({SpecialDayHourRegex}(?で|の?うちに)))|(({SpecialDayEndOfRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex}){SpecialDayModRegex}?)|({WeekDayRegex}(\d日)?(と)?{SpecialDayModRegex})|({FromNowRegex}\d+(分|時|秒)後) + references: [ TomorrowRegex, YesterdayRegex, TodayRegex, WeekDayRegex, SpecialDayEndOfRegex, SpecialDayHourRegex, SpecialDayMinuteRegex, SpecialDaySecondRegex, SpecialDayModRegex, FromNowRegex ] NowTimeRegex: !simpleRegex def: (现在|今) RecentlyTimeRegex: !simpleRegex @@ -372,7 +377,7 @@ DurationHalfRegex: !simpleRegex # TODO: modify below regex according to the counterpart in Korean def: ^[.] 
DurationRelativeDurationUnitRegex: !simpleRegex - def: (?数ヶ|数)|(?前|昨日)|(?後|明日)|(?もう) + def: (?数ヶ|数)|(?(?以内)|(?後|明日)|(?(?(?(?{DateUnitRegex}|分|秒|時間|まる(ひと)?|もう|数|以上|たらず) - references: [DateUnitRegex] + def: (?年|个月|月|周|時間?|(?営業)日|天|週間?|星期|个星期|か月|(?[と]?|,)\s*$ ConnectorRegex: !simpleRegex @@ -446,7 +451,7 @@ SetEachDateUnitRegex: !simpleRegex def: (毎)(年|月|週)\s*$ #TimeExtractorCJK TimeHourNumRegex: !simpleRegex - def: ([0-1]?\d|2[0-4]) + def: (?{TimeHourNumRegex}):(?{TimeMinuteNumRegex})(:(?{TimeSecondNumRegex}))?(am|pm)? - references: [TimeHourNumRegex, TimeMinuteNumRegex, TimeSecondNumRegex] + def: (?{TimeHourNumRegex}):(?{TimeMinuteNumRegex})(:(?{TimeSecondNumRegex}))?({AmPmDescRegex})? + references: [TimeHourNumRegex, TimeMinuteNumRegex, TimeSecondNumRegex, AmPmDescRegex] TimeDayDescRegex: !nestedRegex def: (?(正午|夜中|午前半ば|(昼食時)|真昼)|((?<=({TimeDigitTimeRegex}|{TimeCJKTimeRegex})(の)?)(早朝(に)?|午後(に)?|晚|晩|(深)?夜(に)?|未明|午前(中)?|日中|白昼|(未|早)?朝(に)?|昼前に|昼すぎに|夕方前に|夕方に|営業時間内に|昼))|((早朝(に)?|午後(に)?|晚|晩|(深)?夜(に)?|未明|午前(中)?|日中|白昼|(未|早)?朝(に)?|昼前に|昼すぎに|夕方前に|夕方に|営業時間内に|昼)(?=(の)?({TimeDigitTimeRegex}|{TimeCJKTimeRegex})))) references: [TimeDigitTimeRegex, TimeCJKTimeRegex] @@ -541,11 +546,11 @@ AmbiguousRangeModifierPrefix: !simpleRegex ReferenceDatePeriodRegex: !simpleRegex def: (同じ|その)(?月|週末|年|週) ParserConfigurationBefore: !simpleRegex - def: (之前|以前|前) + def: (またはその前|またはそれ以前|之前|以前|前) ParserConfigurationAfter: !simpleRegex - def: (之后|之後|以后|以後|后|後) + def: (またはそれ以降|之后|之後|以后|以後|后|後|以降) ParserConfigurationUntil: !simpleRegex - def: (直到|直至|截至|截止(到)?) + def: (まで|直到|直至|截至|截止(到)?) 
ParserConfigurationSincePrefix: !simpleRegex def: (自从|自|自打|打) ParserConfigurationSinceSuffix: !simpleRegex @@ -955,13 +960,17 @@ DateTimePeriodNIRegex: !simpleRegex def: (半夜|夜间|深夜|夜) AmbiguityFiltersDict: !dictionary types: [ string, string ] - # TODO: populate dictionary according to the counterpart in Chinese entries: '早': '(?y|con) +MultiplierRegex: !simpleRegex + def: \s*\b(((mil\s+)?mi|bi|cuatri|quinti|sexti|septi)ll[oó]n|mil)(es)?\b CurrencyPrefixList: !dictionary types: [ string, string ] entries: Dobra: db|std - Dólar: $ + Dólar: $|dólar|dólares|dolar|dolares Dólar estadounidense: us$|u$d|usd Dólar del Caribe Oriental: ec$|xcd Dólar australiano: a$|aud diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/base_datetimeperiod.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/base_datetimeperiod.py index 21013ae1d2..e708f55377 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/base_datetimeperiod.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/base_datetimeperiod.py @@ -1277,7 +1277,8 @@ def merge_two_time_points(self, source: str, reference: datetime) -> DateTimeRes if past_end < past_begin: past_end = future_end - result.timex = f'({parse_result1.timex_str},{parse_result2.timex_str},PT{DateUtils.total_hours(future_begin, future_end)}H)' + duration_str = DateTimeFormatUtil.luis_time_span(future_begin, future_end) + result.timex = f'({parse_result1.timex_str},{parse_result2.timex_str},{duration_str})' # Do nothing elif begin_has_date: diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/chinese/datetimeperiod_parser.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/chinese/datetimeperiod_parser.py index 4e1b15219e..e42f40bb04 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/chinese/datetimeperiod_parser.py +++ 
b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/chinese/datetimeperiod_parser.py @@ -218,13 +218,14 @@ def merge_two_time_points(self, source: str, reference: datetime) -> DateTimeRes result.future_value = [left_time, right_time] result.past_value = [left_time, right_time] - fuzzy_timex = 'X' in prs.begin.timex_str or 'X' in prs.end.timex_str - left_timex = prs.begin.timex_str if fuzzy_timex else DateTimeFormatUtil.luis_date_time( - left_time) - right_timex = prs.end.timex_str if fuzzy_timex else DateTimeFormatUtil.luis_date_time( - right_time) - total_hours = DateUtils.total_hours(left_time, right_time) - result.timex = f'({left_timex},{right_timex},PT{total_hours}H)' + left_timex = prs.begin.timex_str + right_timex = prs.end.timex_str + if begin_has_date: + right_timex = DateTimeFormatUtil.luis_date_short_time(right_time, right_timex) + elif end_has_date: + left_timex = DateTimeFormatUtil.luis_date_short_time(left_time, left_timex) + duration_timex = DateTimeFormatUtil.luis_time_span(left_time, right_time) + result.timex = f'({left_timex},{right_timex},{duration_timex})' result.success = True return result diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/constants.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/constants.py index 0d15ef0b00..e7365193bd 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/constants.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/constants.py @@ -44,6 +44,9 @@ class Constants: MIN_MONTH: int = 1 MAX_MONTH: int = 12 INVALID_YEAR = -2147483648 + INVALID_HOUR = -2147483648 + INVALID_MINUTE = -2147483648 + INVALID_SECOND = -2147483648 COMMENT_KEY: str = 'Comment' COMMENT_AMPM = 'ampm' @@ -56,6 +59,9 @@ class Constants: MAX_YEAR_NUM: int = int(BaseDateTime.MaxYearNum) HALF_DAY_HOUR_COUNT = 12 + DAY_HOUR_COUNT = 24 + HOUR_SECOND_COUNT = 3600 + MINUTE_SECOND_COUNT = 60 
HALF_MID_DAY_DURATION_HOUR_COUNT = 2 # specifies the priority interpreting month and day order diff --git a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/utilities.py b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/utilities.py index d690757514..676080f8f0 100644 --- a/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/utilities.py +++ b/Python/libraries/recognizers-date-time/recognizers_date_time/date_time/utilities.py @@ -285,8 +285,11 @@ def luis_date_from_datetime(date: datetime) -> str: return DateTimeFormatUtil.luis_date(date.year, date.month, date.day) @staticmethod - def luis_time(hour: int, minute: int, second: int) -> str: - return f'{hour:02d}:{minute:02d}:{second:02d}' + def luis_time(hour: int, minute: int, second: int = Constants.INVALID_SECOND) -> str: + if second == Constants.INVALID_SECOND: + return f'{hour:02d}:{minute:02d}' + else: + return f'{hour:02d}:{minute:02d}:{second:02d}' @staticmethod def luis_time_from_datetime(time: datetime) -> str: @@ -297,15 +300,37 @@ def luis_date_time(time: datetime) -> str: return DateTimeFormatUtil.luis_date_from_datetime(time) + 'T' + DateTimeFormatUtil.luis_time_from_datetime(time) @staticmethod - def luis_time_span(begin_time: datetime, end_time: datetime) -> str: - timex_builder = f'{Constants.GENERAL_PERIOD_PREFIX}{Constants.TIME_TIMEX_PREFIX}' + def luis_date_short_time(time: datetime, timex: str = None) -> str: + has_min = False if timex is None else Constants.TIME_TIMEX_CONNECTOR in timex + has_sec = False if timex is None else len(timex.split(Constants.TIME_TIMEX_CONNECTOR)) > 2 - total_hours = end_time.hour - begin_time.hour - total_minutes = end_time.minute - begin_time.minute - total_seconds = end_time.second - begin_time.second + return DateTimeFormatUtil.luis_date_from_datetime(time) + DateTimeFormatUtil.format_short_time(time, has_min, has_sec) - if total_hours > 0: - timex_builder += f'{total_hours}H' + @staticmethod + def 
format_short_time(time: datetime, has_min: bool = False, has_sec: bool = False) -> str: + hour = time.hour + min = time.minute if has_min or time.minute > 0 else Constants.INVALID_MINUTE + sec = time.second if has_sec or time.second > 0 else Constants.INVALID_SECOND + return DateTimeFormatUtil.short_time(hour, min, sec) + + @staticmethod + def short_time(hour: int, minute: int = Constants.INVALID_MINUTE, second: int = Constants.INVALID_SECOND) -> str: + if minute == Constants.INVALID_MINUTE and second == Constants.INVALID_SECOND: + return f'{Constants.TIME_TIMEX_PREFIX}{hour:02d}' + else: + return f'{Constants.TIME_TIMEX_PREFIX}{DateTimeFormatUtil.luis_time(hour, minute, second)}' + + @staticmethod + def luis_time_span(begin_time: datetime, end_time: datetime) -> str: + timex_builder = f'{Constants.GENERAL_PERIOD_PREFIX}{Constants.TIME_TIMEX_PREFIX}' + span = end_time - begin_time + total_days = span.days + total_seconds = span.seconds + total_hours, total_seconds = divmod(total_seconds, Constants.HOUR_SECOND_COUNT) + total_minutes, total_seconds = divmod(total_seconds, Constants.MINUTE_SECOND_COUNT) + + if total_days > 0 or total_hours > 0: + timex_builder += f'{total_days * Constants.DAY_HOUR_COUNT + total_hours}H' if total_minutes > 0: timex_builder += f'{total_minutes}M' if total_seconds > 0: diff --git a/README.md b/README.md index 8b8b0ebafc..cebe86a4aa 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Microsoft.Recognizers.Text provides robust recognition and resolution of entitie # Utilizing the Project -Microsoft.Recognizers.Text powers pre-built entities in both [**LUIS: Language Understanding Intelligent Service**](https://www.luis.ai/home) and [**Microsoft Bot Framework**](https://dev.botframework.com/); base entity types in [**Text Analytics Cognitive Service**](https://docs.microsoft.com/en-us/azure/cognitive-services/text-analytics/how-tos/text-analytics-how-to-entity-linking); and it is also available as standalone packages (for the base 
classes and the different entity recognizers). +Microsoft.Recognizers.Text powers pre-built entities in [**LUIS: Language Understanding Intelligent Service**](https://www.luis.ai/home), [**Power Virtual Agents**](https://powervirtualagents.microsoft.com/en-us/), and [**Microsoft Bot Framework**](https://dev.botframework.com/); base entity types in [**Text Analytics Cognitive Service**](https://docs.microsoft.com/en-us/azure/cognitive-services/text-analytics/how-tos/text-analytics-how-to-entity-linking); and it is also available as standalone packages (for the base classes and the different entity recognizers). The Microsoft.Recognizers.Text packages currently target four platforms: * [C#/.NET](https://github.com/Microsoft/Recognizers-Text/tree/master/.NET) - **NuGet packages** available at: https://www.nuget.org/profiles/Recognizers.Text diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000000..f7b89984f0 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,41 @@ + + +## Security + +Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). + +If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 
+ +## Reporting Security Issues + +**Please do not report security vulnerabilities through public GitHub issues.** + +Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). + +If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). + +You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). + +Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: + + * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) + * Full paths of source file(s) related to the manifestation of the issue + * The location of the affected source code (tag/branch/commit or direct URL) + * Any special configuration required to reproduce the issue + * Step-by-step instructions to reproduce the issue + * Proof-of-concept or exploit code (if possible) + * Impact of the issue, including how an attacker might exploit the issue + +This information will help us triage your report more quickly. + +If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. + +## Preferred Languages + +We prefer all communications to be in English. + +## Policy + +Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 
+ + \ No newline at end of file diff --git a/Specs/DateTime/Chinese/DateTimeModel.json b/Specs/DateTime/Chinese/DateTimeModel.json index cce719ab43..3e42f53d7a 100644 --- a/Specs/DateTime/Chinese/DateTimeModel.json +++ b/Specs/DateTime/Chinese/DateTimeModel.json @@ -225,7 +225,6 @@ "Context": { "ReferenceDateTime": "2017-03-22T00:00:00" }, - "NotSupported": "javascript", "Results": [ { "Text": "农历2015年十月初一", @@ -250,7 +249,6 @@ "Context": { "ReferenceDateTime": "2017-03-22T00:00:00" }, - "NotSupported": "javascript", "Results": [ { "Text": "正月三十", @@ -651,7 +649,6 @@ "Context": { "ReferenceDateTime": "2016-11-07T14:07:00" }, - "NotSupported": "javascript", "Results": [ { "Text": "2015年十月初一早上九点二十", @@ -803,7 +800,7 @@ "Resolution": { "values": [ { - "timex": "(2016-11-06T14:00:00,2016-11-08T04:00:00,PT38H)", + "timex": "(2016-11-06T14,2016-11-08T04,PT38H)", "type": "datetimerange", "start": "2016-11-06 14:00:00", "end": "2016-11-08 04:00:00" @@ -1655,7 +1652,7 @@ "Context": { "ReferenceDateTime": "2017-03-22T00:00:00" }, - "NotSupported": "javascript, python", + "NotSupported": "python", "Results": [ { "Text": "2018年以前", @@ -1681,7 +1678,7 @@ "Context": { "ReferenceDateTime": "2017-03-22T00:00:00" }, - "NotSupported": "javascript, python", + "NotSupported": "python", "Results": [ { "Text": "2018年之后", @@ -1759,7 +1756,7 @@ "Context": { "ReferenceDateTime": "2017-03-22T00:00:00" }, - "NotSupported": "javascript, python", + "NotSupported": "python", "Results": [ { "Text": "自2018年以来", @@ -1785,7 +1782,7 @@ "Context": { "ReferenceDateTime": "2017-03-22T00:00:00" }, - "NotSupported": "javascript, python", + "NotSupported": "python", "Results": [ { "Text": "2018年开始", @@ -1811,7 +1808,7 @@ "Context": { "ReferenceDateTime": "2017-03-22T00:00:00" }, - "NotSupported": "javascript, python", + "NotSupported": "python", "Results": [ { "Text": "07年以后", @@ -1837,7 +1834,7 @@ "Context": { "ReferenceDateTime": "2017-03-22T00:00:00" }, - "NotSupported": "javascript, python", + 
"NotSupported": "python", "Results": [ { "Text": "07 年以前", @@ -2193,7 +2190,7 @@ "Context": { "ReferenceDateTime": "2018-08-30T14:16:03" }, - "NotSupported": "javascript, python, java", + "NotSupported": "python, java", "Results": [ { "Text": "2015年前", @@ -3144,7 +3141,7 @@ "Context": { "ReferenceDateTime": "2019-07-01T00:00:00" }, - "NotSupported": "javascript, python, java", + "NotSupported": "python, java", "Results": [ { "Text": "今天晚上八点之前", @@ -3170,7 +3167,7 @@ "Context": { "ReferenceDateTime": "2019-07-01T00:00:00" }, - "NotSupported": "javascript, python, java", + "NotSupported": "python, java", "Results": [ { "Text": "下午三点之前", @@ -3850,7 +3847,7 @@ "Context": { "ReferenceDateTime": "2018-08-30T14:16:03" }, - "NotSupported": "java, javascript, python", + "NotSupported": "java, python", "Results": [ { "Text": "从2016年3月1日开始", @@ -4565,7 +4562,7 @@ "Context": { "ReferenceDateTime": "2019-01-06T12:00:00" }, - "NotSupported": "java, javascript, python", + "NotSupported": "java, python", "Results": [ { "Text": "自2016年9月1日起", @@ -4821,7 +4818,7 @@ "Context": { "ReferenceDateTime": "2019-01-06T12:00:00" }, - "NotSupported": "java, javascript, python", + "NotSupported": "java, python", "Results": [ { "Text": "2015年1月1日", @@ -5883,7 +5880,6 @@ "Context": { "ReferenceDateTime": "2017-03-22T00:00:00" }, - "NotSupported": "javascript", "Results": [ { "Text": "神龙二年正月初一", @@ -6905,5 +6901,111 @@ } } ] + }, + { + "Input": "帮我预订2016年12月15日晚上7:00到8:00的会议", + "Context": { + "ReferenceDateTime": "2016-11-07T10:20:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "2016年12月15日晚上7:00到8:00", + "Start": 4, + "End": 25, + "TypeName": "datetimeV2.datetimerange", + "Resolution": { + "values": [ + { + "timex": "(2016-12-15T19:00,2016-12-15T20:00,PT1H)", + "type": "datetimerange", + "start": "2016-12-15 19:00:00", + "end": "2016-12-15 20:00:00" + } + ] + } + } + ] + }, + { + "Input": "周一上午10点到12点有空吗", + "Context": { + "ReferenceDateTime": 
"2018-11-01T12:00:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "周一上午10点到12点", + "Start": 0, + "End": 10, + "TypeName": "datetimeV2.datetimerange", + "Resolution": { + "values": [ + { + "timex": "(XXXX-WXX-1T10,XXXX-WXX-1T12,PT2H)", + "type": "datetimerange", + "start": "2018-10-29 10:00:00", + "end": "2018-10-29 12:00:00" + }, + { + "timex": "(XXXX-WXX-1T10,XXXX-WXX-1T12,PT2H)", + "type": "datetimerange", + "start": "2018-11-05 10:00:00", + "end": "2018-11-05 12:00:00" + } + ] + } + } + ] + }, + { + "Input": "还剩5 分钟", + "Context": { + "ReferenceDateTime": "2016-11-07T16:12:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "还剩5 分钟", + "Start": 0, + "End": 5, + "TypeName": "datetimeV2.datetimerange", + "Resolution": { + "values": [ + { + "timex": "(2016-11-07T16:12:00,2016-11-07T16:17:00,PT5M)", + "type": "datetimerange", + "start": "2016-11-07 16:12:00", + "end": "2016-11-07 16:17:00" + } + ] + } + } + ] + }, + { + "Input": "还剩 3 小时", + "Context": { + "ReferenceDateTime": "2016-11-07T16:12:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "还剩 3 小时", + "Start": 0, + "End": 6, + "TypeName": "datetimeV2.datetimerange", + "Resolution": { + "values": [ + { + "timex": "(2016-11-07T16:12:00,2016-11-07T19:12:00,PT3H)", + "type": "datetimerange", + "start": "2016-11-07 16:12:00", + "end": "2016-11-07 19:12:00" + } + ] + } + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/Chinese/DateTimePeriodParser.json b/Specs/DateTime/Chinese/DateTimePeriodParser.json index 014b2fce40..e3a38a916a 100644 --- a/Specs/DateTime/Chinese/DateTimePeriodParser.json +++ b/Specs/DateTime/Chinese/DateTimePeriodParser.json @@ -34,7 +34,7 @@ "Text": "从昨天下午两点到明天四点", "Type": "datetimerange", "Value": { - "Timex": "(2016-11-06T14:00:00,2016-11-08T04:00:00,PT38H)", + "Timex": "(2016-11-06T14,2016-11-08T04,PT38H)", "FutureResolution": { "startDateTime": 
"2016-11-06 14:00:00", "endDateTime": "2016-11-08 04:00:00" @@ -109,7 +109,7 @@ "Text": "2点-明天4点", "Type": "datetimerange", "Value": { - "Timex": "(2016-11-07T02:00:00,2016-11-08T04:00:00,PT26H)", + "Timex": "(2016-11-07T02,2016-11-08T04,PT26H)", "FutureResolution": { "startDateTime": "2016-11-07 02:00:00", "endDateTime": "2016-11-08 04:00:00" @@ -309,7 +309,7 @@ "Text": "从现在到八点", "Type": "datetimerange", "Value": { - "Timex": "(2016-11-07T16:12:00,2016-11-07T20:00:00,PT4H)", + "Timex": "(PRESENT_REF,2016-11-07T20,PT3H48M)", "FutureResolution": { "startDateTime": "2016-11-07 16:12:00", "endDateTime": "2016-11-07 20:00:00" @@ -334,7 +334,7 @@ "Text": "今晚八点到九点", "Type": "datetimerange", "Value": { - "Timex": "(2016-11-07T20:00:00,2016-11-07T21:00:00,PT1H)", + "Timex": "(2016-11-07T20,2016-11-07T21,PT1H)", "FutureResolution": { "startDateTime": "2016-11-07 20:00:00", "endDateTime": "2016-11-07 21:00:00" @@ -359,7 +359,7 @@ "Text": "今晚7点到7点30", "Type": "datetimerange", "Value": { - "Timex": "(2016-11-07T19:00:00,2016-11-07T19:30:00,PT0H)", + "Timex": "(2016-11-07T19,2016-11-07T19:30,PT30M)", "FutureResolution": { "startDateTime": "2016-11-07 19:00:00", "endDateTime": "2016-11-07 19:30:00" @@ -641,5 +641,57 @@ "Length": 6 } ] + }, + { + "Input": "明天下午两点到五点", + "Context": { + "ReferenceDateTime": "2016-11-07T16:12:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "明天下午两点到五点", + "Type": "datetimerange", + "Value": { + "Timex": "(2016-11-08T14,2016-11-08T17,PT3H)", + "FutureResolution": { + "startDateTime": "2016-11-08 14:00:00", + "endDateTime": "2016-11-08 17:00:00" + }, + "PastResolution": { + "startDateTime": "2016-11-08 14:00:00", + "endDateTime": "2016-11-08 17:00:00" + } + }, + "Start": 0, + "Length": 9 + } + ] + }, + { + "Input": "2:00-明天4:00", + "Context": { + "ReferenceDateTime": "2016-11-07T16:12:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "2:00-明天4:00", + "Type": 
"datetimerange", + "Value": { + "Timex": "(2016-11-07T02:00,2016-11-08T04:00,PT26H)", + "FutureResolution": { + "startDateTime": "2016-11-07 02:00:00", + "endDateTime": "2016-11-08 04:00:00" + }, + "PastResolution": { + "startDateTime": "2016-11-07 02:00:00", + "endDateTime": "2016-11-08 04:00:00" + } + }, + "Start": 0, + "Length": 11 + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/Dutch/DateTimeModel.json b/Specs/DateTime/Dutch/DateTimeModel.json index 8bfd8c8517..98e9c1c0c8 100644 --- a/Specs/DateTime/Dutch/DateTimeModel.json +++ b/Specs/DateTime/Dutch/DateTimeModel.json @@ -1,4 +1,4 @@ -[ +[ { "Input": "Ik ga 4e jan 2019 terug", "Context": { @@ -11940,11 +11940,10 @@ "Context": { "ReferenceDateTime": "2016-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { - "Text": "Dag van de Aarde 2010", + "Text": "dag van de aarde 2010", "Start": 23, "End": 43, "TypeName": "datetimeV2.date", @@ -13779,7 +13778,6 @@ "Context": { "ReferenceDateTime": "2019-07-30T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -13854,7 +13852,6 @@ "Context": { "ReferenceDateTime": "2019-07-30T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -14521,7 +14518,6 @@ "Context": { "ReferenceDateTime": "2019-09-09T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -14601,7 +14597,6 @@ "Context": { "ReferenceDateTime": "2019-09-12T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -14627,7 +14622,6 @@ "Context": { "ReferenceDateTime": "2019-09-12T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -14653,7 +14647,6 @@ "Context": { "ReferenceDateTime": "2019-09-12T00:00:00" }, - "NotSupported": "dotnet", 
"NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -14861,11 +14854,10 @@ "Context": { "ReferenceDateTime": "2019-11-25T17:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { - "Text": "Om de week vrijdag", + "Text": "om de week vrijdag", "Start": 0, "End": 17, "TypeName": "datetimeV2.set", @@ -15125,13 +15117,12 @@ "Context": { "ReferenceDateTime": "2019-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { - "Text": "week beginnend op 4 feb", - "Start": 37, - "End": 59, + "Text": "de week beginnend op 4 feb.", + "Start": 34, + "End": 60, "TypeName": "datetimeV2.daterange", "Resolution": { "values": [ @@ -15157,7 +15148,6 @@ "Context": { "ReferenceDateTime": "2019-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -15189,7 +15179,6 @@ "Context": { "ReferenceDateTime": "2019-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -15221,7 +15210,6 @@ "Context": { "ReferenceDateTime": "2019-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -15279,36 +15267,35 @@ "Context": { "ReferenceDateTime": "2019-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { - "Text": "tijdens lunchtijd", - "Start": 42, - "End": 58, - "TypeName": "datetimeV2.timerange", + "Text": "vandaag", + "Start": 34, + "End": 40, + "TypeName": "datetimeV2.date", "Resolution": { "values": [ { - "timex": "TMEL", - "type": "timerange", - "start": "11:00:00", - "end": "13:00:00" + "timex": "2019-11-07", + "type": "date", + "value": "2019-11-07" } ] } }, { - "Text": "vandaag", - "Start": -1, - "End": 5, - "TypeName": "datetimeV2.date", + "Text": "tijdens lunchtijd", + "Start": 42, + "End": 58, + "TypeName": 
"datetimeV2.timerange", "Resolution": { "values": [ { - "timex": "2019-11-07", - "type": "date", - "value": "2019-11-07" + "timex": "TMEL", + "type": "timerange", + "start": "11:00:00", + "end": "13:00:00" } ] } @@ -15466,7 +15453,6 @@ "Context": { "ReferenceDateTime": "2019-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -15517,12 +15503,11 @@ "Context": { "ReferenceDateTime": "2020-04-24T10:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { - "Text": "eind van 2000", - "Start": 33, + "Text": "het eind van 2000", + "Start": 29, "End": 45, "TypeName": "datetimeV2.daterange", "Resolution": { @@ -15544,12 +15529,11 @@ "Context": { "ReferenceDateTime": "2020-04-24T10:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { - "Text": "midden van 2000", - "Start": 32, + "Text": "het midden van 2000", + "Start": 28, "End": 46, "TypeName": "datetimeV2.daterange", "Resolution": { @@ -15571,12 +15555,11 @@ "Context": { "ReferenceDateTime": "2020-04-24T10:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { - "Text": "begin van 2000", - "Start": 33, + "Text": "het begin van 2000", + "Start": 29, "End": 46, "TypeName": "datetimeV2.daterange", "Resolution": { @@ -15625,12 +15608,11 @@ "Context": { "ReferenceDateTime": "2020-04-27T18:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { - "Text": "einde van 1989", - "Start": 31, + "Text": "het einde van 1989", + "Start": 27, "End": 44, "TypeName": "datetimeV2.daterange", "Resolution": { @@ -15652,7 +15634,6 @@ "Context": { "ReferenceDateTime": "2020-04-27T18:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -15950,11 +15931,10 @@ "Context": { "ReferenceDateTime": "2020-05-14T12:00:00" }, - 
"NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { - "Text": "Dag van de Arbeid", + "Text": "dag van de arbeid", "Start": 12, "End": 28, "TypeName": "datetimeV2.date", @@ -16287,7 +16267,6 @@ "Context": { "ReferenceDateTime": "2020-06-12T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -16366,6 +16345,21 @@ ] } }, + { + "Text": "elk jaar", + "Start": 96, + "End": 103, + "TypeName": "datetimeV2.set", + "Resolution": { + "values": [ + { + "timex": "P1Y", + "type": "set", + "value": "not resolved" + } + ] + } + }, { "Text": "19 juni", "Start": 108, @@ -16385,21 +16379,6 @@ } ] } - }, - { - "Text": "elk jaar", - "Start": -1, - "End": 6, - "TypeName": "datetimeV2.set", - "Resolution": { - "values": [ - { - "timex": "P1Y", - "type": "set", - "value": "not resolved" - } - ] - } } ] }, @@ -16477,7 +16456,6 @@ "Context": { "ReferenceDateTime": "2020-06-12T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -16502,7 +16480,6 @@ "Context": { "ReferenceDateTime": "2020-06-12T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -16552,7 +16529,6 @@ "Context": { "ReferenceDateTime": "2020-06-12T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -16720,7 +16696,6 @@ "Context": { "ReferenceDateTime": "2020-06-12T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -16745,7 +16720,6 @@ "Context": { "ReferenceDateTime": "2020-06-12T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -16770,7 +16744,6 @@ "Context": { "ReferenceDateTime": "2020-06-12T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -17364,11 +17337,10 @@ "Context": { 
"ReferenceDateTime": "2019-06-12T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { - "Text": "eropvolgende Pasen", + "Text": "eropvolgende pasen", "Start": 15, "End": 32, "TypeName": "datetimeV2.date", @@ -17389,7 +17361,6 @@ "Context": { "ReferenceDateTime": "2016-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -17416,7 +17387,6 @@ "Context": { "ReferenceDateTime": "2016-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -17503,7 +17473,6 @@ "Context": { "ReferenceDateTime": "2016-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -18070,7 +18039,6 @@ "Context": { "ReferenceDateTime": "2016-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -18122,7 +18090,6 @@ "Context": { "ReferenceDateTime": "2016-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -18154,7 +18121,6 @@ "Context": { "ReferenceDateTime": "2016-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -18180,7 +18146,6 @@ "Context": { "ReferenceDateTime": "2016-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -18206,7 +18171,6 @@ "Context": { "ReferenceDateTime": "2016-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -18232,7 +18196,6 @@ "Context": { "ReferenceDateTime": "2016-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -18264,7 +18227,6 @@ "Context": { "ReferenceDateTime": "2016-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": 
"java, javascript, python", "Results": [ { @@ -18296,7 +18258,6 @@ "Context": { "ReferenceDateTime": "2018-06-26T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -18322,7 +18283,6 @@ "Context": { "ReferenceDateTime": "2018-06-26T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -18603,7 +18563,6 @@ "Context": { "ReferenceDateTime": "2016-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -18727,7 +18686,6 @@ "Context": { "ReferenceDateTime": "2016-11-07T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { @@ -18886,11 +18844,10 @@ "Context": { "ReferenceDateTime": "2019-08-05T00:00:00" }, - "NotSupported": "dotnet", "NotSupportedByDesign": "java, javascript, python", "Results": [ { - "Text": "Kerstavond", + "Text": "kerstavond", "Start": 25, "End": 34, "TypeName": "datetimeV2.date", @@ -18910,14 +18867,14 @@ } }, { - "Text": "18.00", - "Start": 39, + "Text": "om 18.00", + "Start": 36, "End": 43, "TypeName": "datetimeV2.time", "Resolution": { "values": [ { - "timex": "T18", + "timex": "T18:00", "type": "time", "value": "18:00:00" } @@ -20028,5 +19985,206 @@ } } ] + }, + { + "Input": "Ik ben afwezig november 19-20", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "NotSupportedByDesign": "javascript, python, java", + "Results": [ + { + "Text": "november 19-20", + "Start": 15, + "End": 28, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2015-11-19", + "end": "2015-11-20" + }, + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2016-11-19", + "end": "2016-11-20" + } + ] + } + } + ] + }, + { + "Input": "Ik ga 20 ste van de volgende maand terug", + "Context": { + "ReferenceDateTime": 
"2016-11-07T00:00:00" + }, + "NotSupportedByDesign": "javascript, python, java", + "Results": [ + { + "Text": "20 ste van de volgende maand", + "Start": 6, + "End": 33, + "TypeName": "datetimeV2.date", + "Resolution": { + "values": [ + { + "timex": "2016-12-20", + "type": "date", + "value": "2016-12-20" + } + ] + } + } + ] + }, + { + "Input": "Ik ga terug vier dagen gerekend vanaf gisteren", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "NotSupportedByDesign": "javascript, python, java", + "Results": [ + { + "Text": "vier dagen gerekend vanaf gisteren", + "Start": 12, + "End": 45, + "TypeName": "datetimeV2.date", + "Resolution": { + "values": [ + { + "timex": "2016-11-10", + "type": "date", + "value": "2016-11-10" + } + ] + } + } + ] + }, + { + "Input": "Ik ga terug 4 dagen gerekend vanaf gisteren", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "NotSupportedByDesign": "javascript, python, java", + "Results": [ + { + "Text": "4 dagen gerekend vanaf gisteren", + "Start": 12, + "End": 42, + "TypeName": "datetimeV2.date", + "Resolution": { + "values": [ + { + "timex": "2016-11-10", + "type": "date", + "value": "2016-11-10" + } + ] + } + } + ] + }, + { + "Input": "APEC zal in Korea plaatsvinden jan-feb 2017", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "NotSupportedByDesign": "javascript, python, java", + "Results": [ + { + "Text": "jan-feb 2017", + "Start": 31, + "End": 42, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "(2017-01-01,2017-02-01,P1M)", + "type": "daterange", + "start": "2017-01-01", + "end": "2017-02-01" + } + ] + } + } + ] + }, + { + "Input": "APEC zal in Korea plaatsvinden jan feb 2017", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "NotSupportedByDesign": "javascript, python, java", + "Results": [ + { + "Text": "jan feb 2017", + "Start": 31, + "End": 42, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + 
"timex": "(2017-01-01,2017-02-01,P1M)", + "type": "daterange", + "start": "2017-01-01", + "end": "2017-02-01" + } + ] + } + } + ] + }, + { + "Input": "We ontmoetten elkaar op dinsdag van volgende week", + "Context": { + "ReferenceDateTime": "2019-07-30T00:00:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "op dinsdag van volgende week", + "Start": 21, + "End": 48, + "TypeName": "datetimeV2.date", + "Resolution": { + "values": [ + { + "timex": "2019-08-06", + "type": "date", + "value": "2019-08-06" + } + ] + } + } + ] + }, + { + "Input": "Plan een halfjaarlijkse vergadering in", + "Context": { + "ReferenceDateTime": "2020-06-12T00:00:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "halfjaarlijkse", + "Start": 9, + "End": 22, + "TypeName": "datetimeV2.set", + "Resolution": { + "values": [ + { + "timex": "P0.5Y", + "type": "set", + "value": "not resolved" + } + ] + } + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/Dutch/DateTimePeriodParser.json b/Specs/DateTime/Dutch/DateTimePeriodParser.json index d510bd1569..b5ed8ba10f 100644 --- a/Specs/DateTime/Dutch/DateTimePeriodParser.json +++ b/Specs/DateTime/Dutch/DateTimePeriodParser.json @@ -322,7 +322,7 @@ "Text": "van 2016-2-21 14:00 tot 2016-04-23 03:32", "Type": "datetimerange", "Value": { - "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1478H)", + "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1477H32M)", "FutureResolution": { "startDateTime": "2016-02-21 14:00:00", "endDateTime": "2016-04-23 03:32:00" @@ -1697,7 +1697,7 @@ "Text": "van 2016-02-21 14:00 uur tot 2016-02-23 03:32", "Type": "datetimerange", "Value": { - "Timex": "(2016-02-21T14:00,2016-02-23T03:32,PT38H)", + "Timex": "(2016-02-21T14:00,2016-02-23T03:32,PT37H32M)", "FutureResolution": { "startDateTime": "2016-02-21 14:00:00", "endDateTime": "2016-02-23 03:32:00" @@ -2947,7 +2947,7 @@ "Text": "van 14:00, 21-2-2016 tot 3:32, 23-04-2016", "Type": 
"datetimerange", "Value": { - "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1478H)", + "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1477H32M)", "FutureResolution": { "startDateTime": "2016-02-21 14:00:00", "endDateTime": "2016-04-23 03:32:00" @@ -4056,7 +4056,7 @@ "Text": "van 14:00, 21-2-2016 tot 3:32, 23-04-2016", "Type": "datetimerange", "Value": { - "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1478H)", + "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1477H32M)", "FutureResolution": { "startDateTime": "2016-02-21 14:00:00", "endDateTime": "2016-04-23 03:32:00" diff --git a/Specs/DateTime/English/DateTimeModel.json b/Specs/DateTime/English/DateTimeModel.json index 623fa8d957..a18fbee8a4 100644 --- a/Specs/DateTime/English/DateTimeModel.json +++ b/Specs/DateTime/English/DateTimeModel.json @@ -1,4 +1,4 @@ -[ +[ { "Input": "I'll go back 04th Jan 2019.", "Context": { @@ -18401,6 +18401,55 @@ } ] }, + { + "Input": "deliver 2000 pieces of toys.", + "Comment": "Default mode produces a false positive for it and it's a case from TasksMode.", + "Context": { + "ReferenceDateTime": "2019-12-15T01:00:00" + }, + "NotSupported": "dotnet, javascript, python, java", + "Results": [] + }, + { + "Input": "Show sales of 2007 and 2009", + "Context": { + "ReferenceDateTime": "2018-06-26T00:00:00" + }, + "Results": [ + { + "Text": "2007", + "Start": 14, + "End": 17, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "2007", + "type": "daterange", + "start": "2007-01-01", + "end": "2008-01-01" + } + ] + } + }, + { + "Text": "2009", + "Start": 23, + "End": 26, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "2009", + "type": "daterange", + "start": "2009-01-01", + "end": "2010-01-01" + } + ] + } + } + ] + }, { "Input": "It will happen 3 days from Tuesday.", "Context": { @@ -21270,7 +21319,7 @@ { "Input": "I want to book a flight for the weekend of halloween", "Context": { - "ReferenceDateTime": 
"2021-11-09T18:00:00" + "ReferenceDateTime": "2021-11-09T18:00:00" }, "NotSupported": "java, javascript, python", "Results": [ @@ -21301,7 +21350,7 @@ { "Input": "I want to book a flight for halloween weekend 2021", "Context": { - "ReferenceDateTime": "2021-11-09T18:00:00" + "ReferenceDateTime": "2021-11-09T18:00:00" }, "NotSupported": "java, javascript, python", "Results": [ @@ -21326,7 +21375,7 @@ { "Input": "I need to prepare for labor day weekend", "Context": { - "ReferenceDateTime": "2021-11-09T18:00:00" + "ReferenceDateTime": "2021-11-09T18:00:00" }, "NotSupported": "java, javascript, python", "Results": [ @@ -21357,7 +21406,7 @@ { "Input": "I need to prepare for labor day 2021 weekend", "Context": { - "ReferenceDateTime": "2021-11-09T18:00:00" + "ReferenceDateTime": "2021-11-09T18:00:00" }, "NotSupported": "java, javascript, python", "Results": [ @@ -21382,7 +21431,7 @@ { "Input": "We'll take weekend of thanksgiving 2023 off", "Context": { - "ReferenceDateTime": "2021-11-09T18:00:00" + "ReferenceDateTime": "2021-11-09T18:00:00" }, "NotSupported": "java, javascript, python", "Results": [ @@ -23192,5 +23241,366 @@ } } ] + }, + { + "Input": "I'll be out November 19-20", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "Results": [ + { + "Text": "november 19-20", + "Start": 12, + "End": 25, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2015-11-19", + "end": "2015-11-20" + }, + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2016-11-19", + "end": "2016-11-20" + } + ] + } + } + ] + }, + { + "Input": "Help me book a meeting tonight from 7 to 7:30", + "Context": { + "ReferenceDateTime": "2016-11-07T10:20:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "tonight from 7 to 7:30", + "Start": 23, + "End": 44, + "TypeName": "datetimeV2.datetimerange", + "Resolution": { + 
"values": [ + { + "timex": "(2016-11-07T19,2016-11-07T19:30,PT30M)", + "type": "datetimerange", + "start": "2016-11-07 19:00:00", + "end": "2016-11-07 19:30:00" + } + ] + } + } + ] + }, + { + "Input": "Help me book a meeting this evening from 7 to 7:30", + "Context": { + "ReferenceDateTime": "2016-11-07T10:20:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "this evening from 7 to 7:30", + "Start": 23, + "End": 49, + "TypeName": "datetimeV2.datetimerange", + "Resolution": { + "values": [ + { + "timex": "(2016-11-07T19,2016-11-07T19:30,PT30M)", + "type": "datetimerange", + "start": "2016-11-07 19:00:00", + "end": "2016-11-07 19:30:00" + } + ] + } + } + ] + }, + { + "Input": "Help me book a meeting today from 7 to 7:30", + "Context": { + "ReferenceDateTime": "2016-11-07T10:20:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "today from 7 to 7:30", + "Start": 23, + "End": 42, + "TypeName": "datetimeV2.datetimerange", + "Resolution": { + "values": [ + { + "timex": "(2016-11-07T07,2016-11-07T07:30,PT30M)", + "type": "datetimerange", + "start": "2016-11-07 07:00:00", + "end": "2016-11-07 07:30:00" + }, + { + "timex": "(2016-11-07T19,2016-11-07T19:30,PT30M)", + "type": "datetimerange", + "start": "2016-11-07 19:00:00", + "end": "2016-11-07 19:30:00" + } + ] + } + } + ] + }, + { + "Input": "Help me book a meeting Tuesday from 7 to 7:30", + "Context": { + "ReferenceDateTime": "2016-11-07T10:20:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "tuesday from 7 to 7:30", + "Start": 23, + "End": 44, + "TypeName": "datetimeV2.datetimerange", + "Resolution": { + "values": [ + { + "timex": "(XXXX-WXX-2T07,XXXX-WXX-2T07:30,PT30M)", + "type": "datetimerange", + "start": "2016-11-01 07:00:00", + "end": "2016-11-01 07:30:00" + }, + { + "timex": "(XXXX-WXX-2T07,XXXX-WXX-2T07:30,PT30M)", + "type": "datetimerange", + "start": "2016-11-08 07:00:00", + "end": 
"2016-11-08 07:30:00" + }, + { + "timex": "(XXXX-WXX-2T19,XXXX-WXX-2T19:30,PT30M)", + "type": "datetimerange", + "start": "2016-11-01 19:00:00", + "end": "2016-11-01 19:30:00" + }, + { + "timex": "(XXXX-WXX-2T19,XXXX-WXX-2T19:30,PT30M)", + "type": "datetimerange", + "start": "2016-11-08 19:00:00", + "end": "2016-11-08 19:30:00" + } + ] + } + } + ] + }, + { + "Input": "Help me book a meeting tonight from 7 to 7:30pm", + "Context": { + "ReferenceDateTime": "2016-11-07T10:20:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "tonight from 7 to 7:30pm", + "Start": 23, + "End": 46, + "TypeName": "datetimeV2.datetimerange", + "Resolution": { + "values": [ + { + "timex": "(2016-11-07T19,2016-11-07T19:30,PT30M)", + "type": "datetimerange", + "start": "2016-11-07 19:00:00", + "end": "2016-11-07 19:30:00" + } + ] + } + } + ] + }, + { + "Input": "Help me book a meeting on December 15 2016 from 7:00 to 8:00pm", + "Context": { + "ReferenceDateTime": "2016-11-07T10:20:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "december 15 2016 from 7:00 to 8:00pm", + "Start": 26, + "End": 61, + "TypeName": "datetimeV2.datetimerange", + "Resolution": { + "values": [ + { + "timex": "(2016-12-15T19:00,2016-12-15T20:00,PT1H)", + "type": "datetimerange", + "start": "2016-12-15 19:00:00", + "end": "2016-12-15 20:00:00" + } + ] + } + } + ] + }, + { + "Input": "Help me book a meeting tomorrow from 7:00 to 8:00pm", + "Context": { + "ReferenceDateTime": "2016-11-07T10:20:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "tomorrow from 7:00 to 8:00pm", + "Start": 23, + "End": 50, + "TypeName": "datetimeV2.datetimerange", + "Resolution": { + "values": [ + { + "timex": "(2016-11-08T19:00,2016-11-08T20:00,PT1H)", + "type": "datetimerange", + "start": "2016-11-08 19:00:00", + "end": "2016-11-08 20:00:00" + } + ] + } + } + ] + }, + { + "Input": "help me book a meeting between now 
and eight o'clock", + "Context": { + "ReferenceDateTime": "2016-11-07T10:20:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "between now and eight o'clock", + "Start": 23, + "End": 51, + "TypeName": "datetimeV2.datetimerange", + "Resolution": { + "values": [ + { + "timex": "(PRESENT_REF,2016-11-07T20,PT9H40M)", + "type": "datetimerange", + "start": "2016-11-07 10:20:00", + "end": "2016-11-07 20:00:00" + } + ] + } + } + ] + }, + { + "Input": "How many calls between eight o'clock and now?", + "Context": { + "ReferenceDateTime": "2016-11-07T10:20:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "between eight o'clock and now", + "Start": 15, + "End": 43, + "TypeName": "datetimeV2.datetimerange", + "Resolution": { + "values": [ + { + "timex": "(2016-11-07T08,PRESENT_REF,PT2H20M)", + "type": "datetimerange", + "start": "2016-11-07 08:00:00", + "end": "2016-11-07 10:20:00" + } + ] + } + } + ] + }, + { + "Input": "I'll be away from 2pm till tomorrow 4:30pm", + "Comment": "It should be resolved as 'from 2pm today till tomorrow 4:30pm'", + "Context": { + "ReferenceDateTime": "2016-11-07T10:20:00" + }, + "NotSupported": "dotnet, java, javascript, python", + "Results": [ + { + "Text": "from 2pm till tomorrow 4:30pm", + "Start": 13, + "End": 41, + "TypeName": "datetimeV2.datetimerange", + "Resolution": { + "values": [ + { + "timex": "(2016-11-07T14,2016-11-08T16:30,PT26H30M)", + "type": "datetimerange", + "start": "2016-11-07 14:00:00", + "end": "2016-11-08 16:30:00" + } + ] + } + } + ] + }, + { + "Input": "I'll be away between 2 and 4:30pm tomorrow", + "Context": { + "ReferenceDateTime": "2016-11-07T10:20:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "between 2 and 4:30pm tomorrow", + "Start": 13, + "End": 41, + "TypeName": "datetimeV2.datetimerange", + "Resolution": { + "values": [ + { + "timex": "(2016-11-08T14,2016-11-08T16:30,PT2H30M)", +
"type": "datetimerange", + "start": "2016-11-08 14:00:00", + "end": "2016-11-08 16:30:00" + } + ] + } + } + ] + }, + { + "Input": "I'll be away between 2:00 tomorrow and 4:00", + "Context": { + "ReferenceDateTime": "2016-11-07T10:20:00" + }, + "NotSupportedByDesign": "java, javascript, python", + "Results": [ + { + "Text": "between 2:00 tomorrow and 4:00", + "Start": 13, + "End": 42, + "TypeName": "datetimeV2.datetimerange", + "Resolution": { + "values": [ + { + "timex": "(2016-11-08T02:00,2016-11-08T04:00,PT2H)", + "type": "datetimerange", + "start": "2016-11-08 02:00:00", + "end": "2016-11-08 04:00:00" + }, + { + "timex": "(2016-11-08T14:00,2016-11-08T16:00,PT2H)", + "type": "datetimerange", + "start": "2016-11-08 14:00:00", + "end": "2016-11-08 16:00:00" + } + ] + } + } + ] } ] diff --git a/Specs/DateTime/English/DateTimeModelCalendarMode.json b/Specs/DateTime/English/DateTimeModelCalendarMode.json index 9656927500..16465b8d30 100644 --- a/Specs/DateTime/English/DateTimeModelCalendarMode.json +++ b/Specs/DateTime/English/DateTimeModelCalendarMode.json @@ -1320,5 +1320,35 @@ } } ] + }, + { + "Input": "I'll be out November 19-20", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "Results": [ + { + "Text": "november 19-20", + "Start": 12, + "End": 25, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2015-11-19", + "end": "2015-11-20" + }, + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2016-11-19", + "end": "2016-11-20" + } + ] + } + } + ] } ] diff --git a/Specs/DateTime/English/DateTimeModelComplexCalendar.json b/Specs/DateTime/English/DateTimeModelComplexCalendar.json index 64c7885866..e35a9ae5fa 100644 --- a/Specs/DateTime/English/DateTimeModelComplexCalendar.json +++ b/Specs/DateTime/English/DateTimeModelComplexCalendar.json @@ -13570,5 +13570,35 @@ } } ] + }, + { + "Input": "I'll be out November 19-20", + 
"Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "Results": [ + { + "Text": "november 19-20", + "Start": 12, + "End": 25, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2015-11-19", + "end": "2015-11-20" + }, + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2016-11-19", + "end": "2016-11-20" + } + ] + } + } + ] } ] diff --git a/Specs/DateTime/English/DateTimeModelExperimentalMode.json b/Specs/DateTime/English/DateTimeModelExperimentalMode.json index 6c413863e4..c04df16715 100644 --- a/Specs/DateTime/English/DateTimeModelExperimentalMode.json +++ b/Specs/DateTime/English/DateTimeModelExperimentalMode.json @@ -7961,5 +7961,35 @@ } } ] + }, + { + "Input": "I'll be out November 19-20", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "Results": [ + { + "Text": "november 19-20", + "Start": 12, + "End": 25, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2015-11-19", + "end": "2015-11-20" + }, + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2016-11-19", + "end": "2016-11-20" + } + ] + } + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/English/DateTimeModelExtendedTypes.json b/Specs/DateTime/English/DateTimeModelExtendedTypes.json index 8b5e982aab..4fb1f0261b 100644 --- a/Specs/DateTime/English/DateTimeModelExtendedTypes.json +++ b/Specs/DateTime/English/DateTimeModelExtendedTypes.json @@ -1976,5 +1976,35 @@ } } ] + }, + { + "Input": "I'll be out November 19-20", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "Results": [ + { + "Text": "november 19-20", + "Start": 12, + "End": 25, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": 
"2015-11-19", + "end": "2015-11-20" + }, + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2016-11-19", + "end": "2016-11-20" + } + ] + } + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/English/DateTimeModelSplitDateAndTime.json b/Specs/DateTime/English/DateTimeModelSplitDateAndTime.json index d537b110e4..0456d3fe6b 100644 --- a/Specs/DateTime/English/DateTimeModelSplitDateAndTime.json +++ b/Specs/DateTime/English/DateTimeModelSplitDateAndTime.json @@ -1242,5 +1242,35 @@ } } ] + }, + { + "Input": "I'll be out November 19-20", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "Results": [ + { + "Text": "november 19-20", + "Start": 12, + "End": 25, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2015-11-19", + "end": "2015-11-20" + }, + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2016-11-19", + "end": "2016-11-20" + } + ] + } + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/English/DateTimeModelTasksMode.json b/Specs/DateTime/English/DateTimeModelTasksMode.json new file mode 100644 index 0000000000..6ba1fcee46 --- /dev/null +++ b/Specs/DateTime/English/DateTimeModelTasksMode.json @@ -0,0 +1,92 @@ +[ + { + "Input": "How about year 2016 and greater", + "Comment": "Taskmode will only supress 4 digit number from getting be categorised as year, this eg have year mentioned along with 4 digit number", + "Context": { + "ReferenceDateTime": "2018-08-31T12:00:00" + }, + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "year 2016 and greater", + "Start": 10, + "End": 30, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "Mod": "since", + "sourceEntity": "datetimerange", + "start": "2016-01-01", + "timex": "2016", + "type": "daterange" + } + ] + } + } + ] + }, + { + "Input": "I'll go back 04th Jan 2019.", + 
"Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "Results": [ + { + "Text": "04th jan 2019", + "Start": 13, + "End": 25, + "TypeName": "datetimeV2.date", + "Resolution": { + "values": [ + { + "timex": "2019-01-04", + "type": "date", + "value": "2019-01-04" + } + ] + } + } + ] + }, + { + "Input": "Show sales in two thousand and twenty", + "Context": { + "ReferenceDateTime": "2018-06-26T00:00:00" + }, + "NotSupported": "javascript, python, java", + "Results": [] + }, + { + "Input": "The range is from 2015 and 2016", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "NotSupported": "javascript, python, java", + "Results": [] + }, + { + "Input": "deliver 2000 pieces of toys.", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "NotSupported": "javascript, python, java", + "Results": [] + }, + { + "Input": "Show sales of 2007 and 2009", + "Context": { + "ReferenceDateTime": "2018-06-26T00:00:00" + }, + "NotSupported": "javascript, python, java", + "Results": [] + }, + { + "Input": "show me sales before 2010 or after 2018", + "Context": { + "ReferenceDateTime": "2018-05-31T00:00:00" + }, + "NotSupported": "javascript, python, java", + "Results": [] + } +] diff --git a/Specs/DateTime/English/DateTimePeriodParser.json b/Specs/DateTime/English/DateTimePeriodParser.json index 0f04142e7d..a44b6916e9 100644 --- a/Specs/DateTime/English/DateTimePeriodParser.json +++ b/Specs/DateTime/English/DateTimePeriodParser.json @@ -237,7 +237,7 @@ "Text": "from 2:00pm, 2016-2-21 to 3:32, 04/23/2016", "Type": "datetimerange", "Value": { - "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1478H)", + "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1477H32M)", "FutureResolution": { "startDateTime": "2016-02-21 14:00:00", "endDateTime": "2016-04-23 03:32:00" diff --git a/Specs/DateTime/French/DateTimeModel.json b/Specs/DateTime/French/DateTimeModel.json index 3f80394231..a450fa98d0 100644 --- a/Specs/DateTime/French/DateTimeModel.json +++ 
b/Specs/DateTime/French/DateTimeModel.json @@ -1,4 +1,4 @@ -[ +[ { "Input": "Je vais sortir cette décembre", "Context": { @@ -21556,5 +21556,36 @@ } } ] + }, + { + "Input": "je serai de sortie novembre 19-20", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "NotSupportedByDesign": "javascript, python, java", + "Results": [ + { + "Text": "novembre 19-20", + "Start": 19, + "End": 32, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2015-11-19", + "end": "2015-11-20" + }, + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2016-11-19", + "end": "2016-11-20" + } + ] + } + } + ] } ] diff --git a/Specs/DateTime/German/DateTimeModel.json b/Specs/DateTime/German/DateTimeModel.json index dbc6e8fc96..fd33139393 100644 --- a/Specs/DateTime/German/DateTimeModel.json +++ b/Specs/DateTime/German/DateTimeModel.json @@ -1,4 +1,4 @@ -[ +[ { "Input": "Ich komme am 2. 
Oktober zurück.", "Context": { @@ -5234,5 +5234,36 @@ } } ] + }, + { + "Input": "Ich bin November 19-20 unterwegs", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "NotSupportedByDesign": "javascript, python, java", + "Results": [ + { + "Text": "november 19-20", + "Start": 8, + "End": 21, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2015-11-19", + "end": "2015-11-20" + }, + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2016-11-19", + "end": "2016-11-20" + } + ] + } + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/Hindi/DateTimePeriodParser.json b/Specs/DateTime/Hindi/DateTimePeriodParser.json index 86c064182a..76607af244 100644 --- a/Specs/DateTime/Hindi/DateTimePeriodParser.json +++ b/Specs/DateTime/Hindi/DateTimePeriodParser.json @@ -296,7 +296,7 @@ "Text": "दोपहर 2:00 बजे, 2016-2-21 से 3:32, 04/23/2016", "Type": "datetimerange", "Value": { - "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1478H)", + "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1477H32M)", "FutureResolution": { "startDateTime": "2016-02-21 14:00:00", "endDateTime": "2016-04-23 03:32:00" @@ -1426,7 +1426,7 @@ "Text": "2:00 PM, 2016-2-21 से 3:32, 04/23/2016", "Type": "datetimerange", "Value": { - "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1478H)", + "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1477H32M)", "FutureResolution": { "startDateTime": "2016-02-21 14:00:00", "endDateTime": "2016-04-23 03:32:00" diff --git a/Specs/DateTime/Italian/DateTimeModel.json b/Specs/DateTime/Italian/DateTimeModel.json index b8678958fa..ef93fd6edd 100644 --- a/Specs/DateTime/Italian/DateTimeModel.json +++ b/Specs/DateTime/Italian/DateTimeModel.json @@ -3077,5 +3077,36 @@ } } ] + }, + { + "Input": "Sarò via novembre 19-20", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "NotSupportedByDesign": "javascript, python, 
java", + "Results": [ + { + "Text": "novembre 19-20", + "Start": 9, + "End": 22, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2015-11-19", + "end": "2015-11-20" + }, + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2016-11-19", + "end": "2016-11-20" + } + ] + } + } + ] } ] diff --git a/Specs/DateTime/Italian/DateTimePeriodParser.json b/Specs/DateTime/Italian/DateTimePeriodParser.json index 386f6fb00a..e5e3f72b71 100644 --- a/Specs/DateTime/Italian/DateTimePeriodParser.json +++ b/Specs/DateTime/Italian/DateTimePeriodParser.json @@ -244,7 +244,7 @@ "Text": "dalle 2:00pm del 2016-2-21 alle 3:32 del 04/23/2016", "Type": "datetimerange", "Value": { - "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1478H)", + "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1477H32M)", "FutureResolution": { "startDateTime": "2016-02-21 14:00:00", "endDateTime": "2016-04-23 03:32:00" diff --git a/Specs/DateTime/Japanese/DateParser.json b/Specs/DateTime/Japanese/DateParser.json index 6f45e3cf9b..26df36d082 100644 --- a/Specs/DateTime/Japanese/DateParser.json +++ b/Specs/DateTime/Japanese/DateParser.json @@ -2045,14 +2045,14 @@ ] }, { - "Input": "私は今から2週間以内に戻ります", + "Input": "私は今から2週間でに戻ります", "Context": { "ReferenceDateTime": "2016-11-07T00:00:00" }, "NotSupportedByDesign": "javascript, python, java", "Results": [ { - "Text": "今から2週間以内", + "Text": "今から2週間で", "Type": "date", "Value": { "Timex": "2016-11-21", @@ -2064,7 +2064,7 @@ } }, "Start": 2, - "Length": 8 + "Length": 7 } ] }, diff --git a/Specs/DateTime/Japanese/DatePeriodParser.json b/Specs/DateTime/Japanese/DatePeriodParser.json index 505a319efd..7e9454beec 100644 --- a/Specs/DateTime/Japanese/DatePeriodParser.json +++ b/Specs/DateTime/Japanese/DatePeriodParser.json @@ -6347,5 +6347,31 @@ "Length": 9 } ] + }, + { + "Input": "私は今から2週間以内に戻ります", + "Context": { + "ReferenceDateTime": "2018-05-29T00:00:00" 
+ }, + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "2週間以内に", + "Type": "daterange", + "Value": { + "Timex": "(2018-05-29,2018-06-12,P2W)", + "FutureResolution": { + "startDate": "2018-05-29", + "endDate": "2018-06-12" + }, + "PastResolution": { + "startDate": "2018-05-29", + "endDate": "2018-06-12" + } + }, + "Start": 5, + "Length": 6 + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/Japanese/DateTimeParser.json b/Specs/DateTime/Japanese/DateTimeParser.json index 47d3549524..8cf875da76 100644 --- a/Specs/DateTime/Japanese/DateTimeParser.json +++ b/Specs/DateTime/Japanese/DateTimeParser.json @@ -1967,5 +1967,125 @@ "Length": 5 } ] + }, + { + "Input": "私は2016年12月22日25時に戻ります", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "2016年12月22日25時", + "Type": "datetime", + "Value": { + "Timex": "2016-12-22T25", + "FutureResolution": { + "dateTime": "2016-12-23 01:00:00" + }, + "PastResolution": { + "dateTime": "2016-12-23 01:00:00" + } + }, + "Start": 2, + "Length": 14 + } + ] + }, + { + "Input": "私は2016年12月22日26時に戻ります", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "2016年12月22日26時", + "Type": "datetime", + "Value": { + "Timex": "2016-12-22T26", + "FutureResolution": { + "dateTime": "2016-12-23 02:00:00" + }, + "PastResolution": { + "dateTime": "2016-12-23 02:00:00" + } + }, + "Start": 2, + "Length": 14 + } + ] + }, + { + "Input": "私は15日の25時に戻ります", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "15日の25時", + "Type": "datetime", + "Value": { + "Timex": "XXXX-XX-15T25", + "FutureResolution": { + "dateTime": "2016-11-16 01:00:00" + }, + "PastResolution": { + "dateTime": "2016-10-16 01:00:00" + } + }, + "Start": 2, + "Length": 7 + } + ] 
+ }, + { + "Input": "私は30日の28時に戻ります", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "30日の28時", + "Type": "datetime", + "Value": { + "Timex": "XXXX-XX-30T28", + "FutureResolution": { + "dateTime": "2016-12-01 04:00:00" + }, + "PastResolution": { + "dateTime": "2016-10-31 04:00:00" + } + }, + "Start": 2, + "Length": 7 + } + ] + }, + { + "Input": "私は30日の28時45分に戻ります", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "30日の28時45分", + "Type": "datetime", + "Value": { + "Timex": "XXXX-XX-30T28:45", + "FutureResolution": { + "dateTime": "2016-12-01 04:45:00" + }, + "PastResolution": { + "dateTime": "2016-10-31 04:45:00" + } + }, + "Start": 2, + "Length": 10 + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/Japanese/DateTimePeriodParser.json b/Specs/DateTime/Japanese/DateTimePeriodParser.json index d598ca0dae..82934f09e8 100644 --- a/Specs/DateTime/Japanese/DateTimePeriodParser.json +++ b/Specs/DateTime/Japanese/DateTimePeriodParser.json @@ -218,7 +218,7 @@ "Text": "今日の午後4時から明日の午後5時まで", "Type": "datetimerange", "Value": { - "Timex": "(2016-11-07T16:00:00,2016-11-08T17:00:00,PT25H)", + "Timex": "(2016-11-07T16,2016-11-08T17,PT25H)", "FutureResolution": { "startDateTime": "2016-11-07 16:00:00", "endDateTime": "2016-11-08 17:00:00" @@ -244,7 +244,7 @@ "Text": "2016年2月21日の午後2時から2016年4月23日3時32分まで", "Type": "datetimerange", "Value": { - "Timex": "(2016-02-21T14:00:00,2016-04-23T03:32:00,PT1478H)", + "Timex": "(2016-02-21T14,2016-04-23T03:32,PT1477H32M)", "FutureResolution": { "startDateTime": "2016-02-21 14:00:00", "endDateTime": "2016-04-23 03:32:00" @@ -296,7 +296,7 @@ "Text": "2016年1月1日の午後4時から今日の午後5時まで", "Type": "datetimerange", "Value": { - "Timex": "(2016-01-01T16:00:00,2016-11-07T17:00:00,PT7465H)", + "Timex": "(2016-01-01T16,2016-11-07T17,PT7465H)", 
"FutureResolution": { "startDateTime": "2016-01-01 16:00:00", "endDateTime": "2016-11-07 17:00:00" @@ -1050,7 +1050,7 @@ "Text": "2016年2月21日午後2時から2016年4月23日3時32分まで", "Type": "datetimerange", "Value": { - "Timex": "(2016-02-21T14:00:00,2016-04-23T03:32:00,PT1478H)", + "Timex": "(2016-02-21T14,2016-04-23T03:32,PT1477H32M)", "FutureResolution": { "startDateTime": "2016-02-21 14:00:00", "endDateTime": "2016-04-23 03:32:00" @@ -1778,7 +1778,7 @@ "Text": "2時-明日の4時", "Type": "datetimerange", "Value": { - "Timex": "(2016-11-07T02:00:00,2016-11-08T04:00:00,PT26H)", + "Timex": "(2016-11-07T02,2016-11-08T04,PT26H)", "FutureResolution": { "startDateTime": "2016-11-07 02:00:00", "endDateTime": "2016-11-08 04:00:00" @@ -1856,7 +1856,7 @@ "Text": "今夜7時から7時30分まで", "Type": "datetimerange", "Value": { - "Timex": "(2016-11-07T19:00:00,2016-11-07T19:30:00,PT0H)", + "Timex": "(2016-11-07T19,2016-11-07T19:30,PT30M)", "FutureResolution": { "startDateTime": "2016-11-07 19:00:00", "endDateTime": "2016-11-07 19:30:00" @@ -1960,7 +1960,7 @@ "Text": "今夜8時から9時まで", "Type": "datetimerange", "Value": { - "Timex": "(2016-11-07T20:00:00,2016-11-07T21:00:00,PT1H)", + "Timex": "(2016-11-07T20,2016-11-07T21,PT1H)", "FutureResolution": { "startDateTime": "2016-11-07 20:00:00", "endDateTime": "2016-11-07 21:00:00" @@ -2038,7 +2038,7 @@ "Text": "今から8時まで", "Type": "datetimerange", "Value": { - "Timex": "(2016-11-07T16:12:00,2016-11-07T20:00:00,PT4H)", + "Timex": "(PRESENT_REF,2016-11-07T20,PT3H48M)", "FutureResolution": { "startDateTime": "2016-11-07 16:12:00", "endDateTime": "2016-11-07 20:00:00" @@ -2312,7 +2312,7 @@ "Text": "昨日の午後2時から明日4時まで", "Type": "datetimerange", "Value": { - "Timex": "(2016-11-06T14:00:00,2016-11-08T04:00:00,PT38H)", + "Timex": "(2016-11-06T14,2016-11-08T04,PT38H)", "FutureResolution": { "startDateTime": "2016-11-06 14:00:00", "endDateTime": "2016-11-08 04:00:00" @@ -2456,5 +2456,135 @@ "Length": 6 } ] + }, + { + "Input": "2016年1月1日の5時から25時まで不在にします。", + "Context": { + 
"ReferenceDateTime": "2016-11-07T16:12:00" + }, + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "2016年1月1日の5時から25時まで", + "Type": "datetimerange", + "Value": { + "Timex": "(2016-01-01T05,2016-01-01T25,PT20H)", + "FutureResolution": { + "startDateTime": "2016-01-01 05:00:00", + "endDateTime": "2016-01-02 01:00:00" + }, + "PastResolution": { + "startDateTime": "2016-01-01 05:00:00", + "endDateTime": "2016-01-02 01:00:00" + } + }, + "Start": 0, + "Length": 19 + } + ] + }, + { + "Input": "2016年1月31日の5時から25時まで不在にします。", + "Context": { + "ReferenceDateTime": "2016-11-07T16:12:00" + }, + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "2016年1月31日の5時から25時まで", + "Type": "datetimerange", + "Value": { + "Timex": "(2016-01-31T05,2016-01-31T25,PT20H)", + "FutureResolution": { + "startDateTime": "2016-01-31 05:00:00", + "endDateTime": "2016-02-01 01:00:00" + }, + "PastResolution": { + "startDateTime": "2016-01-31 05:00:00", + "endDateTime": "2016-02-01 01:00:00" + } + }, + "Start": 0, + "Length": 20 + } + ] + }, + { + "Input": "2016年1月1日の25時から29時まで不在にします。", + "Context": { + "ReferenceDateTime": "2016-11-07T16:12:00" + }, + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "2016年1月1日の25時から29時まで", + "Type": "datetimerange", + "Value": { + "Timex": "(2016-01-01T25,2016-01-01T29,PT4H)", + "FutureResolution": { + "startDateTime": "2016-01-02 01:00:00", + "endDateTime": "2016-01-02 05:00:00" + }, + "PastResolution": { + "startDateTime": "2016-01-02 01:00:00", + "endDateTime": "2016-01-02 05:00:00" + } + }, + "Start": 0, + "Length": 20 + } + ] + }, + { + "Input": "月曜日の8時から29時に戻ってきます。", + "Context": { + "ReferenceDateTime": "2018-04-19T08:12:00" + }, + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "月曜日の8時から29時", + "Type": "datetimerange", + "Value": { + "Timex": "(XXXX-WXX-1T08,XXXX-WXX-1T29,PT21H)", + "FutureResolution": { + "startDateTime": "2018-04-23 08:00:00", 
+ "endDateTime": "2018-04-24 05:00:00" + }, + "PastResolution": { + "startDateTime": "2018-04-16 08:00:00", + "endDateTime": "2018-04-17 05:00:00" + } + }, + "Start": 0, + "Length": 11 + } + ] + }, + { + "Input": "月曜日の26時30分から29時に戻ってきます。", + "Context": { + "ReferenceDateTime": "2018-04-19T08:12:00" + }, + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "月曜日の26時30分から29時", + "Type": "datetimerange", + "Value": { + "Timex": "(XXXX-WXX-1T26:30,XXXX-WXX-1T29,PT2H30M)", + "FutureResolution": { + "startDateTime": "2018-04-24 02:30:00", + "endDateTime": "2018-04-24 05:00:00" + }, + "PastResolution": { + "startDateTime": "2018-04-17 02:30:00", + "endDateTime": "2018-04-17 05:00:00" + } + }, + "Start": 0, + "Length": 15 + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/Japanese/MergedExtractor.json b/Specs/DateTime/Japanese/MergedExtractor.json index f5074d3023..c2f631032f 100644 --- a/Specs/DateTime/Japanese/MergedExtractor.json +++ b/Specs/DateTime/Japanese/MergedExtractor.json @@ -1,12 +1,11 @@ [ { "Input": "これは2日です。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { "Text": "2日", - "Type": "duration", + "Type": "date", "Start": 3, "Length": 2 } @@ -14,7 +13,6 @@ }, { "Input": "これは午後4時前です。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -27,7 +25,6 @@ }, { "Input": "これは明日午後4時前です。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -40,7 +37,6 @@ }, { "Input": "これは午後4時以降です。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -53,7 +49,6 @@ }, { "Input": "これは明日午後4時以降です。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -66,7 +61,6 @@ }, { "Input": "5分で戻ります。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -79,7 +73,6 @@ }, { "Input": "この1週間", - 
"NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -92,20 +85,18 @@ }, { "Input": "10時間後に会議の予定を入れて。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { - "Text": "10時間後に", + "Text": "10時間後", "Type": "datetime", "Start": 0, - "Length": 6 + "Length": 5 } ] }, { "Input": "この日はどうですか。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -118,7 +109,6 @@ }, { "Input": "この週はどうですか。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -131,7 +121,6 @@ }, { "Input": "私の一週間はどんなかんじですか。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -144,7 +133,6 @@ }, { "Input": "その週はどんなかんじですか。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -157,7 +145,6 @@ }, { "Input": "私の一日はどんなかんじですか。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -170,7 +157,6 @@ }, { "Input": "その日はどんなかんじですか。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -183,7 +169,6 @@ }, { "Input": "午前9時から11時まで会議の予定を入れて。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -196,7 +181,6 @@ }, { "Input": "明日の午前9時から11時まで会議の予定を入れて。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -209,7 +193,6 @@ }, { "Input": "7月22日のベルビューでの会議を8月22日に変更。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -228,7 +211,6 @@ }, { "Input": "7月2日以降", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -241,7 +223,6 @@ }, { "Input": "7月2日から", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -254,7 +235,6 @@ }, { "Input": "7月2日以前", - "NotSupported": "dotnet", 
"NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -267,7 +247,6 @@ }, { "Input": "6月6日 12時15分", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -280,7 +259,6 @@ }, { "Input": "2012年6月6日 15時15分", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -293,7 +271,6 @@ }, { "Input": "5月29日", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -306,7 +283,6 @@ }, { "Input": "3月29日", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -319,7 +295,6 @@ }, { "Input": "私は3月に生まれました。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -332,7 +307,6 @@ }, { "Input": "5月に何が起きたのですか。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -345,13 +319,11 @@ }, { "Input": "notapplicable", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [] }, { "Input": "午後3時の予定を4時に変更します。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -370,7 +342,6 @@ }, { "Input": "午前10時の予定を11時に変更します。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -389,7 +360,6 @@ }, { "Input": "午前10時の予定を20時に変更します。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -408,7 +378,6 @@ }, { "Input": "午前10時の予定を13時に変更します。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -427,7 +396,6 @@ }, { "Input": "午前10時の予定を0時に変更します。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -446,7 +414,6 @@ }, { "Input": "午前10時の予定を24時に変更します。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -465,7 +432,6 @@ }, { "Input": "午前10時の予定を4時に変更しますが、どうでしょうか。", 
- "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -483,8 +449,7 @@ ] }, { - "Input": "午前10時の予定を4.3時に変更します。", - "NotSupported": "dotnet", + "Input": "午前10時の予定を4.3に変更します。", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -497,7 +462,6 @@ }, { "Input": "午前10時の予定を26時に変更します。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -505,12 +469,17 @@ "Type": "time", "Start": 0, "Length": 5 + }, + { + "Text": "26時", + "Type": "time", + "Start": 9, + "Length": 3 } ] }, { "Input": "午前10時の予定を4時以降に変更します。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -518,12 +487,17 @@ "Type": "time", "Start": 0, "Length": 5 + }, + { + "Text": "4時以降", + "Type": "time", + "Start": 9, + "Length": 4 } ] }, { "Input": "午前10時の予定を25時に変更します。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -531,12 +505,17 @@ "Type": "time", "Start": 0, "Length": 5 + }, + { + "Text": "25時", + "Type": "time", + "Start": 9, + "Length": 3 } ] }, { "Input": "次の会議は2017年3月16日に開かれますが、今日の午後2時に話し合うのはいかがでしょうか。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -555,7 +534,6 @@ }, { "Input": "2018年4月1日、今日の午後2時に計画できます。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -574,7 +552,6 @@ }, { "Input": "範囲は2012年以前です。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -587,7 +564,6 @@ }, { "Input": "範囲は2012年までです。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -600,7 +576,6 @@ }, { "Input": "範囲は2012年以降です。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -613,7 +588,6 @@ }, { "Input": "2016年11月は不在にします。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { 
@@ -626,7 +600,6 @@ }, { "Input": "2016年11月", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -639,7 +612,6 @@ }, { "Input": "彼は2016年1月1日またはそれ以降に到着する予定です", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -652,7 +624,6 @@ }, { "Input": "彼は2016年1月1日またはそれ以前に出発します。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -665,7 +636,6 @@ }, { "Input": "この仕事は2016年1月1日またはそれ以前に完了します。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -678,7 +648,6 @@ }, { "Input": "この仕事は2018年2月またはそれ以前に完了します。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -691,7 +660,6 @@ }, { "Input": "あなたは2016年またはそれ以前には出発できません。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -704,7 +672,6 @@ }, { "Input": "今日の午後6時30分以降に退社できます。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -717,7 +684,6 @@ }, { "Input": "あなたは明後日またはその前に出発する必要があります。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -730,7 +696,6 @@ }, { "Input": "あなたは2018年5月15日の午後3時またはそれ以前に出発する必要があります。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -743,7 +708,6 @@ }, { "Input": "今日から2日後のご都合はいかがでしょうか。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -756,7 +720,6 @@ }, { "Input": "明日から3週間のご都合はいかがでしょうか。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -769,7 +732,6 @@ }, { "Input": "昨日の2日前、あなたはどこにいましたか。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -782,20 +744,18 @@ }, { "Input": "今日から2週間以上前にもうすべての仕事を終えた。", - "NotSupported": "dotnet", "NotSupportedByDesign": 
"javascript,python,java", "Results": [ { - "Text": "今日から2週間以上", + "Text": "今日から2週間以上前", "Type": "daterange", "Start": 0, - "Length": 9 + "Length": 10 } ] }, { "Input": "今日から2週間以内に戻ってきます。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -808,7 +768,6 @@ }, { "Input": "この仕事は昨日の2日以上前には終わらせておくべきだった。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -821,7 +780,6 @@ }, { "Input": "この仕事は明日から3日以内に終わらせます。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -834,7 +792,6 @@ }, { "Input": "今から3分後に始めましょう。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ { @@ -847,26 +804,25 @@ }, { "Input": "今日から3分始めましょう。", - "NotSupported": "dotnet", "NotSupportedByDesign": "javascript,python,java", "Results": [ - { - "Text": "3分", - "Type": "duration", - "Start": 4, - "Length": 2 - }, { "Text": "今日", "Type": "date", "Start": 0, "Length": 2 + }, + { + "Text": "3分", + "Type": "duration", + "Start": 4, + "Length": 2 } ] }, { "Input": "今日夜十時頃以降、マイクロソフトビルの入り口で会いましょう", - "NotSupported": "dotnet, javascript, python, java", + "NotSupported": "javascript, python, java", "Results": [ { "Text": "今日夜十時頃以降", @@ -878,7 +834,7 @@ }, { "Input": "今日十時頃以降、マイクロソフトビルの入り口で会いましょう", - "NotSupported": "dotnet, javascript, python, java", + "NotSupported": "javascript, python, java", "Results": [ { "Text": "今日十時頃以降", @@ -887,5 +843,29 @@ "Length": 7 } ] + }, + { + "Input": "これは2日間です。", + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "2日間", + "Type": "duration", + "Start": 3, + "Length": 3 + } + ] + }, + { + "Input": "午前10時の予定を4以降に変更します。", + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "午前10時", + "Type": "time", + "Start": 0, + "Length": 5 + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/Japanese/MergedParser.json 
b/Specs/DateTime/Japanese/MergedParser.json index d667674233..65ee094ae2 100644 --- a/Specs/DateTime/Japanese/MergedParser.json +++ b/Specs/DateTime/Japanese/MergedParser.json @@ -4,7 +4,7 @@ "Context": { "ReferenceDateTime": "2020-01-05T00:00:00" }, - "NotSupported": "dotnet, javascript, python, java", + "NotSupported": "javascript, python, java", "Results": [ { "Text": "今日夜十時ぐらい以降", @@ -30,7 +30,7 @@ "Context": { "ReferenceDateTime": "2020-01-05T00:00:00" }, - "NotSupported": "dotnet, javascript, python, java", + "NotSupported": "javascript, python, java", "Results": [ { "Text": "十時ぐらい以降", @@ -63,7 +63,7 @@ "Context": { "ReferenceDateTime": "2020-01-05T00:00:00" }, - "NotSupported": "dotnet, javascript, python, java", + "NotSupported": "javascript, python, java", "Results": [ { "Text": "夜十時ぐらい以降", @@ -83,5 +83,107 @@ "Length": 8 } ] + }, + { + "Input": "午前10時の予定を25時に変更します。", + "Context": { + "ReferenceDateTime": "2020-01-05T00:00:00" + }, + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "午前10時", + "Type": "datetimeV2.time", + "Value": { + "values": [ + { + "timex": "T10", + "type": "time", + "value": "10:00:00" + } + ] + }, + "Start": 0, + "Length": 5 + }, + { + "Text": "25時", + "Type": "datetimeV2.time", + "Value": { + "values": [ + { + "timex": "T25", + "type": "time", + "value": "01:00:00" + } + ] + }, + "Start": 9, + "Length": 3 + } + ] + }, + { + "Input": "午前10時の予定を28時に変更します。", + "Context": { + "ReferenceDateTime": "2020-01-05T00:00:00" + }, + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "午前10時", + "Type": "datetimeV2.time", + "Value": { + "values": [ + { + "timex": "T10", + "type": "time", + "value": "10:00:00" + } + ] + }, + "Start": 0, + "Length": 5 + }, + { + "Text": "28時", + "Type": "datetimeV2.time", + "Value": { + "values": [ + { + "timex": "T28", + "type": "time", + "value": "04:00:00" + } + ] + }, + "Start": 9, + "Length": 3 + } + ] + }, + { + "Input": "午前10時の予定を31時に変更します。", + "Context": { + 
"ReferenceDateTime": "2020-01-05T00:00:00" + }, + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "午前10時", + "Type": "datetimeV2.time", + "Value": { + "values": [ + { + "timex": "T10", + "type": "time", + "value": "10:00:00" + } + ] + }, + "Start": 0, + "Length": 5 + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/Japanese/TimeParser.json b/Specs/DateTime/Japanese/TimeParser.json index 727c4de697..d45af48a90 100644 --- a/Specs/DateTime/Japanese/TimeParser.json +++ b/Specs/DateTime/Japanese/TimeParser.json @@ -1636,5 +1636,68 @@ "Length": 4 } ] + }, + { + "Input": "アラームを25時40分にセットして。", + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "25時40分", + "Type": "time", + "Value": { + "Timex": "T25:40", + "FutureResolution": { + "time": "01:40:00" + }, + "PastResolution": { + "time": "01:40:00" + } + }, + "Start": 5, + "Length": 6 + } + ] + }, + { + "Input": "アラームを26時45分にセットして。", + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "26時45分", + "Type": "time", + "Value": { + "Timex": "T26:45", + "FutureResolution": { + "time": "02:45:00" + }, + "PastResolution": { + "time": "02:45:00" + } + }, + "Start": 5, + "Length": 6 + } + ] + }, + { + "Input": "私は8:00:24pmに戻ります", + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "8:00:24pm", + "Type": "time", + "Value": { + "Timex": "T20:00:24", + "FutureResolution": { + "time": "20:00:24" + }, + "PastResolution": { + "time": "20:00:24" + } + }, + "Start": 2, + "Length": 9 + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/Japanese/TimePeriodParser.json b/Specs/DateTime/Japanese/TimePeriodParser.json index ac791561ab..b45cc93ec6 100644 --- a/Specs/DateTime/Japanese/TimePeriodParser.json +++ b/Specs/DateTime/Japanese/TimePeriodParser.json @@ -1669,5 +1669,83 @@ "Length": 10 } ] + }, + { + "Input": "4時から25時まで。", + "Context": { + "ReferenceDateTime": "2016-11-07T16:12:00" + }, + 
"NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "4時から25時まで", + "Type": "timerange", + "Value": { + "Timex": "(T04,T25,PT21H)", + "FutureResolution": { + "startTime": "04:00:00", + "endTime": "01:00:00" + }, + "PastResolution": { + "startTime": "04:00:00", + "endTime": "01:00:00" + } + }, + "Start": 0, + "Length": 9 + } + ] + }, + { + "Input": "4時から26時まで。", + "Context": { + "ReferenceDateTime": "2016-11-07T16:12:00" + }, + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "4時から26時まで", + "Type": "timerange", + "Value": { + "Timex": "(T04,T26,PT22H)", + "FutureResolution": { + "startTime": "04:00:00", + "endTime": "02:00:00" + }, + "PastResolution": { + "startTime": "04:00:00", + "endTime": "02:00:00" + } + }, + "Start": 0, + "Length": 9 + } + ] + }, + { + "Input": "26時から30時まで。", + "Context": { + "ReferenceDateTime": "2016-11-07T16:12:00" + }, + "NotSupportedByDesign": "javascript,python,java", + "Results": [ + { + "Text": "26時から30時まで", + "Type": "timerange", + "Value": { + "Timex": "(T26,T30,PT4H)", + "FutureResolution": { + "startTime": "02:00:00", + "endTime": "06:00:00" + }, + "PastResolution": { + "startTime": "02:00:00", + "endTime": "06:00:00" + } + }, + "Start": 0, + "Length": 10 + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/Portuguese/DateTimeModel.json b/Specs/DateTime/Portuguese/DateTimeModel.json index 2d944e4427..efe9e7b187 100644 --- a/Specs/DateTime/Portuguese/DateTimeModel.json +++ b/Specs/DateTime/Portuguese/DateTimeModel.json @@ -1,4 +1,4 @@ -[ +[ { "Input": "dom, amanhã, hoje, 2018", "Context": { @@ -4135,5 +4135,36 @@ } } ] + }, + { + "Input": "Estarei fora novembro 19-20", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "NotSupportedByDesign": "javascript, python, java", + "Results": [ + { + "Text": "novembro 19-20", + "Start": 13, + "End": 26, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": 
"(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2015-11-19", + "end": "2015-11-20" + }, + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2016-11-19", + "end": "2016-11-20" + } + ] + } + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/Portuguese/DateTimePeriodParser.json b/Specs/DateTime/Portuguese/DateTimePeriodParser.json index b43eb39b0b..7e7fc66a51 100644 --- a/Specs/DateTime/Portuguese/DateTimePeriodParser.json +++ b/Specs/DateTime/Portuguese/DateTimePeriodParser.json @@ -218,7 +218,7 @@ "Text": "de 2:00pm, 2016-2-21 a 3:32, 04/23/2016", "Type": "datetimerange", "Value": { - "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1478H)", + "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1477H32M)", "FutureResolution": { "startDateTime": "2016-02-21 14:00:00", "endDateTime": "2016-04-23 03:32:00" diff --git a/Specs/DateTime/Spanish/DateTimeModel.json b/Specs/DateTime/Spanish/DateTimeModel.json index 41a1561e06..4281c28e5c 100644 --- a/Specs/DateTime/Spanish/DateTimeModel.json +++ b/Specs/DateTime/Spanish/DateTimeModel.json @@ -1,4 +1,4 @@ -[ +[ { "Input": "domingo hoy 2018", "Context": { @@ -22692,5 +22692,36 @@ } } ] + }, + { + "Input": "estaré fuera noviembre 19-20", + "Context": { + "ReferenceDateTime": "2016-11-07T00:00:00" + }, + "NotSupportedByDesign": "javascript, python, java", + "Results": [ + { + "Text": "noviembre 19-20", + "Start": 13, + "End": 27, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2015-11-19", + "end": "2015-11-20" + }, + { + "timex": "(XXXX-11-19,XXXX-11-20,P1D)", + "type": "daterange", + "start": "2016-11-19", + "end": "2016-11-20" + } + ] + } + } + ] } ] \ No newline at end of file diff --git a/Specs/DateTime/Spanish/DateTimePeriodParser.json b/Specs/DateTime/Spanish/DateTimePeriodParser.json index 208fb0d4f1..2e23ff7c8a 100644 --- 
a/Specs/DateTime/Spanish/DateTimePeriodParser.json +++ b/Specs/DateTime/Spanish/DateTimePeriodParser.json @@ -238,7 +238,7 @@ "Text": "de 2:00pm, 2016-2-21 a 3:32, 04/23/2016", "Type": "datetimerange", "Value": { - "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1478H)", + "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1477H32M)", "FutureResolution": { "startDateTime": "2016-02-21 14:00:00", "endDateTime": "2016-04-23 03:32:00" @@ -952,7 +952,7 @@ "Text": "desde las 2 p. m. del 21 de feb de 2016 hasta el 4-23-16 a las 3:32", "Type": "datetimerange", "Value": { - "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1478H)", + "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1477H32M)", "FutureResolution": { "startDateTime": "2016-02-21 14:00:00", "endDateTime": "2016-04-23 03:32:00" @@ -1978,7 +1978,7 @@ "Text": "desde las 14 h de 21-2-2016 hasta el 23.4.16 a las 3:32", "Type": "datetimerange", "Value": { - "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1478H)", + "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1477H32M)", "FutureResolution": { "startDateTime": "2016-02-21 14:00:00", "endDateTime": "2016-04-23 03:32:00" diff --git a/Specs/DateTime/Turkish/DateTimePeriodParser.json b/Specs/DateTime/Turkish/DateTimePeriodParser.json index 4430b8018a..ee25ff66d0 100644 --- a/Specs/DateTime/Turkish/DateTimePeriodParser.json +++ b/Specs/DateTime/Turkish/DateTimePeriodParser.json @@ -218,7 +218,7 @@ "Text": "21-2-2016, 14:00'ten 23/04/2016 saat 3:32'ye kadar", "Type": "datetimerange", "Value": { - "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1478H)", + "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1477H32M)", "FutureResolution": { "startDateTime": "2016-02-21 14:00:00", "endDateTime": "2016-04-23 03:32:00" @@ -1164,7 +1164,7 @@ "Text": "21/2/2016 saat 14:00'ten 23/04/2016 saat 3:32'ye kadar", "Type": "datetimerange", "Value": { - "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1478H)", + "Timex": "(2016-02-21T14:00,2016-04-23T03:32,PT1477H32M)", "FutureResolution": { 
"startDateTime": "2016-02-21 14:00:00", "endDateTime": "2016-04-23 03:32:00" diff --git a/Specs/NumberWithUnit/English/CurrencyModel.json b/Specs/NumberWithUnit/English/CurrencyModel.json index b858adda4c..aba9f3a7e0 100644 --- a/Specs/NumberWithUnit/English/CurrencyModel.json +++ b/Specs/NumberWithUnit/English/CurrencyModel.json @@ -2895,5 +2895,90 @@ } } ] + }, + { + "Input": "five more losses in excess of 30 MUSD were reported", + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "30 musd", + "Start": 30, + "End": 36, + "TypeName": "currency", + "Resolution": { + "isoCurrency": "USD", + "unit": "United States dollar", + "value": "30000000" + } + } + ] + }, + { + "Input": "The client estimates an increase to 120 MEUR", + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "120 meur", + "Start": 36, + "End": 43, + "TypeName": "currency", + "Resolution": { + "isoCurrency": "EUR", + "unit": "Euro", + "value": "120000000" + } + } + ] + }, + { + "Input": "Refers to 48.5 MGBP purchase price already paid", + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "48.5 mgbp", + "Start": 10, + "End": 18, + "TypeName": "currency", + "Resolution": { + "isoCurrency": "GBP", + "unit": "British pound", + "value": "48500000" + } + } + ] + }, + { + "Input": "The company has annual revenues of about MAUD 60", + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "maud 60", + "Start": 41, + "End": 47, + "TypeName": "currency", + "Resolution": { + "isoCurrency": "AUD", + "unit": "Australian dollar", + "value": "60000000" + } + } + ] + }, + { + "Input": "In 1993, 25 grants were provided, totalling twenty MUSD", + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "twenty musd", + "Start": 44, + "End": 54, + "TypeName": "currency", + "Resolution": { + "isoCurrency": "USD", + "unit": "United States dollar", + "value": "20000000" + } + } + ] } ] diff --git 
a/Specs/NumberWithUnit/Japanese/CurrencyModel.json b/Specs/NumberWithUnit/Japanese/CurrencyModel.json index b4d06cd316..0956089ccc 100644 --- a/Specs/NumberWithUnit/Japanese/CurrencyModel.json +++ b/Specs/NumberWithUnit/Japanese/CurrencyModel.json @@ -1037,5 +1037,141 @@ "End": 23 } ] + }, + { + "Input": "これは 7万円です", + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "7万円", + "TypeName": "currency", + "Resolution": { + "isoCurrency": "JPY", + "value": "70000", + "unit": "Japanese yen" + }, + "Start": 4, + "End": 6 + } + ] + }, + { + "Input": "これは7万円です", + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "7万円", + "TypeName": "currency", + "Resolution": { + "isoCurrency": "JPY", + "value": "70000", + "unit": "Japanese yen" + }, + "Start": 3, + "End": 5 + } + ] + }, + { + "Input": "これは 7万円です", + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "7万円", + "TypeName": "currency", + "Resolution": { + "isoCurrency": "JPY", + "value": "70000", + "unit": "Japanese yen" + }, + "Start": 8, + "End": 10 + } + ] + }, + { + "Input": "これは 7 万 円 です", + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "7 万 円", + "TypeName": "currency", + "Resolution": { + "isoCurrency": "JPY", + "value": "70000", + "unit": "Japanese yen" + }, + "Start": 4, + "End": 10 + } + ] + }, + { + "Input": "これは 700円です", + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "700円", + "TypeName": "currency", + "Resolution": { + "isoCurrency": "JPY", + "value": "700", + "unit": "Japanese yen" + }, + "Start": 4, + "End": 7 + } + ] + }, + { + "Input": "これは 七万円です", + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "七万円", + "TypeName": "currency", + "Resolution": { + "isoCurrency": "JPY", + "value": "70000", + "unit": "Japanese yen" + }, + "Start": 4, + "End": 6 + } + ] + }, + { + "Input": "これは 7百円です", + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": 
"7百円", + "TypeName": "currency", + "Resolution": { + "isoCurrency": "JPY", + "value": "700", + "unit": "Japanese yen" + }, + "Start": 4, + "End": 6 + } + ] + }, + { + "Input": "これは 7千円です", + "NotSupported": "javascript, python, java", + "Results": [ + { + "Text": "7千円", + "TypeName": "currency", + "Resolution": { + "isoCurrency": "JPY", + "value": "7000", + "unit": "Japanese yen" + }, + "Start": 4, + "End": 6 + } + ] } ] \ No newline at end of file diff --git a/Specs/NumberWithUnit/Spanish/CurrencyModel.json b/Specs/NumberWithUnit/Spanish/CurrencyModel.json index 0a09cb6847..07eb9f045c 100644 --- a/Specs/NumberWithUnit/Spanish/CurrencyModel.json +++ b/Specs/NumberWithUnit/Spanish/CurrencyModel.json @@ -1731,5 +1731,188 @@ "End": 5 } ] + }, + { + "Input": "Perdieron 75 USD millones en los últimos tres años.", + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "75 usd millones", + "TypeName": "currency", + "Resolution": { + "value": "75000000", + "unit": "Dólar estadounidense", + "isoCurrency": "USD" + }, + "Start": 10, + "End": 24 + } + ] + }, + { + "Input": "Los precios del acuerdo comienzan en dólar 22,5 millones.", + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "dólar 22,5 millones", + "TypeName": "currency", + "Resolution": { + "value": "22500000", + "unit": "Dólar" + }, + "Start": 37, + "End": 55 + } + ] + }, + { + "Input": "Los precios del acuerdo comienzan en dólar 22500000.", + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "dólar 22500000", + "TypeName": "currency", + "Resolution": { + "value": "22500000", + "unit": "Dólar" + }, + "Start": 37, + "End": 50 + } + ] + }, + { + "Input": "Por 15 dolares 50 puedes almorzar y cenar", + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "15 dolares 50", + "TypeName": "currency", + "Resolution": { + "value": "15,5", + "unit": "Dólar" + }, + "Start": 4, + "End": 16 + } + ] + }, + { + "Input": "Por 15 USD 50 puedes 
almorzar y cenar", + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "15 usd 50", + "TypeName": "currency", + "Resolution": { + "value": "15,5", + "unit": "Dólar estadounidense", + "isoCurrency": "USD" + }, + "Start": 4, + "End": 12 + } + ] + }, + { + "Input": "Pagaron 75 USD millones, pero el precio en realidad fue de 50 millones USD. Y si tuvieran otros 15 USD 50, habrían pagado 80 USD millones.", + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "75 usd millones", + "TypeName": "currency", + "Resolution": { + "value": "75000000", + "unit": "Dólar estadounidense", + "isoCurrency": "USD" + }, + "Start": 8, + "End": 22 + }, + { + "Text": "50 millones usd", + "TypeName": "currency", + "Resolution": { + "value": "50000000", + "unit": "Dólar estadounidense", + "isoCurrency": "USD" + }, + "Start": 59, + "End": 73 + }, + { + "Text": "15 usd 50", + "TypeName": "currency", + "Resolution": { + "value": "15,5", + "unit": "Dólar estadounidense", + "isoCurrency": "USD" + }, + "Start": 96, + "End": 104 + }, + { + "Text": "80 usd millones", + "TypeName": "currency", + "Resolution": { + "value": "80000000", + "unit": "Dólar estadounidense", + "isoCurrency": "USD" + }, + "Start": 122, + "End": 136 + } + ] + }, + { + "Input": "El cheque equivalía a 125 USD millones", + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "125 usd millones", + "TypeName": "currency", + "Resolution": { + "value": "125000000", + "unit": "Dólar estadounidense", + "isoCurrency": "USD" + }, + "Start": 22, + "End": 37 + } + ] + }, + { + "Input": "Obtén 2 meses por MXN$ 20", + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "mxn$ 20", + "TypeName": "currency", + "Resolution": { + "value": "20", + "unit": "Peso mexicano", + "isoCurrency": "MXN" + }, + "Start": 18, + "End": 24 + } + ] + }, + { + "Input": "Obtén 2 meses por 20 MXN$", + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "20 
mxn$", + "TypeName": "currency", + "Resolution": { + "value": "20", + "unit": "Peso mexicano", + "isoCurrency": "MXN" + }, + "Start": 18, + "End": 24 + } + ] } ] \ No newline at end of file