From 8c899a79ae4964ab6f35725ff1264a15cdb013eb Mon Sep 17 00:00:00 2001 From: aitelint <53436544+aitelint@users.noreply.github.com> Date: Mon, 16 May 2022 06:21:56 +0300 Subject: [PATCH] [JA DateTimeV2] Merged refinements (#2950) * Merged refinements * Fixed specs formatting * Minor renamed variables according to review Co-authored-by: aitelint --- .../Japanese/DateTimeDefinitions.cs | 47 ++++--- .../ChineseDurationExtractorConfiguration.cs | 2 + .../Constants.cs | 2 + .../CJK/BaseCJKDurationExtractor.cs | 26 +++- .../CJK/BaseCJKMergedDateTimeExtractor.cs | 11 ++ .../CJK/ICJKDurationExtractorConfiguration.cs | 2 + .../JapaneseDurationExtractorConfiguration.cs | 6 +- .../KoreanDurationExtractorConfiguration.cs | 2 + .../Parsers/CJK/BaseCJKDateParser.cs | 3 +- .../Parsers/CJK/BaseCJKDateTimeParser.cs | 9 +- .../CJK/BaseCJKDateTimePeriodParser.cs | 20 ++- .../Utilities/TimeFunctions.cs | 23 ++- .../Utilities/TimePeriodFunctions.cs | 27 +++- .../Utilities/TimexUtility.cs | 24 ++++ Patterns/Japanese/Japanese-DateTime.yaml | 59 ++++---- Specs/DateTime/Japanese/DateParser.json | 6 +- Specs/DateTime/Japanese/DatePeriodParser.json | 26 ++++ Specs/DateTime/Japanese/DateTimeParser.json | 120 ++++++++++++++++ .../Japanese/DateTimePeriodParser.json | 130 +++++++++++++++++ Specs/DateTime/Japanese/MergedExtractor.json | 132 ++++++++---------- Specs/DateTime/Japanese/MergedParser.json | 108 +++++++++++++- Specs/DateTime/Japanese/TimeParser.json | 63 +++++++++ Specs/DateTime/Japanese/TimePeriodParser.json | 78 +++++++++++ 23 files changed, 784 insertions(+), 142 deletions(-) diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/DateTimeDefinitions.cs index 887e8d940f..976510139c 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/Japanese/DateTimeDefinitions.cs @@ -25,9 +25,10 @@ public static class DateTimeDefinitions public const string MonthRegex = @"(?(正|一|二|三|四|五|六|七|八|九|十|十一|十二|0?[1-9]|1[0-2])\s*(か月(?!で)|月間?))"; public const string MonthRegexForPeriod = @"(?正月|一月|二月|三月|四月|五月|六月|七月|八月|九月|十月|十一月|十二月|(0?[1-9]|1[0-2])か?月)(?=\b|t|まで|から)?"; public const string MonthNumRegexForPeriod = @"(?0?[1-9]|1[0-2])(?=\b|t|まで|から)?"; - public const string DayRegex = @"(?[0-2]?[1-9]|[1-3]0|31)([日目]間?)?"; - public const string DayRegexForPeriod = @"(?3[01]|[0-2]?\d|(三十一?|(一|二)?十?[一二三四五六七八九]))(\s*日目?)?(?=\b|t|まで|から)?"; - public const string DateDayRegexInCJK = @"(?初一|((二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|二十一|三十一|十二|十三|十四|十五|十六|十七|十八|十一|十|二十|廿(?!日市市)|三十|一|二|三|四|五|六|七|八|九)|3[01]|[0-2]?\d)(\s*日|号))目?"; + public const string DayRegex = @"(?[0-2]?[1-9]|[1-3]0|31)((日|目)(?!かかる|待つ|泊まる|経つ)間?)?"; + public const string DayRegexForPeriod = @"(?3[01]|[0-2]?\d|(三十一?|(一|二)?十?[一二三四五六七八九]))((\s*日(?!かかる|待つ|泊まる|経つ))目?)?(?=\b|t|まで|から)?"; + public const string DayNumberRegex = @"(二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|二十一|三十一|十二|十三|十四|十五|十六|十七|十八|十一|十|二十|廿(?!日市市)|三十|一|二|三|四|五|六|七|八|九)"; + public static readonly string DateDayRegexInCJK = $@"(?初一|({DayNumberRegex}|3[01]|[0-2]?\d)(\s*日|号)(?!かかる|待つ|泊まる|経つ))目?"; public const string DayRegexNumInCJK = @"(?一|十一|二十一|三十一|三十|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|一|十一|十|二十一|二十|廿(?!日市市)|三十一|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|十|二十|廿|卅)"; public const string MonthNumRegex = @"(?0?[1-9]|1[0-2])"; public const string TwoNumYear = @"50"; @@ -52,7 +53,7 @@ public static class DateTimeDefinitions public const string SpecialMonthRegex = @"(先月|来月|今月|前月|再来月|昨月|先々月|ぜんげつ|(せん)?せんげつ|さくげつ|らいげつ|こんげつ)"; public const string SpecialYearRegex = @"(ことし|さ?らいねん|きょねん|さくねん)"; public const string SpecialDayRegex = @"((いっ)?さくじつ|おとつい|最近|前天|后天|明日から二日((?今日)から(?1日半)(の間)?)|((?今日)から(?2日半)(の間)?)|昨日の2日前|昨日から4日|今日から二日|今日から4日|昨日から2日間|昨天|明天|今天|今日|明日|一?昨?昨日|一昨日|大后天|大前天|後天|大後天|きょう|あす|あした|きのう|明々後日|(弥)?明後日|この日|前日|二日前|おととい|し?あさって|私の一日|この間|次の日|その日|最後の日)"; - public const string SpecialDayWithNumRegex = @"((いっ)?さくじつ|おとつい|最近|前天|后天|昨天|明天|今天|今日?|明日|一?昨?昨日|一昨日|大后天|大前天|後天|大後天|きょう|あす|あした|きのう|明々後日|(弥)?明後日|この日|前日|二日前|おととい|し?あさって|私の一日|この間|次の日|その日)(から|の)?([\d十一二三四五六七八九]*|数)(日|月|週|個)間?(先|後|前|以内)?(の(?日曜日?|月曜日?|火曜日?|水曜日?|木曜日?|金曜日?|土曜日?))?"; + public const string SpecialDayWithNumRegex = @"((いっ)?さくじつ|おとつい|最近|前天|后天|昨天|明天|今天|今日?|明日|一?昨?昨日|一昨日|大后天|大前天|後天|大後天|きょう|あす|あした|きのう|明々後日|(弥)?明後日|この日|前日|二日前|おととい|し?あさって|私の一日|この間|次の日|その日)(から|の)?([\d十一二三四五六七八九]*|数)(日|月|週(間で)?|個)間?(先|後|前)?(の(?日曜日?|月曜日?|火曜日?|水曜日?|木曜日?|金曜日?|土曜日?))?"; public static readonly string WeekDayOfMonthRegex = $@"((({SpecialMonthRegex}|{MonthRegex}|{MonthNumRegex}|((这个|这一个|这|这一|本|今|上个|上一个|上|上一|去|下个|下一个|下|下一|明)月))(的|の)?\s*)?(第|最)?(?([初一二三四五])|最後|最終|([1-5])|最后一)(个|の|\s)*{WeekDayRegex})"; public static readonly string WeekDayAndDayRegex = $@"({DayRegexForPeriod}(の|的)?(\s|,)*{WeekDayRegex})"; public const string ThisPrefixRegex = @"这个|这一个|这|这一|本|今|こ"; @@ -60,13 +61,13 @@ public static class DateTimeDefinitions public const string NextPrefixRegex = @"下个|下一个|下|下一|明|次|再?来|向こう|これから(の)?|翌|向こう"; public static readonly string RelativeRegex = $@"(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex}))"; public static readonly string SpecialDate = $@"(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})年)?(の|的)?(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})(の|的)?月)?(の|的)?{DateDayRegexInCJK}"; - public const string DateUnitRegex = @"(?年|个月|月|周|時間?|(?営業)日|(?年|个月|月|周|(?営業)日|(?(二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|二十二|二十三|二十一|十一|三十一|十二|十三|十四|十五|十六|十七|十八|十九|十|二十|三十|一|十|二|三|四|五|六|七|八|九|3[0-1]|[1-2]\d|0?[1-9])日|初一|三十|(一|十一|二十一|三十一|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|一|十一|十|二十一|二十|三十一|三十|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|十|二十|三十|3[0-1]|[1-2]\d|0?[1-9])号|一|十一|二十一|三十一|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|一|十一|十|二十一|二十|三十一|三十|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|十|二十|三十|廿(?!日市市)|卅)目?"; public const string DatePeriodThisRegex = @"今|这个|这一个|这|这一|本"; public const string DatePeriodLastRegex = @"この|上个|上一个|上|上一|前|去|最後|最終|過去|先|昨"; - public const string DatePeriodNextRegex = @"(?再来)|下个|下一个|下|下一|最初|来|向こう|これから(の)?|翌|今後|次(の)?"; + public const string DatePeriodNextRegex = @"(?再来|以降)|下个|下一个|下|下一|最初|来|向こう|これから(の)?|翌|今後|次(の)?"; public const string DateRangePrepositions = @"((こ|私の|その|この|これらの|それらの)\s*)?"; public static readonly string RelativeMonthRegex = $@"(?({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex})\s*月)"; public const string HalfYearRegex = @"((?の?(上|前)半期)|(?の?(下|后)半期))"; @@ -115,7 +116,7 @@ public static class DateTimeDefinitions public const string MonthToMonthSuffixRequired = @"^[.]"; public static readonly string DayToDay = $@"({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})?(({SpecialMonthRegex}|{MonthRegex})の?)?(({SpecialDayRegex}|{DayRegex}|{WeekDayRegex})から(({SpecialMonthRegex}|{MonthRegex})の?)?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})?((今月|来月|{MonthRegex})の?)?({SpecialDayRegex}|{DayRegex}|{WeekDayRegex})(までの間|まで|の間))|{SpecialDayRegex}"; public static readonly string FirstLastOfYearRegex = $@"(({DatePeriodYearInCJKRegex}|{YearRegex}|(?再来年|翌年|来年|今年|去年))的?)((?前)|(?(最后|最後|最終)))"; - public static readonly string ComplexDatePeriodRegex = $@"({DateRangePrepositions})(?(第{ZeroToNineIntegerRegexCJK}+|第\d+|{DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex}|(({YearNumRegex}|{DayRegexForPeriod}|{DateDayRegexInCJK}|{MonthRegex}|{MonthNumRegex})(?!((\d+)?(分|秒|時))))|{RelativeRegex}).+)(から)(?.+)(までの間|まで|の間)"; + public static readonly string ComplexDatePeriodRegex = $@"({DateRangePrepositions})(?(第{ZeroToNineIntegerRegexCJK}+|第\d+|{DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex}|(({YearNumRegex}|{DayRegexForPeriod}|{DateDayRegexInCJK}|{MonthRegex}|{MonthNumRegex})(?!((\d+)?(分|秒|時))))|{RelativeRegex}).+)(から)(?.+)(までの間|(?(この|時前|(?((?以内に)|後に|向こう|后|次の|今後|今日の午後|これからの|(?春|夏|秋|冬)(天|季)?(の)?((?半ば)|(?初め|のはじめ)|(?終わり(ごろ)?|末|下旬))?"; @@ -134,12 +135,13 @@ public static class DateTimeDefinitions public const string TomorrowRegex = @"(?明日の?(午前|午後|中|夜|朝)?)"; public const string YesterdayRegex = @"(?昨日の?(午前|午後|中|夜|朝)?)"; public const string TodayRegex = @"(?(今朝の?|今朝の午前|今晩|今晚|今早|今晨|明晚|明早|明晨|昨晚|今夜|昨夜)(的|在)?)"; + public const string FromNowRegex = @"((?今)から)"; public static readonly string SpecialDayHourRegex = $@"((?{TimeHourCJKRegex}|{TimeHourNumRegex})(時間?|(:00)))"; public static readonly string SpecialDayMinuteRegex = $@"((?{TimeMinuteCJKRegex}|{TimeMinuteNumRegex})分間?)"; public static readonly string SpecialDaySecondRegex = $@"((?{TimeSecondCJKRegex}|{TimeSecondNumRegex})秒間?)"; - public const string SpecialDayModRegex = @"((?過ぎに)|(?ごろ)|(?で|の?うちに)|(?弱|たらず)|(?以上))"; + public const string SpecialDayModRegex = @"((?過ぎに|以降)|(?ごろ)|(?で|の?うちに)|(?弱|たらず)|(?以上))"; public static readonly string SpecialDayEndOfRegex = $@"((?明日の終わり|({WeekDayRegex}の?終わり))|(?日の終わり|一日の終わり|その日の終わり))"; - public static readonly string TimeOfSpecialDayRegex = $@"(({SpecialDayEndOfRegex}|{WeekDayRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex})(\d日)?(と)?(({SpecialDayHourRegex}{SpecialDayMinuteRegex}?{SpecialDaySecondRegex}?)|({SpecialDayMinuteRegex}{SpecialDaySecondRegex}?)){SpecialDayModRegex}?)|(({SpecialDayHourRegex}(?で|の?うちに)))|(({SpecialDayEndOfRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex}){SpecialDayModRegex}?)|({WeekDayRegex}(\d日)?(と)?{SpecialDayModRegex})"; + public static readonly string TimeOfSpecialDayRegex = $@"(({SpecialDayEndOfRegex}|{WeekDayRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex})(\d日)?(と)?(({SpecialDayHourRegex}{SpecialDayMinuteRegex}?{SpecialDaySecondRegex}?)|({SpecialDayMinuteRegex}{SpecialDaySecondRegex}?)){SpecialDayModRegex}?)|(({SpecialDayHourRegex}(?で|の?うちに)))|(({SpecialDayEndOfRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex}){SpecialDayModRegex}?)|({WeekDayRegex}(\d日)?(と)?{SpecialDayModRegex})|({FromNowRegex}\d+(分|時|秒)後)"; public const string NowTimeRegex = @"(现在|今)"; public const string RecentlyTimeRegex = @"(刚刚才?|刚才)"; public const string AsapTimeRegex = @"(出来る限り早く|立刻|马上)"; @@ -166,7 +168,7 @@ public static class DateTimeDefinitions public const string PlusFourDayRegex = @"今日から4日"; public const string DurationAllRegex = @"(まる(ひと)?)"; public const string DurationHalfRegex = @"^[.]"; - public const string DurationRelativeDurationUnitRegex = @"(?数ヶ|数)|(?前|昨日)|(?後|明日)|(?もう)"; + public const string DurationRelativeDurationUnitRegex = @"(?数ヶ|数)|(?(?以内)|(?後|明日)|(?(?(?(?数(?((か|ヶ)?(時|月|日|週|年|周|週|週|秒|分|営業日|年)間?))(たらず|以上)?)"; @@ -199,7 +201,7 @@ public static class DateTimeDefinitions @"个月", @"年" }; - public static readonly string DurationUnitRegex = $@"(?{DateUnitRegex}|分|秒|時間|まる(ひと)?|もう|数|以上|たらず)"; + public static readonly string DurationUnitRegex = $@"(?年|个月|月|周|時間?|(?営業)日|天|週間?|星期|个星期|か月|(?[と]?|,)\s*$"; public const string ConnectorRegex = @"^\s*[,-]\s*$"; public static readonly string LunarHolidayRegex = $@"(({YearRegex}|{DatePeriodYearInCJKRegex}|(?明年|今年|去年|来年))(的)?)?(?除夕|春节|旧暦の正月初一|中秋(節|节)?|元宵(节|節)|端午(节|の節句)?|重(阳节|陽節))"; @@ -212,7 +214,7 @@ public static class DateTimeDefinitions public const string SetLastRegex = @"(?last|this|next)"; public const string SetEachDayRegex = @"(毎|各|毎一)(天|日)\s*$"; public const string SetEachDateUnitRegex = @"(毎)(年|月|週)\s*$"; - public const string TimeHourNumRegex = @"([0-1]?\d|2[0-4])"; + public const string TimeHourNumRegex = @"(?过半|半)"; public const string TimeQuarterRegex = @"(?[一两二三四1-4])\s*(刻钟|刻)"; public static readonly string TimeCJKTimeRegex = $@"{TimeHourRegex}({TimeQuarterRegex}|{TimeHalfRegex}|((((过|又)?{TimeMinuteRegex})({TimeSecondRegex})?)|({TimeSecondRegex})))?"; - public static readonly string TimeDigitTimeRegex = $@"(?{TimeHourNumRegex}):(?{TimeMinuteNumRegex})(:(?{TimeSecondNumRegex}))?(am|pm)?"; + public static readonly string TimeDigitTimeRegex = $@"(?{TimeHourNumRegex}):(?{TimeMinuteNumRegex})(:(?{TimeSecondNumRegex}))?({AmPmDescRegex})?"; public static readonly string TimeDayDescRegex = $@"(?(正午|夜中|午前半ば|(昼食時)|真昼)|((?<=({TimeDigitTimeRegex}|{TimeCJKTimeRegex})(の)?)(早朝(に)?|午後(に)?|晚|晩|(深)?夜(に)?|未明|午前(中)?|日中|白昼|(未|早)?朝(に)?|昼前に|昼すぎに|夕方前に|夕方に|営業時間内に|昼))|((早朝(に)?|午後(に)?|晚|晩|(深)?夜(に)?|未明|午前(中)?|日中|白昼|(未|早)?朝(に)?|昼前に|昼すぎに|夕方前に|夕方に|営業時間内に|昼)(?=(の)?({TimeDigitTimeRegex}|{TimeCJKTimeRegex}))))"; public const string TimeApproximateDescPreffixRegex = @"(ぐらい|おそらく|多分|ほとんど|まもなく|昨日の|昨日|来週の|来週|昼食時|昼食|真)"; public const string TimeApproximateDescSuffixRegex = @"(ごろに|ごろ|過ぎに|過ぎ|丁度に|丁度|きっかりに|きっかり|を過ぎた頃に|を過ぎた頃|ちょっと前に|ちょっと前|近くに|近く|昼食時|昼食|ぐらい|時かっきり|頃|かっきり|以降|まで(の間)?|の間|間で?|間以内)"; @@ -247,9 +249,9 @@ public static class DateTimeDefinitions public const string FromToRegex = @"^[.]"; public const string AmbiguousRangeModifierPrefix = @"^[.]"; public const string ReferenceDatePeriodRegex = @"(同じ|その)(?月|週末|年|週)"; - public const string ParserConfigurationBefore = @"(之前|以前|前)"; - public const string ParserConfigurationAfter = @"(之后|之後|以后|以後|后|後)"; - public const string ParserConfigurationUntil = @"(直到|直至|截至|截止(到)?)"; + public const string ParserConfigurationBefore = @"(またはその前|またはそれ以前|之前|以前|前)"; + public const string ParserConfigurationAfter = @"(またはそれ以降|之后|之後|以后|以後|后|後|以降)"; + public const string ParserConfigurationUntil = @"(まで|直到|直至|截至|截止(到)?)"; public const string ParserConfigurationSincePrefix = @"(自从|自|自打|打)"; public const string ParserConfigurationSinceSuffix = @"(以来|开始)"; public const string ParserConfigurationLastWeekDayRegex = @"最后一个"; @@ -648,12 +650,17 @@ public static class DateTimeDefinitions public const string DateTimePeriodNIRegex = @"(半夜|夜间|深夜|夜)"; public static readonly Dictionary AmbiguityFiltersDict = new Dictionary { - { @"早", @"(? AmbiguityDatePeriodFiltersDict = new Dictionary { { @"^年$", @"年" } }; + public static readonly Dictionary AmbiguityDurationFiltersDict = new Dictionary + { + { @"月に", @"月に" } + }; public static readonly Dictionary DurationUnitValueMap = new Dictionary { { @"Y", 31536000 }, @@ -682,7 +689,7 @@ public static class DateTimeDefinitions { @"キング牧師記念日", @"-01-WXX-1-3" } }; public const string MergedBeforeRegex = @"(前|之前)$"; - public const string MergedAfterRegex = @"(后|後|之后|之後)$"; + public const string MergedAfterRegex = @"(后|後|之后|之後|以降)$"; public static readonly Dictionary TimeNumberDictionary = new Dictionary { { '零', 0 }, diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDurationExtractorConfiguration.cs index 28830913b8..426568b03e 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDurationExtractorConfiguration.cs @@ -56,6 +56,8 @@ public ChineseDurationExtractorConfiguration(IDateTimeOptionsConfiguration confi public Dictionary UnitValueMap { get; } + public Dictionary AmbiguityDurationFiltersDict => null; + Regex ICJKDurationExtractorConfiguration.DurationUnitRegex => DurationUnitRegex; Regex ICJKDurationExtractorConfiguration.DurationConnectorRegex => DurationConnectorRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs index fd7f13f9dc..79e1d4ce57 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs @@ -53,6 +53,7 @@ public static class Constants // AmPm time representation for time parser public const string Comment_AmPm = "ampm"; + public const string Comment_Am = "am"; // Prefix early/late for time parser public const string Comment_Early = "early"; @@ -267,6 +268,7 @@ public static class Constants public const string TimexFuzzyDay = "XX"; public const string DateTimexConnector = "-"; public const string TimeTimexConnector = ":"; + public const string TimexSeparator = ","; public const string GeneralPeriodPrefix = "P"; public const string TimeTimexPrefix = "T"; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDurationExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDurationExtractor.cs index ef6c0118e8..cb15619251 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDurationExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDurationExtractor.cs @@ -2,7 +2,8 @@ // Licensed under the MIT License. using System.Collections.Generic; - +using System.Linq; +using System.Text.RegularExpressions; using Microsoft.Recognizers.Text.Utilities; using DateObject = System.DateTime; @@ -54,9 +55,32 @@ public List Extract(string source, DateObject referenceTime) res = MergeMultipleDuration(source, res); } + res = FilterAmbiguity(res, source); + return res; } + private List FilterAmbiguity(List extractResults, string text) + { + if (this.config.AmbiguityDurationFiltersDict != null) + { + foreach (var regex in this.config.AmbiguityDurationFiltersDict) + { + foreach (var extractResult in extractResults) + { + if (regex.Key.IsMatch(text)) + { + var matches = regex.Value.Matches(text).Cast(); + extractResults = extractResults.Where(er => !matches.Any(m => m.Index < er.Start + er.Length && m.Index + m.Length > er.Start)) + .ToList(); + } + } + } + } + + return extractResults; + } + private List MergeMultipleDuration(string text, List extractorResults) { if (extractorResults.Count <= 1) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKMergedDateTimeExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKMergedDateTimeExtractor.cs index aa3748747b..98354748f3 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKMergedDateTimeExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKMergedDateTimeExtractor.cs @@ -129,6 +129,17 @@ private void AddMod(List ers, string text) er.Metadata = AssignModMetadata(er.Metadata); } + match = this.config.UntilRegex.MatchBegin(afterStr, trim: true); + + if (match.Success) + { + var modLength = match.Index + match.Length; + er.Length += modLength; + er.Text = text.Substring(er.Start ?? 0, er.Length ?? 0); + + er.Metadata = AssignModMetadata(er.Metadata); + } + match = this.config.SincePrefixRegex.MatchEnd(beforeStr, trim: true); if (match.Success && AmbiguousRangeChecker(beforeStr, text, er)) diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDurationExtractorConfiguration.cs index d176c140c4..d16a2a1d5f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDurationExtractorConfiguration.cs @@ -32,5 +32,7 @@ public interface ICJKDurationExtractorConfiguration : IDateTimeOptionsConfigurat Dictionary UnitValueMap { get; } + Dictionary AmbiguityDurationFiltersDict { get; } + } } \ No newline at end of file diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDurationExtractorConfiguration.cs index 6b600ce73d..382edd096b 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDurationExtractorConfiguration.cs @@ -6,8 +6,8 @@ using System.Globalization; using System.Linq; using System.Text.RegularExpressions; - using Microsoft.Recognizers.Definitions.Japanese; +using Microsoft.Recognizers.Definitions.Utilities; using Microsoft.Recognizers.Text.NumberWithUnit; using Microsoft.Recognizers.Text.NumberWithUnit.Japanese; @@ -48,6 +48,8 @@ public JapaneseDurationExtractorConfiguration(IDateTimeOptionsConfiguration conf UnitMap = DateTimeDefinitions.ParserConfigurationUnitMap.ToDictionary(k => k.Key, k => k.Value); UnitValueMap = DateTimeDefinitions.DurationUnitValueMap; + AmbiguityDurationFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityDurationFiltersDict); + } public IExtractor InternalExtractor { get; } @@ -56,6 +58,8 @@ public JapaneseDurationExtractorConfiguration(IDateTimeOptionsConfiguration conf public Dictionary UnitValueMap { get; } + public Dictionary AmbiguityDurationFiltersDict { get; } + Regex ICJKDurationExtractorConfiguration.DurationUnitRegex => DurationUnitRegex; Regex ICJKDurationExtractorConfiguration.DurationConnectorRegex => DurationConnectorRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDurationExtractorConfiguration.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDurationExtractorConfiguration.cs index 7dfd383b7b..b4007d88e4 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDurationExtractorConfiguration.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDurationExtractorConfiguration.cs @@ -56,6 +56,8 @@ public KoreanDurationExtractorConfiguration(IDateTimeOptionsConfiguration config public Dictionary UnitValueMap { get; } + public Dictionary AmbiguityDurationFiltersDict => null; + Regex ICJKDurationExtractorConfiguration.DurationUnitRegex => DurationUnitRegex; Regex ICJKDurationExtractorConfiguration.DurationConnectorRegex => DurationConnectorRegex; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateParser.cs index 4a20d4d351..5d0461d1ad 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateParser.cs @@ -318,8 +318,9 @@ protected DateTimeResolutionResult ParseImplicitDate(string text, DateObject ref // handle "明日から3週間" (3 weeks from tomorrow) var durationResult = this.config.DurationExtractor.Extract(text, referenceDate); var unitMatch = this.config.DurationRelativeDurationUnitRegex.Match(text); + var isWithin = this.config.DurationRelativeDurationUnitRegex.MatchEnd(text, trim: true).Groups[Constants.WithinGroupName].Success; - if (exactMatch.Success && unitMatch.Success && (durationResult.Count > 0) && + if ((exactMatch.Success || isWithin) && unitMatch.Success && (durationResult.Count > 0) && string.IsNullOrEmpty(unitMatch.Groups["few"].Value)) { var pr = this.config.DurationParser.Parse(durationResult[0], referenceDate); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimeParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimeParser.cs index ec11874279..40e97756fd 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimeParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimeParser.cs @@ -158,6 +158,14 @@ private DateTimeResolutionResult MergeDateAndTime(string text, DateObject refere var pastDate = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue; var time = (DateObject)((DateTimeResolutionResult)pr2.Value).FutureValue; + // handle cases with time like 25時 which resolve to the next day + var timexHour = TimexUtility.ParseHourFromTimeTimex(pr2.TimexStr); + if (timexHour > Constants.DayHourCount) + { + futureDate = futureDate.AddDays(1); + pastDate = pastDate.AddDays(1); + } + var hour = time.Hour; var min = time.Minute; var sec = time.Second; @@ -178,7 +186,6 @@ private DateTimeResolutionResult MergeDateAndTime(string text, DateObject refere timeStr = timeStr.Substring(0, timeStr.Length - 4); } - timeStr = "T" + hour.ToString("D2", CultureInfo.InvariantCulture) + timeStr.Substring(3); ret.Timex = pr1.TimexStr + timeStr; var val = (DateTimeResolutionResult)pr2.Value; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimePeriodParser.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimePeriodParser.cs index 3bd7bcfef4..bdd5683a0f 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimePeriodParser.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimePeriodParser.cs @@ -126,15 +126,31 @@ private DateTimeResolutionResult MergeDateAndTimePeriod(string text, DateObject var futureDate = (DateObject)((DateTimeResolutionResult)pr1.Value).FutureValue; var pastDate = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue; + // handle cases with time like 25時 which resolve to the next day + var swiftDay = 0; + var timexHours = TimexUtility.ParseHoursFromTimePeriodTimex(pr2.TimexStr); + if (timexHours.Item1 > Constants.DayHourCount) + { + pastDate = pastDate.AddDays(1); + futureDate = futureDate.AddDays(1); + } + else if (timexHours.Item2 > Constants.DayHourCount) + { + swiftDay++; + } + + var pastDateAlt = pastDate.AddDays(swiftDay); + var futureDateAlt = futureDate.AddDays(swiftDay); + ret.FutureValue = new Tuple( DateObject.MinValue.SafeCreateFromValue(futureDate.Year, futureDate.Month, futureDate.Day, beginTime.Hour, beginTime.Minute, beginTime.Second), - DateObject.MinValue.SafeCreateFromValue(futureDate.Year, futureDate.Month, futureDate.Day, endTime.Hour, endTime.Minute, endTime.Second)); + DateObject.MinValue.SafeCreateFromValue(futureDateAlt.Year, futureDateAlt.Month, futureDateAlt.Day, endTime.Hour, endTime.Minute, endTime.Second)); ret.PastValue = new Tuple( DateObject.MinValue.SafeCreateFromValue(pastDate.Year, pastDate.Month, pastDate.Day, beginTime.Hour, beginTime.Minute, beginTime.Second), - DateObject.MinValue.SafeCreateFromValue(pastDate.Year, pastDate.Month, pastDate.Day, endTime.Hour, endTime.Minute, endTime.Second)); + DateObject.MinValue.SafeCreateFromValue(pastDateAlt.Year, pastDateAlt.Month, pastDateAlt.Day, endTime.Hour, endTime.Minute, endTime.Second)); ret.Timex = TimexUtility.GenerateSplitDateTimePeriodTimex(pr1.TimexStr, pr2.TimexStr); ret.Success = !string.IsNullOrEmpty(ret.Timex); diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeFunctions.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeFunctions.cs index 254aa04bd2..9229558848 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeFunctions.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimeFunctions.cs @@ -79,12 +79,12 @@ public DateTimeResolutionResult PackTimeResult(DateTimeExtra extra, Ti noDesc = false; } - int hour = timeResult.Hour > 0 ? timeResult.Hour % Constants.DayHourCount : 0, + // Hours > 24 (e.g. 25時 which resolves to the next day) are kept unnormalized in the timex + // to avoid ambiguity in other entities. For example, "on the 30th at 25" is resolved to + // "XXXX-XX-30T25" because with "XXXX-XX-30+1T01" it is not known if the day should be "31" or "01". + int hour = timeResult.Hour > 0 && timeResult.Hour != Constants.DayHourCount ? timeResult.Hour : 0, min = timeResult.Minute > 0 ? timeResult.Minute : 0, - second = timeResult.Second > 0 ? timeResult.Second : 0, - day = referenceTime.Day, - month = referenceTime.Month, - year = referenceTime.Year; + second = timeResult.Second > 0 ? timeResult.Second : 0; var dateTimeResult = new DateTimeResolutionResult(); @@ -109,6 +109,18 @@ public DateTimeResolutionResult PackTimeResult(DateTimeExtra extra, Ti build.Append(":" + second.ToString("D2", CultureInfo.InvariantCulture)); } + // handle cases with time like 25時 (the hour is normalized in the past/future values) + if (timeResult.Hour > Constants.DayHourCount) + { + hour = timeResult.Hour - Constants.DayHourCount; + referenceTime = referenceTime.AddDays(1); + if (noDesc) + { + dateTimeResult.Comment = Constants.Comment_Am; + noDesc = false; + } + } + if (noDesc && hour <= Constants.HalfDayHourCount) { // build.Append("ampm"); @@ -117,6 +129,7 @@ public DateTimeResolutionResult PackTimeResult(DateTimeExtra extra, Ti dateTimeResult.Timex = build.ToString(); + int day = referenceTime.Day, month = referenceTime.Month, year = referenceTime.Year; dateTimeResult.FutureValue = dateTimeResult.PastValue = DateObject.MinValue.SafeCreateFromValue(year, month, day, hour, min, second); dateTimeResult.Success = true; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimePeriodFunctions.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimePeriodFunctions.cs index 00791e96d1..dc04613235 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimePeriodFunctions.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimePeriodFunctions.cs @@ -91,31 +91,50 @@ public static DateTimeResolutionResult Handle(IDateTimeParser timeParser, DateTi int day = refTime.Day, month = refTime.Month, - year = refTime.Year; + year = refTime.Year, + rightSwiftDay = 0, + leftSwiftDay = 0; // determine if the right side time is smaller than the left side, if yes, add one day int hour = leftResult.Hour > 0 ? leftResult.Hour : 0, min = leftResult.Minute > 0 ? leftResult.Minute : 0, second = leftResult.Second > 0 ? leftResult.Second : 0; + // handle cases with time like 25時 which resolve to the next day + if (hour > Constants.DayHourCount) + { + hour -= Constants.DayHourCount; + leftSwiftDay++; + } + var leftTime = DateObject.MinValue.SafeCreateFromValue(year, month, day, hour, min, second); hour = rightResult.Hour > 0 ? rightResult.Hour : 0; min = rightResult.Minute > 0 ? rightResult.Minute : 0; second = rightResult.Second > 0 ? rightResult.Second : 0; + // handle cases with time like 25時 which resolve to the next day + if (hour > Constants.DayHourCount) + { + hour -= Constants.DayHourCount; + rightSwiftDay++; + } + var rightTime = DateObject.MinValue.SafeCreateFromValue(year, month, day, hour, min, second); - if (rightTime.Hour < leftTime.Hour) + if (rightResult.Hour < leftResult.Hour) { rightTime = rightTime.AddDays(1); } - ret.FutureValue = ret.PastValue = new Tuple(leftTime, rightTime); - var leftTimex = BuildTimex(leftResult); var rightTimex = BuildTimex(rightResult); ret.Timex = $"({leftTimex},{rightTimex},{BuildSpan(leftResult, rightResult)})"; + + leftTime = leftTime.AddDays(leftSwiftDay); + rightTime = rightTime.AddDays(rightSwiftDay); + + ret.FutureValue = ret.PastValue = new Tuple(leftTime, rightTime); return ret; } diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimexUtility.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimexUtility.cs index dcb85b5d6a..63fa64c37a 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimexUtility.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Utilities/TimexUtility.cs @@ -569,6 +569,30 @@ public static float ParseNumberFromDurationTimex(string timex) return float.Parse(numberStr); } + public static int ParseHourFromTimeTimex(string timex) + { + var start = timex.IndexOf(Constants.TimeTimexPrefix) + 1; + var end = timex.IndexOf(Constants.TimeTimexConnector); + end = end > 0 ? end : timex.Length; + var hourStr = timex.Substring(start, end - start); + int.TryParse(hourStr, out int hour); + + return hour; + } + + public static Tuple ParseHoursFromTimePeriodTimex(string timex) + { + int hour1 = 0, hour2 = 0; + var timeList = timex.Split(Constants.TimexSeparator[0]); + if (timeList.Length > 2) + { + hour1 = ParseHourFromTimeTimex(timeList[0]); + hour2 = ParseHourFromTimeTimex(timeList[1]); + } + + return new Tuple(hour1, hour2); + } + private static bool IsTimeDurationTimex(string timex) { return timex.StartsWith($"{Constants.GeneralPeriodPrefix}{Constants.TimeTimexPrefix}", StringComparison.Ordinal); diff --git a/Patterns/Japanese/Japanese-DateTime.yaml b/Patterns/Japanese/Japanese-DateTime.yaml index 39d135cd05..9d77d1d8c4 100644 --- a/Patterns/Japanese/Japanese-DateTime.yaml +++ b/Patterns/Japanese/Japanese-DateTime.yaml @@ -9,11 +9,14 @@ MonthRegexForPeriod: !simpleRegex MonthNumRegexForPeriod: !simpleRegex def: (?0?[1-9]|1[0-2])(?=\b|t|まで|から)? DayRegex: !simpleRegex - def: (?[0-2]?[1-9]|[1-3]0|31)([日目]間?)? + def: (?[0-2]?[1-9]|[1-3]0|31)((日|目)(?!かかる|待つ|泊まる|経つ)間?)? DayRegexForPeriod: !simpleRegex - def: (?3[01]|[0-2]?\d|(三十一?|(一|二)?十?[一二三四五六七八九]))(\s*日目?)?(?=\b|t|まで|から)? -DateDayRegexInCJK: !simpleRegex - def: (?初一|((二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|二十一|三十一|十二|十三|十四|十五|十六|十七|十八|十一|十|二十|廿(?!日市市)|三十|一|二|三|四|五|六|七|八|九)|3[01]|[0-2]?\d)(\s*日|号))目? + def: (?3[01]|[0-2]?\d|(三十一?|(一|二)?十?[一二三四五六七八九]))((\s*日(?!かかる|待つ|泊まる|経つ))目?)?(?=\b|t|まで|から)? +DayNumberRegex: !simpleRegex + def: (二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|二十一|三十一|十二|十三|十四|十五|十六|十七|十八|十一|十|二十|廿(?!日市市)|三十|一|二|三|四|五|六|七|八|九) +DateDayRegexInCJK: !nestedRegex + def: (?初一|({DayNumberRegex}|3[01]|[0-2]?\d)(\s*日|号)(?!かかる|待つ|泊まる|経つ))目? + references: [DayNumberRegex] DayRegexNumInCJK: !simpleRegex def: (?一|十一|二十一|三十一|三十|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|一|十一|十|二十一|二十|廿(?!日市市)|三十一|二|三|四|五|六|七|八|九|十二|十三|十四|十五|十六|十七|十八|十九|二十二|二十三|二十四|二十五|二十六|二十七|二十八|二十九|十|二十|廿|卅) MonthNumRegex: !simpleRegex @@ -69,7 +72,7 @@ SpecialYearRegex: !simpleRegex SpecialDayRegex: !simpleRegex def: ((いっ)?さくじつ|おとつい|最近|前天|后天|明日から二日((?今日)から(?1日半)(の間)?)|((?今日)から(?2日半)(の間)?)|昨日の2日前|昨日から4日|今日から二日|今日から4日|昨日から2日間|昨天|明天|今天|今日|明日|一?昨?昨日|一昨日|大后天|大前天|後天|大後天|きょう|あす|あした|きのう|明々後日|(弥)?明後日|この日|前日|二日前|おととい|し?あさって|私の一日|この間|次の日|その日|最後の日) SpecialDayWithNumRegex: !simpleRegex - def: ((いっ)?さくじつ|おとつい|最近|前天|后天|昨天|明天|今天|今日?|明日|一?昨?昨日|一昨日|大后天|大前天|後天|大後天|きょう|あす|あした|きのう|明々後日|(弥)?明後日|この日|前日|二日前|おととい|し?あさって|私の一日|この間|次の日|その日)(から|の)?([\d十一二三四五六七八九]*|数)(日|月|週|個)間?(先|後|前|以内)?(の(?日曜日?|月曜日?|火曜日?|水曜日?|木曜日?|金曜日?|土曜日?))? + def: ((いっ)?さくじつ|おとつい|最近|前天|后天|昨天|明天|今天|今日?|明日|一?昨?昨日|一昨日|大后天|大前天|後天|大後天|きょう|あす|あした|きのう|明々後日|(弥)?明後日|この日|前日|二日前|おととい|し?あさって|私の一日|この間|次の日|その日)(から|の)?([\d十一二三四五六七八九]*|数)(日|月|週(間で)?|個)間?(先|後|前)?(の(?日曜日?|月曜日?|火曜日?|水曜日?|木曜日?|金曜日?|土曜日?))? WeekDayOfMonthRegex: !nestedRegex def: ((({SpecialMonthRegex}|{MonthRegex}|{MonthNumRegex}|((这个|这一个|这|这一|本|今|上个|上一个|上|上一|去|下个|下一个|下|下一|明)月))(的|の)?\s*)?(第|最)?(?([初一二三四五])|最後|最終|([1-5])|最后一)(个|の|\s)*{WeekDayRegex}) references: [SpecialMonthRegex, MonthRegex, MonthNumRegex, WeekDayRegex] @@ -89,11 +92,11 @@ SpecialDate: !nestedRegex def: (?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})年)?(の|的)?(?({ThisPrefixRegex}|{LastPrefixRegex}|{NextPrefixRegex})(の|的)?月)?(の|的)?{DateDayRegexInCJK} references: [ThisPrefixRegex, LastPrefixRegex, NextPrefixRegex, DateDayRegexInCJK] DateUnitRegex: !simpleRegex - def: (?年|个月|月|周|時間?|(?営業)日|(?年|个月|月|周|(?営業)日|(?再来)|下个|下一个|下|下一|最初|来|向こう|これから(の)?|翌|今後|次(の)? + def: (?再来|以降)|下个|下一个|下|下一|最初|来|向こう|これから(の)?|翌|今後|次(の)? DateRangePrepositions: !simpleRegex def: ((こ|私の|その|この|これらの|それらの)\s*)? RelativeMonthRegex: !nestedRegex @@ -251,7 +254,7 @@ FirstLastOfYearRegex: !nestedRegex def: (({DatePeriodYearInCJKRegex}|{YearRegex}|(?再来年|翌年|来年|今年|去年))的?)((?前)|(?(最后|最後|最終))) references: [YearRegex,DatePeriodYearInCJKRegex] ComplexDatePeriodRegex: !nestedRegex - def: ({DateRangePrepositions})(?(第{ZeroToNineIntegerRegexCJK}+|第\d+|{DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex}|(({YearNumRegex}|{DayRegexForPeriod}|{DateDayRegexInCJK}|{MonthRegex}|{MonthNumRegex})(?!((\d+)?(分|秒|時))))|{RelativeRegex}).+)(から)(?.+)(までの間|まで|の間) + def: ({DateRangePrepositions})(?(第{ZeroToNineIntegerRegexCJK}+|第\d+|{DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex}|(({YearNumRegex}|{DayRegexForPeriod}|{DateDayRegexInCJK}|{MonthRegex}|{MonthNumRegex})(?!((\d+)?(分|秒|時))))|{RelativeRegex}).+)(から)(?.+)(までの間|(?(この|時前|(?昨日の?(午前|午後|中|夜|朝)?) TodayRegex: !simpleRegex def: (?(今朝の?|今朝の午前|今晩|今晚|今早|今晨|明晚|明早|明晨|昨晚|今夜|昨夜)(的|在)?) +FromNowRegex: !simpleRegex + def: ((?今)から) SpecialDayHourRegex: !nestedRegex def: ((?{TimeHourCJKRegex}|{TimeHourNumRegex})(時間?|(:00))) references: [TimeHourCJKRegex, TimeHourNumRegex] @@ -305,13 +310,13 @@ SpecialDaySecondRegex: !nestedRegex def: ((?{TimeSecondCJKRegex}|{TimeSecondNumRegex})秒間?) references: [TimeSecondCJKRegex, TimeSecondNumRegex] SpecialDayModRegex: !simpleRegex - def: ((?過ぎに)|(?ごろ)|(?で|の?うちに)|(?弱|たらず)|(?以上)) + def: ((?過ぎに|以降)|(?ごろ)|(?で|の?うちに)|(?弱|たらず)|(?以上)) SpecialDayEndOfRegex: !nestedRegex def: ((?明日の終わり|({WeekDayRegex}の?終わり))|(?日の終わり|一日の終わり|その日の終わり)) references: [WeekDayRegex] TimeOfSpecialDayRegex: !nestedRegex - def: (({SpecialDayEndOfRegex}|{WeekDayRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex})(\d日)?(と)?(({SpecialDayHourRegex}{SpecialDayMinuteRegex}?{SpecialDaySecondRegex}?)|({SpecialDayMinuteRegex}{SpecialDaySecondRegex}?)){SpecialDayModRegex}?)|(({SpecialDayHourRegex}(?で|の?うちに)))|(({SpecialDayEndOfRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex}){SpecialDayModRegex}?)|({WeekDayRegex}(\d日)?(と)?{SpecialDayModRegex}) - references: [ TomorrowRegex, YesterdayRegex, TodayRegex, WeekDayRegex, SpecialDayEndOfRegex, SpecialDayHourRegex, SpecialDayMinuteRegex, SpecialDaySecondRegex, SpecialDayModRegex ] + def: (({SpecialDayEndOfRegex}|{WeekDayRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex})(\d日)?(と)?(({SpecialDayHourRegex}{SpecialDayMinuteRegex}?{SpecialDaySecondRegex}?)|({SpecialDayMinuteRegex}{SpecialDaySecondRegex}?)){SpecialDayModRegex}?)|(({SpecialDayHourRegex}(?で|の?うちに)))|(({SpecialDayEndOfRegex}|{TomorrowRegex}|{YesterdayRegex}|あと|{TodayRegex}){SpecialDayModRegex}?)|({WeekDayRegex}(\d日)?(と)?{SpecialDayModRegex})|({FromNowRegex}\d+(分|時|秒)後) + references: [ TomorrowRegex, YesterdayRegex, TodayRegex, WeekDayRegex, SpecialDayEndOfRegex, SpecialDayHourRegex, SpecialDayMinuteRegex, SpecialDaySecondRegex, SpecialDayModRegex, FromNowRegex ] NowTimeRegex: !simpleRegex def: (现在|今) RecentlyTimeRegex: !simpleRegex @@ -372,7 +377,7 @@ DurationHalfRegex: !simpleRegex # TODO: modify below regex according to the counterpart in Korean def: ^[.] DurationRelativeDurationUnitRegex: !simpleRegex - def: (?数ヶ|数)|(?前|昨日)|(?後|明日)|(?もう) + def: (?数ヶ|数)|(?(?以内)|(?後|明日)|(?(?(?(?{DateUnitRegex}|分|秒|時間|まる(ひと)?|もう|数|以上|たらず) - references: [DateUnitRegex] + def: (?年|个月|月|周|時間?|(?営業)日|天|週間?|星期|个星期|か月|(?[と]?|,)\s*$ ConnectorRegex: !simpleRegex @@ -446,7 +451,7 @@ SetEachDateUnitRegex: !simpleRegex def: (毎)(年|月|週)\s*$ #TimeExtractorCJK TimeHourNumRegex: !simpleRegex - def: ([0-1]?\d|2[0-4]) + def: (?{TimeHourNumRegex}):(?{TimeMinuteNumRegex})(:(?{TimeSecondNumRegex}))?(am|pm)? - references: [TimeHourNumRegex, TimeMinuteNumRegex, TimeSecondNumRegex] + def: (?{TimeHourNumRegex}):(?{TimeMinuteNumRegex})(:(?{TimeSecondNumRegex}))?({AmPmDescRegex})? + references: [TimeHourNumRegex, TimeMinuteNumRegex, TimeSecondNumRegex, AmPmDescRegex] TimeDayDescRegex: !nestedRegex def: (?(正午|夜中|午前半ば|(昼食時)|真昼)|((?<=({TimeDigitTimeRegex}|{TimeCJKTimeRegex})(の)?)(早朝(に)?|午後(に)?|晚|晩|(深)?夜(に)?|未明|午前(中)?|日中|白昼|(未|早)?朝(に)?|昼前に|昼すぎに|夕方前に|夕方に|営業時間内に|昼))|((早朝(に)?|午後(に)?|晚|晩|(深)?夜(に)?|未明|午前(中)?|日中|白昼|(未|早)?朝(に)?|昼前に|昼すぎに|夕方前に|夕方に|営業時間内に|昼)(?=(の)?({TimeDigitTimeRegex}|{TimeCJKTimeRegex})))) references: [TimeDigitTimeRegex, TimeCJKTimeRegex] @@ -541,11 +546,11 @@ AmbiguousRangeModifierPrefix: !simpleRegex ReferenceDatePeriodRegex: !simpleRegex def: (同じ|その)(?月|週末|年|週) ParserConfigurationBefore: !simpleRegex - def: (之前|以前|前) + def: (またはその前|またはそれ以前|之前|以前|前) ParserConfigurationAfter: !simpleRegex - def: (之后|之後|以后|以後|后|後) + def: (またはそれ以降|之后|之後|以后|以後|后|後|以降) ParserConfigurationUntil: !simpleRegex - def: (直到|直至|截至|截止(到)?) + def: (まで|直到|直至|截至|截止(到)?) ParserConfigurationSincePrefix: !simpleRegex def: (自从|自|自打|打) ParserConfigurationSinceSuffix: !simpleRegex @@ -955,13 +960,17 @@ DateTimePeriodNIRegex: !simpleRegex def: (半夜|夜间|深夜|夜) AmbiguityFiltersDict: !dictionary types: [ string, string ] - # TODO: populate dictionary according to the counterpart in Chinese entries: '早': '(?