Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[.NET] Integration of new mode termed "TasksMode" in DatetimeOption.cs #2972

Merged
merged 15 commits into from
Jun 7, 2022
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -257,5 +257,13 @@ public void DateTimeModelExperimentalMode(TestModel testSpec)
{
TestDateTimeAlt(testSpec);
}

/// <summary>
/// Test entry point named for the TasksMode date-time model; forwards the spec
/// to <c>TestDateTimeAlt</c> unchanged.
/// </summary>
/// <param name="testSpec">Test case handed straight to <c>TestDateTimeAlt</c>.</param>
[NetCoreTestDataSource]
[TestMethod]
public void DateTimeModelTasksMode(TestModel testSpec) => TestDateTimeAlt(testSpec);

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ public enum Models
DateTimeExtendedTypes,
DateTimeComplexCalendar,
DateTimeExperimentalMode,
DateTimeTasksMode,
PhoneNumber,
IpAddress,
Mention,
Expand Down Expand Up @@ -121,6 +122,7 @@ public static class TestContextExtensions
{ Models.DateTimeExtendedTypes, (test, culture) => DateTimeRecognizer.RecognizeDateTime(test.Input, culture, DateTimeOptions.ExtendedTypes, refTime: test.GetReferenceDateTime(), fallbackToDefaultCulture: false) },
{ Models.DateTimeComplexCalendar, (test, culture) => DateTimeRecognizer.RecognizeDateTime(test.Input, culture, DateTimeOptions.ExtendedTypes | DateTimeOptions.CalendarMode | DateTimeOptions.EnablePreview, refTime: test.GetReferenceDateTime(), fallbackToDefaultCulture: false) },
{ Models.DateTimeExperimentalMode, (test, culture) => DateTimeRecognizer.RecognizeDateTime(test.Input, culture, DateTimeOptions.ExperimentalMode, refTime: test.GetReferenceDateTime(), fallbackToDefaultCulture: false) },
{ Models.DateTimeTasksMode, (test, culture) => DateTimeRecognizer.RecognizeDateTime(test.Input, culture, options: DateTimeOptions.TasksMode, refTime: test.GetReferenceDateTime(), fallbackToDefaultCulture: false) },
{ Models.PhoneNumber, (test, culture) => SequenceRecognizer.RecognizePhoneNumber(test.Input, culture, fallbackToDefaultCulture: false) },
{ Models.IpAddress, (test, culture) => SequenceRecognizer.RecognizeIpAddress(test.Input, culture, fallbackToDefaultCulture: false) },
{ Models.Mention, (test, culture) => SequenceRecognizer.RecognizeMention(test.Input, culture, fallbackToDefaultCulture: false) },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ public class ArabicMergedExtractorConfiguration : BaseDateTimeOptionsConfigurati
public static readonly Regex FailFastRegex =
new Regex(DateTimeDefinitions.FailFastRegex, RegexFlags | RegexOptions.Compiled);

public static readonly Regex YearRegex =
new Regex(DateTimeDefinitions.YearRegex, RegexFlags);

public static readonly Regex[] TermFilterRegexes =
{
// one on one
Expand Down Expand Up @@ -162,6 +165,8 @@ public ArabicMergedExtractorConfiguration(IDateTimeOptionsConfiguration config)

Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => UnspecificTimePeriodRegex;

Regex IMergedExtractorConfiguration.YearRegex => YearRegex;

Regex IMergedExtractorConfiguration.FailFastRegex => FailFastRegex;

IEnumerable<Regex> IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes;
Expand Down
5 changes: 5 additions & 0 deletions .NET/Microsoft.Recognizers.Text.DateTime/DateTimeOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ public enum DateTimeOptions
/// </summary>
NoProtoCache = 16,

/// <summary>
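/// TasksMode, mode in which an extraction consisting only of a year (e.g. "2005" in "Milk 2005") is skipped instead of being treated as a date range.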
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please update comments

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd also personally prefer to use 2^20 as the value.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any specific reason for using 2^20 as the value?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's the highest available value.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The file doesn't seem to have been updated here.

/// </summary>
TasksMode = 32,

/// <summary>
/// FailFast, mode that aborts extraction/tagging quickly for non-entity cases. May be removed later.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ public class DutchMergedExtractorConfiguration : BaseDateTimeOptionsConfiguratio
public static readonly Regex PotentialAmbiguousRangeRegex =
new Regex(DateTimeDefinitions.PotentialAmbiguousRangeRegex, RegexFlags);

public static readonly Regex YearRegex =
new Regex(DateTimeDefinitions.YearRegex, RegexFlags);

public static readonly Regex[] TermFilterRegexes =
{
// one on one
Expand Down Expand Up @@ -152,6 +155,8 @@ public DutchMergedExtractorConfiguration(IDateTimeOptionsConfiguration config)

Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null;

Regex IMergedExtractorConfiguration.YearRegex => YearRegex;

public Regex FailFastRegex { get; } = null;

IEnumerable<Regex> IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ public class EnglishMergedExtractorConfiguration : BaseDateTimeOptionsConfigurat
public static readonly Regex FailFastRegex =
new Regex(DateTimeDefinitions.FailFastRegex, RegexFlags | RegexOptions.Compiled);

// Used in TasksMode to skip extractions that consist only of a year
public static readonly Regex YearRegex =
new Regex(DateTimeDefinitions.YearRegex, RegexFlags);

public static readonly Regex[] TermFilterRegexes =
{
// one on one
Expand Down Expand Up @@ -164,6 +168,8 @@ public EnglishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config)

Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => UnspecificTimePeriodRegex;

Regex IMergedExtractorConfiguration.YearRegex => YearRegex;

Regex IMergedExtractorConfiguration.FailFastRegex => FailFastRegex;

IEnumerable<Regex> IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,14 @@ private void AddTo(List<ExtractResult> dst, List<ExtractResult> src, string text
}
}

if ((config.Options & DateTimeOptions.TasksMode) != 0)
{
if (ShouldSkipOnlyYear(result))
{
continue;
}
}

var isFound = false;
var overlapIndexes = new List<int>();
var firstIndex = -1;
Expand Down Expand Up @@ -270,6 +278,15 @@ private bool ShouldSkipFromToMerge(ExtractResult er)
return config.FromToRegex.IsMatch(er.Text);
}

/* Under TasksMode, an extraction whose text consists solely of a year (YearRegex matches the entire extracted text)
is skipped instead of being treated as a date range.
For example, "2005" in a statement like "Milk 2005" should not be treated as a date range
(the year 2005 should be treated as a number only).
*/
private bool ShouldSkipOnlyYear(ExtractResult er)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to do this in this PR, but it would be interesting if this mode-specific behaviour/code were isolated into its own module/strategy.
Please move it along with the new changes in a follow-up PR.

{
return config.YearRegex.Match(er.Text).Value == er.Text;
tellarin marked this conversation as resolved.
Show resolved Hide resolved
}

private List<ExtractResult> FilterUnspecificDatePeriod(List<ExtractResult> ers)
{
ers.RemoveAll(o => this.config.UnspecificDatePeriodRegex.IsMatch(o.Text));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ public interface IMergedExtractorConfiguration : IDateTimeOptionsConfiguration
// Regex to act as umbrella for key terms so that sentences that clearly don't have entities can be rejected quickly
Regex FailFastRegex { get; }

Regex YearRegex { get; }

StringMatcher SuperfluousWordMatcher { get; }

Dictionary<Regex, Regex> AmbiguityFiltersDict { get; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ public class FrenchMergedExtractorConfiguration : BaseDateTimeOptionsConfigurati

public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher();

public static readonly Regex YearRegex =
new Regex(DateTimeDefinitions.YearRegex, RegexFlags);

private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

public FrenchMergedExtractorConfiguration(IDateTimeOptionsConfiguration config)
Expand Down Expand Up @@ -141,6 +144,8 @@ public FrenchMergedExtractorConfiguration(IDateTimeOptionsConfiguration config)

Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null;

Regex IMergedExtractorConfiguration.YearRegex => YearRegex;

public Regex FailFastRegex { get; } = null;

IEnumerable<Regex> IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ public class GermanMergedExtractorConfiguration : BaseDateTimeOptionsConfigurati
public static readonly Regex UnspecificDatePeriodRegex =
new Regex(DateTimeDefinitions.UnspecificDatePeriodRegex, RegexFlags);

public static readonly Regex YearRegex =
new Regex(DateTimeDefinitions.YearRegex, RegexFlags);

public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher();

public static readonly Regex[] TermFilterRegexes =
Expand Down Expand Up @@ -142,6 +145,8 @@ public GermanMergedExtractorConfiguration(IDateTimeOptionsConfiguration config)

Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null;

Regex IMergedExtractorConfiguration.YearRegex => YearRegex;

public Regex FailFastRegex { get; } = null;

IEnumerable<Regex> IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ public class HindiMergedExtractorConfiguration : BaseDateTimeOptionsConfiguratio
public static readonly Regex FailFastRegex =
new Regex(DateTimeDefinitions.FailFastRegex, RegexFlags | RegexOptions.Compiled);

public static readonly Regex YearRegex =
new Regex(DateTimeDefinitions.YearRegex, RegexFlags);

public static readonly Regex[] TermFilterRegexes =
{
// one on one
Expand Down Expand Up @@ -141,6 +144,8 @@ public HindiMergedExtractorConfiguration(IDateTimeOptionsConfiguration config)

Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null;

Regex IMergedExtractorConfiguration.YearRegex => YearRegex;

Regex IMergedExtractorConfiguration.FailFastRegex => FailFastRegex;

IEnumerable<Regex> IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ public class ItalianMergedExtractorConfiguration : BaseDateTimeOptionsConfigurat

public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher();

public static readonly Regex YearRegex =
new Regex(DateTimeDefinitions.YearRegex, RegexFlags);

private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

public ItalianMergedExtractorConfiguration(IDateTimeOptionsConfiguration config)
Expand Down Expand Up @@ -131,6 +134,8 @@ public ItalianMergedExtractorConfiguration(IDateTimeOptionsConfiguration config)

Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null;

Regex IMergedExtractorConfiguration.YearRegex => YearRegex;

public Regex FailFastRegex { get; } = null;

IEnumerable<Regex> IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes;
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ public class PortugueseMergedExtractorConfiguration : BaseDateTimeOptionsConfigu

public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher();

public static readonly Regex YearRegex =
new Regex(DateTimeDefinitions.YearRegex, RegexFlags);

public static readonly Regex[] TermFilterRegexes = System.Array.Empty<Regex>();

private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;
Expand Down Expand Up @@ -139,6 +142,8 @@ public PortugueseMergedExtractorConfiguration(IDateTimeOptionsConfiguration conf

Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null;

Regex IMergedExtractorConfiguration.YearRegex => YearRegex;

public Regex FailFastRegex { get; } = null;

IEnumerable<Regex> IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ public class SpanishMergedExtractorConfiguration : BaseDateTimeOptionsConfigurat

public static readonly StringMatcher SuperfluousWordMatcher = new StringMatcher();

public static readonly Regex YearRegex = new Regex(DateTimeDefinitions.YearRegex, RegexFlags);

private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;

public SpanishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config)
Expand Down Expand Up @@ -146,6 +148,8 @@ public SpanishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config)

Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null;

Regex IMergedExtractorConfiguration.YearRegex => YearRegex;

public Regex FailFastRegex { get; } = null;

IEnumerable<Regex> IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ public class SwedishMergedExtractorConfiguration : BaseDateTimeOptionsConfigurat
public static readonly Regex FailFastRegex =
new Regex(DateTimeDefinitions.FailFastRegex, RegexFlags | RegexOptions.Compiled);

public static readonly Regex YearRegex =
new Regex(DateTimeDefinitions.YearRegex, RegexFlags);

public static readonly Regex[] TermFilterRegexes =
{
// one on one
Expand Down Expand Up @@ -164,6 +167,8 @@ public SwedishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config)

Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => UnspecificTimePeriodRegex;

Regex IMergedExtractorConfiguration.YearRegex => YearRegex;

Regex IMergedExtractorConfiguration.FailFastRegex => FailFastRegex;

IEnumerable<Regex> IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ public class TurkishMergedExtractorConfiguration : BaseDateTimeOptionsConfigurat
public static readonly Regex FailFastRegex =
new Regex(DateTimeDefinitions.FailFastRegex, RegexFlags | RegexOptions.Compiled);

public static readonly Regex YearRegex =
new Regex(DateTimeDefinitions.YearRegex, RegexFlags);

public static readonly Regex[] TermFilterRegexes =
{
// one on one
Expand Down Expand Up @@ -141,6 +144,8 @@ public TurkishMergedExtractorConfiguration(IDateTimeOptionsConfiguration config)

Regex IMergedExtractorConfiguration.UnspecificTimePeriodRegex => null;

Regex IMergedExtractorConfiguration.YearRegex => YearRegex;

Regex IMergedExtractorConfiguration.FailFastRegex => FailFastRegex;

IEnumerable<Regex> IMergedExtractorConfiguration.TermFilterRegexes => TermFilterRegexes;
Expand Down
Loading