Skip to content
This repository has been archived by the owner on Dec 4, 2023. It is now read-only.

[SDK][Bot-Dialogs] Update Recognizers-Text internal library #1021

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ private static List<ModelResult<FoundChoice>> recognizeNumbers(String utterance,
return result.stream().map(r ->
new ModelResult<FoundChoice>() {{
setStart(r.start);
setEnd(r.end - 1); // bug in 1.0-SNAPSHOT, should not have to decrement
setEnd(r.end);
setText(r.text);
setResolution(new FoundChoice() {{
setValue(r.resolution.get("value").toString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@ public class ChineseChoice {

public static final String TokenizerRegex = "[^\\u3040-\\u30ff\\u3400-\\u4dbf\\u4e00-\\u9fff\\uf900-\\ufaff\\uff66-\\uff9f]";

public static final String TrueRegex = "(好[的啊呀嘞哇]|没问题|可以|中|好|同意|行|是的|是|对)|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)";
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";

public static final String FalseRegex = "(不行|不好|拒绝|否定|不中|不可以|不是的|不是|不对|不)|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)";
public static final String TrueRegex = "(好[的啊呀嘞哇]|没问题|可以|中|好|同意|行|是的|是|对)|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);

public static final String FalseRegex = "(不行|不好|拒绝|否定|不中|不可以|不是的|不是|不对|不)|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@ public class EnglishChoice {

public static final String TokenizerRegex = "[^\\w\\d]";

public static final String TrueRegex = "\\b(true|yes|yep|yup|yeah|y|sure|ok|agree)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C|\\u0001f44c)";
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";

public static final String FalseRegex = "\\b(false|nope|nop|no|not\\s+ok|disagree)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90|\\u0001F44E|\\u0001F590)";
public static final String TrueRegex = "\\b(true|yes|yep|yup|yeah|y|sure|ok|agree)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C|\\u0001f44c){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);

public static final String FalseRegex = "\\b(false|nope|nop|no|not\\s+ok|disagree)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90|\\u0001F44E|\\u0001F590){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@ public class FrenchChoice {

public static final String TokenizerRegex = "[^\\w\\d\\u00E0-\\u00FC]";

public static final String TrueRegex = "\\b(s[uû]r|ouais|oui|yep|y|sure|approuver|accepter|consentir|d'accord|ça march[eé])\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)";
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";

public static final String FalseRegex = "\\b(faux|nan|non|pas\\s+d'accord|pas\\s+concorder|n'est\\s+pas\\s+(correct|ok)|pas)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)";
public static final String TrueRegex = "\\b(s[uû]r|ouais|oui|yep|y|sure|approuver|accepter|consentir|d'accord|ça march[eé])\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);

public static final String FalseRegex = "\\b(faux|nan|non|pas\\s+d'accord|pas\\s+concorder|n'est\\s+pas\\s+(correct|ok)|pas)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@ public class PortugueseChoice {

public static final String TokenizerRegex = "[^\\w\\d\\u00E0-\\u00FC]";

public static final String TrueRegex = "\\b(verdade|verdadeir[oa]|sim|isso|claro|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)";
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";

public static final String FalseRegex = "\\b(falso|n[aã]o|incorreto|nada disso)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)";
public static final String TrueRegex = "\\b(verdade|verdadeir[oa]|sim|isso|claro|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);

public static final String FalseRegex = "\\b(falso|n[aã]o|incorreto|nada disso)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@ public class SpanishChoice {

public static final String TokenizerRegex = "[^\\w\\d\\u00E0-\\u00FC]";

public static final String TrueRegex = "\\b(verdad|verdadero|sí|sip|s|si|cierto|por supuesto|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C)";
public static final String SkinToneRegex = "(\\uD83C\\uDFFB|\\uD83C\\uDFFC|\\uD83C\\uDFFD|\\uD83C\\uDFFE|\\uD83C\\uDFFF)";

public static final String FalseRegex = "\\b(falso|no|nop|n|no)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90)";
public static final String TrueRegex = "\\b(verdad|verdadero|sí|sip|s|si|cierto|por supuesto|ok)\\b|(\\uD83D\\uDC4D|\\uD83D\\uDC4C){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);

public static final String FalseRegex = "\\b(falso|no|nop|n|no)\\b|(\\uD83D\\uDC4E|\\u270B|\\uD83D\\uDD90){SkinToneRegex}?"
.replace("{SkinToneRegex}", SkinToneRegex);
}
Original file line number Diff line number Diff line change
Expand Up @@ -1231,8 +1231,11 @@ public class EnglishDateTime {

public static final ImmutableMap<String, Integer> DayOfMonth = ImmutableMap.<String, Integer>builder()
.put("1st", 1)
.put("1th", 1)
.put("2nd", 2)
.put("2th", 2)
.put("3rd", 3)
.put("3th", 3)
.put("4th", 4)
.put("5th", 5)
.put("6th", 6)
Expand Down Expand Up @@ -1268,8 +1271,11 @@ public class EnglishDateTime {
.put("30th", 30)
.put("31st", 31)
.put("01st", 1)
.put("01th", 1)
.put("02nd", 2)
.put("02th", 2)
.put("03rd", 3)
.put("03th", 3)
.put("04th", 4)
.put("05th", 5)
.put("06th", 6)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,16 @@ public class SpanishDateTime {

public static final String PmTimeRegex = "(?<pm>(esta|(por|de|a|en)\\s+la)\\s+(tarde|noche))";

public static final String NightTimeRegex = "(noche)";

public static final String LastNightTimeRegex = "(anoche)";

public static final String NowTimeRegex = "(ahora|mismo|momento)";

public static final String RecentlyTimeRegex = "(mente)";

public static final String AsapTimeRegex = "(posible|pueda[ns]?|podamos)";

public static final String LessThanOneHour = "(?<lth>((\\s+y\\s+)?cuarto|(\\s*)menos cuarto|(\\s+y\\s+)media|{BaseDateTime.DeltaMinuteRegex}(\\s+(minutos?|mins?))|{DeltaMinuteNumRegex}(\\s+(minutos?|mins?))))"
.replace("{BaseDateTime.DeltaMinuteRegex}", BaseDateTime.DeltaMinuteRegex)
.replace("{DeltaMinuteNumRegex}", DeltaMinuteNumRegex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public class SpanishDateTimeParserConfiguration extends BaseOptionsConfiguration
public final Pattern nowRegex;
public final Pattern amTimeRegex;
public final Pattern pmTimeRegex;
public final Pattern lastNightTimeRegex;
public final Pattern simpleTimeOfTodayAfterRegex;
public final Pattern simpleTimeOfTodayBeforeRegex;
public final Pattern specificTimeOfDayRegex;
Expand Down Expand Up @@ -80,6 +81,7 @@ public SpanishDateTimeParserConfiguration(ICommonDateTimeParserConfiguration con

pmTimeRegex = RegExpUtility.getSafeRegExp(SpanishDateTime.PmRegex);
amTimeRegex = RegExpUtility.getSafeRegExp(SpanishDateTime.AmTimeRegex);
lastNightTimeRegex = RegExpUtility.getSafeRegExp(SpanishDateTime.LastNightTimeRegex);
}

@Override
Expand Down Expand Up @@ -122,12 +124,18 @@ public int getSwiftDay(String text) {
Matcher regexMatcher = SpanishDatePeriodParserConfiguration.previousPrefixRegex.matcher(trimmedText);

int swift = 0;

if (regexMatcher.find()) {
swift = 1;
swift = -1;
} else {
regexMatcher = SpanishDatePeriodParserConfiguration.nextPrefixRegex.matcher(trimmedText);
regexMatcher = this.lastNightTimeRegex.matcher(trimmedText);
if (regexMatcher.find()) {
swift = -1;
} else {
regexMatcher = SpanishDatePeriodParserConfiguration.nextPrefixRegex.matcher(trimmedText);
if (regexMatcher.find()) {
swift = 1;
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ public int getSwiftPrefix(String text) {
Matcher regexMatcher = regex.matcher(trimmedText);

int swift = 0;
if (regexMatcher.find() || trimmedText.equals("anoche")) {
if (regexMatcher.find() || trimmedText.startsWith("anoche")) {
swift = -1;
} else {
regex = Pattern.compile(SpanishDateTime.NextPrefixRegex);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.microsoft.recognizers.text.datetime.utilities;

import com.google.common.collect.ImmutableMap;
import com.microsoft.recognizers.datatypes.timex.expression.TimexHelpers;
import com.microsoft.recognizers.text.datetime.Constants;
import com.microsoft.recognizers.text.datetime.DatePeriodTimexType;
import com.microsoft.recognizers.text.datetime.DateTimeResolutionKey;
Expand All @@ -14,6 +15,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class TimexUtility {
private static final HashMap<DatePeriodTimexType, String> DatePeriodTimexTypeToTimexSuffix = new HashMap<DatePeriodTimexType, String>() {
Expand All @@ -28,33 +30,14 @@ public class TimexUtility {
public static String generateCompoundDurationTimex(Map<String, String> unitToTimexComponents, ImmutableMap<String, Long> unitValueMap) {
List<String> unitList = new ArrayList<>(unitToTimexComponents.keySet());
unitList.sort((x, y) -> unitValueMap.get(x) < unitValueMap.get(y) ? 1 : -1);
boolean isTimeDurationAlreadyExist = false;
StringBuilder timexBuilder = new StringBuilder(Constants.GeneralPeriodPrefix);

for (String unitKey : unitList) {
String timexComponent = unitToTimexComponents.get(unitKey);

// The Time Duration component occurs first time
if (!isTimeDurationAlreadyExist && isTimeDurationTimex(timexComponent)) {
timexBuilder.append(Constants.TimeTimexPrefix);
timexBuilder.append(getDurationTimexWithoutPrefix(timexComponent));
isTimeDurationAlreadyExist = true;
} else {
timexBuilder.append(getDurationTimexWithoutPrefix(timexComponent));
}
}
return timexBuilder.toString();
unitList = unitList.stream().map(t -> unitToTimexComponents.get(t)).collect(Collectors.toList());
return TimexHelpers.generateCompoundDurationTimex(unitList);
}

private static boolean isTimeDurationTimex(String timex) {
private static Boolean isTimeDurationTimex(String timex) {
return timex.startsWith(Constants.GeneralPeriodPrefix + Constants.TimeTimexPrefix);
}

private static String getDurationTimexWithoutPrefix(String timex) {
// Remove "PT" prefix for TimeDuration, Remove "P" prefix for DateDuration
return timex.substring(isTimeDurationTimex(timex) ? 2 : 1);
}

public static String getDatePeriodTimexUnitCount(LocalDateTime begin, LocalDateTime end,
DatePeriodTimexType timexType, Boolean equalDurationLength) {
String unitCount = "XX";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.microsoft.recognizers.datatypes.timex.expression;

/**
 * String and numeric constants shared by the TIMEX expression data types.
 *
 * <p>This class only holds constants; it is not meant to be instantiated or extended.
 */
public final class Constants {

    // ---- TIMEX unit designators ----
    public static final String TIMEX_YEAR = "Y";
    /** Month designator. NOTE: this is the same literal as {@link #TIMEX_MINUTE}. */
    public static final String TIMEX_MONTH = "M";
    public static final String TIMEX_MONTH_FULL = "MON";
    public static final String TIMEX_WEEK = "W";
    public static final String TIMEX_DAY = "D";
    public static final String TIMEX_BUSINESS_DAY = "BD";
    public static final String TIMEX_WEEKEND = "WE";
    public static final String TIMEX_HOUR = "H";
    /** Minute designator. NOTE: this is the same literal as {@link #TIMEX_MONTH}. */
    public static final String TIMEX_MINUTE = "M";
    public static final String TIMEX_SECOND = "S";
    public static final String TIMEX_NIGHT = "NI";

    // ---- Placeholders for unspecified ("fuzzy") components ----
    public static final Character TIMEX_FUZZY = 'X';
    public static final String TIMEX_FUZZY_YEAR = "XXXX";
    public static final String TIMEX_FUZZY_MONTH = "XX";
    public static final String TIMEX_FUZZY_WEEK = "WXX";
    public static final String TIMEX_FUZZY_DAY = "XX";

    // ---- Connectors and prefixes ----
    public static final String DATE_TIMEX_CONNECTOR = "-";
    public static final String TIME_TIMEX_CONNECTOR = ":";
    public static final String GENERAL_PERIOD_PREFIX = "P";
    public static final String TIME_TIMEX_PREFIX = "T";

    // ---- Unit names ----
    public static final String YEAR_UNIT = "year";
    public static final String MONTH_UNIT = "month";
    public static final String WEEK_UNIT = "week";
    public static final String DAY_UNIT = "day";
    public static final String HOUR_UNIT = "hour";
    public static final String MINUTE_UNIT = "minute";
    public static final String SECOND_UNIT = "second";
    public static final String TIME_DURATION_UNIT = "s";

    // ---- Meridiem markers ----
    public static final String AM = "AM";
    public static final String PM = "PM";

    /** Sentinel integer used to mark an invalid value. */
    public static final int INVALID_VALUE = -1;

    // Constants holder: prevent instantiation.
    private Constants() {
    }

    /**
     * Names of the TIMEX types.
     */
    public static final class TimexTypes {
        public static final String PRESENT = "present";
        public static final String DEFINITE = "definite";
        public static final String DATE = "date";
        public static final String DATE_TIME = "datetime";
        public static final String DATE_RANGE = "daterange";
        public static final String DURATION = "duration";
        public static final String TIME = "time";
        public static final String TIME_RANGE = "timerange";
        public static final String DATE_TIME_RANGE = "datetimerange";

        // Constants holder: prevent instantiation.
        private TimexTypes() {
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.microsoft.recognizers.datatypes.timex.expression;

import java.time.LocalDateTime;

public class DateRange {
private LocalDateTime start;
private LocalDateTime end;

public LocalDateTime getStart() {
return start;
}

public void setStart(LocalDateTime withStart) {
this.start = withStart;
}

public LocalDateTime getEnd() {
return end;
}

public void setEnd(LocalDateTime withEnd) {
this.end = withEnd;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

package com.microsoft.recognizers.datatypes.timex.expression;

import java.util.ArrayList;
import java.util.List;

/**
 * Container for a list of resolution {@link Entry} values.
 */
public class Resolution {
    private List<Entry> values;

    /**
     * Creates a resolution with an empty, mutable list of entries.
     */
    public Resolution() {
        values = new ArrayList<>();
    }

    /**
     * Gets the entries of this resolution.
     *
     * @return the backing list itself (not a copy), so callers may add to it
     */
    public List<Entry> getValues() {
        return values;
    }

    /**
     * A single resolution entry: five independent string properties,
     * each {@code null} until set.
     */
    public static class Entry {
        private String timex;
        private String type;
        private String value;
        private String start;
        private String end;

        /**
         * Gets the timex string.
         *
         * @return the stored timex, or {@code null} if unset
         */
        public String getTimex() {
            return this.timex;
        }

        /**
         * Sets the timex string.
         *
         * @param withTimex the timex to store
         */
        public void setTimex(String withTimex) {
            timex = withTimex;
        }

        /**
         * Gets the type.
         *
         * @return the stored type, or {@code null} if unset
         */
        public String getType() {
            return this.type;
        }

        /**
         * Sets the type.
         *
         * @param withType the type to store
         */
        public void setType(String withType) {
            type = withType;
        }

        /**
         * Gets the value.
         *
         * @return the stored value, or {@code null} if unset
         */
        public String getValue() {
            return this.value;
        }

        /**
         * Sets the value.
         *
         * @param withValue the value to store
         */
        public void setValue(String withValue) {
            value = withValue;
        }

        /**
         * Gets the start.
         *
         * @return the stored start, or {@code null} if unset
         */
        public String getStart() {
            return this.start;
        }

        /**
         * Sets the start.
         *
         * @param withStart the start to store
         */
        public void setStart(String withStart) {
            start = withStart;
        }

        /**
         * Gets the end.
         *
         * @return the stored end, or {@code null} if unset
         */
        public String getEnd() {
            return this.end;
        }

        /**
         * Sets the end.
         *
         * @param withEnd the end to store
         */
        public void setEnd(String withEnd) {
            end = withEnd;
        }
    }
}
Loading