Skip to content

Commit

Permalink
- fix offsets, use relativedelta
Browse files Browse the repository at this point in the history
- handle X {time unit} earlier
- handle fractional decades/centuries/millennia
- handle decades/centuries/millennia ago
- add failing tests for extract last N {timeunit}
  • Loading branch information
JarbasAl committed Feb 23, 2022
1 parent 9128b96 commit 46518a7
Show file tree
Hide file tree
Showing 2 changed files with 189 additions and 63 deletions.
55 changes: 34 additions & 21 deletions lingua_franca/lang/parse_en.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,7 @@ def clean_string(s):
# normalize and lowercase utt (replaces words with numbers)
s = _convert_words_to_numbers_en(s, ordinals=None)
# clean unneeded punctuation and capitalization among other things.
s = s.lower().replace('?', '').replace('.', '').replace(',', '') \
s = s.lower().replace('?', '').replace(',', '') \
.replace(' the ', ' ').replace(' a ', ' ').replace(' an ', ' ') \
.replace("o' clock", "o'clock").replace("o clock", "o'clock") \
.replace("o ' clock", "o'clock").replace("o 'clock", "o'clock") \
Expand Down Expand Up @@ -696,6 +696,8 @@ def date_found():
timeQualifiersPM = ['afternoon', 'evening', 'night', 'tonight']
timeQualifiersList = set(timeQualifiersAM + timeQualifiersPM)
past_markers = ["last", "past"]
earlier_markers = ["ago", "earlier"]
later_markers = ["after", "later"]
future_markers = ["in", "within"] # in a month -> + 1 month timedelta
future_1st_markers = ["next"] # next month -> day 1 of next month
markers = ['at', 'in', 'on', 'by', 'this', 'around', 'for', 'of', "within"]
Expand Down Expand Up @@ -726,27 +728,38 @@ def date_found():
start = idx
used = 0
# save timequalifier for later
if word == "ago" and dayOffset:
if word in earlier_markers and dayOffset:
dayOffset = - dayOffset
used += 1
if word == "now" and not datestr:
elif word == "now" and not datestr:
resultStr = " ".join(words[idx + 1:])
resultStr = ' '.join(resultStr.split())
extractedDate = anchorDate.replace(microsecond=0)
return [extractedDate, resultStr]
elif wordNext in year_multiples:
multiplier = None
if is_numeric(word):
multiplier = extract_number_en(word)
try:
multiplier = float(word)
except:
multiplier = extract_number_en(word)
multiplier = multiplier or 1
_leftover = "0"
if int(multiplier) != multiplier:
multiplier, _leftover = str(multiplier).split(".")
multiplier = int(multiplier)

used += 2
if wordNext == "decade":
yearOffset = multiplier * 10
yearOffset = multiplier * 10 + int(_leftover[:1])
elif wordNext == "century":
yearOffset = multiplier * 100
yearOffset = multiplier * 100 + int(_leftover[:2]) * 10
elif wordNext == "millennium":
yearOffset = multiplier * 1000
yearOffset = multiplier * 1000 + int(_leftover[:3]) * 100

if wordNextNext in earlier_markers:
yearOffset = yearOffset * -1

# couple of
elif word == "2" and wordNext == "of" and \
wordNextNext in year_multiples:
Expand Down Expand Up @@ -797,7 +810,7 @@ def date_found():
start -= 1
used += 1
# parse 5 days, 10 weeks, last week, next week
elif word == "day" and wordNext != "ago":
elif word == "day" and wordNext not in earlier_markers:
if wordPrev and wordPrev[0].isdigit():
dayOffset += int(wordPrev)
start -= 1
Expand All @@ -813,7 +826,7 @@ def date_found():
start -= 1
used = 2
# parse X days ago
elif word == "day" and wordNext == "ago":
elif word == "day" and wordNext in earlier_markers:
if wordPrev and wordPrev[0].isdigit():
dayOffset -= int(wordPrev)
start -= 1
Expand All @@ -822,7 +835,7 @@ def date_found():
dayOffset -= 1
used = 2
# parse last/past/next week and in/after X weeks
elif word == "week" and not fromFlag and wordPrev and wordNext != "ago":
elif word == "week" and not fromFlag and wordPrev and wordNext not in earlier_markers:
if wordPrev[0].isdigit():
dayOffset += int(wordPrev) * 7
start -= 1
Expand All @@ -842,7 +855,7 @@ def date_found():
start -= 1
used = 2
# parse X weeks ago
elif word == "week" and not fromFlag and wordNext == "ago":
elif word == "week" and not fromFlag and wordNext in earlier_markers:
if wordPrev[0].isdigit():
dayOffset -= int(wordPrev) * 7
start -= 1
Expand All @@ -851,7 +864,7 @@ def date_found():
dayOffset -= 7
used = 2
# parse last/past/next weekend and in/after X weekends
elif word == "weekend" and not fromFlag and wordPrev and wordNext != "ago":
elif word == "weekend" and not fromFlag and wordPrev and wordNext not in earlier_markers:
# in/after X weekends
if wordPrev[0].isdigit():
n = int(wordPrev)
Expand Down Expand Up @@ -881,7 +894,7 @@ def date_found():
start -= 1
used = 2
# parse X weekends ago
elif word == "weekend" and not fromFlag and wordNext == "ago":
elif word == "weekend" and not fromFlag and wordNext in earlier_markers:
dayOffset -= wkday + 3 # past friday "one weekend ago"
used = 2
# X weekends ago
Expand All @@ -891,7 +904,7 @@ def date_found():
start -= 1
used = 3
# parse 10 months, next month, last month
elif word == "month" and not fromFlag and wordPrev and wordNext != "ago":
elif word == "month" and not fromFlag and wordPrev and wordNext not in earlier_markers:
if wordPrev[0].isdigit():
monthOffset = int(wordPrev)
start -= 1
Expand All @@ -911,16 +924,16 @@ def date_found():
monthOffset = -1
start -= 1
used = 2
elif word == "month" and wordNext == "ago":
elif word == "month" and wordNext in earlier_markers:
if wordPrev and wordPrev[0].isdigit():
dayOffset -= int(wordPrev) * 31
monthOffset -= int(wordPrev)
start -= 1
used = 3
else:
dayOffset -= 31
monthOffset -= 1
used = 2
# parse 5 years, next year, last year
elif word == "year" and not fromFlag and wordPrev and wordNext != "ago":
elif word == "year" and not fromFlag and wordPrev and wordNext not in earlier_markers:
if wordPrev[0].isdigit():
yearOffset = int(wordPrev)
start -= 1
Expand All @@ -940,13 +953,13 @@ def date_found():
yearOffset = -1
start -= 1
used = 2
elif word == "year" and wordNext == "ago":
elif word == "year" and wordNext in earlier_markers:
if wordPrev and wordPrev[0].isdigit():
dayOffset -= int(wordPrev) * 365
yearOffset -= int(wordPrev)
start -= 1
used = 3
else:
dayOffset -= 365
yearOffset -= 1
used = 2
# parse Monday, Tuesday, etc., and next Monday,
# last Tuesday, etc.
Expand Down
Loading

0 comments on commit 46518a7

Please sign in to comment.