Skip to content

Commit

Permalink
New: improve parsing performance by caching compiled regex
Browse files Browse the repository at this point in the history
  • Loading branch information
Wanasit Tanakitrungruang committed Mar 13, 2021
1 parent 98815b5 commit 036f7aa
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 17 deletions.
11 changes: 10 additions & 1 deletion src/common/parsers/AbstractParserWithWordBoundary.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,18 @@ export abstract class AbstractParserWithWordBoundaryChecking implements Parser {
match: RegExpMatchArray
): ParsingComponents | ParsingResult | { [c in Component]?: number } | null;

// Single-entry cache: the last RegExp returned by innerPattern() and the
// word-boundary-prefixed RegExp compiled from it.
private cachedInnerPattern: RegExp | null = null;
private cachedPattern: RegExp | null = null;

/**
 * Returns the inner pattern prefixed with `(\W|^)`, keeping the inner pattern's flags.
 *
 * The compiled result is reused for as long as innerPattern() keeps returning
 * the same RegExp object (compared by identity, not by source text), so a
 * subclass that returns a shared constant pays the compilation cost only once.
 *
 * @param context the parsing context forwarded to innerPattern()
 * @returns the prefixed pattern
 */
pattern(context: ParsingContext): RegExp {
    const innerPattern = this.innerPattern(context);
    if (this.cachedPattern !== null && innerPattern === this.cachedInnerPattern) {
        return this.cachedPattern;
    }

    const pattern = new RegExp(`(\\W|^)${innerPattern.source}`, innerPattern.flags);
    this.cachedInnerPattern = innerPattern;
    this.cachedPattern = pattern;
    return pattern;
}

extract(context: ParsingContext, match: RegExpMatchArray) {
Expand Down
42 changes: 39 additions & 3 deletions src/common/parsers/AbstractTimeExpressionParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ function primaryTimePattern(primaryPrefix: string, primarySuffix: string) {
}

// prettier-ignore
function followingTimeExpression(followingPhase: string, followingSuffix: string) {
function followingTimePatten(followingPhase: string, followingSuffix: string) {
return new RegExp(
`^(${followingPhase})` +
"(\\d{1,4})" +
Expand Down Expand Up @@ -67,7 +67,7 @@ export abstract class AbstractTimeExpressionParser implements Parser {
}

/**
 * Returns the primary time pattern for this parser.
 *
 * Delegates to getPrimaryTimePatternThroughCache() so the RegExp is only
 * recompiled when primaryPrefix() or primarySuffix() return a different value,
 * instead of being rebuilt on every call.
 *
 * @param context the parsing context (not used here)
 */
pattern(context: ParsingContext): RegExp {
    return this.getPrimaryTimePatternThroughCache();
}

extract(context: ParsingContext, match: RegExpMatchArray): ParsingResult {
Expand All @@ -85,7 +85,7 @@ export abstract class AbstractTimeExpressionParser implements Parser {
}

const remainingText = context.text.substring(match.index + match[0].length);
const followingPattern = followingTimeExpression(this.followingPhase(), this.followingSuffix());
const followingPattern = this.getFollowingTimePatternThroughCache();
match = followingPattern.exec(remainingText);
if (
!match ||
Expand Down Expand Up @@ -335,4 +335,40 @@ export abstract class AbstractTimeExpressionParser implements Parser {

return result;
}

// Single-entry cache: the prefix/suffix pair last used to build the primary
// time pattern, and the pattern compiled from that pair.
private cachedPrimaryPrefix: string | null = null;
private cachedPrimarySuffix: string | null = null;
private cachedPrimaryTimePattern: RegExp | null = null;

/**
 * Returns the primary time pattern, recompiling it only when primaryPrefix()
 * or primarySuffix() return a value different from the one cached.
 *
 * @returns the pattern built by primaryTimePattern() for the current prefix and suffix
 */
getPrimaryTimePatternThroughCache(): RegExp {
    const primaryPrefix = this.primaryPrefix();
    const primarySuffix = this.primarySuffix();

    const cached = this.cachedPrimaryTimePattern;
    // Require an actual cached pattern so a cache hit can never hand back null.
    if (cached !== null && this.cachedPrimaryPrefix === primaryPrefix && this.cachedPrimarySuffix === primarySuffix) {
        return cached;
    }

    const pattern = primaryTimePattern(primaryPrefix, primarySuffix);
    this.cachedPrimaryTimePattern = pattern;
    this.cachedPrimaryPrefix = primaryPrefix;
    this.cachedPrimarySuffix = primarySuffix;
    return pattern;
}

// Single-entry cache: the phase/suffix pair last used to build the following
// time pattern, and the pattern compiled from that pair.
private cachedFollowingPhase: string | null = null;
private cachedFollowingSuffix: string | null = null;
private cachedFollowingTimePatten: RegExp | null = null;

/**
 * Returns the following time pattern, recompiling it only when followingPhase()
 * or followingSuffix() return a value different from the one cached.
 *
 * @returns the pattern built by followingTimePatten() for the current phase and suffix
 */
getFollowingTimePatternThroughCache(): RegExp {
    const followingPhase = this.followingPhase();
    const followingSuffix = this.followingSuffix();

    const cached = this.cachedFollowingTimePatten;
    // Require an actual cached pattern so a cache hit can never hand back null.
    if (cached !== null && this.cachedFollowingPhase === followingPhase && this.cachedFollowingSuffix === followingSuffix) {
        return cached;
    }

    const pattern = followingTimePatten(followingPhase, followingSuffix);
    this.cachedFollowingTimePatten = pattern;
    this.cachedFollowingPhase = followingPhase;
    this.cachedFollowingSuffix = followingSuffix;
    return pattern;
}
}
7 changes: 4 additions & 3 deletions src/common/refiners/ExtractTimezoneAbbrRefiner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,8 @@ export default class ExtractTimezoneAbbrRefiner implements Refiner {
}

refine(context: ParsingContext, results: ParsingResult[]): ParsingResult[] {
const timezones = { ...this.timezone, ...context.option.timezones };
const timezoneOverrides = context.option.timezones ?? {};

results.forEach((result) => {
const suffix = context.text.substring(result.index + result.text.length);
const match = TIMEZONE_NAME_PATTERN.exec(suffix);
Expand All @@ -214,11 +215,11 @@ export default class ExtractTimezoneAbbrRefiner implements Refiner {
}

const timezoneAbbr = match[1].toUpperCase();
if (timezones[timezoneAbbr] === undefined) {
const extractedTimezoneOffset = timezoneOverrides[timezoneAbbr] ?? this.timezone[timezoneAbbr] ?? null;
if (extractedTimezoneOffset === null) {
return;
}

const extractedTimezoneOffset = timezones[timezoneAbbr];
context.debug(() => {
console.log(`Extracting timezone: '${timezoneAbbr}' into : ${extractedTimezoneOffset}`);
});
Expand Down
22 changes: 12 additions & 10 deletions src/locales/en/parsers/ENTimeUnitWithinFormatParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,20 @@ import { ParsingContext } from "../../../chrono";
import { ParsingComponents } from "../../../results";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";

// Case-insensitive pattern for a time-unit expression introduced by "within", "in" or "for".
// An optional approximation word (about/around/roughly/approximately/just) and an optional "~"
// may precede the captured units; the trailing lookahead requires a non-word character or
// end of input right after them. TIME_UNITS_PATTERN is defined outside this view.
const PATTERN_WITH_PREFIX = new RegExp(
`(?:within|in|for)\\s*` +
`(?:(?:about|around|roughly|approximately|just)\\s*(?:~\\s*)?)?(${TIME_UNITS_PATTERN})(?=\\W|$)`,
"i"
);

// Same as PATTERN_WITH_PREFIX but without the leading "within|in|for" group.
const PATTERN_WITHOUT_PREFIX = new RegExp(
`(?:(?:about|around|roughly|approximately|just)\\s*(?:~\\s*)?)?(${TIME_UNITS_PATTERN})(?=\\W|$)`,
"i"
);

export default class ENTimeUnitWithinFormatParser extends AbstractParserWithWordBoundaryChecking {
/**
 * Selects the precompiled pattern for this parser.
 *
 * Returns PATTERN_WITHOUT_PREFIX when context.option.forwardDate is truthy and
 * PATTERN_WITH_PREFIX otherwise. Both are module-level constants, so repeated
 * calls return the same RegExp object rather than compiling a new one each
 * time; this also lets identity-based caching of the derived pattern hit.
 *
 * @param context the parsing context whose forwardDate option selects the pattern
 */
innerPattern(context: ParsingContext): RegExp {
    return context.option.forwardDate ? PATTERN_WITHOUT_PREFIX : PATTERN_WITH_PREFIX;
}

innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents {
Expand Down

0 comments on commit 036f7aa

Please sign in to comment.