diff --git a/MorseRunner.dpr b/MorseRunner.dpr
index f379004..d3c6d07 100644
--- a/MorseRunner.dpr
+++ b/MorseRunner.dpr
@@ -47,7 +47,8 @@ uses
ACAG in 'ACAG.pas',
IaruHf in 'IaruHf.pas',
ExchFields in 'ExchFields.pas',
- SerNRGen in 'SerNRGen.pas';
+ SerNRGen in 'SerNRGen.pas',
+ Lexer in 'Util\Lexer.pas';
{$R *.RES}
diff --git a/MorseRunner.dproj b/MorseRunner.dproj
index 563f39b..b8dbfb9 100644
--- a/MorseRunner.dproj
+++ b/MorseRunner.dproj
@@ -166,6 +167,7 @@
+        <DCCReference Include="Util\Lexer.pas"/>
         <BuildConfiguration Include="Base">
@@ -279,6 +280,12 @@
+        <DeployFile LocalName="Win32\Debug\MorseRunner.exe" Configuration="Debug" Class="ProjectOutput">
+            <Platform Name="Win32">
+                <RemoteName>MorseRunner.exe</RemoteName>
+                <Overwrite>true</Overwrite>
+            </Platform>
+        </DeployFile>
diff --git a/Util/Lexer.pas b/Util/Lexer.pas
new file mode 100644
index 0000000..54f678a
--- /dev/null
+++ b/Util/Lexer.pas
@@ -0,0 +1,229 @@
+unit Lexer;
+
+interface
+
+uses
+ Generics.Defaults,
+ Generics.Collections, // for TList<>
+ SysUtils, // Exception
+ PerlRegEx; // for regular expression support (TPerlRegEx, TPerlRegExList)
+
+type
+ {
+ Lexer rules are defined as a pair, consisting of a RegEx string and a
+ corresponding type. An array of TTokenRuleDef records will be passed
+ into TLexer.Create.
+
+ Example:
+ LexerRules: array[0..3] of TTokenRuleDef = (
+ (R: '[A-Z]+'; T: Ord(ttAlpha)),
+ (R: '\d+'; T: Ord(ttNumeric)),
+ (R: '\+'; T: Ord(ttPlus)), // '+' must be escaped; a bare '+' is an invalid pattern
+ (R: '-'; T: Ord(ttMinus))
+ );
+
+ Perl-Compatible Regular Expressions ...
+ - https://pcre.org/original/doc/html/index.html
+ - https://pcre.org/original/doc/html/pcrepattern.html#SEC27
+ }
+ TTokenRuleDef = record
+ R: PCREString;
+ T: Integer;
+ end;
+
+ {
+ Returned by TLexer.NextToken(var AToken: TExchToken).
+ }
+ TExchToken = record
+ TokenType: Integer;
+ Value: string;
+ Pos: Integer;
+
+ procedure Init(AType: Integer; const AValue: string; APos: Integer);
+ end;
+
+ {
+ A simple regex-based lexer/tokenizer.
+
+ The basic idea is to search a set of rules (regular expressions) looking
+ for a match, where each expression represents a different token. Whitespace
+ is handled in one of two ways: it can be automatically skipped by the Lexer,
+ or user can provide additional rules to manage whitespace.
+
+ The initial application of this class will be to support the ARRL
+ Sweepstakes Contest.
+
+ Inspiration and design is based on this article:
+ https://eli.thegreenplace.net/2013/06/25/regex-based-lexical-analysis-in-python-and-javascript
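+
+ Typical usage (a sketch; assumes Lex: TLexer and Tok: TExchToken are
+ declared, and that LexerRules and the token types come from the
+ TTokenRuleDef example above):
+
+ Lex := TLexer.Create(LexerRules);
+ try
+ Lex.Input('AB12');
+ while Lex.NextToken(Tok) do
+ Writeln(Format('%d: %s @ %d', [Tok.TokenType, Tok.Value, Tok.Pos]));
+ finally
+ Lex.Free;
+ end;
+
+ NextToken returns False once the input is exhausted; text that matches
+ no rule raises EInvalidData.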
+ }
+ TLexer = class
+ protected
+ type
+ {
+ Hold a single token rule for the Lexer, including its type and
+ corresponding regular expression. A set of rules are passed to
+ the Lexer as an array of TTokenRuleDef records.
+ }
+ TTokenRule = packed record
+ tokenType: Integer;
+ regex: TPerlRegEx;
+
+ constructor Init(AType: Integer; ARegEx: TPerlRegEx);
+ end;
+
+ var
+ SkipWhitespace: Boolean;
+ Pos: Integer;
+ Buf: string;
+ ReSkipWhitespace: TPerlRegEx;
+ Rules: TList<TTokenRule>; // typed list; enables for-in over TTokenRule records
+ public
+ type
+ ELexerError = class(SysUtils.Exception);
+ EInvalidData = class(ELexerError);
+
+ constructor Create(const ARules: array of TTokenRuleDef;
+ ASkipWhitespace: Boolean = True);
+ destructor Destroy; override;
+
+ procedure Input(const ABuf: string);
+ function NextToken(var AToken: TExchToken): Boolean; virtual;
+ end;
+
+implementation
+
+uses
+ System.Classes;
+
+constructor TLexer.TTokenRule.Init(AType: Integer; ARegEx: TPerlRegEx);
+begin
+ Self.tokenType := AType;
+ Self.regex := ARegEx;
+end;
+
+procedure TExchToken.Init(AType: Integer; const AValue: string; APos: Integer);
+begin
+ Self.TokenType := AType;
+ Self.Value := AValue;
+ Self.Pos := APos;
+end;
+
+{
+ Create a Lexer...
+ ARules
+ An array of TTokenRuleDef records. Each rule pairs a regular expression
+ with a token type value: `R` is the regular expression used to recognize
+ the token and `T` is the type of the token to return when it is
+ recognized.
+
+ ASkipWhitespace
+ If True, whitespace will be skipped and not reported by the lexer.
+ Otherwise, you have to specify your rules for whitespace, or it will be
+ flagged as an error.
+}
+constructor TLexer.Create(const ARules: array of TTokenRuleDef;
+ ASkipWhitespace: Boolean = True);
+var
+ Def: TTokenRuleDef;
+ Rule: TTokenRule;
+ Reg: TPerlRegEx;
+begin
+ ReSkipWhitespace := TPerlRegEx.Create;
+ Rules := TList<TTokenRule>.Create;
+
+ SkipWhitespace := ASkipWhitespace;
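+ // preAnchored pins every match to the position given in Start, so a
+ // pattern can only match at the current scan position and never skips
+ // ahead in the buffer.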
+ ReSkipWhitespace.Options := [preAnchored];
+ ReSkipWhitespace.RegEx := '\s*'; //'\s+';
+ ReSkipWhitespace.Compile;
+
+ for Def in ARules do
+ begin
+ Reg := TPerlRegEx.Create;
+ Reg.Options := [preAnchored];
+ Reg.RegEx := Def.R;
+ Reg.Compile; // fail fast on an invalid pattern
+ Rule.regex := Reg;
+ Rule.tokenType := Def.T;
+ Rules.Add(Rule);
+ Reg := nil; // ownership transferred to Rules; freed in Destroy
+ end;
+end;
+
+
+destructor TLexer.Destroy;
+var
+ Rule: TTokenRule;
+begin
+ for Rule in Rules do
+ Rule.regex.Free;
+ FreeAndNil(Rules);
+ FreeAndNil(ReSkipWhitespace);
+ inherited Destroy;
+end;
+
+
+procedure TLexer.Input(const ABuf: string);
+var
+ Rule: TTokenRule;
+begin
+ Buf := ABuf;
+ Pos := 1;
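+ // Prime the whitespace regex and every rule with the new subject text.
+ // TPerlRegEx subjects are UTF-8, so the positions used here assume
+ // single-byte (ASCII) input.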
+ ReSkipWhitespace.Subject := UTF8String(Self.Buf);
+ ReSkipWhitespace.Start := 1;
+ ReSkipWhitespace.Stop := Self.Buf.Length;
+ for Rule in Rules do
+ begin
+ Rule.regex.Subject := UTF8String(Self.Buf);
+ Rule.regex.Start := 1;
+ Rule.regex.Stop := Self.Buf.Length;
+ end;
+end;
+
+
+function TLexer.NextToken(var AToken: TExchToken): Boolean;
+var
+ Rule: TTokenRule;
+begin
+ Result := Self.Pos <= Buf.Length;
+ if not Result then
+ begin
+ // end of input: report a token type of -1 and return False
+ AToken.Init(-1, Self.Buf, Self.Pos);
+ Exit;
+ end;
+
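+ // Optionally consume leading whitespace. '\s*' also matches the empty
+ // string, so the match always succeeds and Pos simply advances past
+ // any blanks in front of the next token.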
+ if SkipWhitespace then
+ begin
+ Assert(ReSkipWhitespace.Subject = Self.Buf);
+ Assert(ReSkipWhitespace.Stop = Self.Buf.Length);
+ ReSkipWhitespace.Start := Self.Pos;
+ if ReSkipWhitespace.MatchAgain then
+ Self.Pos := ReSkipWhitespace.Start;
+
+ Result := Self.Pos <= Buf.Length;
+ if not Result then
+ begin
+ AToken.Init(-1, Self.Buf, Self.Pos);
+ Exit;
+ end;
+ end;
+
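+ // Try the rules in the order they were given; the first pattern that
+ // matches at the current position supplies the token, and Pos advances
+ // past the matched text.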
+ for Rule in Rules do
+ begin
+ Assert(Rule.regex.Subject = Self.Buf);
+ Assert(Rule.regex.Stop = Self.Buf.Length);
+ Rule.regex.Start := Self.Pos;
+ Result := Rule.regex.MatchAgain;
+ if Result then
+ begin
+ AToken.Init(Rule.tokenType, Rule.regex.MatchedText, Self.Pos);
+ Self.Pos := Rule.regex.Start;
+ Exit;
+ end;
+ end;
+
+ // if we're here, no rule matched
+ raise EInvalidData.CreateFmt('Invalid data (%s) at position %d',
+ [Self.Buf.Substring(Self.Pos-1,1), Self.Pos]);
+end;
+
+
+end.