From e0985b0b17531bc5f49a23ebae935c191356b82e Mon Sep 17 00:00:00 2001
From: Mike Brashler <mike.brashler@gmail.com>
Date: Fri, 26 Jul 2024 15:40:27 -0700
Subject: [PATCH 1/4] Add Util/Lexer.pas

---
 MorseRunner.dpr   |   3 +-
 MorseRunner.dproj |   7 ++
 Util/Lexer.pas    | 226 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 235 insertions(+), 1 deletion(-)
 create mode 100644 Util/Lexer.pas
diff --git a/MorseRunner.dpr b/MorseRunner.dpr
index f379004..d3c6d07 100644
--- a/MorseRunner.dpr
+++ b/MorseRunner.dpr
@@ -47,7 +47,8 @@ uses
   ACAG in 'ACAG.pas',
   IaruHf in 'IaruHf.pas',
   ExchFields in 'ExchFields.pas',
-  SerNRGen in 'SerNRGen.pas';
+  SerNRGen in 'SerNRGen.pas',
+  Lexer in 'Util\Lexer.pas';
 
 {$R *.RES}
 
diff --git a/MorseRunner.dproj b/MorseRunner.dproj
index 563f39b..b8dbfb9 100644
--- a/MorseRunner.dproj
+++ b/MorseRunner.dproj
@@ -166,6 +166,7 @@
         <DCCReference Include="IaruHf.pas"/>
         <DCCReference Include="ExchFields.pas"/>
         <DCCReference Include="SerNRGen.pas"/>
+        <DCCReference Include="Util\Lexer.pas"/>
         <BuildConfiguration Include="Base">
             <Key>Base</Key>
         </BuildConfiguration>
@@ -279,6 +280,12 @@
                     </Platform>
                 </DeployFile>
                 <DeployFile LocalName="MorseRunner.exe" Configuration="Debug" Class="ProjectOutput"/>
+                <DeployFile LocalName="MorseRunner.exe" Configuration="Debug" Class="ProjectOutput">
+                    <Platform Name="Win32">
+                        <RemoteName>MorseRunner.exe</RemoteName>
+                        <Overwrite>true</Overwrite>
+                    </Platform>
+                </DeployFile>
                 <DeployFile LocalName="MorseRunner.exe" Configuration="Release" Class="ProjectOutput"/>
                 <DeployClass Name="AdditionalDebugSymbols">
                     <Platform Name="OSX32">
diff --git a/Util/Lexer.pas b/Util/Lexer.pas
new file mode 100644
index 0000000..83c8753
--- /dev/null
+++ b/Util/Lexer.pas
@@ -0,0 +1,226 @@
+unit Lexer;
+
+interface
+
+uses
+  Generics.Defaults,
+  Generics.Collections,   // for TList<>
+  SysUtils,               // Exception
+  PerlRegEx;      // for regular expression support (TPerlRegEx, TPerlRegExList)
+
+type
+  {
+    Lexer rules are defined as a pair, consisting of a RegEx string and a
+    corresponding type. An array of TTokenRuleDef records will be passed
+    into TLexer<T>.Create.
+
+    Example:
+      LexerRules: array[0..2] of TTokenRuleDef = (
+        (R: '[A-Z]+';               T: Ord(ttAlpha)),
+        (R: '\d+';                  T: Ord(ttNumeric)),
+        (R: '[A-Z][A-Z\d]*';        T: Ord(ttAlphaNumeric))
+      );
+
+    Perl-Compatible Regular Expressions ...
+    - https://pcre.org/original/doc/html/index.html
+    - https://pcre.org/original/doc/html/pcrepattern.html#SEC27
+  }
+  TTokenRuleDef = record
+    R: PCREString;   // regular expression that recognizes the token
+    T: Integer;      // token type reported when R matches (e.g. Ord() of an enum value)
+  end;
+
+  {
+    Filled in by TLexer.NextToken(var AToken: TExchToken).
+  }
+  TExchToken = record
+    TokenType: Integer;   // type of the rule that matched; NextToken stores -1 at end of input
+    Value: string;        // matched text (NextToken stores the whole buffer at end of input)
+    Pos: integer;         // 1-based lexer position at which the token was matched
+
+    procedure Init(AType: Integer; aValue: string; aPos: integer);   // assigns all three fields
+  end;
+
+  {
+    A simple regex-based lexer/tokenizer.
+
+    The basic idea is to search a set of rules (regular expressions) looking
+    for a match, where each expression represents a different token. Whitespace
+    is handled in one of two ways: it can be automatically skipped by the Lexer,
+    or user can provide additional rules to manage whitespace.
+
+    Inspiration and design is based on this article:
+      https://eli.thegreenplace.net/2013/06/25/regex-based-lexical-analysis-in-python-and-javascript
+  }
+  TLexer = class
+  private
+  protected
+    type
+      {
+        Hold a single token rule for the Lexer, including its type and
+        corresponding regular expression. A set of rules are passed to
+        the Lexer as an array of TTokenRuleDef records.
+      }
+      TTokenRule = packed record
+        tokenType: Integer;   // value stored in TExchToken.TokenType when this rule matches
+        regex: TPerlRegEx;    // anchored regex for this rule; freed in TLexer.Destroy
+
+        constructor init(AType: Integer; ARegEx: TPerlRegEx);
+      end;
+
+    var
+      SkipWhitespace: Boolean;        // when True, NextToken skips whitespace before matching
+      Pos: Integer;                   // 1-based position of the next unread character in Buf
+      Buf: string;                    // text being tokenized; assigned by Input
+      ReSkipWhitespace: TPerlRegEx;   // anchored '\s*' pattern used to skip whitespace
+      Rules: TList<TTokenRule>;       // token rules, tried by NextToken in list order
+  public
+    type
+      ELexerError = class(SysUtils.Exception);   // base class for lexer exceptions
+      EInvalidData = class(ELexerError);         // raised by NextToken when no rule matches
+
+    constructor Create(const ARules: array of TTokenRuleDef;
+      ASkipWhitespace: Boolean = True);
+    destructor Destroy; override;
+
+    procedure Input(const ABuf: string);   // set the text to tokenize and rewind to position 1
+    function NextToken(var AToken: TExchToken): Boolean; virtual;   // False once input is exhausted
+  end;
+
+implementation
+
+uses
+  System.Classes;
+
+constructor TLexer.TTokenRule.init(AType: Integer; ARegEx: TPerlRegEx);
+begin
+  regex := ARegEx;       // stores the reference only; the regex object is not copied
+  tokenType := AType;
+end;
+
+procedure TExchToken.Init(AType: Integer; aValue: string; aPos: integer);
+begin
+  Pos := aPos;           // fill all three fields of the token record in one call
+  Value := aValue;
+  TokenType := AType;
+end;
+
+{
+  Create a Lexer...
+  ARules
+      An array of TTokenRuleDef's. Each rule contains a regex
+      and a Token type value. `Regex` is the regular expression used
+      to recognize the token and `type` is the type of the token to return
+      when it's recognized.
+
+  ASkipWhitespace
+      If True, whitespace will be skipped and not reported by the lexer.
+      Otherwise, you have to specify your rules for whitespace, or it will be
+      flagged as an error.
+}
+constructor TLexer.Create(const ARules: array of TTokenRuleDef;
+  ASkipWhitespace: Boolean = True);
+var
+  Def: TTokenRuleDef;
+  Rule: TTokenRule;
+begin
+  inherited Create;
+  SkipWhitespace := ASkipWhitespace;
+  ReSkipWhitespace := TPerlRegEx.Create;
+  Rules := TList<TTokenRule>.Create;
+
+  // Anchored: whitespace is only matched at the position being scanned.
+  ReSkipWhitespace.Options := [preAnchored];
+  ReSkipWhitespace.RegEx := '\s*';  //'\s+';
+  ReSkipWhitespace.Compile;
+
+  // One anchored regex per rule, in the order given; each is freed in Destroy.
+  for Def in ARules do
+    begin
+      Rule.tokenType := Def.T;
+      Rule.regex := TPerlRegEx.Create;
+      Rule.regex.Options := [preAnchored];
+      Rule.regex.RegEx := Def.R;
+      Rules.Add(Rule);
+    end;
+end;
+
+
+destructor TLexer.Destroy;
+var
+  Rule: TTokenRule;
+begin
+  if Rules <> nil then  // still nil when Create raised before the list was allocated
+    for Rule in Rules do Rule.regex.Free;  // the rule regex objects are owned by the lexer
+  FreeAndNil(Rules);
+  FreeAndNil(ReSkipWhitespace);
+  inherited;
+end;
+
+procedure TLexer.Input(const ABuf: string);  // load ABuf, rewind Pos to 1, re-target every regex
+var
+  Rule: TTokenRule;  // per-iteration copy of a rule record; its regex field is an object reference
+begin
+  Buf := ABuf;
+  Pos := 1;
+  ReSkipWhitespace.Subject := Self.Buf;
+  ReSkipWhitespace.Start := 1;
+  ReSkipWhitespace.Stop := Self.Buf.Length;
+  for Rule in Rules do
+    begin
+      Rule.regex.Subject := Self.Buf;
+      Rule.regex.Start := 1;
+      Rule.regex.Stop := Self.Buf.Length;
+    end;
+end;
+
+
+function TLexer.NextToken(var AToken: TExchToken): Boolean;
+var
+  Rule: TTokenRule;
+  Matched: boolean;
+begin
+  Result := self.Pos <= buf.length;
+  if not Result then
+    begin
+      AToken.Init(-1, self.Buf, self.Pos);  // end of input: token type -1, returns False
+      Exit;
+    end;
+
+  if SkipWhitespace then
+    begin
+      assert(ReSkipWhitespace.Subject = self.Buf);
+      assert(ReSkipWhitespace.Stop = Self.Buf.Length);
+      ReSkipWhitespace.Start := self.Pos;
+      // '\s*' also matches the empty string, so advance by the extent of the
+      // anchored match rather than by whatever Start holds after the match.
+      if ReSkipWhitespace.MatchAgain then
+        self.Pos := self.Pos + ReSkipWhitespace.MatchedLength;
+      Result := self.Pos <= buf.length;
+      if not Result then
+        begin
+          AToken.Init(-1, self.Buf, self.Pos);  // only whitespace was left
+          Exit;
+        end;
+    end;
+
+  for Rule in Rules do  // first rule that matches at Pos wins
+    begin
+      assert(Rule.regex.Subject = Self.Buf);
+      assert(Rule.regex.Stop = Self.Buf.Length);
+      Rule.regex.Start := Self.Pos;
+      Result := Rule.regex.MatchAgain;
+      if Result then
+        begin
+          AToken.Init(Rule.tokenType, Rule.regex.MatchedText, Self.Pos);
+          Self.Pos := Self.Pos + Rule.regex.MatchedLength;  // anchored: match began at Pos
+          Exit;
+        end;
+    end;
+  // No rule matched at Pos: report the offending character to the caller.
+  raise EInvalidData.CreateFmt('Invalid data (%s) at position %d',
+    [Self.Buf.Substring(Self.Pos-1,1), Self.Pos]);
+end;
+
+
+end.

From 0b45b0d89cbc068a400402f0b7cc50e5b4c6b98e Mon Sep 17 00:00:00 2001
From: Mike Brashler <mike.brashler@gmail.com>
Date: Thu, 22 Aug 2024 15:53:48 -0700
Subject: [PATCH 2/4] Update comments

---
 Util/Lexer.pas | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/Util/Lexer.pas b/Util/Lexer.pas
index 83c8753..70868d1 100644
--- a/Util/Lexer.pas
+++ b/Util/Lexer.pas
@@ -12,13 +12,14 @@ interface
   {
     Lexer rules are defined as a pair, consisting of a RegEx string and a
     corresponding type. An array of TTokenRuleDef records will be passed
-    into TLexer<T>.Create.
+    into TLexer.Create.
 
     Example:
       LexerRules: array[0..2] of TTokenRuleDef = (
         (R: '[A-Z]+';               T: Ord(ttAlpha)),
         (R: '\d+';                  T: Ord(ttNumeric)),
-        (R: '[A-Z][A-Z\d]*';        T: Ord(ttAlphaNumeric))
+        (R: '+';                    T: Ord(ttPlus)),
+        (R: '-';                    T: Ord(ttMinus))
       );
 
     Perl-Compatible Regular Expressions ...
@@ -49,6 +50,9 @@   TExchToken = record
     is handled in one of two ways: it can be automatically skipped by the Lexer,
     or user can provide additional rules to manage whitespace.
 
+    The initial application of this class will be to support the ARRL
+    Sweepstakes Contest.
+
     Inspiration and design is based on this article:
       https://eli.thegreenplace.net/2013/06/25/regex-based-lexical-analysis-in-python-and-javascript
   }

From 0e9afeb37e46d26385b5bd4f25ca7b91d098659c Mon Sep 17 00:00:00 2001
From: Mike Brashler <mike.brashler@gmail.com>
Date: Thu, 22 Aug 2024 15:56:25 -0700
Subject: [PATCH 3/4] update comments

---
 Util/Lexer.pas | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Util/Lexer.pas b/Util/Lexer.pas
index 70868d1..82a247c 100644
--- a/Util/Lexer.pas
+++ b/Util/Lexer.pas
@@ -15,7 +15,7 @@ interface
     into TLexer.Create.
 
     Example:
-      LexerRules: array[0..2] of TTokenRuleDef = (
+      LexerRules: array[0..3] of TTokenRuleDef = (
         (R: '[A-Z]+';               T: Ord(ttAlpha)),
         (R: '\d+';                  T: Ord(ttNumeric)),
         (R: '+';                    T: Ord(ttPlus)),

From a8d4dc73522b7d9152c64373f083519650163f34 Mon Sep 17 00:00:00 2001
From: Mike Brashler <mike.brashler@gmail.com>
Date: Sat, 24 Aug 2024 00:22:51 -0700
Subject: [PATCH 4/4] fix compiler warnings

---
 Util/Lexer.pas | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Util/Lexer.pas b/Util/Lexer.pas
index 82a247c..54f678a 100644
--- a/Util/Lexer.pas
+++ b/Util/Lexer.pas
@@ -167,12 +167,12 @@ procedure TLexer.Input(const ABuf: string);
 begin
   Buf := ABuf;
   Pos := 1;
-  ReSkipWhitespace.Subject := Self.Buf;
+  ReSkipWhitespace.Subject := UTF8String(Self.Buf);
   ReSkipWhitespace.Start := 1;
   ReSkipWhitespace.Stop := Self.Buf.Length;
   for Rule in Rules do
     begin
-      Rule.regex.Subject := Self.Buf;
+      Rule.regex.Subject := UTF8String(Self.Buf);
       Rule.regex.Start := 1;
       Rule.regex.Stop := Self.Buf.Length;
     end;
@@ -182,7 +182,6 @@ procedure TLexer.Input(const ABuf: string);
 function TLexer.NextToken(var AToken: TExchToken): Boolean;
 var
   Rule: TTokenRule;
-  Matched: boolean;
 begin
   Result := self.Pos <= buf.length;
   if not Result then