Additional coverage & error messages (#74)

Simplifies the tokenizer in one case and adds the missing backslash to the bad-Unicode error messages; otherwise, the changes only add test coverage.
robsimmons · Nov 25, 2024 · d1aeb10 · d1aeb10
1 parent f564247
commit d1aeb10
Show file tree

Hide file tree

Showing 3 changed files with 135 additions and 104 deletions.
diff --git a/src/client.test.ts b/src/client.test.ts
@@ -1,5 +1,5 @@
 import { test, expect } from 'vitest';
-import { Dusa, termToString, compareTerms } from './client.js';
+import { Dusa, termToString, compareTerms, DusaError } from './client.js';
 
 function solutions(dusa: Dusa, pred: string = 'res') {
   const sols: string[] = [];
@@ -14,6 +14,17 @@ function solutions(dusa: Dusa, pred: string = 'res') {
   return sols.toSorted(new Intl.Collator('en').compare);
 }
 
+function runForDusaError(program: string) {
+  try {
+    new Dusa(program);
+  } catch (e) {
+    if (e instanceof DusaError) {
+      return e.issues.map(({ msg }) => msg);
+    }
+  }
+  return null;
+}
+
 let dusa: Dusa;
 
 test('Basic operation', () => {
@@ -33,11 +44,38 @@ test('Basic operation', () => {
   dusa.assert({ name: 'a', value: 'ff' });
   expect(dusa.solution).toBeNull();
 
-  expect(() => new Dusa("a is '.")).toThrow();
+  expect(runForDusaError("a is '.")).toStrictEqual(["Unexpected symbol '''"]);
+  expect(runForDusaError('a is ".')).toStrictEqual(['End of string not found at end of input']);
+  expect(runForDusaError('a is ".\n')).toStrictEqual(['End of string not found at end of line']);
+});
+
+test('String escapes', () => {
+  expect(
+    solutions(new Dusa('res "\\0\\b\\f\\n\\r\\t\\v\\\'\\"\\\\\\x12\\u{12}\\u{2601}".')),
+  ).toStrictEqual(['res "\\x00\\x08\\x0c\\n\\x0d\\x09\\x0b\'\\"\\\\\\x12\\x12\\u{2601}"']);
+
+  expect(runForDusaError('a is "\\u{d901}".\n')).toStrictEqual([
+    'Cannot encode lone surrogate \\u{d901}',
+  ]);
+  expect(runForDusaError('a is "\\u{999999999}".\n')).toStrictEqual([
+    'Bad Unicode code point \\u{999999999}',
+  ]);
+  expect(runForDusaError('a is "\\q".\n')).toStrictEqual(['Invalid escape sequence \\q']);
+  expect(runForDusaError('a is "\\\n')).toStrictEqual(['Backslash not supported at end of line']);
+});
+
+test('Parse errors', () => {
+  expect(runForDusaError("a'")).toStrictEqual(["Invalid identifier 'a''"]);
+  expect(runForDusaError('a')).toStrictEqual([
+    "Expected to find ':-', but instead reached the end of input.",
+  ]);
+  expect(runForDusaError('a is {}.')).toStrictEqual([
+    'Expected to find a term here, but no term found.',
+  ]);
 });
 
 test('Exhaustive choices', () => {
-  dusa = new Dusa('p a is { tt, ff }.\np b is { tt, ff }.');
+  dusa = new Dusa('  p a is { tt, ff }.\n  p b is { tt, ff }.');
   expect(dusa.solution).not.toBeNull();
   expect([...dusa].length).toBe(4);
   expect(solutions(dusa, 'p')).toStrictEqual([

diff --git a/src/language/dusa-parser.ts b/src/language/dusa-parser.ts
@@ -89,13 +89,15 @@ function mkStream<T>(xs: T[]): ImperativeStream<T> {
 
 function force(t: ImperativeStream<Token>, type: string): Token {
   const tok = t.next();
-  if (tok === null)
+  if (tok === null) {
     throw new DusaSyntaxError(`Expected to find '${type}', but instead reached the end of input.`);
-  if (tok.type !== type)
+  }
+  if (tok.type !== type) {
     throw new DusaSyntaxError(
       `Expected to find '${type}', but instead found '${tok.type}'.`,
       tok.loc,
     );
+  }
   return tok;
 }
 

diff --git a/src/language/dusa-tokenizer.ts b/src/language/dusa-tokenizer.ts
@@ -125,116 +125,111 @@ export const dusaTokenizer: StreamParser<ParserState, Token> = {
           };
         }
 
-        if (stream.eat('\\')) {
-          if (
-            (tok = stream.eat(/^([0bfnrtv'"\\]|x[0-9a-fA-F][0-9a-fA-F]|u\{[0-9a-fA-F]{1,6}\})/))
-          ) {
-            switch (tok[0]) {
-              case '0':
-                tok = '\0';
-                break;
-              case 'b':
-                tok = '\b';
-                break;
-              case 'f':
-                tok = '\f';
-                break;
-              case 'n':
-                tok = '\n';
-                break;
-              case 'r':
-                tok = '\r';
-                break;
-              case 't':
-                tok = '\t';
-                break;
-              case 'v':
-                tok = '\v';
-                break;
-              case "'":
-                tok = "'";
-                break;
-              case '"':
-                tok = '"';
-                break;
-              case '\\':
-                tok = '\\';
-                break;
-              case 'x':
-                tok = String.fromCharCode(parseInt(tok.slice(1), 16));
+        stream.eat('\\'); // Expected to always return non-null
+        if ((tok = stream.eat(/^([0bfnrtv'"\\]|x[0-9a-fA-F][0-9a-fA-F]|u\{[0-9a-fA-F]+\})/))) {
+          switch (tok[0]) {
+            case '0':
+              tok = '\0';
+              break;
+            case 'b':
+              tok = '\b';
+              break;
+            case 'f':
+              tok = '\f';
+              break;
+            case 'n':
+              tok = '\n';
+              break;
+            case 'r':
+              tok = '\r';
+              break;
+            case 't':
+              tok = '\t';
+              break;
+            case 'v':
+              tok = '\v';
+              break;
+            case "'":
+              tok = "'";
+              break;
+            case '"':
+              tok = '"';
+              break;
+            case '\\':
+              tok = '\\';
+              break;
+            case 'x':
+              tok = String.fromCharCode(parseInt(tok.slice(1), 16));
+              break;
+            default: {
+              // case 'u'
+              const charCode = parseInt(tok.slice(2, tok.length - 1), 16);
+              if (0xd800 <= charCode && charCode < 0xe000) {
+                return {
+                  state,
+                  issues: [
+                    {
+                      type: 'Issue',
+                      msg: `Cannot encode lone surrogate \\${tok}`,
+                      severity: 'error',
+                      loc: stream.matchedLocation(),
+                    },
+                  ],
+                };
+              }
+              if (charCode > 0x10ffff) {
+                return {
+                  state,
+                  issues: [
+                    {
+                      type: 'Issue',
+                      msg: `Bad Unicode code point \\${tok}`,
+                      severity: 'error',
+                      loc: stream.matchedLocation(),
+                    },
+                  ],
+                };
+              } else {
+                tok = String.fromCodePoint(charCode);
                 break;
-              default: {
-                // case 'u'
-                const charCode = parseInt(tok.slice(2, tok.length - 1), 16);
-                if (0xd800 <= charCode && charCode < 0xe000) {
-                  return {
-                    state,
-                    issues: [
-                      {
-                        type: 'Issue',
-                        msg: `Cannot encode lone surrogate ${tok}`,
-                        severity: 'error',
-                        loc: stream.matchedLocation(),
-                      },
-                    ],
-                  };
-                }
-                if (charCode > 0x10ffff) {
-                  return {
-                    state,
-                    issues: [
-                      {
-                        type: 'Issue',
-                        msg: `Bad Unicode code point ${tok}`,
-                        severity: 'error',
-                        loc: stream.matchedLocation(),
-                      },
-                    ],
-                  };
-                } else {
-                  tok = String.fromCodePoint(charCode);
-                  break;
-                }
               }
             }
-            return {
-              state: {
-                ...state,
-                collected: state.collected + tok,
-                end: stream.matchedLocation().end,
-              },
-              tag: 'escape',
-            };
-          }
-          if ((tok = stream.eat(/^./))) {
-            return {
-              state,
-              tag: 'invalid',
-              issues: [
-                {
-                  type: 'Issue',
-                  msg: `Invalid escape sequence \\${tok}`,
-                  severity: 'error',
-                  loc: stream.matchedLocation(),
-                },
-              ],
-            };
           }
           return {
-            state: { type: 'Normal' },
+            state: {
+              ...state,
+              collected: state.collected + tok,
+              end: stream.matchedLocation().end,
+            },
+            tag: 'escape',
+          };
+        }
+        if ((tok = stream.eat(/^./))) {
+          return {
+            state,
             tag: 'invalid',
             issues: [
               {
                 type: 'Issue',
-                msg: 'Backslash not supported at end of line',
+                msg: `Invalid escape sequence \\${tok}`,
                 severity: 'error',
                 loc: stream.matchedLocation(),
               },
             ],
           };
         }
-
-        throw new Error('Expected-to-be-unimpossible state in string parsing reached');
+        return {
+          state: { type: 'Normal' },
+          tag: 'invalid',
+          issues: [
+            {
+              type: 'Issue',
+              msg: 'Backslash not supported at end of line',
+              severity: 'error',
+              loc: stream.matchedLocation(),
+            },
+          ],
+        };
 
       case 'Normal':
         if ((tok = stream.eat('#'))) {
@@ -288,10 +283,6 @@ export const dusaTokenizer: StreamParser<ParserState, Token> = {
           }
         }
 
-        if (stream.eat(/^\s+/)) {
-          return { state };
-        }
-
         if ((tok = stream.eat(META_ID_TOKEN) ?? stream.eat(META_NUM_TOKEN))) {
           if (tok === 'is') {
             if (stream.eat('?')) {