Skip to content

Commit

Permalink
Additional coverage & error messages (#74)
Browse files Browse the repository at this point in the history
Simplifies the tokenizer in one case and adds a backslash to bad Unicode
error messages; otherwise this change only adds test coverage.
  • Loading branch information
robsimmons authored Nov 25, 2024
1 parent f564247 commit d1aeb10
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 104 deletions.
44 changes: 41 additions & 3 deletions src/client.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { test, expect } from 'vitest';
import { Dusa, termToString, compareTerms } from './client.js';
import { Dusa, termToString, compareTerms, DusaError } from './client.js';

function solutions(dusa: Dusa, pred: string = 'res') {
const sols: string[] = [];
Expand All @@ -14,6 +14,17 @@ function solutions(dusa: Dusa, pred: string = 'res') {
return sols.toSorted(new Intl.Collator('en').compare);
}

/**
 * Tries to construct a `Dusa` instance from `program` and reports what went wrong.
 *
 * @param program - Dusa source text to load.
 * @returns The `msg` of every issue carried by the thrown `DusaError`, in order;
 *   `null` when construction succeeds or when something other than a
 *   `DusaError` is thrown.
 */
function runForDusaError(program: string) {
  let thrown: unknown = null;
  try {
    new Dusa(program);
  } catch (err) {
    thrown = err;
  }

  // Anything that is not a DusaError (including "nothing was thrown") maps to null.
  if (!(thrown instanceof DusaError)) {
    return null;
  }
  return thrown.issues.map((issue) => issue.msg);
}

let dusa: Dusa;

test('Basic operation', () => {
Expand All @@ -33,11 +44,38 @@ test('Basic operation', () => {
dusa.assert({ name: 'a', value: 'ff' });
expect(dusa.solution).toBeNull();

expect(() => new Dusa("a is '.")).toThrow();
expect(runForDusaError("a is '.")).toStrictEqual(["Unexpected symbol '''"]);
expect(runForDusaError('a is ".')).toStrictEqual(['End of string not found at end of input']);
expect(runForDusaError('a is ".\n')).toStrictEqual(['End of string not found at end of line']);
});

// Covers string-literal escape handling: one program that uses every accepted
// escape form, followed by four malformed escapes that are each expected to
// produce exactly one error message.
test('String escapes', () => {
// The input string contains \0 \b \f \n \r \t \v \' \" \\ \x12 \u{12} \u{2601}.
// The expected rendering spells most control characters as \xNN, keeps \n,
// \" and \\ escaped, leaves ' bare, and keeps U+2601 in \u{...} form.
expect(
solutions(new Dusa('res "\\0\\b\\f\\n\\r\\t\\v\\\'\\"\\\\\\x12\\u{12}\\u{2601}".')),
).toStrictEqual(['res "\\x00\\x08\\x0c\\n\\x0d\\x09\\x0b\'\\"\\\\\\x12\\x12\\u{2601}"']);

// 0xd901 falls inside the surrogate range 0xd800-0xdfff, so it is rejected.
// The message is expected to include the leading backslash of the escape.
expect(runForDusaError('a is "\\u{d901}".\n')).toStrictEqual([
'Cannot encode lone surrogate \\u{d901}',
]);
// 0x999999999 is larger than 0x10ffff, the highest Unicode code point.
expect(runForDusaError('a is "\\u{999999999}".\n')).toStrictEqual([
'Bad Unicode code point \\u{999999999}',
]);
// \q is not one of the recognized escape characters.
expect(runForDusaError('a is "\\q".\n')).toStrictEqual(['Invalid escape sequence \\q']);
// A backslash immediately followed by the line break has nothing to escape.
expect(runForDusaError('a is "\\\n')).toStrictEqual(['Backslash not supported at end of line']);
});

// Each malformed program must be rejected with exactly one issue whose
// message matches the paired string.
test('Parse errors', () => {
  const cases: [string, string][] = [
    // A trailing apostrophe makes the identifier invalid.
    ["a'", "Invalid identifier 'a''"],
    // Input ends before the token that the parser requires next.
    ['a', "Expected to find ':-', but instead reached the end of input."],
    // An empty choice list has no term where one is required.
    ['a is {}.', 'Expected to find a term here, but no term found.'],
  ];

  for (const [program, message] of cases) {
    expect(runForDusaError(program)).toStrictEqual([message]);
  }
});

test('Exhaustive choices', () => {
dusa = new Dusa('p a is { tt, ff }.\np b is { tt, ff }.');
dusa = new Dusa(' p a is { tt, ff }.\n p b is { tt, ff }.');
expect(dusa.solution).not.toBeNull();
expect([...dusa].length).toBe(4);
expect(solutions(dusa, 'p')).toStrictEqual([
Expand Down
6 changes: 4 additions & 2 deletions src/language/dusa-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,15 @@ function mkStream<T>(xs: T[]): ImperativeStream<T> {

function force(t: ImperativeStream<Token>, type: string): Token {
const tok = t.next();
if (tok === null)
if (tok === null) {
throw new DusaSyntaxError(`Expected to find '${type}', but instead reached the end of input.`);
if (tok.type !== type)
}
if (tok.type !== type) {
throw new DusaSyntaxError(
`Expected to find '${type}', but instead found '${tok.type}'.`,
tok.loc,
);
}
return tok;
}

Expand Down
189 changes: 90 additions & 99 deletions src/language/dusa-tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,116 +125,111 @@ export const dusaTokenizer: StreamParser<ParserState, Token> = {
};
}

if (stream.eat('\\')) {
if (
(tok = stream.eat(/^([0bfnrtv'"\\]|x[0-9a-fA-F][0-9a-fA-F]|u\{[0-9a-fA-F]{1,6}\})/))
) {
switch (tok[0]) {
case '0':
tok = '\0';
break;
case 'b':
tok = '\b';
break;
case 'f':
tok = '\f';
break;
case 'n':
tok = '\n';
break;
case 'r':
tok = '\r';
break;
case 't':
tok = '\t';
break;
case 'v':
tok = '\v';
break;
case "'":
tok = "'";
break;
case '"':
tok = '"';
break;
case '\\':
tok = '\\';
break;
case 'x':
tok = String.fromCharCode(parseInt(tok.slice(1), 16));
stream.eat('\\'); // Expected to always return non-null
if ((tok = stream.eat(/^([0bfnrtv'"\\]|x[0-9a-fA-F][0-9a-fA-F]|u\{[0-9a-fA-F]+\})/))) {
switch (tok[0]) {
case '0':
tok = '\0';
break;
case 'b':
tok = '\b';
break;
case 'f':
tok = '\f';
break;
case 'n':
tok = '\n';
break;
case 'r':
tok = '\r';
break;
case 't':
tok = '\t';
break;
case 'v':
tok = '\v';
break;
case "'":
tok = "'";
break;
case '"':
tok = '"';
break;
case '\\':
tok = '\\';
break;
case 'x':
tok = String.fromCharCode(parseInt(tok.slice(1), 16));
break;
default: {
// case 'u'
const charCode = parseInt(tok.slice(2, tok.length - 1), 16);
if (0xd800 <= charCode && charCode < 0xe000) {
return {
state,
issues: [
{
type: 'Issue',
msg: `Cannot encode lone surrogate \\${tok}`,
severity: 'error',
loc: stream.matchedLocation(),
},
],
};
}
if (charCode > 0x10ffff) {
return {
state,
issues: [
{
type: 'Issue',
msg: `Bad Unicode code point \\${tok}`,
severity: 'error',
loc: stream.matchedLocation(),
},
],
};
} else {
tok = String.fromCodePoint(charCode);
break;
default: {
// case 'u'
const charCode = parseInt(tok.slice(2, tok.length - 1), 16);
if (0xd800 <= charCode && charCode < 0xe000) {
return {
state,
issues: [
{
type: 'Issue',
msg: `Cannot encode lone surrogate ${tok}`,
severity: 'error',
loc: stream.matchedLocation(),
},
],
};
}
if (charCode > 0x10ffff) {
return {
state,
issues: [
{
type: 'Issue',
msg: `Bad Unicode code point ${tok}`,
severity: 'error',
loc: stream.matchedLocation(),
},
],
};
} else {
tok = String.fromCodePoint(charCode);
break;
}
}
}
return {
state: {
...state,
collected: state.collected + tok,
end: stream.matchedLocation().end,
},
tag: 'escape',
};
}
if ((tok = stream.eat(/^./))) {
return {
state,
tag: 'invalid',
issues: [
{
type: 'Issue',
msg: `Invalid escape sequence \\${tok}`,
severity: 'error',
loc: stream.matchedLocation(),
},
],
};
}
return {
state: { type: 'Normal' },
state: {
...state,
collected: state.collected + tok,
end: stream.matchedLocation().end,
},
tag: 'escape',
};
}
if ((tok = stream.eat(/^./))) {
return {
state,
tag: 'invalid',
issues: [
{
type: 'Issue',
msg: 'Backslash not supported at end of line',
msg: `Invalid escape sequence \\${tok}`,
severity: 'error',
loc: stream.matchedLocation(),
},
],
};
}

throw new Error('Expected-to-be-unimpossible state in string parsing reached');
return {
state: { type: 'Normal' },
tag: 'invalid',
issues: [
{
type: 'Issue',
msg: 'Backslash not supported at end of line',
severity: 'error',
loc: stream.matchedLocation(),
},
],
};

case 'Normal':
if ((tok = stream.eat('#'))) {
Expand Down Expand Up @@ -288,10 +283,6 @@ export const dusaTokenizer: StreamParser<ParserState, Token> = {
}
}

if (stream.eat(/^\s+/)) {
return { state };
}

if ((tok = stream.eat(META_ID_TOKEN) ?? stream.eat(META_NUM_TOKEN))) {
if (tok === 'is') {
if (stream.eat('?')) {
Expand Down

0 comments on commit d1aeb10

Please sign in to comment.