From d863db52fe22eb4de2ee8c04afe1ca9680bb6648 Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Mon, 23 Jan 2023 22:29:57 -0500 Subject: [PATCH 1/8] Wrap field/slot references in Fixes #95 --- src/emitter.ts | 14 ++++++++--- src/node-types.ts | 17 ++++++++++++-- src/parser.ts | 20 ++++++++++------ src/tokenizer.ts | 43 ++++++++++++++++++++++++++++++---- src/visitor.ts | 1 + test/cases/iterator-close.html | 2 +- 6 files changed, 79 insertions(+), 18 deletions(-) diff --git a/src/emitter.ts b/src/emitter.ts index 11d313a..e176240 100644 --- a/src/emitter.ts +++ b/src/emitter.ts @@ -7,6 +7,7 @@ import type { TagNode, UnderscoreNode, StarNode, + DoubleBracketsNode, OrderedListItemNode, UnorderedListItemNode, OrderedListNode, @@ -72,6 +73,9 @@ export class Emitter { case 'tilde': this.emitTilde(node); break; + case 'double-brackets': + this.emitFieldOrSlot(node); + break; case 'comment': case 'tag': case 'opaqueTag': @@ -125,6 +129,10 @@ export class Emitter { this.str += `${node.contents}`; } + emitFieldOrSlot(node: DoubleBracketsNode) { + this.wrapFragment('var', node.contents, ' class="field"'); + } + emitTag(tag: OpaqueTagNode | CommentNode | TagNode) { this.str += tag.contents; } @@ -159,9 +167,9 @@ export class Emitter { this.str += '>' + pipe.nonTerminal + ''; } - wrapFragment(wrapping: string, fragment: Node[]) { - this.str += `<${wrapping}>`; + wrapFragment(tagName: string, fragment: Node[], attrs: string = '') { + this.str += `<${tagName}${attrs}>`; this.emitFragment(fragment); - this.str += ``; + this.str += ``; } } diff --git a/src/node-types.ts b/src/node-types.ts index 253a8f8..81626d6 100644 --- a/src/node-types.ts +++ b/src/node-types.ts @@ -21,7 +21,7 @@ export type EOFToken = { location: LocationRange; }; -export type Format = 'star' | 'underscore' | 'tick' | 'pipe' | 'tilde'; +export type Format = 'star' | 'underscore' | 'tick' | 'pipe' | 'tilde' | 'double-brackets'; export type FormatToken = { name: Format; @@ -168,7 +168,19 @@ export type PipeNode 
= { location: LocationRange; }; -export type FormatNode = StarNode | UnderscoreNode | TickNode | TildeNode | PipeNode; +export type DoubleBracketsNode = { + name: 'double-brackets'; + contents: FragmentNode[]; + location: LocationRange; +}; + +export type FormatNode = + | StarNode + | UnderscoreNode + | TickNode + | TildeNode + | PipeNode + | DoubleBracketsNode; export type UnorderedListNode = { name: 'ul'; @@ -213,6 +225,7 @@ export type Node = | TextNode | StarNode | UnderscoreNode + | DoubleBracketsNode | TickNode | TildeNode | PipeNode diff --git a/src/parser.ts b/src/parser.ts index 7a109d6..fdd30e9 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -248,8 +248,8 @@ export class Parser { if (isFormatToken(tok)) { // check if format token is valid // - // tick is always valid - if (tok.name === 'tick') { + // tick and field/slot are always valid + if (tok.name === 'tick' || tok.name === 'double-brackets') { break; } @@ -289,9 +289,15 @@ export class Parser { opts: ParseFragmentOpts ): (TextNode | CommentNode | TagNode | FormatNode)[] { const startTok = this._t.next() as FormatToken; + const start = this.getPos(startTok); let contents: (TextNode | CommentNode | TagNode)[] = []; - if (format === 'underscore') { + if (format === 'double-brackets') { + // the tokenizer emits fields/slots as complete `[[...]]` tokens, which are preserved here + const location = { start, end: this.getPos() }; + const text = { name: 'text', contents: startTok.contents, location }; + return [{ name: format, contents: [text as TextNode], location }]; + } else if (format === 'underscore') { if (this._t.peek().name === 'text') { contents = [this._t.next() as TextNode]; } @@ -300,7 +306,6 @@ export class Parser { } const nextTok = this._t.peek(); - const start = this.getPos(startTok); // fragment ended but we don't have a close format. Convert this node into a text node. 
if (nextTok.name !== format) { @@ -395,7 +400,8 @@ function isFormatToken(tok: Token): tok is FormatToken { tok.name === 'underscore' || tok.name === 'tilde' || tok.name === 'tick' || - tok.name === 'pipe' + tok.name === 'pipe' || + tok.name === 'double-brackets' ); } @@ -417,7 +423,7 @@ function isList(tok: Token): tok is OrderedListToken | UnorderedListToken { // Backtick can work anywhere, other format tokens have more stringent requirements. // This aligns with gmd semantics. function isValidStartFormat(prev: NotEOFToken, cur: Token, next: Token) { - if (cur.name === 'tick') { + if (cur.name === 'tick' || cur.name === 'double-brackets') { return true; } @@ -425,7 +431,7 @@ function isValidStartFormat(prev: NotEOFToken, cur: Token, next: Token) { } function isValidEndFormat(prev: Token, cur: Token) { - if (cur.name === 'tick') { + if (cur.name === 'tick' || cur.name === 'double-brackets') { return true; } diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 8262a19..b5f94e6 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1,5 +1,6 @@ import type { Unlocated, Token, AttrToken, Position } from './node-types'; +const fieldOrSlotRegexp = /^\[\[(?:[^\\\]]|\\.)+\]\]/; const tagRegexp = /^<[/!]?(\w[\w-]*)(\s+\w[\w-]*(\s*=\s*("[^"]*"|'[^']*'|[^><"'=`]+))?)*\s*>/; const commentRegexp = /^/; const attrRegexp = /^\[ *[\w-]+ *= *"(?:[^"\\\x00-\x1F]|\\["\\/bfnrt]|\\u[a-fA-F]{4})*" *(?:, *[\w-]+ *= *"(?:[^"\\\x00-\x1F]|\\["\\/bfnrt]|\\u[a-fA-F]{4})*" *)*] /; @@ -79,6 +80,13 @@ export class Tokenizer { if (chr === '\\') { out += this.scanEscape(); } else if (isChars(chr)) { + out += chr; + this.pos++; + } else if (chr === '[') { + if (this.tryScanFieldOrSlot()) { + break; + } + out += chr; this.pos++; } else if (chr === '<') { @@ -115,13 +123,20 @@ export class Tokenizer { return this.str.slice(start, this.pos); } - // does not actually consume the tag - // you should manually `this.pos += tag[0].length;` if you end up consuming it - tryScanTag() { - if 
(this.str[this.pos] !== '<') { + // does not actually consume the field/slot + // you should manually `this.pos += result.length;` if you end up consuming it + tryScanFieldOrSlot() { + const match = this.str.slice(this.pos).match(fieldOrSlotRegexp); + if (!match) { return; } + return match[0]; + } + + // does not actually consume the tag + // you should manually `this.pos += tag[0].length;` if you end up consuming it + tryScanTag() { const match = this.str.slice(this.pos).match(tagRegexp); if (!match) { return; @@ -297,6 +312,16 @@ export class Tokenizer { } else if (isChars(chr)) { this.enqueue({ name: 'text', contents: this.scanChars() }, start); return; + } else if (chr === '[') { + const fieldOrSlot = this.tryScanFieldOrSlot(); + if (fieldOrSlot) { + this.pos += fieldOrSlot.length; + this.enqueue({ name: 'double-brackets', contents: fieldOrSlot }, start); + return; + } + + // didn't find a valid field/slot, so fall back to text. + this.enqueue({ name: 'text', contents: this.scanChars() }, start); } else if (chr === '<') { if ( this.str[this.pos + 1] === '!' && @@ -436,5 +461,13 @@ function isChars(chr: string) { } function isFormat(chr: string) { - return chr === '*' || chr === '_' || chr === '`' || chr === '<' || chr === '|' || chr === '~'; + return ( + chr === '*' || + chr === '_' || + chr === '`' || + chr === '[' || + chr === '<' || + chr === '|' || + chr === '~' + ); } diff --git a/src/visitor.ts b/src/visitor.ts index 0ccca49..16eb94b 100644 --- a/src/visitor.ts +++ b/src/visitor.ts @@ -11,6 +11,7 @@ const childKeys = { tick: ['contents'], tilde: ['contents'], pipe: [], + 'double-brackets': ['contents'], ul: ['contents'], ol: ['contents'], 'ordered-list-item': ['contents', 'sublist'], diff --git a/test/cases/iterator-close.html b/test/cases/iterator-close.html index 381bfb6..32108d3 100644 --- a/test/cases/iterator-close.html +++ b/test/cases/iterator-close.html @@ -5,7 +5,7 @@
  • ReturnIfAbrupt(hasReturn).
    1. If hasReturn is true, then
      1. Let innerResult be Invoke(iterator, "return", ( )).
      2. -
      3. If completion.[[type]] is not throw and innerResult.[[type]] is throw, then
          +
        1. If completion.[[type]] is not throw and innerResult.[[type]] is throw, then
          1. Return innerResult.
        2. From de4b042a151e5f01918bf79ba289180171a3a2ff Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Mon, 23 Jan 2023 23:33:19 -0500 Subject: [PATCH 2/8] Restore support for ECMA-402 `[[<_var_>]]` --- src/tokenizer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index b5f94e6..b911cf6 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1,6 +1,6 @@ import type { Unlocated, Token, AttrToken, Position } from './node-types'; -const fieldOrSlotRegexp = /^\[\[(?:[^\\\]]|\\.)+\]\]/; +const fieldOrSlotRegexp = /^\[\[[a-zA-Z0-9_%]+\]\]/; const tagRegexp = /^<[/!]?(\w[\w-]*)(\s+\w[\w-]*(\s*=\s*("[^"]*"|'[^']*'|[^><"'=`]+))?)*\s*>/; const commentRegexp = /^/; const attrRegexp = /^\[ *[\w-]+ *= *"(?:[^"\\\x00-\x1F]|\\["\\/bfnrt]|\\u[a-fA-F]{4})*" *(?:, *[\w-]+ *= *"(?:[^"\\\x00-\x1F]|\\["\\/bfnrt]|\\u[a-fA-F]{4})*" *)*] /; From f3581d46713d638843245c4e7f04f95cc2376794 Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Tue, 24 Jan 2023 10:08:28 -0500 Subject: [PATCH 3/8] Restore support for [[%intrinsic%]] --- src/tokenizer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenizer.ts b/src/tokenizer.ts index b911cf6..a71f079 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -1,6 +1,6 @@ import type { Unlocated, Token, AttrToken, Position } from './node-types'; -const fieldOrSlotRegexp = /^\[\[[a-zA-Z0-9_%]+\]\]/; +const fieldOrSlotRegexp = /^\[\[[a-zA-Z0-9_]+\]\]/; const tagRegexp = /^<[/!]?(\w[\w-]*)(\s+\w[\w-]*(\s*=\s*("[^"]*"|'[^']*'|[^><"'=`]+))?)*\s*>/; const commentRegexp = /^/; const attrRegexp = /^\[ *[\w-]+ *= *"(?:[^"\\\x00-\x1F]|\\["\\/bfnrt]|\\u[a-fA-F]{4})*" *(?:, *[\w-]+ *= *"(?:[^"\\\x00-\x1F]|\\["\\/bfnrt]|\\u[a-fA-F]{4})*" *)*] /; From a26833116bd0e6641488d6622444077a49e7453a Mon Sep 17 00:00:00 2001 From: Kevin Gibbons Date: Tue, 24 Jan 2023 10:58:30 -0800 Subject: [PATCH 4/8] add some test cases --- test/cases/formats-in-text.fragment.ecmarkdown | 2 +- 
test/cases/formats-in-text.fragment.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/cases/formats-in-text.fragment.ecmarkdown b/test/cases/formats-in-text.fragment.ecmarkdown index 974e424..8d79b54 100644 --- a/test/cases/formats-in-text.fragment.ecmarkdown +++ b/test/cases/formats-in-text.fragment.ecmarkdown @@ -1 +1 @@ -*star*s s*tars* _var_s v_ars_ `tick`s t`icks` |pipe|s p|ipes| ~tilde~s t~ildes~ +*star*s s*tars* _var_s v_ars_ `tick`s t`icks` |pipe|s p|ipes| ~tilde~s t~ildes~ a.[[b]] a.[[B]] a.[b] a.[[<_b_>]] a.[[%b%]] diff --git a/test/cases/formats-in-text.fragment.html b/test/cases/formats-in-text.fragment.html index 6a42d0b..1992ebc 100644 --- a/test/cases/formats-in-text.fragment.html +++ b/test/cases/formats-in-text.fragment.html @@ -1 +1 @@ -stars s*tars* vars v_ars_ ticks ticks pipes p|ipes| tildes t~ildes~ +stars s*tars* vars v_ars_ ticks ticks pipes p|ipes| tildes t~ildes~ a.[[b]] a.[[B]] a.[b] a.[[<b>]] a.[[%b%]] From 45381ba1fbfe9379926dfd0ec26c2d7c05a94e29 Mon Sep 17 00:00:00 2001 From: Kevin Gibbons Date: Tue, 24 Jan 2023 11:21:42 -0800 Subject: [PATCH 5/8] promote double-brackets node to its own type, rather than being a format node --- src/emitter.ts | 2 +- src/node-types.ts | 29 ++++++++++++++++++----------- src/parser.ts | 32 ++++++++++++++++++-------------- src/tokenizer.ts | 12 ++---------- src/visitor.ts | 2 +- 5 files changed, 40 insertions(+), 37 deletions(-) diff --git a/src/emitter.ts b/src/emitter.ts index e176240..c42adee 100644 --- a/src/emitter.ts +++ b/src/emitter.ts @@ -130,7 +130,7 @@ export class Emitter { } emitFieldOrSlot(node: DoubleBracketsNode) { - this.wrapFragment('var', node.contents, ' class="field"'); + this.str += `${node.contents}`; } emitTag(tag: OpaqueTagNode | CommentNode | TagNode) { diff --git a/src/node-types.ts b/src/node-types.ts index 81626d6..b056df7 100644 --- a/src/node-types.ts +++ b/src/node-types.ts @@ -21,7 +21,7 @@ export type EOFToken = { location: LocationRange; }; 
-export type Format = 'star' | 'underscore' | 'tick' | 'pipe' | 'tilde' | 'double-brackets'; +export type Format = 'star' | 'underscore' | 'tick' | 'pipe' | 'tilde'; export type FormatToken = { name: Format; @@ -47,6 +47,12 @@ export type WhitespaceToken = { location: LocationRange; }; +export type DoubleBracketsToken = { + name: 'double-brackets'; + contents: string; + location: LocationRange; +}; + export type TextToken = { name: 'text'; contents: string; @@ -96,6 +102,7 @@ export type Token = | ParabreakToken | LinebreakToken | WhitespaceToken + | DoubleBracketsToken | TextToken | CommentToken | TagToken @@ -170,17 +177,11 @@ export type PipeNode = { export type DoubleBracketsNode = { name: 'double-brackets'; - contents: FragmentNode[]; + contents: string; location: LocationRange; }; -export type FormatNode = - | StarNode - | UnderscoreNode - | TickNode - | TildeNode - | PipeNode - | DoubleBracketsNode; +export type FormatNode = StarNode | UnderscoreNode | TickNode | TildeNode | PipeNode; export type UnorderedListNode = { name: 'ul'; @@ -213,7 +214,13 @@ export type OrderedListItemNode = { location: LocationRange; }; -export type FragmentNode = TextNode | FormatNode | CommentNode | TagNode | OpaqueTagNode; +export type FragmentNode = + | TextNode + | FormatNode + | CommentNode + | TagNode + | OpaqueTagNode + | DoubleBracketsNode; export type ListNode = UnorderedListNode | OrderedListNode; @@ -223,9 +230,9 @@ export type Node = | CommentNode | AlgorithmNode | TextNode + | DoubleBracketsNode | StarNode | UnderscoreNode - | DoubleBracketsNode | TickNode | TildeNode | PipeNode diff --git a/src/parser.ts b/src/parser.ts index fdd30e9..55f622e 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -180,7 +180,12 @@ export class Parser { frag = frag.concat(f); } } - } else if (tok.name === 'comment' || tok.name === 'tag' || tok.name === 'opaqueTag') { + } else if ( + tok.name === 'comment' || + tok.name === 'tag' || + tok.name === 'opaqueTag' || + tok.name === 
'double-brackets' + ) { frag.push(tok); this._t.next(); } else if (isList(tok)) { @@ -241,15 +246,20 @@ export class Parser { lastRealTok = lastWsTok; } - if (tok.name === 'opaqueTag' || tok.name === 'comment' || tok.name === 'tag') { + if ( + tok.name === 'opaqueTag' || + tok.name === 'comment' || + tok.name === 'tag' || + tok.name === 'double-brackets' + ) { break; } if (isFormatToken(tok)) { // check if format token is valid // - // tick and field/slot are always valid - if (tok.name === 'tick' || tok.name === 'double-brackets') { + // tick is always valid + if (tok.name === 'tick') { break; } @@ -292,12 +302,7 @@ export class Parser { const start = this.getPos(startTok); let contents: (TextNode | CommentNode | TagNode)[] = []; - if (format === 'double-brackets') { - // the tokenizer emits fields/slots as complete `[[...]]` tokens, which are preserved here - const location = { start, end: this.getPos() }; - const text = { name: 'text', contents: startTok.contents, location }; - return [{ name: format, contents: [text as TextNode], location }]; - } else if (format === 'underscore') { + if (format === 'underscore') { if (this._t.peek().name === 'text') { contents = [this._t.next() as TextNode]; } @@ -400,8 +405,7 @@ function isFormatToken(tok: Token): tok is FormatToken { tok.name === 'underscore' || tok.name === 'tilde' || tok.name === 'tick' || - tok.name === 'pipe' || - tok.name === 'double-brackets' + tok.name === 'pipe' ); } @@ -423,7 +427,7 @@ function isList(tok: Token): tok is OrderedListToken | UnorderedListToken { // Backtick can work anywhere, other format tokens have more stringent requirements. // This aligns with gmd semantics. 
function isValidStartFormat(prev: NotEOFToken, cur: Token, next: Token) { - if (cur.name === 'tick' || cur.name === 'double-brackets') { + if (cur.name === 'tick') { return true; } @@ -431,7 +435,7 @@ function isValidStartFormat(prev: NotEOFToken, cur: Token, next: Token) { } function isValidEndFormat(prev: Token, cur: Token) { - if (cur.name === 'tick' || cur.name === 'double-brackets') { + if (cur.name === 'tick') { return true; } diff --git a/src/tokenizer.ts b/src/tokenizer.ts index a71f079..38b9ade 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -457,17 +457,9 @@ function isWhitespace(chr: string) { } function isChars(chr: string) { - return !isFormat(chr) && chr !== '\n' && chr !== ' ' && chr !== '\t'; + return !isFormat(chr) && chr !== '\n' && chr !== ' ' && chr !== '\t' && chr !== '['; } function isFormat(chr: string) { - return ( - chr === '*' || - chr === '_' || - chr === '`' || - chr === '[' || - chr === '<' || - chr === '|' || - chr === '~' - ); + return chr === '*' || chr === '_' || chr === '`' || chr === '<' || chr === '|' || chr === '~'; } diff --git a/src/visitor.ts b/src/visitor.ts index 16eb94b..9c36179 100644 --- a/src/visitor.ts +++ b/src/visitor.ts @@ -6,12 +6,12 @@ const childKeys = { comment: [], algorithm: ['contents'], text: [], + 'double-brackets': [], star: ['contents'], underscore: [], tick: ['contents'], tilde: ['contents'], pipe: [], - 'double-brackets': ['contents'], ul: ['contents'], ol: ['contents'], 'ordered-list-item': ['contents', 'sublist'], From 284a75abaa58bc4d3a2035d051c2ea861da6668d Mon Sep 17 00:00:00 2001 From: Kevin Gibbons Date: Tue, 24 Jan 2023 11:25:26 -0800 Subject: [PATCH 6/8] document field syntax --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 51e4258..0e9891b 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,8 @@ Inside a paragraph, list item, or header, the following inline formatting elemen **Variables** are written as `_x_` and are translated to `x`. 
Variables cannot contain whitespace or other formatting characters. +**Fields** are written as `[[f]]` and are translated as `<var class="field">[[f]]</var>`. Field names must match `/[a-zA-Z0-9_]+/`. + **Values** are written as `*x*` and are translated to `<emu-val>x</emu-val>`. Values cannot contain asterisks. **Code** is written as `` `x` `` and is translated to `<code>x</code>`. Code cannot contain backticks. From 4a4f20a6a7a49be739f97c65cac488b3749b8b38 Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Wed, 25 Jan 2023 10:11:34 -0500 Subject: [PATCH 7/8] Anchor the field name regular expression --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0e9891b..d0db4d4 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ Inside a paragraph, list item, or header, the following inline formatting elemen **Variables** are written as `_x_` and are translated to `<var>x</var>`. Variables cannot contain whitespace or other formatting characters. -**Fields** are written as `[[f]]` and are translated as `<var class="field">[[f]]</var>`. Field names must match `/[a-zA-Z0-9_]+/`. +**Fields** are written as `[[f]]` and are translated as `<var class="field">[[f]]</var>`. Field names must match regular expression `/^[a-zA-Z0-9_]+$/`. **Values** are written as `*x*` and are translated to `<emu-val>x</emu-val>`. Values cannot contain asterisks. 
From 1d2cdde446d32531ae08f1bf3d8f4805bf55d7ca Mon Sep 17 00:00:00 2001 From: Kevin Gibbons Date: Tue, 14 Feb 2023 22:02:03 -0800 Subject: [PATCH 8/8] exclude brackets from node contents --- src/emitter.ts | 2 +- src/tokenizer.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/emitter.ts b/src/emitter.ts index c42adee..590203c 100644 --- a/src/emitter.ts +++ b/src/emitter.ts @@ -130,7 +130,7 @@ export class Emitter { } emitFieldOrSlot(node: DoubleBracketsNode) { - this.str += `${node.contents}`; + this.str += `[[${node.contents}]]`; } emitTag(tag: OpaqueTagNode | CommentNode | TagNode) { diff --git a/src/tokenizer.ts b/src/tokenizer.ts index 38b9ade..2522f64 100644 --- a/src/tokenizer.ts +++ b/src/tokenizer.ts @@ -316,7 +316,7 @@ export class Tokenizer { const fieldOrSlot = this.tryScanFieldOrSlot(); if (fieldOrSlot) { this.pos += fieldOrSlot.length; - this.enqueue({ name: 'double-brackets', contents: fieldOrSlot }, start); + this.enqueue({ name: 'double-brackets', contents: fieldOrSlot.slice(2, -2) }, start); return; }