From a0043f017f24662aa40333318916176de1c7eb0c Mon Sep 17 00:00:00 2001
From: David Chambers <dc@davidchambers.me>
Date: Thu, 4 Jan 2024 09:49:52 +0100
Subject: [PATCH] refactor `rewriteJs` and `rewriteCoffee`

---
 lib/doctest.js | 397 +++++++++++++++++--------------------------------
 test/index.js  |   2 +-
 2 files changed, 134 insertions(+), 265 deletions(-)

diff --git a/lib/doctest.js b/lib/doctest.js
index eb608ed..48b9e1d 100644
--- a/lib/doctest.js
+++ b/lib/doctest.js
@@ -79,8 +79,7 @@ __doctest.enqueue({
       ${formatOutput ('      ') (output.lines)};
     },
   }`},
-});
-`;
+});`;
 };
 
 const wrapCoffee = ({indent, input, output}) => `
@@ -107,290 +106,161 @@ const contiguous = line => lines => (
   line.number === lines[lines.length - 1].number + 1
 );
 
-//    Location = { start :: { line :: Integer, column :: Integer }
-//               ,   end :: { line :: Integer, column :: Integer } }
-
-//    transformComments
-//    :: { prefix :: String
-//       , openingDelimiter :: String?
-//       , closingDelimiter :: String? }
-//    -> Array { type :: String, value :: String, loc :: Location }
-//    -> Array { commentIndex :: Integer
-//             ,        input :: { lines :: Array Line, loc :: Location }
-//             ,       output :: { lines :: Array Line, loc :: Location } }
-//
-//    Returns the doctests present in the given esprima comment objects.
-//
-//    > transformComments
-//    .   ({prefix: ''})
-//    .   ([{type: 'Line',
-//    .      value: ' > 6 * 7',
-//    .      loc: {start: {line: 1, column: 0}, end: {line: 1, column: 10}}},
-//    .     {type: 'Line',
-//    .      value: ' 42',
-//    .      loc: {start: {line: 2, column: 0}, end: {line: 2, column: 5}}}])
-//    [{commentIndex: 1,
-//    . input: {lines: [Line (1) ('> 6 * 7')],
-//    .         loc: {start: {line: 1, column: 0},
-//    .               end: {line: 1, column: 10}}},
-//    . output: {lines: [Line (2) ('42')],
-//    .          loc: {start: {line: 2, column: 0},
-//    .                end: {line: 2, column: 5}}}}]
-const transformComments = ({
-  prefix,
-  openingDelimiter,
-  closingDelimiter,
-}) => comments => {
-  const result = comments.reduce (
-    (accum, comment, commentIndex) =>
-      comment.value.split ('\n')
-      .reduce ((accum, text, idx) => {
-        let uncommented, start, end;
-        if (comment.type === 'Block') {
-          uncommented = text.replace (/^\s*[*]/, '');
-          start = end = {line: comment.loc.start.line + idx};
-        } else if (comment.type === 'Line') {
-          uncommented = text;
-          ({start, end} = comment.loc);
-        }
-        if (uncommented.startsWith (prefix)) {
-          const unprefixed = uncommented
-                             .slice (prefix.length)
-                             .replace (/^\s*/, '');
-          const line = Line (start.line) (unprefixed);
-          if (accum.state === 'closed') {
-            if (unprefixed === openingDelimiter) accum.state = 'open';
-          } else if (unprefixed === closingDelimiter) {
-            accum.state = 'closed';
-          } else if (unprefixed.startsWith ('>')) {
-            accum.tests.push ({
-              [accum.state = 'input']: {
-                lines: [line],
-                loc: {start, end},
-              },
-            });
-          } else if (unprefixed.startsWith ('.')) {
-            accum.tests[accum.tests.length - 1][accum.state].lines.push (line);
-            accum.tests[accum.tests.length - 1][accum.state].loc.end = end;
-          } else if (accum.state === 'input') {
-            //  A comment immediately following an input line is an output
-            //  line if and only if it contains non-whitespace characters.
-            const {lines} = accum.tests[accum.tests.length - 1].input;
-            if (contiguous (line) (lines) && unprefixed !== '') {
-              accum.tests[accum.tests.length - 1].commentIndex = commentIndex;
-              accum.tests[accum.tests.length - 1][accum.state = 'output'] = {
-                lines: [line],
-                loc: {start, end},
-              };
-            } else {
-              accum.state = 'open';
-            }
-          }
-        }
-        return accum;
-      }, accum),
-    {state: openingDelimiter == null ? 'open' : 'closed', tests: []}
-  );
-
-  return result.tests;
-};
-
-//    substring
-//    :: ( String
-//       , { line :: Integer, column :: Integer }
-//       , { line :: Integer, column :: Integer } )
-//    -> String
-//
-//    Returns the substring between the start and end positions.
-//    Positions are specified in terms of line and column rather than index.
-//    {line: 1, column: 0} represents the first character of the first line.
-//
-//    > substring ('hello\nworld', {line: 1, column: 3}, {line: 2, column: 2})
-//    'lo\nwo'
-//    > substring ('hello\nworld', {line: 1, column: 0}, {line: 1, column: 0})
-//    ''
-const substring = (input, start, end) => {
-  const lines = input.split (/^/m);
-  return (
-    start.line === end.line ?
-      lines[start.line - 1].slice (start.column, end.column) :
-    end.line === Infinity ?
-      lines[start.line - 1].slice (start.column) +
-      (lines.slice (start.line)).join ('') :
-    // else
-      lines[start.line - 1].slice (start.column) +
-      (lines.slice (start.line, end.line - 1)).join ('') +
-      lines[end.line - 1].slice (0, end.column)
-  );
-};
-
 const rewriteJs = sourceType => ({
   prefix,
   openingDelimiter,
   closingDelimiter,
 }) => input => {
-  //  1. Locate block comments and line comments within the input text.
-  //
-  //  2. Create a list of comment chunks from the list of line comments
-  //     located in step 1 by grouping related comments.
-  //
-  //  3. Create a list of code chunks from the remaining input text.
-  //     Note that if there are N comment chunks there are N + 1 code
-  //     chunks. A trailing empty comment enables the final code chunk
-  //     to be captured:
-
-  const bookend = {
-    loc: {start: {line: Infinity, column: Infinity}},
-  };
-
-  //  4. Map each comment chunk in the list produced by step 2 to a
-  //     string of JavaScript code derived from the chunk's doctests.
-  //
-  //  5. Zip the lists produced by steps 3 and 4.
-  //
-  //  6. Find block comments in the source code produced by step 5.
-  //     (The locations of block comments located in step 1 are not
-  //     applicable to the rewritten source.)
-  //
-  //  7. Repeat steps 3 through 5 for the list of block comments
-  //     produced by step 6 (substituting "step 6" for "step 2").
-
-  const getComments = input => {
-    const comments = [];
-    acorn.parse (input, {
-      ecmaVersion: 2023,
-      sourceType,
-      locations: true,
-      onComment: comments,
-    });
-    return comments;
-  };
+  // 1: Parse source text to extract comments
+  const comments = [];
+  acorn.parse (input, {
+    ecmaVersion: 2023,
+    sourceType,
+    locations: true,
+    onComment: comments,
+  });
 
-  const wrapCode = wrapJs (sourceType);
+  // 2: Keep the source text found outside comments, keyed by its offset
+  const chunks = [];
+  {
+    let cursor = 0;
+    for (const comment of comments) {
+      chunks.push ([cursor, input.slice (cursor, comment.start)]);
+      cursor = comment.end;
+    }
+    chunks.push ([cursor, input.slice (cursor)]);
+  }
 
-  //    comments :: { Block :: Array Comment, Line :: Array Comment }
-  const comments = Z.reduce (
-    (comments, comment) => ((comments[comment.type].push (comment), comments)),
-    {Block: [], Line: []},
-    getComments (input)
-  );
+  // 3: Extract prefixed comment lines as [ordering key, Line] pairs
+  const lines = [];
+  {
+    const maybePushLine = (text, offset, number) => {
+      if (text.startsWith (prefix)) {
+        const unprefixed = (text.slice (prefix.length)).trimStart ();
+        lines.push ([offset, Line (number) (unprefixed)]);
+      }
+    };
+    for (const {type, value, start, loc} of comments) {
+      if (type === 'Line') {
+        maybePushLine (value, start, loc.start.line);
+      } else {
+        let offset = start;
+        let number = loc.start.line;
+        for (const text of value.split ('\n')) {
+          maybePushLine (text.replace (/^\s*[*]/, ''), offset, number);
+          offset += '\n'.length;  // +1 per line: an ordering key only
+          number += 1;
+        }
+      }
+    }
+  }
 
-  const options = {prefix, openingDelimiter, closingDelimiter};
-  const blockTests = transformComments (options) (comments.Block);
-  const lineTests = transformComments (options) (comments.Line);
+  // 4: Coalesce related input ('>'), continuation ('.'), and output lines
+  const tests = [];
+  {
+    let test;
+    let state = openingDelimiter == null ? 'open' : 'closed';
+    for (const [offset, line] of lines) {
+      if (state === 'closed') {
+        if (line.text === openingDelimiter) state = 'open';
+      } else if (line.text === closingDelimiter) {
+        state = 'closed';
+      } else if (line.text.startsWith ('>')) {
+        tests.push ([offset, test = {input: {lines: [line]}}]);
+        state = 'input';
+      } else if (line.text.startsWith ('.')) {
+        test?.[state]?.lines.push (line);  // no-op outside a doctest
+      } else if (state === 'input') {
+        //  A comment immediately following an input line is an output
+        //  line if and only if it contains non-whitespace characters.
+        if (contiguous (line) (test.input.lines) && line.text !== '') {
+          test.output = {lines: [line]};
+          state = 'output';
+        } else {
+          state = 'open';
+        }
+      }
+    }
+  }
 
-  const chunks = lineTests
-    .concat ([{input: bookend}])
-    .reduce ((accum, test) => {
-      accum.chunks.push (
-        substring (input, accum.loc, test.input.loc.start)
-        .replace (/[ \t]*$/, '')
-      );
-      accum.loc = (test.output ?? test.input).loc.end;
-      return accum;
-    }, {chunks: [], loc: {line: 1, column: 0}})
-    .chunks;
+  // 5: Convert doctests to source text
+  for (const [offset, test] of tests) {
+    chunks.push ([offset, wrapJs (sourceType) (test)]);
+  }
 
-  //    source :: String
-  const source = lineTests
-    .map (wrapCode)
-    .concat ([''])
-    .reduce ((accum, s, idx) => `${accum}${chunks[idx]}${s}`, '');
+  // 6: Sort verbatim and generated source text by original offsets
+  chunks.sort (([a], [b]) => a - b);
 
-  return getComments (source)
-    .filter (comment => comment.type === 'Block')
-    .concat ([bookend])
-    .reduce ((accum, comment, idx) => {
-      accum.chunks.push (
-        substring (source, accum.loc, comment.loc.start),
-        blockTests.reduce (
-          (s, test) =>
-            test.commentIndex === idx ? `${s}${wrapCode (test)}\n` : s,
-          ''
-        )
-      );
-      accum.loc = comment.loc.end;
-      return accum;
-    }, {chunks: [], loc: {line: 1, column: 0}})
-    .chunks
-    .join ('');
+  // 7: Join the ordered chunks into the rewritten source text.
+  //    (Each chunk is an [offset, text] pair; only the text is emitted.)
+  const appendText = (sourceText, [, text]) => sourceText + text;
+  return chunks.reduce (appendText, '');
 };
 
 const rewriteCoffee = ({
-  prefix,
+  prefix: _prefix,
   openingDelimiter,
   closingDelimiter,
 }) => input => {
-  const lines = input.match (/^.*(?=\n)/gm);
-  const chunks = lines.reduce ((accum, text, idx) => {
-    const isComment = /^[ \t]*#(?!##)/.test (text);
-    const current = isComment ? accum.commentChunks : accum.literalChunks;
-    const line = Line (idx + 1) (text);
-    if (isComment === accum.isComment) {
-      current[current.length - 1].push (line);
-    } else {
-      current.push ([line]);
+  // 1a: Extract prefixed line comments (never '###' block delimiters)
+  const lines = [];
+  // 1b: Preserve other lines
+  const chunks = [];
+  {
+    const prefix = '#' + _prefix;
+    let number = 0;
+    for (const [text, indent, rest] of input.matchAll (/^([ \t]*)(.*)\n?/gm)) {
+      number += 1;
+      if (rest.startsWith (prefix) && !(rest.startsWith ('###'))) {
+        const unprefixed = (rest.slice (prefix.length)).trimStart ();
+        const line = Line (number) (unprefixed);
+        lines.push ([indent, line]);
+      } else {
+        chunks.push ([number, text]);
+      }
     }
-    accum.isComment = isComment;
-    return accum;
-  }, {
-    literalChunks: [[]],
-    commentChunks: [],
-    isComment: false,
-  });
+  }
 
-  const testChunks = chunks.commentChunks.map (commentChunk => {
-    const result = commentChunk.reduce ((accum, {number, text}) => {
-      const [, indent, uncommented] = text.match (/^([ \t]*)#(.*)$/);
-      if (uncommented.startsWith (prefix)) {
-        const unprefixed = uncommented
-                           .slice (prefix.length)
-                           .replace (/^\s*/, '');
-        const line = Line (number) (unprefixed);
-        if (accum.state === 'closed') {
-          if (unprefixed === openingDelimiter) accum.state = 'open';
-        } else if (unprefixed === closingDelimiter) {
-          accum.state = 'closed';
-        } else if (unprefixed.startsWith ('>')) {
-          accum.tests.push ({
-            indent,
-            [accum.state = 'input']: {
-              lines: [line],
-            },
-          });
-        } else if (unprefixed.startsWith ('.')) {
-          accum.tests[accum.tests.length - 1][accum.state].lines.push (
-            line
-          );
-        } else if (accum.state === 'input') {
-          //  A comment immediately following an input line is an output
-          //  line if and only if it contains non-whitespace characters.
-          const {lines} = accum.tests[accum.tests.length - 1].input;
-          if (contiguous (line) (lines) && unprefixed !== '') {
-            accum.tests[accum.tests.length - 1][accum.state = 'output'] = {
-              lines: [line],
-            };
-          } else {
-            accum.state = 'open';
-          }
+  // 2: Coalesce related input ('>'), continuation ('.'), and output lines
+  const tests = [];
+  {
+    let test;
+    let state = openingDelimiter == null ? 'open' : 'closed';
+    for (const [indent, line] of lines) {
+      if (state === 'closed') {
+        if (line.text === openingDelimiter) state = 'open';
+      } else if (line.text === closingDelimiter) {
+        state = 'closed';
+      } else if (line.text.startsWith ('>')) {
+        tests.push ([line.number, test = {indent, input: {lines: [line]}}]);
+        state = 'input';
+      } else if (line.text.startsWith ('.')) {
+        test?.[state]?.lines.push (line);  // no-op outside a doctest
+      } else if (state === 'input') {
+        //  A comment immediately following an input line is an output
+        //  line if and only if it contains non-whitespace characters.
+        if (contiguous (line) (test.input.lines) && line.text !== '') {
+          test.output = {lines: [line]};
+          state = 'output';
+        } else {
+          state = 'open';
         }
       }
-      return accum;
-    }, {state: openingDelimiter == null ? 'open' : 'closed', tests: []});
+    }
+  }
 
-    return result.tests.map (wrapCoffee);
-  });
+  // 3: Convert doctests to source text
+  for (const [number, test] of tests) {
+    chunks.push ([number, wrapCoffee (test)]);
+  }
 
-  return CoffeeScript.compile (
-    chunks.literalChunks.reduce (
-      (s, chunk, idx) => Z.reduce (
-        (s, line) => `${s}${line}\n`,
-        chunk.reduce ((s, line) => `${s}${line.text}\n`, s),
-        idx < testChunks.length ? testChunks[idx] : []
-      ),
-      ''
-    )
-  );
+  // 4: Order verbatim and generated source text by original line number
+  chunks.sort ((left, right) => left[0] - right[0]);
+
+  // 5: Join the ordered text and compile it as CoffeeScript
+  const appendText = (sourceText, [, text]) => sourceText + text;
+  const sourceText = chunks.reduce (appendText, '');
+  return CoffeeScript.compile (sourceText);
 };
 
 const run = queue =>
@@ -490,7 +360,6 @@ const test = options => path => rewrite => async evaluate => {
     }
     return results;
   }
-  /* c8 ignore next */
 };
 
 export default options => async path => {
diff --git a/test/index.js b/test/index.js
index 9af2cef..1991741 100644
--- a/test/index.js
+++ b/test/index.js
@@ -304,7 +304,7 @@ testCommand ('bin/doctest --module xxx file.js', {
 testCommand ('bin/doctest --module esm lib/doctest.js', {
   status: 0,
   stdout: `running doctests in lib/doctest.js...
-...
+
 `,
   stderr: '',
 });