Add scope type parser piece of custom command grammar (#2295)

Initial work towards #492; will be used to parse scope types in #2131. Exposes a function `parseScopeType` that can parse strings like `funk`, `curly`, etc. into their corresponding scope type payloads. Here's a railroad: https://deploy-preview-2295--cursorless.netlify.app/custom-command-railroad ## Checklist - [ ] I have added [tests](https://www.cursorless.org/docs/contributing/test-case-recorder/) - [ ] I have updated the [docs](https://github.com/cursorless-dev/cursorless/tree/main/docs) and [cheatsheet](https://github.com/cursorless-dev/cursorless/tree/main/cursorless-talon/src/cheatsheet) - [ ] I have not broken the cheatsheet
cursorless-dev · Apr 22, 2024 · d5be7e7 · d5be7e7
1 parent 83ed3fe
commit d5be7e7
Show file tree

Hide file tree

Showing 18 changed files with 465 additions and 174 deletions.
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
@@ -66,7 +66,6 @@
       "label": "Generate grammar",
       "type": "npm",
       "script": "generate-grammar",
-      "path": "packages/cursorless-vscode",
       "presentation": {
         "reveal": "silent"
       },

diff --git a/package.json b/package.json
@@ -18,6 +18,7 @@
     "preinstall": "npx only-allow pnpm",
     "test-compile": "tsc --build",
     "test": "pnpm compile && pnpm lint && pnpm -F '!test-harness' test && pnpm -F test-harness test",
+    "generate-grammar": "pnpm -r generate-grammar",
     "transform-recorded-tests": "./packages/common/scripts/my-ts-node.js packages/cursorless-engine/src/scripts/transformRecordedTests/index.ts",
     "watch": "pnpm run -w --parallel '/^watch:.*/'",
     "watch:esbuild": "pnpm run -r --parallel --if-present watch:esbuild",

diff --git a/packages/cursorless-engine/package.json b/packages/cursorless-engine/package.json
@@ -8,6 +8,11 @@
     "compile:tsc": "tsc --build",
     "compile:esbuild": "esbuild ./src/index.ts --sourcemap --format=esm --bundle --packages=external --outfile=./out/index.js",
     "compile": "pnpm compile:tsc && pnpm compile:esbuild",
+    "generate-grammar:base": "nearleyc src/customCommandGrammar/grammar.ne",
+    "ensure-grammar-up-to-date": "pnpm -s generate-grammar:base | diff -u src/customCommandGrammar/generated/grammar.ts -",
+    "generate-grammar": "pnpm generate-grammar:base -o src/customCommandGrammar/generated/grammar.ts",
+    "generate-railroad": "nearley-railroad src/customCommandGrammar/grammar.ne -o out/railroad.html",
+    "test": "pnpm ensure-grammar-up-to-date",
     "watch:tsc": "pnpm compile:tsc --watch",
     "watch:esbuild": "pnpm compile:esbuild --watch",
     "watch": "pnpm run --filter @cursorless/cursorless-engine --parallel '/^watch:.*/'"
@@ -22,6 +27,8 @@
     "immutability-helper": "^3.1.1",
     "itertools": "^2.2.5",
     "lodash": "^4.17.21",
+    "moo": "0.5.2",
+    "nearley": "2.20.1",
     "node-html-parser": "^6.1.12",
     "sbd": "^1.0.19",
     "uuid": "^9.0.1",
@@ -32,6 +39,8 @@
     "@types/js-yaml": "^4.0.9",
     "@types/lodash": "4.17.0",
     "@types/mocha": "^10.0.6",
+    "@types/moo": "0.5.9",
+    "@types/nearley": "2.11.5",
     "@types/sbd": "^1.0.5",
     "@types/sinon": "^17.0.3",
     "@types/uuid": "^9.0.8",

diff --git a/packages/cursorless-engine/src/customCommandGrammar/generated/grammar.ts b/packages/cursorless-engine/src/customCommandGrammar/generated/grammar.ts
@@ -0,0 +1,51 @@
+// Generated automatically by nearley, version 2.20.1
+// http://github.com/Hardmath123/nearley
+// Bypasses TS6133. Allow declared but unused functions.
+// @ts-ignore
+function id(d: any[]): any { return d[0]; }
+declare var simpleScopeTypeType: any;
+declare var pairedDelimiter: any;
+
+import { capture } from "../../util/grammarHelpers";
+import { lexer } from "../lexer";
+
+interface NearleyToken {
+  value: any;
+  [key: string]: any;
+};
+
+interface NearleyLexer {
+  reset: (chunk: any, info: any) => void;
+  next: () => NearleyToken | undefined;
+  save: () => any;
+  formatError: (token: any, message: string) => string;
+  has: (tokenType: any) => boolean;
+};
+
+interface NearleyRule {
+  name: string;
+  symbols: NearleySymbol[];
+  postprocess?: (d: any[], loc?: number, reject?: {}) => any;
+};
+
+type NearleySymbol = string | { literal: any } | { test: (token: any) => boolean };
+
+interface Grammar {
+  Lexer: NearleyLexer | undefined;
+  ParserRules: NearleyRule[];
+  ParserStart: string;
+};
+
+const grammar: Grammar = {
+  Lexer: lexer,
+  ParserRules: [
+    {"name": "main", "symbols": ["scopeType"]},
+    {"name": "scopeType", "symbols": [(lexer.has("simpleScopeTypeType") ? {type: "simpleScopeTypeType"} : simpleScopeTypeType)], "postprocess": capture("type")},
+    {"name": "scopeType", "symbols": [(lexer.has("pairedDelimiter") ? {type: "pairedDelimiter"} : pairedDelimiter)], "postprocess": 
+        ([delimiter]) => ({ type: "surroundingPair", delimiter })
+        }
+  ],
+  ParserStart: "main",
+};
+
+export default grammar;
diff --git a/packages/cursorless-engine/src/customCommandGrammar/grammar.ne b/packages/cursorless-engine/src/customCommandGrammar/grammar.ne
@@ -0,0 +1,14 @@
+@preprocessor typescript
+@{%
+import { capture } from "../../util/grammarHelpers";
+import { lexer } from "../lexer";
+%}
+@lexer lexer
+
+# Default start rule: the input is a single scope type.
+main -> scopeType
+
+# --------------------------- Scope types ---------------------------
+# A simple scope type is exactly one `simpleScopeTypeType` lexer token.
+# NOTE(review): `capture` comes from util/grammarHelpers; presumably
+# `capture("type")` stores the matched element under the key `type` - confirm.
+scopeType -> %simpleScopeTypeType {% capture("type") %}
+# A surrounding pair is exactly one `pairedDelimiter` lexer token; the single
+# matched element becomes the `delimiter` field of the result.
+scopeType -> %pairedDelimiter {%
+  ([delimiter]) => ({ type: "surroundingPair", delimiter })
+%}
diff --git a/packages/cursorless-engine/src/customCommandGrammar/grammarScopeType.test.ts b/packages/cursorless-engine/src/customCommandGrammar/grammarScopeType.test.ts
@@ -0,0 +1,39 @@
+import assert from "assert";
+import { ScopeType } from "@cursorless/common";
+import { parseScopeType } from "./parseScopeType";
+
+interface TestCase { // One table-driven case for `parseScopeType`
+  input: string; // Text passed to `parseScopeType`
+  expectedOutput: ScopeType; // Value the parse result is compared against with `deepStrictEqual`
+}
+
+const testCases: TestCase[] = [ // One simple scope type, then two surrounding pairs
+  {
+    input: "funk",
+    expectedOutput: {
+      type: "namedFunction",
+    },
+  },
+  {
+    input: "curly",
+    expectedOutput: {
+      type: "surroundingPair",
+      delimiter: "curlyBrackets",
+    },
+  },
+  {
+    input: "string",
+    expectedOutput: {
+      type: "surroundingPair",
+      delimiter: "string",
+    },
+  },
+];
+
+// Registers one test per table entry; each test is titled with its input text.
+suite("custom grammar: scope types", () => {
+  for (const testCase of testCases) {
+    test(testCase.input, () => {
+      const actualOutput = parseScopeType(testCase.input);
+
+      assert.deepStrictEqual(actualOutput, testCase.expectedOutput);
+    });
+  }
+});
diff --git a/packages/cursorless-engine/src/customCommandGrammar/lexer.test.ts b/packages/cursorless-engine/src/customCommandGrammar/lexer.test.ts
@@ -0,0 +1,76 @@
+import * as assert from "assert";
+import { unitTestSetup } from "../test/unitTestSetup";
+import { lexer } from "./lexer";
+
+interface Token { // The two token fields the assertions compare
+  type: string;
+  value: string;
+}
+
+interface Fixture { // One table-driven lexer case
+  input: string; // Text passed to `lexer.reset`
+  expectedOutput: Token[]; // Expected tokens, in order, reduced to `type` and `value`
+}
+
+const fixtures: Fixture[] = [
+  { // A single simple scope type
+    input: "funk",
+    expectedOutput: [
+      {
+        type: "simpleScopeTypeType",
+        value: "namedFunction",
+      },
+    ],
+  },
+  { // A single paired delimiter
+    input: "curly",
+    expectedOutput: [
+      {
+        type: "pairedDelimiter",
+        value: "curlyBrackets",
+      },
+    ],
+  },
+  { // Two spoken forms separated by a space: the space is expected as its own `ws` token
+    input: "state name",
+    expectedOutput: [
+      {
+        type: "simpleScopeTypeType",
+        value: "statement",
+      },
+      {
+        type: "ws",
+        value: " ",
+      },
+      {
+        type: "simpleScopeTypeType",
+        value: "name",
+      },
+    ],
+  },
+  { // "funk name" is expected to lex as one token rather than as "funk", `ws`, "name"
+    input: "funk name",
+    expectedOutput: [
+      {
+        type: "simpleScopeTypeType",
+        value: "functionName",
+      },
+    ],
+  },
+];
+
+// Registers one test per fixture; each test lexes the fixture's input and
+// compares the `type` and `value` of every emitted token with the expectation.
+suite("custom grammar lexer", () => {
+  unitTestSetup();
+
+  for (const { input, expectedOutput } of fixtures) {
+    test(input, () => {
+      const actualOutput = Array.from(lexer.reset(input), (token) => ({
+        type: token.type,
+        value: token.value,
+      }));
+
+      assert.deepStrictEqual(actualOutput, expectedOutput);
+    });
+  }
+});
diff --git a/packages/cursorless-engine/src/customCommandGrammar/lexer.ts b/packages/cursorless-engine/src/customCommandGrammar/lexer.ts
@@ -0,0 +1,44 @@
+import { simpleScopeTypeTypes, surroundingPairNames } from "@cursorless/common";
+import moo from "moo";
+import { defaultSpokenFormMap } from "../spokenForms/defaultSpokenFormMap";
+
+/** The type and value the lexer reports for a recognised spoken form. */
+interface Token {
+  type: string;
+  value: string;
+}
+
+/** Maps each default spoken form to the token it should lex to. */
+const tokens: Record<string, Token> = {};
+
+/**
+ * Adds an entry to {@link tokens} for every spoken form of every identifier in
+ * `values`. A spoken form that is already registered is overwritten, so the
+ * most recent registration wins.
+ *
+ * @param type Token type to report for these spoken forms
+ * @param values Identifiers to register; each becomes the token value
+ * @param getSpokenForms Returns the spoken forms for one identifier
+ */
+function registerSpokenForms<T extends string>(
+  type: string,
+  values: readonly T[],
+  getSpokenForms: (value: T) => readonly string[],
+): void {
+  for (const value of values) {
+    for (const spokenForm of getSpokenForms(value)) {
+      tokens[spokenForm] = { type, value };
+    }
+  }
+}
+
+// Simple scope types are registered first and paired delimiters second, so a
+// spoken form shared by both ends up as a paired delimiter.
+registerSpokenForms(
+  "simpleScopeTypeType",
+  simpleScopeTypeTypes,
+  (value) => defaultSpokenFormMap.simpleScopeTypeType[value].spokenForms,
+);
+registerSpokenForms(
+  "pairedDelimiter",
+  surroundingPairNames,
+  (value) => defaultSpokenFormMap.pairedDelimiter[value].spokenForms,
+);
+
+/**
+ * Lexer for the custom command grammar. It recognises runs of spaces and tabs
+ * (`ws`) and every spoken form registered in `tokens`; the reported type and
+ * value of a spoken-form token are looked up in `tokens`.
+ */
+export const lexer = moo.compile({
+  ws: /[ \t]+/,
+  token: {
+    match: Object.keys(tokens),
+    type: (spokenForm) => tokens[spokenForm].type,
+    value: (spokenForm) => tokens[spokenForm].value,
+  },
+});
+
+// NOTE(review): `transform` is attached as an extra property on the lexer
+// object; presumably the parser uses it to pass token values rather than whole
+// token objects to grammar postprocessors - confirm against the parser setup.
+Object.assign(lexer, {
+  transform: (token: { value: string }) => token.value,
+});
diff --git a/packages/cursorless-engine/src/customCommandGrammar/parseScopeType.ts b/packages/cursorless-engine/src/customCommandGrammar/parseScopeType.ts
@@ -0,0 +1,29 @@
+import { Parser, Grammar } from "nearley";
+import grammar from "./generated/grammar";
+import { ScopeType } from "@cursorless/common";
+
+/**
+ * Builds a fresh parser for the compiled grammar, with the start rule
+ * overridden to `scopeType`.
+ *
+ * @returns A new parser that has not been fed any input
+ */
+function getScopeTypeParser(): Parser {
+  const scopeTypeGrammar = Grammar.fromCompiled({
+    ...grammar,
+    // eslint-disable-next-line @typescript-eslint/naming-convention
+    ParserStart: "scopeType",
+  });
+
+  return new Parser(scopeTypeGrammar);
+}
+
+/**
+ * Given a textual representation of a scope type, parse it into a scope type.
+ *
+ * @param input A textual representation of a scope type
+ * @returns A parsed scope type
+ * @throws Error if the input does not yield exactly one parse; the message
+ * includes the offending input. NOTE(review): `parser.feed` may itself throw
+ * on text the lexer or grammar cannot accept - confirm against nearley docs.
+ */
+export function parseScopeType(input: string): ScopeType {
+  const parser = getScopeTypeParser();
+  parser.feed(input);
+
+  const { results } = parser;
+
+  // Zero results means the input is incomplete; more than one means it is
+  // ambiguous. Either way there is no single scope type to return.
+  if (results.length !== 1) {
+    throw new Error(
+      `Expected exactly one result, got ${
+        results.length
+      } when parsing scope type ${JSON.stringify(input)}`,
+    );
+  }
+
+  return results[0] as ScopeType;
+}
diff --git a/packages/cursorless-engine/src/index.ts b/packages/cursorless-engine/src/index.ts
@@ -10,3 +10,4 @@ export * from "./api/CursorlessEngineApi";
 export * from "./CommandRunner";
 export * from "./CommandHistory";
 export * from "./CommandHistoryAnalyzer";
+export * from "./util/grammarHelpers";
diff --git a/packages/cursorless-engine/src/spokenForms/defaultSpokenFormMap.ts b/packages/cursorless-engine/src/spokenForms/defaultSpokenFormMap.ts
@@ -1,4 +1,4 @@
-import { mapSpokenForms } from "./SpokenFormMap";
+import { SpokenFormMap, mapSpokenForms } from "./SpokenFormMap";
 import { defaultSpokenFormMapCore } from "./defaultSpokenFormMapCore";
 import { DefaultSpokenFormInfoMap } from "./defaultSpokenFormMap.types";
 
@@ -23,7 +23,7 @@ export const defaultSpokenFormInfoMap: DefaultSpokenFormInfoMap =
  * A spoken form map constructed from the default spoken forms. It is designed to
  * be used as a fallback when the Talon spoken form map is not available.
  */
-export const defaultSpokenFormMap = mapSpokenForms(
+export const defaultSpokenFormMap: SpokenFormMap = mapSpokenForms(
   defaultSpokenFormInfoMap,
   ({ defaultSpokenForms, isDisabledByDefault, isPrivate }) => ({
     spokenForms: isDisabledByDefault ? [] : defaultSpokenForms,