diff --git a/packages/markdown-it-myst/.eslintrc.js b/packages/markdown-it-myst/.eslintrc.js new file mode 100644 index 000000000..76787609a --- /dev/null +++ b/packages/markdown-it-myst/.eslintrc.js @@ -0,0 +1,4 @@ +module.exports = { + root: true, + extends: ['curvenote'], +}; diff --git a/packages/markdown-it-myst/README.md b/packages/markdown-it-myst/README.md new file mode 100644 index 000000000..f18bb9732 --- /dev/null +++ b/packages/markdown-it-myst/README.md @@ -0,0 +1 @@ +# markdown-it-myst diff --git a/packages/markdown-it-myst/jest.config.js b/packages/markdown-it-myst/jest.config.js new file mode 100644 index 000000000..8739a081b --- /dev/null +++ b/packages/markdown-it-myst/jest.config.js @@ -0,0 +1,23 @@ +module.exports = { + rootDir: '../../', + preset: 'ts-jest/presets/js-with-ts', + testMatch: ['/packages/markdown-it-myst/**/?(*.)+(spec|test).+(ts|tsx|js)'], + transform: { + '^.+\\.(ts|tsx)$': 'ts-jest', + }, + testTimeout: 10000, + moduleNameMapper: { + '#(.*)': '/node_modules/$1', // https://github.com/chalk/chalk/issues/532 + }, + globals: { + 'ts-jest': { + tsconfig: './tsconfig.test.json', + }, + }, + verbose: true, + testEnvironment: 'node', + transformIgnorePatterns: [ + '/node_modules/(?!(vfile|formdata-polyfill|chalk|fetch-blob|vfile-message|unified|bail|trough|zwitch|unist-|hast-|html-|rehype-|mdast-|micromark-|trim-|web-namespaces|property-information|space-separated-tokens|comma-separated-tokens|get-port|stringify-entities|character-entities-html4|ccount|array-iterate))', + ], + testPathIgnorePatterns: ['/node_modules/', '/.yalc/', '/dist/'], +}; diff --git a/packages/markdown-it-myst/package.json b/packages/markdown-it-myst/package.json new file mode 100644 index 000000000..c416088dc --- /dev/null +++ b/packages/markdown-it-myst/package.json @@ -0,0 +1,57 @@ +{ + "name": "markdown-it-myst", + "version": "0.0.1", + "sideEffects": false, + "license": "MIT", + "description": "markdown-it tokenizer for MyST roles and directives", + 
"author": "Franklin Koch ", + "homepage": "https://github.com/executablebooks/mystjs/tree/main/packages/markdown-it-myst", + "main": "./dist/cjs/index.js", + "module": "./dist/esm/index.js", + "types": "./dist/types/index.d.ts", + "files": [ + "dist" + ], + "exports": { + ".": { + "import": "./dist/esm/index.js", + "require": "./dist/cjs/index.js" + } + }, + "publishConfig": { + "access": "public" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/executablebooks/mystjs.git" + }, + "scripts": { + "clean": "rimraf dist", + "build:esm": "tsc --project ./tsconfig.json --module es2015 --outDir dist/esm", + "build:cjs": "tsc --project ./tsconfig.json --module commonjs --outDir dist/cjs", + "declarations": "tsc --project ./tsconfig.json --declaration --emitDeclarationOnly --declarationMap --outDir dist/types", + "build": "npm-run-all -l clean -p build:cjs build:esm declarations", + "lint": "eslint \"src/**/!(*.spec).ts\" -c ./.eslintrc.js", + "lint:format": "npx prettier --check \"src/**/*.ts\"", + "test": "jest", + "test:watch": "jest --watchAll" + }, + "bugs": { + "url": "https://github.com/executablebooks/mystjs/issues" + }, + "dependencies": { + "js-yaml": "^4.1.0", + "markdown-it": "^13.0.1" + }, + "devDependencies": { + "@types/jest": "^28.1.6", + "eslint": "^8.21.0", + "eslint-config-curvenote": "latest", + "jest": "28.1.3", + "npm-run-all": "^4.1.5", + "prettier": "latest", + "rimraf": "^3.0.2", + "ts-jest": "^28.0.7", + "typescript": "latest" + } +} diff --git a/packages/markdown-it-myst/src/directives.ts b/packages/markdown-it-myst/src/directives.ts new file mode 100644 index 000000000..38220cc99 --- /dev/null +++ b/packages/markdown-it-myst/src/directives.ts @@ -0,0 +1,198 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import yaml from 'js-yaml'; +import type MarkdownIt from 'markdown-it/lib'; +import type StateCore from 'markdown-it/lib/rules_core/state_core'; +import { nestedPartToTokens } from './nestedParse'; + +/** 
Convert fences identified as directives to `directive` tokens.
+ *
+ * A `fence` or `colon_fence` token whose info string has the form `{name} arg`
+ * is retyped to `directive`: `info` becomes the name and the remainder of the
+ * info string is stored as `meta.arg`. All other tokens are left untouched.
+ *
+ * @returns always `true`
+ */
+function replaceFences(state: StateCore): boolean {
+  for (const token of state.tokens) {
+    if (token.type === 'fence' || token.type === 'colon_fence') {
+      const match = token.info.match(/^\{([^\s}]+)\}\s*(.*)$/);
+      if (match) {
+        token.type = 'directive';
+        token.info = match[1];
+        token.meta = { arg: match[2] };
+      }
+    }
+  }
+  return true;
+}
+
+/**
+ * Run all directives, replacing the original token.
+ *
+ * Each `directive` token is expanded into `parsed_directive_open`, the nested
+ * tokens for its argument, options and body, and `parsed_directive_close`.
+ * If anything throws while expanding, a single `directive_error` token is
+ * emitted instead, carrying the error name and message in `meta`.
+ *
+ * @returns always `true`
+ */
+function runDirectives(state: StateCore): boolean {
+  const finalTokens: StateCore['tokens'] = [];
+  for (const token of state.tokens) {
+    if (token.type === 'directive') {
+      try {
+        const { info, map } = token;
+        const { arg } = token.meta;
+        const content = parseDirectiveContent(
+          token.content.trim() ? token.content.split(/\r?\n/) : [],
+        );
+        const { body, options } = content;
+        let { bodyOffset } = content;
+        // Drop blank lines in front of the body, keeping the line offset in step.
+        while (body.length && !body[0].trim()) {
+          body.shift();
+          bodyOffset++;
+        }
+        const directiveOpen = new state.Token('parsed_directive_open', '', 1);
+        directiveOpen.info = info;
+        directiveOpen.hidden = true;
+        directiveOpen.content = body.join('\n');
+        directiveOpen.map = map;
+        directiveOpen.meta = {
+          arg,
+          options,
+        };
+        const startLineNumber = map ? map[0] : 0;
+        const argTokens = directiveArgToTokens(arg, startLineNumber, state);
+        const optsTokens = directiveOptionsToTokens(options, startLineNumber + 1, state);
+        const bodyTokens = directiveBodyToTokens(
+          body.join('\n'),
+          startLineNumber + bodyOffset,
+          state,
+        );
+        const directiveClose = new state.Token('parsed_directive_close', '', -1);
+        directiveClose.info = info;
+        directiveClose.hidden = true;
+        const newTokens = [
+          directiveOpen,
+          ...argTokens,
+          ...optsTokens,
+          ...bodyTokens,
+          directiveClose,
+        ];
+        finalTokens.push(...newTokens);
+      } catch (err) {
+        const errorToken = new state.Token('directive_error', '', 0);
+        errorToken.content = token.content;
+        errorToken.info = token.info;
+        errorToken.map = token.map;
+        // Copy meta instead of aliasing it: the failed token is not mutated, and a
+        // `directive` token without `meta` cannot make the error path itself throw.
+        errorToken.meta = {
+          ...token.meta,
+          error_message: err instanceof Error ? err.message : String(err),
+          error_name: err instanceof Error ? err.name : 'Error',
+        };
+        finalTokens.push(errorToken);
+      }
+    } else {
+      finalTokens.push(token);
+    }
+  }
+  state.tokens = finalTokens;
+  return true;
+}
+
+/**
+ * Parse a YAML block into directive options.
+ *
+ * Returns `null` unless the YAML is a mapping; keys given without a value
+ * (flags) are coerced to `true`. Errors thrown by `yaml.load` are not caught here.
+ */
+function loadOptions(yamlBlock: string) {
+  const options = yaml.load(yamlBlock);
+  // Only a mapping can be a set of options; scalars and sequences are rejected.
+  if (options === null || typeof options !== 'object' || Array.isArray(options)) {
+    return null;
+  }
+  const output: Record<string, any> = {};
+  Object.entries(options).forEach(([key, value]) => {
+    // If options are given as flags, this coerces them to true
+    output[key] = value !== null ?
value : true;
+  });
+  return output;
+}
+
+/**
+ * Split the raw lines of a directive into options and body.
+ *
+ * Options are either a YAML block delimited by `---` lines, or a run of leading
+ * lines that start with `:`. If the collected block does not load as options
+ * (or loading throws), the content is returned unchanged as body with no options.
+ *
+ * @param content the directive content, one entry per line
+ * @returns the `body` lines, the parsed `options`, and `bodyOffset`, which the
+ *   caller adds to the directive's start line to locate the body
+ */
+function parseDirectiveContent(content: string[]): {
+  body: string[];
+  options: Record<string, any>;
+  bodyOffset: number;
+} {
+  let bodyOffset = 1;
+  let yamlBlock: string[] | null = null;
+  const newContent: string[] = [];
+
+  if (content.length && content[0].trim() === '---') {
+    // options contained in YAML block, starting and ending with '---'
+    bodyOffset++;
+    yamlBlock = [];
+    let foundDivider = false;
+    for (const line of content.slice(1)) {
+      // Only the first '---' closes the options; later ones (e.g. a thematic
+      // break) belong to the body and must not be dropped.
+      if (!foundDivider && line.trim() === '---') {
+        bodyOffset++;
+        foundDivider = true;
+        continue;
+      }
+      if (foundDivider) {
+        newContent.push(line);
+      } else {
+        bodyOffset++;
+        yamlBlock.push(line);
+      }
+    }
+  } else if (content.length && content[0].startsWith(':')) {
+    // options given as leading ':key: value' lines; the ':' prefix is removed
+    yamlBlock = [];
+    let foundDivider = false;
+    for (const line of content) {
+      if (!foundDivider && !line.startsWith(':')) {
+        foundDivider = true;
+        newContent.push(line);
+        continue;
+      }
+      if (foundDivider) {
+        newContent.push(line);
+      } else {
+        bodyOffset++;
+        yamlBlock.push(line.slice(1));
+      }
+    }
+  }
+
+  if (yamlBlock !== null) {
+    try {
+      const options = loadOptions(yamlBlock.join('\n'));
+      if (options) {
+        return { body: newContent, options, bodyOffset };
+      }
+    } catch {
+      // If there's an error, no worries; assume the intent is no options.
+    }
+  }
+
+  return { body: content, options: {}, bodyOffset: 1 };
+}
+
+/** Nested-parse the directive argument into `directive_arg_*` tokens. */
+function directiveArgToTokens(arg: string, lineNumber: number, state: StateCore) {
+  return nestedPartToTokens('directive_arg', arg, lineNumber, state);
+}
+
+/**
+ * Nested-parse every option value into a `directive_option_open` / `_close` group.
+ *
+ * The opening token carries the option key in `info` and the raw (unstringified)
+ * value in `content`.
+ */
+function directiveOptionsToTokens(
+  options: Record<string, any>,
+  lineNumber: number,
+  state: StateCore,
+) {
+  const tokens = Object.entries(options).map(([key, value], index) => {
+    // lineNumber mapping assumes each option is only one line;
+    // not necessarily true for yaml options.
+    const optTokens = nestedPartToTokens('directive_option', `${value}`, lineNumber + index, state);
+    if (optTokens.length === 0) {
+      // nestedPartToTokens returns [] for an empty string (e.g. `:key: ""`).
+      // Emit the open/close pair anyway so the option is kept, instead of
+      // throwing on `optTokens[0]` and failing the whole directive.
+      const optionOpen = new state.Token('directive_option_open', '', 1);
+      optionOpen.hidden = true;
+      const optionClose = new state.Token('directive_option_close', '', -1);
+      optionClose.hidden = true;
+      optTokens.push(optionOpen, optionClose);
+    }
+    optTokens[0].info = key;
+    optTokens[0].content = value;
+    return optTokens;
+  });
+  return tokens.flat();
+}
+
+/** Nested-parse the directive body into `directive_body_*` tokens. */
+function directiveBodyToTokens(body: string, lineNumber: number, state: StateCore) {
+  return nestedPartToTokens('directive_body', body, lineNumber, state);
+}
+
+/**
+ * markdown-it plugin that registers the two core rules turning directive fences
+ * into structured tokens, plus fallback renderers for `directive` and
+ * `directive_error` tokens.
+ */
+export function directivePlugin(md: MarkdownIt): void {
+  md.core.ruler.after('block', 'fence_to_directive', replaceFences);
+  md.core.ruler.after('fence_to_directive', 'run_directives', runDirectives);
+
+  // fallback renderer for unhandled directives
+  // NOTE(review): `token` is unused and only a newline is returned; it looks like
+  // the markup of this template was lost. Confirm against the upstream source.
+  md.renderer.rules['directive'] = (tokens, idx) => {
+    const token = tokens[idx];
+    return `\n`;
+  };
+  // NOTE(review): `content` is built but never used in the returned string;
+  // same suspicion as above. Confirm against the upstream source.
+  md.renderer.rules['directive_error'] = (tokens, idx) => {
+    const token = tokens[idx];
+    let content = '';
+    if (token.content) {
+      content = `\n---\n${token.content}`;
+    }
+    return `\n`;
+  };
+}
diff --git a/packages/markdown-it-myst/src/index.ts b/packages/markdown-it-myst/src/index.ts
new file mode 100644
index 000000000..063e61b69
--- /dev/null
+++ b/packages/markdown-it-myst/src/index.ts
@@ -0,0 +1,16 @@
+import type MarkdownIt from 'markdown-it/lib';
+import { rolePlugin } from './roles';
+import { directivePlugin } from './directives';
+
+export { rolePlugin };
+export { directivePlugin };
+
+/**
+ * A markdown-it plugin for parsing MyST roles and directives to structured data
+ */
+export function mystPlugin(md: MarkdownIt): void {
+  md.use(rolePlugin);
+  md.use(directivePlugin);
+}
+
+export default mystPlugin;
diff --git a/packages/markdown-it-myst/src/nestedParse.ts b/packages/markdown-it-myst/src/nestedParse.ts
new file mode 100644
index 000000000..47e4c3d7a
--- /dev/null
+++ b/packages/markdown-it-myst/src/nestedParse.ts
@@ -0,0 +1,72 @@
+import type MarkdownIt from 'markdown-it';
+import type StateCore from 'markdown-it/lib/rules_core/state_core';
+import type Token from 'markdown-it/lib/token';
+
+/** Perform a nested parse up to
and including a particular ruleName
+ *
+ * The main use for this function is to perform nested parses
+ * up to but not including inline parsing.
+ *
+ * Core rules after `pluginRuleName` are disabled for the duration of the parse
+ * and restored afterwards, also when parsing throws. Rules that were already
+ * disabled stay disabled. If `pluginRuleName` is not a registered core rule,
+ * no rule is disabled and the whole chain runs.
+ *
+ * @param md the markdown-it instance to parse with
+ * @param pluginRuleName name of the last core rule to run
+ * @param src the source text of the nested part
+ * @param env the environment passed through to `md.parse`
+ * @param initLine offset added to both entries of every token `map`
+ * @param includeRule when `false`, `pluginRuleName` itself is disabled as well
+ * @returns the tokens of the nested parse, with shifted line maps
+ */
+export function nestedCoreParse(
+  md: MarkdownIt,
+  pluginRuleName: string,
+  src: string,
+  env: any,
+  initLine: number,
+  includeRule = true,
+): Token[] {
+  // disable all core rules after pluginRuleName
+  const tempDisabledCore: string[] = [];
+  // TODO __rules__ is currently not exposed in typescript, but is the only way to get the rule names,
+  // since md.core.ruler.getRules('') only returns the rule functions
+  // we should upstream a getRuleNames() function or similar
+  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
+  // @ts-ignore TS2339
+  const coreRules: { name: string; enabled: boolean }[] = md.core.ruler.__rules__;
+  let ruleFound = false;
+  for (const rule of [...coreRules].reverse()) {
+    if (rule.name === pluginRuleName) {
+      ruleFound = true;
+      if (!includeRule && rule.enabled) {
+        tempDisabledCore.push(rule.name);
+      }
+      break;
+    }
+    // Only record rules that are currently enabled, so that re-enabling below
+    // does not switch on rules the caller had turned off.
+    if (rule.name && rule.enabled) {
+      tempDisabledCore.push(rule.name);
+    }
+  }
+  if (!ruleFound) {
+    // Unknown rule name: there is nothing to stop at, so run the whole chain
+    // rather than disabling every core rule (which would yield no tokens).
+    tempDisabledCore.length = 0;
+  }
+
+  md.core.ruler.disable(tempDisabledCore);
+
+  let tokens: Token[] = [];
+  try {
+    tokens = md.parse(src, env);
+  } finally {
+    md.core.ruler.enable(tempDisabledCore);
+  }
+  // Shift line maps so they are relative to the enclosing document.
+  for (const token of tokens) {
+    if (token.map !== null) {
+      token.map = [token.map[0] + initLine, token.map[1] + initLine];
+    }
+  }
+  return tokens;
+}
+
+/**
+ * Wrap the nested parse of `part` in hidden `${partName}_open` / `${partName}_close`
+ * tokens; the opening token keeps the raw text in `content`.
+ *
+ * @returns an empty array when `part` is empty
+ */
+export function nestedPartToTokens(
+  partName: string,
+  part: string,
+  lineNumber: number,
+  state: StateCore,
+) {
+  if (!part) return [];
+  const openToken = new state.Token(`${partName}_open`, '', 1);
+  openToken.content = part;
+  openToken.hidden = true;
+  const nestedTokens = nestedCoreParse(
+    state.md,
+    'run_directives',
+    part,
+    state.env,
+    lineNumber,
+    true,
+  );
+  const closeToken = new state.Token(`${partName}_close`, '', -1);
+  closeToken.hidden = true;
+  return [openToken, ...nestedTokens, closeToken];
+}
diff --git a/packages/markdown-it-myst/src/roles.ts b/packages/markdown-it-myst/src/roles.ts
new file mode 100644
index 000000000..1834863f9
--- /dev/null
+++ b/packages/markdown-it-myst/src/roles.ts
@@ -0,0 +1,101 @@
+import type MarkdownIt from 'markdown-it/lib';
+import type StateCore from 'markdown-it/lib/rules_core/state_core';
+import type StateInline from 'markdown-it/lib/rules_inline/state_inline';
+import { nestedPartToTokens } from './nestedParse';
+
+/**
+ * markdown-it plugin that registers the inline `parse_roles` rule and the core
+ * `run_roles` rule, plus fallback renderers for `role` and `role_error` tokens.
+ */
+export function rolePlugin(md: MarkdownIt): void {
+  md.inline.ruler.before('backticks', 'parse_roles', roleRule);
+  md.core.ruler.after('inline', 'run_roles', runRoles);
+  // fallback renderer for unhandled roles
+  md.renderer.rules['role'] = (tokens, idx) => {
+    const token = tokens[idx];
+    return `${token.meta.name}${token.content}`;
+  };
+  // NOTE(review): `content` is built but never used and only a newline is
+  // returned; it looks like the markup of this template was lost. Confirm
+  // against the upstream source.
+  md.renderer.rules['role_error'] = (tokens, idx) => {
+    const token = tokens[idx];
+    let content = '';
+    if (token.content) {
+      content = `\n---\n${token.content}`;
+    }
+    return `\n`;
+  };
+}
+
+function roleRule(state: StateInline, silent: boolean): boolean {
+  // Check if the role is escaped
+  if (state.src.charCodeAt(state.pos - 1) === 0x5c) {
+    /* \ */
+    // TODO: this could be improved in the case of edge case '\\{', also multi-line
+    return false;
+  }
+  const match = ROLE_PATTERN.exec(state.src.slice(state.pos));
+  if (match == null) return false;
+  const
[str, name, , content] = match; + state.pos += str.length; + + if (!silent) { + const token = state.push('role', '', 0); + token.meta = { name }; + token.content = content; + } + return true; +} + +// MyST role syntax format e.g. {role}`text` +// TODO: support role with no value e.g. {role}`` +let _x: RegExp; +try { + _x = new RegExp('^\\{([a-zA-Z_\\-+:]{1,36})\\}(`+)(?!`)(.+?)(? { + it('basic directive parses', () => { + const mdit = MarkdownIt().use(plugin); + const tokens = mdit.parse('```{abc}\n```', {}); + expect(tokens.map((t) => t.type)).toEqual(['parsed_directive_open', 'parsed_directive_close']); + expect(tokens[0].info).toEqual('abc'); + expect(tokens[0].content).toEqual(''); + }); + it('arg/opts/body directive parses', () => { + const mdit = MarkdownIt().use(plugin); + const tokens = mdit.parse('```{abc} my arg\n:label: my label\n\nmy body\n```', {}); + expect(tokens.map((t) => t.type)).toEqual([ + 'parsed_directive_open', + 'directive_arg_open', + 'paragraph_open', + 'inline', + 'paragraph_close', + 'directive_arg_close', + 'directive_option_open', + 'paragraph_open', + 'inline', + 'paragraph_close', + 'directive_option_close', + 'directive_body_open', + 'paragraph_open', + 'inline', + 'paragraph_close', + 'directive_body_close', + 'parsed_directive_close', + ]); + expect(tokens[0].info).toEqual('abc'); + expect(tokens[3].content).toEqual('my arg'); + expect(tokens[6].info).toEqual('label'); + expect(tokens[8].content).toEqual('my label'); + expect(tokens[13].content).toEqual('my body'); + }); + it('opt flag directive parses to true', () => { + const mdit = MarkdownIt().use(plugin); + const tokens = mdit.parse('```{abc}\n:flag:\n```', {}); + expect(tokens.map((t) => t.type)).toEqual([ + 'parsed_directive_open', + 'directive_option_open', + 'paragraph_open', + 'inline', + 'paragraph_close', + 'directive_option_close', + 'parsed_directive_close', + ]); + expect(tokens[0].info).toEqual('abc'); + expect(tokens[1].info).toEqual('flag'); + 
expect(tokens[1].content).toEqual(true); + expect(tokens[3].content).toEqual('true'); + }); + it('yaml opts directive parses', () => { + const mdit = MarkdownIt().use(plugin); + const tokens = mdit.parse('```{abc}\n---\na: x\nb: y\n---\n```', {}); + expect(tokens.map((t) => t.type)).toEqual([ + 'parsed_directive_open', + 'directive_option_open', + 'paragraph_open', + 'inline', + 'paragraph_close', + 'directive_option_close', + 'directive_option_open', + 'paragraph_open', + 'inline', + 'paragraph_close', + 'directive_option_close', + 'parsed_directive_close', + ]); + expect(tokens[0].info).toEqual('abc'); + expect(tokens[1].info).toEqual('a'); + expect(tokens[3].content).toEqual('x'); + expect(tokens[6].info).toEqual('b'); + expect(tokens[8].content).toEqual('y'); + }); +}); diff --git a/packages/markdown-it-myst/tests/roles.spec.ts b/packages/markdown-it-myst/tests/roles.spec.ts new file mode 100644 index 000000000..760d38347 --- /dev/null +++ b/packages/markdown-it-myst/tests/roles.spec.ts @@ -0,0 +1,41 @@ +import MarkdownIt from 'markdown-it'; +import plugin from '../src'; + +describe('parses roles', () => { + it('basic role parses', () => { + const mdit = MarkdownIt().use(plugin); + const tokens = mdit.parse('{abc}`hello`', {}); + expect(tokens.map((t) => t.type)).toEqual(['paragraph_open', 'inline', 'paragraph_close']); + expect(tokens[1].children?.map((t) => t.type)).toEqual([ + 'parsed_role_open', + 'role_content_open', + 'paragraph_open', + 'inline', + 'paragraph_close', + 'role_content_close', + 'parsed_role_close', + ]); + expect(tokens[1].content).toEqual('{abc}`hello`'); + expect(tokens[1].children?.[0].info).toEqual('abc'); + expect(tokens[1].children?.[0].content).toEqual('hello'); + expect(tokens[1].children?.[3].content).toEqual('hello'); + }); + it('header role parses', () => { + const mdit = MarkdownIt().use(plugin); + const tokens = mdit.parse('{abc}`# hello`', {}); + expect(tokens.map((t) => t.type)).toEqual(['paragraph_open', 'inline', 
'paragraph_close']); + expect(tokens[1].children?.map((t) => t.type)).toEqual([ + 'parsed_role_open', + 'role_content_open', + 'heading_open', + 'inline', + 'heading_close', + 'role_content_close', + 'parsed_role_close', + ]); + expect(tokens[1].content).toEqual('{abc}`# hello`'); + expect(tokens[1].children?.[0].info).toEqual('abc'); + expect(tokens[1].children?.[0].content).toEqual('# hello'); + expect(tokens[1].children?.[3].content).toEqual('hello'); + }); +}); diff --git a/packages/markdown-it-myst/tsconfig.json b/packages/markdown-it-myst/tsconfig.json new file mode 100644 index 000000000..8268fe093 --- /dev/null +++ b/packages/markdown-it-myst/tsconfig.json @@ -0,0 +1,33 @@ +{ + "compilerOptions": { + "target": "es6", + // module is overridden from the build:esm/build:cjs scripts + "module": "es2015", + "jsx": "react-jsx", + "lib": ["es2020"], + "esModuleInterop": true, + "noImplicitAny": true, + "strict": true, + "moduleResolution": "node", + "sourceMap": false, + // outDir is overridden from the build:esm/build:cjs scripts + "outDir": "dist/types", + "baseUrl": "src", + "paths": { + "*": ["node_modules/*"] + }, + // Type roots allows it to be included in a workspace + "typeRoots": [ + "./types", + "./node_modules/@types", + "../../node_modules/@types", + "../../../node_modules/@types" + ], + "resolveJsonModule": true, + // Ignore node_modules, etc. + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true + }, + "include": ["src/**/*"], + "exclude": ["tests/**/*"] +} diff --git a/packages/markdown-it-myst/tsconfig.test.json b/packages/markdown-it-myst/tsconfig.test.json new file mode 100644 index 000000000..bafe01bbd --- /dev/null +++ b/packages/markdown-it-myst/tsconfig.test.json @@ -0,0 +1,8 @@ +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "allowJs": true, + "target": "es6" + }, + "exclude": [] +}