Skip to content

Commit

Permalink
📦 New markdown-it-myst package
Browse files Browse the repository at this point in the history
  • Loading branch information
fwkoch committed Feb 6, 2023
1 parent c79bcba commit 83c1406
Show file tree
Hide file tree
Showing 12 changed files with 634 additions and 0 deletions.
4 changes: 4 additions & 0 deletions packages/markdown-it-myst/.eslintrc.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// ESLint configuration for the markdown-it-myst package.
module.exports = {
// Treat this file as the top-level config: ESLint stops looking in parent directories.
root: true,
// Inherit the shared rule set; the `curvenote` shorthand refers to the
// `eslint-config-curvenote` package.
extends: ['curvenote'],
};
1 change: 1 addition & 0 deletions packages/markdown-it-myst/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# markdown-it-myst
23 changes: 23 additions & 0 deletions packages/markdown-it-myst/jest.config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Jest configuration for the markdown-it-myst package.
module.exports = {
// Resolve `<rootDir>` two directories above this file
// (presumably the monorepo root - confirm against the repository layout).
rootDir: '../../',
preset: 'ts-jest/presets/js-with-ts',
// Only pick up *.spec.* / *.test.* files that live inside this package.
testMatch: ['<rootDir>/packages/markdown-it-myst/**/?(*.)+(spec|test).+(ts|tsx|js)'],
// Compile TypeScript sources with ts-jest.
transform: {
'^.+\\.(ts|tsx)$': 'ts-jest',
},
// Per-test timeout, in milliseconds.
testTimeout: 10000,
// Rewrite `#...` module specifiers to paths under the root node_modules folder.
moduleNameMapper: {
'#(.*)': '<rootDir>/node_modules/$1', // https://github.com/chalk/chalk/issues/532
},
globals: {
'ts-jest': {
// Tests are compiled with a dedicated tsconfig rather than the build one.
tsconfig: './tsconfig.test.json',
},
},
verbose: true,
testEnvironment: 'node',
// The negative lookahead exempts the listed packages from the "ignore node_modules"
// rule, so they ARE transformed (presumably because they ship as ES modules - confirm).
transformIgnorePatterns: [
'<rootDir>/node_modules/(?!(vfile|formdata-polyfill|chalk|fetch-blob|vfile-message|unified|bail|trough|zwitch|unist-|hast-|html-|rehype-|mdast-|micromark-|trim-|web-namespaces|property-information|space-separated-tokens|comma-separated-tokens|get-port|stringify-entities|character-entities-html4|ccount|array-iterate))',
],
// Never look for tests in dependencies, yalc links, or build output.
testPathIgnorePatterns: ['/node_modules/', '/.yalc/', '/dist/'],
};
57 changes: 57 additions & 0 deletions packages/markdown-it-myst/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{
"name": "markdown-it-myst",
"version": "0.0.1",
"sideEffects": false,
"license": "MIT",
"description": "markdown-it tokenizer for MyST roles and directives",
"author": "Franklin Koch <[email protected]>",
"homepage": "https://github.com/executablebooks/mystjs/tree/main/packages/markdown-it-myst",
"main": "./dist/cjs/index.js",
"module": "./dist/esm/index.js",
"types": "./dist/types/index.d.ts",
"files": [
"dist"
],
"exports": {
".": {
"import": "./dist/esm/index.js",
"require": "./dist/cjs/index.js"
}
},
"publishConfig": {
"access": "public"
},
"repository": {
"type": "git",
"url": "git+https://github.com/executablebooks/mystjs.git"
},
"scripts": {
"clean": "rimraf dist",
"build:esm": "tsc --project ./tsconfig.json --module es2015 --outDir dist/esm",
"build:cjs": "tsc --project ./tsconfig.json --module commonjs --outDir dist/cjs",
"declarations": "tsc --project ./tsconfig.json --declaration --emitDeclarationOnly --declarationMap --outDir dist/types",
"build": "npm-run-all -l clean -p build:cjs build:esm declarations",
"lint": "eslint \"src/**/!(*.spec).ts\" -c ./.eslintrc.js",
"lint:format": "npx prettier --check \"src/**/*.ts\"",
"test": "jest",
"test:watch": "jest --watchAll"
},
"bugs": {
"url": "https://github.com/executablebooks/mystjs/issues"
},
"dependencies": {
"js-yaml": "^4.1.0",
"markdown-it": "^13.0.1"
},
"devDependencies": {
"@types/jest": "^28.1.6",
"eslint": "^8.21.0",
"eslint-config-curvenote": "latest",
"jest": "28.1.3",
"npm-run-all": "^4.1.5",
"prettier": "latest",
"rimraf": "^3.0.2",
"ts-jest": "^28.0.7",
"typescript": "latest"
}
}
198 changes: 198 additions & 0 deletions packages/markdown-it-myst/src/directives.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import yaml from 'js-yaml';
import type MarkdownIt from 'markdown-it/lib';
import type StateCore from 'markdown-it/lib/rules_core/state_core';
import { nestedPartToTokens } from './nestedParse';

/**
 * Core rule: retag fences that carry a directive header as `directive` tokens.
 *
 * A `fence` or `colon_fence` token whose info string looks like `{name} argument`
 * is mutated in place: its type becomes `directive`, `info` becomes the bare
 * directive name and `meta` becomes `{ arg }` with the rest of the info line.
 * All other tokens are left untouched.
 *
 * @param state - core parser state whose `tokens` are inspected
 * @returns always `true`
 */
function replaceFences(state: StateCore): boolean {
  // `{name}` (no whitespace or `}` inside the braces) followed by an optional argument.
  const directiveHeader = /^\{([^\s}]+)\}\s*(.*)$/;
  for (const candidate of state.tokens) {
    const isFence = candidate.type === 'fence' || candidate.type === 'colon_fence';
    if (!isFence) continue;

    const parsed = directiveHeader.exec(candidate.info);
    if (parsed === null) continue;

    const [, directiveName, directiveArg] = parsed;
    candidate.type = 'directive';
    candidate.info = directiveName;
    candidate.meta = { arg: directiveArg };
  }
  return true;
}

/**
 * Core rule: expand every `directive` token into a structured run of tokens.
 *
 * Each directive becomes
 * `parsed_directive_open`, argument tokens, option tokens, body tokens,
 * `parsed_directive_close`. Non-directive tokens are passed through unchanged.
 * If anything throws while a directive is processed, that directive is replaced
 * by a single `directive_error` token carrying the original content/info/map and
 * the error name and message in `meta`.
 *
 * @param state - core parser state; `state.tokens` is replaced with the new list
 * @returns always `true`
 */
function runDirectives(state: StateCore): boolean {
  const finalTokens: StateCore['tokens'] = [];
  for (const token of state.tokens) {
    if (token.type !== 'directive') {
      finalTokens.push(token);
      continue;
    }
    try {
      const { info, map } = token;
      const { arg } = token.meta;
      const content = parseDirectiveContent(
        token.content.trim() ? token.content.split(/\r?\n/) : [],
      );
      const { body, options } = content;
      let { bodyOffset } = content;
      // Drop leading blank body lines, keeping the line offset in sync.
      while (body.length && !body[0].trim()) {
        body.shift();
        bodyOffset++;
      }
      const bodyText = body.join('\n');

      const directiveOpen = new state.Token('parsed_directive_open', '', 1);
      directiveOpen.info = info;
      directiveOpen.hidden = true;
      directiveOpen.content = bodyText;
      directiveOpen.map = map;
      directiveOpen.meta = {
        arg,
        options,
      };

      const startLineNumber = map ? map[0] : 0;
      const argTokens = directiveArgToTokens(arg, startLineNumber);
      // NOTE: `+ 1` assumes options start on the line right after the opening fence;
      // for options wrapped in a `---` block the reported lines are one too low.
      const optsTokens = directiveOptionsToTokens(options, startLineNumber + 1, state);
      const bodyTokens = directiveBodyToTokens(bodyText, startLineNumber + bodyOffset, state);

      const directiveClose = new state.Token('parsed_directive_close', '', -1);
      directiveClose.info = info;
      directiveClose.hidden = true;

      finalTokens.push(directiveOpen, ...argTokens, ...optsTokens, ...bodyTokens, directiveClose);
    } catch (err) {
      // Thrown values are not guaranteed to be Error instances.
      const failure = err instanceof Error ? err : new Error(String(err));
      const errorToken = new state.Token('directive_error', '', 0);
      errorToken.content = token.content;
      errorToken.info = token.info;
      errorToken.map = token.map;
      // Build a fresh meta object: this neither mutates the source token's meta
      // nor fails when that meta is null (spreading null yields no properties).
      errorToken.meta = {
        ...token.meta,
        error_message: failure.message,
        error_name: failure.name,
      };
      finalTokens.push(errorToken);
    }
  }
  state.tokens = finalTokens;
  return true;
}

/**
 * Parse a YAML block into a directive options object.
 *
 * Only a YAML mapping is accepted. Scalars, empty documents and sequences all
 * yield `null`, so the caller can treat the text as ordinary body content.
 * Keys given without a value (YAML `null`) are treated as flags and coerced to `true`.
 *
 * @param yamlBlock - raw YAML text of the options block
 * @returns the options mapping, or `null` when the YAML is not a mapping
 * @throws whatever `yaml.load` throws on malformed YAML
 */
function loadOptions(yamlBlock: string): Record<string, any> | null {
  const options = yaml.load(yamlBlock);
  // `typeof [] === 'object'`, so sequences must be rejected explicitly; otherwise
  // a list would be turned into options keyed by "0", "1", ...
  if (options === null || typeof options !== 'object' || Array.isArray(options)) {
    return null;
  }
  const output: Record<string, any> = {};
  for (const [key, value] of Object.entries(options)) {
    // If options are given as flags, this coerces them to true
    output[key] = value !== null ? value : true;
  }
  return output;
}

/**
 * Split the lines inside a directive into its options and its body.
 *
 * Two option syntaxes are recognised, decided by the first line:
 * - a YAML block opened by a `---` line and closed by the next `---` line
 *   (if no closing line exists, every remaining line is treated as YAML);
 * - a leading run of lines that each start with `:` (the colon is stripped
 *   and the lines are parsed together as YAML).
 *
 * If no option syntax is present, or the option text does not load as an
 * options mapping, the content is returned untouched with empty options.
 *
 * @param content - the directive's content, one entry per line
 * @returns `body` lines, parsed `options`, and `bodyOffset`: the number of lines
 *   between the directive's opening line and the first body line
 */
function parseDirectiveContent(content: string[]): {
  body: string[];
  options: Record<string, any>;
  bodyOffset: number;
} {
  // Starts at 1 to account for the directive's own opening line.
  let bodyOffset = 1;
  let yamlBlock: string[] | null = null;
  const newContent: string[] = [];

  if (content.length && content[0].trim() === '---') {
    // options contained in YAML block, starting and ending with '---'
    bodyOffset++;
    yamlBlock = [];
    let foundDivider = false;
    for (const line of content.slice(1)) {
      if (foundDivider) {
        // Everything after the closing divider is body, including any further
        // '---' lines (e.g. thematic breaks), which must not be dropped.
        newContent.push(line);
      } else if (line.trim() === '---') {
        bodyOffset++;
        foundDivider = true;
      } else {
        bodyOffset++;
        yamlBlock.push(line);
      }
    }
  } else if (content.length && content[0].startsWith(':')) {
    // options given as a leading run of ':key: value' lines
    yamlBlock = [];
    let foundDivider = false;
    for (const line of content) {
      if (foundDivider || !line.startsWith(':')) {
        // The first line without a leading colon ends the options for good.
        foundDivider = true;
        newContent.push(line);
      } else {
        bodyOffset++;
        yamlBlock.push(line.slice(1));
      }
    }
  }

  if (yamlBlock !== null) {
    try {
      const options = loadOptions(yamlBlock.join('\n'));
      if (options) {
        return { body: newContent, options, bodyOffset };
      }
    } catch {
      // If there's an error, no worries; assume the intent is no options.
    }
  }

  return { body: content, options: {}, bodyOffset: 1 };
}

/**
 * Tokenize a directive's argument (the text after `{name}` on the opening line).
 *
 * @param arg - raw argument text
 * @param lineNumber - line passed through to the nested parse as the argument's line
 * @param state - core parser state handed on to the nested parse
 * @returns whatever `nestedPartToTokens` produces for the `directive_arg` part
 */
function directiveArgToTokens(arg: string, lineNumber: number, state: StateCore) {
  const argTokens = nestedPartToTokens('directive_arg', arg, lineNumber, state);
  return argTokens;
}

/**
 * Tokenize each directive option.
 *
 * Every option value is stringified and run through a nested parse as a
 * `directive_option` part. The opening token of each part gets the option key
 * as `info` and the original (non-stringified) value as `content`.
 *
 * Options whose stringified value is empty produce no tokens at all (the nested
 * parse returns nothing for empty text), so they are skipped here instead of
 * raising a TypeError.
 *
 * @param options - parsed options mapping
 * @param lineNumber - line of the first option
 * @param state - core parser state handed on to the nested parse
 * @returns the flattened token list for all options, in insertion order
 */
function directiveOptionsToTokens(
  options: Record<string, any>,
  lineNumber: number,
  state: StateCore,
) {
  return Object.entries(options).flatMap(([key, value], index) => {
    // lineNumber mapping assumes each option is only one line;
    // not necessarily true for yaml options.
    const optTokens = nestedPartToTokens('directive_option', `${value}`, lineNumber + index, state);
    if (optTokens.length === 0) {
      // e.g. `:label: ""` - nothing to tokenize, and no opening token to annotate.
      return [];
    }
    optTokens[0].info = key;
    optTokens[0].content = value;
    return optTokens;
  });
}

/**
 * Tokenize a directive's body text.
 *
 * @param body - body text, lines joined with `\n`
 * @param lineNumber - line passed through to the nested parse as the body's first line
 * @param state - core parser state handed on to the nested parse
 * @returns whatever `nestedPartToTokens` produces for the `directive_body` part
 */
function directiveBodyToTokens(body: string, lineNumber: number, state: StateCore) {
  const bodyTokens = nestedPartToTokens('directive_body', body, lineNumber, state);
  return bodyTokens;
}

/**
 * markdown-it plugin that parses MyST directives.
 *
 * Registers two core rules after `block`: `fence_to_directive` (retags fences)
 * followed by `run_directives` (expands directives into structured tokens).
 * It also installs fallback renderers for `directive` and `directive_error`
 * tokens.
 *
 * All token-derived text is HTML-escaped in the fallback renderers, because it
 * comes straight from the markdown source and must not be able to inject markup.
 *
 * @param md - the MarkdownIt instance to extend
 */
export function directivePlugin(md: MarkdownIt): void {
  md.core.ruler.after('block', 'fence_to_directive', replaceFences);
  md.core.ruler.after('fence_to_directive', 'run_directives', runDirectives);

  // Missing values render as an empty string rather than "undefined"/"null".
  const escape = (value: unknown): string => md.utils.escapeHtml(String(value ?? ''));

  // fallback renderer for unhandled directives
  md.renderer.rules['directive'] = (tokens, idx) => {
    const token = tokens[idx];
    const name = escape(token.info);
    const arg = escape(token.meta?.arg);
    return `<aside class="directive-unhandled">\n<header><mark>${name}</mark><code> ${arg}</code></header>\n<pre>${escape(token.content)}</pre></aside>\n`;
  };
  // fallback renderer for directives that failed to parse
  md.renderer.rules['directive_error'] = (tokens, idx) => {
    const token = tokens[idx];
    const name = escape(token.info);
    const arg = escape(token.meta?.arg);
    const errorName = escape(token.meta?.error_name);
    const errorMessage = escape(token.meta?.error_message);
    const content = token.content ? `\n---\n${escape(token.content)}` : '';
    return `<aside class="directive-error">\n<header><mark>${name}</mark><code> ${arg}</code></header>\n<pre>${errorName}:\n${errorMessage}\n${content}</pre></aside>\n`;
  };
}
16 changes: 16 additions & 0 deletions packages/markdown-it-myst/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import type MarkdownIt from 'markdown-it/lib';
import { rolePlugin } from './roles';
import { directivePlugin } from './directives';

export { rolePlugin };
export { directivePlugin };

/**
 * markdown-it plugin bundling MyST role and directive parsing.
 *
 * Installs the role plugin first, then the directive plugin, on the given
 * MarkdownIt instance.
 */
export function mystPlugin(md: MarkdownIt): void {
  for (const plugin of [rolePlugin, directivePlugin]) {
    md.use(plugin);
  }
}

export default mystPlugin;
72 changes: 72 additions & 0 deletions packages/markdown-it-myst/src/nestedParse.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import type MarkdownIt from 'markdown-it';
import type StateCore from 'markdown-it/lib/rules_core/state_core';
import type Token from 'markdown-it/lib/token';

/** Perform a nested parse up to and including a particular rule name.
 *
 * The main use for this function is to perform nested parses
 * up to but not including inline parsing.
 *
 * Every enabled core rule that comes after `pluginRuleName` is disabled for the
 * duration of the parse and re-enabled afterwards, even if the parse throws.
 * Rules that were already disabled are left alone, so the nested parse never
 * changes the caller's ruler configuration.
 *
 * @param md - the MarkdownIt instance to parse with
 * @param pluginRuleName - name of the last core rule to run
 * @param src - source text to parse
 * @param env - markdown-it environment passed through to `md.parse`
 * @param initLine - offset added to every token's `map` line numbers
 * @param includeRule - when `false`, `pluginRuleName` itself is disabled as well
 * @returns the tokens produced by the nested parse, with shifted line maps
 */
export function nestedCoreParse(
  md: MarkdownIt,
  pluginRuleName: string,
  src: string,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  env: any,
  initLine: number,
  includeRule = true,
): Token[] {
  // disable all core rules after pluginRuleName
  const tempDisabledCore: string[] = [];
  // TODO __rules__ is currently not exposed in typescript, but is the only way to get the rule names,
  // since md.core.ruler.getRules('') only returns the rule functions
  // we should upstream a getRuleNames() function or similar
  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
  // @ts-ignore TS2339
  for (const rule of [...md.core.ruler.__rules__].reverse()) {
    const isTarget = rule.name === pluginRuleName;
    if (isTarget && includeRule) {
      break;
    }
    // Skip rules that are already disabled: re-enabling them afterwards would
    // silently override the caller's configuration.
    if (rule.name && rule.enabled !== false) {
      tempDisabledCore.push(rule.name);
    }
    if (isTarget) {
      break;
    }
  }

  md.core.ruler.disable(tempDisabledCore);

  let tokens: Token[] = [];
  try {
    tokens = md.parse(src, env);
  } finally {
    md.core.ruler.enable(tempDisabledCore);
  }
  // Shift line maps so they are relative to the enclosing document.
  for (const token of tokens) {
    if (token.map !== null) {
      token.map = [token.map[0] + initLine, token.map[1] + initLine];
    }
  }
  return tokens;
}

/**
 * Parse one part of a role/directive and wrap the result in marker tokens.
 *
 * The part text is parsed with `nestedCoreParse`, running core rules up to and
 * including `run_directives`. The resulting tokens are enclosed between hidden
 * `${partName}_open` / `${partName}_close` tokens; the opening token carries the
 * raw part text as its `content`.
 *
 * @param partName - prefix for the wrapper token types
 * @param part - raw text of the part; a falsy value yields no tokens at all
 * @param lineNumber - line offset applied to the nested tokens' maps
 * @param state - core parser state supplying `Token`, `md` and `env`
 * @returns `[open, ...nested, close]`, or `[]` when `part` is empty
 */
export function nestedPartToTokens(
  partName: string,
  part: string,
  lineNumber: number,
  state: StateCore,
) {
  if (!part) return [];

  // Both wrapper tokens are hidden and differ only in suffix and nesting.
  const marker = (suffix: string, nesting: 1 | -1) => {
    const wrapper = new state.Token(`${partName}_${suffix}`, '', nesting);
    wrapper.hidden = true;
    return wrapper;
  };

  const opening = marker('open', 1);
  opening.content = part;
  const inner = nestedCoreParse(state.md, 'run_directives', part, state.env, lineNumber, true);
  const closing = marker('close', -1);
  return [opening, ...inner, closing];
}
Loading

0 comments on commit 83c1406

Please sign in to comment.