Skip to content

Commit

Permalink
Fix roundtripping of attention by encoding surroundings
Browse files Browse the repository at this point in the history
  • Loading branch information
wooorm committed Oct 29, 2024
1 parent df0d6a6 commit 97fb818
Show file tree
Hide file tree
Showing 11 changed files with 359 additions and 49 deletions.
41 changes: 32 additions & 9 deletions lib/handle/emphasis.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@
*/

import {checkEmphasis} from '../util/check-emphasis.js'
import {encodeCharacterReference} from '../util/encode-character-reference.js'
import {encodeInfo} from '../util/encode-info.js'

emphasis.peek = emphasisPeek

// To do: there are cases where emphasis cannot “form” depending on the
// previous or next character of sequences.
// There’s no way around that though, except for injecting zero-width stuff.
// Do we need to safeguard against that?
/**
* @param {Emphasis} node
* @param {Parents | undefined} _
Expand All @@ -22,17 +20,42 @@ export function emphasis(node, _, state, info) {
const marker = checkEmphasis(state)
const exit = state.enter('emphasis')
const tracker = state.createTracker(info)
let value = tracker.move(marker)
value += tracker.move(
const before = tracker.move(marker)

let between = tracker.move(
state.containerPhrasing(node, {
before: value,
after: marker,
before,
...tracker.current()
})
)
value += tracker.move(marker)
const betweenHead = between.charCodeAt(0)
const open = encodeInfo(
info.before.charCodeAt(info.before.length - 1),
betweenHead,
marker
)

if (open.inside) {
between = encodeCharacterReference(betweenHead) + between.slice(1)
}

const betweenTail = between.charCodeAt(between.length - 1)
const close = encodeInfo(info.after.charCodeAt(0), betweenTail, marker)

if (close.inside) {
between = between.slice(0, -1) + encodeCharacterReference(betweenTail)
}

const after = tracker.move(marker)

exit()
return value

state.attentionEncodeSurroundingInfo = {
after: close.outside,
before: open.outside
}
return before + between + after
}

/**
Expand Down
7 changes: 2 additions & 5 deletions lib/handle/heading.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* @import {Heading, Parents} from 'mdast'
*/

import {encodeCharacterReference} from '../util/encode-character-reference.js'
import {formatHeadingAsSetext} from '../util/format-heading-as-setext.js'

/**
Expand Down Expand Up @@ -58,11 +59,7 @@ export function heading(node, _, state, info) {

if (/^[\t ]/.test(value)) {
// To do: what effect has the character reference on tracking?
value =
'&#x' +
value.charCodeAt(0).toString(16).toUpperCase() +
';' +
value.slice(1)
value = encodeCharacterReference(value.charCodeAt(0)) + value.slice(1)
}

value = value ? sequence + ' ' + value : sequence
Expand Down
41 changes: 32 additions & 9 deletions lib/handle/strong.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@
*/

import {checkStrong} from '../util/check-strong.js'
import {encodeCharacterReference} from '../util/encode-character-reference.js'
import {encodeInfo} from '../util/encode-info.js'

strong.peek = strongPeek

// To do: there are cases where emphasis cannot “form” depending on the
// previous or next character of sequences.
// There’s no way around that though, except for injecting zero-width stuff.
// Do we need to safeguard against that?
/**
* @param {Strong} node
* @param {Parents | undefined} _
Expand All @@ -22,17 +20,42 @@ export function strong(node, _, state, info) {
const marker = checkStrong(state)
const exit = state.enter('strong')
const tracker = state.createTracker(info)
let value = tracker.move(marker + marker)
value += tracker.move(
const before = tracker.move(marker + marker)

let between = tracker.move(
state.containerPhrasing(node, {
before: value,
after: marker,
before,
...tracker.current()
})
)
value += tracker.move(marker + marker)
const betweenHead = between.charCodeAt(0)
const open = encodeInfo(
info.before.charCodeAt(info.before.length - 1),
betweenHead,
marker
)

if (open.inside) {
between = encodeCharacterReference(betweenHead) + between.slice(1)
}

const betweenTail = between.charCodeAt(between.length - 1)
const close = encodeInfo(info.after.charCodeAt(0), betweenTail, marker)

if (close.inside) {
between = between.slice(0, -1) + encodeCharacterReference(betweenTail)
}

const after = tracker.move(marker + marker)

exit()
return value

state.attentionEncodeSurroundingInfo = {
after: close.outside,
before: open.outside
}
return before + between + after
}

/**
Expand Down
25 changes: 13 additions & 12 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,35 +22,36 @@ import {track} from './util/track.js'
*
* @param {Nodes} tree
* Tree to serialize.
* @param {Options} [options]
* @param {Options | null | undefined} [options]
* Configuration (optional).
* @returns {string}
* Serialized markdown representing `tree`.
*/
export function toMarkdown(tree, options = {}) {
export function toMarkdown(tree, options) {
const settings = options || {}
/** @type {State} */
const state = {
enter,
indentLines,
associationId: association,
containerPhrasing: containerPhrasingBound,
containerFlow: containerFlowBound,
createTracker: track,
compilePattern,
safe: safeBound,
stack: [],
unsafe: [...unsafe],
join: [...join],
enter,
// @ts-expect-error: GFM / frontmatter are typed in `mdast` but not defined
// here.
handlers: {...handlers},
options: {},
indexStack: [],
// @ts-expect-error: add `handle` in a second.
handle: undefined
handle: undefined,
indentLines,
indexStack: [],
join: [...join],
options: {},
safe: safeBound,
stack: [],
unsafe: [...unsafe]
}

configure(state, options)
configure(state, settings)

if (state.options.tightDefinitions) {
state.join.push(joinDefinition)
Expand Down
44 changes: 44 additions & 0 deletions lib/types.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,38 @@ export type ContainerPhrasing = (parent: PhrasingParents, info: Info) => string
*/
export type CreateTracker = (info: TrackFields) => Tracker

/**
* Whether to encode things — with fields representing the surrounding of a
* whole.
*
* Each field says whether the character on that side of the whole, which is
* outside of it, has to be encoded.
*/
export interface EncodeSurrounding {
/**
* Whether to encode the character after the whole.
*/
after: boolean

/**
* Whether to encode the character before the whole.
*/
before: boolean
}

/**
* Whether to encode things — with fields representing the relationship to a
* whole.
*
* Each field says whether the character on that side of a boundary of the
* whole has to be encoded.
*/
export interface EncodeSides {
/**
* Whether to encode inside.
*/
inside: boolean

/**
* Whether to encode outside.
*/
outside: boolean
}

/**
* Enter something.
*
Expand Down Expand Up @@ -754,6 +786,18 @@ export interface State {
* Get an identifier from an association to match it to others.
*/
associationId: AssociationId
/**
* Info on whether to encode the surrounding of *attention*.
*
* Whether attention (emphasis, strong, strikethrough) forms
* depends on the characters inside and outside them.
* The characters inside can be handled by *attention* itself.
* However, the outside characters were either already handled,
* or will be handled afterwards.
* This field can be used to signal from *attention* that some parent
* function (practically `containerPhrasing`) has to handle the surrounding.
*/
attentionEncodeSurroundingInfo: EncodeSurrounding | undefined
/**
* List marker currently in use.
*/
Expand Down
50 changes: 40 additions & 10 deletions lib/util/container-phrasing.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
* @import {PhrasingParents} from '../types.js'
*/

import {encodeCharacterReference} from './encode-character-reference.js'

/**
* Serialize the children of a parent that contains phrasing children.
*
Expand All @@ -24,6 +26,8 @@ export function containerPhrasing(parent, state, info) {
const results = []
let index = -1
let before = info.before
/** @type {string | undefined} */
let encodeAfter

indexStack.push(-1)
let tracker = state.createTracker(info)
Expand Down Expand Up @@ -75,17 +79,43 @@ export function containerPhrasing(parent, state, info) {
tracker.move(results.join(''))
}

results.push(
tracker.move(
state.handle(child, parent, state, {
...tracker.current(),
before,
after
})
)
)
let value = state.handle(child, parent, state, {
...tracker.current(),
after,
before
})

// If we had to encode the first character after the previous node and it’s
// still the same character,
// encode it.
if (encodeAfter && encodeAfter === value.slice(0, 1)) {
value =
encodeCharacterReference(encodeAfter.charCodeAt(0)) + value.slice(1)
}

const encodingInfo = state.attentionEncodeSurroundingInfo
state.attentionEncodeSurroundingInfo = undefined
encodeAfter = undefined

// If we have to encode the first character before the current node and
// it’s still the same character,
// encode it.
if (encodingInfo) {
if (
encodingInfo.before &&
before === results[results.length - 1].slice(-1)
) {
results[results.length - 1] =
results[results.length - 1].slice(0, -1) +
encodeCharacterReference(before.charCodeAt(0))
}

if (encodingInfo.after) encodeAfter = after
}

before = results[results.length - 1].slice(-1)
tracker.move(value)
results.push(value)
before = value.slice(-1)
}

indexStack.pop()
Expand Down
11 changes: 11 additions & 0 deletions lib/util/encode-character-reference.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
 * Turn a code point into a hexadecimal numeric character reference.
 *
 * @param {number} code
 *   Code point to turn into a reference.
 * @returns {string}
 *   The reference: `&#x`, then the uppercase hexadecimal digits of `code`,
 *   then `;`.
 */
export function encodeCharacterReference(code) {
  const hexadecimal = code.toString(16).toUpperCase()
  return `&#x${hexadecimal};`
}
Loading

0 comments on commit 97fb818

Please sign in to comment.