Skip to content

Commit

Permalink
refactor(parser): parse quotation marks instead of quoted strings
Browse files Browse the repository at this point in the history
Drop the `types.QuotedString` type and simply focus on quotation
marks (and their escaped variants), which is simpler but slightly
more permissive with regard to surrounding spaces.

Fixes #972

Signed-off-by: Xavier Coulon <[email protected]>
  • Loading branch information
xcoulon committed Mar 13, 2022
1 parent 9bb5b66 commit 07fbbfd
Show file tree
Hide file tree
Showing 13 changed files with 26,691 additions and 28,541 deletions.
2 changes: 1 addition & 1 deletion pkg/parser/document_processing_apply_substitutions.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ func enableExperimentalMacros(enabled bool) Option {
// isExperimentalEnabled reports whether the `experimental` doc attribute is currently set.
// No value is expected for the attribute itself; a boolean flag is kept in the global store
// (under experimentalMacrosKey) to handle the case where the attribute was reset.
// It returns false when the flag is absent from the store or is not a bool.
func (c *current) isExperimentalEnabled() bool {
enabled, found := c.globalStore[experimentalMacrosKey].(bool)
log.Debugf("experimental enabled: %t", (found && enabled))
// log.Debugf("experimental enabled: %t", (found && enabled))
return found && enabled
}

Expand Down
53,750 changes: 25,931 additions & 27,819 deletions pkg/parser/parser.go

Large diffs are not rendered by default.

158 changes: 27 additions & 131 deletions pkg/parser/parser.peg
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,8 @@ ShortHandAttributeValue <-
/ elements:(
// unquoted shorthand value may include placeholders and substitutions but NOT comma, space, equal sign and dots
// also, cannot be followed by an `=` sign
([^,=.%# \r\n\uFFFD{\]]+ {
QuotationMark
/ ([^,=.%# \r\n\uFFFD{\]]+ {
return types.NewStringElement(string(c.text))
})
// / ElementPlaceHolder
Expand Down Expand Up @@ -575,6 +576,7 @@ SingleQuotedAttributeValueContent <-
Alphanums
/ Space
/ Quote
/ QuotationMark
/ AttributeReference
/ (`\'` {
return types.NewStringElement(`'`) // escaped single quote
Expand All @@ -600,6 +602,7 @@ DoubleQuotedAttributeValueContent <-
Alphanums
/ Space
/ Quote
/ QuotationMark
/ AttributeReference
/ (`\"` {
return types.NewStringElement(`"`) // escaped double quote
Expand All @@ -620,14 +623,18 @@ UnquotedAttributeValue <-
// so we need to count the `[` and `]` to balance
!Space // can't start with a space (eg: can't have `[ cookie ]`)
elements:(
Quote // quotes can have their own attributes
Quote // quotes can have their own attributes // TODO: move down
/ ("[" UnquotedAttributeValue "]") // recursively within brackets (see comment above)
// / ElementPlaceHolder
/ ([^=,\uFFFD\]{ ]+ { // not within brackets and stop on space and `{`
/ ([^=,\uFFFD\]{'"` ]+ { // not within brackets and stop on space and quotation marks (`"')
return string(c.text), nil
})
/ Space
/ AttributeReference
/ QuotationMark
/ ("'" / "`" / "\"") { // standalone characters not used in quotation marks
return string(c.text), nil
}
/ "{"
)+ {
return types.Reduce(elements, strings.TrimSpace), nil
Expand Down Expand Up @@ -1329,7 +1336,7 @@ IndexTerm <- "((" term:(IndexTermContent) "))" {
return types.NewIndexTerm(term.([]interface{}))
}

IndexTermContent <- elements:(Word / QuotedString / QuotedText / Space / SpecialCharacter / ElementPlaceHolder / (!"))" .) {
IndexTermContent <- elements:(Word / QuotedText / Space / SpecialCharacter / ElementPlaceHolder / (!"))" .) {
return string(c.text), nil
})+ {
return types.NewInlineElements(elements.([]interface{}))
Expand Down Expand Up @@ -1976,7 +1983,6 @@ DoubleQuoteBoldTextElement <-
/ InlineMacro
/ SpecialCharacter // must be after InlineMacro (because of BareURL)
/ Symbol
/ QuotedString
/ QuotedTextInDoubleQuoteBoldText
/ ElementPlaceHolder
/ DoubleQuoteBoldTextFallbackCharacter) {
Expand Down Expand Up @@ -2032,7 +2038,6 @@ SingleQuoteBoldTextElement <-
/ InlineMacro
/ SpecialCharacter // must be after InlineMacro (because of BareURL)
/ Symbol
/ QuotedString
/ QuotedTextInSingleQuoteBoldText
/ ElementPlaceHolder
/ SingleQuoteBoldTextFallbackCharacter
Expand Down Expand Up @@ -2120,7 +2125,6 @@ DoubleQuoteItalicTextElement <-
/ InlineMacro
/ SpecialCharacter // must be after InlineMacro (because of BareURL)
/ Symbol
/ QuotedString
/ QuotedTextInDoubleQuoteItalicText
/ ElementPlaceHolder
/ DoubleQuoteItalicTextFallbackCharacter) {
Expand Down Expand Up @@ -2188,7 +2192,6 @@ SingleQuoteItalicTextElement <-
/ InlineMacro // must be after InlineMacro (because of BareURL)
/ SpecialCharacter
/ Symbol
/ QuotedString
/ QuotedTextInSingleQuoteItalicText
/ ElementPlaceHolder
/ SingleQuoteItalicTextFallbackCharacter
Expand Down Expand Up @@ -2274,7 +2277,6 @@ DoubleQuoteMonospaceTextElement <-
/ InlineMacro
/ SpecialCharacter // must be after InlineMacro (because of BareURL)
/ Symbol
/ QuotedString
/ RawApostrophe // must be before SingleQuoteMonospaceText
/ QuotedTextInDoubleQuoteMonospaceText
/ ElementPlaceHolder
Expand Down Expand Up @@ -2346,7 +2348,6 @@ SingleQuoteMonospaceTextElement <-
/ InlineMacro // must be after InlineMacro (because of BareURL)
/ SpecialCharacter
/ Symbol
/ QuotedString
/ QuotedTextInSingleQuoteMonospaceText
/ RawApostrophe
/ ElementPlaceHolder
Expand Down Expand Up @@ -2434,7 +2435,6 @@ DoubleQuoteMarkedTextElement <- // may start and end with spaces
/ InlineMacro
/ SpecialCharacter // must be after InlineMacro (because of BareURL)
/ Symbol
/ QuotedString
/ QuotedTextInDoubleMarkedBoldText
/ ElementPlaceHolder
/ DoubleQuoteMarkedTextFallbackCharacter
Expand Down Expand Up @@ -2504,7 +2504,6 @@ SingleQuoteMarkedTextElement <-
/ InlineMacro
/ SpecialCharacter // must be after InlineMacro (because of BareURL)
/ Symbol
/ QuotedString
/ QuotedTextInSingleQuoteMarkedText
/ ElementPlaceHolder
/ SingleQuoteMarkedTextFallbackCharacter
Expand Down Expand Up @@ -2609,115 +2608,6 @@ EscapedSuperscriptText <-
return types.NewEscapedQuotedText(backslashes.(string), "^", element)
}

// -------------------------------------------------------------------------------------------------
// Quoted Strings (between curly single or double quotes)
// -------------------------------------------------------------------------------------------------

// A quoted string is either a single-quoted or a double-quoted string (tried in that order).
QuotedString <- SingleQuotedString / DoubleQuotedString

// Single-quoted string: opening mark, one or more elements, closing mark.
// The parsed elements are handed to types.NewQuotedString with the types.SingleQuote kind.
SingleQuotedString <-
SingleQuoteStringStart
elements:SingleQuotedStringElements
SingleQuoteStringEnd {
return types.NewQuotedString(types.SingleQuote, elements.([]interface{}))
}

// One or more elements of a single-quoted string, passed to types.NewInlineElements.
// The space-suffix tracking state is reset once the whole sequence has matched.
SingleQuotedStringElements <-
elements:(SingleQuotedStringElement)+ {
c.resetSpaceSuffixTracking()
return types.NewInlineElements(elements)
}

// Opening mark of a single-quoted string: an apostrophe and a backtick,
// not followed by a space, a tab or a line ending.
SingleQuoteStringStart <- "'`" ![ \t\r\n]

// Closing mark of a single-quoted string: a backtick followed by an apostrophe.
SingleQuoteStringEnd <- "`'"

// A run of letters (`\pL`) and digits that must be followed by a space or by the
// closing mark of a single-quoted string. Returned as a string element.
// NOTE(review): this rule is also referenced from DoubleQuotedStringElement, yet the
// lookahead only knows about SingleQuoteStringEnd — confirm whether DoubleQuoteStringEnd
// should be accepted here as well.
QuotedStringWord <- [\pL0-9]+ &(Space / SingleQuoteStringEnd) {
return types.NewStringElement(string(c.text))
}

// A single element of a single-quoted string; never matches at the closing mark.
// We have to treat this one special, because of ambiguity with monospace markup.
// Every matched element is passed to c.trackSpaceSuffix before being returned.
SingleQuotedStringElement <-
!SingleQuoteStringEnd
element:(
QuotedStringWord
/ Space !SingleQuoteStringEnd // a space cannot directly precede the closing mark
/ Newline !Newline // 2 newlines split the paragraph
/ AttributeReference
/ InlineMacro
/ SpecialCharacter // must be after InlineMacro (because of BareURL)
/ LineBreak !SingleQuoteStringEnd // a line break cannot directly precede the closing mark
/ Symbol
/ QuotedTextInSingleQuotedString
/ DoubleQuotedString // quoted strings of the other kind may be nested
/ SingleQuotedStringFallbackCharacter
) {
c.trackSpaceSuffix(element)
return element, nil
}

// Quoted text (bold, italic, monospace, subscript, superscript or marked) nested in a
// single-quoted string, with optional long-hand attributes applied to the result.
QuotedTextInSingleQuotedString <-
attributes:(LongHandAttributes)?
text:(BoldText
/ ItalicText
// monospace text is not attempted where the closing mark of the string begins
/ (!"`'" element:(MonospaceText) { return element, nil})
/ SubscriptText
/ SuperscriptText
/ MarkedText) {
return text.(*types.QuotedText).WithAttributes(attributes)
}

// Fallback for a single character that no other element of a single-quoted string matched:
// any character except CR, LF, tab, space and backtick, or a backtick that is not followed
// by an apostrophe (i.e. one that does not begin the closing mark).
// The two alternatives are grouped in parentheses so that the action applies to both of them;
// without the grouping the action binds only to the second alternative and the first one
// yields the raw matched bytes instead of a string element.
SingleQuotedStringFallbackCharacter <- ([^\r\n\t `] / "`" !"'") { // '
return types.NewStringElement(string(c.text))
}

// Double-quoted string: opening mark, one or more elements, closing mark.
// The parsed elements are handed to types.NewQuotedString with the types.DoubleQuote kind.
DoubleQuotedString <- DoubleQuoteStringStart elements:DoubleQuotedStringElements DoubleQuoteStringEnd {
return types.NewQuotedString(types.DoubleQuote, elements.([]interface{}))
}

// One or more elements of a double-quoted string, passed to types.NewInlineElements.
// The space-suffix tracking state is reset once the whole sequence has matched.
DoubleQuotedStringElements <- elements:(DoubleQuotedStringElement+) {
c.resetSpaceSuffixTracking()
return types.NewInlineElements(elements)
}

// A single element of a double-quoted string; never matches at the closing mark.
// We have to treat this one special, because of ambiguity with monospace markup.
// Every matched element is passed to c.trackSpaceSuffix before being returned.
DoubleQuotedStringElement <-
!DoubleQuoteStringEnd
element:(
QuotedStringWord
/ Space !DoubleQuoteStringEnd // a space cannot directly precede the closing mark
/ Newline !Newline // 2 newlines split the paragraph
// a line break cannot directly precede the closing mark of *this* kind of string
// (the guard used to reference SingleQuoteStringEnd, copied from the single-quoted rule)
/ LineBreak !DoubleQuoteStringEnd
/ AttributeReference
/ InlineMacro
/ SpecialCharacter // must be after InlineMacro (because of BareURL)
/ QuotedTextInDoubleQuotedString
/ SingleQuotedString // quoted strings of the other kind may be nested
/ DoubleQuotedStringFallbackCharacter) {
c.trackSpaceSuffix(element)
return element, nil
}

// Quoted text (bold, italic, monospace, subscript, superscript or marked) nested in a
// double-quoted string, with optional long-hand attributes applied to the result.
QuotedTextInDoubleQuotedString <-
attributes:(LongHandAttributes)?
text:(
BoldText
/ ItalicText
// monospace text is not attempted where the closing mark of the string begins
/ (!"`\"" element:(MonospaceText) { return element, nil})
/ SubscriptText
/ SuperscriptText
/ MarkedText) {
return text.(*types.QuotedText).WithAttributes(attributes)
}

// Opening mark of a double-quoted string: a double quote and a backtick,
// not followed by a space, a tab or a line ending.
DoubleQuoteStringStart <- "\"`" ![ \t\r\n]

// Closing mark of a double-quoted string: a backtick followed by a double quote.
DoubleQuoteStringEnd <- "`\""

// Fallback for a single character that no other element of a double-quoted string matched:
// any character except CR, LF, tab, space and backtick, or a backtick that is not followed
// by a double quote (i.e. one that does not begin the closing mark).
// The alternatives are grouped so that the action applies to both of them.
DoubleQuotedStringFallbackCharacter <- ([^\r\n\t `] / "`" !"\"") {
return types.NewStringElement(string(c.text))
}

// -------------------------------------------------------------------------------------
// Sections
// -------------------------------------------------------------------------------------
Expand Down Expand Up @@ -2837,11 +2727,7 @@ Quote <-
&{
return c.isSubstitutionEnabled(Quotes), nil
}
element:(
QuotedText
/ QuotedString) {
return element, nil
}
QuotedText

Replacement <-
// check if enabled with the current substitution context
Expand Down Expand Up @@ -2889,19 +2775,29 @@ SingleLineCommentContent <- [^\r\n]* {
// -------------------------------------------------------------------------------------
Symbol <-
// escaped
`\` (Apostrophe / Copyright / Trademark / Registered / Ellipsis) {
`\` (QuotationMark / Copyright / Trademark / Registered / Ellipsis) {
return types.NewStringElement(strings.TrimPrefix(string(c.text), `\`))
}
// unescaped
/ Apostrophe / Copyright / Trademark / Registered / Ellipsis
/ QuotationMark / Copyright / Trademark / Registered / Ellipsis
//
/ TypographicQuote

Apostrophe <- "`'" {
return types.NewSymbol("`'")
QuotationMark <-
"\"`" {
return types.NewSymbol("\"`")
}
/ "`\"" {
return types.NewSymbol("`\"")
}
/ "'`" {
return types.NewSymbol("'`")
}
/ "`'" {
return types.NewSymbol("`'")
}

RawApostrophe <- "`'" // no conversion
RawApostrophe <- "`'" // no conversion // TODO: needed?

Copyright <- "(C)" {
return types.NewSymbol("(C)")
Expand Down
7 changes: 0 additions & 7 deletions pkg/parser/parser_ext.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,13 +128,6 @@ func (c *current) isPreceededBySpace() bool {
return ok && s
}

// resetSpaceSuffixTracking removes the space-suffix tracking entry (spaceSuffixTrackingKey)
// from the global store. A debug message is emitted first when debug logging is enabled.
func (c *current) resetSpaceSuffixTracking() {
// the level check avoids the logging call when debug output is disabled
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("resetting space suffix tracking")
}
delete(c.globalStore, spaceSuffixTrackingKey)
}

// verifies that the content does not end with a space
func validateSingleQuoteElements(elements []interface{}) (bool, error) {
// if log.IsLevelEnabled(log.DebugLevel) {
Expand Down
Loading

0 comments on commit 07fbbfd

Please sign in to comment.