Skip to content

Commit

Permalink
refactor(parser/renderer): include files before full parsing (#344)
Browse files Browse the repository at this point in the history
add a new entrypoint in the grammar to "preparse" the document, only
looking for a subset of elements, in particular the `include` directives
and the sections whose level may be offset during the file inclusions.
Once the files have been included (i.e., the `include` directives have been
replaced with the actual file content), then the document can be fully parsed.

Fixes #343

Signed-off-by: Xavier Coulon <[email protected]>
  • Loading branch information
xcoulon authored May 8, 2019
1 parent 99de2e9 commit e152aa4
Show file tree
Hide file tree
Showing 52 changed files with 58,857 additions and 38,946 deletions.
1 change: 1 addition & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
run:
skip-dirs:
- pkg/parser/includes
- pkg/renderer/html5/includes
skip-files:
- pkg/parser/asciidoc_parser.go # generated
Expand Down
6 changes: 6 additions & 0 deletions LIMITATIONS.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,9 @@ will produce no HTML element at all, whereas Asciidoc/Asciidoctor will produce :
<p></p>
</div>
....

== File Inclusions

File inclusions are performed before the full parsing takes place. During this phase, the main file is parsed to look for `include::` directives, which are then replaced with the content of the files to include.
If the file to include ends with an empty line, that empty line is ignored, so it is always good practice to add a blank line after the `include::` directive in the main document, to avoid side effects during
the "full" parsing.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ generate: prebuild-checks
## generate the .go file based on the asciidoc grammar
generate-optimized:
@echo "generating the parser (optimized)..."
@pigeon -optimize-grammar -alternate-entrypoints InlineElementsWithoutSubtitution,VerbatimBlock ./pkg/parser/asciidoc-grammar.peg > ./pkg/parser/asciidoc_parser.go
@pigeon -optimize-grammar -alternate-entrypoints PreparsedDocument,InlineElementsWithoutSubtitution,VerbatimBlock ./pkg/parser/asciidoc-grammar.peg > ./pkg/parser/asciidoc_parser.go


.PHONY: test
Expand Down
4 changes: 2 additions & 2 deletions libasciidoc.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,15 @@ func ConvertToHTML(ctx context.Context, r io.Reader, output io.Writer, options .
log.Debugf("parsing the asciidoc source...")
start := time.Now()
stats := parser.Stats{}
doc, err := parser.ParseReader("", r, parser.Statistics(&stats, "no match"))
doc, err := parser.ParseDocument("", r, parser.Statistics(&stats, "no match"))
if err != nil {
return nil, errors.Wrapf(err, "error while parsing the document")
}
duration := time.Since(start)
log.Debugf("parsing stats:")
log.Debugf("- parsing duration: %v", duration)
log.Debugf("- expressions processed: %v", stats.ExprCnt)
return convertToHTML(ctx, doc.(types.Document), output, options...)
return convertToHTML(ctx, doc, output, options...)
}

func convertToHTML(ctx context.Context, doc types.Document, output io.Writer, options ...renderer.Option) (map[string]interface{}, error) {
Expand Down
137 changes: 114 additions & 23 deletions pkg/parser/asciidoc-grammar.peg
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,35 @@ import (
"github.com/bytesparadise/libasciidoc/pkg/types"

log "github.com/sirupsen/logrus"
errs "github.com/pkg/errors"
)

// *****************************************************************************************
// This file is generated after its sibling `asciidoc-grammar.peg` file. DO NOT MODIFY !
// *****************************************************************************************

// ParseDocument reads the source from r and returns the resulting
// types.Document. The work is done in two passes: the content is first run
// through PreparseDocument, and the outcome of that pass is then handed over
// to Parse for the full parsing. The filename and opts arguments are forwarded
// unchanged to both passes.
//
// An error is returned if either pass fails, or if the full parsing yields a
// value that is not a types.Document. In all error cases the returned
// document is the zero value.
func ParseDocument(filename string, r io.Reader, opts ...Option) (types.Document, error) {
	source, err := PreparseDocument(filename, r, opts...)
	if err != nil {
		return types.Document{}, err
	}
	parsed, err := Parse(filename, source, opts...)
	if err != nil {
		return types.Document{}, err
	}
	// Parse returns an untyped result: make sure it really is a document
	// before handing it back to the caller.
	if doc, ok := parsed.(types.Document); ok {
		return doc, nil
	}
	return types.Document{}, errs.Errorf("invalid type of result: %T (expected a Document)", parsed)
}

}

// ------------------------------------------
// Document
// Document - fully parsed document
// ------------------------------------------
Document <- frontMatter:(FrontMatter?) blocks:(DocumentBlocks) EOF {
return types.NewDocument(frontMatter, blocks.([]interface{}))
Expand All @@ -33,6 +52,36 @@ DocumentBlock <- attributes:(ElementAttribute)* block:(Section / DocumentElement
return types.WithAttributes(block, attributes.([]interface{}))
}

// ------------------------------------------------------------------------------------
// PreparsedDocument: document where only preprocessing directives are parsed,
// while the rest is just retrieved as raw text
// ------------------------------------------------------------------------------------
// Matches the whole input: a sequence of PreparsedDocumentBlocks that must be
// followed by EOF. The matched blocks are passed to types.NewPreparsedDocument.
PreparsedDocument <- blocks:(PreparsedDocumentBlocks) EOF {
return types.NewPreparsedDocument(blocks.([]interface{}))
}

// Zero or more blocks recognized during the "preparse" phase. Alternatives are
// tried in the order listed, so RawText (last) only applies to content that
// was not matched by any of the more specific rules before it.
PreparsedDocumentBlocks <- (DocumentAttributeDeclaration /
RawSectionTitle /
FileInclude /
BlankLine /
RawText)*

// A section title kept in raw form: a RawSectionTitlePrefix immediately
// followed by a RawSectionTitleContent. Both parts are combined with
// types.NewRawSectionTitle.
RawSectionTitle <- prefix:(RawSectionTitlePrefix) title:RawSectionTitleContent {
return types.NewRawSectionTitle(prefix.(types.RawSectionTitlePrefix), title.(types.RawSectionTitleContent))
}

// The leading marker of a section title: one or more "=" characters followed
// by one or more WS. Each part is captured as matched text (c.text) and both
// are given, as byte slices, to types.NewRawSectionTitlePrefix.
RawSectionTitlePrefix <- level:("="+ { return c.text, nil }) spaces:(WS+ { return c.text, nil }) {
return types.NewRawSectionTitlePrefix(level.([]byte), spaces.([]byte))
}

// The remainder of a section title line: at least one character that is not
// at an EOL, captured as matched text (c.text), then the EOL itself (which is
// consumed but not part of the captured content).
RawSectionTitleContent <- content:((!EOL .)+ { return c.text, nil }) EOL {
return types.NewRawSectionTitleContent(content.([]byte))
}

// A single non-empty line kept verbatim: at least one character that is not at
// an EOL, captured as matched text (c.text), then the EOL itself (consumed but
// not part of the captured content). The bytes are wrapped by types.NewRawText.
RawText <- content:((!EOL .)+ { return c.text, nil }) EOL {
return types.NewRawText(content.([]byte))
}

// ------------------------------------------
// Front Matter
// ------------------------------------------
Expand Down Expand Up @@ -173,8 +222,16 @@ DocumentElement <- !EOF // when reaching EOF, do not try to parse a new document
// Element Attributes
// ------------------------------------------
ElementAttribute <- &("[" / "." / "#") // skip if the content does not start with one of those characters
attr:(ElementID / ElementTitle / ElementRole / SourceAttributes / QuoteAttributes / VerseAttributes / AdmonitionMarkerAttribute / HorizontalLayout / AttributeGroup) WS* EOL {
return attr, nil // avoid returning something like `[]interface{}{attr, EOL}`
attr:(ElementID /
ElementTitle /
ElementRole /
SourceAttributes /
QuoteAttributes /
VerseAttributes /
AdmonitionMarkerAttribute /
HorizontalLayout /
AttributeGroup) WS* EOL {
return attr, nil // avoid returning something like `[]interface{}{attr, EOL}`
}

ElementAttributePrefixMatch <- "[" / "." / "#"
Expand Down Expand Up @@ -326,7 +383,7 @@ Section1_5 <- &"=" // just skip if the content does not start with at least one
return section, nil
}

Section2_5 <- &"=" // just skip if the content does not start with at least one '='
Section2_5 <- &"=" // just skip if the content does not start with at least one '='
section:(Section2 / Section3 / Section4 / Section5) {
return section, nil
}
Expand All @@ -341,9 +398,16 @@ Section4_5 <- &"=" // just skip if the content does not start with at least one
return section, nil
}

SectionTitlePrefix <- ("=")+ WS*
SectionTitlePrefix <- Section0TitlePrefix /
Section1TitlePrefix /
Section2TitlePrefix /
Section3TitlePrefix /
Section4TitlePrefix /
Section5TitlePrefix

Section0TitlePrefix <- "=" WS+
Section0TitlePrefix <- "=" WS+ {
return c.text, nil
}

Section0WithMetadata <- title:(Section0Title)
authors:(DocumentAuthors?)
Expand All @@ -364,11 +428,6 @@ Section0Title <- Section0TitlePrefix elements:(TitleElements) id:(InlineElementI
}


// Section0TitleWithAttributes <- attributes:(ElementAttribute)*
// title:(Section0Title) {
// return types.WithAttributes(title, attributes.([]interface{}))
// }

Section0Element <- !Section0TitlePrefix
attributes:(ElementAttribute)*
element:(Section1_5 / DocumentElement) {
Expand All @@ -381,10 +440,12 @@ Section1 <- header:(Section1Title)
return types.NewSection(1, header.(types.SectionTitle), elements.([]interface{}))
}

Section1TitlePrefix <- "==" WS+
Section1TitlePrefix <- "==" WS+ {
return c.text, nil
}

Section1Title <- Section1TitlePrefix elements:(TitleElements) id:(InlineElementID*) EOL {
return types.NewSectionTitle(elements.(types.InlineElements), id.([]interface{}))
return types.NewSectionTitle(elements.(types.InlineElements), id.([]interface{}))
}

Section1Element <- !Section1TitlePrefix
Expand All @@ -399,7 +460,9 @@ Section2 <- header:(Section2Title)
return types.NewSection(2, header.(types.SectionTitle), elements.([]interface{}))
}

Section2TitlePrefix <- "===" WS+
Section2TitlePrefix <- "===" WS+ {
return c.text, nil
}

Section2Title <- Section2TitlePrefix elements:(TitleElements) id:(InlineElementID*) EOL {
return types.NewSectionTitle(elements.(types.InlineElements), id.([]interface{}))
Expand All @@ -417,7 +480,9 @@ Section3 <- header:(Section3Title)
return types.NewSection(3, header.(types.SectionTitle), elements.([]interface{}))
}

Section3TitlePrefix <- "====" WS+
Section3TitlePrefix <- "====" WS+ {
return c.text, nil
}

Section3Title <- Section3TitlePrefix elements:(TitleElements) id:(InlineElementID*) EOL {
return types.NewSectionTitle(elements.(types.InlineElements), id.([]interface{}))
Expand All @@ -435,7 +500,9 @@ Section4 <- header:(Section4Title)
return types.NewSection(4, header.(types.SectionTitle), elements.([]interface{}))
}

Section4TitlePrefix <- "=====" WS+
Section4TitlePrefix <- "=====" WS+ {
return c.text, nil
}

Section4Title <- Section4TitlePrefix elements:(TitleElements) id:(InlineElementID*) EOL {
return types.NewSectionTitle(elements.(types.InlineElements), id.([]interface{}))
Expand All @@ -453,7 +520,9 @@ Section5 <- header:(Section5Title)
return types.NewSection(5, header.(types.SectionTitle), elements.([]interface{}))
}

Section5TitlePrefix <- "======" WS+
Section5TitlePrefix <- "======" WS+ {
return c.text, nil
}

Section5Title <- Section5TitlePrefix elements:(TitleElements) id:(InlineElementID*) EOL {
return types.NewSectionTitle(elements.(types.InlineElements), id.([]interface{}))
Expand Down Expand Up @@ -733,11 +802,11 @@ VerseParagraph <-
}
verse:(
// admonition paragraph
!("="+ WS+ !NEWLINE) t:(AdmonitionKind) ": " lines:(InlineElements)+ {
t:(AdmonitionKind) ": " lines:(InlineElements)+ {
return types.NewAdmonitionParagraph(lines.([]interface{}), t.(types.AdmonitionKind))
} /
// other kind of paragraph (verse, regular, etc.)
!("="+ WS+ !NEWLINE) lines:(InlineElements)+ {
lines:(InlineElements)+ {
return types.NewParagraph(lines.([]interface{}))
}
) #{
Expand All @@ -747,7 +816,7 @@ VerseParagraph <-
return verse, nil
}

InlineElements <- !BlankLine
InlineElements <- !BlankLine
elements:(comment:(SingleLineComment) {
return types.NewInlineElements([]interface{}{comment})
} / !BlockDelimiter elements:(InlineElement)+ linebreak:(LineBreak)? EOL {
Expand Down Expand Up @@ -1198,10 +1267,32 @@ QuoteBlock <- QuoteBlockDelimiter content:(QuoteBlockElement)* (QuoteBlockDelimi
}

QuoteBlockElement <-
!QuoteBlockDelimiter !EOF element:(DocumentElement) {
!QuoteBlockDelimiter !EOF element:(BlankLine
/ FileInclude
/ VerseBlock
/ VerseParagraph
/ ImageBlock
/ List
/ FencedBlock
/ ListingBlock
/ ExampleBlock
/ CommentBlock
/ SingleLineComment
/ QuoteBlock
/ SidebarBlock
/ Table
/ LiteralBlock
/ DocumentAttributeDeclaration
/ DocumentAttributeReset
/ TableOfContentsMacro
/ QuoteBlockParagraph) {
return element, nil
}

// A paragraph made of one or more InlineElements lines, built with
// types.NewParagraph from the matched lines only.
QuoteBlockParagraph <- lines:(InlineElements)+ {
return types.NewParagraph(lines.([]interface{}))
}

// -------------------------------------------------------------------------------------
// Verse blocks
// -------------------------------------------------------------------------------------
Expand All @@ -1220,7 +1311,7 @@ verse:(QuoteBlockDelimiter content:(VerseBlockElement)* (QuoteBlockDelimiter / E
return verse, nil
}

VerseBlockElement <- VerseFileInclude / VerseBlockParagraph
VerseBlockElement <- VerseFileInclude / BlankLine / VerseBlockParagraph


VerseFileInclude <- !QuoteBlockDelimiter !EOF include:(FileInclude) {
Expand All @@ -1231,7 +1322,7 @@ VerseBlockParagraph <- lines:(VerseBlockLine)+ {
return types.NewParagraph(lines.([]interface{}), nil)
}

VerseBlockLine <- !QuoteBlockDelimiter !EOF line:(VerseBlockLineContent) EOL {
VerseBlockLine <- !QuoteBlockDelimiter !BlankLine !EOF line:(VerseBlockLineContent) EOL {
return line.(types.InlineElements), nil
}

Expand Down
Loading

0 comments on commit e152aa4

Please sign in to comment.