refactor(parser/renderer): include files before full parsing (bytespa…

…radise#344) add a new entrypoint in the grammar to "preparse" the document, only looking for a subset of elements, in particular the `include` directives and the sections whose level may be offset during the file inclusions. Once the files have been included (i.e., the `include` directives have been replaced with the actual file content), the document can be fully parsed. Fixes bytesparadise#343 Signed-off-by: Xavier Coulon <[email protected]>
xcoulon · May 8, 2019 · e152aa4 · e152aa4
1 parent 99de2e9
commit e152aa4
Show file tree

Hide file tree

Showing 52 changed files with 58,857 additions and 38,946 deletions.
diff --git a/.golangci.yml b/.golangci.yml
@@ -1,5 +1,6 @@
 run:
   skip-dirs:
+    - pkg/parser/includes
     - pkg/renderer/html5/includes
   skip-files:
     - pkg/parser/asciidoc_parser.go # generated

diff --git a/LIMITATIONS.adoc b/LIMITATIONS.adoc
@@ -55,3 +55,9 @@ will produce no HTML element at all, whereas Asciidoc/Asciidoctor will produce :
 <p></p>
 </div>
 ....
+
+== File Inclusions
+
+File inclusions are performed before the full parsing takes place. During this phase, the main file is parsed to look for `include::` directives, which are then replaced with the content of the files to include. 
+If the file to include has an empty last line, that line will be ignored, so it's always a good practice to include a blank line after the `include::` directive in the main document, to avoid side-effects during
+the "full" parsing.
diff --git a/Makefile b/Makefile
@@ -82,7 +82,7 @@ generate: prebuild-checks
 ## generate the .go file based on the asciidoc grammar
 generate-optimized:
 	@echo "generating the parser (optimized)..."
-	@pigeon -optimize-grammar -alternate-entrypoints InlineElementsWithoutSubtitution,VerbatimBlock ./pkg/parser/asciidoc-grammar.peg > ./pkg/parser/asciidoc_parser.go
+	@pigeon -optimize-grammar -alternate-entrypoints PreparsedDocument,InlineElementsWithoutSubtitution,VerbatimBlock ./pkg/parser/asciidoc-grammar.peg > ./pkg/parser/asciidoc_parser.go
 
 
 .PHONY: test

diff --git a/libasciidoc.go b/libasciidoc.go
@@ -43,15 +43,15 @@ func ConvertToHTML(ctx context.Context, r io.Reader, output io.Writer, options .
 	log.Debugf("parsing the asciidoc source...")
 	start := time.Now()
 	stats := parser.Stats{}
-	doc, err := parser.ParseReader("", r, parser.Statistics(&stats, "no match"))
+	doc, err := parser.ParseDocument("", r, parser.Statistics(&stats, "no match"))
 	if err != nil {
 		return nil, errors.Wrapf(err, "error while parsing the document")
 	}
 	duration := time.Since(start)
 	log.Debugf("parsing stats:")
 	log.Debugf("- parsing duration:                %v", duration)
 	log.Debugf("- expressions processed:           %v", stats.ExprCnt)
-	return convertToHTML(ctx, doc.(types.Document), output, options...)
+	return convertToHTML(ctx, doc, output, options...)
 }
 
 func convertToHTML(ctx context.Context, doc types.Document, output io.Writer, options ...renderer.Option) (map[string]interface{}, error) {

diff --git a/pkg/parser/asciidoc-grammar.peg b/pkg/parser/asciidoc-grammar.peg
@@ -7,16 +7,35 @@ import (
     "github.com/bytesparadise/libasciidoc/pkg/types"
 
     log "github.com/sirupsen/logrus"
+    errs "github.com/pkg/errors"
 )
 
 // *****************************************************************************************
 // This file is generated after its sibling `asciidoc-grammar.peg` file. DO NOT MODIFY !
 // *****************************************************************************************
 
+// ParseDocument runs PreparseDocument on the content of r, then runs Parse on the preparsed result and returns it as a types.Document;
+// filename is passed through to both steps, and an error is returned if either step fails or if the result is not a types.Document.
+func ParseDocument(filename string, r io.Reader, opts ...Option) (types.Document, error) {
+	preparsedDoc, err := PreparseDocument(filename, r, opts...)
+	if err != nil {
+		return types.Document{}, err
+	}
+    result, err := Parse(filename, preparsedDoc, opts...)
+	if err != nil {
+		return types.Document{}, err
+	}
+	doc, ok := result.(types.Document)
+	if !ok {
+		return types.Document{}, errs.Errorf("invalid type of result: %T (expected a Document)", result)
+	}
+	return doc, nil
+}
+
 }
 
 // ------------------------------------------
-// Document
+// Document - fully parsed document
 // ------------------------------------------
 Document <- frontMatter:(FrontMatter?) blocks:(DocumentBlocks) EOF {
     return types.NewDocument(frontMatter, blocks.([]interface{}))
@@ -33,6 +52,36 @@ DocumentBlock <- attributes:(ElementAttribute)* block:(Section / DocumentElement
     return types.WithAttributes(block, attributes.([]interface{}))
 }
 
+// ------------------------------------------------------------------------------------
+// PreparsedDocument: document where only preprocessing directives are parsed,
+// while the rest is just retrieved as raw text
+// ------------------------------------------------------------------------------------
+PreparsedDocument <- blocks:(PreparsedDocumentBlocks) EOF {
+    return types.NewPreparsedDocument(blocks.([]interface{}))
+}
+
+PreparsedDocumentBlocks <- (DocumentAttributeDeclaration / 
+        RawSectionTitle /
+        FileInclude / 
+        BlankLine / 
+        RawText)* 
+
+RawSectionTitle <- prefix:(RawSectionTitlePrefix) title:RawSectionTitleContent {
+    return types.NewRawSectionTitle(prefix.(types.RawSectionTitlePrefix), title.(types.RawSectionTitleContent))
+}
+
+RawSectionTitlePrefix <- level:("="+ { return c.text, nil }) spaces:(WS+ { return c.text, nil }) {
+    return types.NewRawSectionTitlePrefix(level.([]byte), spaces.([]byte))
+}
+
+RawSectionTitleContent <- content:((!EOL .)+ { return c.text, nil }) EOL {
+    return types.NewRawSectionTitleContent(content.([]byte))
+}
+
+RawText <- content:((!EOL .)+ { return c.text, nil }) EOL {
+    return types.NewRawText(content.([]byte))
+}
+
 // ------------------------------------------
 // Front Matter
 // ------------------------------------------
@@ -173,8 +222,16 @@ DocumentElement <- !EOF // when reaching EOF, do not try to parse a new document
 // Element Attributes
 // ------------------------------------------
 ElementAttribute <- &("[" / "." / "#") // skip if the content does not start with one of those characters
-    attr:(ElementID / ElementTitle / ElementRole / SourceAttributes / QuoteAttributes / VerseAttributes / AdmonitionMarkerAttribute / HorizontalLayout / AttributeGroup) WS* EOL {
-        return attr, nil // avoid returning something like `[]interface{}{attr, EOL}`
+    attr:(ElementID / 
+        ElementTitle / 
+        ElementRole / 
+        SourceAttributes / 
+        QuoteAttributes / 
+        VerseAttributes / 
+        AdmonitionMarkerAttribute / 
+        HorizontalLayout / 
+        AttributeGroup) WS* EOL {
+    return attr, nil // avoid returning something like `[]interface{}{attr, EOL}`
 }
 
 ElementAttributePrefixMatch <- "[" / "." / "#"
@@ -326,7 +383,7 @@ Section1_5 <- &"=" // just skip if the content does not start with at least one
         return section, nil
 }
 
-Section2_5 <- &"=" // just skip if the content does not start with at least one '='
+Section2_5 <- &"="  // just skip if the content does not start with at least one '='
     section:(Section2 / Section3 / Section4 / Section5) {
         return section, nil
 }
@@ -341,9 +398,16 @@ Section4_5 <- &"=" // just skip if the content does not start with at least one
         return section, nil
 }
 
-SectionTitlePrefix <- ("=")+ WS*
+SectionTitlePrefix <- Section0TitlePrefix /
+                        Section1TitlePrefix /
+                        Section2TitlePrefix /
+                        Section3TitlePrefix /
+                        Section4TitlePrefix /
+                        Section5TitlePrefix
 
-Section0TitlePrefix <- "=" WS+
+Section0TitlePrefix <- "=" WS+ {
+    return c.text, nil
+}
 
 Section0WithMetadata <- title:(Section0Title) 
         authors:(DocumentAuthors?) 
@@ -364,11 +428,6 @@ Section0Title <- Section0TitlePrefix elements:(TitleElements) id:(InlineElementI
 }
 
 
-// Section0TitleWithAttributes <- attributes:(ElementAttribute)* 
-//         title:(Section0Title) {
-//     return types.WithAttributes(title, attributes.([]interface{}))
-// }
-
 Section0Element <- !Section0TitlePrefix 
         attributes:(ElementAttribute)* 
         element:(Section1_5 / DocumentElement) {
@@ -381,10 +440,12 @@ Section1 <- header:(Section1Title)
     return types.NewSection(1, header.(types.SectionTitle), elements.([]interface{}))
 }
 
-Section1TitlePrefix <- "==" WS+
+Section1TitlePrefix <- "==" WS+ {
+    return c.text, nil
+}
 
 Section1Title <- Section1TitlePrefix elements:(TitleElements) id:(InlineElementID*) EOL {
-    return types.NewSectionTitle(elements.(types.InlineElements), id.([]interface{})) 
+    return types.NewSectionTitle(elements.(types.InlineElements), id.([]interface{}))
 }
 
 Section1Element <- !Section1TitlePrefix 
@@ -399,7 +460,9 @@ Section2 <- header:(Section2Title)
     return types.NewSection(2, header.(types.SectionTitle), elements.([]interface{}))
 }
 
-Section2TitlePrefix <- "===" WS+ 
+Section2TitlePrefix <- "===" WS+ {
+    return c.text, nil
+}
 
 Section2Title <- Section2TitlePrefix elements:(TitleElements) id:(InlineElementID*) EOL {
     return types.NewSectionTitle(elements.(types.InlineElements), id.([]interface{})) 
@@ -417,7 +480,9 @@ Section3 <- header:(Section3Title)
     return types.NewSection(3, header.(types.SectionTitle), elements.([]interface{}))
 }
 
-Section3TitlePrefix <- "====" WS+ 
+Section3TitlePrefix <- "====" WS+ {
+    return c.text, nil
+}
 
 Section3Title <- Section3TitlePrefix elements:(TitleElements) id:(InlineElementID*) EOL {
     return types.NewSectionTitle(elements.(types.InlineElements), id.([]interface{})) 
@@ -435,7 +500,9 @@ Section4 <- header:(Section4Title)
     return types.NewSection(4, header.(types.SectionTitle), elements.([]interface{}))
 }
 
-Section4TitlePrefix <- "=====" WS+ 
+Section4TitlePrefix <- "=====" WS+ {
+    return c.text, nil
+}
 
 Section4Title <- Section4TitlePrefix elements:(TitleElements) id:(InlineElementID*) EOL {
     return types.NewSectionTitle(elements.(types.InlineElements), id.([]interface{})) 
@@ -453,7 +520,9 @@ Section5 <- header:(Section5Title)
     return types.NewSection(5, header.(types.SectionTitle), elements.([]interface{}))
 }
 
-Section5TitlePrefix <- "======" WS+ 
+Section5TitlePrefix <- "======" WS+ {
+    return c.text, nil
+}
 
 Section5Title <- Section5TitlePrefix elements:(TitleElements) id:(InlineElementID*) EOL {
     return types.NewSectionTitle(elements.(types.InlineElements), id.([]interface{})) 
@@ -733,11 +802,11 @@ VerseParagraph <-
     }
     verse:(
         // admonition paragraph 
-        !("="+ WS+ !NEWLINE) t:(AdmonitionKind) ": " lines:(InlineElements)+ { 
+        t:(AdmonitionKind) ": " lines:(InlineElements)+ { 
             return types.NewAdmonitionParagraph(lines.([]interface{}), t.(types.AdmonitionKind))
         } / 
         // other kind of paragraph (verse, regular, etc.)
-        !("="+ WS+ !NEWLINE) lines:(InlineElements)+ { 
+        lines:(InlineElements)+ { 
             return types.NewParagraph(lines.([]interface{}))
         } 
     ) #{
@@ -747,7 +816,7 @@ VerseParagraph <-
         return verse, nil
 }
 
-InlineElements <- !BlankLine 
+InlineElements <- !BlankLine
     elements:(comment:(SingleLineComment) {
         return types.NewInlineElements([]interface{}{comment})
     } / !BlockDelimiter elements:(InlineElement)+ linebreak:(LineBreak)? EOL { 
@@ -1198,10 +1267,32 @@ QuoteBlock <- QuoteBlockDelimiter content:(QuoteBlockElement)* (QuoteBlockDelimi
 }
 
 QuoteBlockElement <- 
-    !QuoteBlockDelimiter !EOF element:(DocumentElement) {
+    !QuoteBlockDelimiter !EOF element:(BlankLine 
+            / FileInclude
+            / VerseBlock
+            / VerseParagraph
+            / ImageBlock 
+            / List 
+            / FencedBlock
+            / ListingBlock
+            / ExampleBlock
+            / CommentBlock
+            / SingleLineComment
+            / QuoteBlock 
+            / SidebarBlock
+            / Table 
+            / LiteralBlock 
+            / DocumentAttributeDeclaration 
+            / DocumentAttributeReset 
+            / TableOfContentsMacro
+            / QuoteBlockParagraph) {
         return element, nil
     } 
 
+QuoteBlockParagraph <- lines:(InlineElements)+ { 
+    return types.NewParagraph(lines.([]interface{}))
+}
+
 // -------------------------------------------------------------------------------------
 // Verse blocks
 // -------------------------------------------------------------------------------------
@@ -1220,7 +1311,7 @@ verse:(QuoteBlockDelimiter content:(VerseBlockElement)* (QuoteBlockDelimiter / E
     return verse, nil
 }
 
-VerseBlockElement <- VerseFileInclude / VerseBlockParagraph
+VerseBlockElement <- VerseFileInclude / BlankLine / VerseBlockParagraph
 
 
 VerseFileInclude <- !QuoteBlockDelimiter !EOF include:(FileInclude) {
@@ -1231,7 +1322,7 @@ VerseBlockParagraph <- lines:(VerseBlockLine)+ {
     return types.NewParagraph(lines.([]interface{}), nil)
 }
 
-VerseBlockLine <- !QuoteBlockDelimiter !EOF line:(VerseBlockLineContent) EOL {
+VerseBlockLine <- !QuoteBlockDelimiter !BlankLine !EOF line:(VerseBlockLineContent) EOL {
     return line.(types.InlineElements), nil
 }