Skip to content

Commit

Permalink
feat(parser): use the memoize option in the parser to improve perfs
Browse files Browse the repository at this point in the history
also, small refactoring in the grammar with regards to spaces
 after attributes

Fixes bytesparadise#117

Signed-off-by: Xavier Coulon <[email protected]>
  • Loading branch information
xcoulon committed Jun 2, 2018
1 parent d402c2d commit 20f88dd
Show file tree
Hide file tree
Showing 7 changed files with 1,533 additions and 1,455 deletions.
10 changes: 9 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,14 @@ generate:
@echo "generating the parser..."
@pigeon ./pkg/parser/asciidoc-grammar.peg > ./pkg/parser/asciidoc_parser.go

.PHONY: generate-optimized
## generates the .go file based on the asciidoc grammar, running pigeon with the -optimize-grammar flag
generate-optimized:
@echo "generating the parser..."
@pigeon -optimize-grammar ./pkg/parser/asciidoc-grammar.peg > ./pkg/parser/asciidoc_parser.go
# @pigeon -optimize-parser ./pkg/parser/asciidoc-grammar.peg > ./pkg/parser/asciidoc_parser.go


.PHONY: test
## run all tests except in the 'vendor' package
test: deps generate
Expand All @@ -102,7 +110,7 @@ test: deps generate

.PHONY: build
## builds the binary executable from CLI
build: $(INSTALL_PREFIX) deps generate
build: $(INSTALL_PREFIX) deps generate-optimized
$(eval BUILD_COMMIT:=$(shell git rev-parse --short HEAD))
$(eval BUILD_TAG:=$(shell git tag --contains $(BUILD_COMMIT)))
$(eval BUILD_TIME:=$(shell date -u '+%Y-%m-%dT%H:%M:%SZ'))
Expand Down
28 changes: 26 additions & 2 deletions libasciidoc.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ package libasciidoc

import (
"context"
"encoding/json"
"io"
"time"

"github.com/bytesparadise/libasciidoc/pkg/parser"
"github.com/bytesparadise/libasciidoc/pkg/renderer"
Expand All @@ -25,28 +27,50 @@ var (
// The conversion result is written in the given writer `output`, whereas the document metadata (title, etc.) (or an error if a problem occurred) is returned
// as the result of the function call.
//
// The file is parsed with memoization enabled. The parsing duration and the parser statistics are logged at info level.
func ConvertFileToHTML(ctx context.Context, filename string, output io.Writer, options ...renderer.Option) (map[string]interface{}, error) {
	log.Infof("parsing the asciidoc source...")
	stats := parser.Stats{}
	start := time.Now()
	doc, err := parser.ParseFile(filename, parser.Memoize(true), parser.Statistics(&stats, "no match"))
	if err != nil {
		return nil, errors.Wrapf(err, "error while parsing the document")
	}
	log.Infof("parsed the asciidoc source in %v ", time.Since(start))
	// the stats are informational only: a marshalling failure is reported but does not abort the conversion
	if b, err := json.MarshalIndent(stats.ChoiceAltCnt, "", " "); err != nil {
		log.Warnf("failed to produce stats: %v", err)
	} else {
		log.Infof("parsing stats: %s", string(b))
	}
	return convertToHTML(ctx, doc, output, options...)
}

// ConvertToHTML converts the content of the given reader `r` into a full HTML document, written in the given writer `output`.
// Returns the document metadata, or an error if a problem occurred.
//
// The content is parsed with memoization enabled. The parsing duration and the parser statistics are logged at info level.
func ConvertToHTML(ctx context.Context, r io.Reader, output io.Writer, options ...renderer.Option) (map[string]interface{}, error) {
	start := time.Now()
	stats := parser.Stats{}
	doc, err := parser.ParseReader("", r, parser.Memoize(true), parser.Statistics(&stats, "no match"))
	if err != nil {
		return nil, errors.Wrapf(err, "error while parsing the document")
	}
	log.Infof("parsed the asciidoc source in %v ", time.Since(start))
	// the stats are informational only: a marshalling failure is reported but does not abort the conversion
	if b, err := json.MarshalIndent(stats.ChoiceAltCnt, "", " "); err != nil {
		log.Warnf("failed to produce stats: %v", err)
	} else {
		log.Infof("parsing stats: %s", string(b))
	}
	return convertToHTML(ctx, doc, output, options...)
}

// convertToHTML renders the given parsed document `doc` as HTML in the given writer `output`
// and returns the metadata produced by the renderer. The rendering duration is logged at info level.
// Returns an error if `doc` is not a `types.Document` or if the rendering failed.
func convertToHTML(ctx context.Context, doc interface{}, output io.Writer, options ...renderer.Option) (map[string]interface{}, error) {
	// `doc` is an untyped value: check its type instead of panicking on an unexpected one
	document, ok := doc.(types.Document)
	if !ok {
		return nil, errors.Errorf("error while rendering the document: unexpected type of document: %T", doc)
	}
	start := time.Now()
	metadata, err := htmlrenderer.Render(renderer.Wrap(ctx, document, options...), output)
	if err != nil {
		return nil, errors.Wrapf(err, "error while rendering the document")
	}
	log.Debugf("Done processing document")
	log.Infof("rendered the HTML output in %v", time.Since(start))
	return metadata, nil
}
28 changes: 18 additions & 10 deletions pkg/parser/asciidoc-grammar.peg
Original file line number Diff line number Diff line change
Expand Up @@ -126,42 +126,50 @@ TableOfContentsMacro <- "toc::[]" NEWLINE
// ------------------------------------------
// Element Attributes
// ------------------------------------------
// a single attribute (of any supported kind), followed by optional trailing spaces and the end of the line
ElementAttribute <- attr:(ElementID / ElementTitle / AdmonitionMarkerAttribute / AttributeGroup / InvalidElementAttribute) WS* EOL {
    return attr, nil // avoid returning something like `[]interface{}{attr, EOL}`
}

// an element ID, either in the `[[id]]` form (see InlineElementID) or in the `[#id]` form
ElementID <- id:(InlineElementID) {
    return id, nil
} / "[#" id:(ID) "]" {
    return types.NewElementID(id.(string))
}

// an element ID in the `[[id]]` form
InlineElementID <- "[[" id:(ID) "]]" {
    return types.NewElementID(id.(string))
}

// a title attached to an element, such as a BlockImage
// a title starts with a single "." followed by the value, without space in-between
// NOTE(review): the value matches every character up to the NEWLINE, trailing spaces included
ElementTitle <- "." !"." !WS title:(!NEWLINE .)+ {
    return types.NewElementTitle(title.([]interface{}))
}

// expression for the whole admonition marker (eg: `[NOTE]`), but only retains the actual kind
AdmonitionMarkerAttribute <- "[" k:(AdmonitionKind) "]" {
    return k, nil
}

// one or more attributes. eg: [foo, key1=value1, key2=value2]
// the first attribute is matched by GenericAttribute, each following one (with its leading ",") by OtherGenericAttribute
AttributeGroup <- "[" attribute:(GenericAttribute) attributes:(OtherGenericAttribute)* "]" {
    return types.NewAttributeGroup(append([]interface{}{attribute}, attributes.([]interface{})...))
}

// a single attribute, either in the `key=value` form or in the `key` form (no value)
GenericAttribute <- key:(AttributeKey) "=" value:(AttributeValue) { // value is set
    return types.NewGenericAttribute(key.([]interface{}), value.([]interface{}))
} / key:(AttributeKey) { // value is not set
    return types.NewGenericAttribute(key.([]interface{}), nil)
}

// an attribute that is not the first one in its group: a "," separator and optional spaces, followed by a regular attribute
OtherGenericAttribute <- "," WS* attr:(GenericAttribute) {
    return attr, nil // the attribute (value set or not) is built by the GenericAttribute rule
}



AttributeKey <- key: (!WS !"=" !"," !"]" .)+ WS* {
// fmt.Printf("found attribute key: %v\n", key)
return key, nil
Expand Down
Loading

0 comments on commit 20f88dd

Please sign in to comment.