Skip to content

Commit

Permalink
refactor(parser): improve grammar (#1081)
Browse files Browse the repository at this point in the history
avoid falling into the `AnyChar` rule when parsing content
with the `NormalGroup` entrypoint

includes refactoring of the `InlineWord` and `Punctuation` rules
to match content faster.

Signed-off-by: Xavier Coulon <[email protected]>
  • Loading branch information
xcoulon authored Sep 18, 2022
1 parent 4d151e0 commit 481ef43
Show file tree
Hide file tree
Showing 16 changed files with 42,956 additions and 70,307 deletions.
2 changes: 1 addition & 1 deletion pkg/parser/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func NewParseContext(config *configuration.Configuration, options ...Option) *Pa
GlobalStore(frontMatterKey, true),
GlobalStore(documentHeaderKey, true),
GlobalStore(usermacrosKey, config.Macros),
GlobalStore(enabledSubstitutionsKey, attributeDeclarations()),
GlobalStore(enabledSubstitutionsKey, normalSubstitutions()),
}
opts = append(opts, options...)
return &ParseContext{
Expand Down
8 changes: 4 additions & 4 deletions pkg/parser/document_preprocessing.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func Preprocess(source io.Reader, config *configuration.Configuration, opts ...O

func preprocess(ctx *ParseContext, source io.Reader) (string, error) {
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("processing file inclusions in %s with leveloffset=%s", ctx.filename, spew.Sdump(ctx.levelOffsets))
log.Debugf("preprocessing file inclusions in %s with leveloffset=%s", ctx.filename, spew.Sdump(ctx.levelOffsets))
}
b := &builder{
enabled: true,
Expand All @@ -40,9 +40,9 @@ func preprocess(ctx *ParseContext, source io.Reader) (string, error) {
// content of line was not relevant in the context of preparsing (ie, it's a regular line), so let's keep it as-is
b.Write(line)
} else {
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("checking element of type '%T'", element)
}
// if log.IsLevelEnabled(log.DebugLevel) {
// log.Debugf("checking element of type '%T'", element)
// }
switch e := element.(type) {
case *types.AttributeDeclaration:
ctx.attributes.set(e.Name, e.Value)
Expand Down
6 changes: 3 additions & 3 deletions pkg/parser/document_processing_aggregate.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ func aggregate(ctx *ParseContext, fragmentStream <-chan types.DocumentFragment)
}

func resolveCrossReferences(element interface{}, attrs *contextAttributes) error {
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("resolving cross references in element of type '%T'", element)
}
// if log.IsLevelEnabled(log.DebugLevel) {
// log.Debugf("resolving cross references in element of type '%T'", element)
// }
switch e := element.(type) {
case types.WithElements:
for _, elmt := range e.GetElements() {
Expand Down
28 changes: 26 additions & 2 deletions pkg/parser/document_processing_apply_substitutions.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package parser
import (
"bytes"
"fmt"
"sort"
"strconv"
"strings"
"time"
Expand All @@ -29,7 +30,7 @@ func ApplySubstitutions(ctx *ParseContext, done <-chan interface{}, fragmentStre
return
}
}
log.WithField("pipeline_stage", "apply_substitutions").Debug("done")
log.WithField("pipeline_stage", "apply_substitutions").Info("done")
}()
return processedFragmentStream
}
Expand Down Expand Up @@ -614,7 +615,8 @@ func parseWithSubstitutions(content interface{}, subs *substitutions, opts ...Op
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("parsing '%s' with '%s' substitutions", serialized, subs.toString())
}
elements, err := parseContent(serialized, append(opts, GlobalStore(enabledSubstitutionsKey, subs))...)
stats := Stats{}
elements, err := parseContent(serialized, append(opts, GlobalStore(enabledSubstitutionsKey, subs))...) // , Statistics(&stats, "no match"), Debug(true)
if err != nil {
return nil, err
}
Expand All @@ -625,6 +627,28 @@ func parseWithSubstitutions(content interface{}, subs *substitutions, opts ...Op
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("parsed content:\n%s", spew.Sdump(elements))
}
if log.IsLevelEnabled(log.InfoLevel) {
log.Infof("parsed '%s' with '%s' substitutions", serialized, subs.toString())
log.Infof("stats:")
log.Infof(" expr count: %v", stats.ExprCnt)
rules := make([]string, 0, len(stats.ChoiceAltCnt))
for rule := range stats.ChoiceAltCnt {
rules = append(rules, rule)
}
sort.Strings(rules)
for _, rule := range rules {
log.Infof(" %s:", rule)
matchs := make([]string, 0, len(stats.ChoiceAltCnt[rule]))
for match := range stats.ChoiceAltCnt[rule] {
matchs = append(matchs, match)
}
sort.Strings(matchs)
for _, match := range matchs {
log.Infof(" - case %s: %d", match, stats.ChoiceAltCnt[rule][match])

}
}
}
return elements, nil
}

Expand Down
10 changes: 0 additions & 10 deletions pkg/parser/document_substitutions.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,16 +70,6 @@ func attributeSubstitutions() *substitutions {
}
}

// attributeDeclarations builds the substitutions whose sequence is, in order:
// inline passthroughs, attribute references, then special characters.
func attributeDeclarations() *substitutions {
	steps := []string{InlinePassthroughs, AttributeRefs, SpecialCharacters}
	return &substitutions{sequence: steps}
}

// noneSubstitutions returns a pointer to a zero-valued substitutions,
// i.e. one with no fields set.
func noneSubstitutions() *substitutions {
	return new(substitutions)
}
Expand Down
20 changes: 7 additions & 13 deletions pkg/parser/link_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ a link to <{example}>.`
Expect(ParseDocument(source)).To(MatchDocument(expected))
})

It("with special character in URL", func() {
It("with invalid special character in URL", func() {
source := `a link to https://example.com>[].`
expected := &types.Document{
Elements: []interface{}{
Expand All @@ -212,11 +212,14 @@ a link to <{example}>.`
&types.InlineLink{
Location: &types.Location{
Scheme: "https://",
Path: "example.com>",
Path: "example.com",
},
},
&types.SpecialCharacter{
Name: ">",
},
&types.StringElement{
Content: ".",
Content: "[].",
},
},
},
Expand Down Expand Up @@ -289,16 +292,7 @@ a link to <{example}>.`
&types.Paragraph{
Elements: []interface{}{
&types.StringElement{
Content: "write to ",
},
&types.InlineLink{
Location: &types.Location{
Scheme: "mailto:",
Path: "[email protected]",
},
},
&types.StringElement{
Content: ".",
Content: "write to [email protected].", // local part must not end with `.`
},
},
},
Expand Down
27 changes: 27 additions & 0 deletions pkg/parser/paragraph_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,33 @@ and another one using attribute substitution: {github-url}[{github-title}]...

Context("regular paragraphs", func() {

It("2 inlinewords and a punctuation", func() {
source := "\"`Get moving!`\" he shouted."
expected := &types.Document{
Elements: []interface{}{
&types.Paragraph{
Elements: []interface{}{
&types.Symbol{
Name: "\"`",
},
&types.StringElement{
Content: "Get moving!",
},
&types.Symbol{
Name: "`\"",
},
&types.StringElement{
Content: " he shouted.",
},
},
},
},
}
result, err := ParseDocument(source)
Expect(err).NotTo(HaveOccurred())
Expect(result).To(MatchDocument(expected))
})

It("3 with basic content", func() {
source := `cookie
Expand Down
Loading

0 comments on commit 481ef43

Please sign in to comment.