Skip to content

Commit

Permalink
refactor(parser): parse by fragments and process in pipeline
Browse files Browse the repository at this point in the history
Fragments are blocks of contiguous lines separated by blank lines.
Blank lines within delimited blocks are preserved (hence they do not
split the fragments)
Once a fragment is read, it is sent on a channel to be processed
by the next stages of the pipeline, until it reaches the aggregator, which
combines all the fragments into the resulting `types.Document`, which
can then be rendered.

Also:
- removed tests in "raw documents" for quoted text, since they have little
 to no value.

BREAKING CHANGE:
- using pointers on all structs in `pkg/types`
- removing `types.Document.Attributes`, holding attributes in context
  as parsing/rendering progresses.
- `types.Paragraph.Lines` (`[][]interface{}`) is replaced by
  `types.Paragraph.Elements` (`[]interface{}`)
- all delimited blocks types are merged into `types.DelimitedBlock`
- all lists are merged into `types.List`
- removed the `Level` field in `types.ListElement` struct
- refactor document structure with header (level 0) and
  sections (level 1 to 5)

Fixes bytesparadise#843

Signed-off-by: Xavier Coulon <[email protected]>
  • Loading branch information
xcoulon committed Nov 14, 2021
1 parent 82bfd34 commit 78a9120
Show file tree
Hide file tree
Showing 249 changed files with 114,649 additions and 90,255 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
go-version: [1.15.x, 1.14.x]
go-version: [1.16.x,1.15.x]
os: [ubuntu-latest, macos-latest, windows-latest]
name: Test ${{ matrix.os }} with Go ${{ matrix.go-version }}

Expand Down
8 changes: 5 additions & 3 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,21 @@ run:
timeout: 5m

linters:
enable-all: false
enable:
- megacheck
- govet
- gocyclo
- unused
- gofmt
- golint
- revive
- misspell
enable-all: false
- exportloopref
disable-all: false
disable:
- maligned
- prealloc
disable-all: false
- scopelint
presets:
- bugs
- unused
Expand Down
4 changes: 2 additions & 2 deletions README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,11 @@ Libasciidoc provides 2 functions to convert an Asciidoc content into HTML:

1. Converting an `io.Reader` into an HTML document:

Convert(r io.Reader, output io.Writer, config configuration.Configuration) (types.Metadata, error)
Convert(r io.Reader, output io.Writer, config *configuration.Configuration) (types.Metadata, error)

2. Converting a file (given its name) into an HTML document:

ConvertFile(output io.Writer, config configuration.Configuration) (types.Metadata, error)
ConvertFile(output io.Writer, config *configuration.Configuration) (types.Metadata, error)

where the returned `types.Metadata` object contains the document's title which is not part of the generated HTML `<body>` part, as well as the other attributes of the source document.

Expand Down
4 changes: 2 additions & 2 deletions cmd/libasciidoc/root_cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ func getOut(cmd *cobra.Command, sourcePath, outputName string) (io.Writer, close
}

// converts the `name`, `!name` and `name=value` into a map
func parseAttributes(attributes []string) map[string]string {
result := make(map[string]string, len(attributes))
func parseAttributes(attributes []string) map[string]interface{} {
result := make(map[string]interface{}, len(attributes))
for _, attr := range attributes {
data := strings.Split(attr, "=")
if len(data) > 1 {
Expand Down
4 changes: 1 addition & 3 deletions cmd/libasciidoc/root_cmd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ var _ = Describe("root cmd", func() {
// when
err := root.Execute()
// then
GinkgoT().Logf("command output: %v", buf.String())
Expect(err).To(HaveOccurred())
})

Expand Down Expand Up @@ -93,11 +92,10 @@ var _ = Describe("root cmd", func() {
// when
err := root.Execute()
// then
GinkgoT().Logf("out: %v", buf.String())
Expect(err).ToNot(HaveOccurred())
Expect(buf.String()).ToNot(BeEmpty())
// console output also includes a warning message
Expect(buf.String()).To(Equal(`level=warning msg="unable to find attribute 'foo2'"
Expect(buf.String()).To(ContainSubstring(`level=warning msg="unable to find attribute 'foo2'"
<div class="paragraph">
<p>bar1 and {foo2}</p>
</div>
Expand Down
16 changes: 9 additions & 7 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,24 @@ go 1.11
require (
github.com/alecthomas/chroma v0.7.1
github.com/davecgh/go-spew v1.1.1
github.com/google/go-cmp v0.5.2 // indirect
github.com/google/go-cmp v0.5.5
github.com/kr/text v0.2.0 // indirect
github.com/mattn/go-isatty v0.0.12 // indirect
github.com/mna/pigeon v1.0.1-0.20200224192238-18953b277063
github.com/mna/pigeon v1.1.0
github.com/modocache/gover v0.0.0-20171022184752-b58185e213c5 // indirect
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
github.com/onsi/ginkgo v1.13.0
github.com/onsi/gomega v1.10.1
github.com/onsi/ginkgo v1.16.4
github.com/onsi/gomega v1.13.0
github.com/pkg/errors v0.9.1
github.com/sergi/go-diff v1.0.0
github.com/sirupsen/logrus v1.7.0
github.com/sozorogami/gover v0.0.0-20171022184752-b58185e213c5
github.com/spf13/cobra v1.1.1
github.com/stretchr/testify v1.6.1
golang.org/x/sys v0.0.0-20201009025420-dfb3f7c4e634 // indirect
golang.org/x/tools v0.0.0-20201013201025-64a9e34f3752 // indirect
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect
gopkg.in/yaml.v2 v2.3.0
gopkg.in/yaml.v2 v2.4.0
)

// include support for disabling unexported fields
// TODO: still needed?
replace github.com/davecgh/go-spew => github.com/flw-cn/go-spew v1.1.2-0.20200624141737-10fccbfd0b23
55 changes: 40 additions & 15 deletions go.sum

Large diffs are not rendered by default.

26 changes: 16 additions & 10 deletions libasciidoc.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ var (
// ConvertFile converts the content of the given filename into an output document.
// The conversion result is written in the given writer `output`, whereas the document metadata (title, etc.) (or an error if a problem occurred) is returned
// as the result of the function call. The output format is determined by config.Backend (HTML5 default).
func ConvertFile(output io.Writer, config configuration.Configuration) (types.Metadata, error) {
func ConvertFile(output io.Writer, config *configuration.Configuration) (types.Metadata, error) {
file, err := os.Open(config.Filename)
if err != nil {
return types.Metadata{}, errors.Wrapf(err, "error opening %s", config.Filename)
Expand All @@ -50,9 +50,9 @@ func ConvertFile(output io.Writer, config configuration.Configuration) (types.Me

// Convert converts the content of the given reader `r` into a full output document, written in the given writer `output`.
// Returns an error if a problem occurred. The default will be HTML5, but depends on the config.BackEnd value.
func Convert(r io.Reader, output io.Writer, config configuration.Configuration) (types.Metadata, error) {
func Convert(r io.Reader, output io.Writer, config *configuration.Configuration) (types.Metadata, error) {

var render func(*renderer.Context, types.Document, io.Writer) (types.Metadata, error)
var render func(*renderer.Context, *types.Document, io.Writer) (types.Metadata, error)
switch config.BackEnd {
case "html", "html5", "":
render = html5.Render
Expand All @@ -73,16 +73,22 @@ func Convert(r io.Reader, output io.Writer, config configuration.Configuration)
return types.Metadata{}, err
}
// validate the document
problems, err := validator.Validate(&doc)
doctype := config.Attributes.GetAsStringWithDefault(types.AttrDocType, "article")
problems, err := validator.Validate(doc, doctype)
if err != nil {
return types.Metadata{}, err
}
for _, problem := range problems {
switch problem.Severity {
case validator.Error:
log.Error(problem.Message)
case validator.Warning:
log.Warn(problem.Message)
if len(problems) > 0 {
// if any problem found, change the doctype to render the document as a regular article
log.Warnf("changing doctype to 'article' because problems were found in the document: %v", problems)
config.Attributes[types.AttrDocType] = "article" // switch to `article` rendering (in case it was a manpage with problems)
for _, problem := range problems {
switch problem.Severity {
case validator.Error:
log.Error(problem.Message)
case validator.Warning:
log.Warn(problem.Message)
}
}
}
// render
Expand Down
128 changes: 126 additions & 2 deletions libasciidoc_bench_test.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,140 @@
package libasciidoc_test

import (
"strings"
"testing"

"github.com/bytesparadise/libasciidoc"
"github.com/bytesparadise/libasciidoc/pkg/configuration"
"github.com/bytesparadise/libasciidoc/pkg/types"
"github.com/bytesparadise/libasciidoc/testsupport"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func BenchmarkLibasciidoc(b *testing.B) {
// TODO: unexclude this bench func
func XBenchmarkRenderRealDocument(b *testing.B) {
filename := "./test/bench/mocking.adoc"
for i := 0; i < b.N; i++ {
_, err := testsupport.RenderHTML5Document(filename)
out := &strings.Builder{}
_, err := libasciidoc.ConvertFile(out,
configuration.NewConfiguration(
configuration.WithFilename(filename),
configuration.WithCSS("path/to/style.css"),
configuration.WithHeaderFooter(true)))
require.NoError(b, err)
}
}

// BenchmarkParseBasicDocument measures how long it takes to parse a small
// document made of a single section title followed by one multi-line
// paragraph. Any parsing error fails the benchmark immediately.
func BenchmarkParseBasicDocument(b *testing.B) {
	const source = `== Lorem Ipsum
Lorem ipsum dolor sit amet, consetetur sadipscing elitr,
sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat,
sed diam voluptua.
At vero eos et accusam et justo duo dolores et ea rebum.
Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
Lorem ipsum dolor sit amet, consetetur sadipscing elitr,
sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat,
sed diam voluptua.
At vero eos et accusam et justo duo dolores et ea rebum.
Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.`

	// parse the same source once per benchmark iteration
	for remaining := b.N; remaining > 0; remaining-- {
		_, parseErr := testsupport.ParseDocument(source)
		require.NoError(b, parseErr)
	}
}

// BenchmarkParseLongDocument measures how long it takes to parse a long
// document made of 50 consecutive copies of the same section (a title followed
// by one multi-line paragraph). Any parsing error fails the benchmark
// immediately.
func BenchmarkParseLongDocument(b *testing.B) {
	// build the input once, up front: 50 repetitions of the same section
	source := strings.Repeat(`== Lorem Ipsum
Lorem ipsum dolor sit amet, consetetur sadipscing elitr,
sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat,
sed diam voluptua.
At vero eos et accusam et justo duo dolores et ea rebum.
Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
Lorem ipsum dolor sit amet, consetetur sadipscing elitr,
sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat,
sed diam voluptua.
At vero eos et accusam et justo duo dolores et ea rebum.
Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
`, 50)

	// exclude the input construction above from the measured time
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_, err := testsupport.ParseDocument(source)
		require.NoError(b, err)
	}
}

// TestParseBasicDocument parses a document made of a single level-1 section
// whose paragraph ends with a bold word (`*amet*`) and compares the result
// with the expected `types.Document`: one section holding one paragraph split
// into a leading string, a bold quoted text and a trailing ".", plus an
// element reference mapping the section ID to its title.
func TestParseBasicDocument(t *testing.T) {
	const source = `== Lorem Ipsum
Lorem ipsum dolor sit amet, consetetur sadipscing elitr,
sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat,
sed diam voluptua.
At vero eos et accusam et justo duo dolores et ea rebum.
Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
Lorem ipsum dolor sit amet, consetetur sadipscing elitr,
sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat,
sed diam voluptua.
At vero eos et accusam et justo duo dolores et ea rebum.
Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit *amet*.`

	// the same title slice is used by the section and by the element references
	sectionTitle := []interface{}{
		&types.StringElement{Content: "Lorem Ipsum"},
	}

	// the paragraph: plain text, then the bold word, then the final period
	paragraph := &types.Paragraph{
		Elements: []interface{}{
			&types.StringElement{
				Content: `Lorem ipsum dolor sit amet, consetetur sadipscing elitr,
sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat,
sed diam voluptua.
At vero eos et accusam et justo duo dolores et ea rebum.
Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
Lorem ipsum dolor sit amet, consetetur sadipscing elitr,
sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat,
sed diam voluptua.
At vero eos et accusam et justo duo dolores et ea rebum.
Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit `,
			},
			&types.QuotedText{
				Kind: types.SingleQuoteBold,
				Elements: []interface{}{
					&types.StringElement{Content: "amet"},
				},
			},
			&types.StringElement{Content: "."},
		},
	}

	section := &types.Section{
		Level: 1,
		Attributes: types.Attributes{
			types.AttrID: "_lorem_ipsum",
		},
		Title:    sectionTitle,
		Elements: []interface{}{paragraph},
	}

	want := &types.Document{
		Elements: []interface{}{section},
		ElementReferences: types.ElementReferences{
			"_lorem_ipsum": sectionTitle,
		},
	}

	got, err := testsupport.ParseDocument(source)
	require.NoError(t, err)
	assert.Equal(t, want, got)
}
Loading

0 comments on commit 78a9120

Please sign in to comment.