Skip to content

Commit

Permalink
Add a goldmark parser extension for first class sections
Browse files Browse the repository at this point in the history
  • Loading branch information
iwahbe committed Aug 12, 2024
1 parent c33c861 commit 9e1c136
Show file tree
Hide file tree
Showing 6 changed files with 281 additions and 58 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ require (
github.com/spf13/afero v1.9.5
github.com/spf13/cobra v1.8.0
github.com/stretchr/testify v1.9.0
github.com/teekennedy/goldmark-markdown v0.3.0
github.com/yuin/goldmark v1.7.4
github.com/zclconf/go-cty v1.14.2
golang.org/x/crypto v0.24.0
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1920,6 +1920,8 @@ github.com/pulumi/terraform-plugin-sdk/v2 v2.0.0-20240520223432-0c0bf0d65f10 h1:
github.com/pulumi/terraform-plugin-sdk/v2 v2.0.0-20240520223432-0c0bf0d65f10/go.mod h1:H+8tjs9TjV2w57QFVSMBQacf8k/E1XwLXGCARgViC6A=
github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rhysd/go-fakeio v1.0.0 h1:+TjiKCOs32dONY7DaoVz/VPOdvRkPfBkEyUDIpM8FQY=
github.com/rhysd/go-fakeio v1.0.0/go.mod h1:joYxF906trVwp2JLrE4jlN7A0z6wrz8O6o1UjarbFzE=
github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis=
Expand Down Expand Up @@ -1997,6 +1999,8 @@ github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXl
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/teekennedy/goldmark-markdown v0.3.0 h1:ik9/biVGCwGWFg8dQ3KVm2pQ/wiiG0whYiUcz9xH0W8=
github.com/teekennedy/goldmark-markdown v0.3.0/go.mod h1:kMhDz8La77A9UHvJGsxejd0QUflN9sS+QXCqnhmxmNo=
github.com/texttheater/golang-levenshtein v1.0.1 h1:+cRNoVrfiwufQPhoMzB6N0Yf/Mqajr6t1lOv8GyGE2U=
github.com/texttheater/golang-levenshtein v1.0.1/go.mod h1:PYAKrbF5sAiq9wd+H82hs7gNaen0CplQ9uvm6+enD/8=
github.com/tweekmonster/luser v0.0.0-20161003172636-3fa38070dbd7 h1:X9dsIWPuuEJlPX//UmRKophhOKCGXc46RVIGuttks68=
Expand Down
61 changes: 3 additions & 58 deletions pkg/tfgen/docs.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,13 @@ import (
"github.com/spf13/afero"
"github.com/yuin/goldmark"
gmast "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
gmparser "github.com/yuin/goldmark/parser"
gmtext "github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
"golang.org/x/text/cases"
"golang.org/x/text/language"

"github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tf2pulumi/convert"
"github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfbridge"
"github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfgen/parse"
)

const (
Expand Down Expand Up @@ -388,52 +386,6 @@ func trimFrontMatter(text []byte) []byte {
return body[idx+3:]
}

// gmWalkNodes calls f on node and then, depth first, on every descendant of
// node in sibling order.
func gmWalkNodes(node gmast.Node, f func(gmast.Node)) {
	f(node)

	child := node.FirstChild()
	for child != nil {
		gmWalkNodes(child, f)
		// The next sibling is looked up only after the subtree has been
		// visited, so edits made by f during the visit are observed.
		child = child.NextSibling()
	}
}

// gmWalkNode visits node and all of its descendants via gmWalkNodes and calls
// f on each visited node whose dynamic type is T. Nodes of any other type are
// skipped.
func gmWalkNode[T gmast.Node](node gmast.Node, f func(T)) {
	visit := func(candidate gmast.Node) {
		if typed, matches := candidate.(T); matches {
			f(typed)
		}
	}
	gmWalkNodes(node, visit)
}

// recognizeHeaderAfterHTML is an AST transformer that papers over a difference
// between how TF's registry parses markdown and how goldmark's CommonMark
// parser does.
//
// Goldmark correctly (for CommonMark) parses the following as a single HTML
// block:
//
//	<div>
//	content
//	</div>
//	## Header
//
// This pattern is common in GCP, and we need it parsed as an HTML block
// followed by a header block. The transformer rewrites the AST accordingly.
type recognizeHeaderAfterHTML struct{}

// Transform inspects every HTML block under node. When the last line of a
// block starts with "## ", that line is removed from the block and a new
// level-2 heading owning the line is inserted immediately after the block.
func (recognizeHeaderAfterHTML) Transform(node *gmast.Document, reader gmtext.Reader, pc gmparser.Context) {
	gmWalkNode(node, func(block *gmast.HTMLBlock) {
		lines := block.Lines()
		count := lines.Len()
		if count == 0 {
			return
		}

		final := lines.At(count - 1)
		if !bytes.HasPrefix(final.Value(reader.Source()), []byte("## ")) {
			return
		}

		// Drop the trailing line from the HTML block and hand it to a new
		// heading placed right after the block.
		lines.SetSliced(0, count-1)
		heading := gmast.NewHeading(2)
		heading.Lines().Append(final)
		parent := block.Parent()
		parent.InsertAfter(parent, block, heading)
	})
}

func splitByMarkdownHeaders(text string, level int) [][]string {
// splitByMarkdownHeaders parses text, then walks the resulting AST to find
// appropriate header nodes. It uses the location of these header nodes to split
Expand All @@ -444,17 +396,10 @@ func splitByMarkdownHeaders(text string, level int) [][]string {
offset := len(text) - len(bytes)
contract.Assertf(offset >= 0, "The offset generated by chopping of the front-matter cannot be negative")

gm := goldmark.New(
goldmark.WithExtensions(extension.GFM),
goldmark.WithParserOptions(
gmparser.WithASTTransformers(
util.Prioritized(recognizeHeaderAfterHTML{}, 2000),
),
),
)
gm := goldmark.New(goldmark.WithExtensions(parse.TFRegistryExtension))

headers := []int{}
gmWalkNode(gm.Parser().Parse(gmtext.NewReader(bytes)), func(heading *gmast.Heading) {
parse.WalkNode(gm.Parser().Parse(gmtext.NewReader(bytes)), func(heading *gmast.Heading) {
if heading.Level != level {
return
}
Expand Down
84 changes: 84 additions & 0 deletions pkg/tfgen/parse/extension.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Copyright 2016-2024, Pulumi Corporation.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package parse

import (
"bytes"

"github.com/pulumi/pulumi/sdk/v3/go/common/util/contract"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"

"github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfgen/parse/section"
)

// TFRegistryExtension is the goldmark extension used to parse TF registry
// markdown: GFM, level-2 sections, and the header-after-HTML workaround.
var TFRegistryExtension goldmark.Extender = tfRegistryExtension{}

// tfRegistryExtension is the stateless implementation behind
// TFRegistryExtension.
type tfRegistryExtension struct{}

// Extend installs GFM, the section extension (level 2, priority 100) and the
// recognizeHeaderAfterHTML AST transformer (priority 2000) on md.
//
// NOTE(review): goldmark appears to run transformers with lower priority
// values first, so the section transformer would run before
// recognizeHeaderAfterHTML and headings recovered from HTML blocks would not
// be wrapped in sections. Confirm this ordering is intended.
func (tfRegistryExtension) Extend(md goldmark.Markdown) {
	extension.GFM.Extend(md)
	section.Extension(2, 100).Extend(md)

	htmlHeaderFix := util.Prioritized(recognizeHeaderAfterHTML{}, 2000)
	md.Parser().AddOptions(parser.WithASTTransformers(htmlHeaderFix))
}

// recognizeHeaderAfterHTML works around a mismatch between TF's registry
// markdown parser and goldmark's CommonMark parser.
//
// Goldmark correctly (for CommonMark) treats the following as one HTML block:
//
//	<div>
//	content
//	</div>
//	## Header
//
// The pattern shows up often in GCP, where it has to be read as an HTML block
// followed by a header block. This AST transformation makes that change.
type recognizeHeaderAfterHTML struct{}

// Transform visits each HTML block in node. If the block's final line begins
// with "## ", the line is cut from the block and attached to a freshly created
// level-2 heading that is inserted directly after the block.
func (recognizeHeaderAfterHTML) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
	WalkNode(node, func(html *ast.HTMLBlock) {
		segments := html.Lines()
		n := segments.Len()
		if n == 0 {
			return
		}

		trailing := segments.At(n - 1)
		isHeader := bytes.HasPrefix(trailing.Value(reader.Source()), []byte("## "))
		if !isHeader {
			return
		}

		// Shrink the HTML block by one line, then re-home that line in a
		// heading that follows the block.
		segments.SetSliced(0, n-1)
		header := ast.NewHeading(2)
		header.Lines().Append(trailing)
		owner := html.Parent()
		owner.InsertAfter(owner, html, header)
	})
}

// WalkNode walks the tree rooted at node with ast.Walk and calls f once, on
// entry, for every node whose dynamic type is T.
func WalkNode[T ast.Node](node ast.Node, f func(T)) {
	visit := func(current ast.Node, entering bool) (ast.WalkStatus, error) {
		if !entering {
			return ast.WalkContinue, nil
		}
		if typed, matches := current.(T); matches {
			f(typed)
		}
		return ast.WalkContinue, nil
	}

	// visit never returns an error, so any error here is a programming bug.
	contract.AssertNoErrorf(ast.Walk(node, visit), "impossible: ast.Walk never returns an error")
}
82 changes: 82 additions & 0 deletions pkg/tfgen/parse/section/section.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// Copyright 2016-2024, Pulumi Corporation.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package section

import (
"github.com/pulumi/pulumi/sdk/v3/go/common/util/contract"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
)

// Compile-time check that section implements goldmark.Extender.
var _ goldmark.Extender = section{}

// Extension returns a goldmark.Extender that registers the section AST
// transformer for headings of the given level, using priority as the
// transformer's priority.
func Extension(level, priority int) goldmark.Extender {
	return section{level: level, priority: priority}
}

// Kind is the ast.NodeKind reported by Section nodes.
var Kind = ast.NewNodeKind("Section")

// section is the goldmark.Extender returned by Extension.
type section struct {
	level    int // heading level that opens a section
	priority int // priority the AST transformer is registered with
}

// Extend registers a sectionParser for s.level on md's parser at s.priority.
func (s section) Extend(md goldmark.Markdown) {
	transformer := util.Prioritized(sectionParser{s.level}, s.priority)
	md.Parser().AddOptions(parser.WithASTTransformers(transformer))
}

// Section is a block node that groups a heading with the nodes that follow it.
type Section struct {
	ast.BaseBlock
}

// Heading returns the section's first child as a heading.
//
// It panics if the first child is not an *ast.Heading.
func (s *Section) Heading() *ast.Heading {
	first := s.FirstChild()
	return first.(*ast.Heading)
}

// Dump writes a debug representation of s via ast.DumpHelper.
func (s *Section) Dump(source []byte, level int) {
	ast.DumpHelper(s, source, level, nil, nil)
}

// Kind reports the node kind of a Section, which is always Kind.
func (s *Section) Kind() ast.NodeKind {
	return Kind
}

// Transform wraps every heading of level s.level found under node in a
// Section. The section's first child is the heading; it is followed by all
// subsequent siblings of the heading up to (but not including) the next
// heading of the same or a shallower level (Level <= the heading's level).
// Deeper sub-headings therefore stay inside the section.
//
// reader and pc are unused; they are present to satisfy goldmark's
// parser.ASTTransformer interface.
func (s sectionParser) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
	// Collect the headings before touching the tree. ast.Walk advances using
	// the sibling links of the node it just visited, so re-parenting a heading
	// while the walk is in flight makes it skip the rest of the original
	// parent's children.
	var headings []*ast.Heading
	err := ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
		// Each node is reported twice (enter and exit); only act on entry.
		if !entering {
			return ast.WalkContinue, nil
		}
		if heading, ok := n.(*ast.Heading); ok && heading.Level == s.level {
			headings = append(headings, heading)
		}
		return ast.WalkContinue, nil
	})
	contract.AssertNoErrorf(err, "The walker cannot error, so the walk cannot error")

	for _, heading := range headings {
		parent := heading.Parent()
		sec := &Section{}

		// Remember the following sibling before the heading is moved.
		next := heading.NextSibling()

		// Put the section where the heading was *before* moving the heading
		// into it: once the heading has been re-parented, parent can no
		// longer locate it and the replacement would silently do nothing.
		parent.ReplaceChild(parent, heading, sec)
		sec.AppendChild(sec, heading)

		for next != nil {
			// A heading at the same or a shallower level starts a new section.
			if h, ok := next.(*ast.Heading); ok && h.Level <= heading.Level {
				break
			}
			child := next
			// Advance first: appending child to sec rewrites its sibling links.
			next = next.NextSibling()
			sec.AppendChild(sec, child)
		}
	}
}

// sectionParser is the AST transformer that creates Section nodes for headings
// of the configured level.
type sectionParser struct{ level int }
107 changes: 107 additions & 0 deletions pkg/tfgen/parse/section/section_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Copyright 2016-2024, Pulumi Corporation.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package section_test

import (
"bytes"
"testing"

"github.com/hexops/autogold/v2"
"github.com/stretchr/testify/require"
markdown "github.com/teekennedy/goldmark-markdown"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"

"github.com/pulumi/pulumi-terraform-bridge/v3/pkg/tfgen/parse/section"
)

// walkTransformer adapts a walk callback so that it can be registered as a
// goldmark AST transformer.
type walkTransformer func(node ast.Node, entering bool) (ast.WalkStatus, error)

// Transform walks node with w and panics if the walk returns an error.
func (w walkTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
	// w is a distinct named type, so convert it to a plain func for ast.Walk.
	walker := (func(ast.Node, bool) (ast.WalkStatus, error))(w)
	if err := ast.Walk(node, walker); err != nil {
		panic(err)
	}
}

// TestSection parses markdown with the section extension, runs a per-case walk
// over the resulting AST, renders the document back to markdown and compares
// the output with the recorded expectation.
func TestSection(t *testing.T) {
	t.Parallel()

	type testCase struct {
		input    string
		walk     func(src []byte, node ast.Node, entering bool) (ast.WalkStatus, error)
		expected autogold.Value
	}

	// removeFirstSection dumps every section it enters and removes the one
	// whose heading text is "1" from its parent.
	removeFirstSection := func(src []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
		sec, isSection := node.(*section.Section)
		if !isSection || !entering {
			return ast.WalkContinue, nil
		}
		sec.Dump(src, 0)
		title := sec.FirstChild().(*ast.Heading).Text(src)
		if string(title) == "1" {
			sec.Parent().RemoveChild(sec.Parent(), sec)
		}
		return ast.WalkContinue, nil
	}

	cases := []testCase{
		{
			input: `
Hi
## 1
content *foo*
content
## 2
content (again)
`,
			walk: removeFirstSection,
			expected: autogold.Expect(`Hi
## 2
content (again)
`),
		},
	}

	for _, tc := range cases {
		tc := tc
		t.Run("", func(t *testing.T) {
			t.Parallel()
			source := []byte(tc.input)

			// Bind the source so the case's walk can read node text.
			transformer := walkTransformer(func(node ast.Node, entering bool) (ast.WalkStatus, error) {
				return tc.walk(source, node, entering)
			})

			var out bytes.Buffer
			md := goldmark.New(
				goldmark.WithExtensions(section.Extension(2, 100)),
				goldmark.WithParserOptions(
					parser.WithASTTransformers(
						util.Prioritized(transformer, 2000),
					),
				),
				goldmark.WithRenderer(markdown.NewRenderer()),
			)
			require.NoError(t, md.Convert(source, &out))
			tc.expected.Equal(t, out.String())
		})
	}
}

0 comments on commit 9e1c136

Please sign in to comment.