Skip to content

Commit

Permalink
internal/frontend: change id generation to use parsed markdown text
Browse files Browse the repository at this point in the history
To produce heading ids that match between the goldmark version of the
code and the rsc.io/markdown version of the code, use the markdown
parser to parse the markdown and then extract the text from it. We do
this because rsc.io/markdown doesn't provide the raw markdown for us
to generate the ids with. This will change the ids that are generated
for some headings.

For golang/go#61399

Change-Id: Id0f26b311b59e848ff1753e058d413ed3168926d
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/548255
LUCI-TryBot-Result: Go LUCI <[email protected]>
kokoro-CI: kokoro <[email protected]>
Reviewed-by: Jonathan Amsterdam <[email protected]>
  • Loading branch information
matloob committed Dec 18, 2023
1 parent 00698da commit 56a70e8
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 26 deletions.
37 changes: 26 additions & 11 deletions internal/frontend/goldmark.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"bytes"
"context"
"fmt"
"regexp"
"strings"

"github.com/yuin/goldmark/ast"
Expand All @@ -22,6 +21,7 @@ import (
"golang.org/x/pkgsite/internal"
"golang.org/x/pkgsite/internal/log"
"golang.org/x/pkgsite/internal/source"
"rsc.io/markdown"
)

// astTransformer is a default transformer of the goldmark tree. We pass in
Expand Down Expand Up @@ -185,20 +185,35 @@ func newIDs() parser.IDs {
// unit page. Duplicated heading ids are given an incremental suffix. See
// readme_test.go for examples.
func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte {
// Matches strings like `<tag attr="value">Text</tag>` or `[![Text](file.svg)](link.html)`.
r := regexp.MustCompile(`(<[^<>]+>|\[\!\[[^\]]+]\([^\)]+\)\]\([^\)]+\))`)
str := r.ReplaceAllString(string(value), "")
var defaultID string
if kind == ast.KindHeading {
defaultID = "heading"
} else {
defaultID = "id"
}

parser := &markdown.Parser{}
doc := parser.Parse("# " + string(value))
return []byte(s.generateID(doc, defaultID))
}

func (s *ids) generateID(block markdown.Block, defaultID string) string {
var buf bytes.Buffer
walkBlocks([]markdown.Block{block}, func(b markdown.Block) error {
if t, ok := b.(*markdown.Text); ok {
for _, inl := range t.Inline {
inl.PrintText(&buf)
}
}
return nil
})
f := func(c rune) bool {
return !('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z') && !('0' <= c && c <= '9')
}
str = strings.Join(strings.FieldsFunc(str, f), "-")
str := strings.Join(strings.FieldsFunc(buf.String(), f), "-")
str = strings.ToLower(str)
if len(str) == 0 {
if kind == ast.KindHeading {
str = "heading"
} else {
str = "id"
}
str = defaultID
}
key := str
for i := 1; ; i++ {
Expand All @@ -208,7 +223,7 @@ func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte {
}
key = fmt.Sprintf("%s-%d", str, i)
}
return []byte("readme-" + key)
return "readme-" + key
}

// Put implements Put from the goldmark parser IDs interface.
Expand Down
28 changes: 15 additions & 13 deletions internal/frontend/markdown.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"strings"

"github.com/google/safehtml/template"
"github.com/yuin/goldmark/ast"
"golang.org/x/pkgsite/internal"
"golang.org/x/pkgsite/internal/derrors"
"golang.org/x/pkgsite/internal/log"
Expand Down Expand Up @@ -118,6 +117,8 @@ func walkBlocks(blocks []markdown.Block, walkFunc func(b markdown.Block) error)

err = nil
switch x := b.(type) {
case *markdown.Document:
err = walkBlocks(x.Blocks, walkFunc)
case *markdown.Text:
case *markdown.Paragraph:
err = walkBlocks([]markdown.Block{x.Text}, walkFunc)
Expand All @@ -130,7 +131,9 @@ func walkBlocks(blocks []markdown.Block, walkFunc func(b markdown.Block) error)
case *markdown.Quote:
err = walkBlocks(x.Blocks, walkFunc)
case *markdown.HTMLBlock:
continue
case *markdown.CodeBlock:
case *markdown.Empty:
case *markdown.ThematicBreak:
default:
return fmt.Errorf("unhandled block type %T", x)
}
Expand Down Expand Up @@ -287,6 +290,12 @@ func transformHeadingsToHTML(doc *markdown.Document) {
rewriteHeadingsBlocks = func(blocks []markdown.Block) {
for i, b := range blocks {
switch x := b.(type) {
case *markdown.Paragraph:
rewriteHeadingsBlocks([]markdown.Block{x.Text})
case *markdown.List:
rewriteHeadingsBlocks(x.Items)
case *markdown.Item:
rewriteHeadingsBlocks(x.Blocks)
case *markdown.Quote:
rewriteHeadingsBlocks(x.Blocks)
case *markdown.Heading:
Expand Down Expand Up @@ -338,19 +347,12 @@ var htmlQuoteEscaper = strings.NewReplacer(
// function, but we don't have the raw markdown anymore, so we use the
// text instead.
func rewriteHeadingIDs(doc *markdown.Document) {
ids := newIDs()
ids := &ids{
values: map[string]bool{},
}
walkBlocks(doc.Blocks, func(b markdown.Block) error {
if heading, ok := b.(*markdown.Heading); ok {
var buf bytes.Buffer
for _, inl := range heading.Text.Inline {
// Hack: use HTML because ids strips out html tags.
// TODO(matloob): change the goldmark code to not use
// raw markdown text and instead depend on the text of the
// nodes.
inl.PrintHTML(&buf)
}

id := ids.Generate(buf.Bytes(), ast.KindHeading)
id := ids.generateID(heading, "heading")
heading.ID = string(id)
}
return nil
Expand Down
16 changes: 14 additions & 2 deletions internal/frontend/readme_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -414,9 +414,9 @@ func TestReadme(t *testing.T) {
Contents: `# [![Image Text](file.svg)](link.html)
`,
},
wantHTML: `<h3 class="h1" id="readme-heading"><a href="https://github.com/valid/module_name/blob/v1.0.0/link.html" rel="nofollow"><img src="https://github.com/valid/module_name/raw/v1.0.0/file.svg" alt="Image Text"/></a></h3>`,
wantHTML: `<h3 class="h1" id="readme-image-text"><a href="https://github.com/valid/module_name/blob/v1.0.0/link.html" rel="nofollow"><img src="https://github.com/valid/module_name/raw/v1.0.0/file.svg" alt="Image Text"/></a></h3>`,
wantOutline: []*Heading{
{Level: 1, Text: "Image Text", ID: "readme-heading"},
{Level: 1, Text: "Image Text", ID: "readme-image-text"},
},
},
{
Expand Down Expand Up @@ -457,6 +457,18 @@ func TestReadme(t *testing.T) {
{Level: 1, Text: "Heading", ID: "readme-heading-3"},
},
},
{
name: "tag in heading",
unit: unit,
readme: &internal.Readme{
Filepath: "README.md",
Contents: `# A link <a href="link">link</a>`,
},
wantHTML: `<h3 class="h1" id="readme-a-link-link">A link <a href="link" rel="nofollow">link</a></h3>`,
wantOutline: []*Heading{
{Level: 1, Text: "A link link", ID: "readme-a-link-link"},
},
},
} {
t.Run(test.name, func(t *testing.T) {
processReadmes := map[string]func(ctx context.Context, u *internal.Unit) (frontendReadme *Readme, err error){
Expand Down

0 comments on commit 56a70e8

Please sign in to comment.