// SanitizedAnchorName returns text converted to an anchor name using the same
// rules as Blackfriday.
//
// Letters and numbers (as classified by the unicode package) are kept and
// lower-cased. Every run of other characters between two kept characters is
// collapsed into a single '-'. Leading and trailing runs are dropped, so the
// result never starts or ends with a dash, and input without any letter or
// number yields the empty string.
//
// Implementation borrowed from https://github.com/russross/blackfriday/blob/a477dd1646916742841ed20379f941cfa6c5bb6f/block.go#L1464
func SanitizedAnchorName(text string) string {
	// Each input rune occupies at least one byte and contributes at most one
	// output rune (itself, or a share of a single dash), so len(text) is an
	// upper bound on the output length. Pre-sizing avoids regrowing the slice.
	anchorName := make([]rune, 0, len(text))

	// pendingDash records that at least one separator has been skipped since
	// the last kept rune.
	pendingDash := false

	for _, r := range text {
		if !unicode.IsLetter(r) && !unicode.IsNumber(r) {
			pendingDash = true
			continue
		}

		// Only emit the dash once something precedes it; this is what drops
		// leading separators.
		if pendingDash && len(anchorName) > 0 {
			anchorName = append(anchorName, '-')
		}
		pendingDash = false
		anchorName = append(anchorName, unicode.ToLower(r))
	}

	return string(anchorName)
}
c.Assert(s, qt.Contains, "This is a footnote.1") c.Assert(s, qt.Contains, "[return]") } + +// Tests borrowed from https://github.com/russross/blackfriday/blob/a925a152c144ea7de0f451eaf2f7db9e52fa005a/block_test.go#L1817 +func TestSanitizedAnchorName(t *testing.T) { + tests := []struct { + text string + want string + }{ + { + text: "This is a header", + want: "this-is-a-header", + }, + { + text: "This is also a header", + want: "this-is-also-a-header", + }, + { + text: "main.go", + want: "main-go", + }, + { + text: "Article 123", + want: "article-123", + }, + { + text: "<- Let's try this, shall we?", + want: "let-s-try-this-shall-we", + }, + { + text: " ", + want: "", + }, + { + text: "Hello, 世界", + want: "hello-世界", + }, + } + for _, test := range tests { + if got := SanitizedAnchorName(test.text); got != test.want { + t.Errorf("SanitizedAnchorName(%q):\ngot %q\nwant %q", test.text, got, test.want) + } + } +} diff --git a/markup/goldmark/autoid.go b/markup/goldmark/autoid.go index aaf1852d161..950d4a5778e 100644 --- a/markup/goldmark/autoid.go +++ b/markup/goldmark/autoid.go @@ -19,6 +19,8 @@ import ( "unicode" "unicode/utf8" + "github.com/gohugoio/hugo/markup/blackfriday" + "github.com/gohugoio/hugo/markup/goldmark/goldmark_config" "github.com/gohugoio/hugo/common/text" @@ -30,34 +32,41 @@ import ( bp "github.com/gohugoio/hugo/bufferpool" ) -func sanitizeAnchorNameString(s string, asciiOnly bool) string { - return string(sanitizeAnchorName([]byte(s), asciiOnly)) +func sanitizeAnchorNameString(s string, idType string) string { + return string(sanitizeAnchorName([]byte(s), idType)) } -func sanitizeAnchorName(b []byte, asciiOnly bool) []byte { - return sanitizeAnchorNameWithHook(b, asciiOnly, nil) +func sanitizeAnchorName(b []byte, idType string) []byte { + return sanitizeAnchorNameWithHook(b, idType, nil) } -func sanitizeAnchorNameWithHook(b []byte, asciiOnly bool, hook func(buf *bytes.Buffer)) []byte { +func sanitizeAnchorNameWithHook(b []byte, idType string, hook 
func(buf *bytes.Buffer)) []byte { buf := bp.GetBuffer() - if asciiOnly { - // Normalize it to preserve accents if possible. - b = text.RemoveAccents(b) - } + if idType == goldmark_config.AutoHeadingIDTypeBlackfriday { + // TODO(bep) make it more efficient. + buf.WriteString(blackfriday.SanitizedAnchorName(string(b))) + } else { + asciiOnly := idType == goldmark_config.AutoHeadingIDTypeGitHubAscii - for len(b) > 0 { - r, size := utf8.DecodeRune(b) - switch { - case asciiOnly && size != 1: - case r == '-' || isSpace(r): - buf.WriteRune('-') - case isAlphaNumeric(r): - buf.WriteRune(unicode.ToLower(r)) - default: + if asciiOnly { + // Normalize it to preserve accents if possible. + b = text.RemoveAccents(b) } - b = b[size:] + for len(b) > 0 { + r, size := utf8.DecodeRune(b) + switch { + case asciiOnly && size != 1: + case r == '-' || isSpace(r): + buf.WriteRune('-') + case isAlphaNumeric(r): + buf.WriteRune(unicode.ToLower(r)) + default: + } + + b = b[size:] + } } if hook != nil { @@ -83,19 +92,19 @@ func isSpace(r rune) bool { var _ parser.IDs = (*idFactory)(nil) type idFactory struct { - asciiOnly bool - vals map[string]struct{} + idType string + vals map[string]struct{} } func newIDFactory(idType string) *idFactory { return &idFactory{ - vals: make(map[string]struct{}), - asciiOnly: idType == goldmark_config.AutoHeadingIDTypeGitHubAscii, + vals: make(map[string]struct{}), + idType: idType, } } func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte { - return sanitizeAnchorNameWithHook(value, ids.asciiOnly, func(buf *bytes.Buffer) { + return sanitizeAnchorNameWithHook(value, ids.idType, func(buf *bytes.Buffer) { if buf.Len() == 0 { if kind == ast.KindHeading { buf.WriteString("heading") diff --git a/markup/goldmark/autoid_test.go b/markup/goldmark/autoid_test.go index 915c6a03cf9..1257b348250 100644 --- a/markup/goldmark/autoid_test.go +++ b/markup/goldmark/autoid_test.go @@ -17,6 +17,8 @@ import ( "strings" "testing" + 
"github.com/gohugoio/hugo/markup/goldmark/goldmark_config" + qt "github.com/frankban/quicktest" ) @@ -69,9 +71,9 @@ under_score expect := expectlines[i] c.Run(input, func(c *qt.C) { b := []byte(input) - got := string(sanitizeAnchorName(b, false)) + got := string(sanitizeAnchorName(b, goldmark_config.AutoHeadingIDTypeGitHub)) c.Assert(got, qt.Equals, expect) - c.Assert(sanitizeAnchorNameString(input, false), qt.Equals, expect) + c.Assert(sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub), qt.Equals, expect) c.Assert(string(b), qt.Equals, input) }) } @@ -80,16 +82,21 @@ under_score func TestSanitizeAnchorNameAsciiOnly(t *testing.T) { c := qt.New(t) - c.Assert(sanitizeAnchorNameString("god is神真美好 good", true), qt.Equals, "god-is-good") - c.Assert(sanitizeAnchorNameString("Resumé", true), qt.Equals, "resume") + c.Assert(sanitizeAnchorNameString("god is神真美好 good", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "god-is-good") + c.Assert(sanitizeAnchorNameString("Resumé", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "resume") + +} +func TestSanitizeAnchorNameBlackfriday(t *testing.T) { + c := qt.New(t) + c.Assert(sanitizeAnchorNameString("Let's try this, shall we?", goldmark_config.AutoHeadingIDTypeBlackfriday), qt.Equals, "let-s-try-this-shall-we") } func BenchmarkSanitizeAnchorName(b *testing.B) { input := []byte("God is good: 神真美好") b.ResetTimer() for i := 0; i < b.N; i++ { - result := sanitizeAnchorName(input, false) + result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHub) if len(result) != 24 { b.Fatalf("got %d", len(result)) @@ -101,7 +108,7 @@ func BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) { input := []byte("God is good: 神真美好") b.ResetTimer() for i := 0; i < b.N; i++ { - result := sanitizeAnchorName(input, true) + result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHubAscii) if len(result) != 12 { b.Fatalf("got %d", len(result)) @@ -109,11 +116,23 @@ func 
BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) { } } +func BenchmarkSanitizeAnchorNameBlackfriday(b *testing.B) { + input := []byte("God is good: 神真美好") + b.ResetTimer() + for i := 0; i < b.N; i++ { + result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeBlackfriday) + if len(result) != 24 { + b.Fatalf("got %d", len(result)) + + } + } +} + func BenchmarkSanitizeAnchorNameString(b *testing.B) { input := "God is good: 神真美好" b.ResetTimer() for i := 0; i < b.N; i++ { - result := sanitizeAnchorNameString(input, false) + result := sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub) if len(result) != 24 { b.Fatalf("got %d", len(result)) } diff --git a/markup/goldmark/convert.go b/markup/goldmark/convert.go index c6f95836618..d4c3533537e 100644 --- a/markup/goldmark/convert.go +++ b/markup/goldmark/convert.go @@ -29,7 +29,6 @@ import ( "github.com/gohugoio/hugo/hugofs" "github.com/gohugoio/hugo/markup/converter" - "github.com/gohugoio/hugo/markup/goldmark/goldmark_config" "github.com/gohugoio/hugo/markup/highlight" "github.com/gohugoio/hugo/markup/tableofcontents" "github.com/yuin/goldmark" @@ -57,7 +56,7 @@ func (p provide) New(cfg converter.ProviderConfig) (converter.Provider, error) { cfg: cfg, md: md, sanitizeAnchorName: func(s string) string { - return sanitizeAnchorNameString(s, cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDType == goldmark_config.AutoHeadingIDTypeGitHub) + return sanitizeAnchorNameString(s, cfg.MarkupConfig.Goldmark.Parser.AutoHeadingIDType) }, }, nil }), nil diff --git a/markup/goldmark/convert_test.go b/markup/goldmark/convert_test.go index 3c173fb0a35..31799b2a53c 100644 --- a/markup/goldmark/convert_test.go +++ b/markup/goldmark/convert_test.go @@ -178,6 +178,21 @@ func TestConvertAutoIDAsciiOnly(t *testing.T) { c.Assert(got, qt.Contains, "

") } +func TestConvertAutoIDBlackfriday(t *testing.T) { + c := qt.New(t) + + content := ` +## Let's try this, shall we? + +` + mconf := markup_config.Default + mconf.Goldmark.Parser.AutoHeadingIDType = goldmark_config.AutoHeadingIDTypeBlackfriday + b := convert(c, mconf, content) + got := string(b.Bytes()) + + c.Assert(got, qt.Contains, "

") +} + func TestCodeFence(t *testing.T) { c := qt.New(t) diff --git a/markup/goldmark/goldmark_config/config.go b/markup/goldmark/goldmark_config/config.go index 47399b52c54..af33e03dc4b 100644 --- a/markup/goldmark/goldmark_config/config.go +++ b/markup/goldmark/goldmark_config/config.go @@ -17,6 +17,7 @@ package goldmark_config const ( AutoHeadingIDTypeGitHub = "github" AutoHeadingIDTypeGitHubAscii = "github-ascii" + AutoHeadingIDTypeBlackfriday = "blackfriday" ) // DefaultConfig holds the default Goldmark configuration.