Skip to content

Commit

Permalink
markup/goldmark: Make auto IDs GitHub compatible
Browse files Browse the repository at this point in the history
You can turn off this behaviour:

```toml
[markup]
  [markup.goldmark]
    [markup.goldmark.parser]
      autoHeadingIDAsciiOnly = true
```
Note that `anchorize` now adapts its behaviour depending on the default Markdown handler.

Fixes #6616
  • Loading branch information
bep committed Jan 4, 2020
1 parent ae81645 commit a82d270
Show file tree
Hide file tree
Showing 12 changed files with 421 additions and 35 deletions.
47 changes: 47 additions & 0 deletions common/text/transform.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright 2019 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package text

import (
"sync"
"unicode"

"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)

var accentTransformerPool = &sync.Pool{
New: func() interface{} {
return transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
},
}

// RemoveAccents removes all accents from b and returns the result as a new
// byte slice; b itself is never modified or returned.
//
// The work is done by a pooled transformer from accentTransformerPool. Should
// the transformation fail, a copy of the unmodified input is returned.
func RemoveAccents(b []byte) []byte {
	t := accentTransformerPool.Get().(transform.Transformer)
	out, _, err := transform.Bytes(t, b)

	// Clear any state before handing the transformer back to the pool.
	t.Reset()
	accentTransformerPool.Put(t)

	if err != nil {
		// On error transform.Bytes may have converted only a prefix of b.
		// Return the complete input rather than a truncated result, copied so
		// that the result still does not alias the caller's slice.
		return append([]byte(nil), b...)
	}

	return out
}

// RemoveAccentsString removes all accents from s.
//
// The work is done by a pooled transformer from accentTransformerPool. Should
// the transformation fail, s is returned unchanged.
func RemoveAccentsString(s string) string {
	t := accentTransformerPool.Get().(transform.Transformer)
	out, _, err := transform.String(t, s)

	// Clear any state before handing the transformer back to the pool.
	t.Reset()
	accentTransformerPool.Put(t)

	if err != nil {
		// On error transform.String may have converted only a prefix of s.
		// Return the complete input rather than a truncated result.
		return s
	}

	return out
}
29 changes: 29 additions & 0 deletions common/text/transform_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright 2019 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package text

import (
"testing"

qt "github.com/frankban/quicktest"
)

// TestRemoveAccents verifies that both the []byte and the string variant strip
// accents and leave unaccented text untouched.
func TestRemoveAccents(t *testing.T) {
	c := qt.New(t)

	// viaBytes runs the []byte variant on a string for easier comparison.
	viaBytes := func(in string) string {
		return string(RemoveAccents([]byte(in)))
	}

	c.Assert(viaBytes("Resumé"), qt.Equals, "Resume")
	c.Assert(viaBytes("Hugo Rocks!"), qt.Equals, "Hugo Rocks!")
	c.Assert(RemoveAccentsString("Resumé"), qt.Equals, "Resume")
}
20 changes: 18 additions & 2 deletions helpers/content.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ var (

// ContentSpec provides functionality to render markdown content.
type ContentSpec struct {
Converters markup.ConverterProvider
MardownConverter converter.Converter // Markdown converter with no document context
Converters markup.ConverterProvider
MardownConverter converter.Converter // Markdown converter with no document context
anchorNameSanitizer converter.AnchorNameSanitizer

// SummaryLength is the length of the summary that Hugo extracts from a content.
summaryLength int
Expand Down Expand Up @@ -91,6 +92,17 @@ func NewContentSpec(cfg config.Provider, logger *loggers.Logger, contentFs afero
return nil, err
}
spec.MardownConverter = conv
if as, ok := conv.(converter.AnchorNameSanitizer); ok {
spec.anchorNameSanitizer = as
} else {
// Use Goldmark's sanitizer
p := converterProvider.Get("goldmark")
conv, err := p.New(converter.DocumentContext{})
if err != nil {
return nil, err
}
spec.anchorNameSanitizer = conv.(converter.AnchorNameSanitizer)
}

return spec, nil
}
Expand Down Expand Up @@ -192,6 +204,10 @@ func (c *ContentSpec) RenderMarkdown(src []byte) ([]byte, error) {
return b.Bytes(), nil
}

// SanitizeAnchorName returns s sanitized by the anchor name sanitizer that
// this ContentSpec holds.
func (c *ContentSpec) SanitizeAnchorName(s string) string {
	sanitizer := c.anchorNameSanitizer
	return sanitizer.SanitizeAnchorName(s)
}

func (c *ContentSpec) ResolveMarkup(in string) string {
in = strings.ToLower(in)
switch in {
Expand Down
21 changes: 7 additions & 14 deletions helpers/path.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,15 @@ import (
"strings"
"unicode"

"github.com/gohugoio/hugo/common/text"

"github.com/gohugoio/hugo/config"

"github.com/gohugoio/hugo/hugofs"

"github.com/gohugoio/hugo/common/hugio"
_errors "github.com/pkg/errors"
"github.com/spf13/afero"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)

var (
Expand Down Expand Up @@ -134,6 +133,10 @@ func ishex(c rune) bool {
// are also removed.
// Spaces will be replaced with a single hyphen, and sequential hyphens will be reduced to one.
func (p *PathSpec) UnicodeSanitize(s string) string {
if p.RemovePathAccents {
s = text.RemoveAccentsString(s)
}

source := []rune(s)
target := make([]rune, 0, len(source))
var prependHyphen bool
Expand All @@ -154,17 +157,7 @@ func (p *PathSpec) UnicodeSanitize(s string) string {
}
}

var result string

if p.RemovePathAccents {
// remove accents - see https://blog.golang.org/normalization
t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
result, _, _ = transform.String(t, string(target))
} else {
result = string(target)
}

return result
return string(target)
}

// ReplaceExtension takes a path and an extension, strips the old extension
Expand Down
7 changes: 6 additions & 1 deletion markup/blackfriday/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ type blackfridayConverter struct {
cfg converter.ProviderConfig
}

// SanitizeAnchorName returns the anchor name that Blackfriday's
// SanitizedAnchorName produces for s.
func (c *blackfridayConverter) SanitizeAnchorName(s string) string {
	anchor := blackfriday.SanitizedAnchorName(s)
	return anchor
}

func (c *blackfridayConverter) AnchorSuffix() string {
if c.bf.PlainIDAnchors {
return ""
Expand Down Expand Up @@ -204,5 +208,6 @@ var blackfridayExtensionMap = map[string]int{
}

// Compile-time checks that *blackfridayConverter satisfies the listed
// converter interfaces.
var (
_ converter.DocumentInfo = (*blackfridayConverter)(nil)
_ converter.DocumentInfo = (*blackfridayConverter)(nil)
_ converter.AnchorNameSanitizer = (*blackfridayConverter)(nil)
)
5 changes: 5 additions & 0 deletions markup/converter/converter.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ type TableOfContentsProvider interface {
TableOfContents() tableofcontents.Root
}

// AnchorNameSanitizer tells how a converter sanitizes anchor names.
// A converter implementing it exposes the transformation it applies to turn
// a string into an anchor name.
type AnchorNameSanitizer interface {
// SanitizeAnchorName returns the sanitized anchor name for s.
SanitizeAnchorName(s string) string
}

// Bytes holds a byte slice and implements the Result interface.
type Bytes []byte

Expand Down
125 changes: 125 additions & 0 deletions markup/goldmark/autoid.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// Copyright 2019 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package goldmark

import (
"bytes"
"strconv"
"unicode"
"unicode/utf8"

"github.com/gohugoio/hugo/common/text"

"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/util"

bp "github.com/gohugoio/hugo/bufferpool"
)

// sanitizeAnchorNameString is the string counterpart of sanitizeAnchorName.
func sanitizeAnchorNameString(s string, asciiOnly bool) string {
	sanitized := sanitizeAnchorName([]byte(s), asciiOnly)
	return string(sanitized)
}

// sanitizeAnchorName turns b into an anchor name. It is
// sanitizeAnchorNameWithHook without a hook.
func sanitizeAnchorName(b []byte, asciiOnly bool) []byte {
	var noHook func(buf *bytes.Buffer)
	return sanitizeAnchorNameWithHook(b, asciiOnly, noHook)
}

// sanitizeAnchorNameWithHook turns b into an anchor name and returns it as a
// newly allocated slice.
//
// Spaces and tabs become "-"; hyphens, underscores, letters and digits are
// kept in lower case; every other rune is dropped. When asciiOnly is set,
// accents are stripped first and any rune that is not a single byte is
// dropped as well.
//
// A non-nil hook is called with the working buffer after sanitization and may
// modify it before the result is copied out.
func sanitizeAnchorNameWithHook(b []byte, asciiOnly bool, hook func(buf *bytes.Buffer)) []byte {
	buf := bp.GetBuffer()

	rest := b
	if asciiOnly {
		// Strip the accents up front so that the base letters survive the
		// ASCII-only filter below instead of being dropped with their accent.
		rest = text.RemoveAccents(rest)
	}

	for len(rest) > 0 {
		r, width := utf8.DecodeRune(rest)
		rest = rest[width:]

		if asciiOnly && width != 1 {
			// Multi-byte rune in ASCII-only mode: drop it.
			continue
		}

		if isSpace(r) {
			buf.WriteString("-")
			continue
		}

		if r == '-' || isAlphaNumeric(r) {
			buf.WriteRune(unicode.ToLower(r))
		}
	}

	if hook != nil {
		hook(buf)
	}

	// Copy the bytes out; the buffer goes back to the pool.
	out := append(make([]byte, 0, buf.Len()), buf.Bytes()...)

	bp.PutBuffer(buf)

	return out
}

// isAlphaNumeric reports whether r is an underscore, a Unicode letter or a
// Unicode digit.
func isAlphaNumeric(r rune) bool {
	switch {
	case r == '_':
		return true
	case unicode.IsLetter(r):
		return true
	default:
		return unicode.IsDigit(r)
	}
}

// isSpace reports whether r is a space or a tab character.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}

// Compile-time check that *idFactory satisfies goldmark's parser.IDs.
var _ parser.IDs = (*idFactory)(nil)

// idFactory generates element IDs and keeps track of the IDs it has seen so
// that Generate does not return the same ID twice.
type idFactory struct {
// asciiOnly is passed on to sanitizeAnchorNameWithHook by Generate.
asciiOnly bool
// vals is the set of IDs recorded so far by Generate and Put.
vals map[string]struct{}
}

// newIDFactory returns an idFactory with an empty set of recorded IDs and the
// given asciiOnly setting.
func newIDFactory(asciiOnly bool) *idFactory {
	f := new(idFactory)
	f.asciiOnly = asciiOnly
	f.vals = make(map[string]struct{})
	return f
}

// Generate returns an ID derived from value that has not been recorded by
// this factory before, and records it.
//
// value is sanitized with sanitizeAnchorNameWithHook. If nothing is left, the
// ID falls back to "heading" for headings and "id" for every other node kind.
// If the ID is already recorded, "-1", "-2", ... is appended until it is not.
func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte {
	makeUnique := func(buf *bytes.Buffer) {
		if buf.Len() == 0 {
			// Nothing survived sanitization; use a generic name.
			fallback := "id"
			if kind == ast.KindHeading {
				fallback = "heading"
			}
			buf.WriteString(fallback)
		}

		if _, taken := ids.vals[util.BytesToReadOnlyString(buf.Bytes())]; taken {
			// Append a hyphen and a number, starting with 1.
			buf.WriteRune('-')
			base := buf.Len()
			for n := 1; ; n++ {
				// Drop the previously tried number (a no-op on the first pass).
				buf.Truncate(base)
				buf.WriteString(strconv.Itoa(n))
				if _, taken := ids.vals[util.BytesToReadOnlyString(buf.Bytes())]; !taken {
					break
				}
			}
		}

		// buf.String() copies, so the stored key does not refer to the buffer.
		ids.vals[buf.String()] = struct{}{}
	}

	return sanitizeAnchorNameWithHook(value, ids.asciiOnly, makeUnique)
}

// Put records value as an ID that is already in use, so that Generate will
// not return it.
//
// NOTE(review): the map key comes from util.BytesToReadOnlyString, which
// presumably shares memory with value; callers should then not modify value
// afterwards — confirm.
func (ids *idFactory) Put(value []byte) {
	key := util.BytesToReadOnlyString(value)
	ids.vals[key] = struct{}{}
}
Loading

0 comments on commit a82d270

Please sign in to comment.