From e2b23a6146b3dabd24dea48abe5d6c23aaa3b356 Mon Sep 17 00:00:00 2001 From: Xavier Coulon Date: Sat, 5 Feb 2022 19:13:07 +0100 Subject: [PATCH] fix(types): section id generation support quoted strings and ignore unnecessary characters fixes #920 Signed-off-by: Xavier Coulon --- pkg/parser/quoted_string_test.go | 8 +- pkg/renderer/sgml/html5/quoted_string_test.go | 4 +- pkg/renderer/sgml/html5/string_test.go | 8 +- .../sgml/xhtml5/quoted_string_test.go | 4 +- .../non_alphanumeric_replacement_test.go | 166 ++++++++++++------ pkg/types/non_alphanumerics_replacement.go | 51 ++++-- pkg/types/types.go | 5 +- 7 files changed, 167 insertions(+), 79 deletions(-) diff --git a/pkg/parser/quoted_string_test.go b/pkg/parser/quoted_string_test.go index 8f14b88f..ecf73270 100644 --- a/pkg/parser/quoted_string_test.go +++ b/pkg/parser/quoted_string_test.go @@ -997,13 +997,13 @@ var _ = Describe("quoted strings", func() { &types.Section{ Level: 1, Attributes: types.Attributes{ - types.AttrID: "_a_episode", + types.AttrID: "_a_curly_episode", }, Title: title, }, }, ElementReferences: types.ElementReferences{ - "_a_episode": title, + "_a_curly_episode": title, }, } Expect(ParseDocument(source)).To(MatchDocument(expected)) @@ -1096,13 +1096,13 @@ var _ = Describe("quoted strings", func() { &types.Section{ Level: 1, Attributes: types.Attributes{ - types.AttrID: "_a_episode", + types.AttrID: "_a_curly_episode", }, Title: title, }, }, ElementReferences: types.ElementReferences{ - "_a_episode": title, + "_a_curly_episode": title, }, } Expect(ParseDocument(source)).To(MatchDocument(expected)) diff --git a/pkg/renderer/sgml/html5/quoted_string_test.go b/pkg/renderer/sgml/html5/quoted_string_test.go index 6d727602..54caa499 100644 --- a/pkg/renderer/sgml/html5/quoted_string_test.go +++ b/pkg/renderer/sgml/html5/quoted_string_test.go @@ -124,7 +124,7 @@ var _ = Describe("quoted strings", func() { It("curly in title", func() { source := "== a '`curly`' episode" expected := `
-

a ‘curly’ episode

+

a ‘curly’ episode

@@ -293,7 +293,7 @@ var _ = Describe("quoted strings", func() { It("double curly in title", func() { source := "== a \"`curly`\" episode" expected := `
-

a “curly” episode

+

a “curly” episode

diff --git a/pkg/renderer/sgml/html5/string_test.go b/pkg/renderer/sgml/html5/string_test.go index 99565908..78cf393c 100644 --- a/pkg/renderer/sgml/html5/string_test.go +++ b/pkg/renderer/sgml/html5/string_test.go @@ -98,7 +98,7 @@ var _ = Describe("strings", func() { It("title with explicit apostrophe", func() { source := "== It`'s A Wonderful Life" expected := "
\n" + - "

It’s A Wonderful Life

\n" + + "

It’s A Wonderful Life

\n" + "
\n" + "
\n" + "
\n" @@ -108,7 +108,7 @@ var _ = Describe("strings", func() { It("title with explicit apostrophe (unicode)", func() { source := ":unicode:\n\n== It`'s A Wonderful Life" expected := "
\n" + - "

It\u2019s A Wonderful Life

\n" + + "

It\u2019s A Wonderful Life

\n" + "
\n" + "
\n" + "
\n" @@ -118,7 +118,7 @@ var _ = Describe("strings", func() { It("title with implicit apostrophe", func() { source := "== It's A Wonderful Life" expected := "
\n" + - "

It’s A Wonderful Life

\n" + + "

It’s A Wonderful Life

\n" + "
\n" + "
\n" + "
\n" @@ -128,7 +128,7 @@ var _ = Describe("strings", func() { It("title with implicit apostrophe (unicode)", func() { source := ":unicode:\n\n== It's A Wonderful Life" expected := "
\n" + - "

It\u2019s A Wonderful Life

\n" + + "

It\u2019s A Wonderful Life

\n" + "
\n" + "
\n" + "
\n" diff --git a/pkg/renderer/sgml/xhtml5/quoted_string_test.go b/pkg/renderer/sgml/xhtml5/quoted_string_test.go index f54cb489..f6595e9e 100644 --- a/pkg/renderer/sgml/xhtml5/quoted_string_test.go +++ b/pkg/renderer/sgml/xhtml5/quoted_string_test.go @@ -118,7 +118,7 @@ var _ = Describe("quoted strings", func() { It("curly in title", func() { source := "== a '`curly`' episode" expected := `
-

a ‘curly’ episode

+

a ‘curly’ episode

@@ -287,7 +287,7 @@ var _ = Describe("quoted strings", func() { It("double curly in title", func() { source := "== a \"`curly`\" episode" expected := `
-

a “curly” episode

+

a “curly” episode

diff --git a/pkg/types/non_alphanumeric_replacement_test.go b/pkg/types/non_alphanumeric_replacement_test.go index c8c99299..1d134b35 100644 --- a/pkg/types/non_alphanumeric_replacement_test.go +++ b/pkg/types/non_alphanumeric_replacement_test.go @@ -3,65 +3,90 @@ package types_test import ( "github.com/bytesparadise/libasciidoc/pkg/types" - . "github.com/onsi/ginkgo" // nolint:golint - . "github.com/onsi/gomega" // nolint:golintt + . "github.com/onsi/ginkgo/extensions/table" // nolint:golint + . "github.com/onsi/gomega" // nolint:golint ) -var _ = Describe("normalizing string", func() { +var _ = DescribeTable("replace non-alphanumeric chars", - It("hello", func() { - source := []interface{}{ - &types.StringElement{Content: "hello"}, - } - Expect(types.ReplaceNonAlphanumerics(source, "_")).To(Equal("hello")) - }) + func(source []interface{}, valueWithDefaultSettings, valueWithCustomSettings string) { + Expect(types.ReplaceNonAlphanumerics(source, "_", "_")).To(Equal(valueWithDefaultSettings)) + Expect(types.ReplaceNonAlphanumerics(source, "id_", "-")).To(Equal(valueWithCustomSettings)) + }, - It("héllo with an accent", func() { - source := []interface{}{ - &types.StringElement{Content: " héllo 1.2 and 3 Spaces"}, - } - Expect(types.ReplaceNonAlphanumerics(source, "_")).To(Equal("héllo_1_2_and_3_spaces")) - }) + Entry("hello", + []interface{}{ + &types.StringElement{ + Content: "hello", + }, + }, + "_hello", + "id_hello", + ), - It("a an accent and a swedish character", func() { - source := []interface{}{ - &types.StringElement{Content: `A à ⌘`}, - } - Expect(types.ReplaceNonAlphanumerics(source, "_")).To(Equal("a_à")) - }) + Entry("héllo with an accent", + []interface{}{ + &types.StringElement{ + Content: " héllo 1.2 and 3 Spaces", + }, + }, + "_héllo_1_2_and_3_spaces", + "id_héllo-1-2-and-3-spaces", + ), - It("AŁA", func() { - source := []interface{}{ - &types.StringElement{Content: `AŁA 0.1 ?`}, - } - Expect(types.ReplaceNonAlphanumerics(source, "_")).To(Equal("ała_0_1")) - }) + Entry("a an accent and a swedish character", + []interface{}{ + &types.StringElement{ + Content: `A à ⌘`, + }, + }, + "_a_à", + "id_a-à", + ), + + Entry("AŁA", + []interface{}{ + &types.StringElement{ + Content: `AŁA 0.1 ?`, + }, + }, + "_ała_0_1", + "id_ała-0-1", + ), - It("it's 2 spaces, here !", func() { - source := []interface{}{ - &types.StringElement{Content: `it's 2 spaces, here !`}, - } - Expect(types.ReplaceNonAlphanumerics(source, "_")).To(Equal("it_s_2_spaces_here")) - }) + Entry("it's 2 spaces, here !", + []interface{}{ + &types.StringElement{ + Content: `it's 2 spaces, here !`, + }, + }, + "_its_2_spaces_here", + "id_its-2-spaces-here", + ), - It("content with markup", func() { - // == a section title, with *bold content* - source := []interface{}{ - &types.StringElement{Content: "a section title, with"}, + Entry("content with markup", + []interface{}{ + &types.StringElement{ + Content: "a section title, with", + }, &types.QuotedText{ Kind: types.SingleQuoteBold, Elements: []interface{}{ - &types.StringElement{Content: "bold content"}, + &types.StringElement{ + Content: "bold content", + }, }, }, - } - Expect(types.ReplaceNonAlphanumerics(source, "_")).To(Equal("a_section_title_with_bold_content")) - }) + }, + "_a_section_title_with_bold_content", + "id_a-section-title-with-bold-content", + ), - It("content with link", func() { - // == a section title, with *bold content* - source := []interface{}{ - &types.StringElement{Content: "link to "}, + Entry("content with link", + []interface{}{ + &types.StringElement{ + Content: "link to ", + }, &types.InlineLink{ Attributes: types.Attributes{}, Location: &types.Location{ @@ -69,7 +94,50 @@ var _ = Describe("normalizing string", func() { Path: "foo.bar", }, }, - } - Expect(types.ReplaceNonAlphanumerics(source, "_")).To(Equal("link_to_httpsfoo_bar")) // asciidoctor will return `_link_to_httpsfoo_bar` - }) -}) + }, + "_link_to_httpsfoo_bar", + "id_link-to-httpsfoo-bar", + ), + + Entry("content with dots and special characters", + []interface{}{ + &types.StringElement{ + Content: "...and we're back!", + }, + }, + "_and_were_back", + "id_-and-were-back", + ), + + Entry("content with dots", + []interface{}{ + &types.StringElement{ + Content: "Section A.a", + }, + }, + "_section_a_a", + "id_section-a-a", + ), + + Entry("content with quoted string", + // Block Quotes and "`Smart`" Ones + []interface{}{ + &types.StringElement{ + Content: "Block Quotes and ", + }, + &types.QuotedString{ + Kind: types.DoubleQuote, + Elements: []interface{}{ + &types.StringElement{ + Content: "Smart", + }, + }, + }, + &types.StringElement{ + Content: "Ones", + }, + }, + "_block_quotes_and_smart_ones", + "id_block-quotes-and-smart-ones", + ), +) diff --git a/pkg/types/non_alphanumerics_replacement.go b/pkg/types/non_alphanumerics_replacement.go index 5c7d799f..3a1e39ba 100644 --- a/pkg/types/non_alphanumerics_replacement.go +++ b/pkg/types/non_alphanumerics_replacement.go @@ -8,47 +8,68 @@ import ( ) // ReplaceNonAlphanumerics replace all non alpha numeric characters with the given `replacement` -func ReplaceNonAlphanumerics(elements []interface{}, replacement string) (string, error) { +func ReplaceNonAlphanumerics(elements []interface{}, prefix, separator string) (string, error) { + replacement, err := replaceNonAlphanumericsOnElements(elements, separator) + if err != nil { + return "", err + } + // avoid double prefix + if strings.HasPrefix(replacement, prefix) { + return replacement, nil + } + return prefix + replacement, nil +} + +func replaceNonAlphanumericsOnElements(elements []interface{}, separator string) (string, error) { buf := &strings.Builder{} for _, element := range elements { switch element := element.(type) { + case *QuotedString: + r, err := replaceNonAlphanumericsOnElements(element.Elements, separator) + if err != nil { + return "", err + } + if buf.Len() > 0 { + buf.WriteString(separator) + } + buf.WriteString(r) case *QuotedText: - r, err := ReplaceNonAlphanumerics(element.Elements, replacement) + r, err := replaceNonAlphanumericsOnElements(element.Elements, separator) if err != nil { return "", err } if buf.Len() > 0 { - buf.WriteString(replacement) + buf.WriteString(separator) } buf.WriteString(r) case *StringElement: - r, err := replaceNonAlphanumerics(element.Content, replacement) + r, err := replaceNonAlphanumerics(element.Content, separator) if err != nil { return "", err } if buf.Len() > 0 { - buf.WriteString(replacement) + buf.WriteString(separator) } buf.WriteString(r) case *InlineLink: if element.Location != nil { - r, err := replaceNonAlphanumerics(element.Location.Stringify(), replacement) + r, err := replaceNonAlphanumerics(element.Location.Stringify(), separator) if err != nil { return "", err } if buf.Len() > 0 { - buf.WriteString(replacement) + buf.WriteString(separator) } buf.WriteString(r) } case *Icon: s := element.Attributes.GetAsStringWithDefault(AttrImageAlt, element.Class) - r, err := replaceNonAlphanumerics(s, replacement) + r, err := replaceNonAlphanumerics(s, separator) if err != nil { return "", err } if buf.Len() > 0 { - buf.WriteString(replacement) + buf.WriteString(separator) } buf.WriteString(r) default: @@ -62,27 +83,27 @@ func ReplaceNonAlphanumerics(elements []interface{}, replacement string) (string func replaceNonAlphanumerics(content, replacement string) (string, error) { buf := &strings.Builder{} - lastCharIsSpace := false + lastCharIsSeparator := false // Drop the :// from links. content = strings.ReplaceAll(content, "://", "") for _, r := range strings.TrimLeft(content, " ") { // ignore header spaces - if unicode.Is(unicode.Letter, r) || unicode.Is(unicode.Number, r) { + switch { + case unicode.Is(unicode.Letter, r) || unicode.Is(unicode.Number, r): _, err := buf.WriteString(strings.ToLower(string(r))) if err != nil { return "", errors.Wrapf(err, "error while normalizing String Element") } - lastCharIsSpace = false - } else if !lastCharIsSpace && (unicode.Is(unicode.Space, r) || unicode.Is(unicode.Punct, r)) { + lastCharIsSeparator = false + case !lastCharIsSeparator && (string(r) == " " || string(r) == "-" || string(r) == "."): _, err := buf.WriteString(replacement) if err != nil { return "", errors.Wrapf(err, "error while normalizing String Element") } - lastCharIsSpace = true + lastCharIsSeparator = true } } result := strings.TrimSuffix(buf.String(), replacement) - // log.Debugf("normalized '%s' to '%s'", content, result) return result, nil } diff --git a/pkg/types/types.go b/pkg/types/types.go index ef52df90..5d48b71b 100644 --- a/pkg/types/types.go +++ b/pkg/types/types.go @@ -2405,13 +2405,12 @@ func (s *Section) resolveID(attrs Attributes) (string, error) { return id, nil } log.Debugf("resolving section id") + prefix := attrs.GetAsStringWithDefault(AttrIDPrefix, DefaultIDPrefix) separator := attrs.GetAsStringWithDefault(AttrIDSeparator, DefaultIDSeparator) - replacement, err := ReplaceNonAlphanumerics(s.Title, separator) + id, err := ReplaceNonAlphanumerics(s.Title, prefix, separator) if err != nil { return "", errors.Wrapf(err, "failed to generate default ID on Section element") } - idPrefix := attrs.GetAsStringWithDefault(AttrIDPrefix, DefaultIDPrefix) - id := idPrefix + replacement s.Attributes[AttrID] = id log.Debugf("updated section id to '%s'", s.Attributes[AttrID]) return id, nil