Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(renderer): improve HTML escaping #952

Merged
merged 1 commit into from
Feb 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ require (
github.com/davecgh/go-spew v1.1.1
github.com/google/go-cmp v0.5.5
github.com/kr/text v0.2.0 // indirect
github.com/mattn/go-isatty v0.0.12 // indirect
github.com/mna/pigeon v1.1.0
github.com/modocache/gover v0.0.0-20171022184752-b58185e213c5 // indirect
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
Expand All @@ -18,11 +17,11 @@ require (
github.com/sirupsen/logrus v1.7.0
github.com/sozorogami/gover v0.0.0-20171022184752-b58185e213c5
github.com/spf13/cobra v1.1.1
github.com/stretchr/testify v1.6.1
github.com/stretchr/testify v1.7.0
gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f // indirect
gopkg.in/yaml.v2 v2.4.0
)

// include support for disabling unexported fields
// include support for disabling unexported fields
// TODO: still needed?
replace github.com/davecgh/go-spew => github.com/flw-cn/go-spew v1.1.2-0.20200624141737-10fccbfd0b23
replace github.com/davecgh/go-spew => github.com/flw-cn/go-spew v1.1.2-0.20200624141737-10fccbfd0b23
52 changes: 2 additions & 50 deletions go.sum

Large diffs are not rendered by default.

12 changes: 5 additions & 7 deletions pkg/renderer/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
type Context struct {
Config *configuration.Configuration // TODO: use composition (remove the `Config` field)
WithinDelimitedBlock bool
EncodeSpecialChars bool
WithinList int
counters map[string]int
Attributes types.Attributes
Expand All @@ -23,12 +22,11 @@ type Context struct {
func NewContext(doc *types.Document, config *configuration.Configuration) *Context {
header := doc.Header()
ctx := &Context{
Config: config,
counters: make(map[string]int),
Attributes: config.Attributes,
ElementReferences: doc.ElementReferences,
HasHeader: header != nil,
EncodeSpecialChars: true,
Config: config,
counters: make(map[string]int),
Attributes: config.Attributes,
ElementReferences: doc.ElementReferences,
HasHeader: header != nil,
}
// TODO: add other attributes from https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#builtin-attributes-i18n
ctx.Attributes[types.AttrFigureCaption] = "Figure"
Expand Down
6 changes: 1 addition & 5 deletions pkg/renderer/sgml/delimited_block_source.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,6 @@ func (r *sgmlRenderer) renderSourceBlockElements(ctx *renderer.Context, b *types
if log.IsLevelEnabled(log.DebugLevel) {
log.Debugf("splitted lines:\n%s", spew.Sdump(lines))
}
ctx.EncodeSpecialChars = false
defer func() {
ctx.EncodeSpecialChars = true
}()
// using github.com/alecthomas/chroma to highlight the content
lexer := lexers.Get(language)
if lexer == nil {
Expand Down Expand Up @@ -168,7 +164,7 @@ func (r *sgmlRenderer) renderSourceLine(ctx *renderer.Context, line interface{})
for _, e := range elements {
switch e := e.(type) {
case *types.StringElement, *types.SpecialCharacter:
s, err := r.renderElement(ctx, e)
s, err := r.renderPlainText(ctx, e)
if err != nil {
return "", nil, err
}
Expand Down
6 changes: 4 additions & 2 deletions pkg/renderer/sgml/elements.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ func (r *sgmlRenderer) renderElement(ctx *renderer.Context, element interface{})
case *types.ThematicBreak:
return r.renderThematicBreak()
case *types.SpecialCharacter:
return r.renderSpecialCharacter(ctx, e)
return r.renderSpecialCharacter(e)
case *types.PredefinedAttribute:
return r.renderPredefinedAttribute(e)
case *types.AttributeDeclaration:
Expand Down Expand Up @@ -141,8 +141,10 @@ func (r *sgmlRenderer) renderPlainText(ctx *renderer.Context, element interface{
return element.Location.Stringify(), nil
case *types.BlankLine, types.ThematicBreak:
return "\n\n", nil
case *types.SpecialCharacter:
return element.Name, nil
case *types.StringElement:
return element.Content, nil
return r.renderStringElement(ctx, element)
case *types.QuotedString:
return r.renderQuotedString(ctx, element)
// case *types.Paragraph:
Expand Down
2 changes: 1 addition & 1 deletion pkg/renderer/sgml/html5/link.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
package html5

const (
linkTmpl = `<a{{ if .ID }} id="{{ .ID }}"{{ end }}{{ if .URL }} href="{{ .URL }}"{{ end }}{{if .Class}} class="{{ .Class }}"{{ end }}{{if .Target}} target="{{ .Target }}"{{ end }}>{{ .Text }}</a>`
linkTmpl = `<a{{ if .ID }} id="{{ .ID }}"{{ end }}{{ if .URL }} href="{{ escape .URL }}"{{ end }}{{if .Class}} class="{{ .Class }}"{{ end }}{{if .Target}} target="{{ .Target }}"{{ end }}>{{ .Text }}</a>`
)
43 changes: 37 additions & 6 deletions pkg/renderer/sgml/html5/link_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ var _ = Describe("links", func() {

Context("bare URLs", func() {

It("should parse standalone URL with scheme ", func() {
It("standalone URL with scheme", func() {
source := `<https://example.com>`
expected := `<div class="paragraph">
<p><a href="https://example.com" class="bare">https://example.com</a></p>
Expand All @@ -24,7 +24,7 @@ var _ = Describe("links", func() {
Expect(RenderHTML(source)).To(MatchHTML(expected))
})

It("should parse URL with scheme in sentence", func() {
It("URL with scheme in sentence", func() {
source := `a link to <https://example.com>.`
expected := `<div class="paragraph">
<p>a link to <a href="https://example.com" class="bare">https://example.com</a>.</p>
Expand All @@ -33,7 +33,7 @@ var _ = Describe("links", func() {
Expect(RenderHTML(source)).To(MatchHTML(expected))
})

It("should parse substituted URL with scheme", func() {
It("substituted URL with scheme", func() {
source := `:example: https://example.com

a link to <{example}>.`
Expand All @@ -44,6 +44,37 @@ a link to <{example}>.`
Expect(RenderHTML(source)).To(MatchHTML(expected))
})

It("substituted URL with scheme", func() {
source := `:example: https://example.com

a link to <{example}>.`
expected := `<div class="paragraph">
<p>a link to <a href="https://example.com" class="bare">https://example.com</a>.</p>
</div>
`
Expect(RenderHTML(source)).To(MatchHTML(expected))
})

It("URL with query string", func() {
source := `a link to https://example.com?foo=bar&lang=en[].`
expected := `<div class="paragraph">
<p>a link to <a href="https://example.com?foo=bar&amp;lang=en" class="bare">https://example.com?foo=bar&amp;lang=en</a>.</p>
</div>
`
Expect(RenderHTML(source)).To(MatchHTML(expected))
})

It("substituted bare URL with query string", func() {
source := `:example: https://example.com?foo=bar&lang=en

a link to <{example}>.`
expected := `<div class="paragraph">
<p>a link to <a href="https://example.com?foo=bar&amp;lang=en" class="bare">https://example.com?foo=bar&amp;lang=en</a>.</p>
</div>
`
Expect(RenderHTML(source)).To(MatchHTML(expected))
})

Context("malformed", func() {

It("should not parse URL without scheme", func() {
Expand All @@ -55,7 +86,7 @@ a link to <{example}>.`
Expect(RenderHTML(source)).To(MatchHTML(expected))
})

It("should parse with special character in URL", func() {
It("with special character in URL", func() {
source := `a link to https://example.com>[].`
expected := &types.Document{
Elements: []interface{}{
Expand Down Expand Up @@ -87,7 +118,7 @@ a link to <{example}>.`
Expect(ParseDocument(source)).To(MatchDocument(expected))
})

It("should parse with opening angle bracket", func() {
It("with opening angle bracket", func() {
source := `a link to <https://example.com[].`
expected := &types.Document{
Elements: []interface{}{
Expand Down Expand Up @@ -295,7 +326,7 @@ a link to {scheme}://{path} and https://foo.baz`
It("with special characters", func() {
source := `a link to https://example.com?a=1&b=2`
expected := `<div class="paragraph">
<p>a link to <a href="https://example.com?a=1&b=2" class="bare">https://example.com?a=1&amp;b=2</a></p>
<p>a link to <a href="https://example.com?a=1&amp;b=2" class="bare">https://example.com?a=1&amp;b=2</a></p>
</div>
`
Expect(RenderHTML(source)).To(MatchHTML(expected))
Expand Down
5 changes: 0 additions & 5 deletions pkg/renderer/sgml/html5/special_character.go

This file was deleted.

9 changes: 9 additions & 0 deletions pkg/renderer/sgml/html5/string_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,4 +134,13 @@ var _ = Describe("strings", func() {
"</div>\n"
Expect(RenderHTML(source)).To(MatchHTML(expected))
})

It("paragraph with special characters", func() {
source := `...and we're back!`
expected := `<div class="paragraph">
<p>&#8230;&#8203;and we&#8217;re back!</p>
</div>
`
Expect(RenderHTML(source)).To(MatchHTML(expected))
})
})
2 changes: 1 addition & 1 deletion pkg/renderer/sgml/html5/table_of_contents.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ const (

tocSectionTmpl = "<ul class=\"sectlevel{{ .Level }}\">\n{{ .Content }}</ul>\n"

tocEntryTmpl = "<li><a href=\"#{{ .ID }}\">{{ if .Number }}{{ .Number }}. {{ end }}{{ .Title }}</a>" +
tocEntryTmpl = "<li><a href=\"#{{ .ID }}\">{{ if .Number }}{{ .Number }}. {{ end }}{{ escape .Title }}</a>" +
"{{ if .Content }}\n{{ .Content }}{{ end }}</li>\n"
)
21 changes: 20 additions & 1 deletion pkg/renderer/sgml/html5/table_of_contents_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
. "github.com/onsi/gomega" // nolint:golint
)

var _ = Describe("document toc", func() {
var _ = Describe("table of contents", func() {

Context("in document with header", func() {

Expand Down Expand Up @@ -430,6 +430,25 @@ level 1 sections do not exist.`
Expect(RenderHTML(source)).To(MatchHTML(expected))

})

It("section title with special characters", func() { // TODO: move into `section_test.go`
source := `:toc:

== ...and we're back!`
expected := `<div id="toc" class="toc">
<div id="toctitle">Table of Contents</div>
<ul class="sectlevel1">
<li><a href="#_and_were_back">&#8230;&#8203;and we&#8217;re back!</a></li>
</ul>
</div>
<div class="sect1">
<h2 id="_and_were_back">&#8230;&#8203;and we&#8217;re back!</h2>
<div class="sectionbody">
</div>
</div>
`
Expect(RenderHTML(source)).To(MatchHTML(expected))
})
})

Context("within preamble", func() {
Expand Down
1 change: 0 additions & 1 deletion pkg/renderer/sgml/html5/templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ var templates = sgml.Templates{
SectionHeader: sectionHeaderTmpl,
SidebarBlock: sidebarBlockTmpl,
SourceBlock: sourceBlockTmpl,
SpecialCharacter: specialCharacterTmpl,
StringElement: stringTmpl,
SubscriptText: subscriptTextTmpl,
SuperscriptText: superscriptTextTmpl,
Expand Down
11 changes: 0 additions & 11 deletions pkg/renderer/sgml/renderer.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ func NewRenderer(t Templates) Renderer {
"trimRight": trimRight,
"trimLeft": trimLeft,
"trim": trimBoth,
"specialCharacter": specialCharacter,
"predefinedAttribute": predefinedAttribute,
"halign": halign,
"valign": valign,
Expand All @@ -62,16 +61,6 @@ func trimBoth(s string) string {
return strings.Trim(s, " ")
}

var specialCharacters = map[string]string{
">": "&gt;",
"<": "&lt;",
"&": "&amp;",
}

func specialCharacter(c string) string {
return specialCharacters[c]
}

var predefinedAttributes = map[string]string{
"sp": " ",
"blank": "",
Expand Down
2 changes: 0 additions & 2 deletions pkg/renderer/sgml/sgml_renderer.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ type sgmlRenderer struct {
sectionTitle *textTemplate
sidebarBlock *textTemplate
sourceBlock *textTemplate
specialCharacter *textTemplate
stringElement *textTemplate
subscriptText *textTemplate
superscriptText *textTemplate
Expand Down Expand Up @@ -152,7 +151,6 @@ func (r *sgmlRenderer) prepareTemplates() error {
r.stringElement, err = r.newTemplate("string-element", tmpls.StringElement, err)
r.sidebarBlock, err = r.newTemplate("sidebar-block", tmpls.SidebarBlock, err)
r.sourceBlock, err = r.newTemplate("source-block", tmpls.SourceBlock, err)
r.specialCharacter, err = r.newTemplate("special-character", tmpls.SpecialCharacter, err)
r.subscriptText, err = r.newTemplate("subscript", tmpls.SubscriptText, err)
r.superscriptText, err = r.newTemplate("superscript", tmpls.SuperscriptText, err)
r.table, err = r.newTemplate("table", tmpls.Table, err)
Expand Down
20 changes: 2 additions & 18 deletions pkg/renderer/sgml/special_character.go
Original file line number Diff line number Diff line change
@@ -1,26 +1,10 @@
package sgml

import (
"strings"

"github.com/bytesparadise/libasciidoc/pkg/types"
"github.com/pkg/errors"
)

func (r *sgmlRenderer) renderSpecialCharacter(ctx *Context, s *types.SpecialCharacter) (string, error) {
func (r *sgmlRenderer) renderSpecialCharacter(s *types.SpecialCharacter) (string, error) {
// log.Debugf("rendering special character '%s'", s.Name)
if !ctx.EncodeSpecialChars {
// just return the special character as-is
return s.Name, nil
}
// TODO: no need for a template here, just a map
result := &strings.Builder{}
if err := r.specialCharacter.Execute(result, struct {
Name string
}{
Name: s.Name,
}); err != nil {
return "", errors.Wrap(err, "error while rendering special character")
}
return result.String(), nil
return EscapeString(s.Name), nil
}
5 changes: 1 addition & 4 deletions pkg/renderer/sgml/string.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,15 @@ func (r *sgmlRenderer) renderStringElement(ctx *renderer.Context, str *types.Str
func asciiEntify(source string) string {
out := &strings.Builder{}
out.Grow(len(source))

for _, r := range source {

// This will certain characters that should be escaped alone. Run them through
// escape first if that is a concern.
if r < 128 && (unicode.IsPrint(r) || unicode.IsSpace(r)) {
out.WriteRune(r)
continue
}
// take care that the entity is unsigned (should always be)
fmt.Fprintf(out, "&#%d;", uint32(r))
fmt.Fprintf(out, "&#%d;", uint32(r)) // TODO: avoid `fmt.Fprintf`, use `fmt.Fprint` instead?
}

return out.String()
}
1 change: 0 additions & 1 deletion pkg/renderer/sgml/templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ type Templates struct {
SectionHeader string
SidebarBlock string
SourceBlock string
SpecialCharacter string
StringElement string
SubscriptText string
SuperscriptText string
Expand Down