Skip to content

Commit

Permalink
publisher: Fix tag collector for nested table elements
Browse files Browse the repository at this point in the history
Fixes #7318
  • Loading branch information
bep committed May 27, 2020
1 parent 9152024 commit c950c86
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 1 deletion.
2 changes: 1 addition & 1 deletion hugolib/site_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1123,7 +1123,7 @@ ABC.
els := stats.HTMLElements

b.Assert(els.Classes, qt.HasLen, 3606) // (4 * 900) + 4 +2
b.Assert(els.Tags, qt.HasLen, 8)
b.Assert(els.Tags, qt.HasLen, 9)
b.Assert(els.IDs, qt.HasLen, 1)
}

Expand Down
20 changes: 20 additions & 0 deletions publisher/htmlElementsCollector.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,13 @@ func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) {

w.buff.Reset()

if strings.HasPrefix(s, "</") {
continue
}

s, tagName := w.insertStandinHTMLElement(s)
el := parseHTMLElement(s)
el.Tag = tagName

w.collector.mu.Lock()
w.collector.elementSet[s] = true
Expand All @@ -132,6 +138,20 @@ func (w *cssClassCollectorWriter) Write(p []byte) (n int, err error) {
return
}

// insertStandinHTMLElement rewrites the opening tag el so that its tag name
// becomes "div" and returns the rewritten element together with the original
// tag name in lower case.
//
// The net/html parser does not handle single table elements as input, e.g. tbody.
// We only care about the element/class/ids, so just store away the original tag name
// and pretend it's a <div>.
//
// The tag name is taken to end at the first whitespace character, "/" or ">",
// so elements without attributes (<td>), self-closing elements (<br/>) and
// elements whose attributes start on a new line are handled as well. If no tag
// name can be found (empty input or nothing between "<" and the first
// delimiter), el is returned unchanged together with an empty tag name.
func (c *cssClassCollectorWriter) insertStandinHTMLElement(el string) (string, string) {
	if el == "" {
		return el, ""
	}

	// Skip the leading "<" and find where the tag name stops.
	rest := el[1:]
	end := strings.IndexAny(rest, " \t\r\n\f/>")
	if end == -1 {
		end = len(rest)
	}

	tag := rest[:end]
	if tag == "" {
		return el, ""
	}

	// Splice by position rather than search/replace: the tag name always
	// starts right after the first byte.
	return el[:1] + "div" + rest[end:], strings.ToLower(tag)
}

func (c *cssClassCollectorWriter) endCollecting(drop bool) {
c.isCollecting = false
c.inQuote = false
Expand Down
6 changes: 6 additions & 0 deletions publisher/htmlElementsCollector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ func TestClassCollector(t *testing.T) {
{"duplicates", `<div class="b a b"></div>`, f("div", "a b", "")},
{"single quote", `<body class='b a'></body>`, f("body", "a b", "")},
{"no quote", `<body class=b id=myelement></body>`, f("body", "b", "myelement")},
{"thead", `
https://github.com/gohugoio/hugo/issues/7318
<table class="cl1">
<thead class="cl2"><tr class="cl3"><td class="cl4"></td></tr></thead>
<tbody class="cl5"><tr class="cl6"><td class="cl7"></td></tr></tbody>
</table>`, f("table tbody td thead tr", "cl1 cl2 cl3 cl4 cl5 cl6 cl7", "")},
// https://github.com/gohugoio/hugo/issues/7161
{"minified a href", `<a class="b a" href=/></a>`, f("a", "a b", "")},

Expand Down

1 comment on commit c950c86

@earthboundkid
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had this bug and didn't know why: spotlightpa/poor-richard@96ea0cc

Please sign in to comment.