diff --git a/html/render.go b/html/render.go
index 8b28031905..e8c1233455 100644
--- a/html/render.go
+++ b/html/render.go
@@ -194,9 +194,8 @@ func render1(w writer, n *Node) error {
 		}
 	}
 
-	// Render any child nodes.
-	switch n.Data {
-	case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
+	// Render any child nodes
+	if childTextNodesAreLiteral(n) {
 		for c := n.FirstChild; c != nil; c = c.NextSibling {
 			if c.Type == TextNode {
 				if _, err := w.WriteString(c.Data); err != nil {
@@ -213,7 +212,7 @@ func render1(w writer, n *Node) error {
 			// last element in the file, with no closing tag.
 			return plaintextAbort
 		}
-	default:
+	} else {
 		for c := n.FirstChild; c != nil; c = c.NextSibling {
 			if err := render1(w, c); err != nil {
 				return err
@@ -231,6 +230,27 @@ func render1(w writer, n *Node) error {
 	return w.WriteByte('>')
 }
 
+// childTextNodesAreLiteral reports whether render1 should write the text node
+// children of n verbatim (their Data as-is). Per WHATWG HTML 13.3, if the
+// parent of a text node is a style, script, xmp, iframe, noembed, noframes,
+// or plaintext element, the node's data is appended literally. The
+// specification is not explicit about it, but we only enforce this if we are
+// in the HTML namespace (i.e. when the namespace is "").
+// NOTE: we also always include noscript elements, although the
+// specification states that they should only be rendered as such if
+// scripting is enabled for the node (which is not something we track).
+func childTextNodesAreLiteral(n *Node) bool {
+	if n.Namespace != "" {
+		return false
+	}
+	switch n.Data {
+	case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
+		return true
+	default:
+		return false
+	}
+}
+
 // writeQuoted writes s to w surrounded by quotes. Normally it will use double
 // quotes, but if s contains a double quote, it will use single quotes.
 // It is used for writing the identifiers in a doctype declaration.
diff --git a/html/render_test.go b/html/render_test.go index 08e592be27..22d08641a0 100644 --- a/html/render_test.go +++ b/html/render_test.go @@ -6,6 +6,8 @@ package html import ( "bytes" + "fmt" + "strings" "testing" ) @@ -108,16 +110,16 @@ func TestRenderer(t *testing.T) { // just commentary. The "0:" prefixes are for easy cross-reference with // the nodes array. treeAsText := [...]string{ - 0: ``, - 1: `. `, - 2: `. `, - 3: `. . "0<1"`, - 4: `. .

`, - 5: `. . . "2"`, - 6: `. . . `, - 7: `. . . . "3"`, - 8: `. . . `, - 9: `. . . . "&4"`, + 0: ``, + 1: `. `, + 2: `. `, + 3: `. . "0<1"`, + 4: `. .

`, + 5: `. . . "2"`, + 6: `. . . `, + 7: `. . . . "3"`, + 8: `. . . `, + 9: `. . . . "&4"`, 10: `. . "5"`, 11: `. .

`, 12: `. .
`,
@@ -169,3 +171,44 @@ func TestRenderer(t *testing.T) {
 		t.Errorf("got vs want:\n%s\n%s\n", got, want)
 	}
 }
+
+// TestRenderTextNodes parses and re-renders a document holding a single "&"
+// text child inside each element of elements, once directly in the HTML
+// namespace and once nested in an svg or math element.
+func TestRenderTextNodes(t *testing.T) {
+	elements := []string{"style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"}
+	for _, namespace := range []string{
+		"", // html
+		"svg",
+		"math",
+	} {
+		for _, e := range elements {
+			var namespaceOpen, namespaceClose string
+			if namespace != "" {
+				namespaceOpen, namespaceClose = fmt.Sprintf("<%s>", namespace), fmt.Sprintf("</%s>", namespace)
+			}
+			// The html/head/body wrapper is spelled out because the rendered
+			// output is compared against the input document itself.
+			doc := fmt.Sprintf(`<html><head></head><body>%s<%s>&</%s>%s</body></html>`, namespaceOpen, e, e, namespaceClose)
+			n, err := Parse(strings.NewReader(doc))
+			if err != nil {
+				t.Fatal(err)
+			}
+			b := bytes.NewBuffer(nil)
+			if err := Render(b, n); err != nil {
+				t.Fatal(err)
+			}
+
+			// In the HTML namespace the round trip must be exact; inside a
+			// foreign namespace the bare "&" is expected back as "&amp;".
+			expected := doc
+			if namespace != "" {
+				expected = strings.Replace(expected, "&", "&amp;", 1)
+			}
+
+			if b.String() != expected {
+				t.Errorf("unexpected output: got %q, want %q", b.String(), expected)
+			}
+		}
+	}
+}