From d21e1ceeed8a93a36d03bf2f791af2c64b588a8c Mon Sep 17 00:00:00 2001 From: Daniel Potapov Date: Tue, 23 Apr 2024 11:49:15 -0500 Subject: [PATCH] Add support for auto-closing tags --- etree.go | 31 +++++++++++++++++++++++++++++++ etree_test.go | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/etree.go b/etree.go index f5b99f5..761bece 100644 --- a/etree.go +++ b/etree.go @@ -52,6 +52,12 @@ type ReadSettings struct { // Entity to be passed to standard xml.Decoder. Default: nil. Entity map[string]string + + // When Permissive is true, AutoClose indicates a set of elements to + // consider closed immediately after they are opened, regardless of + // whether an end element is present. Commonly set to xml.HTMLAutoClose. + // Default: nil. + AutoClose []string } // newReadSettings creates a default ReadSettings record. @@ -796,6 +802,27 @@ func (e *Element) RemoveChildAt(index int) Token { return t } +// autoClose analyzes the stack's top element and the current token to decide +// whether the top element should be closed. +func (e *Element) autoClose(stack *stack, t xml.Token, tags []string) { + if stack.empty() { + return + } + + top := stack.peek().(*Element) + + for _, tag := range tags { + if strings.EqualFold(tag, top.FullTag()) { + if e, ok := t.(xml.EndElement); !ok || + !strings.EqualFold(e.Name.Space, top.Space) || + !strings.EqualFold(e.Name.Local, top.Tag) { + stack.pop() + } + break + } + } +} + // ReadFrom reads XML from the reader 'ri' and stores the result as a new // child of this element. func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err error) { @@ -822,6 +849,10 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er t, err := dec.RawToken() + if settings.Permissive && settings.AutoClose != nil { + e.autoClose(&stack, t, settings.AutoClose) + } + switch { case err == io.EOF: if len(stack.data) != 1 { diff --git a/etree_test.go b/etree_test.go index cef2c96..e501d7b 100644 --- a/etree_test.go +++ b/etree_test.go @@ -348,6 +348,45 @@ func TestDocumentReadHTMLEntities(t *testing.T) { } } +func TestDocumentReadHTMLAutoClose(t *testing.T) { + cases := []struct { + name string + input string + want string + }{ + {"empty", ``, ``}, + {"oneSelfClosing", `
`, `
`}, + {"twoSelfClosingAndText", `
some text
`, `
some text
`}, + { + name: "largerExample", + input: ` +
+Author: Charles Dickens
+Book: Great Expectations
`, + want: ` +
+Author: Charles Dickens
+Book: Great Expectations
`}, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + doc := NewDocument() + doc.ReadSettings.Permissive = true + doc.ReadSettings.AutoClose = xml.HTMLAutoClose + err := doc.ReadFromString(c.input) + if err != nil { + t.Fatal("etree: ReadFromString() error = ", err) + } + s, err := doc.WriteToString() + if err != nil { + t.Fatal("etree: WriteToString() error = ", err) + } + checkStrEq(t, s, c.want) + }) + } +} + func TestEscapeCodes(t *testing.T) { cases := []struct { input string