Skip to content

Commit

Permalink
parser: Handle self-closing elements and void elements (#127)
Browse files Browse the repository at this point in the history
- Add `selfClosing` field to `nodeElement` struct to track self-closing tags
- Update `match` function to accept multiple token types
- Check for self-closing tags in `parseElement` and set `selfClosing` field
- Return early for void elements in `parseElement`
- Refactor `parseChildren` to handle both start and self-closing tags
- Add `isVoidElement` function to check if a tag is a void element
- Update tests to cover self-closing and void elements

🌟 These changes improve the HTML parser to properly handle self-closing
elements and void elements according to the HTML spec.

Fixes #126
  • Loading branch information
paulsmith authored Oct 11, 2024
1 parent aefc63b commit 530852a
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 18 deletions.
1 change: 1 addition & 0 deletions ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ type nodeElement struct {
startTagNodes []node
children []node
pos span
selfClosing bool
}

func (e nodeElement) Pos() span { return e.pos }
Expand Down
62 changes: 45 additions & 17 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -420,23 +420,35 @@ func newTag(tagname []byte, attrs []*attr) tag {
return tag{name: string(tagname), attrs: attrs}
}

func (p *htmlParser) match(typ html.TokenType) bool {
return p.toktyp == typ
// match reports whether the parser's current token type is equal to any of
// the supplied token types. Called with no arguments, it reports false.
func (p *htmlParser) match(types ...html.TokenType) bool {
	current := p.toktyp
	for i := 0; i < len(types); i++ {
		if types[i] == current {
			return true
		}
	}
	return false
}

func (p *htmlParser) parseElement() node {
var result *nodeElement

// FIXME(paulsmith): handle self-closing elements
if !p.match(html.StartTagToken) {
p.errorf("expected an HTML element start tag, got %s", p.toktyp)
if !p.match(html.StartTagToken, html.SelfClosingTagToken) {
p.errorf("expected an HTML element start or self-closing tag, got %s", p.toktyp)
}

result = new(nodeElement)
result.tag = newTag(p.tagname, p.attrs)
result.pos.start = p.parser.offset - len(p.raw)
result.pos.end = p.parser.offset
if p.match(html.SelfClosingTagToken) {
result.selfClosing = true
}
result.startTagNodes = p.parseStartTag()

if isVoidElement(result.tag.name) {
return result
}

p.advance()

result.children = p.parseChildren()
Expand All @@ -462,31 +474,28 @@ func (p *htmlParser) parseChildren() []node {
var elemStack []*nodeElement
loop:
for {
switch p.toktyp {
switch tt := p.toktyp; tt {
case html.ErrorToken:
if p.err == io.EOF {
break loop
} else {
p.errorf("HTML tokenizer: %w", p.err)
}
case html.SelfClosingTagToken:
elem := new(nodeElement)
elem.tag = newTag(p.tagname, p.attrs)
elem.pos.start = p.parser.offset - len(p.raw)
elem.pos.end = p.parser.offset
elem.startTagNodes = p.parseStartTag()
p.advance()
result = append(result, elem)
case html.StartTagToken:
case html.StartTagToken, html.SelfClosingTagToken:
elem := new(nodeElement)
elem.tag = newTag(p.tagname, p.attrs)
elem.pos.start = p.parser.offset - len(p.raw)
elem.pos.end = p.parser.offset
elem.startTagNodes = p.parseStartTag()
if tt == html.SelfClosingTagToken {
elem.selfClosing = true
}
p.advance()
elem.children = p.parseChildren()
if !isVoidElement(elem.tag.name) {
elem.children = p.parseChildren()
elemStack = append(elemStack, elem)
}
result = append(result, elem)
elemStack = append(elemStack, elem)
case html.EndTagToken:
if len(elemStack) == 0 {
return result
Expand Down Expand Up @@ -534,6 +543,25 @@ loop:
return result
}

// isVoidElement reports whether tagname names an HTML void element.
//
// The check is an exact, case-sensitive comparison against the lowercase
// element names listed in the spec:
// https://html.spec.whatwg.org/multipage/syntax.html#void-elements
//
// A switch over the constant names is used rather than a map literal so that
// no map is allocated and populated on every call.
func isVoidElement(tagname string) bool {
	switch tagname {
	case "area", "base", "br", "col", "embed", "hr", "img", "input",
		"link", "meta", "source", "track", "wbr":
		return true
	}
	return false
}

type Optional[T any] struct {
value *T
}
Expand Down
23 changes: 22 additions & 1 deletion parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,27 @@ func TestParse(t *testing.T) {
},
},
},
{
`^if true { <br/> }`,
&syntaxTree{
nodes: []node{
&nodeIf{
cond: &nodeGoStrExpr{expr: "true", pos: span{start: 4, end: 8}},
then: &nodeBlock{
nodes: []node{
&nodeLiteral{str: " ", pos: span{10, 11}},
&nodeElement{
tag: tag{name: "br"},
startTagNodes: []node{&nodeLiteral{str: "<br/>", pos: span{start: 11, end: 16}}},
pos: span{11, 16},
selfClosing: true,
},
},
},
},
},
},
},
}
opts := cmp.AllowUnexported(unexported...)
for _, test := range tests {
Expand Down Expand Up @@ -450,7 +471,7 @@ func TestParseSyntaxErrors(t *testing.T) {
{
`^if true {
<illegal />
}`, 2, 13,
}`, 3, 2,
},
// FIXME(paulsmith): add more syntax errors
}
Expand Down

0 comments on commit 530852a

Please sign in to comment.