diff --git a/ast/ast.go b/ast/ast.go
index 67bb0da..7edd63b 100644
--- a/ast/ast.go
+++ b/ast/ast.go
@@ -379,6 +379,11 @@ func (n *BaseNode) Text(source []byte) []byte {
var buf bytes.Buffer
for c := n.firstChild; c != nil; c = c.NextSibling() {
buf.Write(c.Text(source))
+ if sb, ok := c.(interface {
+ SoftLineBreak() bool
+ }); ok && sb.SoftLineBreak() {
+ buf.WriteByte('\n')
+ }
}
return buf.Bytes()
}
diff --git a/ast/ast_test.go b/ast/ast_test.go
index 7cee141..191fffd 100644
--- a/ast/ast_test.go
+++ b/ast/ast_test.go
@@ -1,28 +1,10 @@
package ast
import (
- "bytes"
"reflect"
"testing"
-
- "github.com/yuin/goldmark/text"
)
-func TestRemoveChildren(t *testing.T) {
- root := NewDocument()
-
- node1 := NewDocument()
-
- node2 := NewDocument()
-
- root.AppendChild(root, node1)
- root.AppendChild(root, node2)
-
- root.RemoveChildren(root)
-
- t.Logf("%+v", node2.PreviousSibling())
-}
-
func TestWalk(t *testing.T) {
tests := []struct {
name string
@@ -76,48 +58,3 @@ func node(n Node, children ...Node) Node {
}
return n
}
-
-func TestBaseBlock_Text(t *testing.T) {
- source := []byte(`# Heading
-
- code block here
- and also here
-
-A paragraph
-
-` + "```" + `somelang
-fenced code block
-` + "```" + `
-
-The end`)
-
- t.Run("fetch text from code block", func(t *testing.T) {
- block := NewCodeBlock()
- block.lines = text.NewSegments()
- block.lines.Append(text.Segment{Start: 15, Stop: 31})
- block.lines.Append(text.Segment{Start: 32, Stop: 46})
-
- expected := []byte("code block here\nand also here\n")
- if !bytes.Equal(expected, block.Text(source)) {
- t.Errorf("Expected: %q, got: %q", string(expected), string(block.Text(source)))
- }
- })
-
- t.Run("fetch text from fenced code block", func(t *testing.T) {
- block := NewFencedCodeBlock(&Text{
- Segment: text.Segment{Start: 63, Stop: 71},
- })
- block.lines = text.NewSegments()
- block.lines.Append(text.Segment{Start: 72, Stop: 90})
-
- expectedLang := []byte("somelang")
- if !bytes.Equal(expectedLang, block.Language(source)) {
- t.Errorf("Expected: %q, got: %q", string(expectedLang), string(block.Language(source)))
- }
-
- expected := []byte("fenced code block\n")
- if !bytes.Equal(expected, block.Text(source)) {
- t.Errorf("Expected: %q, got: %q", string(expected), string(block.Text(source)))
- }
- })
-}
diff --git a/ast/block.go b/ast/block.go
index 04d0d54..467819e 100644
--- a/ast/block.go
+++ b/ast/block.go
@@ -1,7 +1,6 @@
package ast
import (
- "bytes"
"fmt"
"strings"
@@ -48,15 +47,6 @@ func (b *BaseBlock) SetLines(v *textm.Segments) {
b.lines = v
}
-// Text implements Node.Text.
-func (b *BaseBlock) Text(source []byte) []byte {
- var buf bytes.Buffer
- for _, line := range b.Lines().Sliced(0, b.Lines().Len()) {
- buf.Write(line.Value(source))
- }
- return buf.Bytes()
-}
-
// A Document struct is a root node of Markdown text.
type Document struct {
BaseBlock
@@ -140,6 +130,11 @@ func (n *TextBlock) Kind() NodeKind {
return KindTextBlock
}
+// Text implements Node.Text; it returns the concatenated values of the block's line segments.
+func (n *TextBlock) Text(source []byte) []byte {
+ return n.Lines().Value(source)
+}
+
// NewTextBlock returns a new TextBlock node.
func NewTextBlock() *TextBlock {
return &TextBlock{
@@ -165,6 +160,11 @@ func (n *Paragraph) Kind() NodeKind {
return KindParagraph
}
+// Text implements Node.Text; it returns the concatenated values of the paragraph's line segments.
+func (n *Paragraph) Text(source []byte) []byte {
+ return n.Lines().Value(source)
+}
+
// NewParagraph returns a new Paragraph node.
func NewParagraph() *Paragraph {
return &Paragraph{
@@ -259,6 +259,11 @@ func (n *CodeBlock) Kind() NodeKind {
return KindCodeBlock
}
+// Text implements Node.Text; it returns the concatenated values of the code block's line segments.
+func (n *CodeBlock) Text(source []byte) []byte {
+ return n.Lines().Value(source)
+}
+
// NewCodeBlock returns a new CodeBlock node.
func NewCodeBlock() *CodeBlock {
return &CodeBlock{
@@ -314,6 +319,11 @@ func (n *FencedCodeBlock) Kind() NodeKind {
return KindFencedCodeBlock
}
+// Text implements Node.Text; it returns the concatenated values of the fenced code block's line segments.
+func (n *FencedCodeBlock) Text(source []byte) []byte {
+ return n.Lines().Value(source)
+}
+
// NewFencedCodeBlock return a new FencedCodeBlock node.
func NewFencedCodeBlock(info *Text) *FencedCodeBlock {
return &FencedCodeBlock{
@@ -508,6 +518,15 @@ func (n *HTMLBlock) Kind() NodeKind {
return KindHTMLBlock
}
+// Text implements Node.Text; it returns the block's line values followed by the closure line when HasClosure reports one.
+func (n *HTMLBlock) Text(source []byte) []byte {
+ ret := n.Lines().Value(source)
+ if n.HasClosure() {
+ ret = append(ret, n.ClosureLine.Value(source)...)
+ }
+ return ret
+}
+
// NewHTMLBlock returns a new HTMLBlock node.
func NewHTMLBlock(typ HTMLBlockType) *HTMLBlock {
return &HTMLBlock{
diff --git a/ast/inline.go b/ast/inline.go
index 7e4c51f..9df8470 100644
--- a/ast/inline.go
+++ b/ast/inline.go
@@ -503,6 +503,11 @@ func (n *AutoLink) Label(source []byte) []byte {
return n.value.Text(source)
}
+// Text implements Node.Text; like Label, it returns the text of the link's value node.
+func (n *AutoLink) Text(source []byte) []byte {
+ return n.value.Text(source)
+}
+
// NewAutoLink returns a new AutoLink node.
func NewAutoLink(typ AutoLinkType, value *Text) *AutoLink {
return &AutoLink{
@@ -541,6 +546,11 @@ func (n *RawHTML) Kind() NodeKind {
return KindRawHTML
}
+// Text implements Node.Text; it returns the concatenated values of the node's Segments.
+func (n *RawHTML) Text(source []byte) []byte {
+ return n.Segments.Value(source)
+}
+
// NewRawHTML returns a new RawHTML node.
func NewRawHTML() *RawHTML {
return &RawHTML{
diff --git a/ast_test.go b/ast_test.go
new file mode 100644
index 0000000..e5e6016
--- /dev/null
+++ b/ast_test.go
@@ -0,0 +1,200 @@
+package goldmark_test
+
+import (
+ "bytes"
+ "testing"
+
+ . "github.com/yuin/goldmark"
+ "github.com/yuin/goldmark/testutil"
+ "github.com/yuin/goldmark/text"
+)
+
+func TestASTBlockNodeText(t *testing.T) {
+	var cases = []struct {
+		Name   string // subtest name
+		Source string // Markdown input: a block, the paragraph "a", then the same kind of block ending at EOF
+		T1     string // expected Text of the first block
+		T2     string // expected Text of the block that ends at EOF
+		C      bool   // when true, compare the first child of each block instead of the block itself
+	}{
+		{
+			Name: "AtxHeading",
+			Source: `# l1
+
+a
+
+# l2`,
+			T1: `l1`,
+			T2: `l2`,
+		},
+		{
+			Name: "SetextHeading",
+			Source: `l1
+l2
+===============
+
+a
+
+l3
+l4
+==============`,
+			T1: `l1
+l2`,
+			T2: `l3
+l4`,
+		},
+		{
+			Name: "CodeBlock",
+			Source: `    l1
+    l2
+
+a
+
+    l3
+    l4`,
+			T1: `l1
+l2
+`,
+			T2: `l3
+l4
+`,
+		},
+		{
+			Name: "FencedCodeBlock",
+			Source: "```" + `
+l1
+l2
+` + "```" + `
+
+a
+
+` + "```" + `
+l3
+l4`,
+			T1: `l1
+l2
+`,
+			T2: `l3
+l4
+`,
+		},
+		{
+			Name: "Blockquote",
+			Source: `> l1
+> l2
+
+a
+
+> l3
+> l4`,
+			T1: `l1
+l2`,
+			T2: `l3
+l4`,
+		},
+		{
+			Name: "List",
+			Source: `- l1
+  l2
+
+a
+
+- l3
+  l4`,
+			T1: `l1
+l2`,
+			T2: `l3
+l4`,
+			C: true,
+		},
+		{
+			Name: "HTMLBlock",
+			Source: `<div>
+l1
+l2
+</div>
+
+a
+
+<div>
+l3
+l4`,
+			T1: `<div>
+l1
+l2
+</div>
+`,
+			T2: `<div>
+l3
+l4`,
+		},
+	}
+
+	for _, cs := range cases {
+		t.Run(cs.Name, func(t *testing.T) {
+			s := []byte(cs.Source)
+			md := New()
+			n := md.Parser().Parse(text.NewReader(s))
+			c1 := n.FirstChild()
+			c2 := c1.NextSibling().NextSibling() // skip the separator paragraph "a"
+			if cs.C {
+				c1 = c1.FirstChild()
+				c2 = c2.FirstChild()
+			}
+			if !bytes.Equal(c1.Text(s), []byte(cs.T1)) {
+				t.Errorf("%s unmatch: %s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1)))
+			}
+			if !bytes.Equal(c2.Text(s), []byte(cs.T2)) {
+				t.Errorf("%s(EOF) unmatch: %s", cs.Name, testutil.DiffPretty(c2.Text(s), []byte(cs.T2)))
+			}
+		})
+	}
+
+}
+
+func TestASTInlineNodeText(t *testing.T) {
+	var cases = []struct {
+		Name   string // subtest name
+		Source string // Markdown input whose first inline node is under test
+		T1     string // expected Text of that inline node
+	}{
+		{
+			Name:   "CodeSpan",
+			Source: "`c1`",
+			T1:     `c1`,
+		},
+		{
+			Name:   "Emphasis",
+			Source: `*c1 **c2***`,
+			T1:     `c1 c2`,
+		},
+		{
+			Name:   "Link",
+			Source: `[label](url)`,
+			T1:     `label`,
+		},
+		{
+			Name:   "AutoLink",
+			Source: `<http://url>`,
+			T1:     `http://url`,
+		},
+		{
+			Name:   "RawHTML",
+			Source: `<span>c1</span>`,
+			T1:     `<span>`,
+		},
+	}
+
+	for _, cs := range cases {
+		t.Run(cs.Name, func(t *testing.T) {
+			s := []byte(cs.Source)
+			md := New()
+			n := md.Parser().Parse(text.NewReader(s))
+			c1 := n.FirstChild().FirstChild() // first inline child of the first block
+			if !bytes.Equal(c1.Text(s), []byte(cs.T1)) {
+				t.Errorf("%s unmatch:\n%s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1)))
+			}
+		})
+	}
+
+}
diff --git a/extension/ast_test.go b/extension/ast_test.go
new file mode 100644
index 0000000..e0c24b1
--- /dev/null
+++ b/extension/ast_test.go
@@ -0,0 +1,117 @@
+package extension
+
+import (
+ "bytes"
+ "testing"
+
+ "github.com/yuin/goldmark"
+ "github.com/yuin/goldmark/renderer/html"
+ "github.com/yuin/goldmark/testutil"
+ "github.com/yuin/goldmark/text"
+)
+
+func TestASTBlockNodeText(t *testing.T) {
+ var cases = []struct {
+ Name string // subtest name
+ Source string // Markdown input to parse
+ T1 string // expected Text of the document's first child
+ T2 string // expected Text of the document's third child, which ends at EOF
+ C bool // when true, compare the first child of each of those nodes instead
+ }{
+ {
+ Name: "DefinitionList",
+ Source: `c1
+: c2
+ c3
+
+a
+
+c4
+: c5
+ c6`,
+ T1: `c1c2
+c3`,
+ T2: `c4c5
+c6`,
+ },
+ {
+ Name: "Table",
+ Source: `| h1 | h2 |
+| -- | -- |
+| c1 | c2 |
+
+a
+
+
+| h3 | h4 |
+| -- | -- |
+| c3 | c4 |`,
+
+ T1: `h1h2c1c2`,
+ T2: `h3h4c3c4`,
+ },
+ }
+
+ for _, cs := range cases {
+ t.Run(cs.Name, func(t *testing.T) {
+ s := []byte(cs.Source)
+ md := goldmark.New(
+ goldmark.WithRendererOptions(
+ html.WithUnsafe(),
+ ),
+ goldmark.WithExtensions(
+ DefinitionList,
+ Table,
+ ),
+ )
+ n := md.Parser().Parse(text.NewReader(s))
+ c1 := n.FirstChild()
+ c2 := c1.NextSibling().NextSibling() // skip the node between the two blocks under test
+ if cs.C {
+ c1 = c1.FirstChild()
+ c2 = c2.FirstChild()
+ }
+ if !bytes.Equal(c1.Text(s), []byte(cs.T1)) {
+ t.Errorf("%s unmatch:\n%s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1)))
+ }
+ if !bytes.Equal(c2.Text(s), []byte(cs.T2)) {
+ t.Errorf("%s(EOF) unmatch: %s", cs.Name, testutil.DiffPretty(c2.Text(s), []byte(cs.T2)))
+ }
+ })
+ }
+
+}
+
+func TestASTInlineNodeText(t *testing.T) {
+ var cases = []struct {
+ Name string // subtest name
+ Source string // Markdown input to parse
+ T1 string // expected Text of the first child of the document's first child
+ }{
+ {
+ Name: "Strikethrough",
+ Source: `~c1 *c2*~`,
+ T1: `c1 c2`,
+ },
+ }
+
+ for _, cs := range cases {
+ t.Run(cs.Name, func(t *testing.T) {
+ s := []byte(cs.Source)
+ md := goldmark.New(
+ goldmark.WithRendererOptions(
+ html.WithUnsafe(),
+ ),
+ goldmark.WithExtensions(
+ Strikethrough,
+ ),
+ )
+ n := md.Parser().Parse(text.NewReader(s))
+ c1 := n.FirstChild().FirstChild() // node whose Text is checked against T1
+ if !bytes.Equal(c1.Text(s), []byte(cs.T1)) {
+ t.Errorf("%s unmatch:\n%s", cs.Name, testutil.DiffPretty(c1.Text(s), []byte(cs.T1)))
+ }
+ })
+ }
+
+}
diff --git a/parser/code_block.go b/parser/code_block.go
index 732f18c..d99146c 100644
--- a/parser/code_block.go
+++ b/parser/code_block.go
@@ -35,6 +35,7 @@ func (b *codeBlockParser) Open(parent ast.Node, reader text.Reader, pc Context)
if segment.Padding != 0 {
preserveLeadingTabInCodeBlock(&segment, reader, 0)
}
+ segment.ForceNewline = true
node.Lines().Append(segment)
reader.Advance(segment.Len() - 1)
return node, NoChildren
@@ -59,6 +60,7 @@ func (b *codeBlockParser) Continue(node ast.Node, reader text.Reader, pc Context
preserveLeadingTabInCodeBlock(&segment, reader, 0)
}
+ segment.ForceNewline = true
node.Lines().Append(segment)
reader.Advance(segment.Len() - 1)
return Continue | NoChildren
diff --git a/parser/fcode_block.go b/parser/fcode_block.go
index e51a35a..953b8dc 100644
--- a/parser/fcode_block.go
+++ b/parser/fcode_block.go
@@ -100,6 +100,7 @@ func (b *fencedCodeBlockParser) Continue(node ast.Node, reader text.Reader, pc C
if padding != 0 {
preserveLeadingTabInCodeBlock(&seg, reader, fdata.indent)
}
+ seg.ForceNewline = true // EOF as newline
node.Lines().Append(seg)
reader.AdvanceAndSetPadding(segment.Stop-segment.Start-pos-1, padding)
return Continue | NoChildren
diff --git a/parser/parser.go b/parser/parser.go
index b59666c..b05db13 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -878,12 +878,6 @@ func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
blockReader := text.NewBlockReader(reader.Source(), nil)
p.walkBlock(root, func(node ast.Node) {
p.parseBlock(blockReader, node, pc)
- lines := node.Lines()
- if lines != nil && lines.Len() != 0 {
- s := lines.At(lines.Len() - 1)
- s.EOB = true
- lines.Set(lines.Len()-1, s)
- }
})
for _, at := range p.astTransformers {
at.Transform(root, reader, pc)
diff --git a/text/segment.go b/text/segment.go
index 83c875b..93fbf19 100644
--- a/text/segment.go
+++ b/text/segment.go
@@ -20,8 +20,19 @@ type Segment struct {
// Padding is a padding length of the segment.
Padding int
- // EOB is true if the segment is end of the block.
- EOB bool
+ // ForceNewline is true if the segment should be ended with a newline.
+ // Some elements (e.g. CodeBlock, FencedCodeBlock) do not trim trailing
+ // newlines. The spec defines that EOF is treated as a newline, so we need
+ // to add a newline to the end of the segment if it is not empty.
+ //
+ // For example:
+ //
+ // ```go
+ // const test = "test"
+ //
+ // This code does not close the code block and ends with EOF. In this case,
+ // we need to add a newline to the end of the last line like `const test = "test"\n`.
+ ForceNewline bool
}
// NewSegment return a new Segment.
@@ -52,7 +63,7 @@ func (t *Segment) Value(buffer []byte) []byte {
result = append(result, bytes.Repeat(space, t.Padding)...)
result = append(result, buffer[t.Start:t.Stop]...)
}
- if t.EOB && len(result) > 0 && result[len(result)-1] != '\n' {
+ if t.ForceNewline && len(result) > 0 && result[len(result)-1] != '\n' {
result = append(result, '\n')
}
return result
@@ -217,3 +228,12 @@ func (s *Segments) Unshift(v Segment) {
s.values = append(s.values[0:1], s.values[0:]...)
s.values[0] = v
}
+
+// Value returns the concatenation of the values of all segments in the collection.
+func (s *Segments) Value(buffer []byte) []byte {
+ var result []byte
+ for _, v := range s.values {
+ result = append(result, v.Value(buffer)...)
+ }
+ return result
+}