forked from foolin/pagser
-
Notifications
You must be signed in to change notification settings - Fork 0
/
doc_test.go
167 lines (143 loc) · 3.26 KB
/
doc_test.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
package pagser
import (
"github.com/PuerkitoBio/goquery"
"log"
"net/http"
"strings"
)
// rawExampleHtml is the HTML fixture shared by the examples in this file.
// It contains a <title>, an <h1>, a ".navlink" list of <li><a> entries (the
// first <li> has an empty id attribute), a ".words" div and several <input>
// elements.
// NOTE(review): both "email" inputs carry the value "[email protected]", which
// looks like scrape-time address obfuscation rather than real fixture data —
// confirm against upstream before relying on those values.
const rawExampleHtml = `
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>Pagser Example</title>
<meta name="keywords" content="golang,pagser,goquery,html,page,parser,colly">
</head>
<body>
<h1><u>Pagser</u> H1 Title</h1>
<div class="navlink">
<div class="container">
<ul class="clearfix">
<li id=''><a href="/">Index</a></li>
<li id='2'><a href="/list/web" title="web site">Web page</a></li>
<li id='3'><a href="/list/pc" title="pc page">Pc Page</a></li>
<li id='4'><a href="/list/mobile" title="mobile page">Mobile Page</a></li>
</ul>
</div>
</div>
<div class='words' show="true">A|B|C|D</div>
<input name="email" value="[email protected]" />
<input name="email" value="[email protected]" />
<input name="bool" value="true" />
<input name="bool" value="false" />
<input name="number" value="12345" />
<input name="number" value="67890" />
<input name="float" value="123.45" />
<input name="float" value="678.90" />
</body>
</html>
`
// ExamplePage is the model every example in this file parses into. Each
// field carries a `pagser` struct tag; the tag text is presumably the
// selector (optionally followed by "->" and a function call) that pagser
// uses to fill the field — confirm against the pagser tag grammar.
type ExamplePage struct {
// Title is tagged with the "title" selector.
Title string `pagser:"title"`
// H1 is tagged with the "h1" selector.
H1 string `pagser:"h1"`
// Navs is tagged with ".navlink li"; presumably one entry is produced per
// matched <li> and the inner tags are evaluated relative to it — TODO confirm.
Navs []struct {
// ID calls attrEmpty with arguments (id, -1); presumably -1 is the
// fallback when the id attribute is empty — TODO confirm.
ID int `pagser:"->attrEmpty(id, -1)"`
// Name is tagged with the "a" selector.
Name string `pagser:"a"`
// Url is tagged with "a->attr(href)".
Url string `pagser:"a->attr(href)"`
} `pagser:".navlink li"`
}
func ExampleNewWithConfig() {
cfg := Config{
TagName: "pagser",
FuncSymbol: "->",
CastError: false,
Debug: false,
}
p, err := NewWithConfig(cfg)
if err != nil {
log.Fatal(err)
}
//data parser model
var page ExamplePage
//parse html data
err = p.Parse(&page, rawExampleHtml)
//check error
if err != nil {
log.Fatal(err)
}
}
// ExamplePagser_Parse parses the shared HTML fixture from a string with a
// parser created by New and logs the populated model.
func ExamplePagser_Parse() {
	// Parser created by New, without an explicit Config.
	parser := New()

	// Destination model for the parsed data.
	var result ExamplePage

	// Parse the raw HTML string; abort the example on failure.
	if parseErr := parser.Parse(&result, rawExampleHtml); parseErr != nil {
		log.Fatal(parseErr)
	}

	// Log the populated model.
	log.Printf("%v", result)
}
// ExamplePagser_ParseDocument builds a goquery document from the shared HTML
// fixture and parses that document into an ExamplePage.
func ExamplePagser_ParseDocument() {
	// Parser created by New, without an explicit Config.
	parser := New()

	// Destination model for the parsed data.
	var result ExamplePage

	// Wrap the fixture in a reader and let goquery build the document.
	htmlReader := strings.NewReader(rawExampleHtml)
	document, err := goquery.NewDocumentFromReader(htmlReader)
	if err != nil {
		log.Fatal(err)
	}

	// Parse the prepared document; abort the example on failure.
	if parseErr := parser.ParseDocument(&result, document); parseErr != nil {
		log.Fatal(parseErr)
	}

	// Log the populated model.
	log.Printf("%v", result)
}
// ExamplePagser_ParseSelection builds a goquery document from the shared HTML
// fixture and parses its Selection into an ExamplePage.
func ExamplePagser_ParseSelection() {
	// Parser created by New, without an explicit Config.
	parser := New()

	// Destination model for the parsed data.
	var result ExamplePage

	// Wrap the fixture in a reader and let goquery build the document.
	htmlReader := strings.NewReader(rawExampleHtml)
	document, err := goquery.NewDocumentFromReader(htmlReader)
	if err != nil {
		log.Fatal(err)
	}

	// Parse starting from the document's Selection; abort on failure.
	selection := document.Selection
	if parseErr := parser.ParseSelection(&result, selection); parseErr != nil {
		log.Fatal(parseErr)
	}

	// Log the populated model.
	log.Printf("%v", result)
}
// ExamplePagser_ParseReader fetches the demo page over HTTP and parses the
// response body, as an io.Reader, into an ExamplePage.
func ExamplePagser_ParseReader() {
	resp, err := http.Get("https://raw.githubusercontent.com/foolin/pagser/master/_examples/pages/demo.html")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// http.Get reports only transport-level failures; a 404 or 500 response
	// still comes back with err == nil and a body. Without this check the
	// error page would be parsed as if it were the demo page.
	// Panic rather than log.Fatal so the deferred Body.Close still runs.
	if resp.StatusCode != http.StatusOK {
		log.Panicf("unexpected HTTP status: %s", resp.Status)
	}

	//New default Config
	p := New()

	//data parser model
	var page ExamplePage

	//parse html data directly from the response body
	if err = p.ParseReader(&page, resp.Body); err != nil {
		// panic (not log.Fatal) so the deferred Body.Close still runs
		panic(err)
	}

	//print result
	log.Printf("%v", page)
}
func ExamplePagser_RegisterFunc() {
p := New()
p.RegisterFunc("MyFunc", func(node *goquery.Selection, args ...string) (out interface{}, err error) {
//Todo
return "Hello", nil
})
}