-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
134 lines (134 loc) · 4.2 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
let cheerio = require("cheerio"),
fs = require("fs"),
nodeExcel = require('excel-export'),
util = require("util"),
https = require('https'),
source_url = 'https://www.yidaiyilu.gov.cn',
// url = 'https://www.yidaiyilu.gov.cn/info/iList.jsp?cat_id=10005&cur_page=1';
url = 'https://www.yidaiyilu.gov.cn/';
// Entry point: download the home page, collect article links from the
// ".wtfz_list_right" list, then hand them to readycontentdata() which
// downloads each article and triggers the Excel export.

// Number of links (taken from the top of the list) that get collected.
// The original code only ever accepted the first link.
const MAX_ARTICLES = 1;
const newslist = [];

const req = https.request(url, (res) => {
  const chunks = [];
  res.on('data', (chunk) => {
    chunks.push(chunk);
  });
  res.on('error', (err) => {
    console.error('home page response failed:', err);
  });
  res.on('end', async () => {
    // Everything is wrapped so a parsing or download failure is reported
    // instead of surfacing as an unhandled promise rejection.
    try {
      const html = Buffer.concat(chunks).toString();
      // Local binding: avoids leaking an implicit global `$`.
      const $ = cheerio.load(html);
      $('.wtfz_list_right ul').find('li').find('a').each((index, ele) => {
        if (newslist.length >= MAX_ARTICLES) {
          return false; // returning false stops the cheerio iteration
        }
        const alink = $(ele).attr('href');
        if (!alink) {
          return undefined; // anchor without a target: nothing to fetch
        }
        newslist.push({
          title: $(ele).text(),
          // Resolve against the site root so relative and absolute hrefs both work.
          url: new URL(alink, source_url).href,
        });
        return undefined;
      });
      // The old guard (`3 === newslist.length`) could never be true because
      // at most one link was collected, so the pipeline never ran.
      if (newslist.length === 0) {
        console.error('no article links found on the home page');
        return;
      }
      await readycontentdata(newslist);
    } catch (err) {
      console.error('scraping failed:', err);
    }
  });
});
// Without this handler a connection failure would crash the process.
req.on('error', (err) => {
  console.error('home page request failed:', err);
});
req.end();
/**
 * Downloads every article page listed in `data`, extracts its fields with
 * cheerio, then passes the collected rows through `readydata` and on to
 * `exportdata`.
 *
 * All requests are started together; the resulting rows keep the order of
 * `data` regardless of which response arrives first.
 *
 * @param {Array<{url: string}>} data - Articles to download; only `url` is read.
 * @returns {Promise<void>} Settles once the export has finished, or right
 *   away when `data` is empty (nothing is exported in that case).
 * @throws Rejects when a page cannot be downloaded or parsed, or when
 *   `readydata` / `exportdata` reject.
 */
async function readycontentdata(data) {
  if (data.length === 0) {
    return;
  }
  // Iterating the array itself (not an index up to and including `length`)
  // removes the out-of-range access the old `i <= data.length` loop made.
  const arr = await Promise.all(data.map((item) => fetchArticle(item.url)));
  const r = await readydata(arr);
  await exportdata(r);
}

/**
 * Requests one article page and resolves with the fields extracted from it.
 *
 * @param {string} articleUrl - Address of the article page.
 * @returns {Promise<object>} The row produced by `parseArticle`.
 */
function fetchArticle(articleUrl) {
  return new Promise((resolve, reject) => {
    const request = https.request(articleUrl, (res) => {
      const chunks = [];
      res.on('data', (chunk) => {
        chunks.push(chunk);
      });
      res.on('error', reject);
      res.on('end', () => {
        try {
          resolve(parseArticle(Buffer.concat(chunks).toString()));
        } catch (err) {
          reject(err);
        }
      });
    });
    request.on('error', reject);
    request.end();
  });
}

/**
 * Extracts title, date, source, body text and editor from an article page.
 *
 * @param {string} html - Markup of the article page.
 * @returns {{create_content: string, create_title: string, create_date: string,
 *   create_born: string, create_editor: string}} Extracted row.
 */
function parseArticle(html) {
  // Local binding: avoids assigning to an implicit global `$`.
  const $ = cheerio.load(html);
  let content = '';
  // Paragraph texts are concatenated, each followed by the '^^^^^^' marker.
  $('#zoom .info_content').find('p').each((index, ele) => {
    content += $(ele).text() + '^^^^^^';
  });
  return {
    create_content: content,
    create_title: $('#zoom .main_content_title').text(),
    create_date: $('#zoom div .szty .szty1').text(),
    create_born: $('#zoom div .szty .szty2').text(),
    create_editor: $('#zoom .editor').text(),
  };
}
/**
 * Preparation hook that runs on the scraped rows before they are exported.
 * At present it is a pass-through: the promise resolves to the same `data`
 * value that was passed in.
 *
 * @param {*} data - Rows to prepare.
 * @returns {Promise<*>} Resolves to `data`, unchanged.
 */
async function readydata(data) {
  // Do something here, e.g. fetch data from a database.
  return data;
}
// Export
/**
 * Builds a one-sheet workbook from the scraped rows with `nodeExcel` and
 * writes it to `./upload-excel/mysheet-<timestamp>.xlsx`.
 *
 * The output directory is created when missing, and the returned promise
 * only settles after the file has been written, so callers can await the
 * export and catch write failures.
 *
 * @param {ArrayLike<object>} v - Rows to export; each row's `create_title`,
 *   `create_date`, `create_born`, `create_content` and `create_editor` are read.
 * @returns {Promise<void>} Resolves after the file is on disk.
 * @throws Rejects when the directory cannot be created or the file cannot be written.
 */
async function exportdata(v) {
  const conf = {
    name: 'mysheet', // sheet name
    // Column captions and types, in the same order as the row cells below.
    cols: [
      { caption: '标题', type: 'string' }, // title
      { caption: '日期', type: 'string' }, // date
      { caption: '来源', type: 'string' }, // source
      { caption: '内容', type: 'string' }, // content
      { caption: '编辑', type: 'string' }, // editor
    ],
    // One array of cells per scraped row.
    rows: Array.from(v, (row) => [
      row.create_title,
      row.create_date,
      row.create_born,
      row.create_content,
      row.create_editor,
    ]),
  };

  const outputDir = './upload-excel';
  const timestamp = new Date().getTime();
  const result = nodeExcel.execute(conf);
  const data = Buffer.from(result, 'binary');

  // Writing into a missing directory fails with ENOENT, so create it first.
  await fs.promises.mkdir(outputDir, { recursive: true });
  await fs.promises.writeFile(`${outputDir}/${conf.name}-${timestamp}.xlsx`, data);
  console.log('------success------');
}