-
Notifications
You must be signed in to change notification settings - Fork 60.3k
/
domwaiter.js
55 lines (47 loc) · 1.22 KB
/
domwaiter.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import { EventEmitter } from 'node:events'
import Bottleneck from 'bottleneck'
import got from 'got'
import cheerio from 'cheerio'
/**
 * Fetch a list of pages through a rate limiter and report progress as events.
 *
 * Each page is scheduled on a Bottleneck limiter that runs `getPage` for it.
 * The returned emitter fires:
 *   - 'beforePageLoad' (page)  before a page is requested
 *   - 'page' (pageCopy)        after a page was fetched
 *   - 'error' (err)            when a fetch, a scheduled job or the limiter fails
 *   - 'done'                   when the limiter reports `idle`, or asynchronously
 *                              right after this call when `pages` is empty
 *
 * @param {Array<{url: string}>} pages - Pages to fetch; `getPage` reads `url`.
 * @param {object} [opts] - Overrides for the defaults below. The merged object
 *   is also passed to the Bottleneck constructor.
 * @param {boolean} [opts.parseDOM=true] - Attach `cheerio.load(body)` as `$`.
 * @param {boolean} [opts.json=false] - Fetch and parse the response as JSON.
 * @param {number} [opts.maxConcurrent=5] - Bottleneck `maxConcurrent` option.
 * @param {number} [opts.minTime=500] - Bottleneck `minTime` option.
 * @returns {EventEmitter} Emitter on which the events above are fired.
 */
export default function domwaiter(pages, opts = {}) {
  const emitter = new EventEmitter()
  const defaults = {
    parseDOM: true,
    json: false,
    maxConcurrent: 5,
    minTime: 500,
  }
  // Merge into a fresh object instead of reassigning the parameter.
  const settings = { ...defaults, ...opts }
  const limiter = new Bottleneck(settings)

  pages.forEach((page) => {
    limiter
      .schedule(getPage, page, emitter, settings)
      // A job that rejects would otherwise be an unhandled rejection; report
      // it on the emitter like every other failure.
      .catch((err) => emitter.emit('error', err))
  })

  limiter
    .on('idle', () => {
      emitter.emit('done')
    })
    .on('error', (err) => {
      emitter.emit('error', err)
    })

  // With nothing scheduled the limiter never becomes idle, so 'done' would
  // never fire. Defer the emit so the caller can attach listeners first.
  if (pages.length === 0) {
    queueMicrotask(() => emitter.emit('done'))
  }

  return emitter
}
/**
 * Fetch one page and announce the result on `emitter`.
 *
 * Emits 'beforePageLoad' with the original `page` first. On success it emits
 * 'page' with a shallow copy of `page` extended with either `json` (when
 * `opts.json` is truthy) or `body`, plus `$` from `cheerio.load(body)` when
 * `opts.parseDOM` is truthy. Anything thrown while fetching, parsing or
 * emitting 'page' is caught and emitted as 'error' instead.
 *
 * @param {{url: string}} page - Page descriptor; only `url` is read here.
 * @param {{emit: Function}} emitter - Receiver of the events listed above.
 * @param {{json?: boolean, parseDOM?: boolean}} opts - Fetch mode switches.
 * @returns {Promise<void>}
 */
async function getPage(page, emitter, opts) {
  emitter.emit('beforePageLoad', page)

  // Read the mode before entering the try so a missing `opts` still throws
  // rather than being reported as an 'error' event.
  const wantsJson = opts.json

  try {
    let loaded
    if (wantsJson) {
      const json = await got(page.url).json()
      loaded = { ...page, json }
    } else {
      const response = await got(page.url)
      const { body } = response
      loaded = { ...page, body }
      if (opts.parseDOM) {
        loaded.$ = cheerio.load(body)
      }
    }
    emitter.emit('page', loaded)
  } catch (err) {
    emitter.emit('error', err)
  }
}