-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.js
76 lines (64 loc) · 1.86 KB
/
app.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
const puppeteer = require("puppeteer");
const mongo = require("mongodb").MongoClient
const url = "mongodb://localhost:27017"
// Module-level handles; they stay undefined until the connection callback
// below has run successfully.
let db;
let jobs;

// Options handed to the MongoDB client when connecting.
const connectionOptions = {
  useNewUrlParser: true,
  useUnifiedTopology: true,
};

/**
 * Callback for `mongo.connect`: logs a connection error and bails out,
 * otherwise stores the "jobs" database and its "jobs" collection in the
 * module-level handles.
 *
 * @param {Error|null|undefined} err - Connection error, if any.
 * @param {object} client - Connected client; only `client.db()` is used.
 */
function handleConnection(err, client) {
  if (err) {
    console.error(err);
    return;
  }
  db = client.db("jobs");
  jobs = db.collection("jobs");
  // scrape(jobs)
}

mongo.connect(url, connectionOptions, handleConnection);
/**
 * Scrapes the remote JavaScript job listings from remoteok.io and replaces
 * the contents of the given collection with the scraped rows.
 *
 * Each stored row has the shape `{ company, position, link }`, where
 * `company` and `position` are the inner HTML of the row's `.company h3` and
 * `.company h2` elements and `link` is the site origin followed by the row's
 * `data-href` attribute.
 *
 * @param {object} jobs - Collection-like object; only `deleteMany` and
 *   `insertMany` are called on it.
 * @returns {Promise<void>} Resolves after the browser has been closed and
 *   the collection writes have completed.
 * @throws Rejects with whatever the browser automation or the collection
 *   writes throw; the browser is closed first in either case.
 */
async function scrape(jobs) {
  const browser = await puppeteer.launch({ headless: false });
  let data;
  try {
    const page = await browser.newPage();
    await page.goto("https://remoteok.io/remote-javascript-jobs");
    // This callback is executed inside the page, so it may only touch
    // browser globals and must return plain, serializable data.
    data = await page.evaluate(() =>
      Array.from(document.querySelectorAll("tr.job"), (item) => ({
        company: item.querySelector(".company h3").innerHTML,
        position: item.querySelector(".company h2").innerHTML,
        link: `https://remoteok.io${item.getAttribute("data-href")}`,
      }))
    );
  } finally {
    // Release the browser even when navigation or extraction throws.
    await browser.close();
  }
  console.log(data);
  // Await the writes in order: the wipe must finish before the insert, and a
  // failed write should reject this function rather than go unhandled.
  await jobs.deleteMany({});
  // The MongoDB driver rejects an empty batch, so skip the insert when the
  // page yielded no rows (the collection is still emptied).
  if (data.length > 0) {
    await jobs.insertMany(data);
  }
}
/**
 * Opens remoteok.io in a visible browser, types a search term into the
 * site's search box, submits it with Enter and waits for the job board to
 * become visible.
 *
 * Work in progress: the matched `.job` elements are only logged from inside
 * the page, nothing is returned or persisted, and the browser is left open.
 *
 * @param {*} jobs - Currently unused by this function.
 * @param {string} [searchQuery="angular"] - Term typed into the search box.
 * @returns {Promise<void>} Resolves once the in-page logging call has run.
 */
async function scrape2(jobs, searchQuery = "angular") {
  const searchInput = "body > div.top > div.box > input";
  const browser = await puppeteer.launch({ headless: false });
  const page = await browser.newPage();
  await page.goto("https://remoteok.io/", { waitUntil: "domcontentloaded" });
  await page.waitForSelector(searchInput, { visible: true });
  await page.type(searchInput, searchQuery);
  // Await the key press as its own statement: previously it was the left
  // operand of an accidental comma expression and its promise was dropped,
  // so a failure there surfaced as an unhandled rejection.
  await page.keyboard.press("Enter");
  await page.waitForSelector("#jobsboard", { visible: true });
  // The callback runs inside the page, so this logs to the page's console
  // (not Node's) and yields no value back to this function.
  await page.$$eval(".job", (els) => {
    console.log(els);
  });
  // TODO: map the `.job` elements to serializable fields, store them through
  // `jobs`, and close the browser once this is no longer used for manual
  // inspection.
  // NOTE(review): the browser is never closed here, which keeps the Node
  // process alive — presumably deliberate while debugging; confirm.
}
// Kick off the search-based scraper as soon as the module loads; an empty
// string is passed as its `jobs` argument.
scrape2("");