forked from thayton/casperjs-taleo-job-scraper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.js
64 lines (51 loc) · 1.73 KB
/
scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
/**
* Scrape job title, url, and location from Taleo jobs page at https://l3com.taleo.net/careersection/l3_ext_us/jobsearch.ftl
*
* Usage: $ casperjs scraper.js
*/
// Casper instance used for the whole run; a desktop Firefox user agent
// string is supplied through the page settings.
var casper = require('casper').create({
    pageSettings: {
        userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:23.0) Gecko/20130404 Firefox/23.0'
    }
});

// Job search page that the scraper opens first.
var url = 'https://l3com.taleo.net/careersection/l3_ext_us/jobsearch.ftl';

// Page number the scraper expects to be on; processPage increments it
// before each click on the "next" link.
var currentPage = 1;

// Jobs scraped from the most recently processed page (reassigned per page).
var jobs = [];
/**
 * Print a short exit message and then stop the script.
 *
 * Must be called with `this` bound to an object exposing `echo` and `exit`
 * (here: the Casper instance). `exit` is invoked on whatever `echo` returns.
 */
var terminate = function() {
    var afterEcho = this.echo("Exiting..");
    afterEcho.exit();
};
/**
 * Return the current page number by reading the disabled page-number entry
 * in the pager.
 *
 * This function is handed to `evaluate`, so it only relies on the page's
 * `document` and on nothing else from this file.
 *
 * @returns {number} The selected page number, or NaN when the disabled pager
 *     entry is not present or its text does not start with a number.
 */
function getSelectedPage() {
    var el = document.querySelector('li[class="navigation-link-disabled"]');
    if (!el) {
        // No pager entry found: answer "not a page number" instead of
        // throwing on `null.textContent`, so a caller comparing against a
        // page number simply sees a mismatch.
        return NaN;
    }
    // Explicit radix: the text is always a decimal page number.
    return parseInt(el.textContent, 10);
}
/**
 * Collect the job postings listed in the results table of the current page.
 *
 * This function is handed to `evaluate`, so it only relies on the page's
 * `document`. Each row matching `table#jobs tr[id^="job"]` is expected to
 * carry the job-detail link in its second cell and the location `span` in
 * its third cell.
 *
 * Rows without a job-detail link are skipped rather than aborting the whole
 * page; a row whose location span is missing is kept with `location: null`.
 *
 * @returns {Array<{title: string, url: string, location: (string|null)}>}
 *     One entry per usable row; `url` is the link's raw `href` attribute.
 */
function getJobs() {
    var rows = document.querySelectorAll('table#jobs tr[id^="job"]');
    var jobs = [];

    for (var i = 0; i < rows.length; i++) {
        var cells = rows[i].cells;
        var titleCell = cells && cells[1];
        var locationCell = cells && cells[2];

        var link = titleCell
            ? titleCell.querySelector('a[href*="jobdetail.ftl?job="]')
            : null;
        if (!link) {
            // Without the link there is neither a title nor a URL to report.
            continue;
        }

        var place = locationCell ? locationCell.querySelector('span') : null;

        jobs.push({
            title: link.innerText,
            url: link.getAttribute('href'),
            location: place ? place.innerText : null
        });
    }

    return jobs;
}
/**
 * Scrape the jobs on the current results page, dump them, and move on to the
 * next page until a stop condition is met.
 *
 * Must run with `this` bound to the Casper instance. The scraped list is
 * stored in the file-level `jobs` variable and printed with `utils.dump`.
 *
 * The run ends via `terminate` when any of the following holds:
 *   - `currentPage` has reached 3,
 *   - the `table#jobs` element is no longer on the page,
 *   - there is no "next" link to click (clicking a nonexistent selector
 *     would raise an error instead of exiting cleanly).
 *
 * Otherwise `currentPage` is incremented, the "next" link is clicked, and the
 * function waits until the pager reports the new page number before calling
 * itself again; if that wait times out, `terminate` is used as the handler.
 */
var processPage = function() {
    var nextLinkSelector = "div#jobPager a#next";

    jobs = this.evaluate(getJobs);
    require('utils').dump(jobs);

    if (currentPage >= 3 ||
            !this.exists("table#jobs") ||
            !this.exists(nextLinkSelector)) {
        return terminate.call(casper);
    }

    currentPage++;

    this.thenClick(nextLinkSelector).then(function() {
        this.waitFor(function() {
            // The pager marks the page being shown; proceed only once it
            // matches the page we asked for.
            return currentPage === this.evaluate(getSelectedPage);
        }, processPage, terminate);
    });
};
// Start the Casper run at the job search URL.
casper.start(url);
// Wait for the jobs table selector, passing processPage as the follow-up
// step and terminate as the handler for the timeout case.
casper.waitForSelector('table#jobs', processPage, terminate);
// Run the steps registered above.
casper.run();