This repository has been archived by the owner on Jun 7, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
get-data.js
68 lines (61 loc) · 1.85 KB
/
get-data.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
// Modules used by the scraper: HTTP client, HTML parser, file system.
var request = require('request');
var cheerio = require('cheerio');
var fs = require('fs');

// Path prefix prepended to every generated JSON file name.
var outfilePrefix = 'app/data/';

// Pages to scrape; judging by the query strings these are the 2009
// Chamber of Deputies and Senate listings on hartapoliticii.ro.
var sources = [
  'http://hartapoliticii.ro/?c=camera+deputatilor+2009&cid=11&sid=1',
  'http://hartapoliticii.ro/?c=senat+2009&cid=12&sid=1'
];
/**
 * Builds one politician record from a table row.
 *
 * Cells are read by position:
 *   1 -> name          (text of the link(s) inside the cell)
 *   2 -> circumscription (full cell text)
 *   3 -> group         (text of `span a` inside the cell)
 *   4 -> attendance    (cell text before the first '%', trimmed)
 *   5 -> rebel         (cell text with its last character dropped;
 *                       presumably a trailing '%' -- TODO confirm)
 * Cell 0 and any cell past index 5 are ignored.
 *
 * @param {Function} $ - the cheerio instance loaded with the page.
 * @param {Object} row - the `tr` element to read.
 * @returns {Object} record holding whichever of the fields above were present.
 */
function parseRow($, row) {
  var entry = {};

  $(row).find('td').each(function (idx, cell) {
    var $cell = $(cell);

    switch (idx) {
      case 1:
        entry.name = $cell.find('a').text();
        break;
      case 2:
        entry.circumscription = $cell.text();
        // This break was missing: the circumscription cell used to fall
        // through and also populate `group`.
        break;
      case 3:
        entry.group = $cell.find('span a').text();
        break;
      case 4:
        entry.attendance = $cell.text().split('%')[0].trim();
        break;
      case 5:
        entry.rebel = $cell.text().slice(0, -1);
        break;
      default:
        // Nothing to keep from the remaining columns.
        break;
    }
  });

  return entry;
}

// Download every source page, extract the politicians table and write it
// out as `<outfilePrefix><senate|deputies>.json`.
sources.forEach(function (source) {
  request({ uri : source }, function (error, response, body) {
    if (error) {
      // Fail with the real cause instead of letting cheerio choke on an
      // undefined body.
      throw new Error("Could not download " + source + ": " + error.message);
    }

    var $ = cheerio.load(body);
    var $table;

    // The page title decides which output file this source feeds.
    var chamber = $('title').text().indexOf("Senat") !== -1 ? 'senate' : 'deputies';

    // Notes from the original author about the cheerio version in use:
    // immediate-descendant selection ('>' / '.children()') was unsupported
    // (https://github.com/MatthewMueller/cheerio/issues/17), returning
    // false from `each` did not stop iteration, and `find` also matched
    // the current element. The table is therefore picked by a hardcoded
    // index. TODO: report the bugs upstream.
    $('table').each(function (idx, table) {
      if (idx === 3) {
        $table = $(table);
        return false;
      }
    });

    if (!$table) {
      throw new Error("The list of politicians could not be found.");
    }

    var everybody = [];
    $table.find('tr').each(function (idx, row) {
      // The first three rows are skipped (they are not politician entries).
      if (idx < 3) {
        return true;
      }
      everybody.push(parseRow($, row));
    });

    var outfile = outfilePrefix + chamber + ".json";
    fs.writeFile(outfile, JSON.stringify(everybody), function (err) {
      if (err) {
        // Previously swallowed; a failed write must not pass silently.
        throw new Error("Could not write " + outfile + ": " + err.message);
      }
    });
  });
});