-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.js
107 lines (90 loc) · 3.99 KB
/
scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import puppeteer from "puppeteer";
import fs, { appendFile } from "fs";
// NOTE(review): no matching console.timeEnd("dbsave") is visible in this file,
// so this timer never reports; kept to preserve module-level behavior.
console.time("dbsave");

// Checkpoint for elapsed_time(); reset after every call.
let start = process.hrtime();

/**
 * Logs the time elapsed since the previous checkpoint (module load or the
 * previous call) as "<seconds> s, <milliseconds> ms - <note>", then resets
 * the checkpoint.
 *
 * @param {string} note - Label appended to the log line.
 * @returns {void}
 */
const elapsed_time = (note) => {
  const precision = 3;
  // Sample the clock once: reading it separately for the seconds and the
  // nanoseconds parts can pair values taken on either side of a second
  // boundary (e.g. "1 s, 999.000 ms").
  const [seconds, nanoseconds] = process.hrtime(start);
  const milliseconds = nanoseconds / 1000000;
  console.log(`${seconds} s, ${milliseconds.toFixed(precision)} ms - ${note}`);
  start = process.hrtime();
};
// Index page whose program links carry the numeric program IDs.
const PROGRAM_LIST_URL = "https://www.atilim.edu.tr/tr/ects/site-courses/programlar/lisans";
// File the scraped result is written to (relative to the working directory).
const OUTPUT_PATH = "data.json";

/**
 * Opens a new tab, navigates it to `url`, runs `task` against it and always
 * closes the tab afterwards, even when navigation or the task throws.
 *
 * @param {object} browser - Puppeteer browser instance.
 * @param {string} url - Address to load in the new tab.
 * @param {(page: object) => Promise<*>} task - Work to perform on the loaded page.
 * @returns {Promise<*>} Whatever `task` resolves to.
 */
const withPage = async (browser, url, task) => {
  const page = await browser.newPage();
  try {
    await page.goto(url);
    return await task(page);
  } finally {
    await page.close();
  }
};

/** Orders course records by `shortName` using plain `<` / `>` string comparison. */
const compareByShortName = (a, b) => {
  if (a.shortName < b.shortName) return -1;
  if (a.shortName > b.shortName) return 1;
  return 0;
};

/**
 * Keeps the first occurrence of every course that has a non-empty short name
 * and whose full name does not contain "Elective", sorted by short name.
 *
 * @param {Array<{link: string, shortName: string, fullName: string}>} courses
 * @returns {Array<{link: string, shortName: string, fullName: string}>}
 */
const selectCourses = (courses) => {
  // Only accepted courses are remembered, so a skipped elective does not
  // block a later regular course that shares its short name.
  const seenShortNames = new Set();
  const selected = [];
  for (const course of courses) {
    const isElective = course.fullName.includes("Elective");
    if (course.shortName === "" || isElective || seenShortNames.has(course.shortName)) {
      continue;
    }
    seenShortNames.add(course.shortName);
    selected.push(course);
  }
  return selected.sort(compareByShortName);
};

/**
 * Scrapes every program's course list and every selected course's grading
 * table, then writes `[dateString, ...courses]` as JSON to data.json.
 *
 * Progress is logged to the console; timing checkpoints go through
 * `elapsed_time`. The browser is closed even when a step fails.
 *
 * @returns {Promise<void>}
 * @throws Rejects with whatever Puppeteer, the page callbacks or the file write throw.
 */
const run = async () => {
  elapsed_time("Start\n");
  const browser = await puppeteer.launch();
  try {
    // Find all programs. Callbacks given to page.evaluate are executed by
    // Puppeteer inside the page, so they must not use outer variables.
    const programIDs = await withPage(browser, PROGRAM_LIST_URL, (page) =>
      page.evaluate(() =>
        Array.from(
          document.querySelectorAll("#mm-0 > div > section:nth-child(3) > div > div > div.col-md-9 > div > ul>li>a"),
          (link) => link.href.match(/\d+/)?.[0],
        ).filter((id) => id !== undefined), // skip links without a numeric id instead of crashing
      ),
    );

    // Visit each program's course list and collect all course links.
    const courses = [];
    for (const [index, id] of programIDs.entries()) {
      const programCourses = await withPage(
        browser,
        `https://www.atilim.edu.tr/en/ects/site-courses/${id}/info/Courses`,
        async (page) => {
          const programName = await page.evaluate(() =>
            document.querySelector(".header-title .container > div >h1").innerText.replace("ECTS - - ", ""),
          );
          console.log(`(${index + 1}/${programIDs.length}) ${programName} ✔`);
          return page.evaluate(() =>
            Array.from(document.querySelectorAll(".panel.panel-default"), (panel) => ({
              link: panel.querySelector("a").href,
              shortName: panel.querySelector("strong").innerHTML,
              fullName: panel.querySelector("small.colorRed").innerHTML,
            })),
          );
        },
      );
      courses.push(...programCourses);
    }

    // Filter and sort.
    const finalCourseList = selectCourses(courses);
    console.log("--------------------------------------------------");

    // Mark the date (first element of the output array).
    const lastUpdated = new Date().toDateString();

    // Visit each course page and attach its gradings.
    for (const [index, course] of finalCourseList.entries()) {
      course.gradings = await withPage(browser, course.link, (page) =>
        page.evaluate(() => {
          // "-" marks an empty cell on the page.
          const parseCell = (text) => (text === "-" ? null : Number(text));
          return Array.from(
            document.querySelectorAll(".detail-container.ects div:nth-child(5) > table > tbody > tr"),
            (tableRow) => {
              const cells = tableRow.querySelectorAll("td");
              // Rows without the three expected cells carry no grading data.
              if (cells.length < 3) return null;
              const number = parseCell(cells[1].innerText);
              const percentage = parseCell(cells[2].innerText);
              if (number === null && percentage === null) return null;
              return { type: cells[0].innerText, number, percentage };
            },
          ).filter((grading) => grading !== null); // discard rows with no grading
        }),
      );
      console.log(`(${index + 1}/${finalCourseList.length}) ${course.shortName} ✔`);
    }

    fs.writeFileSync(OUTPUT_PATH, JSON.stringify([lastUpdated, ...finalCourseList]));
  } finally {
    await browser.close();
  }
  elapsed_time("Finish");
};
// Start the scraper. Handle a rejection explicitly instead of leaving the
// promise floating: log the failure and exit non-zero. process.exit is used
// (rather than only setting process.exitCode) so the process terminates even
// if a browser launched by run() is still open when the error surfaces.
run().catch((error) => {
  console.error(error);
  process.exit(1);
});