Skip to content

Commit

Permalink
Add scraping folder and data folder
Browse files Browse the repository at this point in the history
  • Loading branch information
Atticus64 committed Jan 6, 2023
1 parent 6a2349f commit de01225
Show file tree
Hide file tree
Showing 4 changed files with 2,903 additions and 63 deletions.
1 change: 1 addition & 0 deletions deno.jsonc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"tasks": {
"dev": "deno run --watch --allow-net src/main.ts",
"scrape": "deno run --allow-net --allow-write ./src/scraping/jobs.ts",
"update-lock": "deno cache --lock=deno.lock --lock-write src/deps.ts"
},
"fmt": {
Expand Down
69 changes: 6 additions & 63 deletions src/controllers/jobs.controller.ts
Original file line number Diff line number Diff line change
@@ -1,65 +1,8 @@
import { Context } from '../deps.ts';
import { ScraperService } from '../services/Scraper.service.ts';
import { Context } from "../deps.ts";
import jsonJobs from "../data/jobs.json" assert { "type": "json" };

/**
 * Scrapes the programming job listings from getonbrd.com and writes the
 * parsed listings to `ctx.response.body` as `{ jobs }`.
 *
 * The page is fetched through `ScraperService` on each invocation; every
 * result entry is reduced to its title, role, time, company name, location
 * and link.
 *
 * @param ctx - Request context whose response body receives the job list.
 * @returns A promise that resolves once the response body has been set.
 */
export async function getJobs(ctx: Context): Promise<void> {
// Load the listing page; `$` is the query function returned by the scraper
// (presumably a Cheerio-style API, judging by its usage below — confirm in
// ScraperService).
const scraper = new ScraperService('https://www.getonbrd.com');
const $ = await scraper.execute('/empleos/programacion');

// Each direct <div> child of the results list is one job entry.
const jobsResultList = $(
'body #right-col .main-container ul.sgb-results-list>div',
);

const jobs = $(jobsResultList).map((_i, el) => {
// The entry's content lives inside its child anchor, which also carries
// the link to the job posting.
let elCheerio = $(el);
elCheerio = elCheerio.children('a');

const elInfo = elCheerio
.children('.gb-results-list__main')
.children('.gb-results-list__info');

// Logo extraction is currently disabled; kept here for reference.
// const logo = elCheerio
// .children('.gb-results-list__main')
// .children('.gb-results-list__avatar')
// .children('img.gb-results-list__img')
// .attr('src');

// Job title: text of the <strong> inside the title element.
const title: string = elInfo
.children('.gb-results-list__title')
.children('strong')
.text();

// The <span> text is split on '|' into two trimmed parts: role and time.
const [role, time]: string[] = elInfo
.children('.gb-results-list__title')
.children('span')
.text()
.split('|')
.map((t) => t.trim());

const textInfo: string = elInfo
.children('div')
.text();

// Split the info text into lines and drop empty strings; the first
// remaining line is taken as the company name, the rest as location parts.
const [companyName, ...locations]: string[] = textInfo
.split('\n')
.filter((t) => t);

// Skip the first location fragment and join the remainder with spaces.
// NOTE(review): the skipped fragment is presumably a separator — confirm
// against the page markup.
const location = locations
.filter((_t, i) => i > 0)
.join(' ');

// Fall back to an empty string when the anchor has no href.
const url: string = elCheerio.attr('href') || '';

return {
title,
role,
time,
companyName,
location,
url,
};
}).toArray();

// Respond with the scraped listings under the `jobs` key.
ctx.response.body = {
jobs,
};
/**
 * Responds with the job data imported from `../data/jobs.json`.
 *
 * The imported data is wrapped in an object under the `jsonJobs` key and
 * assigned to the response body; nothing is fetched or scraped at request
 * time.
 *
 * @param ctx - Request context whose response body is set.
 */
export function getJobs(ctx: Context) {
  // Build the payload first, then hand it to the response in one assignment.
  const payload = { jsonJobs };
  ctx.response.body = payload;
}
Loading

0 comments on commit de01225

Please sign in to comment.