-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor in /scraping/jobs and add static types declarations in /type…
…s.ts - add more information of jobs in /scraping/jobs - create new folder /dto and file Response.dto - rename deno.jsonc -> deno.json - update deno.lock - implement dto in jobs.controller
- Loading branch information
1 parent
ff70948
commit 57845e3
Showing
8 changed files
with
175 additions
and
121 deletions.
There are no files selected for viewing
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
{ | ||
"imports": { | ||
"$src/": "./src/", | ||
"$deps": "./deps.ts" | ||
"$deps": "./deps.ts", | ||
"$types": "./src/types.ts" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +0,0 @@ | ||
/**
 * Flat record describing one scraped job entry.
 *
 * NOTE(review): the hunk header above (`-1,12 +0,0`) shows this declaration
 * being removed together with its file.
 */
type TJob = { | ||
title: string | ||
role: string | ||
time: string | ||
postulationFast: boolean | ||
companyName: string | ||
location: string | ||
url: string | ||
perks: string[] | ||
isNew: boolean | ||
hasPublishedSalary: boolean | ||
} | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,9 @@ | ||
import { Context } from '$deps' | ||
import jobs from '$src/data/jobs.json' assert { 'type': 'json' } | ||
import { ResponseDto } from '$src/dto/Response.dto.ts' | ||
import { IJob } from '$types' | ||
|
||
/**
 * Responds with the stored job list wrapped in a `ResponseDto`.
 *
 * The jobs come from the `$src/data/jobs.json` import; the DTO exposes them
 * together with their count.
 *
 * @param ctx - Request context whose response body is set.
 */
export function getJobs(ctx: Context): void {
  // The body is assigned exactly once: the former object-literal assignment was
  // immediately overwritten by the DTO and has been removed.
  // The JSON import is typed structurally by the compiler, hence the assertion.
  ctx.response.body = new ResponseDto(jobs as IJob[])
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import type { IJob } from '$types' | ||
|
||
/**
 * Response payload for the job listing: the jobs plus how many there are.
 */
export class ResponseDto {
  /** Jobs carried by the response (stored by reference, not copied). */
  readonly jobs: IJob[]
  /** Length of `jobs` at construction time. */
  readonly total: number

  /**
   * @param jobs - Jobs to expose in the response.
   */
  constructor(jobs: IJob[]) {
    const { length: count } = jobs

    this.jobs = jobs
    this.total = count
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,118 +1,155 @@ | ||
import { Cheerio, CheerioAPI, Element } from '$deps' | ||
import { ConfigService } from '$src/services/Config.service.ts' | ||
import { ScraperService } from '$src/services/Scraper.service.ts' | ||
import type { IJob, TJobDetails, TJobPartial } from '$types' | ||
|
||
/**
 * Scrapes the programming job listing, enriches every job with the details
 * found on its own page and writes the result to `./src/data/jobs.json`.
 *
 * Jobs whose detail page cannot be scraped are logged and left out of the
 * output; they do not abort the run.
 *
 * @returns A promise that resolves once the JSON file has been written.
 */
export async function scrapeJobs(): Promise<void> {
  const config = new ConfigService()
  const scraper = new ScraperService()

  await config.load()
  // `||` rather than `??` so an empty configured value also falls back to the default.
  const baseUrl: string = config.get('URL_GETONBRD') || 'https://www.getonbrd.com'
  const $ = await scraper.execute(`${baseUrl}/empleos/programacion`)

  const sgbResultsList = $(
    'body #right-col .main-container ul.sgb-results-list>div',
  )
  const jobPartialList = getJobPartialList(sgbResultsList, $)

  // allSettled keeps the results index-aligned with jobPartialList and lets a
  // single failing detail page be skipped instead of rejecting everything.
  const cheerioResponses = await Promise.allSettled(
    jobPartialList.map((job: TJobPartial) => scraper.execute(job.url)),
  )
  const jobs: IJob[] = []

  cheerioResponses.forEach((cheerioResponse, i) => {
    const jobPartial = jobPartialList[i]

    if (cheerioResponse.status === 'rejected') {
      // Report the failure instead of dropping the job silently.
      console.error(
        `Skipping job "${jobPartial.url}": its detail page could not be scraped`,
        cheerioResponse.reason,
      )
      return
    }

    jobs.push({
      ...jobPartial,
      details: getJobDetails(cheerioResponse.value),
    })
  })

  // Awaited so the returned promise covers the write and surfaces its errors.
  await Deno.writeTextFile('./src/data/jobs.json', JSON.stringify(jobs, null, 2))
}
|
||
const getDataJobs = ( | ||
jobsResultList: Cheerio<Element>, | ||
$: CheerioAPI, | ||
): TJob[] => { | ||
const jobs = $(jobsResultList) | ||
.map((_i: number, el: Element) => { | ||
let elCheerio = $(el) | ||
elCheerio = elCheerio.children('a') | ||
|
||
const elInfo = elCheerio | ||
.children('.gb-results-list__main') | ||
.children('.gb-results-list__info') | ||
|
||
// const logo = elCheerio | ||
// .children('.gb-results-list__main') | ||
// .children('.gb-results-list__avatar') | ||
// .children('img.gb-results-list__img') | ||
// .attr('src'); | ||
|
||
let title: string = elInfo | ||
.children('.gb-results-list__title') | ||
.children('strong') | ||
.text() | ||
|
||
// Remover caracteres especiales o emojis al inicio del título | ||
title = title.substring(title.search(/\w{1}/)) | ||
|
||
const [role, time]: string[] = elInfo | ||
.children('.gb-results-list__title') | ||
.children('span') | ||
.text() | ||
.split('|') | ||
.map((t: string) => t.trim()) | ||
|
||
const postulationFast: boolean = elInfo | ||
.children('.gb-results-list__title') | ||
.children('i') | ||
.hasClass('fa-bolt') | ||
|
||
const textInfo: string = elInfo | ||
.children('div') | ||
.text() | ||
|
||
const [companyName, ...locations]: string[] = textInfo | ||
.split('\n') | ||
.filter((t) => t) | ||
|
||
const location: string = locations | ||
.join(' ') | ||
.trim() | ||
.replace(/^\w{1}/, (l) => l.toUpperCase()) | ||
|
||
const url: string = elCheerio.attr('href') || '' | ||
|
||
const perks: string[] = [] | ||
elCheerio | ||
.children('.gb-results-list__secondary') | ||
.children('.gb-perks-list') | ||
.children('i') | ||
.each((_i, el) => { | ||
const elIcon = $(el) | ||
const className = elIcon.attr('class')!.split('perk-')[1] | ||
|
||
if (elIcon.hasClass(`perk-${className}`)) { | ||
perks.push(className.replaceAll('_', ' ')) | ||
} | ||
}) | ||
|
||
const elBadges = elCheerio | ||
.children('.gb-results-list__secondary') | ||
.children('.gb-results-list__badges') | ||
|
||
const isNew: boolean = elBadges | ||
.children('span') | ||
.hasClass('badge') | ||
|
||
const hasPublishedSalary: boolean = elBadges | ||
.children('i') | ||
.hasClass('fa-money') | ||
|
||
return { | ||
title, | ||
role, | ||
time, | ||
postulationFast, | ||
companyName, | ||
location, | ||
url, | ||
perks, | ||
isNew, | ||
hasPublishedSalary, | ||
} | ||
}).toArray() | ||
/**
 * Builds the partial job records from the result rows of the listing page.
 *
 * @param jobsResultList - Result row elements selected from the listing page.
 * @param $ - Cheerio API bound to the listing page document.
 * @returns One `TJobPartial` per result row, in the order the rows are given.
 */
function getJobPartialList(jobsResultList: Cheerio<Element>, $: CheerioAPI): TJobPartial[] {
  // Unicode-aware counterparts of `\w`: plain `\w` is ASCII-only, so it would
  // cut an accented initial ("Área" -> "rea") and never capitalize one.
  const firstWordChar = /[\p{L}\p{N}_]/u
  const leadingLetter = /^\p{L}/u
  const perkPrefix = 'perk-'

  const jobs = $(jobsResultList).map((_i: number, el: Element) => {
    const elCheerio = $(el).children('a')

    const elInfo = elCheerio
      .children('.gb-results-list__main')
      .children('.gb-results-list__info')
    const elTitle = elInfo.children('.gb-results-list__title')

    // const logo = elCheerio
    // .children('.gb-results-list__main')
    // .children('.gb-results-list__avatar')
    // .children('img.gb-results-list__img')
    // .attr('src');

    const rawTitle: string = elTitle.children('strong').text()

    // Remove special characters or emojis at the start of the title; when the
    // title has no letter/digit at all it is kept unchanged.
    const titleStart = rawTitle.search(firstWordChar)
    const title: string = titleStart === -1 ? rawTitle : rawTitle.substring(titleStart)

    // The span text has the form "<role> | <time>". `time` defaults to '' so the
    // record always satisfies the declared `string` type when there is no '|'.
    const [role, time = '']: string[] = elTitle
      .children('span')
      .text()
      .split('|')
      .map((t: string) => t.trim())

    const postulationFast: boolean = elTitle
      .children('i')
      .hasClass('fa-bolt')

    // First non-blank line is the company, the remaining ones form the location.
    // Lines are trimmed before filtering so whitespace-only lines are discarded.
    const [companyName = '', ...locations]: string[] = elInfo
      .children('div')
      .text()
      .split('\n')
      .map((line: string) => line.trim())
      .filter((line: string) => line !== '')

    const location: string = locations
      .join(' ')
      .trim()
      .replace(leadingLetter, (l) => l.toUpperCase())

    const url: string = elCheerio.attr('href') ?? ''

    const elSecondary = elCheerio.children('.gb-results-list__secondary')

    const perks: string[] = []
    elSecondary
      .children('.gb-perks-list')
      .children('i')
      .each((_i, iconEl) => {
        // Look at the individual class tokens so the perk is found wherever the
        // `perk-*` class sits in the attribute, and a missing attribute is harmless.
        const perkClass = ($(iconEl).attr('class') ?? '')
          .split(/\s+/)
          .find((token: string) => token.startsWith(perkPrefix) && token.length > perkPrefix.length)

        if (perkClass !== undefined) {
          perks.push(perkClass.slice(perkPrefix.length).replaceAll('_', ' '))
        }
      })

    const elBadges = elSecondary.children('.gb-results-list__badges')

    const isNew: boolean = elBadges
      .children('span')
      .hasClass('badge')

    const hasPublishedSalary: boolean = elBadges
      .children('i')
      .hasClass('fa-money')

    return {
      title,
      role,
      time,
      postulationFast,
      companyName,
      location,
      url,
      perks,
      isNew,
      hasPublishedSalary,
    }
  }).toArray()

  return jobs
}
|
||
/**
 * Reads the detail fields of a single job page.
 *
 * @param $ - Cheerio API bound to the job page document.
 * @returns The details record; `postulations` is the first number found in the
 * cover text, or 0 when that text contains no digits.
 */
function getJobDetails($: CheerioAPI): TJobDetails {
  const column = $('body #right-col')

  // The wrapper under the right column uses one of two theme classes; pick the
  // colored one when any direct child has it, otherwise the clean one.
  const themeSelector = column.children().hasClass('gb-company-theme-colored')
    ? '.gb-company-theme-colored'
    : '.gb-company-theme-clean'

  const coverText = column
    .children(themeSelector)
    .children('.gb-landing-cover')
    .children('.gb-container')
    .children('.full-width')
    .children('.size0')
    .text()

  const firstNumber = coverText.match(/\d+/)
  if (!firstNumber) {
    return { postulations: 0 }
  }

  return { postulations: Number(firstNumber[0]) }
}
|
||
// Module-level call: the scrape starts as soon as this module is evaluated.
// The returned promise is not awaited or caught here.
scrapeJobs() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
/**
 * A complete job record: every `TJobPartial` field plus a `details` object.
 */
export interface IJob extends TJobPartial { | ||
details: TJobDetails | ||
} | ||
|
||
/**
 * Flat job fields, without the nested `details`.
 * NOTE(review): presumably these are the values read from one row of the
 * listing page — confirm against the scraper that fills them.
 */
export type TJobPartial = { | ||
title: string | ||
role: string | ||
time: string | ||
postulationFast: boolean | ||
companyName: string | ||
location: string | ||
url: string | ||
perks: string[] | ||
isNew: boolean | ||
hasPublishedSalary: boolean | ||
} | ||
|
||
/**
 * Extra information attached to a job through `IJob.details`.
 */
export type TJobDetails = { | ||
postulations: number | ||
} | ||
|
||
/** Alias for the `ResponseDto` class type, referenced through an import type so no import statement is needed here. */
export type TResponseAPI = import('$src/dto/Response.dto.ts').ResponseDto |