Skip to content

Commit

Permalink
refactor in /scraping/jobs and add static type declarations in /types.ts
Browse files Browse the repository at this point in the history

- add more job information in /scraping/jobs
- create new folder /dto and file Response.dto.ts
- rename deno.jsonc -> deno.json
- update deno.lock
- implement the DTO in jobs.controller
  • Loading branch information
EdixonAlberto committed Feb 11, 2023
1 parent ff70948 commit 57845e3
Show file tree
Hide file tree
Showing 8 changed files with 175 additions and 121 deletions.
File renamed without changes.
3 changes: 2 additions & 1 deletion import_map.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"imports": {
"$src/": "./src/",
"$deps": "./deps.ts",
"$types": "./src/types.ts"
}
}
12 changes: 0 additions & 12 deletions src/@types/index.d.ts
Original file line number Diff line number Diff line change
@@ -1,12 +0,0 @@
// Ambient (non-exported) shape of one scraped job card, as returned by `getDataJobs`.
type TJob = {
title: string
role: string
time: string
postulationFast: boolean
companyName: string
location: string
url: string
perks: string[]
isNew: boolean
hasPublishedSalary: boolean
}
8 changes: 4 additions & 4 deletions src/controllers/jobs.controller.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { Context } from '$deps'
import jobs from '$src/data/jobs.json' assert { 'type': 'json' }
import { ResponseDto } from '$src/dto/Response.dto.ts'
import { IJob } from '$types'

/**
 * Responds with the stored jobs wrapped in a `ResponseDto`.
 *
 * @param ctx - Request context whose response body is set.
 */
export function getJobs(ctx: Context): void {
  // The body is assigned exactly once; an earlier plain-object assignment was
  // dead code because the DTO overwrote it immediately.
  ctx.response.body = new ResponseDto(jobs as IJob[])
}
11 changes: 11 additions & 0 deletions src/dto/Response.dto.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import type { IJob } from '$types'

/** Response payload: the list of jobs together with how many there are. */
export class ResponseDto {
  /** Jobs included in the response (the array passed to the constructor, not a copy). */
  readonly jobs: IJob[]
  /** Number of entries in `jobs` at construction time. */
  readonly total: number

  /**
   * @param jobs - Jobs to expose in the response.
   */
  constructor(jobs: IJob[]) {
    const { length } = jobs

    this.jobs = jobs
    this.total = length
  }
}
231 changes: 134 additions & 97 deletions src/scraping/jobs.ts
Original file line number Diff line number Diff line change
@@ -1,118 +1,155 @@
import { Cheerio, CheerioAPI, Element } from '$deps'
import { ConfigService } from '$src/services/Config.service.ts'
import { ScraperService } from '$src/services/Scraper.service.ts'
import type { IJob, TJobDetails, TJobPartial } from '$types'

/**
 * Scrapes the programming job listings and stores them as JSON.
 *
 * Loads the configuration, fetches the listing page, extracts the summary of
 * every job card and then fetches each job's own page concurrently to add its
 * details. Jobs whose page could not be fetched are reported on stderr and
 * left out. The resulting list is written to `./src/data/jobs.json`.
 *
 * @returns A promise that settles once the JSON file has been written; it
 * rejects if loading the config, fetching the listing page or writing fails.
 */
export async function scrapeJobs(): Promise<void> {
  const config = new ConfigService()
  const scraper = new ScraperService()

  await config.load()
  // `||` also falls back to the default when URL_GETONBRD is set but empty.
  const baseUrl: string = config.get('URL_GETONBRD') || 'https://www.getonbrd.com'
  const $ = await scraper.execute(`${baseUrl}/empleos/programacion`)

  const sgbResultsList = $(
    'body #right-col .main-container ul.sgb-results-list>div',
  )
  const jobPartialList = getJobPartialList(sgbResultsList, $)

  // allSettled: one unreachable job page must not abort the whole scrape.
  const cheerioResponses = await Promise.allSettled(
    jobPartialList.map((job: TJobPartial) => scraper.execute(job.url)),
  )
  const jobs: IJob[] = []

  // Results come back in the same order as `jobPartialList`, so the index pairs them up.
  cheerioResponses.forEach((cheerioResponse, index) => {
    const jobPartial = jobPartialList[index]

    if (cheerioResponse.status === 'rejected') {
      console.error(
        `Could not scrape job details from "${jobPartial.url}":`,
        cheerioResponse.reason,
      )
      return
    }

    jobs.push({
      ...jobPartial,
      details: getJobDetails(cheerioResponse.value),
    })
  })

  // Awaited so that a failed write rejects this function's promise instead of floating.
  await Deno.writeTextFile('./src/data/jobs.json', JSON.stringify(jobs, null, 2))
}

const getDataJobs = (
jobsResultList: Cheerio<Element>,
$: CheerioAPI,
): TJob[] => {
const jobs = $(jobsResultList)
.map((_i: number, el: Element) => {
let elCheerio = $(el)
elCheerio = elCheerio.children('a')

const elInfo = elCheerio
.children('.gb-results-list__main')
.children('.gb-results-list__info')

// const logo = elCheerio
// .children('.gb-results-list__main')
// .children('.gb-results-list__avatar')
// .children('img.gb-results-list__img')
// .attr('src');

let title: string = elInfo
.children('.gb-results-list__title')
.children('strong')
.text()

// Remover caracteres especiales o emojis al inicio del título
title = title.substring(title.search(/\w{1}/))

const [role, time]: string[] = elInfo
.children('.gb-results-list__title')
.children('span')
.text()
.split('|')
.map((t: string) => t.trim())

const postulationFast: boolean = elInfo
.children('.gb-results-list__title')
.children('i')
.hasClass('fa-bolt')

const textInfo: string = elInfo
.children('div')
.text()

const [companyName, ...locations]: string[] = textInfo
.split('\n')
.filter((t) => t)

const location: string = locations
.join(' ')
.trim()
.replace(/^\w{1}/, (l) => l.toUpperCase())

const url: string = elCheerio.attr('href') || ''

const perks: string[] = []
elCheerio
.children('.gb-results-list__secondary')
.children('.gb-perks-list')
.children('i')
.each((_i, el) => {
const elIcon = $(el)
const className = elIcon.attr('class')!.split('perk-')[1]

if (elIcon.hasClass(`perk-${className}`)) {
perks.push(className.replaceAll('_', ' '))
}
})

const elBadges = elCheerio
.children('.gb-results-list__secondary')
.children('.gb-results-list__badges')

const isNew: boolean = elBadges
.children('span')
.hasClass('badge')

const hasPublishedSalary: boolean = elBadges
.children('i')
.hasClass('fa-money')

return {
title,
role,
time,
postulationFast,
companyName,
location,
url,
perks,
isNew,
hasPublishedSalary,
}
}).toArray()
/**
 * Returns `text` without any leading characters that are not letters, digits
 * or `_` (emojis, symbols, whitespace). Unicode-aware, so accented letters
 * count as letters. Text without any such character is returned unchanged.
 */
function stripLeadingSymbols(text: string): string {
  const start = text.search(/[\p{L}\p{N}_]/u)
  return start > 0 ? text.slice(start) : text
}

/**
 * Lists the perk names encoded as `perk-<name>` tokens in a `class` attribute
 * value, with underscores turned into spaces. Other class tokens are ignored.
 */
function perkNamesFromClassAttr(classAttr: string | undefined): string[] {
  const prefix = 'perk-'

  return (classAttr ?? '')
    .split(/\s+/)
    .filter((token) => token.length > prefix.length && token.startsWith(prefix))
    .map((token) => token.slice(prefix.length).replaceAll('_', ' '))
}

/**
 * Extracts the summary data of every job card in the results list.
 *
 * @param jobsResultList - Cheerio selection with one element per job card.
 * @param $ - Cheerio API bound to the document that contains the cards.
 * @returns One `TJobPartial` per card, in the order produced by Cheerio's `map`.
 */
function getJobPartialList(jobsResultList: Cheerio<Element>, $: CheerioAPI): TJobPartial[] {
  const jobs = $(jobsResultList).map((_i: number, el: Element) => {
    const elCheerio = $(el).children('a')

    const elInfo = elCheerio
      .children('.gb-results-list__main')
      .children('.gb-results-list__info')
    const elTitle = elInfo.children('.gb-results-list__title')
    const elSecondary = elCheerio.children('.gb-results-list__secondary')

    // Company logo, currently not collected:
    // const logo = elCheerio
    //   .children('.gb-results-list__main')
    //   .children('.gb-results-list__avatar')
    //   .children('img.gb-results-list__img')
    //   .attr('src');

    // Remove special characters or emojis at the start of the title.
    const title: string = stripLeadingSymbols(elTitle.children('strong').text())

    // The span reads "<role> | <time>"; default to '' so both fields are always strings.
    const [role = '', time = '']: string[] = elTitle
      .children('span')
      .text()
      .split('|')
      .map((t: string) => t.trim())

    const postulationFast: boolean = elTitle.children('i').hasClass('fa-bolt')

    // First non-empty line is the company; the remaining lines form the location.
    const [companyName = '', ...locations]: string[] = elInfo
      .children('div')
      .text()
      .split('\n')
      .filter((t) => t)

    const location: string = locations
      .join(' ')
      .trim()
      // Capitalise the first letter (Unicode-aware, so accented letters work too).
      .replace(/^\p{L}/u, (l) => l.toUpperCase())

    const url: string = elCheerio.attr('href') || ''

    const perks: string[] = []
    elSecondary
      .children('.gb-perks-list')
      .children('i')
      .each((_j, elIcon) => {
        // Icons without a class attribute or without a `perk-` token add nothing.
        perks.push(...perkNamesFromClassAttr($(elIcon).attr('class')))
      })

    const elBadges = elSecondary.children('.gb-results-list__badges')

    const isNew: boolean = elBadges.children('span').hasClass('badge')

    const hasPublishedSalary: boolean = elBadges.children('i').hasClass('fa-money')

    return {
      title,
      role,
      time,
      postulationFast,
      companyName,
      location,
      url,
      perks,
      isNew,
      hasPublishedSalary,
    }
  }).toArray()

  return jobs
}

/**
 * Reads the detail fields from a single job page.
 *
 * @param $ - Cheerio API bound to the job page document.
 * @returns The details found; `postulations` is the first run of digits in the
 * cover text, or 0 when that text contains no digits.
 */
function getJobDetails($: CheerioAPI): TJobDetails {
  const column = $('body #right-col')

  // The page wrapper uses either the "colored" or the "clean" company theme class.
  const usesColoredTheme = column.children().hasClass('gb-company-theme-colored')
  const themeSelector = usesColoredTheme
    ? '.gb-company-theme-colored'
    : '.gb-company-theme-clean'

  const coverText = column
    .children(themeSelector)
    .children('.gb-landing-cover')
    .children('.gb-container')
    .children('.full-width')
    .children('.size0')
    .text()

  const firstNumber = /\d+/.exec(coverText)
  if (firstNumber === null) {
    return { postulations: 0 }
  }

  return { postulations: Number(firstNumber[0]) }
}

// Start a scrape as soon as this module is evaluated; the returned promise is not awaited here.
void scrapeJobs()
9 changes: 2 additions & 7 deletions src/services/Scraper.service.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
import { CheerioAPI, load } from '$deps'
import { ConfigService } from './Config.service.ts'

export class ScraperService {
constructor(private readonly config = new ConfigService()) {}

public async execute(path: string): Promise<CheerioAPI> {
const baseUrl: string | undefined = this.config.get('URL_GETONBRD') || 'https://www.getonbrd.com'
const pathname: string = path.startsWith('/') ? path : `/${path}`
const response = await fetch(baseUrl + pathname)
public async execute(url: string): Promise<CheerioAPI> {
const response = await fetch(url)
const html: string = await response.text()
const $ = load(html)
return $
Expand Down
22 changes: 22 additions & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/** A complete job entry: the summary fields of `TJobPartial` plus its `details`. */
export interface IJob extends TJobPartial {
// Data read from the job's own page.
details: TJobDetails
}

/** Summary of a job as read from one card of the listing page. */
export type TJobPartial = {
// Job title text.
title: string
// First part of the card's "role | time" label.
role: string
// Second part of the card's "role | time" label.
time: string
// True when the title shows the `fa-bolt` icon.
postulationFast: boolean
companyName: string
location: string
// `href` of the card's link; empty string when the link has none.
url: string
// Perk names taken from the icons' `perk-*` classes, underscores replaced by spaces.
perks: string[]
// True when the card's badges contain a `badge` span.
isNew: boolean
// True when the card's badges show the `fa-money` icon.
hasPublishedSalary: boolean
}

/** Data read from a job's own page. */
export type TJobDetails = {
// First number found in the page's cover text; 0 when there is none.
postulations: number
}

/** Alias for the `ResponseDto` class type, referenced through an inline type import. */
export type TResponseAPI = import('$src/dto/Response.dto.ts').ResponseDto

0 comments on commit 57845e3

Please sign in to comment.