Skip to content
This repository has been archived by the owner on Jun 3, 2021. It is now read-only.

Commit

Permalink
fix(crawler): deal with anti crawlers
Browse files (browse the repository at this point in the history)
  • Loading branch information
beetcb committed Feb 14, 2021
1 parent b6e9615 commit 39aea2d
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions crawler/casLogIn.js
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,11 +6,11 @@ const log = require('../interface/colorLog')
const ocr = require('./captcha')

const headers = {
'cache-control': 'max-age=0',
'Cache-control': 'max-age=0',
'Accept-Encoding': 'gzip, deflate',
connection: 'keep-alive',
Connection: 'keep-alive',
'Upgrade-Insecure-Requests': '1',
'user-agent':
'User-agent':
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
}

Expand All @@ -29,10 +29,13 @@ module.exports = async (school, user) => {

const name = user.alias || user.username

headers.referer = school.login
// deal with anti crawlers
headers.Referer = school.login
headers.Origin = school.origin
headers.Host = school.origin.replace(/http(s?)\:\/\//, '')

// get base session -> cookie
res = await fetch(school.login, { headers })
headers.res = await fetch(school.login, { headers })
reCook(res, 1, cookie)

// create document for crawling
Expand Down

0 comments on commit 39aea2d

Please sign in to comment.