Skip to content

Commit

Permalink
limit interrupt fix: after self-interrupting, only look at local pending list (for redis state)
Browse files Browse the repository at this point in the history

logging: don't log CF check errors, do log when errorCount is reset
  • Loading branch information
ikreymer committed May 19, 2022
1 parent 6ec47cd commit 70ba924
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 7 deletions.
10 changes: 7 additions & 3 deletions crawler.js
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ class Crawler {
}

if (interrupt) {
this.crawlState.setDrain();
this.crawlState.setDrain(true);
this.exitCode = 11;
}
}
Expand Down Expand Up @@ -625,7 +625,10 @@ class Crawler {

try {
await page.goto(url, gotoOpts);
this.errorCount = 0;
if (this.errorCount > 0) {
this.statusLog(`Page loaded, resetting error count ${this.errorCount} to 0`);
this.errorCount = 0;
}
} catch (e) {
let msg = e.message || "";
if (!msg.startsWith("net::ERR_ABORTED") || !ignoreAbort) {
Expand Down Expand Up @@ -719,7 +722,8 @@ class Crawler {
await this.sleep(5500);
}
} catch (e) {
console.warn(e);
//console.warn("Check CF failed, ignoring");
//console.warn(e);
}
}

Expand Down
18 changes: 14 additions & 4 deletions util/state.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ class BaseState
{
constructor() {
this.drainMax = 0;
this.localOnly = false;
}

async setDrain() {
this.drainMax = (await this.numPending()) + (await this.numDone());
async setDrain(localOnly = false) {
this.drainMax = (await this.numRealPending()) + (await this.numDone());
this.localOnly = localOnly;
}

async size() {
Expand All @@ -29,6 +31,14 @@ class BaseState

return seed.isIncluded(data.url, data.depth, data.extraHops);
}

numPending(localPending = 0) {
if (this.localOnly) {
return localPending;
}

return this.numRealPending();
}
}


Expand Down Expand Up @@ -146,7 +156,7 @@ class MemoryCrawlState extends BaseState
return this.seenList.size;
}

async numPending() {
async numRealPending() {
return this.pending.size;
}
}
Expand Down Expand Up @@ -386,7 +396,7 @@ return 0;
return await this.redis.scard(this.skey);
}

async numPending() {
async numRealPending() {
const res = await this.redis.hlen(this.pkey);

// reset pendings
Expand Down

0 comments on commit 70ba924

Please sign in to comment.