Skip to content

Commit

Permalink
Support User-Agent rotation
Browse files Browse the repository at this point in the history
  • Loading branch information
mike442144 committed Jun 12, 2015
1 parent f507c79 commit 7b10d7d
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 6 deletions.
20 changes: 15 additions & 5 deletions lib/crawler.js
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ Crawler.prototype.init = function init (options) {
retries: 3,
retryTimeout: 10000,
timeout: 15000,
skipDuplicates: false
skipDuplicates: false,
rotateUA: false
};

//return defaultOptions with overridden properties from options.
Expand Down Expand Up @@ -115,6 +116,7 @@ Crawler.prototype.init = function init (options) {
});

self.on("request",function(options){

if(_.isFunction(self.options.preRequest)){
self.options.preRequest(options);
}
Expand Down Expand Up @@ -315,7 +317,16 @@ Crawler.prototype._buildHttpRequest = function _buildHTTPRequest (options) {
}

if (ropts.userAgent) {
ropts.headers['User-Agent'] = ropts.userAgent;
if(ropts.rotateUA && _.isArray(ropts.userAgent)){
ropts.headers['User-Agent'] = ropts.userAgent[0];
// If "rotateUA" is true, rotate User-Agent
options.userAgent.push(options.userAgent.shift());
}else{
ropts.headers['User-Agent'] = ropts.userAgent;
}
if(options.debug){
logger.info(ropts.headers['User-Agent']);
}
}
if (ropts.referer) {
ropts.headers.Referer = ropts.referer;
Expand Down Expand Up @@ -363,8 +374,7 @@ Crawler.prototype._onContent = function _onContent (error, options, response, fr
if (options.proxies) {
options.proxies.push(options.proxies.shift());
}

self.queue(options);
self.queue(options);
},options.retryTimeout);

} else if (options.callback) {
Expand Down Expand Up @@ -490,4 +500,4 @@ Crawler.prototype._onInject = function _onInject (errors, options, response, $)
};

module.exports = Crawler;
module.exports.VERSION = '0.4.8';
module.exports.VERSION = '0.4.9';
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "node-webcrawler",
"version": "0.4.8",
"version": "0.4.9",
"description": "Crawler is a web spider written with Nodejs. It gives you the full power of jQuery on the server to parse a big number of pages as they are downloaded, asynchronously",
"main": "./lib/crawler.js",
"directories": {
Expand Down

0 comments on commit 7b10d7d

Please sign in to comment.