Skip to content

Commit

Permalink
Merge pull request #1 from ContentMine/master
Browse files Browse the repository at this point in the history
Merge new arxiv work
  • Loading branch information
tarrow committed Feb 24, 2016
2 parents 34641bd + 6af72a0 commit 3693bae
Showing 1 changed file with 21 additions and 13 deletions.
34 changes: 21 additions & 13 deletions lib/arxiv.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,15 @@ ArXiv.prototype.search = function(query) {
log.warn("The ArXiv API does not provide fulltext XML, so the --xml flag will be ignored");
}

options = {
max_results: 10
};
var options = {};

arxiv.queryurl = arxiv.buildQuery(query, options);
arxiv.first = true;
arxiv.hitcount = 0;
arxiv.allresults = [];
arxiv.iter = 0;

arxiv.pagesize = 500;
arxiv.page_delay = 3000; // milliseconds to wait between requests
arxiv.pageQuery();

}
Expand All @@ -43,13 +42,13 @@ ArXiv.prototype.pageQuery = function() {

var thisQueryUrl = arxiv.queryurl;

if (arxiv.iter > 0) {
var pageterm = '&start=' + arxiv.iter;
thisQueryUrl += pageterm;
}
var pageterm =
'&start=' + arxiv.iter +
'&max_results=' + arxiv.pagesize;
thisQueryUrl += pageterm;

log.debug(thisQueryUrl);
var rq = rest.get(thisQueryUrl, {timeout: 20000, parser: rest.parsers.xml});
var rq = rest.get(thisQueryUrl, {timeout: 40000, parser: rest.parsers.xml});
rq.on('complete', arxiv.completeCallback.bind(arxiv));
rq.on('timeout', arxiv.timeoutCallback);

Expand Down Expand Up @@ -82,13 +81,22 @@ ArXiv.prototype.completeCallback = function(data) {

}

var result = data.feed.entry;
if (data && data.feed && data.feed.entry) {
var result = data.feed.entry;
} else {
log.error('Malformed response from arXiv API - no data in feed');
log.debug(data);
log.info('Retrying failed request');
setTimeout(arxiv.pageQuery.bind(arxiv), arxiv.page_delay);
return;
}
log.debug('Got', result.length, 'results in this page');
arxiv.allresults = arxiv.allresults.concat(result);
arxiv.pageprogress.tick(result.length);

if (arxiv.allresults.length < arxiv.hitcount) {
arxiv.iter += 1;
arxiv.pageQuery();
arxiv.iter += arxiv.pagesize;
setTimeout(arxiv.pageQuery.bind(arxiv), arxiv.page_delay);
} else {
log.info('Done collecting results');
arxiv.handleSearchResults(arxiv);
Expand Down Expand Up @@ -294,7 +302,7 @@ ArXiv.prototype.downloadUrls = function(urls, type, rename, failed,
mkdirp.sync(base);
log.debug('Downloading ' + type + ': ' + url);
var options = {
timeout: 15000,
timeout: 40000,
encoding: null
}
var get = got(url, options, function(err, data, res) {
Expand Down

0 comments on commit 3693bae

Please sign in to comment.