-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #87 from tarrow/updateurldownloader
Downloads refactor - use a common downloader across all APIs
- Loading branch information
Showing
3 changed files
with
116 additions
and
160 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
var util = require('util') | ||
, fs = require('fs') | ||
, chalk = require('chalk') | ||
, got = require('got') | ||
, mkdirp = require('mkdirp') | ||
, _ = require('lodash') | ||
, ProgressBar = require('progress'); | ||
|
||
exports.downloadURLs = function(fullurlQueue, nextDlTaskcb) { | ||
var failed = []; | ||
var retries = 0; | ||
var missing = 0; | ||
|
||
urlQueue = fullurlQueue; //urlQueue needs to be global unless | ||
//we put these other functions inside | ||
//this one. | ||
|
||
//Setup ProgressBar | ||
var progmsg = 'Downloading files [:bar] :percent' + | ||
' (:current/:total) [:elapseds elapsed, eta :eta]'; | ||
var progopts = { | ||
total: fullurlQueue.length, | ||
width: 30, | ||
complete: chalk.green('=') | ||
}; | ||
var dlprogress = new ProgressBar(progmsg, progopts); | ||
|
||
for(i=0; i<10; i++) { | ||
nextUrlTask(urlQueue); //spawn 10 workers | ||
} | ||
|
||
function nextUrlTask() { | ||
if (urlQueue instanceof Array && urlQueue.length > 0) { | ||
var urlObj = urlQueue.splice(0,1)[0]; | ||
downloadURL(urlObj); | ||
} | ||
} | ||
|
||
function downloadURL(urlObj) { | ||
var url = urlObj.url; | ||
var id = urlObj.id; | ||
var type = urlObj.type; | ||
var rename = urlObj.rename; | ||
var base = id + '/'; | ||
log.debug('Creating directory: ' + base); | ||
mkdirp.sync(base); | ||
log.debug('Downloading ' + type + ': ' + url); | ||
var options = { | ||
timeout: 15000, | ||
encoding: null, | ||
retries: 3 | ||
} | ||
|
||
var get = got(url, options, function(err, data, res) { | ||
dlprogress.tick(); | ||
if (err) { | ||
if (err.code === 'ETIMEDOUT' || err.code === 'ESOCKETTIMEDOUT') { | ||
log.warn('Download timed out for URL ' + url); | ||
} | ||
if (!res) { | ||
failed.push(url); | ||
} else if ((res.statusCode == 404) && !(fourohfour === null)) { | ||
fourohfour(); | ||
} else { | ||
failed.push(url); | ||
} | ||
done(); | ||
} else { | ||
fs.writeFile(base + rename, data, done); | ||
} | ||
nextUrlTask(urlQueue); | ||
}); | ||
} | ||
|
||
var donefunc = function() { | ||
if (failed.length > 0) { | ||
log.warn(failed.length + ' downloads timed out on retry.'); | ||
} else if (missing > 0) { | ||
var succeeded = urls.length - missing; | ||
var suffix = missing > 1 ? 's' : '' | ||
log.info(succeeded + ' downloads succeeded. ' + missing + | ||
' paper' + suffix + ' had URLs that could not be reached (404 error).'); | ||
} else { | ||
log.info('All downloads succeeded!'); | ||
} | ||
nextDlTaskcb(); | ||
} | ||
|
||
var done = _.after(urls.length, donefunc); | ||
|
||
var fourohfour = function() { | ||
missing ++; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters