-
Notifications
You must be signed in to change notification settings - Fork 41
Explore > Jobs (data only) #942
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Download the BLS "annual by industry" archive for each year and extract the
# single CSV matched by the unzip pattern in the %.csv rule.
#
# `years` uses ?= so it can be overridden from the environment or command
# line, e.g.  make years="2012 2013"
years ?= 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
zips = $(foreach year,$(years),$(year).zip)
csvs = $(foreach year,$(years),$(year).csv)

# These targets do not name real files.
.PHONY: all clean distclean

# Remove a half-written target when its recipe fails.
.DELETE_ON_ERROR:

# The archives are only reached through the %.csv pattern rule, so make would
# treat them as intermediate files and delete them once the CSV is built.
# Keep them so a rebuild does not download again; `distclean` removes them.
.SECONDARY: $(zips)

all: $(csvs)

# --fail: exit non-zero on an HTTP error rather than saving the error page.
# --location: follow redirects.
%.zip:
	curl --fail --location "http://www.bls.gov/cew/data/files/$*/csv/$*_annual_by_industry.zip" -o $@

# Extract the matching CSV, move it to <year>.csv and drop the unpacked dir.
%.csv: %.zip
	unzip $< '*.annual 21 *.csv'
	mv $*.annual.by_industry/*.csv $@
	rm -r $*.annual.by_industry

distclean: clean
	rm -f *.zip

clean:
	rm -f *.csv
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
#!/usr/bin/env node | ||
var options = require('yargs') | ||
.option('user', { | ||
desc: 'your BEA API user ID' | ||
}) | ||
.option('year', { | ||
desc: 'year or year range', | ||
default: '2004-2013' | ||
}) | ||
.option('of', { | ||
desc: 'output format (tito-compatible)', | ||
default: 'tsv' | ||
}) | ||
.option('o', { | ||
desc: 'output filename' | ||
}) | ||
.argv; | ||
|
||
var tito = require('tito').formats; | ||
var request = require('request'); | ||
var qs = require('querystring'); | ||
var _url = require('url'); | ||
var util = require('../../lib/util'); | ||
var thru = require('through2').obj; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 'thru' is defined but never used. |
||
var fs = require('fs'); | ||
var async = require('async'); | ||
var extend = require('extend'); | ||
var streamify = require('stream-array'); | ||
|
||
var ONE_MILLION = 1e6; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 'ONE_MILLION' is defined but never used. |
||
var years = util.range(options.year); | ||
|
||
var lineCodes = { | ||
total: 10, // "[SA25N] Total employment" | ||
mining: 200, // "[SA25N] Mining" | ||
}; | ||
|
||
var tables = { | ||
total: 'SA25N', // "Total Full-Time and Part-Time Employment by NAICS Industry" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Line is too long. |
||
subtotal: 'SA27N', // "Full-Time and Part-Time Wage and Salary Employment by NAICS Industry" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Line is too long. |
||
}; | ||
|
||
var params = { | ||
UserID: options.user || process.env.BEA_API_KEY, | ||
DataSetName: 'RegionalIncome', | ||
Method: 'GetData', | ||
Year: years.join(','), | ||
GeoFips: 'STATE', | ||
LineCode: lineCodes.mining | ||
}; | ||
|
||
/**
 * Start a request against the BEA data endpoint.
 *
 * @param {Object} params values serialized into the query string
 * @return whatever `request(url)` returns (the caller pipes it)
 */
var fetch = function(params) {
  var endpoint = 'http://www.bea.gov/api/data/';
  var url = endpoint + '?' + qs.stringify(params);
  // progress goes to stderr so stdout stays free for data output
  console.warn('fetching:', params, '->', url);
  return request(url);
};
|
||
/**
 * Create a fresh tito JSON read stream selecting the
 * `.BEAAPI.Results.Data.*` path of the response.
 */
var parser = function() {
  var jsonOptions = {
    path: '.BEAAPI.Results.Data.*'
  };
  return tito.createReadStream('json', jsonOptions);
};
|
||
var tableIds = [tables.total, tables.subtotal]; | ||
|
||
// Fetch each table in turn (in series, because the shared `params` object is
// mutated per table), collect the mapped rows, then combine the sets per
// Region/Year and write the result in the requested format.
async.mapSeries(tableIds, function(table, next) {
  params.TableName = table;

  var rows = [];
  var finished = false;
  // `done` is wired to both the request 'error' event and the parser 'end'
  // event; the flag guarantees `next` is called at most once per table.
  var done = function(error) {
    if (finished) {
      return;
    }
    finished = true;
    if (error) {
      console.warn('error?', error);
    }
    // best effort: a failed request still yields whatever rows arrived
    next(null, rows);
  };

  fetch(params)
    .on('error', done)
    .pipe(parser())
    .on('data', function(row) {
      var mapped = mapRow(row);
      if (mapped) {
        console.warn('+ row:', mapped);
        rows.push(mapped);
      }
    })
    .on('end', done);
}, function(error, sets) {
  if (error) {
    return console.error('error:', error);
  }

  console.warn('got %d sets', sets.length);

  // The first set comes from the first table id: move its figure into
  // `Total` and zero `Value`, so only later sets contribute to `actual`.
  sets[0].forEach(function(d) {
    d.Total = d.Value;
    d.Value = 0;
  });

  var rows = sets.reduce(function(list, set) {
    return list.concat(set);
  }, []);

  var keys = ['Region', 'Year'];
  var result = util.group(rows, keys, function(group) {
    var total = 0;
    var actual = 0;

    group.values.forEach(function(d) {
      total += coerceNumber(d.Total);
      actual += coerceNumber(d.Value);
    });

    return {
      Total: total,
      Actual: actual,
      Value: total - actual
    };
  })
  .map(function(entry) {
    return extend(entry.key, entry.value);
  });

  console.warn('got %d rows', result.length);
  console.warn(result[0]);

  streamify(result)
    .pipe(tito.createWriteStream(options.of))
    .pipe(fs.createWriteStream(options.o || '/dev/stdout'))
    // a writable stream signals completion with 'finish'; it never emits 'end'
    .on('finish', function() {
      console.warn('wrote %d rows', result.length);
    });
});
|
||
/**
 * Convert one BEA result row into the {Region, Year, Value} shape used by
 * the rest of the script.
 *
 * @param {Object} row a row with GeoFips, GeoName, TimePeriod and DataValue
 * @return {Object|undefined} the mapped row, or undefined when the first two
 *   GeoFips characters are a number above 90 (the caller skips those)
 */
function mapRow(row) {
  // Convert explicitly instead of relying on string-to-number coercion in
  // the comparison; a non-numeric prefix becomes NaN and is never > 90.
  // NOTE(review): codes above 90 are presumably multi-state regions — confirm.
  var prefix = Number(row.GeoFips.slice(0, 2));
  if (prefix > 90) {
    return;
  }
  return {
    Region: row.GeoName,
    Year: row.TimePeriod,
    Value: row.DataValue
  };
}
|
||
/**
 * Log the given error to stderr with an "error?" prefix.
 * NOTE(review): the linter reports that 'noop' is defined but never used.
 */
function noop(error) {
  var label = 'error?';
  console.warn(label, error);
}
|
||
/**
 * Convert a value to a number, falling back to 0 when it is not numeric.
 *
 * Commas are removed from strings first, so a figure written with thousands
 * separators ("1,234,567") parses instead of collapsing to 0.
 * NOTE(review): assumes commas in the input are only ever grouping
 * separators — confirm against the API's value format.
 *
 * @param {*} str the value to convert (typically a string)
 * @return {number} the parsed number, or 0 for NaN / empty / missing input
 */
function coerceNumber(str) {
  var text = typeof str === 'string' ? str.replace(/,/g, '') : str;
  return Number(text) || 0;
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
#!/usr/bin/env node | ||
var yargs = require('yargs'); | ||
var options = yargs | ||
.option('in-states', { | ||
desc: 'the states data file', | ||
default: '_input/geo/states.csv' | ||
}) | ||
.option('year', { | ||
desc: 'year or year range', | ||
default: '2004-2013' | ||
}) | ||
.option('of', { | ||
desc: 'output format (tito-compatible)', | ||
default: 'tsv' | ||
}) | ||
.option('o', { | ||
desc: 'output filename' | ||
}) | ||
.argv; | ||
|
||
if (options.help) { | ||
return yargs.showHelp(); | ||
} | ||
|
||
var tito = require('tito').formats; | ||
var request = require('request'); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 'request' is defined but never used. |
||
var qs = require('querystring'); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 'qs' is defined but never used. |
||
var util = require('../../lib/util'); | ||
var thru = require('through2').obj; | ||
var fs = require('fs'); | ||
var async = require('async'); | ||
var streamify = require('stream-array'); | ||
|
||
var jobsField = 'annual_avg_emplvl'; | ||
var areaAggrCode = 'agglvl_code'; | ||
var statesByFips = {}; | ||
|
||
var inputFiles = options._; | ||
|
||
/**
 * Read the states file named by the `in-states` option and index it by its
 * 'FIPS' field into the module-level `statesByFips`.
 *
 * @param {Function} done called with an error on failure, or with no
 *   arguments once the lookup has been stored
 */
var loadStates = function(done) {
  var onStatesRead = function(error, states) {
    if (error) {
      return done(error);
    }
    statesByFips = util.map(states, 'FIPS', true);
    done();
  };

  util.readData(
    options['in-states'],
    tito.createReadStream('csv'),
    onStatesRead
  );
};
|
||
/**
 * Decide whether a parsed input row should be kept.
 *
 * @param {Object} d a row keyed by column name
 * @return {boolean} true when the row's aggregation-level field equals 74
 */
var isValidRow = function(d) {
  // XXX 74 is the county aggregation level
  // Parsed values arrive as strings, so convert explicitly and compare
  // strictly instead of relying on loose equality.
  return Number(d[areaAggrCode]) === 74;
};
|
||
/**
 * Convert one parsed input row into the output record.
 *
 * @param {Object} d a row with `area_fips`, `area_title`, `year` and the
 *   jobs column named by `jobsField`
 * @return {Object} {Year, State, FIPS, County, Jobs}; State is '' when the
 *   state prefix is not present in `statesByFips`
 */
var mapRow = function(d) {
  // Bracket access: these keys are column names from the input data, not
  // identifiers of ours (the linter flagged the dotted snake_case form).
  var fips = d['area_fips'];
  // A 4-character code is treated as having lost its leading zero, so pad
  // before taking the 2-character state prefix.
  var padded = fips.length === 4 ? '0' + fips : fips;
  var state = statesByFips[padded.slice(0, 2)];
  return {
    Year: d.year,
    State: state ? state.abbr : '',
    // FIPS is emitted exactly as it was read (not padded)
    FIPS: fips,
    // drop the trailing ", <words>" suffix from the area title
    County: d['area_title'].replace(/, [\w\s]+$/, ''),
    Jobs: +d[jobsField]
  };
};
|
||
/**
 * Read every input file in series, keep the rows accepted by `isValidRow`
 * (converted with `mapRow`), and hand back the rows with Jobs > 0.
 *
 * @param {Function} done called as done(error) on failure, or
 *   done(null, rows) with the combined rows of all files
 */
var loadFiles = function(done) {
  async.mapSeries(inputFiles, function(filename, next) {
    console.warn('reading:', filename);
    var rows = [];
    fs.createReadStream(filename)
      // report an unreadable file through the callback instead of letting
      // the unhandled 'error' event throw
      .on('error', next)
      .pipe(tito.createReadStream('csv'))
      // `callback` (not `next`) so the per-file `next` is not shadowed
      .pipe(thru(function(d, enc, callback) {
        if (isValidRow(d)) {
          rows.push(mapRow(d));
        }
        callback();
      }))
      .on('finish', function() {
        next(null, rows);
      });
  }, function(error, years) {
    if (error) {
      return done(error);
    }
    console.warn('loaded %d years', years.length);
    var rows = years.reduce(function(collection, set) {
      return collection.concat(set.filter(function(d) {
        return d.Jobs > 0;
      }));
    }, []);
    console.warn('got %d rows', rows.length);
    done(null, rows);
  });
};
|
||
/**
 * Serialize `rows` in the `of` output format and write them to the file
 * named by the `o` option, or to /dev/stdout when none is given.
 *
 * @param {Array} rows the records to write
 * @param {Function} done called with an error if the output stream fails,
 *   or with no arguments once everything has been written
 */
var write = function(rows, done) {
  streamify(rows)
    .pipe(tito.createWriteStream(options.of))
    .pipe(fs.createWriteStream(options.o || '/dev/stdout'))
    .on('error', done)
    // a writable stream emits 'finish', never 'end'; listening for 'end'
    // meant `done` was never called
    .on('finish', function() {
      done();
    });
};
|
||
// Run the three stages in order; each stage's results are passed to the
// next, and the first error ends the chain.
async.waterfall([loadStates, loadFiles, write], function(error) {
  if (error) {
    console.error('error:', error);
  } else {
    console.warn('all done!');
  }
});
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
'_url' is defined but never used.