Skip to content
This repository has been archived by the owner on Nov 10, 2020. It is now read-only.

Commit

Permalink
add wage & salary jobs data per #926
Browse files Browse the repository at this point in the history
  • Loading branch information
Shawn Allen committed Dec 1, 2015
1 parent e8903bc commit f0e26d0
Show file tree
Hide file tree
Showing 3 changed files with 12,205 additions and 2 deletions.
13 changes: 11 additions & 2 deletions data/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,19 @@ GDP_FILES = \
gdp/US.tsv \
gdp/states.tsv

JOBS_FILES = \
jobs/wage-salary.tsv

# a list of all of the individual files to build.
# unless otherwise noted, all of these files are broken
# down by year and commodity
FILES = \
$(REVENUE_FILES) \
$(PRODUCTION_FILES) \
$(EXPORT_FILES) \
geo svg \
$(GDP_FILES)
$(GDP_FILES) \
$(JOBS_FILES) \
geo svg

all: $(FILES)

Expand All @@ -59,6 +63,11 @@ exports: $(EXPORT_FILES)

gdp: $(GDP_FILES)

jobs: $(JOBS_FILES)

jobs/wage-salary.tsv:
bin/parse-bls.js _input/bls/*.csv > $@

# national revenues by commodity and revenue type
national/revenues.tsv: state/revenues.tsv offshore/revenues.tsv
mkdir -p $(dir $@)
Expand Down
115 changes: 115 additions & 0 deletions data/bin/parse-bls.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#!/usr/bin/env node
var yargs = require('yargs');
var options = yargs
.option('in-states', {
desc: 'the states data file',
default: '_input/geo/states.csv'
})
.option('year', {
desc: 'year or year range',
default: '2004-2013'
})
.option('of', {
desc: 'output format (tito-compatible)',
default: 'tsv'
})
.option('o', {
desc: 'output filename'
})
.argv;

if (options.help) {
return yargs.showHelp();
}

var tito = require('tito').formats;
var request = require('request');
var qs = require('querystring');
var util = require('../../lib/util');
var thru = require('through2').obj;
var fs = require('fs');
var async = require('async');
var streamify = require('stream-array');

// name of the input column that mapRow() converts to the numeric `Jobs` field
var jobsField = 'annual_avg_emplvl';
// name of the input column that isValidRow() compares against 74
var areaAggrCode = 'agglvl_code';
// lookup of state records, replaced by loadStates() and read by mapRow()
// using a two-character state FIPS prefix as the key
var statesByFips = {};

// positional command-line arguments; each one is read as an input file
// by loadFiles()
var inputFiles = options._;

/**
 * Read the states data file named by the `in-states` option as CSV and store
 * the result of `util.map(states, 'FIPS', true)` in the module-level
 * `statesByFips` (presumably an index of state records by their FIPS column;
 * mapRow() later looks records up by a two-character state FIPS prefix).
 *
 * @param {Function} done  called with the read error on failure, or with no
 *                         arguments once `statesByFips` has been replaced
 */
var loadStates = function(done) {
  var statesFile = options['in-states'];
  var csvParser = tito.createReadStream('csv');

  function onStatesRead(error, states) {
    if (error) {
      return done(error);
    }
    statesByFips = util.map(states, 'FIPS', true);
    done();
  }

  util.readData(statesFile, csvParser, onStatesRead);
};

/**
 * Row filter: keep only rows whose aggregation-level column (the column named
 * by `areaAggrCode`) equals 74.
 *
 * @param {Object} d  one parsed input row
 * @returns {boolean} true when the row should be kept
 */
var isValidRow = function(d) {
  // XXX 74 is the county aggregation level
  var countyLevel = 74;
  var rowLevel = d[areaAggrCode];
  // loose comparison is intentional: it accepts both 74 and the string '74'
  return rowLevel == countyLevel;
};

/**
 * Convert one parsed input row into an output record.
 *
 * The state is looked up in `statesByFips` by the first two characters of
 * `area_fips`; a four-character code is treated as having lost its leading
 * zero, so its state key is '0' plus the first character. The emitted `FIPS`
 * value is the input code unchanged.
 *
 * @param {Object} d  input row; reads `area_fips`, `year`, `area_title` and
 *                    the column named by `jobsField`
 * @returns {{Year: *, State: string, FIPS: string, County: string, Jobs: number}}
 *          `State` is '' when no state record matches; `County` is
 *          `area_title` with a trailing ", <words>" suffix removed
 */
var mapRow = function(d) {
  var areaFips = d.area_fips;

  var stateKey;
  if (areaFips.length === 4) {
    stateKey = '0' + areaFips.charAt(0);
  } else {
    stateKey = areaFips.slice(0, 2);
  }

  var stateRecord = statesByFips[stateKey];
  var stateAbbr = '';
  if (stateRecord) {
    stateAbbr = stateRecord.abbr;
  }

  var record = {};
  record.Year = d.year;
  record.State = stateAbbr;
  record.FIPS = areaFips;
  record.County = d.area_title.replace(/, [\w\s]+$/, '');
  record.Jobs = +d[jobsField];
  return record;
};

/**
 * Read every file in `inputFiles`, one after another, as CSV; keep the rows
 * accepted by isValidRow(), convert them with mapRow(), and hand back a single
 * flat list of the converted rows whose `Jobs` value is greater than zero.
 *
 * Failures are reported through `done` instead of being ignored: a stream
 * error on any stage of a file's pipeline (for example an unreadable input
 * file) stops the series and is passed to `done`.
 *
 * @param {Function} done  node-style callback: `done(error)` on failure,
 *                         `done(null, rows)` on success
 */
var loadFiles = function(done) {
  // Keep only rows that report at least one job (NaN is dropped as well).
  var hasJobs = function(d) {
    return d.Jobs > 0;
  };

  // Read one file and call fileDone(error) or fileDone(null, rows).
  var readFile = function(filename, fileDone) {
    console.warn('reading:', filename);
    var rows = [];

    // A failing pipeline can emit more than one event; report only the first.
    var settled = false;
    var settle = function(error) {
      if (settled) return;
      settled = true;
      if (error) return fileDone(error);
      fileDone(null, rows);
    };

    var source = fs.createReadStream(filename);
    var parser = tito.createReadStream('csv');
    // Collects rows without pushing anything downstream, so nothing needs to
    // consume the readable side of this stream.
    var collector = thru(function(d, enc, rowDone) {
      if (isValidRow(d)) {
        rows.push(mapRow(d));
      }
      rowDone();
    });

    // pipe() does not forward errors, so each stage needs its own listener.
    source.on('error', settle);
    parser.on('error', settle);
    collector.on('error', settle);
    collector.on('finish', function() {
      settle();
    });

    source.pipe(parser).pipe(collector);
  };

  async.mapSeries(inputFiles, readFile, function(error, years) {
    if (error) return done(error);
    console.warn('loaded %d years', years.length);
    var rows = years.reduce(function(collection, set) {
      return collection.concat(set.filter(hasJobs));
    }, []);
    console.warn('got %d rows', rows.length);
    done(null, rows);
  });
};

/**
 * Serialize `rows` in the format named by the `of` option and write them to
 * the file named by the `o` option, or to '/dev/stdout' when `o` is not set.
 *
 * Completion is detected with the destination's 'finish' event: writable
 * streams signal "everything has been flushed" with 'finish', whereas 'end'
 * is a readable-stream event and never fires on a file write stream.
 *
 * @param {Array<Object>} rows  records to write
 * @param {Function} done       called with no arguments once the output has
 *                              been flushed, or with the first stream error
 */
var write = function(rows, done) {
  // A failing pipeline can emit more than one event; report only the first.
  var settled = false;
  var settle = function(error) {
    if (settled) return;
    settled = true;
    if (error) return done(error);
    done();
  };

  var source = streamify(rows);
  var formatter = tito.createWriteStream(options.of);
  var sink = fs.createWriteStream(options.o || '/dev/stdout');

  // pipe() does not forward errors, so each stage needs its own listener.
  source.on('error', settle);
  formatter.on('error', settle);
  sink.on('error', settle);
  sink.on('finish', function() {
    settle();
  });

  source.pipe(formatter).pipe(sink);
};

// Entry point: load the state lookup, read and convert the input files, then
// write the combined rows. Each step receives the previous step's result.
async.waterfall([
  loadStates,
  loadFiles,
  write
], function(error) {
  if (error) {
    console.error('error:', error);
    // Report failure through the exit status so that callers such as make,
    // which redirects this script's stdout into the build target, do not
    // treat a failed run as a successful build.
    process.exitCode = 1;
    return;
  }
  console.warn('all done!');
});
Loading

0 comments on commit f0e26d0

Please sign in to comment.