Skip to content
This repository has been archived by the owner on Nov 10, 2020. It is now read-only.

Explore > Jobs (data only) #942

Merged
merged 3 commits into from
Dec 1, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions data/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,20 @@ GDP_FILES = \
gdp/US.tsv \
gdp/states.tsv

JOBS_FILES = \
jobs/wage-salary.tsv \
jobs/self-employment.tsv

# a list of all of the individual files to build.
# unless otherwise noted, all of these files are broken
# down by year and commodity
FILES = \
$(REVENUE_FILES) \
$(PRODUCTION_FILES) \
$(EXPORT_FILES) \
geo svg \
$(GDP_FILES)
$(GDP_FILES) \
$(JOBS_FILES) \
geo svg

all: $(FILES)

Expand All @@ -59,6 +64,11 @@ exports: $(EXPORT_FILES)

gdp: $(GDP_FILES)

jobs: $(JOBS_FILES)

# county-level wage and salary jobs, parsed from the BLS CSVs in _input/bls.
# the output directory is created first (as the other rules do) so that the
# shell redirect does not fail on a clean checkout.
jobs/wage-salary.tsv:
	mkdir -p $(dir $@)
	bin/parse-bls.js _input/bls/*.csv > $@

# national revenues by commodity and revenue type
national/revenues.tsv: state/revenues.tsv offshore/revenues.tsv
mkdir -p $(dir $@)
Expand Down Expand Up @@ -90,6 +100,16 @@ gdp/regional.tsv:
--field Region \
> $@

# self-employment jobs by region and year, fetched by bin/get-bea-jobs.js
# (which needs a BEA API user ID via --user or the BEA_API_KEY environment
# variable). the TSV output is piped through bin/abbr-state.js, presumably to
# abbreviate the state names in the Region field -- confirm against that script.
jobs/self-employment.tsv:
mkdir -p $(dir $@)
bin/get-bea-jobs.js | \
bin/abbr-state.js \
--if tsv \
--of tsv \
--states $(input)geo/states.csv \
--field Region \
> $@

# state listing
state/states.tsv: $(input)geo/states.csv
$(tito) --read csv $< | \
Expand Down
2,994 changes: 2,994 additions & 0 deletions data/_input/bls/2004.csv

Large diffs are not rendered by default.

3,005 changes: 3,005 additions & 0 deletions data/_input/bls/2005.csv

Large diffs are not rendered by default.

3,023 changes: 3,023 additions & 0 deletions data/_input/bls/2006.csv

Large diffs are not rendered by default.

3,029 changes: 3,029 additions & 0 deletions data/_input/bls/2007.csv

Large diffs are not rendered by default.

3,044 changes: 3,044 additions & 0 deletions data/_input/bls/2008.csv

Large diffs are not rendered by default.

3,027 changes: 3,027 additions & 0 deletions data/_input/bls/2009.csv

Large diffs are not rendered by default.

3,028 changes: 3,028 additions & 0 deletions data/_input/bls/2010.csv

Large diffs are not rendered by default.

3,030 changes: 3,030 additions & 0 deletions data/_input/bls/2011.csv

Large diffs are not rendered by default.

3,032 changes: 3,032 additions & 0 deletions data/_input/bls/2012.csv

Large diffs are not rendered by default.

3,051 changes: 3,051 additions & 0 deletions data/_input/bls/2013.csv

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions data/_input/bls/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# years of BLS data to fetch; `?=` lets the caller override the list,
# e.g. `make years="2012 2013"`
years ?= 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
# one archive name and one CSV name per requested year
# (zips is not referenced by any rule visible in this file)
zips = $(foreach year,$(years),$(year).zip)
csvs = $(foreach year,$(years),$(year).csv)

# default target: build the CSV for every requested year
all: $(csvs)

# download the "annual by industry" archive for the year given by the stem ($*)
%.zip:
curl "http://www.bls.gov/cew/data/files/$*/csv/$*_annual_by_industry.zip" -o $@

# extract only the archive members matching '*.annual 21 *.csv', move the
# extracted CSV to <year>.csv and remove the extraction directory.
# NOTE(review): "21" is presumably the NAICS mining sector, and the mv assumes
# exactly one member matches the pattern -- confirm against the archive contents.
%.csv: %.zip
unzip $< '*.annual 21 *.csv'
mv $*.annual.by_industry/*.csv $@
rm -r $*.annual.by_industry

# remove the downloaded archives in addition to everything `clean` removes
distclean: clean
rm -f *.zip

# remove every CSV in this directory
clean:
rm -f *.csv
152 changes: 152 additions & 0 deletions data/bin/get-bea-jobs.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#!/usr/bin/env node
// Command-line options parsed by yargs: the BEA API user ID, the year or year
// range to request, the output format, and an optional output filename.
var options = require('yargs')
.option('user', {
desc: 'your BEA API user ID'
})
.option('year', {
desc: 'year or year range',
default: '2004-2013'
})
.option('of', {
desc: 'output format (tito-compatible)',
default: 'tsv'
})
.option('o', {
desc: 'output filename'
})
.argv;

var tito = require('tito').formats;
var request = require('request');
var qs = require('querystring');
var _url = require('url');

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'_url' is defined but never used.

var util = require('../../lib/util');
var thru = require('through2').obj;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'thru' is defined but never used.

var fs = require('fs');
var async = require('async');
var extend = require('extend');
var streamify = require('stream-array');

var ONE_MILLION = 1e6;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'ONE_MILLION' is defined but never used.

var years = util.range(options.year);

// BEA line codes for the rows of interest. Only `mining` is referenced in the
// visible code (as the LineCode request parameter).
var lineCodes = {
total: 10, // "[SA25N] Total employment"
mining: 200, // "[SA25N] Mining"
};

var tables = {
total: 'SA25N', // "Total Full-Time and Part-Time Employment by NAICS Industry"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line is too long.

subtotal: 'SA27N', // "Full-Time and Part-Time Wage and Salary Employment by NAICS Industry"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line is too long.

};

// Query parameters shared by every BEA API request. The user ID comes from
// --user, falling back to the BEA_API_KEY environment variable. This object is
// mutated later: the request loop sets `TableName` on it before each fetch.
var params = {
UserID: options.user || process.env.BEA_API_KEY,
DataSetName: 'RegionalIncome',
Method: 'GetData',
Year: years.join(','),
GeoFips: 'STATE',
LineCode: lineCodes.mining
};

/**
 * Start a request against the BEA API.
 *
 * Serializes `params` into a query string, appends it to the API endpoint,
 * logs both the parameters and the final URL to stderr, and returns whatever
 * `request()` returns for that URL.
 */
var fetch = function(params) {
  var endpoint = 'http://www.bea.gov/api/data/';
  var query = qs.stringify(params);
  var url = endpoint + '?' + query;
  console.warn('fetching:', params, '->', url);
  return request(url);
};

/**
 * Create a fresh tito JSON read stream configured with the path
 * '.BEAAPI.Results.Data.*'. A new stream is built on every call.
 */
var parser = function() {
  var readOptions = {path: '.BEAAPI.Results.Data.*'};
  return tito.createReadStream('json', readOptions);
};

var tableIds = [tables.total, tables.subtotal];

// Fetch each BEA table in series, then combine the two result sets into one
// row per Region and Year. For each group, Total is summed from the first
// table, Actual from the second, and the output Value is Total - Actual.
async.mapSeries(tableIds, function(table, next) {
  // `params` is shared between requests; only the table name changes.
  params.TableName = table;

  var rows = [];
  var done = function(error) {
    if (error) {
      console.warn('error?', error);
    }
    // Errors are only logged, never propagated: a failed request simply
    // contributes whatever rows were collected before it failed.
    next(null, rows);
  };

  fetch(params)
    .on('error', done)
    .pipe(parser())
    .on('data', function(row) {
      row = mapRow(row);
      if (row) {
        console.warn('+ row:', row);
        rows.push(row);
      }
    })
    .on('end', done);
}, function(error, sets) {
  if (error) {
    return console.error('error:', error);
  }

  console.warn('got %d sets', sets.length);

  // Move the first table's values into a separate `Total` column and zero
  // `Value`, so that when both sets are merged below each column is summed
  // from exactly one table.
  sets[0].forEach(function(d) {
    d.Total = d.Value;
    d.Value = 0;
  });

  var rows = sets.reduce(function(list, set) {
    return list.concat(set);
  }, []);

  var keys = ['Region', 'Year'];
  var result = util.group(rows, keys, function(group) {
    var total = 0;
    var actual = 0;

    group.values.forEach(function(d) {
      total += coerceNumber(d.Total);
      actual += coerceNumber(d.Value);
    });

    return {
      Total: total,
      Actual: actual,
      Value: total - actual,
    };
  })
  .map(function(entry) {
    return extend(entry.key, entry.value);
  });

  console.warn('got %d rows', result.length);
  console.warn(result[0]);

  streamify(result)
    .pipe(tito.createWriteStream(options.of))
    .pipe(fs.createWriteStream(options.o || '/dev/stdout'))
    // The last stream in the chain is a writable file stream. Writable
    // streams signal completion with 'finish' and never emit 'end', so the
    // original 'end' listener could not fire.
    .on('finish', function() {
      console.warn('wrote %d rows', result.length);
    });
});

/**
 * Reshape one BEA API data row into {Region, Year, Value}.
 *
 * Rows whose two-character GeoFips prefix is numerically greater than 90 are
 * dropped: the function returns undefined for them.
 */
function mapRow(row) {
  var prefix = row.GeoFips.slice(0, 2);
  var isExcluded = Number(prefix) > 90;
  return isExcluded ? undefined : {
    Region: row.GeoName,
    Year: row.TimePeriod,
    Value: row.DataValue
  };
}

function noop(error) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'noop' is defined but never used.

console.warn('error?', error);
}

/**
 * Convert a value to a number, substituting 0 for anything that converts to
 * NaN or to zero (for example undefined, '' or non-numeric text).
 */
function coerceNumber(str) {
  var parsed = Number(str);
  if (parsed) {
    return parsed;
  }
  return 0;
}
115 changes: 115 additions & 0 deletions data/bin/parse-bls.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#!/usr/bin/env node
var yargs = require('yargs');
// Command-line options parsed by yargs: the states data file, the year or year
// range, the output format, and an optional output filename.
var options = yargs
.option('in-states', {
desc: 'the states data file',
default: '_input/geo/states.csv'
})
.option('year', {
desc: 'year or year range',
default: '2004-2013'
})
.option('of', {
desc: 'output format (tito-compatible)',
default: 'tsv'
})
.option('o', {
desc: 'output filename'
})
.argv;

// Print usage and stop when --help was passed. The top-level `return` is
// legal here because Node runs CommonJS modules inside a function wrapper.
if (options.help) {
return yargs.showHelp();
}

var tito = require('tito').formats;
var request = require('request');

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'request' is defined but never used.

var qs = require('querystring');

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'qs' is defined but never used.

var util = require('../../lib/util');
var thru = require('through2').obj;
var fs = require('fs');
var async = require('async');
var streamify = require('stream-array');

// Input column that mapRow reads (and coerces to a number) as `Jobs`.
var jobsField = 'annual_avg_emplvl';
// Input column that isValidRow checks to decide whether a row is kept.
var areaAggrCode = 'agglvl_code';
// Lookup of state records; replaced by loadStates with a map built on 'FIPS'.
var statesByFips = {};

// Positional command-line arguments: the input files to read.
var inputFiles = options._;

var loadStates = function(done) {
util.readData(options['in-states'],
tito.createReadStream('csv'),
function(error, states) {
if (error) return done(error);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Expected '{' and instead saw 'return'.

statesByFips = util.map(states, 'FIPS', true);
done();
});
};

/**
 * Decide whether an input row should be kept.
 *
 * A row is kept only when its aggregation-level column (named by
 * `areaAggrCode`) is 74. Values parsed from CSV arrive as strings, so the
 * value is converted explicitly and compared strictly instead of relying on
 * the implicit coercion of `==`.
 */
var isValidRow = function(d) {
  // XXX 74 is the county aggregation level
  return Number(d[areaAggrCode]) === 74;
};

var mapRow = function(d) {
var fips = d.area_fips;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Identifier 'area_fips' is not in camel case.

var stateFips = fips.length === 4
? '0' + fips.charAt(0)
: fips.substr(0, 2);
var state = statesByFips[stateFips];
// console.warn('fips: %s ->', fips, stateFips);
return {
Year: d.year,
State: state ? state.abbr : '',
FIPS: fips,
County: d.area_title.replace(/, [\w\s]+$/, ''),

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Identifier 'area_title' is not in camel case.

Jobs: +d[jobsField]
};
};

var loadFiles = function(done) {
async.mapSeries(inputFiles, function(filename, next) {
console.warn('reading:', filename);
var rows = [];
fs.createReadStream(filename)
.pipe(tito.createReadStream('csv'))
.pipe(thru(function(d, enc, next) {
if (isValidRow(d)) {
// console.warn('add:', d[areaAggrCode]);
rows.push(mapRow(d));
} else {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Empty block.

// console.warn('skip:', d[areaAggrCode]);
}
next();
}))
.on('finish', function() {
next(null, rows);
});
}, function(error, years) {
console.warn('loaded %d years', years.length);
var rows = years.reduce(function(collection, set) {
return collection.concat(set.filter(function(d) {
return d.Jobs > 0;
}));
}, []);
console.warn('got %d rows', rows.length);
done(null, rows);
});
};

/**
 * Serialize `rows` in the requested output format and write them to the
 * output file (or /dev/stdout when no filename was given).
 *
 * `done` is called with no arguments once the file stream has flushed
 * everything. The last stream in the chain is a writable file stream, which
 * signals completion with 'finish' and never emits 'end', so listening for
 * 'end' meant `done` was never invoked.
 */
var write = function(rows, done) {
  streamify(rows)
    .pipe(tito.createWriteStream(options.of))
    .pipe(fs.createWriteStream(options.o || '/dev/stdout'))
    .on('finish', done);
};

async.waterfall([
loadStates,
loadFiles,
write
], function(error) {
if (error) return console.error('error:', error);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Expected '{' and instead saw 'return'.

console.warn('all done!');
});
Loading