Skip to content
This repository has been archived by the owner on Jan 8, 2020. It is now read-only.

Commit

Permalink
Merge pull request #152 from cfpb/remove-hash
Browse files: browse the repository at this point in the history
Remove hash checking
  • Loading branch information
hkeeler committed Dec 18, 2015
2 parents 6376b9b + afc06db commit dc557d6
Show file tree
Hide file tree
Showing 19 changed files with 80 additions and 129 deletions.
9 changes: 0 additions & 9 deletions data.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
{
"name": "north_carolina",
"url": "http://data.nconemap.gov/downloads/vector/mastadd14.zip",
"hash": "1b927688a387275c6b7767a94b46a3467731564c0265c6d3747ae7e80ea65257",
"file": "AddressNC_2014.gdb",
"spatialReference": "EPSG:2264",
"count": 4916623,
Expand Down Expand Up @@ -32,7 +31,6 @@
{
"name": "utah",
"url": "ftp://ftp.agrc.utah.gov/UtahSGID_Vector/UTM12_NAD83/LOCATION/UnpackagedData/AddressPoints/_Statewide/AddressPoints_gdb.zip",
"hash": "66e88a697adc7b5ddf5c409782694a9684e2c1c6eeab55834eec3fb68435fd96",
"file": "AddressPoints.gdb",
"count": 1084127,
"fields": {
Expand Down Expand Up @@ -61,7 +59,6 @@
{
"name": "new_york",
"url": "http://gis.ny.gov/gisdata/data/ds_921/SAM_Master_Statewide.gdb.zip",
"hash": "dda07a26bb25b5c900d0d1464ade072e3aa769372bf4d9bca75fa05ddae96122",
"file": "SAM_Master_Statewide_Database.gdb",
"count": 4766632,
"fields": {
Expand Down Expand Up @@ -90,7 +87,6 @@
{
"name": "arkansas",
"url": "http://geostor-vectors.geostor.org/Location/SHP/SITUS_ADDRESS_PT.zip",
"hash": "304fb038ede2da4a5e388e2f8c0bb54ca722dec9422ed173f044b79800810448",
"file": "location_SITUS_ADDRESS_PT.shp",
"count": 1407700,
"fields": {
Expand Down Expand Up @@ -119,7 +115,6 @@
{
"name": "virginia",
"url": "https://ftp.vgingis.com/Download/VA_SiteAddress.txt.zip",
"hash": "3612012b94b92c003f105aafef2ea01c736b7bd68e88365e1f42485a80b6921e",
"file": "VA_SiteAddress.txt",
"count": 3654477,
"spatialReference": "NAD83",
Expand Down Expand Up @@ -149,7 +144,6 @@
{
"name": "maine",
"url": "http://www.maine.gov/megis/catalog/shps/state/ng911pts.zip",
"hash": "81f0ca73065e0a6db87d510b15ff88bec512221b8499ebd8c77d7e1da1863eb8",
"file": "NGAddresses.shp",
"count": 474802,
"fields": {
Expand Down Expand Up @@ -178,7 +172,6 @@
{
"name": "vermont",
"url": "http://maps.vcgi.org/gisdata/vcgi/packaged_zips/EmergencyE911_ESITE.zip",
"hash": "87119dde146b994dd81275e84c96065118191e93deb9b78b203db62cd44cbc7c",
"file": "Emergency_ESITE_point.shp",
"count": 321692,
"fields": {
Expand Down Expand Up @@ -207,7 +200,6 @@
{
"name": "district_of_columbia",
"url": "http://opendata.dc.gov/agol/arcgis/aa514416aaf74fdc94748f1e56e7cc8a/0.zip",
"hash": "171ee9a1fba2876b9ead818fd54ad139e08aa3cb415c10bd8b74820c103ae051",
"file": "Address_Points.shp",
"fields": {
"Number": {
Expand Down Expand Up @@ -235,7 +227,6 @@
{
"name": "rhode_island",
"url": "http://www.edc.uri.edu/rigis/spfdata/structure/e911Sites15r1.zip",
"hash": "732c126ce35be2a585971c19d2c6eb1fbc5e4347c1147f508fe59759aa83ca53",
"file": "e911Sites15r1.shp",
"count": 405634,
"fields": {
Expand Down
10 changes: 3 additions & 7 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ options
.option('-b, --bucket <bucket>', 'An S3 bucket where the data resides.')
.option('-d, --directory <directory>', 'A directory where data sources reside, either relative to the current folder or the passed S3 bucket.')
.option('-P, --profile <profile>', 'The aws profile in ~/.aws/credentials. Only needed if loading data from a bucket. AWS environment variables will override this value.', 'default')
.option('-M, --monitor', 'Run the retriever in monitoring mode which only checks data source freshness and doesn\'t load data.')
.parse(process.argv);


Expand All @@ -45,7 +44,6 @@ var logger = makeLogger(options);
options.client = esLoader.connect(options.host, options.port, options.log);

if(options.directory && options.directory[options.directory.length - 1] === '/') options.directory = options.directory.slice(0, -1);
if(options.monitor) logger.info('Running in monitoring mode. Remote files will be checked for freshness but not loaded.');

retriever(options, function(output){
options.client.close();
Expand All @@ -60,11 +58,9 @@ retriever(options, function(output){
output.errors[i] = v.toString();
});

logger.info('%d source%s still fresh, %d source%s need updates, %d source%s overridden from known files.',
output.fresh.length,
output.fresh.length === 1 ? '' : 's',
output.stale.length,
output.stale.length === 1 ? '' : 's',
logger.info('%d source%s loaded, %d source%s overridden from known files.',
output.loaded.length,
output.loaded.length === 1 ? '' : 's',
output.overridden.length,
output.overridden.length === 1 ? '' : 's'
);
Expand Down
22 changes: 16 additions & 6 deletions lib/fieldFilter.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,17 @@ function fieldFilter(record){
return cb(null);
}

var address = formatAddress(vals.Number, vals.Street, vals.City, vals.State, vals.Zip);
var number = vals.Number;
var street = vals.Street;
var city = vals.City;
var state = vals.State;
var zip = vals.Zip;

if(number === undefined || street === undefined || city === undefined || state === undefined || zip === undefined){
return cb(new Error('The field mappings provided in the metadata file are not correct. Perhaps the schema has changed.'));
}

var address = formatAddress(number, street, city, state, zip);

if(address === null || chunk.geometry === null){
logger.info('No valid street name or number for %s.\n\nOriginal fields: %s\n\nResolved fields: %s\n', name, JSON.stringify(props), JSON.stringify(vals));
Expand All @@ -39,11 +49,11 @@ function fieldFilter(record){
type: "Feature",
properties: {
address: address,
number: vals.Number + '',
street: vals.Street,
city: vals.City,
state: vals.State,
zip: vals.Zip + ''
number: number + '',
street: street,
city: city,
state: state,
zip: zip + ''
},
geometry: chunk.geometry
}
Expand Down
2 changes: 1 addition & 1 deletion lib/resolveFields.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ module.exports = function(props, fields){
val = props[field.value];
}

val = (val + '').trim();
if(val !== null && val !== undefined) val = (val + '').trim();

vals[keys[i]] = val;
}
Expand Down
28 changes: 0 additions & 28 deletions lib/retriever.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ var winston = require('winston');
var request = require('request');
var resolveOverrides = require('./resolveOverrides');
var ftp = require('./ftpWrapper');
var checkHash = require('./checkHash');
var handleZip = require('./handleZip');
var handleCsv = require('./handleCsv');
var retrieverPipeline = require('./retriever-pipeline');
Expand All @@ -23,8 +22,6 @@ function retrieve(options, callback){

var output = {
errors: [],
fresh: [],
stale: [],
overridden: [],
processed: [],
loaded: [],
Expand Down Expand Up @@ -153,33 +150,8 @@ function retrieve(options, callback){

if(record._override){
output.overridden.push(record.name);
}else{
//Ensure data has not changed
checkHash(stream, record.hash, function(hashIsEqual, remoteHash){
if(hashIsEqual){
logger.info('Remote file for %s verified.', record.name);
output.fresh.push(record.name);

if(options.monitor) return recordCallback(null, record);
return;
}
output.stale.push(record.name);

var staleErr = new Error('The hash from ' + record.name + ' did not match the downloaded file\'s hash.\nRecord hash: ' + record.hash +'\nRemote hash: ' + remoteHash +'\n')

//ftp stream is auto-closed before error propagates
if(url.parse(record.url).protocol === 'ftp:'){
handleStreamError.call(stream, record, staleErr);
}else{
stream.emit('error', staleErr);
}
});
}


if(options.monitor) return;


if(zipReg.test(record.url)){
logger.info('Unzipping file stream of %s from %s', record.name, record._override || record.url);
handleZip(stream, record, scratchSpace, handleStream, handleStreamError);
Expand Down
66 changes: 38 additions & 28 deletions test/data/fields/arkansas.json
Original file line number Diff line number Diff line change
@@ -1,45 +1,55 @@
{
"type": "Feature",
"properties": {
"OBJECTID": 6,
"id": 1362633,
"adr_num": 10721,
"adr_num_su": null,
"objectid": 3658,
"id": 10859,
"adr_num": 512,
"adr_numsuf": null,
"adr_bldg": null,
"adr_unit_t": null,
"adr_unit_i": null,
"adr_unitty": null,
"adr_unitid": null,
"pre_dir": null,
"pstr_name": "Highway 5",
"pstr_type": null,
"psuf_dir": "S",
"pstr_name": "Hailey",
"pstr_type": "Rd",
"psuf_dir": null,
"pstr_mod": null,
"pstr_fulna": "Highway 5 S",
"pstrfulnam": "Hailey Rd",
"adr_place": null,
"adr_muni": "Salesville",
"adr_city": "Mountain Home",
"adr_zip5": "72653",
"adr_zip4": "9698",
"adr_zip9": "72653-9698",
"cnty_name": "Baxter",
"adr_muni": "Berryville",
"adr_city": "Berryville",
"adr_zip5": "72616",
"adr_zip4": "5043",
"adr_zip9": "72616-5043",
"cnty_name": "Carroll",
"adr_state": "AR",
"adr_label": "10721 Highway 5 S",
"adr_box_ty": null,
"adr_label": "512 Hailey Rd",
"adr_boxtyp": null,
"adr_box_id": null,
"adr_boxgr_": null,
"adr_boxgr1": null,
"adr_box_lb": null,
"lon_x": -92.26726092,
"lat_y": 36.24593768,
"adrboxgrty": null,
"adr_boxgid": null,
"adr_boxlbl": null,
"lon_x": -93.54851973,
"lat_y": 36.3644817,
"fea_typ": null,
"date_ed": "20141125",
"add_auth": "005",
"globalid": "{4DE7FF4F-A50B-4D96-8595-06D91CF2A7ED}"
"date_ed": "20140210",
"add_auth": "015",
"uid_text": "015-10859",
"apf_id": 135917,
"addr_hn": null,
"addr_pd": null,
"addr_pt": null,
"addr_sn": null,
"addr_st": null,
"addr_sd": null,
"pre_type": null,
"comp_hn": "512"
},
"geometry": {
"type": "Point",
"coordinates": [
565834.9165000003,
4011476.0067999996
450791.4815999996,
4024515.6977999993,
0
]
}
}
1 change: 0 additions & 1 deletion test/data/metadata/maine.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
{
"name":"maine",
"url":"https://cfpb.github.io/grasshopper-retriever/maine.zip",
"hash":"7e047fa12e048ebed7e0b7cbd80621a39996666396e685a092ae30f5ea2ee79e",
"file":"maine.shp",
"fields": {
"Number": {
Expand Down
7 changes: 2 additions & 5 deletions test/data/metadata/maineandarkanderr.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
{
"name":"maine",
"url":"https://cfpb.github.io/grasshopper-retriever/maine.zip",
"hash":"7e047fa12e048ebed7e0b7cbd80621a39996666396e685a092ae30f5ea2ee79e",
"file":"maine.shp",
"fields": {
"Number": {
Expand Down Expand Up @@ -30,7 +29,6 @@
{
"name":"arkansas",
"url":"https://cfpb.github.io/grasshopper-retriever/arkansas.json",
"hash":"ae631ff68108423f9877f8012ce46577f973ba2eef895db6a4b8f40ea901283d",
"file":"arkansas.json",
"fields": {
"Number": {
Expand Down Expand Up @@ -58,20 +56,19 @@
{
"name":"north_carolina",
"url":"https://cfpb.github.io/grasshopper-retriever/north_carolina.json",
"hash":"badsha",
"file":"north_carolina.json",
"fields": {
"Number": {
"type": "static",
"value": "ADDR_HN"
"value": "bad"
},
"Street": {
"type": "multi",
"value": ["ADDR_SN", "ADDR_ST"]
},
"City": {
"type": "static",
"value": "PO_NAME"
"value": "nope"
},
"State": {
"type": "static",
Expand Down
3 changes: 0 additions & 3 deletions test/data/metadata/maineandarkandparenterr.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
{
"name":"maine",
"url":"https://cfpb.github.io/grasshopper-retriever/maine.zip",
"hash":"7e047fa12e048ebed7e0b7cbd80621a39996666396e685a092ae30f5ea2ee79e",
"file":"maine.shp",
"fields": {
"Number": {
Expand Down Expand Up @@ -31,7 +30,6 @@
{
"name":"arkansas",
"url":"https://cfpb.github.io/grasshopper-retriever/arkansas.json",
"hash":"ae631ff68108423f9877f8012ce46577f973ba2eef895db6a4b8f40ea901283d",
"file":"arkansas.json",
"fields": {
"Number": {
Expand Down Expand Up @@ -59,7 +57,6 @@
{
"name":"../",
"url":"https://cfpb.github.io/grasshopper-retriever/north_carolina.json",
"hash":"4a0929b355b4250412b9652c758733e19df43aea4e6fcba33c69c4b86148d3c0",
"file":"north_carolina.json",
"fields": {
"Number": {
Expand Down
1 change: 0 additions & 1 deletion test/data/metadata/mainecsv.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
{
"name":"mainecsv",
"url":"https://cfpb.github.io/grasshopper-retriever/maine.csv",
"hash":"572e11cbb4024a8ee316cb30118d23276e97897414ec2e07ac64d6731b0f49ce",
"spatialReference":"WGS84",
"file":"maine.csv",
"fields": {
Expand Down
1 change: 0 additions & 1 deletion test/data/metadata/mainejson.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
{
"name":"mainejson",
"url":"https://cfpb.github.io/grasshopper-retriever/maine.json",
"hash":"d2b16e30013d168681e6cb8cf831cb133caff8388aa7275430e35ea4268daf4d",
"file":"maine.json",
"fields": {
"Number": {
Expand Down
1 change: 0 additions & 1 deletion test/data/metadata/mainezipcsv.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
{
"name":"mainezipcsv",
"url":"https://cfpb.github.io/grasshopper-retriever/maine.csv.zip",
"hash":"0023e2d253a83abdd7e2cb6426e73c37a6df029bab7108757e6b4d6b80d47b01",
"spatialReference":"WGS84",
"file":"maine.csv",
"fields": {
Expand Down
1 change: 0 additions & 1 deletion test/data/metadata/ncmeta.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{
"name": "north_carolina",
"url": "http://data.nconemap.gov/downloads/vector/mastadd14.zip",
"hash": "1b927688a387275c6b7767a94b46a3467731564c0265c6d3747ae7e80ea65257",
"file": "AddressNC_2014.gdb",
"spatialReference": "EPSG:2264",
"fields": {
Expand Down
1 change: 0 additions & 1 deletion test/data/metadata/parcelsjson.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
{
"name":"sacramento",
"url":"https://cfpb.github.io/grasshopper-retriever/parcels.json",
"hash":"65637abd5a01a235467cfea2e1eb859cee5facf1b900604321d2b1e74d4f861b",
"file":"parcels.json",
"fields": {
"Number": {
Expand Down
Loading

0 comments on commit dc557d6

Please sign in to comment.