This repository has been archived by the owner on Apr 17, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
libpostalParser.js
100 lines (89 loc) · 2.82 KB
/
libpostalParser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
var logger = require('pelias-logger').get('text-analyzer');
var _ = require('lodash');
// Lookup table translating libpostal component labels (keys) into the
// field names used by pelias (values). Components that do not appear
// here are dropped when a parse result is converted.
const field_mapping = {
  'island': 'island',
  'category': 'category',
  'house': 'query',               // libpostal "house" is handled as a query term
  'house_number': 'number',
  'road': 'street',
  'suburb': 'neighbourhood',
  'city_district': 'borough',
  'city': 'city',
  'state_district': 'county',
  'state': 'state',
  'postcode': 'postalcode',
  'country': 'country'
};
// wrapper for libpostal that injects the actual parse function for easier
// testing purposes. `parse_address` is just a function that in the real world
// calls libpostal and returns the parsed input. It's injected since
// libpostal is an external dependency and this pattern makes unit testing much
// easier by effectively mocking out libpostal. `parse_address` takes a single
// string parameter to be parsed and returns an array of the form:
//
// ```
// [
// {
// component: 'house_number',
// value: '30'
// },
// {
// component: 'road',
// value: 'west 26th street'
// },
// {
// component: 'city',
// value: 'new york'
// },
// {
// component: 'state',
// value: 'ny'
// }
//]
// ```
//
// where `component` can be any of (currently):
// - house (generally interpreted as unknown, treated by pelias like a query term)
// - category (like "restaurants")
// - house_number
// - road
// - unit (apt or suite #)
// - suburb (like a neighbourhood)
// - city
// - city_district (like an NYC borough)
// - state_district (like a county)
// - state
// - postcode
// - country
//
// The Pelias query module is not concerned with unit.
//
// Factory for a parser object built around the injected `parse_address`
// function.
//
// parse_address: function called with the deburred query; its result is
//                expected to be an array of `{ component, value }` objects.
// returns:       an object exposing a single `parse(query)` function.
// throws:        Error when `parse_address` is not a function.
module.exports.create = function create(parse_address) {
  if (typeof parse_address !== 'function') {
    throw new Error('parse_address parameter must be of type function');
  }

  // Runs the injected parser over `query` and converts the result into an
  // object keyed by pelias field names. Returns undefined when any
  // component label occurs more than once in the raw result.
  function parse(query) {
    // diacritics are stripped from the query before it is handed to the parser
    const components = parse_address(_.deburr(query));
    logger.debug('libpostal raw: ' + JSON.stringify(components, null, 2));

    // histogram of component labels, eg: { "road": 2, "city": 1 }
    const occurrences = _.countBy(components, entry => entry.component);
    const hasRepeatedField = _.some(occurrences, total => total > 1);

    // a label assigned more than once makes the whole parse untrustworthy
    if (hasRepeatedField) {
      logger.warn(`discarding libpostal parse of '${query}' due to duplicate field assignments`);
      return undefined;
    }

    // keep only the components that have a pelias equivalent, renaming them
    const converted = components.reduce((result, entry) => {
      if (field_mapping.hasOwnProperty(entry.component)) {
        result[field_mapping[entry.component]] = entry.value;
      }
      return result;
    }, {});

    logger.debug('converted: ' + JSON.stringify(converted, null, 2));
    return converted;
  }

  return { parse: parse };
};