This repository has been archived by the owner on Apr 17, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
addressItParser.js
74 lines (57 loc) · 2.15 KB
/
addressItParser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
var parser = require('addressit');
var _ = require('lodash');
var logger = require('pelias-logger').get('text-analyzer');
/**
 * Naive query-time admin matching: treat the first delimited part as the
 * name and everything after it as the admin parts.
 * e.g. the parts of 'flatiron, new york, ny' become
 * name 'flatiron' and admin_parts 'new york, ny'.
 *
 * @param {string[]} queryParts - the query, already split on `delim`
 * @param {string} delim - delimiter used when re-joining the admin parts
 * @returns {Object} `{ name, admin_parts }` when there is more than one
 *   part, otherwise an empty object
 */
function getAdminPartsBySplittingOnDelim(queryParts, delim) {
  // with one part (or none) there is nothing to split off
  if (!(queryParts.length > 1)) {
    return {};
  }

  function trimPart(part) {
    return part.trim();
  }

  var leadingName = trimPart(queryParts[0]);

  // everything after the first part, whitespace-trimmed, then glued back
  // together as '<delim> '-separated text
  var remainingParts = queryParts.slice(1);
  var trimmedParts = remainingParts.map(function (part) {
    return trimPart(part);
  });

  return {
    name: leadingName,
    admin_parts: trimmedParts.join(delim + ' ')
  };
}
/**
 * Run the full address parser (the `addressit` module bound to `parser`)
 * on a query string.
 *
 * @param {string} query - text to parse
 * @returns {*} whatever `parser(query)` returns, or `undefined` when the
 *   query is three characters or fewer
 */
function getAddressParts(query) {
  // queries this short obviously can't contain an address, so skip parsing
  var isWorthParsing = query.length > 3;
  return isWorthParsing ? parser(query) : undefined;
}
/**
 * Parse a free-text query into a flat object of address fields.
 *
 * The query is split on ',' to derive `name` / `admin_parts`, and the same
 * parts (re-joined with ', ') are passed to the address parser. Both results
 * are merged with `_.extend`, so address-parser properties overwrite
 * same-named ones from the delimiter split. Only the known address fields
 * with truthy values are kept.
 *
 * @param {string} query - raw query text
 * @returns {Object|null} the kept fields (possibly an empty object), or
 *   `null` when `regions` is the only field that was found
 */
function parse(query) {
  var DELIMITER = ',';
  var KNOWN_FIELDS = [
    'name',
    'number',
    'street',
    'city',
    'state',
    'country',
    'postalcode',
    'regions',
    'admin_parts'
  ];

  var pieces = query.split(DELIMITER);

  // delimiter-based split first, full address parsing merged on top of it
  var merged = _.extend(
    getAdminPartsBySplittingOnDelim(pieces, DELIMITER),
    getAddressParts(pieces.join(DELIMITER + ' '))
  );

  // copy across only the known fields that actually carry a value
  var result = KNOWN_FIELDS.reduce(function (kept, field) {
    if (merged[field]) {
      kept[field] = merged[field];
    }
    return kept;
  }, {});

  // regions alone are not enough information to make smarter decisions
  var foundFields = Object.keys(result);
  var isRegionsOnly = foundFields.length === 1 && !_.isUndefined(result.regions);
  if (!isRegionsOnly) {
    return result;
  }

  logger.info('Ignoring address parser output, regions only');
  return null;
}
// expose parse() to consumers as the `parse` property of this module's exports
module.exports.parse = parse;