Skip to content

Commit

Permalink
feat(toponym): Add EN toponyms for highway streets (#70)
Browse files Browse the repository at this point in the history
  • Loading branch information
Joxit authored Oct 7, 2019
1 parent db36079 commit bcdb8f0
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 1 deletion.
10 changes: 10 additions & 0 deletions classification/ToponymClassification.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
const Classification = require('./Classification')

/**
 * Classification whose label is 'toponym'.
 *
 * All construction work is delegated to the Classification base class; the
 * only thing added here is the label.
 */
class ToponymClassification extends Classification {
  /**
   * @param {number} [confidence] passed through unchanged to the base constructor
   * @param {Object} [meta] passed through unchanged to the base constructor
   */
  constructor (confidence, meta) {
    super(confidence, meta)

    // identifies this classification type
    this.label = 'toponym'
  }
}

module.exports = ToponymClassification
24 changes: 24 additions & 0 deletions classification/ToponymClassification.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
const Classification = require('./ToponymClassification')

module.exports.tests = {}

// A classification built without arguments must be non-public, labelled
// 'toponym', and carry a confidence of 1.0 with an empty meta object.
module.exports.tests.constructor = (test) => {
  test('constructor', (t) => {
    const {
      public: isPublic,
      label,
      confidence,
      meta
    } = new Classification()

    t.false(isPublic)
    t.equals(label, 'toponym')
    t.equals(confidence, 1.0)
    t.deepEqual(meta, {})
    t.end()
  })
}

// Runs every exported test case against the supplied tape instance, prefixing
// each test name with the suite name.
module.exports.all = (tape, common) => {
  const test = (name, testFunction) =>
    tape(`ToponymClassification: ${name}`, testFunction)

  const { tests } = module.exports
  for (const key in tests) {
    tests[key](test, common)
  }
}
26 changes: 26 additions & 0 deletions classifier/ToponymClassifier.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
const WordClassifier = require('./super/WordClassifier')
const ToponymClassification = require('../classification/ToponymClassification')
const libpostal = require('../resources/libpostal/libpostal')

// dictionaries sourced from the libpostal project
// see: https://github.com/openvenues/libpostal

/**
 * Word classifier which tags a span with a ToponymClassification when the
 * span's normalised text is a key of the English libpostal 'toponyms.txt'
 * dictionary.
 */
class ToponymClassifier extends WordClassifier {
  /**
   * Builds the lookup index of toponym tokens.
   *
   * NOTE(review): presumably invoked by the base class during construction,
   * since the tests read `index` straight after `new` — confirm.
   */
  setup () {
    // load toponym tokens (English dictionary only)
    this.index = {}
    libpostal.load(this.index, ['en'], 'toponyms.txt')
  }

  /**
   * Classifies a single span in place; nothing is returned.
   *
   * @param {Span} span span to inspect; reads `span.contains.numerals` and
   *   `span.norm`, and calls `span.classify()` on a match
   */
  each (span) {
    // skip spans which contain numbers
    if (span.contains.numerals) { return }

    // use an inverted index for full token matching as it's O(1).
    // hasOwnProperty is borrowed from Object.prototype so the check keeps
    // working even if a dictionary token is itself named 'hasOwnProperty',
    // while still ignoring keys inherited from the prototype chain.
    if (Object.prototype.hasOwnProperty.call(this.index, span.norm)) {
      span.classify(new ToponymClassification(1))
    }
  }
}

module.exports = ToponymClassifier
57 changes: 57 additions & 0 deletions classifier/ToponymClassifier.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
const ToponymClassifier = require('./ToponymClassifier')
const ToponymClassification = require('../classification/ToponymClassification')
const Span = require('../tokenization/Span')
const classifier = new ToponymClassifier()

module.exports.tests = {}

// Wraps `body` in a Span, runs the shared classifier over it and returns the
// span so that its classifications can be inspected by the caller.
function classify (body) {
  const span = new Span(body)
  classifier.each(span, null, 1)
  return span
}

// A span flagged as containing numerals must be left unclassified.
module.exports.tests.contains_numerals = (test) => {
  test('contains numerals: honours contains.numerals boolean', (t) => {
    const span = new Span('example')
    span.contains.numerals = true

    classifier.each(span, null, 1)

    t.deepEqual(span.classifications, {})
    t.end()
  })
}

// The loaded index must not contain any key shorter than two characters.
module.exports.tests.single_character_tokens = (test) => {
  test('index: does not contain single char tokens', (t) => {
    const tokens = Object.keys(classifier.index)
    const hasShortToken = tokens.some((token) => token.length < 2)

    t.false(hasShortToken)
    t.end()
  })
}

// Each listed English toponym must receive exactly one ToponymClassification
// with a confidence of 1.
module.exports.tests.english_suffix = (test) => {
  const tokens = ['md', 'maryland', 'ca', 'california', 'ia', 'nj']

  for (const token of tokens) {
    test(`english toponyms: ${token}`, (t) => {
      const span = classify(token)
      const expected = {
        ToponymClassification: new ToponymClassification(1)
      }

      t.deepEqual(span.classifications, expected)
      t.end()
    })
  }
}

// Runs every exported test case against the supplied tape instance, prefixing
// each test name with the suite name.
module.exports.all = (tape, common) => {
  const test = (name, testFunction) =>
    tape(`ToponymClassifier: ${name}`, testFunction)

  const { tests } = module.exports
  for (const key in tests) {
    tests[key](test, common)
  }
}
2 changes: 1 addition & 1 deletion classifier/scheme/street.js
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ module.exports = [
Class: StreetClassification,
scheme: [
{
is: ['RoadTypeClassification'],
is: ['RoadTypeClassification', 'ToponymClassification'],
not: []
},
{
Expand Down
2 changes: 2 additions & 0 deletions parser/AddressParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const PostcodeClassifier = require('../classifier/PostcodeClassifier')
const StreetPrefixClassifier = require('../classifier/StreetPrefixClassifier')
const StreetSuffixClassifier = require('../classifier/StreetSuffixClassifier')
const RoadTypeClassifier = require('../classifier/RoadTypeClassifier')
const ToponymClassifier = require('../classifier/ToponymClassifier')
const CompoundStreetClassifier = require('../classifier/CompoundStreetClassifier')
const DirectionalClassifier = require('../classifier/DirectionalClassifier')
const OrdinalClassifier = require('../classifier/OrdinalClassifier')
Expand Down Expand Up @@ -46,6 +47,7 @@ class AddressParser extends Parser {
new StreetPrefixClassifier(),
new StreetSuffixClassifier(),
new RoadTypeClassifier(),
new ToponymClassifier(),
new CompoundStreetClassifier(),
new DirectionalClassifier(),
new OrdinalClassifier(),
Expand Down
7 changes: 7 additions & 0 deletions test/address.usa.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ const testcase = (test, common) => {
assert('1210a Highway 10 W IA', [{ housenumber: '1210a' }, { street: 'Highway 10 W' }, { region: 'IA' }], true)
assert('1210a State Highway 10', [{ housenumber: '1210a' }, { street: 'State Highway 10' }], true)
assert('1389a County Road 42 IA', [{ housenumber: '1389a' }, { street: 'County Road 42' }, { region: 'IA' }], true)
assert('CA 72', [{ street: 'CA 72' }], true)
assert('1210a IA 10 W IA', [{ housenumber: '1210a' }, { street: 'IA 10 W' }, { region: 'IA' }], true)
assert('1210a California 10', [{ housenumber: '1210a' }, { street: 'California 10' }], true)
assert('1389a IA 42 IA', [{ housenumber: '1389a' }, { street: 'IA 42' }, { region: 'IA' }], true)

// This does not work because of MD
// assert('1111 MD 760, Lusby, MD, USA', [{ housenumber: '1111' }, { street: 'MD 760' }, { locality: 'Lusby' }, { region: 'MD' }, { country: 'USA' }], true)
}

module.exports.all = (tape, common) => {
Expand Down

0 comments on commit bcdb8f0

Please sign in to comment.