Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add EN toponyms for highway streets #70

Merged
merged 1 commit into from
Oct 7, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions classification/ToponymClassification.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
const Classification = require('./Classification')

/**
 * Classification carrying the 'toponym' label.
 *
 * Both constructor arguments are handed to the parent Classification
 * constructor untouched; this subclass only sets the label.
 */
class ToponymClassification extends Classification {
  /**
   * @param {number} confidence - forwarded to the parent constructor
   * @param {Object} meta - forwarded to the parent constructor
   */
  constructor (confidence, meta) {
    super(confidence, meta)

    // the label identifies this classification type
    this.label = 'toponym'
  }
}

module.exports = ToponymClassification
24 changes: 24 additions & 0 deletions classification/ToponymClassification.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
const Classification = require('./ToponymClassification')

// registry of test cases keyed by name; iterated by module.exports.all
module.exports.tests = {}

// a classification constructed without arguments must carry the 'toponym'
// label together with the expected public, confidence and meta values
module.exports.tests.constructor = (test) => {
  test('constructor', (t) => {
    const classification = new Classification()

    t.false(classification.public)
    t.equals(classification.label, 'toponym')
    t.equals(classification.confidence, 1.0)
    t.deepEqual(classification.meta, {})
    t.end()
  })
}

/**
 * Registers every test case stored on `module.exports.tests` with the
 * supplied runner, prefixing each test name with the suite name.
 *
 * @param {Function} tape - invoked as tape(name, testFunction)
 * @param {*} common - passed through unchanged to every test case
 */
module.exports.all = (tape, common) => {
  function test (name, testFunction) {
    return tape(`ToponymClassification: ${name}`, testFunction)
  }

  // visit own keys only, with a block-scoped loop variable, rather than
  // `for (var ... in ...)` which also walks inherited enumerable properties
  for (const testCase of Object.keys(module.exports.tests)) {
    module.exports.tests[testCase](test, common)
  }
}
26 changes: 26 additions & 0 deletions classifier/ToponymClassifier.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
const WordClassifier = require('./super/WordClassifier')
const ToponymClassification = require('../classification/ToponymClassification')
const libpostal = require('../resources/libpostal/libpostal')

// dictionaries sourced from the libpostal project
// see: https://github.com/openvenues/libpostal

/**
 * Word classifier which attaches a ToponymClassification to any span whose
 * normalised text (`span.norm`) is a key of the toponym index.
 */
class ToponymClassifier extends WordClassifier {
  /**
   * Creates the inverted index and hands it to the libpostal loader together
   * with the language list and dictionary file name.
   * NOTE(review): presumably invoked by the parent class during construction
   * and populated in place by libpostal.load — confirm against those modules.
   */
  setup () {
    // load toponym tokens (English dictionary only)
    this.index = {}
    libpostal.load(this.index, ['en'], 'toponyms.txt')
  }

  /**
   * Classifies a single span.
   *
   * @param {Object} span - must expose `contains.numerals`, `norm` and `classify()`
   */
  each (span) {
    // skip spans which contain numbers
    if (span.contains.numerals) { return }

    // use an inverted index for full token matching as it's O(1).
    // hasOwnProperty is borrowed from Object.prototype so that a dictionary
    // entry which happens to be called 'hasOwnProperty' cannot shadow it
    if (Object.prototype.hasOwnProperty.call(this.index, span.norm)) {
      span.classify(new ToponymClassification(1))
    }
  }
}

module.exports = ToponymClassifier
57 changes: 57 additions & 0 deletions classifier/ToponymClassifier.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
const ToponymClassifier = require('./ToponymClassifier')
const ToponymClassification = require('../classification/ToponymClassification')
const Span = require('../tokenization/Span')
// single classifier instance shared by the test cases in this file
const classifier = new ToponymClassifier()

// registry of test cases keyed by name; iterated by module.exports.all
module.exports.tests = {}

/**
 * Wraps `body` in a Span, runs that span through the shared classifier and
 * returns it so the caller can inspect the resulting classifications.
 */
function classify (body) {
  const span = new Span(body)
  classifier.each(span, null, 1)
  return span
}

// a span flagged as containing numerals must come back unclassified
module.exports.tests.contains_numerals = (test) => {
  test('contains numerals: honours contains.numerals boolean', (t) => {
    const span = new Span('example')
    span.contains.numerals = true

    classifier.each(span, null, 1)

    t.deepEqual(span.classifications, {})
    t.end()
  })
}

// no key of the classifier index may be shorter than two characters
module.exports.tests.single_character_tokens = (test) => {
  test('index: does not contain single char tokens', (t) => {
    const tokens = Object.keys(classifier.index)
    const hasShortToken = tokens.some((token) => token.length < 2)

    t.false(hasShortToken)
    t.end()
  })
}

// each listed token must receive exactly one ToponymClassification with
// confidence 1; one test is registered per token
module.exports.tests.english_suffix = (test) => {
  const valid = [
    'md', 'maryland', 'ca',
    'california', 'ia', 'nj'
  ]

  for (const token of valid) {
    test(`english toponyms: ${token}`, (t) => {
      const span = classify(token)
      const actual = span.classifications
      const expected = { ToponymClassification: new ToponymClassification(1) }

      t.deepEqual(actual, expected)
      t.end()
    })
  }
}

/**
 * Registers every test case stored on `module.exports.tests` with the
 * supplied runner, prefixing each test name with the suite name.
 *
 * @param {Function} tape - invoked as tape(name, testFunction)
 * @param {*} common - passed through unchanged to every test case
 */
module.exports.all = (tape, common) => {
  function test (name, testFunction) {
    return tape(`ToponymClassifier: ${name}`, testFunction)
  }

  // visit own keys only, with a block-scoped loop variable, rather than
  // `for (var ... in ...)` which also walks inherited enumerable properties
  for (const testCase of Object.keys(module.exports.tests)) {
    module.exports.tests[testCase](test, common)
  }
}
2 changes: 1 addition & 1 deletion classifier/scheme/street.js
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ module.exports = [
Class: StreetClassification,
scheme: [
{
is: ['RoadTypeClassification'],
is: ['RoadTypeClassification', 'ToponymClassification'],
not: []
},
{
Expand Down
2 changes: 2 additions & 0 deletions parser/AddressParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const PostcodeClassifier = require('../classifier/PostcodeClassifier')
const StreetPrefixClassifier = require('../classifier/StreetPrefixClassifier')
const StreetSuffixClassifier = require('../classifier/StreetSuffixClassifier')
const RoadTypeClassifier = require('../classifier/RoadTypeClassifier')
const ToponymClassifier = require('../classifier/ToponymClassifier')
const CompoundStreetClassifier = require('../classifier/CompoundStreetClassifier')
const DirectionalClassifier = require('../classifier/DirectionalClassifier')
const OrdinalClassifier = require('../classifier/OrdinalClassifier')
Expand Down Expand Up @@ -46,6 +47,7 @@ class AddressParser extends Parser {
new StreetPrefixClassifier(),
new StreetSuffixClassifier(),
new RoadTypeClassifier(),
new ToponymClassifier(),
new CompoundStreetClassifier(),
new DirectionalClassifier(),
new OrdinalClassifier(),
Expand Down
7 changes: 7 additions & 0 deletions test/address.usa.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ const testcase = (test, common) => {
assert('1210a Highway 10 W IA', [{ housenumber: '1210a' }, { street: 'Highway 10 W' }, { region: 'IA' }], true)
assert('1210a State Highway 10', [{ housenumber: '1210a' }, { street: 'State Highway 10' }], true)
assert('1389a County Road 42 IA', [{ housenumber: '1389a' }, { street: 'County Road 42' }, { region: 'IA' }], true)
assert('CA 72', [{ street: 'CA 72' }], true)
assert('1210a IA 10 W IA', [{ housenumber: '1210a' }, { street: 'IA 10 W' }, { region: 'IA' }], true)
assert('1210a California 10', [{ housenumber: '1210a' }, { street: 'California 10' }], true)
assert('1389a IA 42 IA', [{ housenumber: '1389a' }, { street: 'IA 42' }, { region: 'IA' }], true)

// This does not work because of MD
// assert('1111 MD 760, Lusby, MD, USA', [{ housenumber: '1111' }, { street: 'MD 760' }, { locality: 'Lusby' }, { region: 'MD' }, { country: 'USA' }], true)
}

module.exports.all = (tape, common) => {
Expand Down