-
-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(toponym): Add EN toponyms for highway streets (#70)
- Loading branch information
Showing
7 changed files
with
127 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
const Classification = require('./Classification') | ||
|
||
/**
 * Classification attached to spans that were matched as a toponym.
 *
 * Both constructor arguments are forwarded untouched to the base
 * Classification constructor; this subclass only sets the label.
 */
class ToponymClassification extends Classification {
  /**
   * @param {*} confidence - passed through to the Classification constructor
   * @param {*} meta - passed through to the Classification constructor
   */
  constructor (confidence, meta) {
    super(confidence, meta)

    // label identifying this kind of classification
    this.label = 'toponym'
  }
}
|
||
module.exports = ToponymClassification |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
const Classification = require('./ToponymClassification') | ||
|
||
module.exports.tests = {} | ||
|
||
// Checks the state of an instance constructed without any arguments:
// not public, labelled 'toponym', confidence 1.0 and an empty meta object.
module.exports.tests.constructor = (test) => {
  test('constructor', (t) => {
    const classification = new Classification()

    t.false(classification.public)
    t.equals(classification.label, 'toponym')
    t.equals(classification.confidence, 1.0)
    t.deepEqual(classification.meta, {})
    t.end()
  })
}
|
||
// Registers every test case exported on `module.exports.tests` with the
// supplied tape instance, prefixing each test name with the suite name.
module.exports.all = (tape, common) => {
  // wrapper handed to each test case so that names are namespaced
  const test = (name, testFunction) => {
    return tape(`ToponymClassification: ${name}`, testFunction)
  }

  for (const caseName in module.exports.tests) {
    module.exports.tests[caseName](test, common)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
const WordClassifier = require('./super/WordClassifier') | ||
const ToponymClassification = require('../classification/ToponymClassification') | ||
const libpostal = require('../resources/libpostal/libpostal') | ||
|
||
// dictionaries sourced from the libpostal project | ||
// see: https://github.com/openvenues/libpostal | ||
|
||
/**
 * Word classifier which marks a span as a toponym when its normalized
 * form is present in the English 'toponyms.txt' libpostal dictionary.
 */
class ToponymClassifier extends WordClassifier {
  /**
   * Build the token index used by `each()`.
   */
  setup () {
    // load toponym tokens
    // NOTE(review): presumably libpostal.load() adds one key per dictionary
    // token to the object passed in; confirm against the libpostal resource loader.
    this.index = {}
    libpostal.load(this.index, ['en'], 'toponyms.txt')
  }

  /**
   * Classify a single span, adding a ToponymClassification with a
   * confidence of 1 when `span.norm` is a key of the index.
   *
   * @param {object} span - span exposing `contains.numerals`, `norm` and `classify()`
   */
  each (span) {
    // skip spans which contain numbers
    if (span.contains.numerals) { return }

    // use an inverted index for full token matching as it's O(1)
    // hasOwnProperty is invoked via Object.prototype so that a dictionary
    // token which happens to be named 'hasOwnProperty' cannot shadow it
    if (Object.prototype.hasOwnProperty.call(this.index, span.norm)) {
      span.classify(new ToponymClassification(1))
    }
  }
}
|
||
module.exports = ToponymClassifier |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
const ToponymClassifier = require('./ToponymClassifier') | ||
const ToponymClassification = require('../classification/ToponymClassification') | ||
const Span = require('../tokenization/Span') | ||
const classifier = new ToponymClassifier() | ||
|
||
module.exports.tests = {} | ||
|
||
// Test helper: wraps `body` in a new Span, passes it through the shared
// classifier instance and returns the span so callers can inspect it.
function classify (body) {
  const span = new Span(body)
  classifier.each(span, null, 1)
  return span
}
|
||
// A span flagged as containing numerals must be left unclassified.
module.exports.tests.contains_numerals = (test) => {
  test('contains numerals: honours contains.numerals boolean', (t) => {
    const span = new Span('example')
    span.contains.numerals = true

    classifier.each(span, null, 1)

    t.deepEqual(span.classifications, {})
    t.end()
  })
}
|
||
// No key of the classifier index may be shorter than two characters.
module.exports.tests.single_character_tokens = (test) => {
  test('index: does not contain single char tokens', (t) => {
    const tokens = Object.keys(classifier.index)
    const hasShortToken = tokens.some((token) => token.length < 2)

    t.false(hasShortToken)
    t.end()
  })
}
|
||
// Registers one test per sample token; each token is expected to receive
// exactly one ToponymClassification with a confidence of 1.
module.exports.tests.english_suffix = (test) => {
  const valid = [
    'md', 'maryland', 'ca',
    'california', 'ia', 'nj'
  ]

  for (const token of valid) {
    test(`english toponyms: ${token}`, (t) => {
      const span = classify(token)
      const expected = {
        ToponymClassification: new ToponymClassification(1)
      }

      t.deepEqual(span.classifications, expected)
      t.end()
    })
  }
}
|
||
// Registers every test case exported on `module.exports.tests` with the
// supplied tape instance, prefixing each test name with the suite name.
module.exports.all = (tape, common) => {
  // wrapper handed to each test case so that names are namespaced
  const test = (name, testFunction) => {
    return tape(`ToponymClassifier: ${name}`, testFunction)
  }

  for (const caseName in module.exports.tests) {
    module.exports.tests[caseName](test, common)
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters