From 93cd61379bb5a507306ce49653516f90acd05e64 Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Thu, 16 May 2019 14:59:28 +0200 Subject: [PATCH] feat(dictionaries): dictionary updates (#22) --- classifier/CompoundStreetClassifier.js | 3 --- classifier/CompoundStreetClassifier.test.js | 2 +- .../libpostal/de/concatenated_suffixes_separable.txt | 2 ++ .../whosonfirst/locality/name:eng_x_preferred.txt | 2 ++ test/intersection.test.js | 8 ++++++++ 5 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 resources/pelias/dictionaries/libpostal/de/concatenated_suffixes_separable.txt diff --git a/classifier/CompoundStreetClassifier.js b/classifier/CompoundStreetClassifier.js index 3451a6fe..07a5ed57 100644 --- a/classifier/CompoundStreetClassifier.js +++ b/classifier/CompoundStreetClassifier.js @@ -15,9 +15,6 @@ class CompoundStreetClassifier extends WordClassifier { // this removes suffixes such as 'r.' which can be ambiguous minlength: 3 }) - - // whitelist - this.suffixes.park = true } each (span) { diff --git a/classifier/CompoundStreetClassifier.test.js b/classifier/CompoundStreetClassifier.test.js index 57d3e7f3..d3583708 100644 --- a/classifier/CompoundStreetClassifier.test.js +++ b/classifier/CompoundStreetClassifier.test.js @@ -25,6 +25,7 @@ module.exports.tests.contains_numerals = (test) => { module.exports.tests.german_compound = (test) => { let valid = [ 'teststraße', 'teststrasse', 'teststr.', + 'teststr', 'grolmanstr', 'testallee', 'testweg', 'testplatz', @@ -32,7 +33,6 @@ module.exports.tests.german_compound = (test) => { ] let invalid = [ - 'teststr', 'testal', 'testw', 'testw.' ] diff --git a/resources/pelias/dictionaries/libpostal/de/concatenated_suffixes_separable.txt b/resources/pelias/dictionaries/libpostal/de/concatenated_suffixes_separable.txt new file mode 100644 index 00000000..86abf105 --- /dev/null +++ b/resources/pelias/dictionaries/libpostal/de/concatenated_suffixes_separable.txt @@ -0,0 +1,2 @@ +str +park \ No newline at end of file diff --git a/resources/pelias/dictionaries/whosonfirst/locality/name:eng_x_preferred.txt b/resources/pelias/dictionaries/whosonfirst/locality/name:eng_x_preferred.txt index 4523d95b..d5ff5e2f 100644 --- a/resources/pelias/dictionaries/whosonfirst/locality/name:eng_x_preferred.txt +++ b/resources/pelias/dictionaries/whosonfirst/locality/name:eng_x_preferred.txt @@ -6,6 +6,8 @@ sf !italy !college !university +!airport +!deli # remove any localities which share a name with a US state !alabama !alaska diff --git a/test/intersection.test.js b/test/intersection.test.js index a977221b..02cd0d67 100644 --- a/test/intersection.test.js +++ b/test/intersection.test.js @@ -94,6 +94,14 @@ const testcase = (test, common) => { [{ street: 'SW 6th' }, { street: 'Pine' }] ]) + // Should not detect this as an intersection + // assert('University of Hawaii at Hilo', [ + // [{ street: 'SW 6th' }, { street: 'Pine' }] + // ]) + // assert('national air and space museum', [ + // [{ street: 'SW 6th' }, { street: 'Pine' }] + // ]) + // Trimet syntax // assert('9,Lambert', [ // [{ street: '9' }, { street: 'Lambert' }]