From 690919e91b809637de519ae3224f6809595d4ca4 Mon Sep 17 00:00:00 2001 From: Julian Simioni Date: Tue, 21 Sep 2021 08:58:48 -0700 Subject: [PATCH] feat(names): Remove variant names from index Who's on First variant names are a useful collection of unofficial names for places, but they tend to be pretty messy. This PR explores the effect of removing them from indexing. While there might be occasionally useful names in there, it seems like the majority are exact or near duplicates of more official names, or names that are so colloquial that they are not particularly useful (do we _really_ need to support returning NYC for queries for "the big apple"?). For reference, here are some variant names for some key places, just to record the kind of data that's in there: NYC: ``` Bigapple NY City NY Cty New York City New York Cty Newyork Newyorkcity Novaiorque Nycity Thebigapple Big Apple ``` San Francisco: ``` S Francisco S. Francisco SFO Sanfran Sanfrancisco Frisco ``` China: ``` China - Peoples Republic China Peoples Rep China, People's Republic Chinese PR China PR of China People's Republic of China Peoples Republic of China ``` --- src/components/extractFields.js | 3 +-- test/components/extractFieldsTest.js | 8 +++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/components/extractFields.js b/src/components/extractFields.js index 7692f735..4865d8b2 100644 --- a/src/components/extractFields.js +++ b/src/components/extractFields.js @@ -22,12 +22,11 @@ const population_hierarchy = [ // note the '%s' is replaced by a language code const NAME_ALIAS_FIELDS = [ 'name:%s_x_preferred', - 'name:%s_x_variant', 'label:%s_x_preferred_longname', 'label:%s_x_preferred' ]; -const WOF_NAMES_REGEX = /^(name|label):[a-z]{3}_x_(preferred|variant)$/; +const WOF_NAMES_REGEX = /^(name|label):[a-z]{3}_x_preferred$/; // this function is used to verify that a US county QS altname is available function isUsCounty(base_record, wof_country, qs_a2_alt) { diff --git a/test/components/extractFieldsTest.js b/test/components/extractFieldsTest.js index 2128f652..3ca6aea1 100644 --- a/test/components/extractFieldsTest.js +++ b/test/components/extractFieldsTest.js @@ -948,7 +948,7 @@ tape('name alias tests', (test) => { } }]; - const expected_name_aliases = ['preferred1', 'preferred2', 'variant1', 'variant2', 'englabel1', 'englabel2']; + const expected_name_aliases = ['preferred1', 'preferred2', 'englabel1', 'englabel2']; test_stream(input, extractFields.create(), function (err, actual) { t.deepEqual(actual[0].name_aliases, expected_name_aliases, 'name aliases populated from preferred and variant fields'); @@ -974,7 +974,6 @@ tape('name alias tests', (test) => { const expected_name_aliases = [ 'preferred1', 'preferred2', - 'variant1', 'variant2', 'spalabel1', 'spalabel2', 'fralabel1', 'fralabel2', 'englabel1', 'englabel2' @@ -1003,12 +1002,11 @@ tape('multi-lang index test', (test) => { const expected_name_langs = { 'en': ['preferredENG1'], - 'fr': ['preferredFRA1', 'preferredFRA2'], - 'es': ['variantSPA1', 'variantSPA2'] + 'fr': ['preferredFRA1', 'preferredFRA2'] }; test_stream(input, extractFields.create(), function (err, actual) { - t.deepEqual(actual[0].name_langs, expected_name_langs, 'name langs populated from fr preferred and SPA variant fields'); + t.deepEqual(actual[0].name_langs, expected_name_langs, 'name langs populated from fr preferred fields'); t.end(); }); });