From cebdb2d790a656b949e0f1fcb6893333c63602e1 Mon Sep 17 00:00:00 2001 From: Alexander Zagniotov Date: Thu, 28 Nov 2024 01:01:45 -0800 Subject: [PATCH] SOLR-17575: Fixed broken backwards compatibility with the legacy "langid.whitelist" config in Solr Langid (#2886) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Alexander Zagniotov Co-authored-by: Jan Høydahl --- solr/CHANGES.txt | 2 ++ .../LanguageIdentifierUpdateProcessor.java | 13 +++++----- ...ntifierUpdateProcessorFactoryTestCase.java | 26 +++++++++++++++++++ 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 840de727b00..e32f1f1ac4f 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -227,6 +227,8 @@ Bug Fixes * SOLR-16976: Remove log4j-jul jar and use slf4j bridge for JUL to prevent exception from being logged when remote JMX is enabled (Shawn Heisey, Stephen Zhou, Eric Pugh, Christine Poerschke, David Smiley) +* SOLR-17575: Fixed broken backwards compatibility with the legacy "langid.whitelist" config in Solr Langid. (Jan Høydahl, Alexander Zagniotov) + Dependency Upgrades --------------------- (No changes) diff --git a/solr/modules/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java b/solr/modules/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java index 21921440cae..f4f1b9cc83c 100644 --- a/solr/modules/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java +++ b/solr/modules/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java @@ -111,8 +111,8 @@ private void initParams(SolrParams params) { overwrite = params.getBool(OVERWRITE, false); langAllowlist = new HashSet<>(); threshold = params.getDouble(THRESHOLD, DOCID_THRESHOLD_DEFAULT); - String legacyAllowList = params.get(LANG_WHITELIST, ""); - if (legacyAllowList.length() > 0) { + final String legacyAllowList = params.get(LANG_WHITELIST, "").trim(); + if (!legacyAllowList.isEmpty()) { // nowarn compile time string concatenation log.warn( LANG_WHITELIST @@ -120,11 +120,10 @@ private void initParams(SolrParams params) { + LANG_ALLOWLIST + " instead."); // nowarn } - if (params.get(LANG_ALLOWLIST, legacyAllowList).length() > 0) { - for (String lang : params.get(LANG_ALLOWLIST, "").split(",")) { - langAllowlist.add(lang); - } - } + Arrays.stream(params.get(LANG_ALLOWLIST, legacyAllowList).split(",")) + .map(String::trim) + .filter(lang -> !lang.isEmpty()) + .forEach(langAllowlist::add); // Mapping params (field centric) enableMapping = params.getBool(MAP_ENABLE, false); diff --git a/solr/modules/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java b/solr/modules/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java index 4d8d398a25c..15e62d11a50 100644 --- a/solr/modules/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java +++ b/solr/modules/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java @@ -18,6 +18,7 @@ import java.util.ArrayList; import java.util.List; +import java.util.Set; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.ModifiableSolrParams; @@ -464,6 +465,31 @@ public void testMapIndividual() throws Exception { assertTrue(mappedIndividual.containsKey("text2_ru")); } + @Test + public void testAllowlist() throws Exception { + ModifiableSolrParams parameters = new ModifiableSolrParams(); + parameters.add("langid.fl", "name,subject"); + parameters.add("langid.langField", "language_s"); + parameters.add("langid.allowlist", "no,en ,, ,sv, sv"); + liProcessor = createLangIdProcessor(parameters); + + // Make sure that empty language codes have been filtered out and others trimmed. + assertEquals(Set.of("no", "en", "sv"), liProcessor.langAllowlist); + } + + @Test + public void testAllowlistBackwardsCompatabilityWithLegacyAllowlist() throws Exception { + // The "legacy allowlist" is "langid.whitelist" + ModifiableSolrParams parameters = new ModifiableSolrParams(); + parameters.add("langid.fl", "name,subject"); + parameters.add("langid.langField", "language_s"); + parameters.add("langid.whitelist", "no,en ,, ,sv, sv"); + liProcessor = createLangIdProcessor(parameters); + + // Make sure that empty language codes have been filtered out and others trimmed. + assertEquals(Set.of("no", "en", "sv"), liProcessor.langAllowlist); + } + // Various utility methods private SolrInputDocument englishDoc() {