From cfb983175c68d5dad98935adfa839181887eaada Mon Sep 17 00:00:00 2001 From: Jason Gerlowski Date: Wed, 5 Jun 2024 11:16:33 -0400 Subject: [PATCH 1/9] SOLR-17195: Add 'minPrefixLength' soft limit Prefix queries have a memory cost that increases in proportion with the number of indexed terms that start with that prefix. This can cause stability problems, especially for short prefixes which can match a large number of indexed-terms. This commit introduces a new solrconfig.xml setting, `minPrefixLength`, similar to the existing `maxBooleanClauses`. This setting causes an error to be thrown any time a prefix query is created with a prefix whose length is less than the configured minumum. This setting is set at '2' by default, disallowing only single-character prefix queries. Still TODO: - ref guide documentation - more error-case testing - handle the faceting invocation - consider whether this should be disable-able on a per request basis --- .../java/org/apache/solr/core/SolrConfig.java | 6 ++++ .../java/org/apache/solr/schema/StrField.java | 32 +++++++++++++++++++ .../search/facet/FacetFieldProcessor.java | 4 +++ .../solr/collection1/conf/solrconfig.xml | 12 +++++++ .../org/apache/solr/ConvertedLegacyTest.java | 4 +-- .../org/apache/solr/core/SolrCoreTest.java | 1 + .../org/apache/solr/search/TestFiltering.java | 2 +- .../solr/search/join/BJQParserTest.java | 1 + .../configsets/_default/conf/solrconfig.xml | 13 ++++++++ 9 files changed, 72 insertions(+), 3 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/core/SolrConfig.java b/solr/core/src/java/org/apache/solr/core/SolrConfig.java index 7fb27347818..680f4eeea48 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrConfig.java +++ b/solr/core/src/java/org/apache/solr/core/SolrConfig.java @@ -103,6 +103,8 @@ public class SolrConfig implements MapSerializable { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); public static final String DEFAULT_CONF_FILE = "solrconfig.xml"; + public static final String MIN_PREFIX_LENGTH = "minPrefixLength"; + public static final int DEFAULT_MIN_PREFIX_LENGTH = 0; private final String resourceName; private int znodeVersion; @@ -291,6 +293,8 @@ private SolrConfig( BooleanQuery.getMaxClauseCount(), "set 'maxBooleanClauses' in solr.xml to increase global limit"); } + prefixQueryMinPrefixLength = + get("query").get(MIN_PREFIX_LENGTH).intVal(DEFAULT_MIN_PREFIX_LENGTH); // Warn about deprecated / discontinued parameters // boolToFilterOptimizer has had no effect since 3.1 @@ -667,6 +671,7 @@ public SolrRequestParsers getRequestParsers() { /* The set of materialized parameters: */ public final int booleanQueryMaxClauseCount; + public final int prefixQueryMinPrefixLength; // SolrIndexSearcher - nutch optimizer -- Disabled since 3.1 // public final boolean filtOptEnabled; // public final int filtOptCacheSize; @@ -1018,6 +1023,7 @@ public Map toMap(Map result) { m.put("queryResultMaxDocsCached", queryResultMaxDocsCached); m.put("enableLazyFieldLoading", enableLazyFieldLoading); m.put("maxBooleanClauses", booleanQueryMaxClauseCount); + m.put(MIN_PREFIX_LENGTH, prefixQueryMinPrefixLength); for (SolrPluginInfo plugin : plugins) { List infos = getPluginInfos(plugin.clazz.getName()); diff --git a/solr/core/src/java/org/apache/solr/schema/StrField.java b/solr/core/src/java/org/apache/solr/schema/StrField.java index 114de6bcaca..2e67150dfca 100644 --- a/solr/core/src/java/org/apache/solr/schema/StrField.java +++ b/solr/core/src/java/org/apache/solr/schema/StrField.java @@ -20,17 +20,21 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Locale; import java.util.Map; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexableField; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedSetSelector; import org.apache.lucene.util.BytesRef; import org.apache.solr.common.SolrException; import org.apache.solr.common.util.ByteArrayUtf8CharSequence; +import org.apache.solr.core.SolrConfig; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; import org.apache.solr.uninverting.UninvertingReader.Type; @@ -68,6 +72,34 @@ public List createFields(SchemaField field, Object value) { return Collections.singletonList(fval); } + @Override + public Query getPrefixQuery(QParser parser, SchemaField sf, String termStr) { + final var query = super.getPrefixQuery(parser, sf, termStr); + + // Some internal usage (e.g. faceting) creates PrefixQueries without a surrounding QParser, so + // check for null here before using QParser to access the limit value + if (query instanceof PrefixQuery && parser != null) { + final var minPrefixLength = + parser.getReq().getCore().getSolrConfig().prefixQueryMinPrefixLength; + // TODO Should we provide a query-param to disable the limit on a request-by-request basis? I + // can imagine scenarios where advanced users may want to enforce the limit on most fields, + // but ignore it for a few fields that they know to be low-cardinality and therefore "less + // risky" + if (termStr.length() < minPrefixLength) { + final var message = + String.format( + Locale.ROOT, + "Query [%s] does not meet the minimum prefix length [%d] (actual=[%d]). Please try with a larger prefix, or adjust %s in your solrconfig.xml", + query, + minPrefixLength, + termStr.length(), + SolrConfig.MIN_PREFIX_LENGTH); + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, message); + } + } + return query; + } + public static BytesRef getBytesRef(Object value) { if (value instanceof ByteArrayUtf8CharSequence) { ByteArrayUtf8CharSequence utf8 = (ByteArrayUtf8CharSequence) value; diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java index 7e4cd041ea6..ff3a2553ca4 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java @@ -544,6 +544,10 @@ protected Query makeBucketQuery(final String bucketValue) { private void calculateNumBuckets(SimpleOrderedMap target) throws IOException { DocSet domain = fcontext.base; if (freq.prefix != null) { + // TODO - Should we enforce minPrefixLength here in the case of 'string' fields, or omit + // since this is an "internal" request? If we want to enforce the limit in this case, + // we should have StrField read the configured limit and cache it in 'init' so that it can + // be read at 'getPrefixQuery' call-time without a QParser. Query prefixFilter = sf.getType().getPrefixQuery(null, sf, freq.prefix); domain = fcontext.searcher.getDocSet(prefixFilter, domain); } diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml index 82dca6384d8..b973bccc792 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml @@ -89,6 +89,18 @@ --> ${solr.max.booleanClauses:1024} + + ${solr.min.prefixLength:2} + diff --git a/solr/core/src/test/org/apache/solr/ConvertedLegacyTest.java b/solr/core/src/test/org/apache/solr/ConvertedLegacyTest.java index fdbc3c7ec14..5c9507d9f9a 100644 --- a/solr/core/src/test/org/apache/solr/ConvertedLegacyTest.java +++ b/solr/core/src/test/org/apache/solr/ConvertedLegacyTest.java @@ -637,8 +637,8 @@ public void testABunchOfConvertedStuff() { "107port"); assertU(""); - assertQ(req("val_s:a*"), "//*[@numFound='3']"); - assertQ(req("val_s:p*"), "//*[@numFound='4']"); + assertQ(req("val_s:ap*"), "//*[@numFound='3']"); + assertQ(req("val_s:pe*"), "//*[@numFound='3']"); // val_s:* %//*[@numFound="8"] // test wildcard query diff --git a/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java b/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java index 28cfbc70cae..bb92f561da9 100644 --- a/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java +++ b/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java @@ -298,6 +298,7 @@ public void testConfiguration() { assertEquals( "wrong config for slowQueryThresholdMillis", 2000, solrConfig.slowQueryThresholdMillis); assertEquals("wrong config for maxBooleanClauses", 1024, solrConfig.booleanQueryMaxClauseCount); + assertEquals("wrong config for minPrefixLength", 2, solrConfig.prefixQueryMinPrefixLength); assertTrue("wrong config for enableLazyFieldLoading", solrConfig.enableLazyFieldLoading); assertEquals("wrong config for queryResultWindowSize", 10, solrConfig.queryResultWindowSize); } diff --git a/solr/core/src/test/org/apache/solr/search/TestFiltering.java b/solr/core/src/test/org/apache/solr/search/TestFiltering.java index e9194e170f9..8c392f1a0ff 100644 --- a/solr/core/src/test/org/apache/solr/search/TestFiltering.java +++ b/solr/core/src/test/org/apache/solr/search/TestFiltering.java @@ -64,7 +64,7 @@ public void testLiveDocsSharing() throws Exception { String[] queries = { "foo_s:foo", - "foo_s:f*", + "foo_s:fo*", "*:*", "id:[* TO *]", "id:[0 TO 99]", diff --git a/solr/core/src/test/org/apache/solr/search/join/BJQParserTest.java b/solr/core/src/test/org/apache/solr/search/join/BJQParserTest.java index 5fcb7455187..ca52afceab8 100644 --- a/solr/core/src/test/org/apache/solr/search/join/BJQParserTest.java +++ b/solr/core/src/test/org/apache/solr/search/join/BJQParserTest.java @@ -55,6 +55,7 @@ public class BJQParserTest extends SolrTestCaseJ4 { @BeforeClass public static void beforeClass() throws Exception { + System.setProperty("solr.min.prefixLength", String.valueOf(1)); initCore("solrconfig.xml", "schema15.xml"); createIndex(); } diff --git a/solr/server/solr/configsets/_default/conf/solrconfig.xml b/solr/server/solr/configsets/_default/conf/solrconfig.xml index 5b24094179b..1a416d83fef 100644 --- a/solr/server/solr/configsets/_default/conf/solrconfig.xml +++ b/solr/server/solr/configsets/_default/conf/solrconfig.xml @@ -360,6 +360,19 @@ --> ${solr.max.booleanClauses:1024} + + + ${solr.min.prefixLength:2} + From c565a8559599591b22ec01101d437dce4d7f50bb Mon Sep 17 00:00:00 2001 From: Jason Gerlowski Date: Wed, 5 Jun 2024 13:52:11 -0400 Subject: [PATCH 2/9] Unit tests Also expands the setting to govern text field searches as well. --- .../java/org/apache/solr/schema/StrField.java | 19 +--- .../org/apache/solr/schema/TextField.java | 16 +++ .../search/ComplexPhraseQParserPlugin.java | 14 +++ .../org/apache/solr/search/QueryUtils.java | 21 ++++ .../apache/solr/search/PrefixQueryTest.java | 106 ++++++++++++++++++ 5 files changed, 159 insertions(+), 17 deletions(-) create mode 100644 solr/core/src/test/org/apache/solr/search/PrefixQueryTest.java diff --git a/solr/core/src/java/org/apache/solr/schema/StrField.java b/solr/core/src/java/org/apache/solr/schema/StrField.java index 2e67150dfca..4a0df2169ef 100644 --- a/solr/core/src/java/org/apache/solr/schema/StrField.java +++ b/solr/core/src/java/org/apache/solr/schema/StrField.java @@ -20,7 +20,6 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.Locale; import java.util.Map; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; @@ -34,9 +33,9 @@ import org.apache.lucene.util.BytesRef; import org.apache.solr.common.SolrException; import org.apache.solr.common.util.ByteArrayUtf8CharSequence; -import org.apache.solr.core.SolrConfig; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; +import org.apache.solr.search.QueryUtils; import org.apache.solr.uninverting.UninvertingReader.Type; public class StrField extends PrimitiveFieldType { @@ -81,21 +80,7 @@ public Query getPrefixQuery(QParser parser, SchemaField sf, String termStr) { if (query instanceof PrefixQuery && parser != null) { final var minPrefixLength = parser.getReq().getCore().getSolrConfig().prefixQueryMinPrefixLength; - // TODO Should we provide a query-param to disable the limit on a request-by-request basis? I - // can imagine scenarios where advanced users may want to enforce the limit on most fields, - // but ignore it for a few fields that they know to be low-cardinality and therefore "less - // risky" - if (termStr.length() < minPrefixLength) { - final var message = - String.format( - Locale.ROOT, - "Query [%s] does not meet the minimum prefix length [%d] (actual=[%d]). Please try with a larger prefix, or adjust %s in your solrconfig.xml", - query, - minPrefixLength, - termStr.length(), - SolrConfig.MIN_PREFIX_LENGTH); - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, message); - } + QueryUtils.ensurePrefixQueryObeysMinimumPrefixLength(query, termStr, minPrefixLength); } return query; } diff --git a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java index 549b10c4cdf..bbff63a6bec 100644 --- a/solr/core/src/java/org/apache/solr/schema/TextField.java +++ b/solr/core/src/java/org/apache/solr/schema/TextField.java @@ -26,6 +26,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource; +import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedSetSelector; @@ -38,6 +39,7 @@ import org.apache.solr.query.SolrRangeQuery; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; +import org.apache.solr.search.QueryUtils; import org.apache.solr.uninverting.UninvertingReader.Type; /** @@ -165,6 +167,20 @@ public Query getFieldTermQuery(QParser parser, SchemaField field, String externa return new TermQuery(new Term(field.getName(), br)); } + @Override + public Query getPrefixQuery(QParser parser, SchemaField sf, String termStr) { + final var query = super.getPrefixQuery(parser, sf, termStr); + + // Some internal usage (e.g. faceting) creates PrefixQueries without a surrounding QParser, so + // check for null here before using QParser to access the limit value + if (query instanceof PrefixQuery && parser != null) { + final var minPrefixLength = + parser.getReq().getCore().getSolrConfig().prefixQueryMinPrefixLength; + QueryUtils.ensurePrefixQueryObeysMinimumPrefixLength(query, termStr, minPrefixLength); + } + return query; + } + @Override public Object toObject(SchemaField sf, BytesRef term) { return term.utf8ToString(); diff --git a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java index 9d8210a8c2e..6f6ed504285 100644 --- a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java @@ -19,6 +19,7 @@ import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser; import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SolrParams; @@ -134,6 +135,19 @@ protected Query newWildcardQuery(org.apache.lucene.index.Term t) { } } + @Override + protected Query getPrefixQuery(String field, String termStr) throws ParseException { + // TODO check the field type and call QueryUtils.ensureBlah here + final var query = super.getPrefixQuery(field, termStr); + if (query instanceof PrefixQuery) { + final var minPrefixLength = + getReq().getCore().getSolrConfig().prefixQueryMinPrefixLength; + QueryUtils.ensurePrefixQueryObeysMinimumPrefixLength( + query, termStr, minPrefixLength); + } + return query; + } + private Query setRewriteMethod(org.apache.lucene.search.Query query) { if (query instanceof MultiTermQuery) { ((MultiTermQuery) query) diff --git a/solr/core/src/java/org/apache/solr/search/QueryUtils.java b/solr/core/src/java/org/apache/solr/search/QueryUtils.java index fa49ef91bc9..65e6e6d3468 100644 --- a/solr/core/src/java/org/apache/solr/search/QueryUtils.java +++ b/solr/core/src/java/org/apache/solr/search/QueryUtils.java @@ -21,6 +21,7 @@ import java.util.Collections; import java.util.IdentityHashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import org.apache.lucene.search.BooleanClause; @@ -34,6 +35,7 @@ import org.apache.lucene.search.Query; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.core.SolrConfig; import org.apache.solr.request.SolrQueryRequest; /** */ @@ -83,6 +85,25 @@ public static boolean isConstantScoreQuery(Query q) { } } + public static void ensurePrefixQueryObeysMinimumPrefixLength( + Query query, String prefix, int minPrefixLength) { + // TODO Should we provide a query-param to disable the limit on a request-by-request basis? I + // can imagine scenarios where advanced users may want to enforce the limit on most fields, + // but ignore it for a few fields that they know to be low-cardinality and therefore "less + // risky" + if (prefix.length() < minPrefixLength) { + final var message = + String.format( + Locale.ROOT, + "Query [%s] does not meet the minimum prefix length [%d] (actual=[%d]). Please try with a larger prefix, or adjust %s in your solrconfig.xml", + query, + minPrefixLength, + prefix.length(), + SolrConfig.MIN_PREFIX_LENGTH); + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, message); + } + } + /** * Returns the original query if it was already a positive query, otherwise return the negative of * the query (i.e., a positive query). diff --git a/solr/core/src/test/org/apache/solr/search/PrefixQueryTest.java b/solr/core/src/test/org/apache/solr/search/PrefixQueryTest.java new file mode 100644 index 00000000000..9461049c3a3 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/search/PrefixQueryTest.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.search; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.SolrException; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Unit tests for prefix-query functionality - mostly testing the 'minPrefixLength' setting + * available in solrconfig.xml + */ +public class PrefixQueryTest extends SolrTestCaseJ4 { + + private static final String[] MIN_PREFIX_SUPPORTING_FIELDS = new String[] {"val_s", "t_val"}; + + @BeforeClass + public static void beforeTests() throws Exception { + initCore("solrconfig.xml", "schema.xml"); + + assertU(createDocWithFieldVal("1", "aaa")); + assertU(createDocWithFieldVal("2", "aab")); + assertU(createDocWithFieldVal("3", "aac")); + assertU(createDocWithFieldVal("4", "abc")); + + assertU(createDocWithFieldVal("5", "bbb")); + assertU(createDocWithFieldVal("6", "bbc")); + + assertU(""); + } + + // Sanity-check of a few queries we'll use in other tests + @Test + public void testPrefixQueryMatchesExpectedDocuments() { + for (String fieldName : MIN_PREFIX_SUPPORTING_FIELDS) { + assertQ(req(fieldName + ":*"), "//*[@numFound='6']"); + assertQ(req(fieldName + ":aa*"), "//*[@numFound='3']"); + assertQ(req(fieldName + ":bb*"), "//*[@numFound='2']"); + } + } + + @Test + public void testPrefixQueryObeysMinPrefixLimit() { + for (String fieldName : MIN_PREFIX_SUPPORTING_FIELDS) { + assertQEx( + "Prefix query didn't obey limit", + "does not meet the minimum prefix length [2] (actual=[1])", + req(fieldName + ":a*"), + SolrException.ErrorCode.BAD_REQUEST); + } + } + + @Test + public void testPrefixQParserObeysMinPrefixLimit() { + for (String fieldName : MIN_PREFIX_SUPPORTING_FIELDS) { + assertQEx( + "Prefix query didn't obey limit", + "does not meet the minimum prefix length [2] (actual=[1])", + req("q", "{!prefix f=" + fieldName + "}a"), + SolrException.ErrorCode.BAD_REQUEST); + } + } + + @Test + public void testComplexPhraseQParserObeysMinPrefixLimit() { + for (String fieldName : MIN_PREFIX_SUPPORTING_FIELDS) { + assertQEx( + "{!complex} query didn't obey min-prefix limit", + "does not meet the minimum prefix length [2] (actual=[1])", + req("q", "{!complexphrase inOrder=true}" + fieldName + ":\"a*\""), + SolrException.ErrorCode.BAD_REQUEST); + } + } + + @Test + public void testQuestionMarkWildcardsCountTowardsMinimumPrefix() { + // Both of these queries succeed since the '?' wildcard is counted as a part of the prefix + assertQ(req("val_s:a?c*"), "//*[@numFound='2']"); // Matches 'aac' and 'abc' + assertQ(req("val_s:a??*"), "//*[@numFound='4']"); // Matches all documents starting with 'a' + } + + private static String createDocWithFieldVal(String id, String fieldVal) { + return "" + + id + + "" + + fieldVal + + "" + + fieldVal + + ""; + } +} From 3902a785d1cb5f16e81a9dbd100febe1e1f82786 Mon Sep 17 00:00:00 2001 From: Jason Gerlowski Date: Wed, 5 Jun 2024 14:19:10 -0400 Subject: [PATCH 3/9] Minor test fixes --- .../apache/solr/search/TestComplexPhraseQParserPlugin.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/solr/core/src/test/org/apache/solr/search/TestComplexPhraseQParserPlugin.java b/solr/core/src/test/org/apache/solr/search/TestComplexPhraseQParserPlugin.java index 241cdf7542e..61aa3dbb7ec 100644 --- a/solr/core/src/test/org/apache/solr/search/TestComplexPhraseQParserPlugin.java +++ b/solr/core/src/test/org/apache/solr/search/TestComplexPhraseQParserPlugin.java @@ -57,7 +57,7 @@ public void testDefaultField() { "//doc[./str[@name='id']='1']"); assertQ( - req("q", "{!complexphrase} \"j* smyth~\""), + req("q", "{!complexphrase} \"jo* smyth~\""), "//result[@numFound='2']", "//doc[./str[@name='id']='1']", "//doc[./str[@name='id']='2']"); @@ -121,7 +121,7 @@ public void test() { assertQ( "wildcards and fuzzies are OK in phrases", - sumLRF.makeRequest("name:\"j* smyth~\""), + sumLRF.makeRequest("name:\"jo* smyth~\""), "//doc[./str[@name='id']='1']", "//doc[./str[@name='id']='2']", "//result[@numFound='2']"); @@ -252,7 +252,7 @@ public void testMultipleFields() { "//doc[./str[@name='id']='3']"); assertQ( - req("q", "{!complexphrase} name:\"prot* dige*\" AND text:\"d* r*\""), + req("q", "{!complexphrase} name:\"prot* dige*\" AND text:\"dn* ru*\""), "//result[@numFound='1']", "//doc[./str[@name='id']='3']"); From b518f4b0b99601ff7b7a211b66138ec7bf069958 Mon Sep 17 00:00:00 2001 From: Jason Gerlowski Date: Wed, 5 Jun 2024 15:02:30 -0400 Subject: [PATCH 4/9] Add documentation for minPrefixLength --- .../configuration-guide/pages/caches-warming.adoc | 12 ++++++++++++ .../modules/query-guide/pages/other-parsers.adoc | 12 ++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/solr/solr-ref-guide/modules/configuration-guide/pages/caches-warming.adoc b/solr/solr-ref-guide/modules/configuration-guide/pages/caches-warming.adoc index e0536003b49..d00000e8909 100644 --- a/solr/solr-ref-guide/modules/configuration-guide/pages/caches-warming.adoc +++ b/solr/solr-ref-guide/modules/configuration-guide/pages/caches-warming.adoc @@ -235,6 +235,18 @@ This is the same system property used in the xref:configuring-solr-xml.adoc#glob ${solr.max.booleanClauses:1024} ---- +=== Element + +Prefix-based queries consume resources proportional to the number of terms in the index that start with the specified prefix. +Particularly short prefixes, those with only one or two characters for instance, tend to match such a large proportion of the index that they're a frequent cause of resource contention and instability. +This setting establishes a minimum prefix length for queries, giving administrators a way to block queries that might otherwise cause stability issues. +Queries that don't match this minimum prefix length trigger an error. + +The setting aims to govern all prefix-based queries (e.g. `val_s:a*`, `{!prefix f=val_s}a`, `{!complexphrase}val_s:"a*"`) that target `string` or `text` type fields. + +In the default configset, the minimum-prefix is set to '2' (or the value of the `solr.min.prefixLength` system property if specified). +If not specified in `solrconfig.xml`, no minimum-length is enforced. + === Element When this parameter is set to `true`, fields that are not directly requested will be loaded only as needed. diff --git a/solr/solr-ref-guide/modules/query-guide/pages/other-parsers.adoc b/solr/solr-ref-guide/modules/query-guide/pages/other-parsers.adoc index 0fc92b17268..1bb9316f563 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/other-parsers.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/other-parsers.adoc @@ -229,21 +229,25 @@ A mix of ordered and unordered complex phrase queries: Performance is sensitive to the number of unique terms that are associated with a pattern. For instance, searching for "a*" will form a large OR clause (technically a SpanOr with many terms) for all of the terms in your index for the indicated field that start with the single letter 'a'. It may be prudent to restrict wildcards to at least two or preferably three letters as a prefix. -Allowing very short prefixes may result in to many low-quality documents being returned. +Allowing very short prefixes may result in too many low-quality documents being returned. Notice that it also supports leading wildcards "*a" as well with consequent performance implications. Applying xref:indexing-guide:filters.adoc#reversed-wildcard-filter[ReversedWildcardFilterFactory] in index-time analysis is usually a good idea. -==== MaxBooleanClauses with Complex Phrase Parser +==== Query Settings and Complex Phrase Parser -You may need to increase MaxBooleanClauses in `solrconfig.xml` as a result of the term expansion above: +Due to the query-expansion described above, this parser may produce queries that run afoul of several `solrconfig.xml` settings. + +Particularly relevant are `maxBooleanClauses` and `minPrefixLength`, two safeguards that Solr provides in order to curb overly resource-intensive queries. [source,xml] ---- 4096 +1 ---- -This property is described in more detail in the section xref:configuration-guide:caches-warming.adoc#query-sizing-and-warming[Query Sizing and Warming]. +Both properties are described in more detail in the section xref:configuration-guide:caches-warming.adoc#query-sizing-and-warming[Query Sizing and Warming]. +Administrators should consider the performance tradeoffs carefully when making changes to support "Complex Phrase" queries. ==== Stopwords with Complex Phrase Parser From c03afc9efdbfe23cd6f7d01fe3180ce3660dc487 Mon Sep 17 00:00:00 2001 From: Jason Gerlowski Date: Wed, 3 Jul 2024 10:06:58 -0400 Subject: [PATCH 5/9] Address review comments, rd 1 --- .../java/org/apache/solr/core/SolrConfig.java | 10 +++--- .../org/apache/solr/schema/FieldType.java | 1 + .../java/org/apache/solr/schema/StrField.java | 17 --------- .../org/apache/solr/schema/TextField.java | 16 --------- .../search/ComplexPhraseQParserPlugin.java | 11 ++---- .../java/org/apache/solr/search/QParser.java | 13 +++++++ .../org/apache/solr/search/QueryUtils.java | 35 +++++++++++++++---- .../solr/collection1/conf/solrconfig.xml | 9 +++-- .../org/apache/solr/core/SolrCoreTest.java | 2 +- .../apache/solr/search/PrefixQueryTest.java | 32 ++++++++++++++--- .../configsets/_default/conf/solrconfig.xml | 10 +++--- 11 files changed, 92 insertions(+), 64 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/core/SolrConfig.java b/solr/core/src/java/org/apache/solr/core/SolrConfig.java index 2e06f28f5e0..173a44b9ba2 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrConfig.java +++ b/solr/core/src/java/org/apache/solr/core/SolrConfig.java @@ -103,8 +103,8 @@ public class SolrConfig implements MapSerializable { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); public static final String DEFAULT_CONF_FILE = "solrconfig.xml"; - public static final String MIN_PREFIX_LENGTH = "minPrefixLength"; - public static final int DEFAULT_MIN_PREFIX_LENGTH = 0; + public static final String MIN_PREFIX_QUERY_TERM_LENGTH = "minPrefixQueryTermLength"; + public static final int DEFAULT_MIN_PREFIX_QUERY_TERM_LENGTH = -1; private final String resourceName; private int znodeVersion; @@ -292,7 +292,9 @@ private SolrConfig( "set 'maxBooleanClauses' in solr.xml to increase global limit"); } prefixQueryMinPrefixLength = - get("query").get(MIN_PREFIX_LENGTH).intVal(DEFAULT_MIN_PREFIX_LENGTH); + get("query") + .get(MIN_PREFIX_QUERY_TERM_LENGTH) + .intVal(DEFAULT_MIN_PREFIX_QUERY_TERM_LENGTH); // Warn about deprecated / discontinued parameters // boolToFilterOptimizer has had no effect since 3.1 @@ -1024,7 +1026,7 @@ public Map toMap(Map result) { m.put("queryResultMaxDocsCached", queryResultMaxDocsCached); m.put("enableLazyFieldLoading", enableLazyFieldLoading); m.put("maxBooleanClauses", booleanQueryMaxClauseCount); - m.put(MIN_PREFIX_LENGTH, prefixQueryMinPrefixLength); + m.put(MIN_PREFIX_QUERY_TERM_LENGTH, prefixQueryMinPrefixLength); for (SolrPluginInfo plugin : plugins) { List infos = getPluginInfos(plugin.clazz.getName()); diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java index 3e860680122..0667c40a6d8 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java @@ -483,6 +483,7 @@ public Query getPrefixQuery(QParser parser, SchemaField sf, String termStr) { } PrefixQuery query = new PrefixQuery(new Term(sf.getName(), termStr)); query.setRewriteMethod(sf.getType().getRewriteMethod(parser, sf)); + QueryUtils.ensurePrefixQueryObeysMinimumPrefixLength(parser, query, termStr); return query; } diff --git a/solr/core/src/java/org/apache/solr/schema/StrField.java b/solr/core/src/java/org/apache/solr/schema/StrField.java index 4a0df2169ef..114de6bcaca 100644 --- a/solr/core/src/java/org/apache/solr/schema/StrField.java +++ b/solr/core/src/java/org/apache/solr/schema/StrField.java @@ -26,8 +26,6 @@ import org.apache.lucene.index.IndexableField; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource; -import org.apache.lucene.search.PrefixQuery; -import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedSetSelector; import org.apache.lucene.util.BytesRef; @@ -35,7 +33,6 @@ import org.apache.solr.common.util.ByteArrayUtf8CharSequence; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; -import org.apache.solr.search.QueryUtils; import org.apache.solr.uninverting.UninvertingReader.Type; public class StrField extends PrimitiveFieldType { @@ -71,20 +68,6 @@ public List createFields(SchemaField field, Object value) { return Collections.singletonList(fval); } - @Override - public Query getPrefixQuery(QParser parser, SchemaField sf, String termStr) { - final var query = super.getPrefixQuery(parser, sf, termStr); - - // Some internal usage (e.g. faceting) creates PrefixQueries without a surrounding QParser, so - // check for null here before using QParser to access the limit value - if (query instanceof PrefixQuery && parser != null) { - final var minPrefixLength = - parser.getReq().getCore().getSolrConfig().prefixQueryMinPrefixLength; - QueryUtils.ensurePrefixQueryObeysMinimumPrefixLength(query, termStr, minPrefixLength); - } - return query; - } - public static BytesRef getBytesRef(Object value) { if (value instanceof ByteArrayUtf8CharSequence) { ByteArrayUtf8CharSequence utf8 = (ByteArrayUtf8CharSequence) value; diff --git a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java index bbff63a6bec..549b10c4cdf 100644 --- a/solr/core/src/java/org/apache/solr/schema/TextField.java +++ b/solr/core/src/java/org/apache/solr/schema/TextField.java @@ -26,7 +26,6 @@ import org.apache.lucene.index.Term; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource; -import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedSetSelector; @@ -39,7 +38,6 @@ import org.apache.solr.query.SolrRangeQuery; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.search.QParser; -import org.apache.solr.search.QueryUtils; import org.apache.solr.uninverting.UninvertingReader.Type; /** @@ -167,20 +165,6 @@ public Query getFieldTermQuery(QParser parser, SchemaField field, String externa return new TermQuery(new Term(field.getName(), br)); } - @Override - public Query getPrefixQuery(QParser parser, SchemaField sf, String termStr) { - final var query = super.getPrefixQuery(parser, sf, termStr); - - // Some internal usage (e.g. faceting) creates PrefixQueries without a surrounding QParser, so - // check for null here before using QParser to access the limit value - if (query instanceof PrefixQuery && parser != null) { - final var minPrefixLength = - parser.getReq().getCore().getSolrConfig().prefixQueryMinPrefixLength; - QueryUtils.ensurePrefixQueryObeysMinimumPrefixLength(query, termStr, minPrefixLength); - } - return query; - } - @Override public Object toObject(SchemaField sf, BytesRef term) { return term.utf8ToString(); diff --git a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java index 6f6ed504285..933fc7e2205 100644 --- a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java @@ -19,7 +19,6 @@ import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser; import org.apache.lucene.search.MultiTermQuery; -import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SolrParams; @@ -120,6 +119,7 @@ public Query parse() throws SyntaxError { String defaultField = getParam(CommonParams.DF); SolrQueryParserDelegate reverseAwareParser = new SolrQueryParserDelegate(this, defaultField); + final var qParserReference = this; lparser = new ComplexPhraseQueryParser(defaultField, getReq().getSchema().getQueryAnalyzer()) { @@ -137,14 +137,9 @@ protected Query newWildcardQuery(org.apache.lucene.index.Term t) { @Override protected Query getPrefixQuery(String field, String termStr) throws ParseException { - // TODO check the field type and call QueryUtils.ensureBlah here final var query = super.getPrefixQuery(field, termStr); - if (query instanceof PrefixQuery) { - final var minPrefixLength = - getReq().getCore().getSolrConfig().prefixQueryMinPrefixLength; - QueryUtils.ensurePrefixQueryObeysMinimumPrefixLength( - query, termStr, minPrefixLength); - } + QueryUtils.ensurePrefixQueryObeysMinimumPrefixLength( + qParserReference, query, termStr); return query; } diff --git a/solr/core/src/java/org/apache/solr/search/QParser.java b/solr/core/src/java/org/apache/solr/search/QParser.java index ab49d7b53b5..e6723cff3bf 100644 --- a/solr/core/src/java/org/apache/solr/search/QParser.java +++ b/solr/core/src/java/org/apache/solr/search/QParser.java @@ -28,6 +28,7 @@ import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.StrUtils; +import org.apache.solr.core.SolrConfig; import org.apache.solr.request.SolrQueryRequest; /** @@ -311,6 +312,18 @@ public void addDebugInfo(NamedList debugInfo) { debugInfo.add("QParser", this.getClass().getSimpleName()); } + public int getPrefixQueryMinPrefixLength() { + final var localLimit = + getLocalParams() != null + ? getLocalParams().getInt(SolrConfig.MIN_PREFIX_QUERY_TERM_LENGTH) + : null; + if (localLimit != null) { + return localLimit; + } + + return getReq().getCore().getSolrConfig().prefixQueryMinPrefixLength; + } + /** * Create a {@link QParser} to parse qstr, using the "lucene" * (QParserPlugin.DEFAULT_QTYPE) query parser. The query parser may be overridden by local-params diff --git a/solr/core/src/java/org/apache/solr/search/QueryUtils.java b/solr/core/src/java/org/apache/solr/search/QueryUtils.java index 65e6e6d3468..ba02c34819c 100644 --- a/solr/core/src/java/org/apache/solr/search/QueryUtils.java +++ b/solr/core/src/java/org/apache/solr/search/QueryUtils.java @@ -32,6 +32,7 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.CommonParams; @@ -85,12 +86,34 @@ public static boolean isConstantScoreQuery(Query q) { } } + public static final int NO_PREFIX_QUERY_LENGTH_LIMIT = -1; + + /** + * Validates that a provided prefix query obeys any limits (if configured) on the minimum + * allowable prefix size + * + *

The limit is retrieved from the provided QParser (see {@link + * QParser#getPrefixQueryMinPrefixLength()} for the default implementation). + * + * @param parser the QParser used to parse the query being validated. No limit will be enforced if + * 'null' + * @param query the query to validate. Limits will only be enforced if this is a {@link + * PrefixQuery} + * @param prefix a String term included in the provided query. Its size is compared against the + * configured limit + */ public static void ensurePrefixQueryObeysMinimumPrefixLength( - Query query, String prefix, int minPrefixLength) { - // TODO Should we provide a query-param to disable the limit on a request-by-request basis? I - // can imagine scenarios where advanced users may want to enforce the limit on most fields, - // but ignore it for a few fields that they know to be low-cardinality and therefore "less - // risky" + QParser parser, Query query, String prefix) { + if (!(query instanceof PrefixQuery)) { + return; + } + + final var minPrefixLength = + parser != null ? parser.getPrefixQueryMinPrefixLength() : NO_PREFIX_QUERY_LENGTH_LIMIT; + if (minPrefixLength == NO_PREFIX_QUERY_LENGTH_LIMIT) { + return; + } + if (prefix.length() < minPrefixLength) { final var message = String.format( @@ -99,7 +122,7 @@ public static void ensurePrefixQueryObeysMinimumPrefixLength( query, minPrefixLength, prefix.length(), - SolrConfig.MIN_PREFIX_LENGTH); + SolrConfig.MIN_PREFIX_QUERY_TERM_LENGTH); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, message); } } diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml index b973bccc792..5632e36cb0f 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml @@ -89,7 +89,7 @@ --> ${solr.max.booleanClauses:1024} - - ${solr.min.prefixLength:2} + ${solr.query.minPrefixLength:-1} ${solr.max.booleanClauses:1024} - - - ${solr.min.prefixLength:2} + ${solr.query.minPrefixLength:-1}