diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index e054083eb20..61af4795afa 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -117,6 +117,11 @@ New Features * SOLR-10654: Introduce output of Prometheus metrics directly from Solr. (Matthew Biscocho via David Smiley) +* SOLR-17195: Configsets now include a `minPrefixQueryTermLength` setting, which instructs Solr to reject prefix queries whose prefixes are "too short". This can + be used as one line of defense against "runaway wildcard queries" consuming too many resources. The setting is disabled ('-1') in the default configset but can be + overridden with a property ('solr.query.minPrefixLength'). Users may also override their collection-wide setting for individual queries by providing a + `minPrefixQueryTermLength` local-param. (Jason Gerlowski, David Smiley) + Improvements --------------------- * SOLR-17137: Enable Prometheus exporter to communicate with SSL protected Solr. (Eivind Bergstøl via Eric Pugh) diff --git a/solr/core/src/java/org/apache/solr/core/SolrConfig.java b/solr/core/src/java/org/apache/solr/core/SolrConfig.java index 21256317926..173a44b9ba2 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrConfig.java +++ b/solr/core/src/java/org/apache/solr/core/SolrConfig.java @@ -103,6 +103,8 @@ public class SolrConfig implements MapSerializable { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); public static final String DEFAULT_CONF_FILE = "solrconfig.xml"; + public static final String MIN_PREFIX_QUERY_TERM_LENGTH = "minPrefixQueryTermLength"; + public static final int DEFAULT_MIN_PREFIX_QUERY_TERM_LENGTH = -1; private final String resourceName; private int znodeVersion; @@ -289,6 +291,10 @@ private SolrConfig( BooleanQuery.getMaxClauseCount(), "set 'maxBooleanClauses' in solr.xml to increase global limit"); } + prefixQueryMinPrefixLength = + get("query") + .get(MIN_PREFIX_QUERY_TERM_LENGTH) + .intVal(DEFAULT_MIN_PREFIX_QUERY_TERM_LENGTH); // 
Warn about deprecated / discontinued parameters // boolToFilterOptimizer has had no effect since 3.1 @@ -668,6 +674,7 @@ public SolrRequestParsers getRequestParsers() { /* The set of materialized parameters: */ public final int booleanQueryMaxClauseCount; + public final int prefixQueryMinPrefixLength; // SolrIndexSearcher - nutch optimizer -- Disabled since 3.1 // public final boolean filtOptEnabled; // public final int filtOptCacheSize; @@ -1019,6 +1026,7 @@ public Map toMap(Map result) { m.put("queryResultMaxDocsCached", queryResultMaxDocsCached); m.put("enableLazyFieldLoading", enableLazyFieldLoading); m.put("maxBooleanClauses", booleanQueryMaxClauseCount); + m.put(MIN_PREFIX_QUERY_TERM_LENGTH, prefixQueryMinPrefixLength); for (SolrPluginInfo plugin : plugins) { List infos = getPluginInfos(plugin.clazz.getName()); diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java index 3e860680122..0667c40a6d8 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java @@ -483,6 +483,7 @@ public Query getPrefixQuery(QParser parser, SchemaField sf, String termStr) { } PrefixQuery query = new PrefixQuery(new Term(sf.getName(), termStr)); query.setRewriteMethod(sf.getType().getRewriteMethod(parser, sf)); + QueryUtils.ensurePrefixQueryObeysMinimumPrefixLength(parser, query, termStr); return query; } diff --git a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java index 9d8210a8c2e..933fc7e2205 100644 --- a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java @@ -119,6 +119,7 @@ public Query parse() throws SyntaxError { String defaultField = getParam(CommonParams.DF); SolrQueryParserDelegate reverseAwareParser = new 
SolrQueryParserDelegate(this, defaultField); + final var qParserReference = this; lparser = new ComplexPhraseQueryParser(defaultField, getReq().getSchema().getQueryAnalyzer()) { @@ -134,6 +135,14 @@ protected Query newWildcardQuery(org.apache.lucene.index.Term t) { } } + @Override + protected Query getPrefixQuery(String field, String termStr) throws ParseException { + final var query = super.getPrefixQuery(field, termStr); + QueryUtils.ensurePrefixQueryObeysMinimumPrefixLength( + qParserReference, query, termStr); + return query; + } + private Query setRewriteMethod(org.apache.lucene.search.Query query) { if (query instanceof MultiTermQuery) { ((MultiTermQuery) query) diff --git a/solr/core/src/java/org/apache/solr/search/QParser.java b/solr/core/src/java/org/apache/solr/search/QParser.java index ab49d7b53b5..e6723cff3bf 100644 --- a/solr/core/src/java/org/apache/solr/search/QParser.java +++ b/solr/core/src/java/org/apache/solr/search/QParser.java @@ -28,6 +28,7 @@ import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.StrUtils; +import org.apache.solr.core.SolrConfig; import org.apache.solr.request.SolrQueryRequest; /** @@ -311,6 +312,18 @@ public void addDebugInfo(NamedList debugInfo) { debugInfo.add("QParser", this.getClass().getSimpleName()); } + public int getPrefixQueryMinPrefixLength() { + final var localLimit = + getLocalParams() != null + ? getLocalParams().getInt(SolrConfig.MIN_PREFIX_QUERY_TERM_LENGTH) + : null; + if (localLimit != null) { + return localLimit; + } + + return getReq().getCore().getSolrConfig().prefixQueryMinPrefixLength; + } + /** * Create a {@link QParser} to parse qstr, using the "lucene" * (QParserPlugin.DEFAULT_QTYPE) query parser. 
The query parser may be overridden by local-params diff --git a/solr/core/src/java/org/apache/solr/search/QueryUtils.java b/solr/core/src/java/org/apache/solr/search/QueryUtils.java index fa49ef91bc9..ba02c34819c 100644 --- a/solr/core/src/java/org/apache/solr/search/QueryUtils.java +++ b/solr/core/src/java/org/apache/solr/search/QueryUtils.java @@ -21,6 +21,7 @@ import java.util.Collections; import java.util.IdentityHashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import org.apache.lucene.search.BooleanClause; @@ -31,9 +32,11 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.core.SolrConfig; import org.apache.solr.request.SolrQueryRequest; /** */ @@ -83,6 +86,47 @@ public static boolean isConstantScoreQuery(Query q) { } } + public static final int NO_PREFIX_QUERY_LENGTH_LIMIT = -1; + + /** + * Validates that a provided prefix query obeys any limits (if configured) on the minimum + * allowable prefix size + * + *

The limit is retrieved from the provided QParser (see {@link + * QParser#getPrefixQueryMinPrefixLength()} for the default implementation). + * + * @param parser the QParser used to parse the query being validated. No limit will be enforced if + * 'null' + * @param query the query to validate. Limits will only be enforced if this is a {@link + * PrefixQuery} + * @param prefix a String term included in the provided query. Its size is compared against the + * configured limit + */ + public static void ensurePrefixQueryObeysMinimumPrefixLength( + QParser parser, Query query, String prefix) { + if (!(query instanceof PrefixQuery)) { + return; + } + + final var minPrefixLength = + parser != null ? parser.getPrefixQueryMinPrefixLength() : NO_PREFIX_QUERY_LENGTH_LIMIT; + if (minPrefixLength == NO_PREFIX_QUERY_LENGTH_LIMIT) { + return; + } + + if (prefix.length() < minPrefixLength) { + final var message = + String.format( + Locale.ROOT, + "Query [%s] does not meet the minimum prefix length [%d] (actual=[%d]). Please try with a larger prefix, or adjust %s in your solrconfig.xml", + query, + minPrefixLength, + prefix.length(), + SolrConfig.MIN_PREFIX_QUERY_TERM_LENGTH); + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, message); + } + } + /** * Returns the original query if it was already a positive query, otherwise return the negative of * the query (i.e., a positive query). 
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java index 7e4cd041ea6..ff3a2553ca4 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java @@ -544,6 +544,10 @@ protected Query makeBucketQuery(final String bucketValue) { private void calculateNumBuckets(SimpleOrderedMap target) throws IOException { DocSet domain = fcontext.base; if (freq.prefix != null) { + // TODO - Should we enforce minPrefixLength here in the case of 'string' fields, or omit + // since this is an "internal" request? If we want to enforce the limit in this case, + // we should have StrField read the configured limit and cache it in 'init' so that it can + // be read at 'getPrefixQuery' call-time without a QParser. Query prefixFilter = sf.getType().getPrefixQuery(null, sf, freq.prefix); domain = fcontext.searcher.getDocSet(prefixFilter, domain); } diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml index 82dca6384d8..5632e36cb0f 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml @@ -89,6 +89,21 @@ --> ${solr.max.booleanClauses:1024} + + ${solr.query.minPrefixLength:-1} + diff --git a/solr/core/src/test/org/apache/solr/ConvertedLegacyTest.java b/solr/core/src/test/org/apache/solr/ConvertedLegacyTest.java index fdbc3c7ec14..5c9507d9f9a 100644 --- a/solr/core/src/test/org/apache/solr/ConvertedLegacyTest.java +++ b/solr/core/src/test/org/apache/solr/ConvertedLegacyTest.java @@ -637,8 +637,8 @@ public void testABunchOfConvertedStuff() { "107port"); assertU(""); - assertQ(req("val_s:a*"), "//*[@numFound='3']"); - assertQ(req("val_s:p*"), "//*[@numFound='4']"); + assertQ(req("val_s:ap*"), "//*[@numFound='3']"); + 
assertQ(req("val_s:pe*"), "//*[@numFound='3']"); // val_s:* %//*[@numFound="8"] // test wildcard query diff --git a/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java b/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java index 28cfbc70cae..940092b9fb4 100644 --- a/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java +++ b/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java @@ -298,6 +298,8 @@ public void testConfiguration() { assertEquals( "wrong config for slowQueryThresholdMillis", 2000, solrConfig.slowQueryThresholdMillis); assertEquals("wrong config for maxBooleanClauses", 1024, solrConfig.booleanQueryMaxClauseCount); + assertEquals( + "wrong config for minPrefixQueryTermLength", -1, solrConfig.prefixQueryMinPrefixLength); assertTrue("wrong config for enableLazyFieldLoading", solrConfig.enableLazyFieldLoading); assertEquals("wrong config for queryResultWindowSize", 10, solrConfig.queryResultWindowSize); } diff --git a/solr/core/src/test/org/apache/solr/search/PrefixQueryTest.java b/solr/core/src/test/org/apache/solr/search/PrefixQueryTest.java new file mode 100644 index 00000000000..c2f5be7e065 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/search/PrefixQueryTest.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.search; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.SolrException; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Unit tests for prefix-query functionality - mostly testing the 'minPrefixLength' setting + * available in solrconfig.xml + */ +public class PrefixQueryTest extends SolrTestCaseJ4 { + + private static final String[] FIELDS_TO_TEST_PREFIX_LIMITING = new String[] {"val_s", "t_val"}; + + @BeforeClass + public static void beforeTests() throws Exception { + System.setProperty("solr.query.minPrefixLength", "2"); + initCore("solrconfig.xml", "schema.xml"); + + assertU(createDocWithFieldVal("1", "aaa")); + assertU(createDocWithFieldVal("2", "aab")); + assertU(createDocWithFieldVal("3", "aac")); + assertU(createDocWithFieldVal("4", "abc")); + + assertU(createDocWithFieldVal("5", "bbb")); + assertU(createDocWithFieldVal("6", "bbc")); + + assertU(""); + } + + // Sanity-check of a few queries we'll use in other tests + @Test + public void testPrefixQueryMatchesExpectedDocuments() { + for (String fieldName : FIELDS_TO_TEST_PREFIX_LIMITING) { + assertQ(req(fieldName + ":*"), "//*[@numFound='6']"); + assertQ(req(fieldName + ":aa*"), "//*[@numFound='3']"); + assertQ(req(fieldName + ":bb*"), "//*[@numFound='2']"); + } + } + + @Test + public void testPrefixQueryObeysMinPrefixLimit() { + for (String fieldName : FIELDS_TO_TEST_PREFIX_LIMITING) { + assertQEx( + "Prefix query didn't obey limit", + "does not meet the minimum prefix length [2] (actual=[1])", + req(fieldName + ":a*"), + SolrException.ErrorCode.BAD_REQUEST); + } + } + + @Test + public void testPrefixQParserObeysMinPrefixLimit() { + for (String fieldName : FIELDS_TO_TEST_PREFIX_LIMITING) { + assertQEx( + "Prefix query didn't obey limit", + "does not meet the minimum prefix length [2] (actual=[1])", + req("q", "{!prefix f=" + 
fieldName + "}a"), + SolrException.ErrorCode.BAD_REQUEST); + } + } + + @Test + public void testComplexPhraseQParserObeysMinPrefixLimit() { + for (String fieldName : FIELDS_TO_TEST_PREFIX_LIMITING) { + assertQEx( + "{!complex} query didn't obey min-prefix limit", + "does not meet the minimum prefix length [2] (actual=[1])", + req("q", "{!complexphrase inOrder=true}" + fieldName + ":\"a*\""), + SolrException.ErrorCode.BAD_REQUEST); + } + } + + @Test + public void testLocalParamCanBeUsedToOverrideConfiguredLimit() { + // The solrconfig.xml configured limit is '2'; requests should fail when that is not overridden + for (String fieldName : FIELDS_TO_TEST_PREFIX_LIMITING) { + assertQEx( + "{!complex} query didn't obey min-prefix limit", + "does not meet the minimum prefix length [2] (actual=[1])", + req("q", "{!complexphrase inOrder=true}" + fieldName + ":\"a*\""), + SolrException.ErrorCode.BAD_REQUEST); + } + + // When the configured limit *is* overridden to be more lenient, the requests should succeed! + for (String fieldName : FIELDS_TO_TEST_PREFIX_LIMITING) { + assertQ( + req( + "q", + "{!complexphrase inOrder=true minPrefixQueryTermLength=-1}" + fieldName + ":\"a*\""), + "//*[@numFound='4']"); + } + } + + @Test + public void testQuestionMarkWildcardsCountTowardsMinimumPrefix() { + // Both of these queries succeed since the '?' 
wildcard is counted as a part of the prefix + assertQ(req("val_s:a?c*"), "//*[@numFound='2']"); // Matches 'aac' and 'abc' + assertQ(req("val_s:a??*"), "//*[@numFound='4']"); // Matches all documents starting with 'a' + } + + private static String createDocWithFieldVal(String id, String fieldVal) { + return "" + + id + + "" + + fieldVal + + "" + + fieldVal + + ""; + } +} diff --git a/solr/server/solr/configsets/_default/conf/solrconfig.xml b/solr/server/solr/configsets/_default/conf/solrconfig.xml index 502f1b2db59..92896c6d3f7 100644 --- a/solr/server/solr/configsets/_default/conf/solrconfig.xml +++ b/solr/server/solr/configsets/_default/conf/solrconfig.xml @@ -360,6 +360,21 @@ --> ${solr.max.booleanClauses:1024} + + ${solr.query.minPrefixLength:-1} + diff --git a/solr/solr-ref-guide/modules/configuration-guide/pages/caches-warming.adoc b/solr/solr-ref-guide/modules/configuration-guide/pages/caches-warming.adoc index e0536003b49..431cbac5f98 100644 --- a/solr/solr-ref-guide/modules/configuration-guide/pages/caches-warming.adoc +++ b/solr/solr-ref-guide/modules/configuration-guide/pages/caches-warming.adoc @@ -235,6 +235,18 @@ This is the same system property used in the xref:configuring-solr-xml.adoc#glob ${solr.max.booleanClauses:1024} ---- +=== Element + +Prefix-based queries consume resources proportional to the number of terms in the index that start with the specified prefix. +Particularly short prefixes, those with only one or two characters for instance, tend to match such a large proportion of the index that they're a frequent cause of resource contention and instability. +This setting establishes a minimum prefix length for queries, giving administrators a way to block queries that might otherwise cause stability issues. +Queries that don't match this minimum prefix length trigger an error. +The setting can be overridden on a per-query basis by providing a `minPrefixQueryTermLength` "local-param" with a different value. 
+ +The setting aims to govern all prefix-based queries (e.g. `val_s:a*`, `{!prefix f=val_s}a`, `{!complexphrase}val_s:"a*"`). + +In the default configset the minimum-prefix is set to '-1' (a flag value with the semantics of "no limit"), or the value of the `solr.query.minPrefixLength` system property (if specified). + === Element When this parameter is set to `true`, fields that are not directly requested will be loaded only as needed. diff --git a/solr/solr-ref-guide/modules/query-guide/pages/other-parsers.adoc b/solr/solr-ref-guide/modules/query-guide/pages/other-parsers.adoc index 0fc92b17268..1bb9316f563 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/other-parsers.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/other-parsers.adoc @@ -229,21 +229,25 @@ A mix of ordered and unordered complex phrase queries: Performance is sensitive to the number of unique terms that are associated with a pattern. For instance, searching for "a*" will form a large OR clause (technically a SpanOr with many terms) for all of the terms in your index for the indicated field that start with the single letter 'a'. It may be prudent to restrict wildcards to at least two or preferably three letters as a prefix. -Allowing very short prefixes may result in to many low-quality documents being returned. +Allowing very short prefixes may result in too many low-quality documents being returned. Notice that it also supports leading wildcards "*a" as well with consequent performance implications. Applying xref:indexing-guide:filters.adoc#reversed-wildcard-filter[ReversedWildcardFilterFactory] in index-time analysis is usually a good idea. -==== MaxBooleanClauses with Complex Phrase Parser +==== Query Settings and Complex Phrase Parser -You may need to increase MaxBooleanClauses in `solrconfig.xml` as a result of the term expansion above: +Due to the query-expansion described above, this parser may produce queries that run afoul of several `solrconfig.xml` settings. 
+ +Particularly relevant are `maxBooleanClauses` and `minPrefixQueryTermLength`, two safeguards that Solr provides in order to curb overly resource-intensive queries. [source,xml] ---- <maxBooleanClauses>4096</maxBooleanClauses> +<minPrefixQueryTermLength>1</minPrefixQueryTermLength> ---- -This property is described in more detail in the section xref:configuration-guide:caches-warming.adoc#query-sizing-and-warming[Query Sizing and Warming]. +Both properties are described in more detail in the section xref:configuration-guide:caches-warming.adoc#query-sizing-and-warming[Query Sizing and Warming]. +Administrators should consider the performance tradeoffs carefully when making changes to support "Complex Phrase" queries. ==== Stopwords with Complex Phrase Parser