Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SOLR-17195: Add 'minPrefixLength' soft limit #2499

Merged
merged 12 commits into from
Jul 9, 2024
8 changes: 8 additions & 0 deletions solr/core/src/java/org/apache/solr/core/SolrConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ public class SolrConfig implements MapSerializable {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

public static final String DEFAULT_CONF_FILE = "solrconfig.xml";
public static final String MIN_PREFIX_QUERY_TERM_LENGTH = "minPrefixQueryTermLength";
public static final int DEFAULT_MIN_PREFIX_QUERY_TERM_LENGTH = -1;
private final String resourceName;

private int znodeVersion;
Expand Down Expand Up @@ -289,6 +291,10 @@ private SolrConfig(
BooleanQuery.getMaxClauseCount(),
"set 'maxBooleanClauses' in solr.xml to increase global limit");
}
prefixQueryMinPrefixLength =
get("query")
.get(MIN_PREFIX_QUERY_TERM_LENGTH)
.intVal(DEFAULT_MIN_PREFIX_QUERY_TERM_LENGTH);

// Warn about deprecated / discontinued parameters
// boolToFilterOptimizer has had no effect since 3.1
Expand Down Expand Up @@ -668,6 +674,7 @@ public SolrRequestParsers getRequestParsers() {

/* The set of materialized parameters: */
public final int booleanQueryMaxClauseCount;
public final int prefixQueryMinPrefixLength;
// SolrIndexSearcher - nutch optimizer -- Disabled since 3.1
// public final boolean filtOptEnabled;
// public final int filtOptCacheSize;
Expand Down Expand Up @@ -1019,6 +1026,7 @@ public Map<String, Object> toMap(Map<String, Object> result) {
m.put("queryResultMaxDocsCached", queryResultMaxDocsCached);
m.put("enableLazyFieldLoading", enableLazyFieldLoading);
m.put("maxBooleanClauses", booleanQueryMaxClauseCount);
m.put(MIN_PREFIX_QUERY_TERM_LENGTH, prefixQueryMinPrefixLength);

for (SolrPluginInfo plugin : plugins) {
List<PluginInfo> infos = getPluginInfos(plugin.clazz.getName());
Expand Down
1 change: 1 addition & 0 deletions solr/core/src/java/org/apache/solr/schema/FieldType.java
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,7 @@ public Query getPrefixQuery(QParser parser, SchemaField sf, String termStr) {
}
PrefixQuery query = new PrefixQuery(new Term(sf.getName(), termStr));
query.setRewriteMethod(sf.getType().getRewriteMethod(parser, sf));
QueryUtils.ensurePrefixQueryObeysMinimumPrefixLength(parser, query, termStr);
return query;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ public Query parse() throws SyntaxError {
String defaultField = getParam(CommonParams.DF);

SolrQueryParserDelegate reverseAwareParser = new SolrQueryParserDelegate(this, defaultField);
final var qParserReference = this;

lparser =
new ComplexPhraseQueryParser(defaultField, getReq().getSchema().getQueryAnalyzer()) {
Expand All @@ -134,6 +135,14 @@ protected Query newWildcardQuery(org.apache.lucene.index.Term t) {
}
}

@Override
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
final var query = super.getPrefixQuery(field, termStr);
QueryUtils.ensurePrefixQueryObeysMinimumPrefixLength(
qParserReference, query, termStr);
return query;
dsmiley marked this conversation as resolved.
Show resolved Hide resolved
}

private Query setRewriteMethod(org.apache.lucene.search.Query query) {
if (query instanceof MultiTermQuery) {
((MultiTermQuery) query)
Expand Down
13 changes: 13 additions & 0 deletions solr/core/src/java/org/apache/solr/search/QParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.request.SolrQueryRequest;

/**
Expand Down Expand Up @@ -311,6 +312,18 @@ public void addDebugInfo(NamedList<Object> debugInfo) {
debugInfo.add("QParser", this.getClass().getSimpleName());
}

public int getPrefixQueryMinPrefixLength() {
final var localLimit =
getLocalParams() != null
? getLocalParams().getInt(SolrConfig.MIN_PREFIX_QUERY_TERM_LENGTH)
: null;
if (localLimit != null) {
return localLimit;
}

return getReq().getCore().getSolrConfig().prefixQueryMinPrefixLength;
}

/**
* Create a {@link QParser} to parse <code>qstr</code>, using the "lucene"
* (QParserPlugin.DEFAULT_QTYPE) query parser. The query parser may be overridden by local-params
Expand Down
44 changes: 44 additions & 0 deletions solr/core/src/java/org/apache/solr/search/QueryUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.search.BooleanClause;
Expand All @@ -31,9 +32,11 @@
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.request.SolrQueryRequest;

/** */
Expand Down Expand Up @@ -83,6 +86,47 @@ public static boolean isConstantScoreQuery(Query q) {
}
}

public static final int NO_PREFIX_QUERY_LENGTH_LIMIT = -1;

/**
* Validates that a provided prefix query obeys any limits (if configured) on the minimum
* allowable prefix size
*
* <p>The limit is retrieved from the provided QParser (see {@link
* QParser#getPrefixQueryMinPrefixLength()} for the default implementation).
*
* @param parser the QParser used to parse the query being validated. No limit will be enforced if
* 'null'
* @param query the query to validate. Limits will only be enforced if this is a {@link
* PrefixQuery}
* @param prefix a String term included in the provided query. Its size is compared against the
* configured limit
*/
public static void ensurePrefixQueryObeysMinimumPrefixLength(
QParser parser, Query query, String prefix) {
if (!(query instanceof PrefixQuery)) {
return;
}

final var minPrefixLength =
parser != null ? parser.getPrefixQueryMinPrefixLength() : NO_PREFIX_QUERY_LENGTH_LIMIT;
if (minPrefixLength == NO_PREFIX_QUERY_LENGTH_LIMIT) {
return;
}

if (prefix.length() < minPrefixLength) {
final var message =
String.format(
Locale.ROOT,
"Query [%s] does not meet the minimum prefix length [%d] (actual=[%d]). Please try with a larger prefix, or adjust %s in your solrconfig.xml",
query,
minPrefixLength,
prefix.length(),
SolrConfig.MIN_PREFIX_QUERY_TERM_LENGTH);
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, message);
}
}

/**
* Returns the original query if it was already a positive query, otherwise return the negative of
* the query (i.e., a positive query).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,10 @@ protected Query makeBucketQuery(final String bucketValue) {
private void calculateNumBuckets(SimpleOrderedMap<Object> target) throws IOException {
DocSet domain = fcontext.base;
if (freq.prefix != null) {
// TODO - Should we enforce minPrefixLength here in the case of 'string' fields, or omit
// since this is an "internal" request? If we want to enforce the limit in this case,
// we should have StrField read the configured limit and cache it in 'init' so that it can
// be read at 'getPrefixQuery' call-time without a QParser.
Query prefixFilter = sf.getType().getPrefixQuery(null, sf, freq.prefix);
domain = fcontext.searcher.getDocSet(prefixFilter, domain);
}
Expand Down
15 changes: 15 additions & 0 deletions solr/core/src/test-files/solr/collection1/conf/solrconfig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,21 @@
-->
<maxBooleanClauses>${solr.max.booleanClauses:1024}</maxBooleanClauses>

<!-- Minimum acceptable prefix-size for prefix-based queries.

Prefix-based queries consume memory in proportion to the number of terms in the index
that start with that prefix. Short prefixes tend to match many many more indexed-terms
and consume more memory as a result, sometimes causing stability issues on the node.

This setting allows administrators to require that prefixes meet or exceed a specified
minimum length requirement. Prefix queries that don't meet this requirement return an
error to users. The limit may be overridden on a per-query basis by specifying a
'minPrefixQueryTermLength' local-param value.

The flag value of '-1' can be used to disable enforcement of this limit.
-->
<minPrefixQueryTermLength>${solr.query.minPrefixLength:-1}</minPrefixQueryTermLength>

<!-- Cache specification for Filters or DocSets - unordered set of *all* documents
that match a particular query.
-->
Expand Down
4 changes: 2 additions & 2 deletions solr/core/src/test/org/apache/solr/ConvertedLegacyTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -637,8 +637,8 @@ public void testABunchOfConvertedStuff() {
"<add><doc><field name=\"id\">107</field><field name=\"val_s\">port</field></doc></add>");
assertU("<commit/>");

assertQ(req("val_s:a*"), "//*[@numFound='3']");
assertQ(req("val_s:p*"), "//*[@numFound='4']");
assertQ(req("val_s:ap*"), "//*[@numFound='3']");
assertQ(req("val_s:pe*"), "//*[@numFound='3']");
// val_s:* %//*[@numFound="8"]

// test wildcard query
Expand Down
2 changes: 2 additions & 0 deletions solr/core/src/test/org/apache/solr/core/SolrCoreTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,8 @@ public void testConfiguration() {
assertEquals(
"wrong config for slowQueryThresholdMillis", 2000, solrConfig.slowQueryThresholdMillis);
assertEquals("wrong config for maxBooleanClauses", 1024, solrConfig.booleanQueryMaxClauseCount);
assertEquals(
"wrong config for minPrefixQueryTermLength", -1, solrConfig.prefixQueryMinPrefixLength);
assertTrue("wrong config for enableLazyFieldLoading", solrConfig.enableLazyFieldLoading);
assertEquals("wrong config for queryResultWindowSize", 10, solrConfig.queryResultWindowSize);
}
Expand Down
128 changes: 128 additions & 0 deletions solr/core/src/test/org/apache/solr/search/PrefixQueryTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;

import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.junit.BeforeClass;
import org.junit.Test;

/**
* Unit tests for prefix-query functionality - mostly testing the 'minPrefixLength' setting
* available in solrconfig.xml
*/
public class PrefixQueryTest extends SolrTestCaseJ4 {

private static final String[] FIELDS_TO_TEST_PREFIX_LIMITING = new String[] {"val_s", "t_val"};

@BeforeClass
public static void beforeTests() throws Exception {
System.setProperty("solr.query.minPrefixLength", "2");
initCore("solrconfig.xml", "schema.xml");

assertU(createDocWithFieldVal("1", "aaa"));
assertU(createDocWithFieldVal("2", "aab"));
assertU(createDocWithFieldVal("3", "aac"));
assertU(createDocWithFieldVal("4", "abc"));

assertU(createDocWithFieldVal("5", "bbb"));
assertU(createDocWithFieldVal("6", "bbc"));

assertU("<commit/>");
}

// Sanity-check of a few queries we'll use in other tests
@Test
public void testPrefixQueryMatchesExpectedDocuments() {
for (String fieldName : FIELDS_TO_TEST_PREFIX_LIMITING) {
assertQ(req(fieldName + ":*"), "//*[@numFound='6']");
assertQ(req(fieldName + ":aa*"), "//*[@numFound='3']");
assertQ(req(fieldName + ":bb*"), "//*[@numFound='2']");
}
}

@Test
public void testPrefixQueryObeysMinPrefixLimit() {
for (String fieldName : FIELDS_TO_TEST_PREFIX_LIMITING) {
assertQEx(
"Prefix query didn't obey limit",
"does not meet the minimum prefix length [2] (actual=[1])",
req(fieldName + ":a*"),
SolrException.ErrorCode.BAD_REQUEST);
}
}

@Test
public void testPrefixQParserObeysMinPrefixLimit() {
for (String fieldName : FIELDS_TO_TEST_PREFIX_LIMITING) {
assertQEx(
"Prefix query didn't obey limit",
"does not meet the minimum prefix length [2] (actual=[1])",
req("q", "{!prefix f=" + fieldName + "}a"),
SolrException.ErrorCode.BAD_REQUEST);
}
}

@Test
public void testComplexPhraseQParserObeysMinPrefixLimit() {
for (String fieldName : FIELDS_TO_TEST_PREFIX_LIMITING) {
assertQEx(
"{!complex} query didn't obey min-prefix limit",
"does not meet the minimum prefix length [2] (actual=[1])",
req("q", "{!complexphrase inOrder=true}" + fieldName + ":\"a*\""),
SolrException.ErrorCode.BAD_REQUEST);
}
}

@Test
public void testLocalParamCanBeUsedToOverrideConfiguredLimit() {
// The solrconfig.xml configured limit is '2'; requests should fail when that is not overridden
for (String fieldName : FIELDS_TO_TEST_PREFIX_LIMITING) {
assertQEx(
"{!complex} query didn't obey min-prefix limit",
"does not meet the minimum prefix length [2] (actual=[1])",
req("q", "{!complexphrase inOrder=true}" + fieldName + ":\"a*\""),
SolrException.ErrorCode.BAD_REQUEST);
}

// When the configured limit *is* overridden to be more lenient, the requests should succeed!
for (String fieldName : FIELDS_TO_TEST_PREFIX_LIMITING) {
assertQ(
req(
"q",
"{!complexphrase inOrder=true minPrefixQueryTermLength=-1}" + fieldName + ":\"a*\""),
"//*[@numFound='4']");
}
}

@Test
public void testQuestionMarkWildcardsCountTowardsMinimumPrefix() {
// Both of these queries succeed since the '?' wildcard is counted as a part of the prefix
assertQ(req("val_s:a?c*"), "//*[@numFound='2']"); // Matches 'aac' and 'abc'
assertQ(req("val_s:a??*"), "//*[@numFound='4']"); // Matches all documents starting with 'a'
}

private static String createDocWithFieldVal(String id, String fieldVal) {
return "<add><doc><field name=\"id\">"
+ id
+ "</field><field name=\"val_s\">"
+ fieldVal
+ "</field><field name=\"t_val\">"
+ fieldVal
+ "</field></doc></add>";
}
}
15 changes: 15 additions & 0 deletions solr/server/solr/configsets/_default/conf/solrconfig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,21 @@
-->
<maxBooleanClauses>${solr.max.booleanClauses:1024}</maxBooleanClauses>

<!-- Minimum acceptable prefix-size for prefix-based queries.

Prefix-based queries consume memory in proportion to the number of terms in the index
that start with that prefix. Short prefixes tend to match many many more indexed-terms
and consume more memory as a result, sometimes causing stability issues on the node.

This setting allows administrators to require that prefixes meet or exceed a specified
minimum length requirement. Prefix queries that don't meet this requirement return an
error to users. The limit may be overridden on a per-query basis by specifying a
'minPrefixQueryTermLength' local-param value.

The flag value of '-1' can be used to disable enforcement of this limit.
-->
<minPrefixQueryTermLength>${solr.query.minPrefixLength:-1}</minPrefixQueryTermLength>

<!-- Solr Internal Query Caches
Starting with Solr 9.0 the default cache implementation used is CaffeineCache.
-->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,18 @@ This is the same system property used in the xref:configuring-solr-xml.adoc#glob
<maxBooleanClauses>${solr.max.booleanClauses:1024}</maxBooleanClauses>
----

=== <minPrefixQueryTermLength> Element

Prefix-based queries consume resources proportional to the number of terms in the index that start with the specified prefix.
Particularly short prefixes, those with only one or two characters for instance, tend to match such a large proportion of the index that they're a frequent cause of resource contention and instability.
This setting establishes a minimum prefix length for queries, giving administrators a way to block queries that might otherwise cause stability issues.
Queries that don't match this minimum prefix length trigger an error.
The setting can be overridden on a per-query basis by providing a `minPrefixQueryTermLength` "local-param" with a different value.

The setting aims to govern all prefix-based queries (e.g. `val_s:a*`, `{!prefix f=val_s}a`, `{!complexphrase}val_s:"a*"`).

In the default configset the minimum-prefix is set to '-1' (a flag value with the semantics of "no limit"), or the value of the `solr.query.minPrefixLength` system property (if specified).

=== <enableLazyFieldLoading> Element

When this parameter is set to `true`, fields that are not directly requested will be loaded only as needed.
Expand Down
Loading
Loading