Skip to content

Commit

Permalink
Add tests for wildcard + regexp match-all cases
Browse files Browse the repository at this point in the history
Signed-off-by: Michael Froh <[email protected]>
  • Loading branch information
msfroh committed Jun 11, 2024
1 parent 3ee80ee commit aa62135
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ setup:
id: 5
body:
my_field: "AbCd"
- do:
index:
index: test
id: 6
body:
other_field: "test"
- do:
indices.refresh: {}

Expand Down Expand Up @@ -198,3 +204,26 @@ setup:
my_field:
value: ".*06-08.*Cluster-Manager Node.*"
- match: { hits.total.value: 0 }

---
# A wildcard query whose whole pattern is "*" on my_field is expected to hit 5 documents.
# NOTE(review): the visible part of setup indexes document 6 with only other_field, so it is
# presumably the document left out of the count — confirm against the full setup section.
"wildcard match-all works":
- do:
search:
index: test
body:
query:
wildcard:
my_field:
value: "*"
- match: { hits.total.value: 5 }
---
# A regexp query with the pattern ".*" on my_field is expected to hit 5 documents.
# NOTE(review): the visible part of setup indexes document 6 with only other_field, so it is
# presumably the document left out of the count — confirm against the full setup section.
"regexp match-all works":
- do:
search:
index: test
body:
query:
regexp:
my_field:
value: ".*"
- match: { hits.total.value: 5 }
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
Expand Down Expand Up @@ -448,14 +449,20 @@ public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, bo
};
}

return new WildcardMatchingQuery(
name(),
matchAllTermsQuery(name(), getRequiredNGrams(finalValue)),
matchPredicate,
value,
context,
this
);
Set<String> requiredNGrams = getRequiredNGrams(finalValue);
Query approximation;
if (requiredNGrams.isEmpty()) {
// This only happens when all characters are wildcard characters (* or ?),
// or it's the empty string.
if (value.length() == 0 || value.contains("?")) {
approximation = this.existsQuery(context);
} else {
return existsQuery(context);
}
} else {
approximation = matchAllTermsQuery(name(), requiredNGrams);
}
return new WildcardMatchingQuery(name(), approximation, matchPredicate, value, context, this);
}

// Package-private for testing
Expand Down Expand Up @@ -540,10 +547,23 @@ public Query regexpQuery(
Automaton automaton = regExp.toAutomaton(maxDeterminizedStates);
CompiledAutomaton compiledAutomaton = new CompiledAutomaton(automaton);

return new WildcardMatchingQuery(name(), regexpToQuery(name(), regExp), s -> {
BytesRef valueBytes = BytesRefs.toBytesRef(s);
return compiledAutomaton.runAutomaton.run(valueBytes.bytes, valueBytes.offset, valueBytes.length);
}, "/" + value + "/", context, this);
Predicate<String> regexpPredicate;
if (compiledAutomaton.type == CompiledAutomaton.AUTOMATON_TYPE.ALL) {
return existsQuery(context);
} else if (compiledAutomaton.type == CompiledAutomaton.AUTOMATON_TYPE.NONE) {
return new MatchNoDocsQuery("Regular expression matches nothing");
} else {
regexpPredicate = s -> {
BytesRef valueBytes = BytesRefs.toBytesRef(s);
return compiledAutomaton.runAutomaton.run(valueBytes.bytes, valueBytes.offset, valueBytes.length);
};
}

Query approximation = regexpToQuery(name(), regExp);
if (approximation instanceof MatchAllDocsQuery) {
approximation = existsQuery(context);
}
return new WildcardMatchingQuery(name(), approximation, regexpPredicate, "/" + value + "/", context, this);
}

/**
Expand Down Expand Up @@ -602,6 +622,8 @@ private static Query regexpToQuery(String fieldName, RegExp regExp) {
}
if (query.clauses().size() == 1) {
return query.iterator().next().getQuery();
} else if (query.clauses().size() == 0) {
return new MatchAllDocsQuery();
}
return query;
}
Expand Down Expand Up @@ -704,7 +726,7 @@ private WildcardMatchingQuery(

/**
 * Renders this query as {@code WildcardMatchingQuery(<fieldName>:"<patternString>")}.
 *
 * @param s not used by this implementation
 * @return the textual form of this query, with the field name and the quoted pattern separated by a colon
 */
@Override
public String toString(String s) {
    // Only one return statement may remain: a second one after it is unreachable and does not compile.
    return "WildcardMatchingQuery(" + fieldName + ":\"" + patternString + "\")";
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,4 +149,28 @@ public void testRegexpQuery() {
assertTrue(actualMatchingQuery.getSecondPhaseMatcher().test("abcdjk"));
assertTrue(actualMatchingQuery.getSecondPhaseMatcher().test("abefqwertyhi"));
}

/**
 * Checks the queries returned by {@code wildcardQuery} for patterns made up only of wildcard characters.
 */
public void testWildcardMatchAll() {
    MappedFieldType fieldType = new WildcardFieldMapper.WildcardFieldType("field");

    // "???": the result is expected to be a WildcardMatchingQuery wrapping the field-exists query.
    Query questionMarksQuery = fieldType.wildcardQuery("???", null, null);
    Query expectedWrapped = new WildcardFieldMapper.WildcardMatchingQuery("field", fieldType.existsQuery(null), "???");
    assertEquals(expectedWrapped, questionMarksQuery);

    // "*": the result is expected to be the field-exists query itself, with no wrapper.
    Query starQuery = fieldType.wildcardQuery("*", null, null);
    assertEquals(fieldType.existsQuery(null), starQuery);
}

/**
 * Checks the queries returned by {@code regexpQuery} for the patterns {@code "..."} and {@code ".*"}.
 */
public void testRegexpMatchAll() {
    MappedFieldType fieldType = new WildcardFieldMapper.WildcardFieldType("field");

    // "..." accepts only strings of length exactly 3, so the predicate still has to run:
    // the result is expected to be a WildcardMatchingQuery wrapping the field-exists query.
    Query threeCharsQuery = fieldType.regexpQuery("...", 0, 0, 1000, null, null);
    Query expectedWrapped = new WildcardFieldMapper.WildcardMatchingQuery("field", fieldType.existsQuery(null), "/.../");
    assertEquals(expectedWrapped, threeCharsQuery);

    // ".*" accepts every string, so the plain field-exists query is expected.
    Query anyStringQuery = fieldType.regexpQuery(".*", 0, 0, 1000, null, null);
    assertEquals(fieldType.existsQuery(null), anyStringQuery);
}
}

0 comments on commit aa62135

Please sign in to comment.