Skip to content

Commit

Permalink
Use FieldMapper to create the low-level term queries in CommonTermsQuery
Browse files Browse the repository at this point in the history
  • Loading branch information
s1monw committed Feb 27, 2014
1 parent 1c36176 commit 26e5349
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 19 deletions.
105 changes: 100 additions & 5 deletions src/main/java/org/apache/lucene/queries/ExtendedCommonTermsQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,18 @@

package org.apache.lucene.queries;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.index.mapper.FieldMapper;

import java.io.IOException;

/**
* Extended version of {@link CommonTermsQuery} that allows to pass in a
Expand All @@ -29,12 +39,11 @@
*/
public class ExtendedCommonTermsQuery extends CommonTermsQuery {

public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, boolean disableCoord) {
super(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord);
}
private final FieldMapper<?> mapper;

public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency) {
super(highFreqOccur, lowFreqOccur, maxTermFrequency);
/**
 * Creates a common-terms query whose per-term queries can be produced by a field mapper.
 *
 * The first four arguments are passed unchanged to the {@link CommonTermsQuery} constructor.
 *
 * @param highFreqOccur    forwarded to the super constructor
 * @param lowFreqOccur     forwarded to the super constructor
 * @param maxTermFrequency forwarded to the super constructor
 * @param disableCoord     forwarded to the super constructor
 * @param mapper           field mapper consulted by {@code newTermQuery}; may be {@code null},
 *                         in which case plain {@link TermQuery} instances are created
 */
public ExtendedCommonTermsQuery(Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, boolean disableCoord, FieldMapper<?> mapper) {
super(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord);
this.mapper = mapper;
}

private String lowFreqMinNumShouldMatchSpec;
Expand Down Expand Up @@ -72,4 +81,90 @@ public void setLowFreqMinimumNumberShouldMatch(String spec) {
/**
 * Returns the minimum-should-match specification currently held for the
 * low-frequency part of this query.
 *
 * @return the value of {@code lowFreqMinNumShouldMatchSpec}, which may be {@code null}
 *         if it has never been assigned
 */
public String getLowFreqMinimumNumberShouldMatchSpec() {
    return this.lowFreqMinNumShouldMatchSpec;
}

// LUCENE-UPGRADE: remove this method if on 4.8
/**
 * Rewrites this query, short-circuiting the trivial cases so that single-term
 * queries also go through {@link #newTermQuery(Term, TermContext)}.
 *
 * @param reader the index reader, only used when delegating to the super class
 * @return an empty {@link BooleanQuery} when there are no terms; a single term query
 *         carrying this query's boost when there is exactly one term; otherwise the
 *         result of {@code super.rewrite(reader)}
 * @throws IOException if the super class rewrite fails
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
    switch (this.terms.size()) {
        case 0:
            // nothing to match
            return new BooleanQuery();
        case 1:
            // a single term needs no frequency analysis, hence no TermContext
            final Query single = newTermQuery(this.terms.get(0), null);
            single.setBoost(getBoost());
            return single;
        default:
            return super.rewrite(reader);
    }
}

// LUCENE-UPGRADE: remove this method if on 4.8
/**
 * Splits the query terms into a low-frequency and a high-frequency boolean query
 * and combines them into the final query.
 *
 * A term without a {@link TermContext} is always treated as low frequency. A term
 * is high frequency when its document frequency exceeds {@code maxTermFrequency},
 * interpreted as an absolute count if it is {@code >= 1}, or exceeds
 * {@code ceil(maxTermFrequency * maxDoc)}.
 *
 * @param maxDoc       document count used to scale a fractional {@code maxTermFrequency}
 * @param contextArray per-term contexts, parallel to {@code queryTerms}; entries may be {@code null}
 * @param queryTerms   the terms to build clauses for
 * @return the high-frequency query alone if there are no low-frequency clauses, the
 *         low-frequency query alone if there are no high-frequency clauses, otherwise a
 *         boolean query with the low-frequency part as MUST and the high-frequency part
 *         as SHOULD; in every case the result carries this query's boost
 */
@Override
protected Query buildQuery(final int maxDoc,
        final TermContext[] contextArray, final Term[] queryTerms) {
    final BooleanQuery lowFreq = new BooleanQuery(disableCoord);
    lowFreq.setBoost(lowFreqBoost);
    final BooleanQuery highFreq = new BooleanQuery(disableCoord);
    highFreq.setBoost(highFreqBoost);

    for (int idx = 0; idx < queryTerms.length; idx++) {
        final Term term = queryTerms[idx];
        final TermContext ctx = contextArray[idx];
        if (ctx == null) {
            // no context for this term: treat it as low frequency
            lowFreq.add(newTermQuery(term, null), lowFreqOccur);
            continue;
        }
        final int docFreq = ctx.docFreq();
        final boolean overAbsoluteCutoff = maxTermFrequency >= 1f && docFreq > maxTermFrequency;
        final boolean overRelativeCutoff = docFreq > (int) Math.ceil(maxTermFrequency * (float) maxDoc);
        if (overAbsoluteCutoff || overRelativeCutoff) {
            highFreq.add(newTermQuery(term, ctx), highFreqOccur);
        } else {
            lowFreq.add(newTermQuery(term, ctx), lowFreqOccur);
        }
    }

    final int lowCount = lowFreq.clauses().size();
    final int highCount = highFreq.clauses().size();
    if (lowCount > 0 && lowFreqOccur == Occur.SHOULD) {
        lowFreq.setMinimumNumberShouldMatch(calcLowFreqMinimumNumberShouldMatch(lowCount));
    }
    if (highCount > 0 && highFreqOccur == Occur.SHOULD) {
        highFreq.setMinimumNumberShouldMatch(calcHighFreqMinimumNumberShouldMatch(highCount));
    }

    if (lowCount == 0) {
        /*
         * Without low frequency terms the high frequency terms are turned into a
         * conjunction to prevent slow queries.
         */
        if (highFreqOccur != Occur.MUST && highFreq.getMinimumNumberShouldMatch() == 0) {
            for (BooleanClause clause : highFreq) {
                clause.setOccur(Occur.MUST);
            }
        }
        highFreq.setBoost(getBoost());
        return highFreq;
    }

    if (highCount == 0) {
        // only low frequency terms are present
        lowFreq.setBoost(getBoost());
        return lowFreq;
    }

    // both groups present: low frequency terms are required, high frequency terms only add to the score
    final BooleanQuery combined = new BooleanQuery(true);
    combined.add(highFreq, Occur.SHOULD);
    combined.add(lowFreq, Occur.MUST);
    combined.setBoost(getBoost());
    return combined;
}

//@Override
// LUCENE-UPGRADE: remove this method if on 4.8
/**
 * Builds the query used for a single term.
 *
 * When a field mapper is configured and its {@code queryStringTermQuery(term)} returns
 * a non-null query, that query is returned and {@code context} is not used. Otherwise
 * a plain {@link TermQuery} is created, reusing {@code context} when one is supplied.
 *
 * @param term    the term to query for
 * @param context the term's context, or {@code null} if none is available
 * @return the mapper-provided query, or a {@link TermQuery} as fallback
 */
protected Query newTermQuery(Term term, TermContext context) {
    final Query mapped = (mapper != null) ? mapper.queryStringTermQuery(term) : null;
    if (mapped != null) {
        return mapped;
    }
    // this should be super.newTermQuery(term, context) once it's available in the super class
    if (context == null) {
        return new TermQuery(term);
    }
    return new TermQuery(term, context);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -166,19 +166,6 @@ public Query parse(QueryParseContext parseContext) throws IOException, QueryPars
if (value == null) {
throw new QueryParsingException(parseContext.index(), "No text specified for text query");
}
ExtendedCommonTermsQuery commonsQuery = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoords);
commonsQuery.setBoost(boost);
Query query = parseQueryString(commonsQuery, value.toString(), fieldName, parseContext, queryAnalyzer, lowFreqMinimumShouldMatch, highFreqMinimumShouldMatch);
if (queryName != null) {
parseContext.addNamedQuery(queryName, query);
}
return query;
}


private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String fieldName, QueryParseContext parseContext,
String queryAnalyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {

FieldMapper<?> mapper = null;
String field;
MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
Expand Down Expand Up @@ -207,6 +194,18 @@ private final Query parseQueryString(ExtendedCommonTermsQuery query, String quer
}
}

ExtendedCommonTermsQuery commonsQuery = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoords, mapper);
commonsQuery.setBoost(boost);
Query query = parseQueryString(commonsQuery, value.toString(), field, parseContext, analyzer, lowFreqMinimumShouldMatch, highFreqMinimumShouldMatch, smartNameFieldMappers);
if (queryName != null) {
parseContext.addNamedQuery(queryName, query);
}
return query;
}


private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String field, QueryParseContext parseContext,
Analyzer analyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch, MapperService.SmartNameFieldMappers smartNameFieldMappers) throws IOException {
// Logic similar to QueryParser#getFieldQuery
TokenStream source = analyzer.tokenStream(field, queryString.toString());
int count = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ public Query parse(Type type, String fieldName, Object value) throws IOException
return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
}
if (commonTermsCutoff != null) {
ExtendedCommonTermsQuery q = new ExtendedCommonTermsQuery(occur, occur, commonTermsCutoff, positionCount == 1);
ExtendedCommonTermsQuery q = new ExtendedCommonTermsQuery(occur, occur, commonTermsCutoff, positionCount == 1, mapper);
for (int i = 0; i < numTokens; i++) {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
Expand Down
15 changes: 15 additions & 0 deletions src/test/java/org/elasticsearch/search/query/SimpleQueryTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,21 @@ public void testAllDocsQueryString() throws InterruptedException, ExecutionExcep
}
}

/**
 * Runs a common-terms query for "test" against the {@code _all} field and checks that
 * both documents match, with document 2 ranked above document 1.
 */
@Test
public void testCommonTermsQueryOnAllField() throws Exception {
    // "comment" is mapped with boost=5.0, "message" without a boost
    client().admin().indices().prepareCreate("test")
            .addMapping("type1", "message", "type=string", "comment", "type=string,boost=5.0")
            .setSettings(SETTING_NUMBER_OF_SHARDS, 1).get();
    // doc 1 has "test" in "message", doc 2 has "test" in the boosted "comment" field
    indexRandom(true, client().prepareIndex("test", "type1", "1").setSource("message", "test message", "comment", "whatever"),
            client().prepareIndex("test", "type1", "2").setSource("message", "hello world", "comment", "test comment"));

    SearchResponse searchResponse = client().prepareSearch().setQuery(commonTerms("_all", "test")).get();
    // upper-case 'L' suffix: a lower-case 'l' is easily misread as the digit 1
    assertHitCount(searchResponse, 2L);
    // NOTE(review): doc 2 is presumably expected first because of the field boost on "comment" - confirm
    assertFirstHit(searchResponse, hasId("2"));
    assertSecondHit(searchResponse, hasId("1"));
    assertThat(searchResponse.getHits().getHits()[0].getScore(), greaterThan(searchResponse.getHits().getHits()[1].getScore()));
}

@Test
public void testCommonTermsQuery() throws Exception {
client().admin().indices().prepareCreate("test")
Expand Down

0 comments on commit 26e5349

Please sign in to comment.