diff --git a/buildSrc/src/main/resources/checkstyle_suppressions.xml b/buildSrc/src/main/resources/checkstyle_suppressions.xml
index b0ab7f9b7e9ac..212b407f02d0b 100644
--- a/buildSrc/src/main/resources/checkstyle_suppressions.xml
+++ b/buildSrc/src/main/resources/checkstyle_suppressions.xml
@@ -372,9 +372,6 @@
-
-
-
@@ -564,7 +561,6 @@
-
@@ -644,7 +640,6 @@
-
diff --git a/docs/reference/migration/migrate_7_0/search.asciidoc b/docs/reference/migration/migrate_7_0/search.asciidoc
index 6cf004da6ce8d..44e22c97f2f9b 100644
--- a/docs/reference/migration/migrate_7_0/search.asciidoc
+++ b/docs/reference/migration/migrate_7_0/search.asciidoc
@@ -78,6 +78,13 @@ removed.
* `levenstein` - replaced by `levenshtein`
* `jarowinkler` - replaced by `jaro_winkler`
+[float]
+==== `popular` mode for Suggesters
+
+The `popular` mode for Suggesters (`term` and `phrase`) now uses the doc frequency
+(instead of the total term frequency) of the input terms to compute the frequency
+threshold for candidate suggestions.
+
[float]
==== Limiting the number of terms that can be used in a Terms Query request
diff --git a/server/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateGenerator.java b/server/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateGenerator.java
index f98822296b086..8730330d39fea 100644
--- a/server/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateGenerator.java
+++ b/server/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateGenerator.java
@@ -18,6 +18,7 @@
*/
package org.elasticsearch.search.suggest.phrase;
+import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet;
@@ -29,7 +30,7 @@ public abstract class CandidateGenerator {
public abstract boolean isKnownWord(BytesRef term) throws IOException;
- public abstract long frequency(BytesRef term) throws IOException;
+ public abstract TermStats termStats(BytesRef term) throws IOException;
public CandidateSet drawCandidates(BytesRef term) throws IOException {
CandidateSet set = new CandidateSet(Candidate.EMPTY, createCandidate(term, true));
@@ -37,14 +38,14 @@ public CandidateSet drawCandidates(BytesRef term) throws IOException {
}
public Candidate createCandidate(BytesRef term, boolean userInput) throws IOException {
- return createCandidate(term, frequency(term), 1.0, userInput);
+ return createCandidate(term, termStats(term), 1.0, userInput);
}
- public Candidate createCandidate(BytesRef term, long frequency, double channelScore) throws IOException {
- return createCandidate(term, frequency, channelScore, false);
+ public Candidate createCandidate(BytesRef term, TermStats termStats, double channelScore) throws IOException {
+ return createCandidate(term, termStats, channelScore, false);
}
- public abstract Candidate createCandidate(BytesRef term, long frequency, double channelScore, boolean userInput) throws IOException;
+ public abstract Candidate createCandidate(BytesRef term, TermStats termStats,
+ double channelScore, boolean userInput) throws IOException;
public abstract CandidateSet drawCandidates(CandidateSet set) throws IOException;
-
}
diff --git a/server/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateScorer.java b/server/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateScorer.java
index 3928a16b7c9a0..d93ef42ee4ff7 100644
--- a/server/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateScorer.java
+++ b/server/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateScorer.java
@@ -77,21 +77,24 @@ public void findCandidates(CandidateSet[] candidates, Candidate[] path, int ord,
} else {
if (numMissspellingsLeft > 0) {
path[ord] = current.originalTerm;
- findCandidates(candidates, path, ord + 1, numMissspellingsLeft, corrections, cutoffScore, pathScore + scorer.score(path, candidates, ord, gramSize));
+ findCandidates(candidates, path, ord + 1, numMissspellingsLeft, corrections, cutoffScore,
+ pathScore + scorer.score(path, candidates, ord, gramSize));
for (int i = 0; i < current.candidates.length; i++) {
path[ord] = current.candidates[i];
- findCandidates(candidates, path, ord + 1, numMissspellingsLeft - 1, corrections, cutoffScore, pathScore + scorer.score(path, candidates, ord, gramSize));
+ findCandidates(candidates, path, ord + 1, numMissspellingsLeft - 1, corrections, cutoffScore,
+ pathScore + scorer.score(path, candidates, ord, gramSize));
}
} else {
path[ord] = current.originalTerm;
- findCandidates(candidates, path, ord + 1, 0, corrections, cutoffScore, pathScore + scorer.score(path, candidates, ord, gramSize));
+ findCandidates(candidates, path, ord + 1, 0, corrections, cutoffScore,
+ pathScore + scorer.score(path, candidates, ord, gramSize));
}
}
}
- private void updateTop(CandidateSet[] candidates, Candidate[] path, PriorityQueue corrections, double cutoffScore, double score)
- throws IOException {
+ private void updateTop(CandidateSet[] candidates, Candidate[] path,
+ PriorityQueue corrections, double cutoffScore, double score) throws IOException {
score = Math.exp(score);
assert Math.abs(score - score(path, candidates)) < 0.00001 : "cur_score=" + score + ", path_score=" + score(path,candidates);
if (score > cutoffScore) {
diff --git a/server/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java b/server/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java
index 678b00aa13dca..bc2f6a8d42f65 100644
--- a/server/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java
+++ b/server/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java
@@ -23,6 +23,7 @@
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
@@ -48,6 +49,7 @@
import static java.lang.Math.log10;
import static java.lang.Math.max;
+import static java.lang.Math.min;
import static java.lang.Math.round;
public final class DirectCandidateGenerator extends CandidateGenerator {
@@ -57,20 +59,20 @@ public final class DirectCandidateGenerator extends CandidateGenerator {
private final SuggestMode suggestMode;
private final TermsEnum termsEnum;
private final IndexReader reader;
- private final long dictSize;
+ private final long sumTotalTermFreq;
private static final double LOG_BASE = 5;
private final long frequencyPlateau;
private final Analyzer preFilter;
private final Analyzer postFilter;
private final double nonErrorLikelihood;
- private final boolean useTotalTermFrequency;
private final CharsRefBuilder spare = new CharsRefBuilder();
private final BytesRefBuilder byteSpare = new BytesRefBuilder();
private final int numCandidates;
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader,
double nonErrorLikelihood, int numCandidates) throws IOException {
- this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field));
+ this(spellchecker, field, suggestMode, reader, nonErrorLikelihood,
+ numCandidates, null, null, MultiFields.getTerms(reader, field));
}
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader,
@@ -83,14 +85,12 @@ public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, S
this.numCandidates = numCandidates;
this.suggestMode = suggestMode;
this.reader = reader;
- final long dictSize = terms.getSumTotalTermFreq();
- this.useTotalTermFrequency = dictSize != -1;
- this.dictSize = dictSize == -1 ? reader.maxDoc() : dictSize;
+ this.sumTotalTermFreq = terms.getSumTotalTermFreq() == -1 ? reader.maxDoc() : terms.getSumTotalTermFreq();
this.preFilter = preFilter;
this.postFilter = postFilter;
this.nonErrorLikelihood = nonErrorLikelihood;
float thresholdFrequency = spellchecker.getThresholdFrequency();
- this.frequencyPlateau = thresholdFrequency >= 1.0f ? (int) thresholdFrequency: (int)(dictSize * thresholdFrequency);
+ this.frequencyPlateau = thresholdFrequency >= 1.0f ? (int) thresholdFrequency: (int) (reader.maxDoc() * thresholdFrequency);
termsEnum = terms.iterator();
}
@@ -99,24 +99,29 @@ public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, S
*/
@Override
public boolean isKnownWord(BytesRef term) throws IOException {
- return frequency(term) > 0;
+ return termStats(term).docFreq > 0;
}
/* (non-Javadoc)
* @see org.elasticsearch.search.suggest.phrase.CandidateGenerator#frequency(org.apache.lucene.util.BytesRef)
*/
@Override
- public long frequency(BytesRef term) throws IOException {
+ public TermStats termStats(BytesRef term) throws IOException {
term = preFilter(term, spare, byteSpare);
- return internalFrequency(term);
+ return internalTermStats(term);
}
- public long internalFrequency(BytesRef term) throws IOException {
+ public TermStats internalTermStats(BytesRef term) throws IOException {
if (termsEnum.seekExact(term)) {
- return useTotalTermFrequency ? termsEnum.totalTermFreq() : termsEnum.docFreq();
+ return new TermStats(termsEnum.docFreq(),
+ /*
+ * When TermsEnum#totalTermFreq() is -1 (the field does not record it),
+ * TermsEnum#docFreq() is used as the total term frequency instead.
+ */
+ termsEnum.totalTermFreq() == -1 ? termsEnum.docFreq() : termsEnum.totalTermFreq());
}
- return 0;
+ return new TermStats(0, 0);
}
public String getField() {
@@ -127,15 +132,28 @@ public String getField() {
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
Candidate original = set.originalTerm;
BytesRef term = preFilter(original.term, spare, byteSpare);
- final long frequency = original.frequency;
- spellchecker.setThresholdFrequency(this.suggestMode == SuggestMode.SUGGEST_ALWAYS ? 0 : thresholdFrequency(frequency, dictSize));
+ if (suggestMode != SuggestMode.SUGGEST_ALWAYS) {
+ /**
+ * We use the {@link TermStats#docFreq} to compute the frequency threshold
+ * because that's what {@link DirectSpellChecker#suggestSimilar} expects
+ * when filtering terms.
+ */
+ int threshold = thresholdTermFrequency(original.termStats.docFreq);
+ if (threshold == Integer.MAX_VALUE) {
+ // the threshold is the max possible frequency so we can skip the search
+ return set;
+ }
+ spellchecker.setThresholdFrequency(threshold);
+ }
+
SuggestWord[] suggestSimilar = spellchecker.suggestSimilar(new Term(field, term), numCandidates, reader, this.suggestMode);
List candidates = new ArrayList<>(suggestSimilar.length);
for (int i = 0; i < suggestSimilar.length; i++) {
SuggestWord suggestWord = suggestSimilar[i];
BytesRef candidate = new BytesRef(suggestWord.string);
- postFilter(new Candidate(candidate, internalFrequency(candidate), suggestWord.score,
- score(suggestWord.freq, suggestWord.score, dictSize), false), spare, byteSpare, candidates);
+ TermStats termStats = internalTermStats(candidate);
+ postFilter(new Candidate(candidate, termStats,
+ suggestWord.score, score(termStats, suggestWord.score, sumTotalTermFreq), false), spare, byteSpare, candidates);
}
set.addCandidates(candidates);
return set;
@@ -171,28 +189,30 @@ public void nextToken() throws IOException {
BytesRef term = result.toBytesRef();
// We should not use frequency(term) here because it will analyze the term again
// If preFilter and postFilter are the same analyzer it would fail.
- long freq = internalFrequency(term);
- candidates.add(new Candidate(result.toBytesRef(), freq, candidate.stringDistance,
- score(candidate.frequency, candidate.stringDistance, dictSize), false));
+ TermStats termStats = internalTermStats(term);
+ candidates.add(new Candidate(result.toBytesRef(), termStats, candidate.stringDistance,
+ score(candidate.termStats, candidate.stringDistance, sumTotalTermFreq), false));
} else {
- candidates.add(new Candidate(result.toBytesRef(), candidate.frequency, nonErrorLikelihood,
- score(candidate.frequency, candidate.stringDistance, dictSize), false));
+ candidates.add(new Candidate(result.toBytesRef(), candidate.termStats, nonErrorLikelihood,
+ score(candidate.termStats, candidate.stringDistance, sumTotalTermFreq), false));
}
}
}, spare);
}
}
- private double score(long frequency, double errorScore, long dictionarySize) {
- return errorScore * (((double)frequency + 1) / ((double)dictionarySize +1));
+ private double score(TermStats termStats, double errorScore, long dictionarySize) {
+ return errorScore * (((double)termStats.totalTermFreq + 1) / ((double)dictionarySize +1));
}
- protected long thresholdFrequency(long termFrequency, long dictionarySize) {
- if (termFrequency > 0) {
- return max(0, round(termFrequency * (log10(termFrequency - frequencyPlateau) * (1.0 / log10(LOG_BASE))) + 1));
+ // package-private for tests; the result is clamped to the range [0, Integer.MAX_VALUE]
+ int thresholdTermFrequency(int docFreq) {
+ if (docFreq > 0) {
+ return (int) min(
+ max(0, round(docFreq * (log10(docFreq - frequencyPlateau) * (1.0 / log10(LOG_BASE))) + 1)), Integer.MAX_VALUE
+ );
}
return 0;
-
}
public abstract static class TokenConsumer {
@@ -249,12 +269,12 @@ public static class Candidate implements Comparable {
public static final Candidate[] EMPTY = new Candidate[0];
public final BytesRef term;
public final double stringDistance;
- public final long frequency;
+ public final TermStats termStats;
public final double score;
public final boolean userInput;
- public Candidate(BytesRef term, long frequency, double stringDistance, double score, boolean userInput) {
- this.frequency = frequency;
+ public Candidate(BytesRef term, TermStats termStats, double stringDistance, double score, boolean userInput) {
+ this.termStats = termStats;
this.term = term;
this.stringDistance = stringDistance;
this.score = score;
@@ -266,7 +286,7 @@ public String toString() {
return "Candidate [term=" + term.utf8ToString()
+ ", stringDistance=" + stringDistance
+ ", score=" + score
- + ", frequency=" + frequency
+ + ", termStats=" + termStats
+ (userInput ? ", userInput" : "") + "]";
}
@@ -305,8 +325,8 @@ public int compareTo(Candidate other) {
}
@Override
- public Candidate createCandidate(BytesRef term, long frequency, double channelScore, boolean userInput) throws IOException {
- return new Candidate(term, frequency, channelScore, score(frequency, channelScore, dictSize), userInput);
+ public Candidate createCandidate(BytesRef term, TermStats termStats, double channelScore, boolean userInput) throws IOException {
+ return new Candidate(term, termStats, channelScore, score(termStats, channelScore, sumTotalTermFreq), userInput);
}
public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field, TokenConsumer consumer, CharsRefBuilder spare)
diff --git a/server/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java b/server/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java
index d9797a4207e22..52157a0fe8bde 100644
--- a/server/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java
+++ b/server/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java
@@ -46,7 +46,7 @@ protected double scoreUnigram(Candidate word) throws IOException {
@Override
protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
join(separator, spare, w_1.term, word.term);
- return (alpha + frequency(spare.get())) / (w_1.frequency + alpha * numTerms);
+ return (alpha + frequency(spare.get())) / (w_1.termStats.totalTermFreq + alpha * numTerms);
}
@Override
diff --git a/server/src/main/java/org/elasticsearch/search/suggest/phrase/LinearInterpolatingScorer.java b/server/src/main/java/org/elasticsearch/search/suggest/phrase/LinearInterpolatingScorer.java
index c6d67fe8cf7cd..b0c9552f8a8d6 100644
--- a/server/src/main/java/org/elasticsearch/search/suggest/phrase/LinearInterpolatingScorer.java
+++ b/server/src/main/java/org/elasticsearch/search/suggest/phrase/LinearInterpolatingScorer.java
@@ -60,7 +60,7 @@ protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
if (count < 1) {
return unigramLambda * scoreUnigram(word);
}
- return bigramLambda * (count / (0.5d + w_1.frequency)) + unigramLambda * scoreUnigram(word);
+ return bigramLambda * (count / (0.5d + w_1.termStats.totalTermFreq)) + unigramLambda * scoreUnigram(word);
}
@Override
diff --git a/server/src/main/java/org/elasticsearch/search/suggest/phrase/MultiCandidateGeneratorWrapper.java b/server/src/main/java/org/elasticsearch/search/suggest/phrase/MultiCandidateGeneratorWrapper.java
index 904822e389483..e0e93e26164d3 100644
--- a/server/src/main/java/org/elasticsearch/search/suggest/phrase/MultiCandidateGeneratorWrapper.java
+++ b/server/src/main/java/org/elasticsearch/search/suggest/phrase/MultiCandidateGeneratorWrapper.java
@@ -18,6 +18,7 @@
*/
package org.elasticsearch.search.suggest.phrase;
+import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.Candidate;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator.CandidateSet;
@@ -41,8 +42,8 @@ public boolean isKnownWord(BytesRef term) throws IOException {
}
@Override
- public long frequency(BytesRef term) throws IOException {
- return candidateGenerator[0].frequency(term);
+ public TermStats termStats(BytesRef term) throws IOException {
+ return candidateGenerator[0].termStats(term);
}
@Override
@@ -65,8 +66,8 @@ private CandidateSet reduce(CandidateSet set, int numCandidates) {
return set;
}
@Override
- public Candidate createCandidate(BytesRef term, long frequency, double channelScore, boolean userInput) throws IOException {
- return candidateGenerator[0].createCandidate(term, frequency, channelScore, userInput);
+ public Candidate createCandidate(BytesRef term, TermStats termStats, double channelScore, boolean userInput) throws IOException {
+ return candidateGenerator[0].createCandidate(term, termStats, channelScore, userInput);
}
}
diff --git a/server/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java b/server/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java
index eb9694c6039b7..635fa64c59b53 100644
--- a/server/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java
+++ b/server/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java
@@ -23,6 +23,7 @@
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@@ -84,9 +85,9 @@ public void nextToken() throws IOException {
anyUnigram = true;
if (posIncAttr.getPositionIncrement() == 0 && typeAttribute.type() == SynonymFilter.TYPE_SYNONYM) {
assert currentSet != null;
- long freq = 0;
- if ((freq = generator.frequency(term)) > 0) {
- currentSet.addOneCandidate(generator.createCandidate(BytesRef.deepCopyOf(term), freq, realWordLikelihood));
+ TermStats termStats = generator.termStats(term);
+ if (termStats.docFreq > 0) {
+ currentSet.addOneCandidate(generator.createCandidate(BytesRef.deepCopyOf(term), termStats, realWordLikelihood));
}
} else {
if (currentSet != null) {
@@ -131,9 +132,11 @@ public void end() {
}
public Result getCorrections(Analyzer analyzer, BytesRef query, CandidateGenerator generator,
- float maxErrors, int numCorrections, IndexReader reader, String analysisField, WordScorer scorer, float confidence, int gramSize) throws IOException {
+ float maxErrors, int numCorrections, IndexReader reader, String analysisField,
+ WordScorer scorer, float confidence, int gramSize) throws IOException {
- return getCorrections(tokenStream(analyzer, query, new CharsRefBuilder(), analysisField), generator, maxErrors, numCorrections, scorer, confidence, gramSize);
+ return getCorrections(tokenStream(analyzer, query, new CharsRefBuilder(), analysisField), generator, maxErrors,
+ numCorrections, scorer, confidence, gramSize);
}
diff --git a/server/src/main/java/org/elasticsearch/search/suggest/phrase/StupidBackoffScorer.java b/server/src/main/java/org/elasticsearch/search/suggest/phrase/StupidBackoffScorer.java
index 166a4182c8f81..d6862f384bebf 100644
--- a/server/src/main/java/org/elasticsearch/search/suggest/phrase/StupidBackoffScorer.java
+++ b/server/src/main/java/org/elasticsearch/search/suggest/phrase/StupidBackoffScorer.java
@@ -28,8 +28,8 @@
class StupidBackoffScorer extends WordScorer {
private final double discount;
- StupidBackoffScorer(IndexReader reader, Terms terms,String field, double realWordLikelyhood, BytesRef separator, double discount)
- throws IOException {
+ StupidBackoffScorer(IndexReader reader, Terms terms,String field,
+ double realWordLikelyhood, BytesRef separator, double discount) throws IOException {
super(reader, terms, field, realWordLikelyhood, separator);
this.discount = discount;
}
@@ -45,7 +45,7 @@ protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
if (count < 1) {
return discount * scoreUnigram(word);
}
- return count / (w_1.frequency + 0.00000000001d);
+ return count / (w_1.termStats.totalTermFreq + 0.00000000001d);
}
@Override
@@ -60,7 +60,7 @@ protected double scoreTrigram(Candidate w, Candidate w_1, Candidate w_2) throws
join(separator, spare, w_2.term, w_1.term, w.term);
long trigramCount = frequency(spare.get());
if (trigramCount < 1) {
- return discount * (bigramCount / (w_1.frequency + 0.00000000001d));
+ return discount * (bigramCount / (w_1.termStats.totalTermFreq + 0.00000000001d));
}
return trigramCount / (bigramCount + 0.00000000001d);
}
diff --git a/server/src/main/java/org/elasticsearch/search/suggest/phrase/WordScorer.java b/server/src/main/java/org/elasticsearch/search/suggest/phrase/WordScorer.java
index 22515489ee252..1bdf1c90d7d09 100644
--- a/server/src/main/java/org/elasticsearch/search/suggest/phrase/WordScorer.java
+++ b/server/src/main/java/org/elasticsearch/search/suggest/phrase/WordScorer.java
@@ -62,7 +62,8 @@ public WordScorer(IndexReader reader, Terms terms, String field, double realWord
// division by zero, by scoreUnigram.
final long nTerms = terms.size();
this.numTerms = nTerms == -1 ? reader.maxDoc() : nTerms;
- this.termsEnum = new FreqTermsEnum(reader, field, !useTotalTermFreq, useTotalTermFreq, null, BigArrays.NON_RECYCLING_INSTANCE); // non recycling for now
+ this.termsEnum = new FreqTermsEnum(reader, field, !useTotalTermFreq, useTotalTermFreq, null,
+ BigArrays.NON_RECYCLING_INSTANCE); // non recycling for now
this.reader = reader;
this.realWordLikelyhood = realWordLikelyHood;
this.separator = separator;
diff --git a/server/src/test/java/org/elasticsearch/index/suggest/stats/SuggestStatsIT.java b/server/src/test/java/org/elasticsearch/index/suggest/stats/SuggestStatsIT.java
index 7b7e7a41783a3..c355725f62acd 100644
--- a/server/src/test/java/org/elasticsearch/index/suggest/stats/SuggestStatsIT.java
+++ b/server/src/test/java/org/elasticsearch/index/suggest/stats/SuggestStatsIT.java
@@ -106,9 +106,12 @@ public void testSimpleStats() throws Exception {
assertThat(suggest.getSuggestCurrent(), equalTo(0L));
// check suggest count
- assertThat(suggest.getSuggestCount(), equalTo((long) (suggestAllIdx * totalShards + suggestIdx1 * shardsIdx1 + suggestIdx2 * shardsIdx2)));
- assertThat(indicesStats.getIndices().get("test1").getTotal().getSearch().getTotal().getSuggestCount(), equalTo((long) ((suggestAllIdx + suggestIdx1) * shardsIdx1)));
- assertThat(indicesStats.getIndices().get("test2").getTotal().getSearch().getTotal().getSuggestCount(), equalTo((long) ((suggestAllIdx + suggestIdx2) * shardsIdx2)));
+ assertThat(suggest.getSuggestCount(),
+ equalTo((long) (suggestAllIdx * totalShards + suggestIdx1 * shardsIdx1 + suggestIdx2 * shardsIdx2)));
+ assertThat(indicesStats.getIndices().get("test1").getTotal().getSearch().getTotal().getSuggestCount(),
+ equalTo((long) ((suggestAllIdx + suggestIdx1) * shardsIdx1)));
+ assertThat(indicesStats.getIndices().get("test2").getTotal().getSearch().getTotal().getSuggestCount(),
+ equalTo((long) ((suggestAllIdx + suggestIdx2) * shardsIdx2)));
logger.info("iter {}, iter1 {}, iter2 {}, {}", suggestAllIdx, suggestIdx1, suggestIdx2, endTime - startTime);
// check suggest time
diff --git a/server/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java b/server/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java
index 4ae19a1b6b04b..5bff24b934837 100644
--- a/server/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java
+++ b/server/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java
@@ -19,11 +19,20 @@
package org.elasticsearch.search.suggest.phrase;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.JaroWinklerDistance;
import org.apache.lucene.search.spell.LevenshteinDistance;
import org.apache.lucene.search.spell.LuceneLevenshteinDistance;
import org.apache.lucene.search.spell.NGramDistance;
+import org.apache.lucene.search.spell.SuggestMode;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
@@ -32,7 +41,6 @@
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.common.xcontent.json.JsonXContent;
-import org.elasticsearch.search.suggest.phrase.PhraseSuggestionContext.DirectCandidateGenerator;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
@@ -133,7 +141,8 @@ public void testFromXContent() throws IOException {
}
}
- public static void assertEqualGenerators(DirectCandidateGenerator first, DirectCandidateGenerator second) {
+ public static void assertEqualGenerators(PhraseSuggestionContext.DirectCandidateGenerator first,
+ PhraseSuggestionContext.DirectCandidateGenerator second) {
assertEquals(first.field(), second.field());
assertEquals(first.accuracy(), second.accuracy(), Float.MIN_VALUE);
assertEquals(first.maxTermFreq(), second.maxTermFreq(), Float.MIN_VALUE);
@@ -176,6 +185,66 @@ public void testIllegalXContent() throws IOException {
"[direct_generator] size doesn't support values of type: START_ARRAY");
}
+ public void testFrequencyThreshold() throws Exception {
+ try (Directory dir = newDirectory()) {
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig());
+ int numDocs = randomIntBetween(10, 20);
+ for (int i = 0; i < numDocs; i++) {
+ Document doc = new Document();
+ if (i == 0) {
+ for (int j = 0; j < numDocs; j++) {
+ doc.add(new TextField("field", "fooz", Field.Store.NO));
+ }
+ } else {
+ doc.add(new TextField("field", "foo", Field.Store.NO));
+ }
+ writer.addDocument(doc);
+ }
+ try (IndexReader reader = DirectoryReader.open(writer)) {
+ writer.close();
+ DirectSpellChecker spellchecker = new DirectSpellChecker();
+ DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "field", SuggestMode.SUGGEST_MORE_POPULAR,
+ reader, 0f, 10);
+ DirectCandidateGenerator.CandidateSet candidateSet =
+ generator.drawCandidates(new DirectCandidateGenerator.CandidateSet(DirectCandidateGenerator.Candidate.EMPTY,
+ generator.createCandidate(new BytesRef("fooz"), false)));
+ assertThat(candidateSet.candidates.length, equalTo(1));
+ assertThat(candidateSet.candidates[0].termStats.docFreq, equalTo(numDocs - 1));
+ assertThat(candidateSet.candidates[0].termStats.totalTermFreq, equalTo((long) numDocs - 1));
+
+ // the threshold must be clamped to Integer.MAX_VALUE instead of overflowing
+ assertThat(generator.thresholdTermFrequency(Integer.MAX_VALUE), equalTo(Integer.MAX_VALUE));
+
+ spellchecker = new DirectSpellChecker();
+ spellchecker.setThresholdFrequency(0.5f);
+ generator = new DirectCandidateGenerator(spellchecker, "field", SuggestMode.SUGGEST_MORE_POPULAR,
+ reader, 0f, 10);
+ candidateSet =
+ generator.drawCandidates(new DirectCandidateGenerator.CandidateSet(DirectCandidateGenerator.Candidate.EMPTY,
+ generator.createCandidate(new BytesRef("fooz"), false)));
+ assertThat(candidateSet.candidates.length, equalTo(1));
+ assertThat(candidateSet.candidates[0].termStats.docFreq, equalTo(numDocs - 1));
+ assertThat(candidateSet.candidates[0].termStats.totalTermFreq, equalTo((long) numDocs - 1));
+
+ // clamping must also hold when the spellchecker has a fractional threshold frequency
+ assertThat(generator.thresholdTermFrequency(Integer.MAX_VALUE), equalTo(Integer.MAX_VALUE));
+
+ spellchecker = new DirectSpellChecker();
+ spellchecker.setThresholdFrequency(0.5f);
+ generator = new DirectCandidateGenerator(spellchecker, "field", SuggestMode.SUGGEST_ALWAYS,
+ reader, 0f, 10);
+ candidateSet =
+ generator.drawCandidates(new DirectCandidateGenerator.CandidateSet(DirectCandidateGenerator.Candidate.EMPTY,
+ generator.createCandidate(new BytesRef("fooz"), false)));
+ assertThat(candidateSet.candidates.length, equalTo(1));
+
+ // clamping must also hold for a generator created with SUGGEST_ALWAYS
+ assertThat(generator.thresholdTermFrequency(Integer.MAX_VALUE), equalTo(Integer.MAX_VALUE));
+ }
+ }
+
+ }
+
private void assertIllegalXContent(String directGenerator, Class extends Exception> exceptionClass, String exceptionMsg)
throws IOException {
try (XContentParser parser = createParser(JsonXContent.jsonXContent, directGenerator)) {
diff --git a/server/src/test/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellCheckerTests.java b/server/src/test/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellCheckerTests.java
index 40b2b023334ca..171bb0bf1697f 100644
--- a/server/src/test/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellCheckerTests.java
+++ b/server/src/test/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellCheckerTests.java
@@ -110,20 +110,24 @@ protected TokenStreamComponents createComponents(String fieldName) {
}
DirectoryReader ir = DirectoryReader.open(writer);
- WordScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
+ WordScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d,
+ new BytesRef(" "), 0.5f);
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
DirectSpellChecker spellchecker = new DirectSpellChecker();
spellchecker.setMinQueryLength(1);
- DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 5);
- Result result = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 2);
+ DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR,
+ ir, 0.95, 5);
+ Result result = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1,
+ ir, "body", wordScorer, 1, 2);
Correction[] corrections = result.corrections;
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(space).utf8ToString(), equalTo("american ace"));
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("american ace"));
assertThat(result.cutoffScore, greaterThan(0d));
- result = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 0, 1);
+ result = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1,
+ ir, "body", wordScorer, 0, 1);
corrections = result.corrections;
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(space).utf8ToString(), equalTo("american ame"));
@@ -131,8 +135,10 @@ protected TokenStreamComponents createComponents(String fieldName) {
assertThat(result.cutoffScore, equalTo(Double.MIN_VALUE));
suggester = new NoisyChannelSpellChecker(0.85);
- wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
- corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2).corrections;
+ wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
+ new BytesRef(" "), 0.5f);
+ corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4,
+ ir, "body", wordScorer, 0, 2).corrections;
assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
assertThat(corrections[1].join(space).utf8ToString(), equalTo("xor the god jewel"));
@@ -143,7 +149,8 @@ protected TokenStreamComponents createComponents(String fieldName) {
assertThat(corrections[2].join(space, preTag, postTag).utf8ToString(), equalTo("xorn the god jewel"));
assertThat(corrections[3].join(space, preTag, postTag).utf8ToString(), equalTo("xorr the got jewel"));
- corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 1, 2).corrections;
+ corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f,
+ 4, ir, "body", wordScorer, 1, 2).corrections;
assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
assertThat(corrections[1].join(space).utf8ToString(), equalTo("xor the god jewel"));
@@ -152,8 +159,10 @@ protected TokenStreamComponents createComponents(String fieldName) {
// Test some of the highlighting corner cases
suggester = new NoisyChannelSpellChecker(0.85);
- wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
- corrections = suggester.getCorrections(wrapper, new BytesRef("Xor teh Got-Jewel"), generator, 4f, 4, ir, "body", wordScorer, 1, 2).corrections;
+ wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
+ new BytesRef(" "), 0.5f);
+ corrections = suggester.getCorrections(wrapper, new BytesRef("Xor teh Got-Jewel"), generator, 4f, 4,
+ ir, "body", wordScorer, 1, 2).corrections;
assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(space).utf8ToString(), equalTo("xorr the god jewel"));
assertThat(corrections[1].join(space).utf8ToString(), equalTo("xor the god jewel"));
@@ -187,19 +196,25 @@ protected TokenStreamComponents createComponents(String fieldName) {
spellchecker.setMinPrefix(1);
spellchecker.setMinQueryLength(1);
suggester = new NoisyChannelSpellChecker(0.85);
- wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5f);
- corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 2).corrections;
+ wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
+ new BytesRef(" "), 0.5f);
+ corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4,
+ ir, "body", wordScorer, 1, 2).corrections;
assertThat(corrections[0].join(space).utf8ToString(), equalTo("captain america"));
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("captain america"));
- generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
- corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2).corrections;
+ generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85,
+ 10, null, analyzer, MultiFields.getTerms(ir, "body"));
+ corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4,
+ ir, "body", wordScorer, 1, 2).corrections;
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("captain america"));
// Make sure that user supplied text is not marked as highlighted in the presence of a synonym filter
- generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
- corrections = suggester.getCorrections(analyzer, new BytesRef("captain usw"), generator, 2, 4, ir, "body", wordScorer, 1, 2).corrections;
+ generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.85,
+ 10, null, analyzer, MultiFields.getTerms(ir, "body"));
+ corrections = suggester.getCorrections(analyzer, new BytesRef("captain usw"), generator, 2, 4, ir,
+ "body", wordScorer, 1, 2).corrections;
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
assertThat(corrections[0].join(space, preTag, postTag).utf8ToString(), equalTo("captain america"));
}
@@ -265,47 +280,58 @@ protected TokenStreamComponents createComponents(String fieldName) {
}
DirectoryReader ir = DirectoryReader.open(writer);
- LaplaceScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5f);
+ LaplaceScorer wordScorer = new LaplaceScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d,
+ new BytesRef(" "), 0.5f);
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
DirectSpellChecker spellchecker = new DirectSpellChecker();
spellchecker.setMinQueryLength(1);
- DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10);
- DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir, 0.95, 10, wrapper, wrapper, MultiFields.getTerms(ir, "body_reverse"));
+ DirectCandidateGenerator forward = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_ALWAYS, ir,
+ 0.95, 10);
+ DirectCandidateGenerator reverse = new DirectCandidateGenerator(spellchecker, "body_reverse", SuggestMode.SUGGEST_ALWAYS, ir,
+ 0.95, 10, wrapper, wrapper, MultiFields.getTerms(ir, "body_reverse"));
CandidateGenerator generator = new MultiCandidateGeneratorWrapper(10, forward, reverse);
- Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1, ir, "body", wordScorer, 1, 2).corrections;
+ Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), generator, 1, 1,
+ ir, "body", wordScorer, 1, 2).corrections;
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
generator = new MultiCandidateGeneratorWrapper(5, forward, reverse);
- corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 2).corrections;
+ corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir,
+ "body", wordScorer, 1, 2).corrections;
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
- corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), forward, 1, 1, ir, "body", wordScorer, 1, 2).corrections;
+ corrections = suggester.getCorrections(wrapper, new BytesRef("american cae"), forward, 1, 1, ir,
+ "body", wordScorer, 1, 2).corrections;
assertThat(corrections.length, equalTo(0)); // only use forward with constant prefix
- corrections = suggester.getCorrections(wrapper, new BytesRef("america cae"), generator, 2, 1, ir, "body", wordScorer, 1, 2).corrections;
+ corrections = suggester.getCorrections(wrapper, new BytesRef("america cae"), generator, 2, 1, ir,
+ "body", wordScorer, 1, 2).corrections;
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
- corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 2).corrections;
+ corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 4, ir,
+ "body", wordScorer, 0, 2).corrections;
assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("zorr the god jewel"));
assertThat(corrections[2].join(new BytesRef(" ")).utf8ToString(), equalTo("four the god jewel"));
- corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2).corrections;
+ corrections = suggester.getCorrections(wrapper, new BytesRef("Zorr the Got-Jewel"), generator, 0.5f, 1, ir,
+ "body", wordScorer, 1.5f, 2).corrections;
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
- corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 1.5f, 2).corrections;
+ corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1, ir,
+ "body", wordScorer, 1.5f, 2).corrections;
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
// Test a special case where one of the suggest term is unchanged by the postFilter, 'II' here is unchanged by the reverse analyzer.
- corrections = suggester.getCorrections(wrapper, new BytesRef("Quazar II"), generator, 1, 1, ir, "body", wordScorer, 1, 2).corrections;
+ corrections = suggester.getCorrections(wrapper, new BytesRef("Quazar II"), generator, 1, 1, ir,
+ "body", wordScorer, 1, 2).corrections;
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("quasar ii"));
}
@@ -362,22 +388,28 @@ protected TokenStreamComponents createComponents(String fieldName) {
}
DirectoryReader ir = DirectoryReader.open(writer);
- WordScorer wordScorer = new LinearInterpolatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
+ WordScorer wordScorer = new LinearInterpolatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
+ new BytesRef(" "), 0.5, 0.4, 0.1);
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
DirectSpellChecker spellchecker = new DirectSpellChecker();
spellchecker.setMinQueryLength(1);
- DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 5);
- Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 3).corrections;
+ DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir,
+ 0.95, 5);
+ Correction[] corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1,
+ ir, "body", wordScorer, 1, 3).corrections;
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ace"));
- corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1, ir, "body", wordScorer, 1, 1).corrections;
+ corrections = suggester.getCorrections(wrapper, new BytesRef("american ame"), generator, 1, 1,
+ ir, "body", wordScorer, 1, 1).corrections;
assertThat(corrections.length, equalTo(0));
// assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("american ape"));
- wordScorer = new LinearInterpolatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.5, 0.4, 0.1);
- corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 0, 3).corrections;
+ wordScorer = new LinearInterpolatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
+ new BytesRef(" "), 0.5, 0.4, 0.1);
+ corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4,
+ ir, "body", wordScorer, 0, 3).corrections;
assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
@@ -387,7 +419,8 @@ protected TokenStreamComponents createComponents(String fieldName) {
- corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4, ir, "body", wordScorer, 1, 3).corrections;
+ corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 4,
+ ir, "body", wordScorer, 1, 3).corrections;
assertThat(corrections.length, equalTo(4));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
@@ -395,7 +428,8 @@ protected TokenStreamComponents createComponents(String fieldName) {
assertThat(corrections[3].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the got jewel"));
- corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1, ir, "body", wordScorer, 100, 3).corrections;
+ corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 1,
+ ir, "body", wordScorer, 100, 3).corrections;
assertThat(corrections.length, equalTo(1));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
@@ -423,17 +457,23 @@ protected TokenStreamComponents createComponents(String fieldName) {
spellchecker.setMinPrefix(1);
spellchecker.setMinQueryLength(1);
suggester = new NoisyChannelSpellChecker(0.95);
- wordScorer = new LinearInterpolatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d, new BytesRef(" "), 0.5, 0.4, 0.1);
- corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4, ir, "body", wordScorer, 1, 3).corrections;
+ wordScorer = new LinearInterpolatingScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.95d,
+ new BytesRef(" "), 0.5, 0.4, 0.1);
+ corrections = suggester.getCorrections(analyzer, new BytesRef("captian usa"), generator, 2, 4,
+ ir, "body", wordScorer, 1, 3).corrections;
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
- generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 10, null, analyzer, MultiFields.getTerms(ir, "body"));
- corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4, ir, "body", wordScorer, 1, 3).corrections;
+ generator = new DirectCandidateGenerator(spellchecker, "body", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95,
+ 10, null, analyzer, MultiFields.getTerms(ir, "body"));
+ corrections = suggester.getCorrections(analyzer, new BytesRef("captian usw"), generator, 2, 4,
+ ir, "body", wordScorer, 1, 3).corrections;
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("captain america"));
- wordScorer = new StupidBackoffScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d, new BytesRef(" "), 0.4);
- corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 2, ir, "body", wordScorer, 0, 3).corrections;
+ wordScorer = new StupidBackoffScorer(ir, MultiFields.getTerms(ir, "body_ngram"), "body_ngram", 0.85d,
+ new BytesRef(" "), 0.4);
+ corrections = suggester.getCorrections(wrapper, new BytesRef("Xor the Got-Jewel"), generator, 0.5f, 2,
+ ir, "body", wordScorer, 0, 3).corrections;
assertThat(corrections.length, equalTo(2));
assertThat(corrections[0].join(new BytesRef(" ")).utf8ToString(), equalTo("xorr the god jewel"));
assertThat(corrections[1].join(new BytesRef(" ")).utf8ToString(), equalTo("xor the god jewel"));
@@ -452,11 +492,14 @@ public void testFewDocsEgdeCase() throws Exception {
}
try (DirectoryReader ir = DirectoryReader.open(dir)) {
- WordScorer wordScorer = new StupidBackoffScorer(ir, MultiFields.getTerms(ir, "field"), "field", 0.95d, new BytesRef(" "), 0.4f);
+ WordScorer wordScorer = new StupidBackoffScorer(ir, MultiFields.getTerms(ir, "field"), "field", 0.95d,
+ new BytesRef(" "), 0.4f);
NoisyChannelSpellChecker suggester = new NoisyChannelSpellChecker();
DirectSpellChecker spellchecker = new DirectSpellChecker();
- DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "field", SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 5);
- Result result = suggester.getCorrections(new StandardAnalyzer(), new BytesRef("valeu"), generator, 1, 1, ir, "field", wordScorer, 1, 2);
+ DirectCandidateGenerator generator = new DirectCandidateGenerator(spellchecker, "field",
+ SuggestMode.SUGGEST_MORE_POPULAR, ir, 0.95, 5);
+ Result result = suggester.getCorrections(new StandardAnalyzer(), new BytesRef("valeu"), generator, 1, 1,
+ ir, "field", wordScorer, 1, 2);
assertThat(result.corrections.length, equalTo(1));
assertThat(result.corrections[0].join(space).utf8ToString(), equalTo("value"));
}