Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Laplace scorer to multiply by alpha (and not add) #27125

Merged
merged 2 commits into from
Oct 31, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ public void findCandidates(CandidateSet[] candidates, Candidate[] path, int ord,
private void updateTop(CandidateSet[] candidates, Candidate[] path, PriorityQueue<Correction> corrections, double cutoffScore, double score)
throws IOException {
score = Math.exp(score);
assert Math.abs(score - score(path, candidates)) < 0.00001;
assert Math.abs(score - score(path, candidates)) < 0.00001 : "cur_score=" + score + ", path_score=" + score(path,candidates);
if (score > cutoffScore) {
if (corrections.size() < maxNumCorrections) {
Candidate[] c = new Candidate[candidates.length];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,23 @@ final class LaplaceScorer extends WordScorer {
return this.alpha;
}

/**
 * Scores a single candidate word using Laplace (additive) smoothing.
 * <p>
 * The numerator is the word's frequency plus {@code alpha}; the denominator is
 * {@code vocabluarySize} plus {@code alpha} multiplied by {@code numTerms}.
 *
 * @param word the candidate whose {@code term} is passed to {@code frequency}
 * @return the smoothed ratio described above
 * @throws IOException declared by the signature; presumably raised by the frequency lookup
 */
@Override
protected double scoreUnigram(Candidate word) throws IOException {
return (alpha + frequency(word.term)) / (vocabluarySize + alpha * numTerms);
}

/**
 * Scores {@code word} given the candidate {@code w_1} that precedes it.
 * <p>
 * The numerator is {@code alpha} plus the frequency of the joined pair
 * ({@code w_1.term}, then {@code word.term}); the denominator is built from
 * {@code w_1.frequency}.
 *
 * @param word the candidate being scored
 * @param w_1  the candidate immediately before {@code word}
 * @return the smoothed bigram ratio
 * @throws IOException declared by the signature; presumably raised by the frequency lookup
 */
@Override
protected double scoreBigram(Candidate word, Candidate w_1) throws IOException {
// Presumably writes "w_1 <separator> word" into the shared spare buffer, which is read back via spare.get().
join(separator, spare, w_1.term, word.term);
// NOTE(review): this excerpt is a rendered diff without +/- markers, so two versions of the
// return statement appear back to back. The first adds alpha once to the denominator
// (alpha + w_1.frequency + vocabluarySize); presumably this is the removed line.
return (alpha + frequency(spare.get())) / (alpha + w_1.frequency + vocabluarySize);
// The second scales alpha by numTerms (w_1.frequency + alpha * numTerms), which matches the
// change title "multiply by alpha (and not add)"; presumably this is the added line.
return (alpha + frequency(spare.get())) / (w_1.frequency + alpha * numTerms);
}

/**
 * Scores {@code word} given the two candidates that precede it.
 * <p>
 * The numerator is {@code alpha} plus the frequency of the joined triple
 * ({@code w_2.term}, {@code w_1.term}, {@code word.term}); the denominator is
 * built from the frequency of the joined pair ({@code w_1.term}, {@code word.term}).
 *
 * @param word the candidate being scored
 * @param w_1  the candidate immediately before {@code word}
 * @param w_2  the candidate immediately before {@code w_1}
 * @return the smoothed trigram ratio
 * @throws IOException declared by the signature; presumably raised by the frequency lookups
 */
@Override
protected double scoreTrigram(Candidate word, Candidate w_1, Candidate w_2) throws IOException {
// Presumably writes the three terms, separator-joined, into the shared spare buffer.
join(separator, spare, w_2.term, w_1.term, word.term);
// Capture the trigram count now: spare is reused for the pair lookup below.
long trigramCount = frequency(spare.get());
// Reuse spare for the (w_1, word) pair whose frequency feeds the denominator.
join(separator, spare, w_1.term, word.term);
// NOTE(review): this excerpt is a rendered diff without +/- markers, so two versions of the
// return statement appear back to back. The first adds alpha once plus vocabluarySize to the
// denominator; presumably this is the removed line.
return (alpha + trigramCount) / (alpha + frequency(spare.get()) + vocabluarySize);
// The second uses the pair frequency plus alpha * numTerms, which matches the change title
// "multiply by alpha (and not add)"; presumably this is the added line.
return (alpha + trigramCount) / (frequency(spare.get()) + alpha * numTerms);
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ public abstract class WordScorer {
protected final double realWordLikelyhood;
protected final BytesRefBuilder spare = new BytesRefBuilder();
protected final BytesRef separator;
protected final long numTerms;
private final TermsEnum termsEnum;
private final long numTerms;
private final boolean useTotalTermFreq;

public WordScorer(IndexReader reader, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
Expand All @@ -57,10 +57,11 @@ public WordScorer(IndexReader reader, Terms terms, String field, double realWord
final long vocSize = terms.getSumTotalTermFreq();
this.vocabluarySize = vocSize == -1 ? reader.maxDoc() : vocSize;
this.useTotalTermFreq = vocSize != -1;
long numTerms = terms.size();
// -1 cannot be used as value, because scoreUnigram(...) can then divide by 0 if vocabluarySize is 1.
// -1 is returned when terms is a MultiTerms instance.
this.numTerms = vocabluarySize + numTerms > 1 ? numTerms : 0;
// terms.size() might be -1 if it's a MultiTerms instance. In that case,
// use reader.maxDoc() as an approximation. This also protects
// scoreUnigram from dividing by zero.
final long nTerms = terms.size();
this.numTerms = nTerms == -1 ? reader.maxDoc() : nTerms;
this.termsEnum = new FreqTermsEnum(reader, field, !useTotalTermFreq, useTotalTermFreq, null, BigArrays.NON_RECYCLING_INSTANCE); // non recycling for now
this.reader = reader;
this.realWordLikelyhood = realWordLikelyHood;
Expand Down
2 changes: 1 addition & 1 deletion docs/reference/search/suggesters/phrase-suggest.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ The response contains suggestions scored by the most likely spell correction fir
"options" : [ {
"text" : "nobel prize",
"highlighted": "<em>nobel</em> prize",
"score" : 0.5962314
"score" : 0.48614594
}]
}
]
Expand Down