forked from elastic/elasticsearch
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
New version of the script_score term stats helpers.
- Loading branch information
Showing
7 changed files
with
214 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
74 changes: 74 additions & 0 deletions
74
server/src/main/java/org/elasticsearch/script/TermStatsReader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
package org.elasticsearch.script; | ||
|
||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.index.PostingsEnum; | ||
import org.apache.lucene.index.Term; | ||
import org.apache.lucene.index.TermState; | ||
import org.apache.lucene.index.TermStates; | ||
import org.apache.lucene.index.TermsEnum; | ||
import org.apache.lucene.search.IndexSearcher; | ||
import org.apache.lucene.search.TermStatistics; | ||
|
||
import java.io.IOException; | ||
import java.io.UncheckedIOException; | ||
import java.util.Map; | ||
import java.util.Set; | ||
|
||
public class TermStatsReader { | ||
private final IndexSearcher searcher; | ||
private final Set<Term> terms; | ||
private final Map<Term, TermStates> termContexts; | ||
|
||
public TermStatsReader(IndexSearcher searcher, Set<Term> terms, Map<Term, TermStates> termContexts) { | ||
this.searcher = searcher; | ||
this.terms = terms; | ||
this.termContexts = termContexts; | ||
} | ||
|
||
public Set<Term> terms() { | ||
return terms; | ||
} | ||
|
||
public TermStatistics termStatistics(Term term) { | ||
try { | ||
if (termContexts.containsKey(term) == false) { | ||
return searcher.termStatistics(term, 0, 0); | ||
} | ||
|
||
return searcher.termStatistics(term, termContexts.get(term).docFreq(), termContexts.get(term).totalTermFreq()); | ||
} catch (IllegalArgumentException e) { | ||
return null; | ||
} catch (IOException e) { | ||
throw new UncheckedIOException(e); | ||
} | ||
} | ||
|
||
public PostingsEnum postings(LeafReaderContext leafReaderContext, Term term, int flags) { | ||
if (termContexts.containsKey(term) == false) { | ||
return null; | ||
} | ||
|
||
try { | ||
TermStates termContext = termContexts.get(term); | ||
TermState state = termContext.get(leafReaderContext); | ||
if (state == null || termContext.docFreq() == 0) { | ||
return null; | ||
} | ||
|
||
TermsEnum termsEnum = leafReaderContext.reader().terms(term.field()).iterator(); | ||
termsEnum.seekExact(term.bytes(), state); | ||
return termsEnum.postings(null, flags); | ||
|
||
} catch (IOException e) { | ||
throw new UncheckedIOException(e); | ||
} | ||
} | ||
} |
61 changes: 61 additions & 0 deletions
61
server/src/main/java/org/elasticsearch/script/TermStatsScriptUtils.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
/* | ||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
* or more contributor license agreements. Licensed under the Elastic License | ||
* 2.0 and the Server Side Public License, v 1; you may not use this file except | ||
* in compliance with, at your election, the Elastic License 2.0 or the Server | ||
* Side Public License, v 1. | ||
*/ | ||
|
||
package org.elasticsearch.script; | ||
|
||
import org.apache.lucene.index.PostingsEnum; | ||
import org.apache.lucene.search.TermStatistics; | ||
|
||
import java.io.IOException; | ||
import java.io.UncheckedIOException; | ||
import java.util.Collection; | ||
import java.util.DoubleSummaryStatistics; | ||
import java.util.function.Supplier; | ||
|
||
; | ||
|
||
public class TermStatsScriptUtils { | ||
|
||
public static final class DocumentFrequencyStatistics { | ||
private final Collection<TermStatistics> termsStatistics; | ||
|
||
public DocumentFrequencyStatistics(ScoreScript scoreScript) { | ||
this.termsStatistics = scoreScript._termStatistics().values(); | ||
} | ||
|
||
public DoubleSummaryStatistics documentFrequencyStatistics() { | ||
return termsStatistics.stream().mapToDouble(termStatistics -> termStatistics == null ? 0 : termStatistics.docFreq()).summaryStatistics(); | ||
} | ||
} | ||
|
||
public static final class TermFrequencyStatistics { | ||
private final Collection<PostingsEnum> postings; | ||
private final Supplier<Integer> docIdSupplier; | ||
|
||
public TermFrequencyStatistics(ScoreScript scoreScript) { | ||
postings = scoreScript._postings(PostingsEnum.FREQS).values(); | ||
docIdSupplier = scoreScript::_getDocId; | ||
} | ||
|
||
public DoubleSummaryStatistics termFrequencyStatistics() { | ||
return postings.stream().mapToDouble( | ||
currentPostings -> { | ||
try { | ||
int docId = docIdSupplier.get(); | ||
if (currentPostings == null || currentPostings.advance(docId) != docId) { | ||
return 0; | ||
} | ||
return currentPostings.freq(); | ||
} catch (IOException e) { | ||
throw new UncheckedIOException(e); | ||
} | ||
} | ||
).summaryStatistics(); | ||
} | ||
} | ||
} |