Skip to content

Commit

Permalink
LUCENE-9953: Make FacetResult#value accurate for LongValueFacetCounts…
Browse files Browse the repository at this point in the history
… multi-value doc cases (#131)

Co-authored-by: Greg Miller <[email protected]>
  • Loading branch information
gsmiller and Greg Miller authored May 18, 2021
1 parent ade50f0 commit 65820e5
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 8 deletions.
4 changes: 4 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,10 @@ Bug Fixes

* LUCENE-9958: Fixed performance regression for boolean queries that configure a
minimum number of matching clauses. (Adrien Grand, Matt Weber)

* LUCENE-9953: LongValueFacetCounts should count each document at most once when determining
the total count for a dimension. Prior to this fix, multi-value docs could contribute a > 1
count to the dimension count. (Greg Miller)

Other
---------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,12 @@ public final class FacetResult {
public final String[] path;

/**
* Total value for this path (sum of all child counts, or sum of all child values), even those not
* included in the topN.
* Total number of documents containing a value for this path, even those not included in the
* topN. If a document contains multiple values for the same path, it will only be counted once in
* this value.
*/
// TODO: This may not hold true for SSDV faceting, where docs can be counted more than
// once. We should fix this. See LUCENE-9952
public final Number value;

/** How many child labels were encountered. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,9 @@ private void count(String field, List<MatchingDocs> matchingDocs) throws IOExcep

for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
int limit = multiValues.docValueCount();
totCount += limit;
if (limit > 0) {
totCount++;
}
for (int i = 0; i < limit; i++) {
increment(multiValues.nextValue());
}
Expand Down Expand Up @@ -204,7 +206,9 @@ private void countAll(IndexReader reader, String field) throws IOException {

while (multiValues.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
int limit = multiValues.docValueCount();
totCount += limit;
if (limit > 0) {
totCount++;
}
for (int i = 0; i < limit; i++) {
increment(multiValues.nextValue());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -429,15 +429,15 @@ public void testRandomMultiValued() throws Exception {
int expectedChildCount = 0;
int expectedTotalCount = 0;
for (int i = 0; i < valueCount; i++) {
if (values[i] != null) {
if (values[i] != null && values[i].length > 0) {
expectedTotalCount++;
for (long value : values[i]) {
Integer curCount = expected.get(value);
if (curCount == null) {
curCount = 0;
expectedChildCount++;
}
expected.put(value, curCount + 1);
expectedTotalCount++;
}
}
}
Expand Down Expand Up @@ -520,9 +520,9 @@ public void testRandomMultiValued() throws Exception {
expectedChildCount = 0;
int totCount = 0;
for (int i = minId; i <= maxId; i++) {
if (values[i] != null) {
if (values[i] != null && values[i].length > 0) {
totCount++;
for (long value : values[i]) {
totCount++;
Integer curCount = expected.get(value);
if (curCount == null) {
expectedChildCount++;
Expand Down

0 comments on commit 65820e5

Please sign in to comment.