Commit 2c63f8a

Removed map of subquery to subquery index in favor of storing index as part of disi wrapper to improve hybrid query latencies by 20%

Signed-off-by: Martin Gaievski <[email protected]>
martin-gaievski committed Apr 26, 2024
1 parent b277b07 commit 2c63f8a
Showing 4 changed files with 57 additions and 56 deletions.
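
The change in a nutshell: the previous code kept a Map<Query, int[]> from each sub-query to its position(s) in the hybrid query and, because identical sub-queries can map to several positions, scanned that array for the first slot whose score was still 0.0; the new code stores the sub-query index directly on the DISI wrapper, so collecting per-sub-query scores becomes a single array write. Below is a minimal, self-contained sketch of that pattern; the class names SubQueryWrapper and HybridScoresSketch are hypothetical stand-ins, not the project's classes (the real ones, HybridQueryScorer and HybridDisiWrapper, appear in the diffs that follow).

// Minimal sketch (hypothetical names) of the pattern this commit moves to.
// Before: score slots were found via a Map<Query, int[]> plus a scan for the first unset slot.
// After: each wrapper carries its own sub-query index, so a score lands in its slot with one array write.

import java.util.Arrays;
import java.util.List;

final class SubQueryWrapper {
    final float score;          // stand-in for scorer.score()
    final int subQueryIndex;    // position of this sub-query inside the hybrid query

    SubQueryWrapper(float score, int subQueryIndex) {
        this.score = score;
        this.subQueryIndex = subQueryIndex;
    }
}

public class HybridScoresSketch {
    // Collect per-sub-query scores for one document from the wrappers that matched it.
    static float[] hybridScores(List<SubQueryWrapper> matchedWrappers, int numOfSubQueries) {
        float[] scores = new float[numOfSubQueries];
        for (SubQueryWrapper wrapper : matchedWrappers) {
            scores[wrapper.subQueryIndex] = wrapper.score;
        }
        return scores;
    }

    public static void main(String[] args) {
        // Hybrid query with 3 sub-queries; this document matched sub-queries 0 and 2.
        List<SubQueryWrapper> matched = List.of(new SubQueryWrapper(0.8f, 0), new SubQueryWrapper(0.3f, 2));
        System.out.println(Arrays.toString(hybridScores(matched, 3)));   // [0.8, 0.0, 0.3]
    }
}
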
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -25,6 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Allowing execution of hybrid query on index alias with filters ([#670](https://github.com/opensearch-project/neural-search/pull/670))
- Allowing query by raw tokens in neural_sparse query ([#693](https://github.com/opensearch-project/neural-search/pull/693))
- Removed stream.findFirst implementation to use more native iteration implement to improve hybrid query latencies by 35% ([#706](https://github.com/opensearch-project/neural-search/pull/706))
- Removed map of subquery to subquery index in favor of storing index as part of disi wrapper to improve hybrid query latencies by 20% ([#711](https://github.com/opensearch-project/neural-search/pull/711))
### Bug Fixes
- Add support for request_cache flag in hybrid query ([#663](https://github.com/opensearch-project/neural-search/pull/663))
### Infrastructure
65 changes: 9 additions & 56 deletions HybridQueryScorer.java
@@ -8,25 +8,21 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;

import com.google.common.primitives.Ints;
import org.apache.lucene.search.DisiPriorityQueue;
import org.apache.lucene.search.DisiWrapper;
import org.apache.lucene.search.DisjunctionDISIApproximation;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;

import lombok.Getter;
import org.apache.lucene.util.PriorityQueue;
import org.opensearch.neuralsearch.search.HybridDisiWrapper;

/**
* Class abstracts functionality of Scorer for hybrid query. When iterating over documents in increasing
@@ -43,8 +39,6 @@ public final class HybridQueryScorer extends Scorer {

private final float[] subScores;

private final Map<Query, int[]> queryToIndex;

private final DocIdSetIterator approximation;
private final HybridScoreBlockBoundaryPropagator disjunctionBlockPropagator;
private final TwoPhase twoPhase;
@@ -57,7 +51,6 @@ public HybridQueryScorer(final Weight weight, final List<Scorer> subScorers) thr
super(weight);
this.subScorers = Collections.unmodifiableList(subScorers);
subScores = new float[subScorers.size()];
this.queryToIndex = mapQueryToIndex();
this.subScorersPQ = initializeSubScorersPQ();
boolean needsScores = scoreMode != ScoreMode.COMPLETE_NO_SCORES;

@@ -194,70 +187,30 @@ public int docID() {
*/
public float[] hybridScores() throws IOException {
float[] scores = new float[subScores.length];
DisiWrapper topList = subScorersPQ.topList();
for (DisiWrapper disiWrapper = topList; disiWrapper != null; disiWrapper = disiWrapper.next) {
HybridDisiWrapper topList = (HybridDisiWrapper) subScorersPQ.topList();
for (HybridDisiWrapper disiWrapper = topList; disiWrapper != null; disiWrapper = (HybridDisiWrapper) disiWrapper.next) {
// check if this doc has match in the subQuery. If not, add score as 0.0 and continue
Scorer scorer = disiWrapper.scorer;
if (scorer.docID() == DocIdSetIterator.NO_MORE_DOCS) {
continue;
}
Query query = scorer.getWeight().getQuery();
int[] indexes = queryToIndex.get(query);
// we need to find the index of first sub-query that hasn't been set yet. Such score will have initial value of "0.0"
int index = -1;
for (int idx : indexes) {
if (Float.compare(scores[idx], 0.0f) == 0) {
index = idx;
break;
}
}
if (index == -1) {
throw new IllegalStateException(
String.format(
Locale.ROOT,
"cannot set score for one of hybrid search subquery [%s] and document [%d]",
query.toString(),
scorer.docID()
)
);
}
scores[index] = scorer.score();
scores[disiWrapper.getSubQueryIndex()] = scorer.score();
}
return scores;
}

private Map<Query, int[]> mapQueryToIndex() {
// we need list as number of identical queries is unknown
Map<Query, List<Integer>> queryToListOfIndexes = new HashMap<>();
int idx = 0;
for (Scorer scorer : subScorers) {
if (scorer == null) {
idx++;
continue;
}
Query query = scorer.getWeight().getQuery();
queryToListOfIndexes.putIfAbsent(query, new ArrayList<>());
queryToListOfIndexes.get(query).add(idx);
idx++;
}
// convert to the int array for better performance
Map<Query, int[]> queryToIndex = new HashMap<>();
queryToListOfIndexes.forEach((key, value) -> queryToIndex.put(key, Ints.toArray(value)));
return queryToIndex;
}

private DisiPriorityQueue initializeSubScorersPQ() {
Objects.requireNonNull(queryToIndex, "should not be null");
Objects.requireNonNull(subScorers, "should not be null");
// we need to count this way in order to include all identical sub-queries
int numOfSubQueries = queryToIndex.values().stream().map(array -> array.length).reduce(0, Integer::sum);
int numOfSubQueries = subScorers.size();
DisiPriorityQueue subScorersPQ = new DisiPriorityQueue(numOfSubQueries);
for (Scorer scorer : subScorers) {
for (int idx = 0; idx < subScorers.size(); idx++) {
Scorer scorer = subScorers.get(idx);
if (scorer == null) {
continue;
}
final DisiWrapper w = new DisiWrapper(scorer);
subScorersPQ.add(w);
final HybridDisiWrapper disiWrapper = new HybridDisiWrapper(scorer, idx);
subScorersPQ.add(disiWrapper);
}
return subScorersPQ;
}
23 changes: 23 additions & 0 deletions HybridDisiWrapper.java
@@ -0,0 +1,23 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.search;

import lombok.Getter;
import org.apache.lucene.search.DisiWrapper;
import org.apache.lucene.search.Scorer;

/**
* Wrapper for DisiWrapper, saves state of sub-queries for performance reasons
*/
@Getter
public class HybridDisiWrapper extends DisiWrapper {
// index of disi wrapper sub-query object when its part of the hybrid query
int subQueryIndex = -1;

public HybridDisiWrapper(Scorer scorer, int subQueryIndex) {
super(scorer);
this.subQueryIndex = subQueryIndex;
}
}
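
For context, here is a hedged usage sketch of how these wrappers are fed into Lucene's DisiPriorityQueue, mirroring initializeSubScorersPQ() in the scorer diff above. The class name HybridDisiWrapperUsageSketch is hypothetical, Mockito mocks stand in for real scorers exactly as in the test below, and lucene-core, mockito-core, and the neural-search classes are assumed to be on the classpath.

import java.util.List;

import org.apache.lucene.search.DisiPriorityQueue;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;
import org.opensearch.neuralsearch.search.HybridDisiWrapper;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class HybridDisiWrapperUsageSketch {
    public static void main(String[] args) {
        // Two mocked sub-query scorers; DisiWrapper's constructor needs a non-null iterator.
        List<Scorer> subScorers = List.of(mock(Scorer.class), mock(Scorer.class));
        for (Scorer scorer : subScorers) {
            when(scorer.iterator()).thenReturn(mock(DocIdSetIterator.class));
        }
        // Each wrapper remembers the position of its sub-query within the hybrid query,
        // so no Map<Query, int[]> lookup is needed when scores are collected later.
        DisiPriorityQueue subScorersPQ = new DisiPriorityQueue(subScorers.size());
        for (int idx = 0; idx < subScorers.size(); idx++) {
            subScorersPQ.add(new HybridDisiWrapper(subScorers.get(idx), idx));
        }
        HybridDisiWrapper top = (HybridDisiWrapper) subScorersPQ.top();
        System.out.println("top wrapper holds sub-query index " + top.getSubQueryIndex());
    }
}
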
24 changes: 24 additions & 0 deletions HybridDisiWrapperTests.java
@@ -0,0 +1,24 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.search;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;
import org.opensearch.neuralsearch.query.OpenSearchQueryTestCase;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class HybridDisiWrapperTests extends OpenSearchQueryTestCase {

public void testSubQueryIndex_whenCreateNewInstanceAndSetIndex_thenSuccessful() {
Scorer scorer = mock(Scorer.class);
DocIdSetIterator docIdSetIterator = mock(DocIdSetIterator.class);
when(scorer.iterator()).thenReturn(docIdSetIterator);
int subQueryIndex = 2;
HybridDisiWrapper hybridDisiWrapper = new HybridDisiWrapper(scorer, subQueryIndex);
assertEquals(2, hybridDisiWrapper.getSubQueryIndex());
}
}
