From 26e0737e4037074114eadc7717057b938ae93ef0 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Thu, 31 Oct 2024 10:22:30 -0400 Subject: [PATCH] Account for 0 graph size when initializing HNSW graph (#13964) When initializing a joint graph from one of the segments' graphs, we always assume that a segment's graph is present. However, we later want to explore an option where some segments will not have graphs (#13447). This change accounts for missing graphs: an initializer graph of size 0 is handled the same way as having no initializer graph. --- .../util/hnsw/ConcurrentHnswMerger.java | 35 ++++++++----------- .../util/hnsw/IncrementalHnswGraphMerger.java | 4 +++ 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/ConcurrentHnswMerger.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/ConcurrentHnswMerger.java index c4e7d159b489..b4688d097302 100644 --- a/lucene/core/src/java/org/apache/lucene/util/hnsw/ConcurrentHnswMerger.java +++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/ConcurrentHnswMerger.java @@ -48,28 +48,23 @@ public ConcurrentHnswMerger( @Override protected HnswBuilder createBuilder(KnnVectorValues mergedVectorValues, int maxOrd) throws IOException { + OnHeapHnswGraph graph; + BitSet initializedNodes = null; + if (initReader == null) { - return new HnswConcurrentMergeBuilder( - taskExecutor, - numWorker, - scorerSupplier, - M, - beamWidth, - new OnHeapHnswGraph(M, maxOrd), - null); + graph = new OnHeapHnswGraph(M, maxOrd); + } else { + HnswGraph initializerGraph = ((HnswGraphProvider) initReader).getGraph(fieldInfo.name); + if (initializerGraph.size() == 0) { + graph = new OnHeapHnswGraph(M, maxOrd); + } else { + initializedNodes = new FixedBitSet(maxOrd); + int[] oldToNewOrdinalMap = getNewOrdMapping(mergedVectorValues, initializedNodes); + graph = + InitializedHnswGraphBuilder.initGraph(M, initializerGraph, oldToNewOrdinalMap, maxOrd); + } } - - HnswGraph initializerGraph = ((HnswGraphProvider) initReader).getGraph(fieldInfo.name); - BitSet initializedNodes 
= new FixedBitSet(maxOrd); - int[] oldToNewOrdinalMap = getNewOrdMapping(mergedVectorValues, initializedNodes); - return new HnswConcurrentMergeBuilder( - taskExecutor, - numWorker, - scorerSupplier, - M, - beamWidth, - InitializedHnswGraphBuilder.initGraph(M, initializerGraph, oldToNewOrdinalMap, maxOrd), - initializedNodes); + taskExecutor, numWorker, scorerSupplier, M, beamWidth, graph, initializedNodes); } } diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/IncrementalHnswGraphMerger.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/IncrementalHnswGraphMerger.java index d64961a02ee4..c480d53360cb 100644 --- a/lucene/core/src/java/org/apache/lucene/util/hnsw/IncrementalHnswGraphMerger.java +++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/IncrementalHnswGraphMerger.java @@ -121,6 +121,10 @@ protected HnswBuilder createBuilder(KnnVectorValues mergedVectorValues, int maxO } HnswGraph initializerGraph = ((HnswGraphProvider) initReader).getGraph(fieldInfo.name); + if (initializerGraph.size() == 0) { + return HnswGraphBuilder.create( + scorerSupplier, M, beamWidth, HnswGraphBuilder.randSeed, maxOrd); + } BitSet initializedNodes = new FixedBitSet(maxOrd); int[] oldToNewOrdinalMap = getNewOrdMapping(mergedVectorValues, initializedNodes);