Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Add stats to track knn request counts #89

Merged
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,21 @@ The number of cache misses that have occurred on the node. A cache miss occurs w
#### graph_memory_usage
The current weight of the cache (the total size in native memory of all of the graphs) in Kilobytes.

#### graph_index_requests
The number of requests to add the knn_vector field of a document into a graph.

#### graph_index_errors
The number of requests to add the knn_vector field of a document into a graph that have produced an error.

#### graph_query_requests
The number of graph queries that have been made.

#### graph_query_errors
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about we add metrics for counting KNNQueries?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. Will add

The number of graph queries that have produced an error.

#### knn_query_requests
The number of KNN query requests received.

#### cache_capacity_reached
Whether the cache capacity for this node has been reached. This capacity is controlled by the *knn.memory.circuit_breaker.limit* setting.

Expand Down Expand Up @@ -270,6 +285,11 @@ GET /_opendistro/_knn/stats?pretty
"eviction_count" : 0,
"miss_count" : 1,
"graph_memory_usage" : 1,
"graph_index_requests" : 7,
"graph_index_errors" : 1,
"knn_query_requests" : 4,
"graph_query_requests" : 30,
"graph_query_errors" : 15,
"cache_capacity_reached" : false,
"load_exception_count" : 0,
"hit_count" : 0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

package com.amazon.opendistroforelasticsearch.knn.index;

import com.amazon.opendistroforelasticsearch.knn.plugin.stats.KNNCounter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.search.Query;
Expand Down Expand Up @@ -94,9 +95,13 @@ private static float[] ObjectsToFloats(List<Object> objs) {
*/
public KNNQueryBuilder(StreamInput in) throws IOException {
    super(in);
    // Read each serialized field exactly once: field name, query vector, then k.
    try {
        fieldName = in.readString();
        vector = in.readFloatArray();
        k = in.readInt();
    } catch (IOException ex) {
        // The message text is unchanged; the IOException is additionally attached
        // as the cause so its stack trace is not lost when the failure is rethrown.
        throw new RuntimeException("[KNN] Unable to create KNNQueryBuilder: " + ex, ex);
    }
}

public static KNNQueryBuilder fromXContent(XContentParser parser) throws IOException {
Expand All @@ -107,6 +112,7 @@ public static KNNQueryBuilder fromXContent(XContentParser parser) throws IOExcep
String queryName = null;
String currentFieldName = null;
XContentParser.Token token;
KNNCounter.KNN_QUERY_REQUESTS.increment();
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package com.amazon.opendistroforelasticsearch.knn.index.codec.KNN80Codec;

import com.amazon.opendistroforelasticsearch.knn.index.codec.KNNCodecUtil;
import com.amazon.opendistroforelasticsearch.knn.plugin.stats.KNNCounter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.CodecUtil;
Expand Down Expand Up @@ -69,12 +70,14 @@ public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) th
}

public void addKNNBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
KNNCounter.GRAPH_INDEX_REQUESTS.increment();
if (field.attributes().containsKey(KNNVectorFieldMapper.KNN_FIELD)) {

/**
* We always write with latest NMS library version
*/
if (!isNmsLibLatest()) {
KNNCounter.GRAPH_INDEX_ERRORS.increment();
throw new IllegalStateException("Nms library version mismatch. Correct version: "
+ NmsLibVersion.LATEST.indexLibraryVersion());
}
Expand Down Expand Up @@ -118,6 +121,9 @@ public Void run() {
IndexOutput os = state.directory.createOutput(hnswFileName, state.context)) {
os.copyBytes(is, is.length());
CodecUtil.writeFooter(os);
} catch (Exception ex) {
KNNCounter.GRAPH_INDEX_ERRORS.increment();
throw new RuntimeException("[KNN] Adding footer to serialized graph failed: " + ex);
} finally {
IOUtils.deleteFilesIgnoringExceptions(state.directory, hsnwTempFileName);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import com.amazon.opendistroforelasticsearch.knn.index.KNNQueryResult;
import com.amazon.opendistroforelasticsearch.knn.index.util.NmsLibVersion;
import com.amazon.opendistroforelasticsearch.knn.plugin.stats.KNNCounter;

import java.io.File;
import java.io.IOException;
Expand Down Expand Up @@ -70,7 +71,7 @@ public long getIndexSize() {
public KNNQueryResult[] queryIndex(final float[] query, final int k) throws IOException {
Lock readLock = readWriteLock.readLock();
readLock.lock();

KNNCounter.GRAPH_QUERY_REQUESTS.increment();
try {
if (this.isClosed) {
throw new IOException("Index is already closed");
Expand All @@ -84,6 +85,9 @@ public KNNQueryResult[] run() {
}
);

} catch (Exception ex) {
KNNCounter.GRAPH_QUERY_ERRORS.increment();
throw new RuntimeException("Unable to query the index: " + ex);
Comment on lines +88 to +90
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a thought: why don't we rely on the load_exception_count metric from the cache stats? It seems to track the number of exceptions raised while loading a graph, and graph loading is invoked during queries.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using load_exception_count would only count exceptions raised while loading the graph into memory, not while actually querying the graph. Adding the metric here allows us to check whether the library's query of the graph fails. In your opinion, should this metric track the number of query errors where a "query" is a call to the ES search API for knn, or where a "query" is a call to the library function?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

makes sense. It should track the number of query errors.

} finally {
readLock.unlock();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.knn.plugin.stats;

import java.util.concurrent.atomic.AtomicLong;

/**
 * Named counters used for KNN plugin statistics.
 *
 * Each enum constant owns a single {@link AtomicLong} that starts at zero and
 * the name under which its value is reported. This enum exposes no way to
 * reset or decrement a counter.
 */
public enum KNNCounter {
    GRAPH_QUERY_ERRORS("graph_query_errors"),
    GRAPH_QUERY_REQUESTS("graph_query_requests"),
    GRAPH_INDEX_ERRORS("graph_index_errors"),
    GRAPH_INDEX_REQUESTS("graph_index_requests"),
    KNN_QUERY_REQUESTS("knn_query_requests");

    // Both fields are assigned once in the constructor and never rebound.
    private final String name;
    private final AtomicLong count;

    /**
     * Constructor
     *
     * @param name name of the counter
     */
    KNNCounter(String name) {
        this.name = name;
        this.count = new AtomicLong(0);
    }

    /**
     * Get name of counter
     *
     * @return name
     */
    public String getName() {
        return name;
    }

    /**
     * Get the current value of the counter
     *
     * @return count
     */
    public Long getCount() {
        return count.get();
    }

    /**
     * Atomically add one to the counter
     */
    public void increment() {
        count.incrementAndGet();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import com.amazon.opendistroforelasticsearch.knn.index.KNNIndexCache;
import com.amazon.opendistroforelasticsearch.knn.plugin.stats.suppliers.KNNCacheSupplier;
import com.amazon.opendistroforelasticsearch.knn.plugin.stats.suppliers.KNNCircuitBreakerSupplier;
import com.amazon.opendistroforelasticsearch.knn.plugin.stats.suppliers.KNNCounterSupplier;
import com.amazon.opendistroforelasticsearch.knn.plugin.stats.suppliers.KNNInnerCacheStatsSupplier;
import com.google.common.cache.CacheStats;
import com.google.common.collect.ImmutableMap;
Expand All @@ -42,6 +43,16 @@ public class KNNStatsConfig {
new KNNCacheSupplier<>(KNNIndexCache::getWeightInKilobytes)))
.put(StatNames.CACHE_CAPACITY_REACHED.getName(), new KNNStat<>(false,
new KNNCacheSupplier<>(KNNIndexCache::isCacheCapacityReached)))
.put(StatNames.GRAPH_QUERY_ERRORS.getName(), new KNNStat<>(false,
new KNNCounterSupplier(KNNCounter.GRAPH_QUERY_ERRORS)))
.put(StatNames.GRAPH_QUERY_REQUESTS.getName(), new KNNStat<>(false,
new KNNCounterSupplier(KNNCounter.GRAPH_QUERY_REQUESTS)))
.put(StatNames.GRAPH_INDEX_ERRORS.getName(), new KNNStat<>(false,
new KNNCounterSupplier(KNNCounter.GRAPH_INDEX_ERRORS)))
.put(StatNames.GRAPH_INDEX_REQUESTS.getName(), new KNNStat<>(false,
new KNNCounterSupplier(KNNCounter.GRAPH_INDEX_REQUESTS)))
.put(StatNames.CIRCUIT_BREAKER_TRIGGERED.getName(), new KNNStat<>(true,
new KNNCircuitBreakerSupplier())).build();
new KNNCircuitBreakerSupplier()))
.put(StatNames.KNN_QUERY_REQUESTS.getName(), new KNNStat<>(false,
new KNNCounterSupplier(KNNCounter.KNN_QUERY_REQUESTS))).build();
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,12 @@ public enum StatNames {
EVICTION_COUNT("eviction_count"),
GRAPH_MEMORY_USAGE("graph_memory_usage"),
CACHE_CAPACITY_REACHED("cache_capacity_reached"),
CIRCUIT_BREAKER_TRIGGERED("circuit_breaker_triggered");
CIRCUIT_BREAKER_TRIGGERED("circuit_breaker_triggered"),
GRAPH_QUERY_ERRORS(KNNCounter.GRAPH_QUERY_ERRORS.getName()),
GRAPH_QUERY_REQUESTS(KNNCounter.GRAPH_QUERY_REQUESTS.getName()),
GRAPH_INDEX_ERRORS(KNNCounter.GRAPH_INDEX_ERRORS.getName()),
GRAPH_INDEX_REQUESTS(KNNCounter.GRAPH_INDEX_REQUESTS.getName()),
KNN_QUERY_REQUESTS(KNNCounter.KNN_QUERY_REQUESTS.getName());

private String name;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.knn.plugin.stats.suppliers;

import com.amazon.opendistroforelasticsearch.knn.plugin.stats.KNNCounter;

import java.util.function.Supplier;

/**
 * Supplier for stats that need to keep count.
 *
 * Wraps one {@link KNNCounter} and reports its current value on every call to
 * {@link #get()}.
 */
public class KNNCounterSupplier implements Supplier<Long> {
    // Assigned once in the constructor; the supplier always reads the same counter.
    private final KNNCounter knnCounter;

    /**
     * Constructor
     *
     * @param knnCounter KNN Plugin Counter
     */
    public KNNCounterSupplier(KNNCounter knnCounter) {
        this.knnCounter = knnCounter;
    }

    /**
     * Read the wrapped counter.
     *
     * @return the counter's value at the time of the call
     */
    @Override
    public Long get() {
        return knnCounter.getCount();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.knn.plugin.stats;

import org.elasticsearch.test.ESTestCase;

public class KNNCounterTests extends ESTestCase {
    // The counter's name must match the stat name it is registered under.
    public void testGetName() {
        assertEquals(StatNames.GRAPH_QUERY_ERRORS.getName(), KNNCounter.GRAPH_QUERY_ERRORS.getName());
    }

    // Each increment() must raise the reported count by exactly one.
    public void testCount() {
        // The counter lives in a static enum constant, so any earlier increment
        // in the same JVM would break an absolute "starts at 0" assertion.
        // Measure relative to the value observed at the start of the test.
        final long baseline = KNNCounter.GRAPH_QUERY_ERRORS.getCount();

        for (long i = 1; i <= 100; i++) {
            KNNCounter.GRAPH_QUERY_ERRORS.increment();
            assertEquals((Long) (baseline + i), KNNCounter.GRAPH_QUERY_ERRORS.getCount());
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.knn.plugin.stats.suppliers;

import com.amazon.opendistroforelasticsearch.knn.plugin.stats.KNNCounter;
import org.elasticsearch.test.ESTestCase;

public class KNNCounterSupplierTests extends ESTestCase {
    // The supplier must reflect the live value of the counter it wraps.
    public void testNormal() {
        KNNCounterSupplier knnCounterSupplier = new KNNCounterSupplier(KNNCounter.GRAPH_QUERY_REQUESTS);

        // The counter is static enum state and may already be non-zero if other
        // code in the same JVM incremented it, so compare against a baseline
        // instead of assuming it starts at 0.
        final long baseline = knnCounterSupplier.get();
        assertEquals((Long) baseline, KNNCounter.GRAPH_QUERY_REQUESTS.getCount());

        KNNCounter.GRAPH_QUERY_REQUESTS.increment();
        assertEquals((Long) (baseline + 1), knnCounterSupplier.get());
    }
}