From 507a987c0bf4f7e23172eec6ae7aafbc42683de8 Mon Sep 17 00:00:00 2001 From: masajiro Date: Thu, 4 May 2023 01:48:35 +0000 Subject: [PATCH 1/5] update parameters --- algos.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/algos.yaml b/algos.yaml index 998f0ed81..968034063 100644 --- a/algos.yaml +++ b/algos.yaml @@ -442,6 +442,14 @@ float: args : [{'edge': 100, 'outdegree': 10, 'indegree': 120, 'epsilon': 0.02}] query-args : [[[0.60, -2], [0.90, -2], [1.00, -2], [1.02, -2], [1.03, -2], [1.04, -2], [1.05, -2], [1.07, -2], [1.10, -2], [1.20, -2]]] + e0.0-e60: + args : [{'edge': 60, 'outdegree': 10, 'indegree': 120, 'epsilon': 0.0}] + query-args : [[[0.60, -2], [0.90, -2], [1.00, -2], [1.02, -2], [1.03, -2], + [1.04, -2], [1.05, -2], [1.07, -2], [1.10, -2], [1.20, -2]]] + e0.0-e50: + args : [{'edge': 50, 'outdegree': 10, 'indegree': 120, 'epsilon': 0.0}] + query-args : [[[0.60, -2], [0.90, -2], [1.00, -2], [1.02, -2], [1.03, -2], + [1.04, -2], [1.05, -2], [1.07, -2], [1.10, -2], [1.20, -2]]] e0.1-with-refinement: args : [{'edge': 100, 'outdegree': 10, 'indegree': 120, 'tree': False, 'refine': True}] query-args : [[[0.995, 40], [0.998, 40], [1.00, 40], [1.005, 40], [1.010, 40], @@ -516,6 +524,16 @@ float: [ 5, 1.00], [ 10, 1.00], [ 20, 1.00], [1.2, 1.02], [1.5, 1.02], [2.0, 1.02], [3.0, 1.02], [ 2, 1.04], [ 3, 1.04], [ 5, 1.04], [ 8, 1.04]]] + s2000-e0.01: + args : [{'edge': 100, 'outdegree': 64, 'indegree': 120, 'max_edge': 96, 'sample': 2000, 'epsilon': 0.01}] + query-args : [[[0.0, 0.90], [0.0, 0.95], [0.0, 0.98], [0.0, 1.00], + [1.2, 0.90], [1.5, 0.90], [2.0, 0.90], [3.0, 0.90], + [1.2, 0.95], [1.5, 0.95], [2.0, 0.95], [3.0, 0.95], + [1.2, 0.98], [1.5, 0.98], [2.0, 0.98], [3.0, 0.98], + [1.2, 1.00], [1.5, 1.00], [2.0, 1.00], [3.0, 1.00], + [ 5, 1.00], [ 10, 1.00], [ 20, 1.00], + [1.2, 1.02], [1.5, 1.02], [2.0, 1.02], [3.0, 1.02], + [ 2, 1.04], [ 3, 1.04], [ 5, 1.04], [ 8, 1.04]]] mrpt: docker-tag: ann-benchmarks-mrpt module: 
ann_benchmarks.algorithms.mrpt From dcb5e436b270d3feb002a78485ed2d6392b7600b Mon Sep 17 00:00:00 2001 From: gyd-a <1666066431@qq.com> Date: Wed, 17 May 2023 00:20:35 +0800 Subject: [PATCH 2/5] add vearch to README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 1b382dbe3..32b1e8ee1 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ Evaluated * [PUFFINN](https://github.com/puffinn/puffinn) ![https://img.shields.io/github/stars/puffinn/puffinn?style=social](https://img.shields.io/github/stars/puffinn/puffinn?style=social) * [N2](https://github.com/kakao/n2) ![https://img.shields.io/github/stars/kakao/n2?style=social](https://img.shields.io/github/stars/kakao/n2?style=social) * [ScaNN](https://github.com/google-research/google-research/tree/master/scann) +* [Vearch](https://github.com/vearch/vearch) ![https://img.shields.io/github/stars/vearch/vearch?style=social](https://img.shields.io/github/stars/vearch/vearch?style=social) * [Elasticsearch](https://github.com/elastic/elasticsearch) ![https://img.shields.io/github/stars/elastic/elasticsearch?style=social](https://img.shields.io/github/stars/elastic/elasticsearch?style=social): HNSW * [Elastiknn](https://github.com/alexklibisz/elastiknn) ![https://img.shields.io/github/stars/alexklibisz/elastiknn?style=social](https://img.shields.io/github/stars/alexklibisz/elastiknn?style=social) * [OpenSearch KNN](https://github.com/opensearch-project/k-NN) ![https://img.shields.io/github/stars/opensearch-project/k-NN?style=social](https://img.shields.io/github/stars/opensearch-project/k-NN?style=social) From f43cd8bb3262112333b6f0e3cc46e8a440d4ccd8 Mon Sep 17 00:00:00 2001 From: zh Wang Date: Wed, 17 May 2023 19:40:36 +0800 Subject: [PATCH 3/5] Update glass --- README.md | 3 +-- algos.yaml | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 32b1e8ee1..acf722a61 100644 --- a/README.md +++ b/README.md @@ -40,9 +40,8 @@ Evaluated * 
[vald](https://github.com/vdaas/vald) ![https://img.shields.io/github/stars/vdaas/vald?style=social](https://img.shields.io/github/stars/vdaas/vald?style=social) * [Qdrant](https://github.com/qdrant/qdrant) ![https://img.shields.io/github/stars/qdrant/qdrant?style=social](https://img.shields.io/github/stars/qdrant/qdrant?style=social) * [qsgngtlib](https://gitee.com/WPJIANG-gitee/hwtl_sdu-anns-qsgngtlib) -* [Milvus](https://github.com/milvus-io/milvus) ![https://img.shields.io/github/stars/milvus-io/milvus?style=social](https://img.shields.io/github/stars/milvus-io/milvus?style=social): [Knowhere](https://github.com/milvus-io/knowhere) +* [Milvus](https://github.com/milvus-io/milvus) ![https://img.shields.io/github/stars/milvus-io/milvus?style=social](https://img.shields.io/github/stars/milvus-io/milvus?style=social): [Knowhere](https://github.com/milvus-io/knowhere) [Glass](https://github.com/hhy3/pyglass) * [pgvector](https://github.com/pgvector/pgvector) ![https://img.shields.io/github/stars/pgvector/pgvector?style=social](https://img.shields.io/github/stars/pgvector/pgvector?style=social) -* [Glass](https://github.com/hhy3/pyglass) Data sets ========= diff --git a/algos.yaml b/algos.yaml index 968034063..7de437df2 100644 --- a/algos.yaml +++ b/algos.yaml @@ -244,15 +244,15 @@ float: HNSW: args: { "M": [4, 8, 12, 16, 24, 36, 48, 64, 96], "efConstruction": 500 } # index query-args: [ [ 10, 15, 20, 25, 30, 35, 40, 50, 60, 80, 120, 200, 400, 600, 800 ] ] # ef - glass: + Milvus(Glass): docker-tag: ann-benchmarks-glass module: ann_benchmarks.algorithms.glass constructor: Glass base-args: ["@metric", "@dimension"] run-groups: HNSW: - args: { "R": [8, 16, 32, 48, 64, 96], "level" : [1, 2, 3], "L": 200 } # index - query-args: [ [ 10, 20, 30, 40, 60, 80, 120, 200, 400, 600, 800 ] ] # ef + args: { "R": [8, 16, 32, 48, 64, 96, 128], "level" : [1, 2, 3], "L": 300 } # index + query-args: [ [ 10, 20, 30, 40, 60, 80, 120, 200, 400, 600, 800, 1000 ] ] # ef nearpy: disabled: 
true docker-tag: ann-benchmarks-nearpy From dbff9daec125991734efb3a060ca0680373df930 Mon Sep 17 00:00:00 2001 From: Emil Hessman Date: Mon, 1 May 2023 16:49:46 +0200 Subject: [PATCH 4/5] Implement __str__ for Elasticsearch --- ann_benchmarks/algorithms/elasticsearch.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/ann_benchmarks/algorithms/elasticsearch.py b/ann_benchmarks/algorithms/elasticsearch.py index e1893b85a..568a28aab 100644 --- a/ann_benchmarks/algorithms/elasticsearch.py +++ b/ann_benchmarks/algorithms/elasticsearch.py @@ -19,7 +19,7 @@ def __init__(self, metric: str, dimension: int, index_options: dict): self.num_candidates = 100 index_options_str = "-".join(sorted(f"{k}-{v}" for k, v in self.index_options.items())) - self.name = f"es-{metric}-{dimension}-{index_options_str}" + self.index_name = f"{metric}-{dimension}-{index_options_str}" self.similarity_metric = self._vector_similarity_metric(metric) self.client = Elasticsearch(["http://localhost:9200"]) @@ -72,11 +72,11 @@ def fit(self, X): }, }, } - self.client.indices.create(index=self.name, settings=settings, mappings=mappings) + self.client.indices.create(index=self.index_name, settings=settings, mappings=mappings) def gen(): for i, vec in enumerate(X): - yield {"_op_type": "index", "_index": self.name, "id": str(i), "vec": vec.tolist()} + yield {"_op_type": "index", "_index": self.index_name, "id": str(i), "vec": vec.tolist()} print("Indexing ...") (_, errors) = bulk(self.client, gen(), chunk_size=500, request_timeout=90) @@ -84,10 +84,10 @@ def gen(): raise RuntimeError("Failed to index documents") print("Force merge index ...") - self.client.indices.forcemerge(index=self.name, max_num_segments=1, request_timeout=900) + self.client.indices.forcemerge(index=self.index_name, max_num_segments=1, request_timeout=900) print("Refreshing index ...") - self.client.indices.refresh(index=self.name, request_timeout=900) + 
self.client.indices.refresh(index=self.index_name, request_timeout=900) def set_query_arguments(self, num_candidates): self.num_candidates = num_candidates @@ -105,7 +105,7 @@ def query(self, q, n): } } res = self.client.search( - index=self.name, + index=self.index_name, body=body, size=n, _source=False, @@ -121,3 +121,6 @@ def batch_query(self, X, n): def get_batch_results(self): return self.batch_res + + def __str__(self): + return f"Elasticsearch(index_options: {self.index_options}, num_candidates: {self.num_candidates})" From c45bf6b2c917eeb169426088fd6b82840f272e97 Mon Sep 17 00:00:00 2001 From: Emil Hessman Date: Mon, 1 May 2023 17:17:18 +0200 Subject: [PATCH 5/5] Implement __str__ for OpenSearch --- ann_benchmarks/algorithms/opensearchknn.py | 25 +++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/ann_benchmarks/algorithms/opensearchknn.py b/ann_benchmarks/algorithms/opensearchknn.py index 5c3a307fe..51ce142b1 100644 --- a/ann_benchmarks/algorithms/opensearchknn.py +++ b/ann_benchmarks/algorithms/opensearchknn.py @@ -14,8 +14,9 @@ def __init__(self, metric, dimension, method_param): self.dimension = dimension self.method_param = method_param self.param_string = "-".join(k + "-" + str(v) for k, v in self.method_param.items()).lower() - self.name = f"os-{self.param_string}" + self.index_name = f"os-{self.param_string}" self.client = OpenSearch(["http://localhost:9200"]) + self.ef_search = None self._wait_for_health_status() def _wait_for_health_status(self, wait_seconds=30, status="yellow"): @@ -53,29 +54,30 @@ def fit(self, X): } } - self.client.indices.create(self.name, body=body) - self.client.indices.put_mapping(mapping, self.name) + self.client.indices.create(self.index_name, body=body) + self.client.indices.put_mapping(mapping, self.index_name) - print("Uploading data to the Index:", self.name) + print("Uploading data to the Index:", self.index_name) def gen(): for i, vec in enumerate(tqdm(X)): - yield {"_op_type": 
"index", "_index": self.name, "vec": vec.tolist(), "id": str(i + 1)} + yield {"_op_type": "index", "_index": self.index_name, "vec": vec.tolist(), "id": str(i + 1)} (_, errors) = bulk(self.client, gen(), chunk_size=500, max_retries=2, request_timeout=10) assert len(errors) == 0, errors print("Force Merge...") - self.client.indices.forcemerge(self.name, max_num_segments=1, request_timeout=1000) + self.client.indices.forcemerge(self.index_name, max_num_segments=1, request_timeout=1000) print("Refreshing the Index...") - self.client.indices.refresh(self.name, request_timeout=1000) + self.client.indices.refresh(self.index_name, request_timeout=1000) print("Running Warmup API...") - res = urlopen(Request("http://localhost:9200/_plugins/_knn/warmup/" + self.name + "?pretty")) + res = urlopen(Request("http://localhost:9200/_plugins/_knn/warmup/" + self.index_name + "?pretty")) print(res.read().decode("utf-8")) def set_query_arguments(self, ef): + self.ef_search = ef body = {"settings": {"index": {"knn.algo_param.ef_search": ef}}} self.client.indices.put_settings(body=body) @@ -83,7 +85,7 @@ def query(self, q, n): body = {"query": {"knn": {"vec": {"vector": q.tolist(), "k": n}}}} res = self.client.search( - index=self.name, + index=self.index_name, body=body, size=n, _source=False, @@ -102,4 +104,7 @@ def get_batch_results(self): return self.batch_res def freeIndex(self): - self.client.indices.delete(index=self.name) + self.client.indices.delete(index=self.index_name) + + def __str__(self): + return f"OpenSearch(index_options: {self.method_param}, ef_search: {self.ef_search})"