forked from qdrant/vector-db-benchmark
-
Notifications
You must be signed in to change notification settings - Fork 3
/
run.py
98 lines (88 loc) · 3.24 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import fnmatch
import traceback
from typing import List
import stopit
import typer
from benchmark.config_read import read_dataset_config, read_engine_configs
from benchmark.dataset import Dataset
from engine.base_client import IncompatibilityError
from engine.clients.client_factory import ClientFactory
# Typer application object; the `run` function below is registered on it as a
# CLI command via `@app.command()`.
app = typer.Typer()
@app.command()
def run(
    # Glob patterns (fnmatch syntax) matched against engine config names.
    engines: List[str] = typer.Option(["*"]),
    # Glob patterns (fnmatch syntax) matched against dataset config names.
    datasets: List[str] = typer.Option(["*"]),
    # Forwarded unchanged to `client.run_experiment`.
    # NOTE(review): presumably the search parallelism levels to run - confirm.
    parallels: List[int] = typer.Option([]),
    # Passed to `ClientFactory` when building the engine client.
    host: str = "localhost",
    # Both skip flags are forwarded to `Dataset` and `client.run_experiment`.
    skip_upload: bool = False,
    skip_search: bool = False,
    # Forwarded to `client.run_experiment`.
    skip_if_exists: bool = True,
    # When true, an unexpected exception is re-raised after being reported;
    # otherwise the loop moves on to the next engine/dataset pair.
    exit_on_error: bool = True,
    # Time budget, in seconds, for a single `client.run_experiment` call.
    timeout: float = 86400.0,
    # Forwarded to `Dataset` and `client.run_experiment`.
    # NOTE(review): presumably the index range of records to upload - confirm.
    upload_start_idx: int = 0,
    upload_end_idx: int = -1,
):
    """
    Run every selected engine configuration against every selected dataset.

    Example:
        python3 run.py --engines *-m-16-* --engines qdrant-* --datasets glove-*
    """
    # NOTE: the docstring above is kept short on purpose; Typer shows a
    # command's docstring as its `--help` text.
    all_engines = read_engine_configs()
    all_datasets = read_dataset_config()

    # Keep only the configs whose name matches at least one requested pattern.
    selected_engines = {
        name: config
        for name, config in all_engines.items()
        if any(fnmatch.fnmatch(name, engine) for engine in engines)
    }
    selected_datasets = {
        name: config
        for name, config in all_datasets.items()
        if any(fnmatch.fnmatch(name, dataset) for dataset in datasets)
    }

    for engine_name, engine_config in selected_engines.items():
        for dataset_name, dataset_config in selected_datasets.items():
            print(f"Running experiment: {engine_name} - {dataset_name}")
            client = ClientFactory(host).build_client(engine_config)
            dataset = Dataset(
                dataset_config,
                skip_upload,
                skip_search,
                upload_start_idx,
                upload_end_idx,
            )
            dataset.download()
            try:
                with stopit.ThreadingTimeout(timeout) as tt:
                    client.run_experiment(
                        dataset,
                        skip_upload,
                        skip_search,
                        skip_if_exists,
                        parallels,
                        upload_start_idx,
                        upload_end_idx,
                    )
                client.delete_client()

                # If the timeout is reached, the server might be still in the
                # middle of some background processing, like creating the index.
                # Next experiment should not be launched. It's better to reset
                # the server state manually.
                if tt.state != stopit.ThreadingTimeout.EXECUTED:
                    print(
                        f"Timed out {engine_name} - {dataset_name}, "
                        f"exceeded {timeout} seconds"
                    )
                    # Raise SystemExit directly: the bare `exit()` helper is
                    # injected by the `site` module for interactive use and
                    # is not guaranteed to exist (e.g. under `python -S`).
                    raise SystemExit(2)
            except IncompatibilityError:
                print(f"Skipping {engine_name} - {dataset_name}, incompatible params")
                continue
            except KeyboardInterrupt:
                traceback.print_exc()
                raise SystemExit(1)
            except Exception:
                print(f"Experiment {engine_name} - {dataset_name} interrupted")
                traceback.print_exc()
                if exit_on_error:
                    # Bare `raise` re-raises the active exception without
                    # adding an extra frame to its traceback.
                    raise
                continue
# Invoke the Typer application only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    app()