Skip to content

Commit

Permalink
Improve the wikipedia track. (elastic#458)
Browse files (browse the repository at this point in the history)
  • Loading branch information
afoucret authored and inqueue committed Dec 6, 2023
1 parent 94e0e57 commit 54de171
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 34 deletions.
10 changes: 6 additions & 4 deletions wikipedia/challenges/default.json
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,7 +18,7 @@
{
"name": "index-documents",
"operation": "index-documents",
"warmup-time-period": {{ bulk_warmup | default(40) | int }},
"warmup-time-period": {{bulk_warmup | default(40) | int}},
"clients": {{bulk_indexing_clients | default(5)}}
},
{
Expand All @@ -29,7 +29,8 @@
"name": "query-string-search",
"operation": "query-string-search",
"clients": {{search_clients | default(20)}},
"warmup-iterations": 100
"time-period": {{search_time_period | default(300) | int}},
"warmup-time-period": {{search_warmup | default(10) | int}}
},
{
"name": "clear-cache",
Expand All @@ -42,8 +43,9 @@
{
"name": "default-search-application-search",
"operation": "default-search-application-search",
"clients": {{search_clients | default(20)}},
"warmup-iterations": 100
"clients": {{application_search_clients | default(20)}},
"time-period": {{search_time_period | default(300) | int}},
"warmup-time-period": {{search_warmup | default(10) | int}}
}
]
}
8 changes: 3 additions & 5 deletions wikipedia/operations/default.json
Original file line number | Diff line number | Diff line change
Expand Up @@ -40,14 +40,12 @@
{
"name": "default-search-application-search",
"operation-type": "raw-request",
"param-source": "search-application-search-param-source",
"iterations": {{search_iterations | default(100000)}}
"param-source": "search-application-search-param-source"
},
{
"name": "query-string-search",
"operation-type": "search",
"param-source": "query-string-search",
"size" : {{search_size | default(20)}},
"search-fields" : "{{search_fields | default("*")}}",
"iterations": {{search_iterations | default(100000)}}
}
"search-fields" : "{{search_fields | default("*")}}"
}
61 changes: 36 additions & 25 deletions wikipedia/track.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,18 +16,17 @@
QUERY_CLEAN_REXEXP = regexp = re.compile("[^0-9a-zA-Z]+")


def query_iterator(k: int, random_seed: int = None) -> Iterator[str]:
def query_samples(k: int, random_seed: int = None) -> list[str]:
with open(QUERIES_FILENAME) as queries_file:
csv_reader = csv.reader(queries_file)
next(csv_reader)
queries_with_probabilities = list(tuple(line) for line in csv_reader)

queries = [query for query, _ in queries_with_probabilities]
queries = [QUERY_CLEAN_REXEXP.sub(" ", query).lower() for query, _ in queries_with_probabilities]
probabilities = [float(probability) for _, probability in queries_with_probabilities]
random.seed(random_seed)
for query in random.choices(queries, weights=probabilities, k=k):
# remove special chars from the query + lowercase
yield QUERY_CLEAN_REXEXP.sub(" ", query).lower()

return random.choices(queries, weights=probabilities, k=k)


class SearchApplicationParams:
Expand Down Expand Up @@ -55,15 +54,17 @@ def params(self):
class QueryIteratorParamSource(ParamSource):
def __init__(self, track, params, **kwargs):
super().__init__(track, params, **kwargs)
self._batch_size = self._params.get("batch_size", 100000)
self._random_seed = self._params.get("seed", None)
self._sample_queries = query_samples(self._batch_size, self._random_seed)
self._queries_iterator = None

def size(self):
return self._params.get("iterations", 10000)
return None

def partition(self, partition_index, total_partitions):
if self._queries_iterator is None:
partition_size = math.ceil(self.size() / total_partitions)
self._queries_iterator = query_iterator(partition_size, random_seed=self._params.get("seed", None))
self._queries_iterator = iter(self._sample_queries)
return self


Expand All @@ -73,16 +74,20 @@ def __init__(self, track, params, **kwargs):
self.search_application_params = SearchApplicationParams(track, params)

def params(self):
query = next(self._queries_iterator)
return {
"method": "POST",
"path": f"{SEARCH_APPLICATION_ROOT_ENDPOINT}/{self.search_application_params.name}/_search",
"body": {
"params": {
"query_string": query,
try:
query = next(self._queries_iterator)
return {
"method": "POST",
"path": f"{SEARCH_APPLICATION_ROOT_ENDPOINT}/{self.search_application_params.name}/_search",
"body": {
"params": {
"query_string": query,
},
},
},
}
}
except StopIteration:
self._queries_iterator = iter(self._sample_queries)
return self.params()


class QueryParamSource(QueryIteratorParamSource):
Expand All @@ -92,14 +97,20 @@ def __init__(self, track, params, **kwargs):
self._cache = params.get("cache", True)

def params(self):
result = {
"body": {"query": {"query_string": {"query": next(self._queries_iterator), "default_field": self._params["search-fields"]}}},
"size": self._params["size"],
"index": self._index_name,
"cache": self._cache,
}

return result
try:
result = {
"body": {
"query": {"query_string": {"query": next(self._queries_iterator), "default_field": self._params["search-fields"]}}
},
"size": self._params["size"],
"index": self._index_name,
"cache": self._cache,
}

return result
except StopIteration:
self._queries_iterator = iter(self._sample_queries)
return self.params()


def register(registry):
Expand Down

0 comments on commit 54de171

Please sign in to comment.