diff --git a/backend/handler/metadata/base_hander.py b/backend/handler/metadata/base_hander.py
index 3a6058f60..0de05cc81 100644
--- a/backend/handler/metadata/base_hander.py
+++ b/backend/handler/metadata/base_hander.py
@@ -11,6 +11,7 @@
     SWITCH_TITLEDB_INDEX_KEY,
     update_switch_titledb_task,
 )
+from utils.iterators import batched
 
 
 def conditionally_set_cache(
@@ -19,8 +20,13 @@ def conditionally_set_cache(
     fixtures_path = os.path.join(parent_dir, "fixtures")
     if not cache.exists(index_key):
         index_data = json.loads(open(os.path.join(fixtures_path, filename)).read())
-        for key, value in index_data.items():
-            cache.hset(index_key, key, json.dumps(value))
+        with cache.pipeline() as pipe:
+            # Pipeline + batching: one round-trip per 2000 entries instead of
+            # one HSET per key; data_batch is already a tuple of (k, v) pairs.
+            for data_batch in batched(index_data.items(), 2000):
+                data_map = {k: json.dumps(v) for k, v in data_batch}
+                pipe.hset(index_key, mapping=data_map)
+            pipe.execute()
 
 
 # These are loaded in cache in update_switch_titledb_task
diff --git a/backend/tasks/update_switch_titledb.py b/backend/tasks/update_switch_titledb.py
index 089418d7b..1ac55a20e 100644
--- a/backend/tasks/update_switch_titledb.py
+++ b/backend/tasks/update_switch_titledb.py
@@ -8,6 +8,7 @@
 from handler.redis_handler import cache
 from logger.logger import log
 from tasks.tasks import RemoteFilePullTask
+from utils.iterators import batched
 
 SWITCH_TITLEDB_INDEX_KEY: Final = "romm:switch_titledb"
 SWITCH_PRODUCT_ID_KEY: Final = "romm:switch_product_id"
@@ -23,20 +24,25 @@ def __init__(self):
             url="https://raw.githubusercontent.com/blawar/titledb/master/US.en.json",
         )
 
-    async def run(self, force: bool = False):
+    async def run(self, force: bool = False) -> None:
         content = await super().run(force)
         if content is None:
             return
 
         index_json = json.loads(content)
 
-        for key, value in index_json.items():
-            if key and value:
-                cache.hset(SWITCH_TITLEDB_INDEX_KEY, key, json.dumps(value))
-
-        product_ids = {v["id"]: v for v in index_json.values()}
-        for key, value in product_ids.items():
-            if key and value:
-                cache.hset(SWITCH_PRODUCT_ID_KEY, key, json.dumps(value))
+        # Drop falsy keys/values once so both hash writes share the filter.
+        relevant_data = {k: v for k, v in index_json.items() if k and v}
+
+        with cache.pipeline() as pipe:
+            # One round-trip per 2000-entry batch instead of one HSET per key.
+            for data_batch in batched(relevant_data.items(), 2000):
+                titledb_map = {k: json.dumps(v) for k, v in data_batch}
+                pipe.hset(SWITCH_TITLEDB_INDEX_KEY, mapping=titledb_map)
+            # Second index: the same entries keyed by their product id.
+            for data_batch in batched(relevant_data.items(), 2000):
+                product_map = {v["id"]: json.dumps(v) for _, v in data_batch}
+                pipe.hset(SWITCH_PRODUCT_ID_KEY, mapping=product_map)
+            pipe.execute()
 
         log.info("Scheduled switch titledb update completed!")
diff --git a/backend/utils/iterators.py b/backend/utils/iterators.py
new file mode 100644
index 000000000..cc95d36be
--- /dev/null
+++ b/backend/utils/iterators.py
@@ -0,0 +1,23 @@
+"""Iterator helpers shared across backend modules."""
+import sys
+
+if sys.version_info >= (3, 12):
+    from itertools import batched  # noqa: F401
+else:
+    from collections.abc import Iterable, Iterator
+    from itertools import islice
+    from typing import TypeVar
+
+    T = TypeVar("T")
+
+    def batched(iterable: Iterable[T], n: int) -> Iterator[tuple[T, ...]]:
+        """Backport of ``itertools.batched`` (added in Python 3.12).
+
+        Yields successive tuples of at most *n* items from *iterable*;
+        the final batch may be shorter. Raises ValueError if ``n < 1``.
+        """
+        if n < 1:
+            raise ValueError("n must be at least one")
+        iterator = iter(iterable)
+        while batch := tuple(islice(iterator, n)):
+            yield batch