From efa6d2b464f4790e5d11d70847fb1ea7f83bb8c7 Mon Sep 17 00:00:00 2001 From: sengineer0 Date: Mon, 29 Jan 2024 11:01:14 +0700 Subject: [PATCH 1/2] Update indexes_by_name api: allow specifying env_name to restrict fetching to indexes from that env --- .gitignore | 1 + biothings/hub/dataindex/indexer.py | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 7e8915bc1..11914c966 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,4 @@ config.py .notes/ .env .notes +ssh_host_key* diff --git a/biothings/hub/dataindex/indexer.py b/biothings/hub/dataindex/indexer.py index e2bf42324..a7375f05d 100644 --- a/biothings/hub/dataindex/indexer.py +++ b/biothings/hub/dataindex/indexer.py @@ -318,7 +318,9 @@ def __init__(self, build_doc, indexer_env, index_name): self.pinfo = ProcessInfo(self, indexer_env.get("concurrency", 10)) def setup_log(self): - log_folder = os.path.join(btconfig.LOG_FOLDER, "build", self.build_name or "", "index") if btconfig.LOG_FOLDER else None + log_folder = ( + os.path.join(btconfig.LOG_FOLDER, "build", self.build_name or "", "index") if btconfig.LOG_FOLDER else None + ) log_name = f"index_{self.es_index_name}" self.logger, self.logfile = get_logger(log_name, log_folder=log_folder, force=True) @@ -804,8 +806,9 @@ async def _enhance(conf): return self._config - def get_indexes_by_name(self, index_name=None, limit=10): + def get_indexes_by_name(self, index_name=None, env_name=None, limit=10): """Accept an index_name and return a list of indexes get from all elasticsearch environments + or from specific elasticsearch environment. If index_name is blank, it will be return all indexes. limit can be used to specify how many indexes should be return. 
@@ -829,9 +832,13 @@ def get_indexes_by_name(self, index_name=None, limit=10): index_name = "*" limit = int(limit) - async def fetch(index_name, limit=None): + async def fetch(index_name, env_name=None, limit=None): indexes = [] - for env_name, env in self.register.items(): + for _env_name, env in self.register.items(): + # If env_name is set, only fetch indexes for the specific es server + if env_name and env_name != _env_name: + continue + async with AsyncElasticsearch(**env["args"]) as client: try: indices = await client.indices.get(index_name) @@ -846,7 +853,7 @@ async def fetch(index_name, limit=None): "count": mapping_meta["stats"]["total"], "creation_date": index_data["settings"]["index"]["creation_date"], "environment": { - "name": env_name, + "name": _env_name, "host": env["args"]["hosts"], }, } @@ -858,7 +865,7 @@ async def fetch(index_name, limit=None): indexes = indexes[:limit] return indexes - job = asyncio.ensure_future(fetch(index_name, limit=limit)) + job = asyncio.ensure_future(fetch(index_name, env_name=env_name, limit=limit)) job.add_done_callback(self.logger.debug) return job From b345241ed35c08e5a49bcfcdbf21c752bfbe7ed0 Mon Sep 17 00:00:00 2001 From: sengineer0 Date: Mon, 29 Jan 2024 11:59:00 +0700 Subject: [PATCH 2/2] Add doc_type to indexes_by_name api --- biothings/hub/dataindex/indexer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/biothings/hub/dataindex/indexer.py b/biothings/hub/dataindex/indexer.py index a7375f05d..5ad296a5f 100644 --- a/biothings/hub/dataindex/indexer.py +++ b/biothings/hub/dataindex/indexer.py @@ -849,6 +849,7 @@ async def fetch(index_name, env_name=None, limit=None): indexes.append( { "index_name": index_name, + "doc_type": mapping_meta["biothing_type"], "build_version": mapping_meta["build_version"], "count": mapping_meta["stats"]["total"], "creation_date": index_data["settings"]["index"]["creation_date"],