From 70c1fc0d3d826e06dff19d1d3a6534d7496655eb Mon Sep 17 00:00:00 2001 From: babebe Date: Tue, 7 Jan 2025 09:37:04 -0500 Subject: [PATCH] [Issue 3166] cleanup to delete all indexes using prefix (#3372) ## Summary Fixes [#3166](https://github.com/HHS/simpler-grants-gov/issues/3166) ### Time to review: __5 mins__ ## Changes proposed > cleanup func to delete all old indexes --- api/src/adapters/search/opensearch_client.py | 26 ++++++++++------ .../backend/load_opportunities_to_index.py | 6 +++- .../adapters/search/test_opensearch_client.py | 31 +++++++++++++++++-- .../test_load_opportunities_to_index.py | 3 +- 4 files changed, 52 insertions(+), 14 deletions(-) diff --git a/api/src/adapters/search/opensearch_client.py b/api/src/adapters/search/opensearch_client.py index e3d186a9f..f8970f5fc 100644 --- a/api/src/adapters/search/opensearch_client.py +++ b/api/src/adapters/search/opensearch_client.py @@ -44,7 +44,7 @@ def create_index( *, shard_count: int = 1, replica_count: int = 1, - analysis: dict | None = None + analysis: dict | None = None, ) -> None: """ Create an empty search index @@ -78,7 +78,7 @@ def bulk_upsert( records: Iterable[dict[str, Any]], primary_key_field: str, *, - refresh: bool = True + refresh: bool = True, ) -> None: """ Bulk upsert records to an index @@ -148,9 +148,20 @@ def alias_exists(self, alias_name: str) -> bool: existing_index_mapping = self._client.cat.aliases(alias_name, format="json") return len(existing_index_mapping) > 0 - def swap_alias_index( - self, index_name: str, alias_name: str, *, delete_prior_indexes: bool = False - ) -> None: + def cleanup_old_indices(self, index_prefix: str, indexes_to_keep: list[str]) -> None: + """ + Cleanup old indexes now that they aren't connected to the alias + """ + resp = self._client.cat.indices(f"{index_prefix}-*", format="json", h=["index"]) + + old_indexes = [ + index["index"] for index in resp if index["index"] not in indexes_to_keep + ] # omit the newly created one + + for index in
old_indexes: + self.delete_index(index) + + def swap_alias_index(self, index_name: str, alias_name: str) -> None: """ For a given index, set it to the given alias. If any existing index(es) are attached to the alias, remove them from the alias. @@ -174,11 +185,6 @@ def swap_alias_index( self._client.indices.update_aliases({"actions": actions}) - # Cleanup old indexes now that they aren't connected to the alias - if delete_prior_indexes: - for index in existing_indexes: - self.delete_index(index) - def search_raw(self, index_name: str, search_query: dict) -> dict: # Simple wrapper around search if you don't want the request or response # object handled in any special way. diff --git a/api/src/search/backend/load_opportunities_to_index.py b/api/src/search/backend/load_opportunities_to_index.py index 4b5720c99..9e0140f5b 100644 --- a/api/src/search/backend/load_opportunities_to_index.py +++ b/api/src/search/backend/load_opportunities_to_index.py @@ -187,9 +187,13 @@ def full_refresh(self) -> None: # handle aliasing of endpoints self.search_client.swap_alias_index( - self.index_name, self.config.alias_name, delete_prior_indexes=True + self.index_name, + self.config.alias_name, ) + # cleanup old indexes + self.search_client.cleanup_old_indices(self.config.index_prefix, [self.index_name]) + def fetch_opportunities(self) -> Iterator[Sequence[Opportunity]]: """ Fetch the opportunities in batches. 
The iterator returned diff --git a/api/tests/src/adapters/search/test_opensearch_client.py b/api/tests/src/adapters/search/test_opensearch_client.py index ef201eeb4..1e86c585e 100644 --- a/api/tests/src/adapters/search/test_opensearch_client.py +++ b/api/tests/src/adapters/search/test_opensearch_client.py @@ -110,14 +110,15 @@ def test_swap_alias_index(search_client, generic_index): search_client.bulk_upsert(tmp_index, tmp_index_records, primary_key_field="id") # Set the alias - search_client.swap_alias_index(tmp_index, alias_name, delete_prior_indexes=True) + search_client.swap_alias_index(tmp_index, alias_name) # Can search by this alias and get records from the tmp index resp = search_client.search(alias_name, {}, include_scores=False) assert resp.records == tmp_index_records # Swap the index to the generic one + delete the tmp one - search_client.swap_alias_index(generic_index, alias_name, delete_prior_indexes=True) + search_client.swap_alias_index(generic_index, alias_name) + search_client.cleanup_old_indices("test-tmp-index", [generic_index]) resp = search_client.search(alias_name, {}, include_scores=False) assert resp.records == records @@ -213,3 +214,29 @@ def test_get_connection_parameters(): "connection_class": opensearchpy.RequestsHttpConnection, "pool_maxsize": 10, } + + +def test_cleanup_old_indices(search_client): + index_name_1 = f"test-index-{uuid.uuid4().int}" # old index + index_name_2 = f"test-index-{uuid.uuid4().int}" # old index + index_name_3 = f"partial-refresh-index-{uuid.uuid4().int}" # old index + index_name_4 = f"test-index-{uuid.uuid4().int}" # new index + + search_client.create_index(index_name_1) + search_client.create_index(index_name_2) + search_client.create_index(index_name_3) + search_client.create_index(index_name_4) + + # check all indexes were created + assert search_client.index_exists(index_name_1) is True + assert search_client.index_exists(index_name_2) is True + assert search_client.index_exists(index_name_3) is True + 
assert search_client.index_exists(index_name_4) is True + + # expect old index with same prefix to be deleted and others to remain + search_client.cleanup_old_indices("test-index", [index_name_4]) + + assert search_client.index_exists(index_name_1) is False + assert search_client.index_exists(index_name_2) is False + assert search_client.index_exists(index_name_3) is True + assert search_client.index_exists(index_name_4) is True diff --git a/api/tests/src/search/backend/test_load_opportunities_to_index.py b/api/tests/src/search/backend/test_load_opportunities_to_index.py index a61acc3e4..349638665 100644 --- a/api/tests/src/search/backend/test_load_opportunities_to_index.py +++ b/api/tests/src/search/backend/test_load_opportunities_to_index.py @@ -142,7 +142,8 @@ def test_load_opportunities_to_index( ) search_client.create_index(index_name) search_client.swap_alias_index( - index_name, load_opportunities_to_index.config.alias_name, delete_prior_indexes=True + index_name, + load_opportunities_to_index.config.alias_name, ) # Load a bunch of records into the DB