From cba7130414b4621a4387865e13b570dacf2a9456 Mon Sep 17 00:00:00 2001 From: YSK Date: Mon, 22 Apr 2024 16:00:20 -0400 Subject: [PATCH 1/3] Added ProgramList and fixed the studies that are filtered by program name --- src/dug/core/async_search.py | 35 ++++++++++++++++++++++++++++++++++- src/dug/server.py | 12 +++++++++++- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/src/dug/core/async_search.py b/src/dug/core/async_search.py index 92689865..7675b521 100644 --- a/src/dug/core/async_search.py +++ b/src/dug/core/async_search.py @@ -491,7 +491,7 @@ async def search_program(self, program_name=None, offset=0, size=None): "match": {"data_type": program_name} }) - print("query_body", query_body) + #print("query_body", query_body) # Prepare the query body for execution body = query_body @@ -514,7 +514,40 @@ async def search_program(self, program_name=None, offset=0, size=None): #print(search_results) return search_results + + async def search_program_list(self): + + query_body = { + "size": 0, # We don't need the documents themselves, so set the size to 0 + "aggs": { + "unique_program_names": { + "terms": { + "field": "data_type.keyword" + }, + "aggs": { + "No_of_studies": { + "cardinality": { + "field": "collection_id.keyword" + } + } + } + } + } + } + # Execute the search query + search_results = await self.es.search( + index="variables_index", + body=query_body + ) + + # The unique data_types and their counts of unique collection_ids will be in the 'aggregations' field of the response + unique_data_types = search_results['aggregations']['unique_program_names']['buckets'] + + # Testing the output so print the unique data_types and their counts of unique collection_ids + #for bucket in unique_data_types: + # print(f"data_type: {bucket['key']}, count of unique collection_ids: {bucket['No_of_studies']['value']}") + return unique_data_types def _get_var_query(self, concept, fuzziness, prefix_length, query): """Returns ES query for variable search""" es_query = { diff --git a/src/dug/server.py b/src/dug/server.py index ef03890e..15716cb7 100644 --- a/src/dug/server.py +++ b/src/dug/server.py @@ -147,6 +147,16 @@ async def search_program( program_name: Optional[str] = None): "status": "success" } - +@APP.post('/program_list') +async def get_program_list(): + """ + Search for studies by unique_id (ID or name) and/or study_name. + """ + result = await search.search_program_list() + return { + + "result": result, + "status": "success" + } if __name__ == '__main__': uvicorn.run(APP) From 42545675c1a66e04418c2d345c9c2c3011174afc Mon Sep 17 00:00:00 2001 From: YSK Date: Fri, 26 Apr 2024 13:38:03 -0400 Subject: [PATCH 2/3] fixed study list --- src/dug/core/async_search.py | 44 ++++++++++++++++++++++++++---------- src/dug/server.py | 2 +- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/src/dug/core/async_search.py b/src/dug/core/async_search.py index 7675b521..1414c788 100644 --- a/src/dug/core/async_search.py +++ b/src/dug/core/async_search.py @@ -467,6 +467,7 @@ async def search_program(self, program_name=None, offset=0, size=None): Search for studies by unique_id (ID or name) and/or study_name. """ + # Initialize the query_body with the outer structure query_body = { "query": { "bool": { @@ -476,45 +477,64 @@ async def search_program(self, program_name=None, offset=0, size=None): "aggs": { "unique_collection_ids": { "terms": { - "field": "collection_id.keyword" + "field": "collection_id.keyword", + "size":1000 + }, + "aggs": { + "collection_details": { + "top_hits": { + "_source": ["collection_id", "collection_name", "collection_action"], + "size": 1 + } + } } } } } - # specify the fields to be returned - query_body["_source"] = ["collection_id", "collection_name", "collection_action"] - - # search for program_name based on uses input + # Add conditions based on user input if program_name: + # Lowercase the program_name before adding it to the query + program_name = program_name.lower() query_body["query"]["bool"]["must"].append({ "match": {"data_type": program_name} }) - #print("query_body", query_body) + print("query_body", query_body) # Prepare the query body for execution body = query_body - #print(body) + print(body) + + # Execute the search query # Execute the search query search_results = await self.es.search( index="variables_index", body=body, - filter_path=['hits.hits._id', 'hits.hits._type', 'hits.hits._source', 'aggregations.unique_collection_ids.buckets'], from_=offset, size=size ) - # The unique collection_ids will be in the 'aggregations' field of the response + # The unique collection_ids and their details will be in the 'aggregations' field of the response unique_collection_ids = search_results['aggregations']['unique_collection_ids']['buckets'] - #print("Unique collection_ids:", unique_collection_ids) + # Prepare a list to hold the collection details + collection_details_list = [] + for bucket in unique_collection_ids: + collection_details = bucket['collection_details']['hits']['hits'][0]['_source'] + # Append the details to the list in the desired format + collection_details_list.append(collection_details) - #print(search_results) - return search_results + # Print the list of collection details in JSON format + import json + #print(json.dumps(collection_details_list, indent=4)) + + return collection_details_list + + async def search_program_list(self): query_body = { diff --git a/src/dug/server.py b/src/dug/server.py index 15716cb7..1e5acec5 100644 --- a/src/dug/server.py +++ b/src/dug/server.py @@ -147,7 +147,7 @@ async def search_program( program_name: Optional[str] = None): "status": "success" } -@APP.post('/program_list') +@APP.get('/program_list') async def get_program_list(): """ Search for studies by unique_id (ID or name) and/or study_name. From c0d60986f8b2e6fe310a68744d652eb1d8c631d6 Mon Sep 17 00:00:00 2001 From: YSK Date: Fri, 26 Apr 2024 16:26:31 -0400 Subject: [PATCH 3/3] cleaned the code --- src/dug/core/async_search.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/src/dug/core/async_search.py b/src/dug/core/async_search.py index 1414c788..35ee14c9 100644 --- a/src/dug/core/async_search.py +++ b/src/dug/core/async_search.py @@ -466,7 +466,6 @@ async def search_program(self, program_name=None, offset=0, size=None): """ Search for studies by unique_id (ID or name) and/or study_name. """ - # Initialize the query_body with the outer structure query_body = { "query": { @@ -504,10 +503,7 @@ async def search_program(self, program_name=None, offset=0, size=None): # Prepare the query body for execution body = query_body - print(body) - - # Execute the search query - + # Execute the search query search_results = await self.es.search( index="variables_index", @@ -527,10 +523,6 @@ async def search_program(self, program_name=None, offset=0, size=None): # Append the details to the list in the desired format collection_details_list.append(collection_details) - # Print the list of collection details in JSON format - import json - #print(json.dumps(collection_details_list, indent=4)) - return collection_details_list @@ -559,15 +551,12 @@ async def search_program_list(self): index="variables_index", body=query_body ) - # The unique data_types and their counts of unique collection_ids will be in the 'aggregations' field of the response unique_data_types = search_results['aggregations']['unique_program_names']['buckets'] - # Testing the output so print the unique data_types and their counts of unique collection_ids - #for bucket in unique_data_types: - # print(f"data_type: {bucket['key']}, count of unique collection_ids: {bucket['No_of_studies']['value']}") - return unique_data_types + + def _get_var_query(self, concept, fuzziness, prefix_length, query): """Returns ES query for variable search""" es_query = {