From cba7130414b4621a4387865e13b570dacf2a9456 Mon Sep 17 00:00:00 2001
From: YSK <kaleyogeshs@gmail.com>
Date: Mon, 22 Apr 2024 16:00:20 -0400
Subject: [PATCH 1/3] Add program list search and /program_list endpoint;
 fix the studies filtered by program name

---
 src/dug/core/async_search.py | 35 ++++++++++++++++++++++++++++++++++-
 src/dug/server.py            | 12 +++++++++++-
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/src/dug/core/async_search.py b/src/dug/core/async_search.py
index 92689865..7675b521 100644
--- a/src/dug/core/async_search.py
+++ b/src/dug/core/async_search.py
@@ -491,7 +491,7 @@ async def search_program(self, program_name=None, offset=0, size=None):
                 "match": {"data_type": program_name}
             })
 
-        print("query_body", query_body)
+        #print("query_body", query_body)
 
         # Prepare the query body for execution
         body = query_body
@@ -514,7 +514,40 @@ async def search_program(self, program_name=None, offset=0, size=None):
 
         #print(search_results)
         return search_results
+    
+    async def search_program_list(self):
+
+        query_body = {
+            "size": 0,  # We don't need the documents themselves, so set the size to 0
+            "aggs": {
+                "unique_program_names": {
+                    "terms": {
+                        "field": "data_type.keyword"
+                    },
+                    "aggs": {
+                        "No_of_studies": {
+                            "cardinality": {
+                                "field": "collection_id.keyword"
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        # Execute the search query
+        search_results = await self.es.search(
+            index="variables_index",
+            body=query_body
+        )
+
+        # The unique data_types and their counts of unique collection_ids will be in the 'aggregations' field of the response
+        unique_data_types = search_results['aggregations']['unique_program_names']['buckets']
+
+        # Testing the output so print the unique data_types and their counts of unique collection_ids
+        #for bucket in unique_data_types:
+        #    print(f"data_type: {bucket['key']}, count of unique collection_ids: {bucket['No_of_studies']['value']}")
 
+        return unique_data_types
     def _get_var_query(self, concept, fuzziness, prefix_length, query):
         """Returns ES query for variable search"""
         es_query = {
diff --git a/src/dug/server.py b/src/dug/server.py
index ef03890e..15716cb7 100644
--- a/src/dug/server.py
+++ b/src/dug/server.py
@@ -147,6 +147,16 @@ async def search_program( program_name: Optional[str] = None):
         "status": "success"
     }
 
-
+@APP.post('/program_list')
+async def get_program_list():
+    """
+    Search for studies by unique_id (ID or name) and/or study_name.
+    """
+    result = await search.search_program_list()
+    return {
+  
+        "result": result,
+        "status": "success"
+    }
 if __name__ == '__main__':
     uvicorn.run(APP)

From 42545675c1a66e04418c2d345c9c2c3011174afc Mon Sep 17 00:00:00 2001
From: YSK <kaleyogeshs@gmail.com>
Date: Fri, 26 Apr 2024 13:38:03 -0400
Subject: [PATCH 2/3] Fix study list: return one details record per
 collection; make /program_list a GET route

---
 src/dug/core/async_search.py | 44 ++++++++++++++++++++++++++----------
 src/dug/server.py            |  2 +-
 2 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/src/dug/core/async_search.py b/src/dug/core/async_search.py
index 7675b521..1414c788 100644
--- a/src/dug/core/async_search.py
+++ b/src/dug/core/async_search.py
@@ -467,6 +467,7 @@ async def search_program(self, program_name=None, offset=0, size=None):
         Search for studies by unique_id (ID or name) and/or study_name.
         """
     
+ # Initialize the query_body with the outer structure
         query_body = {
             "query": {
                 "bool": {
@@ -476,45 +477,64 @@ async def search_program(self, program_name=None, offset=0, size=None):
             "aggs": {
                 "unique_collection_ids": {
                     "terms": {
-                        "field": "collection_id.keyword"
+                        "field": "collection_id.keyword",
+                        "size":1000
+                    },
+                    "aggs": {
+                        "collection_details": {
+                            "top_hits": {
+                                "_source": ["collection_id", "collection_name", "collection_action"],
+                                "size": 1
+                            }
+                        }
                     }
                 }
             }
         }
 
-        # specify the fields to be returned
-        query_body["_source"] = ["collection_id", "collection_name", "collection_action"]
-
-        # search for program_name based on uses input
+        # Add conditions based on user input
         if program_name:
+            # Lowercase the program_name before adding it to the query
+            program_name = program_name.lower()
             query_body["query"]["bool"]["must"].append({
                 "match": {"data_type": program_name}
             })
 
-        #print("query_body", query_body)
+        print("query_body", query_body)
 
         # Prepare the query body for execution
         body = query_body
-        #print(body)
+        print(body)
+
+        # Execute the search query
 
         # Execute the search query
         search_results = await self.es.search(
             index="variables_index",
             body=body,
-            filter_path=['hits.hits._id', 'hits.hits._type', 'hits.hits._source', 'aggregations.unique_collection_ids.buckets'],
             from_=offset,
             size=size
         )
 
-        # The unique collection_ids will be in the 'aggregations' field of the response
+        # The unique collection_ids and their details will be in the 'aggregations' field of the response
         unique_collection_ids = search_results['aggregations']['unique_collection_ids']['buckets']
 
-        #print("Unique collection_ids:", unique_collection_ids)
+        # Prepare a list to hold the collection details
+        collection_details_list = []
 
+        for bucket in unique_collection_ids:
+            collection_details = bucket['collection_details']['hits']['hits'][0]['_source']
+            # Append the details to the list in the desired format
+            collection_details_list.append(collection_details)
 
-        #print(search_results)
-        return search_results
+        # Print the list of collection details in JSON format
+        import json
+        #print(json.dumps(collection_details_list, indent=4))
+
+        return collection_details_list
     
+
+
     async def search_program_list(self):
 
         query_body = {
diff --git a/src/dug/server.py b/src/dug/server.py
index 15716cb7..1e5acec5 100644
--- a/src/dug/server.py
+++ b/src/dug/server.py
@@ -147,7 +147,7 @@ async def search_program( program_name: Optional[str] = None):
         "status": "success"
     }
 
-@APP.post('/program_list')
+@APP.get('/program_list')
 async def get_program_list():
     """
     Search for studies by unique_id (ID or name) and/or study_name.

From c0d60986f8b2e6fe310a68744d652eb1d8c631d6 Mon Sep 17 00:00:00 2001
From: YSK <kaleyogeshs@gmail.com>
Date: Fri, 26 Apr 2024 16:26:31 -0400
Subject: [PATCH 3/3] Clean up async_search: remove debug print, unused
 json import, and commented-out test code

---
 src/dug/core/async_search.py | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/src/dug/core/async_search.py b/src/dug/core/async_search.py
index 1414c788..35ee14c9 100644
--- a/src/dug/core/async_search.py
+++ b/src/dug/core/async_search.py
@@ -466,7 +466,6 @@ async def search_program(self, program_name=None, offset=0, size=None):
         """
         Search for studies by unique_id (ID or name) and/or study_name.
         """
-    
  # Initialize the query_body with the outer structure
         query_body = {
             "query": {
@@ -504,10 +503,7 @@ async def search_program(self, program_name=None, offset=0, size=None):
 
         # Prepare the query body for execution
         body = query_body
-        print(body)
-
-        # Execute the search query
-
+    
         # Execute the search query
         search_results = await self.es.search(
             index="variables_index",
@@ -527,10 +523,6 @@ async def search_program(self, program_name=None, offset=0, size=None):
             # Append the details to the list in the desired format
             collection_details_list.append(collection_details)
 
-        # Print the list of collection details in JSON format
-        import json
-        #print(json.dumps(collection_details_list, indent=4))
-
         return collection_details_list
     
 
@@ -559,15 +551,12 @@ async def search_program_list(self):
             index="variables_index",
             body=query_body
         )
-
         # The unique data_types and their counts of unique collection_ids will be in the 'aggregations' field of the response
         unique_data_types = search_results['aggregations']['unique_program_names']['buckets']
 
-        # Testing the output so print the unique data_types and their counts of unique collection_ids
-        #for bucket in unique_data_types:
-        #    print(f"data_type: {bucket['key']}, count of unique collection_ids: {bucket['No_of_studies']['value']}")
-
         return unique_data_types
+
+
     def _get_var_query(self, concept, fuzziness, prefix_length, query):
         """Returns ES query for variable search"""
         es_query = {