From e3adb31a4dff898490783c0625aeea61ea2cf046 Mon Sep 17 00:00:00 2001
From: Eyob
Date: Mon, 27 May 2024 15:35:56 +0300
Subject: [PATCH 1/2] chore: Update PromptReconstructor to include llm parameter in constructor and method calls

---
 src/apps/slackapp/slackapp/bolt_app.py        | 10 +++++++++-
 src/sherpa_ai/scrape/file_scraper.py          |  7 ++---
 src/sherpa_ai/scrape/prompt_reconstructor.py  |  9 ++++---
 src/sherpa_ai/utils.py                        | 27 +++++--------------
 ...uct_prompt_with_link_inside_succeeds.jsonl |  0
 .../scrape/test_prompt_reconstructor.py       |  7 ++---
 6 files changed, 29 insertions(+), 31 deletions(-)
 create mode 100644 src/tests/data/test_prompt_reconstructor_test_reconstruct_prompt_with_link_inside_succeeds.jsonl

diff --git a/src/apps/slackapp/slackapp/bolt_app.py b/src/apps/slackapp/slackapp/bolt_app.py
index f53d2a9c..c49fbcad 100644
--- a/src/apps/slackapp/slackapp/bolt_app.py
+++ b/src/apps/slackapp/slackapp/bolt_app.py
@@ -167 +167 @@
-def file_event_handler(say, files, user_id, thread_ts, question):
+def file_event_handler(say, files, user_id, thread_ts, question, llm):
@@ -170,6 +170,7 @@ def file_event_handler(say, files, user_id, thread_ts, question):
         user_id=user_id,
         files=files,
         token=cfg.SLACK_OAUTH_TOKEN,
+        llm=llm,
     )
     file_prompt_data = file_prompt.reconstruct_prompt_with_file()
     if file_prompt_data["status"] == "success":
@@ -217,6 +218,12 @@ def event_test(client, say, event):
     # only will be executed if the user don't pass the daily limit
     # the daily limit is calculated based on the user's usage in a workspace
     # users with a daily limitation can be allowed to use in a different workspace
+    llm = SherpaChatOpenAI(
+        openai_api_key=cfg.OPENAI_API_KEY,
+        user_id=user_id,
+        team_id=team_id,
+        temperature=cfg.TEMPERATURE,
+    )
 
     if can_execute:
         if "files" in event:
@@ -227,6 +234,7 @@ def event_test(client, say, event):
                 thread_ts=thread_ts,
                 user_id=combined_id,
                 question=question,
+                llm=llm,
             )
             if file_event["status"] == "error":
                 return
diff --git a/src/sherpa_ai/scrape/file_scraper.py b/src/sherpa_ai/scrape/file_scraper.py
index 9832ee35..9cc2077b 100644
--- a/src/sherpa_ai/scrape/file_scraper.py
+++ b/src/sherpa_ai/scrape/file_scraper.py
@@ -15,7 +15,7 @@ class QuestionWithFileHandler:
-    def __init__(self, question, files, token, user_id):
+    def __init__(self, question, files, token, user_id, llm):
         """
         Initializes the QuestionWithFileHandler instance.
@@ -32,6 +32,7 @@ def __init__(self, question, files, token, user_id):
         self.token = token
         self.files = files
         self.user_id = user_id
+        self.llm = llm
 
     def reconstruct_prompt_with_file(self):
         """
@@ -124,7 +125,7 @@ def prompt_reconstruct(self, file_info, data=str):
             question=self.question,
             title=file_info["title"],
             text_data=data,
-            user_id=self.user_id,
+            llm=self.llm,
         )
 
         while count_string_tokens(chunk_summary, "gpt-3.5-turbo") > 3000:
@@ -134,7 +135,7 @@ def prompt_reconstruct(self, file_info, data=str):
                 question=self.question,
                 title=file_info["title"],
                 text_data=chunk_summary,
-                user_id=self.user_id,
+                llm=self.llm,
             )
         result = question_with_file_reconstructor(
             file_format=file_info["filetype"],
diff --git a/src/sherpa_ai/scrape/prompt_reconstructor.py b/src/sherpa_ai/scrape/prompt_reconstructor.py
index 94f4cabc..cc7aa21d 100644
--- a/src/sherpa_ai/scrape/prompt_reconstructor.py
+++ b/src/sherpa_ai/scrape/prompt_reconstructor.py
@@ -15,7 +15,7 @@ class PromptReconstructor:
     and rewrites the question to incorporate a summary of the scraped URLs.
     """
 
-    def __init__(self, question, slack_message):
+    def __init__(self, question, slack_message, llm):
         """
         Initialize the PromptReconstructor with a question and a Slack message.
@@ -26,8 +26,9 @@ def __init__(self, question, slack_message):
         self.question = question
         self.slack_message = slack_message
+        self.llm = llm
 
-    def reconstruct_prompt(self, user_id=None):
+    def reconstruct_prompt(self):
         """
         Reconstruct the prompt based on the question and the last Slack message.
 
@@ -68,7 +69,7 @@ def reconstruct_prompt(self):
                 link=link,
                 question=question,
                 text_data=scraped_data["data"],
-                user_id=user_id,
+                llm=self.llm,
             )
 
             while (
@@ -79,7 +80,7 @@ def reconstruct_prompt(self):
                     link=link,
                     question=question,
                     text_data=chunk_summary,
-                    user_id=user_id,
+                    llm=self.llm,
                 )
 
         final_summary.append({"data": chunk_summary, "link": link})
diff --git a/src/sherpa_ai/utils.py b/src/sherpa_ai/utils.py
index 57bdc26c..c58c1bd1 100644
--- a/src/sherpa_ai/utils.py
+++ b/src/sherpa_ai/utils.py
@@ -121,22 +121,12 @@ def count_string_tokens(string: str, model_name: str) -> int:
     return len(encoding.encode(string))
 
 
-def chunk_and_summarize(
-    text_data: str,
-    question: str,
-    link: str,
-    user_id: str = None,
-):
-    llm = SherpaOpenAI(
-        temperature=cfg.TEMPERATURE,
-        openai_api_key=cfg.OPENAI_API_KEY,
-        user_id=user_id,
-    )
+def chunk_and_summarize(text_data: str, question: str, link: str, llm):
 
     instruction = (
         "include any information that can be used to answer the "
-        "question '{question}' the given literal text is a data "
-        "from the link {link}. Do not directly answer the question itself"
+        f"question '{question}' the given literal text is a data "
+        f"from the link {link}. Do not directly answer the question itself"
     )
 
     text_splitter = TokenTextSplitter(chunk_size=3000, chunk_overlap=0)
@@ -161,19 +151,16 @@ def chunk_and_summarize_file(
     question: str,
     file_name: str,
     file_format: str,
+    llm,
     title: str = None,
-    user_id: str = None,
 ):
-    llm = SherpaOpenAI(
-        temperature=cfg.TEMPERATURE, openai_api_key=cfg.OPENAI_API_KEY, user_id=user_id
-    )
 
     title = f",title {title} " if title is not None else ""
 
     instruction = (
-        "include any information that can be used to answer the "
-        "question '{question}' the given literal text is a data "
-        "from the file named {file_name} {title} and file format {file_format} . Do not directly answer the question itself"
+        f"include any information that can be used to answer the "
+        f"question '{question}' the given literal text is a data "
+        f"from the file named {file_name} {title} and file format {file_format} . Do not directly answer the question itself"
     )
 
     text_splitter = TokenTextSplitter(chunk_size=3000, chunk_overlap=0)
     chunked_text = text_splitter.split_text(text_data)
diff --git a/src/tests/data/test_prompt_reconstructor_test_reconstruct_prompt_with_link_inside_succeeds.jsonl b/src/tests/data/test_prompt_reconstructor_test_reconstruct_prompt_with_link_inside_succeeds.jsonl
new file mode 100644
index 00000000..e69de29b
diff --git a/src/tests/unit_tests/scrape/test_prompt_reconstructor.py b/src/tests/unit_tests/scrape/test_prompt_reconstructor.py
index 7c91bba5..6cb0ba5e 100644
--- a/src/tests/unit_tests/scrape/test_prompt_reconstructor.py
+++ b/src/tests/unit_tests/scrape/test_prompt_reconstructor.py
@@ -3,7 +3,7 @@
 import pytest
 
 from sherpa_ai.scrape.prompt_reconstructor import PromptReconstructor
-
+from sherpa_ai.test_utils.llms import get_llm
 
 # Assuming that 'your_module' contains the 'PromptReconstructor' class
@@ -33,11 +33,12 @@ def test_reconstruct_prompt_with_link_inside_succeeds(
     mock_get_link_from_slack_client_conversation,
     mock_scrape_with_url,
     mock_chunk_and_summarize,
+    get_llm
 ):
     question = "Here's a "
     slack_message = ""
-
-    reconstructor = PromptReconstructor(question, slack_message)
+    llm = get_llm(__file__, test_reconstruct_prompt_with_link_inside_succeeds.__name__)
+    reconstructor = PromptReconstructor(question, slack_message , llm)
     with patch(
         "sherpa_ai.scrape.prompt_reconstructor.chunk_and_summarize",
         return_value=mock_chunk_and_summarize,

From 7e2e09f4f5c27f03c3671c2bffcd40f685a9551d Mon Sep 17 00:00:00 2001
From: Eyob
Date: Mon, 27 May 2024 15:44:34 +0300
Subject: [PATCH 2/2] format the code

---
 src/sherpa_ai/utils.py                                   | 2 --
 src/tests/unit_tests/scrape/test_prompt_reconstructor.py | 5 +++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/sherpa_ai/utils.py b/src/sherpa_ai/utils.py
index c58c1bd1..c0b6bc9d 100644
--- a/src/sherpa_ai/utils.py
+++ b/src/sherpa_ai/utils.py
@@ -122,7 +122,6 @@ def count_string_tokens(string: str, model_name: str) -> int:
 
 
 def chunk_and_summarize(text_data: str, question: str, link: str, llm):
-
     instruction = (
         "include any information that can be used to answer the "
         f"question '{question}' the given literal text is a data "
@@ -154,7 +153,6 @@ def chunk_and_summarize_file(
     llm,
     title: str = None,
 ):
-
     title = f",title {title} " if title is not None else ""
 
     instruction = (
diff --git a/src/tests/unit_tests/scrape/test_prompt_reconstructor.py b/src/tests/unit_tests/scrape/test_prompt_reconstructor.py
index 6cb0ba5e..3afda22f 100644
--- a/src/tests/unit_tests/scrape/test_prompt_reconstructor.py
+++ b/src/tests/unit_tests/scrape/test_prompt_reconstructor.py
@@ -5,6 +5,7 @@
 
 from sherpa_ai.scrape.prompt_reconstructor import PromptReconstructor
 from sherpa_ai.test_utils.llms import get_llm
+
 
 # Assuming that 'your_module' contains the 'PromptReconstructor' class
@@ -34,12 +34,12 @@ def test_reconstruct_prompt_with_link_inside_succeeds(
     mock_get_link_from_slack_client_conversation,
     mock_scrape_with_url,
     mock_chunk_and_summarize,
-    get_llm
+    get_llm,
 ):
     question = "Here's a "
     slack_message = ""
     llm = get_llm(__file__, test_reconstruct_prompt_with_link_inside_succeeds.__name__)
-    reconstructor = PromptReconstructor(question, slack_message , llm)
+    reconstructor = PromptReconstructor(question, slack_message, llm)
     with patch(
         "sherpa_ai.scrape.prompt_reconstructor.chunk_and_summarize",
         return_value=mock_chunk_and_summarize,
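
Usage after these patches — a minimal sketch of the new calling convention, not part of the diff above. Only the constructor and method signatures come from the patches; the SherpaChatOpenAI import path, the config module alias, the placeholder Slack values, and the "data" key on the file handler's result are assumptions and are marked as such in the comments:

import sherpa_ai.config as cfg  # assumed alias; bolt_app.py reads cfg.OPENAI_API_KEY etc.
from sherpa_ai.models.sherpa_base_chat_model import SherpaChatOpenAI  # import path assumed
from sherpa_ai.scrape.file_scraper import QuestionWithFileHandler
from sherpa_ai.scrape.prompt_reconstructor import PromptReconstructor

# Callers now build the LLM once and inject it, instead of passing a user_id
# down for each helper to construct its own SherpaOpenAI internally.
llm = SherpaChatOpenAI(
    openai_api_key=cfg.OPENAI_API_KEY,
    user_id="U0000000",  # hypothetical Slack user id
    team_id="T0000000",  # hypothetical Slack team id
    temperature=cfg.TEMPERATURE,
)

# Link summarization: reconstruct_prompt() no longer takes a user_id argument.
slack_message = ""  # the unit test above passes an empty message
reconstructor = PromptReconstructor(
    "What does the linked page say?",  # hypothetical question
    slack_message,
    llm,
)
new_prompt = reconstructor.reconstruct_prompt()

# File summarization follows the same injection pattern.
files = [{"id": "F0000000"}]  # hypothetical Slack file objects from an event payload
handler = QuestionWithFileHandler(
    question="Summarize the attached file",  # hypothetical question
    files=files,
    token=cfg.SLACK_OAUTH_TOKEN,
    user_id="U0000000",
    llm=llm,
)
result = handler.reconstruct_prompt_with_file()
if result["status"] == "success":  # bolt_app.py branches on "status"
    new_prompt = result["data"]  # "data" key assumed; the diff only shows the "status" check

Injecting the model also lets the unit test swap in the recorded get_llm fixture, which is exactly what test_prompt_reconstructor.py does after these patches.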