From e3adb31a4dff898490783c0625aeea61ea2cf046 Mon Sep 17 00:00:00 2001
From: Eyob
Date: Mon, 27 May 2024 15:35:56 +0300
Subject: [PATCH 1/2] chore: Update PromptReconstructor to include llm parameter in constructor and method calls

---
 src/apps/slackapp/slackapp/bolt_app.py        | 10 +++++++++-
 src/sherpa_ai/scrape/file_scraper.py          |  7 ++---
 src/sherpa_ai/scrape/prompt_reconstructor.py  |  9 ++++---
 src/sherpa_ai/utils.py                        | 27 +++++--------------
 ...uct_prompt_with_link_inside_succeeds.jsonl |  0
 .../scrape/test_prompt_reconstructor.py       |  7 ++---
 6 files changed, 29 insertions(+), 31 deletions(-)
 create mode 100644 src/tests/data/test_prompt_reconstructor_test_reconstruct_prompt_with_link_inside_succeeds.jsonl

diff --git a/src/apps/slackapp/slackapp/bolt_app.py b/src/apps/slackapp/slackapp/bolt_app.py
index f53d2a9c..c49fbcad 100644
--- a/src/apps/slackapp/slackapp/bolt_app.py
+++ b/src/apps/slackapp/slackapp/bolt_app.py
@@ -167 +167 @@
-def file_event_handler(say, files, user_id, thread_ts, question):
+def file_event_handler(say, files, user_id, thread_ts, question, llm):
@@ -170,6 +170,7 @@ def file_event_handler(say, files, user_id, thread_ts, question):
         user_id=user_id,
         files=files,
         token=cfg.SLACK_OAUTH_TOKEN,
+        llm=llm,
     )
     file_prompt_data = file_prompt.reconstruct_prompt_with_file()
     if file_prompt_data["status"] == "success":
@@ -217,6 +218,12 @@ def event_test(client, say, event):
     # only will be executed if the user don't pass the daily limit
     # the daily limit is calculated based on the user's usage in a workspace
     # users with a daily limitation can be allowed to use in a different workspace
+    llm = SherpaChatOpenAI(
+        openai_api_key=cfg.OPENAI_API_KEY,
+        user_id=user_id,
+        team_id=team_id,
+        temperature=cfg.TEMPERATURE,
+    )
 
     if can_execute:
         if "files" in event:
@@ -227,6 +234,7 @@ def event_test(client, say, event):
                 thread_ts=thread_ts,
                 user_id=combined_id,
                 question=question,
+                llm=llm,
             )
             if file_event["status"] == "error":
                 return
diff --git a/src/sherpa_ai/scrape/file_scraper.py b/src/sherpa_ai/scrape/file_scraper.py
index 9832ee35..9cc2077b 100644
--- a/src/sherpa_ai/scrape/file_scraper.py
+++ b/src/sherpa_ai/scrape/file_scraper.py
@@ -15,7 +15,7 @@ class QuestionWithFileHandler:
-    def __init__(self, question, files, token, user_id):
+    def __init__(self, question, files, token, user_id, llm):
         """
         Initializes the QuestionWithFileHandler instance.
@@ -32,6 +32,7 @@ def __init__(self, question, files, token, user_id):
         self.token = token
         self.files = files
         self.user_id = user_id
+        self.llm = llm
 
     def reconstruct_prompt_with_file(self):
         """
@@ -124,7 +125,7 @@ def prompt_reconstruct(self, file_info, data=str):
             question=self.question,
             title=file_info["title"],
             text_data=data,
-            user_id=self.user_id,
+            llm=self.llm,
         )
 
         while count_string_tokens(chunk_summary, "gpt-3.5-turbo") > 3000:
@@ -134,7 +135,7 @@ def prompt_reconstruct(self, file_info, data=str):
                 question=self.question,
                 title=file_info["title"],
                 text_data=chunk_summary,
-                user_id=self.user_id,
+                llm=self.llm,
             )
         result = question_with_file_reconstructor(
             file_format=file_info["filetype"],
diff --git a/src/sherpa_ai/scrape/prompt_reconstructor.py b/src/sherpa_ai/scrape/prompt_reconstructor.py
index 94f4cabc..cc7aa21d 100644
--- a/src/sherpa_ai/scrape/prompt_reconstructor.py
+++ b/src/sherpa_ai/scrape/prompt_reconstructor.py
@@ -15,7 +15,7 @@ class PromptReconstructor:
     and rewrites the question to incorporate a summary of the scraped URLs.
     """
 
-    def __init__(self, question, slack_message):
+    def __init__(self, question, slack_message, llm):
         """
         Initialize the PromptReconstructor with a question and a Slack message.
@@ -26,8 +26,9 @@ def __init__(self, question, slack_message):
         self.question = question
         self.slack_message = slack_message
+        self.llm = llm
 
-    def reconstruct_prompt(self, user_id=None):
+    def reconstruct_prompt(self):
         """
         Reconstruct the prompt based on the question and the last Slack message.
 
@@ -68,7 +69,7 @@ def reconstruct_prompt(self):
                 link=link,
                 question=question,
                 text_data=scraped_data["data"],
-                user_id=user_id,
+                llm=self.llm,
             )
 
             while (
@@ -79,7 +80,7 @@ def reconstruct_prompt(self):
                     link=link,
                     question=question,
                     text_data=chunk_summary,
-                    user_id=user_id,
+                    llm=self.llm,
                 )
 
         final_summary.append({"data": chunk_summary, "link": link})
diff --git a/src/sherpa_ai/utils.py b/src/sherpa_ai/utils.py
index 57bdc26c..c58c1bd1 100644
--- a/src/sherpa_ai/utils.py
+++ b/src/sherpa_ai/utils.py
@@ -121,22 +121,12 @@ def count_string_tokens(string: str, model_name: str) -> int:
     return len(encoding.encode(string))
 
 
-def chunk_and_summarize(
-    text_data: str,
-    question: str,
-    link: str,
-    user_id: str = None,
-):
-    llm = SherpaOpenAI(
-        temperature=cfg.TEMPERATURE,
-        openai_api_key=cfg.OPENAI_API_KEY,
-        user_id=user_id,
-    )
+def chunk_and_summarize(text_data: str, question: str, link: str, llm):
 
     instruction = (
         "include any information that can be used to answer the "
-        "question '{question}' the given literal text is a data "
-        "from the link {link}. Do not directly answer the question itself"
+        f"question '{question}' the given literal text is a data "
+        f"from the link {link}. Do not directly answer the question itself"
     )
 
     text_splitter = TokenTextSplitter(chunk_size=3000, chunk_overlap=0)
@@ -161,19 +151,16 @@ def chunk_and_summarize_file(
     question: str,
     file_name: str,
     file_format: str,
+    llm,
     title: str = None,
-    user_id: str = None,
 ):
-    llm = SherpaOpenAI(
-        temperature=cfg.TEMPERATURE, openai_api_key=cfg.OPENAI_API_KEY, user_id=user_id
-    )
 
     title = f",title {title} " if title is not None else ""
 
     instruction = (
-        "include any information that can be used to answer the "
-        "question '{question}' the given literal text is a data "
-        "from the file named {file_name} {title} and file format {file_format} . Do not directly answer the question itself"
+        f"include any information that can be used to answer the "
+        f"question '{question}' the given literal text is a data "
+        f"from the file named {file_name} {title} and file format {file_format} . Do not directly answer the question itself"
     )
 
     text_splitter = TokenTextSplitter(chunk_size=3000, chunk_overlap=0)
     chunked_text = text_splitter.split_text(text_data)
diff --git a/src/tests/data/test_prompt_reconstructor_test_reconstruct_prompt_with_link_inside_succeeds.jsonl b/src/tests/data/test_prompt_reconstructor_test_reconstruct_prompt_with_link_inside_succeeds.jsonl
new file mode 100644
index 00000000..e69de29b
diff --git a/src/tests/unit_tests/scrape/test_prompt_reconstructor.py b/src/tests/unit_tests/scrape/test_prompt_reconstructor.py
index 7c91bba5..6cb0ba5e 100644
--- a/src/tests/unit_tests/scrape/test_prompt_reconstructor.py
+++ b/src/tests/unit_tests/scrape/test_prompt_reconstructor.py
@@ -3,7 +3,7 @@
 import pytest
 
 from sherpa_ai.scrape.prompt_reconstructor import PromptReconstructor
-
+from sherpa_ai.test_utils.llms import get_llm
 
 # Assuming that 'your_module' contains the 'PromptReconstructor' class
@@ -33,11 +33,12 @@ def test_reconstruct_prompt_with_link_inside_succeeds(
     mock_get_link_from_slack_client_conversation,
     mock_scrape_with_url,
     mock_chunk_and_summarize,
+    get_llm
 ):
     question = "Here's a "
     slack_message = ""
-
-    reconstructor = PromptReconstructor(question, slack_message)
+    llm = get_llm(__file__, test_reconstruct_prompt_with_link_inside_succeeds.__name__)
+    reconstructor = PromptReconstructor(question, slack_message , llm)
     with patch(
         "sherpa_ai.scrape.prompt_reconstructor.chunk_and_summarize",
         return_value=mock_chunk_and_summarize,

From 7e2e09f4f5c27f03c3671c2bffcd40f685a9551d Mon Sep 17 00:00:00 2001
From: Eyob
Date: Mon, 27 May 2024 15:44:34 +0300
Subject: [PATCH 2/2] format the code

---
 src/sherpa_ai/utils.py                                   | 2 --
 src/tests/unit_tests/scrape/test_prompt_reconstructor.py | 5 +++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/sherpa_ai/utils.py b/src/sherpa_ai/utils.py
index c58c1bd1..c0b6bc9d 100644
--- a/src/sherpa_ai/utils.py
+++ b/src/sherpa_ai/utils.py
@@ -122,7 +122,6 @@ def count_string_tokens(string: str, model_name: str) -> int:
 
 
 def chunk_and_summarize(text_data: str, question: str, link: str, llm):
-
     instruction = (
         "include any information that can be used to answer the "
         f"question '{question}' the given literal text is a data "
@@ -154,7 +153,6 @@ def chunk_and_summarize_file(
     llm,
     title: str = None,
 ):
-
     title = f",title {title} " if title is not None else ""
 
     instruction = (
diff --git a/src/tests/unit_tests/scrape/test_prompt_reconstructor.py b/src/tests/unit_tests/scrape/test_prompt_reconstructor.py
index 6cb0ba5e..3afda22f 100644
--- a/src/tests/unit_tests/scrape/test_prompt_reconstructor.py
+++ b/src/tests/unit_tests/scrape/test_prompt_reconstructor.py
@@ -5,6 +5,7 @@
 
 from sherpa_ai.scrape.prompt_reconstructor import PromptReconstructor
 from sherpa_ai.test_utils.llms import get_llm
+
 
 # Assuming that 'your_module' contains the 'PromptReconstructor' class
@@ -34,12 +34,12 @@ def test_reconstruct_prompt_with_link_inside_succeeds(
     mock_get_link_from_slack_client_conversation,
     mock_scrape_with_url,
     mock_chunk_and_summarize,
-    get_llm
+    get_llm,
 ):
     question = "Here's a "
     slack_message = ""
     llm = get_llm(__file__, test_reconstruct_prompt_with_link_inside_succeeds.__name__)
-    reconstructor = PromptReconstructor(question, slack_message , llm)
+    reconstructor = PromptReconstructor(question, slack_message, llm)
     with patch(
         "sherpa_ai.scrape.prompt_reconstructor.chunk_and_summarize",
         return_value=mock_chunk_and_summarize,
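
Usage after these patches — a minimal sketch of the new calling convention, not part of the diff above. Only the constructor and method signatures come from the patches; the SherpaChatOpenAI import path, the config module alias, the placeholder Slack values, and the "data" key on the file handler's result are assumptions and are marked as such in the comments:

import sherpa_ai.config as cfg  # assumed alias; bolt_app.py reads cfg.OPENAI_API_KEY etc.
from sherpa_ai.models.sherpa_base_chat_model import SherpaChatOpenAI  # import path assumed
from sherpa_ai.scrape.file_scraper import QuestionWithFileHandler
from sherpa_ai.scrape.prompt_reconstructor import PromptReconstructor

# Callers now build the LLM once and inject it, instead of passing a user_id
# down for each helper to construct its own SherpaOpenAI internally.
llm = SherpaChatOpenAI(
    openai_api_key=cfg.OPENAI_API_KEY,
    user_id="U0000000",  # hypothetical Slack user id
    team_id="T0000000",  # hypothetical Slack team id
    temperature=cfg.TEMPERATURE,
)

# Link summarization: reconstruct_prompt() no longer takes a user_id argument.
slack_message = ""  # the unit test above passes an empty message
reconstructor = PromptReconstructor(
    "What does the linked page say?",  # hypothetical question
    slack_message,
    llm,
)
new_prompt = reconstructor.reconstruct_prompt()

# File summarization follows the same injection pattern.
files = [{"id": "F0000000"}]  # hypothetical Slack file objects from an event payload
handler = QuestionWithFileHandler(
    question="Summarize the attached file",  # hypothetical question
    files=files,
    token=cfg.SLACK_OAUTH_TOKEN,
    user_id="U0000000",
    llm=llm,
)
result = handler.reconstruct_prompt_with_file()
if result["status"] == "success":  # bolt_app.py branches on "status"
    new_prompt = result["data"]  # "data" key assumed; the diff only shows the "status" check

Injecting the model also lets the unit test swap in the recorded get_llm fixture, which is exactly what test_prompt_reconstructor.py does after these patches.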