diff --git a/src/apps/slackapp/slackapp/bolt_app.py b/src/apps/slackapp/slackapp/bolt_app.py index f53d2a9c..c49fbcad 100644 --- a/src/apps/slackapp/slackapp/bolt_app.py +++ b/src/apps/slackapp/slackapp/bolt_app.py @@ -170,6 +170,7 @@ def file_event_handler(say, files, user_id, thread_ts, question): user_id=user_id, files=files, token=cfg.SLACK_OAUTH_TOKEN, + llm=llm, ) file_prompt_data = file_prompt.reconstruct_prompt_with_file() if file_prompt_data["status"] == "success": @@ -217,6 +218,12 @@ def event_test(client, say, event): # only will be executed if the user don't pass the daily limit # the daily limit is calculated based on the user's usage in a workspace # users with a daily limitation can be allowed to use in a different workspace + llm = SherpaChatOpenAI( + openai_api_key=cfg.OPENAI_API_KEY, + user_id=user_id, + team_id=team_id, + temperature=cfg.TEMPERATURE, + ) if can_execute: if "files" in event: @@ -227,6 +234,7 @@ def event_test(client, say, event): thread_ts=thread_ts, user_id=combined_id, question=question, + llm=llm, ) if file_event["status"] == "error": return diff --git a/src/sherpa_ai/scrape/file_scraper.py b/src/sherpa_ai/scrape/file_scraper.py index 9832ee35..9cc2077b 100644 --- a/src/sherpa_ai/scrape/file_scraper.py +++ b/src/sherpa_ai/scrape/file_scraper.py @@ -15,7 +15,7 @@ class QuestionWithFileHandler: - def __init__(self, question, files, token, user_id): + def __init__(self, question, files, token, user_id, llm): """ Initializes the QuestionWithFileHandler instance. 
@@ -32,6 +32,7 @@ def __init__(self, question, files, token, user_id): self.token = token self.files = files self.user_id = user_id + self.llm = llm def reconstruct_prompt_with_file(self): """ @@ -124,7 +125,7 @@ def prompt_reconstruct(self, file_info, data=str): question=self.question, title=file_info["title"], text_data=data, - user_id=self.user_id, + llm=self.llm, ) while count_string_tokens(chunk_summary, "gpt-3.5-turbo") > 3000: @@ -134,7 +135,7 @@ def prompt_reconstruct(self, file_info, data=str): question=self.question, title=file_info["title"], text_data=chunk_summary, - user_id=self.user_id, + llm=self.llm, ) result = question_with_file_reconstructor( file_format=file_info["filetype"], diff --git a/src/sherpa_ai/scrape/prompt_reconstructor.py b/src/sherpa_ai/scrape/prompt_reconstructor.py index 94f4cabc..cc7aa21d 100644 --- a/src/sherpa_ai/scrape/prompt_reconstructor.py +++ b/src/sherpa_ai/scrape/prompt_reconstructor.py @@ -15,7 +15,7 @@ class PromptReconstructor: and rewrites the question to incorporate a summary of the scraped URLs. """ - def __init__(self, question, slack_message): + def __init__(self, question, slack_message, llm): """ Initialize the PromptReconstructor with a question and a Slack message. @@ -26,8 +26,9 @@ def __init__(self, question, slack_message): self.question = question self.slack_message = slack_message + self.llm = llm - def reconstruct_prompt(self, user_id=None): + def reconstruct_prompt(self): """ Reconstruct the prompt based on the question and the last Slack message. 
@@ -68,7 +69,7 @@ def reconstruct_prompt(self, user_id=None): link=link, question=question, text_data=scraped_data["data"], - user_id=user_id, + llm=self.llm, ) while ( @@ -79,7 +80,7 @@ def reconstruct_prompt(self, user_id=None): link=link, question=question, text_data=chunk_summary, - user_id=user_id, + llm=self.llm, ) final_summary.append({"data": chunk_summary, "link": link}) diff --git a/src/sherpa_ai/utils.py b/src/sherpa_ai/utils.py index 164a1ccb..41413fda 100644 --- a/src/sherpa_ai/utils.py +++ b/src/sherpa_ai/utils.py @@ -121,22 +121,11 @@ def count_string_tokens(string: str, model_name: str) -> int: return len(encoding.encode(string)) -def chunk_and_summarize( - text_data: str, - question: str, - link: str, - user_id: str = None, -): - llm = SherpaOpenAI( - temperature=cfg.TEMPERATURE, - openai_api_key=cfg.OPENAI_API_KEY, - user_id=user_id, - ) - +def chunk_and_summarize(text_data: str, question: str, link: str, llm): instruction = ( "include any information that can be used to answer the " - "question '{question}' the given literal text is a data " - "from the link {link}. Do not directly answer the question itself" + f"question '{question}' the given literal text is a data " + f"from the link {link}. Do not directly answer the question itself" ) text_splitter = TokenTextSplitter(chunk_size=3000, chunk_overlap=0) @@ -161,19 +150,15 @@ def chunk_and_summarize_file( question: str, file_name: str, file_format: str, + llm, title: str = None, - user_id: str = None, ): - llm = SherpaOpenAI( - temperature=cfg.TEMPERATURE, openai_api_key=cfg.OPENAI_API_KEY, user_id=user_id - ) - title = f",title {title} " if title is not None else "" instruction = ( - "include any information that can be used to answer the " - "question '{question}' the given literal text is a data " - "from the file named {file_name} {title} and file format {file_format} . 
Do not directly answer the question itself" + f"include any information that can be used to answer the " + f"question '{question}' the given literal text is a data " + f"from the file named {file_name} {title} and file format {file_format} . Do not directly answer the question itself" ) text_splitter = TokenTextSplitter(chunk_size=3000, chunk_overlap=0) chunked_text = text_splitter.split_text(text_data) diff --git a/src/tests/data/test_prompt_reconstructor_test_reconstruct_prompt_with_link_inside_succeeds.jsonl b/src/tests/data/test_prompt_reconstructor_test_reconstruct_prompt_with_link_inside_succeeds.jsonl new file mode 100644 index 00000000..e69de29b diff --git a/src/tests/unit_tests/scrape/test_prompt_reconstructor.py b/src/tests/unit_tests/scrape/test_prompt_reconstructor.py index 7c91bba5..3afda22f 100644 --- a/src/tests/unit_tests/scrape/test_prompt_reconstructor.py +++ b/src/tests/unit_tests/scrape/test_prompt_reconstructor.py @@ -3,6 +3,7 @@ import pytest from sherpa_ai.scrape.prompt_reconstructor import PromptReconstructor +from sherpa_ai.test_utils.llms import get_llm # Assuming that 'your_module' contains the 'PromptReconstructor' class @@ -33,11 +34,12 @@ def test_reconstruct_prompt_with_link_inside_succeeds( mock_get_link_from_slack_client_conversation, mock_scrape_with_url, mock_chunk_and_summarize, + get_llm, ): question = "Here's a " slack_message = "" - - reconstructor = PromptReconstructor(question, slack_message) + llm = get_llm(__file__, test_reconstruct_prompt_with_link_inside_succeeds.__name__) + reconstructor = PromptReconstructor(question, slack_message, llm) with patch( "sherpa_ai.scrape.prompt_reconstructor.chunk_and_summarize", return_value=mock_chunk_and_summarize,