Skip to content

Commit

Permalink
Merge pull request #383 from Eyobyb/llm_initialization_lifting
Browse files Browse the repository at this point in the history
 Update PromptReconstructor to include llm parameter
  • Loading branch information
amirfz authored Jun 12, 2024
2 parents ba9e631 + d900bc1 commit 0237604
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 31 deletions.
8 changes: 8 additions & 0 deletions src/apps/slackapp/slackapp/bolt_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ def file_event_handler(say, files, user_id, thread_ts, question):
user_id=user_id,
files=files,
token=cfg.SLACK_OAUTH_TOKEN,
llm=llm,
)
file_prompt_data = file_prompt.reconstruct_prompt_with_file()
if file_prompt_data["status"] == "success":
Expand Down Expand Up @@ -217,6 +218,12 @@ def event_test(client, say, event):
# will only be executed if the user hasn't exceeded the daily limit
# the daily limit is calculated based on the user's usage in a workspace
# users with a daily limitation can still be allowed to use it in a different workspace
llm = SherpaChatOpenAI(
openai_api_key=cfg.OPENAI_API_KEY,
user_id=user_id,
team_id=team_id,
temperature=cfg.TEMPERATURE,
)

if can_execute:
if "files" in event:
Expand All @@ -227,6 +234,7 @@ def event_test(client, say, event):
thread_ts=thread_ts,
user_id=combined_id,
question=question,
llm=llm,
)
if file_event["status"] == "error":
return
Expand Down
7 changes: 4 additions & 3 deletions src/sherpa_ai/scrape/file_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@


class QuestionWithFileHandler:
def __init__(self, question, files, token, user_id):
def __init__(self, question, files, token, user_id, team_id, llm):
"""
Initializes the QuestionWithFileHandler instance.
Expand All @@ -32,6 +32,7 @@ def __init__(self, question, files, token, user_id):
self.token = token
self.files = files
self.user_id = user_id
self.llm = llm

def reconstruct_prompt_with_file(self):
"""
Expand Down Expand Up @@ -124,7 +125,7 @@ def prompt_reconstruct(self, file_info, data=str):
question=self.question,
title=file_info["title"],
text_data=data,
user_id=self.user_id,
llm=self.llm,
)

while count_string_tokens(chunk_summary, "gpt-3.5-turbo") > 3000:
Expand All @@ -134,7 +135,7 @@ def prompt_reconstruct(self, file_info, data=str):
question=self.question,
title=file_info["title"],
text_data=chunk_summary,
user_id=self.user_id,
llm=self.llm,
)
result = question_with_file_reconstructor(
file_format=file_info["filetype"],
Expand Down
9 changes: 5 additions & 4 deletions src/sherpa_ai/scrape/prompt_reconstructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class PromptReconstructor:
and rewrites the question to incorporate a summary of the scraped URLs.
"""

def __init__(self, question, slack_message):
def __init__(self, question, slack_message, llm):
"""
Initialize the PromptReconstructor with a question and a Slack message.
Expand All @@ -26,8 +26,9 @@ def __init__(self, question, slack_message):

self.question = question
self.slack_message = slack_message
self.llm = llm

def reconstruct_prompt(self, user_id=None):
def reconstruct_prompt(self):
"""
Reconstruct the prompt based on the question and the last Slack message.
Expand Down Expand Up @@ -68,7 +69,7 @@ def reconstruct_prompt(self, user_id=None):
link=link,
question=question,
text_data=scraped_data["data"],
user_id=user_id,
llm=self.llm,
)

while (
Expand All @@ -79,7 +80,7 @@ def reconstruct_prompt(self, user_id=None):
link=link,
question=question,
text_data=chunk_summary,
user_id=user_id,
llm=self.llm,
)

final_summary.append({"data": chunk_summary, "link": link})
Expand Down
29 changes: 7 additions & 22 deletions src/sherpa_ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,22 +121,11 @@ def count_string_tokens(string: str, model_name: str) -> int:
return len(encoding.encode(string))


def chunk_and_summarize(
text_data: str,
question: str,
link: str,
user_id: str = None,
):
llm = SherpaOpenAI(
temperature=cfg.TEMPERATURE,
openai_api_key=cfg.OPENAI_API_KEY,
user_id=user_id,
)

def chunk_and_summarize(text_data: str, question: str, link: str, llm):
instruction = (
"include any information that can be used to answer the "
"question '{question}' the given literal text is a data "
"from the link {link}. Do not directly answer the question itself"
f"question '{question}' the given literal text is a data "
f"from the link {link}. Do not directly answer the question itself"
)

text_splitter = TokenTextSplitter(chunk_size=3000, chunk_overlap=0)
Expand All @@ -161,19 +150,15 @@ def chunk_and_summarize_file(
question: str,
file_name: str,
file_format: str,
llm,
title: str = None,
user_id: str = None,
):
llm = SherpaOpenAI(
temperature=cfg.TEMPERATURE, openai_api_key=cfg.OPENAI_API_KEY, user_id=user_id
)

title = f",title {title} " if title is not None else ""

instruction = (
"include any information that can be used to answer the "
"question '{question}' the given literal text is a data "
"from the file named {file_name} {title} and file format {file_format} . Do not directly answer the question itself"
f"include any information that can be used to answer the "
f"question '{question}' the given literal text is a data "
f"from the file named {file_name} {title} and file format {file_format} . Do not directly answer the question itself"
)
text_splitter = TokenTextSplitter(chunk_size=3000, chunk_overlap=0)
chunked_text = text_splitter.split_text(text_data)
Expand Down
6 changes: 4 additions & 2 deletions src/tests/unit_tests/scrape/test_prompt_reconstructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pytest

from sherpa_ai.scrape.prompt_reconstructor import PromptReconstructor
from sherpa_ai.test_utils.llms import get_llm


# Assuming that 'your_module' contains the 'PromptReconstructor' class
Expand Down Expand Up @@ -33,11 +34,12 @@ def test_reconstruct_prompt_with_link_inside_succeeds(
mock_get_link_from_slack_client_conversation,
mock_scrape_with_url,
mock_chunk_and_summarize,
get_llm,
):
question = "Here's a <https://google.com>"
slack_message = ""

reconstructor = PromptReconstructor(question, slack_message)
llm = get_llm(__file__, test_reconstruct_prompt_with_link_inside_succeeds.__name__)
reconstructor = PromptReconstructor(question, slack_message, llm)
with patch(
"sherpa_ai.scrape.prompt_reconstructor.chunk_and_summarize",
return_value=mock_chunk_and_summarize,
Expand Down

0 comments on commit 0237604

Please sign in to comment.