From 5a11279ada4304572e2a8fbccb20bef790216740 Mon Sep 17 00:00:00 2001 From: ChenKua Date: Thu, 8 Feb 2024 00:00:42 -0500 Subject: [PATCH 1/6] math --- src/sherpa_ai/agents/mathematician.py | 165 ++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 src/sherpa_ai/agents/mathematician.py diff --git a/src/sherpa_ai/agents/mathematician.py b/src/sherpa_ai/agents/mathematician.py new file mode 100644 index 00000000..62c7cd76 --- /dev/null +++ b/src/sherpa_ai/agents/mathematician.py @@ -0,0 +1,165 @@ +from typing import List +import pal +from pal.prompt import colored_object_prompt, math_prompts +from pal.core.interface import timeout +from collections import Counter +from langchain.base_language import BaseLanguageModel +from loguru import logger +import types +from sherpa_ai.action_planner import ActionPlanner +from sherpa_ai.actions import Deliberation, GoogleSearch, SynthesizeOutput +from sherpa_ai.actions.base import BaseAction +from sherpa_ai.agents.base import BaseAgent +from sherpa_ai.memory import Belief, SharedMemory +from sherpa_ai.verbose_loggers.verbose_loggers import DummyVerboseLogger +import re + +MATH_DESCRIPTION = "You are a Mathematician with a deep-rooted expertise in understanding and analyzing the fundamental principles of math. Your primary role is to assist individuals, organizations, and researchers in navigating and resolving complex math-related challenges, using your knowledge to guide decisions and ensure the accuracy and reliability of outcomes." # noqa: E501 + +ACTION_PLAN_DESCRIPTION = "Given your specialized expertise, historical context, and your mission to facilitate physics-based solutions, determine which action and its corresponding arguments would be the most scientifically sound and efficient approach to achieve the described task." 
# noqa: E501 + + +def changePlaceholder(question, number_dict): + for key in number_dict: + question = question.replace(key, str(number_dict[key])) + + return question + + +def remove_function_arguments(input_string): + # Define a regular expression pattern to match the function definition + left = input_string.index("(") + + new_string = input_string[: left + 1] + "):" + return new_string + + +def replace_numbers_with_placeholders(text): + # Define a regular expression pattern to match both integers and decimals + number_pattern = re.compile(r"\b\d+(\.\d+)?\b") + + # Use a counter to generate unique placeholders + placeholder_counter = 1 + + # Dictionary to store the mappings between placeholders and numbers + placeholder_dict = {} + + # Replace each number with a placeholder and store the mapping in the dictionary + def replace(match): + nonlocal placeholder_counter + number = match.group(0) + placeholder = f"number_{placeholder_counter}" + placeholder_dict[placeholder] = float(number) if "." in number else int(number) + placeholder_counter += 1 + return placeholder + + # Use the re.sub function to replace numbers in the text + result = number_pattern.sub(replace, text) + + return result, placeholder_dict + + +def run_with_dict( + self, + prompt: str, + dictionary, + time_out: float = 10, + temperature: float = 0.0, + top_p: float = 1.0, + max_tokens: int = 1024, + majority_at: int = None, +): + code_snippets = self.generate( + prompt, + majority_at=majority_at, + temperature=temperature, + top_p=top_p, + max_tokens=max_tokens, + ) + + results = [] + for code in code_snippets: + with timeout(time_out): + try: + code[0] = remove_function_arguments(code[0]) + for i in range(1, len(code)): + code[i] = changePlaceholder(code[i], dictionary) + exec_result = self.execute(code) + except Exception as e: + print(e) + continue + results.append(exec_result) + + if len(results) == 0: + print( + "No results was produced. 
A common reason is that the generated code snippet is not valid or did not return any results." + ) + return None + + counter = Counter(results) + return counter.most_common(1)[0][0] + + +class Mathematician(BaseAgent): + """ + The Mathematician agent answers questions or research about math-related topics + """ + + def __init__( + self, + llm: BaseLanguageModel, + name="Mathematician", + description=MATH_DESCRIPTION, + shared_memory: SharedMemory = None, + num_runs=3, + verbose_logger=DummyVerboseLogger(), + MODEL="gpt-3.5-turbo", + pal_verbose=True, # TODO + ): + self.llm = llm + self.name = name + self.description = description + self.shared_memory = shared_memory + self.action_planner = ActionPlanner(description, ACTION_PLAN_DESCRIPTION, llm) + self.num_runs = num_runs + self.belief = Belief() + self.verbose_logger = verbose_logger + + interface = pal.interface.ProgramInterface( + model=MODEL, get_answer_expr="solution()", verbose=pal_verbose + ) + + interface.run_with_dict = types.MethodType(run_with_dict, interface) + + self.interface = interface + + def create_actions(self) -> List[BaseAction]: + return [ + Deliberation(self.description, self.llm), + GoogleSearch(self.description, self.belief.current_task, self.llm), + ] + + def synthesize_output(self) -> str: + synthesize_action = SynthesizeOutput(self.description, self.llm) + result = synthesize_action.execute( + self.belief.current_task.content, + self.belief.get_context(self.llm.get_num_tokens), + self.belief.get_internal_history(self.llm.get_num_tokens), + ) + + return result + + def answer_arithmetic(self, question, placeholder=False) -> str: + if not placeholder: + prompt = math_prompts.MATH_PROMPT.format(question=question) + answer = self.interface.run(prompt) + + else: + modified_text, number_dict = replace_numbers_with_placeholders(question) + scaled_question = modified_text + prompt = math_prompts.MATH_PROMPT.format(question=scaled_question) + answer = self.interface.run_with_dict(prompt, 
number_dict) + + reasoning = self.interface.histroy[-1][0] + logger.info(reasoning) + return answer From f6c045b4216133ca026991bb5520a8a4805ba4b8 Mon Sep 17 00:00:00 2001 From: ChenKua Date: Thu, 8 Feb 2024 18:05:44 -0500 Subject: [PATCH 2/6] doc strings --- src/sherpa_ai/output_parsers/__init__.py | 3 +- src/sherpa_ai/output_parsers/base.py | 37 ++++- .../output_parsers/citation_validation.py | 137 ++++++++++++++++-- 3 files changed, 159 insertions(+), 18 deletions(-) diff --git a/src/sherpa_ai/output_parsers/__init__.py b/src/sherpa_ai/output_parsers/__init__.py index 314ef133..bc34f2fd 100644 --- a/src/sherpa_ai/output_parsers/__init__.py +++ b/src/sherpa_ai/output_parsers/__init__.py @@ -1,5 +1,6 @@ from sherpa_ai.output_parsers.base import BaseOutputParser +from sherpa_ai.output_parsers.citation_validation import CitationValidation from sherpa_ai.output_parsers.link_parse import LinkParser from sherpa_ai.output_parsers.md_to_slack_parse import MDToSlackParse -__all__ = ["BaseOutputParser", "LinkParser", "MDToSlackParse"] +__all__ = ["BaseOutputParser", "LinkParser", "MDToSlackParse", "CitationValidation"] diff --git a/src/sherpa_ai/output_parsers/base.py b/src/sherpa_ai/output_parsers/base.py index 54eaa82a..12c08cf1 100644 --- a/src/sherpa_ai/output_parsers/base.py +++ b/src/sherpa_ai/output_parsers/base.py @@ -3,14 +3,49 @@ class BaseOutputParser(ABC): + """ + An abstract base class for output parsers. + + All concrete output parser classes should inherit from this base class + and implement the abstract method 'parse_output'. + + Attributes: + - None + @abstractmethod def parse_output(self, text: str) -> str: pass + Methods: + - parse_output(text: str) -> str: + This abstract method must be implemented by subclasses to define + the logic for parsing the given text and returning the parsed output. 
+ + Example Usage: + ```python + class MyOutputParser(BaseOutputParser): + def parse_output(self, text: str) -> str: + # Implement custom logic to parse the output from 'text' + # and return the parsed result. + pass + ``` + """ -class BaseOutputProcessor(ABC): @abstractmethod def process_output(self, text: str) -> Tuple[bool, str]: + """ + Parse the output from the given text. + + This method should be implemented by concrete subclasses to define + the logic for parsing the output from the provided 'text' and returning + the parsed result. + + Parameters: + - text (str): The raw text to be parsed. + + Returns: + - str: The parsed output. + """ pass def __call__(self, text: str) -> Tuple[bool, str]: diff --git a/src/sherpa_ai/output_parsers/citation_validation.py b/src/sherpa_ai/output_parsers/citation_validation.py index 72982a4d..cda79a38 100644 --- a/src/sherpa_ai/output_parsers/citation_validation.py +++ b/src/sherpa_ai/output_parsers/citation_validation.py @@ -8,13 +8,62 @@ class CitationValidation(BaseOutputParser): + """ + A class for adding citations to generated text based on a list of resources. + + This class inherits from the abstract class BaseOutputParser and provides + methods to add citations to each sentence in the generated text based on + reference texts and links provided in the 'resources' list. + + Attributes: + - seq_thresh (float): Threshold for common longest subsequence / text. Default is 0.7. + - jaccard_thresh (float): Jaccard similarity threshold. Default is 0.7. + - token_overlap (float): Token overlap threshold. Default is 0.7. + + Methods: + - calculate_token_overlap(sentence1, sentence2): Calculate token overlap between two sentences. + - jaccard_index(sentence1, sentence2): Calculate Jaccard similarity index between two sentences. + - longestCommonSubsequence(text1, text2): Calculate the length of the longest common subsequence between two texts. + - unfoldList(nestedList): Flatten a nested list of strings. 
+ - split_paragraph_into_sentences(paragraph): Tokenize a paragraph into sentences. + - parse_output(generated, resources): Add citation to each sentence in the generated text from resources based on fact-checking model. + + Example Usage: + ```python + citation_parser = CitationValidation(seq_thresh=0.7, jaccard_thresh=0.7, token_overlap=0.7) + result = citation_parser.parse_output(generated_text, list_of_resources) + ``` + """ + def __init__(self, seq_thresh=0.7, jaccard_thresh=0.7, token_overlap=0.7): - # threshold + """ + Initialize the CitationValidation object. + + Args: + - seq_thresh (float): Threshold for common longest subsequence / text. Default is 0.7. + - jaccard_thresh (float): Jaccard similarity threshold. Default is 0.7. + - token_overlap (float): Token overlap threshold. Default is 0.7. + """ self.seq_thresh = seq_thresh # threshold for common longest subsequece / text self.jaccard_thresh = jaccard_thresh self.token_overlap = token_overlap - def calculate_token_overlap(self, sentence1, sentence2): + def calculate_token_overlap(self, sentence1, sentence2) -> tuple: + """ + Calculate the percentage of token overlap between two sentences. + + Tokenizes the input sentences and calculates the percentage of token overlap + by finding the intersection of the token sets and dividing it by the length + of each sentence's token set. + + Args: + - sentence1 (str): The first sentence for token overlap calculation. + - sentence2 (str): The second sentence for token overlap calculation. + + Returns: + - tuple: A tuple containing two float values representing the percentage + of token overlap for sentence1 and sentence2, respectively. 
+ """ # Tokenize the sentences tokens1 = word_tokenize(sentence1) tokens2 = word_tokenize(sentence2) @@ -32,7 +81,20 @@ def calculate_token_overlap(self, sentence1, sentence2): overlap_percentage_2 = len(overlapping_tokens) / (len(tokens2)) return overlap_percentage, overlap_percentage_2 - def jaccard_index(sself, sentence1, sentence2): + def jaccard_index(sself, sentence1, sentence2) -> float: + """ + Calculate the Jaccard index between two sentences. + + The Jaccard index is a measure of similarity between two sets, defined as the + size of the intersection divided by the size of the union of the sets. + + Args: + - sentence1 (str): The first sentence for Jaccard index calculation. + - sentence2 (str): The second sentence for Jaccard index calculation. + + Returns: + - float: The Jaccard index representing the similarity between the two sentences. + """ # Convert the sentences to sets of words set1 = set(word_tokenize(sentence1)) set2 = set(word_tokenize(sentence2)) @@ -46,10 +108,20 @@ def jaccard_index(sself, sentence1, sentence2): return jaccard_index def longestCommonSubsequence(self, text1: str, text2: str) -> int: - # A subsequence of a string is a new string generated from the - # original string with some characters - # (can be none) deleted without changing the relative - # order of the remaining characters. + """ + Calculate the length of the longest common subsequence between two texts. + + A subsequence of a string is a new string generated from the original + string with some characters (can be none) deleted without changing + the relative order of the remaining characters. + + Args: + - text1 (str): The first text for calculating the longest common subsequence. + - text2 (str): The second text for calculating the longest common subsequence. + + Returns: + - int: The length of the longest common subsequence between the two texts. 
+ """ dp = [[0 for i in range(len(text1) + 1)] for i in range(len(text2) + 1)] for i in range(1, len(text2) + 1): @@ -60,7 +132,16 @@ def longestCommonSubsequence(self, text1: str, text2: str) -> int: dp[i][j] = max(diagnoal, dp[i - 1][j], dp[i][j - 1]) return dp[-1][-1] - def unfoldList(self, nestedList: list[list[str]]): + def unfoldList(self, nestedList: list[list[str]]) -> list[str]: + """ + Flatten a nested list of strings into a single list of strings. + + Args: + - nestedList (list[list[str]]): The nested list of strings to be flattened. + + Returns: + - list[str]: A flat list containing all non-empty strings from the nested list. + """ sentences = [] for sublist in nestedList: for item in sublist: @@ -68,22 +149,46 @@ def unfoldList(self, nestedList: list[list[str]]): sentences.append(item) return sentences - def split_paragraph_into_sentences(self, paragraph): + def split_paragraph_into_sentences(self, paragraph: str) -> list[str]: + """ + Tokenize a paragraph into a list of sentences. + + Uses NLTK's sent_tokenize to split a given paragraph into a list of sentences. + + Args: + - paragraph (str): The input paragraph to be tokenized into sentences. + + Returns: + - list[str]: A list of sentences extracted from the input paragraph. + """ sentences = sent_tokenize(paragraph) return sentences # add citation to the generated text def parse_output(self, generated: str, resources: list[dict]) -> ValidationResult: """ - Add citation to each sentence in the generated text from resources based on fact checking model. + Add citation to each sentence in the generated text based on fact-checking method. + Args: - generated (str): The generated content where we need to add citation/reference - resources (list[dict]): A list of dictionaries containing reference text and links. - Each dictionary in the list should have the format {"Document": str, "Source": str}. - activated (bool): control whether we need to add citation or just return the raw generated text. 
- by default it is activated. + - generated (str): The generated content where citations/references need to be added. + - resources (list[dict]): A list of dictionaries containing reference text and links. + Each dictionary in the list should have the format {"Document": str, "Source": str}. + Returns: - str: A formatted string combining the citation information from the 'resources' list. + - ValidationResult: An object containing the result of citation addition and feedback. + The ValidationResult has attributes 'is_valid' indicating success, 'result' containing + the formatted text with citations, and 'feedback' providing additional information. + + Note: + - The 'resources' list should contain dictionaries with "Document" and "Source" keys. + + Example: + ```python + resources = [{"Document": "Some reference text.", "Source": "http://example.com/source1"}] + citation_parser = CitationValidation() + result = citation_parser.parse_output("Generated text.", resources) + ``` + """ # resources type From 5f0982dbb3f1bbd4f52630d14129986a45a863b9 Mon Sep 17 00:00:00 2001 From: ChenKua Date: Thu, 8 Feb 2024 18:22:56 -0500 Subject: [PATCH 3/6] doc string for output parser --- src/sherpa_ai/output_parsers/link_parse.py | 54 +++++++++++++++++++ .../output_parsers/md_to_slack_parse.py | 35 +++++++++++- .../output_parsers/number_validation.py | 44 +++++++++++++++ .../output_parsers/validation_result.py | 18 +++++++ 4 files changed, 150 insertions(+), 1 deletion(-) diff --git a/src/sherpa_ai/output_parsers/link_parse.py b/src/sherpa_ai/output_parsers/link_parse.py index a9a4c897..ad7b5fb7 100644 --- a/src/sherpa_ai/output_parsers/link_parse.py +++ b/src/sherpa_ai/output_parsers/link_parse.py @@ -6,7 +6,32 @@ class LinkParser(BaseOutputParser): + """ + A class for parsing and modifying links in text using specified patterns. + + This class inherits from the abstract class BaseOutputParser and provides + methods to parse and modify links in the input text. 
It includes functionality + to replace links with symbols and symbols with links based on predefined patterns. + + Attributes: + - links (list): A list to store unique links encountered during parsing. + - link_to_id (dict): A dictionary mapping links to their corresponding symbols. + - count (int): Counter for generating unique symbols for new links. + - output_counter (int): Counter for reindexing output. + - reindex_mapping (dict): A mapping of original document IDs to reindexed IDs. + - url_pattern (str): Regular expression pattern for identifying links in the input text. + - doc_id_pattern (str): Regular expression pattern for identifying document IDs in the input text. + - link_symbol (str): Format string for representing link symbols. + + Methods: + - parse_output(text: str, tool_output: bool = False) -> str: + Parses and modifies links in the input text based on the specified patterns. + """ + def __init__(self): + """ + Initialize the LinkParser object. + """ self.links = [] self.link_to_id = {} self.count = 1 @@ -21,7 +46,27 @@ def __init__(self): self.link_symbol = "[{id}]" def parse_output(self, text: str, tool_output=False) -> str: + """ + Parses and modifies links in the input text based on the specified patterns. + + Args: + - text (str): The input text containing links or symbols to be parsed. + - tool_output (bool): A flag indicating whether the input text is tool-generated. Default is False. + + Returns: + - str: The modified text with links replaced by symbols or symbols replaced by links. + """ + def replace_with_symbol(match: re.Match): + """ + Replaces links with symbols in the input text. + + Args: + - match (re.Match): A regular expression match object. + + Returns: + - str: The modified text with links replaced by symbols. 
+ """ link = match.group(1) # check if the link is valid if not link.startswith("http"): @@ -34,6 +79,15 @@ def replace_with_symbol(match: re.Match): return "DocID:" + self.link_to_id[link] + "\n" def replace_with_link(match: re.Match): + """ + Replaces symbols with links in the input text. + + Args: + - match (re.Match): A regular expression match object. + + Returns: + - str: The modified text with symbols replaced by links. + """ logger.debug(match) doc_id = int(match.group(1)) if doc_id <= 0 or doc_id > len(self.links): diff --git a/src/sherpa_ai/output_parsers/md_to_slack_parse.py b/src/sherpa_ai/output_parsers/md_to_slack_parse.py index db92bb37..7d503910 100644 --- a/src/sherpa_ai/output_parsers/md_to_slack_parse.py +++ b/src/sherpa_ai/output_parsers/md_to_slack_parse.py @@ -8,9 +8,42 @@ class MDToSlackParse(BaseOutputParser): + """ + A post-processor for converting Markdown links to Slack-compatible format. + + This class inherits from the BaseOutputParser and provides a method to parse + and convert Markdown-style links to Slack-compatible format in the input text. + + Attributes: + - pattern (str): Regular expression pattern for identifying Markdown links. + + Methods: + - parse_output(text: str) -> str: + Parses and converts Markdown links to Slack-compatible format in the input text. + + Example Usage: + ```python + md_to_slack_parser = MDToSlackParse() + result = md_to_slack_parser.parse_output("Check out [this link](http://example.com)!") + ``` + + """ + def __init__(self) -> None: + """ + Initialize the MDToSlackParse object with pattern. + """ self.pattern = r"\[([^\]]+)\]\(([^)]+)\)" def parse_output(self, text: str) -> str: - # replace with Slack link + """ + Parses and converts Markdown links to Slack-compatible format in the input text. + Replace with Slack link + + Args: + - text (str): The input text containing Markdown-style links. + + Returns: + - str: The modified text with Markdown links replaced by Slack-compatible links. 
+ """ return re.sub(self.pattern, r"<\2|\1>", text) diff --git a/src/sherpa_ai/output_parsers/number_validation.py b/src/sherpa_ai/output_parsers/number_validation.py index 01a74109..046c3ff6 100644 --- a/src/sherpa_ai/output_parsers/number_validation.py +++ b/src/sherpa_ai/output_parsers/number_validation.py @@ -7,13 +7,57 @@ class NumberValidation(BaseOutputProcessor): + + """ + Process and validate numerical information in the generated text. + + This class inherits from the BaseOutputProcessor and provides a method to process + the generated text and validate the presence of numerical information based on a + specified source. + + Attributes: + - source (str): The source or context against which numerical information is validated. + + Methods: + - process_output(text: str) -> ValidationResult: + Process the generated text and validate the presence of numerical information. + + Example Usage: + ```python + number_validator = NumberValidation(source="document") + result = number_validator.process_output("The document contains important numbers: 123, 456.") + ``` + + """ + def __init__( self, source: str, ): + """ + Initialize the NumberValidation object. + + Args: + - source (str): The source or context against which numerical information is validated. + """ self.source = source def process_output(self, text: str) -> ValidationResult: + """ + Process the generated text and validate the presence of numerical information. + + Args: + - text (str): The generated text to be processed. + + Returns: + - ValidationResult: An object containing the result of the numerical validation, + including the validity status, the processed text, and optional feedback. 
+ + Example Usage: + ```python + result = number_validator.process_output("The document contains important numbers: 123, 456.") + ``` + """ check_validation = check_if_number_exist(text, self.source) if check_validation["number_exists"]: return ValidationResult( diff --git a/src/sherpa_ai/output_parsers/validation_result.py b/src/sherpa_ai/output_parsers/validation_result.py index ba879c98..33780285 100644 --- a/src/sherpa_ai/output_parsers/validation_result.py +++ b/src/sherpa_ai/output_parsers/validation_result.py @@ -2,6 +2,24 @@ class ValidationResult(BaseModel): + """ + Represents the result of a validation process. + + This class inherits from the Pydantic BaseModel and includes fields + for indicating the validity of the result, the actual result, and optional feedback. + + Attributes: + - is_valid (bool): Indicates whether the validation result is valid (True) or not (False). + - result (str): The actual result of the validation process. + - feedback (str, optional): Additional feedback or information about the validation result. Default is an empty string. 
+ + Example Usage: + ```python + validation_result = ValidationResult(is_valid=True, result="Validated successfully", feedback="No issues found.") + ``` + + """ + is_valid: bool result: str feedback: str = "" From 527f30d1155453793a22f6ade1f8a5a4c35aaeb1 Mon Sep 17 00:00:00 2001 From: ChenKua Date: Thu, 8 Feb 2024 20:35:12 -0500 Subject: [PATCH 4/6] fix typo and add tests --- src/sherpa_ai/agents/__init__.py | 3 +++ src/sherpa_ai/agents/mathematician.py | 25 +++++++++++++++++-- src/sherpa_ai/output_parsers/__init__.py | 4 ++- .../output_parsers/number_validation.py | 4 +-- .../unit_tests/agents/test_mathematician.py | 17 +++++++++++++ 5 files changed, 48 insertions(+), 5 deletions(-) create mode 100644 src/tests/unit_tests/agents/test_mathematician.py diff --git a/src/sherpa_ai/agents/__init__.py b/src/sherpa_ai/agents/__init__.py index a9bfe26c..98fdcaad 100644 --- a/src/sherpa_ai/agents/__init__.py +++ b/src/sherpa_ai/agents/__init__.py @@ -5,6 +5,8 @@ from sherpa_ai.agents.planner import Planner from sherpa_ai.agents.qa_agent import QAAgent from sherpa_ai.agents.user import UserAgent +from sherpa_ai.agents.mathematician import Mathematician + __all__ = [ "AgentPool", @@ -15,4 +17,5 @@ "UserAgent", "Critic", "QAAgent", + "Mathematician", ] diff --git a/src/sherpa_ai/agents/mathematician.py b/src/sherpa_ai/agents/mathematician.py index 62c7cd76..fc3ba8fa 100644 --- a/src/sherpa_ai/agents/mathematician.py +++ b/src/sherpa_ai/agents/mathematician.py @@ -6,6 +6,8 @@ from langchain.base_language import BaseLanguageModel from loguru import logger import types +import openai +import os from sherpa_ai.action_planner import ActionPlanner from sherpa_ai.actions import Deliberation, GoogleSearch, SynthesizeOutput from sherpa_ai.actions.base import BaseAction @@ -124,7 +126,7 @@ def __init__( self.num_runs = num_runs self.belief = Belief() self.verbose_logger = verbose_logger - + openai.api_key = os.getenv("OPENAI_API_KEY") interface = pal.interface.ProgramInterface( 
model=MODEL, get_answer_expr="solution()", verbose=pal_verbose ) @@ -160,6 +162,25 @@ def answer_arithmetic(self, question, placeholder=False) -> str: prompt = math_prompts.MATH_PROMPT.format(question=scaled_question) answer = self.interface.run_with_dict(prompt, number_dict) - reasoning = self.interface.histroy[-1][0] + reasoning = self.interface.history[-1][0] logger.info(reasoning) return answer + + +if __name__ == "__main__": + m = Mathematician(llm=None) + question = ( + "if each apple costs 2 dollar. How much money do I need for buying 10 apples?" + ) + + answer = 20 + result = m.answer_arithmetic(question) + + logger.info(result) + + # MODEL = "gpt-3.5-turbo" + # interface = pal.interface.ProgramInterface( + # model=MODEL, get_answer_expr="solution()", verbose=True + # ) + + # print(interface.history) diff --git a/src/sherpa_ai/output_parsers/__init__.py b/src/sherpa_ai/output_parsers/__init__.py index bc34f2fd..2edc9ad8 100644 --- a/src/sherpa_ai/output_parsers/__init__.py +++ b/src/sherpa_ai/output_parsers/__init__.py @@ -2,5 +2,7 @@ from sherpa_ai.output_parsers.citation_validation import CitationValidation from sherpa_ai.output_parsers.link_parse import LinkParser from sherpa_ai.output_parsers.md_to_slack_parse import MDToSlackParse +from sherpa_ai.output_parsers.number_validation import NumberValidation -__all__ = ["BaseOutputParser", "LinkParser", "MDToSlackParse", "CitationValidation"] + +__all__ = ["BaseOutputParser", "LinkParser", "MDToSlackParse", "CitationValidation", "NumberValidation"] diff --git a/src/sherpa_ai/output_parsers/number_validation.py b/src/sherpa_ai/output_parsers/number_validation.py index 046c3ff6..b3ff935b 100644 --- a/src/sherpa_ai/output_parsers/number_validation.py +++ b/src/sherpa_ai/output_parsers/number_validation.py @@ -1,12 +1,12 @@ from typing import Tuple from sherpa_ai.memory import Belief -from sherpa_ai.output_parsers.base import BaseOutputProcessor +from sherpa_ai.output_parsers.base import BaseOutputParser from 
sherpa_ai.output_parsers.validation_result import ValidationResult from sherpa_ai.utils import check_if_number_exist -class NumberValidation(BaseOutputProcessor): +class NumberValidation(BaseOutputParser): """ Process and validate numerical information in the generated text. diff --git a/src/tests/unit_tests/agents/test_mathematician.py b/src/tests/unit_tests/agents/test_mathematician.py new file mode 100644 index 00000000..393fa3d1 --- /dev/null +++ b/src/tests/unit_tests/agents/test_mathematician.py @@ -0,0 +1,17 @@ +from sherpa_ai.agents import Mathematician + + +def test_answer_arith(): + m = Mathematician(llm=None) + question = ( + "if each apple costs 2 dollar. How much money do I need for buying 10 apples?" + ) + + answer = 20 + result = m.answer_arithmetic(question) + + assert answer == result + + result = m.answer_arithmetic(question, True) + + assert answer == result From 7a7b7215b715ba1de82c643a2ad1a51e97c652aa Mon Sep 17 00:00:00 2001 From: ChenKua Date: Fri, 9 Feb 2024 10:03:40 -0500 Subject: [PATCH 5/6] math and doc string --- src/sherpa_ai/agents/mathematician.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/src/sherpa_ai/agents/mathematician.py b/src/sherpa_ai/agents/mathematician.py index fc3ba8fa..03f06a46 100644 --- a/src/sherpa_ai/agents/mathematician.py +++ b/src/sherpa_ai/agents/mathematician.py @@ -126,7 +126,7 @@ def __init__( self.num_runs = num_runs self.belief = Belief() self.verbose_logger = verbose_logger - openai.api_key = os.getenv("OPENAI_API_KEY") + openai.api_key = os.getenv('OPENAI_API_KEY') interface = pal.interface.ProgramInterface( model=MODEL, get_answer_expr="solution()", verbose=pal_verbose ) @@ -167,20 +167,11 @@ def answer_arithmetic(self, question, placeholder=False) -> str: return answer -if __name__ == "__main__": - m = Mathematician(llm=None) - question = ( - "if each apple costs 2 dollar. How much money do I need for buying 10 apples?" 
- ) - - answer = 20 - result = m.answer_arithmetic(question) - - logger.info(result) - + + # MODEL = "gpt-3.5-turbo" # interface = pal.interface.ProgramInterface( # model=MODEL, get_answer_expr="solution()", verbose=True # ) - - # print(interface.history) + + # print(interface.history) \ No newline at end of file From f0179c595e1f3410d0723389bfea48e8b25f8107 Mon Sep 17 00:00:00 2001 From: ChenKua Date: Fri, 9 Feb 2024 10:05:38 -0500 Subject: [PATCH 6/6] remove math --- src/sherpa_ai/agents/mathematician.py | 177 ------------------ .../unit_tests/agents/test_mathematician.py | 17 -- 2 files changed, 194 deletions(-) delete mode 100644 src/sherpa_ai/agents/mathematician.py delete mode 100644 src/tests/unit_tests/agents/test_mathematician.py diff --git a/src/sherpa_ai/agents/mathematician.py b/src/sherpa_ai/agents/mathematician.py deleted file mode 100644 index 03f06a46..00000000 --- a/src/sherpa_ai/agents/mathematician.py +++ /dev/null @@ -1,177 +0,0 @@ -from typing import List -import pal -from pal.prompt import colored_object_prompt, math_prompts -from pal.core.interface import timeout -from collections import Counter -from langchain.base_language import BaseLanguageModel -from loguru import logger -import types -import openai -import os -from sherpa_ai.action_planner import ActionPlanner -from sherpa_ai.actions import Deliberation, GoogleSearch, SynthesizeOutput -from sherpa_ai.actions.base import BaseAction -from sherpa_ai.agents.base import BaseAgent -from sherpa_ai.memory import Belief, SharedMemory -from sherpa_ai.verbose_loggers.verbose_loggers import DummyVerboseLogger -import re - -MATH_DESCRIPTION = "You are a Mathematician with a deep-rooted expertise in understanding and analyzing the fundamental principles of math. Your primary role is to assist individuals, organizations, and researchers in navigating and resolving complex math-related challenges, using your knowledge to guide decisions and ensure the accuracy and reliability of outcomes." 
# noqa: E501 - -ACTION_PLAN_DESCRIPTION = "Given your specialized expertise, historical context, and your mission to facilitate physics-based solutions, determine which action and its corresponding arguments would be the most scientifically sound and efficient approach to achieve the described task." # noqa: E501 - - -def changePlaceholder(question, number_dict): - for key in number_dict: - question = question.replace(key, str(number_dict[key])) - - return question - - -def remove_function_arguments(input_string): - # Define a regular expression pattern to match the function definition - left = input_string.index("(") - - new_string = input_string[: left + 1] + "):" - return new_string - - -def replace_numbers_with_placeholders(text): - # Define a regular expression pattern to match both integers and decimals - number_pattern = re.compile(r"\b\d+(\.\d+)?\b") - - # Use a counter to generate unique placeholders - placeholder_counter = 1 - - # Dictionary to store the mappings between placeholders and numbers - placeholder_dict = {} - - # Replace each number with a placeholder and store the mapping in the dictionary - def replace(match): - nonlocal placeholder_counter - number = match.group(0) - placeholder = f"number_{placeholder_counter}" - placeholder_dict[placeholder] = float(number) if "." 
in number else int(number) - placeholder_counter += 1 - return placeholder - - # Use the re.sub function to replace numbers in the text - result = number_pattern.sub(replace, text) - - return result, placeholder_dict - - -def run_with_dict( - self, - prompt: str, - dictionary, - time_out: float = 10, - temperature: float = 0.0, - top_p: float = 1.0, - max_tokens: int = 1024, - majority_at: int = None, -): - code_snippets = self.generate( - prompt, - majority_at=majority_at, - temperature=temperature, - top_p=top_p, - max_tokens=max_tokens, - ) - - results = [] - for code in code_snippets: - with timeout(time_out): - try: - code[0] = remove_function_arguments(code[0]) - for i in range(1, len(code)): - code[i] = changePlaceholder(code[i], dictionary) - exec_result = self.execute(code) - except Exception as e: - print(e) - continue - results.append(exec_result) - - if len(results) == 0: - print( - "No results was produced. A common reason is that the generated code snippet is not valid or did not return any results." 
- ) - return None - - counter = Counter(results) - return counter.most_common(1)[0][0] - - -class Mathematician(BaseAgent): - """ - The Mathematician agent answers questions or research about math-related topics - """ - - def __init__( - self, - llm: BaseLanguageModel, - name="Mathematician", - description=MATH_DESCRIPTION, - shared_memory: SharedMemory = None, - num_runs=3, - verbose_logger=DummyVerboseLogger(), - MODEL="gpt-3.5-turbo", - pal_verbose=True, # TODO - ): - self.llm = llm - self.name = name - self.description = description - self.shared_memory = shared_memory - self.action_planner = ActionPlanner(description, ACTION_PLAN_DESCRIPTION, llm) - self.num_runs = num_runs - self.belief = Belief() - self.verbose_logger = verbose_logger - openai.api_key = os.getenv('OPENAI_API_KEY') - interface = pal.interface.ProgramInterface( - model=MODEL, get_answer_expr="solution()", verbose=pal_verbose - ) - - interface.run_with_dict = types.MethodType(run_with_dict, interface) - - self.interface = interface - - def create_actions(self) -> List[BaseAction]: - return [ - Deliberation(self.description, self.llm), - GoogleSearch(self.description, self.belief.current_task, self.llm), - ] - - def synthesize_output(self) -> str: - synthesize_action = SynthesizeOutput(self.description, self.llm) - result = synthesize_action.execute( - self.belief.current_task.content, - self.belief.get_context(self.llm.get_num_tokens), - self.belief.get_internal_history(self.llm.get_num_tokens), - ) - - return result - - def answer_arithmetic(self, question, placeholder=False) -> str: - if not placeholder: - prompt = math_prompts.MATH_PROMPT.format(question=question) - answer = self.interface.run(prompt) - - else: - modified_text, number_dict = replace_numbers_with_placeholders(question) - scaled_question = modified_text - prompt = math_prompts.MATH_PROMPT.format(question=scaled_question) - answer = self.interface.run_with_dict(prompt, number_dict) - - reasoning = self.interface.history[-1][0] 
- logger.info(reasoning) - return answer - - - - - # MODEL = "gpt-3.5-turbo" - # interface = pal.interface.ProgramInterface( - # model=MODEL, get_answer_expr="solution()", verbose=True - # ) - - # print(interface.history) \ No newline at end of file diff --git a/src/tests/unit_tests/agents/test_mathematician.py b/src/tests/unit_tests/agents/test_mathematician.py deleted file mode 100644 index 393fa3d1..00000000 --- a/src/tests/unit_tests/agents/test_mathematician.py +++ /dev/null @@ -1,17 +0,0 @@ -from sherpa_ai.agents import Mathematician - - -def test_answer_arith(): - m = Mathematician(llm=None) - question = ( - "if each apple costs 2 dollar. How much money do I need for buying 10 apples?" - ) - - answer = 20 - result = m.answer_arithmetic(question) - - assert answer == result - - result = m.answer_arithmetic(question, True) - - assert answer == result