Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

addressing #395 #396

Merged
merged 3 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions demo/pdf_question_answering/actions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from langchain.document_loaders import PDFMinerLoader
from langchain.embeddings.base import Embeddings
from langchain.text_splitter import SentenceTransformersTokenTextSplitter
from langchain.vectorstores.chroma import Chroma
from loguru import logger
from pydantic import ConfigDict

from sherpa_ai.actions.base import BaseAction


class DocumentSearch(BaseAction):
    """Action that answers queries via similarity search over a single PDF.

    On construction the PDF named by ``filename`` is loaded, split into
    token-sized chunks, embedded with ``embedding_function``, and stored in
    an in-memory Chroma vector store. ``execute`` then runs an MMR search
    against that store and returns the top-``k`` chunks joined into one
    string.
    """

    # Allow non-pydantic field types (e.g. the Embeddings instance below).
    model_config = ConfigDict(arbitrary_types_allowed=True)
    # file name of the pdf
    filename: str
    # the embedding function to use
    embedding_function: Embeddings
    # number of results to return in search
    k: int
    # the variables start with _ will not included in the __init__
    _chroma: Chroma
    # Override name and args properties from BaseAction
    # The name of the action, used to describe the action to the agent.
    name: str = "DocumentSearch"
    # The arguments that the action takes, used to describe the action to the agent.
    args: dict = {"query": "string"}
    # Description of action. Used semantically to determine when the action should be chosen by the agent
    usage: str = "Search the document store based on a query"

    def __init__(self, **kwargs):
        """Validate fields via pydantic, then eagerly index the PDF.

        Loading, splitting, and embedding the document all happen here, so
        construction may be slow for large PDFs.
        """
        # initialize attributes using Pydantic BaseModel
        super().__init__(**kwargs)

        # load the pdf and create the vector store
        self._chroma = Chroma(embedding_function=self.embedding_function)
        documents = PDFMinerLoader(self.filename).load()
        # chunk_overlap=0: chunks are disjoint, no shared tokens between them
        documents = SentenceTransformersTokenTextSplitter(
            chunk_overlap=0
        ).split_documents(documents)

        logger.info(f"Adding {len(documents)} documents to the vector store")
        self._chroma.add_documents(documents)
        logger.info("Finished adding documents to the vector store")

    def execute(self, query):
        """
        Execute the action by searching the document store for the query

        Args:
            query (str): The query to search for

        Returns:
            str: The search results combined into a single string
        """

        # MMR (maximal marginal relevance) search balances relevance with
        # diversity among the k returned chunks.
        results = self._chroma.search(query, search_type="mmr", k=self.k)
        return "\n\n".join([result.page_content for result in results])
33 changes: 33 additions & 0 deletions demo/pdf_question_answering/agent_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
shared_memory:
  _target_: sherpa_ai.memory.shared_memory.SharedMemory # The absolute path to the share memory class in the library
  objective: Answer the question # Objective for the agent, since this is a question answering agent, the objective is to answer questions

agent_config: # For the demo, default configuration is used. You can change the configuration as per your requirement
  _target_: sherpa_ai.config.task_config.AgentConfig


llm: # Configuration for the llm, here we are using the OpenAI GPT-3.5-turbo model
  _target_: langchain.chat_models.ChatOpenAI
  model_name: gpt-3.5-turbo
  temperature: 0 # 0 = deterministic output (no sampling randomness)

embedding_func: # Embedding model used to vectorize the PDF chunks for search
  _target_: langchain.embeddings.SentenceTransformerEmbeddings
  model_name: sentence-transformers/all-mpnet-base-v2

doc_search: # The DocumentSearch action defined in actions.py
  _target_: actions.DocumentSearch
  filename: paper.pdf # PDF to index; resolved relative to the working directory
  embedding_function: ${embedding_func} # Interpolation: reuse the embedding_func node above
  k: 4 # Number of chunks returned per search

qa_agent: # The agent wiring everything together
  _target_: sherpa_ai.agents.qa_agent.QAAgent
  llm: ${llm}
  shared_memory: ${shared_memory}
  name: QA Sherpa
  description: You are a question answering assistant helping users to find answers to their questions. Based on the input question, you will provide the answer from the text ONLY.
  agent_config: ${agent_config}
  num_runs: 1 # Single reasoning pass per question
  actions:
    - ${doc_search}
44 changes: 44 additions & 0 deletions demo/pdf_question_answering/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from argparse import ArgumentParser

from hydra.utils import instantiate
from omegaconf import OmegaConf

from sherpa_ai.agents import QAAgent
from sherpa_ai.events import EventType


def get_qa_agent_from_config_file(
    config_path: str,
) -> QAAgent:
    """Build a QAAgent from an OmegaConf YAML config file.

    The config is loaded, its ``agent_config`` node is instantiated first,
    and the result is passed into the ``qa_agent`` node's instantiation.

    Args:
        config_path: Path to the config file

    Returns:
        QAAgent: A QAAgent instance
    """
    conf = OmegaConf.load(config_path)
    resolved_agent_config = instantiate(conf.agent_config)

    agent: QAAgent = instantiate(
        conf.qa_agent, agent_config=resolved_agent_config
    )
    return agent

if __name__ == "__main__":
    parser = ArgumentParser()
    # Default must match the config file shipped with this demo:
    # agent_config.yml (the original default "agent_config.yaml" pointed at
    # a non-existent file and failed on the out-of-the-box run).
    parser.add_argument(
        "--config",
        type=str,
        default="agent_config.yml",
        help="Path to the agent configuration YAML file",
    )
    args = parser.parse_args()

    qa_agent = get_qa_agent_from_config_file(args.config)

    # Simple REPL: read a question, hand it to the agent, print the answer.
    # Exit with Ctrl+C / Ctrl+D.
    while True:
        question = input("Ask me a question: ")

        # Add the question to the shared memory. By default, the agent will take the last
        # message in the shared memory as the task.
        qa_agent.shared_memory.add(EventType.task, "human", question)
        result = qa_agent.run()
        print(result)
Binary file added demo/pdf_question_answering/paper.pdf
Binary file not shown.
19 changes: 0 additions & 19 deletions demo/question_answering/README.md

This file was deleted.

44 changes: 0 additions & 44 deletions demo/question_answering/qa_config.yaml

This file was deleted.

23 changes: 0 additions & 23 deletions demo/question_answering/qa_demo.py

This file was deleted.

3 changes: 2 additions & 1 deletion docs/How_To/Tutorials/document_reader.rst
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,13 @@ In the tutorial folder, create a file called `actions.py` and add the following
k: int
# the variables start with _ will not included in the __init__
_chroma: Chroma

# Override name and args properties from BaseAction
# The name of the action, used to describe the action to the agent.
name: str = "DocumentSearch"
# The arguments that the action takes, used to describe the action to the agent.
args: dict = {"query": "string"}
# Description of action. Used semantically to determine when the action should be chosen by the agent
usage: str = "Search the document store based on a query"

def __init__(self, **kwargs):
# initialize attributes using Pydantic BaseModel
Expand Down