Merge pull request #39 from alkem-io/develop
Release: Async event processing, Updated 3rd party packages, Cleaned up Logging
valentinyanakiev authored Jan 3, 2024
2 parents 59a4b18 + b544cff commit 1a18f49
Showing 8 changed files with 976 additions and 737 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -6,3 +6,4 @@ azure.env
/vectordb/*
local.env
docker-compose-local.yaml
.DS_Store
194 changes: 123 additions & 71 deletions ai_utils.py
@@ -1,26 +1,36 @@
from langchain.embeddings import AzureOpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import AzureOpenAI
from langchain.prompts import PromptTemplate
from langchain.prompts.prompt import PromptTemplate
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import AzureChatOpenAI
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.conversational_retrieval.prompts import QA_PROMPT
from langchain.schema import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain.schema import format_document
from langchain_core.messages import get_buffer_string
from langchain_core.runnables import RunnableBranch

from operator import itemgetter
import logging
import sys
import io
import def_ingest
from config import config, website_source_path, website_generated_path, website_source_path2, website_generated_path2, vectordb_path, local_path, generate_website, LOG_LEVEL
from config import config, website_source_path, website_generated_path, website_source_path2, website_generated_path2, vectordb_path, local_path, generate_website, LOG_LEVEL, max_token_limit

import os

# configure logging
logger = logging.getLogger(__name__)
assert LOG_LEVEL in ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
logger.setLevel(getattr(logging, LOG_LEVEL)) # Set logger level


# Create handlers
c_handler = logging.StreamHandler()
c_handler = logging.StreamHandler(io.TextIOWrapper(sys.stdout.buffer, line_buffering=True))
f_handler = logging.FileHandler(os.path.join(os.path.expanduser(local_path),'app.log'))

c_handler.setLevel(level=getattr(logging, LOG_LEVEL))
f_handler.setLevel(logging.ERROR)
f_handler.setLevel(logging.WARNING)

# Create formatters and add them to handlers
c_format = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
@@ -32,15 +42,15 @@
logger.addHandler(c_handler)
logger.addHandler(f_handler)

logger.info(f"log level ai_utils: {LOG_LEVEL}")

# verbose output for LLMs
if LOG_LEVEL=="DEBUG":
verbose_models = True
else:
verbose_models = False

# define internal configuration parameters
# token limit for retrieval chain
max_token_limit = 4000

# does chain return the source documents?
return_source_documents = True
@@ -66,65 +76,40 @@ def get_language_by_code(language_code):


chat_template = """
You are a conversational agent. Use the following step-by-step instructions to respond to user inputs.
You are a friendly conversational agent. Use the following step-by-step instructions to respond to user inputs.
1 - The text provided in the context delimited by triple pluses may contain questions. Remove those questions from the context.
2 - The text provided in the chat history delimited by triple hashes provides the early part of the chat. Read it well so you can take it into account in answering the question.
3 - Provide a single paragraph answer that is polite and professional, taking into account the context delimited by triple pluses. If the answer cannot be found within the context, write 'I could not find an answer to your question'.
2 - Provide an answer of up to three paragraphs that is accurate and exhaustive, taking into account the context delimited by triple pluses.
If the answer cannot be found within the context, write 'I could not find an answer to your question'.
3 - Only return the answer from step 2, do not show any code or additional information.
4 - If the question is in a different language than English, translate the question to English before answering.
5 - Answer the question in the {language} language.
+++
Context:
{context}
+++
###
Chat history:
{chat_history}
###
Question: {question}
"""

custom_question_template = """"
Combine the chat history and follow up question into a standalone question.
condense_question_template = """"
Combine the chat history delimited by triple pluses and the follow-up question into a single standalone question that does justice to the follow-up question. Only return the standalone question; do not return any other information.
+++
Chat History: {chat_history}
+++
Follow up question: {question}
+++
Follow-up question: {question}
---
Standalone question:
"""

translate_template = """"
Act as a professional translator. Use the following step-by-step instructions:
1: assess in what language input below delimited by triple pluses is written.
2. carry out one of tasks A or B below:
A: if the input language is different from {language} then translate the input below delimited by triple pluses to natural {language} language, maintaining tone of voice and length
B: if the input language is the same as {language} there is no need for translation, simply return the original input below delimited by triple pluses as the answer.
3. Only return the answer from step 2, do not show any code or additional information.
+++
input:
{answer}
+++
Translated input:
"""

custom_question_prompt = PromptTemplate(
template=custom_question_template, input_variables=["chat_history", "question"]
)
condense_question_prompt = PromptTemplate.from_template(condense_question_template)

translation_prompt = PromptTemplate(
template=translate_template, input_variables=["language", "answer"]
)

# prompt to be used by retrieval chain, note this is the default prompt name, so nowhere assigned
QA_PROMPT = PromptTemplate(
template=chat_template, input_variables=["question", "context", "chat_history"]
)
chat_prompt = ChatPromptTemplate.from_template(chat_template)
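As a small illustration of how such a template is rendered before it reaches the model, here is a hedged sketch; the strings and the demo_prompt name are hypothetical, not part of this commit:

from langchain.prompts.prompt import PromptTemplate

# Hypothetical template in the same shape as condense_question_template above.
demo_prompt = PromptTemplate.from_template(
    "Chat History: {chat_history}\nFollow-up question: {question}\nStandalone question:"
)

# format() substitutes the variables and returns the plain string handed to the LLM.
print(demo_prompt.format(chat_history="Human: hi\nAI: hello", question="and what about pricing?"))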


generic_llm = AzureOpenAI(azure_deployment=os.environ["LLM_DEPLOYMENT_NAME"],
temperature=0, verbose=verbose_models)

question_generator = LLMChain(llm=generic_llm, prompt=custom_question_prompt, verbose=verbose_models)


embeddings = AzureOpenAIEmbeddings(
azure_deployment=config['embeddings_deployment_name'],
openai_api_version=config['openai_api_version'],
@@ -142,32 +127,99 @@ def get_language_by_code(language_code):
def_ingest.mainapp(config['source_website'], config['source_website2'])

vectorstore = FAISS.load_local(vectordb_path, embeddings)
retriever = vectorstore.as_retriever()
retriever = vectorstore.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold": .5})

chat_llm = AzureChatOpenAI(azure_deployment=os.environ["LLM_DEPLOYMENT_NAME"],
temperature=os.environ["AI_MODEL_TEMPERATURE"],
max_tokens=max_token_limit)

doc_chain = load_qa_chain(generic_llm, chain_type="stuff", prompt=QA_PROMPT, verbose=verbose_models)

def translate_answer(answer, language):
translate_llm = AzureChatOpenAI(azure_deployment=os.environ["LLM_DEPLOYMENT_NAME"],
temperature=0, verbose=verbose_models)
prompt = translation_prompt.format(answer=answer, language=language)
return translate_llm(prompt)


def setup_chain():

conversation_chain = ConversationalRetrievalChain.from_llm(
llm=chat_llm,
retriever=retriever,
condense_question_prompt=custom_question_prompt,
chain_type="stuff",
verbose=verbose_models,
condense_question_llm=generic_llm,
return_source_documents=return_source_documents,
combine_docs_chain_kwargs={"prompt": QA_PROMPT}
max_tokens=max_token_limit, verbose=verbose_models)

condense_llm = AzureChatOpenAI(azure_deployment=os.environ["LLM_DEPLOYMENT_NAME"],
temperature=0,
verbose=verbose_models)

def format_docs(docs):
return "\n\n".join(doc.page_content for doc in docs)

DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")

def _combine_documents(
docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
doc_strings = [format_document(doc, document_prompt) for doc in docs]
return document_separator.join(doc_strings)
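The async query_chain defined below composes these helpers with LCEL runnables and uses a RunnableBranch so the question is only condensed when there is chat history. A minimal, self-contained sketch of that routing pattern, with illustrative names only (not from the repository):

from langchain_core.runnables import RunnableBranch, RunnableLambda

# Route on a "first_call" flag, mirroring the branch constructed in query_chain below.
demo_branch = RunnableBranch(
    (lambda x: x["first_call"], RunnableLambda(lambda x: "direct retrieval path")),
    RunnableLambda(lambda x: "condense-question path"),
)

print(demo_branch.invoke({"first_call": True}))   # -> direct retrieval path
print(demo_branch.invoke({"first_call": False}))  # -> condense-question path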


async def query_chain(question, language, chat_history):

# check whether the chat history is empty
if chat_history.buffer == []:
first_call = True
else:
first_call = False

# add first_call to the question
question.update({"first_call": first_call})

logger.info(f"first call: {first_call}\n")
logger.debug(f"chat history: {chat_history.buffer}\n")

# First we add a step to load memory
# This adds a "memory" key to the input object
loaded_memory = RunnablePassthrough.assign(
chat_history=RunnableLambda(chat_history.load_memory_variables) | itemgetter("history"),
)

logger.debug(f"loaded memory {loaded_memory}\n")
logger.debug(f"chat history {chat_history}\n")

# Now we calculate the standalone question if the chat_history is not empty
standalone_question = {
"standalone_question": {
"question": lambda x: x["question"],
"chat_history": lambda x: get_buffer_string(x["chat_history"]),
}
| condense_question_prompt
| condense_llm
| StrOutputParser(),
}

# pass the question directly on the first call in a chat sequence of the chatbot
direct_question = {
"question": lambda x: x["question"],
}
# Now we retrieve the documents
# in case it is the first call (chat history empty)
retrieved_documents = {
"docs": itemgetter("question") | retriever,
"question": lambda x: x["question"],
}
# or when the chat history is not empty, rephrase the question taking into account the chat history
retrieved_documents_sa = {
"docs": itemgetter("standalone_question") | retriever,
"question": lambda x: x["standalone_question"],
}


# Now we construct the inputs for the final prompt
final_inputs = {
"context": lambda x: _combine_documents(x["docs"]),
"question": itemgetter("question"),
"language": lambda x: language['language'],
}

# And finally, we do the part that returns the answers
answer = {
"answer": final_inputs | chat_prompt | chat_llm,
"docs": itemgetter("docs"),
}


# And now we put it all together in a 'RunnableBranch', so we only invoke the rephrasing part when the chat history is not empty
final_chain = RunnableBranch(
(lambda x: x["first_call"], loaded_memory | direct_question | retrieved_documents | answer),
loaded_memory | standalone_question | retrieved_documents_sa | answer,
)

return conversation_chain
logger.debug(f"final chain {final_chain}\n")
result = await final_chain.ainvoke(question)
return {'answer': result['answer'], 'source_documents': result['docs']}
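A hedged usage sketch of the new async entry point; the caller is not part of this commit, and the memory object, the question/language dict shapes, and the Azure OpenAI environment that ai_utils configures at import time are assumptions inferred from the code above:

import asyncio

from langchain.memory import ConversationBufferMemory

from ai_utils import query_chain  # importing ai_utils runs its module-level setup (embeddings, FAISS index)

async def main():
    # query_chain reads load_memory_variables() under the default "history" key and checks .buffer,
    # which matches ConversationBufferMemory with return_messages=True.
    memory = ConversationBufferMemory(return_messages=True)

    q = "What is Alkemio?"
    result = await query_chain({"question": q}, {"language": "English"}, memory)

    print(result["answer"])            # chat model response (an AIMessage from AzureChatOpenAI)
    print(result["source_documents"])  # documents returned by the retriever

    # Persist the turn so the next question is routed through the condense-question branch.
    memory.save_context({"question": q}, {"answer": str(result["answer"])})

asyncio.run(main())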