Merge pull request #39 from alkem-io/develop
Release: Async event processing, Updated 3rd party packages, Cleaned up Logging
valentinyanakiev authored Jan 3, 2024
2 parents 59a4b18 + b544cff commit 1a18f49
Showing 8 changed files with 976 additions and 737 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -6,3 +6,4 @@ azure.env
/vectordb/*
local.env
docker-compose-local.yaml
.DS_Store
194 changes: 123 additions & 71 deletions ai_utils.py
@@ -1,26 +1,36 @@
from langchain.embeddings import AzureOpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import AzureOpenAI
from langchain.prompts import PromptTemplate
from langchain.prompts.prompt import PromptTemplate
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import AzureChatOpenAI
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.conversational_retrieval.prompts import QA_PROMPT
from langchain.schema import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain.schema import format_document
from langchain_core.messages import get_buffer_string
from langchain_core.runnables import RunnableBranch

from operator import itemgetter
import logging
import sys
import io
import def_ingest
from config import config, website_source_path, website_generated_path, website_source_path2, website_generated_path2, vectordb_path, local_path, generate_website, LOG_LEVEL
from config import config, website_source_path, website_generated_path, website_source_path2, website_generated_path2, vectordb_path, local_path, generate_website, LOG_LEVEL, max_token_limit

import os

# configure logging
logger = logging.getLogger(__name__)
assert LOG_LEVEL in ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
logger.setLevel(getattr(logging, LOG_LEVEL)) # Set logger level


# Create handlers
c_handler = logging.StreamHandler()
c_handler = logging.StreamHandler(io.TextIOWrapper(sys.stdout.buffer, line_buffering=True))
f_handler = logging.FileHandler(os.path.join(os.path.expanduser(local_path),'app.log'))

c_handler.setLevel(level=getattr(logging, LOG_LEVEL))
f_handler.setLevel(logging.ERROR)
f_handler.setLevel(logging.WARNING)

# Create formatters and add them to handlers
c_format = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
@@ -32,15 +42,15 @@
logger.addHandler(c_handler)
logger.addHandler(f_handler)

logger.info(f"log level ai_utils: {LOG_LEVEL}")

# verbose output for LLMs
if LOG_LEVEL=="DEBUG":
verbose_models = True
else:
verbose_models = False

# define internal configuration parameters
# token limit for retrieval chain
max_token_limit = 4000

# does chain return the source documents?
return_source_documents = True
@@ -66,65 +76,40 @@ def get_language_by_code(language_code):


chat_template = """
You are a conversational agent. Use the following step-by-step instructions to respond to user inputs.
You are a friendly conversational agent. Use the following step-by-step instructions to respond to user inputs.
1 - The text provided in the context delimited by triple pluses may contain questions. Remove those questions from the context.
2 - The text provided in the chat history delimited by triple hashes provides the early part of the chat. Read it well so you can take it into account in answering the question.
3 - Provide a single paragraph answer that is polite and professional, taking into account the context delimited by triple pluses. If the answer cannot be found within the context, write 'I could not find an answer to your question'.
2 - Provide an answer of up to three paragraphs that is accurate and exhaustive, taking into account the context delimited by triple pluses.
If the answer cannot be found within the context, write 'I could not find an answer to your question'.
3 - Only return the answer from step 2, do not show any code or additional information.
4 - If the question is in a different language than English, translate the question to English before answering.
5 - Answer the question in the {language} language.
+++
Context:
{context}
+++
###
Chat history:
{chat_history}
###
Question: {question}
"""

custom_question_template = """"
Combine the chat history and follow up question into a standalone question.
condense_question_template = """"
Combine the chat history delimited by triple pluses and the follow-up question into a single standalone question that does justice to the follow-up question. Only return the standalone question; do not return any other information.
+++
Chat History: {chat_history}
+++
Follow up question: {question}
+++
Follow-up question: {question}
---
Standalone question:
"""

translate_template = """"
Act as a professional translator. Use the following step-by-step instructions:
1: assess in what language input below delimited by triple pluses is written.
2. carry out one of tasks A or B below:
A: if the input language is different from {language} then translate the input below delimited by triple pluses to natural {language} language, maintaining tone of voice and length
B: if the input language is the same as {language} there is no need for translation, simply return the original input below delimited by triple pluses as the answer.
3. Only return the answer from step 2, do not show any code or additional information.
+++
input:
{answer}
+++
Translated input:
"""

custom_question_prompt = PromptTemplate(
template=custom_question_template, input_variables=["chat_history", "question"]
)
condense_question_prompt = PromptTemplate.from_template(condense_question_template)

translation_prompt = PromptTemplate(
template=translate_template, input_variables=["language", "answer"]
)

# prompt to be used by retrieval chain, note this is the default prompt name, so nowhere assigned
QA_PROMPT = PromptTemplate(
template=chat_template, input_variables=["question", "context", "chat_history"]
)
chat_prompt = ChatPromptTemplate.from_template(chat_template)
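As a small illustration of how such a template is rendered before it reaches the model, here is a hedged sketch; the strings and the demo_prompt name are hypothetical, not part of this commit:

from langchain.prompts.prompt import PromptTemplate

# Hypothetical template in the same shape as condense_question_template above.
demo_prompt = PromptTemplate.from_template(
    "Chat History: {chat_history}\nFollow-up question: {question}\nStandalone question:"
)

# format() substitutes the variables and returns the plain string handed to the LLM.
print(demo_prompt.format(chat_history="Human: hi\nAI: hello", question="and what about pricing?"))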


generic_llm = AzureOpenAI(azure_deployment=os.environ["LLM_DEPLOYMENT_NAME"],
temperature=0, verbose=verbose_models)

question_generator = LLMChain(llm=generic_llm, prompt=custom_question_prompt, verbose=verbose_models)


embeddings = AzureOpenAIEmbeddings(
azure_deployment=config['embeddings_deployment_name'],
openai_api_version=config['openai_api_version'],
@@ -142,32 +127,99 @@ def get_language_by_code(language_code):
def_ingest.mainapp(config['source_website'], config['source_website2'])

vectorstore = FAISS.load_local(vectordb_path, embeddings)
retriever = vectorstore.as_retriever()
retriever = vectorstore.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold": .5})

chat_llm = AzureChatOpenAI(azure_deployment=os.environ["LLM_DEPLOYMENT_NAME"],
temperature=os.environ["AI_MODEL_TEMPERATURE"],
max_tokens=max_token_limit)

doc_chain = load_qa_chain(generic_llm, chain_type="stuff", prompt=QA_PROMPT, verbose=verbose_models)

def translate_answer(answer, language):
translate_llm = AzureChatOpenAI(azure_deployment=os.environ["LLM_DEPLOYMENT_NAME"],
temperature=0, verbose=verbose_models)
prompt = translation_prompt.format(answer=answer, language=language)
return translate_llm(prompt)


def setup_chain():

conversation_chain = ConversationalRetrievalChain.from_llm(
llm=chat_llm,
retriever=retriever,
condense_question_prompt=custom_question_prompt,
chain_type="stuff",
verbose=verbose_models,
condense_question_llm=generic_llm,
return_source_documents=return_source_documents,
combine_docs_chain_kwargs={"prompt": QA_PROMPT}
max_tokens=max_token_limit, verbose=verbose_models)

condense_llm = AzureChatOpenAI(azure_deployment=os.environ["LLM_DEPLOYMENT_NAME"],
temperature=0,
verbose=verbose_models)

def format_docs(docs):
return "\n\n".join(doc.page_content for doc in docs)

DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")

def _combine_documents(
docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
doc_strings = [format_document(doc, document_prompt) for doc in docs]
return document_separator.join(doc_strings)
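The async query_chain defined below composes these helpers with LCEL runnables and uses a RunnableBranch so the question is only condensed when there is chat history. A minimal, self-contained sketch of that routing pattern, with illustrative names only (not from the repository):

from langchain_core.runnables import RunnableBranch, RunnableLambda

# Route on a "first_call" flag, mirroring the branch constructed in query_chain below.
demo_branch = RunnableBranch(
    (lambda x: x["first_call"], RunnableLambda(lambda x: "direct retrieval path")),
    RunnableLambda(lambda x: "condense-question path"),
)

print(demo_branch.invoke({"first_call": True}))   # -> direct retrieval path
print(demo_branch.invoke({"first_call": False}))  # -> condense-question path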


async def query_chain(question, language, chat_history):

# check whether the chat history is empty
if chat_history.buffer == []:
first_call = True
else:
first_call = False

# add first_call to the question
question.update({"first_call": first_call})

logger.info(f"first call: {first_call}\n")
logger.debug(f"chat history: {chat_history.buffer}\n")

# First we add a step to load memory
# This adds a "memory" key to the input object
loaded_memory = RunnablePassthrough.assign(
chat_history=RunnableLambda(chat_history.load_memory_variables) | itemgetter("history"),
)

logger.debug(f"loaded memory {loaded_memory}\n")
logger.debug(f"chat history {chat_history}\n")

# Now we calculate the standalone question if the chat_history is not empty
standalone_question = {
"standalone_question": {
"question": lambda x: x["question"],
"chat_history": lambda x: get_buffer_string(x["chat_history"]),
}
| condense_question_prompt
| condense_llm
| StrOutputParser(),
}

# pass the question directly on the first call in a chat sequence of the chatbot
direct_question = {
"question": lambda x: x["question"],
}
# Now we retrieve the documents
# in case it is the first call (chat history empty)
retrieved_documents = {
"docs": itemgetter("question") | retriever,
"question": lambda x: x["question"],
}
# or when the chat history is not empty, rephrase the question taking into account the chat history
retrieved_documents_sa = {
"docs": itemgetter("standalone_question") | retriever,
"question": lambda x: x["standalone_question"],
}


# Now we construct the inputs for the final prompt
final_inputs = {
"context": lambda x: _combine_documents(x["docs"]),
"question": itemgetter("question"),
"language": lambda x: language['language'],
}

# And finally, we do the part that returns the answers
answer = {
"answer": final_inputs | chat_prompt | chat_llm,
"docs": itemgetter("docs"),
}


# And now we put it all together in a 'RunnableBranch', so we only invoke the rephrasing part when the chat history is not empty
final_chain = RunnableBranch(
(lambda x: x["first_call"], loaded_memory | direct_question | retrieved_documents | answer),
loaded_memory | standalone_question | retrieved_documents_sa | answer,
)

return conversation_chain
logger.debug(f"final chain {final_chain}\n")
result = await final_chain.ainvoke(question)
return {'answer': result['answer'], 'source_documents': result['docs']}
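A hedged usage sketch of the new async entry point; the caller is not part of this commit, and the memory object, the question/language dict shapes, and the Azure OpenAI environment that ai_utils configures at import time are assumptions inferred from the code above:

import asyncio

from langchain.memory import ConversationBufferMemory

from ai_utils import query_chain  # importing ai_utils runs its module-level setup (embeddings, FAISS index)

async def main():
    # query_chain reads load_memory_variables() under the default "history" key and checks .buffer,
    # which matches ConversationBufferMemory with return_messages=True.
    memory = ConversationBufferMemory(return_messages=True)

    q = "What is Alkemio?"
    result = await query_chain({"question": q}, {"language": "English"}, memory)

    print(result["answer"])            # chat model response (an AIMessage from AzureChatOpenAI)
    print(result["source_documents"])  # documents returned by the retriever

    # Persist the turn so the next question is routed through the condense-question branch.
    memory.save_context({"question": q}, {"answer": str(result["answer"])})

asyncio.run(main())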