Merge pull request #27 from alkem-io/develop
Release: Improve chat, logging and ingest functionality
valentinyanakiev authored Oct 4, 2023
2 parents 23db36e + 93d36f4 commit ec5a40f
Showing 9 changed files with 389 additions and 256 deletions.
6 changes: 5 additions & 1 deletion .azure-template.env
@@ -11,4 +11,8 @@ AI_DEPLOYMENT_NAME=deploy-gpt-35-turbo
AI_EMBEDDINGS_DEPLOYMENT_NAME=embedding
AI_SOURCE_WEBSITE=https://www.alkemio.org
AI_LOCAL_PATH=~/alkemio/data
AI_WEBSITE_REPO=https://github.com/alkem-io/website.git
AI_WEBSITE_REPO=https://github.com/alkem-io/website.git
LANGCHAIN_TRACING_V2=true
LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
LANGCHAIN_API_KEY="langsmith-api-key"
LANGCHAIN_PROJECT="guidance-engine"
4 changes: 4 additions & 0 deletions .openai-template.env
@@ -9,3 +9,7 @@ AI_MODEL_NAME=gpt-35-turbo
AI_SOURCE_WEBSITE=https://www.alkemio.org
AI_LOCAL_PATH=~/alkemio/data
AI_WEBSITE_REPO=https://github.com/alkem-io/website.git
LANGCHAIN_TRACING_V2=true
LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
LANGCHAIN_API_KEY="langsmith-api-key"
LANGCHAIN_PROJECT="guidance-engine"
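
The four LANGCHAIN_* variables added to both templates enable LangSmith tracing; LangChain picks them up straight from the process environment, so no code change is needed beyond exporting them. A minimal sketch of loading them before the engine starts, assuming the template has been copied to a local .env file and python-dotenv is installed (neither is part of this commit):

# Illustrative only: load the template-derived .env so LangChain can see the
# LangSmith tracing variables; python-dotenv is an assumed dependency here.
import os
from dotenv import load_dotenv

load_dotenv(".env")

print("tracing enabled:", os.environ.get("LANGCHAIN_TRACING_V2"))
print("LangSmith project:", os.environ.get("LANGCHAIN_PROJECT"))
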
8 changes: 4 additions & 4 deletions Dockerfile
@@ -1,18 +1,18 @@
# Use an official Python runtime as a parent image
FROM python:3-slim-bookworm
FROM python:3.11-slim-bookworm

# Set the working directory in the container to /app
WORKDIR /app

ARG GO_VERSION=1.21.1
ARG HUGO_VERSIOM=0.118.2
ARG HUGO_VERSION=0.118.2
ARG ARCHITECTURE=amd64

# install git, go and hugo
RUN apt update && apt upgrade -y && apt install git wget -y
RUN apt update && apt upgrade -y && apt install -y git wget
RUN wget https://go.dev/dl/go${GO_VERSION}.linux-${ARCHITECTURE}.tar.gz && tar -C /usr/local -xzf go${GO_VERSION}.linux-${ARCHITECTURE}.tar.gz
RUN export PATH=$PATH:/usr/local/go/bin:/usr/local && go version
RUN wget https://github.com/gohugoio/hugo/releases/download/v${HUGO_VERSIOM}/hugo_extended_${HUGO_VERSIOM}_linux-${ARCHITECTURE}.tar.gz && tar -C /usr/local -xzf hugo_extended_${HUGO_VERSIOM}_linux-${ARCHITECTURE}.tar.gz && ls -al /usr/local
RUN wget https://github.com/gohugoio/hugo/releases/download/v${HUGO_VERSION}/hugo_extended_${HUGO_VERSION}_linux-${ARCHITECTURE}.tar.gz && tar -C /usr/local -xzf hugo_extended_${HUGO_VERSION}_linux-${ARCHITECTURE}.tar.gz && ls -al /usr/local
RUN /usr/local/hugo version

# Install Poetry
92 changes: 70 additions & 22 deletions ai_utils.py
@@ -5,17 +5,45 @@
from langchain.chat_models import AzureChatOpenAI
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
from langchain.chains.conversational_retrieval.prompts import QA_PROMPT
import logging
import def_ingest
from config import config, website_source_path, website_generated_path, vectordb_path, local_path, generate_website, LOG_LEVEL

import os

# configure logging
logger = logging.getLogger(__name__)

# Create handlers
c_handler = logging.StreamHandler()
f_handler = logging.FileHandler(local_path+'/app.log')

c_handler.setLevel(level=getattr(logging, LOG_LEVEL))
f_handler.setLevel(logging.ERROR)

# Create formatters and add them to handlers
c_format = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
f_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
c_handler.setFormatter(c_format)
f_handler.setFormatter(f_format)

# Add handlers to the logger
logger.addHandler(c_handler)
logger.addHandler(f_handler)

# verbose output for LLMs
if LOG_LEVEL=="DEBUG":
verbose_models = True
else:
verbose_models = False

# define internal configuration parameters
# token limit for retrieval chain
max_token_limit = 2000
# verbose output for LLMs
verbose_models = True
# doews chain return the source documents?
return_source_document = True
max_token_limit = 4000

# does chain return the source documents?
return_source_documents = True


# Define a dictionary containing country codes as keys and related languages as values
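
The handler setup added at the top of ai_utils.py sends records to two destinations: the console handler follows LOG_LEVEL, while app.log only receives ERROR records. Note that a record must also pass the logger's own level before any handler sees it, so the sketch below adds an explicit logger.setLevel call, which is an assumption and not part of the diff above:

# Minimal sketch of how the two handlers route records. The logger.setLevel
# call is an assumption added here so DEBUG/INFO records actually reach the
# console handler; without it the logger inherits the root WARNING level.
import logging

logger = logging.getLogger("ai_utils")
logger.setLevel(logging.DEBUG)

console_handler = logging.StreamHandler()
console_handler.setLevel(logging.DEBUG)          # mirrors LOG_LEVEL = "DEBUG"
file_handler = logging.FileHandler("app.log")
file_handler.setLevel(logging.ERROR)             # only errors are persisted

logger.addHandler(console_handler)
logger.addHandler(file_handler)

logger.debug("appears on the console only")
logger.error("appears on the console and is written to app.log")
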
@@ -40,11 +40,16 @@ def get_language_by_code(language_code):
chat_template = """
You are a conversational agent. Use the following step-by-step instructions to respond to user inputs.
1 - The text provided in the context delimited by triple pluses may contain questions. Remove those questions from the context.
2 - Provide a single paragraph answer that is polite and professional taking into account the context delimited by triple pluses. If the answer cannot be found within the context, write 'I could not find an answer to your question'.
2 - The text provided in the chat history delimited by triple hashes provides the early part of the chat. Read it well so you can take it into account in answering the question.
3 - Provide a single paragraph answer that is polite and professional taking into account the context delimited by triple pluses. If the answer cannot be found within the context, write 'I could not find an answer to your question'.
+++
Context:
{context}
+++
###
Chat history:
{chat_history}
###
Question: {question}
"""

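
The rewritten chat_template now takes three inputs instead of two, so the prompt object built from it must declare chat_history as well. A short sketch of how such a template is filled, with an abridged template string and placeholder values (both illustrative):

# Illustrative only: shows the three input variables the retrieval chain must
# now supply; the template string is abridged from the one above.
from langchain.prompts import PromptTemplate

chat_template = (
    "+++\nContext:\n{context}\n+++\n"
    "###\nChat history:\n{chat_history}\n###\n"
    "Question: {question}\n"
)

QA_PROMPT = PromptTemplate(
    template=chat_template,
    input_variables=["question", "context", "chat_history"],
)

print(QA_PROMPT.format(
    context="Alkemio is an open source collaboration platform.",
    chat_history="Human:'What is Alkemio?' Assistant:'A collaboration platform.'",
    question="How do I join a Space?",
))
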
@@ -82,28 +82,42 @@ def get_language_by_code(language_code):

# prompt to be used by retrieval chain, note this is the default prompt name, so nowhere assigned
QA_PROMPT = PromptTemplate(
template=chat_template, input_variables=["question", "context"]
template=chat_template, input_variables=["question", "context", "chat_history"]
)

generic_llm = AzureOpenAI(deployment_name=os.environ["AI_DEPLOYMENT_NAME"], model_name=os.environ["AI_MODEL_NAME"],
temperature=0, verbose=verbose_models)

question_generator = LLMChain(llm=generic_llm, prompt=custom_question_prompt, verbose=verbose_models)

embeddings = OpenAIEmbeddings(deployment=os.environ["AI_EMBEDDINGS_DEPLOYMENT_NAME"], chunk_size=1)

# Check if the vector database exists
if os.path.exists(vectordb_path+"/index.pkl"):
logger.info(f"The file vector database is present")
else:
# ingest data
if generate_website:
def_ingest.clone_and_generate(config['website_repo'], website_generated_path, website_source_path)
def_ingest.mainapp(config['source_website'])

vectorstore = FAISS.load_local(vectordb_path, embeddings)
retriever = vectorstore.as_retriever()

chat_llm = AzureChatOpenAI(deployment_name=os.environ["AI_DEPLOYMENT_NAME"],
model_name=os.environ["AI_MODEL_NAME"], temperature=os.environ["AI_MODEL_TEMPERATURE"],
max_tokens=max_token_limit)

doc_chain = load_qa_chain(generic_llm, chain_type="stuff", prompt=QA_PROMPT, verbose=verbose_models)

def translate_answer(answer, language):
translate_llm = AzureOpenAI(deployment_name=os.environ["AI_DEPLOYMENT_NAME"], model_name=os.environ["AI_MODEL_NAME"],
translate_llm = AzureChatOpenAI(deployment_name=os.environ["AI_DEPLOYMENT_NAME"], model_name=os.environ["AI_MODEL_NAME"],
temperature=0, verbose=verbose_models)
prompt = translation_prompt.format(answer=answer, language=language)
return translate_llm(prompt)


def setup_chain(db_path):
generic_llm = AzureOpenAI(deployment_name=os.environ["AI_DEPLOYMENT_NAME"], model_name=os.environ["AI_MODEL_NAME"],
temperature=0, verbose=verbose_models)

embeddings = OpenAIEmbeddings(deployment=os.environ["AI_EMBEDDINGS_DEPLOYMENT_NAME"], chunk_size=1)

vectorstore = FAISS.load_local(db_path, embeddings)
retriever = vectorstore.as_retriever()

chat_llm = AzureChatOpenAI(deployment_name=os.environ["AI_DEPLOYMENT_NAME"],
model_name=os.environ["AI_MODEL_NAME"], temperature=os.environ["AI_MODEL_TEMPERATURE"],
max_tokens=max_token_limit)
def setup_chain():

conversation_chain = ConversationalRetrievalChain.from_llm(
llm=chat_llm,
@@ -112,7 +112,8 @@ def setup_chain(db_path):
chain_type="stuff",
verbose=verbose_models,
condense_question_llm=generic_llm,
return_source_documents=True,
return_source_documents=return_source_documents,
combine_docs_chain_kwargs={"prompt": QA_PROMPT}
)

return conversation_chain
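
setup_chain no longer takes a db_path argument: the embeddings, FAISS store and LLMs are now created once at import time, and the function only wires them into a ConversationalRetrievalChain with the custom QA_PROMPT. A usage sketch, assuming the AI_* environment variables from the templates are set and the vector database has already been ingested:

# Usage sketch for the reworked setup_chain; the question text is illustrative.
import ai_utils

chain = ai_utils.setup_chain()
result = chain({"question": "What is Alkemio?", "chat_history": []})

print(result["answer"])
# source documents are included because return_source_documents=True
for doc in result["source_documents"]:
    print(doc.metadata["source"])
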
92 changes: 52 additions & 40 deletions app.py
@@ -1,86 +1,98 @@
from langchain.callbacks import get_openai_callback
import os
import pika
import json
import ai_utils
import logging
import def_ingest
from config import config, website_source_path, website_generated_path, vectordb_path, generate_website
from config import config, website_source_path, website_generated_path, vectordb_path, generate_website, local_path, LOG_LEVEL

# configure logging
logger = logging.getLogger(__name__)

# Create handlers
c_handler = logging.StreamHandler()
f_handler = logging.FileHandler(local_path+'/app.log')

c_handler.setLevel(level=getattr(logging, LOG_LEVEL))
f_handler.setLevel(logging.WARNING)

# Create formatters and add them to handlers
c_format = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
f_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
c_handler.setFormatter(c_format)
f_handler.setFormatter(f_format)

# Add handlers to the logger
logger.addHandler(c_handler)
logger.addHandler(f_handler)

user_data = {}
user_chain = {}

credentials = pika.PlainCredentials(config['rabbitmq_user'],
config['rabbitmq_password'])
parameters = pika.ConnectionParameters(host=config['rabbitmq_host'],
credentials=credentials)
print(f"\About to connect to RabbitMQ with params {config['rabbitmq_user']}: {config['rabbitmq_host']}\n")
logger.info(f"\About to connect to RabbitMQ with params {config['rabbitmq_user']}: {config['rabbitmq_host']}\n")
connection = pika.BlockingConnection(parameters)
channel = connection.channel()

channel.queue_declare(queue=config['rabbitmqrequestqueue'])

# Check if the vector database exists
if os.path.exists(vectordb_path+"/index.pkl"):
print(f"The file vector database is present")
else:
# ingest data
if generate_website:
def_ingest.clone_and_generate(config['website_repo'], website_generated_path, website_source_path)
def_ingest.mainapp(config['source_website'])

qa_chain = ai_utils.setup_chain(vectordb_path)

def query(user_id, query, language_code):
print(f"\nQuery from user {user_id}: {query}\n")
logger.info(f"\nQuery from user {user_id}: {query}\n")

if user_id not in user_data:
user_chain[user_id]=ai_utils.setup_chain()
reset(user_id)
chat_history=[]

user_data[user_id]['language'] = ai_utils.get_language_by_code(language_code)

print(f"\nlanguage: {user_data[user_id]['language']}\n")
logger.debug(f"\nlanguage: {user_data[user_id]['language']}\n")
chat_history = user_data[user_id]['chat_history']

# llm_result =ai_utils.qa_chain(
# query,
# chat_history,
# user_data[user_id]['language']
# )
with get_openai_callback() as cb:
llm_result = qa_chain({"question": query, "chat_history": chat_history})
#translation = ai_utils.translate_answer(llm_result['answer'], user_data[user_id]['language'])
translation = llm_result['answer']
llm_result = user_chain[user_id]({"question": query, "chat_history": chat_history})
answer = llm_result['answer']

print(f"\nTotal Tokens: {cb.total_tokens}")
print(f"\nPrompt Tokens: {cb.prompt_tokens}")
print(f"\nCompletion Tokens: {cb.completion_tokens}")
print(f"\nTotal Cost (USD): ${cb.total_cost}")

print(f"\n\nLLM result: {llm_result}\n\n")
print(f"\n\ntranslation result: {translation}\n\n")
# clean up the document sources to avoid sending too much information over.
sources = [doc.metadata['source'] for doc in llm_result['source_documents']]



logger.info(f"\nTotal Tokens: {cb.total_tokens}")
logger.info(f"\nPrompt Tokens: {cb.prompt_tokens}")
logger.info(f"\nCompletion Tokens: {cb.completion_tokens}")
logger.info(f"\nTotal Cost (USD): ${cb.total_cost}")

logger.debug(f"\n\nLLM result: {llm_result}\n\n")
logger.info(f"\n\nanswer: {answer}\n\n")
logger.debug(f"\n\nsources: {sources}\n\n")

formatted_messages = (
f"Human:'{llm_result['question']}'",
f"AI:'{llm_result['answer']}'"
f"Assistant:'{llm_result['answer']}'"
)
user_data[user_id]['chat_history'].append(formatted_messages)

# only keep the last 3 entires of that chat history to avoid exceeding the token limit.
# only keep the last 3 entries of that chat history to avoid exceeding the token limit.
user_data[user_id]['chat_history'] = user_data[user_id]['chat_history'][-3:]

print(f"new chat history {user_data[user_id]['chat_history']}")

logger.debug(f"new chat history {user_data[user_id]['chat_history']}")
response = json.dumps({
"question": str(llm_result["question"]), "answer": str(translation), "sources": str(llm_result["source_documents"]), "prompt_tokens": cb.prompt_tokens, "completion_tokens": cb.completion_tokens, "total_tokens": cb.total_tokens, "total_cost": cb.total_cost
"question": str(llm_result["question"]), "answer": str(answer), "sources": str(llm_result["source_documents"]), "prompt_tokens": cb.prompt_tokens, "completion_tokens": cb.completion_tokens, "total_tokens": cb.total_tokens, "total_cost": cb.total_cost
}
)

return response
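
query() now serialises the answer, the cleaned-up sources and the token accounting into a single JSON string, so whatever consumes the reply has to parse it back. A receiving-side sketch of the fields it can expect, matching the json.dumps call above (the handle_reply helper itself is illustrative, not part of the commit):

# Receiving-side sketch: parse the JSON reply produced by query().
import json

def handle_reply(body: str) -> None:
    payload = json.loads(body)
    print(payload["question"])
    print(payload["answer"])
    print(payload["sources"])                # stringified source documents
    print(payload["prompt_tokens"], payload["completion_tokens"], payload["total_tokens"])
    print("cost (USD):", payload["total_cost"])
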

def reset(user_id):

user_data[user_id] = {
'chat_history': []
}

return "Reset function executed"

def ingest(source_url, website_repo, destination_path, source_path):
@@ -119,13 +131,13 @@ def on_request(ch, method, props, body):
)

ch.basic_ack(delivery_tag=method.delivery_tag)
print(f"Response sent for correlation_id: {props.correlation_id}")
print(f"Response sent to: {props.reply_to}")
print(f"response: {response}")
logger.info(f"Response sent for correlation_id: {props.correlation_id}")
logger.info(f"Response sent to: {props.reply_to}")
logger.info(f"response: {response}")


channel.basic_qos(prefetch_count=1)
channel.basic_consume(queue=config['rabbitmqrequestqueue'], on_message_callback=on_request)

print("Waiting for RPC requests")
logger.info("Waiting for RPC requests")
channel.start_consuming()
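
on_request follows the classic RabbitMQ RPC pattern: it reads reply_to and correlation_id from the incoming message properties and publishes the JSON reply back to that queue. A client-side sketch of the same pattern; the queue name, credentials and request payload fields are assumptions, since the request-parsing code is not shown in this diff:

# Client-side sketch of the RPC pattern served by on_request. Queue name,
# credentials and payload fields are illustrative assumptions.
import json
import uuid
import pika

connection = pika.BlockingConnection(pika.ConnectionParameters(host="localhost"))
channel = connection.channel()

# exclusive, auto-named queue on which the engine will publish the reply
callback_queue = channel.queue_declare(queue="", exclusive=True).method.queue
correlation_id = str(uuid.uuid4())

channel.basic_publish(
    exchange="",
    routing_key="alkemio-chat-guidance",     # the configured rabbitmqrequestqueue, assumed
    properties=pika.BasicProperties(reply_to=callback_queue, correlation_id=correlation_id),
    body=json.dumps({"userId": "user-1", "question": "What is Alkemio?", "language": "EN"}),
)
# the reply arrives on callback_queue tagged with the same correlation_id
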
2 changes: 2 additions & 0 deletions config.py
@@ -17,3 +17,5 @@
website_generated_path = local_path + '/website/generated'
vectordb_path = local_path + "/vectordb"
generate_website = True

LOG_LEVEL = 'DEBUG' # Possible values: 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
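
Both ai_utils.py and app.py resolve this string with getattr(logging, LOG_LEVEL) when configuring their console handlers. A small sketch of that conversion; the environment override and the INFO fallback are hypothetical extensions, not something this commit adds:

# Sketch of resolving the string level; the AI_LOG_LEVEL override and the
# INFO fallback are assumptions for illustration only.
import logging
import os

LOG_LEVEL = os.environ.get("AI_LOG_LEVEL", "DEBUG").upper()
numeric_level = getattr(logging, LOG_LEVEL, logging.INFO)   # fall back to INFO on typos

console_handler = logging.StreamHandler()
console_handler.setLevel(numeric_level)
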