Merge pull request #27 from alkem-io/develop
Release: Improve chat, logging and ingest functionality
valentinyanakiev authored Oct 4, 2023
2 parents 23db36e + 93d36f4 commit ec5a40f
Showing 9 changed files with 389 additions and 256 deletions.
6 changes: 5 additions & 1 deletion .azure-template.env
@@ -11,4 +11,8 @@ AI_DEPLOYMENT_NAME=deploy-gpt-35-turbo
AI_EMBEDDINGS_DEPLOYMENT_NAME=embedding
AI_SOURCE_WEBSITE=https://www.alkemio.org
AI_LOCAL_PATH=~/alkemio/data
AI_WEBSITE_REPO=https://github.com/alkem-io/website.git
AI_WEBSITE_REPO=https://github.com/alkem-io/website.git
LANGCHAIN_TRACING_V2=true
LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
LANGCHAIN_API_KEY="langsmith-api-key"
LANGCHAIN_PROJECT="guidance-engine"
4 changes: 4 additions & 0 deletions .openai-template.env
@@ -9,3 +9,7 @@ AI_MODEL_NAME=gpt-35-turbo
AI_SOURCE_WEBSITE=https://www.alkemio.org
AI_LOCAL_PATH=~/alkemio/data
AI_WEBSITE_REPO=https://github.com/alkem-io/website.git
LANGCHAIN_TRACING_V2=true
LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
LANGCHAIN_API_KEY="langsmith-api-key"
LANGCHAIN_PROJECT="guidance-engine"
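
The four LANGCHAIN_* variables added to both templates enable LangSmith tracing; LangChain picks them up straight from the process environment, so no code change is needed beyond exporting them. A minimal sketch of loading them before the engine starts, assuming the template has been copied to a local .env file and python-dotenv is installed (neither is part of this commit):

# Illustrative only: load the template-derived .env so LangChain can see the
# LangSmith tracing variables; python-dotenv is an assumed dependency here.
import os
from dotenv import load_dotenv

load_dotenv(".env")

print("tracing enabled:", os.environ.get("LANGCHAIN_TRACING_V2"))
print("LangSmith project:", os.environ.get("LANGCHAIN_PROJECT"))
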
8 changes: 4 additions & 4 deletions Dockerfile
@@ -1,18 +1,18 @@
# Use an official Python runtime as a parent image
FROM python:3-slim-bookworm
FROM python:3.11-slim-bookworm

# Set the working directory in the container to /app
WORKDIR /app

ARG GO_VERSION=1.21.1
ARG HUGO_VERSIOM=0.118.2
ARG HUGO_VERSION=0.118.2
ARG ARCHITECTURE=amd64

# install git, go and hugo
RUN apt update && apt upgrade -y && apt install git wget -y
RUN apt update && apt upgrade -y && apt install -y git wget
RUN wget https://go.dev/dl/go${GO_VERSION}.linux-${ARCHITECTURE}.tar.gz && tar -C /usr/local -xzf go${GO_VERSION}.linux-${ARCHITECTURE}.tar.gz
RUN export PATH=$PATH:/usr/local/go/bin:/usr/local && go version
RUN wget https://github.com/gohugoio/hugo/releases/download/v${HUGO_VERSIOM}/hugo_extended_${HUGO_VERSIOM}_linux-${ARCHITECTURE}.tar.gz && tar -C /usr/local -xzf hugo_extended_${HUGO_VERSIOM}_linux-${ARCHITECTURE}.tar.gz && ls -al /usr/local
RUN wget https://github.com/gohugoio/hugo/releases/download/v${HUGO_VERSION}/hugo_extended_${HUGO_VERSION}_linux-${ARCHITECTURE}.tar.gz && tar -C /usr/local -xzf hugo_extended_${HUGO_VERSION}_linux-${ARCHITECTURE}.tar.gz && ls -al /usr/local
RUN /usr/local/hugo version

# Install Poetry
92 changes: 70 additions & 22 deletions ai_utils.py
@@ -5,17 +5,45 @@
from langchain.chat_models import AzureChatOpenAI
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
from langchain.chains.conversational_retrieval.prompts import QA_PROMPT
import logging
import def_ingest
from config import config, website_source_path, website_generated_path, vectordb_path, local_path, generate_website, LOG_LEVEL

import os

# configure logging
logger = logging.getLogger(__name__)

# Create handlers
c_handler = logging.StreamHandler()
f_handler = logging.FileHandler(local_path+'/app.log')

c_handler.setLevel(level=getattr(logging, LOG_LEVEL))
f_handler.setLevel(logging.ERROR)

# Create formatters and add them to handlers
c_format = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
f_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
c_handler.setFormatter(c_format)
f_handler.setFormatter(f_format)

# Add handlers to the logger
logger.addHandler(c_handler)
logger.addHandler(f_handler)

# verbose output for LLMs
if LOG_LEVEL=="DEBUG":
verbose_models = True
else:
verbose_models = False

# define internal configuration parameters
# token limit for retrieval chain
max_token_limit = 2000
# verbose output for LLMs
verbose_models = True
# doews chain return the source documents?
return_source_document = True
max_token_limit = 4000

# does chain return the source documents?
return_source_documents = True


# Define a dictionary containing country codes as keys and related languages as values
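
The handler setup added at the top of ai_utils.py sends records to two destinations: the console handler follows LOG_LEVEL, while app.log only receives ERROR records. Note that a record must also pass the logger's own level before any handler sees it, so the sketch below adds an explicit logger.setLevel call, which is an assumption and not part of the diff above:

# Minimal sketch of how the two handlers route records. The logger.setLevel
# call is an assumption added here so DEBUG/INFO records actually reach the
# console handler; without it the logger inherits the root WARNING level.
import logging

logger = logging.getLogger("ai_utils")
logger.setLevel(logging.DEBUG)

console_handler = logging.StreamHandler()
console_handler.setLevel(logging.DEBUG)          # mirrors LOG_LEVEL = "DEBUG"
file_handler = logging.FileHandler("app.log")
file_handler.setLevel(logging.ERROR)             # only errors are persisted

logger.addHandler(console_handler)
logger.addHandler(file_handler)

logger.debug("appears on the console only")
logger.error("appears on the console and is written to app.log")
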
@@ -40,11 +40,16 @@ def get_language_by_code(language_code):
chat_template = """
You are a conversational agent. Use the following step-by-step instructions to respond to user inputs.
1 - The text provided in the context delimited by triple pluses may contain questions. Remove those questions from the context.
2 - Provide a single paragraph answer that is polite and professional taking into account the context delimited by triple pluses. If the answer cannot be found within the context, write 'I could not find an answer to your question'.
2 - The text provided in the chat history delimited by triple hashes provides the early part of the chat. Read it well so you can take it into account in answering the question.
3 - Provide a single paragraph answer that is polite and professional taking into account the context delimited by triple pluses. If the answer cannot be found within the context, write 'I could not find an answer to your question'.
+++
Context:
{context}
+++
###
Chat history:
{chat_history}
###
Question: {question}
"""

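
The rewritten chat_template now takes three inputs instead of two, so the prompt object built from it must declare chat_history as well. A short sketch of how such a template is filled, with an abridged template string and placeholder values (both illustrative):

# Illustrative only: shows the three input variables the retrieval chain must
# now supply; the template string is abridged from the one above.
from langchain.prompts import PromptTemplate

chat_template = (
    "+++\nContext:\n{context}\n+++\n"
    "###\nChat history:\n{chat_history}\n###\n"
    "Question: {question}\n"
)

QA_PROMPT = PromptTemplate(
    template=chat_template,
    input_variables=["question", "context", "chat_history"],
)

print(QA_PROMPT.format(
    context="Alkemio is an open source collaboration platform.",
    chat_history="Human:'What is Alkemio?' Assistant:'A collaboration platform.'",
    question="How do I join a Space?",
))
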
@@ -82,28 +82,42 @@ def get_language_by_code(language_code):

# prompt to be used by retrieval chain, note this is the default prompt name, so nowhere assigned
QA_PROMPT = PromptTemplate(
template=chat_template, input_variables=["question", "context"]
template=chat_template, input_variables=["question", "context", "chat_history"]
)

generic_llm = AzureOpenAI(deployment_name=os.environ["AI_DEPLOYMENT_NAME"], model_name=os.environ["AI_MODEL_NAME"],
temperature=0, verbose=verbose_models)

question_generator = LLMChain(llm=generic_llm, prompt=custom_question_prompt, verbose=verbose_models)

embeddings = OpenAIEmbeddings(deployment=os.environ["AI_EMBEDDINGS_DEPLOYMENT_NAME"], chunk_size=1)

# Check if the vector database exists
if os.path.exists(vectordb_path+"/index.pkl"):
logger.info(f"The file vector database is present")
else:
# ingest data
if generate_website:
def_ingest.clone_and_generate(config['website_repo'], website_generated_path, website_source_path)
def_ingest.mainapp(config['source_website'])

vectorstore = FAISS.load_local(vectordb_path, embeddings)
retriever = vectorstore.as_retriever()

chat_llm = AzureChatOpenAI(deployment_name=os.environ["AI_DEPLOYMENT_NAME"],
model_name=os.environ["AI_MODEL_NAME"], temperature=os.environ["AI_MODEL_TEMPERATURE"],
max_tokens=max_token_limit)

doc_chain = load_qa_chain(generic_llm, chain_type="stuff", prompt=QA_PROMPT, verbose=verbose_models)

def translate_answer(answer, language):
translate_llm = AzureOpenAI(deployment_name=os.environ["AI_DEPLOYMENT_NAME"], model_name=os.environ["AI_MODEL_NAME"],
translate_llm = AzureChatOpenAI(deployment_name=os.environ["AI_DEPLOYMENT_NAME"], model_name=os.environ["AI_MODEL_NAME"],
temperature=0, verbose=verbose_models)
prompt = translation_prompt.format(answer=answer, language=language)
return translate_llm(prompt)


def setup_chain(db_path):
generic_llm = AzureOpenAI(deployment_name=os.environ["AI_DEPLOYMENT_NAME"], model_name=os.environ["AI_MODEL_NAME"],
temperature=0, verbose=verbose_models)

embeddings = OpenAIEmbeddings(deployment=os.environ["AI_EMBEDDINGS_DEPLOYMENT_NAME"], chunk_size=1)

vectorstore = FAISS.load_local(db_path, embeddings)
retriever = vectorstore.as_retriever()

chat_llm = AzureChatOpenAI(deployment_name=os.environ["AI_DEPLOYMENT_NAME"],
model_name=os.environ["AI_MODEL_NAME"], temperature=os.environ["AI_MODEL_TEMPERATURE"],
max_tokens=max_token_limit)
def setup_chain():

conversation_chain = ConversationalRetrievalChain.from_llm(
llm=chat_llm,
@@ -112,7 +112,8 @@ def setup_chain(db_path):
chain_type="stuff",
verbose=verbose_models,
condense_question_llm=generic_llm,
return_source_documents=True,
return_source_documents=return_source_documents,
combine_docs_chain_kwargs={"prompt": QA_PROMPT}
)

return conversation_chain
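
setup_chain no longer takes a db_path argument: the embeddings, FAISS store and LLMs are now created once at import time, and the function only wires them into a ConversationalRetrievalChain with the custom QA_PROMPT. A usage sketch, assuming the AI_* environment variables from the templates are set and the vector database has already been ingested:

# Usage sketch for the reworked setup_chain; the question text is illustrative.
import ai_utils

chain = ai_utils.setup_chain()
result = chain({"question": "What is Alkemio?", "chat_history": []})

print(result["answer"])
# source documents are included because return_source_documents=True
for doc in result["source_documents"]:
    print(doc.metadata["source"])
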
92 changes: 52 additions & 40 deletions app.py
@@ -1,86 +1,98 @@
from langchain.callbacks import get_openai_callback
import os
import pika
import json
import ai_utils
import logging
import def_ingest
from config import config, website_source_path, website_generated_path, vectordb_path, generate_website
from config import config, website_source_path, website_generated_path, vectordb_path, generate_website, local_path, LOG_LEVEL

# configure logging
logger = logging.getLogger(__name__)

# Create handlers
c_handler = logging.StreamHandler()
f_handler = logging.FileHandler(local_path+'/app.log')

c_handler.setLevel(level=getattr(logging, LOG_LEVEL))
f_handler.setLevel(logging.WARNING)

# Create formatters and add them to handlers
c_format = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
f_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
c_handler.setFormatter(c_format)
f_handler.setFormatter(f_format)

# Add handlers to the logger
logger.addHandler(c_handler)
logger.addHandler(f_handler)

user_data = {}
user_chain = {}

credentials = pika.PlainCredentials(config['rabbitmq_user'],
config['rabbitmq_password'])
parameters = pika.ConnectionParameters(host=config['rabbitmq_host'],
credentials=credentials)
print(f"\About to connect to RabbitMQ with params {config['rabbitmq_user']}: {config['rabbitmq_host']}\n")
logger.info(f"\About to connect to RabbitMQ with params {config['rabbitmq_user']}: {config['rabbitmq_host']}\n")
connection = pika.BlockingConnection(parameters)
channel = connection.channel()

channel.queue_declare(queue=config['rabbitmqrequestqueue'])

# Check if the vector database exists
if os.path.exists(vectordb_path+"/index.pkl"):
print(f"The file vector database is present")
else:
# ingest data
if generate_website:
def_ingest.clone_and_generate(config['website_repo'], website_generated_path, website_source_path)
def_ingest.mainapp(config['source_website'])

qa_chain = ai_utils.setup_chain(vectordb_path)

def query(user_id, query, language_code):
print(f"\nQuery from user {user_id}: {query}\n")
logger.info(f"\nQuery from user {user_id}: {query}\n")

if user_id not in user_data:
user_chain[user_id]=ai_utils.setup_chain()
reset(user_id)
chat_history=[]

user_data[user_id]['language'] = ai_utils.get_language_by_code(language_code)

print(f"\nlanguage: {user_data[user_id]['language']}\n")
logger.debug(f"\nlanguage: {user_data[user_id]['language']}\n")
chat_history = user_data[user_id]['chat_history']

# llm_result =ai_utils.qa_chain(
# query,
# chat_history,
# user_data[user_id]['language']
# )
with get_openai_callback() as cb:
llm_result = qa_chain({"question": query, "chat_history": chat_history})
#translation = ai_utils.translate_answer(llm_result['answer'], user_data[user_id]['language'])
translation = llm_result['answer']
llm_result = user_chain[user_id]({"question": query, "chat_history": chat_history})
answer = llm_result['answer']

print(f"\nTotal Tokens: {cb.total_tokens}")
print(f"\nPrompt Tokens: {cb.prompt_tokens}")
print(f"\nCompletion Tokens: {cb.completion_tokens}")
print(f"\nTotal Cost (USD): ${cb.total_cost}")

print(f"\n\nLLM result: {llm_result}\n\n")
print(f"\n\ntranslation result: {translation}\n\n")
# clean up the document sources to avoid sending too much information over.
sources = [doc.metadata['source'] for doc in llm_result['source_documents']]



logger.info(f"\nTotal Tokens: {cb.total_tokens}")
logger.info(f"\nPrompt Tokens: {cb.prompt_tokens}")
logger.info(f"\nCompletion Tokens: {cb.completion_tokens}")
logger.info(f"\nTotal Cost (USD): ${cb.total_cost}")

logger.debug(f"\n\nLLM result: {llm_result}\n\n")
logger.info(f"\n\nanswer: {answer}\n\n")
logger.debug(f"\n\nsources: {sources}\n\n")

formatted_messages = (
f"Human:'{llm_result['question']}'",
f"AI:'{llm_result['answer']}'"
f"Assistant:'{llm_result['answer']}'"
)
user_data[user_id]['chat_history'].append(formatted_messages)

# only keep the last 3 entires of that chat history to avoid exceeding the token limit.
# only keep the last 3 entries of that chat history to avoid exceeding the token limit.
user_data[user_id]['chat_history'] = user_data[user_id]['chat_history'][-3:]

print(f"new chat history {user_data[user_id]['chat_history']}")

logger.debug(f"new chat history {user_data[user_id]['chat_history']}")
response = json.dumps({
"question": str(llm_result["question"]), "answer": str(translation), "sources": str(llm_result["source_documents"]), "prompt_tokens": cb.prompt_tokens, "completion_tokens": cb.completion_tokens, "total_tokens": cb.total_tokens, "total_cost": cb.total_cost
"question": str(llm_result["question"]), "answer": str(answer), "sources": str(llm_result["source_documents"]), "prompt_tokens": cb.prompt_tokens, "completion_tokens": cb.completion_tokens, "total_tokens": cb.total_tokens, "total_cost": cb.total_cost
}
)

return response
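
query() now serialises the answer, the cleaned-up sources and the token accounting into a single JSON string, so whatever consumes the reply has to parse it back. A receiving-side sketch of the fields it can expect, matching the json.dumps call above (the handle_reply helper itself is illustrative, not part of the commit):

# Receiving-side sketch: parse the JSON reply produced by query().
import json

def handle_reply(body: str) -> None:
    payload = json.loads(body)
    print(payload["question"])
    print(payload["answer"])
    print(payload["sources"])                # stringified source documents
    print(payload["prompt_tokens"], payload["completion_tokens"], payload["total_tokens"])
    print("cost (USD):", payload["total_cost"])
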

def reset(user_id):

user_data[user_id] = {
'chat_history': []
}

return "Reset function executed"

def ingest(source_url, website_repo, destination_path, source_path):
@@ -119,13 +131,13 @@ def on_request(ch, method, props, body):
)

ch.basic_ack(delivery_tag=method.delivery_tag)
print(f"Response sent for correlation_id: {props.correlation_id}")
print(f"Response sent to: {props.reply_to}")
print(f"response: {response}")
logger.info(f"Response sent for correlation_id: {props.correlation_id}")
logger.info(f"Response sent to: {props.reply_to}")
logger.info(f"response: {response}")


channel.basic_qos(prefetch_count=1)
channel.basic_consume(queue=config['rabbitmqrequestqueue'], on_message_callback=on_request)

print("Waiting for RPC requests")
logger.info("Waiting for RPC requests")
channel.start_consuming()
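
on_request follows the classic RabbitMQ RPC pattern: it reads reply_to and correlation_id from the incoming message properties and publishes the JSON reply back to that queue. A client-side sketch of the same pattern; the queue name, credentials and request payload fields are assumptions, since the request-parsing code is not shown in this diff:

# Client-side sketch of the RPC pattern served by on_request. Queue name,
# credentials and payload fields are illustrative assumptions.
import json
import uuid
import pika

connection = pika.BlockingConnection(pika.ConnectionParameters(host="localhost"))
channel = connection.channel()

# exclusive, auto-named queue on which the engine will publish the reply
callback_queue = channel.queue_declare(queue="", exclusive=True).method.queue
correlation_id = str(uuid.uuid4())

channel.basic_publish(
    exchange="",
    routing_key="alkemio-chat-guidance",     # the configured rabbitmqrequestqueue, assumed
    properties=pika.BasicProperties(reply_to=callback_queue, correlation_id=correlation_id),
    body=json.dumps({"userId": "user-1", "question": "What is Alkemio?", "language": "EN"}),
)
# the reply arrives on callback_queue tagged with the same correlation_id
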
2 changes: 2 additions & 0 deletions config.py
@@ -17,3 +17,5 @@
website_generated_path = local_path + '/website/generated'
vectordb_path = local_path + "/vectordb"
generate_website = True

LOG_LEVEL = 'DEBUG' # Possible values: 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
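
Both ai_utils.py and app.py resolve this string with getattr(logging, LOG_LEVEL) when configuring their console handlers. A small sketch of that conversion; the environment override and the INFO fallback are hypothetical extensions, not something this commit adds:

# Sketch of resolving the string level; the AI_LOG_LEVEL override and the
# INFO fallback are assumptions for illustration only.
import logging
import os

LOG_LEVEL = os.environ.get("AI_LOG_LEVEL", "DEBUG").upper()
numeric_level = getattr(logging, LOG_LEVEL, logging.INFO)   # fall back to INFO on typos

console_handler = logging.StreamHandler()
console_handler.setLevel(numeric_level)
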