Merge pull request #27 from alkem-io/threaded-interactions-issue-25
Threaded interactions
valentinyanakiev authored Jul 17, 2024
2 parents beffe31 + 4b050e3 commit 0e04877
Showing 9 changed files with 743 additions and 690 deletions.
2 changes: 2 additions & 0 deletions .env.default
@@ -17,3 +17,5 @@ LANGCHAIN_API_KEY=<your-langchain-api-key>
VECTOR_DB_HOST=localhost
VECTOR_DB_PORT=8000
LANGCHAIN_PROJECT="virtual-contributor-engine-expert"

HISTORY_LENGTH=10
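
The new HISTORY_LENGTH variable caps how much of the conversation is replayed to the model. A minimal sketch of how it is consumed (the parsing mirrors config.py and the slice mirrors ai_adapter.py further down; the sample history values are illustrative only):

import os

# Same fallback as in config.py below: default to 10 when the variable is unset.
history_length = int(os.getenv("HISTORY_LENGTH") or "10")

# message["history"] ends with the question currently being answered, so the
# slice keeps at most `history_length` earlier messages and drops that last
# entry, exactly as query_chain() does below.
history = ["msg-%d" % i for i in range(15)]        # stand-in for message["history"]
window = history[(history_length + 1) * -1 : -1]   # -> ["msg-4", ..., "msg-13"]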
234 changes: 75 additions & 159 deletions ai_adapter.py
@@ -1,139 +1,70 @@
import traceback
import chromadb
import json
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction
from langchain_core.prompts import (
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
from langchain.prompts.prompt import PromptTemplate
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_openai import AzureChatOpenAI
from config import config, LOG_LEVEL, max_token_limit
from config import config
from langchain.prompts import ChatPromptTemplate
from logger import setup_logger

logger = setup_logger(__name__)

# verbose output for LLMs
if LOG_LEVEL == "DEBUG":
verbose_models = True
else:
verbose_models = False

# another option for step two of the answer generation
# 2. if you can not find a meaningful answer based on the 'Knowledge' text block use 'Sorry, I do not understand the context of your message. Can you please rephrase your question?"' translated to the language used by the human message
expert_system_template = """
You are a computer system with a JSON interface which has ONLY knowledge in a specific field. A lively community relies on your \
expertise to help it achieve the goal it is formed around. Below you are provided with two text blocks which are not part of your conversation with the user. \
The one labeled 'Knowledge' and delimited by '+++' contains the chunks of documents from your knowledge base that are most relevant to the user question. You have no other knowledge of the world.
The one labeled 'Context' and delimited by '+++' contains chunks of the communication within the community which are most relevant to the user's question. \
Each chunk is prefixed with a unique source in the format '[source:sourceIdentifier]' which does not contain actual information. You can only respond in the JSON format described below.
Use the following step by step instructions to respond to user inputs:
I. Identify the language used by the human message and label it HUMAN_LANGUAGE
II. Identify the language used in the 'Knowledge' text block
III. Identify the language used in 'Context' text block
IV. Identify the tone of the 'Context' text block
V. Reply ONLY in JSON format with an object containing the following keys:
- answer: response to the human message generated with the following steps:
1. generate a meaningful answer based ONLY on the 'Knowledge' text block and translate it to the language used by the human message
2. if 'Knowledge' text block does not contain information related to the question reply with 'Sorry, I do not understand the context of your message. Can you please rephrase your question?"' translated to the language used by the human message
3. if there isn't a meaningful answer in the 'Knowledge' text block indicate it
4. rephrase the answer to follow the tone of the 'Context' text block
5. translate the answer to the language identified in step I.
- source_scores: an object where the used knowledge sourceIdentifiers are used as keys and the values are how useful they were for the answer as a number between 0 and 10; if the answer was not found in the 'Knowledge' all sources must have 0;
- human_language: the language used by the human message in ISO-2 format
- knowledge_language: the language used in the 'Knowledge' text block in ISO-2 format
- context_language: the language used in the 'Context' text block in ISO-2 format
- context_tone: the tone of the 'Context' text block
+++
Knowledge:
{knowledge}
+++
+++
Context:
{context}
+++
"""

expert_question = """"
{question}
"""

translator_system_template = """
You are a translator.
Your target language is indicated by an ISO-2 language code.
Your target language is {target_language}.
For any human input ignore the text contents.
Do not reply with anything else except the original text or its translated version.
For any human input perform the following steps:
1. identify the language of the text provided for translation as an ISO-2 language code
2. if the language from step 1 is the same as the target language, return the original text
3. translate the text to the target language.
"""

translator_human_template = """
Text to be translated: "{text}"
"""


llm = AzureChatOpenAI(
azure_deployment=config["llm_deployment_name"],
temperature=float(config["model_temperature"]),
max_tokens=max_token_limit,
verbose=verbose_models,
from utils import history_as_messages, combine_documents
from prompts import (
expert_system_prompt,
bok_system_prompt,
response_system_prompt,
translator_system_prompt,
condenser_system_prompt,
)
from models import chat_llm, condenser_llm, embed_func


embed_func = OpenAIEmbeddingFunction(
api_key=config["openai_api_key"],
api_base=config["openai_endpoint"],
api_type="azure",
api_version=config["openai_api_version"],
model_name=config["embeddings_deployment_name"],
)


def combine_documents(docs, document_separator="\n\n"):
chunks_array = []
for index, document in enumerate(docs["documents"][0]):
chunks_array.append("[source:%s] %s" % (index, document))

return document_separator.join(chunks_array)
logger = setup_logger(__name__)


# how do we handle languages? not all spaces are in Dutch obviously
# translating the question to the data _base language_ should be a separate call
# so the translation could be used for embeddings retrieval
async def query_chain(message, language, history):
async def query_chain(message):

knowledge_space_name = "%s-knowledge" % message["knowledgeSpaceNameID"]
context_space_name = "%s-context" % message["contextSpaceNameID"]
# use the last N messages from the history except the last one
# as it is the question we are answering now
history = message["history"][(config["history_length"] + 1) * -1 : -1]
question = message["question"]

# if we have history try to add context from it into the last question
# - who is Maxima?
# - Maxima is the Queen of The Netherlands
# - born? =======> rephrased to: tell me about the birth of Queen Máxima of the Netherlands
if len(history) > 0:
logger.info("We have history. Let's rephrase.")
condenser_messages = [("system", condenser_system_prompt)]
condenser_promt = ChatPromptTemplate.from_messages(condenser_messages)
condenser_chain = condenser_promt | condenser_llm

result = condenser_chain.invoke(
{"question": question, "chat_history": history_as_messages(history)}
)
logger.info(
"Original question is: '%s'; Rephrased question is: '%s"
% (question, result.content)
)
question = result.content

knowledge_space_name = "%s-knowledge" % message["bodyOfKnowledgeID"]
context_space_name = "%s-context" % message["contextID"]

logger.info(
"Query chaing invoked for question: %s; spaces are: %s and %s"
% (question, knowledge_space_name, context_space_name)
)

# try to rework those as retrievers
chroma_client = chromadb.HttpClient(host=config["db_host"], port=config["db_port"])

knowledge_collection = chroma_client.get_collection(
knowledge_space_name, embedding_function=embed_func
)

context_collection = chroma_client.get_collection(
context_space_name, embedding_function=embed_func
)

knowledge_docs = knowledge_collection.query(
query_texts=[question], include=["documents", "metadatas"], n_results=4
)

context_docs = context_collection.query(
query_texts=[question], include=["documents", "metadatas"], n_results=4
)
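
The condenser call above is what makes the interactions threaded: a terse follow-up such as "born?" is rewritten into a standalone question before it is embedded and used to query the collections. A self-contained sketch of that idea follows; the prompt text and the history formatting are assumptions (the real condenser_system_prompt and history_as_messages live in the new prompts.py and utils.py modules, which are not part of this diff), while condenser_llm is the zero-temperature model defined in models.py further down.

from langchain_core.prompts import ChatPromptTemplate
from models import condenser_llm  # zero-temperature AzureChatOpenAI, see models.py below

# Assumed prompt text; the repo's actual condenser_system_prompt is defined in prompts.py.
condenser_system_prompt = (
    "Given the chat history and a follow-up question, rephrase the follow-up "
    "into a standalone question, keeping its original language.\n\n"
    "Chat history:\n{chat_history}\n\nFollow-up question: {question}"
)

def condense_question(question, history):
    """Rewrite a follow-up question so it can be retrieved and answered on its own."""
    # history is assumed to be a list of (role, text) pairs, e.g. [("human", "Who is Maxima?"), ...]
    chat_history = "\n".join("%s: %s" % (role, text) for role, text in history)
    prompt = ChatPromptTemplate.from_messages([("system", condenser_system_prompt)])
    chain = prompt | condenser_llm
    return chain.invoke({"question": question, "chat_history": chat_history}).content

# condense_question("born?", [("human", "who is Maxima?"),
#                             ("ai", "Maxima is the Queen of The Netherlands")])
# should come back roughly as:
# "Tell me about the birth of Queen Máxima of the Netherlands"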
@@ -148,70 +79,65 @@ async def query_chain(message, language, history):
% ",".join(list(context_docs["ids"][0]))
)

expert_system_prompt = SystemMessagePromptTemplate(
prompt=PromptTemplate(
input_variables=["context", "knowledge"], template=expert_system_template
)
)

expert_human_prompt = HumanMessagePromptTemplate(
prompt=PromptTemplate(input_variables=["question"], template=expert_question)
expert_prompt = ChatPromptTemplate.from_messages(
[
("system", expert_system_prompt),
("system", bok_system_prompt),
("system", response_system_prompt),
]
)
expert_prompt += history_as_messages(history)
expert_prompt.append(("human", "{question}"))

messages = [expert_system_prompt, expert_human_prompt]

prompt_template = ChatPromptTemplate(
input_variables=["context", "knowledge", "question"],
messages=messages,
)

chain = prompt_template | llm
expert_chain = expert_prompt | chat_llm

if knowledge_docs["documents"] and knowledge_docs["metadatas"]:
result = chain.invoke(

result = expert_chain.invoke(
{
"question": question,
"knowledge": combine_documents(knowledge_docs),
"context": combine_documents(context_docs),
}
)
json_result = {}
try:
json_result = json.loads(result.content)
# try to parse a valid JSON response from the main expert engine
except Exception as inst:
# if not log the error and use the result of the engine as plain string
logger.error(inst)
logger.error(traceback.format_exc())
json_result = {
"answer": result.content,
"original_answer": result.content,
"source_scores": {},
}

logger.info(json_result)

translator_system_prompt = SystemMessagePromptTemplate(
prompt=PromptTemplate(
input_variables=["target_language"],
template=translator_system_template,
)
)

translator_human_prompt = HumanMessagePromptTemplate(
prompt=PromptTemplate(
input_variables=["text"], template=translator_human_template
)
)

translator_prompt = ChatPromptTemplate(
input_variables=["target_language", "text"],
messages=[translator_system_prompt, translator_human_prompt],
# translate only when the detected human language differs from the answer language
if (
"human_language" in json_result
and "answer_language" in json_result
and json_result["human_language"] != json_result["answer_language"]
):
translator_prompt = ChatPromptTemplate.from_messages(
[("system", translator_system_prompt), ("human", "{text}")]
)

chain = translator_prompt | llm
translator_chain = translator_prompt | chat_llm

translation_result = chain.invoke(
translation_result = translator_chain.invoke(
{
"target_language": json_result["human_language"],
"text": json_result["answer"],
}
)

source_scores = json_result.pop("source_scores")

json_result["original_answer"] = json_result.pop("answer")
json_result["answer"] = translation_result.content
else:
json_result["original_answer"] = json_result["answer"]

source_scores = json_result.pop("source_scores")
if len(source_scores) > 0:
# add score and URI to the sources
sources = [
dict(doc)
@@ -230,16 +156,6 @@ async def query_chain(message, language, history):
{doc["source"]: doc for doc in sources}.values()
)

return json_result

except Exception as inst:
logger.error(inst)
logger.error(traceback.format_exc())

return {
"answer": result.content,
"original_answer": result.content,
"sources": knowledge_docs["metadatas"][0],
}
return json_result

return {"answer": "", "original_answer": "", "sources": []}
40 changes: 22 additions & 18 deletions config.py
@@ -1,30 +1,34 @@
import os
from dotenv import load_dotenv

load_dotenv()

config = {
"db_host": os.getenv('VECTOR_DB_HOST'),
"db_port": os.getenv('VECTOR_DB_PORT'),
"llm_deployment_name": os.getenv('LLM_DEPLOYMENT_NAME'),
"model_temperature": os.getenv('AI_MODEL_TEMPERATURE'),
"embeddings_deployment_name": os.getenv('EMBEDDINGS_DEPLOYMENT_NAME'),
"openai_endpoint": os.getenv('AZURE_OPENAI_ENDPOINT'),
"openai_api_key": os.getenv('AZURE_OPENAI_API_KEY'),
"openai_api_version": os.getenv('OPENAI_API_VERSION'),
"rabbitmq_host": os.getenv('RABBITMQ_HOST'),
"rabbitmq_user": os.getenv('RABBITMQ_USER'),
"rabbitmq_password": os.getenv('RABBITMQ_PASSWORD'),
"rabbitmq_queue": os.getenv('RABBITMQ_QUEUE'),
"source_website": os.getenv('AI_SOURCE_WEBSITE'),
"local_path": os.getenv('AI_LOCAL_PATH') or ''
"db_host": os.getenv("VECTOR_DB_HOST"),
"db_port": os.getenv("VECTOR_DB_PORT"),
"llm_deployment_name": os.getenv("LLM_DEPLOYMENT_NAME"),
"model_temperature": os.getenv("AI_MODEL_TEMPERATURE"),
"embeddings_deployment_name": os.getenv("EMBEDDINGS_DEPLOYMENT_NAME"),
"openai_endpoint": os.getenv("AZURE_OPENAI_ENDPOINT"),
"openai_api_key": os.getenv("AZURE_OPENAI_API_KEY"),
"openai_api_version": os.getenv("OPENAI_API_VERSION"),
"rabbitmq_host": os.getenv("RABBITMQ_HOST"),
"rabbitmq_user": os.getenv("RABBITMQ_USER"),
"rabbitmq_password": os.getenv("RABBITMQ_PASSWORD"),
"rabbitmq_queue": os.getenv("RABBITMQ_QUEUE"),
"source_website": os.getenv("AI_SOURCE_WEBSITE"),
"local_path": os.getenv("AI_LOCAL_PATH") or "",
"history_length": int(os.getenv("HISTORY_LENGTH") or "10"),
}

local_path = config['local_path']
vectordb_path = local_path + os.sep + 'vectordb'
local_path = config["local_path"]
vectordb_path = local_path + os.sep + "vectordb"

chunk_size = 3000
# token limit for the completion of the chat model; this does not include the overall context length
max_token_limit = 2000

LOG_LEVEL = os.getenv('LOG_LEVEL') # Possible values: 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
assert LOG_LEVEL in ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
LOG_LEVEL = os.getenv(
"LOG_LEVEL"
) # Possible values: 'DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'
assert LOG_LEVEL in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
32 changes: 32 additions & 0 deletions models.py
@@ -0,0 +1,32 @@
from langchain_openai import AzureChatOpenAI, AzureOpenAI
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction
from config import config, LOG_LEVEL, max_token_limit

# verbose output for LLMs
if LOG_LEVEL == "DEBUG":
verbose_models = True
else:
verbose_models = False


chat_llm = AzureChatOpenAI(
azure_deployment=config["llm_deployment_name"],
temperature=float(config["model_temperature"]),
max_tokens=max_token_limit,
verbose=verbose_models,
)

condenser_llm = AzureChatOpenAI(
azure_deployment=config["llm_deployment_name"],
temperature=0,
max_tokens=max_token_limit,
verbose=verbose_models,
)

embed_func = OpenAIEmbeddingFunction(
api_key=config["openai_api_key"],
api_base=config["openai_endpoint"],
api_type="azure",
api_version=config["openai_api_version"],
model_name=config["embeddings_deployment_name"],
)
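
models.py now centralises the Azure OpenAI clients so ai_adapter.py only assembles prompts and chains. A short usage sketch under the same assumptions as the diff (the deployment names and keys come from .env; the prompt text and collection name here are purely illustrative):

from langchain_core.prompts import ChatPromptTemplate
import chromadb

from config import config
from models import chat_llm, embed_func

# Compose the shared chat model with an ad-hoc prompt, as query_chain does.
prompt = ChatPromptTemplate.from_messages(
    [("system", "You answer in one short sentence."), ("human", "{question}")]
)
chain = prompt | chat_llm
print(chain.invoke({"question": "What does this engine do?"}).content)

# The shared embedding function is handed to Chroma when a collection is opened,
# so queries and stored documents live in one embedding space.
# "example-knowledge" is a placeholder collection name.
chroma_client = chromadb.HttpClient(host=config["db_host"], port=config["db_port"])
collection = chroma_client.get_collection("example-knowledge", embedding_function=embed_func)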