Merge pull request #23 from alkem-io/develop
* fully refactored code
* updated outstanding tasks in README
* fix ingestion and make file paths fully configurable
* update docker compose file to add rabbitmq server and persist vector database
* Update .openai-template.env
* add llm usage and cost info
* updates after debugging session
* make 'docker on Windows proof' and update docker compose file
* refactor query chain so a chat LLM is used for the query and a 'normal' LLM for condensing the question
* improve language capabilities
* updated generate website script to run on windows - no dash in filename, absolute path for hugo; additional logging
* fix additional path issue
* moved from shell script to python for the generation
* fixed additional path issues; reverted order in app.py
* re-enable website generation
* misc tidy up
* reverted to loading hugo from the path + setting path properly
* Refactoring
* Made naming more consistent
* revert some temperature changes; some LLMs need zero temperature
* Update ai_utils.py: fix typo in prompt
* finetuning and dependency updates

--------

Co-authored-by: Rene Honig <[email protected]>
Co-authored-by: Neil Smyth <[email protected]>
Showing 13 changed files with 932 additions and 937 deletions.
.gitignore:

```diff
@@ -3,4 +3,5 @@
 openai.env
 azure.env
 /__pycache__/*
-/local_index/*
+/vectordb/*
+local.env
```
.openai-template.env (the environment template named in the commit message):

```diff
@@ -1 +1,11 @@
-OPENAI_API_KEY=api-key
+OPENAI_API_KEY=api-key
+AI_SOURCE_WEBSITE=https://www.alkemio.org
+AI_LOCAL_PATH=~/alkemio/data
+RABBITMQ_HOST=localhost
+RABBITMQ_USER=admin
+RABBITMQ_PASSWORD=super-secure-pass
+AI_MODEL_TEMPERATURE=0.3
+AI_MODEL_NAME=gpt-35-turbo
+AI_SOURCE_WEBSITE=https://www.alkemio.org
+AI_LOCAL_PATH=~/alkemio/data
+AI_WEBSITE_REPO=https://github.com/alkem-io/website.git
```
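As a reading aid (not part of the diff), here is a minimal sketch of how a service could consume a populated copy of this template, assuming python-dotenv and the pika AMQP client; the actual service may load configuration and connect to RabbitMQ differently:

```python
import os

import pika  # assumed AMQP client; the service may use a different library
from dotenv import load_dotenv

# Load a populated copy of the template, e.g. the gitignored local.env.
load_dotenv("local.env")

# Environment values are strings, so numeric settings need conversion.
temperature = float(os.environ["AI_MODEL_TEMPERATURE"])
model_name = os.environ["AI_MODEL_NAME"]

# Connect to the RabbitMQ server introduced in the docker compose file.
credentials = pika.PlainCredentials(
    os.environ["RABBITMQ_USER"], os.environ["RABBITMQ_PASSWORD"]
)
connection = pika.BlockingConnection(
    pika.ConnectionParameters(host=os.environ["RABBITMQ_HOST"], credentials=credentials)
)
channel = connection.channel()
```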
ai_utils.py:

```diff
@@ -1,39 +1,118 @@
-from langchain.prompts.prompt import PromptTemplate
 from langchain.embeddings import OpenAIEmbeddings
-from langchain.chains import ConversationalRetrievalChain
 from langchain.vectorstores import FAISS
 from langchain.llms import AzureOpenAI
+from langchain.prompts import PromptTemplate
+from langchain.chat_models import AzureChatOpenAI
+from langchain.chains import ConversationalRetrievalChain, LLMChain
+from langchain.chains.question_answering import load_qa_chain
+from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
 
 import os
 
+# define internal configuration parameters
+# token limit for the retrieval chain
+max_token_limit = 2000
+# verbose output for LLMs
+verbose_models = True
+# does the chain return the source documents?
+return_source_document = True
+
-# Set Context for response
-TEMPLATE = """
-- Act as a product and innovation expert.
-- Your task is to answer user questions.
-- Return your response in markdown, and highlight important elements.
-- If the answer cannot be found within the context, write 'I could not find an answer to your question'.
-- Provide concise replies that are polite and professional.
-- Use the following context to answer the query.
-
-Question:
-{question}
-"""
-
-QA_PROMPT = PromptTemplate(template=TEMPLATE, input_variables=["question", "context"])
-
+# Define a dictionary containing country codes as keys and related languages as values
+language_mapping = {
+    'EN': 'English',
+    'US': 'English',
+    'UK': 'English',
+    'FR': 'French',
+    'DE': 'German',
+    'ES': 'Spanish',
+    'NL': 'Dutch',
+    'BG': 'Bulgarian',
+    'UA': 'Ukrainian'
+}
+
+# function to retrieve the language for a country code
+def get_language_by_code(language_code):
+    """Returns the language associated with the given code. If no match is found, it returns 'English'."""
+    return language_mapping.get(language_code, 'English')
+
+
+chat_template = """
+You are a conversational agent. Use the following step-by-step instructions to respond to user inputs.
+1 - The text provided in the context delimited by triple pluses may contain questions. Remove those questions from the context.
+2 - Provide a single paragraph answer that is polite and professional, taking into account the context delimited by triple pluses. If the answer cannot be found within the context, write 'I could not find an answer to your question'.
++++
+Context:
+{context}
++++
+Question: {question}
+"""
+
+custom_question_template = """
+Combine the chat history and follow up question into a standalone question.
++++
+Chat History: {chat_history}
++++
+Follow up question: {question}
++++
+Standalone question:
+"""
+
+translate_template = """
+Act as a professional translator. Use the following step-by-step instructions:
+1: assess in what language the input below delimited by triple pluses is written.
+2: carry out one of tasks A or B below:
+A: if the input language is different from {language}, translate the input below delimited by triple pluses to natural {language}, maintaining tone of voice and length.
+B: if the input language is the same as {language}, there is no need for translation; simply return the original input below delimited by triple pluses as the answer.
+3: only return the answer from step 2; do not show any code or additional information.
++++
+input:
+{answer}
++++
+Translated input:
+"""
+
+custom_question_prompt = PromptTemplate(
+    template=custom_question_template, input_variables=["chat_history", "question"]
+)
+
+translation_prompt = PromptTemplate(
+    template=translate_template, input_variables=["language", "answer"]
+)
+
+# prompt used by the retrieval chain; note this is the default prompt name, so it is not assigned anywhere
+QA_PROMPT = PromptTemplate(
+    template=chat_template, input_variables=["question", "context"]
+)
+
+def translate_answer(answer, language):
+    translate_llm = AzureOpenAI(deployment_name=os.environ["AI_DEPLOYMENT_NAME"], model_name=os.environ["AI_MODEL_NAME"],
+                                temperature=0, verbose=verbose_models)
+    prompt = translation_prompt.format(answer=answer, language=language)
+    return translate_llm(prompt)
+
+
-def setup_chain():
-    llm = AzureOpenAI(deployment_name=os.environ["AI_DEPLOYMENT_NAME"], model_name=os.environ["AI_MODEL_NAME"], temperature=os.environ["AI_MODEL_TEMPERATURE"])
+def setup_chain(db_path):
+    generic_llm = AzureOpenAI(deployment_name=os.environ["AI_DEPLOYMENT_NAME"], model_name=os.environ["AI_MODEL_NAME"],
+                              temperature=0, verbose=verbose_models)
     embeddings = OpenAIEmbeddings(deployment=os.environ["AI_EMBEDDINGS_DEPLOYMENT_NAME"], chunk_size=1)
-    vectorstore = FAISS.load_local("local_index", embeddings)
-
-    chain = ConversationalRetrievalChain.from_llm(
-        llm, vectorstore.as_retriever(), return_source_documents=True
-    )
-    print("\n\nchain:\n", chain)
-
-    return chain
+    vectorstore = FAISS.load_local(db_path, embeddings)
+    retriever = vectorstore.as_retriever()
+
+    chat_llm = AzureChatOpenAI(deployment_name=os.environ["AI_DEPLOYMENT_NAME"],
+                               model_name=os.environ["AI_MODEL_NAME"], temperature=os.environ["AI_MODEL_TEMPERATURE"],
+                               max_tokens=max_token_limit)
+
+    conversation_chain = ConversationalRetrievalChain.from_llm(
+        llm=chat_llm,
+        retriever=retriever,
+        condense_question_prompt=custom_question_prompt,
+        chain_type="stuff",
+        verbose=verbose_models,
+        condense_question_llm=generic_llm,
+        return_source_documents=True,
+        combine_docs_chain_kwargs={"prompt": QA_PROMPT}
+    )
+    return conversation_chain
```
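To show how the refactored pieces fit together (a chat LLM answers over the retriever, while the generic LLM condenses follow-up questions), here is a hedged usage sketch. The `vectordb` path, the sample question, and the driver code itself are assumptions, not part of this commit:

```python
# Hypothetical driver code for the functions defined above.
chain = setup_chain("vectordb")

# ConversationalRetrievalChain expects the question plus prior chat history.
result = chain({"question": "What is Alkemio?", "chat_history": []})
answer = result["answer"]
sources = result["source_documents"]  # present because return_source_documents=True

# Resolve the user's language from a country code and translate if needed.
language = get_language_by_code("NL")  # -> 'Dutch'
print(translate_answer(answer, language))
```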