Skip to content

Commit

Permalink
updated to langchain latest
Browse files Browse the repository at this point in the history
  • Loading branch information
emarco177 committed Jul 7, 2024
1 parent 4589594 commit b9fe0a7
Show file tree
Hide file tree
Showing 6 changed files with 1,431 additions and 992 deletions.
4 changes: 4 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ streamlit = "*"
streamlit-chat = "*"
tqdm = "*"
langchain-pinecone = "*"
langchain-openai = "*"
langchain-community = "*"
isort = "*"
langchainhub = "*"

[dev-packages]

Expand Down
2,320 changes: 1,355 additions & 965 deletions Pipfile.lock

Large diffs are not rendered by default.

66 changes: 55 additions & 11 deletions backend/core.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,71 @@
from dotenv import load_dotenv

load_dotenv()
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from typing import Any, Dict, List, Optional
from langchain.chains import ConversationalRetrievalChain
from langchain_pinecone import PineconeVectorStore

from langchain import hub
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.history_aware_retriever import create_history_aware_retriever
from langchain.chains.retrieval import create_retrieval_chain
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore

INDEX_NAME = "langchain-doc-index"
from consts import INDEX_NAME


def run_llm(query: str, chat_history: Optional[List[Dict[str, Any]]] = None):
    """Answer *query* with a history-aware RAG chain over the Pinecone index.

    The chain first rephrases the question in light of ``chat_history``
    (``langchain-ai/chat-langchain-rephrase`` hub prompt), retrieves matching
    documents from the ``INDEX_NAME`` Pinecone index, then stuffs them into the
    ``langchain-ai/retrieval-qa-chat`` prompt for the chat model to answer.

    Args:
        query: The user's question.
        chat_history: Prior conversation turns, as expected by the hub prompts
            (e.g. ``("human"/"ai", text)`` pairs). Defaults to an empty history.
            NOTE: the default is ``None`` rather than a shared mutable ``[]``.

    Returns:
        The dict produced by ``create_retrieval_chain(...).invoke`` — it
        contains at least ``"input"``, ``"context"`` (retrieved documents),
        and ``"answer"`` keys.
    """
    if chat_history is None:
        chat_history = []

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    docsearch = PineconeVectorStore(index_name=INDEX_NAME, embedding=embeddings)
    chat = ChatOpenAI(verbose=True, temperature=0)

    # Hub prompts: one to condense (question + history) into a standalone
    # question, one to answer from retrieved context.
    rephrase_prompt = hub.pull("langchain-ai/chat-langchain-rephrase")
    retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")

    stuff_documents_chain = create_stuff_documents_chain(
        chat, retrieval_qa_chat_prompt
    )
    history_aware_retriever = create_history_aware_retriever(
        llm=chat, retriever=docsearch.as_retriever(), prompt=rephrase_prompt
    )
    qa = create_retrieval_chain(
        retriever=history_aware_retriever, combine_docs_chain=stuff_documents_chain
    )

    result = qa.invoke(input={"input": query, "chat_history": chat_history})
    return result


def format_docs(docs):
    """Concatenate the ``page_content`` of each document in *docs*,
    separated by blank lines, and return the resulting string."""
    contents = (document.page_content for document in docs)
    return "\n\n".join(contents)


def run_llm2(query: str, chat_history: Optional[List[Dict[str, Any]]] = None):
    """Answer *query* with a hand-built LCEL RAG pipeline (alternative to
    :func:`run_llm`, which uses the prebuilt retrieval-chain constructors).

    Args:
        query: The user's question.
        chat_history: Prior conversation turns; forwarded to the chain input
            so the ``retrieval-qa-chat`` prompt can use it. Defaults to an
            empty history. NOTE: the default is ``None`` rather than a shared
            mutable ``[]``.

    Returns:
        The dict produced by ``chain.invoke`` — the original ``"input"`` and
        ``"chat_history"`` keys plus ``"context"`` (retrieved documents) and
        ``"answer"`` (the model's string answer).
    """
    if chat_history is None:
        chat_history = []

    embeddings = OpenAIEmbeddings()
    docsearch = PineconeVectorStore(index_name=INDEX_NAME, embedding=embeddings)
    chat = ChatOpenAI(model_name="gpt-4o", verbose=True, temperature=0)

    # NOTE(review): the original also pulled
    # "langchain-ai/chat-langchain-rephrase" here but never used it — a dead
    # network call — so that fetch has been removed.
    retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")

    # Answer sub-chain: retrieve + format context, feed the QA prompt to the
    # model, and parse the message into a plain string.
    rag_chain = (
        {
            "context": docsearch.as_retriever() | format_docs,
            "input": RunnablePassthrough(),
        }
        | retrieval_qa_chat_prompt
        | chat
        | StrOutputParser()
    )

    # Separate retrieval pass that exposes the raw documents under "context"
    # in the final output (rag_chain itself only returns the answer string).
    retrieve_docs_chain = (lambda x: x["input"]) | docsearch.as_retriever()

    chain = RunnablePassthrough.assign(context=retrieve_docs_chain).assign(
        answer=rag_chain
    )

    result = chain.invoke({"input": query, "chat_history": chat_history})
    return result
1 change: 1 addition & 0 deletions consts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Name of the Pinecone index holding the embedded LangChain documentation;
# shared by ingestion (writer) and backend/core.py (reader).
INDEX_NAME = "langchain-doc-index"
13 changes: 4 additions & 9 deletions ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,18 @@

load_dotenv()

import os

from langchain_community.document_loaders import ReadTheDocsLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_pinecone import PineconeVectorStore
from langchain_community.document_loaders import ReadTheDocsLoader
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore


INDEX_NAME = "langchain-doc-index"
from consts import INDEX_NAME

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")


def ingest_docs():
loader = ReadTheDocsLoader(
"langchain-docs/api.python.langchain.com/en/latest/chains"
)
loader = ReadTheDocsLoader("langchain-docs/api.python.langchain.com/en/latest")

raw_documents = loader.load()
print(f"loaded {len(raw_documents)} documents")
Expand Down
19 changes: 12 additions & 7 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from dotenv import load_dotenv

load_dotenv()
from typing import Set

from backend.core import run_llm
import streamlit as st
from streamlit_chat import message

from backend.core import run_llm


def create_sources_string(source_urls: Set[str]) -> str:
if not source_urls:
Expand Down Expand Up @@ -37,16 +41,17 @@ def create_sources_string(source_urls: Set[str]) -> str:
query=prompt, chat_history=st.session_state["chat_history"]
)

sources = set(
[doc.metadata["source"] for doc in generated_response["source_documents"]]
)
sources = set(doc.metadata["source"] for doc in generated_response["context"])

formatted_response = (
f"{generated_response['answer']} \n\n {create_sources_string(sources)}"
)

st.session_state.chat_history.append((prompt, generated_response["answer"]))
st.session_state.user_prompt_history.append(prompt)
st.session_state.chat_answers_history.append(formatted_response)
st.session_state["user_prompt_history"].append(prompt)
st.session_state["chat_answers_history"].append(formatted_response)
st.session_state["chat_history"].append(("human", prompt))
st.session_state["chat_history"].append(("ai", generated_response["answer"]))


if st.session_state["chat_answers_history"]:
for generated_response, user_query in zip(
Expand Down

0 comments on commit b9fe0a7

Please sign in to comment.