-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstreamlit_app.py
186 lines (143 loc) · 5.17 KB
/
streamlit_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from InstructorEmbedding import INSTRUCTOR
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import (HuggingFaceInstructEmbeddings, OpenAIEmbeddings)
from langchain.llms import HuggingFaceHub
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from pypdf import PdfReader
from htmlTemplates import bot_template, css, user_template
# Function to extract the text of pdf files
def get_text_pdf(uploaded_files):
    """
    Extract and concatenate the text of all pages of the uploaded PDF files.

    Args:
        uploaded_files (list): File-like objects (or paths) of the uploaded
            PDF documents, as returned by ``st.file_uploader``.

    Returns:
        str: The combined text from all pages of all documents; '' when no
        files were uploaded.
    """
    pages_text = []
    for pdf in uploaded_files:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() may return None for image-only pages — TODO
            # confirm against the pypdf version in use; guard to keep the
            # join from failing either way.
            pages_text.append(page.extract_text() or '')
    # join instead of repeated += avoids quadratic string building.
    return ''.join(pages_text)
def get_chunks_text(doc_text):
    """
    Split the full document text into overlapping chunks for embedding.

    Args:
        doc_text (str): The combined text of the uploaded document(s).

    Returns:
        List[str]: Newline-separated chunks of at most 2000 characters,
        overlapping by 200 characters.
    """
    splitter = CharacterTextSplitter(
        separator='\n',
        chunk_size=2000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(doc_text)
def get_vectorstore(text_chunks):
    """
    Embed the text chunks and index them in a FAISS vector store.

    Args:
        text_chunks (List[str]): Text chunks to embed.

    Returns:
        FAISS: The populated vector store.
    """
    # Alternative local embedding provider (kept for reference):
    # embeddings = HuggingFaceInstructEmbeddings(
    #     model_name='hkunlp/instructor-xl'
    # )
    embeddings = OpenAIEmbeddings()
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
def get_conversation_chain(vectorstore):
    """
    Build a conversational retrieval chain backed by the given vector store.

    Args:
        vectorstore (VectorStore): Store whose retriever serves the chain.

    Returns:
        ConversationalRetrievalChain: Chain combining the chat LLM, the
        retriever, and a buffer memory keyed on 'chat_history'.
    """
    # Alternative open model (kept for reference):
    # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
    chat_llm = ChatOpenAI()
    buffer = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True
    )
    return ConversationalRetrievalChain.from_llm(
        llm=chat_llm, retriever=vectorstore.as_retriever(), memory=buffer
    )
def handle_userinput(user_question):
    """
    Send the user's question to the conversation chain and render the
    updated chat history with the user/bot HTML templates.

    Parameters:
    - user_question (str): The user's input question.

    Returns:
    - None
    """
    result = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = result['chat_history']
    for turn, msg in enumerate(st.session_state.chat_history):
        # History alternates user/bot; even indices are user turns.
        template = user_template if turn % 2 == 0 else bot_template
        st.write(
            template.replace('{{MSG}}', msg.content),
            unsafe_allow_html=True,
        )
def main():
    """
    Entry point for the CogniCore OpenAI Chatbot Streamlit app.

    Configures the page, injects the chat CSS, initializes session state,
    renders the question input, and builds the conversation chain from the
    PDFs uploaded in the sidebar.

    Parameters:
        None

    Returns:
        None
    """
    load_dotenv()
    st.set_page_config(
        page_title='OpenAI Chatbot',
        page_icon='🤖',
    )
    st.write(css, unsafe_allow_html=True)

    # Session-state defaults so Streamlit reruns keep the chain and history.
    if 'conversation' not in st.session_state:
        st.session_state.conversation = None
    if 'chat_history' not in st.session_state:
        st.session_state.chat_history = None

    st.header('OpenAI Chatbot:🤖')
    st.header('ChatBot:')
    st.text('Resume about what the people going on about.')

    user_question = st.text_input('Ask a question: ')
    if user_question:
        # Guard: before a PDF is submitted the chain is still None and
        # calling it would raise a TypeError.
        if st.session_state.conversation is None:
            st.warning('Please upload and submit a PDF first.')
        else:
            handle_userinput(user_question)

    with st.sidebar:
        st.subheader('Ask a question: ')
        uploaded_files = st.file_uploader(
            'Choose a PDF file', accept_multiple_files=True
        )
        if st.button('Submit'):
            with st.spinner('Please wait...'):
                # Get pdf text
                raw_text = get_text_pdf(uploaded_files)
                # Get the text chunks
                text_chunks = get_chunks_text(raw_text)
                # Get the vector store
                vectorstore = get_vectorstore(text_chunks)
                # Create a conversation chain
                st.session_state.conversation = get_conversation_chain(
                    vectorstore
                )


if __name__ == '__main__':
    main()