Skip to content

Commit

Permalink
Modified the Jupyter notebook to download the files directly from the web
Browse files Browse the repository at this point in the history
  • Loading branch information
dheerajreddy2020 committed Jan 18, 2025
1 parent 2503d3f commit 6a5accd
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 52 deletions.
126 changes: 74 additions & 52 deletions cookbook/rag_with_faiss.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"id": "c96cd71a-510d-46a2-a06b-8839818e2196",
"metadata": {},
"outputs": [
Expand All @@ -22,78 +22,77 @@
"text": [
"Requirement already satisfied: langchain in /home/dheerajreddy/lctest/lib/python3.10/site-packages (0.3.14)\n",
"Requirement already satisfied: langchain-community in /home/dheerajreddy/lctest/lib/python3.10/site-packages (0.3.14)\n",
"Requirement already satisfied: openai in /home/dheerajreddy/lctest/lib/python3.10/site-packages (1.59.8)\n",
"Requirement already satisfied: langchain_openai in /home/dheerajreddy/lctest/lib/python3.10/site-packages (0.3.0)\n",
"Requirement already satisfied: faiss-cpu in /home/dheerajreddy/lctest/lib/python3.10/site-packages (1.9.0.post1)\n",
"Requirement already satisfied: pypdf in /home/dheerajreddy/lctest/lib/python3.10/site-packages (5.1.0)\n",
"Requirement already satisfied: SQLAlchemy<3,>=1.4 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (2.0.37)\n",
"Requirement already satisfied: langsmith<0.3,>=0.1.17 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (0.2.11)\n",
"Requirement already satisfied: requests<3,>=2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (2.32.3)\n",
"Requirement already satisfied: numpy<2,>=1.22.4 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (1.26.4)\n",
"Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (3.11.11)\n",
"Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (2.10.5)\n",
"Requirement already satisfied: langchain-core<0.4.0,>=0.3.29 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (0.3.30)\n",
"Requirement already satisfied: PyYAML>=5.3 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (6.0.2)\n",
"Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.3 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (0.3.5)\n",
"Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (9.0.0)\n",
"Requirement already satisfied: requests<3,>=2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (2.32.3)\n",
"Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (4.0.3)\n",
"Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (2.10.5)\n",
"Requirement already satisfied: SQLAlchemy<3,>=1.4 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (2.0.37)\n",
"Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (9.0.0)\n",
"Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.3 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (0.3.5)\n",
"Requirement already satisfied: langchain-core<0.4.0,>=0.3.29 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain) (0.3.30)\n",
"Requirement already satisfied: httpx-sse<0.5.0,>=0.4.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain-community) (0.4.0)\n",
"Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain-community) (0.6.7)\n",
"Requirement already satisfied: pydantic-settings<3.0.0,>=2.4.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain-community) (2.7.1)\n",
"Requirement already satisfied: httpx-sse<0.5.0,>=0.4.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain-community) (0.4.0)\n",
"Requirement already satisfied: tqdm>4 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai) (4.67.1)\n",
"Requirement already satisfied: typing-extensions<5,>=4.11 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai) (4.12.2)\n",
"Requirement already satisfied: sniffio in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai) (1.3.1)\n",
"Requirement already satisfied: jiter<1,>=0.4.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai) (0.8.2)\n",
"Requirement already satisfied: httpx<1,>=0.23.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai) (0.28.1)\n",
"Requirement already satisfied: anyio<5,>=3.5.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai) (4.8.0)\n",
"Requirement already satisfied: distro<2,>=1.7.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai) (1.9.0)\n",
"Requirement already satisfied: tiktoken<1,>=0.7 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain_openai) (0.8.0)\n",
"Requirement already satisfied: openai<2.0.0,>=1.58.1 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain_openai) (1.59.8)\n",
"Requirement already satisfied: packaging in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from faiss-cpu) (24.2)\n",
"Requirement already satisfied: typing_extensions>=4.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from pypdf) (4.12.2)\n",
"Requirement already satisfied: yarl<2.0,>=1.17.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.18.3)\n",
"Requirement already satisfied: attrs>=17.3.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (24.3.0)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.5.0)\n",
"Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (2.4.4)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.2)\n",
"Requirement already satisfied: propcache>=0.2.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (0.2.1)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.1.0)\n",
"Requirement already satisfied: yarl<2.0,>=1.17.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.18.3)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.2)\n",
"Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (2.4.4)\n",
"Requirement already satisfied: typing-inspect<1,>=0.4.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (0.9.0)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.5.0)\n",
"Requirement already satisfied: idna>=2.8 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai) (3.10)\n",
"Requirement already satisfied: exceptiongroup>=1.0.2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai) (1.2.2)\n",
"Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (3.25.1)\n",
"Requirement already satisfied: typing-inspect<1,>=0.4.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community) (0.9.0)\n",
"Requirement already satisfied: httpcore==1.* in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from httpx<1,>=0.23.0->openai) (1.0.7)\n",
"Requirement already satisfied: certifi in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from httpx<1,>=0.23.0->openai) (2024.12.14)\n",
"Requirement already satisfied: h11<0.15,>=0.13 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai) (0.14.0)\n",
"Requirement already satisfied: jsonpatch<2.0,>=1.33 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langchain-core<0.4.0,>=0.3.29->langchain) (1.33)\n",
"Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langsmith<0.3,>=0.1.17->langchain) (1.0.0)\n",
"Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langsmith<0.3,>=0.1.17->langchain) (3.10.14)\n",
"Requirement already satisfied: httpx<1,>=0.23.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langsmith<0.3,>=0.1.17->langchain) (0.28.1)\n",
"Requirement already satisfied: anyio<5,>=3.5.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (4.8.0)\n",
"Requirement already satisfied: sniffio in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (1.3.1)\n",
"Requirement already satisfied: tqdm>4 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (4.67.1)\n",
"Requirement already satisfied: jiter<1,>=0.4.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (0.8.2)\n",
"Requirement already satisfied: distro<2,>=1.7.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (1.9.0)\n",
"Requirement already satisfied: pydantic-core==2.27.2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.27.2)\n",
"Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from langsmith<0.3,>=0.1.17->langchain) (1.0.0)\n",
"Requirement already satisfied: annotated-types>=0.6.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.7.0)\n",
"Requirement already satisfied: pydantic-core==2.27.2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.27.2)\n",
"Requirement already satisfied: python-dotenv>=0.21.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from pydantic-settings<3.0.0,>=2.4.0->langchain-community) (1.0.1)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from requests<3,>=2->langchain) (2.3.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from requests<3,>=2->langchain) (2024.12.14)\n",
"Requirement already satisfied: idna<4,>=2.5 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from requests<3,>=2->langchain) (3.10)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from requests<3,>=2->langchain) (3.4.1)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from requests<3,>=2->langchain) (2.3.0)\n",
"Requirement already satisfied: greenlet!=0.4.17 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from SQLAlchemy<3,>=1.4->langchain) (3.1.1)\n",
"Requirement already satisfied: regex>=2022.1.18 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from tiktoken<1,>=0.7->langchain_openai) (2024.11.6)\n",
"Requirement already satisfied: exceptiongroup>=1.0.2 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai<2.0.0,>=1.58.1->langchain_openai) (1.2.2)\n",
"Requirement already satisfied: httpcore==1.* in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from httpx<1,>=0.23.0->langsmith<0.3,>=0.1.17->langchain) (1.0.7)\n",
"Requirement already satisfied: h11<0.15,>=0.13 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.3,>=0.1.17->langchain) (0.14.0)\n",
"Requirement already satisfied: jsonpointer>=1.9 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.29->langchain) (3.0.0)\n",
"Requirement already satisfied: mypy-extensions>=0.3.0 in /home/dheerajreddy/lctest/lib/python3.10/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community) (1.0.0)\n"
]
}
],
"source": [
"! pip install -U langchain langchain-community langchain_openai faiss-cpu pypdf # (newest versions required for multi-modal)"
"! pip install -U langchain langchain-community openai langchain_openai faiss-cpu pypdf # (newest versions required for multi-modal)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"id": "d033c505-c805-49cd-905d-97edf606113d",
"metadata": {},
"outputs": [],
"source": [
"#Import all necessary libraries\n",
"# from langchain_community.chat_models import ChatOpenAI\n",
"import requests\n",
"from langchain_openai import ChatOpenAI\n",
"from langchain_community.vectorstores import FAISS\n",
"# from langchain_community.embeddings import OpenAIEmbeddings\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_core.prompts import PromptTemplate\n",
"from langchain_community.document_loaders import PyPDFLoader,TextLoader\n",
Expand All @@ -105,7 +104,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "4196d408-efe1-442c-8d8a-856a002ab8ac",
"metadata": {
"scrolled": true
Expand All @@ -129,7 +128,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"id": "c4eb93a9-6e6a-4ab9-822b-1e5106e30258",
"metadata": {},
"outputs": [],
Expand All @@ -154,7 +153,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "9d8e77bc-0e5e-4d8f-8475-d9248b048817",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -212,41 +211,64 @@
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c25b33fe-8039-4bbd-a3af-c829179325b5",
"execution_count": 7,
"id": "57fb42dd-684a-4c80-917d-37feaa41a6ea",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"PDF downloaded successfully as Nike_q4_report.pdf\n",
"PDF downloaded successfully as puma_q4_report.pdf\n"
]
}
],
"source": [
"#add all the file paths, you want to create a retrieval chatbot for\n",
"file_paths = ['../docs/docs/example_data/nike-q3-2024-earnings.pdf',\n",
" '../docs/docs/example_data/puma-q3-2024-earnings.pdf']"
"# Download the sample PDFs used for RAG\n",
"def download_pdf(url, filename, timeout=30):\n",
"    \"\"\"Download the file at `url` and save it locally as `filename`.\n",
"\n",
"    Args:\n",
"        url: HTTP(S) address of the PDF to fetch.\n",
"        filename: Local path the response body is written to (binary mode).\n",
"        timeout: Seconds to wait for the server before giving up, so that a\n",
"            stalled connection cannot hang the notebook indefinitely.\n",
"\n",
"    Returns:\n",
"        True if the file was written, False if the server answered with a\n",
"        non-200 status code (the status is printed in that case).\n",
"\n",
"    Raises:\n",
"        requests.exceptions.RequestException: on connection errors or timeout.\n",
"    \"\"\"\n",
"    response = requests.get(url, timeout=timeout)\n",
"    if response.status_code != 200:\n",
"        # Best-effort: report the failure and let the caller decide what to do.\n",
"        print(f\"Failed to download PDF. Status code: {response.status_code}\")\n",
"        return False\n",
"\n",
"    with open(filename, 'wb') as file:\n",
"        file.write(response.content)\n",
"    print(f\"PDF downloaded successfully as {filename}\")\n",
"    return True\n",
"\n",
"download_urls = ['https://s1.q4cdn.com/806093406/files/doc_financials/2024/q3/FY24-Q3-Combined-NIKE-Press-Release-Schedules-FINAL.pdf',\n",
"                 'https://about.puma.com/sites/default/files/financial-report/2024/puma-q3-2024-release-english-final.pdf']\n",
"\n",
"# NOTE(review): the URLs above point at Q3 2024 releases while these local\n",
"# names say 'q4' - confirm and rename if the names are meant to match.\n",
"filepaths = ['Nike_q4_report.pdf','puma_q4_report.pdf']\n",
"\n",
"# Pair each URL with its destination path instead of indexing two lists in parallel.\n",
"for url, path in zip(download_urls, filepaths):\n",
"    download_pdf(url, path)\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 9,
"id": "3b0ced3b-890d-4fd4-9b3e-849e73451210",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['../docs/docs/example_data/nike-q3-2024-earnings.pdf', '../docs/docs/example_data/puma-q3-2024-earnings.pdf']\n",
"Processing file: ../docs/docs/example_data/nike-q3-2024-earnings.pdf\n",
"Processing file: ../docs/docs/example_data/puma-q3-2024-earnings.pdf\n",
"['Nike_q4_report.pdf', 'puma_q4_report.pdf']\n",
"Processing file: Nike_q4_report.pdf\n",
"Processing file: puma_q4_report.pdf\n",
"49\n"
]
}
],
"source": [
"#Create Vector embeddings\n",
"process_documents(file_paths)"
"process_documents(filepaths)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 10,
"id": "e261b2c3-d62e-428f-a615-2643e2c3f79d",
"metadata": {},
"outputs": [
Expand All @@ -266,29 +288,29 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 15,
"id": "723f652b-ca89-4f9e-9b39-60c451336d2e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"It is difficult to determine which company's financials are better based on the given context as both Nike and Puma present different aspects of their financial performance. Nike reported revenues of $12.4 billion in the third quarter of fiscal year 2024, while Puma emphasized its focus on managing short-term challenges without compromising long-term momentum and positive feedback on upcoming product releases. Additional analysis and comparison of financial statements would be needed to determine which company's financial condition is better.\n"
"Based on the provided context, PUMA appears to be performing better than Nike. PUMA has achieved various successes in athletics and sports partnerships, as well as positive feedback from consumers and retail partners. The brand's momentum and growth are highlighted, indicating a strong performance compared to Nike.\n"
]
}
],
"source": [
"#query the documents\n",
"question = 'Among Nike and Puma whose financials are better'\n",
"question = 'Which company is performing better in between Nike and Puma'\n",
"response = qa_chain.invoke(question)\n",
"print(response)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "57fb42dd-684a-4c80-917d-37feaa41a6ea",
"id": "5f5f6a9f-02fb-42e1-8268-bfaef2956a3d",
"metadata": {},
"outputs": [],
"source": []
Expand Down
Binary file removed docs/docs/example_data/nike-q3-2024-earnings.pdf
Binary file not shown.
Binary file removed docs/docs/example_data/puma-q3-2024-earnings.pdf
Binary file not shown.

0 comments on commit 6a5accd

Please sign in to comment.