From 909a9d8f942beef7cd831a961dd2e38d79958eb6 Mon Sep 17 00:00:00 2001 From: Abhi Agarwal Date: Fri, 2 Aug 2024 19:09:27 -0400 Subject: [PATCH 1/6] community: add sqlite-vec vectorstore --- docs/docs/integrations/providers/sqlite.mdx | 5 +- .../integrations/vectorstores/sqlitevec.ipynb | 240 +++++++++++++++++ .../migrations/langchain_to_community.json | 4 + libs/community/extended_testing_deps.txt | 1 + .../vectorstores/__init__.py | 5 + .../vectorstores/sqlitevec.py | 242 ++++++++++++++++++ .../vectorstores/test_sqlitevec.py | 58 +++++ .../unit_tests/vectorstores/test_imports.py | 1 + .../langchain/vectorstores/__init__.py | 3 + .../vectorstores/test_public_api.py | 1 + 10 files changed, 558 insertions(+), 2 deletions(-) create mode 100644 docs/docs/integrations/vectorstores/sqlitevec.ipynb create mode 100644 libs/community/langchain_community/vectorstores/sqlitevec.py create mode 100644 libs/community/tests/integration_tests/vectorstores/test_sqlitevec.py diff --git a/docs/docs/integrations/providers/sqlite.mdx b/docs/docs/integrations/providers/sqlite.mdx index e45a47f11372c..3bba662f0d888 100644 --- a/docs/docs/integrations/providers/sqlite.mdx +++ b/docs/docs/integrations/providers/sqlite.mdx @@ -16,10 +16,11 @@ pip install SQLAlchemy ## Vector Store -See a [usage example](/docs/integrations/vectorstores/sqlitevss). +See a [usage example](/docs/integrations/vectorstores/sqlitevec). 
```python -from langchain_community.vectorstores import SQLiteVSS +from langchain_community.vectorstores import SQLiteVec +from langchain_community.vectorstores import SQLiteVSS # legacy ``` ## Memory diff --git a/docs/docs/integrations/vectorstores/sqlitevec.ipynb b/docs/docs/integrations/vectorstores/sqlitevec.ipynb new file mode 100644 index 0000000000000..c47f623b26c7f --- /dev/null +++ b/docs/docs/integrations/vectorstores/sqlitevec.ipynb @@ -0,0 +1,240 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "# SQLite-Vec\n", + "\n", + ">[SQLite-Vec](https://alexgarcia.xyz/sqlite-vec/) is an `SQLite` extension designed for vector search, emphasizing local-first operations and easy integration into applications without external servers. It is the successor to [SQLite-VSS](https://alexgarcia.xyz/sqlite-vss/) by the same author. It is written in zero-dependency C and designed to be easy to build and use.\n", + "\n", + "You'll need to install `langchain-community` with `pip install -qU langchain-community` to use this integration\n", + "\n", + "This notebook shows how to use the `SQLiteVec` vector database." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "# You need to install sqlite-vss as a dependency.\n", + "%pip install --upgrade --quiet sqlite-vec" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "## Quickstart" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2023-09-06T14:55:55.370351Z", + "start_time": "2023-09-06T14:55:53.547755Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Tonight. 
I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain_community.document_loaders import TextLoader\n", + "from langchain_community.embeddings.sentence_transformer import (\n", + " SentenceTransformerEmbeddings,\n", + ")\n", + "from langchain_community.vectorstores import SQLiteVec\n", + "from langchain_text_splitters import CharacterTextSplitter\n", + "\n", + "# load the document and split it into chunks\n", + "loader = TextLoader(\"../../how_to/state_of_the_union.txt\")\n", + "documents = loader.load()\n", + "\n", + "# split it into chunks\n", + "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", + "docs = text_splitter.split_documents(documents)\n", + "texts = [doc.page_content for doc in docs]\n", + "\n", + "\n", + "# create the open-source embedding function\n", + "embedding_function = SentenceTransformerEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n", + "\n", + "\n", + "# load it in sqlite-vec in a table named state_union.\n", + "# the db_file parameter is the name of the file you want\n", + "# as your sqlite database.\n", + "db = SQLiteVec.from_texts(\n", + " texts=texts,\n", + " 
embedding=embedding_function,\n", + " table=\"state_union\",\n", + " db_file=\"/tmp/vec.db\",\n", + ")\n", + "\n", + "# query it\n", + "query = \"What did the president say about Ketanji Brown Jackson\"\n", + "data = db.similarity_search(query)\n", + "\n", + "# print results\n", + "data[0].page_content" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "## Using existing SQLite connection" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2023-09-06T14:59:22.086252Z", + "start_time": "2023-09-06T14:59:21.693237Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Ketanji Brown Jackson is awesome'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain_community.document_loaders import TextLoader\n", + "from langchain_community.embeddings.sentence_transformer import (\n", + " SentenceTransformerEmbeddings,\n", + ")\n", + "from langchain_community.vectorstores import SQLiteVec\n", + "from langchain_text_splitters import CharacterTextSplitter\n", + "\n", + "# load the document and split it into chunks\n", + "loader = TextLoader(\"../../how_to/state_of_the_union.txt\")\n", + "documents = loader.load()\n", + "\n", + "# split it into chunks\n", + "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", + "docs = text_splitter.split_documents(documents)\n", + "texts = [doc.page_content for doc in docs]\n", + "\n", + "\n", + "# create the open-source embedding function\n", + "embedding_function = SentenceTransformerEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n", + "connection = SQLiteVec.create_connection(db_file=\"/tmp/vec.db\")\n", + "\n", + "db1 = SQLiteVec(\n", + " table=\"state_union\", embedding=embedding_function, connection=connection\n", + ")\n", 
+ "\n", + "db1.add_texts([\"Ketanji Brown Jackson is awesome\"])\n", + "# query it again\n", + "query = \"What did the president say about Ketanji Brown Jackson\"\n", + "data = db1.similarity_search(query)\n", + "\n", + "# print results\n", + "data[0].page_content" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "ExecuteTime": { + "end_time": "2023-09-06T15:01:15.550318Z", + "start_time": "2023-09-06T15:01:15.546428Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [ + "# Cleaning up\n", + "import os\n", + "\n", + "os.remove(\"/tmp/vec.db\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/libs/cli/langchain_cli/namespaces/migrate/codemods/migrations/langchain_to_community.json b/libs/cli/langchain_cli/namespaces/migrate/codemods/migrations/langchain_to_community.json index e6ea9e0f40f0a..1a5c4eef5a15b 100644 --- a/libs/cli/langchain_cli/namespaces/migrate/codemods/migrations/langchain_to_community.json +++ b/libs/cli/langchain_cli/namespaces/migrate/codemods/migrations/langchain_to_community.json @@ -7079,6 +7079,10 @@ "langchain.vectorstores.SingleStoreDB", "langchain_community.vectorstores.SingleStoreDB" ], + [ + "langchain.vectorstores.SQLiteVec", + "langchain_community.vectorstores.SQLiteVec" + ], [ "langchain.vectorstores.SQLiteVSS", "langchain_community.vectorstores.SQLiteVSS" diff --git 
a/libs/community/extended_testing_deps.txt b/libs/community/extended_testing_deps.txt index f94f975d05790..9ed2ec0008051 100644 --- a/libs/community/extended_testing_deps.txt +++ b/libs/community/extended_testing_deps.txt @@ -76,6 +76,7 @@ rspace_client>=2.5.0,<3 scikit-learn>=1.2.2,<2 simsimd>=4.3.1,<5 sqlite-vss>=0.1.2,<0.2 +sqlite-vec>=0.1.0,<0.2 sseclient-py>=1.8.0,<2 streamlit>=1.18.0,<2 sympy>=1.12,<2 diff --git a/libs/community/langchain_community/vectorstores/__init__.py b/libs/community/langchain_community/vectorstores/__init__.py index faa2e35b3103f..c38beea0ed6d2 100644 --- a/libs/community/langchain_community/vectorstores/__init__.py +++ b/libs/community/langchain_community/vectorstores/__init__.py @@ -230,6 +230,9 @@ from langchain_community.vectorstores.sklearn import ( SKLearnVectorStore, ) + from langchain_community.vectorstores.sqlitevec import ( + SQLiteVec, + ) from langchain_community.vectorstores.sqlitevss import ( SQLiteVSS, ) @@ -380,6 +383,7 @@ "Relyt", "Rockset", "SKLearnVectorStore", + "SQLiteVec", "SQLiteVSS", "ScaNN", "SemaDB", @@ -483,6 +487,7 @@ "Relyt": "langchain_community.vectorstores.relyt", "Rockset": "langchain_community.vectorstores.rocksetdb", "SKLearnVectorStore": "langchain_community.vectorstores.sklearn", + "SQLiteVec": "langchain_community.vectorstores.sqlitevec", "SQLiteVSS": "langchain_community.vectorstores.sqlitevss", "ScaNN": "langchain_community.vectorstores.scann", "SemaDB": "langchain_community.vectorstores.semadb", diff --git a/libs/community/langchain_community/vectorstores/sqlitevec.py b/libs/community/langchain_community/vectorstores/sqlitevec.py new file mode 100644 index 0000000000000..13a2d5ee9208c --- /dev/null +++ b/libs/community/langchain_community/vectorstores/sqlitevec.py @@ -0,0 +1,242 @@ +from __future__ import annotations + +import json +import logging +import struct +import warnings +from typing import ( + TYPE_CHECKING, + Any, + Iterable, + List, + Optional, + Tuple, + Type, +) + +from 
langchain_core.documents import Document +from langchain_core.embeddings import Embeddings +from langchain_core.vectorstores import VectorStore + +if TYPE_CHECKING: + import sqlite3 + +logger = logging.getLogger(__name__) + + +def serialize_f32(vector: List[float]) -> bytes: + """Serializes a list of floats into a compact "raw bytes" format + + Source: https://github.com/asg017/sqlite-vec/blob/21c5a14fc71c83f135f5b00c84115139fd12c492/examples/simple-python/demo.py#L8-L10 + """ + return struct.pack("%sf" % len(vector), *vector) + + +class SQLiteVec(VectorStore): + """SQLite with Vec extension as a vector database. + + To use, you should have the ``sqlite-vec`` python package installed. + Example: + .. code-block:: python + from langchain_community.vectorstores import SQLiteVec + from langchain_community.embeddings.openai import OpenAIEmbeddings + ... + """ + + def __init__( + self, + table: str, + connection: Optional[sqlite3.Connection], + embedding: Embeddings, + db_file: str = "vec.db", + ): + """Initialize with sqlite client with vec extension.""" + try: + import sqlite_vec # noqa # pylint: disable=unused-import + except ImportError: + raise ImportError( + "Could not import sqlite-vec python package. " + "Please install it with `pip install sqlite-vec`." 
+ ) + + if not connection: + connection = self.create_connection(db_file) + + if not isinstance(embedding, Embeddings): + warnings.warn("embeddings input must be Embeddings object.") + + self._connection = connection + self._table = table + self._embedding = embedding + + self.create_table_if_not_exists() + + def create_table_if_not_exists(self) -> None: + self._connection.execute( + f""" + CREATE TABLE IF NOT EXISTS {self._table} + ( + rowid INTEGER PRIMARY KEY AUTOINCREMENT, + text TEXT, + metadata BLOB, + text_embedding BLOB + ) + ; + """ + ) + self._connection.execute( + f""" + CREATE VIRTUAL TABLE IF NOT EXISTS {self._table}_vec USING vec0( + rowid INTEGER PRIMARY KEY, + text_embedding float[{self.get_dimensionality()}] + ) + ; + """ + ) + self._connection.execute( + f""" + CREATE TRIGGER IF NOT EXISTS embed_text + AFTER INSERT ON {self._table} + BEGIN + INSERT INTO {self._table}_vec(rowid, text_embedding) + VALUES (new.rowid, new.text_embedding) + ; + END; + """ + ) + self._connection.commit() + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> List[str]: + """Add more texts to the vectorstore index. + Args: + texts: Iterable of strings to add to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. 
+ kwargs: vectorstore specific parameters + """ + max_id = self._connection.execute( + f"SELECT max(rowid) as rowid FROM {self._table}" + ).fetchone()["rowid"] + if max_id is None: # no text added yet + max_id = 0 + + embeds = self._embedding.embed_documents(list(texts)) + if not metadatas: + metadatas = [{} for _ in texts] + data_input = [ + (text, json.dumps(metadata), serialize_f32(embed)) + for text, metadata, embed in zip(texts, metadatas, embeds) + ] + self._connection.executemany( + f"INSERT INTO {self._table}(text, metadata, text_embedding) " + f"VALUES (?,?,?)", + data_input, + ) + self._connection.commit() + # pulling every ids we just inserted + results = self._connection.execute( + f"SELECT rowid FROM {self._table} WHERE rowid > {max_id}" + ) + return [row["rowid"] for row in results] + + def similarity_search_with_score_by_vector( + self, embedding: List[float], k: int = 4, **kwargs: Any + ) -> List[Tuple[Document, float]]: + sql_query = f""" + SELECT + text, + metadata, + distance + FROM {self._table} AS e + INNER JOIN {self._table}_vec AS v on v.rowid = e.rowid + WHERE + v.text_embedding MATCH ? + AND k = ? 
+ ORDER BY distance + """ + cursor = self._connection.cursor() + cursor.execute( + sql_query, + [serialize_f32(embedding), k], + ) + results = cursor.fetchall() + + documents = [] + for row in results: + metadata = json.loads(row["metadata"]) or {} + doc = Document(page_content=row["text"], metadata=metadata) + documents.append((doc, row["distance"])) + + return documents + + def similarity_search( + self, query: str, k: int = 4, **kwargs: Any + ) -> List[Document]: + """Return docs most similar to query.""" + embedding = self._embedding.embed_query(query) + documents = self.similarity_search_with_score_by_vector( + embedding=embedding, k=k + ) + return [doc for doc, _ in documents] + + def similarity_search_with_score( + self, query: str, k: int = 4, **kwargs: Any + ) -> List[Tuple[Document, float]]: + """Return docs most similar to query.""" + embedding = self._embedding.embed_query(query) + documents = self.similarity_search_with_score_by_vector( + embedding=embedding, k=k + ) + return documents + + def similarity_search_by_vector( + self, embedding: List[float], k: int = 4, **kwargs: Any + ) -> List[Document]: + documents = self.similarity_search_with_score_by_vector( + embedding=embedding, k=k + ) + return [doc for doc, _ in documents] + + @classmethod + def from_texts( + cls: Type[SQLiteVec], + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + table: str = "langchain", + db_file: str = "vec.db", + **kwargs: Any, + ) -> SQLiteVec: + """Return VectorStore initialized from texts and embeddings.""" + connection = cls.create_connection(db_file) + vec = cls( + table=table, connection=connection, db_file=db_file, embedding=embedding + ) + vec.add_texts(texts=texts, metadatas=metadatas) + return vec + + @staticmethod + def create_connection(db_file: str) -> sqlite3.Connection: + import sqlite3 + + import sqlite_vec + + connection = sqlite3.connect(db_file) + connection.row_factory = sqlite3.Row + 
connection.enable_load_extension(True) + sqlite_vec.load(connection) + connection.enable_load_extension(False) + return connection + + def get_dimensionality(self) -> int: + """ + Function that does a dummy embedding to figure out how many dimensions + this embedding function returns. Needed for the virtual table DDL. + """ + dummy_text = "This is a dummy text" + dummy_embedding = self._embedding.embed_query(dummy_text) + return len(dummy_embedding) diff --git a/libs/community/tests/integration_tests/vectorstores/test_sqlitevec.py b/libs/community/tests/integration_tests/vectorstores/test_sqlitevec.py new file mode 100644 index 0000000000000..f7c67ba529902 --- /dev/null +++ b/libs/community/tests/integration_tests/vectorstores/test_sqlitevec.py @@ -0,0 +1,58 @@ +from typing import List, Optional + +import pytest +from langchain_core.documents import Document + +from langchain_community.vectorstores import SQLiteVec +from tests.integration_tests.vectorstores.fake_embeddings import ( + FakeEmbeddings, + fake_texts, +) + + +def _sqlite_vec_from_texts( + metadatas: Optional[List[dict]] = None, drop: bool = True +) -> SQLiteVec: + return SQLiteVec.from_texts( + fake_texts, + FakeEmbeddings(), + metadatas=metadatas, + table="test", + db_file=":memory:", + ) + + +@pytest.mark.requires("sqlite-vec") +def test_sqlitevec() -> None: + """Test end to end construction and search.""" + docsearch = _sqlite_vec_from_texts() + output = docsearch.similarity_search("foo", k=1) + assert output == [Document(page_content="foo", metadata={})] + + +@pytest.mark.requires("sqlite-vec") +def test_sqlitevec_with_score() -> None: + """Test end to end construction and search with scores and IDs.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": i} for i in range(len(texts))] + docsearch = _sqlite_vec_from_texts(metadatas=metadatas) + output = docsearch.similarity_search_with_score("foo", k=3) + docs = [o[0] for o in output] + distances = [o[1] for o in output] + assert docs == [ + 
Document(page_content="foo", metadata={"page": 0}), + Document(page_content="bar", metadata={"page": 1}), + Document(page_content="baz", metadata={"page": 2}), + ] + assert distances[0] < distances[1] < distances[2] + + +@pytest.mark.requires("sqlite-vec") +def test_sqlitevec_add_extra() -> None: + """Test that texts added after construction are searchable.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": i} for i in range(len(texts))] + docsearch = _sqlite_vec_from_texts(metadatas=metadatas) + docsearch.add_texts(texts, metadatas) + output = docsearch.similarity_search("foo", k=10) + assert len(output) == 6 diff --git a/libs/community/tests/unit_tests/vectorstores/test_imports.py b/libs/community/tests/unit_tests/vectorstores/test_imports.py index 2a59b0ebc7c3f..5ac0ca72b49c5 100644 --- a/libs/community/tests/unit_tests/vectorstores/test_imports.py +++ b/libs/community/tests/unit_tests/vectorstores/test_imports.py @@ -76,6 +76,7 @@ "Relyt", "Rockset", "SKLearnVectorStore", + "SQLiteVec", "SQLiteVSS", "ScaNN", "SemaDB", diff --git a/libs/langchain/langchain/vectorstores/__init__.py b/libs/langchain/langchain/vectorstores/__init__.py index 603421aad0e08..c2cb3e44491d5 100644 --- a/libs/langchain/langchain/vectorstores/__init__.py +++ b/libs/langchain/langchain/vectorstores/__init__.py @@ -80,6 +80,7 @@ SemaDB, SingleStoreDB, SKLearnVectorStore, + SQLiteVec, SQLiteVSS, StarRocks, SupabaseVectorStore, @@ -158,6 +159,7 @@ "SemaDB": "langchain_community.vectorstores", "SingleStoreDB": "langchain_community.vectorstores", "SKLearnVectorStore": "langchain_community.vectorstores", + "SQLiteVec": "langchain_community.vectorstores", "SQLiteVSS": "langchain_community.vectorstores", "StarRocks": "langchain_community.vectorstores", "SupabaseVectorStore": "langchain_community.vectorstores", @@ -240,6 +242,7 @@ def __getattr__(name: str) -> Any: "SemaDB", "SingleStoreDB", "SKLearnVectorStore", + "SQLiteVec", "SQLiteVSS", "StarRocks", "SupabaseVectorStore", diff --git 
a/libs/langchain/tests/unit_tests/vectorstores/test_public_api.py b/libs/langchain/tests/unit_tests/vectorstores/test_public_api.py index 861fa5463f47e..bfac8c632d31f 100644 --- a/libs/langchain/tests/unit_tests/vectorstores/test_public_api.py +++ b/libs/langchain/tests/unit_tests/vectorstores/test_public_api.py @@ -56,6 +56,7 @@ "SemaDB", "SingleStoreDB", "SKLearnVectorStore", + "SQLiteVec", "SQLiteVSS", "StarRocks", "SupabaseVectorStore", From aabc1fccfb4348862fb88374cf5f9c53aff73025 Mon Sep 17 00:00:00 2001 From: Erick Friis Date: Wed, 28 Aug 2024 09:45:19 -0700 Subject: [PATCH 2/6] x --- .../migrate/codemods/migrations/langchain_to_community.json | 4 ---- libs/langchain/langchain/vectorstores/__init__.py | 3 --- .../tests/unit_tests/vectorstores/test_public_api.py | 1 - 3 files changed, 8 deletions(-) diff --git a/libs/cli/langchain_cli/namespaces/migrate/codemods/migrations/langchain_to_community.json b/libs/cli/langchain_cli/namespaces/migrate/codemods/migrations/langchain_to_community.json index 1a5c4eef5a15b..e6ea9e0f40f0a 100644 --- a/libs/cli/langchain_cli/namespaces/migrate/codemods/migrations/langchain_to_community.json +++ b/libs/cli/langchain_cli/namespaces/migrate/codemods/migrations/langchain_to_community.json @@ -7079,10 +7079,6 @@ "langchain.vectorstores.SingleStoreDB", "langchain_community.vectorstores.SingleStoreDB" ], - [ - "langchain.vectorstores.SQLiteVec", - "langchain_community.vectorstores.SQLiteVec" - ], [ "langchain.vectorstores.SQLiteVSS", "langchain_community.vectorstores.SQLiteVSS" diff --git a/libs/langchain/langchain/vectorstores/__init__.py b/libs/langchain/langchain/vectorstores/__init__.py index c2cb3e44491d5..603421aad0e08 100644 --- a/libs/langchain/langchain/vectorstores/__init__.py +++ b/libs/langchain/langchain/vectorstores/__init__.py @@ -80,7 +80,6 @@ SemaDB, SingleStoreDB, SKLearnVectorStore, - SQLiteVec, SQLiteVSS, StarRocks, SupabaseVectorStore, @@ -159,7 +158,6 @@ "SemaDB": "langchain_community.vectorstores", 
"SingleStoreDB": "langchain_community.vectorstores", "SKLearnVectorStore": "langchain_community.vectorstores", - "SQLiteVec": "langchain_community.vectorstores", "SQLiteVSS": "langchain_community.vectorstores", "StarRocks": "langchain_community.vectorstores", "SupabaseVectorStore": "langchain_community.vectorstores", @@ -242,7 +240,6 @@ def __getattr__(name: str) -> Any: "SemaDB", "SingleStoreDB", "SKLearnVectorStore", - "SQLiteVec", "SQLiteVSS", "StarRocks", "SupabaseVectorStore", diff --git a/libs/langchain/tests/unit_tests/vectorstores/test_public_api.py b/libs/langchain/tests/unit_tests/vectorstores/test_public_api.py index bfac8c632d31f..861fa5463f47e 100644 --- a/libs/langchain/tests/unit_tests/vectorstores/test_public_api.py +++ b/libs/langchain/tests/unit_tests/vectorstores/test_public_api.py @@ -56,7 +56,6 @@ "SemaDB", "SingleStoreDB", "SKLearnVectorStore", - "SQLiteVec", "SQLiteVSS", "StarRocks", "SupabaseVectorStore", From e25b7295cf69b111a969cc05a1b9c3e236d818dd Mon Sep 17 00:00:00 2001 From: philippe-oger Date: Sat, 31 Aug 2024 21:32:38 +0100 Subject: [PATCH 3/6] Add the expected headers for the notebook example --- .../integrations/vectorstores/sqlitevec.ipynb | 187 ++++++++++++++---- 1 file changed, 146 insertions(+), 41 deletions(-) diff --git a/docs/docs/integrations/vectorstores/sqlitevec.ipynb b/docs/docs/integrations/vectorstores/sqlitevec.ipynb index c47f623b26c7f..2bb91380a2e56 100644 --- a/docs/docs/integrations/vectorstores/sqlitevec.ipynb +++ b/docs/docs/integrations/vectorstores/sqlitevec.ipynb @@ -1,5 +1,14 @@ { "cells": [ + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "---\n", + "sidebar_label: SQLiteVec\n", + "---" + ] + }, { "cell_type": "markdown", "metadata": { @@ -9,15 +18,23 @@ } }, "source": [ - "# SQLite-Vec\n", + "# SQLite as a Vector Store with SQLiteVec\n", "\n", - ">[SQLite-Vec](https://alexgarcia.xyz/sqlite-vec/) is an `SQLite` extension designed for vector search, emphasizing local-first operations and 
easy integration into applications without external servers. It is the successor to [SQLite-VSS](https://alexgarcia.xyz/sqlite-vss/) by the same author. It is written in zero-dependency C and designed to be easy to build and use.\n", + "This notebook covers how to get started with the SQLiteVec vector store.\n", "\n", - "You'll need to install `langchain-community` with `pip install -qU langchain-community` to use this integration\n", + ">[SQLite-Vec](https://alexgarcia.xyz/sqlite-vec/) is an `SQLite` extension designed for vector search, emphasizing local-first operations and easy integration into applications without external servers. It is the successor to [SQLite-VSS](https://alexgarcia.xyz/sqlite-vss/) by the same author. It is written in zero-dependency C and designed to be easy to build and use.\n", "\n", "This notebook shows how to use the `SQLiteVec` vector database." ] }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Setup\n", + "You'll need to install `langchain-community` with `pip install -qU langchain-community` to use this integration" + ] + }, { "cell_type": "code", "execution_count": null, @@ -29,10 +46,100 @@ }, "outputs": [], "source": [ - "# You need to install sqlite-vss as a dependency.\n", + "# You need to install sqlite-vec as a dependency.\n", "%pip install --upgrade --quiet sqlite-vec" ] }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Credentials\n", + "SQLiteVec does not require any credentials to use as the vector store is a simple SQLite file." 
+ ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "## Initialization" + }, + { + "metadata": { + "jupyter": { + "is_executing": true + } + }, + "cell_type": "code", + "source": [ + "from langchain_community.vectorstores import SQLiteVec\n", + "from langchain_community.embeddings.sentence_transformer import (\n", + " SentenceTransformerEmbeddings,\n", + ")\n", + "\n", + "embedding_function = SentenceTransformerEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n", + "vector_store = SQLiteVec(table=\"state_union\", db_file=\"/tmp/vec.db\", embedding=embedding_function)" + ], + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "## Manage vector store" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Add items to vector store" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "vector_store.add_texts(texts=[\"Ketanji Brown Jackson is awesome\", \"foo\", \"bar\"])" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Update items in vector store\n", + "Not supported yet" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Delete items from vector store\n", + "Not supported yet" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "## Query vector store" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Query directly" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "data = vector_store.similarity_search(\"Ketanji Brown Jackson\", k=4)" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Query by turning into retriever\n", + "Not supported yet" + ] + }, { "cell_type": "markdown", "metadata": { @@ -42,9 +149,42 @@ } }, "source": [ - "## Quickstart" + "- ## Setup\n", + "- ### Credentials\n", + "- ## Initialization\n", + "- ## Manage vector store\n", + "- ### Add 
items to vector store\n", + "- ### Update items in vector store\n", + "- ### Delete items from vector store\n", + "- ## Query vector store\n", + "- ### Query directly\n", + "- ### Query by turning into retriever\n", + "- ## Usage for retrieval-augmented generation\n", + "- ## API reference" ] }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all SQLiteVec features and configurations head to the API reference:https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.sqlitevec.SQLiteVec.html" + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Usage for retrieval-augmented generation\n", + "Refer to the documentation on sqlite-vec at https://alexgarcia.xyz/sqlite-vec/ for more information on how to use it for retrieval-augmented generation." + ] + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Other examples" + }, { "cell_type": "code", "execution_count": null, @@ -118,9 +258,7 @@ "outputs_hidden": false } }, - "source": [ - "## Using existing SQLite connection" - ] + "source": "### Example using existing SQLite connection" }, { "cell_type": "code", @@ -181,39 +319,6 @@ "# print results\n", "data[0].page_content" ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "ExecuteTime": { - "end_time": "2023-09-06T15:01:15.550318Z", - "start_time": "2023-09-06T15:01:15.546428Z" - }, - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [], - "source": [ - "# Cleaning up\n", - "import os\n", - "\n", - "os.remove(\"/tmp/vec.db\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "outputs": [], - "source": [] } ], "metadata": { From c4cefaee812a9e9607a21b02920c0a60e3f620e5 Mon Sep 17 00:00:00 2001 From: philippe-oger Date: Sat, 31 Aug 2024 21:35:11 +0100 Subject: 
[PATCH 4/6] Remove a cell from notebook --- .../integrations/vectorstores/sqlitevec.ipynb | 23 ------------------- 1 file changed, 23 deletions(-) diff --git a/docs/docs/integrations/vectorstores/sqlitevec.ipynb b/docs/docs/integrations/vectorstores/sqlitevec.ipynb index 2bb91380a2e56..6ef139bbc4523 100644 --- a/docs/docs/integrations/vectorstores/sqlitevec.ipynb +++ b/docs/docs/integrations/vectorstores/sqlitevec.ipynb @@ -140,29 +140,6 @@ "Not supported yet" ] }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false, - "jupyter": { - "outputs_hidden": false - } - }, - "source": [ - "- ## Setup\n", - "- ### Credentials\n", - "- ## Initialization\n", - "- ## Manage vector store\n", - "- ### Add items to vector store\n", - "- ### Update items in vector store\n", - "- ### Delete items from vector store\n", - "- ## Query vector store\n", - "- ### Query directly\n", - "- ### Query by turning into retriever\n", - "- ## Usage for retrieval-augmented generation\n", - "- ## API reference" - ] - }, { "metadata": {}, "cell_type": "markdown", From 4b38ddae6576b4a95349f97c7bfb710ea7367cd1 Mon Sep 17 00:00:00 2001 From: philippe-oger Date: Sun, 1 Sep 2024 22:04:07 +0100 Subject: [PATCH 5/6] Add API reference header again --- docs/docs/integrations/vectorstores/sqlitevec.ipynb | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/docs/integrations/vectorstores/sqlitevec.ipynb b/docs/docs/integrations/vectorstores/sqlitevec.ipynb index 6ef139bbc4523..654ee88d1275f 100644 --- a/docs/docs/integrations/vectorstores/sqlitevec.ipynb +++ b/docs/docs/integrations/vectorstores/sqlitevec.ipynb @@ -145,7 +145,6 @@ "cell_type": "markdown", "source": [ "## API reference\n", - "\n", "For detailed documentation of all SQLiteVec features and configurations head to the API reference:https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.sqlitevec.SQLiteVec.html" ] }, From d2ea34a985f6fc1d3019d42388c67c72e03b90c9 Mon Sep 17 00:00:00 2001 From: 
philippe-oger Date: Sun, 1 Sep 2024 22:14:38 +0100 Subject: [PATCH 6/6] Formatting and reorder headers --- .../docs/integrations/vectorstores/sqlitevec.ipynb | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/docs/integrations/vectorstores/sqlitevec.ipynb b/docs/docs/integrations/vectorstores/sqlitevec.ipynb index 654ee88d1275f..33eb5b854d502 100644 --- a/docs/docs/integrations/vectorstores/sqlitevec.ipynb +++ b/docs/docs/integrations/vectorstores/sqlitevec.ipynb @@ -71,13 +71,15 @@ }, "cell_type": "code", "source": [ - "from langchain_community.vectorstores import SQLiteVec\n", "from langchain_community.embeddings.sentence_transformer import (\n", " SentenceTransformerEmbeddings,\n", ")\n", + "from langchain_community.vectorstores import SQLiteVec\n", "\n", "embedding_function = SentenceTransformerEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n", - "vector_store = SQLiteVec(table=\"state_union\", db_file=\"/tmp/vec.db\", embedding=embedding_function)" + "vector_store = SQLiteVec(\n", + " table=\"state_union\", db_file=\"/tmp/vec.db\", embedding=embedding_function\n", + ")" ], "outputs": [], "execution_count": null @@ -144,16 +146,16 @@ "metadata": {}, "cell_type": "markdown", "source": [ - "## API reference\n", - "For detailed documentation of all SQLiteVec features and configurations head to the API reference:https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.sqlitevec.SQLiteVec.html" + "## Usage for retrieval-augmented generation\n", + "Refer to the documentation on sqlite-vec at https://alexgarcia.xyz/sqlite-vec/ for more information on how to use it for retrieval-augmented generation." ] }, { "metadata": {}, "cell_type": "markdown", "source": [ - "## Usage for retrieval-augmented generation\n", - "Refer to the documentation on sqlite-vec at https://alexgarcia.xyz/sqlite-vec/ for more information on how to use it for retrieval-augmented generation." 
+ "## API reference\n", + "For detailed documentation of all SQLiteVec features and configurations head to the API reference: https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.sqlitevec.SQLiteVec.html" ] }, {