From 11c00d43fd1b2c5caf4d49f705bd55c704edae8a Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Sat, 28 Sep 2024 01:58:48 -0700 Subject: [PATCH] fix: Fix vector store config (#4583) --- docs/reference/alpha-vector-database.md | 2 +- docs/reference/online-stores/postgres.md | 4 ++-- .../infra/online_stores/contrib/elasticsearch.py | 10 ++-------- .../infra/online_stores/contrib/postgres.py | 15 +++++---------- sdk/python/feast/infra/online_stores/sqlite.py | 15 +++++---------- .../feast/infra/online_stores/vector_store.py | 16 ++++++++++++++++ .../universal/online_store/postgres.py | 2 +- .../unit/online_store/test_online_retrieval.py | 2 +- 8 files changed, 33 insertions(+), 33 deletions(-) create mode 100644 sdk/python/feast/infra/online_stores/vector_store.py diff --git a/docs/reference/alpha-vector-database.md b/docs/reference/alpha-vector-database.md index b9ce7f408a..06909bd565 100644 --- a/docs/reference/alpha-vector-database.md +++ b/docs/reference/alpha-vector-database.md @@ -40,7 +40,7 @@ registry: path: postgresql://@localhost:5432/feast online_store: type: postgres - pgvector_enabled: true + vector_enabled: true vector_len: 384 host: 127.0.0.1 port: 5432 diff --git a/docs/reference/online-stores/postgres.md b/docs/reference/online-stores/postgres.md index 77a9408d2b..e4e2173ccd 100644 --- a/docs/reference/online-stores/postgres.md +++ b/docs/reference/online-stores/postgres.md @@ -30,7 +30,7 @@ online_store: sslkey_path: /path/to/client-key.pem sslcert_path: /path/to/client-cert.pem sslrootcert_path: /path/to/server-ca.pem - pgvector_enabled: false + vector_enabled: false vector_len: 512 ``` {% endcode %} @@ -65,7 +65,7 @@ To compare this set of functionality against other online stores, please see the ## PGVector The Postgres online store supports the use of [PGVector](https://github.com/pgvector/pgvector) for storing feature values. -To enable PGVector, set `pgvector_enabled: true` in the online store configuration. 
+To enable PGVector, set `vector_enabled: true` in the online store configuration. The `vector_len` parameter can be used to specify the length of the vector. The default value is 512. diff --git a/sdk/python/feast/infra/online_stores/contrib/elasticsearch.py b/sdk/python/feast/infra/online_stores/contrib/elasticsearch.py index a0c25b931a..0152ca330c 100644 --- a/sdk/python/feast/infra/online_stores/contrib/elasticsearch.py +++ b/sdk/python/feast/infra/online_stores/contrib/elasticsearch.py @@ -14,13 +14,14 @@ serialize_entity_key, ) from feast.infra.online_stores.online_store import OnlineStore +from feast.infra.online_stores.vector_store import VectorStoreConfig from feast.protos.feast.types.EntityKey_pb2 import EntityKey as EntityKeyProto from feast.protos.feast.types.Value_pb2 import Value as ValueProto from feast.repo_config import FeastConfigBaseModel from feast.utils import _build_retrieve_online_document_record, to_naive_utc -class ElasticSearchOnlineStoreConfig(FeastConfigBaseModel): +class ElasticSearchOnlineStoreConfig(FeastConfigBaseModel, VectorStoreConfig): """ Configuration for the ElasticSearch online store. NOTE: The class *must* end with the `OnlineStoreConfig` suffix. 
@@ -38,13 +39,6 @@ class ElasticSearchOnlineStoreConfig(FeastConfigBaseModel): # The number of rows to write in a single batch write_batch_size: Optional[int] = 40 - # The length of the vector value - vector_len: Optional[int] = 512 - - # The vector similarity metric to use in KNN search - # more details: https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html - similarity: Optional[str] = "cosine" - class ElasticSearchOnlineStore(OnlineStore): _client: Optional[Elasticsearch] = None diff --git a/sdk/python/feast/infra/online_stores/contrib/postgres.py b/sdk/python/feast/infra/online_stores/contrib/postgres.py index 036c0b6b92..7c099c80ec 100644 --- a/sdk/python/feast/infra/online_stores/contrib/postgres.py +++ b/sdk/python/feast/infra/online_stores/contrib/postgres.py @@ -25,6 +25,7 @@ from feast.infra.key_encoding_utils import get_list_val_str, serialize_entity_key from feast.infra.online_stores.helpers import _to_naive_utc from feast.infra.online_stores.online_store import OnlineStore +from feast.infra.online_stores.vector_store import VectorStoreConfig from feast.infra.utils.postgres.connection_utils import ( _get_conn, _get_conn_async, @@ -45,15 +46,9 @@ } -class PostgreSQLOnlineStoreConfig(PostgreSQLConfig): +class PostgreSQLOnlineStoreConfig(PostgreSQLConfig, VectorStoreConfig): type: Literal["postgres"] = "postgres" - # Whether to enable the pgvector extension for vector similarity search - pgvector_enabled: Optional[bool] = False - - # If pgvector is enabled, the length of the vector field - vector_len: Optional[int] = 512 - class PostgreSQLOnlineStore(OnlineStore): _conn: Optional[Connection] = None @@ -118,7 +113,7 @@ def online_write_batch( for feature_name, val in values.items(): vector_val = None - if config.online_store.pgvector_enabled: + if config.online_store.vector_enabled: vector_val = get_list_val_str(val) insert_values.append( ( @@ -302,7 +297,7 @@ def update( for table in tables_to_keep: table_name = 
_table_id(project, table) - if config.online_store.pgvector_enabled: + if config.online_store.vector_enabled: vector_value_type = f"vector({config.online_store.vector_len})" else: # keep the vector_value_type as BYTEA if pgvector is not enabled, to maintain compatibility @@ -380,7 +375,7 @@ def retrieve_online_documents( """ project = config.project - if not config.online_store.pgvector_enabled: + if not config.online_store.vector_enabled: raise ValueError( "pgvector is not enabled in the online store configuration" ) diff --git a/sdk/python/feast/infra/online_stores/sqlite.py b/sdk/python/feast/infra/online_stores/sqlite.py index 061a766b8c..1b79b1a94b 100644 --- a/sdk/python/feast/infra/online_stores/sqlite.py +++ b/sdk/python/feast/infra/online_stores/sqlite.py @@ -29,6 +29,7 @@ from feast.infra.infra_object import SQLITE_INFRA_OBJECT_CLASS_TYPE, InfraObject from feast.infra.key_encoding_utils import serialize_entity_key from feast.infra.online_stores.online_store import OnlineStore +from feast.infra.online_stores.vector_store import VectorStoreConfig from feast.protos.feast.core.InfraObject_pb2 import InfraObject as InfraObjectProto from feast.protos.feast.core.Registry_pb2 import Registry as RegistryProto from feast.protos.feast.core.SqliteTable_pb2 import SqliteTable as SqliteTableProto @@ -38,7 +39,7 @@ from feast.utils import _build_retrieve_online_document_record, to_naive_utc -class SqliteOnlineStoreConfig(FeastConfigBaseModel): +class SqliteOnlineStoreConfig(FeastConfigBaseModel, VectorStoreConfig): """Online store config for local (SQLite-based) store""" type: Literal["sqlite", "feast.infra.online_stores.sqlite.SqliteOnlineStore"] = ( @@ -49,12 +50,6 @@ class SqliteOnlineStoreConfig(FeastConfigBaseModel): path: StrictStr = "data/online.db" """ (optional) Path to sqlite db """ - vec_enabled: Optional[bool] = False - """ (optional) Enable or disable sqlite-vss for vector search""" - - vector_len: Optional[int] = 512 - """ (optional) Length of the vector 
to be stored in the database""" - class SqliteOnlineStore(OnlineStore): """ @@ -83,7 +78,7 @@ def _get_conn(self, config: RepoConfig): if not self._conn: db_path = self._get_db_path(config) self._conn = _initialize_conn(db_path) - if sys.version_info[0:2] == (3, 10) and config.online_store.vec_enabled: + if sys.version_info[0:2] == (3, 10) and config.online_store.vector_enabled: import sqlite_vec # noqa: F401 self._conn.enable_load_extension(True) # type: ignore @@ -121,7 +116,7 @@ def online_write_batch( table_name = _table_id(project, table) for feature_name, val in values.items(): - if config.online_store.vec_enabled: + if config.online_store.vector_enabled: vector_bin = serialize_f32( val.float_list_val.val, config.online_store.vector_len ) # type: ignore @@ -321,7 +316,7 @@ def retrieve_online_documents( """ project = config.project - if not config.online_store.vec_enabled: + if not config.online_store.vector_enabled: raise ValueError("sqlite-vss is not enabled in the online store config") conn = self._get_conn(config) diff --git a/sdk/python/feast/infra/online_stores/vector_store.py b/sdk/python/feast/infra/online_stores/vector_store.py new file mode 100644 index 0000000000..051f9bcaed --- /dev/null +++ b/sdk/python/feast/infra/online_stores/vector_store.py @@ -0,0 +1,16 @@ +from typing import Optional + + +class VectorStoreConfig: + # Whether to enable the online store for vector similarity search. + # This is only applicable to online stores. + vector_enabled: Optional[bool] = False + + # If vector is enabled, the length of the vector field + vector_len: Optional[int] = 512 + + # The vector similarity metric to use in KNN search + # It is helpful for vector databases that do not support config at retrieval runtime + # E.g. 
Elasticsearch dense_vector field at + # https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html + similarity: Optional[str] = "cosine" diff --git a/sdk/python/tests/integration/feature_repos/universal/online_store/postgres.py b/sdk/python/tests/integration/feature_repos/universal/online_store/postgres.py index 622ee99e14..7ff72a48a3 100644 --- a/sdk/python/tests/integration/feature_repos/universal/online_store/postgres.py +++ b/sdk/python/tests/integration/feature_repos/universal/online_store/postgres.py @@ -67,7 +67,7 @@ def create_online_store(self) -> Dict[str, Any]: "user": "root", "password": "test!@#$%", "database": "test", - "pgvector_enabled": True, + "vector_enabled": True, "vector_len": 2, "port": self.container.get_exposed_port(5432), } diff --git a/sdk/python/tests/unit/online_store/test_online_retrieval.py b/sdk/python/tests/unit/online_store/test_online_retrieval.py index 0a4880164f..83184643f3 100644 --- a/sdk/python/tests/unit/online_store/test_online_retrieval.py +++ b/sdk/python/tests/unit/online_store/test_online_retrieval.py @@ -441,7 +441,7 @@ def test_sqlite_get_online_documents() -> None: with runner.local_repo( get_example_repo("example_feature_repo_1.py"), "file" ) as store: - store.config.online_store.vec_enabled = True + store.config.online_store.vector_enabled = True store.config.online_store.vector_len = vector_length # Write some data to two tables document_embeddings_fv = store.get_feature_view(name="document_embeddings")