diff --git a/chromadb/api/fastapi.py b/chromadb/api/fastapi.py index d3d1a8a4e7e..1ee7a45af54 100644 --- a/chromadb/api/fastapi.py +++ b/chromadb/api/fastapi.py @@ -138,6 +138,8 @@ def __init__(self, system: System): self._session = requests.Session() if self._header is not None: self._session.headers.update(self._header) + if self._settings.chroma_server_ssl_verify is not None: + self._session.verify = self._settings.chroma_server_ssl_verify @trace_method("FastAPI.heartbeat", OpenTelemetryGranularity.OPERATION) @override diff --git a/chromadb/config.py b/chromadb/config.py index 61b789d0eee..e9ceffc5dd0 100644 --- a/chromadb/config.py +++ b/chromadb/config.py @@ -4,7 +4,7 @@ import os from abc import ABC from graphlib import TopologicalSorter -from typing import Optional, List, Any, Dict, Set, Iterable +from typing import Optional, List, Any, Dict, Set, Iterable, Union from typing import Type, TypeVar, cast from overrides import EnforceOverrides @@ -122,6 +122,8 @@ class Settings(BaseSettings): # type: ignore chroma_server_headers: Optional[Dict[str, str]] = None chroma_server_http_port: Optional[str] = None chroma_server_ssl_enabled: Optional[bool] = False + # the below config value is only applicable to Chroma HTTP clients + chroma_server_ssl_verify: Optional[Union[bool, str]] = None chroma_server_api_default_path: Optional[str] = "/api/v1" chroma_server_grpc_port: Optional[str] = None # eg ["http://localhost:3000"] diff --git a/chromadb/test/conftest.py b/chromadb/test/conftest.py index 087cb2271bd..34a1b040dd1 100644 --- a/chromadb/test/conftest.py +++ b/chromadb/test/conftest.py @@ -3,6 +3,7 @@ import os import shutil import socket +import subprocess import tempfile import time from typing import ( @@ -47,7 +48,6 @@ ) hypothesis.settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "dev")) - NOT_CLUSTER_ONLY = os.getenv("CHROMA_CLUSTER_TEST_ONLY") != "1" @@ -58,6 +58,35 @@ def skip_if_not_cluster() -> pytest.MarkDecorator: ) +def generate_self_signed_certificate() -> None: + config_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "openssl.cnf" + ) + print(f"Config path: {config_path}") # Debug print to verify path + if not os.path.exists(config_path): + raise FileNotFoundError(f"Config file not found at {config_path}") + subprocess.run( + [ + "openssl", + "req", + "-x509", + "-newkey", + "rsa:4096", + "-keyout", + "serverkey.pem", + "-out", + "servercert.pem", + "-days", + "365", + "-nodes", + "-subj", + "/CN=localhost", + "-config", + config_path, + ] + ) + + def find_free_port() -> int: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.bind(("", 0)) @@ -77,6 +106,8 @@ def _run_server( chroma_server_authz_provider: Optional[str] = None, chroma_server_authz_config_file: Optional[str] = None, chroma_server_authz_config: Optional[Dict[str, Any]] = None, + chroma_server_ssl_certfile: Optional[str] = None, + chroma_server_ssl_keyfile: Optional[str] = None, ) -> None: """Run a Chroma server locally""" if is_persistent and persist_directory: @@ -123,6 +154,8 @@ def _run_server( port=port, log_level="error", timeout_keep_alive=30, + ssl_keyfile=chroma_server_ssl_keyfile, + ssl_certfile=chroma_server_ssl_certfile, ) @@ -152,6 +185,8 @@ def _fastapi_fixture( chroma_server_authz_provider: Optional[str] = None, chroma_server_authz_config_file: Optional[str] = None, chroma_server_authz_config: Optional[Dict[str, Any]] = None, + chroma_server_ssl_certfile: Optional[str] = None, + chroma_server_ssl_keyfile: Optional[str] = None, ) -> Generator[System, None, None]: """Fixture generator that launches a server in a separate process, and yields a fastapi client connect to it""" @@ -171,6 +206,8 @@ def _fastapi_fixture( Optional[str], Optional[str], Optional[Dict[str, Any]], + Optional[str], + Optional[str], ] = ( port, False, @@ -183,6 +220,8 @@ def _fastapi_fixture( chroma_server_authz_provider, chroma_server_authz_config_file, chroma_server_authz_config, + chroma_server_ssl_certfile, + chroma_server_ssl_keyfile, ) persist_directory = None if is_persistent: @@ -199,6 +238,8 @@ def _fastapi_fixture( chroma_server_authz_provider, chroma_server_authz_config_file, chroma_server_authz_config, + chroma_server_ssl_certfile, + chroma_server_ssl_keyfile, ) proc = ctx.Process(target=_run_server, args=args, daemon=True) proc.start() @@ -210,6 +251,8 @@ def _fastapi_fixture( chroma_client_auth_provider=chroma_client_auth_provider, chroma_client_auth_credentials=chroma_client_auth_credentials, chroma_client_auth_token_transport_header=chroma_client_auth_token_transport_header, + chroma_server_ssl_verify=chroma_server_ssl_certfile, + chroma_server_ssl_enabled=True if chroma_server_ssl_certfile else False, ) system = System(settings) api = system.instance(ServerAPI) @@ -231,6 +274,15 @@ def fastapi_persistent() -> Generator[System, None, None]: return _fastapi_fixture(is_persistent=True) +def fastapi_ssl() -> Generator[System, None, None]: + generate_self_signed_certificate() + return _fastapi_fixture( + is_persistent=False, + chroma_server_ssl_certfile="./servercert.pem", + chroma_server_ssl_keyfile="./serverkey.pem", + ) + + def basic_http_client() -> Generator[System, None, None]: settings = Settings( chroma_api_impl="chromadb.api.fastapi.FastAPI", @@ -400,6 +452,11 @@ def system_fixtures_wrong_auth() -> List[Callable[[], Generator[System, None, No return fixtures +def system_fixtures_ssl() -> List[Callable[[], Generator[System, None, None]]]: + fixtures = [fastapi_ssl] + return fixtures + + @pytest.fixture(scope="module", params=system_fixtures_wrong_auth()) def system_wrong_auth( request: pytest.FixtureRequest, @@ -412,6 +469,11 @@ def system(request: pytest.FixtureRequest) -> Generator[ServerAPI, None, None]: yield next(request.param()) +@pytest.fixture(scope="module", params=system_fixtures_ssl()) +def system_ssl(request: pytest.FixtureRequest) -> Generator[ServerAPI, None, None]: + yield next(request.param()) + + @pytest.fixture(scope="module", params=system_fixtures_auth()) def system_auth(request: pytest.FixtureRequest) -> Generator[ServerAPI, None, None]: yield next(request.param()) @@ -432,6 +494,14 @@ def client(system: System) -> Generator[ClientAPI, None, None]: client.clear_system_cache() +@pytest.fixture(scope="function") +def client_ssl(system_ssl: System) -> Generator[ClientAPI, None, None]: + system_ssl.reset_state() + client = ClientCreator.from_system(system_ssl) + yield client + client.clear_system_cache() + + @pytest.fixture(scope="function") def api_wrong_cred( system_wrong_auth: System, diff --git a/chromadb/test/openssl.cnf b/chromadb/test/openssl.cnf new file mode 100644 index 00000000000..11704076bd4 --- /dev/null +++ b/chromadb/test/openssl.cnf @@ -0,0 +1,12 @@ +[req] +distinguished_name = req_distinguished_name +x509_extensions = usr_cert + +[req_distinguished_name] +CN = localhost + +[usr_cert] +subjectAltName = @alt_names + +[alt_names] +DNS.1 = localhost \ No newline at end of file diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index 36a82205e45..cb88ed2bb77 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -1,5 +1,7 @@ # type: ignore +import traceback import requests +from urllib3.connectionpool import InsecureRequestWarning import chromadb from chromadb.api.fastapi import FastAPI @@ -360,6 +362,7 @@ def test_modify_error_on_existing_name(api): with pytest.raises(Exception): c2.modify(name="testspace") + def test_modify_warn_on_DF_change(api, caplog): api.reset() @@ -368,6 +371,7 @@ def test_modify_warn_on_DF_change(api, caplog): with pytest.raises(Exception, match="not supported") as e: collection.modify(metadata={"hnsw:space": "cosine"}) + def test_metadata_cru(api): api.reset() metadata_a = {"a": 1, "b": 2} @@ -1437,6 +1441,7 @@ def test_invalid_embeddings(api): # test to make sure update shows exception for bad dimensionality + def test_dimensionality_exception_update(api): api.reset() collection = api.create_collection("test_dimensionality_update_exception") @@ -1446,8 +1451,10 @@ def test_dimensionality_exception_update(api): collection.update(**bad_dimensionality_records) assert "dimensionality" in str(e.value) + # test to make sure upsert shows exception for bad dimensionality + def test_dimensionality_exception_upsert(api): api.reset() collection = api.create_collection("test_dimensionality_upsert_exception") @@ -1456,3 +1463,39 @@ def test_dimensionality_exception_upsert(api): with pytest.raises(Exception) as e: collection.upsert(**bad_dimensionality_records) assert "dimensionality" in str(e.value) + + +def test_ssl_self_signed(client_ssl): + if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY"): + pytest.skip("Skipping test for integration test") + client_ssl.heartbeat() + + +def test_ssl_self_signed_without_ssl_verify(client_ssl): + if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY"): + pytest.skip("Skipping test for integration test") + client_ssl.heartbeat() + _port = client_ssl._server._settings.chroma_server_http_port + with pytest.raises(ValueError) as e: + chromadb.HttpClient(ssl=True, port=_port) + stack_trace = traceback.format_exception( + type(e.value), e.value, e.value.__traceback__ + ) + client_ssl.clear_system_cache() + assert "CERTIFICATE_VERIFY_FAILED" in "".join(stack_trace) + + +def test_ssl_self_signed_with_verify_false(client_ssl): + if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY"): + pytest.skip("Skipping test for integration test") + client_ssl.heartbeat() + _port = client_ssl._server._settings.chroma_server_http_port + with pytest.warns(InsecureRequestWarning) as record: + client = chromadb.HttpClient( + ssl=True, + port=_port, + settings=chromadb.Settings(chroma_server_ssl_verify=False), + ) + client.heartbeat() + client_ssl.clear_system_cache() + assert "Unverified HTTPS request" in str(record[0].message) diff --git a/docs/cip/CIP-01022024_SSL_Verify_Client_Config.md b/docs/cip/CIP-01022024_SSL_Verify_Client_Config.md new file mode 100644 index 00000000000..2448af11c88 --- /dev/null +++ b/docs/cip/CIP-01022024_SSL_Verify_Client_Config.md @@ -0,0 +1,68 @@ +# CIP-01022024 SSL Verify Client Config + +## Status + +Current Status: `Under Discussion` + +## Motivation + +The motivation for this change is to enhance security and flexibility in Chroma's client API. Users need the ability to +configure SSL contexts to trust custom CA certificates or self-signed certificates, which is not straightforward with +the current setup. This capability is crucial for organizations that operate their own CA or for developers who need to +test their applications in environments where certificates from a recognized CA are not available or practical. + +The suggested change entails a server-side certificate be available, but this CIP does not prescribe how such +certificate should be configured or obtained. In our testing, we used a self-signed certificate generated with +`openssl` and configured the client to trust the certificate. We also experiment with a SSL-terminated proxy server. +Both of approaches yielded the same results. + +> **IMPORTANT:** It should be noted that we do not recommend or encourage the use of self-signed certificates in +> production environments. + +We also provide a sample notebook that to help the reader run a local Chroma server with a self-signed certificate and +configure the client to trust the certificate. The notebook can be found +in [assets/CIP-01022024-test_self_signed.ipynb](./assets/CIP-01022024-test_self_signed.ipynb). + +## Public Interfaces + +> **Note:** The following changes are only applicable to Chroma HttpClient. + +New settings variable `chroma_server_ssl_verify` accepting either a boolean or a path to a certificate file. If the +value is a path to a certificate file, the file will be used to verify the server's certificate. If the value is a +boolean, the SSL certificate verification can be bypassed (`false`) or enforced (`true`). + +The value is passed as `verify` parameter to `requests.Session` of the `FastAPI` client. See +requests [documentation](https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification) for +more details. + +Example Usage: + +```python +import chromadb +from chromadb import Settings +client = chromadb.HttpClient(host="localhost",port="8443",ssl=True, settings=Settings(chroma_server_ssl_verify='./servercert.pem')) +# or with boolean +client = chromadb.HttpClient(host="localhost",port="8443",ssl=True, settings=Settings(chroma_server_ssl_verify=False)) +``` + +### Resources + +- https://requests.readthedocs.io/en/latest/api/#requests.request +- https://www.geeksforgeeks.org/ssl-certificate-verification-python-requests/ + +## Proposed Changes + +The proposed changes are mentioned in the public interfaces. + +## Compatibility, Deprecation, and Migration Plan + +The change is not backward compatible from client's perspective as the lack of the feature in prior clients will cause +an error when passing the new settings parameter. Server-side is not affected by this change. + +## Test Plan + +API tests with SSL verification enabled and a self-signed certificate. + +## Rejected Alternatives + +N/A diff --git a/docs/cip/assets/CIP-01022024-test_self_signed.ipynb b/docs/cip/assets/CIP-01022024-test_self_signed.ipynb new file mode 100644 index 00000000000..d607b51824b --- /dev/null +++ b/docs/cip/assets/CIP-01022024-test_self_signed.ipynb @@ -0,0 +1,119 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Generate a Certificate\n", + "\n", + "```bash\n", + "openssl req -new -newkey rsa:2048 -sha256 -days 365 -nodes -x509 \\\n", + " -keyout ./serverkey.pem \\\n", + " -out ./servercert.pem \\\n", + " -subj \"/O=Chroma/C=US\" \\\n", + " -config chromadb/test/openssl.cnf\n", + "```\n", + "\n", + "> Note: The above command should be executed at the root of the repo (openssl.cnf uses relative path)\n" + ], + "metadata": { + "collapsed": false + }, + "id": "faa8cefb6825fe83" + }, + { + "cell_type": "markdown", + "source": [ + "# Start the server\n", + "\n", + "```bash\n", + "uvicorn chromadb.app:app --workers 1 --host 0.0.0.0 --port 8443 \\\n", + " --proxy-headers --log-config chromadb/log_config.yml --ssl-keyfile ./serverkey.pem --ssl-certfile ./servercert.pem\n", + "```" + ], + "metadata": { + "collapsed": false + }, + "id": "e084285e11c3747d" + }, + { + "cell_type": "markdown", + "source": [ + "# Test with cert as SSL verify string" + ], + "metadata": { + "collapsed": false + }, + "id": "130df9c0a6d67b52" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "initial_id", + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from chromadb import Settings\n", + "import chromadb\n", + "client = chromadb.HttpClient(host=\"localhost\",port=\"8443\",ssl=True, settings=Settings(chroma_server_ssl_verify='./servercert.pem'))\n", + "print(client.heartbeat())" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Test with cert as SSL verify boolean" + ], + "metadata": { + "collapsed": false + }, + "id": "8223d0100df06ec4" + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "from chromadb import Settings\n", + "import chromadb\n", + "client = chromadb.HttpClient(host=\"localhost\",port=\"8443\",ssl=True, settings=Settings(chroma_server_ssl_verify=False))\n", + "print(client.heartbeat())" + ], + "metadata": { + "collapsed": false + }, + "id": "f7cf299721741c1", + "execution_count": null + }, + { + "cell_type": "code", + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + }, + "id": "6231ac2ac38383c2" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}