Skip to content

Commit

Permalink
[ENH][SEC]: CIP-01022024 SSL Verify Client Config (#1604)
Browse files Browse the repository at this point in the history
## Description of changes

*Summarize the changes made by this PR.*
 - New functionality
- New CIP to introduce SSL verify flag to support custom PKIs or to
accept self-signed certs for testing and experimentation purposes

## Test plan
*How are these changes tested?*

- [x] Tests pass locally with `pytest` for python

## Documentation Changes
CIP document in the PR.
  • Loading branch information
tazarov authored Feb 7, 2024
1 parent efc16a2 commit a62cfb0
Show file tree
Hide file tree
Showing 7 changed files with 318 additions and 2 deletions.
2 changes: 2 additions & 0 deletions chromadb/api/fastapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ def __init__(self, system: System):
self._session = requests.Session()
if self._header is not None:
self._session.headers.update(self._header)
if self._settings.chroma_server_ssl_verify is not None:
self._session.verify = self._settings.chroma_server_ssl_verify

@trace_method("FastAPI.heartbeat", OpenTelemetryGranularity.OPERATION)
@override
Expand Down
4 changes: 3 additions & 1 deletion chromadb/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
from abc import ABC
from graphlib import TopologicalSorter
from typing import Optional, List, Any, Dict, Set, Iterable
from typing import Optional, List, Any, Dict, Set, Iterable, Union
from typing import Type, TypeVar, cast

from overrides import EnforceOverrides
Expand Down Expand Up @@ -122,6 +122,8 @@ class Settings(BaseSettings): # type: ignore
chroma_server_headers: Optional[Dict[str, str]] = None
chroma_server_http_port: Optional[str] = None
chroma_server_ssl_enabled: Optional[bool] = False
# the below config value is only applicable to Chroma HTTP clients
chroma_server_ssl_verify: Optional[Union[bool, str]] = None
chroma_server_api_default_path: Optional[str] = "/api/v1"
chroma_server_grpc_port: Optional[str] = None
# eg ["http://localhost:3000"]
Expand Down
72 changes: 71 additions & 1 deletion chromadb/test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import shutil
import socket
import subprocess
import tempfile
import time
from typing import (
Expand Down Expand Up @@ -47,7 +48,6 @@
)
hypothesis.settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "dev"))


NOT_CLUSTER_ONLY = os.getenv("CHROMA_CLUSTER_TEST_ONLY") != "1"


Expand All @@ -58,6 +58,35 @@ def skip_if_not_cluster() -> pytest.MarkDecorator:
)


def generate_self_signed_certificate() -> None:
config_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "openssl.cnf"
)
print(f"Config path: {config_path}") # Debug print to verify path
if not os.path.exists(config_path):
raise FileNotFoundError(f"Config file not found at {config_path}")
subprocess.run(
[
"openssl",
"req",
"-x509",
"-newkey",
"rsa:4096",
"-keyout",
"serverkey.pem",
"-out",
"servercert.pem",
"-days",
"365",
"-nodes",
"-subj",
"/CN=localhost",
"-config",
config_path,
]
)


def find_free_port() -> int:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(("", 0))
Expand All @@ -77,6 +106,8 @@ def _run_server(
chroma_server_authz_provider: Optional[str] = None,
chroma_server_authz_config_file: Optional[str] = None,
chroma_server_authz_config: Optional[Dict[str, Any]] = None,
chroma_server_ssl_certfile: Optional[str] = None,
chroma_server_ssl_keyfile: Optional[str] = None,
) -> None:
"""Run a Chroma server locally"""
if is_persistent and persist_directory:
Expand Down Expand Up @@ -123,6 +154,8 @@ def _run_server(
port=port,
log_level="error",
timeout_keep_alive=30,
ssl_keyfile=chroma_server_ssl_keyfile,
ssl_certfile=chroma_server_ssl_certfile,
)


Expand Down Expand Up @@ -152,6 +185,8 @@ def _fastapi_fixture(
chroma_server_authz_provider: Optional[str] = None,
chroma_server_authz_config_file: Optional[str] = None,
chroma_server_authz_config: Optional[Dict[str, Any]] = None,
chroma_server_ssl_certfile: Optional[str] = None,
chroma_server_ssl_keyfile: Optional[str] = None,
) -> Generator[System, None, None]:
"""Fixture generator that launches a server in a separate process, and yields a
fastapi client connect to it"""
Expand All @@ -171,6 +206,8 @@ def _fastapi_fixture(
Optional[str],
Optional[str],
Optional[Dict[str, Any]],
Optional[str],
Optional[str],
] = (
port,
False,
Expand All @@ -183,6 +220,8 @@ def _fastapi_fixture(
chroma_server_authz_provider,
chroma_server_authz_config_file,
chroma_server_authz_config,
chroma_server_ssl_certfile,
chroma_server_ssl_keyfile,
)
persist_directory = None
if is_persistent:
Expand All @@ -199,6 +238,8 @@ def _fastapi_fixture(
chroma_server_authz_provider,
chroma_server_authz_config_file,
chroma_server_authz_config,
chroma_server_ssl_certfile,
chroma_server_ssl_keyfile,
)
proc = ctx.Process(target=_run_server, args=args, daemon=True)
proc.start()
Expand All @@ -210,6 +251,8 @@ def _fastapi_fixture(
chroma_client_auth_provider=chroma_client_auth_provider,
chroma_client_auth_credentials=chroma_client_auth_credentials,
chroma_client_auth_token_transport_header=chroma_client_auth_token_transport_header,
chroma_server_ssl_verify=chroma_server_ssl_certfile,
chroma_server_ssl_enabled=True if chroma_server_ssl_certfile else False,
)
system = System(settings)
api = system.instance(ServerAPI)
Expand All @@ -231,6 +274,15 @@ def fastapi_persistent() -> Generator[System, None, None]:
return _fastapi_fixture(is_persistent=True)


def fastapi_ssl() -> Generator[System, None, None]:
generate_self_signed_certificate()
return _fastapi_fixture(
is_persistent=False,
chroma_server_ssl_certfile="./servercert.pem",
chroma_server_ssl_keyfile="./serverkey.pem",
)


def basic_http_client() -> Generator[System, None, None]:
settings = Settings(
chroma_api_impl="chromadb.api.fastapi.FastAPI",
Expand Down Expand Up @@ -400,6 +452,11 @@ def system_fixtures_wrong_auth() -> List[Callable[[], Generator[System, None, No
return fixtures


def system_fixtures_ssl() -> List[Callable[[], Generator[System, None, None]]]:
fixtures = [fastapi_ssl]
return fixtures


@pytest.fixture(scope="module", params=system_fixtures_wrong_auth())
def system_wrong_auth(
request: pytest.FixtureRequest,
Expand All @@ -412,6 +469,11 @@ def system(request: pytest.FixtureRequest) -> Generator[ServerAPI, None, None]:
yield next(request.param())


@pytest.fixture(scope="module", params=system_fixtures_ssl())
def system_ssl(request: pytest.FixtureRequest) -> Generator[ServerAPI, None, None]:
yield next(request.param())


@pytest.fixture(scope="module", params=system_fixtures_auth())
def system_auth(request: pytest.FixtureRequest) -> Generator[ServerAPI, None, None]:
yield next(request.param())
Expand All @@ -432,6 +494,14 @@ def client(system: System) -> Generator[ClientAPI, None, None]:
client.clear_system_cache()


@pytest.fixture(scope="function")
def client_ssl(system_ssl: System) -> Generator[ClientAPI, None, None]:
system_ssl.reset_state()
client = ClientCreator.from_system(system_ssl)
yield client
client.clear_system_cache()


@pytest.fixture(scope="function")
def api_wrong_cred(
system_wrong_auth: System,
Expand Down
12 changes: 12 additions & 0 deletions chromadb/test/openssl.cnf
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[req]
distinguished_name = req_distinguished_name
x509_extensions = usr_cert

[req_distinguished_name]
CN = localhost

[usr_cert]
subjectAltName = @alt_names

[alt_names]
DNS.1 = localhost
43 changes: 43 additions & 0 deletions chromadb/test/test_api.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# type: ignore
import traceback
import requests
from urllib3.connectionpool import InsecureRequestWarning

import chromadb
from chromadb.api.fastapi import FastAPI
Expand Down Expand Up @@ -360,6 +362,7 @@ def test_modify_error_on_existing_name(api):
with pytest.raises(Exception):
c2.modify(name="testspace")


def test_modify_warn_on_DF_change(api, caplog):
api.reset()

Expand All @@ -368,6 +371,7 @@ def test_modify_warn_on_DF_change(api, caplog):
with pytest.raises(Exception, match="not supported") as e:
collection.modify(metadata={"hnsw:space": "cosine"})


def test_metadata_cru(api):
api.reset()
metadata_a = {"a": 1, "b": 2}
Expand Down Expand Up @@ -1437,6 +1441,7 @@ def test_invalid_embeddings(api):

# test to make sure update shows exception for bad dimensionality


def test_dimensionality_exception_update(api):
api.reset()
collection = api.create_collection("test_dimensionality_update_exception")
Expand All @@ -1446,8 +1451,10 @@ def test_dimensionality_exception_update(api):
collection.update(**bad_dimensionality_records)
assert "dimensionality" in str(e.value)


# test to make sure upsert shows exception for bad dimensionality


def test_dimensionality_exception_upsert(api):
api.reset()
collection = api.create_collection("test_dimensionality_upsert_exception")
Expand All @@ -1456,3 +1463,39 @@ def test_dimensionality_exception_upsert(api):
with pytest.raises(Exception) as e:
collection.upsert(**bad_dimensionality_records)
assert "dimensionality" in str(e.value)


def test_ssl_self_signed(client_ssl):
if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY"):
pytest.skip("Skipping test for integration test")
client_ssl.heartbeat()


def test_ssl_self_signed_without_ssl_verify(client_ssl):
if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY"):
pytest.skip("Skipping test for integration test")
client_ssl.heartbeat()
_port = client_ssl._server._settings.chroma_server_http_port
with pytest.raises(ValueError) as e:
chromadb.HttpClient(ssl=True, port=_port)
stack_trace = traceback.format_exception(
type(e.value), e.value, e.value.__traceback__
)
client_ssl.clear_system_cache()
assert "CERTIFICATE_VERIFY_FAILED" in "".join(stack_trace)


def test_ssl_self_signed_with_verify_false(client_ssl):
if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY"):
pytest.skip("Skipping test for integration test")
client_ssl.heartbeat()
_port = client_ssl._server._settings.chroma_server_http_port
with pytest.warns(InsecureRequestWarning) as record:
client = chromadb.HttpClient(
ssl=True,
port=_port,
settings=chromadb.Settings(chroma_server_ssl_verify=False),
)
client.heartbeat()
client_ssl.clear_system_cache()
assert "Unverified HTTPS request" in str(record[0].message)
68 changes: 68 additions & 0 deletions docs/cip/CIP-01022024_SSL_Verify_Client_Config.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# CIP-01022024 SSL Verify Client Config

## Status

Current Status: `Under Discussion`

## Motivation

The motivation for this change is to enhance security and flexibility in Chroma's client API. Users need the ability to
configure SSL contexts to trust custom CA certificates or self-signed certificates, which is not straightforward with
the current setup. This capability is crucial for organizations that operate their own CA or for developers who need to
test their applications in environments where certificates from a recognized CA are not available or practical.

The suggested change entails a server-side certificate be available, but this CIP does not prescribe how such
certificate should be configured or obtained. In our testing, we used a self-signed certificate generated with
`openssl` and configured the client to trust the certificate. We also experiment with a SSL-terminated proxy server.
Both of approaches yielded the same results.

> **IMPORTANT:** It should be noted that we do not recommend or encourage the use of self-signed certificates in
> production environments.
We also provide a sample notebook that to help the reader run a local Chroma server with a self-signed certificate and
configure the client to trust the certificate. The notebook can be found
in [assets/CIP-01022024-test_self_signed.ipynb](./assets/CIP-01022024-test_self_signed.ipynb).

## Public Interfaces

> **Note:** The following changes are only applicable to Chroma HttpClient.
New settings variable `chroma_server_ssl_verify` accepting either a boolean or a path to a certificate file. If the
value is a path to a certificate file, the file will be used to verify the server's certificate. If the value is a
boolean, the SSL certificate verification can be bypassed (`false`) or enforced (`true`).

The value is passed as `verify` parameter to `requests.Session` of the `FastAPI` client. See
requests [documentation](https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification) for
more details.

Example Usage:

```python
import chromadb
from chromadb import Settings
client = chromadb.HttpClient(host="localhost",port="8443",ssl=True, settings=Settings(chroma_server_ssl_verify='./servercert.pem'))
# or with boolean
client = chromadb.HttpClient(host="localhost",port="8443",ssl=True, settings=Settings(chroma_server_ssl_verify=False))
```

### Resources

- https://requests.readthedocs.io/en/latest/api/#requests.request
- https://www.geeksforgeeks.org/ssl-certificate-verification-python-requests/

## Proposed Changes

The proposed changes are mentioned in the public interfaces.

## Compatibility, Deprecation, and Migration Plan

The change is not backward compatible from client's perspective as the lack of the feature in prior clients will cause
an error when passing the new settings parameter. Server-side is not affected by this change.

## Test Plan

API tests with SSL verification enabled and a self-signed certificate.

## Rejected Alternatives

N/A
Loading

0 comments on commit a62cfb0

Please sign in to comment.