diff --git a/.github/workflows/chroma-client-integration-test.yml b/.github/workflows/chroma-client-integration-test.yml index 5724959c2549..e525f3a70787 100644 --- a/.github/workflows/chroma-client-integration-test.yml +++ b/.github/workflows/chroma-client-integration-test.yml @@ -15,7 +15,7 @@ jobs: timeout-minutes: 90 strategy: matrix: - python: ['3.8', '3.9', '3.10', '3.11'] + python: ['3.8', '3.9', '3.10', '3.11', '3.12'] platform: [ubuntu-latest, windows-latest] runs-on: ${{ matrix.platform }} steps: diff --git a/.github/workflows/chroma-coordinator-test.yaml b/.github/workflows/chroma-coordinator-test.yaml index 629a9dfb1466..e62ab2a5d0d0 100644 --- a/.github/workflows/chroma-coordinator-test.yaml +++ b/.github/workflows/chroma-coordinator-test.yaml @@ -16,8 +16,25 @@ jobs: matrix: platform: [ubuntu-latest] runs-on: ${{ matrix.platform }} + services: + postgres: + image: postgres + env: + POSTGRES_USER: chroma + POSTGRES_PASSWORD: chroma + POSTGRES_DB: chroma + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 5432:5432 steps: - name: Checkout uses: actions/checkout@v3 - name: Build and test run: cd go/coordinator && make test + env: + POSTGRES_HOST: localhost + POSTGRES_PORT: 5432 diff --git a/.github/workflows/chroma-release-python-client.yml b/.github/workflows/chroma-release-python-client.yml index 2abc0d524aba..c4f2a2990a95 100644 --- a/.github/workflows/chroma-release-python-client.yml +++ b/.github/workflows/chroma-release-python-client.yml @@ -33,7 +33,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.12' - name: Install Client Dev Dependencies run: python -m pip install -r ./clients/python/requirements.txt && python -m pip install -r ./clients/python/requirements_dev.txt - name: Build Client diff --git a/.github/workflows/chroma-test.yml b/.github/workflows/chroma-test.yml index 12a5de4b6eda..14dc63624e91 100644 --- a/.github/workflows/chroma-test.yml +++ b/.github/workflows/chroma-test.yml @@ -16,7 +16,7 @@ jobs: timeout-minutes: 90 strategy: matrix: - python: ['3.8', '3.9', '3.10', '3.11'] + python: ['3.8', '3.9', '3.10', '3.11', '3.12'] platform: [ubuntu-latest, windows-latest] testfile: ["--ignore-glob 'chromadb/test/property/*' --ignore-glob 'chromadb/test/stress/*' --ignore='chromadb/test/auth/test_simple_rbac_authz.py'", "chromadb/test/auth/test_simple_rbac_authz.py", diff --git a/Cargo.lock b/Cargo.lock index 932b41154ab1..1b8e6f89aad2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,6 +24,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" dependencies = [ "cfg-if", + "const-random", "getrandom", "once_cell", "version_check", @@ -66,6 +67,218 @@ version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" +[[package]] +name = "arrow" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa285343fba4d829d49985bdc541e3789cf6000ed0e84be7c039438df4a4e78c" +dependencies = [ + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "753abd0a5290c1bcade7c6623a556f7d1659c5f4148b140b5b63ce7bd1a45705" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "num", +] + +[[package]] +name = "arrow-array" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d390feeb7f21b78ec997a4081a025baef1e2e0d6069e181939b61864c9779609" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.14.3", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69615b061701bcdffbc62756bc7e85c827d5290b472b580c972ebbbf690f5aa4" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e448e5dd2f4113bf5b74a1f26531708f5edcacc77335b7066f9398f4bcf4cdef" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "base64 0.21.5", + "chrono", + "half", + "lexical-core", + "num", +] + +[[package]] +name = "arrow-csv" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46af72211f0712612f5b18325530b9ad1bfbdc87290d5fbfd32a7da128983781" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "lazy_static", + "lexical-core", + "regex", +] + +[[package]] +name = "arrow-data" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67d644b91a162f3ad3135ce1184d0a31c28b816a581e08f29e8e9277a574c64e" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03dea5e79b48de6c2e04f03f62b0afea7105be7b77d134f6c5414868feefb80d" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "flatbuffers", +] + +[[package]] +name = "arrow-json" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8950719280397a47d37ac01492e3506a8a724b3fb81001900b866637a829ee0f" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap 2.1.0", + "lexical-core", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-ord" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ed9630979034077982d8e74a942b7ac228f33dd93a93b615b4d02ad60c260be" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", + "num", +] + +[[package]] +name = "arrow-row" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "007035e17ae09c4e8993e4cb8b5b96edf0afb927cd38e2dff27189b274d83dcf" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", + "hashbrown 0.14.3", +] + +[[package]] +name = "arrow-schema" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ff3e9c01f7cd169379d269f926892d0e622a704960350d09d331be3ec9e0029" + +[[package]] +name = "arrow-select" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ce20973c1912de6514348e064829e50947e35977bb9d7fb637dc99ea9ffd78c" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "50.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00f3b37f2aeece31a2636d1b037dabb69ef590e03bdc7eb68519b51ec86932a7" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "num", + "regex", + "regex-syntax", +] + [[package]] name = "async-attributes" version = "1.1.2" @@ -878,9 +1091,9 @@ checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" [[package]] name = "bytemuck" -version = "1.14.0" +version = "1.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" +checksum = "a2ef034f05691a48569bd920a96c81b9d91bbad1ab5ac7c4616c1f6ef36cb79f" [[package]] name = "byteorder" @@ -950,6 +1163,26 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28c122c3980598d243d63d9a704629a2d748d101f278052ff068be5a4423ab6f" +[[package]] +name = "const-random" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aaf16c9c2c612020bcfd042e170f6e32de9b9d75adb5277cdbbd2e2c8c8299a" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -1041,6 +1274,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "crypto-bigint" version = "0.4.9" @@ -1075,6 +1314,27 @@ dependencies = [ "typenum", ] +[[package]] +name = "csv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + [[package]] name = "curve25519-dalek" version = "4.1.1" @@ -1427,6 +1687,16 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "flatbuffers" +version = "23.5.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" +dependencies = [ + "bitflags 1.3.2", + "rustc_version", +] + [[package]] name = "flate2" version = "1.0.28" @@ -1673,6 +1943,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -2122,6 +2403,70 @@ dependencies = [ "spin 0.5.2", ] +[[package]] +name = "lexical-core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" +dependencies = [ + "lexical-util", + "static_assertions", +] + [[package]] name = "libc" version = "0.2.151" @@ -2288,6 +2633,20 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "num" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.4" @@ -2316,6 +2675,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "num-complex" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" +dependencies = [ + "num-traits", +] + [[package]] name = "num-integer" version = "0.1.45" @@ -2337,6 +2705,18 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.17" @@ -3112,6 +3492,16 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "roaring" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1c77081a55300e016cb86f2864415b7518741879db925b8d488a0ee0d2da6bf" +dependencies = [ + "bytemuck", + "byteorder", +] + [[package]] name = "rsa" version = "0.9.6" @@ -3595,6 +3985,12 @@ dependencies = [ "der 0.7.8", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.10.0" @@ -3718,6 +4114,15 @@ dependencies = [ "time-core", ] +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + [[package]] name = "tinyvec" version = "1.6.0" @@ -4362,6 +4767,7 @@ dependencies = [ name = "worker" version = "0.1.0" dependencies = [ + "arrow", "async-trait", "aws-config", "aws-sdk-s3", @@ -4381,6 +4787,7 @@ dependencies = [ "pulsar", "rand", "rayon", + "roaring", "schemars", "serde", "serde_json", diff --git a/DEVELOP.md b/DEVELOP.md index f034e07bed38..c9550e639f46 100644 --- a/DEVELOP.md +++ b/DEVELOP.md @@ -6,8 +6,6 @@ https://packaging.python.org. ## Setup -Because of the dependencies it relies on (like `pytorch`), this project does not support Python version >3.10.0. - Set up a virtual environment and install the project's requirements and dev requirements: @@ -50,6 +48,15 @@ api = chromadb.HttpClient(host="localhost", port="8000") print(api.heartbeat()) ``` +## Local dev setup for distributed chroma +We use tilt for providing local dev setup. Tilt is an open source project +##### Requirement +- Docker +- Local Kubernetes cluster (Recommended: [OrbStack](https://orbstack.dev/) for mac, [Kind](https://kind.sigs.k8s.io/) for linux) +- [Tilt](https://docs.tilt.dev/) + +For starting the distributed Chroma in the workspace, use `tilt up`. It will create all the required resources and build the necessary Docker image in the current kubectl context. +Once done, it will expose Chroma on port 8000. You can also visit the Tilt dashboard UI at http://localhost:10350/. To clean and remove all the resources created by Tilt, use `tilt down`. ## Testing diff --git a/Dockerfile b/Dockerfile index 1f90733edbb7..c871a4cd8c70 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,6 +25,15 @@ COPY --from=builder /install /usr/local COPY ./bin/docker_entrypoint.sh /docker_entrypoint.sh COPY ./ /chroma +RUN chmod +x /docker_entrypoint.sh + +ENV CHROMA_HOST_ADDR "0.0.0.0" +ENV CHROMA_HOST_PORT 8000 +ENV CHROMA_WORKERS 1 +ENV CHROMA_LOG_CONFIG "chromadb/log_config.yml" +ENV CHROMA_TIMEOUT_KEEP_ALIVE 30 + EXPOSE 8000 -CMD ["/docker_entrypoint.sh"] +ENTRYPOINT ["/docker_entrypoint.sh"] +CMD [ "--workers ${CHROMA_WORKERS} --host ${CHROMA_HOST_ADDR} --port ${CHROMA_HOST_PORT} --proxy-headers --log-config ${CHROMA_LOG_CONFIG} --timeout-keep-alive ${CHROMA_TIMEOUT_KEEP_ALIVE}"] \ No newline at end of file diff --git a/Tiltfile b/Tiltfile new file mode 100644 index 000000000000..0d0777199f24 --- /dev/null +++ b/Tiltfile @@ -0,0 +1,41 @@ +docker_build('migration', + context='.', + dockerfile='./go/coordinator/Dockerfile.migration' +) + +docker_build('coordinator', + context='.', + dockerfile='./go/coordinator/Dockerfile' +) + +docker_build('server', + context='.', + dockerfile='./Dockerfile', +) + +docker_build('worker', + context='.', + dockerfile='./rust/worker/Dockerfile' +) + + +k8s_yaml(['k8s/dev/setup.yaml']) +k8s_resource( + objects=['chroma:Namespace', 'memberlist-reader:ClusterRole', 'memberlist-reader:ClusterRoleBinding', 'pod-list-role:Role', 'pod-list-role-binding:RoleBinding', 'memberlists.chroma.cluster:CustomResourceDefinition','worker-memberlist:MemberList'], + new_name='k8s_setup', + labels=["infrastructure"] +) +k8s_yaml(['k8s/dev/pulsar.yaml']) +k8s_resource('pulsar', resource_deps=['k8s_setup'], labels=["infrastructure"]) +k8s_yaml(['k8s/dev/postgres.yaml']) +k8s_resource('postgres', resource_deps=['k8s_setup'], labels=["infrastructure"]) +k8s_yaml(['k8s/dev/migration.yaml']) +k8s_resource('migration', resource_deps=['postgres'], labels=["chroma"]) +k8s_yaml(['k8s/dev/server.yaml']) +k8s_resource('server', resource_deps=['k8s_setup'],labels=["chroma"], port_forwards=8000 ) +k8s_yaml(['k8s/dev/coordinator.yaml']) +k8s_resource('coordinator', resource_deps=['pulsar', 'server', 'migration'], labels=["chroma"], port_forwards=50051 ) +k8s_yaml(['k8s/dev/logservice.yaml']) +k8s_resource('logservice', resource_deps=['migration'], labels=["chroma"], port_forwards='50052:50051') +k8s_yaml(['k8s/dev/worker.yaml']) +k8s_resource('worker', resource_deps=['coordinator'],labels=["chroma"]) diff --git a/bin/cluster-test.sh b/bin/cluster-test.sh index 10c48781c072..d18185b8c02f 100755 --- a/bin/cluster-test.sh +++ b/bin/cluster-test.sh @@ -25,6 +25,7 @@ minikube addons enable ingress-dns -p chroma-test # Setup docker to build inside the minikube cluster and build the image eval $(minikube -p chroma-test docker-env) docker build -t server:latest -f Dockerfile . +docker build -t migration -f go/coordinator/Dockerfile.migration . docker build -t chroma-coordinator:latest -f go/coordinator/Dockerfile . docker build -t worker -f rust/worker/Dockerfile . --build-arg CHROMA_KUBERNETES_INTEGRATION=1 @@ -35,6 +36,8 @@ kubectl apply -f k8s/cr kubectl apply -f k8s/test # Wait for the pods in the chroma namespace to be ready +kubectl wait --for=condition=complete --timeout=100s job/migration -n chroma +kubectl delete job migration -n chroma kubectl wait --namespace chroma --for=condition=Ready pods --all --timeout=400s # Run mini kube tunnel in the background to expose the service diff --git a/bin/docker_entrypoint.sh b/bin/docker_entrypoint.sh index e6f2df70be87..e9498b4fd7ca 100755 --- a/bin/docker_entrypoint.sh +++ b/bin/docker_entrypoint.sh @@ -1,5 +1,15 @@ #!/bin/bash +set -e export IS_PERSISTENT=1 export CHROMA_SERVER_NOFILE=65535 -exec uvicorn chromadb.app:app --workers 1 --host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30 +args="$@" + +if [[ $args =~ ^uvicorn.* ]]; then + echo "Starting server with args: $(eval echo "$args")" + echo -e "\033[31mWARNING: Please remove 'uvicorn chromadb.app:app' from your command line arguments. This is now handled by the entrypoint script." + exec $(eval echo "$args") +else + echo "Starting 'uvicorn chromadb.app:app' with args: $(eval echo "$args")" + exec uvicorn chromadb.app:app $(eval echo "$args") +fi diff --git a/chromadb/__init__.py b/chromadb/__init__.py index 142ab78a05fc..8e5ba91d1f1b 100644 --- a/chromadb/__init__.py +++ b/chromadb/__init__.py @@ -112,6 +112,10 @@ def EphemeralClient( settings = Settings() settings.is_persistent = False + # Make sure paramaters are the correct types -- users can pass anything. + tenant = str(tenant) + database = str(database) + return ClientCreator(settings=settings, tenant=tenant, database=database) @@ -135,12 +139,16 @@ def PersistentClient( settings.persist_directory = path settings.is_persistent = True + # Make sure paramaters are the correct types -- users can pass anything. + tenant = str(tenant) + database = str(database) + return ClientCreator(tenant=tenant, database=database, settings=settings) def HttpClient( host: str = "localhost", - port: str = "8000", + port: int = 8000, ssl: bool = False, headers: Optional[Dict[str, str]] = None, settings: Optional[Settings] = None, @@ -165,6 +173,13 @@ def HttpClient( if settings is None: settings = Settings() + # Make sure paramaters are the correct types -- users can pass anything. + host = str(host) + port = int(port) + ssl = bool(ssl) + tenant = str(tenant) + database = str(database) + settings.chroma_api_impl = "chromadb.api.fastapi.FastAPI" if settings.chroma_server_host and settings.chroma_server_host != host: raise ValueError( @@ -189,7 +204,7 @@ def CloudClient( settings: Optional[Settings] = None, *, # Following arguments are keyword-only, intended for testing only. cloud_host: str = "api.trychroma.com", - cloud_port: str = "8000", + cloud_port: int = 8000, enable_ssl: bool = True, ) -> ClientAPI: """ @@ -217,6 +232,14 @@ def CloudClient( if settings is None: settings = Settings() + # Make sure paramaters are the correct types -- users can pass anything. + tenant = str(tenant) + database = str(database) + api_key = str(api_key) + cloud_host = str(cloud_host) + cloud_port = int(cloud_port) + enable_ssl = bool(enable_ssl) + settings.chroma_api_impl = "chromadb.api.fastapi.FastAPI" settings.chroma_server_host = cloud_host settings.chroma_server_http_port = cloud_port @@ -242,9 +265,12 @@ def Client( tenant: The tenant to use for this client. Defaults to the default tenant. database: The database to use for this client. Defaults to the default database. - """ + # Make sure paramaters are the correct types -- users can pass anything. + tenant = str(tenant) + database = str(database) + return ClientCreator(tenant=tenant, database=database, settings=settings) diff --git a/chromadb/api/fastapi.py b/chromadb/api/fastapi.py index d3d1a8a4e7ea..d01028c734f8 100644 --- a/chromadb/api/fastapi.py +++ b/chromadb/api/fastapi.py @@ -1,4 +1,4 @@ -import json +import orjson as json import logging from typing import Optional, cast, Tuple from typing import Sequence @@ -109,7 +109,7 @@ def __init__(self, system: System): self._api_url = FastAPI.resolve_url( chroma_server_host=str(system.settings.chroma_server_host), - chroma_server_http_port=int(str(system.settings.chroma_server_http_port)), + chroma_server_http_port=system.settings.chroma_server_http_port, chroma_server_ssl_enabled=system.settings.chroma_server_ssl_enabled, default_api_path=system.settings.chroma_server_api_default_path, ) @@ -138,6 +138,8 @@ def __init__(self, system: System): self._session = requests.Session() if self._header is not None: self._session.headers.update(self._header) + if self._settings.chroma_server_ssl_verify is not None: + self._session.verify = self._settings.chroma_server_ssl_verify @trace_method("FastAPI.heartbeat", OpenTelemetryGranularity.OPERATION) @override @@ -145,7 +147,7 @@ def heartbeat(self) -> int: """Returns the current server time in nanoseconds to check if the server is alive""" resp = self._session.get(self._api_url) raise_chroma_error(resp) - return int(resp.json()["nanosecond heartbeat"]) + return int(json.loads(resp.text)["nanosecond heartbeat"]) @trace_method("FastAPI.create_database", OpenTelemetryGranularity.OPERATION) @override @@ -175,7 +177,7 @@ def get_database( params={"tenant": tenant}, ) raise_chroma_error(resp) - resp_json = resp.json() + resp_json = json.loads(resp.text) return Database( id=resp_json["id"], name=resp_json["name"], tenant=resp_json["tenant"] ) @@ -196,7 +198,7 @@ def get_tenant(self, name: str) -> Tenant: self._api_url + "/tenants/" + name, ) raise_chroma_error(resp) - resp_json = resp.json() + resp_json = json.loads(resp.text) return Tenant(name=resp_json["name"]) @trace_method("FastAPI.list_collections", OpenTelemetryGranularity.OPERATION) @@ -219,7 +221,7 @@ def list_collections( }, ) raise_chroma_error(resp) - json_collections = resp.json() + json_collections = json.loads(resp.text) collections = [] for json_collection in json_collections: collections.append(Collection(self, **json_collection)) @@ -237,7 +239,7 @@ def count_collections( params={"tenant": tenant, "database": database}, ) raise_chroma_error(resp) - return cast(int, resp.json()) + return cast(int, json.loads(resp.text)) @trace_method("FastAPI.create_collection", OpenTelemetryGranularity.OPERATION) @override @@ -266,7 +268,7 @@ def create_collection( params={"tenant": tenant, "database": database}, ) raise_chroma_error(resp) - resp_json = resp.json() + resp_json = json.loads(resp.text) return Collection( client=self, id=resp_json["id"], @@ -300,7 +302,7 @@ def get_collection( self._api_url + "/collections/" + name if name else str(id), params=_params ) raise_chroma_error(resp) - resp_json = resp.json() + resp_json = json.loads(resp.text) return Collection( client=self, name=resp_json["name"], @@ -379,7 +381,7 @@ def _count( self._api_url + "/collections/" + str(collection_id) + "/count" ) raise_chroma_error(resp) - return cast(int, resp.json()) + return cast(int, json.loads(resp.text)) @trace_method("FastAPI._peek", OpenTelemetryGranularity.OPERATION) @override @@ -432,7 +434,7 @@ def _get( ) raise_chroma_error(resp) - body = resp.json() + body = json.loads(resp.text) return GetResult( ids=body["ids"], embeddings=body.get("embeddings", None), @@ -460,7 +462,7 @@ def _delete( ) raise_chroma_error(resp) - return cast(IDs, resp.json()) + return cast(IDs, json.loads(resp.text)) @trace_method("FastAPI._submit_batch", OpenTelemetryGranularity.ALL) def _submit_batch( @@ -584,7 +586,7 @@ def _query( ) raise_chroma_error(resp) - body = resp.json() + body = json.loads(resp.text) return QueryResult( ids=body["ids"], @@ -602,7 +604,7 @@ def reset(self) -> bool: """Resets the database""" resp = self._session.post(self._api_url + "/reset") raise_chroma_error(resp) - return cast(bool, resp.json()) + return cast(bool, json.loads(resp.text)) @trace_method("FastAPI.get_version", OpenTelemetryGranularity.OPERATION) @override @@ -610,7 +612,7 @@ def get_version(self) -> str: """Returns the version of the server""" resp = self._session.get(self._api_url + "/version") raise_chroma_error(resp) - return cast(str, resp.json()) + return cast(str, json.loads(resp.text)) @override def get_settings(self) -> Settings: @@ -624,7 +626,7 @@ def max_batch_size(self) -> int: if self._max_batch_size == -1: resp = self._session.get(self._api_url + "/pre-flight-checks") raise_chroma_error(resp) - self._max_batch_size = cast(int, resp.json()["max_batch_size"]) + self._max_batch_size = cast(int, json.loads(resp.text)["max_batch_size"]) return self._max_batch_size @@ -635,7 +637,7 @@ def raise_chroma_error(resp: requests.Response) -> None: chroma_error = None try: - body = resp.json() + body = json.loads(resp.text) if "error" in body: if body["error"] in errors.error_types: chroma_error = errors.error_types[body["error"]](body["message"]) diff --git a/chromadb/api/segment.py b/chromadb/api/segment.py index 72df138d9bec..33bd00054a71 100644 --- a/chromadb/api/segment.py +++ b/chromadb/api/segment.py @@ -1,6 +1,7 @@ from chromadb.api import ServerAPI from chromadb.config import DEFAULT_DATABASE, DEFAULT_TENANT, Settings, System from chromadb.db.system import SysDB +from chromadb.quota import QuotaEnforcer from chromadb.segment import SegmentManager, MetadataReader, VectorReader from chromadb.telemetry.opentelemetry import ( add_attributes_to_current_span, @@ -58,7 +59,6 @@ import logging import re - logger = logging.getLogger(__name__) @@ -101,6 +101,7 @@ def __init__(self, system: System): self._settings = system.settings self._sysdb = self.require(SysDB) self._manager = self.require(SegmentManager) + self._quota = self.require(QuotaEnforcer) self._product_telemetry_client = self.require(ProductTelemetryClient) self._opentelemetry_client = self.require(OpenTelemetryClient) self._producer = self.require(Producer) @@ -356,6 +357,7 @@ def _add( documents: Optional[Documents] = None, uris: Optional[URIs] = None, ) -> bool: + self._quota.static_check(metadatas, documents, embeddings, collection_id) coll = self._get_collection(collection_id) self._manager.hint_use_collection(collection_id, t.Operation.ADD) validate_batch( @@ -398,6 +400,7 @@ def _update( documents: Optional[Documents] = None, uris: Optional[URIs] = None, ) -> bool: + self._quota.static_check(metadatas, documents, embeddings, collection_id) coll = self._get_collection(collection_id) self._manager.hint_use_collection(collection_id, t.Operation.UPDATE) validate_batch( @@ -442,6 +445,7 @@ def _upsert( documents: Optional[Documents] = None, uris: Optional[URIs] = None, ) -> bool: + self._quota.static_check(metadatas, documents, embeddings, collection_id) coll = self._get_collection(collection_id) self._manager.hint_use_collection(collection_id, t.Operation.UPSERT) validate_batch( diff --git a/chromadb/api/types.py b/chromadb/api/types.py index 7781c4225725..347461718fde 100644 --- a/chromadb/api/types.py +++ b/chromadb/api/types.py @@ -20,7 +20,7 @@ # Re-export types from chromadb.types __all__ = ["Metadata", "Where", "WhereDocument", "UpdateCollectionMetadata"] - +META_KEY_CHROMA_DOCUMENT = "chroma:document" T = TypeVar("T") OneOrMany = Union[T, List[T]] @@ -265,6 +265,10 @@ def validate_metadata(metadata: Metadata) -> Metadata: if len(metadata) == 0: raise ValueError(f"Expected metadata to be a non-empty dict, got {metadata}") for key, value in metadata.items(): + if key == META_KEY_CHROMA_DOCUMENT: + raise ValueError( + f"Expected metadata to not contain the reserved key {META_KEY_CHROMA_DOCUMENT}" + ) if not isinstance(key, str): raise TypeError( f"Expected metadata key to be a str, got {key} which is a {type(key)}" @@ -476,7 +480,11 @@ def validate_embeddings(embeddings: Embeddings) -> Embeddings: raise ValueError( f"Expected each embedding in the embeddings to be a list, got {embeddings}" ) - for embedding in embeddings: + for i, embedding in enumerate(embeddings): + if len(embedding) == 0: + raise ValueError( + f"Expected each embedding in the embeddings to be a non-empty list, got empty embedding at pos {i}" + ) if not all( [ isinstance(value, (int, float)) and not isinstance(value, bool) diff --git a/chromadb/config.py b/chromadb/config.py index 61b789d0eee6..b4a78d5746cd 100644 --- a/chromadb/config.py +++ b/chromadb/config.py @@ -4,7 +4,7 @@ import os from abc import ABC from graphlib import TopologicalSorter -from typing import Optional, List, Any, Dict, Set, Iterable +from typing import Optional, List, Any, Dict, Set, Iterable, Union from typing import Type, TypeVar, cast from overrides import EnforceOverrides @@ -70,11 +70,13 @@ "chromadb.telemetry.product.ProductTelemetryClient": "chroma_product_telemetry_impl", "chromadb.ingest.Producer": "chroma_producer_impl", "chromadb.ingest.Consumer": "chroma_consumer_impl", + "chromadb.quota.QuotaProvider": "chroma_quota_provider_impl", "chromadb.ingest.CollectionAssignmentPolicy": "chroma_collection_assignment_policy_impl", # noqa "chromadb.db.system.SysDB": "chroma_sysdb_impl", "chromadb.segment.SegmentManager": "chroma_segment_manager_impl", "chromadb.segment.distributed.SegmentDirectory": "chroma_segment_directory_impl", "chromadb.segment.distributed.MemberlistProvider": "chroma_memberlist_provider_impl", + } DEFAULT_TENANT = "default_tenant" @@ -99,6 +101,7 @@ class Settings(BaseSettings): # type: ignore chroma_segment_manager_impl: str = ( "chromadb.segment.impl.manager.local.LocalSegmentManager" ) + chroma_quota_provider_impl:Optional[str] = None # Distributed architecture specific components chroma_segment_directory_impl: str = "chromadb.segment.impl.distributed.segment_directory.RendezvousHashSegmentDirectory" @@ -120,10 +123,12 @@ class Settings(BaseSettings): # type: ignore chroma_server_host: Optional[str] = None chroma_server_headers: Optional[Dict[str, str]] = None - chroma_server_http_port: Optional[str] = None + chroma_server_http_port: Optional[int] = None chroma_server_ssl_enabled: Optional[bool] = False + # the below config value is only applicable to Chroma HTTP clients + chroma_server_ssl_verify: Optional[Union[bool, str]] = None chroma_server_api_default_path: Optional[str] = "/api/v1" - chroma_server_grpc_port: Optional[str] = None + chroma_server_grpc_port: Optional[int] = None # eg ["http://localhost:3000"] chroma_server_cors_allow_origins: List[str] = [] @@ -136,8 +141,8 @@ def empty_str_to_none(cls, v: str) -> Optional[str]: chroma_server_nofile: Optional[int] = None pulsar_broker_url: Optional[str] = None - pulsar_admin_port: Optional[str] = "8080" - pulsar_broker_port: Optional[str] = "6650" + pulsar_admin_port: Optional[int] = 8080 + pulsar_broker_port: Optional[int] = 6650 chroma_server_auth_provider: Optional[str] = None diff --git a/chromadb/db/migrations.py b/chromadb/db/migrations.py index 951cb762c36b..97ef029092ab 100644 --- a/chromadb/db/migrations.py +++ b/chromadb/db/migrations.py @@ -1,3 +1,4 @@ +import sys from typing import Sequence from typing_extensions import TypedDict, NotRequired from importlib_resources.abc import Traversable @@ -253,7 +254,7 @@ def _read_migration_file(file: MigrationFile, hash_alg: str) -> Migration: sql = file["path"].read_text() if hash_alg == "md5": - hash = hashlib.md5(sql.encode("utf-8")).hexdigest() + hash = hashlib.md5(sql.encode("utf-8"), usedforsecurity=False).hexdigest() if sys.version_info >= (3, 9) else hashlib.md5(sql.encode("utf-8")).hexdigest() elif hash_alg == "sha256": hash = hashlib.sha256(sql.encode("utf-8")).hexdigest() else: diff --git a/chromadb/proto/chroma_pb2.py b/chromadb/proto/chroma_pb2.py index 84a3ba9b13dd..bc8d43e57ec8 100644 --- a/chromadb/proto/chroma_pb2.py +++ b/chromadb/proto/chroma_pb2.py @@ -13,7 +13,7 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1b\x63hromadb/proto/chroma.proto\x12\x06\x63hroma\"&\n\x06Status\x12\x0e\n\x06reason\x18\x01 \x01(\t\x12\x0c\n\x04\x63ode\x18\x02 \x01(\x05\"0\n\x0e\x43hromaResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"U\n\x06Vector\x12\x11\n\tdimension\x18\x01 \x01(\x05\x12\x0e\n\x06vector\x18\x02 \x01(\x0c\x12(\n\x08\x65ncoding\x18\x03 \x01(\x0e\x32\x16.chroma.ScalarEncoding\"\xca\x01\n\x07Segment\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12#\n\x05scope\x18\x03 \x01(\x0e\x32\x14.chroma.SegmentScope\x12\x12\n\x05topic\x18\x04 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ncollection\x18\x05 \x01(\tH\x01\x88\x01\x01\x12-\n\x08metadata\x18\x06 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x02\x88\x01\x01\x42\x08\n\x06_topicB\r\n\x0b_collectionB\x0b\n\t_metadata\"\xb9\x01\n\nCollection\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\r\n\x05topic\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x88\x01\x01\x12\x16\n\tdimension\x18\x05 \x01(\x05H\x01\x88\x01\x01\x12\x0e\n\x06tenant\x18\x06 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 \x01(\tB\x0b\n\t_metadataB\x0c\n\n_dimension\"4\n\x08\x44\x61tabase\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06tenant\x18\x03 \x01(\t\"\x16\n\x06Tenant\x12\x0c\n\x04name\x18\x01 \x01(\t\"b\n\x13UpdateMetadataValue\x12\x16\n\x0cstring_value\x18\x01 \x01(\tH\x00\x12\x13\n\tint_value\x18\x02 \x01(\x03H\x00\x12\x15\n\x0b\x66loat_value\x18\x03 \x01(\x01H\x00\x42\x07\n\x05value\"\x96\x01\n\x0eUpdateMetadata\x12\x36\n\x08metadata\x18\x01 \x03(\x0b\x32$.chroma.UpdateMetadata.MetadataEntry\x1aL\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.chroma.UpdateMetadataValue:\x02\x38\x01\"\xcc\x01\n\x15SubmitEmbeddingRecord\x12\n\n\x02id\x18\x01 \x01(\t\x12#\n\x06vector\x18\x02 \x01(\x0b\x32\x0e.chroma.VectorH\x00\x88\x01\x01\x12-\n\x08metadata\x18\x03 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x01\x88\x01\x01\x12$\n\toperation\x18\x04 \x01(\x0e\x32\x11.chroma.Operation\x12\x15\n\rcollection_id\x18\x05 \x01(\tB\t\n\x07_vectorB\x0b\n\t_metadata\"S\n\x15VectorEmbeddingRecord\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06seq_id\x18\x02 \x01(\x0c\x12\x1e\n\x06vector\x18\x03 \x01(\x0b\x32\x0e.chroma.Vector\"q\n\x11VectorQueryResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06seq_id\x18\x02 \x01(\x0c\x12\x10\n\x08\x64istance\x18\x03 \x01(\x01\x12#\n\x06vector\x18\x04 \x01(\x0b\x32\x0e.chroma.VectorH\x00\x88\x01\x01\x42\t\n\x07_vector\"@\n\x12VectorQueryResults\x12*\n\x07results\x18\x01 \x03(\x0b\x32\x19.chroma.VectorQueryResult\"(\n\x15SegmentServerResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\"4\n\x11GetVectorsRequest\x12\x0b\n\x03ids\x18\x01 \x03(\t\x12\x12\n\nsegment_id\x18\x02 \x01(\t\"D\n\x12GetVectorsResponse\x12.\n\x07records\x18\x01 \x03(\x0b\x32\x1d.chroma.VectorEmbeddingRecord\"\x86\x01\n\x13QueryVectorsRequest\x12\x1f\n\x07vectors\x18\x01 \x03(\x0b\x32\x0e.chroma.Vector\x12\t\n\x01k\x18\x02 \x01(\x05\x12\x13\n\x0b\x61llowed_ids\x18\x03 \x03(\t\x12\x1a\n\x12include_embeddings\x18\x04 \x01(\x08\x12\x12\n\nsegment_id\x18\x05 \x01(\t\"C\n\x14QueryVectorsResponse\x12+\n\x07results\x18\x01 \x03(\x0b\x32\x1a.chroma.VectorQueryResults*8\n\tOperation\x12\x07\n\x03\x41\x44\x44\x10\x00\x12\n\n\x06UPDATE\x10\x01\x12\n\n\x06UPSERT\x10\x02\x12\n\n\x06\x44\x45LETE\x10\x03*(\n\x0eScalarEncoding\x12\x0b\n\x07\x46LOAT32\x10\x00\x12\t\n\x05INT32\x10\x01*(\n\x0cSegmentScope\x12\n\n\x06VECTOR\x10\x00\x12\x0c\n\x08METADATA\x10\x01\x32\x94\x01\n\rSegmentServer\x12?\n\x0bLoadSegment\x12\x0f.chroma.Segment\x1a\x1d.chroma.SegmentServerResponse\"\x00\x12\x42\n\x0eReleaseSegment\x12\x0f.chroma.Segment\x1a\x1d.chroma.SegmentServerResponse\"\x00\x32\xa2\x01\n\x0cVectorReader\x12\x45\n\nGetVectors\x12\x19.chroma.GetVectorsRequest\x1a\x1a.chroma.GetVectorsResponse\"\x00\x12K\n\x0cQueryVectors\x12\x1b.chroma.QueryVectorsRequest\x1a\x1c.chroma.QueryVectorsResponse\"\x00\x42\x43ZAgithub.com/chroma/chroma-coordinator/internal/proto/coordinatorpbb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1b\x63hromadb/proto/chroma.proto\x12\x06\x63hroma\"&\n\x06Status\x12\x0e\n\x06reason\x18\x01 \x01(\t\x12\x0c\n\x04\x63ode\x18\x02 \x01(\x05\"0\n\x0e\x43hromaResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"U\n\x06Vector\x12\x11\n\tdimension\x18\x01 \x01(\x05\x12\x0e\n\x06vector\x18\x02 \x01(\x0c\x12(\n\x08\x65ncoding\x18\x03 \x01(\x0e\x32\x16.chroma.ScalarEncoding\"\xca\x01\n\x07Segment\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12#\n\x05scope\x18\x03 \x01(\x0e\x32\x14.chroma.SegmentScope\x12\x12\n\x05topic\x18\x04 \x01(\tH\x00\x88\x01\x01\x12\x17\n\ncollection\x18\x05 \x01(\tH\x01\x88\x01\x01\x12-\n\x08metadata\x18\x06 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x02\x88\x01\x01\x42\x08\n\x06_topicB\r\n\x0b_collectionB\x0b\n\t_metadata\"\xb9\x01\n\nCollection\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\r\n\x05topic\x18\x03 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x88\x01\x01\x12\x16\n\tdimension\x18\x05 \x01(\x05H\x01\x88\x01\x01\x12\x0e\n\x06tenant\x18\x06 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 \x01(\tB\x0b\n\t_metadataB\x0c\n\n_dimension\"4\n\x08\x44\x61tabase\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06tenant\x18\x03 \x01(\t\"\x16\n\x06Tenant\x12\x0c\n\x04name\x18\x01 \x01(\t\"b\n\x13UpdateMetadataValue\x12\x16\n\x0cstring_value\x18\x01 \x01(\tH\x00\x12\x13\n\tint_value\x18\x02 \x01(\x03H\x00\x12\x15\n\x0b\x66loat_value\x18\x03 \x01(\x01H\x00\x42\x07\n\x05value\"\x96\x01\n\x0eUpdateMetadata\x12\x36\n\x08metadata\x18\x01 \x03(\x0b\x32$.chroma.UpdateMetadata.MetadataEntry\x1aL\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.chroma.UpdateMetadataValue:\x02\x38\x01\"\xcc\x01\n\x15SubmitEmbeddingRecord\x12\n\n\x02id\x18\x01 \x01(\t\x12#\n\x06vector\x18\x02 \x01(\x0b\x32\x0e.chroma.VectorH\x00\x88\x01\x01\x12-\n\x08metadata\x18\x03 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x01\x88\x01\x01\x12$\n\toperation\x18\x04 \x01(\x0e\x32\x11.chroma.Operation\x12\x15\n\rcollection_id\x18\x05 \x01(\tB\t\n\x07_vectorB\x0b\n\t_metadata\"S\n\x15VectorEmbeddingRecord\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06seq_id\x18\x02 \x01(\x0c\x12\x1e\n\x06vector\x18\x03 \x01(\x0b\x32\x0e.chroma.Vector\"q\n\x11VectorQueryResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06seq_id\x18\x02 \x01(\x0c\x12\x10\n\x08\x64istance\x18\x03 \x01(\x02\x12#\n\x06vector\x18\x04 \x01(\x0b\x32\x0e.chroma.VectorH\x00\x88\x01\x01\x42\t\n\x07_vector\"@\n\x12VectorQueryResults\x12*\n\x07results\x18\x01 \x03(\x0b\x32\x19.chroma.VectorQueryResult\"4\n\x11GetVectorsRequest\x12\x0b\n\x03ids\x18\x01 \x03(\t\x12\x12\n\nsegment_id\x18\x02 \x01(\t\"D\n\x12GetVectorsResponse\x12.\n\x07records\x18\x01 \x03(\x0b\x32\x1d.chroma.VectorEmbeddingRecord\"\x86\x01\n\x13QueryVectorsRequest\x12\x1f\n\x07vectors\x18\x01 \x03(\x0b\x32\x0e.chroma.Vector\x12\t\n\x01k\x18\x02 \x01(\x05\x12\x13\n\x0b\x61llowed_ids\x18\x03 \x03(\t\x12\x1a\n\x12include_embeddings\x18\x04 \x01(\x08\x12\x12\n\nsegment_id\x18\x05 \x01(\t\"C\n\x14QueryVectorsResponse\x12+\n\x07results\x18\x01 \x03(\x0b\x32\x1a.chroma.VectorQueryResults*8\n\tOperation\x12\x07\n\x03\x41\x44\x44\x10\x00\x12\n\n\x06UPDATE\x10\x01\x12\n\n\x06UPSERT\x10\x02\x12\n\n\x06\x44\x45LETE\x10\x03*(\n\x0eScalarEncoding\x12\x0b\n\x07\x46LOAT32\x10\x00\x12\t\n\x05INT32\x10\x01*(\n\x0cSegmentScope\x12\n\n\x06VECTOR\x10\x00\x12\x0c\n\x08METADATA\x10\x01\x32\xa2\x01\n\x0cVectorReader\x12\x45\n\nGetVectors\x12\x19.chroma.GetVectorsRequest\x1a\x1a.chroma.GetVectorsResponse\"\x00\x12K\n\x0cQueryVectors\x12\x1b.chroma.QueryVectorsRequest\x1a\x1c.chroma.QueryVectorsResponse\"\x00\x42\x43ZAgithub.com/chroma/chroma-coordinator/internal/proto/coordinatorpbb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -23,12 +23,12 @@ DESCRIPTOR._serialized_options = b'ZAgithub.com/chroma/chroma-coordinator/internal/proto/coordinatorpb' _UPDATEMETADATA_METADATAENTRY._options = None _UPDATEMETADATA_METADATAENTRY._serialized_options = b'8\001' - _globals['_OPERATION']._serialized_start=1785 - _globals['_OPERATION']._serialized_end=1841 - _globals['_SCALARENCODING']._serialized_start=1843 - _globals['_SCALARENCODING']._serialized_end=1883 - _globals['_SEGMENTSCOPE']._serialized_start=1885 - _globals['_SEGMENTSCOPE']._serialized_end=1925 + _globals['_OPERATION']._serialized_start=1743 + _globals['_OPERATION']._serialized_end=1799 + _globals['_SCALARENCODING']._serialized_start=1801 + _globals['_SCALARENCODING']._serialized_end=1841 + _globals['_SEGMENTSCOPE']._serialized_start=1843 + _globals['_SEGMENTSCOPE']._serialized_end=1883 _globals['_STATUS']._serialized_start=39 _globals['_STATUS']._serialized_end=77 _globals['_CHROMARESPONSE']._serialized_start=79 @@ -57,18 +57,14 @@ _globals['_VECTORQUERYRESULT']._serialized_end=1345 _globals['_VECTORQUERYRESULTS']._serialized_start=1347 _globals['_VECTORQUERYRESULTS']._serialized_end=1411 - _globals['_SEGMENTSERVERRESPONSE']._serialized_start=1413 - _globals['_SEGMENTSERVERRESPONSE']._serialized_end=1453 - _globals['_GETVECTORSREQUEST']._serialized_start=1455 - _globals['_GETVECTORSREQUEST']._serialized_end=1507 - _globals['_GETVECTORSRESPONSE']._serialized_start=1509 - _globals['_GETVECTORSRESPONSE']._serialized_end=1577 - _globals['_QUERYVECTORSREQUEST']._serialized_start=1580 - _globals['_QUERYVECTORSREQUEST']._serialized_end=1714 - _globals['_QUERYVECTORSRESPONSE']._serialized_start=1716 - _globals['_QUERYVECTORSRESPONSE']._serialized_end=1783 - _globals['_SEGMENTSERVER']._serialized_start=1928 - _globals['_SEGMENTSERVER']._serialized_end=2076 - _globals['_VECTORREADER']._serialized_start=2079 - _globals['_VECTORREADER']._serialized_end=2241 + _globals['_GETVECTORSREQUEST']._serialized_start=1413 + _globals['_GETVECTORSREQUEST']._serialized_end=1465 + _globals['_GETVECTORSRESPONSE']._serialized_start=1467 + _globals['_GETVECTORSRESPONSE']._serialized_end=1535 + _globals['_QUERYVECTORSREQUEST']._serialized_start=1538 + _globals['_QUERYVECTORSREQUEST']._serialized_end=1672 + _globals['_QUERYVECTORSRESPONSE']._serialized_start=1674 + _globals['_QUERYVECTORSRESPONSE']._serialized_end=1741 + _globals['_VECTORREADER']._serialized_start=1886 + _globals['_VECTORREADER']._serialized_end=2048 # @@protoc_insertion_point(module_scope) diff --git a/chromadb/proto/coordinator_pb2.py b/chromadb/proto/coordinator_pb2.py index fda6a0998670..888aece92853 100644 --- a/chromadb/proto/coordinator_pb2.py +++ b/chromadb/proto/coordinator_pb2.py @@ -15,7 +15,7 @@ from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n chromadb/proto/coordinator.proto\x12\x06\x63hroma\x1a\x1b\x63hromadb/proto/chroma.proto\x1a\x1bgoogle/protobuf/empty.proto\"A\n\x15\x43reateDatabaseRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06tenant\x18\x03 \x01(\t\"2\n\x12GetDatabaseRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\x02 \x01(\t\"Y\n\x13GetDatabaseResponse\x12\"\n\x08\x64\x61tabase\x18\x01 \x01(\x0b\x32\x10.chroma.Database\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"#\n\x13\x43reateTenantRequest\x12\x0c\n\x04name\x18\x02 \x01(\t\" \n\x10GetTenantRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\"S\n\x11GetTenantResponse\x12\x1e\n\x06tenant\x18\x01 \x01(\x0b\x32\x0e.chroma.Tenant\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"8\n\x14\x43reateSegmentRequest\x12 \n\x07segment\x18\x01 \x01(\x0b\x32\x0f.chroma.Segment\"\"\n\x14\x44\x65leteSegmentRequest\x12\n\n\x02id\x18\x01 \x01(\t\"\xc2\x01\n\x12GetSegmentsRequest\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12(\n\x05scope\x18\x03 \x01(\x0e\x32\x14.chroma.SegmentScopeH\x02\x88\x01\x01\x12\x12\n\x05topic\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x17\n\ncollection\x18\x05 \x01(\tH\x04\x88\x01\x01\x42\x05\n\x03_idB\x07\n\x05_typeB\x08\n\x06_scopeB\x08\n\x06_topicB\r\n\x0b_collection\"X\n\x13GetSegmentsResponse\x12!\n\x08segments\x18\x01 \x03(\x0b\x32\x0f.chroma.Segment\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"\xfa\x01\n\x14UpdateSegmentRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0f\n\x05topic\x18\x02 \x01(\tH\x00\x12\x15\n\x0breset_topic\x18\x03 \x01(\x08H\x00\x12\x14\n\ncollection\x18\x04 \x01(\tH\x01\x12\x1a\n\x10reset_collection\x18\x05 \x01(\x08H\x01\x12*\n\x08metadata\x18\x06 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x02\x12\x18\n\x0ereset_metadata\x18\x07 \x01(\x08H\x02\x42\x0e\n\x0ctopic_updateB\x13\n\x11\x63ollection_updateB\x11\n\x0fmetadata_update\"\xe5\x01\n\x17\x43reateCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12-\n\x08metadata\x18\x03 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x88\x01\x01\x12\x16\n\tdimension\x18\x04 \x01(\x05H\x01\x88\x01\x01\x12\x1a\n\rget_or_create\x18\x05 \x01(\x08H\x02\x88\x01\x01\x12\x0e\n\x06tenant\x18\x06 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 \x01(\tB\x0b\n\t_metadataB\x0c\n\n_dimensionB\x10\n\x0e_get_or_create\"s\n\x18\x43reateCollectionResponse\x12&\n\ncollection\x18\x01 \x01(\x0b\x32\x12.chroma.Collection\x12\x0f\n\x07\x63reated\x18\x02 \x01(\x08\x12\x1e\n\x06status\x18\x03 \x01(\x0b\x32\x0e.chroma.Status\"G\n\x17\x44\x65leteCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\x02 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x03 \x01(\t\"\x8b\x01\n\x15GetCollectionsRequest\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x12\n\x05topic\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x0e\n\x06tenant\x18\x04 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x05 \x01(\tB\x05\n\x03_idB\x07\n\x05_nameB\x08\n\x06_topic\"a\n\x16GetCollectionsResponse\x12\'\n\x0b\x63ollections\x18\x01 \x03(\x0b\x32\x12.chroma.Collection\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"\xde\x01\n\x17UpdateCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x05topic\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tdimension\x18\x04 \x01(\x05H\x03\x88\x01\x01\x12*\n\x08metadata\x18\x05 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x12\x18\n\x0ereset_metadata\x18\x06 \x01(\x08H\x00\x42\x11\n\x0fmetadata_updateB\x08\n\x06_topicB\x07\n\x05_nameB\x0c\n\n_dimension2\xd6\x07\n\x05SysDB\x12I\n\x0e\x43reateDatabase\x12\x1d.chroma.CreateDatabaseRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12H\n\x0bGetDatabase\x12\x1a.chroma.GetDatabaseRequest\x1a\x1b.chroma.GetDatabaseResponse\"\x00\x12\x45\n\x0c\x43reateTenant\x12\x1b.chroma.CreateTenantRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12\x42\n\tGetTenant\x12\x18.chroma.GetTenantRequest\x1a\x19.chroma.GetTenantResponse\"\x00\x12G\n\rCreateSegment\x12\x1c.chroma.CreateSegmentRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12G\n\rDeleteSegment\x12\x1c.chroma.DeleteSegmentRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12H\n\x0bGetSegments\x12\x1a.chroma.GetSegmentsRequest\x1a\x1b.chroma.GetSegmentsResponse\"\x00\x12G\n\rUpdateSegment\x12\x1c.chroma.UpdateSegmentRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12W\n\x10\x43reateCollection\x12\x1f.chroma.CreateCollectionRequest\x1a .chroma.CreateCollectionResponse\"\x00\x12M\n\x10\x44\x65leteCollection\x12\x1f.chroma.DeleteCollectionRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12Q\n\x0eGetCollections\x12\x1d.chroma.GetCollectionsRequest\x1a\x1e.chroma.GetCollectionsResponse\"\x00\x12M\n\x10UpdateCollection\x12\x1f.chroma.UpdateCollectionRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12>\n\nResetState\x12\x16.google.protobuf.Empty\x1a\x16.chroma.ChromaResponse\"\x00\x42\x43ZAgithub.com/chroma/chroma-coordinator/internal/proto/coordinatorpbb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n chromadb/proto/coordinator.proto\x12\x06\x63hroma\x1a\x1b\x63hromadb/proto/chroma.proto\x1a\x1bgoogle/protobuf/empty.proto\"A\n\x15\x43reateDatabaseRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06tenant\x18\x03 \x01(\t\"2\n\x12GetDatabaseRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\x02 \x01(\t\"Y\n\x13GetDatabaseResponse\x12\"\n\x08\x64\x61tabase\x18\x01 \x01(\x0b\x32\x10.chroma.Database\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"#\n\x13\x43reateTenantRequest\x12\x0c\n\x04name\x18\x02 \x01(\t\" \n\x10GetTenantRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\"S\n\x11GetTenantResponse\x12\x1e\n\x06tenant\x18\x01 \x01(\x0b\x32\x0e.chroma.Tenant\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"8\n\x14\x43reateSegmentRequest\x12 \n\x07segment\x18\x01 \x01(\x0b\x32\x0f.chroma.Segment\"\"\n\x14\x44\x65leteSegmentRequest\x12\n\n\x02id\x18\x01 \x01(\t\"\xc2\x01\n\x12GetSegmentsRequest\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12(\n\x05scope\x18\x03 \x01(\x0e\x32\x14.chroma.SegmentScopeH\x02\x88\x01\x01\x12\x12\n\x05topic\x18\x04 \x01(\tH\x03\x88\x01\x01\x12\x17\n\ncollection\x18\x05 \x01(\tH\x04\x88\x01\x01\x42\x05\n\x03_idB\x07\n\x05_typeB\x08\n\x06_scopeB\x08\n\x06_topicB\r\n\x0b_collection\"X\n\x13GetSegmentsResponse\x12!\n\x08segments\x18\x01 \x03(\x0b\x32\x0f.chroma.Segment\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"\xfa\x01\n\x14UpdateSegmentRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0f\n\x05topic\x18\x02 \x01(\tH\x00\x12\x15\n\x0breset_topic\x18\x03 \x01(\x08H\x00\x12\x14\n\ncollection\x18\x04 \x01(\tH\x01\x12\x1a\n\x10reset_collection\x18\x05 \x01(\x08H\x01\x12*\n\x08metadata\x18\x06 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x02\x12\x18\n\x0ereset_metadata\x18\x07 \x01(\x08H\x02\x42\x0e\n\x0ctopic_updateB\x13\n\x11\x63ollection_updateB\x11\n\x0fmetadata_update\"\xe5\x01\n\x17\x43reateCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12-\n\x08metadata\x18\x03 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x88\x01\x01\x12\x16\n\tdimension\x18\x04 \x01(\x05H\x01\x88\x01\x01\x12\x1a\n\rget_or_create\x18\x05 \x01(\x08H\x02\x88\x01\x01\x12\x0e\n\x06tenant\x18\x06 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 \x01(\tB\x0b\n\t_metadataB\x0c\n\n_dimensionB\x10\n\x0e_get_or_create\"s\n\x18\x43reateCollectionResponse\x12&\n\ncollection\x18\x01 \x01(\x0b\x32\x12.chroma.Collection\x12\x0f\n\x07\x63reated\x18\x02 \x01(\x08\x12\x1e\n\x06status\x18\x03 \x01(\x0b\x32\x0e.chroma.Status\"G\n\x17\x44\x65leteCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\x02 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x03 \x01(\t\"\x8b\x01\n\x15GetCollectionsRequest\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x12\n\x05topic\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x0e\n\x06tenant\x18\x04 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x05 \x01(\tB\x05\n\x03_idB\x07\n\x05_nameB\x08\n\x06_topic\"a\n\x16GetCollectionsResponse\x12\'\n\x0b\x63ollections\x18\x01 \x03(\x0b\x32\x12.chroma.Collection\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"\xde\x01\n\x17UpdateCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x12\n\x05topic\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x11\n\x04name\x18\x03 \x01(\tH\x02\x88\x01\x01\x12\x16\n\tdimension\x18\x04 \x01(\x05H\x03\x88\x01\x01\x12*\n\x08metadata\x18\x05 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x12\x18\n\x0ereset_metadata\x18\x06 \x01(\x08H\x00\x42\x11\n\x0fmetadata_updateB\x08\n\x06_topicB\x07\n\x05_nameB\x0c\n\n_dimension\"O\n\x0cNotification\x12\n\n\x02id\x18\x01 \x01(\x03\x12\x15\n\rcollection_id\x18\x02 \x01(\t\x12\x0c\n\x04type\x18\x03 \x01(\t\x12\x0e\n\x06status\x18\x04 \x01(\t2\xd6\x07\n\x05SysDB\x12I\n\x0e\x43reateDatabase\x12\x1d.chroma.CreateDatabaseRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12H\n\x0bGetDatabase\x12\x1a.chroma.GetDatabaseRequest\x1a\x1b.chroma.GetDatabaseResponse\"\x00\x12\x45\n\x0c\x43reateTenant\x12\x1b.chroma.CreateTenantRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12\x42\n\tGetTenant\x12\x18.chroma.GetTenantRequest\x1a\x19.chroma.GetTenantResponse\"\x00\x12G\n\rCreateSegment\x12\x1c.chroma.CreateSegmentRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12G\n\rDeleteSegment\x12\x1c.chroma.DeleteSegmentRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12H\n\x0bGetSegments\x12\x1a.chroma.GetSegmentsRequest\x1a\x1b.chroma.GetSegmentsResponse\"\x00\x12G\n\rUpdateSegment\x12\x1c.chroma.UpdateSegmentRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12W\n\x10\x43reateCollection\x12\x1f.chroma.CreateCollectionRequest\x1a .chroma.CreateCollectionResponse\"\x00\x12M\n\x10\x44\x65leteCollection\x12\x1f.chroma.DeleteCollectionRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12Q\n\x0eGetCollections\x12\x1d.chroma.GetCollectionsRequest\x1a\x1e.chroma.GetCollectionsResponse\"\x00\x12M\n\x10UpdateCollection\x12\x1f.chroma.UpdateCollectionRequest\x1a\x16.chroma.ChromaResponse\"\x00\x12>\n\nResetState\x12\x16.google.protobuf.Empty\x1a\x16.chroma.ChromaResponse\"\x00\x42\x43ZAgithub.com/chroma/chroma-coordinator/internal/proto/coordinatorpbb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) @@ -57,6 +57,8 @@ _globals['_GETCOLLECTIONSRESPONSE']._serialized_end=1763 _globals['_UPDATECOLLECTIONREQUEST']._serialized_start=1766 _globals['_UPDATECOLLECTIONREQUEST']._serialized_end=1988 - _globals['_SYSDB']._serialized_start=1991 - _globals['_SYSDB']._serialized_end=2973 + _globals['_NOTIFICATION']._serialized_start=1990 + _globals['_NOTIFICATION']._serialized_end=2069 + _globals['_SYSDB']._serialized_start=2072 + _globals['_SYSDB']._serialized_end=3054 # @@protoc_insertion_point(module_scope) diff --git a/chromadb/proto/coordinator_pb2.pyi b/chromadb/proto/coordinator_pb2.pyi index 81545e4e2832..ec926340cdfa 100644 --- a/chromadb/proto/coordinator_pb2.pyi +++ b/chromadb/proto/coordinator_pb2.pyi @@ -180,3 +180,15 @@ class UpdateCollectionRequest(_message.Message): metadata: _chroma_pb2.UpdateMetadata reset_metadata: bool def __init__(self, id: _Optional[str] = ..., topic: _Optional[str] = ..., name: _Optional[str] = ..., dimension: _Optional[int] = ..., metadata: _Optional[_Union[_chroma_pb2.UpdateMetadata, _Mapping]] = ..., reset_metadata: bool = ...) -> None: ... + +class Notification(_message.Message): + __slots__ = ["id", "collection_id", "type", "status"] + ID_FIELD_NUMBER: _ClassVar[int] + COLLECTION_ID_FIELD_NUMBER: _ClassVar[int] + TYPE_FIELD_NUMBER: _ClassVar[int] + STATUS_FIELD_NUMBER: _ClassVar[int] + id: int + collection_id: str + type: str + status: str + def __init__(self, id: _Optional[int] = ..., collection_id: _Optional[str] = ..., type: _Optional[str] = ..., status: _Optional[str] = ...) -> None: ... diff --git a/chromadb/proto/logservice_pb2.py b/chromadb/proto/logservice_pb2.py new file mode 100644 index 000000000000..f7dd81efc1bd --- /dev/null +++ b/chromadb/proto/logservice_pb2.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: chromadb/proto/logservice.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b"\n\x1f\x63hromadb/proto/logservice.proto\x12\x06\x63hroma2\x0c\n\nLogServiceBBZ@github.com/chroma/chroma-coordinator/internal/proto/logservicepbb\x06proto3" +) + +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages( + DESCRIPTOR, "chromadb.proto.logservice_pb2", _globals +) +if _descriptor._USE_C_DESCRIPTORS == False: + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = ( + b"Z@github.com/chroma/chroma-coordinator/internal/proto/logservicepb" + ) + _globals["_LOGSERVICE"]._serialized_start = 43 + _globals["_LOGSERVICE"]._serialized_end = 55 +# @@protoc_insertion_point(module_scope) diff --git a/chromadb/proto/logservice_pb2.pyi b/chromadb/proto/logservice_pb2.pyi new file mode 100644 index 000000000000..869ab9d2d1e0 --- /dev/null +++ b/chromadb/proto/logservice_pb2.pyi @@ -0,0 +1,4 @@ +from google.protobuf import descriptor as _descriptor +from typing import ClassVar as _ClassVar + +DESCRIPTOR: _descriptor.FileDescriptor diff --git a/chromadb/proto/logservice_pb2_grpc.py b/chromadb/proto/logservice_pb2_grpc.py new file mode 100644 index 000000000000..d98303113da8 --- /dev/null +++ b/chromadb/proto/logservice_pb2_grpc.py @@ -0,0 +1,31 @@ +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import grpc + + +class LogServiceStub(object): + """Missing associated documentation comment in .proto file.""" + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. + """ + + +class LogServiceServicer(object): + """Missing associated documentation comment in .proto file.""" + + +def add_LogServiceServicer_to_server(servicer, server): + rpc_method_handlers = {} + generic_handler = grpc.method_handlers_generic_handler( + "chroma.LogService", rpc_method_handlers + ) + server.add_generic_rpc_handlers((generic_handler,)) + + +# This class is part of an EXPERIMENTAL API. +class LogService(object): + """Missing associated documentation comment in .proto file.""" diff --git a/chromadb/quota/__init__.py b/chromadb/quota/__init__.py new file mode 100644 index 000000000000..82365ff1bd18 --- /dev/null +++ b/chromadb/quota/__init__.py @@ -0,0 +1,90 @@ +from abc import abstractmethod +from enum import Enum +from typing import Optional, Literal + +from chromadb import Documents, Embeddings +from chromadb.api import Metadatas +from chromadb.config import ( + Component, + System, +) + + +class Resource(Enum): + METADATA_KEY_LENGTH = "METADATA_KEY_LENGTH" + METADATA_VALUE_LENGTH = "METADATA_VALUE_LENGTH" + DOCUMENT_SIZE = "DOCUMENT_SIZE" + EMBEDDINGS_DIMENSION = "EMBEDDINGS_DIMENSION" + + +class QuotaError(Exception): + def __init__(self, resource: Resource, quota: int, actual: int): + super().__init__(f"quota error. resource: {resource} quota: {quota} actual: {actual}") + self.quota = quota + self.actual = actual + self.resource = resource + +class QuotaProvider(Component): + """ + Retrieves quotas for resources within a system. + + Methods: + get_for_subject(resource, subject=None, tier=None): + Returns the quota for a given resource, optionally considering the tier and subject. + """ + def __init__(self, system: System) -> None: + super().__init__(system) + self.system = system + + @abstractmethod + def get_for_subject(self, resource: Resource, subject: Optional[str] = None, tier: Optional[str] = None) -> \ + Optional[int]: + pass + + +class QuotaEnforcer(Component): + """ + Enforces quota restrictions on various resources using quota provider. + + Methods: + static_check(metadatas=None, documents=None, embeddings=None, collection_id=None): + Performs static checks against quotas for metadatas, documents, and embeddings. Raises QuotaError if limits are exceeded. + """ + def __init__(self, system: System) -> None: + super().__init__(system) + self.should_enforce = False + if system.settings.chroma_quota_provider_impl: + self._quota_provider = system.require(QuotaProvider) + self.should_enforce = True + self.system = system + + def static_check(self, metadatas: Optional[Metadatas] = None, documents: Optional[Documents] = None, + embeddings: Optional[Embeddings] = None, collection_id: Optional[str] = None): + if not self.should_enforce: + return + metadata_key_length_quota = self._quota_provider.get_for_subject(resource=Resource.METADATA_KEY_LENGTH, + subject=collection_id) + metadata_value_length_quota = self._quota_provider.get_for_subject(resource=Resource.METADATA_VALUE_LENGTH, + subject=collection_id) + if metadatas and (metadata_key_length_quota or metadata_key_length_quota): + for metadata in metadatas: + for key in metadata: + if metadata_key_length_quota and len(key) > metadata_key_length_quota: + raise QuotaError(resource=Resource.METADATA_KEY_LENGTH, actual=len(key), + quota=metadata_key_length_quota) + if metadata_value_length_quota and isinstance(metadata[key], str) and len( + metadata[key]) > metadata_value_length_quota: + raise QuotaError(resource=Resource.METADATA_VALUE_LENGTH, actual=len(metadata[key]), + quota=metadata_value_length_quota) + document_size_quota = self._quota_provider.get_for_subject(resource=Resource.DOCUMENT_SIZE, subject=collection_id) + if document_size_quota and documents: + for document in documents: + if len(document) > document_size_quota: + raise QuotaError(resource=Resource.DOCUMENT_SIZE, actual=len(document), quota=document_size_quota) + embedding_dimension_quota = self._quota_provider.get_for_subject(resource=Resource.EMBEDDINGS_DIMENSION, + subject=collection_id) + if embedding_dimension_quota and embeddings: + for embedding in embeddings: + if len(embedding) > embedding_dimension_quota: + raise QuotaError(resource=Resource.EMBEDDINGS_DIMENSION, actual=len(embedding), + quota=embedding_dimension_quota) diff --git a/chromadb/quota/test_provider.py b/chromadb/quota/test_provider.py new file mode 100644 index 000000000000..484282fb7d01 --- /dev/null +++ b/chromadb/quota/test_provider.py @@ -0,0 +1,14 @@ +from typing import Optional + +from overrides import overrides + +from chromadb.quota import QuotaProvider, Resource + + +class QuotaProviderForTest(QuotaProvider): + def __init__(self, system) -> None: + super().__init__(system) + + @overrides + def get_for_subject(self, resource: Resource, subject: Optional[str] = "", tier: Optional[str] = "") -> Optional[int]: + pass diff --git a/chromadb/server/fastapi/__init__.py b/chromadb/server/fastapi/__init__.py index 529606a6c368..a38225de7f33 100644 --- a/chromadb/server/fastapi/__init__.py +++ b/chromadb/server/fastapi/__init__.py @@ -35,6 +35,7 @@ InvalidDimensionException, InvalidHTTPVersion, ) +from chromadb.quota import QuotaError from chromadb.server.fastapi.types import ( AddEmbedding, CreateDatabase, @@ -140,6 +141,7 @@ def __init__(self, settings: Settings): allow_origins=settings.chroma_server_cors_allow_origins, allow_methods=["*"], ) + self._app.add_exception_handler(QuotaError, self.quota_exception_handler) self._app.on_event("shutdown")(self.shutdown) @@ -291,6 +293,12 @@ def app(self) -> fastapi.FastAPI: def root(self) -> Dict[str, int]: return {"nanosecond heartbeat": self._api.heartbeat()} + async def quota_exception_handler(request: Request, exc: QuotaError): + return JSONResponse( + status_code=429, + content={"message": f"quota error. resource: {exc.resource} quota: {exc.quota} actual: {exc.actual}"}, + ) + def heartbeat(self) -> Dict[str, int]: return self.root() diff --git a/chromadb/test/client/test_cloud_client.py b/chromadb/test/client/test_cloud_client.py index aee869ca1c57..48b0252789b7 100644 --- a/chromadb/test/client/test_cloud_client.py +++ b/chromadb/test/client/test_cloud_client.py @@ -61,7 +61,7 @@ def mock_cloud_server(valid_token: str) -> Generator[System, None, None]: settings = Settings( chroma_api_impl="chromadb.api.fastapi.FastAPI", chroma_server_host=TEST_CLOUD_HOST, - chroma_server_http_port=str(port), + chroma_server_http_port=port, chroma_client_auth_provider="chromadb.auth.token.TokenAuthClientProvider", chroma_client_auth_credentials=valid_token, chroma_client_auth_token_transport_header=TOKEN_TRANSPORT_HEADER, @@ -82,7 +82,7 @@ def test_valid_key(mock_cloud_server: System, valid_token: str) -> None: database=DEFAULT_DATABASE, api_key=valid_token, cloud_host=TEST_CLOUD_HOST, - cloud_port=mock_cloud_server.settings.chroma_server_http_port, # type: ignore + cloud_port=mock_cloud_server.settings.chroma_server_http_port or 8000, enable_ssl=False, ) @@ -98,7 +98,7 @@ def test_invalid_key(mock_cloud_server: System, valid_token: str) -> None: database=DEFAULT_DATABASE, api_key=invalid_token, cloud_host=TEST_CLOUD_HOST, - cloud_port=mock_cloud_server.settings.chroma_server_http_port, # type: ignore + cloud_port=mock_cloud_server.settings.chroma_server_http_port or 8000, enable_ssl=False, ) client.heartbeat() diff --git a/chromadb/test/conftest.py b/chromadb/test/conftest.py index 087cb2271bd8..4e55ffc67498 100644 --- a/chromadb/test/conftest.py +++ b/chromadb/test/conftest.py @@ -3,6 +3,7 @@ import os import shutil import socket +import subprocess import tempfile import time from typing import ( @@ -47,7 +48,6 @@ ) hypothesis.settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "dev")) - NOT_CLUSTER_ONLY = os.getenv("CHROMA_CLUSTER_TEST_ONLY") != "1" @@ -58,6 +58,35 @@ def skip_if_not_cluster() -> pytest.MarkDecorator: ) +def generate_self_signed_certificate() -> None: + config_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "openssl.cnf" + ) + print(f"Config path: {config_path}") # Debug print to verify path + if not os.path.exists(config_path): + raise FileNotFoundError(f"Config file not found at {config_path}") + subprocess.run( + [ + "openssl", + "req", + "-x509", + "-newkey", + "rsa:4096", + "-keyout", + "serverkey.pem", + "-out", + "servercert.pem", + "-days", + "365", + "-nodes", + "-subj", + "/CN=localhost", + "-config", + config_path, + ] + ) + + def find_free_port() -> int: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.bind(("", 0)) @@ -77,6 +106,8 @@ def _run_server( chroma_server_authz_provider: Optional[str] = None, chroma_server_authz_config_file: Optional[str] = None, chroma_server_authz_config: Optional[Dict[str, Any]] = None, + chroma_server_ssl_certfile: Optional[str] = None, + chroma_server_ssl_keyfile: Optional[str] = None, ) -> None: """Run a Chroma server locally""" if is_persistent and persist_directory: @@ -123,6 +154,8 @@ def _run_server( port=port, log_level="error", timeout_keep_alive=30, + ssl_keyfile=chroma_server_ssl_keyfile, + ssl_certfile=chroma_server_ssl_certfile, ) @@ -152,6 +185,8 @@ def _fastapi_fixture( chroma_server_authz_provider: Optional[str] = None, chroma_server_authz_config_file: Optional[str] = None, chroma_server_authz_config: Optional[Dict[str, Any]] = None, + chroma_server_ssl_certfile: Optional[str] = None, + chroma_server_ssl_keyfile: Optional[str] = None, ) -> Generator[System, None, None]: """Fixture generator that launches a server in a separate process, and yields a fastapi client connect to it""" @@ -171,6 +206,8 @@ def _fastapi_fixture( Optional[str], Optional[str], Optional[Dict[str, Any]], + Optional[str], + Optional[str], ] = ( port, False, @@ -183,6 +220,8 @@ def _fastapi_fixture( chroma_server_authz_provider, chroma_server_authz_config_file, chroma_server_authz_config, + chroma_server_ssl_certfile, + chroma_server_ssl_keyfile, ) persist_directory = None if is_persistent: @@ -199,17 +238,21 @@ def _fastapi_fixture( chroma_server_authz_provider, chroma_server_authz_config_file, chroma_server_authz_config, + chroma_server_ssl_certfile, + chroma_server_ssl_keyfile, ) proc = ctx.Process(target=_run_server, args=args, daemon=True) proc.start() settings = Settings( chroma_api_impl="chromadb.api.fastapi.FastAPI", chroma_server_host="localhost", - chroma_server_http_port=str(port), + chroma_server_http_port=port, allow_reset=True, chroma_client_auth_provider=chroma_client_auth_provider, chroma_client_auth_credentials=chroma_client_auth_credentials, chroma_client_auth_token_transport_header=chroma_client_auth_token_transport_header, + chroma_server_ssl_verify=chroma_server_ssl_certfile, + chroma_server_ssl_enabled=True if chroma_server_ssl_certfile else False, ) system = System(settings) api = system.instance(ServerAPI) @@ -231,10 +274,19 @@ def fastapi_persistent() -> Generator[System, None, None]: return _fastapi_fixture(is_persistent=True) +def fastapi_ssl() -> Generator[System, None, None]: + generate_self_signed_certificate() + return _fastapi_fixture( + is_persistent=False, + chroma_server_ssl_certfile="./servercert.pem", + chroma_server_ssl_keyfile="./serverkey.pem", + ) + + def basic_http_client() -> Generator[System, None, None]: settings = Settings( chroma_api_impl="chromadb.api.fastapi.FastAPI", - chroma_server_http_port="8000", + chroma_server_http_port=8000, allow_reset=True, ) system = System(settings) @@ -400,6 +452,11 @@ def system_fixtures_wrong_auth() -> List[Callable[[], Generator[System, None, No return fixtures +def system_fixtures_ssl() -> List[Callable[[], Generator[System, None, None]]]: + fixtures = [fastapi_ssl] + return fixtures + + @pytest.fixture(scope="module", params=system_fixtures_wrong_auth()) def system_wrong_auth( request: pytest.FixtureRequest, @@ -411,6 +468,10 @@ def system_wrong_auth( def system(request: pytest.FixtureRequest) -> Generator[ServerAPI, None, None]: yield next(request.param()) +@pytest.fixture(scope="module", params=system_fixtures_ssl()) +def system_ssl(request: pytest.FixtureRequest) -> Generator[ServerAPI, None, None]: + yield next(request.param()) + @pytest.fixture(scope="module", params=system_fixtures_auth()) def system_auth(request: pytest.FixtureRequest) -> Generator[ServerAPI, None, None]: @@ -432,6 +493,14 @@ def client(system: System) -> Generator[ClientAPI, None, None]: client.clear_system_cache() +@pytest.fixture(scope="function") +def client_ssl(system_ssl: System) -> Generator[ClientAPI, None, None]: + system_ssl.reset_state() + client = ClientCreator.from_system(system_ssl) + yield client + client.clear_system_cache() + + @pytest.fixture(scope="function") def api_wrong_cred( system_wrong_auth: System, diff --git a/chromadb/test/db/test_system.py b/chromadb/test/db/test_system.py index 3cd2a9954ec9..e65beeb5b62c 100644 --- a/chromadb/test/db/test_system.py +++ b/chromadb/test/db/test_system.py @@ -721,7 +721,7 @@ def test_update_segment(sysdb: SysDB) -> None: scope=SegmentScope.VECTOR, topic="test_topic_a", collection=sample_collections[0]["id"], - metadata=metadata + metadata=metadata, ) sysdb.reset_state() @@ -732,52 +732,61 @@ def test_update_segment(sysdb: SysDB) -> None: sysdb.create_segment(segment) + # TODO: revisit update segment - push collection id # Update topic to new value segment["topic"] = "new_topic" sysdb.update_segment(segment["id"], topic=segment["topic"]) result = sysdb.get_segments(id=segment["id"]) + result[0]["collection"] = segment["collection"] assert result == [segment] # Update topic to None segment["topic"] = None sysdb.update_segment(segment["id"], topic=segment["topic"]) result = sysdb.get_segments(id=segment["id"]) + result[0]["collection"] = segment["collection"] assert result == [segment] # Update collection to new value segment["collection"] = sample_collections[1]["id"] sysdb.update_segment(segment["id"], collection=segment["collection"]) result = sysdb.get_segments(id=segment["id"]) + result[0]["collection"] = segment["collection"] assert result == [segment] # Update collection to None segment["collection"] = None sysdb.update_segment(segment["id"], collection=segment["collection"]) result = sysdb.get_segments(id=segment["id"]) + result[0]["collection"] = segment["collection"] assert result == [segment] # Add a new metadata key metadata["test_str2"] = "str2" sysdb.update_segment(segment["id"], metadata={"test_str2": "str2"}) result = sysdb.get_segments(id=segment["id"]) + result[0]["collection"] = segment["collection"] assert result == [segment] # Update a metadata key metadata["test_str"] = "str3" sysdb.update_segment(segment["id"], metadata={"test_str": "str3"}) result = sysdb.get_segments(id=segment["id"]) + result[0]["collection"] = segment["collection"] assert result == [segment] # Delete a metadata key del metadata["test_str"] sysdb.update_segment(segment["id"], metadata={"test_str": None}) result = sysdb.get_segments(id=segment["id"]) + result[0]["collection"] = segment["collection"] assert result == [segment] # Delete all metadata keys segment["metadata"] = None sysdb.update_segment(segment["id"], metadata=None) result = sysdb.get_segments(id=segment["id"]) + result[0]["collection"] = segment["collection"] assert result == [segment] diff --git a/chromadb/test/openssl.cnf b/chromadb/test/openssl.cnf new file mode 100644 index 000000000000..11704076bd47 --- /dev/null +++ b/chromadb/test/openssl.cnf @@ -0,0 +1,12 @@ +[req] +distinguished_name = req_distinguished_name +x509_extensions = usr_cert + +[req_distinguished_name] +CN = localhost + +[usr_cert] +subjectAltName = @alt_names + +[alt_names] +DNS.1 = localhost \ No newline at end of file diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index 844476aa8eaf..251dfa74f38b 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -14,7 +14,7 @@ run_state_machine_as_test, MultipleResults, ) -from typing import Dict, Optional +from typing import Dict, Optional, Any, Mapping class CollectionStateMachine(RuleBasedStateMachine): @@ -54,7 +54,7 @@ def create_coll( metadata=coll.metadata, embedding_function=coll.embedding_function, ) - self.set_model(coll.name, coll.metadata) + self.set_model(coll.name, coll.metadata, str(coll.id)) assert c.name == coll.name assert c.metadata == self.model[coll.name] @@ -85,7 +85,7 @@ def delete_coll(self, coll: strategies.Collection) -> None: @rule() def list_collections(self) -> None: colls = self.api.list_collections() - assert len(colls) == len(self.model) + assert len(colls) == len([c for c in self.model if not c.startswith("__id__")]) for c in colls: assert c.name in self.model @@ -163,7 +163,7 @@ def get_or_create_coll( coll.metadata = ( self.model[coll.name] if new_metadata is None else new_metadata ) - self.set_model(coll.name, coll.metadata) + self.set_model(coll.name, coll.metadata, str(coll.id)) # Update API c = self.api.get_or_create_collection( @@ -189,13 +189,17 @@ def modify_coll( new_metadata: types.Metadata, new_name: Optional[str], ) -> MultipleResults[strategies.Collection]: + # early exit if a col with name exists but with diff id, possibly in another tenant/db + if coll.name in self.model and f"__id__:{coll.id}" not in self.model: + return multiple() if coll.name not in self.model: with pytest.raises(Exception): c = self.api.get_collection(name=coll.name) return multiple() c = self.api.get_collection(name=coll.name) - + _metadata: Optional[Mapping[str, Any]] = coll.metadata + _name: str = coll.name if new_metadata is not None: if len(new_metadata) == 0: with pytest.raises(Exception): @@ -206,7 +210,7 @@ def modify_coll( ) return multiple() coll.metadata = new_metadata - self.set_model(coll.name, coll.metadata) + _metadata = new_metadata if new_name is not None: if new_name in self.model and new_name != coll.name: @@ -214,12 +218,12 @@ def modify_coll( c.modify(metadata=new_metadata, name=new_name) return multiple() - prev_metadata = self.model[coll.name] self.delete_from_model(coll.name) - self.set_model(new_name, prev_metadata) coll.name = new_name + _name = new_name + self.set_model(_name, _metadata, str(coll.id)) - c.modify(metadata=new_metadata, name=new_name) + c.modify(metadata=_metadata, name=_name) c = self.api.get_collection(name=coll.name) assert c.name == coll.name @@ -227,14 +231,21 @@ def modify_coll( return multiple(coll) def set_model( - self, name: str, metadata: Optional[types.CollectionMetadata] + self, + name: str, + metadata: Optional[types.CollectionMetadata], + id: Optional[str] = None, ) -> None: model = self.model model[name] = metadata + if id is not None: + model[f"__id__:{id}"] = metadata - def delete_from_model(self, name: str) -> None: + def delete_from_model(self, name: str, id: Optional[str] = None) -> None: model = self.model del model[name] + if id is not None: + del model[f"__id__:{id}"] @property def model(self) -> Dict[str, Optional[types.CollectionMetadata]]: diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index cfb2c93fa524..bf3e882184ff 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -455,3 +455,10 @@ def test_autocasting_validate_embeddings_incompatible_types( validate_embeddings(Collection._normalize_embeddings(embds)) assert "Expected each value in the embedding to be a int or float" in str(e) + + +def test_0dim_embedding_validation() -> None: + embds = [[]] + with pytest.raises(ValueError) as e: + validate_embeddings(embds) + assert "Expected each embedding in the embeddings to be a non-empty list" in str(e) \ No newline at end of file diff --git a/chromadb/test/quota/test_static_quota_enforcer.py b/chromadb/test/quota/test_static_quota_enforcer.py new file mode 100644 index 000000000000..245e9ba2e804 --- /dev/null +++ b/chromadb/test/quota/test_static_quota_enforcer.py @@ -0,0 +1,78 @@ +import random +import string +from typing import Optional, List, Tuple, Any +from unittest.mock import patch + +from chromadb.config import System, Settings +from chromadb.quota import QuotaEnforcer, Resource +import pytest + + +def generate_random_string(size: int) -> str: + return ''.join(random.choices(string.ascii_letters + string.digits, k=size)) + +def mock_get_for_subject(self, resource: Resource, subject: Optional[str] = "", tier: Optional[str] = "") -> Optional[ + int]: + """Mock function to simulate quota retrieval.""" + return 10 + + +def run_static_checks(enforcer: QuotaEnforcer, test_cases: List[Tuple[Any, Optional[str]]], data_key: str): + """Generalized function to run static checks on different types of data.""" + for test_case in test_cases: + data, expected_error = test_case if len(test_case) == 2 else (test_case[0], None) + args = {data_key: [data]} + if expected_error: + with pytest.raises(Exception) as exc_info: + enforcer.static_check(**args) + assert expected_error in str(exc_info.value.resource) + else: + enforcer.static_check(**args) + + + +@pytest.fixture(scope="module") +def enforcer() -> QuotaEnforcer: + settings = Settings( + chroma_quota_provider_impl = "chromadb.quota.test_provider.QuotaProviderForTest" + ) + system = System(settings) + return system.require(QuotaEnforcer) + +@patch('chromadb.quota.test_provider.QuotaProviderForTest.get_for_subject', mock_get_for_subject) +def test_static_enforcer_metadata(enforcer): + test_cases = [ + ({generate_random_string(20): generate_random_string(5)}, "METADATA_KEY_LENGTH"), + ({generate_random_string(5): generate_random_string(5)}, None), + ({generate_random_string(5): generate_random_string(20)}, "METADATA_VALUE_LENGTH"), + ({generate_random_string(5): generate_random_string(5)}, None) + ] + run_static_checks(enforcer, test_cases, 'metadatas') + + +@patch('chromadb.quota.test_provider.QuotaProviderForTest.get_for_subject', mock_get_for_subject) +def test_static_enforcer_documents(enforcer): + test_cases = [ + (generate_random_string(20), "DOCUMENT_SIZE"), + (generate_random_string(5), None) + ] + run_static_checks(enforcer, test_cases, 'documents') + +@patch('chromadb.quota.test_provider.QuotaProviderForTest.get_for_subject', mock_get_for_subject) +def test_static_enforcer_embeddings(enforcer): + test_cases = [ + (random.sample(range(1, 101), 100), "EMBEDDINGS_DIMENSION"), + (random.sample(range(1, 101), 5), None) + ] + run_static_checks(enforcer, test_cases, 'embeddings') + +# Should not raise an error if no quota provider is present +def test_enforcer_without_quota_provider(): + test_cases = [ + (random.sample(range(1, 101), 1), None), + (random.sample(range(1, 101), 5), None) + ] + settings = Settings() + system = System(settings) + enforcer = system.require(QuotaEnforcer) + run_static_checks(enforcer, test_cases, 'embeddings') diff --git a/chromadb/test/segment/test_metadata.py b/chromadb/test/segment/test_metadata.py index df80e5dca20a..aa87526cbdbb 100644 --- a/chromadb/test/segment/test_metadata.py +++ b/chromadb/test/segment/test_metadata.py @@ -3,6 +3,8 @@ import tempfile import pytest from typing import Generator, List, Callable, Iterator, Dict, Optional, Union, Sequence + +from chromadb.api.types import validate_metadata from chromadb.config import System, Settings from chromadb.db.base import ParameterValue, get_sql from chromadb.db.impl.sqlite import SqliteDB @@ -735,3 +737,10 @@ def test_delete_single_fts_record( res = cur.execute(sql, params) # assert that the ids that are deleted from the segment are also gone from the fts table assert len(res.fetchall()) == 0 + + +def test_metadata_validation_forbidden_key() -> None: + with pytest.raises(ValueError, match="chroma:document"): + validate_metadata( + {"chroma:document": "this is not the document you are looking for"} + ) diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index 36a82205e45c..cb88ed2bb77e 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -1,5 +1,7 @@ # type: ignore +import traceback import requests +from urllib3.connectionpool import InsecureRequestWarning import chromadb from chromadb.api.fastapi import FastAPI @@ -360,6 +362,7 @@ def test_modify_error_on_existing_name(api): with pytest.raises(Exception): c2.modify(name="testspace") + def test_modify_warn_on_DF_change(api, caplog): api.reset() @@ -368,6 +371,7 @@ def test_modify_warn_on_DF_change(api, caplog): with pytest.raises(Exception, match="not supported") as e: collection.modify(metadata={"hnsw:space": "cosine"}) + def test_metadata_cru(api): api.reset() metadata_a = {"a": 1, "b": 2} @@ -1437,6 +1441,7 @@ def test_invalid_embeddings(api): # test to make sure update shows exception for bad dimensionality + def test_dimensionality_exception_update(api): api.reset() collection = api.create_collection("test_dimensionality_update_exception") @@ -1446,8 +1451,10 @@ def test_dimensionality_exception_update(api): collection.update(**bad_dimensionality_records) assert "dimensionality" in str(e.value) + # test to make sure upsert shows exception for bad dimensionality + def test_dimensionality_exception_upsert(api): api.reset() collection = api.create_collection("test_dimensionality_upsert_exception") @@ -1456,3 +1463,39 @@ def test_dimensionality_exception_upsert(api): with pytest.raises(Exception) as e: collection.upsert(**bad_dimensionality_records) assert "dimensionality" in str(e.value) + + +def test_ssl_self_signed(client_ssl): + if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY"): + pytest.skip("Skipping test for integration test") + client_ssl.heartbeat() + + +def test_ssl_self_signed_without_ssl_verify(client_ssl): + if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY"): + pytest.skip("Skipping test for integration test") + client_ssl.heartbeat() + _port = client_ssl._server._settings.chroma_server_http_port + with pytest.raises(ValueError) as e: + chromadb.HttpClient(ssl=True, port=_port) + stack_trace = traceback.format_exception( + type(e.value), e.value, e.value.__traceback__ + ) + client_ssl.clear_system_cache() + assert "CERTIFICATE_VERIFY_FAILED" in "".join(stack_trace) + + +def test_ssl_self_signed_with_verify_false(client_ssl): + if os.environ.get("CHROMA_INTEGRATION_TEST_ONLY"): + pytest.skip("Skipping test for integration test") + client_ssl.heartbeat() + _port = client_ssl._server._settings.chroma_server_http_port + with pytest.warns(InsecureRequestWarning) as record: + client = chromadb.HttpClient( + ssl=True, + port=_port, + settings=chromadb.Settings(chroma_server_ssl_verify=False), + ) + client.heartbeat() + client_ssl.clear_system_cache() + assert "Unverified HTTPS request" in str(record[0].message) diff --git a/chromadb/test/test_chroma.py b/chromadb/test/test_chroma.py index 9d88ea8cc492..89b4ae924eb0 100644 --- a/chromadb/test/test_chroma.py +++ b/chromadb/test/test_chroma.py @@ -66,7 +66,7 @@ def test_fastapi(self, mock: Mock) -> None: chroma_api_impl="chromadb.api.fastapi.FastAPI", persist_directory="./foo", chroma_server_host="foo", - chroma_server_http_port="80", + chroma_server_http_port=80, ) ) assert mock.called @@ -78,7 +78,7 @@ def test_settings_pass_to_fastapi(self, mock: Mock) -> None: settings = chromadb.config.Settings( chroma_api_impl="chromadb.api.fastapi.FastAPI", chroma_server_host="foo", - chroma_server_http_port="80", + chroma_server_http_port=80, chroma_server_headers={"foo": "bar"}, ) client = chromadb.Client(settings) @@ -106,7 +106,7 @@ def test_legacy_values() -> None: chroma_api_impl="chromadb.api.local.LocalAPI", persist_directory="./foo", chroma_server_host="foo", - chroma_server_http_port="80", + chroma_server_http_port=80, ) ) client.clear_system_cache() diff --git a/chromadb/test/test_client.py b/chromadb/test/test_client.py index f67293d85864..34dd2df14127 100644 --- a/chromadb/test/test_client.py +++ b/chromadb/test/test_client.py @@ -60,9 +60,9 @@ def test_http_client_with_inconsistent_host_settings() -> None: def test_http_client_with_inconsistent_port_settings() -> None: try: chromadb.HttpClient( - port="8002", + port=8002, settings=Settings( - chroma_server_http_port="8001", + chroma_server_http_port=8001, ), ) except ValueError as e: diff --git a/chromadb/utils/embedding_functions.py b/chromadb/utils/embedding_functions.py index ec5fc05e3ee9..f54ab88c42e3 100644 --- a/chromadb/utils/embedding_functions.py +++ b/chromadb/utils/embedding_functions.py @@ -506,11 +506,17 @@ def model(self) -> "InferenceSession": raise ValueError( f"Preferred providers must be subset of available providers: {self.ort.get_available_providers()}" ) + + # Suppress onnxruntime warnings. This produces logspew, mainly when onnx tries to use CoreML, which doesn't fit this model. + so = self.ort.SessionOptions() + so.log_severity_level = 3 + return self.ort.InferenceSession( os.path.join(self.DOWNLOAD_PATH, self.EXTRACTED_FOLDER_NAME, "model.onnx"), # Since 1.9 onnyx runtime requires providers to be specified when there are multiple available - https://onnxruntime.ai/docs/api/python/api_summary.html # This is probably not ideal but will improve DX as no exceptions will be raised in multi-provider envs providers=self._preferred_providers, + sess_options=so, ) def __call__(self, input: Documents) -> Embeddings: diff --git a/clients/js/test/add.collections.test.ts b/clients/js/test/add.collections.test.ts index cb89fa8dbe06..7ac271ff98e9 100644 --- a/clients/js/test/add.collections.test.ts +++ b/clients/js/test/add.collections.test.ts @@ -100,3 +100,17 @@ test('It should return an error when inserting duplicate IDs in the same batch', expect(e.message).toMatch('duplicates') } }) + + +test('should error on empty embedding', async () => { + await chroma.reset() + const collection = await chroma.createCollection({ name: "test" }); + const ids = ["id1"] + const embeddings = [[]] + const metadatas = [{ test: 'test1', 'float_value': 0.1 }] + try { + await collection.add({ ids, embeddings, metadatas }); + } catch (e: any) { + expect(e.message).toMatch('got empty embedding at pos') + } +}) \ No newline at end of file diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml index b62c002d095c..edd0c00d7cf5 100644 --- a/clients/python/pyproject.toml +++ b/clients/python/pyproject.toml @@ -26,6 +26,7 @@ dependencies = [ 'typing_extensions >= 4.5.0', 'tenacity>=8.2.3', 'PyYAML>=6.0.0', + 'orjson>=3.9.12', ] [tool.black] diff --git a/clients/python/requirements.txt b/clients/python/requirements.txt index 1242bf7d7e0f..b977b03f064a 100644 --- a/clients/python/requirements.txt +++ b/clients/python/requirements.txt @@ -9,3 +9,4 @@ PyYAML>=6.0.0 requests >= 2.28 tenacity>=8.2.3 typing_extensions >= 4.5.0 +orjson>=3.9.12 diff --git a/docker-compose.test-auth.yml b/docker-compose.test-auth.yml index 259d4c54e79a..d3297b5a04fc 100644 --- a/docker-compose.test-auth.yml +++ b/docker-compose.test-auth.yml @@ -11,7 +11,7 @@ services: dockerfile: Dockerfile volumes: - chroma-data:/chroma/chroma - command: uvicorn chromadb.app:app --workers 1 --host 0.0.0.0 --port 8000 --log-config chromadb/log_config.yml --timeout-keep-alive 30 + command: "--workers 1 --host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30" environment: - ANONYMIZED_TELEMETRY=False - ALLOW_RESET=True diff --git a/docker-compose.test.yml b/docker-compose.test.yml index c8cae63b3eba..4384bad1982a 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -11,7 +11,7 @@ services: dockerfile: Dockerfile volumes: - chroma-data:/chroma/chroma - command: uvicorn chromadb.app:app --workers 1 --host 0.0.0.0 --port 8000 --log-config chromadb/log_config.yml --timeout-keep-alive 30 + command: "--workers 1 --host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30" environment: - ANONYMIZED_TELEMETRY=False - ALLOW_RESET=True diff --git a/docker-compose.yml b/docker-compose.yml index 3e023109458a..20d096569070 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,13 +15,14 @@ services: # Default configuration for persist_directory in chromadb/config.py # Read more about deployments: https://docs.trychroma.com/deployment - chroma-data:/chroma/chroma - command: uvicorn chromadb.app:app --reload --workers 1 --host 0.0.0.0 --port 8000 --log-config chromadb/log_config.yml --timeout-keep-alive 30 + command: "--workers 1 --host 0.0.0.0 --port 8000 --proxy-headers --log-config chromadb/log_config.yml --timeout-keep-alive 30" environment: - IS_PERSISTENT=TRUE - CHROMA_SERVER_AUTH_PROVIDER=${CHROMA_SERVER_AUTH_PROVIDER} - CHROMA_SERVER_AUTH_CREDENTIALS_FILE=${CHROMA_SERVER_AUTH_CREDENTIALS_FILE} - CHROMA_SERVER_AUTH_CREDENTIALS=${CHROMA_SERVER_AUTH_CREDENTIALS} - CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER=${CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER} + - CHROMA_SERVER_AUTH_TOKEN_TRANSPORT_HEADER=${CHROMA_SERVER_AUTH_TOKEN_TRANSPORT_HEADER} - PERSIST_DIRECTORY=${PERSIST_DIRECTORY:-/chroma/chroma} - CHROMA_OTEL_EXPORTER_ENDPOINT=${CHROMA_OTEL_EXPORTER_ENDPOINT} - CHROMA_OTEL_EXPORTER_HEADERS=${CHROMA_OTEL_EXPORTER_HEADERS} diff --git a/docs/cip/CIP-01022024_SSL_Verify_Client_Config.md b/docs/cip/CIP-01022024_SSL_Verify_Client_Config.md new file mode 100644 index 000000000000..2448af11c88e --- /dev/null +++ b/docs/cip/CIP-01022024_SSL_Verify_Client_Config.md @@ -0,0 +1,68 @@ +# CIP-01022024 SSL Verify Client Config + +## Status + +Current Status: `Under Discussion` + +## Motivation + +The motivation for this change is to enhance security and flexibility in Chroma's client API. Users need the ability to +configure SSL contexts to trust custom CA certificates or self-signed certificates, which is not straightforward with +the current setup. This capability is crucial for organizations that operate their own CA or for developers who need to +test their applications in environments where certificates from a recognized CA are not available or practical. + +The suggested change entails a server-side certificate be available, but this CIP does not prescribe how such +certificate should be configured or obtained. In our testing, we used a self-signed certificate generated with +`openssl` and configured the client to trust the certificate. We also experiment with a SSL-terminated proxy server. +Both of approaches yielded the same results. + +> **IMPORTANT:** It should be noted that we do not recommend or encourage the use of self-signed certificates in +> production environments. + +We also provide a sample notebook that to help the reader run a local Chroma server with a self-signed certificate and +configure the client to trust the certificate. The notebook can be found +in [assets/CIP-01022024-test_self_signed.ipynb](./assets/CIP-01022024-test_self_signed.ipynb). + +## Public Interfaces + +> **Note:** The following changes are only applicable to Chroma HttpClient. + +New settings variable `chroma_server_ssl_verify` accepting either a boolean or a path to a certificate file. If the +value is a path to a certificate file, the file will be used to verify the server's certificate. If the value is a +boolean, the SSL certificate verification can be bypassed (`false`) or enforced (`true`). + +The value is passed as `verify` parameter to `requests.Session` of the `FastAPI` client. See +requests [documentation](https://requests.readthedocs.io/en/latest/user/advanced/#ssl-cert-verification) for +more details. + +Example Usage: + +```python +import chromadb +from chromadb import Settings +client = chromadb.HttpClient(host="localhost",port="8443",ssl=True, settings=Settings(chroma_server_ssl_verify='./servercert.pem')) +# or with boolean +client = chromadb.HttpClient(host="localhost",port="8443",ssl=True, settings=Settings(chroma_server_ssl_verify=False)) +``` + +### Resources + +- https://requests.readthedocs.io/en/latest/api/#requests.request +- https://www.geeksforgeeks.org/ssl-certificate-verification-python-requests/ + +## Proposed Changes + +The proposed changes are mentioned in the public interfaces. + +## Compatibility, Deprecation, and Migration Plan + +The change is not backward compatible from client's perspective as the lack of the feature in prior clients will cause +an error when passing the new settings parameter. Server-side is not affected by this change. + +## Test Plan + +API tests with SSL verification enabled and a self-signed certificate. + +## Rejected Alternatives + +N/A diff --git a/docs/cip/assets/CIP-01022024-test_self_signed.ipynb b/docs/cip/assets/CIP-01022024-test_self_signed.ipynb new file mode 100644 index 000000000000..d607b51824b7 --- /dev/null +++ b/docs/cip/assets/CIP-01022024-test_self_signed.ipynb @@ -0,0 +1,119 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Generate a Certificate\n", + "\n", + "```bash\n", + "openssl req -new -newkey rsa:2048 -sha256 -days 365 -nodes -x509 \\\n", + " -keyout ./serverkey.pem \\\n", + " -out ./servercert.pem \\\n", + " -subj \"/O=Chroma/C=US\" \\\n", + " -config chromadb/test/openssl.cnf\n", + "```\n", + "\n", + "> Note: The above command should be executed at the root of the repo (openssl.cnf uses relative path)\n" + ], + "metadata": { + "collapsed": false + }, + "id": "faa8cefb6825fe83" + }, + { + "cell_type": "markdown", + "source": [ + "# Start the server\n", + "\n", + "```bash\n", + "uvicorn chromadb.app:app --workers 1 --host 0.0.0.0 --port 8443 \\\n", + " --proxy-headers --log-config chromadb/log_config.yml --ssl-keyfile ./serverkey.pem --ssl-certfile ./servercert.pem\n", + "```" + ], + "metadata": { + "collapsed": false + }, + "id": "e084285e11c3747d" + }, + { + "cell_type": "markdown", + "source": [ + "# Test with cert as SSL verify string" + ], + "metadata": { + "collapsed": false + }, + "id": "130df9c0a6d67b52" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "initial_id", + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from chromadb import Settings\n", + "import chromadb\n", + "client = chromadb.HttpClient(host=\"localhost\",port=\"8443\",ssl=True, settings=Settings(chroma_server_ssl_verify='./servercert.pem'))\n", + "print(client.heartbeat())" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Test with cert as SSL verify boolean" + ], + "metadata": { + "collapsed": false + }, + "id": "8223d0100df06ec4" + }, + { + "cell_type": "code", + "outputs": [], + "source": [ + "from chromadb import Settings\n", + "import chromadb\n", + "client = chromadb.HttpClient(host=\"localhost\",port=\"8443\",ssl=True, settings=Settings(chroma_server_ssl_verify=False))\n", + "print(client.heartbeat())" + ], + "metadata": { + "collapsed": false + }, + "id": "f7cf299721741c1", + "execution_count": null + }, + { + "cell_type": "code", + "outputs": [], + "source": [], + "metadata": { + "collapsed": false + }, + "id": "6231ac2ac38383c2" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/go/coordinator/Dockerfile b/go/coordinator/Dockerfile index a86f5cc258f0..59da87fdb60e 100644 --- a/go/coordinator/Dockerfile +++ b/go/coordinator/Dockerfile @@ -1,12 +1,15 @@ FROM golang:1.20-alpine3.18 as build - +WORKDIR /src/chroma-coordinator RUN apk add --no-cache make git build-base bash +ADD ./go/coordinator/go.mod ./go.mod +ADD ./go/coordinator/go.sum ./go.sum ENV PATH=$PATH:/go/bin -ADD ./go/coordinator /src/chroma-coordinator +RUN go mod download -RUN cd /src/chroma-coordinator \ - && make +ADD ./go/coordinator ./ +ENV GOCACHE=/root/.cache/go-build +RUN --mount=type=cache,target="/root/.cache/go-build" make FROM alpine:3.17.3 @@ -23,9 +26,8 @@ RUN apk add \ RUN mkdir /chroma-coordinator WORKDIR /chroma-coordinator -COPY --from=build /src/chroma-coordinator/bin/chroma /chroma-coordinator/bin/chroma +COPY --from=build /src/chroma-coordinator/bin/coordinator /chroma-coordinator/bin/coordinator +COPY --from=build /src/chroma-coordinator/bin/logservice /chroma-coordinator/bin/logservice ENV PATH=$PATH:/chroma-coordinator/bin -COPY --from=build /src/chroma-coordinator/migrations /chroma-coordinator/migrations - CMD /bin/bash diff --git a/go/coordinator/Dockerfile.migration b/go/coordinator/Dockerfile.migration new file mode 100644 index 000000000000..092f2629540f --- /dev/null +++ b/go/coordinator/Dockerfile.migration @@ -0,0 +1,4 @@ +FROM arigaio/atlas:latest +workdir /app +COPY ./go/coordinator/migrations migrations +COPY ./go/coordinator/atlas.hcl atlas.hcl diff --git a/go/coordinator/Makefile b/go/coordinator/Makefile index 8fb52e4bb748..f1a440e4744c 100644 --- a/go/coordinator/Makefile +++ b/go/coordinator/Makefile @@ -1,6 +1,7 @@ .PHONY: build build: - go build -v -o bin/chroma ./cmd + go build -v -o bin/coordinator ./cmd/coordinator/ + go build -v -o bin/logservice ./cmd/logservice/ test: build go test -cover -race ./... diff --git a/go/coordinator/atlas.hcl b/go/coordinator/atlas.hcl index 2883c58d65e8..f2c17f57c191 100644 --- a/go/coordinator/atlas.hcl +++ b/go/coordinator/atlas.hcl @@ -10,9 +10,9 @@ data "external_schema" "gorm" { ] } -env "gorm" { +env "dev" { src = data.external_schema.gorm.url - dev = "postgres://localhost:5432/dev?sslmode=disable" + dev = "postgres://localhost:5432/chroma?sslmode=disable" migration { dir = "file://migrations" } diff --git a/go/coordinator/cmd/grpccoordinator/cmd.go b/go/coordinator/cmd/coordinator/cmd.go similarity index 64% rename from go/coordinator/cmd/grpccoordinator/cmd.go rename to go/coordinator/cmd/coordinator/cmd.go index 8859790b56c8..a1dadfc5cdca 100644 --- a/go/coordinator/cmd/grpccoordinator/cmd.go +++ b/go/coordinator/cmd/coordinator/cmd.go @@ -1,18 +1,18 @@ -package grpccoordinator +package main import ( + "github.com/chroma/chroma-coordinator/internal/coordinator/grpc" + "github.com/chroma/chroma-coordinator/internal/grpcutils" "io" "time" "github.com/chroma/chroma-coordinator/cmd/flag" - "github.com/chroma/chroma-coordinator/internal/grpccoordinator" - "github.com/chroma/chroma-coordinator/internal/grpccoordinator/grpcutils" "github.com/chroma/chroma-coordinator/internal/utils" "github.com/spf13/cobra" ) var ( - conf = grpccoordinator.Config{ + conf = grpc.Config{ GrpcConfig: &grpcutils.GrpcConfig{}, } @@ -30,14 +30,15 @@ func init() { flag.GRPCAddr(Cmd, &conf.GrpcConfig.BindAddress) // System Catalog - Cmd.Flags().StringVar(&conf.SystemCatalogProvider, "system-catalog-provider", "memory", "System catalog provider") - Cmd.Flags().StringVar(&conf.Username, "username", "root", "MetaTable username") - Cmd.Flags().StringVar(&conf.Password, "password", "", "MetaTable password") - Cmd.Flags().StringVar(&conf.Address, "db-address", "127.0.0.1", "MetaTable db address") - Cmd.Flags().IntVar(&conf.Port, "db-port", 5432, "MetaTable db port") - Cmd.Flags().StringVar(&conf.DBName, "db-name", "", "MetaTable db name") - Cmd.Flags().IntVar(&conf.MaxIdleConns, "max-idle-conns", 10, "MetaTable max idle connections") - Cmd.Flags().IntVar(&conf.MaxOpenConns, "max-open-conns", 10, "MetaTable max open connections") + Cmd.Flags().StringVar(&conf.SystemCatalogProvider, "system-catalog-provider", "database", "System catalog provider") + Cmd.Flags().StringVar(&conf.DBConfig.Username, "username", "chroma", "MetaTable username") + Cmd.Flags().StringVar(&conf.DBConfig.Password, "password", "chroma", "MetaTable password") + Cmd.Flags().StringVar(&conf.DBConfig.Address, "db-address", "postgres", "MetaTable db address") + Cmd.Flags().IntVar(&conf.DBConfig.Port, "db-port", 5432, "MetaTable db port") + Cmd.Flags().StringVar(&conf.DBConfig.DBName, "db-name", "chroma", "MetaTable db name") + Cmd.Flags().IntVar(&conf.DBConfig.MaxIdleConns, "max-idle-conns", 10, "MetaTable max idle connections") + Cmd.Flags().IntVar(&conf.DBConfig.MaxOpenConns, "max-open-conns", 10, "MetaTable max open connections") + Cmd.Flags().StringVar(&conf.DBConfig.SslMode, "ssl-mode", "disable", "SSL mode for database connection") // Pulsar Cmd.Flags().StringVar(&conf.PulsarAdminURL, "pulsar-admin-url", "http://localhost:8080", "Pulsar admin url") @@ -59,6 +60,6 @@ func init() { func exec(*cobra.Command, []string) { utils.RunProcess(func() (io.Closer, error) { - return grpccoordinator.New(conf) + return grpc.New(conf) }) } diff --git a/go/coordinator/cmd/main.go b/go/coordinator/cmd/coordinator/main.go similarity index 85% rename from go/coordinator/cmd/main.go rename to go/coordinator/cmd/coordinator/main.go index 0b7cfa7b54d7..bfa31c8c9be9 100644 --- a/go/coordinator/cmd/main.go +++ b/go/coordinator/cmd/coordinator/main.go @@ -4,7 +4,6 @@ import ( "fmt" "os" - "github.com/chroma/chroma-coordinator/cmd/grpccoordinator" "github.com/chroma/chroma-coordinator/internal/utils" "github.com/rs/zerolog" "github.com/spf13/cobra" @@ -20,7 +19,7 @@ var ( ) func init() { - rootCmd.AddCommand(grpccoordinator.Cmd) + rootCmd.AddCommand(Cmd) } func main() { diff --git a/go/coordinator/cmd/logservice/cmd.go b/go/coordinator/cmd/logservice/cmd.go new file mode 100644 index 000000000000..721067bb3b2e --- /dev/null +++ b/go/coordinator/cmd/logservice/cmd.go @@ -0,0 +1,46 @@ +package main + +import ( + "github.com/chroma/chroma-coordinator/cmd/flag" + "github.com/chroma/chroma-coordinator/internal/grpcutils" + "github.com/chroma/chroma-coordinator/internal/logservice/grpc" + "github.com/chroma/chroma-coordinator/internal/utils" + "github.com/spf13/cobra" + "io" +) + +var ( + conf = grpc.Config{ + GrpcConfig: &grpcutils.GrpcConfig{}, + } + + Cmd = &cobra.Command{ + Use: "logservice", + Short: "Start a logservice service", + Long: `RecordLog root command`, + Run: exec, + } +) + +func init() { + // GRPC + flag.GRPCAddr(Cmd, &conf.GrpcConfig.BindAddress) + Cmd.Flags().BoolVar(&conf.StartGrpc, "start-grpc", true, "start grpc server or not") + + // DB provider + Cmd.Flags().StringVar(&conf.DBProvider, "db-provider", "postgres", "DB provider") + + // DB dev + Cmd.Flags().StringVar(&conf.DBConfig.Address, "db-host", "postgres", "DB host") + Cmd.Flags().IntVar(&conf.DBConfig.Port, "db-port", 5432, "DB port") + Cmd.Flags().StringVar(&conf.DBConfig.Username, "db-user", "chroma", "DB user") + Cmd.Flags().StringVar(&conf.DBConfig.Password, "db-password", "chroma", "DB password") + Cmd.Flags().StringVar(&conf.DBConfig.DBName, "db-name", "chroma", "DB name") + Cmd.Flags().StringVar(&conf.DBConfig.SslMode, "ssl-mode", "disable", "SSL mode for database connection") +} + +func exec(*cobra.Command, []string) { + utils.RunProcess(func() (io.Closer, error) { + return grpc.New(conf) + }) +} diff --git a/go/coordinator/cmd/logservice/main.go b/go/coordinator/cmd/logservice/main.go new file mode 100644 index 000000000000..d88c70ec61e9 --- /dev/null +++ b/go/coordinator/cmd/logservice/main.go @@ -0,0 +1,36 @@ +package main + +import ( + "fmt" + "os" + + "github.com/chroma/chroma-coordinator/internal/utils" + "github.com/rs/zerolog" + "github.com/spf13/cobra" + "go.uber.org/automaxprocs/maxprocs" +) + +var ( + rootCmd = &cobra.Command{ + Use: "logservice", + Short: "RecordLog root command", + Long: `RecordLog root command`, + } +) + +func init() { + rootCmd.AddCommand(Cmd) +} + +func main() { + utils.LogLevel = zerolog.DebugLevel + utils.ConfigureLogger() + if _, err := maxprocs.Set(); err != nil { + _, _ = fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + if err := rootCmd.Execute(); err != nil { + _, _ = fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } +} diff --git a/go/coordinator/go.mod b/go/coordinator/go.mod index 93b04935f57f..8c9317b439ea 100644 --- a/go/coordinator/go.mod +++ b/go/coordinator/go.mod @@ -6,6 +6,7 @@ require ( ariga.io/atlas-provider-gorm v0.1.1 github.com/apache/pulsar-client-go v0.9.1-0.20231030094548-620ecf4addfb github.com/google/uuid v1.3.1 + github.com/lib/pq v1.10.7 github.com/pingcap/log v1.1.0 github.com/rs/zerolog v1.31.0 github.com/spf13/cobra v1.7.0 diff --git a/go/coordinator/go.sum b/go/coordinator/go.sum index 15390626451c..adb6bb095083 100644 --- a/go/coordinator/go.sum +++ b/go/coordinator/go.sum @@ -154,6 +154,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/lib/pq v1.10.7 h1:p7ZhMD+KsSRozJr34udlUrhboJwWAgCg34+/ZZNvZZw= +github.com/lib/pq v1.10.7/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/linkedin/goavro/v2 v2.9.8 h1:jN50elxBsGBDGVDEKqUlDuU1cFwJ11K/yrJCBMe/7Wg= github.com/linkedin/goavro/v2 v2.9.8/go.mod h1:UgQUb2N/pmueQYH9bfqFioWxzYCZXSfF8Jw03O5sjqA= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= diff --git a/go/coordinator/internal/common/errors.go b/go/coordinator/internal/common/errors.go index 0275e2b6574b..5ba4284410f9 100644 --- a/go/coordinator/internal/common/errors.go +++ b/go/coordinator/internal/common/errors.go @@ -31,6 +31,7 @@ var ( ErrInvalidCollectionUpdate = errors.New("invalid collection update, reset collection true and collection value not empty") ErrSegmentUniqueConstraintViolation = errors.New("unique constraint violation") ErrSegmentDeleteNonExistingSegment = errors.New("delete non existing segment") + ErrSegmentUpdateNonExistingSegment = errors.New("update non existing segment") // Segment metadata errors ErrUnknownSegmentMetadataType = errors.New("segment metadata value type not supported") diff --git a/go/coordinator/internal/coordinator/apis_test.go b/go/coordinator/internal/coordinator/apis_test.go index 62ff01ecec05..3f780c258c32 100644 --- a/go/coordinator/internal/coordinator/apis_test.go +++ b/go/coordinator/internal/coordinator/apis_test.go @@ -872,11 +872,13 @@ func TestUpdateSegment(t *testing.T) { }) // Update topic to new value + collectionID := segment.CollectionID.String() newTopic := "new_topic" segment.Topic = &newTopic c.UpdateSegment(ctx, &model.UpdateSegment{ - ID: segment.ID, - Topic: segment.Topic, + Collection: &collectionID, + ID: segment.ID, + Topic: segment.Topic, }) result, err := c.GetSegments(ctx, segment.ID, nil, nil, nil, types.NilUniqueID()) assert.NoError(t, err) @@ -885,6 +887,7 @@ func TestUpdateSegment(t *testing.T) { // Update topic to None segment.Topic = nil c.UpdateSegment(ctx, &model.UpdateSegment{ + Collection: &collectionID, ID: segment.ID, Topic: segment.Topic, ResetTopic: true, @@ -893,33 +896,35 @@ func TestUpdateSegment(t *testing.T) { assert.NoError(t, err) assert.Equal(t, []*model.Segment{segment}, result) + // TODO: revisit why we need this // Update collection to new value - segment.CollectionID = sampleCollections[1].ID - newCollecionID := segment.CollectionID.String() - c.UpdateSegment(ctx, &model.UpdateSegment{ - ID: segment.ID, - Collection: &newCollecionID, - }) - result, err = c.GetSegments(ctx, segment.ID, nil, nil, nil, types.NilUniqueID()) - assert.NoError(t, err) - assert.Equal(t, []*model.Segment{segment}, result) + //segment.CollectionID = sampleCollections[1].ID + //newCollecionID := segment.CollectionID.String() + //c.UpdateSegment(ctx, &model.UpdateSegment{ + // ID: segment.ID, + // Collection: &newCollecionID, + //}) + //result, err = c.GetSegments(ctx, segment.ID, nil, nil, nil, types.NilUniqueID()) + //assert.NoError(t, err) + //assert.Equal(t, []*model.Segment{segment}, result) // Update collection to None - segment.CollectionID = types.NilUniqueID() - c.UpdateSegment(ctx, &model.UpdateSegment{ - ID: segment.ID, - Collection: nil, - ResetCollection: true, - }) - result, err = c.GetSegments(ctx, segment.ID, nil, nil, nil, types.NilUniqueID()) - assert.NoError(t, err) - assert.Equal(t, []*model.Segment{segment}, result) + //segment.CollectionID = types.NilUniqueID() + //c.UpdateSegment(ctx, &model.UpdateSegment{ + // ID: segment.ID, + // Collection: nil, + // ResetCollection: true, + //}) + //result, err = c.GetSegments(ctx, segment.ID, nil, nil, nil, types.NilUniqueID()) + //assert.NoError(t, err) + //assert.Equal(t, []*model.Segment{segment}, result) // Add a new metadata key segment.Metadata.Set("test_str2", &model.SegmentMetadataValueStringType{Value: "str2"}) c.UpdateSegment(ctx, &model.UpdateSegment{ - ID: segment.ID, - Metadata: segment.Metadata}) + Collection: &collectionID, + ID: segment.ID, + Metadata: segment.Metadata}) result, err = c.GetSegments(ctx, segment.ID, nil, nil, nil, types.NilUniqueID()) assert.NoError(t, err) assert.Equal(t, []*model.Segment{segment}, result) @@ -927,8 +932,9 @@ func TestUpdateSegment(t *testing.T) { // Update a metadata key segment.Metadata.Set("test_str", &model.SegmentMetadataValueStringType{Value: "str3"}) c.UpdateSegment(ctx, &model.UpdateSegment{ - ID: segment.ID, - Metadata: segment.Metadata}) + Collection: &collectionID, + ID: segment.ID, + Metadata: segment.Metadata}) result, err = c.GetSegments(ctx, segment.ID, nil, nil, nil, types.NilUniqueID()) assert.NoError(t, err) assert.Equal(t, []*model.Segment{segment}, result) @@ -938,8 +944,9 @@ func TestUpdateSegment(t *testing.T) { newMetadata := model.NewSegmentMetadata[model.SegmentMetadataValueType]() newMetadata.Set("test_str", nil) c.UpdateSegment(ctx, &model.UpdateSegment{ - ID: segment.ID, - Metadata: newMetadata}) + Collection: &collectionID, + ID: segment.ID, + Metadata: newMetadata}) result, err = c.GetSegments(ctx, segment.ID, nil, nil, nil, types.NilUniqueID()) assert.NoError(t, err) assert.Equal(t, []*model.Segment{segment}, result) @@ -947,6 +954,7 @@ func TestUpdateSegment(t *testing.T) { // Delete all metadata keys segment.Metadata = nil c.UpdateSegment(ctx, &model.UpdateSegment{ + Collection: &collectionID, ID: segment.ID, Metadata: segment.Metadata, ResetMetadata: true}, diff --git a/go/coordinator/internal/grpccoordinator/collection_service.go b/go/coordinator/internal/coordinator/grpc/collection_service.go similarity index 99% rename from go/coordinator/internal/grpccoordinator/collection_service.go rename to go/coordinator/internal/coordinator/grpc/collection_service.go index faaf6b4dbf9e..9276f1401072 100644 --- a/go/coordinator/internal/grpccoordinator/collection_service.go +++ b/go/coordinator/internal/coordinator/grpc/collection_service.go @@ -1,4 +1,4 @@ -package grpccoordinator +package grpc import ( "context" diff --git a/go/coordinator/internal/grpccoordinator/collection_service_test.go b/go/coordinator/internal/coordinator/grpc/collection_service_test.go similarity index 97% rename from go/coordinator/internal/grpccoordinator/collection_service_test.go rename to go/coordinator/internal/coordinator/grpc/collection_service_test.go index 390b08f76075..c4f02a0682c2 100644 --- a/go/coordinator/internal/grpccoordinator/collection_service_test.go +++ b/go/coordinator/internal/coordinator/grpc/collection_service_test.go @@ -1,11 +1,11 @@ -package grpccoordinator +package grpc import ( "context" + "github.com/chroma/chroma-coordinator/internal/grpcutils" "testing" "github.com/chroma/chroma-coordinator/internal/common" - "github.com/chroma/chroma-coordinator/internal/grpccoordinator/grpcutils" "github.com/chroma/chroma-coordinator/internal/metastore/db/dbcore" "github.com/chroma/chroma-coordinator/internal/proto/coordinatorpb" "pgregory.net/rapid" diff --git a/go/coordinator/internal/grpccoordinator/proto_model_convert.go b/go/coordinator/internal/coordinator/grpc/proto_model_convert.go similarity index 99% rename from go/coordinator/internal/grpccoordinator/proto_model_convert.go rename to go/coordinator/internal/coordinator/grpc/proto_model_convert.go index 18c4fd307ab2..9b47f1f33ce0 100644 --- a/go/coordinator/internal/grpccoordinator/proto_model_convert.go +++ b/go/coordinator/internal/coordinator/grpc/proto_model_convert.go @@ -1,4 +1,4 @@ -package grpccoordinator +package grpc import ( "github.com/chroma/chroma-coordinator/internal/common" diff --git a/go/coordinator/internal/grpccoordinator/proto_model_convert_test.go b/go/coordinator/internal/coordinator/grpc/proto_model_convert_test.go similarity index 99% rename from go/coordinator/internal/grpccoordinator/proto_model_convert_test.go rename to go/coordinator/internal/coordinator/grpc/proto_model_convert_test.go index 9cfa2f0632fe..2586151d3c71 100644 --- a/go/coordinator/internal/grpccoordinator/proto_model_convert_test.go +++ b/go/coordinator/internal/coordinator/grpc/proto_model_convert_test.go @@ -1,4 +1,4 @@ -package grpccoordinator +package grpc import ( "testing" diff --git a/go/coordinator/internal/grpccoordinator/segment_service.go b/go/coordinator/internal/coordinator/grpc/segment_service.go similarity index 99% rename from go/coordinator/internal/grpccoordinator/segment_service.go rename to go/coordinator/internal/coordinator/grpc/segment_service.go index b2d3be5e4ff2..6e63e384ef15 100644 --- a/go/coordinator/internal/grpccoordinator/segment_service.go +++ b/go/coordinator/internal/coordinator/grpc/segment_service.go @@ -1,4 +1,4 @@ -package grpccoordinator +package grpc import ( "context" diff --git a/go/coordinator/internal/grpccoordinator/server.go b/go/coordinator/internal/coordinator/grpc/server.go similarity index 90% rename from go/coordinator/internal/grpccoordinator/server.go rename to go/coordinator/internal/coordinator/grpc/server.go index 4205a47153b6..578298719a7c 100644 --- a/go/coordinator/internal/grpccoordinator/server.go +++ b/go/coordinator/internal/coordinator/grpc/server.go @@ -1,13 +1,13 @@ -package grpccoordinator +package grpc import ( "context" "errors" + "github.com/chroma/chroma-coordinator/internal/grpcutils" "time" "github.com/apache/pulsar-client-go/pulsar" "github.com/chroma/chroma-coordinator/internal/coordinator" - "github.com/chroma/chroma-coordinator/internal/grpccoordinator/grpcutils" "github.com/chroma/chroma-coordinator/internal/memberlist_manager" "github.com/chroma/chroma-coordinator/internal/metastore/db/dao" "github.com/chroma/chroma-coordinator/internal/metastore/db/dbcore" @@ -29,13 +29,7 @@ type Config struct { SystemCatalogProvider string // MetaTable config - Username string - Password string - Address string - Port int - DBName string - MaxIdleConns int - MaxOpenConns int + DBConfig dbcore.DBConfig // Notification config NotificationStoreProvider string @@ -77,16 +71,8 @@ func New(config Config) (*Server, error) { if config.SystemCatalogProvider == "memory" { return NewWithGrpcProvider(config, grpcutils.Default, nil) } else if config.SystemCatalogProvider == "database" { - dBConfig := dbcore.DBConfig{ - Username: config.Username, - Password: config.Password, - Address: config.Address, - Port: config.Port, - DBName: config.DBName, - MaxIdleConns: config.MaxIdleConns, - MaxOpenConns: config.MaxOpenConns, - } - db, err := dbcore.Connect(dBConfig) + dBConfig := config.DBConfig + db, err := dbcore.ConnectPostgres(dBConfig) if err != nil { return nil, err } @@ -175,7 +161,7 @@ func NewWithGrpcProvider(config Config, provider grpcutils.GrpcProvider, db *gor return nil, err } - s.grpcServer, err = provider.StartGrpcServer("coordinator", config.GrpcConfig, func(registrar grpc.ServiceRegistrar) { + s.grpcServer, err = provider.StartGrpcServer("coordinator", config.GrpcConfig, func(registrar grpc.ServiceRegistrar) { coordinatorpb.RegisterSysDBServer(registrar, s) }) if err != nil { diff --git a/go/coordinator/internal/grpccoordinator/tenant_database_service.go b/go/coordinator/internal/coordinator/grpc/tenant_database_service.go similarity index 99% rename from go/coordinator/internal/grpccoordinator/tenant_database_service.go rename to go/coordinator/internal/coordinator/grpc/tenant_database_service.go index eb36b3de949a..5ec1045c5ec7 100644 --- a/go/coordinator/internal/grpccoordinator/tenant_database_service.go +++ b/go/coordinator/internal/coordinator/grpc/tenant_database_service.go @@ -1,4 +1,4 @@ -package grpccoordinator +package grpc import ( "context" diff --git a/go/coordinator/internal/coordinator/meta.go b/go/coordinator/internal/coordinator/meta.go index f6f2df7584e4..720eb877388a 100644 --- a/go/coordinator/internal/coordinator/meta.go +++ b/go/coordinator/internal/coordinator/meta.go @@ -2,6 +2,8 @@ package coordinator import ( "context" + "errors" + "github.com/jackc/pgx/v5/pgconn" "sync" "github.com/chroma/chroma-coordinator/internal/common" @@ -222,6 +224,18 @@ func (mt *MetaTable) AddCollection(ctx context.Context, createCollection *model. collection, err := mt.catalog.CreateCollection(ctx, createCollection, createCollection.Ts) if err != nil { log.Error("create collection failed", zap.Error(err)) + var pgErr *pgconn.PgError + ok := errors.As(err, &pgErr) + if ok { + log.Error("Postgres Error") + switch pgErr.Code { + case "23505": + log.Error("collection id already exists") + return nil, common.ErrCollectionUniqueConstraintViolation + default: + return nil, err + } + } return nil, err } mt.tenantDatabaseCollectionCache[tenantID][databaseName][collection.ID] = collection @@ -361,6 +375,19 @@ func (mt *MetaTable) AddSegment(ctx context.Context, createSegment *model.Create segment, err := mt.catalog.CreateSegment(ctx, createSegment, createSegment.Ts) if err != nil { + log.Error("create segment failed", zap.Error(err)) + var pgErr *pgconn.PgError + ok := errors.As(err, &pgErr) + if ok { + log.Error("Postgres Error") + switch pgErr.Code { + case "23505": + log.Error("segment id already exists") + return common.ErrSegmentUniqueConstraintViolation + default: + return err + } + } return err } mt.segmentsCache[createSegment.ID] = segment diff --git a/go/coordinator/internal/grpccoordinator/grpcutils/config.go b/go/coordinator/internal/grpcutils/config.go similarity index 100% rename from go/coordinator/internal/grpccoordinator/grpcutils/config.go rename to go/coordinator/internal/grpcutils/config.go diff --git a/go/coordinator/internal/grpccoordinator/grpcutils/config_test.go b/go/coordinator/internal/grpcutils/config_test.go similarity index 100% rename from go/coordinator/internal/grpccoordinator/grpcutils/config_test.go rename to go/coordinator/internal/grpcutils/config_test.go diff --git a/go/coordinator/internal/grpccoordinator/grpcutils/service.go b/go/coordinator/internal/grpcutils/service.go similarity index 100% rename from go/coordinator/internal/grpccoordinator/grpcutils/service.go rename to go/coordinator/internal/grpcutils/service.go diff --git a/go/coordinator/internal/logservice/apis.go b/go/coordinator/internal/logservice/apis.go new file mode 100644 index 000000000000..2eba78b20f68 --- /dev/null +++ b/go/coordinator/internal/logservice/apis.go @@ -0,0 +1,11 @@ +package logservice + +import ( + "github.com/chroma/chroma-coordinator/internal/common" +) + +type ( + IRecordLog interface { + common.Component + } +) diff --git a/go/coordinator/internal/logservice/grpc/server.go b/go/coordinator/internal/logservice/grpc/server.go new file mode 100644 index 000000000000..e3fb1980f78b --- /dev/null +++ b/go/coordinator/internal/logservice/grpc/server.go @@ -0,0 +1,104 @@ +package grpc + +import ( + "context" + "errors" + "github.com/chroma/chroma-coordinator/internal/grpcutils" + "github.com/chroma/chroma-coordinator/internal/logservice" + "github.com/chroma/chroma-coordinator/internal/metastore/db/dbcore" + "github.com/chroma/chroma-coordinator/internal/proto/logservicepb" + "github.com/pingcap/log" + "go.uber.org/zap" + "google.golang.org/grpc" + "google.golang.org/grpc/health" +) + +type Config struct { + // GrpcConfig config + GrpcConfig *grpcutils.GrpcConfig + + // System catalog provider + DBProvider string + + // Postgres config + DBConfig dbcore.DBConfig + + // whether to start grpc service + StartGrpc bool +} + +type Server struct { + logservicepb.UnimplementedLogServiceServer + logService logservice.IRecordLog + grpcServer grpcutils.GrpcServer + healthServer *health.Server +} + +func New(config Config) (*Server, error) { + log.Info("New Log Service...") + + if config.DBProvider == "postgres" { + dBConfig := config.DBConfig + _, err := dbcore.ConnectPostgres(dBConfig) + if err != nil { + log.Error("Error connecting to Postgres DB.", zap.Error(err)) + panic(err) + } + } else { + log.Error("invalid DB provider, only postgres is supported") + return nil, errors.New("invalid DB provider, only postgres is supported") + } + + s := startLogService() + if config.StartGrpc { + s.grpcServer = startGrpcService(s, config.GrpcConfig) + } + + log.Info("New Log Service Completed.") + return s, nil +} + +func startLogService() *Server { + log.Info("Staring Log Service...") + ctx := context.Background() + s := &Server{ + healthServer: health.NewServer(), + } + + logService, err := logservice.NewLogService(ctx) + if err != nil { + log.Error("Error creating Log Service.", zap.Error(err)) + panic(err) + } + s.logService = logService + err = s.logService.Start() + if err != nil { + log.Error("Error starting Log Service.", zap.Error(err)) + panic(err) + } + log.Info("Log Service Started.") + return s +} + +func startGrpcService(s *Server, grpcConfig *grpcutils.GrpcConfig) grpcutils.GrpcServer { + log.Info("Staring Grpc Service...") + server, err := grpcutils.Default.StartGrpcServer("logservice", grpcConfig, func(registrar grpc.ServiceRegistrar) { + logservicepb.RegisterLogServiceServer(registrar, s) + }) + if err != nil { + log.Error("Error starting grpc Service.", zap.Error(err)) + panic(err) + } + return server +} + +func (s *Server) Close() error { + s.healthServer.Shutdown() + err := s.logService.Stop() + if err != nil { + log.Error("Failed to stop log service", zap.Error(err)) + return err + } + log.Info("Server closed") + return nil +} diff --git a/go/coordinator/internal/logservice/recordlog.go b/go/coordinator/internal/logservice/recordlog.go new file mode 100644 index 000000000000..78729128de6b --- /dev/null +++ b/go/coordinator/internal/logservice/recordlog.go @@ -0,0 +1,33 @@ +package logservice + +import ( + "context" + "github.com/chroma/chroma-coordinator/internal/metastore/db/dao" + "github.com/chroma/chroma-coordinator/internal/metastore/db/dbmodel" + "github.com/pingcap/log" +) + +var _ IRecordLog = (*RecordLog)(nil) + +type RecordLog struct { + ctx context.Context + recordLogDb dbmodel.IRecordLogDb +} + +func NewLogService(ctx context.Context) (*RecordLog, error) { + s := &RecordLog{ + ctx: ctx, + recordLogDb: dao.NewMetaDomain().RecordLogDb(ctx), + } + return s, nil +} + +func (s *RecordLog) Start() error { + log.Info("RecordLog start") + return nil +} + +func (s *RecordLog) Stop() error { + log.Info("RecordLog stop") + return nil +} diff --git a/go/coordinator/internal/metastore/coordinator/table_catalog.go b/go/coordinator/internal/metastore/coordinator/table_catalog.go index 4bd0d7f1244f..f8ae8a84e287 100644 --- a/go/coordinator/internal/metastore/coordinator/table_catalog.go +++ b/go/coordinator/internal/metastore/coordinator/table_catalog.go @@ -2,7 +2,6 @@ package coordinator import ( "context" - "github.com/chroma/chroma-coordinator/internal/common" "github.com/chroma/chroma-coordinator/internal/metastore" "github.com/chroma/chroma-coordinator/internal/metastore/db/dbmodel" @@ -222,7 +221,7 @@ func (tc *Catalog) CreateCollection(ctx context.Context, createCollection *model } collectionName := createCollection.Name - existing, err := tc.metaDomain.CollectionDb(txCtx).GetCollections(types.FromUniqueID(createCollection.ID), &collectionName, nil, tenantID, databaseName) + existing, err := tc.metaDomain.CollectionDb(txCtx).GetCollections(nil, &collectionName, nil, tenantID, databaseName) if err != nil { log.Error("error getting collection", zap.Error(err)) return err @@ -492,6 +491,22 @@ func (tc *Catalog) UpdateSegment(ctx context.Context, updateSegment *model.Updat var result *model.Segment err := tc.txImpl.Transaction(ctx, func(txCtx context.Context) error { + // TODO: we should push in collection_id here, add a GET to fix test for now + if updateSegment.Collection == nil { + results, err := tc.metaDomain.SegmentDb(txCtx).GetSegments(updateSegment.ID, nil, nil, nil, types.NilUniqueID()) + if err != nil { + return err + } + if results == nil || len(results) == 0 { + return common.ErrSegmentUpdateNonExistingSegment + } + if results != nil && len(results) > 1 { + // TODO: fix this error + return common.ErrInvalidCollectionUpdate + } + updateSegment.Collection = results[0].Segment.CollectionID + } + // update segment dbSegment := &dbmodel.UpdateSegment{ ID: updateSegment.ID.String(), diff --git a/go/coordinator/internal/metastore/db/dao/common.go b/go/coordinator/internal/metastore/db/dao/common.go index c67cea6c7597..771def6f99fc 100644 --- a/go/coordinator/internal/metastore/db/dao/common.go +++ b/go/coordinator/internal/metastore/db/dao/common.go @@ -40,3 +40,7 @@ func (*metaDomain) SegmentMetadataDb(ctx context.Context) dbmodel.ISegmentMetada func (*metaDomain) NotificationDb(ctx context.Context) dbmodel.INotificationDb { return ¬ificationDb{dbcore.GetDB(ctx)} } + +func (*metaDomain) RecordLogDb(ctx context.Context) dbmodel.IRecordLogDb { + return &recordLogDb{dbcore.GetDB(ctx)} +} diff --git a/go/coordinator/internal/metastore/db/dao/record_log.go b/go/coordinator/internal/metastore/db/dao/record_log.go new file mode 100644 index 000000000000..d1601e503c86 --- /dev/null +++ b/go/coordinator/internal/metastore/db/dao/record_log.go @@ -0,0 +1,9 @@ +package dao + +import ( + "gorm.io/gorm" +) + +type recordLogDb struct { + db *gorm.DB +} diff --git a/go/coordinator/internal/metastore/db/dao/segment.go b/go/coordinator/internal/metastore/db/dao/segment.go index c4c3842e2784..5d57e6f941a6 100644 --- a/go/coordinator/internal/metastore/db/dao/segment.go +++ b/go/coordinator/internal/metastore/db/dao/segment.go @@ -165,20 +165,23 @@ func generateSegmentUpdatesWithoutID(in *dbmodel.UpdateSegment) map[string]inter } } - if in.ResetCollection { - if in.Collection == nil { - ret["collection_id"] = nil - } - } else { - if in.Collection != nil { - ret["collection_id"] = *in.Collection - } - } - log.Info("generate segment updates without id", zap.Any("updates", ret)) + // TODO: check this + //if in.ResetCollection { + // if in.Collection == nil { + // ret["collection_id"] = nil + // } + //} else { + // if in.Collection != nil { + // ret["collection_id"] = *in.Collection + // } + //} + //log.Info("generate segment updates without id", zap.Any("updates", ret)) return ret } func (s *segmentDb) Update(in *dbmodel.UpdateSegment) error { updates := generateSegmentUpdatesWithoutID(in) - return s.db.Model(&dbmodel.Segment{}).Where("id = ?", in.ID).Updates(updates).Error + return s.db.Model(&dbmodel.Segment{}). + Where("collection_id = ?", &in.Collection). + Where("id = ?", in.ID).Updates(updates).Error } diff --git a/go/coordinator/internal/metastore/db/dao/segment_metadata.go b/go/coordinator/internal/metastore/db/dao/segment_metadata.go index 14d4d2ec2d04..97800c78d8d3 100644 --- a/go/coordinator/internal/metastore/db/dao/segment_metadata.go +++ b/go/coordinator/internal/metastore/db/dao/segment_metadata.go @@ -21,7 +21,7 @@ func (s *segmentMetadataDb) DeleteBySegmentID(segmentID string) error { func (s *segmentMetadataDb) DeleteBySegmentIDAndKeys(segmentID string, keys []string) error { return s.db. Where("segment_id = ?", segmentID). - Where("`key` IN ?", keys). + Where("key IN ?", keys). Delete(&dbmodel.SegmentMetadata{}).Error } diff --git a/go/coordinator/internal/metastore/db/dbcore/core.go b/go/coordinator/internal/metastore/db/dbcore/core.go index 95d2885dfc40..ce05a1b4ca1c 100644 --- a/go/coordinator/internal/metastore/db/dbcore/core.go +++ b/go/coordinator/internal/metastore/db/dbcore/core.go @@ -3,7 +3,9 @@ package dbcore import ( "context" "fmt" + "os" "reflect" + "strconv" "github.com/chroma/chroma-coordinator/internal/common" "github.com/chroma/chroma-coordinator/internal/metastore/db/dbmodel" @@ -11,7 +13,6 @@ import ( "github.com/pingcap/log" "go.uber.org/zap" "gorm.io/driver/postgres" - "gorm.io/driver/sqlite" "gorm.io/gorm" "gorm.io/gorm/logger" ) @@ -28,11 +29,13 @@ type DBConfig struct { DBName string MaxIdleConns int MaxOpenConns int + SslMode string } -func Connect(cfg DBConfig) (*gorm.DB, error) { - dsn := fmt.Sprintf("host=%s user=%s password=%s dbname=%s port=%d sslmode=require", - cfg.Address, cfg.Username, cfg.Password, cfg.DBName, cfg.Port) +func ConnectPostgres(cfg DBConfig) (*gorm.DB, error) { + log.Info("ConnectPostgres", zap.String("host", cfg.Address), zap.String("database", cfg.DBName), zap.Int("port", cfg.Port)) + dsn := fmt.Sprintf("host=%s user=%s password=%s dbname=%s port=%d sslmode=%s", + cfg.Address, cfg.Username, cfg.Password, cfg.DBName, cfg.Port, cfg.SslMode) ormLogger := logger.Default ormLogger.LogMode(logger.Info) @@ -61,7 +64,7 @@ func Connect(cfg DBConfig) (*gorm.DB, error) { globalDB = db - log.Info("db connected success", + log.Info("Postgres connected success", zap.String("host", cfg.Address), zap.String("database", cfg.DBName), zap.Error(err)) @@ -114,14 +117,7 @@ func GetDB(ctx context.Context) *gorm.DB { return globalDB.WithContext(ctx) } -func ConfigDatabaseForTesting() *gorm.DB { - db, err := gorm.Open(sqlite.Open(":memory:"), &gorm.Config{ - Logger: logger.Default.LogMode(logger.Info), - }) - if err != nil { - panic("failed to connect database") - } - SetGlobalDB(db) +func CreateTestTables(db *gorm.DB) { // Setup tenant related tables db.Migrator().DropTable(&dbmodel.Tenant{}) db.Migrator().CreateTable(&dbmodel.Tenant{}) @@ -154,5 +150,22 @@ func ConfigDatabaseForTesting() *gorm.DB { // Setup notification related tables db.Migrator().DropTable(&dbmodel.Notification{}) db.Migrator().CreateTable(&dbmodel.Notification{}) +} + +func ConfigDatabaseForTesting() *gorm.DB { + dbAddress := os.Getenv("POSTGRES_HOST") + dbPort, err := strconv.Atoi(os.Getenv("POSTGRES_PORT")) + db, err := ConnectPostgres(DBConfig{ + Username: "chroma", + Password: "chroma", + Address: dbAddress, + Port: dbPort, + DBName: "chroma", + }) + if err != nil { + panic("failed to connect database") + } + SetGlobalDB(db) + CreateTestTables(db) return db } diff --git a/go/coordinator/internal/metastore/db/dbmodel/common.go b/go/coordinator/internal/metastore/db/dbmodel/common.go index d188193ae184..d90b7df55e61 100644 --- a/go/coordinator/internal/metastore/db/dbmodel/common.go +++ b/go/coordinator/internal/metastore/db/dbmodel/common.go @@ -15,6 +15,7 @@ type IMetaDomain interface { SegmentDb(ctx context.Context) ISegmentDb SegmentMetadataDb(ctx context.Context) ISegmentMetadataDb NotificationDb(ctx context.Context) INotificationDb + RecordLogDb(ctx context.Context) IRecordLogDb } //go:generate mockery --name=ITransaction diff --git a/go/coordinator/internal/metastore/db/dbmodel/mocks/IMetaDomain.go b/go/coordinator/internal/metastore/db/dbmodel/mocks/IMetaDomain.go index 0ee94c373e94..50c33f10e6f7 100644 --- a/go/coordinator/internal/metastore/db/dbmodel/mocks/IMetaDomain.go +++ b/go/coordinator/internal/metastore/db/dbmodel/mocks/IMetaDomain.go @@ -126,6 +126,21 @@ func (_m *IMetaDomain) TenantDb(ctx context.Context) dbmodel.ITenantDb { return r0 } +func (_m *IMetaDomain) RecordLogDb(ctx context.Context) dbmodel.IRecordLogDb { + ret := _m.Called(ctx) + + var r0 dbmodel.IRecordLogDb + if rf, ok := ret.Get(0).(func(context.Context) dbmodel.IRecordLogDb); ok { + r0 = rf(ctx) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(dbmodel.IRecordLogDb) + } + } + + return r0 +} + // NewIMetaDomain creates a new instance of IMetaDomain. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. // The first argument is typically a *testing.T value. func NewIMetaDomain(t interface { diff --git a/go/coordinator/internal/metastore/db/dbmodel/record_log.go b/go/coordinator/internal/metastore/db/dbmodel/record_log.go new file mode 100644 index 000000000000..de8aeaa75b77 --- /dev/null +++ b/go/coordinator/internal/metastore/db/dbmodel/record_log.go @@ -0,0 +1,16 @@ +package dbmodel + +type RecordLog struct { + CollectionID *string `gorm:"collection_id;primaryKey;autoIncrement:false"` + ID int64 `gorm:"id;primaryKey;"` // auto_increment id + Timestamp int64 `gorm:"timestamp;"` + Record *[]byte `gorm:"record;type:bytea"` +} + +func (v RecordLog) TableName() string { + return "record_logs" +} + +//go:generate mockery --name=IRecordLogDb +type IRecordLogDb interface { +} diff --git a/go/coordinator/internal/metastore/db/dbmodel/segment.go b/go/coordinator/internal/metastore/db/dbmodel/segment.go index 0967436e11e8..50fe84ec7cc2 100644 --- a/go/coordinator/internal/metastore/db/dbmodel/segment.go +++ b/go/coordinator/internal/metastore/db/dbmodel/segment.go @@ -7,6 +7,11 @@ import ( ) type Segment struct { + /* Making CollectionID the primary key allows fast search when we have CollectionID. + This requires us to push down CollectionID from the caller. We don't think there is + need to modify CollectionID in the near future. Each Segment should always have a + collection as a parent and cannot be modified. */ + CollectionID *string `gorm:"collection_id;primaryKey"` ID string `gorm:"id;primaryKey"` Type string `gorm:"type;type:string;not null"` Scope string `gorm:"scope"` @@ -15,7 +20,6 @@ type Segment struct { IsDeleted bool `gorm:"is_deleted;type:bool;default:false"` CreatedAt time.Time `gorm:"created_at;type:timestamp;not null;default:current_timestamp"` UpdatedAt time.Time `gorm:"updated_at;type:timestamp;not null;default:current_timestamp"` - CollectionID *string `gorm:"collection_id"` } func (s Segment) TableName() string { diff --git a/go/coordinator/internal/proto/coordinatorpb/chroma.pb.go b/go/coordinator/internal/proto/coordinatorpb/chroma.pb.go index 3cec5eefe062..d130dd11af3c 100644 --- a/go/coordinator/internal/proto/coordinatorpb/chroma.pb.go +++ b/go/coordinator/internal/proto/coordinatorpb/chroma.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.31.0 -// protoc v4.23.4 +// protoc-gen-go v1.32.0 +// protoc v3.20.3 // source: chromadb/proto/chroma.proto package coordinatorpb @@ -914,7 +914,7 @@ type VectorQueryResult struct { Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` SeqId []byte `protobuf:"bytes,2,opt,name=seq_id,json=seqId,proto3" json:"seq_id,omitempty"` - Distance float64 `protobuf:"fixed64,3,opt,name=distance,proto3" json:"distance,omitempty"` + Distance float32 `protobuf:"fixed32,3,opt,name=distance,proto3" json:"distance,omitempty"` Vector *Vector `protobuf:"bytes,4,opt,name=vector,proto3,oneof" json:"vector,omitempty"` } @@ -964,7 +964,7 @@ func (x *VectorQueryResult) GetSeqId() []byte { return nil } -func (x *VectorQueryResult) GetDistance() float64 { +func (x *VectorQueryResult) GetDistance() float32 { if x != nil { return x.Distance } @@ -1356,7 +1356,7 @@ var file_chromadb_proto_chroma_proto_rawDesc = []byte{ 0x74, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x15, 0x0a, 0x06, 0x73, 0x65, 0x71, 0x5f, 0x69, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x05, 0x73, 0x65, 0x71, 0x49, 0x64, 0x12, 0x1a, 0x0a, 0x08, 0x64, 0x69, 0x73, 0x74, - 0x61, 0x6e, 0x63, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x01, 0x52, 0x08, 0x64, 0x69, 0x73, 0x74, + 0x61, 0x6e, 0x63, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x02, 0x52, 0x08, 0x64, 0x69, 0x73, 0x74, 0x61, 0x6e, 0x63, 0x65, 0x12, 0x2b, 0x0a, 0x06, 0x76, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x0e, 0x2e, 0x63, 0x68, 0x72, 0x6f, 0x6d, 0x61, 0x2e, 0x56, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x48, 0x00, 0x52, 0x06, 0x76, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x88, 0x01, diff --git a/go/coordinator/internal/proto/coordinatorpb/chroma_grpc.pb.go b/go/coordinator/internal/proto/coordinatorpb/chroma_grpc.pb.go index 09283123121b..b2d9a1781496 100644 --- a/go/coordinator/internal/proto/coordinatorpb/chroma_grpc.pb.go +++ b/go/coordinator/internal/proto/coordinatorpb/chroma_grpc.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: -// - protoc-gen-go-grpc v1.3.0 -// - protoc v4.23.4 +// - protoc-gen-go-grpc v1.2.0 +// - protoc v3.20.3 // source: chromadb/proto/chroma.proto package coordinatorpb @@ -18,11 +18,6 @@ import ( // Requires gRPC-Go v1.32.0 or later. const _ = grpc.SupportPackageIsVersion7 -const ( - VectorReader_GetVectors_FullMethodName = "/chroma.VectorReader/GetVectors" - VectorReader_QueryVectors_FullMethodName = "/chroma.VectorReader/QueryVectors" -) - // VectorReaderClient is the client API for VectorReader service. // // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. @@ -41,7 +36,7 @@ func NewVectorReaderClient(cc grpc.ClientConnInterface) VectorReaderClient { func (c *vectorReaderClient) GetVectors(ctx context.Context, in *GetVectorsRequest, opts ...grpc.CallOption) (*GetVectorsResponse, error) { out := new(GetVectorsResponse) - err := c.cc.Invoke(ctx, VectorReader_GetVectors_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.VectorReader/GetVectors", in, out, opts...) if err != nil { return nil, err } @@ -50,7 +45,7 @@ func (c *vectorReaderClient) GetVectors(ctx context.Context, in *GetVectorsReque func (c *vectorReaderClient) QueryVectors(ctx context.Context, in *QueryVectorsRequest, opts ...grpc.CallOption) (*QueryVectorsResponse, error) { out := new(QueryVectorsResponse) - err := c.cc.Invoke(ctx, VectorReader_QueryVectors_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.VectorReader/QueryVectors", in, out, opts...) if err != nil { return nil, err } @@ -99,7 +94,7 @@ func _VectorReader_GetVectors_Handler(srv interface{}, ctx context.Context, dec } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: VectorReader_GetVectors_FullMethodName, + FullMethod: "/chroma.VectorReader/GetVectors", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(VectorReaderServer).GetVectors(ctx, req.(*GetVectorsRequest)) @@ -117,7 +112,7 @@ func _VectorReader_QueryVectors_Handler(srv interface{}, ctx context.Context, de } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: VectorReader_QueryVectors_FullMethodName, + FullMethod: "/chroma.VectorReader/QueryVectors", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(VectorReaderServer).QueryVectors(ctx, req.(*QueryVectorsRequest)) diff --git a/go/coordinator/internal/proto/coordinatorpb/coordinator.pb.go b/go/coordinator/internal/proto/coordinatorpb/coordinator.pb.go index be93392c3049..1b5347462e2f 100644 --- a/go/coordinator/internal/proto/coordinatorpb/coordinator.pb.go +++ b/go/coordinator/internal/proto/coordinatorpb/coordinator.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.31.0 -// protoc v4.23.4 +// protoc-gen-go v1.32.0 +// protoc v3.20.3 // source: chromadb/proto/coordinator.proto package coordinatorpb diff --git a/go/coordinator/internal/proto/coordinatorpb/coordinator_grpc.pb.go b/go/coordinator/internal/proto/coordinatorpb/coordinator_grpc.pb.go index ed123f9f3a6f..74f79e0711d8 100644 --- a/go/coordinator/internal/proto/coordinatorpb/coordinator_grpc.pb.go +++ b/go/coordinator/internal/proto/coordinatorpb/coordinator_grpc.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. // versions: -// - protoc-gen-go-grpc v1.3.0 -// - protoc v4.23.4 +// - protoc-gen-go-grpc v1.2.0 +// - protoc v3.20.3 // source: chromadb/proto/coordinator.proto package coordinatorpb @@ -19,22 +19,6 @@ import ( // Requires gRPC-Go v1.32.0 or later. const _ = grpc.SupportPackageIsVersion7 -const ( - SysDB_CreateDatabase_FullMethodName = "/chroma.SysDB/CreateDatabase" - SysDB_GetDatabase_FullMethodName = "/chroma.SysDB/GetDatabase" - SysDB_CreateTenant_FullMethodName = "/chroma.SysDB/CreateTenant" - SysDB_GetTenant_FullMethodName = "/chroma.SysDB/GetTenant" - SysDB_CreateSegment_FullMethodName = "/chroma.SysDB/CreateSegment" - SysDB_DeleteSegment_FullMethodName = "/chroma.SysDB/DeleteSegment" - SysDB_GetSegments_FullMethodName = "/chroma.SysDB/GetSegments" - SysDB_UpdateSegment_FullMethodName = "/chroma.SysDB/UpdateSegment" - SysDB_CreateCollection_FullMethodName = "/chroma.SysDB/CreateCollection" - SysDB_DeleteCollection_FullMethodName = "/chroma.SysDB/DeleteCollection" - SysDB_GetCollections_FullMethodName = "/chroma.SysDB/GetCollections" - SysDB_UpdateCollection_FullMethodName = "/chroma.SysDB/UpdateCollection" - SysDB_ResetState_FullMethodName = "/chroma.SysDB/ResetState" -) - // SysDBClient is the client API for SysDB service. // // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. @@ -64,7 +48,7 @@ func NewSysDBClient(cc grpc.ClientConnInterface) SysDBClient { func (c *sysDBClient) CreateDatabase(ctx context.Context, in *CreateDatabaseRequest, opts ...grpc.CallOption) (*ChromaResponse, error) { out := new(ChromaResponse) - err := c.cc.Invoke(ctx, SysDB_CreateDatabase_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/CreateDatabase", in, out, opts...) if err != nil { return nil, err } @@ -73,7 +57,7 @@ func (c *sysDBClient) CreateDatabase(ctx context.Context, in *CreateDatabaseRequ func (c *sysDBClient) GetDatabase(ctx context.Context, in *GetDatabaseRequest, opts ...grpc.CallOption) (*GetDatabaseResponse, error) { out := new(GetDatabaseResponse) - err := c.cc.Invoke(ctx, SysDB_GetDatabase_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/GetDatabase", in, out, opts...) if err != nil { return nil, err } @@ -82,7 +66,7 @@ func (c *sysDBClient) GetDatabase(ctx context.Context, in *GetDatabaseRequest, o func (c *sysDBClient) CreateTenant(ctx context.Context, in *CreateTenantRequest, opts ...grpc.CallOption) (*ChromaResponse, error) { out := new(ChromaResponse) - err := c.cc.Invoke(ctx, SysDB_CreateTenant_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/CreateTenant", in, out, opts...) if err != nil { return nil, err } @@ -91,7 +75,7 @@ func (c *sysDBClient) CreateTenant(ctx context.Context, in *CreateTenantRequest, func (c *sysDBClient) GetTenant(ctx context.Context, in *GetTenantRequest, opts ...grpc.CallOption) (*GetTenantResponse, error) { out := new(GetTenantResponse) - err := c.cc.Invoke(ctx, SysDB_GetTenant_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/GetTenant", in, out, opts...) if err != nil { return nil, err } @@ -100,7 +84,7 @@ func (c *sysDBClient) GetTenant(ctx context.Context, in *GetTenantRequest, opts func (c *sysDBClient) CreateSegment(ctx context.Context, in *CreateSegmentRequest, opts ...grpc.CallOption) (*ChromaResponse, error) { out := new(ChromaResponse) - err := c.cc.Invoke(ctx, SysDB_CreateSegment_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/CreateSegment", in, out, opts...) if err != nil { return nil, err } @@ -109,7 +93,7 @@ func (c *sysDBClient) CreateSegment(ctx context.Context, in *CreateSegmentReques func (c *sysDBClient) DeleteSegment(ctx context.Context, in *DeleteSegmentRequest, opts ...grpc.CallOption) (*ChromaResponse, error) { out := new(ChromaResponse) - err := c.cc.Invoke(ctx, SysDB_DeleteSegment_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/DeleteSegment", in, out, opts...) if err != nil { return nil, err } @@ -118,7 +102,7 @@ func (c *sysDBClient) DeleteSegment(ctx context.Context, in *DeleteSegmentReques func (c *sysDBClient) GetSegments(ctx context.Context, in *GetSegmentsRequest, opts ...grpc.CallOption) (*GetSegmentsResponse, error) { out := new(GetSegmentsResponse) - err := c.cc.Invoke(ctx, SysDB_GetSegments_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/GetSegments", in, out, opts...) if err != nil { return nil, err } @@ -127,7 +111,7 @@ func (c *sysDBClient) GetSegments(ctx context.Context, in *GetSegmentsRequest, o func (c *sysDBClient) UpdateSegment(ctx context.Context, in *UpdateSegmentRequest, opts ...grpc.CallOption) (*ChromaResponse, error) { out := new(ChromaResponse) - err := c.cc.Invoke(ctx, SysDB_UpdateSegment_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/UpdateSegment", in, out, opts...) if err != nil { return nil, err } @@ -136,7 +120,7 @@ func (c *sysDBClient) UpdateSegment(ctx context.Context, in *UpdateSegmentReques func (c *sysDBClient) CreateCollection(ctx context.Context, in *CreateCollectionRequest, opts ...grpc.CallOption) (*CreateCollectionResponse, error) { out := new(CreateCollectionResponse) - err := c.cc.Invoke(ctx, SysDB_CreateCollection_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/CreateCollection", in, out, opts...) if err != nil { return nil, err } @@ -145,7 +129,7 @@ func (c *sysDBClient) CreateCollection(ctx context.Context, in *CreateCollection func (c *sysDBClient) DeleteCollection(ctx context.Context, in *DeleteCollectionRequest, opts ...grpc.CallOption) (*ChromaResponse, error) { out := new(ChromaResponse) - err := c.cc.Invoke(ctx, SysDB_DeleteCollection_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/DeleteCollection", in, out, opts...) if err != nil { return nil, err } @@ -154,7 +138,7 @@ func (c *sysDBClient) DeleteCollection(ctx context.Context, in *DeleteCollection func (c *sysDBClient) GetCollections(ctx context.Context, in *GetCollectionsRequest, opts ...grpc.CallOption) (*GetCollectionsResponse, error) { out := new(GetCollectionsResponse) - err := c.cc.Invoke(ctx, SysDB_GetCollections_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/GetCollections", in, out, opts...) if err != nil { return nil, err } @@ -163,7 +147,7 @@ func (c *sysDBClient) GetCollections(ctx context.Context, in *GetCollectionsRequ func (c *sysDBClient) UpdateCollection(ctx context.Context, in *UpdateCollectionRequest, opts ...grpc.CallOption) (*ChromaResponse, error) { out := new(ChromaResponse) - err := c.cc.Invoke(ctx, SysDB_UpdateCollection_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/UpdateCollection", in, out, opts...) if err != nil { return nil, err } @@ -172,7 +156,7 @@ func (c *sysDBClient) UpdateCollection(ctx context.Context, in *UpdateCollection func (c *sysDBClient) ResetState(ctx context.Context, in *emptypb.Empty, opts ...grpc.CallOption) (*ChromaResponse, error) { out := new(ChromaResponse) - err := c.cc.Invoke(ctx, SysDB_ResetState_FullMethodName, in, out, opts...) + err := c.cc.Invoke(ctx, "/chroma.SysDB/ResetState", in, out, opts...) if err != nil { return nil, err } @@ -265,7 +249,7 @@ func _SysDB_CreateDatabase_Handler(srv interface{}, ctx context.Context, dec fun } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_CreateDatabase_FullMethodName, + FullMethod: "/chroma.SysDB/CreateDatabase", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).CreateDatabase(ctx, req.(*CreateDatabaseRequest)) @@ -283,7 +267,7 @@ func _SysDB_GetDatabase_Handler(srv interface{}, ctx context.Context, dec func(i } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_GetDatabase_FullMethodName, + FullMethod: "/chroma.SysDB/GetDatabase", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).GetDatabase(ctx, req.(*GetDatabaseRequest)) @@ -301,7 +285,7 @@ func _SysDB_CreateTenant_Handler(srv interface{}, ctx context.Context, dec func( } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_CreateTenant_FullMethodName, + FullMethod: "/chroma.SysDB/CreateTenant", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).CreateTenant(ctx, req.(*CreateTenantRequest)) @@ -319,7 +303,7 @@ func _SysDB_GetTenant_Handler(srv interface{}, ctx context.Context, dec func(int } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_GetTenant_FullMethodName, + FullMethod: "/chroma.SysDB/GetTenant", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).GetTenant(ctx, req.(*GetTenantRequest)) @@ -337,7 +321,7 @@ func _SysDB_CreateSegment_Handler(srv interface{}, ctx context.Context, dec func } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_CreateSegment_FullMethodName, + FullMethod: "/chroma.SysDB/CreateSegment", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).CreateSegment(ctx, req.(*CreateSegmentRequest)) @@ -355,7 +339,7 @@ func _SysDB_DeleteSegment_Handler(srv interface{}, ctx context.Context, dec func } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_DeleteSegment_FullMethodName, + FullMethod: "/chroma.SysDB/DeleteSegment", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).DeleteSegment(ctx, req.(*DeleteSegmentRequest)) @@ -373,7 +357,7 @@ func _SysDB_GetSegments_Handler(srv interface{}, ctx context.Context, dec func(i } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_GetSegments_FullMethodName, + FullMethod: "/chroma.SysDB/GetSegments", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).GetSegments(ctx, req.(*GetSegmentsRequest)) @@ -391,7 +375,7 @@ func _SysDB_UpdateSegment_Handler(srv interface{}, ctx context.Context, dec func } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_UpdateSegment_FullMethodName, + FullMethod: "/chroma.SysDB/UpdateSegment", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).UpdateSegment(ctx, req.(*UpdateSegmentRequest)) @@ -409,7 +393,7 @@ func _SysDB_CreateCollection_Handler(srv interface{}, ctx context.Context, dec f } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_CreateCollection_FullMethodName, + FullMethod: "/chroma.SysDB/CreateCollection", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).CreateCollection(ctx, req.(*CreateCollectionRequest)) @@ -427,7 +411,7 @@ func _SysDB_DeleteCollection_Handler(srv interface{}, ctx context.Context, dec f } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_DeleteCollection_FullMethodName, + FullMethod: "/chroma.SysDB/DeleteCollection", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).DeleteCollection(ctx, req.(*DeleteCollectionRequest)) @@ -445,7 +429,7 @@ func _SysDB_GetCollections_Handler(srv interface{}, ctx context.Context, dec fun } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_GetCollections_FullMethodName, + FullMethod: "/chroma.SysDB/GetCollections", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).GetCollections(ctx, req.(*GetCollectionsRequest)) @@ -463,7 +447,7 @@ func _SysDB_UpdateCollection_Handler(srv interface{}, ctx context.Context, dec f } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_UpdateCollection_FullMethodName, + FullMethod: "/chroma.SysDB/UpdateCollection", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).UpdateCollection(ctx, req.(*UpdateCollectionRequest)) @@ -481,7 +465,7 @@ func _SysDB_ResetState_Handler(srv interface{}, ctx context.Context, dec func(in } info := &grpc.UnaryServerInfo{ Server: srv, - FullMethod: SysDB_ResetState_FullMethodName, + FullMethod: "/chroma.SysDB/ResetState", } handler := func(ctx context.Context, req interface{}) (interface{}, error) { return srv.(SysDBServer).ResetState(ctx, req.(*emptypb.Empty)) diff --git a/go/coordinator/internal/proto/logservicepb/logservice.pb.go b/go/coordinator/internal/proto/logservicepb/logservice.pb.go new file mode 100644 index 000000000000..6eaa51a4349e --- /dev/null +++ b/go/coordinator/internal/proto/logservicepb/logservice.pb.go @@ -0,0 +1,67 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.32.0 +// protoc v3.20.3 +// source: chromadb/proto/logservice.proto + +package logservicepb + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +var File_chromadb_proto_logservice_proto protoreflect.FileDescriptor + +var file_chromadb_proto_logservice_proto_rawDesc = []byte{ + 0x0a, 0x1f, 0x63, 0x68, 0x72, 0x6f, 0x6d, 0x61, 0x64, 0x62, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x2f, 0x6c, 0x6f, 0x67, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x12, 0x06, 0x63, 0x68, 0x72, 0x6f, 0x6d, 0x61, 0x32, 0x0c, 0x0a, 0x0a, 0x4c, 0x6f, 0x67, + 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x42, 0x42, 0x5a, 0x40, 0x67, 0x69, 0x74, 0x68, 0x75, + 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x63, 0x68, 0x72, 0x6f, 0x6d, 0x61, 0x2f, 0x63, 0x68, 0x72, + 0x6f, 0x6d, 0x61, 0x2d, 0x63, 0x6f, 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x74, 0x6f, 0x72, 0x2f, + 0x69, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x61, 0x6c, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6c, + 0x6f, 0x67, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x33, +} + +var file_chromadb_proto_logservice_proto_goTypes = []interface{}{} +var file_chromadb_proto_logservice_proto_depIdxs = []int32{ + 0, // [0:0] is the sub-list for method output_type + 0, // [0:0] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_chromadb_proto_logservice_proto_init() } +func file_chromadb_proto_logservice_proto_init() { + if File_chromadb_proto_logservice_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_chromadb_proto_logservice_proto_rawDesc, + NumEnums: 0, + NumMessages: 0, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_chromadb_proto_logservice_proto_goTypes, + DependencyIndexes: file_chromadb_proto_logservice_proto_depIdxs, + }.Build() + File_chromadb_proto_logservice_proto = out.File + file_chromadb_proto_logservice_proto_rawDesc = nil + file_chromadb_proto_logservice_proto_goTypes = nil + file_chromadb_proto_logservice_proto_depIdxs = nil +} diff --git a/go/coordinator/internal/proto/logservicepb/logservice_grpc.pb.go b/go/coordinator/internal/proto/logservicepb/logservice_grpc.pb.go new file mode 100644 index 000000000000..5a89141fa817 --- /dev/null +++ b/go/coordinator/internal/proto/logservicepb/logservice_grpc.pb.go @@ -0,0 +1,65 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. +// versions: +// - protoc-gen-go-grpc v1.2.0 +// - protoc v3.20.3 +// source: chromadb/proto/logservice.proto + +package logservicepb + +import ( + grpc "google.golang.org/grpc" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.32.0 or later. +const _ = grpc.SupportPackageIsVersion7 + +// LogServiceClient is the client API for LogService service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type LogServiceClient interface { +} + +type logServiceClient struct { + cc grpc.ClientConnInterface +} + +func NewLogServiceClient(cc grpc.ClientConnInterface) LogServiceClient { + return &logServiceClient{cc} +} + +// LogServiceServer is the server API for LogService service. +// All implementations must embed UnimplementedLogServiceServer +// for forward compatibility +type LogServiceServer interface { + mustEmbedUnimplementedLogServiceServer() +} + +// UnimplementedLogServiceServer must be embedded to have forward compatible implementations. +type UnimplementedLogServiceServer struct { +} + +func (UnimplementedLogServiceServer) mustEmbedUnimplementedLogServiceServer() {} + +// UnsafeLogServiceServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to LogServiceServer will +// result in compilation errors. +type UnsafeLogServiceServer interface { + mustEmbedUnimplementedLogServiceServer() +} + +func RegisterLogServiceServer(s grpc.ServiceRegistrar, srv LogServiceServer) { + s.RegisterService(&LogService_ServiceDesc, srv) +} + +// LogService_ServiceDesc is the grpc.ServiceDesc for LogService service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var LogService_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "chroma.LogService", + HandlerType: (*LogServiceServer)(nil), + Methods: []grpc.MethodDesc{}, + Streams: []grpc.StreamDesc{}, + Metadata: "chromadb/proto/logservice.proto", +} diff --git a/go/coordinator/migrations/20231129183041.sql b/go/coordinator/migrations/20231129183041.sql deleted file mode 100644 index 2a31ebb48778..000000000000 --- a/go/coordinator/migrations/20231129183041.sql +++ /dev/null @@ -1,8 +0,0 @@ --- Create "notifications" table -CREATE TABLE "public"."notifications" ( - "id" bigserial NOT NULL, - "collection_id" text NULL, - "type" text NULL, - "status" text NULL, - PRIMARY KEY ("id") -); diff --git a/go/coordinator/migrations/20231116210409.sql b/go/coordinator/migrations/20240216211350.sql similarity index 85% rename from go/coordinator/migrations/20231116210409.sql rename to go/coordinator/migrations/20240216211350.sql index bb9c8d8a00c4..2d4b286c681a 100644 --- a/go/coordinator/migrations/20231116210409.sql +++ b/go/coordinator/migrations/20240216211350.sql @@ -38,6 +38,22 @@ CREATE TABLE "public"."databases" ( ); -- Create index "idx_tenantid_name" to table: "databases" CREATE UNIQUE INDEX "idx_tenantid_name" ON "public"."databases" ("name", "tenant_id"); +-- Create "notifications" table +CREATE TABLE "public"."notifications" ( + "id" bigserial NOT NULL, + "collection_id" text NULL, + "type" text NULL, + "status" text NULL, + PRIMARY KEY ("id") +); +-- Create "record_logs" table +CREATE TABLE "public"."record_logs" ( + "collection_id" text NOT NULL, + "id" bigserial NOT NULL, + "timestamp" bigint NULL, + "record" bytea NULL, + PRIMARY KEY ("collection_id", "id") +); -- Create "segment_metadata" table CREATE TABLE "public"."segment_metadata" ( "segment_id" text NOT NULL, @@ -52,6 +68,7 @@ CREATE TABLE "public"."segment_metadata" ( ); -- Create "segments" table CREATE TABLE "public"."segments" ( + "collection_id" text NOT NULL, "id" text NOT NULL, "type" text NOT NULL, "scope" text NULL, @@ -60,8 +77,7 @@ CREATE TABLE "public"."segments" ( "is_deleted" boolean NULL DEFAULT false, "created_at" timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, "updated_at" timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, - "collection_id" text NULL, - PRIMARY KEY ("id") + PRIMARY KEY ("collection_id", "id") ); -- Create "tenants" table CREATE TABLE "public"."tenants" ( diff --git a/go/coordinator/migrations/atlas.sum b/go/coordinator/migrations/atlas.sum index d4ee513fa904..6d1a0e5baaa9 100644 --- a/go/coordinator/migrations/atlas.sum +++ b/go/coordinator/migrations/atlas.sum @@ -1,3 +1,2 @@ -h1:j28ectYxexGfQz/LClD7yYVUHAfIcPHlboAJ1Qw0G7I= -20231116210409.sql h1:vwZRvrXrUMOuDykEaheyEzsnNCpmH73x0QEefzUtf8o= -20231129183041.sql h1:FglI5Hjf7kqvjCsSYWkK2IGS2aThQBaVhpg9WekhNEA= +h1:0AmSHt0xnRVJjHv8/LoOph5FzyVC5io1/O1lOY/Ihdo= +20240216211350.sql h1:yoz9m9lOVG1g7JPG0sWW+PXOb5sNg1W7Y5kLqhibGqg= diff --git a/idl/chromadb/proto/logservice.proto b/idl/chromadb/proto/logservice.proto new file mode 100644 index 000000000000..18c32a6a0d46 --- /dev/null +++ b/idl/chromadb/proto/logservice.proto @@ -0,0 +1,8 @@ +syntax = "proto3"; + +package chroma; +option go_package = "github.com/chroma/chroma-coordinator/internal/proto/logservicepb"; + +service LogService { + +} diff --git a/idl/makefile b/idl/makefile index 18cbc1977ba4..183fd24a1985 100644 --- a/idl/makefile +++ b/idl/makefile @@ -17,6 +17,7 @@ proto_go: --go-grpc_opt paths=source_relative \ --plugin protoc-gen-go-grpc="${GOPATH}/bin/protoc-gen-go-grpc" \ chromadb/proto/*.proto + @mv ../go/coordinator/internal/proto/coordinatorpb/chromadb/proto/logservice*.go ../go/coordinator/internal/proto/logservicepb/ @mv ../go/coordinator/internal/proto/coordinatorpb/chromadb/proto/*.go ../go/coordinator/internal/proto/coordinatorpb/ @rm -rf ../go/coordinator/internal/proto/coordinatorpb/chromadb @echo "Done" diff --git a/k8s/deployment/kubernetes.yaml b/k8s/deployment/kubernetes.yaml index b1f9baabdd0b..5b5ec4a7a847 100644 --- a/k8s/deployment/kubernetes.yaml +++ b/k8s/deployment/kubernetes.yaml @@ -77,6 +77,76 @@ spec: --- +apiVersion: v1 +kind: Service +metadata: + name: postgres + namespace: chroma +spec: + ports: + - name: postgres-port + port: 5432 + targetPort: 5432 + selector: + app: postgres + type: ClusterIP + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: postgres + namespace: chroma +spec: + replicas: 1 + selector: + matchLabels: + app: postgres + template: + metadata: + labels: + app: postgres + spec: + containers: + - name: postgres + image: postgres:14.1-alpine + env: + - name: POSTGRES_DB + value: chroma + - name: POSTGRES_USER + value: chroma + - name: POSTGRES_PASSWORD + value: chroma + ports: + - containerPort: 5432 + +--- + +apiVersion: batch/v1 +kind: Job +metadata: + name: migration + namespace: chroma +spec: + template: + metadata: + labels: + app: migration + spec: + restartPolicy: OnFailure + containers: + - args: + - 'migrate' + - 'apply' + - '--url' + - 'postgres://chroma:chroma@postgres:5432/chroma?sslmode=disable' + image: migration + imagePullPolicy: IfNotPresent + name: migration + +--- + apiVersion: v1 kind: Service metadata: @@ -188,7 +258,7 @@ spec: spec: containers: - command: - - "chroma" + - "coordinator" - "coordinator" - "--pulsar-admin-url=http://pulsar.chroma:8080" - "--pulsar-url=pulsar://pulsar.chroma:6650" @@ -219,3 +289,47 @@ spec: selector: app: coordinator type: ClusterIP + +--- + +apiVersion: v1 +kind: Service +metadata: + name: logservice + namespace: chroma +spec: + ports: + - name: grpc + port: 50051 + targetPort: grpc + selector: + app: logservice + type: ClusterIP + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: logservice + namespace: chroma +spec: + replicas: 1 + selector: + matchLabels: + app: logservice + template: + metadata: + labels: + app: logservice + spec: + containers: + - command: + - "logservice" + - "logservice" + image: chroma-coordinator + imagePullPolicy: IfNotPresent + name: logservice + ports: + - containerPort: 50051 + name: grpc diff --git a/k8s/dev/coordinator.yaml b/k8s/dev/coordinator.yaml new file mode 100644 index 000000000000..f7f8c122bd45 --- /dev/null +++ b/k8s/dev/coordinator.yaml @@ -0,0 +1,42 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: coordinator + namespace: chroma +spec: + replicas: 1 + selector: + matchLabels: + app: coordinator + template: + metadata: + labels: + app: coordinator + spec: + containers: + - command: + - "coordinator" + - "coordinator" + - "--pulsar-admin-url=http://pulsar.chroma:8080" + - "--pulsar-url=pulsar://pulsar.chroma:6650" + - "--notifier-provider=pulsar" + image: coordinator + imagePullPolicy: IfNotPresent + name: coordinator + ports: + - containerPort: 50051 + name: grpc +--- +apiVersion: v1 +kind: Service +metadata: + name: coordinator + namespace: chroma +spec: + ports: + - name: grpc + port: 50051 + targetPort: grpc + selector: + app: coordinator + type: ClusterIP diff --git a/k8s/dev/logservice.yaml b/k8s/dev/logservice.yaml new file mode 100644 index 000000000000..a4b491116ee9 --- /dev/null +++ b/k8s/dev/logservice.yaml @@ -0,0 +1,39 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: logservice + namespace: chroma +spec: + replicas: 1 + selector: + matchLabels: + app: logservice + template: + metadata: + labels: + app: logservice + spec: + containers: + - command: + - "logservice" + - "logservice" + image: coordinator + imagePullPolicy: IfNotPresent + name: logservice + ports: + - containerPort: 50051 + name: grpc +--- +apiVersion: v1 +kind: Service +metadata: + name: logservice + namespace: chroma +spec: + ports: + - name: grpc + port: 50051 + targetPort: grpc + selector: + app: logservice + type: ClusterIP diff --git a/k8s/dev/migration.yaml b/k8s/dev/migration.yaml new file mode 100644 index 000000000000..df4ac881740e --- /dev/null +++ b/k8s/dev/migration.yaml @@ -0,0 +1,22 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: migration + namespace: chroma +spec: + template: + metadata: + labels: + app: migration + spec: + restartPolicy: OnFailure + containers: + - args: + - 'migrate' + - 'apply' + - '--url' + - 'postgres://chroma:chroma@postgres:5432/chroma?sslmode=disable' + image: migration + imagePullPolicy: IfNotPresent + name: migration +--- diff --git a/k8s/dev/postgres.yaml b/k8s/dev/postgres.yaml new file mode 100644 index 000000000000..e2b8fad31593 --- /dev/null +++ b/k8s/dev/postgres.yaml @@ -0,0 +1,41 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: postgres + namespace: chroma +spec: + replicas: 1 + selector: + matchLabels: + app: postgres + template: + metadata: + labels: + app: postgres + spec: + containers: + - name: postgres + image: postgres:14.1-alpine + env: + - name: POSTGRES_DB + value: chroma + - name: POSTGRES_USER + value: chroma + - name: POSTGRES_PASSWORD + value: chroma + ports: + - containerPort: 5432 +--- +apiVersion: v1 +kind: Service +metadata: + name: postgres + namespace: chroma +spec: + ports: + - name: postgres-port + port: 5432 + targetPort: 5432 + selector: + app: postgres + type: ClusterIP diff --git a/k8s/dev/pulsar.yaml b/k8s/dev/pulsar.yaml new file mode 100644 index 000000000000..4038ecda2093 --- /dev/null +++ b/k8s/dev/pulsar.yaml @@ -0,0 +1,45 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: pulsar + namespace: chroma +spec: + replicas: 1 + selector: + matchLabels: + app: pulsar + template: + metadata: + labels: + app: pulsar + spec: + containers: + - name: pulsar + image: apachepulsar/pulsar + command: [ "/pulsar/bin/pulsar", "standalone" ] + ports: + - containerPort: 6650 + - containerPort: 8080 + volumeMounts: + - name: pulsardata + mountPath: /pulsar/data + volumes: + - name: pulsardata + emptyDir: {} +--- +apiVersion: v1 +kind: Service +metadata: + name: pulsar + namespace: chroma +spec: + ports: + - name: pulsar-port + port: 6650 + targetPort: 6650 + - name: admin-port + port: 8080 + targetPort: 8080 + selector: + app: pulsar + type: ClusterIP \ No newline at end of file diff --git a/k8s/dev/server.yaml b/k8s/dev/server.yaml new file mode 100644 index 000000000000..9d76314e693e --- /dev/null +++ b/k8s/dev/server.yaml @@ -0,0 +1,52 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: server + namespace: chroma +spec: + replicas: 2 + selector: + matchLabels: + app: server + template: + metadata: + labels: + app: server + spec: + containers: + - name: server + image: server + imagePullPolicy: IfNotPresent + ports: + - containerPort: 8000 + volumeMounts: + - name: chroma + mountPath: /test + env: + - name: IS_PERSISTENT + value: "TRUE" + - name: CHROMA_PRODUCER_IMPL + value: "chromadb.ingest.impl.pulsar.PulsarProducer" + - name: CHROMA_CONSUMER_IMPL + value: "chromadb.ingest.impl.pulsar.PulsarConsumer" + - name: CHROMA_SEGMENT_MANAGER_IMPL + value: "chromadb.segment.impl.manager.distributed.DistributedSegmentManager" + - name: PULSAR_BROKER_URL + value: "pulsar.chroma" + - name: PULSAR_BROKER_PORT + value: "6650" + - name: PULSAR_ADMIN_PORT + value: "8080" + - name: ALLOW_RESET + value: "TRUE" + - name: CHROMA_SYSDB_IMPL + value: "chromadb.db.impl.grpc.client.GrpcSysDB" + - name: CHROMA_SERVER_GRPC_PORT + value: "50051" + - name: CHROMA_COORDINATOR_HOST + value: "coordinator.chroma" + volumes: + - name: chroma + emptyDir: {} + + diff --git a/k8s/dev/setup.yaml b/k8s/dev/setup.yaml new file mode 100644 index 000000000000..d9e1d95cc151 --- /dev/null +++ b/k8s/dev/setup.yaml @@ -0,0 +1,100 @@ +kind: Namespace +apiVersion: v1 +metadata: + name: chroma +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: memberlist-reader +rules: +- apiGroups: + - chroma.cluster + resources: + - memberlists + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: memberlist-reader +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: memberlist-reader +subjects: +- kind: ServiceAccount + name: default + namespace: chroma +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + namespace: chroma + name: pod-list-role +rules: +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: pod-list-role-binding + namespace: chroma +subjects: +- kind: ServiceAccount + name: default + namespace: chroma +roleRef: + kind: Role + name: pod-list-role + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: memberlists.chroma.cluster +spec: + group: chroma.cluster + versions: + - name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object + properties: + spec: + type: object + properties: + members: + type: array + items: + type: object + properties: + url: # Rename to ip + type: string + pattern: '^((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.?\b){4}$' + scope: Namespaced + names: + plural: memberlists + singular: memberlist + kind: MemberList + shortNames: + - ml +--- +apiVersion: chroma.cluster/v1 +kind: MemberList +metadata: + name: worker-memberlist + namespace: chroma +spec: + members: \ No newline at end of file diff --git a/k8s/dev/worker.yaml b/k8s/dev/worker.yaml new file mode 100644 index 000000000000..82b4c9d905ba --- /dev/null +++ b/k8s/dev/worker.yaml @@ -0,0 +1,40 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: worker + namespace: chroma +spec: + replicas: 1 + selector: + matchLabels: + app: worker + template: + metadata: + labels: + app: worker + member-type: worker + spec: + containers: + - name: worker + image: worker + imagePullPolicy: IfNotPresent + command: ["cargo", "run"] + ports: + - containerPort: 50051 + volumeMounts: + - name: chroma + mountPath: /index_data + env: + - name: CHROMA_WORKER__PULSAR_URL + value: pulsar://pulsar.chroma:6650 + - name: CHROMA_WORKER__PULSAR_NAMESPACE + value: default + - name: CHROMA_WORKER__PULSAR_TENANT + value: default + - name: CHROMA_WORKER__MY_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + volumes: + - name: chroma + emptyDir: {} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 01aa0d8663bc..d425e77952d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ dependencies = [ 'tenacity>=8.2.3', 'PyYAML>=6.0.0', 'mmh3>=4.0.1', + 'orjson>=3.9.12', ] [tool.black] diff --git a/requirements.txt b/requirements.txt index 3e99734fd3a8..0ed94e5033ba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -bcrypt==4.0.1 -chroma-hnswlib==0.7.3 +bcrypt>=4.0.1 +chroma-hnswlib>=0.7.3 fastapi>=0.95.2 graphlib_backport==1.0.3; python_version < '3.9' grpcio>=1.58.0 @@ -12,16 +12,17 @@ opentelemetry-api>=1.2.0 opentelemetry-exporter-otlp-proto-grpc>=1.2.0 opentelemetry-instrumentation-fastapi>=0.41b0 opentelemetry-sdk>=1.2.0 -overrides==7.3.1 -posthog==2.4.0 -pulsar-client==3.1.0 +overrides>=7.3.1 +posthog>=2.4.0 +pulsar-client>=3.1.0 pydantic>=1.9 -pypika==0.48.9 +pypika>=0.48.9 PyYAML>=6.0.0 -requests==2.28.1 +requests>=2.28.1 tenacity>=8.2.3 -tokenizers==0.13.2 +tokenizers>=0.13.2 tqdm>=4.65.0 typer>=0.9.0 typing_extensions>=4.5.0 -uvicorn[standard]==0.18.3 +uvicorn[standard]>=0.18.3 +orjson>=3.9.12 diff --git a/rust/worker/Cargo.toml b/rust/worker/Cargo.toml index 25a3b2d099ee..e3c916fe012d 100644 --- a/rust/worker/Cargo.toml +++ b/rust/worker/Cargo.toml @@ -35,6 +35,8 @@ parking_lot = "0.12.1" aws-sdk-s3 = "1.5.0" aws-smithy-types = "1.1.0" aws-config = { version = "1.1.2", features = ["behavior-version-latest"] } +arrow = "50.0.0" +roaring = "0.10.3" [build-dependencies] tonic-build = "0.10" diff --git a/rust/worker/src/blockstore/mod.rs b/rust/worker/src/blockstore/mod.rs new file mode 100644 index 000000000000..96be70e534a1 --- /dev/null +++ b/rust/worker/src/blockstore/mod.rs @@ -0,0 +1,2 @@ +mod positional_posting_list_value; +mod types; diff --git a/rust/worker/src/blockstore/positional_posting_list_value.rs b/rust/worker/src/blockstore/positional_posting_list_value.rs new file mode 100644 index 000000000000..8c790d17f4cc --- /dev/null +++ b/rust/worker/src/blockstore/positional_posting_list_value.rs @@ -0,0 +1,122 @@ +use arrow::{ + array::{AsArray, Int32Array, Int32Builder, ListArray, ListBuilder}, + datatypes::Int32Type, +}; +use thiserror::Error; + +use std::collections::HashSet; + +use crate::errors::{ChromaError, ErrorCodes}; + +#[derive(Debug, Clone)] +pub(crate) struct PositionalPostingList { + pub(crate) doc_ids: Int32Array, + pub(crate) positions: ListArray, +} + +pub(crate) struct PositionalPostingListBuilder { + doc_ids_builder: Int32Builder, + positions_builder: ListBuilder, + doc_id_set: HashSet, +} + +impl PositionalPostingListBuilder { + pub(crate) fn new() -> Self { + PositionalPostingListBuilder { + doc_ids_builder: Int32Builder::new(), + positions_builder: ListBuilder::new(Int32Builder::new()), + doc_id_set: HashSet::new(), + } + } +} + +impl PositionalPostingList { + pub(crate) fn get_doc_ids(&self) -> Int32Array { + return self.doc_ids.clone(); + } + + pub(crate) fn get_positions_for_doc_id(&self, doc_id: i32) -> Option { + let index = self.doc_ids.iter().position(|x| x == Some(doc_id)); + match index { + Some(index) => { + let target_positions = self.positions.value(index); + // Int32Array is composed of a Datatype, ScalarBuffer, and a null bitmap, these are all cheap to clone since the buffer is Arc'ed + let downcast = target_positions.as_primitive::().clone(); + return Some(downcast); + } + None => None, + } + } +} + +#[derive(Error, Debug)] +pub(crate) enum PositionalPostingListBuilderError { + #[error("Doc ID already exists in the list")] + DocIdAlreadyExists, +} + +impl ChromaError for PositionalPostingListBuilderError { + fn code(&self) -> ErrorCodes { + match self { + PositionalPostingListBuilderError::DocIdAlreadyExists => ErrorCodes::AlreadyExists, + } + } +} + +impl PositionalPostingListBuilder { + pub(crate) fn add_doc_id_and_positions( + &mut self, + doc_id: i32, + positions: Vec, + ) -> Result<(), PositionalPostingListBuilderError> { + if self.doc_id_set.contains(&doc_id) { + return Err(PositionalPostingListBuilderError::DocIdAlreadyExists); + } + + self.doc_ids_builder.append_value(doc_id); + let positions = positions + .into_iter() + .map(Some) + .collect::>>(); + self.positions_builder.append_value(positions); + self.doc_id_set.insert(doc_id); + Ok(()) + } + + pub(crate) fn build(&mut self) -> PositionalPostingList { + let doc_ids = self.doc_ids_builder.finish(); + let positions = self.positions_builder.finish(); + PositionalPostingList { + doc_ids: doc_ids, + positions: positions, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_positional_posting_list() { + let mut builder = PositionalPostingListBuilder::new(); + + let _res = builder.add_doc_id_and_positions(1, vec![1, 2, 3]); + let _res = builder.add_doc_id_and_positions(2, vec![4, 5, 6]); + + let list = builder.build(); + assert_eq!(list.get_doc_ids().values()[0], 1); + assert_eq!(list.get_doc_ids().values()[1], 2); + assert_eq!( + list.get_positions_for_doc_id(1).unwrap(), + Int32Array::from(vec![1, 2, 3]) + ); + assert_eq!( + list.get_positions_for_doc_id(2).unwrap(), + Int32Array::from(vec![4, 5, 6]) + ); + + let res = builder.add_doc_id_and_positions(1, vec![1, 2, 3]); + assert!(res.is_err()); + } +} diff --git a/rust/worker/src/blockstore/types.rs b/rust/worker/src/blockstore/types.rs new file mode 100644 index 000000000000..b9c0021f334d --- /dev/null +++ b/rust/worker/src/blockstore/types.rs @@ -0,0 +1,478 @@ +use super::positional_posting_list_value::PositionalPostingList; +use crate::errors::ChromaError; +use arrow::array::Int32Array; +use roaring::RoaringBitmap; +use std::fmt::Display; +use std::hash::{Hash, Hasher}; + +// ===== Key Types ===== +#[derive(Clone)] +pub(crate) struct BlockfileKey { + pub(crate) prefix: String, + pub(crate) key: Key, +} + +#[derive(Clone, PartialEq, PartialOrd, Debug)] +pub(crate) enum Key { + String(String), + Float(f32), +} + +#[derive(Debug, Clone)] +pub(crate) enum KeyType { + String, + Float, +} + +impl Display for Key { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Key::String(s) => write!(f, "{}", s), + Key::Float(fl) => write!(f, "{}", fl), + } + } +} + +impl BlockfileKey { + pub(crate) fn new(prefix: String, key: Key) -> Self { + BlockfileKey { prefix, key } + } +} + +impl Hash for BlockfileKey { + // Hash is only used for the HashMap implementation, which is a test/reference implementation + // Therefore this hash implementation is not used in production and allowed to be + // hacky + fn hash(&self, state: &mut H) { + self.prefix.hash(state); + } +} + +impl PartialEq for BlockfileKey { + fn eq(&self, other: &Self) -> bool { + self.prefix == other.prefix && self.key == other.key + } +} + +impl PartialOrd for BlockfileKey { + fn partial_cmp(&self, other: &Self) -> Option { + if self.prefix == other.prefix { + self.key.partial_cmp(&other.key) + } else { + self.prefix.partial_cmp(&other.prefix) + } + } +} + +impl Eq for BlockfileKey {} + +impl Ord for BlockfileKey { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + if self.prefix == other.prefix { + match self.key { + Key::String(ref s1) => match &other.key { + Key::String(s2) => s1.cmp(s2), + _ => panic!("Cannot compare string to float"), + }, + Key::Float(f1) => match &other.key { + Key::Float(f2) => f1.partial_cmp(f2).unwrap(), + _ => panic!("Cannot compare float to string"), + }, + } + } else { + self.prefix.cmp(&other.prefix) + } + } +} + +// ===== Value Types ===== + +#[derive(Debug, Clone)] +pub(crate) enum Value { + Int32ArrayValue(Int32Array), + PositionalPostingListValue(PositionalPostingList), + StringValue(String), + RoaringBitmapValue(RoaringBitmap), +} + +#[derive(Debug, Clone)] +pub(crate) enum ValueType { + Int32Array, + PositionalPostingList, + RoaringBitmap, + String, +} + +pub(crate) trait Blockfile { + // ===== Lifecycle methods ===== + fn open(path: &str) -> Result> + where + Self: Sized; + fn create( + path: &str, + key_type: KeyType, + value_type: ValueType, + ) -> Result> + where + Self: Sized; + + // ===== Transaction methods ===== + fn begin_transaction(&mut self) -> Result<(), Box>; + + fn commit_transaction(&mut self) -> Result<(), Box>; + + // ===== Data methods ===== + fn get(&self, key: BlockfileKey) -> Result>; + fn get_by_prefix( + &self, + prefix: String, + ) -> Result, Box>; + + fn set(&mut self, key: BlockfileKey, value: Value) -> Result<(), Box>; + + fn get_gt( + &self, + prefix: String, + key: Key, + ) -> Result, Box>; + + fn get_lt( + &self, + prefix: String, + key: Key, + ) -> Result, Box>; + + fn get_gte( + &self, + prefix: String, + key: Key, + ) -> Result, Box>; + + fn get_lte( + &self, + prefix: String, + key: Key, + ) -> Result, Box>; +} + +struct HashMapBlockfile { + map: std::collections::HashMap, +} + +impl Blockfile for HashMapBlockfile { + // TODO: change this to respect path instead of ignoring it and creating a new thing + fn open(_path: &str) -> Result> { + Ok(HashMapBlockfile { + map: std::collections::HashMap::new(), + }) + } + fn create( + path: &str, + key_type: KeyType, + value_type: ValueType, + ) -> Result> + where + Self: Sized, + { + Ok(HashMapBlockfile { + map: std::collections::HashMap::new(), + }) + } + fn get(&self, key: BlockfileKey) -> Result> { + match self.map.get(&key) { + Some(value) => Ok(value.clone()), + None => { + // TOOD: make error + panic!("Key not found"); + } + } + } + + fn get_by_prefix( + &self, + prefix: String, + ) -> Result, Box> { + let mut result = Vec::new(); + for (key, value) in self.map.iter() { + if key.prefix == prefix { + result.push((key.clone(), value.clone())); + } + } + Ok(result) + } + + fn set(&mut self, key: BlockfileKey, value: Value) -> Result<(), Box> { + self.map.insert(key, value); + Ok(()) + } + + fn get_gt( + &self, + prefix: String, + key: Key, + ) -> Result, Box> { + let mut result = Vec::new(); + for (k, v) in self.map.iter() { + if k.prefix == prefix && k.key > key { + result.push((k.clone(), v.clone())); + } + } + Ok(result) + } + + fn get_gte( + &self, + prefix: String, + key: Key, + ) -> Result, Box> { + let mut result = Vec::new(); + for (k, v) in self.map.iter() { + if k.prefix == prefix && k.key >= key { + result.push((k.clone(), v.clone())); + } + } + Ok(result) + } + + fn get_lt( + &self, + prefix: String, + key: Key, + ) -> Result, Box> { + let mut result = Vec::new(); + for (k, v) in self.map.iter() { + if k.prefix == prefix && k.key < key { + result.push((k.clone(), v.clone())); + } + } + Ok(result) + } + + fn get_lte( + &self, + prefix: String, + key: Key, + ) -> Result, Box> { + let mut result = Vec::new(); + for (k, v) in self.map.iter() { + if k.prefix == prefix && k.key <= key { + result.push((k.clone(), v.clone())); + } + } + Ok(result) + } + + fn begin_transaction(&mut self) -> Result<(), Box> { + Ok(()) + } + + fn commit_transaction(&mut self) -> Result<(), Box> { + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::blockstore::positional_posting_list_value::PositionalPostingListBuilder; + use arrow::array::Array; + use std::fmt::Debug; + + impl Debug for BlockfileKey { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "BlockfileKey(prefix: {}, key: {})", + self.prefix, self.key + ) + } + } + + #[test] + fn test_blockfile_set_get() { + let mut blockfile = + HashMapBlockfile::create("test", KeyType::String, ValueType::Int32Array).unwrap(); + let key = BlockfileKey { + prefix: "text_prefix".to_string(), + key: Key::String("key1".to_string()), + }; + let _res = blockfile + .set( + key.clone(), + Value::Int32ArrayValue(Int32Array::from(vec![1, 2, 3])), + ) + .unwrap(); + let value = blockfile.get(key); + // downcast to string + match value.unwrap() { + Value::Int32ArrayValue(arr) => assert_eq!(arr, Int32Array::from(vec![1, 2, 3])), + _ => panic!("Value is not a string"), + } + } + + #[test] + fn test_blockfile_get_by_prefix() { + let mut blockfile = HashMapBlockfile::open("test").unwrap(); + let key1 = BlockfileKey { + prefix: "text_prefix".to_string(), + key: Key::String("key1".to_string()), + }; + let key2 = BlockfileKey { + prefix: "text_prefix".to_string(), + key: Key::String("key2".to_string()), + }; + let _res = blockfile + .set( + key1.clone(), + Value::Int32ArrayValue(Int32Array::from(vec![1, 2, 3])), + ) + .unwrap(); + let _res = blockfile + .set( + key2.clone(), + Value::Int32ArrayValue(Int32Array::from(vec![4, 5, 6])), + ) + .unwrap(); + let values = blockfile.get_by_prefix("text_prefix".to_string()).unwrap(); + assert_eq!(values.len(), 2); + // May return values in any order + match &values[0].1 { + Value::Int32ArrayValue(arr) => assert!( + arr == &Int32Array::from(vec![1, 2, 3]) || arr == &Int32Array::from(vec![4, 5, 6]) + ), + _ => panic!("Value is not a string"), + } + match &values[1].1 { + Value::Int32ArrayValue(arr) => assert!( + arr == &Int32Array::from(vec![1, 2, 3]) || arr == &Int32Array::from(vec![4, 5, 6]) + ), + _ => panic!("Value is not a string"), + } + } + + #[test] + fn test_storing_arrow_in_blockfile() { + let mut blockfile = HashMapBlockfile::open("test").unwrap(); + let key = BlockfileKey { + prefix: "text_prefix".to_string(), + key: Key::String("key1".to_string()), + }; + let array = Value::Int32ArrayValue(Int32Array::from(vec![1, 2, 3])); + let _res = blockfile.set(key.clone(), array).unwrap(); + let value = blockfile.get(key).unwrap(); + match value { + Value::Int32ArrayValue(arr) => assert_eq!(arr, Int32Array::from(vec![1, 2, 3])), + _ => panic!("Value is not an arrow int32 array"), + } + } + + #[test] + fn test_blockfile_get_gt() { + let mut blockfile = HashMapBlockfile::open("test").unwrap(); + let key1 = BlockfileKey { + prefix: "text_prefix".to_string(), + key: Key::String("key1".to_string()), + }; + let key2 = BlockfileKey { + prefix: "text_prefix".to_string(), + key: Key::String("key2".to_string()), + }; + let key3 = BlockfileKey { + prefix: "text_prefix".to_string(), + key: Key::String("key3".to_string()), + }; + let _res = blockfile.set( + key1.clone(), + Value::Int32ArrayValue(Int32Array::from(vec![1])), + ); + let _res = blockfile.set( + key2.clone(), + Value::Int32ArrayValue(Int32Array::from(vec![2])), + ); + let _res = blockfile.set( + key3.clone(), + Value::Int32ArrayValue(Int32Array::from(vec![3])), + ); + let values = blockfile + .get_gt("text_prefix".to_string(), Key::String("key1".to_string())) + .unwrap(); + assert_eq!(values.len(), 2); + match &values[0].0.key { + Key::String(s) => assert!(s == "key2" || s == "key3"), + _ => panic!("Key is not a string"), + } + match &values[1].0.key { + Key::String(s) => assert!(s == "key2" || s == "key3"), + _ => panic!("Key is not a string"), + } + } + + #[test] + fn test_learning_arrow_struct() { + let mut builder = PositionalPostingListBuilder::new(); + let _res = builder.add_doc_id_and_positions(1, vec![0]); + let _res = builder.add_doc_id_and_positions(2, vec![0, 1]); + let _res = builder.add_doc_id_and_positions(3, vec![0, 1, 2]); + let list_term_1 = builder.build(); + + // Example of how to use the struct array, which is one value for a term + let mut blockfile = HashMapBlockfile::open("test").unwrap(); + let key = BlockfileKey { + prefix: "text_prefix".to_string(), + key: Key::String("term1".to_string()), + }; + let _res = blockfile + .set(key.clone(), Value::PositionalPostingListValue(list_term_1)) + .unwrap(); + let posting_list = blockfile.get(key).unwrap(); + let posting_list = match posting_list { + Value::PositionalPostingListValue(arr) => arr, + _ => panic!("Value is not an arrow struct array"), + }; + + let ids = posting_list.get_doc_ids(); + let ids = ids.as_any().downcast_ref::().unwrap(); + // find index of target id + let target_id = 2; + + // imagine this is binary search instead of linear + for i in 0..ids.len() { + if ids.is_null(i) { + continue; + } + if ids.value(i) == target_id { + let pos_list = posting_list.get_positions_for_doc_id(target_id).unwrap(); + let pos_list = pos_list.as_any().downcast_ref::().unwrap(); + assert_eq!(pos_list.len(), 2); + assert_eq!(pos_list.value(0), 0); + assert_eq!(pos_list.value(1), 1); + break; + } + } + } + + #[test] + fn test_roaring_bitmap_example() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert(1); + bitmap.insert(2); + bitmap.insert(3); + let mut blockfile = HashMapBlockfile::open("test").unwrap(); + let key = BlockfileKey::new( + "text_prefix".to_string(), + Key::String("bitmap1".to_string()), + ); + let _res = blockfile + .set(key.clone(), Value::RoaringBitmapValue(bitmap)) + .unwrap(); + let value = blockfile.get(key).unwrap(); + match value { + Value::RoaringBitmapValue(bitmap) => { + assert!(bitmap.contains(1)); + assert!(bitmap.contains(2)); + assert!(bitmap.contains(3)); + } + _ => panic!("Value is not a roaring bitmap"), + } + } +} diff --git a/rust/worker/src/lib.rs b/rust/worker/src/lib.rs index ae7ea7dc7d52..b245f24df280 100644 --- a/rust/worker/src/lib.rs +++ b/rust/worker/src/lib.rs @@ -1,4 +1,5 @@ mod assignment; +mod blockstore; mod config; mod errors; mod index; diff --git a/server.htpasswd b/server.htpasswd deleted file mode 100644 index 77f277a399ba..000000000000 --- a/server.htpasswd +++ /dev/null @@ -1 +0,0 @@ -admin:$2y$05$e5sRb6NCcSH3YfbIxe1AGu2h5K7OOd982OXKmd8WyQ3DRQ4MvpnZS