lasq: Add cases for lasq in ann-benchmark

weaviate · Nov 18, 2024 · e139084 · e139084
1 parent cf84fe7
commit e139084
Show file tree

Hide file tree

Showing 5 changed files with 100 additions and 5 deletions.
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
@@ -97,6 +97,20 @@ jobs:
           first: ${{ needs.real-version-in-tag.outputs.real_version }}
           second: "1.26.0"
           operator: ">="
+  newer-or-equal-than-1_28:
+    name: "Check if the version is newer than or equal to 1.28"
+    needs: real-version-in-tag
+    runs-on: ubuntu-latest
+    outputs:
+      check: ${{ steps.semver_compare.outputs.result }}
+    steps:
+      - name: Semver Compare
+        id: semver_compare
+        uses: fabriziocacicia/keep-history-conventional-semver-compare@v0.1.0
+        with:
+          first: ${{ needs.real-version-in-tag.outputs.real_version }}
+          second: "1.28.0"
+          operator: ">="
   filter-memory-leak:
     name: Filter (cache) memory leak when querying while importing
     if: ${{ github.event.inputs.test_to_run == 'filter-memory-leak' || github.event.inputs.test_to_run == '' }}
@@ -258,6 +272,44 @@ jobs:
           path: 'results'
           destination: 'ann-pipelines/github-action-runs'
           glob: '*.json'
+
+  ann-benchmarks-lasq-sift-aws:
+    needs: [newer-or-equal-than-1_28]
+    if: ${{ (needs.newer-or-equal-than-1_28.outputs.check == 'true') && (github.event.inputs.test_to_run == 'ann-benchmarks-lasq-sift-aws' || github.event.inputs.test_to_run == '')}}
+    name: "[bench AWS] SIFT1M lasq=true"
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+    env:
+      AWS_ACCESS_KEY_ID: ${{secrets.AWS_ACCESS_KEY}}
+      AWS_SECRET_ACCESS_KEY: ${{secrets.AWS_SECRET_ACCESS_KEY}}
+      DATASET: sift-128-euclidean
+      DISTANCE: l2-squared
+      REQUIRED_RECALL: 0.992
+      QUANTIZATION: lasq
+      PERSISTENCE_LSM_ACCESS_STRATEGY: ${{inputs.lsm_access_strategy}}
+    steps:
+      - uses: actions/checkout@v3
+      - name: Login to Docker Hub
+        uses: docker/login-action@v2
+        with:
+          username: ${{secrets.DOCKER_USERNAME}}
+          password: ${{secrets.DOCKER_PASSWORD}}
+      - id: 'gcs_auth'
+        name: 'Authenticate to Google Cloud'
+        uses: 'google-github-actions/auth@v1'
+        with:
+          credentials_json: ${{secrets.GCP_SERVICE_ACCOUNT_BENCHMARKS}}
+      - name: 'Set up Cloud SDK'
+        uses: 'google-github-actions/setup-gcloud@v1'
+      - name: Run chaos test
+        if: always()
+        run: ./ann_benchmark_quantization_aws.sh
+      - id: 'upload-files'
+        uses: 'google-github-actions/upload-cloud-storage@v1'
+        with:
+          path: 'results'
+          destination: 'ann-pipelines/github-action-runs'
+          glob: '*.json'
   ann-benchmarks-pq-glove-aws:
     name: "[bench AWS] Glove100 pq=true"
     if: ${{ github.event.inputs.test_to_run == 'ann-benchmarks-pq-glove-aws' || github.event.inputs.test_to_run == '' }}
@@ -331,6 +383,43 @@ jobs:
           path: 'results'
           destination: 'ann-pipelines/github-action-runs'
           glob: '*.json'
+  ann-benchmarks-lasq-glove-aws:
+    needs: [newer-or-equal-than-1_28]
+    if: ${{ (needs.newer-or-equal-than-1_28.outputs.check == 'true') && ( github.event.inputs.test_to_run == 'ann-benchmarks-lasq-glove-aws' || github.event.inputs.test_to_run == '' ) }}
+    name: "[bench AWS] Glove100 lasq=true"
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+    env:
+      AWS_ACCESS_KEY_ID: ${{secrets.AWS_ACCESS_KEY}}
+      AWS_SECRET_ACCESS_KEY: ${{secrets.AWS_SECRET_ACCESS_KEY}}
+      DATASET: glove-100-angular
+      DISTANCE: cosine
+      REQUIRED_RECALL: 0.89
+      QUANTIZATION: lasq
+      PERSISTENCE_LSM_ACCESS_STRATEGY: ${{inputs.lsm_access_strategy}}
+    steps:
+      - uses: actions/checkout@v3
+      - name: Login to Docker Hub
+        uses: docker/login-action@v2
+        with:
+          username: ${{secrets.DOCKER_USERNAME}}
+          password: ${{secrets.DOCKER_PASSWORD}}
+      - id: 'gcs_auth'
+        name: 'Authenticate to Google Cloud'
+        uses: 'google-github-actions/auth@v1'
+        with:
+          credentials_json: ${{secrets.GCP_SERVICE_ACCOUNT_BENCHMARKS}}
+      - name: 'Set up Cloud SDK'
+        uses: 'google-github-actions/setup-gcloud@v1'
+      - name: Run chaos test
+        if: always()
+        run: ./ann_benchmark_quantization_aws.sh
+      - id: 'upload-files'
+        uses: 'google-github-actions/upload-cloud-storage@v1'
+        with:
+          path: 'results'
+          destination: 'ann-pipelines/github-action-runs'
+          glob: '*.json'
   ann-benchmarks-sift-gcp:
     name: "[bench GCP] SIFT1M pq=false"
     if: ${{ github.event.inputs.test_to_run == 'ann-benchmarks-sift-gcp' || github.event.inputs.test_to_run == '' }}

diff --git a/apps/ann-benchmarks/Dockerfile b/apps/ann-benchmarks/Dockerfile
@@ -1,7 +1,7 @@
 FROM python:3.11-slim-bullseye
 
 # to support pprof profiles
-RUN apt-get update && apt-get install -y golang-go pkg-config libhdf5-dev
+RUN apt-get update && apt-get install -y golang-go pkg-config libhdf5-dev git
 
 WORKDIR /workdir
 

diff --git a/apps/ann-benchmarks/requirements.txt b/apps/ann-benchmarks/requirements.txt
@@ -1,4 +1,4 @@
-weaviate-client==4.7.0-rc.0
+weaviate-client@git+https://github.com/weaviate/weaviate-python-client.git@6233847809c207301722df0d9b9bd4082614ff27
 loguru==0.5.3
 seaborn==0.12.2
 h5py==3.11.0

diff --git a/apps/ann-benchmarks/weaviate_import.py b/apps/ann-benchmarks/weaviate_import.py
@@ -45,7 +45,7 @@ def load_records(
 
     with client.batch.fixed_size(batch_size=batch_size) as batch:
         for vector in vectors:
-            if i == 100000 and quantization in ["pq", "sq"] and override == False:
+            if i == 100000 and quantization in ["pq", "sq", "lasq"] and override == False:
                 logger.info(f"pausing import to enable quantization")
                 break
 
@@ -66,7 +66,7 @@ def load_records(
     for err in client.batch.failed_objects:
         logger.error(err.message)
 
-    if quantization in ["pq", "sq"] and override == False:
+    if quantization in ["pq", "sq", "lasq"] and override == False:
 
         if quantization == "pq":
             collection.config.update(
@@ -82,6 +82,12 @@ def load_records(
                     quantizer=wvc.Reconfigure.VectorIndex.Quantizer.sq()
                 )
             )
+        elif quantization == "lasq":
+            collection.config.update(
+                vector_index_config=wvc.Reconfigure.VectorIndex.hnsw(
+                    quantizer=wvc.Reconfigure.VectorIndex.Quantizer.lasq()
+                )
+            )
 
         wait_for_all_shards_ready(collection)
 

diff --git a/apps/multi-tenancy-concurrent-imports/multi-tenancy-load-test b/apps/multi-tenancy-concurrent-imports/multi-tenancy-load-test
+4 −0		README.md
+36 −0		concurrency-diagnostics/go.mod
+202 −0		concurrency-diagnostics/go.sum
+269 −0		concurrency-diagnostics/run.go
+1 −1		config.env
+1 −1		create_cluster.sh
+0 −2		importer/Dockerfile
+3 −0		importer/manifests/import-job.yaml
+3 −0		importer/manifests/querying-deployment.yaml
+3 −0		importer/manifests/reset-schema-pod.yaml
+9 −5		importer/query.sh
+21 −31		importer/querying.py
+1 −0		importer/requirements.txt
+24 −42		importer/reset_schema.py
+5 −31		importer/schema_corruption_checker.py
+31 −41		importer/tenants_and_data.py
+1 −1		weaviate/deploy.sh
+20 −2		weaviate/values.yaml