Skip to content

Commit

Permalink
lasq: Add cases for lasq in ann-benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
rlmanrique committed Nov 18, 2024
1 parent cf84fe7 commit e139084
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 5 deletions.
89 changes: 89 additions & 0 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,20 @@ jobs:
first: ${{ needs.real-version-in-tag.outputs.real_version }}
second: "1.26.0"
operator: ">="
# Gate job: compares the version resolved by `real-version-in-tag` against
# 1.28.0 and publishes the comparison result as the `check` output, which
# downstream jobs read in their `if:` conditions.
newer-or-equal-than-1_28:
  # The operator below is ">=", so the label says "or equal" to match.
  name: "Check if the version is newer than or equal to 1.28"
  needs: real-version-in-tag
  runs-on: ubuntu-latest
  outputs:
    # Forwarded verbatim from the semver comparison step.
    check: ${{ steps.semver_compare.outputs.result }}
  steps:
    - name: Semver Compare
      id: semver_compare
      # NOTE(review): the action reference was mangled by e-mail obfuscation
      # in this copy ("[email protected]"); restored to the semver-compare
      # action. The v0.1.0 tag is assumed - confirm against the repository.
      uses: fabriziocacicia/semver-compare-action@v0.1.0
      with:
        first: ${{ needs.real-version-in-tag.outputs.real_version }}
        # Quoted so the version stays a string rather than being re-typed.
        second: "1.28.0"
        operator: ">="
filter-memory-leak:
name: Filter (cache) memory leak when querying while importing
if: ${{ github.event.inputs.test_to_run == 'filter-memory-leak' || github.event.inputs.test_to_run == '' }}
Expand Down Expand Up @@ -258,6 +272,44 @@ jobs:
path: 'results'
destination: 'ann-pipelines/github-action-runs'
glob: '*.json'

# ANN benchmark on AWS for the sift-128-euclidean dataset with
# QUANTIZATION=lasq. Runs only when the `newer-or-equal-than-1_28` gate job
# outputs 'true' and either this test was selected via `test_to_run` or no
# specific test was selected.
ann-benchmarks-lasq-sift-aws:
  name: "[bench AWS] SIFT1M lasq=true"
  needs: [newer-or-equal-than-1_28]
  if: ${{ (needs.newer-or-equal-than-1_28.outputs.check == 'true') && (github.event.inputs.test_to_run == 'ann-benchmarks-lasq-sift-aws' || github.event.inputs.test_to_run == '') }}
  runs-on: ubuntu-latest
  timeout-minutes: 60
  env:
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    DATASET: sift-128-euclidean
    DISTANCE: l2-squared
    # Quoted: environment variables are strings, so avoid float re-typing.
    REQUIRED_RECALL: "0.992"
    QUANTIZATION: lasq
    PERSISTENCE_LSM_ACCESS_STRATEGY: ${{ inputs.lsm_access_strategy }}
  steps:
    - uses: actions/checkout@v3
    - name: Login to Docker Hub
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_PASSWORD }}
    - id: 'gcs_auth'
      name: 'Authenticate to Google Cloud'
      uses: 'google-github-actions/auth@v1'
      with:
        credentials_json: ${{ secrets.GCP_SERVICE_ACCOUNT_BENCHMARKS }}
    - name: 'Set up Cloud SDK'
      uses: 'google-github-actions/setup-gcloud@v1'
    # Step label fixed: this runs the ANN quantization benchmark script,
    # not a chaos test.
    - name: Run ANN benchmark
      # NOTE(review): always() also runs this step after a failed setup step
      # or a cancelled run - confirm that is intended.
      if: always()
      run: ./ann_benchmark_quantization_aws.sh
    # Uploads every *.json under `results` to the benchmark bucket path.
    - id: 'upload-files'
      uses: 'google-github-actions/upload-cloud-storage@v1'
      with:
        path: 'results'
        destination: 'ann-pipelines/github-action-runs'
        glob: '*.json'
ann-benchmarks-pq-glove-aws:
name: "[bench AWS] Glove100 pq=true"
if: ${{ github.event.inputs.test_to_run == 'ann-benchmarks-pq-glove-aws' || github.event.inputs.test_to_run == '' }}
Expand Down Expand Up @@ -331,6 +383,43 @@ jobs:
path: 'results'
destination: 'ann-pipelines/github-action-runs'
glob: '*.json'
# ANN benchmark on AWS for the glove-100-angular dataset with
# QUANTIZATION=lasq. Runs only when the `newer-or-equal-than-1_28` gate job
# outputs 'true' and either this test was selected via `test_to_run` or no
# specific test was selected.
ann-benchmarks-lasq-glove-aws:
  name: "[bench AWS] Glove100 lasq=true"
  needs: [newer-or-equal-than-1_28]
  if: ${{ (needs.newer-or-equal-than-1_28.outputs.check == 'true') && (github.event.inputs.test_to_run == 'ann-benchmarks-lasq-glove-aws' || github.event.inputs.test_to_run == '') }}
  runs-on: ubuntu-latest
  timeout-minutes: 60
  env:
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    DATASET: glove-100-angular
    DISTANCE: cosine
    # Quoted: environment variables are strings, so avoid float re-typing.
    REQUIRED_RECALL: "0.89"
    QUANTIZATION: lasq
    PERSISTENCE_LSM_ACCESS_STRATEGY: ${{ inputs.lsm_access_strategy }}
  steps:
    - uses: actions/checkout@v3
    - name: Login to Docker Hub
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_PASSWORD }}
    - id: 'gcs_auth'
      name: 'Authenticate to Google Cloud'
      uses: 'google-github-actions/auth@v1'
      with:
        credentials_json: ${{ secrets.GCP_SERVICE_ACCOUNT_BENCHMARKS }}
    - name: 'Set up Cloud SDK'
      uses: 'google-github-actions/setup-gcloud@v1'
    # Step label fixed: this runs the ANN quantization benchmark script,
    # not a chaos test.
    - name: Run ANN benchmark
      # NOTE(review): always() also runs this step after a failed setup step
      # or a cancelled run - confirm that is intended.
      if: always()
      run: ./ann_benchmark_quantization_aws.sh
    # Uploads every *.json under `results` to the benchmark bucket path.
    - id: 'upload-files'
      uses: 'google-github-actions/upload-cloud-storage@v1'
      with:
        path: 'results'
        destination: 'ann-pipelines/github-action-runs'
        glob: '*.json'
ann-benchmarks-sift-gcp:
name: "[bench GCP] SIFT1M pq=false"
if: ${{ github.event.inputs.test_to_run == 'ann-benchmarks-sift-gcp' || github.event.inputs.test_to_run == '' }}
Expand Down
2 changes: 1 addition & 1 deletion apps/ann-benchmarks/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM python:3.11-slim-bullseye

# to support pprof profiles
RUN apt-get update && apt-get install -y golang-go pkg-config libhdf5-dev
RUN apt-get update && apt-get install -y golang-go pkg-config libhdf5-dev git

WORKDIR /workdir

Expand Down
2 changes: 1 addition & 1 deletion apps/ann-benchmarks/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
weaviate-client==4.7.0-rc.0
weaviate-client@git+https://github.com/weaviate/weaviate-python-client.git@6233847809c207301722df0d9b9bd4082614ff27
loguru==0.5.3
seaborn==0.12.2
h5py==3.11.0
Expand Down
10 changes: 8 additions & 2 deletions apps/ann-benchmarks/weaviate_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def load_records(

with client.batch.fixed_size(batch_size=batch_size) as batch:
for vector in vectors:
if i == 100000 and quantization in ["pq", "sq"] and override == False:
if i == 100000 and quantization in ["pq", "sq", "lasq"] and override == False:
logger.info(f"pausing import to enable quantization")
break

Expand All @@ -66,7 +66,7 @@ def load_records(
for err in client.batch.failed_objects:
logger.error(err.message)

if quantization in ["pq", "sq"] and override == False:
if quantization in ["pq", "sq", "lasq"] and override == False:

if quantization == "pq":
collection.config.update(
Expand All @@ -82,6 +82,12 @@ def load_records(
quantizer=wvc.Reconfigure.VectorIndex.Quantizer.sq()
)
)
elif quantization == "lasq":
collection.config.update(
vector_index_config=wvc.Reconfigure.VectorIndex.hnsw(
quantizer=wvc.Reconfigure.VectorIndex.Quantizer.lasq()
)
)

wait_for_all_shards_ready(collection)

Expand Down

0 comments on commit e139084

Please sign in to comment.