diff --git a/.env.example b/.env.example index 7c55e51..7cda94e 100644 --- a/.env.example +++ b/.env.example @@ -17,7 +17,7 @@ MODELS_CACHE_DIR= COMPOSE_PROJECT_NAME=albert-models # to protect the API, you can set a secret key (optional) -API_KEY=mysecretkey +API_KEY=mysecretkey # comment out this line to disable the API key (setting an empty value does not work) # to download private model on Huggingface (optional) HF_TOKEN= diff --git a/README.md b/README.md index 182e987..6359052 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Albert models -![](https://img.shields.io/badge/python-3.12-green) ![](https://img.shields.io/badge/vLLM-latest-blue) ![](https://img.shields.io/badge/HuggingFace%20Text%20Embeddings%20Inference-latest-red) +![](https://img.shields.io/badge/python-3.12-green) ![](https://img.shields.io/badge/vLLM-v0.5.1-blue) ![](https://img.shields.io/badge/HuggingFace%20Text%20Embeddings%20Inference-1.5-red) ### Deploy a full OpenAI API with vLLM that supports all embedding models diff --git a/docker-compose.yml b/docker-compose.yml index 1493af6..fb5b175 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ version: "3.8" services: vllm: - image: vllm/vllm-openai:latest + image: vllm/vllm-openai:v0.5.1 command: --model $LLM_HF_REPO_ID $VLLM_ARGS environment: - VLLM_API_KEY=${API_KEY} @@ -23,10 +23,9 @@ services: capabilities: [gpu] tei: - image: ghcr.io/huggingface/text-embeddings-inference:latest + image: ghcr.io/huggingface/text-embeddings-inference:1.5 command: --model-id $EMBEDDINGS_HF_REPO_ID $TEI_ARGS environment: - - API_KEY=${API_KEY} - HUGGING_FACE_HUB_TOKEN=${HF_TOKEN} volumes: - ${MODELS_CACHE_DIR:-"./models"}/${EMBEDDINGS_HF_REPO_ID}:/data diff --git a/nginx/nginx.conf b/nginx/nginx.conf index 438125a..11d7fd7 100755 --- a/nginx/nginx.conf +++ b/nginx/nginx.conf @@ -3,64 +3,64 @@ server { client_max_body_size 0; # Langchain HuggingfaceHubEmbeddings - location ~ ^/v1$ { - rewrite ^/v1 / break; - proxy_pass http://tei:80; - 
proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_set_header Authorization $http_authorization; + location ~ ^/v1/?$ { + rewrite ^/v1/?$ / break; + proxy_pass http://tei:80; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Authorization $http_authorization; } # OpenAI endpoints location ~ ^/(health|v1/models|docs|openapi.json) { - proxy_pass http://fastapi:8000; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_set_header Authorization $http_authorization; + proxy_pass http://fastapi:8000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Authorization $http_authorization; } location ~ ^/(v1/chat/completions|v1/completions) { - proxy_pass http://vllm:8000; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_set_header Authorization $http_authorization; + proxy_pass http://vllm:8000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Authorization $http_authorization; } location ~ ^/(v1/embeddings) { - proxy_pass http://tei:80; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; 
- proxy_set_header Authorization $http_authorization; + proxy_pass http://tei:80; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Authorization $http_authorization; } # vLLM location ~ ^/vllm { - rewrite ^/vllm/(.*) /$1 break; - proxy_pass http://vllm:8000; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_set_header Authorization $http_authorization; + rewrite ^/vllm/(.*) /$1 break; + proxy_pass http://vllm:8000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Authorization $http_authorization; } # TEI location ~ ^/tei { - rewrite ^/tei/(.*) /$1 break; - proxy_pass http://tei:80; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_set_header Authorization $http_authorization; + rewrite ^/tei/(.*) /$1 break; + proxy_pass http://tei:80; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header Authorization $http_authorization; } } \ No newline at end of file