Skip to content

Commit

Permalink
feat: add more flexibility for LangChain integration
Browse files Browse the repository at this point in the history
  • Loading branch information
leoguillaumegouv committed Jul 15, 2024
1 parent c2f9c54 commit cdc19b2
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 45 deletions.
2 changes: 1 addition & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ MODELS_CACHE_DIR=
COMPOSE_PROJECT_NAME=albert-models

# to protect the API, you can set a secret key (optional)
API_KEY=mysecretkey
API_KEY=mysecretkey # comment out this line to disable the API key (an empty value does not work)

# to download private models from Hugging Face (optional)
HF_TOKEN=
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Albert models

![](https://img.shields.io/badge/python-3.12-green) ![](https://img.shields.io/badge/vLLM-latest-blue) ![](https://img.shields.io/badge/HuggingFace%20Text%20Embeddings%20Inference-latest-red)
![](https://img.shields.io/badge/python-3.12-green) ![](https://img.shields.io/badge/vLLM-v0.5.1-blue) ![](https://img.shields.io/badge/HuggingFace%20Text%20Embeddings%20Inference-1.5-red)

### Deploy a full OpenAI API with vLLM that supports all embedding models

Expand Down
5 changes: 2 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ version: "3.8"

services:
vllm:
image: vllm/vllm-openai:latest
image: vllm/vllm-openai:v0.5.1
command: --model $LLM_HF_REPO_ID $VLLM_ARGS
environment:
- VLLM_API_KEY=${API_KEY}
Expand All @@ -23,10 +23,9 @@ services:
capabilities: [gpu]

tei:
image: ghcr.io/huggingface/text-embeddings-inference:latest
image: ghcr.io/huggingface/text-embeddings-inference:1.5
command: --model-id $EMBEDDINGS_HF_REPO_ID $TEI_ARGS
environment:
- API_KEY=${API_KEY}
- HUGGING_FACE_HUB_TOKEN=${HF_TOKEN}
volumes:
- ${MODELS_CACHE_DIR:-"./models"}/${EMBEDDINGS_HF_REPO_ID}:/data
Expand Down
80 changes: 40 additions & 40 deletions nginx/nginx.conf
Original file line number Diff line number Diff line change
Expand Up @@ -3,64 +3,64 @@ server {
client_max_body_size 0;

# LangChain HuggingFaceHubEmbeddings
location ~ ^/v1$ {
rewrite ^/v1 / break;
proxy_pass http://tei:80;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Authorization $http_authorization;
location ~ ^/v1/?$ {
rewrite ^/v1/?$ / break;
proxy_pass http://tei:80;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Authorization $http_authorization;
}

# OpenAI endpoints
location ~ ^/(health|v1/models|docs|openapi.json) {
proxy_pass http://fastapi:8000;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Authorization $http_authorization;
proxy_pass http://fastapi:8000;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Authorization $http_authorization;
}

location ~ ^/(v1/chat/completions|v1/completions) {
proxy_pass http://vllm:8000;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Authorization $http_authorization;
proxy_pass http://vllm:8000;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Authorization $http_authorization;
}

location ~ ^/(v1/embeddings) {
proxy_pass http://tei:80;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Authorization $http_authorization;
proxy_pass http://tei:80;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Authorization $http_authorization;
}

# vLLM
location ~ ^/vllm {
rewrite ^/vllm/(.*) /$1 break;
proxy_pass http://vllm:8000;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Authorization $http_authorization;
rewrite ^/vllm/(.*) /$1 break;
proxy_pass http://vllm:8000;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Authorization $http_authorization;

}

# TEI
location ~ ^/tei {
rewrite ^/tei/(.*) /$1 break;
proxy_pass http://tei:80;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Authorization $http_authorization;
rewrite ^/tei/(.*) /$1 break;
proxy_pass http://tei:80;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header Authorization $http_authorization;
}
}

0 comments on commit cdc19b2

Please sign in to comment.