Add wait_for_rest_service fn to evaluate method
Signed-off-by: Abhishree <[email protected]>
athitten committed Sep 30, 2024
1 parent a1832c4 commit 30ee09e
Showing 3 changed files with 51 additions and 131 deletions.
47 changes: 46 additions & 1 deletion nemo/collections/llm/api.py
@@ -427,7 +427,51 @@ def evaluate(

 from lm_eval import tasks, evaluator
 from lm_eval.api.model import LM
+import time
+import requests
+from requests.exceptions import RequestException
+
+def wait_for_rest_service(rest_url, max_retries=30, retry_interval=2):
+    """
+    Wait for the REST service to be ready.
+
+    Args:
+        rest_url (str): URL of the REST service's health endpoint
+        max_retries (int): Maximum number of retry attempts
+        retry_interval (int): Time to wait between retries in seconds
+
+    Returns:
+        bool: True if the REST service is ready, False otherwise
+    """
+    for _ in range(max_retries):
+        rest_ready = check_service(rest_url)
+
+        if rest_ready:
+            print("REST service is ready.")
+            return True
+
+        print(f"REST service not ready yet. Retrying in {retry_interval} seconds...")
+        time.sleep(retry_interval)
+
+    print("Timeout: REST service did not become ready.")
+    return False
+
+def check_service(url):
+    """
+    Check if a service is ready by making a GET request to its health endpoint.
+
+    Args:
+        url (str): URL of the service's health endpoint
+
+    Returns:
+        bool: True if the service is ready, False otherwise
+    """
+    try:
+        response = requests.get(url, timeout=5)
+        return response.status_code == 200
+    except RequestException:
+        return False
+
 class CustomModel(LM):
     def __init__(self, model_name, api_url, max_tokens_to_generate, temperature, top_p, top_k):
         self.model_name = model_name
@@ -475,8 +519,9 @@ def generate_until(self, inputs):
             results.append(generated_text)
 
         return results
-model = CustomModel(model_name, url, temperature, top_p, top_k)
 
+wait_for_rest_service(rest_url=f"{url}/health")
+model = CustomModel(model_name, url, max_tokens_to_generate, temperature, top_p, top_k)
 results = evaluator.simple_evaluate(
     model=model,
     tasks=eval_task,
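Taken together, the new helpers gate evaluation on service readiness. A minimal sketch of the intended call sequence, reusing the helpers defined in the diff above; the base URL, model name, task, and sampling values here are illustrative assumptions, not values from the commit:

    # Hypothetical usage sketch; assumes the FastAPI service from
    # rest_model_api.py is reachable at this address.
    url = "http://0.0.0.0:8080/v1"  # assumed deployment address
    if not wait_for_rest_service(rest_url=f"{url}/health", max_retries=30, retry_interval=2):
        raise RuntimeError("REST service did not become ready; aborting evaluation.")
    model = CustomModel(
        model_name="megatron_model",  # assumed Triton model name
        api_url=url,
        max_tokens_to_generate=32,
        temperature=1.0,
        top_p=0.0,
        top_k=1,
    )
    results = evaluator.simple_evaluate(model=model, tasks=["mmlu"])  # illustrative task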
12 changes: 5 additions & 7 deletions nemo/deploy/service/rest_model_api.py
@@ -20,7 +20,6 @@

 from nemo.deploy.nlp import NemoQueryLLM
 
-
 class TritonSettings(BaseSettings):
     _triton_service_port: int
     _triton_service_ip: str
@@ -63,7 +62,6 @@ def openai_format_response(self):
 app = FastAPI()
 triton_settings = TritonSettings()
 
-
 class CompletionRequest(BaseModel):
     model: str
     prompt: str
@@ -76,15 +74,15 @@ class CompletionRequest(BaseModel):
     frequency_penalty: float = 1.0
 
 
-@app.get("/hello")
-def root():
-    return {"message": "Hello World"}
+@app.get("/v1/health")
+def health_check():
+    return {"status": "ok"}

@app.get("/triton_health")
@app.get("/v1/triton_health")
async def check_triton_health():
"""
This method exposes endpoint "/triton_health" which can be used to verify if Triton server is accessible while running the REST or FastAPI application.
Verify by running: curl http://service_http_address:service_port/triton_health and the returned status should inform if the server is accessible.
Verify by running: curl http://service_http_address:service_port/v1/triton_health and the returned status should inform if the server is accessible.
"""
triton_url = f"http://{triton_settings.triton_service_ip}:{str(triton_settings.triton_service_port)}/v2/health/ready"
print(f"Attempting to connect to Triton server at: {triton_url}")
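Once the service is running, the renamed routes can be smoke-tested from a client. A small sketch, assuming the service listens on the address below (host and port are assumptions for illustration):

    # Hypothetical client-side check; adjust host/port to the actual deployment.
    import requests

    base = "http://0.0.0.0:8080"  # assumed service address
    print(requests.get(f"{base}/v1/health", timeout=5).json())         # expect {"status": "ok"}
    print(requests.get(f"{base}/v1/triton_health", timeout=5).json())  # Triton readiness status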
123 changes: 0 additions & 123 deletions scripts/export/convert_nemo2_for_export.py

This file was deleted.
