Merge branch 'master' into lekurile/update_bench_scripts
lekurile committed Mar 1, 2024
2 parents 1e51ded + ffb8a4b commit 81036e2
Showing 2 changed files with 25 additions and 1 deletion.
20 changes: 20 additions & 0 deletions benchmarks/inference/mii/run_aml.sh
@@ -0,0 +1,20 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

# Run benchmark against AML endpoint
python ./run_benchmark.py \
--model <model name> \
--deployment_name <aml deployment name> \
--aml_api_url <aml endpoint URL> \
--aml_api_key <aml API key> \
--mean_prompt_length 2600 \
--mean_max_new_tokens 60 \
--num_requests 256 \
--backend aml

### Generate the plots
python ./src/plot_th_lat.py

echo "Find figures in ./plots/ and log outputs in ./results/"
6 changes: 5 additions & 1 deletion benchmarks/inference/mii/src/client.py
@@ -163,7 +163,11 @@ def get_response(response: requests.Response) -> List[str]:
token_gen_time = []
start_time = time.time()
response = requests.post(args.aml_api_url, headers=headers, json=pload)
output = get_response(response)
# Sometimes the AML endpoint will return an error, so we send the request again
try:
    output = get_response(response)
except Exception as e:
    return call_aml(input_tokens, max_new_tokens, args)

return ResponseDetails(
    generated_tokens=output,
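The new error handling above retries by calling call_aml again whenever parsing the response raises, with no upper bound on the number of attempts. Below is an illustrative bounded-retry sketch, not the committed implementation: the parse_fn argument stands in for client.py's get_response, and the attempt count and backoff values are assumptions.

# Illustrative bounded-retry helper; not part of this commit.
# parse_fn stands in for client.py's get_response; the retry count and
# backoff are arbitrary choices for this sketch.
import time
from typing import Any, Callable, Dict, List, Optional

import requests


def post_with_retries(
    url: str,
    headers: Dict[str, str],
    payload: Dict[str, Any],
    parse_fn: Callable[[requests.Response], List[str]],
    max_attempts: int = 3,
    backoff_s: float = 1.0,
) -> List[str]:
    """POST to the endpoint, retrying a bounded number of times on failure."""
    last_exc: Optional[Exception] = None
    for attempt in range(max_attempts):
        try:
            response = requests.post(url, headers=headers, json=payload)
            return parse_fn(response)  # raises if the endpoint returned an error
        except Exception as exc:
            last_exc = exc
            time.sleep(backoff_s * (attempt + 1))  # simple linear backoff
    raise RuntimeError(f"AML request failed after {max_attempts} attempts") from last_exc

Compared with the recursive retry in the diff, this variant caps the number of attempts and surfaces the last error rather than retrying indefinitely if the endpoint keeps failing.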
