Improve the general docs
arjunsuresh committed Sep 24, 2024
1 parent 70f9a81 commit 6f56438
Showing 2 changed files with 3 additions and 3 deletions.
docs/benchmarks/text_to_image/reproducibility/scc24.md — 1 addition, 1 deletion
@@ -50,7 +50,7 @@ or supporting multi-node execution) useful for the community and [MLCommons](htt
 
 === "Nvidia"
     ## Nvidia MLPerf Implementation
-{{ mlperf_inference_implementation_readme (4, "sdxl", "nvidia", extra_variation_tags=",_short", scenarios=["Offline"],categories=["Datacenter"], setup_tips=False, implementation_tips=False, skip_test_query_count=True) }}
+{{ mlperf_inference_implementation_readme (4, "sdxl", "nvidia", extra_variation_tags=",_short,_scc24-base", scenarios=["Offline"],categories=["Datacenter"], setup_tips=False, implementation_tips=False, skip_test_query_count=True) }}
 
 !!! info
     Once the above run is successful, you can change `_scc24-base` to `_scc24-main` to run the main variant.
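For context, the effect of the changed `extra_variation_tags` argument can be illustrated with a small Python sketch. This is not the repository's actual code: `build_run_tags` and the `base_tags` value are hypothetical names, and the real macro in `main.py` does considerably more than join tag strings.

```python
# Hypothetical sketch of how extra_variation_tags feeds into the CM run tags
# documented by the mlperf_inference_implementation_readme macro.
# build_run_tags and base_tags are illustrative names, not the repo's API.

def build_run_tags(base_tags: str, extra_variation_tags: str) -> str:
    """Append the comma-prefixed variation tags to the base tag string."""
    return base_tags + extra_variation_tags

# Before this commit the docs added only the `_short` variation:
print(build_run_tags("run-mlperf,inference", ",_short"))
# -> run-mlperf,inference,_short

# After this commit `_scc24-base` is appended as well, so the documented
# command runs the SCC24 base variant; per the info note above, switch the
# tag to `_scc24-main` once the base run succeeds.
print(build_run_tags("run-mlperf,inference", ",_short,_scc24-base"))
# -> run-mlperf,inference,_short,_scc24-base
```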
main.py — 2 additions, 2 deletions
@@ -385,9 +385,9 @@ def get_run_cmd_extra(f_pre_space, model, implementation, device, scenario, scen
     if scenario == "Server" or (scenario == "All Scenarios" and "Server" in scenarios):
         extra_content += f"{f_pre_space} * `<SERVER_TARGET_QPS>` must be determined manually. It is usually around 80% of the Offline QPS, but on some systems, it can drop below 50%. If a higher value is specified, the latency constraint will not be met, and the run will be considered invalid.\n"
     if implementation == "reference" and model in [ "sdxl", "gptj-99", "gptj-99.9" ] and device in ["cuda", "rocm"]:
-        extra_content += f"{f_pre_space} * `--precision=float16` can help run on GPUs with less RAM \n"
+        extra_content += f"{f_pre_space} * `--precision=float16` can help run on GPUs with less RAM / gives better performance \n"
     if implementation == "reference" and model in [ "sdxl", "gptj-99", "gptj-99.9" ] and device in ["cpu"]:
-        extra_content += f"{f_pre_space} * `--precision=bfloat16` can help run on GPUs with less RAM \n"
+        extra_content += f"{f_pre_space} * `--precision=bfloat16` can give better performance \n"
     if "gptj" in model and implementation == "reference":
         extra_content += f"{f_pre_space} * `--beam-size=1` Beam size of 4 is mandatory for a closed division submission but reducing the beam size can help in running the model on GPUs with lower device memory\n"
     if extra_content:
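The change above only rewords two tip strings, but the surrounding branch logic is easy to exercise in isolation. Below is a minimal, self-contained sketch using the committed strings; `precision_tip` is a hypothetical helper, not the repository's API, and the real `get_run_cmd_extra` covers more models, devices, and scenarios.

```python
# Minimal sketch of the precision-tip logic touched by this commit.
# precision_tip is a hypothetical helper; the real function is
# get_run_cmd_extra in main.py and handles many more cases.

def precision_tip(implementation: str, model: str, device: str,
                  f_pre_space: str = "") -> str:
    extra_content = ""
    if implementation == "reference" and model in ["sdxl", "gptj-99", "gptj-99.9"]:
        if device in ["cuda", "rocm"]:
            # New wording: float16 both saves GPU memory and improves performance.
            extra_content += f"{f_pre_space} * `--precision=float16` can help run on GPUs with less RAM / gives better performance \n"
        elif device in ["cpu"]:
            # New wording: on CPU the benefit is performance, so the old
            # "GPUs with less RAM" phrasing no longer applies.
            extra_content += f"{f_pre_space} * `--precision=bfloat16` can give better performance \n"
    return extra_content

print(precision_tip("reference", "sdxl", "cuda"), end="")
print(precision_tip("reference", "gptj-99", "cpu"), end="")
```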
