Skip to content

Commit

Permalink
[Misc] Fix arg names (vllm-project#5524)
Browse files (browse the repository at this point in the history)
  • Loading branch information
AllenDou authored Jun 14, 2024
1 parent a06b8cb commit 9e4a99e
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion benchmarks/kernels/benchmark_paged_attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def run_cuda_benchmark(num_iters: int, profile: bool = False) -> float:
choices=["v1", "v2"],
default="v2")
parser.add_argument("--batch-size", type=int, default=8)
- parser.add_argument("--seq_len", type=int, default=4096)
+ parser.add_argument("--seq-len", type=int, default=4096)
parser.add_argument("--num-query-heads", type=int, default=64)
parser.add_argument("--num-kv-heads", type=int, default=8)
parser.add_argument("--head-size",
Expand Down
2 changes: 1 addition & 1 deletion examples/aqlm_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def main():
type=int,
default=0,
help='known good models by index, [0-4]')
- parser.add_argument('--tensor_parallel_size',
+ parser.add_argument('--tensor-parallel-size',
'-t',
type=int,
default=1,
Expand Down
8 changes: 4 additions & 4 deletions examples/fp8/extract_scales.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ def main(args):
"--quantization-param-path <filename>). This is only used "
"if the KV cache dtype is FP8 and on ROCm (AMD GPU).")
parser.add_argument(
- "--quantized_model",
+ "--quantized-model",
help="Specify the directory containing a single quantized HF model. "
"It is expected that the quantization format is FP8_E4M3, for use "
"on ROCm (AMD GPU).",
Expand All @@ -339,18 +339,18 @@ def main(args):
choices=["auto", "safetensors", "npz", "pt"],
default="auto")
parser.add_argument(
- "--output_dir",
+ "--output-dir",
help="Optionally specify the output directory. By default the "
"KV cache scaling factors will be saved in the model directory, "
"however you can override this behavior here.",
default=None)
parser.add_argument(
- "--output_name",
+ "--output-name",
help="Optionally specify the output filename.",
# TODO: Change this once additional scaling factors are enabled
default="kv_cache_scales.json")
parser.add_argument(
- "--tp_size",
+ "--tp-size",
help="Optionally specify the tensor-parallel (TP) size that the "
"quantized model should correspond to. If specified, during KV "
"cache scaling factor extraction the observed TP size will be "
Expand Down

0 comments on commit 9e4a99e

Please sign in to comment.