
Benchmarking: Absolute -> Relative imports (#85)
SUMMARY:
Fix benchmarking imports: absolute -> relative.
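
The pattern of the change, sketched below for one of the affected modules: imports now resolve relative to the package a module lives in, rather than assuming the repository root (the directory containing the top-level `neuralmagic` package) is on `sys.path`. The file path and imported names are taken from this commit's diffs; the snippet is illustrative only, since relative imports work only when the file is loaded as part of its package.

```python
# Sketch: neuralmagic/benchmarks/run_benchmarks.py
# (a module inside the neuralmagic.benchmarks package)

# Before -- absolute imports; they break when the repository root is not on sys.path:
# from neuralmagic.benchmarks.common import benchmark_configs
# from neuralmagic.benchmarks import (run_benchmark_serving_script,
#                                     run_benchmark_throughput_script)

# After -- relative imports; they resolve against this module's own package,
# so the entry point can be invoked from outside the repository root:
from .common import benchmark_configs
from .run_benchmark_serving import run_benchmark_serving_script
from .run_benchmark_throughput import run_benchmark_throughput_script
```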

TEST PLAN:
- Local testing: invoke the benchmark script from outside the repository root.
```
(vllm-test) varun@floppy-fan:~/code$ python3 -m  neuralmagic-vllm.neuralmagic.benchmarks.run_benchmarks --help
usage: run_benchmarks.py [-h] -i INPUT_CONFIG_FILE -o OUTPUT_DIRECTORY

Runs benchmark-scripts as a subprocess

options:
  -h, --help            show this help message and exit
  -i INPUT_CONFIG_FILE, --input-config-file INPUT_CONFIG_FILE
                        Path to the input config file describing the benhmarks to run
  -o OUTPUT_DIRECTORY, --output-directory OUTPUT_DIRECTORY
                        Path to a directory that is the output store

```
- Run the nm-benchmark GHA job.

---------

Co-authored-by: Varun Sundar Rabindranath <[email protected]>
varun-sundar-rabindranath and Varun Sundar Rabindranath authored Mar 4, 2024
1 parent 733c126 commit a17eb47
Showing 7 changed files with 17 additions and 16 deletions.
6 changes: 3 additions & 3 deletions neuralmagic/benchmarks/run_benchmark_serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
from typing import NamedTuple, Optional
from pathlib import Path

from neuralmagic.tools.call_cmd import call_cmd
from neuralmagic.benchmarks.common import download_model, max_model_length_from_model_id, script_args_to_cla, benchmark_configs
from neuralmagic.benchmarks.scripts.common import warmup_server, num_available_gpus
from .common import download_model, max_model_length_from_model_id, script_args_to_cla, benchmark_configs
from .scripts.common import warmup_server, num_available_gpus
from ..tools.call_cmd import call_cmd

BENCH_SERVER_HOST = "localhost"
BENCH_SERVER_PORT = 9000
Expand Down
4 changes: 2 additions & 2 deletions neuralmagic/benchmarks/run_benchmark_throughput.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from pathlib import Path
from typing import NamedTuple, Optional

from neuralmagic.tools.call_cmd import call_cmd
from neuralmagic.benchmarks.common import script_args_to_cla, benchmark_configs, max_model_length_from_model_id
from .common import script_args_to_cla, benchmark_configs, max_model_length_from_model_id
from ..tools.call_cmd import call_cmd


def run_benchmark_throughput_script(config: NamedTuple,
Expand Down
6 changes: 3 additions & 3 deletions neuralmagic/benchmarks/run_benchmarks.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import argparse

from pathlib import Path
from neuralmagic.benchmarks.common import benchmark_configs
from neuralmagic.benchmarks import (run_benchmark_serving_script,
run_benchmark_throughput_script)
from .common import benchmark_configs
from .run_benchmark_serving import run_benchmark_serving_script
from .run_benchmark_throughput import run_benchmark_throughput_script


def run(config_file_path: Path, output_directory: Path) -> None:
Expand Down
5 changes: 3 additions & 2 deletions neuralmagic/benchmarks/scripts/benchmark_serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@
from tqdm.asyncio import tqdm
from transformers import PreTrainedTokenizerBase
from vllm.transformers_utils.tokenizer import get_tokenizer
from neuralmagic.benchmarks.scripts.common import instantiate_benchmark_results_dict, generate_synthetic_requests, print_benchmark_io
from neuralmagic.benchmarks.datasets_registry import get_dataset, DatasetArgs
from .common import instantiate_benchmark_results_dict, generate_synthetic_requests, print_benchmark_io
# TODO (move this to scripts)
from .datasets_registry import get_dataset, DatasetArgs

from neuralmagic.benchmarks.scripts.backend_request_func import (
ASYNC_REQUEST_FUNCS,
Expand Down
4 changes: 2 additions & 2 deletions neuralmagic/benchmarks/scripts/benchmark_throughput.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
from pathlib import Path
from typing import List, Optional, Tuple
from transformers import AutoTokenizer
from neuralmagic.benchmarks.scripts.common import instantiate_benchmark_results_dict, generate_synthetic_requests, warmup_vllm_engine, num_available_gpus
from neuralmagic.benchmarks.datasets_registry import get_dataset, DatasetArgs
from .common import instantiate_benchmark_results_dict, generate_synthetic_requests, warmup_vllm_engine, num_available_gpus
from .datasets_registry import get_dataset, DatasetArgs


def get_tensor_parallel_size(args: argparse.Namespace) -> int:
Expand Down
6 changes: 3 additions & 3 deletions neuralmagic/benchmarks/scripts/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
from vllm import LLM, SamplingParams
from vllm.outputs import RequestOutput
from vllm.transformers_utils.tokenizer import get_tokenizer
from neuralmagic.tools.call_cmd import call_cmd
from neuralmagic.benchmarks.datasets_registry import SHAREGPT_PATH, SHAREGPT_DOWNLOAD_STR
from neuralmagic.benchmarks.scripts.backend_request_func import RequestFuncInput, async_request_vllm
from .datasets_registry import SHAREGPT_PATH, SHAREGPT_DOWNLOAD_STR
from .backend_request_func import RequestFuncInput, async_request_vllm
from ...tools.call_cmd import call_cmd


def num_available_gpus() -> int:
Expand Down
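
For the modules under `scripts/` above, each leading dot in a relative import climbs one package level, so the deeper module needs one more dot to reach `neuralmagic.tools`. A brief sketch, with the mapping inferred from the paths in this commit's diffs:

```python
# Sketch: neuralmagic/benchmarks/scripts/common.py
# (package: neuralmagic.benchmarks.scripts)
#
#   .    -> neuralmagic.benchmarks.scripts   (this module's own package)
#   ..   -> neuralmagic.benchmarks
#   ...  -> neuralmagic
from .backend_request_func import RequestFuncInput, async_request_vllm  # same package
from ...tools.call_cmd import call_cmd                                  # two levels up
```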
(seventh changed file; file name not shown in this capture) (1 addition & 1 deletion)
```diff
@@ -5,7 +5,7 @@
 from datasets import load_dataset
 from typing import List, Tuple, Optional
 from pathlib import Path
-from neuralmagic.tools.call_cmd import call_cmd
+from ...tools.call_cmd import call_cmd
 
 
 @dataclass
```
