added requirement
mrwyattii committed Jan 19, 2024
1 parent 71447a7 commit 682e904
Showing 9 changed files with 23 additions and 16 deletions.
5 changes: 5 additions & 0 deletions benchmarks/inference/mii/requirements.txt
@@ -0,0 +1,5 @@
+transformers
+matplotlib
+deepspeed-mii>=0.2.0
+vllm>=0.2.7
+numpy
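
The five entries above are the benchmark's new runtime dependencies; only deepspeed-mii and vllm carry minimum versions. Below is a minimal sketch, not part of the commit, that reports whether each listed distribution is installed and at which version, using only the standard library (a Python 3.8+ environment is assumed):

"""Report the installed versions of the packages listed in requirements.txt."""
from importlib.metadata import PackageNotFoundError, version

# Distribution names and minimum versions copied from requirements.txt;
# entries with None carry no version pin in the file.
REQUIREMENTS = {
    "transformers": None,
    "matplotlib": None,
    "deepspeed-mii": "0.2.0",
    "vllm": "0.2.7",
    "numpy": None,
}

for dist, minimum in REQUIREMENTS.items():
    try:
        installed = version(dist)
    except PackageNotFoundError:
        installed = "not installed"
    constraint = f">={minimum}" if minimum else "any version"
    print(f"{dist:15s} {constraint:12s} installed: {installed}")
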
8 changes: 4 additions & 4 deletions benchmarks/inference/mii/src/client.py
@@ -17,10 +17,10 @@
import numpy as np
from transformers import AutoTokenizer

-from postprocess_results import ResponseDetails
-from random_query_generator import RandomQueryGenerator
-from sample_input import all_text
-from utils import parse_args, print_summary, get_args_product, CLIENT_PARAMS
+from .postprocess_results import ResponseDetails
+from .random_query_generator import RandomQueryGenerator
+from .sample_input import all_text
+from .utils import parse_args, print_summary, get_args_product, CLIENT_PARAMS


def call_mii(client, input_tokens, max_new_tokens, stream):
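
This file, like the remaining ones below, swaps the absolute imports (from postprocess_results import ...) for relative ones (from .postprocess_results import ...). Relative imports only resolve when a file runs as part of a package, so these scripts would then need to be launched as modules (python -m ...) from the package's parent directory rather than executed directly. A small self-contained sketch of that behavior follows; the package name demo_pkg and its files are hypothetical and not taken from the benchmark:

"""Show that a relative import fails under direct execution but works
when the same file runs as a module of a package."""
import subprocess
import sys
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    pkg = Path(tmp) / "demo_pkg"                      # hypothetical package
    pkg.mkdir()
    (pkg / "__init__.py").write_text("")
    (pkg / "utils.py").write_text("MESSAGE = 'hello from the package'\n")
    (pkg / "main.py").write_text("from .utils import MESSAGE\nprint(MESSAGE)\n")

    # Direct execution: the relative import has no parent package and fails.
    direct = subprocess.run([sys.executable, str(pkg / "main.py")],
                            capture_output=True, text=True)
    print("direct run succeeded:", direct.returncode == 0)      # False

    # Module execution from the package's parent directory succeeds.
    as_module = subprocess.run([sys.executable, "-m", "demo_pkg.main"],
                               capture_output=True, text=True, cwd=tmp)
    print("module run succeeded:", as_module.returncode == 0)   # True
    print(as_module.stdout.strip())
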
2 changes: 1 addition & 1 deletion benchmarks/inference/mii/src/plot_effective_throughput.py
@@ -10,7 +10,7 @@
import numpy as np
import pandas as pd

-from postprocess_results import read_json, get_tokenizer
+from .postprocess_results import read_json, get_tokenizer

RAGGED_BATCH_SIZE = 768
SLA_PROMPT_TOKENS_PER_SEC = 512
2 changes: 1 addition & 1 deletion benchmarks/inference/mii/src/plot_latency_percentile.py
@@ -10,7 +10,7 @@
import numpy as np
import itertools

-from postprocess_results import read_json, get_token_latency
+from .postprocess_results import read_json, get_token_latency

bs = 768
SKIP_HEAD_TOKEN_NUM = 2
2 changes: 1 addition & 1 deletion benchmarks/inference/mii/src/plot_repl_scale.py
@@ -9,7 +9,7 @@
from pathlib import Path
import numpy as np

-from postprocess_results import read_json, get_summary
+from .postprocess_results import read_json, get_summary

bs = 768

12 changes: 7 additions & 5 deletions benchmarks/inference/mii/src/plot_th_lat.py
@@ -3,14 +3,16 @@

# DeepSpeed Team

-import glob
-import matplotlib.pyplot as plt
-import argparse
-from pathlib import Path
-import numpy as np
-from postprocess_results import read_json, get_summary
+import glob
+import os
+import re
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+from .postprocess_results import read_json, get_summary


def get_args():
2 changes: 1 addition & 1 deletion benchmarks/inference/mii/src/plot_tp_sizes.py
@@ -9,7 +9,7 @@
from pathlib import Path
import numpy as np

-from postprocess_results import read_json, get_summary
+from .postprocess_results import read_json, get_summary

bs = 768

2 changes: 1 addition & 1 deletion benchmarks/inference/mii/src/server.py
@@ -10,7 +10,7 @@
from deepspeed.inference import RaggedInferenceEngineConfig, DeepSpeedTPConfig
from deepspeed.inference.v2.ragged import DSStateManagerConfig

-from utils import parse_args, SERVER_PARAMS
+from .utils import parse_args, SERVER_PARAMS


def start_server(args):
4 changes: 2 additions & 2 deletions benchmarks/inference/mii/src/utils.py
@@ -14,8 +14,8 @@
from pathlib import Path
from typing import Iterator, List

-from defaults import ARG_DEFAULTS, MODEL_DEFAULTS
-from postprocess_results import get_summary, ResponseDetails
+from .defaults import ARG_DEFAULTS, MODEL_DEFAULTS
+from .postprocess_results import get_summary, ResponseDetails

# For these arguments, users can provide multiple values when running the
# benchmark. The benchmark will iterate over all possible combinations.
