# File: .github/workflows/llm_performance_tests.yml
# Step inserted after the step that runs
# `python csv_to_html.py -f $CSV_SAVE_PATH/batch_size_2` and before the
# "Check and upload results to ftp" step.

      - name: Merge and sort csv files of multiple batches and generate html
        shell: bash
        run: |
          cd python/llm/test/benchmark
          # -p keeps the step re-runnable when a previous run left the directory behind
          mkdir -p merged_temp
          # Gather the result CSVs of every batch run into merged_temp, skipping
          # any file whose own name contains "test". The check is done on the
          # basename because the directory names themselves contain "test".
          for batch_dir in test_batch1 test_batch2; do
            for file in ../../dev/benchmark/all-in-one/"$batch_dir"/*.csv; do
              if [[ $(basename "$file") != *test* ]]; then
                cp "$file" merged_temp/
              fi
            done
          done
          python merge_csv_batch.py -f ./merged_temp
          # Remove the per-batch inputs so only the merged CSV is published
          find merged_temp -name "*batch*.csv" -delete
          # Make sure the destination is a directory; otherwise cp would create
          # a plain file called "merged"
          mkdir -p "$CSV_SAVE_PATH/merged"
          cp merged_temp/*.csv "$CSV_SAVE_PATH/merged/"
          python csv_to_html.py -f "$CSV_SAVE_PATH/merged"
          rm -r merged_temp

# File: python/llm/test/benchmark/merge_csv_batch.py (new file)
#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Python program to concat CSVs

import os
import re
import sys
import argparse
import pandas as pd


def main():
    """Merge all per-batch CSV files of a folder into one sorted CSV file.

    The folder is taken from the ``-f/--folder_path`` command-line option
    (default ``./``). Every regular ``*.csv`` file directly inside it is read
    with its first column as index, the frames are concatenated, and the rows
    are sorted by ``model``, by the integer before the ``-`` in
    ``input/output tokens``, and by ``batch_size``.

    The result is written into the same folder. Its file name is the
    alphabetically first input file name with any ``_batch<N>`` suffix
    removed.

    Raises:
        ValueError: if the folder contains no ``.csv`` file.
    """
    parser = argparse.ArgumentParser(description="concat .csv files")
    parser.add_argument("-f", "--folder_path", type=str, dest="folder_path",
                        help="The directory which stores the .csv files", default="./")
    args = parser.parse_args()

    folder = args.folder_path
    # Sorted so that the file that names the output is deterministic.
    csv_files = sorted(
        os.path.join(folder, file_name)
        for file_name in os.listdir(folder)
        if file_name.endswith(".csv") and os.path.isfile(os.path.join(folder, file_name))
    )
    if not csv_files:
        # pd.concat would raise an opaque "No objects to concatenate" here;
        # keep the exception type but say which folder was empty.
        raise ValueError(f"No .csv files found in {folder!r}")

    merged_df = pd.concat([pd.read_csv(path, index_col=0) for path in csv_files],
                          ignore_index=True)
    # Temporary numeric key: "1024-128" must sort after "32-32".
    merged_df["input_len"] = merged_df["input/output tokens"].apply(lambda x: int(x.split("-")[0]))
    merged_df = merged_df.sort_values(by=["model", "input_len", "batch_size"])
    merged_df.reset_index(drop=True, inplace=True)

    # Strip the batch suffix from the file name only, never from the folder
    # part of the path, and accept any batch number.
    merged_name = re.sub(r"_batch\d+", "", os.path.basename(csv_files[0]))
    merged_csv = os.path.join(folder, merged_name)
    merged_df.drop("input_len", axis=1).to_csv(merged_csv)


if __name__ == "__main__":
    sys.exit(main())