diff --git a/tests/python/contrib/test_hexagon/benchmark_hexagon.py b/tests/python/contrib/test_hexagon/benchmark_hexagon.py
index 979bd111707b..e32a79949606 100644
--- a/tests/python/contrib/test_hexagon/benchmark_hexagon.py
+++ b/tests/python/contrib/test_hexagon/benchmark_hexagon.py
@@ -23,11 +23,11 @@
 import numpy as np
 import logging
 import tempfile
-import csv
 
 import tvm.testing
 from tvm import te
 from tvm.contrib.hexagon.build import HexagonLauncherRPC
+from .benchmarks_table import benchmarks_table
 
 RPC_SERVER_PORT = 7070
 
@@ -58,98 +58,14 @@ def test_elemwise_add(hexagon_launcher: HexagonLauncherRPC):
     print("-" * 80)
     print()
 
-    # TODO: We should move this into a separate test fixture, to make it easier to write
-    # additional benchmarking functions. We'd just need to generalize the assumptions regarding
-    # the particular fields being tracked as independent variables.
-    class benchmark_results_collection:
-        def __init__(self):
-            self.row_dicts_ = []
-
-        def num_failures(self):
-            num = 0
-            for d in self.row_dicts_:
-                if d["status"] == "FAIL":
-                    num += 1
-            return num
-
-        def num_skips(self):
-            num = 0
-            for d in self.row_dicts_:
-                if d["status"] == "SKIP":
-                    num += 1
-            return num
-
-        def record_success(
-            self, dtype, sched_type, mem_scope, num_vecs_per_tensor, benchmark_result
-        ):
-            median_usec = benchmark_result.median * 1000000
-            min_usec = benchmark_result.min * 1000000
-            max_usec = benchmark_result.max * 1000000
-
-            self.row_dicts_.append(
-                {
-                    "dtype": dtype,
-                    "sched_type": sched_type,
-                    "mem_scope": mem_scope,
-                    "num_vecs_per_tensor": num_vecs_per_tensor,
-                    "status": "OK",
-                    "median(µsec)": f"{median_usec:.3}",
-                    "min(µsec)": f"{min_usec:.3}",
-                    "max(µsec)": f"{max_usec:.3}",
-                }
-            )
-
-        def record_failure(self, dtype, sched_type, mem_scope, num_vecs_per_tensor, error_text):
-            self.row_dicts_.append(
-                {
-                    "dtype": dtype,
-                    "sched_type": sched_type,
-                    "mem_scope": mem_scope,
-                    "num_vecs_per_tensor": num_vecs_per_tensor,
-                    "status": "FAIL",
-                    "comment": error_text,
-                }
-            )
-
-        def record_skip(self, dtype, sched_type, mem_scope, num_vecs_per_tensor, comment_text):
-            self.row_dicts_.append(
-                {
-                    "dtype": dtype,
-                    "sched_type": sched_type,
-                    "mem_scope": mem_scope,
-                    "num_vecs_per_tensor": num_vecs_per_tensor,
-                    "status": "SKIP",
-                    "comment": comment_text,
-                }
-            )
-
-        def dump(self, f):
-            csv.register_dialect(
-                "benchmarks",
-                delimiter="\t",
-                quotechar='"',
-                quoting=csv.QUOTE_MINIMAL,
-            )
-
-            fieldnames = [
-                "dtype",
-                "sched_type",
-                "mem_scope",
-                "num_vecs_per_tensor",
-                "status",
-                "median(µsec)",
-                "min(µsec)",
-                "max(µsec)",
-                "comment",
-            ]
-
-            writer = csv.DictWriter(f, fieldnames, dialect="benchmarks", restval="")
+    br = benchmarks_table({
+        'dtype':'dtype',
+        'sched_type':'sched_type',
+        'mem_scope':'mem_scope',
+        'num_vectors_per_tensor':'# 2KB vectors per tensor',
+        'comments':'comments',
+    })
 
-            writer.writeheader()
-            for d in self.row_dicts_:
-                writer.writerow(d)
-
-    br = benchmark_results_collection()
 
     # Create and benchmark a single primfunc.
     # If an unexpected problem occurs, raise an exception. Otherwise add a row of output to 'br'.
@@ -159,11 +75,11 @@ def test_one_config(dtype, sched_type, mem_scope, num_vectors_per_tensor):
 
         if num_vectors_per_tensor == 2048 and mem_scope == "global.vtcm":
             br.record_skip(
-                dtype,
-                sched_type,
-                mem_scope,
-                num_vectors_per_tensor,
-                f"Expect to exceed VTCM budget.",
+                dtype=dtype,
+                sched_type=sched_type,
+                mem_scope=mem_scope,
+                num_vectors_per_tensor=num_vectors_per_tensor,
+                comments="Expect to exceed VTCM budget.",
             )
             return
 
@@ -262,18 +178,38 @@ def test_one_config(dtype, sched_type, mem_scope, num_vectors_per_tensor):
             tvm.testing.assert_allclose(host_numpy_C_data_expected, result)
 
             br.record_success(
-                dtype, sched_type, mem_scope, num_vectors_per_tensor, timing_result
-            )
+                timing_result,
+                dtype=dtype,
+                sched_type=sched_type,
+                mem_scope=mem_scope,
+                num_vectors_per_tensor=num_vectors_per_tensor,
+            )
 
         except Exception as err:
             f.write("ERROR:\n")
             f.write("{}\n".format(err))
             br.record_failure(
-                dtype, sched_type, mem_scope, num_vectors_per_tensor, f"See {report_path}"
-            )
+                dtype=dtype,
+                sched_type=sched_type,
+                mem_scope=mem_scope,
+                num_vectors_per_tensor=num_vectors_per_tensor,
+                comments=f"See {report_path}",
+            )
 
     # -----------------------------------------------------------------------------------------------
 
+    csv_column_order = [
+        'dtype',
+        'sched_type',
+        'mem_scope',
+        'num_vectors_per_tensor',
+        'row_status',
+        "timings_median_usecs",
+        "timings_min_usecs",
+        "timings_max_usecs",
+        'comments',
+    ]
+
     # Hexagon v69 allows more dtypes, but we're sticking with v68 for now.
     for dtype in [
         "int8",
@@ -300,7 +236,7 @@ def test_one_config(dtype, sched_type, mem_scope, num_vectors_per_tensor):
                     test_one_config(dtype, sched_type, mem_scope, num_vectors_per_tensor)
 
     # Report our progress.
-    br.dump(sys.stdout)
+    br.print_csv(sys.stdout, csv_column_order)
 
     print("-" * 80)
     print(f"OUTPUT DIRECTORY: {host_output_dir}")
@@ -309,8 +245,8 @@ def test_one_config(dtype, sched_type, mem_scope, num_vectors_per_tensor):
 
     tabular_output_filename = os.path.join(host_output_dir, "benchmark-results.csv")
     with open(tabular_output_filename, "w") as csv_file:
-        br.dump(csv_file)
+        br.print_csv(csv_file, csv_column_order)
     print(f"BENCHMARK RESULTS FILE: {tabular_output_filename}")
 
-    if br.num_failures() > 0:
+    if br.has_fail():
         pytest.fail("At least one benchmark configuration failed", pytrace=False)
diff --git a/tests/python/contrib/test_hexagon/benchmarks_table.py b/tests/python/contrib/test_hexagon/benchmarks_table.py
new file mode 100644
index 000000000000..5ebebf2568f5
--- /dev/null
+++ b/tests/python/contrib/test_hexagon/benchmarks_table.py
@@ -0,0 +1,231 @@
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import csv
+
+class benchmarks_table:
+    """
+    Stores/reports the results of benchmark runs.
+
+    Each line item has a status: success, fail, or skip.
+
+    Each 'success' line item must include benchmark data,
+    in the form provided by TVM's `time_evaluator` mechanism.
+
+    Each line item may also specify values for any subset of
+    the columns provided to the table's constructor.
+    """
+
+    BUILTIN_COLUMN_NAMES_TO_DESCS = {
+        "row_status":"status",
+        "timings_median_usecs":"median(µsec)",
+        "timings_min_usecs":"min(µsec)",
+        "timings_max_usecs":"max(µsec)",
+    }
+
+    class column_metadata_:
+        def __init__(self, name, is_reserved, header_text):
+            self.name = name
+            self.is_reserved = is_reserved
+            self.header_text = header_text
+
+    class column_collection_metadata_:
+        def __init__(self):
+            self.by_name = {}
+            self.by_header_text = {}
+
+        def add(self, cm):
+            if cm.name in self.by_name:
+                raise Exception(f"A column already exists with name '{cm.name}'")
+
+            if cm.header_text in self.by_header_text:
+                raise Exception(f"A column already exists with header_text '{cm.header_text}'")
+
+            self.by_name[ cm.name ] = cm
+            self.by_header_text[ cm.header_text ] = cm
+
+        def get_column_names(self):
+            return set(self.by_name.keys())
+
+        def get_unreserved_column_names(self):
+            return set([ k for k,v in self.by_name.items() if not v.is_reserved])
+
+        def get_reserved_column_names(self):
+            return set([ k for k,v in self.by_name.items() if v.is_reserved])
+
+        def get_ordered_by_name_sequence(self, name_sequence):
+            """
+            Returns a list of column_metadata objects, ordered according to
+            `name_sequence`.
+            """
+            return_list = []
+            for column_name in name_sequence:
+                assert column_name in self.by_name
+                return_list.append(self.by_name[column_name])
+            return return_list
+
+        def convert_dict_key_from_column_name_to_header_text(self, d_in):
+            """
+            `d_in` : A dictionary whose keys are a subset of those in `self.by_name`.
+
+            Returns a new dictionary whose keys have been replaced with the
+            corresponding `header_text`.
+
+            Useful for things like csv.DictWriter.
+            """
+            d_out = {}
+
+            for k_in,v in d_in.items():
+                k_out = self.by_name[k_in].header_text
+                d_out[ k_out ] = v
+
+            return d_out
+
+    def __init__(self, user_column_defns):
+        """
+        `user_column_defns` : A dictionary of the form
+        (column_name : column_description).
+
+        The combination of this dictionary and the
+        BUILTIN_COLUMN_NAMES_TO_DESCS dictionary defines the set
+        of columns that the benchmark table supports.
+
+        In the combined dictionary, no two columns can have
+        the same name or the same description.
+        """
+        self.all_cols_metadata_ = self.column_collection_metadata_()
+
+        for col_name, col_header_text in self.BUILTIN_COLUMN_NAMES_TO_DESCS.items():
+            self.all_cols_metadata_.add(self.column_metadata_(col_name, True, col_header_text))
+
+        for col_name, col_header_text in user_column_defns.items():
+            self.all_cols_metadata_.add(self.column_metadata_(col_name, False, col_header_text))
+
+        self.line_items_ = []
+
+    def validate_user_supplied_kwargs(self, kwarg_dict):
+        provided_column_names = set(kwarg_dict.keys())
+        defined_column_names = self.all_cols_metadata_.get_column_names()
+        reserved_column_names = self.all_cols_metadata_.get_reserved_column_names()
+
+        reserved_names_used = provided_column_names.intersection(reserved_column_names)
+        undefined_names_used = provided_column_names - defined_column_names
+
+        if len(reserved_names_used) > 0:
+            name_list = ', '.join(reserved_names_used)
+            raise Exception(f'Cannot supply a value for reserved column names: {name_list}')
+
+        if len(undefined_names_used) > 0:
+            name_list = ', '.join(undefined_names_used)
+            raise Exception(f'Cannot supply a value for undefined column names: {name_list}')
+
+    def record_success(self, timings, **kwargs):
+        """
+        `timings` : Assumed to have the structure and syntax of
+        the timing results provided by TVM's `time_evaluator`
+        mechanism.
+
+        `kwargs` : Optional values for any of the other columns
+        defined for this benchmark table.
+        """
+        self.validate_user_supplied_kwargs(kwargs)
+
+        line_item = dict(kwargs)
+
+        line_item['row_status'] = 'SUCCESS'
+        line_item['timings_min_usecs'] = timings.min * 1000000
+        line_item['timings_max_usecs'] = timings.max * 1000000
+        line_item['timings_median_usecs'] = timings.median * 1000000
+
+        self.line_items_.append(line_item)
+
+    def record_skip(self, **kwargs):
+        self.validate_user_supplied_kwargs(kwargs)
+
+        line_item = dict(kwargs)
+        line_item['row_status'] = 'SKIP'
+        self.line_items_.append(line_item)
+
+    def record_failure(self, **kwargs):
+        self.validate_user_supplied_kwargs(kwargs)
+
+        line_item = dict(kwargs)
+        line_item['row_status'] = 'FAIL'
+        self.line_items_.append(line_item)
+
+    def has_fail(self):
+        """
+        Returns True if the table contains at least one 'FAIL' line item,
+        otherwise returns False.
+        """
+        for li in self.line_items_:
+            if li['row_status'] == 'FAIL':
+                return True
+
+        return False
+
+    def print_csv(self, f, column_name_order, timing_decimal_places=3):
+        """
+        Print the benchmark results as a csv.
+
+        `f` : The output stream.
+
+        `column_name_order`: an iterable sequence of column names, indicating the
+            order of columns in the CSV output.
+            Each string must be one of the column names provided by
+            BUILTIN_COLUMN_NAMES_TO_DESCS or provided to the class constructor.
+
+            The CSV output will contain only those columns that are mentioned in
+            this list.
+
+        `timing_decimal_places`: for the numeric timing values, this is the
+            number of decimal places to provide in the printed output.
+            For example, a value of 3 is equivalent to the Python formatting string
+            `'{:.3f}'`
+        """
+        csv.register_dialect(
+            "benchmarks",
+            delimiter="\t",
+            quotechar='"',
+            quoting=csv.QUOTE_MINIMAL,
+        )
+
+        output_order_cm_list = self.all_cols_metadata_.get_ordered_by_name_sequence(column_name_order)
+
+        output_order_header_texts = [ cm.header_text for cm in output_order_cm_list ]
+
+        writer = csv.DictWriter(f, output_order_header_texts, dialect="benchmarks", restval="")
+
+        writer.writeheader()
+        for line_item_dict in self.line_items_:
+            for k in [
+                "timings_median_usecs",
+                "timings_min_usecs",
+                "timings_max_usecs",
+            ]:
+                if k in line_item_dict:
+                    old_value = line_item_dict[k]
+                    if isinstance(old_value, float):
+                        str_value = f"{old_value:>0.{timing_decimal_places}f}"
+                        line_item_dict[k] = str_value
+
+            # self.line_items_ is a list of dictionaries, where each dictionary is indexed
+            # by column *name*. DictWriter requires dictionaries that are indexed by *header text*.
+            csv_line_item_dict = \
+                self.all_cols_metadata_.convert_dict_key_from_column_name_to_header_text(line_item_dict)
+
+            writer.writerow(csv_line_item_dict)
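For reviewers who want to exercise the new helper outside the Hexagon test, the following is a minimal usage sketch, not part of the patch. The flat import path and the `SimpleNamespace` stand-in for a `time_evaluator` result are assumptions made for illustration; in the test tree the real import is the relative `from .benchmarks_table import benchmarks_table`, and a real run would pass the BenchmarkResult returned by TVM's `time_evaluator`.

# usage_sketch.py -- illustrative only, assumes benchmarks_table.py is on the import path
import sys
from types import SimpleNamespace

from benchmarks_table import benchmarks_table  # hypothetical flat import for this sketch

# Two user-defined columns ('dtype', 'comments'); the status and timing columns are built in.
br = benchmarks_table(
    {
        "dtype": "dtype",
        "comments": "comments",
    }
)

# Stand-in for a time_evaluator BenchmarkResult; attribute values are in seconds.
fake_timings = SimpleNamespace(median=1.8e-05, min=1.2e-05, max=2.5e-05)

br.record_success(fake_timings, dtype="int8")
br.record_skip(dtype="float16", comments="not supported on this target")
br.record_failure(dtype="int64", comments="see the per-config report file")

# Emit a tab-separated table; the column order may mix built-in and user-defined names.
br.print_csv(sys.stdout, ["dtype", "row_status", "timings_median_usecs", "comments"])

assert br.has_fail()

Because every row is a plain dict keyed by column name and `csv.DictWriter` is created with `restval=""`, rows that lack timing data (SKIP and FAIL rows) simply print empty cells in the timing columns.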