From 187f7a504a221fcc94fb461101efba4a093c62ec Mon Sep 17 00:00:00 2001 From: NorthmanPKU <72958002+NorthmanPKU@users.noreply.github.com> Date: Fri, 25 Oct 2024 22:14:53 -0400 Subject: [PATCH] Visualizer Ver1.0 (#113) * gitignore * init * [Draft] Complete basic structure; still hitting a type conversion problem * Update core.pyx * Visualizer Python interface * Support newly added op types * Restore cutlass submodule to commit cc3c29a * Repair minor issues: replace magic type numbers, remove a useless variable in a function, and remove block_dim --------- Co-authored-by: Mengdi Wu <48128384+wmdi@users.noreply.github.com> Co-authored-by: Jianan Ji --- .gitignore | 7 + demo/reference_mugraphs/gated_mlp.py | 1 + demo/reference_mugraphs/lora.py | 3 +- include/mirage/kernel/customized.h | 1 + include/mirage/kernel/operator.h | 3 + include/mirage/threadblock/operator.h | 5 + python/mirage/_cython/CCore.pxd | 108 +++++- python/mirage/_cython/core.pyx | 425 ++++++++++++++++++++++- python/mirage/kernel.py | 7 + python/mirage/visualizer.py | 467 ++++++++++++++++++++++++++ src/kernel/customized.cc | 4 + src/kernel/operator.cc | 13 + src/threadblock/input_loader.cc | 2 + src/threadblock/operator.cc | 14 + src/threadblock/output.cc | 3 + 15 files changed, 1041 insertions(+), 22 deletions(-) create mode 100644 python/mirage/visualizer.py diff --git a/.gitignore b/.gitignore index fbf98f1..3edf0e1 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,10 @@ dist/ # Cython /python/mirage/_cython/core.cpp + +# Mac OS .DS_Store +.DS_Store + +# Visualizer results +*.png +*.dot \ No newline at end of file diff --git a/demo/reference_mugraphs/gated_mlp.py b/demo/reference_mugraphs/gated_mlp.py index fc2cf7d..4c3a12b 100644 --- a/demo/reference_mugraphs/gated_mlp.py +++ b/demo/reference_mugraphs/gated_mlp.py @@ -60,3 +60,4 @@ def torch_gated_mlp(X, W1, W2): mean_syn = curr_time / 1000 #print(timings) print(mean_syn) + graph.visualize("gated_mlp") diff --git a/demo/reference_mugraphs/lora.py b/demo/reference_mugraphs/lora.py index ec20870..328dd11 100644 --- a/demo/reference_mugraphs/lora.py +++ b/demo/reference_mugraphs/lora.py @@ -2,6 +2,7 @@ import argparse import os import torch +from mirage import visualizer @torch.compile(backend="cudagraphs") def torch_lora(X, W, A, B): @@ -44,7 +45,7 @@ def optimize_lora(checkpoint): curr_time = starter.elapsed_time(ender) mean_syn = curr_time / 1000 print(mean_syn) - + graph.visualize("lora") if __name__ == "__main__": parser = argparse.ArgumentParser() diff --git a/include/mirage/kernel/customized.h b/include/mirage/kernel/customized.h index ca84a6d..34cbb39 100644 --- a/include/mirage/kernel/customized.h +++ b/include/mirage/kernel/customized.h @@ -39,6 +39,7 @@ class KNCustomizedOp : public mirage::kernel::KNOperator { public: mirage::threadblock::Graph bgraph; + void get_bgraph(mirage::threadblock::Graph** bgraph); }; } // namespace kernel diff --git a/include/mirage/kernel/operator.h b/include/mirage/kernel/operator.h index 129c34a..bfd3eae 100644 --- a/include/mirage/kernel/operator.h +++ b/include/mirage/kernel/operator.h @@ -38,6 +38,9 @@ class KNOperator { KNOperator(Graph *graph, mirage::type::KNOperatorType _type, std::vector<DTensor> const &inputs); + int get_input_dtensors(DTensor** inputs); + int get_output_dtensors(DTensor** outputs); + virtual ~KNOperator(); virtual bool profile(ProfileResult &result) = 0; virtual bool fingerprint(void) = 0; diff --git a/include/mirage/threadblock/operator.h b/include/mirage/threadblock/operator.h index 17c3ba7..d09c7a0 100644 ---
a/include/mirage/threadblock/operator.h +++ b/include/mirage/threadblock/operator.h @@ -36,6 +36,9 @@ class TBOperator { TBOperator(Graph *graph, mirage::type::TBOperatorType, std::vector const &inputs); + int get_input_stensors(STensor** inputs); + int get_output_stensors(STensor** inputs); + virtual ~TBOperator(); virtual operator json() const = 0; @@ -57,6 +60,7 @@ class TBInputOp : public TBOperator { ~TBInputOp(); operator json() const override; + size_t get_dtensor_guid(); public: mirage::kernel::DTensor dtensor; @@ -74,6 +78,7 @@ class TBOutputOp : public TBOperator { ~TBOutputOp(); operator json() const override; + size_t get_dtensor_guid(); public: mirage::kernel::DTensor dtensor; diff --git a/python/mirage/_cython/CCore.pxd b/python/mirage/_cython/CCore.pxd index a6deccb..51f6b46 100644 --- a/python/mirage/_cython/CCore.pxd +++ b/python/mirage/_cython/CCore.pxd @@ -45,12 +45,70 @@ cdef extern from "mirage/type.h" namespace "mirage::type": TB_EPILOGUE_ALLREDUCE = 3101, TB_EPILOGUE_ALLTOALL = 3102, TB_EPILOGUE_INVALID = 3199, + cdef enum KNOperatorType: + KN_UNKOWN = 1000, + KN_INPUT_OP = 1001, + KN_OUTPUT_OP = 1002, + KN_MATMUL_OP = 1003, + # ElementUnary + KN_EXP_OP = 1100, + KN_SQUARE_OP = 1101, + KN_SQRT_OP = 1102, + KN_SILU_OP = 1103, + # ElementBinary + KN_ADD_OP = 1200, + KN_MUL_OP = 1201, + KN_DIV_OP = 1202, + # Reduction & Normalization + KN_REDUCTION_0_OP = 1300, + KN_REDUCTION_1_OP = 1301, + KN_REDUCTION_2_OP = 1302, + KN_RMS_NORM_OP = 1350, + # Communication + KN_ALLREDUCE_OP = 1400, + KN_CUSTOMIZED_OP = 1999, cdef enum TBOperatorType: + TB_UNKOWN = 2000, + TB_INPUT_OP = 2001, + TB_OUTPUT_OP = 2002, + TB_MATMUL_OP = 2003, + # ElementUnary + TB_EXP_OP = 2100, + TB_SQUARE_OP = 2101, + TB_SQRT_OP = 2102, + TB_SILU_OP = 2103, + TB_MUL_SCALAR_OP = 2104, + # ElementBinary + TB_ADD_OP = 2200, + TB_MUL_OP = 2201, + TB_DIV_OP = 2202, + # Reduction and Normalization + TB_REDUCTION_FIRST_OP_ID = 2300, + TB_REDUCTION_0_OP = 2301, + TB_REDUCTION_1_OP = 2302, + TB_REDUCTION_2_OP = 2303, + TB_REDUCTION_0_TO_DIMX_OP = 2304, + TB_REDUCTION_1_TO_DIMX_OP = 2305, + TB_REDUCTION_2_TO_DIMX_OP = 2306, + TB_REDUCTION_LAST_OP_ID = 2349, + TB_RMS_NORM_OP = 2350, + # Concat + TB_CONCAT_FIRST_OP_ID = 2400, + TB_CONCAT_0_OP = 2400, + TB_CONCAT_1_OP = 2401, + TB_CONCAT_2_OP = 2402, + TB_CONCAT_LAST_OP_ID = 2410, + TB_CONCAT_THEN_MATMUL_OP = 2411, + # Forloop Accum + # LD indicates last dimension + TB_FORLOOP_ACCUM_FIRST_OP = 2500, TB_FORLOOP_ACCUM_NO_RED_OP = 2500, TB_FORLOOP_ACCUM_RED_LD_SUM_OP = 2501, TB_FORLOOP_ACCUM_RED_LD_MEAN_OP = 2502, TB_FORLOOP_ACCUM_RED_LD_RMS_OP = 2503, TB_FORLOOP_ACCUM_REDTOX_LD_SUM_OP = 2504, + TB_FORLOOP_ACCUM_LAST_OP = 2599, + TB_CUSTOMIZED_OP = 2999 cdef extern from "mirage/layout.h" namespace "mirage::layout": # This must be consistent with mirage/layout.h @@ -63,10 +121,10 @@ cdef extern from "mirage/layout.h" namespace "mirage::layout": SmemColumnMajor = 201, SmemUnknownLayout = 299 -cdef extern from "mirage/kernel/graph.h" namespace "mirage::kernel": - cdef cppclass KNOperator: - pass - ctypedef struct CppDTensor "mirage::kernel::DTensor": +cdef cppclass CppTBGraph "mirage::threadblock::Graph" + +cdef extern from "mirage/kernel/device_tensor.h" namespace "mirage::kernel": + cdef struct CppDTensor "mirage::kernel::DTensor": DataType data_type DmemLayout layout int num_dims @@ -75,7 +133,19 @@ cdef extern from "mirage/kernel/graph.h" namespace "mirage::kernel": #KNOperator *owner_op #void *data_ptr int owner_ts_idx - pass + +cdef extern from "mirage/kernel/graph.h" 
namespace "mirage::kernel": + + cdef cppclass CppKNOperator "mirage::kernel::KNOperator": + KNOperatorType op_type + vector[CppDTensor] input_tensors + vector[CppDTensor] output_tensors + int get_input_dtensors(CppDTensor** cinputs) + int get_output_dtensors(CppDTensor** cinputs) + + cdef cppclass CppKNCustomizedOp "mirage::kernel::KNCustomizedOp"(CppKNOperator): + CppTBGraph bgraph + void get_bgraph(CppTBGraph** bgraph) cdef cppclass CppKNGraph "mirage::kernel::Graph": CppKNGraph() @@ -99,16 +169,33 @@ cdef extern from "mirage/kernel/graph.h" namespace "mirage::kernel": int get_input_dtensor_layout(const CppDTensor *input, int *strides) void generate_triton_program(const char *filepath) void generate_cuda_program(const char *filepath) + vector[CppKNOperator*] operators cdef extern from "mirage/threadblock/graph.h" namespace "mirage::threadblock": - cdef cppclass TBOperator: - pass ctypedef struct CppSTensor "mirage::threadblock::STensor": DataType data_type SmemLayout layout int num_dims int dim[4] - int owner_ts_id + int owner_ts_idx + size_t guid + + cdef cppclass CppTBOperator "mirage::threadblock::TBOperator": + TBOperatorType op_type + vector[CppSTensor] input_tensors + vector[CppSTensor] output_tensors + int get_input_stensors(CppSTensor** cinputs) + int get_output_stensors(CppSTensor** cinputs) + + cdef cppclass CppTBInputOp "mirage::threadblock::TBInputOp"(CppTBOperator): + int forloop_dim + int3 input_map + size_t get_dtensor_guid() + + cdef cppclass CppTBOutputOp "mirage::threadblock::TBOutputOp"(CppTBOperator): + int forloop_dim + int3 output_map + size_t get_dtensor_guid() cdef cppclass CppTBGraph "mirage::threadblock::Graph": CppTBGraph(dim3 grid_dim, @@ -142,6 +229,11 @@ cdef extern from "mirage/threadblock/graph.h" namespace "mirage::threadblock": int dim) CppSTensor* forloop_accum(const CppSTensor *A, TBOperatorType optype) + dim3 grid_dim + dim3 block_dim + int forloop_range + int reduction_dimx + vector[CppTBOperator*] operators cdef extern from "mirage/search/search_c.h" namespace "mirage::search_c": ctypedef struct MInt3: diff --git a/python/mirage/_cython/core.pyx b/python/mirage/_cython/core.pyx index 3e362a4..f1f7ade 100644 --- a/python/mirage/_cython/core.pyx +++ b/python/mirage/_cython/core.pyx @@ -101,6 +101,118 @@ bfloat16 = dtype('bf16') float32 = dtype('fp32') float64 = dtype('fp64') +def get_kn_operator_type_string(int op_type): + if op_type == KN_UNKOWN: + return "kn_unknown" + elif op_type == KN_INPUT_OP: + return "kn_input_op" + elif op_type == KN_OUTPUT_OP: + return "kn_output_op" + elif op_type == KN_MATMUL_OP: + return "kn_matmul_op" + elif op_type == KN_EXP_OP: + return "kn_exp_op" + elif op_type == KN_SQUARE_OP: + return "kn_square_op" + elif op_type == KN_SQRT_OP: + return "kn_sqrt_op" + elif op_type == KN_SILU_OP: + return "kn_silu_op" + elif op_type == KN_ADD_OP: + return "kn_add_op" + elif op_type == KN_MUL_OP: + return "kn_mul_op" + elif op_type == KN_DIV_OP: + return "kn_div_op" + elif op_type == KN_REDUCTION_0_OP: + return "kn_reduction_0_op" + elif op_type == KN_REDUCTION_1_OP: + return "kn_reduction_1_op" + elif op_type == KN_REDUCTION_2_OP: + return "kn_reduction_2_op" + elif op_type == KN_RMS_NORM_OP: + return "kn_rms_norm_op" + elif op_type == KN_ALLREDUCE_OP: + return "kn_allreduce_op" + elif op_type == KN_CUSTOMIZED_OP: + return "kn_customized_op" + else: + return "unknown_op_type" + str(op_type) + + +def get_tb_operator_type_string(int op_type): + if op_type == TB_UNKOWN: + return "tb_unknown" + elif op_type == TB_INPUT_OP: + return 
"tb_input_op" + elif op_type == TB_OUTPUT_OP: + return "tb_output_op" + elif op_type == TB_MATMUL_OP: + return "tb_matmul_op" + elif op_type == TB_EXP_OP: + return "tb_exp_op" + elif op_type == TB_SQUARE_OP: + return "tb_square_op" + elif op_type == TB_SQRT_OP: + return "tb_sqrt_op" + elif op_type == TB_SILU_OP: + return "tb_silu_op" + elif op_type == TB_MUL_SCALAR_OP: + return "tb_mul_scalar_op" + elif op_type == TB_ADD_OP: + return "tb_add_op" + elif op_type == TB_MUL_OP: + return "tb_mul_op" + elif op_type == TB_DIV_OP: + return "tb_div_op" + elif op_type == TB_REDUCTION_FIRST_OP_ID: + return "tb_reduction_first_op_id" + elif op_type == TB_REDUCTION_0_OP: + return "tb_reduction_0_op" + elif op_type == TB_REDUCTION_1_OP: + return "tb_reduction_1_op" + elif op_type == TB_REDUCTION_2_OP: + return "tb_reduction_2_op" + elif op_type == TB_REDUCTION_0_TO_DIMX_OP: + return "tb_reduction_0_to_dimx_op" + elif op_type == TB_REDUCTION_1_TO_DIMX_OP: + return "tb_reduction_1_to_dimx_op" + elif op_type == TB_REDUCTION_2_TO_DIMX_OP: + return "tb_reduction_2_to_dimx_op" + elif op_type == TB_REDUCTION_LAST_OP_ID: + return "tb_reduction_last_op_id" + elif op_type == TB_RMS_NORM_OP: + return "tb_rms_norm_op" + elif op_type == TB_CONCAT_FIRST_OP_ID: + return "tb_concat_first_op_id" + elif op_type == TB_CONCAT_0_OP: + return "tb_concat_0_op" + elif op_type == TB_CONCAT_1_OP: + return "tb_concat_1_op" + elif op_type == TB_CONCAT_2_OP: + return "tb_concat_2_op" + elif op_type == TB_CONCAT_LAST_OP_ID: + return "tb_concat_last_op_id" + elif op_type == TB_CONCAT_THEN_MATMUL_OP: + return "tb_concat_then_matmul_op" + elif op_type == TB_FORLOOP_ACCUM_FIRST_OP: + return "tb_forloop_accum_first_op" + elif op_type == TB_FORLOOP_ACCUM_NO_RED_OP: + return "tb_forloop_accum_no_red_op" + elif op_type == TB_FORLOOP_ACCUM_RED_LD_SUM_OP: + return "tb_forloop_accum_red_ld_sum_op" + elif op_type == TB_FORLOOP_ACCUM_RED_LD_MEAN_OP: + return "tb_forloop_accum_red_ld_mean_op" + elif op_type == TB_FORLOOP_ACCUM_RED_LD_RMS_OP: + return "tb_forloop_accum_red_ld_rms_op" + elif op_type == TB_FORLOOP_ACCUM_REDTOX_LD_SUM_OP: + return "tb_forloop_accum_redtox_ld_sum_op" + elif op_type == TB_CUSTOMIZED_OP: + return "tb_customized_op" + else: + return "unknown_op_type" + str(op_type) + + def convert_dtype_to_ctype(type : dtype): if type.is_int8(): return DT_INT8 @@ -168,6 +280,13 @@ cdef class DTensor: ptr = ctypes.cast(tensor, ctypes.c_void_p).value self.c_ptr = (ptr) + property guid: + def __get__(self): + if self.c_ptr == NULL: + return None + else: + return self.c_ptr.guid + property tensor: def __get__(self): if self.c_ptr == NULL: @@ -181,6 +300,7 @@ cdef class DTensor: property num_dims: def __get__(self): if self.c_ptr == NULL: + print("Error: tensor is None in num_dims property") return None else: return self.c_ptr.num_dims @@ -212,7 +332,12 @@ cdef class STensor: else: ptr = ctypes.cast(tensor, ctypes.c_void_p).value self.c_ptr = (ptr) - + property guid: + def __get__(self): + if self.c_ptr == NULL: + return None + else: + return self.c_ptr.guid property tensor: def __get__(self): if self.c_ptr == NULL: @@ -240,13 +365,181 @@ cdef class STensor: def __cinit__(self, tensor): self._set_tensor(tensor) - def dim(self, int idx): + def dim(self, int idx): if (idx < self.c_ptr.num_dims): return self.c_ptr.dim[idx] else: assert False , "Error: index out of range" return None +cdef class CyKNOperator: + cdef CppKNOperator* c_ptr # Hold a CppKNOperator instance + + cdef inline _set_operator(self, op): + cdef unsigned long long ptr + if op 
is None: + self.c_ptr = (NULL) + else: + ptr = ctypes.cast(op, ctypes.c_void_p).value + self.c_ptr = (ptr) + + def get_input_dtensors(self): + cdef CppDTensor* cinputs[1024] + num = self.c_ptr.get_input_dtensors(cinputs) + inputs = list() + for i in range(num): + ptr = ctypes.cast(cinputs[i], ctypes.c_void_p) + inputs.append(DTensor(ptr)) + return inputs + + def get_output_dtensors(self): + cdef CppDTensor* coutputs[1024] + num = self.c_ptr.get_output_dtensors(coutputs) + outputs = list() + for i in range(num): + ptr = ctypes.cast(coutputs[i], ctypes.c_void_p) + outputs.append(DTensor(ptr)) + return outputs + + property op_type: + def __get__(self): + if self.c_ptr == NULL: + return None + else: + return get_kn_operator_type_string(int(self.c_ptr.op_type)) + + def __cinit__(self, op): + self._set_operator(op) + +cdef class CyKNCustomizedOp(CyKNOperator): + cdef CppKNCustomizedOp* c_customized_ptr + + def __cinit__(self, op): + cdef unsigned long long ptr + if op is None: + self.c_customized_ptr = (NULL) + else: + ptr = ctypes.cast(op, ctypes.c_void_p).value + self.c_customized_ptr = (ptr) + + def get_bgraph(self): + cdef CppTBGraph* bgraph + self.c_customized_ptr.get_bgraph(&bgraph) + + ptr = ctypes.cast(bgraph, ctypes.c_void_p) + cybgraph = CyTBGraph(bgraph = ptr) + return cybgraph + +cdef class CyTBOperator: + cdef CppTBOperator* c_ptr # Hold a CppTBOperator instance + + cdef inline _set_operator(self, op): + cdef unsigned long long ptr + if op is None: + self.c_ptr = (NULL) + else: + ptr = ctypes.cast(op, ctypes.c_void_p).value + self.c_ptr = (ptr) + + def get_input_stensors(self): + cdef CppSTensor* cinputs[1024] + num = self.c_ptr.get_input_stensors(cinputs) + inputs = list() + for i in range(num): + ptr = ctypes.cast(cinputs[i], ctypes.c_void_p) + inputs.append(STensor(ptr)) + return inputs + + def get_output_stensors(self): + cdef CppSTensor* coutputs[1024] + num = self.c_ptr.get_output_stensors(coutputs) + outputs = list() + for i in range(num): + ptr = ctypes.cast(coutputs[i], ctypes.c_void_p) + outputs.append(STensor(ptr)) + return outputs + + property op_type: + def __get__(self): + if self.c_ptr == NULL: + return None + else: + return get_tb_operator_type_string(int(self.c_ptr.op_type)) + + def __cinit__(self, op): + self._set_operator(op) + +cdef class CyTBInputOp(CyTBOperator): + cdef CppTBInputOp* c_input_ptr + + def __cinit__(self, op): + cdef unsigned long long ptr + if op is None: + self.c_input_ptr = (NULL) + else: + ptr = ctypes.cast(op, ctypes.c_void_p).value + self.c_input_ptr = (ptr) + + property input_map: + def __get__(self): + if self.c_input_ptr == NULL: + return None + else: + return { + "x": self.c_input_ptr.input_map.x, + "y": self.c_input_ptr.input_map.y, + "z": self.c_input_ptr.input_map.z + } + + property forloop_dim: + def __get__(self): + if self.c_input_ptr == NULL: + return None + else: + return self.c_input_ptr.forloop_dim + + property dtensor_guid: + def __get__(self): + if self.c_input_ptr == NULL: + return None + else: + return self.c_input_ptr.get_dtensor_guid() + +cdef class CyTBOutputOp(CyTBOperator): + cdef CppTBOutputOp* c_output_ptr + + def __cinit__(self, op): + cdef unsigned long long ptr + if op is None: + self.c_output_ptr = (NULL) + else: + ptr = ctypes.cast(op, ctypes.c_void_p).value + self.c_output_ptr = (ptr) + + property output_map: + def __get__(self): + if self.c_output_ptr == NULL: + return None + else: + return { + "x": self.c_output_ptr.output_map.x, + "y": self.c_output_ptr.output_map.y, + "z": self.c_output_ptr.output_map.z + } 
+ + property forloop_dim: + def __get__(self): + if self.c_output_ptr == NULL: + return None + else: + return self.c_output_ptr.forloop_dim + + property dtensor_guid: + def __get__(self): + if self.c_output_ptr == NULL: + return None + else: + return self.c_output_ptr.get_dtensor_guid() cdef class CyKNGraph: cdef CppKNGraph *p_kgraph #Hold a CppKNGraph instance @@ -328,7 +621,7 @@ cdef class CyKNGraph: t = ctypes.cast(ptr, ctypes.c_void_p) return DTensor(t) - def customized(self, list[DTensor] inputs, CyTBGraph bgraph): + def customized(self, list inputs, CyTBGraph bgraph): cdef vector[const CppDTensor*] cinputs cinputs.resize(len(inputs)) cdef DTensor t @@ -359,6 +652,77 @@ cdef class CyKNGraph: ptr = ctypes.cast(cinputs[i], ctypes.c_void_p) inputs.append(DTensor(ptr)) return inputs + + # visualizer utils + + def _kn_tensor_to_dict(self, DTensor t): + return { + "num_dims": t.num_dims, + "dim": [t.dim(i) for i in range(t.num_dims)], + "guid": t.guid + } + + def _tb_tensor_to_dict(self, STensor t): + return { + "num_dims": t.num_dims, + "dim": [t.dim(i) for i in range(t.num_dims)], + "guid": t.guid + } + + def _get_tb_operator_info(self, CyTBOperator op): + ans = { + "op_type": op.op_type, + "input_tensors": [self._tb_tensor_to_dict(t) for t in op.get_input_stensors()], + "output_tensors": [self._tb_tensor_to_dict(t) for t in op.get_output_stensors()], + } + if "input" in op.op_type: + input_op = CyTBInputOp(ctypes.cast((op.c_ptr), ctypes.c_void_p)) + ans["input_map"] = input_op.input_map + ans["forloop_dim"] = input_op.forloop_dim + ans["dtensor"] = { + "guid": input_op.dtensor_guid + } + elif "output" in op.op_type: + output_op = CyTBOutputOp(ctypes.cast((op.c_ptr), ctypes.c_void_p)) + ans["output_map"] = output_op.output_map + ans["forloop_dim"] = output_op.forloop_dim + ans["dtensor"] = { + "guid": output_op.dtensor_guid + } + return ans + + def _get_bgraph_info(self, CyKNOperator op): + cop = CyKNCustomizedOp(ctypes.cast((op.c_ptr), ctypes.c_void_p)) + bgraph = cop.get_bgraph() + return { + "grid_dim": bgraph.grid_dim, + "forloop_range": bgraph.forloop_range, + "operators": [self._get_tb_operator_info(i) for i in bgraph.operators] + } + + def _get_kn_operator_info(self, CyKNOperator op): + if op.op_type == "kn_customized_op": + return { + "op_type": op.op_type, + "input_tensors": [self._kn_tensor_to_dict(t) for t in op.get_input_dtensors()], + "output_tensors": [self._kn_tensor_to_dict(t) for t in op.get_output_dtensors()], + "bgraph": self._get_bgraph_info(op) + } + else: + return { + "op_type": op.op_type, + "input_tensors": [self._kn_tensor_to_dict(t) for t in op.get_input_dtensors()], + "output_tensors": [self._kn_tensor_to_dict(t) for t in op.get_output_dtensors()], + } + + def get_graph_structure(self): + operators = [] + ops = self.p_kgraph.operators + for i in range(ops.size()): + op = CyKNOperator(None) + op.c_ptr = ops[i] + operators.append(self._get_kn_operator_info(op)) + return operators def get_input_dtensor_layout(self, DTensor A): cdef int cstrides[128] @@ -371,18 +735,31 @@ cdef class CyKNGraph: cdef class CyTBGraph: cdef CppTBGraph *p_bgraph #Hold a CppTBGraph instance - def __cinit__(self, tuple grid_dim, tuple block_dim, int forloop_range, int dimx): - assert len(grid_dim) == 3, "grid_dim must include 3 dimensions" - assert len(block_dim) == 3, "block_dim must include 3 dimensions" + def __cinit__(self, tuple grid_dim = (), tuple block_dim = (), int forloop_range = -1, int dimx = -1, bgraph = None): + cdef unsigned long long ptr cdef dim3 c_grid_dim - c_grid_dim.x 
= grid_dim[0] - c_grid_dim.y = grid_dim[1] - c_grid_dim.z = grid_dim[2] cdef dim3 c_block_dim - c_block_dim.x = block_dim[0] - c_block_dim.y = block_dim[1] - c_block_dim.z = block_dim[2] - self.p_bgraph = new CppTBGraph(c_grid_dim, c_block_dim, forloop_range, dimx) + if bgraph is None: + if len(grid_dim) == 0 or len(block_dim) == 0 or forloop_range == -1 or dimx == -1: + assert False, "grid_dim, block_dim, forloop_range, dimx must be provided" + assert len(grid_dim) == 3, "grid_dim must include 3 dimensions" + assert len(block_dim) == 3, "block_dim must include 3 dimensions" + c_grid_dim.x = grid_dim[0] + c_grid_dim.y = grid_dim[1] + c_grid_dim.z = grid_dim[2] + c_block_dim.x = block_dim[0] + c_block_dim.y = block_dim[1] + c_block_dim.z = block_dim[2] + self.p_bgraph = new CppTBGraph(c_grid_dim, c_block_dim, forloop_range, dimx) + else: + ptr = ctypes.cast(bgraph, ctypes.c_void_p).value + if isinstance(bgraph, int): + self.p_bgraph = (ptr) + elif isinstance(bgraph, ctypes.c_void_p): + self.p_bgraph = (ptr) + else: + assert False, "bgraph must be an integer or ctypes.c_void_p, but got " + str(type(bgraph)) + def new_input(self, DTensor dtensor, tuple input_map, int forloop_dim): assert len(input_map) == 3, "input_map must be of length 3" @@ -464,6 +841,28 @@ cdef class CyTBGraph: t = ctypes.cast(ptr, ctypes.c_void_p) return STensor(t) + property grid_dim: + def __get__(self): + return { + "x": self.p_bgraph.grid_dim.x, + "y": self.p_bgraph.grid_dim.y, + "z": self.p_bgraph.grid_dim.z + } + + property forloop_range: + def __get__(self): + return self.p_bgraph.forloop_range + + property operators: + def __get__(self): + cdef vector[CppTBOperator*] coperators + coperators = self.p_bgraph.operators + operators = list() + for i in range(coperators.size()): + ptr = ctypes.cast(coperators[i], ctypes.c_void_p) + operators.append(CyTBOperator(ptr)) + return operators + def search(CyKNGraph input_graph, *, int max_num_new_graphs = 1024, list imaps = None, list omaps = None, list griddims = None, list blockdims = None, list fmaps = None, list franges = None, str previous_checkpoint = None, bool verbose, str default_config = None): # set cimaps cdef vector[MInt3] cimaps diff --git a/python/mirage/kernel.py b/python/mirage/kernel.py index 282280a..eb27d39 100644 --- a/python/mirage/kernel.py +++ b/python/mirage/kernel.py @@ -10,6 +10,7 @@ from .core import * from .threadblock import * +from .visualizer import * from .utils import * HARD_CODE = """ @@ -125,6 +126,7 @@ def __init__(self, graph): self.run = None self._valid_cuda_kernels = False self._cached_results = None + self.visualizer = None def new_input( self, dims: tuple, strides: tuple = None, dtype: dtype = float16 @@ -404,3 +406,8 @@ def superoptimize( best_graph, best_perf = g, perf return best_graph + + def visualize(self, file_name): + operators = self.cygraph.get_graph_structure() + self.visualizer = visualizer(file_name) + self.visualizer.draw_graphs(operators) diff --git a/python/mirage/visualizer.py b/python/mirage/visualizer.py new file mode 100644 index 0000000..051938c --- /dev/null +++ b/python/mirage/visualizer.py @@ -0,0 +1,467 @@ +import json +import graphviz as gv + +colors_map = { + "kernel": { + "node": "#70a148", + "bg": "#e0edd5", + "edge": "#527536", + "edge_label": "black", + "io": "#527536", + }, + "block": { + "node": "#5a8fcb", + "bg": "#dbe8f5", + "edge": "#4273b1", + "edge_label": "black", + "io": "#527536", + }, + "thread": { + "node": "#f5c342", + "bg": "#fdf2d0", + "edge": "#b89230", + "edge_label": "black", + "io": 
"#4273b1", # 输入输出节点和边的颜色 + } +} + +node_font_size = "30" +edge_font_size = "30" +tensor_node_font_size = "23" +graph_label_font_size = "32" + +op_nodelabel_mapping = { + "kn_input_op": "Input", + "kn_output_op": "Output", + "kn_customized_op": "Customized\nOp ", + "kn_mul_op": "Multiply", + "tb_input_op": "Input", + "tb_output_op": "Output", + "tb_mul_op": "Multiply", + "tb_matmul_op": "MatMul", + "tb_accum_nored_op": "AccumNoRed", + "kn_unkown": "Unknown", + "kn_matmul_op": "MatMul", + "kn_reduction_0_op": "Reduction 0", + "kn_reduction_1_op": "Reduction 1", + "kn_reduction_2_op": "Reduction 2", + "kn_exp_op": "Exp", + "kn_square_op": "Square", + "kn_sqrt_op": "Sqrt", + "kn_silu_op": "SiLU", + "kn_add_op": "Add", + "kn_div_op": "Div", + "kn_allreduce_op": "AllReduce", + "tb_unkown": "Unknown", + "tb_reduction_0_op": "Reduction 0", + "tb_reduction_1_op": "Reduction 1", + "tb_reduction_2_op": "Reduction 2", + "tb_exp_op": "Exp", + "tb_square_op": "Square", + "tb_sqrt_op": "Sqrt", + "tb_silu_op": "SiLU", + "tb_mul_scalar_op": "Multiply Scalar", + "tb_add_op": "Add", + "tb_div_op": "Div", + "tb_reduction_0_to_dimx_op": "Reduction 0\nto DimX", + "tb_reduction_1_to_dimx_op": "Reduction 1\nto DimX", + "tb_reduction_2_to_dimx_op": "Reduction 2\nto DimX", + "tb_concat_0_op": "Concat 0", + "tb_concat_1_op": "Concat 1", + "tb_concat_2_op": "Concat 2", + "tb_accum_red_ld_sum_op": "Accum Red\nLD Sum", + "tb_accum_red_ld_mean_op": "Accum Red\nLD Mean", + "tb_accum_red_ld_rms_op": "Accum Red\nLD RMS", + "tb_accum_redtox_ld_sum_op": "Accum RedtoX\nLD Sum", + "tb_customized_op": "Customized\nOp", + "tb_forloop_accum_first_op": "Forloop Accum\nFirst", + "tb_forloop_accum_no_red_op": "Forloop Accum\nNo Red", # Add by hand +} +guid_tensors_map = {} + +tensor_name_suffix = "'" +phi_symbol = "\u2205" +arrow_symbol = "↔" + +def draw_edge(G, from_node, to_node, graph_type, label=None): + G.edge(from_node, to_node, color=colors_map[graph_type]["edge"], penwidth="6", + label=label, fontname="sans-serif", fontsize=edge_font_size, fontcolor=colors_map[graph_type]["edge_label"]) + +def get_format_str(operator_data): + s = "" + if operator_data['forloop_dim'] >= 0: + s += str(operator_data['forloop_dim']) + else: + s += '\u2205' + return f"fmap: [i↔{s}]" + +def letter_sequence(): + alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + index = 0 + while True: + yield alphabet[index % len(alphabet)] + index += 1 + +def is_graph_data(item): + return isinstance(item, dict) and "op_type" in item + +class node: + def __init__(self, name, op_type, id, label, color): + self.name = name # Unique name + self.op_type = op_type + self.id = id + self.label = label # Shown in the graph + self.input_tensors = [] + self.output_tensors = [] + self.color = color + + def is_customized_node(self): + return "customized" in self.op_type + + def is_input_node(self): + return "input" in self.op_type + + def is_output_node(self): + return "output" in self.op_type + +class kernel_node(node): + def __init__(self, name, op_type, id, label): + super().__init__(name, op_type, id, label, colors_map["kernel"]["node"]) + + def is_kernel_output_node(self): + return not self.output_tensors + + def draw(self, G): + G.node(self.name, label=self.label, color=self.color, style="rounded,filled", shape="box", + penwidth="0", fontsize=node_font_size, fontcolor="white", fontname="sans-serif", margin="0.4,0.4") + +class block_node(node): + def __init__(self, name, op_type, id, label, iomap=None, forloop_dim=None, forloop_range=None): + super().__init__(name, op_type, id, 
label, colors_map["block"]["node"]) + # Only input and output nodes have iomap + if self.is_input_node() or self.is_output_node(): + self.iomap_str = self.get_iomap_str(iomap) + self.original_tensor = None + # Only input nodes have forloop_dim and forloop_range + if self.is_input_node(): + self.forloop_dim = forloop_dim + self.forloop_range = forloop_range + self.formap_str = self.get_formap_str() + + def get_iomap_str(self, io_map): + map_entries = [] + for key, value in io_map.items(): + if value == -1: + map_entries.append(f"{key}{arrow_symbol}{phi_symbol}") + else: + map_entries.append(f"{key}{arrow_symbol}{value}") + if self.is_input_node(): + map_string = "imap: {" + ", ".join(map_entries) + "}" + else: + map_string = "omap: {" + ", ".join(map_entries) + "}" + return map_string + + def get_formap_str(self): + s = "" + if self.forloop_dim >= 0: + s += str(self.forloop_dim) + else: + s += phi_symbol + return f"fmap: [i{arrow_symbol}{s}]" + + def draw(self, G): + tensor_color = colors_map["kernel"]["io"] + if self.is_input_node(): + output_shape = self.output_tensors[0].shape + shape_before_loop = output_shape.copy() + if self.forloop_dim >= 0: + shape_before_loop[self.forloop_dim] = output_shape[self.forloop_dim] * self.forloop_range + + tensor_node_name = self.original_tensor.name + "'_input" + G.node(tensor_node_name, label=self.original_tensor.name + "'\n" + str(shape_before_loop) + "\n" + self.iomap_str, + color=tensor_color, style="filled", shape="box", penwidth="0", fontsize=tensor_node_font_size, fontcolor="white", fontname="sans-serif") + G.node(self.name, label=self.label + "\n" + self.formap_str, color=self.color, style="rounded,filled", + shape="box", penwidth="0", fontsize=node_font_size, fontcolor="white", fontname="sans-serif", margin="0.4,0.4") + draw_edge(G, tensor_node_name, self.name, "block") + elif self.is_output_node(): + tensor_node_name = self.original_tensor.name + "'_output" + G.node(self.name, label=self.label, color=self.color, style="rounded,filled", + shape="box", penwidth="0", fontsize=node_font_size, fontcolor="white", fontname="sans-serif", margin="0.4,0.4") + G.node(tensor_node_name, label=self.original_tensor.name + "'\n" + str(self.input_tensors[0].shape) + "\n" + self.iomap_str, + color=tensor_color, style="filled", shape="box", penwidth="0", fontsize=tensor_node_font_size, fontcolor="white", fontname="sans-serif") + draw_edge(G, self.name, tensor_node_name, "block") + else: + G.node(self.name, label=self.label, color=self.color, style="rounded,filled", + shape="box", penwidth="0", fontsize=node_font_size, fontcolor="white", fontname="sans-serif", margin="0.4,0.4") + +class tensor: + def _init_(self, guid, color, shape): + self.guid = guid + self.color = color + self.last_node = None + self.next_nodes = [] + self.shape = shape + +class kernel_tensor(tensor): + def __init__(self, guid, shape): + super()._init_(guid, colors_map["kernel"]["io"], shape) + self.name = None + guid_tensors_map[guid] = self + + def draw(self, G): + if self.next_nodes: + for next_node in self.next_nodes: + if self.last_node.is_customized_node() or next_node.is_customized_node(): + G.node(self.name, label=self.name + "\n" + str(self.shape), color=self.color, style="filled", shape="box", + penwidth="0", fontsize=tensor_node_font_size, fontcolor="white", fontname="sans-serif") + if not (self.last_node.is_input_node()): + draw_edge(G, self.last_node.name, self.name, "kernel") + if next_node: + draw_edge(G, self.name, next_node.name, "kernel") + elif 
self.last_node.is_input_node(): + if not self.name: + self.name = str(self.guid) + G.node(self.name, label=str(self.shape), color=self.color, style="filled", shape="box", + penwidth="0", fontsize=tensor_node_font_size, fontcolor="white", fontname="sans-serif") + draw_edge(G, self.name, next_node.name, "kernel") + elif not next_node: + if not self.name: + self.name = str(self.guid) + G.node(self.name, label=str(self.shape), color=self.color, style="filled", shape="box", + penwidth="0", fontsize=tensor_node_font_size, fontcolor="white", fontname="sans-serif") + draw_edge(G, self.last_node.name, self.name, "kernel") + else: + draw_edge(G, self.last_node.name, next_node.name, "kernel", label=str(self.shape)) + elif self.last_node.is_customized_node(): + if not self.name: + G.node(self.name, label=str(self.shape), color=self.color, style="filled", shape="box", + penwidth="0", fontsize=tensor_node_font_size, fontcolor="white", fontname="sans-serif") + else: + G.node(self.name, label=self.name + "\n" + str(self.shape), color=self.color, style="filled", shape="box", + penwidth="0", fontsize=tensor_node_font_size, fontcolor="white", fontname="sans-serif") + draw_edge(G, self.last_node.name, self.name, "kernel") + else: + G.node(self.last_node.name+"_output_tensor", label=str(self.shape), color=self.color, style="filled", shape="box", + penwidth="0", fontsize=tensor_node_font_size, fontcolor="white", fontname="sans-serif") + draw_edge(G, self.last_node.name, self.last_node.name+"_output_tensor", "kernel") + +class block_tensor(tensor): + def __init__(self, guid, shape): + super()._init_(guid, colors_map["block"]["edge"], shape) + guid_tensors_map[guid] = self + + def draw(self, G): + for next_node in self.next_nodes: + draw_edge(G, self.last_node.name, next_node.name, "block", label=str(self.shape)) + + +class graph: + def __init__(self, label, bg_color, edge_color): + self.label = label + self.bg_color = bg_color + self.nodes = [] + self.tensors = [] + self.edge_color = edge_color + +class kernel_graph(graph): + + def __init__(self, label): + super().__init__(label, colors_map["kernel"]["bg"], colors_map["kernel"]["edge"]) + self.block_graphs = [] + self.letter_sequence = letter_sequence() + + def read_nodes(self, graph_data): + block_graph_datas_to_handle = [] + for node_data in graph_data: + node_id = id(node_data) + node_op_type = node_data["op_type"] + node_name = f"{node_op_type}_{node_id}" + new_node = kernel_node(node_name, node_op_type, node_id, op_nodelabel_mapping[node_op_type]) + for input_tensor in node_data["input_tensors"]: + tensor_guid = input_tensor["guid"] + if tensor_guid in guid_tensors_map: + tensor = guid_tensors_map[tensor_guid] + else: + tensor = kernel_tensor(tensor_guid, input_tensor["dim"][:input_tensor["num_dims"]]) + if new_node.is_customized_node() and not tensor.name: + tensor.name = next(self.letter_sequence) + tensor.next_nodes.append(new_node) + if tensor_guid not in guid_tensors_map: + guid_tensors_map[tensor_guid] = tensor + if tensor not in self.tensors: + self.tensors.append(tensor) + new_node.input_tensors.append(tensor) + for output_tensor in node_data["output_tensors"]: + tensor_guid = output_tensor["guid"] + if tensor_guid in guid_tensors_map: + tensor = guid_tensors_map[tensor_guid] + else: + tensor = kernel_tensor(tensor_guid, output_tensor["dim"][:output_tensor["num_dims"]]) + if new_node.is_customized_node() and not tensor.name: + tensor.name = next(self.letter_sequence) + tensor.last_node = new_node + if tensor_guid not in guid_tensors_map: + 
guid_tensors_map[tensor_guid] = tensor + if tensor not in self.tensors: + self.tensors.append(tensor) + new_node.output_tensors.append(tensor) + + self.nodes.append(new_node) + + if "bgraph" in node_data: + grid_dim = node_data["bgraph"]["grid_dim"] + forloop_range = node_data["bgraph"]["forloop_range"] + new_block_graph = block_graph("Block graph "+str(len(block_graph_datas_to_handle)+1), + grid_dim, forloop_range, self) + new_node.related_node = new_block_graph + new_node.label += " "+str(len(block_graph_datas_to_handle)+1) + block_graph_datas_to_handle.append((new_block_graph, node_data["bgraph"]["operators"])) + + for new_block_graph, block_graph_data in block_graph_datas_to_handle: + new_block_graph.read_nodes(block_graph_data) + self.block_graphs.append(new_block_graph) + + def draw_graph(self, G): + # Draw block graphs from back to front + for i in range(len(self.block_graphs) - 1, -1, -1): + self.block_graphs[i].draw_graph(G) + + with G.subgraph(name="cluster" + self.label) as sub: + sub.attr(rankdir='LR', splines='ortho', bgcolor=self.bg_color, fontname="sans-serif", + label=self.label, labelloc='t', labeljust='l', labeldistance="1.5", fontsize=graph_label_font_size, fontcolor="black", + style="filled", penwidth="0") + for node in self.nodes: + if node.is_input_node(): + continue + else: + node.draw(sub) + for tensor in self.tensors: + tensor.draw(sub) + +class block_graph(graph): + + def __init__(self, label, grid_dim, forloop_range, kernel_graph): + super().__init__(label, colors_map["block"]["bg"], colors_map["block"]["edge"]) + self.related_node = None + self.grid_dim = grid_dim + self.forloop_range = forloop_range + self.kernel_graph = kernel_graph + + def get_grid_size_and_forloop(self): + grid_size_str = "" + forloop_str = "" + grid_size_str = f"grid size: [{', '.join([f'{k}={v}' for k, v in self.grid_dim.items()])}]" + + forloop_str = f"forloop: [i={self.forloop_range}]" + + return "; " + grid_size_str + "; " + forloop_str + + def read_nodes(self, graph_data): + nodes = [] + for node_data in graph_data: + node_id = id(node_data) + node_op_type = node_data["op_type"] + node_name = f"{node_op_type}_{node_id}" + io_map = None + if "input_map" in node_data: + io_map = node_data["input_map"] + elif "output_map" in node_data: + io_map = node_data["output_map"] + forloop_dim = None if "forloop_dim" not in node_data else node_data["forloop_dim"] + new_node = block_node(node_name, node_op_type, node_id, op_nodelabel_mapping[node_op_type], + io_map, forloop_dim, self.forloop_range) + for output_tensor in node_data["output_tensors"]: + tensor_guid = output_tensor["guid"] + if tensor_guid in guid_tensors_map: + tensor = guid_tensors_map[tensor_guid] + else: + tensor = block_tensor(tensor_guid, output_tensor["dim"][:output_tensor["num_dims"]]) + tensor.last_node = new_node + if tensor_guid not in guid_tensors_map: + guid_tensors_map[tensor_guid] = tensor + if tensor not in self.tensors: + self.tensors.append(tensor) + new_node.output_tensors.append(tensor) + for input_tensor in node_data["input_tensors"]: + tensor_guid = input_tensor["guid"] + if tensor_guid in guid_tensors_map: + tensor = guid_tensors_map[tensor_guid] + else: + tensor = block_tensor(tensor_guid, input_tensor["dim"][:input_tensor["num_dims"]]) + tensor.next_nodes.append(new_node) + if tensor_guid not in guid_tensors_map: + guid_tensors_map[tensor_guid] = tensor + if tensor not in self.tensors: + self.tensors.append(tensor) + new_node.input_tensors.append(tensor) + if "dtensor" in node_data: + new_node.original_tensor = 
guid_tensors_map[node_data["dtensor"]["guid"]] + + self.nodes.append(new_node) + return nodes + + def draw_graph(self, G): + with G.subgraph(name="cluster" + self.label) as sub: + sub.attr(rankdir='LR', splines='ortho', bgcolor=self.bg_color, fontname="sans-serif", + label=self.label + self.get_grid_size_and_forloop(), labelloc='t', labeljust='l', + labeldistance="1.5", fontsize=graph_label_font_size, fontcolor="black", style="filled", penwidth="0") + for node in self.nodes: + node.draw(sub) + for tensor in self.tensors: + tensor.draw(sub) + +class visualizer: + def __init__(self, output_filename): + self.graphs = [] + self.output_filename = output_filename + self.letter_sequence = letter_sequence() + self.G = gv.Digraph(format='png', name="Kernel Graph") + self.G.attr(rankdir='LR', splines='ortho', bgcolor="#ffffff", fontname="sans-serif", + nodesep="1.6", ranksep="0.3", fontsize="16", fontcolor="black", compound="true") + self.new_kernel_graph = kernel_graph("Kernel Graph") + + def draw_graphs(self, operators, dot=True, png=True): + self.new_kernel_graph.read_nodes(operators) + self.new_kernel_graph.draw_graph(self.G) + if dot: + self.G.save(self.output_filename + ".dot") + print(f"Graph saved as {self.output_filename}.dot") + if png: + self.G.render(self.output_filename, cleanup=True) + print(f"Graph saved as {self.output_filename}.png") + + + +def handle_graph_data(graph_data, graph_title, output_filename, dot=True, png=True): + G = gv.Digraph(format='png', name=graph_title) + G.attr(rankdir='LR', splines='ortho', bgcolor="#ffffff", fontname="sans-serif", + nodesep="1.6", ranksep="0.3", fontsize="16", fontcolor="black", compound="true") + + new_kernel_graph = kernel_graph("Kernel Graph") + new_kernel_graph.read_nodes(graph_data) + new_kernel_graph.draw_graph(G) + + if dot: + G.save(output_filename + ".dot") + print(f"Graph saved as {output_filename}.dot") + if png: + G.render(output_filename, cleanup=True) + print(f"Graph saved as {output_filename}.png") + + +if __name__ == "__main__": + # file_name = "multi_graph.json" + file_name = "mirage_search_checkpoint.json" + with open(file_name) as f: + data = json.load(f) + + if isinstance(data, list): + if all(isinstance(item, list) for item in data): + for idx, graph_list in enumerate(data): + handle_graph_data(graph_list, graph_title=f"Combined graph {idx+1}", output_filename=f"reframe_outcome/reframe_combined_graph_{idx+1}") + elif all(is_graph_data(item) for item in data): + handle_graph_data(data, graph_title="Combined graph", output_filename="reframe_outcome/reframe_combined_graph") + else: + print("Invalid data format.") + else: + print("Invalid data format.") \ No newline at end of file diff --git a/src/kernel/customized.cc b/src/kernel/customized.cc index 205c45b..0c462b8 100644 --- a/src/kernel/customized.cc +++ b/src/kernel/customized.cc @@ -230,6 +230,10 @@ KNCustomizedOp::KNCustomizedOp(mirage::kernel::Graph *_kgraph, } } +void KNCustomizedOp::get_bgraph(mirage::threadblock::Graph** bgraph_) { + *bgraph_ = &(this->bgraph); +} + KNCustomizedOp::~KNCustomizedOp() { // while (!bgraph.operators.empty()) { // delete bgraph.operators.back(); diff --git a/src/kernel/operator.cc b/src/kernel/operator.cc index 90c58e3..32a23fd 100644 --- a/src/kernel/operator.cc +++ b/src/kernel/operator.cc @@ -50,5 +50,18 @@ KNOperator::KNOperator(Graph *_graph, KNOperator::~KNOperator() {} +int KNOperator::get_input_dtensors(DTensor **inputs) { + for (size_t i = 0; i < input_tensors.size(); ++i) { + inputs[i] = &input_tensors[i]; + } + return 
input_tensors.size(); +} + +int KNOperator::get_output_dtensors(DTensor **outputs) { + for (size_t i = 0; i < output_tensors.size(); ++i) { + outputs[i] = &output_tensors[i]; + } + return output_tensors.size(); +} } // namespace kernel } // namespace mirage diff --git a/src/threadblock/input_loader.cc b/src/threadblock/input_loader.cc index c1d81b8..29bff4d 100644 --- a/src/threadblock/input_loader.cc +++ b/src/threadblock/input_loader.cc @@ -119,5 +119,7 @@ TBInputOp::operator json() const { {"forloop_dim", forloop_dim}}; } +size_t TBInputOp::get_dtensor_guid() { return dtensor.guid; } + } // namespace threadblock } // namespace mirage diff --git a/src/threadblock/operator.cc b/src/threadblock/operator.cc index 1078329..60b88a3 100644 --- a/src/threadblock/operator.cc +++ b/src/threadblock/operator.cc @@ -38,6 +38,20 @@ TBOperator::TBOperator(Graph *_graph, input_tensors.push_back(input2); } +int TBOperator::get_input_stensors(STensor** inputs) { + for (size_t i = 0; i < input_tensors.size(); ++i) { + inputs[i] = &input_tensors[i]; + } + return input_tensors.size(); +} + +int TBOperator::get_output_stensors(STensor** outputs) { + for (size_t i = 0; i < output_tensors.size(); ++i) { + outputs[i] = &output_tensors[i]; + } + return output_tensors.size(); +} + TBOperator::~TBOperator() {} } // namespace threadblock diff --git a/src/threadblock/output.cc b/src/threadblock/output.cc index 6d7d990..a6dc0b1 100644 --- a/src/threadblock/output.cc +++ b/src/threadblock/output.cc @@ -113,5 +113,8 @@ TBOutputOp::operator json() const { {"dtensor", dtensor}, {"output_map", output_map}}; } + +size_t TBOutputOp::get_dtensor_guid() { return dtensor.guid; } + } // namespace threadblock } // namespace mirage
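
For reference, below is a minimal sketch of how the new visualization entry point is exercised end to end, mirroring the graph.visualize(...) calls added to the demo scripts in this patch. The graph-construction calls (mi.new_kernel_graph(), graph.matmul(...), graph.mark_output(...)) and the example shapes are assumptions about the existing Mirage Python API and are not part of this diff; new_input(...) appears in kernel.py as diff context, and visualize(...) is the method this patch adds to KNGraph.

import mirage as mi

# Build a small kernel graph. mi.new_kernel_graph(), graph.matmul() and
# graph.mark_output() are assumed from the existing Mirage Python API and are
# not shown in this patch; new_input() and the new visualize() are visible above.
graph = mi.new_kernel_graph()
X = graph.new_input(dims=(8, 4096), dtype=mi.float16)
W = graph.new_input(dims=(4096, 4096), dtype=mi.float16)
O = graph.matmul(X, W)
graph.mark_output(O)

# New in this patch: KNGraph.visualize() walks the kernel graph (and the nested
# threadblock graph of any kn_customized_op) via CyKNGraph.get_graph_structure()
# and renders it with python/mirage/visualizer.py. This writes matmul_demo.dot
# and matmul_demo.png; it requires the graphviz Python package and the dot binary.
graph.visualize("matmul_demo")

The same drawing path can also be driven directly, since KNGraph.visualize() only forwards the structure dump to the visualizer: visualizer(file_name).draw_graphs(graph.cygraph.get_graph_structure()) produces the same .dot/.png pair.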