diff --git a/python/tvm/contrib/cutlass/gen_tensor_op.py b/python/tvm/contrib/cutlass/gen_tensor_op.py index c8221514ce0a..73f3287c8fb1 100644 --- a/python/tvm/contrib/cutlass/gen_tensor_op.py +++ b/python/tvm/contrib/cutlass/gen_tensor_op.py @@ -136,6 +136,7 @@ def get_tile_descriptions(math_inst): TileDescription([128, 256, 32], 3, [2, 4, 1], math_inst, min_cc, max_cc), TileDescription([256, 64, 32], 4, [4, 1, 1], math_inst, min_cc, max_cc), TileDescription([64, 256, 32], 4, [1, 4, 1], math_inst, min_cc, max_cc), + TileDescription([128, 128, 32], 2, [2, 2, 1], math_inst, min_cc, max_cc), TileDescription([128, 128, 32], 3, [2, 2, 1], math_inst, min_cc, max_cc), TileDescription([128, 128, 32], 4, [2, 2, 1], math_inst, min_cc, max_cc), TileDescription([128, 128, 32], 5, [2, 2, 1], math_inst, min_cc, max_cc), @@ -152,9 +153,11 @@ def get_tile_descriptions(math_inst): TileDescription([64, 64, 64], 5, [2, 2, 1], math_inst, min_cc, max_cc), ] - return generate_tensor_op_common( + sm75_kernels = generate_sm75_tensor_op_1688(out_dtype, op_creator) + sm80_kernels = generate_tensor_op_common( math_instructions, alignment_constraints, get_tile_descriptions, op_creator ) + return sm75_kernels + sm80_kernels class ProfilerEngine: