[MetaSchedule] add fp16-16-32 TensorCores rule to default settings (#…
Hzfengsy authored Jan 23, 2023
1 parent b77d24c commit 35a66f6
Showing 1 changed file with 17 additions and 0 deletions.

src/meta_schedule/schedule_rule/schedule_rule.cc
@@ -172,6 +172,22 @@ Array<ScheduleRule> ScheduleRule::DefaultCUDA() {
 
 Array<ScheduleRule> ScheduleRule::DefaultCUDATensorCore() {
   Array<Map<String, String>> intrin_groups = {
+      // Tensor Cores f32 += f16 * f16
+      {
+          {"init", "wmma_fill_16x16x16_f32"},
+          {"load_a", "wmma_load_16x16x16_f16_a"},
+          {"load_b", "wmma_load_16x16x16_f16_b"},
+          {"compute", "wmma_sync_16x16x16_f16f16f32"},
+          {"store", "wmma_store_16x16x16_f32_shared"},
+      },
+      {
+          {"init", "wmma_fill_16x16x16_f32"},
+          {"load_a", "wmma_load_16x16x16_f16_a"},
+          {"load_b", "wmma_load_16x16x16_f16_b_trans"},
+          {"compute", "wmma_sync_16x16x16_f16f16f32_trans"},
+          {"store", "wmma_store_16x16x16_f32_shared"},
+      },
+      // Tensor Cores f16 += f16 * f16
       {
           {"init", "wmma_fill_16x16x16_f16"},
           {"load_a", "wmma_load_16x16x16_f16_a"},
@@ -186,6 +202,7 @@ Array<ScheduleRule> ScheduleRule::DefaultCUDATensorCore() {
           {"compute", "wmma_sync_16x16x16_f16f16f16_trans"},
           {"store", "wmma_store_16x16x16_f16_shared"},
       },
+      // Tensor Cores s32 += s8 * s8
       {
           {"init", "wmma_fill_16x16x16_s32"},
           {"load_a", "wmma_load_16x16x16_s8_a"},
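For context: each map in intrin_groups names the registered tir::TensorIntrin for one stage (init, load_a, load_b, compute, store) of a single wmma configuration; the wmma_* names are registered on the Python side in python/tvm/tir/tensor_intrin/cuda.py. Further down in DefaultCUDATensorCore(), outside the hunks shown, the array is handed to the tensor-core tiling rule. Below is a minimal sketch of that hand-off, assuming the ScheduleRule::MultiLevelTilingTensorCore signature from schedule_rule.h; every parameter value other than intrin_groups is an illustrative placeholder, not necessarily the value in the actual file.

  // Sketch only: how intrin_groups plausibly feeds the tensor-core rule.
  // All tiling parameters below are placeholders for illustration.
  Array<ScheduleRule> results{ScheduleRule::MultiLevelTilingTensorCore(
      /*intrin_groups=*/intrin_groups,
      /*structure=*/"SSSRRSRS",  // S = spatial tile level, R = reduction tile level
      /*tile_binds=*/Array<String>{"blockIdx.y", "blockIdx.x", "threadIdx.y"},
      /*max_innermost_factor=*/Integer(4),
      /*vector_load_lens=*/NullOpt,
      /*reuse_read=*/Map<String, ObjectRef>{{"req", String("must")},
                                            {"levels", Array<Integer>{4}},
                                            {"scope", String("shared")}},
      /*reuse_write=*/Map<String, ObjectRef>{{"req", String("must")},
                                             {"levels", Array<Integer>{2}},
                                             {"scope", String("shared")}},
      /*use_software_pipeline=*/false)};
  // (The real function also appends the generic DefaultCUDA() rules as fallback.)
  return results;

At tuning time the rule selects whichever group matches a block's operand and accumulator dtypes, so the two f32-accumulator groups added by this commit make f16 x f16 -> f32 matmuls (in both normal and transposed-B layouts) schedulable on Tensor Cores by default, alongside the pre-existing f16- and s32-accumulator groups.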
