-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent c41ce40, commit 49c17f1
Showing 3 changed files with 114 additions and 115 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,30 @@ | ||
module @SimpleModel attributes {} { | ||
func.func @forward(%arg0: tensor<1x784xf32> {ttir.name = "input_1"}, %arg1: tensor<10x784xf32> {ttir.name = "linear.weight"}, %arg2: tensor<10xf32> {ttir.name = "linear.bias"}) -> (tensor<1x10xf32> {ttir.name = "SimpleModel_472.output_softmax_1495"}) { | ||
%0 = tensor.empty() : tensor<784x10xf32> | ||
%1 = "ttir.transpose"(%arg1, %0) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<10x784xf32>, tensor<784x10xf32>) -> tensor<784x10xf32> | ||
%1 = "ttir.transpose"(%arg1, %0) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<10x784xf32>, tensor<784x10xf32>) -> tensor<784x10xf32> | ||
%2 = tensor.empty() : tensor<1x10xf32> | ||
%3 = "ttir.matmul"(%arg0, %1, %2) <{operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x784xf32>, tensor<784x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32> | ||
%3 = "ttir.matmul"(%arg0, %1, %2) : (tensor<1x784xf32>, tensor<784x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32> | ||
%4 = tensor.empty() : tensor<1x10xf32> | ||
%5 = "ttir.add"(%3, %arg2, %4) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x10xf32>, tensor<10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32> | ||
%5 = "ttir.add"(%3, %arg2, %4) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x10xf32>, tensor<10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32> | ||
%6 = tensor.empty() : tensor<1x10xf32> | ||
%7 = "ttir.softmax"(%5, %6) <{dimension = -1 : si32, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32> | ||
%7 = "ttir.softmax"(%5, %6) <{dimension = -1 : si32}> : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32> | ||
return %7 : tensor<1x10xf32> | ||
} | ||
func.func @backward(%arg0: tensor<1x10xf32> {ttir.name = "loss_SimpleModel_472.output_softmax_1495"}, %arg1: tensor<1x10xf32> {ttir.name = "SimpleModel_472.output_softmax_1495"}, %arg2: tensor<1x784xf32> {ttir.name = "input_1"}) -> (tensor<1x10xf32> {ttir.name = "grad_acc_linear.bias_grad_accumulator"}, tensor<10x784xf32> {ttir.name = "grad_acc_linear.weight_grad_accumulator"}) { | ||
%0 = tensor.empty() : tensor<1x10xf32> | ||
%1 = "ttir.multiply"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x10xf32>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32> | ||
%1 = "ttir.multiply"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x10xf32>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32> | ||
%2 = tensor.empty() : tensor<1x1xf32> | ||
%3 = "ttir.sum"(%1, %2) <{keep_dim = true, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x10xf32>, tensor<1x1xf32>) -> tensor<1x1xf32> | ||
%3 = "ttir.sum"(%1, %2) <{keep_dim = true}> : (tensor<1x10xf32>, tensor<1x1xf32>) -> tensor<1x1xf32> | ||
%4 = tensor.empty() : tensor<1x10xf32> | ||
%5 = "ttir.subtract"(%arg0, %3, %4) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x10xf32>, tensor<1x1xf32>, tensor<1x10xf32>) -> tensor<1x10xf32> | ||
%5 = "ttir.subtract"(%arg0, %3, %4) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x10xf32>, tensor<1x1xf32>, tensor<1x10xf32>) -> tensor<1x10xf32> | ||
%6 = tensor.empty() : tensor<1x10xf32> | ||
%7 = "ttir.multiply"(%5, %arg1, %6) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x10xf32>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32> | ||
%7 = "ttir.multiply"(%5, %arg1, %6) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x10xf32>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32> | ||
%8 = tensor.empty() : tensor<784x1xf32> | ||
%9 = "ttir.transpose"(%arg2, %8) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<1x784xf32>, tensor<784x1xf32>) -> tensor<784x1xf32> | ||
%9 = "ttir.transpose"(%arg2, %8) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x784xf32>, tensor<784x1xf32>) -> tensor<784x1xf32> | ||
%10 = tensor.empty() : tensor<784x10xf32> | ||
%11 = "ttir.matmul"(%9, %7, %10) <{operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<784x1xf32>, tensor<1x10xf32>, tensor<784x10xf32>) -> tensor<784x10xf32> | ||
%11 = "ttir.matmul"(%9, %7, %10) : (tensor<784x1xf32>, tensor<1x10xf32>, tensor<784x10xf32>) -> tensor<784x10xf32> | ||
%12 = tensor.empty() : tensor<10x784xf32> | ||
%13 = "ttir.transpose"(%11, %12) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>, #tt.operand_constraint<dram|l1|scalar|tile|none|interleaved|single_bank|height_sharded|width_sharded|block_sharded|any_layout|any_device|any_device_tile|l1_block_sharded>]}> : (tensor<784x10xf32>, tensor<10x784xf32>) -> tensor<10x784xf32> | ||
%13 = "ttir.transpose"(%11, %12) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<784x10xf32>, tensor<10x784xf32>) -> tensor<10x784xf32> | ||
return %7, %13 : tensor<1x10xf32>, tensor<10x784xf32> | ||
} | ||
} |
Oops, something went wrong.