diff --git a/tools/explorer/test/models/forward_and_backward.mlir b/tools/explorer/test/models/forward_and_backward.mlir
index 3f0b8f781d..e205bcf2b8 100644
--- a/tools/explorer/test/models/forward_and_backward.mlir
+++ b/tools/explorer/test/models/forward_and_backward.mlir
@@ -1,30 +1,30 @@
 module @SimpleModel attributes {} {
   func.func @forward(%arg0: tensor<1x784xf32> {ttir.name = "input_1"}, %arg1: tensor<10x784xf32> {ttir.name = "linear.weight"}, %arg2: tensor<10xf32> {ttir.name = "linear.bias"}) -> (tensor<1x10xf32> {ttir.name = "SimpleModel_472.output_softmax_1495"}) {
     %0 = tensor.empty() : tensor<784x10xf32>
-    %1 = "ttir.transpose"(%arg1, %0) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<10x784xf32>, tensor<784x10xf32>) -> tensor<784x10xf32>
+    %1 = "ttir.transpose"(%arg1, %0) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<10x784xf32>, tensor<784x10xf32>) -> tensor<784x10xf32>
     %2 = tensor.empty() : tensor<1x10xf32>
-    %3 = "ttir.matmul"(%arg0, %1, %2) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x784xf32>, tensor<784x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
+    %3 = "ttir.matmul"(%arg0, %1, %2) : (tensor<1x784xf32>, tensor<784x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
     %4 = tensor.empty() : tensor<1x10xf32>
-    %5 = "ttir.add"(%3, %arg2, %4) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x10xf32>, tensor<10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
+    %5 = "ttir.add"(%3, %arg2, %4) <{operandSegmentSizes = array}> : (tensor<1x10xf32>, tensor<10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
     %6 = tensor.empty() : tensor<1x10xf32>
-    %7 = "ttir.softmax"(%5, %6) <{dimension = -1 : si32, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
+    %7 = "ttir.softmax"(%5, %6) <{dimension = -1 : si32}> : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
     return %7 : tensor<1x10xf32>
   }
   func.func @backward(%arg0: tensor<1x10xf32> {ttir.name = "loss_SimpleModel_472.output_softmax_1495"}, %arg1: tensor<1x10xf32> {ttir.name = "SimpleModel_472.output_softmax_1495"}, %arg2: tensor<1x784xf32> {ttir.name = "input_1"}) -> (tensor<1x10xf32> {ttir.name = "grad_acc_linear.bias_grad_accumulator"}, tensor<10x784xf32> {ttir.name = "grad_acc_linear.weight_grad_accumulator"}) {
     %0 = tensor.empty() : tensor<1x10xf32>
-    %1 = "ttir.multiply"(%arg0, %arg1, %0) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x10xf32>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
+    %1 = "ttir.multiply"(%arg0, %arg1, %0) <{operandSegmentSizes = array}> : (tensor<1x10xf32>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
     %2 = tensor.empty() : tensor<1x1xf32>
-    %3 = "ttir.sum"(%1, %2) <{keep_dim = true, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x10xf32>, tensor<1x1xf32>) -> tensor<1x1xf32>
+    %3 = "ttir.sum"(%1, %2) <{keep_dim = true}> : (tensor<1x10xf32>, tensor<1x1xf32>) -> tensor<1x1xf32>
     %4 = tensor.empty() : tensor<1x10xf32>
-    %5 = "ttir.subtract"(%arg0, %3, %4) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x10xf32>, tensor<1x1xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
+    %5 = "ttir.subtract"(%arg0, %3, %4) <{operandSegmentSizes = array}> : (tensor<1x10xf32>, tensor<1x1xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
     %6 = tensor.empty() : tensor<1x10xf32>
-    %7 = "ttir.multiply"(%5, %arg1, %6) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x10xf32>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
+    %7 = "ttir.multiply"(%5, %arg1, %6) <{operandSegmentSizes = array}> : (tensor<1x10xf32>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
     %8 = tensor.empty() : tensor<784x1xf32>
-    %9 = "ttir.transpose"(%arg2, %8) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x784xf32>, tensor<784x1xf32>) -> tensor<784x1xf32>
+    %9 = "ttir.transpose"(%arg2, %8) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x784xf32>, tensor<784x1xf32>) -> tensor<784x1xf32>
     %10 = tensor.empty() : tensor<784x10xf32>
-    %11 = "ttir.matmul"(%9, %7, %10) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<784x1xf32>, tensor<1x10xf32>, tensor<784x10xf32>) -> tensor<784x10xf32>
+    %11 = "ttir.matmul"(%9, %7, %10) : (tensor<784x1xf32>, tensor<1x10xf32>, tensor<784x10xf32>) -> tensor<784x10xf32>
     %12 = tensor.empty() : tensor<10x784xf32>
-    %13 = "ttir.transpose"(%11, %12) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<784x10xf32>, tensor<10x784xf32>) -> tensor<10x784xf32>
+    %13 = "ttir.transpose"(%11, %12) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<784x10xf32>, tensor<10x784xf32>) -> tensor<10x784xf32>
     return %7, %13 : tensor<1x10xf32>, tensor<10x784xf32>
   }
 }
diff --git a/tools/explorer/test/models/linear_autoencoder.mlir b/tools/explorer/test/models/linear_autoencoder.mlir
index 8d7defc535..d8af25bbff 100644
--- a/tools/explorer/test/models/linear_autoencoder.mlir
+++ b/tools/explorer/test/models/linear_autoencoder.mlir
@@ -1,49 +1,49 @@
 module @LinearAE attributes {} {
   func.func @forward(%arg0: tensor<1x784xf32> {ttir.name = "input_1"}, %arg1: tensor<784x128xf32> {ttir.name = "encoder_lin1.weight"}, %arg2: tensor<128xf32> {ttir.name = "encoder_lin1.bias"}, %arg3: tensor<128x64xf32> {ttir.name = "encoder_lin2.weight"}, %arg4: tensor<64xf32> {ttir.name = "encoder_lin2.bias"}, %arg5: tensor<64x12xf32> {ttir.name = "encoder_lin3.weight"}, %arg6: tensor<12xf32> {ttir.name = "encoder_lin3.bias"}, %arg7: tensor<12x3xf32> {ttir.name = "encoder_lin4.weight"}, %arg8: tensor<3xf32> {ttir.name = "encoder_lin4.bias"}, %arg9: tensor<3x12xf32> {ttir.name = "decoder_lin1.weight"}, %arg10: tensor<12xf32> {ttir.name = "decoder_lin1.bias"}, %arg11: tensor<12x64xf32> {ttir.name = "decoder_lin2.weight"}, %arg12: tensor<64xf32> {ttir.name = "decoder_lin2.bias"}, %arg13: tensor<64x128xf32> {ttir.name = "decoder_lin3.weight"}, %arg14: tensor<128xf32> {ttir.name = "decoder_lin3.bias"}, %arg15: tensor<128x784xf32> {ttir.name = "decoder_lin4.weight"}, %arg16: tensor<784xf32> {ttir.name = "decoder_lin4.bias"}) -> (tensor<1x784xf32> {ttir.name = "LinearAE.output_add_29"}) {
     %0 = tensor.empty() : tensor<1x128xf32>
-    %1 = "ttir.matmul"(%arg0, %arg1, %0) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x784xf32>, tensor<784x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
+    %1 = "ttir.matmul"(%arg0, %arg1, %0) : (tensor<1x784xf32>, tensor<784x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
     %2 = tensor.empty() : tensor<1x128xf32>
-    %3 = "ttir.add"(%1, %arg2, %2) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x128xf32>, tensor<128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
+    %3 = "ttir.add"(%1, %arg2, %2) <{operandSegmentSizes = array}> : (tensor<1x128xf32>, tensor<128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
     %4 = tensor.empty() : tensor<1x128xf32>
-    %5 = "ttir.relu"(%3, %4) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
+    %5 = "ttir.relu"(%3, %4) <{operandSegmentSizes = array}> : (tensor<1x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
     %6 = tensor.empty() : tensor<1x64xf32>
-    %7 = "ttir.matmul"(%5, %arg3, %6) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x128xf32>, tensor<128x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7 = "ttir.matmul"(%5, %arg3, %6) : (tensor<1x128xf32>, tensor<128x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
     %8 = tensor.empty() : tensor<1x64xf32>
-    %9 = "ttir.add"(%7, %arg4, %8) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x64xf32>, tensor<64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9 = "ttir.add"(%7, %arg4, %8) <{operandSegmentSizes = array}> : (tensor<1x64xf32>, tensor<64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
     %10 = tensor.empty() : tensor<1x64xf32>
-    %11 = "ttir.relu"(%9, %10) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %11 = "ttir.relu"(%9, %10) <{operandSegmentSizes = array}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
     %12 = tensor.empty() : tensor<1x12xf32>
-    %13 = "ttir.matmul"(%11, %arg5, %12) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x64xf32>, tensor<64x12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
+    %13 = "ttir.matmul"(%11, %arg5, %12) : (tensor<1x64xf32>, tensor<64x12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
     %14 = tensor.empty() : tensor<1x12xf32>
-    %15 = "ttir.add"(%13, %arg6, %14) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x12xf32>, tensor<12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
+    %15 = "ttir.add"(%13, %arg6, %14) <{operandSegmentSizes = array}> : (tensor<1x12xf32>, tensor<12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
     %16 = tensor.empty() : tensor<1x12xf32>
-    %17 = "ttir.relu"(%15, %16) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
+    %17 = "ttir.relu"(%15, %16) <{operandSegmentSizes = array}> : (tensor<1x12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
     %18 = tensor.empty() : tensor<1x3xf32>
-    %19 = "ttir.matmul"(%17, %arg7, %18) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x12xf32>, tensor<12x3xf32>, tensor<1x3xf32>) -> tensor<1x3xf32>
+    %19 = "ttir.matmul"(%17, %arg7, %18) : (tensor<1x12xf32>, tensor<12x3xf32>, tensor<1x3xf32>) -> tensor<1x3xf32>
     %20 = tensor.empty() : tensor<1x3xf32>
-    %21 = "ttir.add"(%19, %arg8, %20) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x3xf32>, tensor<3xf32>, tensor<1x3xf32>) -> tensor<1x3xf32>
+    %21 = "ttir.add"(%19, %arg8, %20) <{operandSegmentSizes = array}> : (tensor<1x3xf32>, tensor<3xf32>, tensor<1x3xf32>) -> tensor<1x3xf32>
     %22 = tensor.empty() : tensor<1x12xf32>
-    %23 = "ttir.matmul"(%21, %arg9, %22) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x3xf32>, tensor<3x12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
+    %23 = "ttir.matmul"(%21, %arg9, %22) : (tensor<1x3xf32>, tensor<3x12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
     %24 = tensor.empty() : tensor<1x12xf32>
-    %25 = "ttir.add"(%23, %arg10, %24) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x12xf32>, tensor<12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
+    %25 = "ttir.add"(%23, %arg10, %24) <{operandSegmentSizes = array}> : (tensor<1x12xf32>, tensor<12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
     %26 = tensor.empty() : tensor<1x12xf32>
-    %27 = "ttir.relu"(%25, %26) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
+    %27 = "ttir.relu"(%25, %26) <{operandSegmentSizes = array}> : (tensor<1x12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
     %28 = tensor.empty() : tensor<1x64xf32>
-    %29 = "ttir.matmul"(%27, %arg11, %28) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x12xf32>, tensor<12x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %29 = "ttir.matmul"(%27, %arg11, %28) : (tensor<1x12xf32>, tensor<12x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
     %30 = tensor.empty() : tensor<1x64xf32>
-    %31 = "ttir.add"(%29, %arg12, %30) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x64xf32>, tensor<64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %31 = "ttir.add"(%29, %arg12, %30) <{operandSegmentSizes = array}> : (tensor<1x64xf32>, tensor<64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
     %32 = tensor.empty() : tensor<1x64xf32>
-    %33 = "ttir.relu"(%31, %32) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %33 = "ttir.relu"(%31, %32) <{operandSegmentSizes = array}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
     %34 = tensor.empty() : tensor<1x128xf32>
-    %35 = "ttir.matmul"(%33, %arg13, %34) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x64xf32>, tensor<64x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
+    %35 = "ttir.matmul"(%33, %arg13, %34) : (tensor<1x64xf32>, tensor<64x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
     %36 = tensor.empty() : tensor<1x128xf32>
-    %37 = "ttir.add"(%35, %arg14, %36) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x128xf32>, tensor<128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
+    %37 = "ttir.add"(%35, %arg14, %36) <{operandSegmentSizes = array}> : (tensor<1x128xf32>, tensor<128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
     %38 = tensor.empty() : tensor<1x128xf32>
-    %39 = "ttir.relu"(%37, %38) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
+    %39 = "ttir.relu"(%37, %38) <{operandSegmentSizes = array}> : (tensor<1x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
     %40 = tensor.empty() : tensor<1x784xf32>
-    %41 = "ttir.matmul"(%39, %arg15, %40) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x128xf32>, tensor<128x784xf32>, tensor<1x784xf32>) -> tensor<1x784xf32>
+    %41 = "ttir.matmul"(%39, %arg15, %40) : (tensor<1x128xf32>, tensor<128x784xf32>, tensor<1x784xf32>) -> tensor<1x784xf32>
     %42 = tensor.empty() : tensor<1x784xf32>
-    %43 = "ttir.add"(%41, %arg16, %42) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x784xf32>, tensor<784xf32>, tensor<1x784xf32>) -> tensor<1x784xf32>
+    %43 = "ttir.add"(%41, %arg16, %42) <{operandSegmentSizes = array}> : (tensor<1x784xf32>, tensor<784xf32>, tensor<1x784xf32>) -> tensor<1x784xf32>
     return %43 : tensor<1x784xf32>
   }
 }
diff --git a/tools/explorer/test/models/open_llama_3b_single_layer.mlir b/tools/explorer/test/models/open_llama_3b_single_layer.mlir
index 5e17dc39e9..97731870ba 100644
--- a/tools/explorer/test/models/open_llama_3b_single_layer.mlir
+++ b/tools/explorer/test/models/open_llama_3b_single_layer.mlir
@@ -1,170 +1,169 @@
-#any_device = #tt.operand_constraint
 #loc = loc("LlamaForCausalLM":0:0)
 #system_desc = #tt.system_desc<[{role = host, target_triple = "x86_64-pc-linux-gnu"}], [{arch = , grid = 8x8, l1_size = 1499136, num_dram_channels = 12, dram_channel_size = 1073741824, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32, l1_unreserved_base = 1024, erisc_l1_unreserved_base = 1024, dram_unreserved_base = 1024, dram_unreserved_end = 1073741824, physical_cores = {worker = [ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 1x0, 1x1, 1x2, 1x3, 1x4, 1x5, 1x6, 1x7, 2x0, 2x1, 2x2, 2x3, 2x4, 2x5, 2x6, 2x7, 3x0, 3x1, 3x2, 3x3, 3x4, 3x5, 3x6, 3x7, 4x0, 4x1, 4x2, 4x3, 4x4, 4x5, 4x6, 4x7, 5x0, 5x1, 5x2, 5x3, 5x4, 5x5, 5x6, 5x7, 6x0, 6x1, 6x2, 6x3, 6x4, 6x5, 6x6, 6x7, 7x0, 7x1, 7x2, 7x3, 7x4, 7x5, 7x6, 7x7] dram = [ 8x0, 9x0, 10x0, 8x1, 9x1, 10x1, 8x2, 9x2, 10x2, 8x3, 9x3, 10x3]}, supported_data_types = [, , , , , , , , , , , ], supported_tile_sizes = [ 4x16, 16x16, 32x16, 4x32, 16x32, 32x32], num_cbs = 32}], [0], [3 : i32], [ 0x0x0x0]>
 module @LlamaForCausalLM attributes {tt.system_desc = #system_desc} {
   func.func @forward(%arg0: tensor<1x12xi32> {ttir.name = "input_1"} loc("LlamaForCausalLM":0:0), %arg1: tensor<1xf32> {ttir.name = "input_1_add_4"} loc("LlamaForCausalLM":0:0), %arg2: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_14"} loc("LlamaForCausalLM":0:0), %arg3: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_24.1"} loc("LlamaForCausalLM":0:0), %arg4: tensor<1xf32> {ttir.name = "input_1_multiply_25"} loc("LlamaForCausalLM":0:0), %arg5: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_26.1"} loc("LlamaForCausalLM":0:0), %arg6: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_38.1"} loc("LlamaForCausalLM":0:0), %arg7: tensor<1xf32> {ttir.name = "input_1_multiply_39"} loc("LlamaForCausalLM":0:0), %arg8: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_40.1"} loc("LlamaForCausalLM":0:0), %arg9: tensor<1xf32> {ttir.name = "input_1_multiply_48"} loc("LlamaForCausalLM":0:0), %arg10:
tensor<1x1x12x12xf32> {ttir.name = "input_1_add_49"} loc("LlamaForCausalLM":0:0), %arg11: tensor<1xf32> {ttir.name = "input_1_add_70"} loc("LlamaForCausalLM":0:0), %arg12: tensor<1xf32> {ttir.name = "input_1_add_90"} loc("LlamaForCausalLM":0:0), %arg13: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_100"} loc("LlamaForCausalLM":0:0), %arg14: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_110.1"} loc("LlamaForCausalLM":0:0), %arg15: tensor<1xf32> {ttir.name = "input_1_multiply_111"} loc("LlamaForCausalLM":0:0), %arg16: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_112.1"} loc("LlamaForCausalLM":0:0), %arg17: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_124.1"} loc("LlamaForCausalLM":0:0), %arg18: tensor<1xf32> {ttir.name = "input_1_multiply_125"} loc("LlamaForCausalLM":0:0), %arg19: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_126.1"} loc("LlamaForCausalLM":0:0), %arg20: tensor<1xf32> {ttir.name = "input_1_multiply_134"} loc("LlamaForCausalLM":0:0), %arg21: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_135"} loc("LlamaForCausalLM":0:0), %arg22: tensor<1xf32> {ttir.name = "input_1_add_156"} loc("LlamaForCausalLM":0:0), %arg23: tensor<1xf32> {ttir.name = "input_1_add_176"} loc("LlamaForCausalLM":0:0), %arg24: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_186"} loc("LlamaForCausalLM":0:0), %arg25: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_196.1"} loc("LlamaForCausalLM":0:0), %arg26: tensor<1xf32> {ttir.name = "input_1_multiply_197"} loc("LlamaForCausalLM":0:0), %arg27: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_198.1"} loc("LlamaForCausalLM":0:0), %arg28: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_210.1"} loc("LlamaForCausalLM":0:0), %arg29: tensor<1xf32> {ttir.name = "input_1_multiply_211"} loc("LlamaForCausalLM":0:0), %arg30: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_212.1"} loc("LlamaForCausalLM":0:0), %arg31: tensor<1xf32> {ttir.name = "input_1_multiply_220"} loc("LlamaForCausalLM":0:0), %arg32: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_221"} loc("LlamaForCausalLM":0:0), %arg33: tensor<1xf32> {ttir.name = "input_1_add_242"} loc("LlamaForCausalLM":0:0), %arg34: tensor<1xf32> {ttir.name = "input_1_add_262"} loc("LlamaForCausalLM":0:0), %arg35: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_272"} loc("LlamaForCausalLM":0:0), %arg36: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_282.1"} loc("LlamaForCausalLM":0:0), %arg37: tensor<1xf32> {ttir.name = "input_1_multiply_283"} loc("LlamaForCausalLM":0:0), %arg38: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_284.1"} loc("LlamaForCausalLM":0:0), %arg39: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_296.1"} loc("LlamaForCausalLM":0:0), %arg40: tensor<1xf32> {ttir.name = "input_1_multiply_297"} loc("LlamaForCausalLM":0:0), %arg41: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_298.1"} loc("LlamaForCausalLM":0:0), %arg42: tensor<1xf32> {ttir.name = "input_1_multiply_306"} loc("LlamaForCausalLM":0:0), %arg43: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_307"} loc("LlamaForCausalLM":0:0), %arg44: tensor<1xf32> {ttir.name = "input_1_add_328"} loc("LlamaForCausalLM":0:0), %arg45: tensor<1xf32> {ttir.name = "input_1_add_348"} loc("LlamaForCausalLM":0:0), %arg46: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_358"} loc("LlamaForCausalLM":0:0), %arg47: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_368.1"} loc("LlamaForCausalLM":0:0), 
%arg48: tensor<1xf32> {ttir.name = "input_1_multiply_369"} loc("LlamaForCausalLM":0:0), %arg49: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_370.1"} loc("LlamaForCausalLM":0:0), %arg50: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_382.1"} loc("LlamaForCausalLM":0:0), %arg51: tensor<1xf32> {ttir.name = "input_1_multiply_383"} loc("LlamaForCausalLM":0:0), %arg52: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_384.1"} loc("LlamaForCausalLM":0:0), %arg53: tensor<1xf32> {ttir.name = "input_1_multiply_392"} loc("LlamaForCausalLM":0:0), %arg54: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_393"} loc("LlamaForCausalLM":0:0), %arg55: tensor<1xf32> {ttir.name = "input_1_add_414"} loc("LlamaForCausalLM":0:0), %arg56: tensor<1xf32> {ttir.name = "input_1_add_434"} loc("LlamaForCausalLM":0:0), %arg57: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_444"} loc("LlamaForCausalLM":0:0), %arg58: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_454.1"} loc("LlamaForCausalLM":0:0), %arg59: tensor<1xf32> {ttir.name = "input_1_multiply_455"} loc("LlamaForCausalLM":0:0), %arg60: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_456.1"} loc("LlamaForCausalLM":0:0), %arg61: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_468.1"} loc("LlamaForCausalLM":0:0), %arg62: tensor<1xf32> {ttir.name = "input_1_multiply_469"} loc("LlamaForCausalLM":0:0), %arg63: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_470.1"} loc("LlamaForCausalLM":0:0), %arg64: tensor<1xf32> {ttir.name = "input_1_multiply_478"} loc("LlamaForCausalLM":0:0), %arg65: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_479"} loc("LlamaForCausalLM":0:0), %arg66: tensor<1xf32> {ttir.name = "input_1_add_500"} loc("LlamaForCausalLM":0:0), %arg67: tensor<1xf32> {ttir.name = "input_1_add_520"} loc("LlamaForCausalLM":0:0), %arg68: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_530"} loc("LlamaForCausalLM":0:0), %arg69: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_540.1"} loc("LlamaForCausalLM":0:0), %arg70: tensor<1xf32> {ttir.name = "input_1_multiply_541"} loc("LlamaForCausalLM":0:0), %arg71: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_542.1"} loc("LlamaForCausalLM":0:0), %arg72: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_554.1"} loc("LlamaForCausalLM":0:0), %arg73: tensor<1xf32> {ttir.name = "input_1_multiply_555"} loc("LlamaForCausalLM":0:0), %arg74: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_556.1"} loc("LlamaForCausalLM":0:0), %arg75: tensor<1xf32> {ttir.name = "input_1_multiply_564"} loc("LlamaForCausalLM":0:0), %arg76: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_565"} loc("LlamaForCausalLM":0:0), %arg77: tensor<1xf32> {ttir.name = "input_1_add_586"} loc("LlamaForCausalLM":0:0), %arg78: tensor<1xf32> {ttir.name = "input_1_add_606"} loc("LlamaForCausalLM":0:0), %arg79: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_616"} loc("LlamaForCausalLM":0:0), %arg80: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_626.1"} loc("LlamaForCausalLM":0:0), %arg81: tensor<1xf32> {ttir.name = "input_1_multiply_627"} loc("LlamaForCausalLM":0:0), %arg82: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_628.1"} loc("LlamaForCausalLM":0:0), %arg83: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_640.1"} loc("LlamaForCausalLM":0:0), %arg84: tensor<1xf32> {ttir.name = "input_1_multiply_641"} loc("LlamaForCausalLM":0:0), %arg85: tensor<1x32x50x100xf32> {ttir.name = 
"dc.input_tensor.index_642.1"} loc("LlamaForCausalLM":0:0), %arg86: tensor<1xf32> {ttir.name = "input_1_multiply_650"} loc("LlamaForCausalLM":0:0), %arg87: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_651"} loc("LlamaForCausalLM":0:0), %arg88: tensor<1xf32> {ttir.name = "input_1_add_672"} loc("LlamaForCausalLM":0:0), %arg89: tensor<1xf32> {ttir.name = "input_1_add_692"} loc("LlamaForCausalLM":0:0), %arg90: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_702"} loc("LlamaForCausalLM":0:0), %arg91: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_712.1"} loc("LlamaForCausalLM":0:0), %arg92: tensor<1xf32> {ttir.name = "input_1_multiply_713"} loc("LlamaForCausalLM":0:0), %arg93: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_714.1"} loc("LlamaForCausalLM":0:0), %arg94: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_726.1"} loc("LlamaForCausalLM":0:0), %arg95: tensor<1xf32> {ttir.name = "input_1_multiply_727"} loc("LlamaForCausalLM":0:0), %arg96: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_728.1"} loc("LlamaForCausalLM":0:0), %arg97: tensor<1xf32> {ttir.name = "input_1_multiply_736"} loc("LlamaForCausalLM":0:0), %arg98: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_737"} loc("LlamaForCausalLM":0:0), %arg99: tensor<1xf32> {ttir.name = "input_1_add_758"} loc("LlamaForCausalLM":0:0), %arg100: tensor<1xf32> {ttir.name = "input_1_add_778"} loc("LlamaForCausalLM":0:0), %arg101: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_788"} loc("LlamaForCausalLM":0:0), %arg102: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_798.1"} loc("LlamaForCausalLM":0:0), %arg103: tensor<1xf32> {ttir.name = "input_1_multiply_799"} loc("LlamaForCausalLM":0:0), %arg104: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_800.1"} loc("LlamaForCausalLM":0:0), %arg105: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_812.1"} loc("LlamaForCausalLM":0:0), %arg106: tensor<1xf32> {ttir.name = "input_1_multiply_813"} loc("LlamaForCausalLM":0:0), %arg107: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_814.1"} loc("LlamaForCausalLM":0:0), %arg108: tensor<1xf32> {ttir.name = "input_1_multiply_822"} loc("LlamaForCausalLM":0:0), %arg109: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_823"} loc("LlamaForCausalLM":0:0), %arg110: tensor<1xf32> {ttir.name = "input_1_add_844"} loc("LlamaForCausalLM":0:0), %arg111: tensor<1xf32> {ttir.name = "input_1_add_864"} loc("LlamaForCausalLM":0:0), %arg112: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_874"} loc("LlamaForCausalLM":0:0), %arg113: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_884.1"} loc("LlamaForCausalLM":0:0), %arg114: tensor<1xf32> {ttir.name = "input_1_multiply_885"} loc("LlamaForCausalLM":0:0), %arg115: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_886.1"} loc("LlamaForCausalLM":0:0), %arg116: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_898.1"} loc("LlamaForCausalLM":0:0), %arg117: tensor<1xf32> {ttir.name = "input_1_multiply_899"} loc("LlamaForCausalLM":0:0), %arg118: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_900.1"} loc("LlamaForCausalLM":0:0), %arg119: tensor<1xf32> {ttir.name = "input_1_multiply_908"} loc("LlamaForCausalLM":0:0), %arg120: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_909"} loc("LlamaForCausalLM":0:0), %arg121: tensor<1xf32> {ttir.name = "input_1_add_930"} loc("LlamaForCausalLM":0:0), %arg122: tensor<1xf32> {ttir.name = "input_1_add_950"} loc("LlamaForCausalLM":0:0), %arg123: 
tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_960"} loc("LlamaForCausalLM":0:0), %arg124: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_970.1"} loc("LlamaForCausalLM":0:0), %arg125: tensor<1xf32> {ttir.name = "input_1_multiply_971"} loc("LlamaForCausalLM":0:0), %arg126: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_972.1"} loc("LlamaForCausalLM":0:0), %arg127: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_984.1"} loc("LlamaForCausalLM":0:0), %arg128: tensor<1xf32> {ttir.name = "input_1_multiply_985"} loc("LlamaForCausalLM":0:0), %arg129: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_986.1"} loc("LlamaForCausalLM":0:0), %arg130: tensor<1xf32> {ttir.name = "input_1_multiply_994"} loc("LlamaForCausalLM":0:0), %arg131: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_995"} loc("LlamaForCausalLM":0:0), %arg132: tensor<1xf32> {ttir.name = "input_1_add_1016"} loc("LlamaForCausalLM":0:0), %arg133: tensor<1xf32> {ttir.name = "input_1_add_1036"} loc("LlamaForCausalLM":0:0), %arg134: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1046"} loc("LlamaForCausalLM":0:0), %arg135: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1056.1"} loc("LlamaForCausalLM":0:0), %arg136: tensor<1xf32> {ttir.name = "input_1_multiply_1057"} loc("LlamaForCausalLM":0:0), %arg137: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1058.1"} loc("LlamaForCausalLM":0:0), %arg138: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1070.1"} loc("LlamaForCausalLM":0:0), %arg139: tensor<1xf32> {ttir.name = "input_1_multiply_1071"} loc("LlamaForCausalLM":0:0), %arg140: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1072.1"} loc("LlamaForCausalLM":0:0), %arg141: tensor<1xf32> {ttir.name = "input_1_multiply_1080"} loc("LlamaForCausalLM":0:0), %arg142: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1081"} loc("LlamaForCausalLM":0:0), %arg143: tensor<1xf32> {ttir.name = "input_1_add_1102"} loc("LlamaForCausalLM":0:0), %arg144: tensor<1xf32> {ttir.name = "input_1_add_1122"} loc("LlamaForCausalLM":0:0), %arg145: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1132"} loc("LlamaForCausalLM":0:0), %arg146: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1142.1"} loc("LlamaForCausalLM":0:0), %arg147: tensor<1xf32> {ttir.name = "input_1_multiply_1143"} loc("LlamaForCausalLM":0:0), %arg148: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1144.1"} loc("LlamaForCausalLM":0:0), %arg149: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1156.1"} loc("LlamaForCausalLM":0:0), %arg150: tensor<1xf32> {ttir.name = "input_1_multiply_1157"} loc("LlamaForCausalLM":0:0), %arg151: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1158.1"} loc("LlamaForCausalLM":0:0), %arg152: tensor<1xf32> {ttir.name = "input_1_multiply_1166"} loc("LlamaForCausalLM":0:0), %arg153: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1167"} loc("LlamaForCausalLM":0:0), %arg154: tensor<1xf32> {ttir.name = "input_1_add_1188"} loc("LlamaForCausalLM":0:0), %arg155: tensor<1xf32> {ttir.name = "input_1_add_1208"} loc("LlamaForCausalLM":0:0), %arg156: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1218"} loc("LlamaForCausalLM":0:0), %arg157: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1228.1"} loc("LlamaForCausalLM":0:0), %arg158: tensor<1xf32> {ttir.name = "input_1_multiply_1229"} loc("LlamaForCausalLM":0:0), %arg159: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1230.1"} 
loc("LlamaForCausalLM":0:0), %arg160: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1242.1"} loc("LlamaForCausalLM":0:0), %arg161: tensor<1xf32> {ttir.name = "input_1_multiply_1243"} loc("LlamaForCausalLM":0:0), %arg162: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1244.1"} loc("LlamaForCausalLM":0:0), %arg163: tensor<1xf32> {ttir.name = "input_1_multiply_1252"} loc("LlamaForCausalLM":0:0), %arg164: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1253"} loc("LlamaForCausalLM":0:0), %arg165: tensor<1xf32> {ttir.name = "input_1_add_1274"} loc("LlamaForCausalLM":0:0), %arg166: tensor<1xf32> {ttir.name = "input_1_add_1294"} loc("LlamaForCausalLM":0:0), %arg167: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1304"} loc("LlamaForCausalLM":0:0), %arg168: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1314.1"} loc("LlamaForCausalLM":0:0), %arg169: tensor<1xf32> {ttir.name = "input_1_multiply_1315"} loc("LlamaForCausalLM":0:0), %arg170: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1316.1"} loc("LlamaForCausalLM":0:0), %arg171: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1328.1"} loc("LlamaForCausalLM":0:0), %arg172: tensor<1xf32> {ttir.name = "input_1_multiply_1329"} loc("LlamaForCausalLM":0:0), %arg173: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1330.1"} loc("LlamaForCausalLM":0:0), %arg174: tensor<1xf32> {ttir.name = "input_1_multiply_1338"} loc("LlamaForCausalLM":0:0), %arg175: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1339"} loc("LlamaForCausalLM":0:0), %arg176: tensor<1xf32> {ttir.name = "input_1_add_1360"} loc("LlamaForCausalLM":0:0), %arg177: tensor<1xf32> {ttir.name = "input_1_add_1380"} loc("LlamaForCausalLM":0:0), %arg178: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1390"} loc("LlamaForCausalLM":0:0), %arg179: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1400.1"} loc("LlamaForCausalLM":0:0), %arg180: tensor<1xf32> {ttir.name = "input_1_multiply_1401"} loc("LlamaForCausalLM":0:0), %arg181: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1402.1"} loc("LlamaForCausalLM":0:0), %arg182: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1414.1"} loc("LlamaForCausalLM":0:0), %arg183: tensor<1xf32> {ttir.name = "input_1_multiply_1415"} loc("LlamaForCausalLM":0:0), %arg184: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1416.1"} loc("LlamaForCausalLM":0:0), %arg185: tensor<1xf32> {ttir.name = "input_1_multiply_1424"} loc("LlamaForCausalLM":0:0), %arg186: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1425"} loc("LlamaForCausalLM":0:0), %arg187: tensor<1xf32> {ttir.name = "input_1_add_1446"} loc("LlamaForCausalLM":0:0), %arg188: tensor<1xf32> {ttir.name = "input_1_add_1466"} loc("LlamaForCausalLM":0:0), %arg189: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1476"} loc("LlamaForCausalLM":0:0), %arg190: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1486.1"} loc("LlamaForCausalLM":0:0), %arg191: tensor<1xf32> {ttir.name = "input_1_multiply_1487"} loc("LlamaForCausalLM":0:0), %arg192: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1488.1"} loc("LlamaForCausalLM":0:0), %arg193: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1500.1"} loc("LlamaForCausalLM":0:0), %arg194: tensor<1xf32> {ttir.name = "input_1_multiply_1501"} loc("LlamaForCausalLM":0:0), %arg195: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1502.1"} loc("LlamaForCausalLM":0:0), %arg196: tensor<1xf32> {ttir.name = 
"input_1_multiply_1510"} loc("LlamaForCausalLM":0:0), %arg197: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1511"} loc("LlamaForCausalLM":0:0), %arg198: tensor<1xf32> {ttir.name = "input_1_add_1532"} loc("LlamaForCausalLM":0:0), %arg199: tensor<1xf32> {ttir.name = "input_1_add_1552"} loc("LlamaForCausalLM":0:0), %arg200: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1562"} loc("LlamaForCausalLM":0:0), %arg201: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1572.1"} loc("LlamaForCausalLM":0:0), %arg202: tensor<1xf32> {ttir.name = "input_1_multiply_1573"} loc("LlamaForCausalLM":0:0), %arg203: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1574.1"} loc("LlamaForCausalLM":0:0), %arg204: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1586.1"} loc("LlamaForCausalLM":0:0), %arg205: tensor<1xf32> {ttir.name = "input_1_multiply_1587"} loc("LlamaForCausalLM":0:0), %arg206: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1588.1"} loc("LlamaForCausalLM":0:0), %arg207: tensor<1xf32> {ttir.name = "input_1_multiply_1596"} loc("LlamaForCausalLM":0:0), %arg208: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1597"} loc("LlamaForCausalLM":0:0), %arg209: tensor<1xf32> {ttir.name = "input_1_add_1618"} loc("LlamaForCausalLM":0:0), %arg210: tensor<1xf32> {ttir.name = "input_1_add_1638"} loc("LlamaForCausalLM":0:0), %arg211: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1648"} loc("LlamaForCausalLM":0:0), %arg212: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1658.1"} loc("LlamaForCausalLM":0:0), %arg213: tensor<1xf32> {ttir.name = "input_1_multiply_1659"} loc("LlamaForCausalLM":0:0), %arg214: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1660.1"} loc("LlamaForCausalLM":0:0), %arg215: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1672.1"} loc("LlamaForCausalLM":0:0), %arg216: tensor<1xf32> {ttir.name = "input_1_multiply_1673"} loc("LlamaForCausalLM":0:0), %arg217: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1674.1"} loc("LlamaForCausalLM":0:0), %arg218: tensor<1xf32> {ttir.name = "input_1_multiply_1682"} loc("LlamaForCausalLM":0:0), %arg219: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1683"} loc("LlamaForCausalLM":0:0), %arg220: tensor<1xf32> {ttir.name = "input_1_add_1704"} loc("LlamaForCausalLM":0:0), %arg221: tensor<1xf32> {ttir.name = "input_1_add_1724"} loc("LlamaForCausalLM":0:0), %arg222: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1734"} loc("LlamaForCausalLM":0:0), %arg223: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1744.1"} loc("LlamaForCausalLM":0:0), %arg224: tensor<1xf32> {ttir.name = "input_1_multiply_1745"} loc("LlamaForCausalLM":0:0), %arg225: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1746.1"} loc("LlamaForCausalLM":0:0), %arg226: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1758.1"} loc("LlamaForCausalLM":0:0), %arg227: tensor<1xf32> {ttir.name = "input_1_multiply_1759"} loc("LlamaForCausalLM":0:0), %arg228: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1760.1"} loc("LlamaForCausalLM":0:0), %arg229: tensor<1xf32> {ttir.name = "input_1_multiply_1768"} loc("LlamaForCausalLM":0:0), %arg230: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1769"} loc("LlamaForCausalLM":0:0), %arg231: tensor<1xf32> {ttir.name = "input_1_add_1790"} loc("LlamaForCausalLM":0:0), %arg232: tensor<1xf32> {ttir.name = "input_1_add_1810"} loc("LlamaForCausalLM":0:0), %arg233: tensor<1x12x50xf32> {ttir.name = 
"input_0_unsqueeze_1820"} loc("LlamaForCausalLM":0:0), %arg234: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1830.1"} loc("LlamaForCausalLM":0:0), %arg235: tensor<1xf32> {ttir.name = "input_1_multiply_1831"} loc("LlamaForCausalLM":0:0), %arg236: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1832.1"} loc("LlamaForCausalLM":0:0), %arg237: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1844.1"} loc("LlamaForCausalLM":0:0), %arg238: tensor<1xf32> {ttir.name = "input_1_multiply_1845"} loc("LlamaForCausalLM":0:0), %arg239: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1846.1"} loc("LlamaForCausalLM":0:0), %arg240: tensor<1xf32> {ttir.name = "input_1_multiply_1854"} loc("LlamaForCausalLM":0:0), %arg241: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1855"} loc("LlamaForCausalLM":0:0), %arg242: tensor<1xf32> {ttir.name = "input_1_add_1876"} loc("LlamaForCausalLM":0:0), %arg243: tensor<1xf32> {ttir.name = "input_1_add_1896"} loc("LlamaForCausalLM":0:0), %arg244: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1906"} loc("LlamaForCausalLM":0:0), %arg245: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1916.1"} loc("LlamaForCausalLM":0:0), %arg246: tensor<1xf32> {ttir.name = "input_1_multiply_1917"} loc("LlamaForCausalLM":0:0), %arg247: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1918.1"} loc("LlamaForCausalLM":0:0), %arg248: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1930.1"} loc("LlamaForCausalLM":0:0), %arg249: tensor<1xf32> {ttir.name = "input_1_multiply_1931"} loc("LlamaForCausalLM":0:0), %arg250: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1932.1"} loc("LlamaForCausalLM":0:0), %arg251: tensor<1xf32> {ttir.name = "input_1_multiply_1940"} loc("LlamaForCausalLM":0:0), %arg252: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1941"} loc("LlamaForCausalLM":0:0), %arg253: tensor<1xf32> {ttir.name = "input_1_add_1962"} loc("LlamaForCausalLM":0:0), %arg254: tensor<1xf32> {ttir.name = "input_1_add_1982"} loc("LlamaForCausalLM":0:0), %arg255: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1992"} loc("LlamaForCausalLM":0:0), %arg256: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2002.1"} loc("LlamaForCausalLM":0:0), %arg257: tensor<1xf32> {ttir.name = "input_1_multiply_2003"} loc("LlamaForCausalLM":0:0), %arg258: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2004.1"} loc("LlamaForCausalLM":0:0), %arg259: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2016.1"} loc("LlamaForCausalLM":0:0), %arg260: tensor<1xf32> {ttir.name = "input_1_multiply_2017"} loc("LlamaForCausalLM":0:0), %arg261: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2018.1"} loc("LlamaForCausalLM":0:0), %arg262: tensor<1xf32> {ttir.name = "input_1_multiply_2026"} loc("LlamaForCausalLM":0:0), %arg263: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_2027"} loc("LlamaForCausalLM":0:0), %arg264: tensor<1xf32> {ttir.name = "input_1_add_2048"} loc("LlamaForCausalLM":0:0), %arg265: tensor<1xf32> {ttir.name = "input_1_add_2068"} loc("LlamaForCausalLM":0:0), %arg266: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_2078"} loc("LlamaForCausalLM":0:0), %arg267: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2088.1"} loc("LlamaForCausalLM":0:0), %arg268: tensor<1xf32> {ttir.name = "input_1_multiply_2089"} loc("LlamaForCausalLM":0:0), %arg269: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2090.1"} loc("LlamaForCausalLM":0:0), %arg270: 
tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2102.1"} loc("LlamaForCausalLM":0:0), %arg271: tensor<1xf32> {ttir.name = "input_1_multiply_2103"} loc("LlamaForCausalLM":0:0), %arg272: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2104.1"} loc("LlamaForCausalLM":0:0), %arg273: tensor<1xf32> {ttir.name = "input_1_multiply_2112"} loc("LlamaForCausalLM":0:0), %arg274: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_2113"} loc("LlamaForCausalLM":0:0), %arg275: tensor<1xf32> {ttir.name = "input_1_add_2134"} loc("LlamaForCausalLM":0:0), %arg276: tensor<1xf32> {ttir.name = "input_1_add_2154"} loc("LlamaForCausalLM":0:0), %arg277: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_2164"} loc("LlamaForCausalLM":0:0), %arg278: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2174.1"} loc("LlamaForCausalLM":0:0), %arg279: tensor<1xf32> {ttir.name = "input_1_multiply_2175"} loc("LlamaForCausalLM":0:0), %arg280: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2176.1"} loc("LlamaForCausalLM":0:0), %arg281: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2188.1"} loc("LlamaForCausalLM":0:0), %arg282: tensor<1xf32> {ttir.name = "input_1_multiply_2189"} loc("LlamaForCausalLM":0:0), %arg283: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2190.1"} loc("LlamaForCausalLM":0:0), %arg284: tensor<1xf32> {ttir.name = "input_1_multiply_2198"} loc("LlamaForCausalLM":0:0), %arg285: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_2199"} loc("LlamaForCausalLM":0:0), %arg286: tensor<1xf32> {ttir.name = "input_1_add_2220"} loc("LlamaForCausalLM":0:0), %arg287: tensor<1xf32> {ttir.name = "input_1_add_2240"} loc("LlamaForCausalLM":0:0), %arg288: tensor<3200xf32> {ttir.name = "model.norm.weight"} loc("LlamaForCausalLM":0:0), %arg289: tensor<32000x3200xf32> {ttir.name = "model.embed_tokens.weight"} loc("LlamaForCausalLM":0:0), %arg290: tensor<3200xf32> {ttir.name = "model.layers.0.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg291: tensor<3200x3200xf32> {ttir.name = "model.layers.0.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg292: tensor<3200x3200xf32> {ttir.name = "model.layers.0.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg293: tensor<3200x3200xf32> {ttir.name = "model.layers.0.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg294: tensor<3200x3200xf32> {ttir.name = "model.layers.0.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg295: tensor<3200xf32> {ttir.name = "model.layers.0.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg296: tensor<3200x8640xf32> {ttir.name = "model.layers.0.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg297: tensor<3200x8640xf32> {ttir.name = "model.layers.0.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg298: tensor<8640x3200xf32> {ttir.name = "model.layers.0.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg299: tensor<3200xf32> {ttir.name = "model.layers.1.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg300: tensor<3200x3200xf32> {ttir.name = "model.layers.1.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg301: tensor<3200x3200xf32> {ttir.name = "model.layers.1.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg302: tensor<3200x3200xf32> {ttir.name = "model.layers.1.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg303: tensor<3200x3200xf32> {ttir.name = "model.layers.1.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg304: tensor<3200xf32> {ttir.name = 
"model.layers.1.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg305: tensor<3200x8640xf32> {ttir.name = "model.layers.1.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg306: tensor<3200x8640xf32> {ttir.name = "model.layers.1.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg307: tensor<8640x3200xf32> {ttir.name = "model.layers.1.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg308: tensor<3200xf32> {ttir.name = "model.layers.2.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg309: tensor<3200x3200xf32> {ttir.name = "model.layers.2.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg310: tensor<3200x3200xf32> {ttir.name = "model.layers.2.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg311: tensor<3200x3200xf32> {ttir.name = "model.layers.2.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg312: tensor<3200x3200xf32> {ttir.name = "model.layers.2.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg313: tensor<3200xf32> {ttir.name = "model.layers.2.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg314: tensor<3200x8640xf32> {ttir.name = "model.layers.2.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg315: tensor<3200x8640xf32> {ttir.name = "model.layers.2.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg316: tensor<8640x3200xf32> {ttir.name = "model.layers.2.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg317: tensor<3200xf32> {ttir.name = "model.layers.3.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg318: tensor<3200x3200xf32> {ttir.name = "model.layers.3.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg319: tensor<3200x3200xf32> {ttir.name = "model.layers.3.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg320: tensor<3200x3200xf32> {ttir.name = "model.layers.3.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg321: tensor<3200x3200xf32> {ttir.name = "model.layers.3.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg322: tensor<3200xf32> {ttir.name = "model.layers.3.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg323: tensor<3200x8640xf32> {ttir.name = "model.layers.3.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg324: tensor<3200x8640xf32> {ttir.name = "model.layers.3.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg325: tensor<8640x3200xf32> {ttir.name = "model.layers.3.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg326: tensor<3200xf32> {ttir.name = "model.layers.4.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg327: tensor<3200x3200xf32> {ttir.name = "model.layers.4.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg328: tensor<3200x3200xf32> {ttir.name = "model.layers.4.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg329: tensor<3200x3200xf32> {ttir.name = "model.layers.4.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg330: tensor<3200x3200xf32> {ttir.name = "model.layers.4.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg331: tensor<3200xf32> {ttir.name = "model.layers.4.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg332: tensor<3200x8640xf32> {ttir.name = "model.layers.4.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg333: tensor<3200x8640xf32> {ttir.name = "model.layers.4.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg334: tensor<8640x3200xf32> {ttir.name = "model.layers.4.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg335: tensor<3200xf32> {ttir.name = 
"model.layers.5.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg336: tensor<3200x3200xf32> {ttir.name = "model.layers.5.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg337: tensor<3200x3200xf32> {ttir.name = "model.layers.5.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg338: tensor<3200x3200xf32> {ttir.name = "model.layers.5.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg339: tensor<3200x3200xf32> {ttir.name = "model.layers.5.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg340: tensor<3200xf32> {ttir.name = "model.layers.5.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg341: tensor<3200x8640xf32> {ttir.name = "model.layers.5.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg342: tensor<3200x8640xf32> {ttir.name = "model.layers.5.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg343: tensor<8640x3200xf32> {ttir.name = "model.layers.5.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg344: tensor<3200xf32> {ttir.name = "model.layers.6.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg345: tensor<3200x3200xf32> {ttir.name = "model.layers.6.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg346: tensor<3200x3200xf32> {ttir.name = "model.layers.6.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg347: tensor<3200x3200xf32> {ttir.name = "model.layers.6.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg348: tensor<3200x3200xf32> {ttir.name = "model.layers.6.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg349: tensor<3200xf32> {ttir.name = "model.layers.6.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg350: tensor<3200x8640xf32> {ttir.name = "model.layers.6.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg351: tensor<3200x8640xf32> {ttir.name = "model.layers.6.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg352: tensor<8640x3200xf32> {ttir.name = "model.layers.6.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg353: tensor<3200xf32> {ttir.name = "model.layers.7.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg354: tensor<3200x3200xf32> {ttir.name = "model.layers.7.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg355: tensor<3200x3200xf32> {ttir.name = "model.layers.7.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg356: tensor<3200x3200xf32> {ttir.name = "model.layers.7.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg357: tensor<3200x3200xf32> {ttir.name = "model.layers.7.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg358: tensor<3200xf32> {ttir.name = "model.layers.7.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg359: tensor<3200x8640xf32> {ttir.name = "model.layers.7.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg360: tensor<3200x8640xf32> {ttir.name = "model.layers.7.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg361: tensor<8640x3200xf32> {ttir.name = "model.layers.7.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg362: tensor<3200xf32> {ttir.name = "model.layers.8.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg363: tensor<3200x3200xf32> {ttir.name = "model.layers.8.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg364: tensor<3200x3200xf32> {ttir.name = "model.layers.8.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg365: tensor<3200x3200xf32> {ttir.name = "model.layers.8.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg366: tensor<3200x3200xf32> {ttir.name = 
"model.layers.8.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg367: tensor<3200xf32> {ttir.name = "model.layers.8.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg368: tensor<3200x8640xf32> {ttir.name = "model.layers.8.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg369: tensor<3200x8640xf32> {ttir.name = "model.layers.8.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg370: tensor<8640x3200xf32> {ttir.name = "model.layers.8.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg371: tensor<3200xf32> {ttir.name = "model.layers.9.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg372: tensor<3200x3200xf32> {ttir.name = "model.layers.9.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg373: tensor<3200x3200xf32> {ttir.name = "model.layers.9.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg374: tensor<3200x3200xf32> {ttir.name = "model.layers.9.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg375: tensor<3200x3200xf32> {ttir.name = "model.layers.9.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg376: tensor<3200xf32> {ttir.name = "model.layers.9.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg377: tensor<3200x8640xf32> {ttir.name = "model.layers.9.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg378: tensor<3200x8640xf32> {ttir.name = "model.layers.9.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg379: tensor<8640x3200xf32> {ttir.name = "model.layers.9.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg380: tensor<3200xf32> {ttir.name = "model.layers.10.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg381: tensor<3200x3200xf32> {ttir.name = "model.layers.10.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg382: tensor<3200x3200xf32> {ttir.name = "model.layers.10.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg383: tensor<3200x3200xf32> {ttir.name = "model.layers.10.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg384: tensor<3200x3200xf32> {ttir.name = "model.layers.10.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg385: tensor<3200xf32> {ttir.name = "model.layers.10.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg386: tensor<3200x8640xf32> {ttir.name = "model.layers.10.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg387: tensor<3200x8640xf32> {ttir.name = "model.layers.10.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg388: tensor<8640x3200xf32> {ttir.name = "model.layers.10.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg389: tensor<3200xf32> {ttir.name = "model.layers.11.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg390: tensor<3200x3200xf32> {ttir.name = "model.layers.11.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg391: tensor<3200x3200xf32> {ttir.name = "model.layers.11.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg392: tensor<3200x3200xf32> {ttir.name = "model.layers.11.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg393: tensor<3200x3200xf32> {ttir.name = "model.layers.11.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg394: tensor<3200xf32> {ttir.name = "model.layers.11.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg395: tensor<3200x8640xf32> {ttir.name = "model.layers.11.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg396: tensor<3200x8640xf32> {ttir.name = "model.layers.11.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg397: tensor<8640x3200xf32> {ttir.name = 
"model.layers.11.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg398: tensor<3200xf32> {ttir.name = "model.layers.12.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg399: tensor<3200x3200xf32> {ttir.name = "model.layers.12.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg400: tensor<3200x3200xf32> {ttir.name = "model.layers.12.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg401: tensor<3200x3200xf32> {ttir.name = "model.layers.12.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg402: tensor<3200x3200xf32> {ttir.name = "model.layers.12.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg403: tensor<3200xf32> {ttir.name = "model.layers.12.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg404: tensor<3200x8640xf32> {ttir.name = "model.layers.12.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg405: tensor<3200x8640xf32> {ttir.name = "model.layers.12.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg406: tensor<8640x3200xf32> {ttir.name = "model.layers.12.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg407: tensor<3200xf32> {ttir.name = "model.layers.13.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg408: tensor<3200x3200xf32> {ttir.name = "model.layers.13.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg409: tensor<3200x3200xf32> {ttir.name = "model.layers.13.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg410: tensor<3200x3200xf32> {ttir.name = "model.layers.13.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg411: tensor<3200x3200xf32> {ttir.name = "model.layers.13.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg412: tensor<3200xf32> {ttir.name = "model.layers.13.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg413: tensor<3200x8640xf32> {ttir.name = "model.layers.13.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg414: tensor<3200x8640xf32> {ttir.name = "model.layers.13.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg415: tensor<8640x3200xf32> {ttir.name = "model.layers.13.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg416: tensor<3200xf32> {ttir.name = "model.layers.14.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg417: tensor<3200x3200xf32> {ttir.name = "model.layers.14.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg418: tensor<3200x3200xf32> {ttir.name = "model.layers.14.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg419: tensor<3200x3200xf32> {ttir.name = "model.layers.14.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg420: tensor<3200x3200xf32> {ttir.name = "model.layers.14.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg421: tensor<3200xf32> {ttir.name = "model.layers.14.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg422: tensor<3200x8640xf32> {ttir.name = "model.layers.14.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg423: tensor<3200x8640xf32> {ttir.name = "model.layers.14.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg424: tensor<8640x3200xf32> {ttir.name = "model.layers.14.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg425: tensor<3200xf32> {ttir.name = "model.layers.15.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg426: tensor<3200x3200xf32> {ttir.name = "model.layers.15.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg427: tensor<3200x3200xf32> {ttir.name = "model.layers.15.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg428: tensor<3200x3200xf32> {ttir.name = 
"model.layers.15.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg429: tensor<3200x3200xf32> {ttir.name = "model.layers.15.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg430: tensor<3200xf32> {ttir.name = "model.layers.15.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg431: tensor<3200x8640xf32> {ttir.name = "model.layers.15.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg432: tensor<3200x8640xf32> {ttir.name = "model.layers.15.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg433: tensor<8640x3200xf32> {ttir.name = "model.layers.15.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg434: tensor<3200xf32> {ttir.name = "model.layers.16.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg435: tensor<3200x3200xf32> {ttir.name = "model.layers.16.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg436: tensor<3200x3200xf32> {ttir.name = "model.layers.16.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg437: tensor<3200x3200xf32> {ttir.name = "model.layers.16.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg438: tensor<3200x3200xf32> {ttir.name = "model.layers.16.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg439: tensor<3200xf32> {ttir.name = "model.layers.16.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg440: tensor<3200x8640xf32> {ttir.name = "model.layers.16.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg441: tensor<3200x8640xf32> {ttir.name = "model.layers.16.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg442: tensor<8640x3200xf32> {ttir.name = "model.layers.16.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg443: tensor<3200xf32> {ttir.name = "model.layers.17.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg444: tensor<3200x3200xf32> {ttir.name = "model.layers.17.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg445: tensor<3200x3200xf32> {ttir.name = "model.layers.17.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg446: tensor<3200x3200xf32> {ttir.name = "model.layers.17.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg447: tensor<3200x3200xf32> {ttir.name = "model.layers.17.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg448: tensor<3200xf32> {ttir.name = "model.layers.17.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg449: tensor<3200x8640xf32> {ttir.name = "model.layers.17.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg450: tensor<3200x8640xf32> {ttir.name = "model.layers.17.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg451: tensor<8640x3200xf32> {ttir.name = "model.layers.17.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg452: tensor<3200xf32> {ttir.name = "model.layers.18.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg453: tensor<3200x3200xf32> {ttir.name = "model.layers.18.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg454: tensor<3200x3200xf32> {ttir.name = "model.layers.18.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg455: tensor<3200x3200xf32> {ttir.name = "model.layers.18.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg456: tensor<3200x3200xf32> {ttir.name = "model.layers.18.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg457: tensor<3200xf32> {ttir.name = "model.layers.18.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg458: tensor<3200x8640xf32> {ttir.name = "model.layers.18.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg459: tensor<3200x8640xf32> {ttir.name = 
"model.layers.18.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg460: tensor<8640x3200xf32> {ttir.name = "model.layers.18.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg461: tensor<3200xf32> {ttir.name = "model.layers.19.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg462: tensor<3200x3200xf32> {ttir.name = "model.layers.19.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg463: tensor<3200x3200xf32> {ttir.name = "model.layers.19.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg464: tensor<3200x3200xf32> {ttir.name = "model.layers.19.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg465: tensor<3200x3200xf32> {ttir.name = "model.layers.19.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg466: tensor<3200xf32> {ttir.name = "model.layers.19.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg467: tensor<3200x8640xf32> {ttir.name = "model.layers.19.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg468: tensor<3200x8640xf32> {ttir.name = "model.layers.19.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg469: tensor<8640x3200xf32> {ttir.name = "model.layers.19.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg470: tensor<3200xf32> {ttir.name = "model.layers.20.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg471: tensor<3200x3200xf32> {ttir.name = "model.layers.20.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg472: tensor<3200x3200xf32> {ttir.name = "model.layers.20.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg473: tensor<3200x3200xf32> {ttir.name = "model.layers.20.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg474: tensor<3200x3200xf32> {ttir.name = "model.layers.20.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg475: tensor<3200xf32> {ttir.name = "model.layers.20.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg476: tensor<3200x8640xf32> {ttir.name = "model.layers.20.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg477: tensor<3200x8640xf32> {ttir.name = "model.layers.20.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg478: tensor<8640x3200xf32> {ttir.name = "model.layers.20.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg479: tensor<3200xf32> {ttir.name = "model.layers.21.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg480: tensor<3200x3200xf32> {ttir.name = "model.layers.21.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg481: tensor<3200x3200xf32> {ttir.name = "model.layers.21.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg482: tensor<3200x3200xf32> {ttir.name = "model.layers.21.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg483: tensor<3200x3200xf32> {ttir.name = "model.layers.21.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg484: tensor<3200xf32> {ttir.name = "model.layers.21.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg485: tensor<3200x8640xf32> {ttir.name = "model.layers.21.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg486: tensor<3200x8640xf32> {ttir.name = "model.layers.21.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg487: tensor<8640x3200xf32> {ttir.name = "model.layers.21.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg488: tensor<3200xf32> {ttir.name = "model.layers.22.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg489: tensor<3200x3200xf32> {ttir.name = "model.layers.22.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg490: tensor<3200x3200xf32> {ttir.name = 
"model.layers.22.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg491: tensor<3200x3200xf32> {ttir.name = "model.layers.22.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg492: tensor<3200x3200xf32> {ttir.name = "model.layers.22.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg493: tensor<3200xf32> {ttir.name = "model.layers.22.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg494: tensor<3200x8640xf32> {ttir.name = "model.layers.22.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg495: tensor<3200x8640xf32> {ttir.name = "model.layers.22.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg496: tensor<8640x3200xf32> {ttir.name = "model.layers.22.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg497: tensor<3200xf32> {ttir.name = "model.layers.23.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg498: tensor<3200x3200xf32> {ttir.name = "model.layers.23.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg499: tensor<3200x3200xf32> {ttir.name = "model.layers.23.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg500: tensor<3200x3200xf32> {ttir.name = "model.layers.23.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg501: tensor<3200x3200xf32> {ttir.name = "model.layers.23.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg502: tensor<3200xf32> {ttir.name = "model.layers.23.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg503: tensor<3200x8640xf32> {ttir.name = "model.layers.23.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg504: tensor<3200x8640xf32> {ttir.name = "model.layers.23.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg505: tensor<8640x3200xf32> {ttir.name = "model.layers.23.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg506: tensor<3200xf32> {ttir.name = "model.layers.24.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg507: tensor<3200x3200xf32> {ttir.name = "model.layers.24.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg508: tensor<3200x3200xf32> {ttir.name = "model.layers.24.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg509: tensor<3200x3200xf32> {ttir.name = "model.layers.24.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg510: tensor<3200x3200xf32> {ttir.name = "model.layers.24.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg511: tensor<3200xf32> {ttir.name = "model.layers.24.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg512: tensor<3200x8640xf32> {ttir.name = "model.layers.24.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg513: tensor<3200x8640xf32> {ttir.name = "model.layers.24.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg514: tensor<8640x3200xf32> {ttir.name = "model.layers.24.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg515: tensor<3200xf32> {ttir.name = "model.layers.25.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg516: tensor<3200x3200xf32> {ttir.name = "model.layers.25.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg517: tensor<3200x3200xf32> {ttir.name = "model.layers.25.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg518: tensor<3200x3200xf32> {ttir.name = "model.layers.25.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg519: tensor<3200x3200xf32> {ttir.name = "model.layers.25.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg520: tensor<3200xf32> {ttir.name = "model.layers.25.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg521: tensor<3200x8640xf32> {ttir.name 
= "model.layers.25.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg522: tensor<3200x8640xf32> {ttir.name = "model.layers.25.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg523: tensor<8640x3200xf32> {ttir.name = "model.layers.25.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg524: tensor<3200x32000xf32> {ttir.name = "lm_head.weight"} loc("LlamaForCausalLM":0:0)) -> (tensor<1x12x3200xf32> {ttir.name = "LlamaForCausalLM.output_matmul_2246"}) { %0 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2091) - %1 = "ttir.embedding"(%arg0, %arg289, %0) <{operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12xi32>, tensor<32000x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2091) + %1 = "ttir.embedding"(%arg0, %arg289, %0) : (tensor<1x12xi32>, tensor<32000x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2091) %2 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2092) - %3 = "ttir.multiply"(%1, %1, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2092) + %3 = "ttir.multiply"(%1, %1, %2) <{operandSegmentSizes = array}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2092) %4 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2093) - %5 = "ttir.mean"(%3, %4) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2093) + %5 = "ttir.mean"(%3, %4) <{dim_arg = [-1 : i32], keep_dim = true}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2093) %6 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2094) - %7 = "ttir.add"(%5, %arg1, %6) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2094) + %7 = "ttir.add"(%5, %arg1, %6) <{operandSegmentSizes = array}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2094) %8 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2095) - %9 = "ttir.sqrt"(%7, %8) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2095) + %9 = "ttir.sqrt"(%7, %8) <{operandSegmentSizes = array}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2095) %10 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2096) - %11 = "ttir.reciprocal"(%9, %10) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2096) + %11 = "ttir.reciprocal"(%9, %10) <{operandSegmentSizes = array}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2096) %12 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2097) - %13 = "ttir.multiply"(%1, %11, %12) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2097) + %13 = "ttir.multiply"(%1, %11, %12) <{operandSegmentSizes = array}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2097) %14 = tensor.empty() : tensor<1x12x3200xf32> 
loc(#loc2098) - %15 = "ttir.multiply"(%arg290, %13, %14) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2098) + %15 = "ttir.multiply"(%arg290, %13, %14) <{operandSegmentSizes = array}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2098) %16 = tensor.empty() : tensor<12x3200xf32> loc(#loc2099) - %17 = "ttir.squeeze"(%15, %16) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2099) + %17 = "ttir.squeeze"(%15, %16) <{dim = 0 : si32}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2099) %18 = tensor.empty() : tensor<12x3200xf32> loc(#loc2100) - %19 = "ttir.matmul"(%17, %arg291, %18) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2100) + %19 = "ttir.matmul"(%17, %arg291, %18) : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2100) %20 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2101) - %21 = "ttir.reshape"(%19, %20) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2101) + %21 = "ttir.reshape"(%19, %20) <{shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2101) %22 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2102) - %23 = "ttir.transpose"(%21, %22) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2102) + %23 = "ttir.transpose"(%21, %22) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2102) %24 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2103) - %25 = "ttir.concat"(%arg2, %arg2, %24) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2103) + %25 = "ttir.concat"(%arg2, %arg2, %24) <{dim = -1 : si32}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2103) %26 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2104) - %27 = "ttir.sin"(%25, %26) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2104) + %27 = "ttir.sin"(%25, %26) <{operandSegmentSizes = array}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2104) %28 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2105) - %29 = "ttir.unsqueeze"(%27, %28) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2105) + %29 = "ttir.unsqueeze"(%27, %28) <{dim = 1 : si32}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2105) %30 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2106) - %31 = 
"ttir.multiply"(%23, %29, %30) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2106) + %31 = "ttir.multiply"(%23, %29, %30) <{operandSegmentSizes = array}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2106) %32 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2107) - %33 = "ttir.transpose"(%23, %32) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2107) + %33 = "ttir.transpose"(%23, %32) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2107) %34 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2108) - %35 = "ttir.matmul"(%arg3, %33, %34) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2108) + %35 = "ttir.matmul"(%arg3, %33, %34) : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2108) %36 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2109) - %37 = "ttir.transpose"(%35, %36) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2109) + %37 = "ttir.transpose"(%35, %36) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2109) %38 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2110) - %39 = "ttir.multiply"(%37, %arg4, %38) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2110) + %39 = "ttir.multiply"(%37, %arg4, %38) <{operandSegmentSizes = array}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2110) %40 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2111) - %41 = "ttir.transpose"(%23, %40) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2111) + %41 = "ttir.transpose"(%23, %40) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2111) %42 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2112) - %43 = "ttir.matmul"(%arg5, %41, %42) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2112) + %43 = "ttir.matmul"(%arg5, %41, %42) : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2112) %44 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2113) - %45 = "ttir.transpose"(%43, %44) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2113) + %45 = "ttir.transpose"(%43, %44) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> 
loc(#loc2113) %46 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2114) - %47 = "ttir.concat"(%39, %45, %46) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2114) + %47 = "ttir.concat"(%39, %45, %46) <{dim = -1 : si32}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2114) %48 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2115) - %49 = "ttir.cos"(%25, %48) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2115) + %49 = "ttir.cos"(%25, %48) <{operandSegmentSizes = array}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2115) %50 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2116) - %51 = "ttir.unsqueeze"(%49, %50) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2116) + %51 = "ttir.unsqueeze"(%49, %50) <{dim = 1 : si32}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2116) %52 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2117) - %53 = "ttir.multiply"(%47, %51, %52) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2117) + %53 = "ttir.multiply"(%47, %51, %52) <{operandSegmentSizes = array}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2117) %54 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2118) - %55 = "ttir.add"(%31, %53, %54) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2118) + %55 = "ttir.add"(%31, %53, %54) <{operandSegmentSizes = array}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2118) %56 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2119) - %57 = "ttir.squeeze"(%55, %56) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2119) + %57 = "ttir.squeeze"(%55, %56) <{dim = 0 : si32}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2119) %58 = tensor.empty() : tensor<12x3200xf32> loc(#loc2120) - %59 = "ttir.matmul"(%17, %arg292, %58) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2120) + %59 = "ttir.matmul"(%17, %arg292, %58) : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2120) %60 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2121) - %61 = "ttir.reshape"(%59, %60) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2121) + %61 = "ttir.reshape"(%59, %60) <{shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> 
+ %61 = "ttir.reshape"(%59, %60) <{shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2121)
%62 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2122)
- %63 = "ttir.transpose"(%61, %62) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2122)
+ %63 = "ttir.transpose"(%61, %62) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2122)
%64 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2123)
- %65 = "ttir.multiply"(%63, %29, %64) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2123)
+ %65 = "ttir.multiply"(%63, %29, %64) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2123)
%66 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2124)
- %67 = "ttir.transpose"(%63, %66) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2124)
+ %67 = "ttir.transpose"(%63, %66) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2124)
%68 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2125)
- %69 = "ttir.matmul"(%arg6, %67, %68) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2125)
+ %69 = "ttir.matmul"(%arg6, %67, %68) : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2125)
%70 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2126)
- %71 = "ttir.transpose"(%69, %70) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2126)
+ %71 = "ttir.transpose"(%69, %70) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2126)
%72 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2127)
- %73 = "ttir.multiply"(%71, %arg7, %72) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2127)
+ %73 = "ttir.multiply"(%71, %arg7, %72) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2127)
%74 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2128)
- %75 = "ttir.transpose"(%63, %74) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2128)
+ %75 = "ttir.transpose"(%63, %74) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2128)
%76 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2129)
- %77 = "ttir.matmul"(%arg8, %75, %76) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2129)
+ %77 = "ttir.matmul"(%arg8, %75, %76) : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2129)
%78 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2130)
- %79 = "ttir.transpose"(%77, %78) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2130)
+ %79 = "ttir.transpose"(%77, %78) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2130)
%80 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2131)
- %81 = "ttir.concat"(%73, %79, %80) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2131)
+ %81 = "ttir.concat"(%73, %79, %80) <{dim = -1 : si32}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2131)
%82 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2132)
- %83 = "ttir.multiply"(%81, %51, %82) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2132)
+ %83 = "ttir.multiply"(%81, %51, %82) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2132)
%84 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2133)
- %85 = "ttir.add"(%65, %83, %84) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2133)
+ %85 = "ttir.add"(%65, %83, %84) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2133)
%86 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2134)
- %87 = "ttir.squeeze"(%85, %86) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2134)
+ %87 = "ttir.squeeze"(%85, %86) <{dim = 0 : si32}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2134)
%88 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2135)
- %89 = "ttir.transpose"(%87, %88) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2135)
+ %89 = "ttir.transpose"(%87, %88) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2135)
%90 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2136)
- %91 = "ttir.matmul"(%57, %89, %90) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2136)
+ %91 = "ttir.matmul"(%57, %89, %90) : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2136)
%92 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2137)
- %93 = "ttir.unsqueeze"(%91, %92) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2137)
"ttir.unsqueeze"(%91, %92) <{dim = 0 : si32}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2137) %94 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2138) - %95 = "ttir.multiply"(%93, %arg9, %94) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2138) + %95 = "ttir.multiply"(%93, %arg9, %94) <{operandSegmentSizes = array}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2138) %96 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2139) - %97 = "ttir.add"(%95, %arg10, %96) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2139) + %97 = "ttir.add"(%95, %arg10, %96) <{operandSegmentSizes = array}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2139) %98 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2140) - %99 = "ttir.softmax"(%97, %98) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2140) + %99 = "ttir.softmax"(%97, %98) <{dimension = -1 : si32}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2140) %100 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2141) - %101 = "ttir.squeeze"(%99, %100) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2141) + %101 = "ttir.squeeze"(%99, %100) <{dim = 0 : si32}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2141) %102 = tensor.empty() : tensor<12x3200xf32> loc(#loc2142) - %103 = "ttir.matmul"(%17, %arg293, %102) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2142) + %103 = "ttir.matmul"(%17, %arg293, %102) : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2142) %104 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2143) - %105 = "ttir.reshape"(%103, %104) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2143) + %105 = "ttir.reshape"(%103, %104) <{shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2143) %106 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2144) - %107 = "ttir.transpose"(%105, %106) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2144) + %107 = "ttir.transpose"(%105, %106) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2144) %108 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2145) - %109 = "ttir.transpose"(%107, %108) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2145) + %109 = 
"ttir.transpose"(%107, %108) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2145) %110 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2146) - %111 = "ttir.squeeze"(%109, %110) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2146) + %111 = "ttir.squeeze"(%109, %110) <{dim = 0 : si32}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2146) %112 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2147) - %113 = "ttir.transpose"(%111, %112) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2147) + %113 = "ttir.transpose"(%111, %112) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2147) %114 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2148) - %115 = "ttir.matmul"(%101, %113, %114) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2148) + %115 = "ttir.matmul"(%101, %113, %114) : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2148) %116 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2149) - %117 = "ttir.unsqueeze"(%115, %116) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2149) + %117 = "ttir.unsqueeze"(%115, %116) <{dim = 0 : si32}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2149) %118 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2150) - %119 = "ttir.transpose"(%117, %118) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2150) + %119 = "ttir.transpose"(%117, %118) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2150) %120 = tensor.empty() : tensor<12x3200xf32> loc(#loc2151) - %121 = "ttir.reshape"(%119, %120) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2151) + %121 = "ttir.reshape"(%119, %120) <{shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2151) %122 = tensor.empty() : tensor<12x3200xf32> loc(#loc2152) - %123 = "ttir.matmul"(%121, %arg294, %122) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2152) + %123 = "ttir.matmul"(%121, %arg294, %122) : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2152) %124 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2153) - %125 = "ttir.unsqueeze"(%123, %124) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2153) + %125 = "ttir.unsqueeze"(%123, %124) <{dim = 0 : si32}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2153) %126 = 
%126 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2154)
- %127 = "ttir.add"(%1, %125, %126) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2154)
+ %127 = "ttir.add"(%1, %125, %126) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2154)
%128 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2155)
- %129 = "ttir.multiply"(%127, %127, %128) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2155)
+ %129 = "ttir.multiply"(%127, %127, %128) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2155)
%130 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2156)
- %131 = "ttir.mean"(%129, %130) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2156)
+ %131 = "ttir.mean"(%129, %130) <{dim_arg = [-1 : i32], keep_dim = true}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2156)
%132 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2157)
- %133 = "ttir.add"(%131, %arg11, %132) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2157)
+ %133 = "ttir.add"(%131, %arg11, %132) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2157)
%134 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2158)
- %135 = "ttir.sqrt"(%133, %134) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2158)
+ %135 = "ttir.sqrt"(%133, %134) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2158)
%136 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2159)
- %137 = "ttir.reciprocal"(%135, %136) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2159)
+ %137 = "ttir.reciprocal"(%135, %136) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2159)
%138 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2160)
- %139 = "ttir.multiply"(%127, %137, %138) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2160)
+ %139 = "ttir.multiply"(%127, %137, %138) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2160)
%140 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2161)
- %141 = "ttir.multiply"(%arg295, %139, %140) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2161)
+ %141 = "ttir.multiply"(%arg295, %139, %140) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2161)
%142 = tensor.empty() : tensor<12x3200xf32> loc(#loc2162)
- %143 = "ttir.squeeze"(%141, %142) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2162)
+ %143 = "ttir.squeeze"(%141, %142) <{dim = 0 : si32}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2162)
%144 = tensor.empty() : tensor<12x8640xf32> loc(#loc2163)
- %145 = "ttir.matmul"(%143, %arg296, %144) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2163)
+ %145 = "ttir.matmul"(%143, %arg296, %144) : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2163)
%146 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2164)
- %147 = "ttir.unsqueeze"(%145, %146) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2164)
+ %147 = "ttir.unsqueeze"(%145, %146) <{dim = 0 : si32}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2164)
%148 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2165)
- %149 = "ttir.sigmoid"(%147, %148) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2165)
+ %149 = "ttir.sigmoid"(%147, %148) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2165)
%150 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2166)
- %151 = "ttir.multiply"(%147, %149, %150) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2166)
+ %151 = "ttir.multiply"(%147, %149, %150) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2166)
%152 = tensor.empty() : tensor<12x8640xf32> loc(#loc2167)
- %153 = "ttir.matmul"(%143, %arg297, %152) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2167)
+ %153 = "ttir.matmul"(%143, %arg297, %152) : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2167)
%154 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2168)
- %155 = "ttir.unsqueeze"(%153, %154) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2168)
+ %155 = "ttir.unsqueeze"(%153, %154) <{dim = 0 : si32}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2168)
%156 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2169)
- %157 = "ttir.multiply"(%151, %155, %156) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2169)
+ %157 = "ttir.multiply"(%151, %155, %156) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2169)
%158 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2170)
- %159 = "ttir.matmul"(%157, %arg298, %158) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2170)
+ %159 = "ttir.matmul"(%157, %arg298, %158) : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2170)
%160 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2171)
- %161 = "ttir.add"(%127, %159, %160) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2171)
+ %161 = "ttir.add"(%127, %159, %160) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2171)
return %161 : tensor<1x12x3200xf32> loc(#loc2090)
} loc(#loc)
} loc(#loc)