diff --git a/tools/explorer/test/models/forward_and_backward.mlir b/tools/explorer/test/models/forward_and_backward.mlir
index 3f0b8f781d..e205bcf2b8 100644
--- a/tools/explorer/test/models/forward_and_backward.mlir
+++ b/tools/explorer/test/models/forward_and_backward.mlir
@@ -1,30 +1,30 @@
 module @SimpleModel attributes {} {
   func.func @forward(%arg0: tensor<1x784xf32> {ttir.name = "input_1"}, %arg1: tensor<10x784xf32> {ttir.name = "linear.weight"}, %arg2: tensor<10xf32> {ttir.name = "linear.bias"}) -> (tensor<1x10xf32> {ttir.name = "SimpleModel_472.output_softmax_1495"}) {
     %0 = tensor.empty() : tensor<784x10xf32>
-    %1 = "ttir.transpose"(%arg1, %0) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<10x784xf32>, tensor<784x10xf32>) -> tensor<784x10xf32>
+    %1 = "ttir.transpose"(%arg1, %0) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<10x784xf32>, tensor<784x10xf32>) -> tensor<784x10xf32>
     %2 = tensor.empty() : tensor<1x10xf32>
-    %3 = "ttir.matmul"(%arg0, %1, %2) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x784xf32>, tensor<784x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
+    %3 = "ttir.matmul"(%arg0, %1, %2) : (tensor<1x784xf32>, tensor<784x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
     %4 = tensor.empty() : tensor<1x10xf32>
-    %5 = "ttir.add"(%3, %arg2, %4) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x10xf32>, tensor<10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
+    %5 = "ttir.add"(%3, %arg2, %4) <{operandSegmentSizes = array}> : (tensor<1x10xf32>, tensor<10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
     %6 = tensor.empty() : tensor<1x10xf32>
-    %7 = "ttir.softmax"(%5, %6) <{dimension = -1 : si32, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
+    %7 = "ttir.softmax"(%5, %6) <{dimension = -1 : si32}> : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
     return %7 : tensor<1x10xf32>
   }
   func.func @backward(%arg0: tensor<1x10xf32> {ttir.name = "loss_SimpleModel_472.output_softmax_1495"}, %arg1: tensor<1x10xf32> {ttir.name = "SimpleModel_472.output_softmax_1495"}, %arg2: tensor<1x784xf32> {ttir.name = "input_1"}) -> (tensor<1x10xf32> {ttir.name = "grad_acc_linear.bias_grad_accumulator"}, tensor<10x784xf32> {ttir.name = "grad_acc_linear.weight_grad_accumulator"}) {
     %0 = tensor.empty() : tensor<1x10xf32>
-    %1 = "ttir.multiply"(%arg0, %arg1, %0) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x10xf32>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
+    %1 = "ttir.multiply"(%arg0, %arg1, %0) <{operandSegmentSizes = array}> : (tensor<1x10xf32>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
     %2 = tensor.empty() : tensor<1x1xf32>
-    %3 = "ttir.sum"(%1, %2) <{keep_dim = true, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x10xf32>, tensor<1x1xf32>) -> tensor<1x1xf32>
+    %3 = "ttir.sum"(%1, %2) <{keep_dim = true}> : (tensor<1x10xf32>, tensor<1x1xf32>) -> tensor<1x1xf32>
     %4 = tensor.empty() : tensor<1x10xf32>
-    %5 = "ttir.subtract"(%arg0, %3, %4) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x10xf32>, tensor<1x1xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
+    %5 = "ttir.subtract"(%arg0, %3, %4) <{operandSegmentSizes = array}> : (tensor<1x10xf32>, tensor<1x1xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
     %6 = tensor.empty() : tensor<1x10xf32>
-    %7 = "ttir.multiply"(%5, %arg1, %6) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x10xf32>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
+    %7 = "ttir.multiply"(%5, %arg1, %6) <{operandSegmentSizes = array}> : (tensor<1x10xf32>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32>
     %8 = tensor.empty() : tensor<784x1xf32>
-    %9 = "ttir.transpose"(%arg2, %8) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x784xf32>, tensor<784x1xf32>) -> tensor<784x1xf32>
+    %9 = "ttir.transpose"(%arg2, %8) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x784xf32>, tensor<784x1xf32>) -> tensor<784x1xf32>
     %10 = tensor.empty() : tensor<784x10xf32>
-    %11 = "ttir.matmul"(%9, %7, %10) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<784x1xf32>, tensor<1x10xf32>, tensor<784x10xf32>) -> tensor<784x10xf32>
+    %11 = "ttir.matmul"(%9, %7, %10) : (tensor<784x1xf32>, tensor<1x10xf32>, tensor<784x10xf32>) -> tensor<784x10xf32>
     %12 = tensor.empty() : tensor<10x784xf32>
-    %13 = "ttir.transpose"(%11, %12) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<784x10xf32>, tensor<10x784xf32>) -> tensor<10x784xf32>
+    %13 = "ttir.transpose"(%11, %12) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<784x10xf32>, tensor<10x784xf32>) -> tensor<10x784xf32>
     return %7, %13 : tensor<1x10xf32>, tensor<10x784xf32>
   }
 }
diff --git a/tools/explorer/test/models/linear_autoencoder.mlir b/tools/explorer/test/models/linear_autoencoder.mlir
index 8d7defc535..d8af25bbff 100644
--- a/tools/explorer/test/models/linear_autoencoder.mlir
+++ b/tools/explorer/test/models/linear_autoencoder.mlir
@@ -1,49 +1,49 @@
 module @LinearAE attributes {} {
   func.func @forward(%arg0: tensor<1x784xf32> {ttir.name = "input_1"}, %arg1: tensor<784x128xf32> {ttir.name = "encoder_lin1.weight"}, %arg2: tensor<128xf32> {ttir.name = "encoder_lin1.bias"}, %arg3: tensor<128x64xf32> {ttir.name = "encoder_lin2.weight"}, %arg4: tensor<64xf32> {ttir.name = "encoder_lin2.bias"}, %arg5: tensor<64x12xf32> {ttir.name = "encoder_lin3.weight"}, %arg6: tensor<12xf32> {ttir.name = "encoder_lin3.bias"}, %arg7: tensor<12x3xf32> {ttir.name = "encoder_lin4.weight"}, %arg8: tensor<3xf32> {ttir.name = "encoder_lin4.bias"}, %arg9: tensor<3x12xf32> {ttir.name = "decoder_lin1.weight"}, %arg10: tensor<12xf32> {ttir.name = "decoder_lin1.bias"}, %arg11: tensor<12x64xf32> {ttir.name = "decoder_lin2.weight"}, %arg12: tensor<64xf32> {ttir.name = "decoder_lin2.bias"}, %arg13: tensor<64x128xf32> {ttir.name = "decoder_lin3.weight"}, %arg14: tensor<128xf32> {ttir.name = "decoder_lin3.bias"}, %arg15: tensor<128x784xf32> {ttir.name = "decoder_lin4.weight"}, %arg16: tensor<784xf32> {ttir.name = "decoder_lin4.bias"}) -> (tensor<1x784xf32> {ttir.name = "LinearAE.output_add_29"}) {
     %0 = tensor.empty() : tensor<1x128xf32>
-    %1 = "ttir.matmul"(%arg0, %arg1, %0) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x784xf32>, tensor<784x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
+    %1 = "ttir.matmul"(%arg0, %arg1, %0) : (tensor<1x784xf32>, tensor<784x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
     %2 = tensor.empty() : tensor<1x128xf32>
-    %3 = "ttir.add"(%1, %arg2, %2) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x128xf32>, tensor<128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
+    %3 = "ttir.add"(%1, %arg2, %2) <{operandSegmentSizes = array}> : (tensor<1x128xf32>, tensor<128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
     %4 = tensor.empty() : tensor<1x128xf32>
-    %5 = "ttir.relu"(%3, %4) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
+    %5 = "ttir.relu"(%3, %4) <{operandSegmentSizes = array}> : (tensor<1x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
     %6 = tensor.empty() : tensor<1x64xf32>
-    %7 = "ttir.matmul"(%5, %arg3, %6) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x128xf32>, tensor<128x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %7 = "ttir.matmul"(%5, %arg3, %6) : (tensor<1x128xf32>, tensor<128x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
     %8 = tensor.empty() : tensor<1x64xf32>
-    %9 = "ttir.add"(%7, %arg4, %8) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x64xf32>, tensor<64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %9 = "ttir.add"(%7, %arg4, %8) <{operandSegmentSizes = array}> : (tensor<1x64xf32>, tensor<64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
     %10 = tensor.empty() : tensor<1x64xf32>
-    %11 = "ttir.relu"(%9, %10) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %11 = "ttir.relu"(%9, %10) <{operandSegmentSizes = array}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
     %12 = tensor.empty() : tensor<1x12xf32>
-    %13 = "ttir.matmul"(%11, %arg5, %12) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x64xf32>, tensor<64x12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
+    %13 = "ttir.matmul"(%11, %arg5, %12) : (tensor<1x64xf32>, tensor<64x12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
     %14 = tensor.empty() : tensor<1x12xf32>
-    %15 = "ttir.add"(%13, %arg6, %14) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x12xf32>, tensor<12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
+    %15 = "ttir.add"(%13, %arg6, %14) <{operandSegmentSizes = array}> : (tensor<1x12xf32>, tensor<12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
     %16 = tensor.empty() : tensor<1x12xf32>
-    %17 = "ttir.relu"(%15, %16) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
+    %17 = "ttir.relu"(%15, %16) <{operandSegmentSizes = array}> : (tensor<1x12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
     %18 = tensor.empty() : tensor<1x3xf32>
-    %19 = "ttir.matmul"(%17, %arg7, %18) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x12xf32>, tensor<12x3xf32>, tensor<1x3xf32>) -> tensor<1x3xf32>
+    %19 = "ttir.matmul"(%17, %arg7, %18) : (tensor<1x12xf32>, tensor<12x3xf32>, tensor<1x3xf32>) -> tensor<1x3xf32>
     %20 = tensor.empty() : tensor<1x3xf32>
-    %21 = "ttir.add"(%19, %arg8, %20) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x3xf32>, tensor<3xf32>, tensor<1x3xf32>) -> tensor<1x3xf32>
+    %21 = "ttir.add"(%19, %arg8, %20) <{operandSegmentSizes = array}> : (tensor<1x3xf32>, tensor<3xf32>, tensor<1x3xf32>) -> tensor<1x3xf32>
     %22 = tensor.empty() : tensor<1x12xf32>
-    %23 = "ttir.matmul"(%21, %arg9, %22) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x3xf32>, tensor<3x12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
+    %23 = "ttir.matmul"(%21, %arg9, %22) : (tensor<1x3xf32>, tensor<3x12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
     %24 = tensor.empty() : tensor<1x12xf32>
-    %25 = "ttir.add"(%23, %arg10, %24) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x12xf32>, tensor<12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
+    %25 = "ttir.add"(%23, %arg10, %24) <{operandSegmentSizes = array}> : (tensor<1x12xf32>, tensor<12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
     %26 = tensor.empty() : tensor<1x12xf32>
-    %27 = "ttir.relu"(%25, %26) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
+    %27 = "ttir.relu"(%25, %26) <{operandSegmentSizes = array}> : (tensor<1x12xf32>, tensor<1x12xf32>) -> tensor<1x12xf32>
     %28 = tensor.empty() : tensor<1x64xf32>
-    %29 = "ttir.matmul"(%27, %arg11, %28) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x12xf32>, tensor<12x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %29 = "ttir.matmul"(%27, %arg11, %28) : (tensor<1x12xf32>, tensor<12x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
     %30 = tensor.empty() : tensor<1x64xf32>
-    %31 = "ttir.add"(%29, %arg12, %30) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x64xf32>, tensor<64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %31 = "ttir.add"(%29, %arg12, %30) <{operandSegmentSizes = array}> : (tensor<1x64xf32>, tensor<64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
     %32 = tensor.empty() : tensor<1x64xf32>
-    %33 = "ttir.relu"(%31, %32) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
+    %33 = "ttir.relu"(%31, %32) <{operandSegmentSizes = array}> : (tensor<1x64xf32>, tensor<1x64xf32>) -> tensor<1x64xf32>
     %34 = tensor.empty() : tensor<1x128xf32>
-    %35 = "ttir.matmul"(%33, %arg13, %34) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x64xf32>, tensor<64x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
+    %35 = "ttir.matmul"(%33, %arg13, %34) : (tensor<1x64xf32>, tensor<64x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
     %36 = tensor.empty() : tensor<1x128xf32>
-    %37 = "ttir.add"(%35, %arg14, %36) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x128xf32>, tensor<128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
+    %37 = "ttir.add"(%35, %arg14, %36) <{operandSegmentSizes = array}> : (tensor<1x128xf32>, tensor<128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
     %38 = tensor.empty() : tensor<1x128xf32>
-    %39 = "ttir.relu"(%37, %38) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
+    %39 = "ttir.relu"(%37, %38) <{operandSegmentSizes = array}> : (tensor<1x128xf32>, tensor<1x128xf32>) -> tensor<1x128xf32>
     %40 = tensor.empty() : tensor<1x784xf32>
-    %41 = "ttir.matmul"(%39, %arg15, %40) <{operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x128xf32>, tensor<128x784xf32>, tensor<1x784xf32>) -> tensor<1x784xf32>
+    %41 = "ttir.matmul"(%39, %arg15, %40) : (tensor<1x128xf32>, tensor<128x784xf32>, tensor<1x784xf32>) -> tensor<1x784xf32>
     %42 = tensor.empty() : tensor<1x784xf32>
-    %43 = "ttir.add"(%41, %arg16, %42) <{operandSegmentSizes = array, operand_constraints = [#tt.operand_constraint, #tt.operand_constraint, #tt.operand_constraint]}> : (tensor<1x784xf32>, tensor<784xf32>, tensor<1x784xf32>) -> tensor<1x784xf32>
+    %43 = "ttir.add"(%41, %arg16, %42) <{operandSegmentSizes = array}> : (tensor<1x784xf32>, tensor<784xf32>, tensor<1x784xf32>) -> tensor<1x784xf32>
     return %43 : tensor<1x784xf32>
   }
 }
diff --git a/tools/explorer/test/models/open_llama_3b_single_layer.mlir b/tools/explorer/test/models/open_llama_3b_single_layer.mlir
index 5e17dc39e9..97731870ba 100644
--- a/tools/explorer/test/models/open_llama_3b_single_layer.mlir
+++ b/tools/explorer/test/models/open_llama_3b_single_layer.mlir
@@ -1,170 +1,169 @@
-#any_device = #tt.operand_constraint
 #loc = loc("LlamaForCausalLM":0:0)
 #system_desc = #tt.system_desc<[{role = host, target_triple = "x86_64-pc-linux-gnu"}], [{arch = , grid = 8x8, l1_size = 1499136, num_dram_channels = 12, dram_channel_size = 1073741824, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32, l1_unreserved_base = 1024, erisc_l1_unreserved_base = 1024, dram_unreserved_base = 1024, dram_unreserved_end = 1073741824, physical_cores = {worker = [ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 1x0, 1x1, 1x2, 1x3, 1x4, 1x5, 1x6, 1x7, 2x0, 2x1, 2x2, 2x3, 2x4, 2x5, 2x6, 2x7, 3x0, 3x1, 3x2, 3x3, 3x4, 3x5, 3x6, 3x7, 4x0, 4x1, 4x2, 4x3, 4x4, 4x5, 4x6, 4x7, 5x0, 5x1, 5x2, 5x3, 5x4, 5x5, 5x6, 5x7, 6x0, 6x1, 6x2, 6x3, 6x4, 6x5, 6x6, 6x7, 7x0, 7x1, 7x2, 7x3, 7x4, 7x5, 7x6, 7x7] dram = [ 8x0, 9x0, 10x0, 8x1, 9x1, 10x1, 8x2, 9x2, 10x2, 8x3, 9x3, 10x3]}, supported_data_types = [, , , , , , , , , , , ], supported_tile_sizes = [ 4x16, 16x16, 32x16, 4x32, 16x32, 32x32], num_cbs = 32}], [0], [3 : i32], [ 0x0x0x0]>
 module @LlamaForCausalLM attributes {tt.system_desc = #system_desc} {
   func.func @forward(%arg0: tensor<1x12xi32> {ttir.name = "input_1"} loc("LlamaForCausalLM":0:0), %arg1: tensor<1xf32> {ttir.name = "input_1_add_4"} loc("LlamaForCausalLM":0:0), %arg2: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_14"} loc("LlamaForCausalLM":0:0), %arg3: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_24.1"} loc("LlamaForCausalLM":0:0), %arg4: tensor<1xf32> {ttir.name = "input_1_multiply_25"} loc("LlamaForCausalLM":0:0), %arg5: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_26.1"} loc("LlamaForCausalLM":0:0), %arg6: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_38.1"} loc("LlamaForCausalLM":0:0), %arg7: tensor<1xf32> {ttir.name = "input_1_multiply_39"} loc("LlamaForCausalLM":0:0), %arg8: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_40.1"} loc("LlamaForCausalLM":0:0), %arg9: tensor<1xf32> {ttir.name = "input_1_multiply_48"} loc("LlamaForCausalLM":0:0), %arg10:
tensor<1x1x12x12xf32> {ttir.name = "input_1_add_49"} loc("LlamaForCausalLM":0:0), %arg11: tensor<1xf32> {ttir.name = "input_1_add_70"} loc("LlamaForCausalLM":0:0), %arg12: tensor<1xf32> {ttir.name = "input_1_add_90"} loc("LlamaForCausalLM":0:0), %arg13: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_100"} loc("LlamaForCausalLM":0:0), %arg14: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_110.1"} loc("LlamaForCausalLM":0:0), %arg15: tensor<1xf32> {ttir.name = "input_1_multiply_111"} loc("LlamaForCausalLM":0:0), %arg16: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_112.1"} loc("LlamaForCausalLM":0:0), %arg17: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_124.1"} loc("LlamaForCausalLM":0:0), %arg18: tensor<1xf32> {ttir.name = "input_1_multiply_125"} loc("LlamaForCausalLM":0:0), %arg19: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_126.1"} loc("LlamaForCausalLM":0:0), %arg20: tensor<1xf32> {ttir.name = "input_1_multiply_134"} loc("LlamaForCausalLM":0:0), %arg21: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_135"} loc("LlamaForCausalLM":0:0), %arg22: tensor<1xf32> {ttir.name = "input_1_add_156"} loc("LlamaForCausalLM":0:0), %arg23: tensor<1xf32> {ttir.name = "input_1_add_176"} loc("LlamaForCausalLM":0:0), %arg24: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_186"} loc("LlamaForCausalLM":0:0), %arg25: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_196.1"} loc("LlamaForCausalLM":0:0), %arg26: tensor<1xf32> {ttir.name = "input_1_multiply_197"} loc("LlamaForCausalLM":0:0), %arg27: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_198.1"} loc("LlamaForCausalLM":0:0), %arg28: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_210.1"} loc("LlamaForCausalLM":0:0), %arg29: tensor<1xf32> {ttir.name = "input_1_multiply_211"} loc("LlamaForCausalLM":0:0), %arg30: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_212.1"} loc("LlamaForCausalLM":0:0), %arg31: tensor<1xf32> {ttir.name = "input_1_multiply_220"} loc("LlamaForCausalLM":0:0), %arg32: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_221"} loc("LlamaForCausalLM":0:0), %arg33: tensor<1xf32> {ttir.name = "input_1_add_242"} loc("LlamaForCausalLM":0:0), %arg34: tensor<1xf32> {ttir.name = "input_1_add_262"} loc("LlamaForCausalLM":0:0), %arg35: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_272"} loc("LlamaForCausalLM":0:0), %arg36: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_282.1"} loc("LlamaForCausalLM":0:0), %arg37: tensor<1xf32> {ttir.name = "input_1_multiply_283"} loc("LlamaForCausalLM":0:0), %arg38: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_284.1"} loc("LlamaForCausalLM":0:0), %arg39: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_296.1"} loc("LlamaForCausalLM":0:0), %arg40: tensor<1xf32> {ttir.name = "input_1_multiply_297"} loc("LlamaForCausalLM":0:0), %arg41: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_298.1"} loc("LlamaForCausalLM":0:0), %arg42: tensor<1xf32> {ttir.name = "input_1_multiply_306"} loc("LlamaForCausalLM":0:0), %arg43: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_307"} loc("LlamaForCausalLM":0:0), %arg44: tensor<1xf32> {ttir.name = "input_1_add_328"} loc("LlamaForCausalLM":0:0), %arg45: tensor<1xf32> {ttir.name = "input_1_add_348"} loc("LlamaForCausalLM":0:0), %arg46: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_358"} loc("LlamaForCausalLM":0:0), %arg47: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_368.1"} loc("LlamaForCausalLM":0:0), 
%arg48: tensor<1xf32> {ttir.name = "input_1_multiply_369"} loc("LlamaForCausalLM":0:0), %arg49: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_370.1"} loc("LlamaForCausalLM":0:0), %arg50: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_382.1"} loc("LlamaForCausalLM":0:0), %arg51: tensor<1xf32> {ttir.name = "input_1_multiply_383"} loc("LlamaForCausalLM":0:0), %arg52: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_384.1"} loc("LlamaForCausalLM":0:0), %arg53: tensor<1xf32> {ttir.name = "input_1_multiply_392"} loc("LlamaForCausalLM":0:0), %arg54: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_393"} loc("LlamaForCausalLM":0:0), %arg55: tensor<1xf32> {ttir.name = "input_1_add_414"} loc("LlamaForCausalLM":0:0), %arg56: tensor<1xf32> {ttir.name = "input_1_add_434"} loc("LlamaForCausalLM":0:0), %arg57: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_444"} loc("LlamaForCausalLM":0:0), %arg58: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_454.1"} loc("LlamaForCausalLM":0:0), %arg59: tensor<1xf32> {ttir.name = "input_1_multiply_455"} loc("LlamaForCausalLM":0:0), %arg60: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_456.1"} loc("LlamaForCausalLM":0:0), %arg61: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_468.1"} loc("LlamaForCausalLM":0:0), %arg62: tensor<1xf32> {ttir.name = "input_1_multiply_469"} loc("LlamaForCausalLM":0:0), %arg63: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_470.1"} loc("LlamaForCausalLM":0:0), %arg64: tensor<1xf32> {ttir.name = "input_1_multiply_478"} loc("LlamaForCausalLM":0:0), %arg65: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_479"} loc("LlamaForCausalLM":0:0), %arg66: tensor<1xf32> {ttir.name = "input_1_add_500"} loc("LlamaForCausalLM":0:0), %arg67: tensor<1xf32> {ttir.name = "input_1_add_520"} loc("LlamaForCausalLM":0:0), %arg68: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_530"} loc("LlamaForCausalLM":0:0), %arg69: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_540.1"} loc("LlamaForCausalLM":0:0), %arg70: tensor<1xf32> {ttir.name = "input_1_multiply_541"} loc("LlamaForCausalLM":0:0), %arg71: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_542.1"} loc("LlamaForCausalLM":0:0), %arg72: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_554.1"} loc("LlamaForCausalLM":0:0), %arg73: tensor<1xf32> {ttir.name = "input_1_multiply_555"} loc("LlamaForCausalLM":0:0), %arg74: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_556.1"} loc("LlamaForCausalLM":0:0), %arg75: tensor<1xf32> {ttir.name = "input_1_multiply_564"} loc("LlamaForCausalLM":0:0), %arg76: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_565"} loc("LlamaForCausalLM":0:0), %arg77: tensor<1xf32> {ttir.name = "input_1_add_586"} loc("LlamaForCausalLM":0:0), %arg78: tensor<1xf32> {ttir.name = "input_1_add_606"} loc("LlamaForCausalLM":0:0), %arg79: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_616"} loc("LlamaForCausalLM":0:0), %arg80: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_626.1"} loc("LlamaForCausalLM":0:0), %arg81: tensor<1xf32> {ttir.name = "input_1_multiply_627"} loc("LlamaForCausalLM":0:0), %arg82: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_628.1"} loc("LlamaForCausalLM":0:0), %arg83: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_640.1"} loc("LlamaForCausalLM":0:0), %arg84: tensor<1xf32> {ttir.name = "input_1_multiply_641"} loc("LlamaForCausalLM":0:0), %arg85: tensor<1x32x50x100xf32> {ttir.name = 
"dc.input_tensor.index_642.1"} loc("LlamaForCausalLM":0:0), %arg86: tensor<1xf32> {ttir.name = "input_1_multiply_650"} loc("LlamaForCausalLM":0:0), %arg87: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_651"} loc("LlamaForCausalLM":0:0), %arg88: tensor<1xf32> {ttir.name = "input_1_add_672"} loc("LlamaForCausalLM":0:0), %arg89: tensor<1xf32> {ttir.name = "input_1_add_692"} loc("LlamaForCausalLM":0:0), %arg90: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_702"} loc("LlamaForCausalLM":0:0), %arg91: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_712.1"} loc("LlamaForCausalLM":0:0), %arg92: tensor<1xf32> {ttir.name = "input_1_multiply_713"} loc("LlamaForCausalLM":0:0), %arg93: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_714.1"} loc("LlamaForCausalLM":0:0), %arg94: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_726.1"} loc("LlamaForCausalLM":0:0), %arg95: tensor<1xf32> {ttir.name = "input_1_multiply_727"} loc("LlamaForCausalLM":0:0), %arg96: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_728.1"} loc("LlamaForCausalLM":0:0), %arg97: tensor<1xf32> {ttir.name = "input_1_multiply_736"} loc("LlamaForCausalLM":0:0), %arg98: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_737"} loc("LlamaForCausalLM":0:0), %arg99: tensor<1xf32> {ttir.name = "input_1_add_758"} loc("LlamaForCausalLM":0:0), %arg100: tensor<1xf32> {ttir.name = "input_1_add_778"} loc("LlamaForCausalLM":0:0), %arg101: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_788"} loc("LlamaForCausalLM":0:0), %arg102: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_798.1"} loc("LlamaForCausalLM":0:0), %arg103: tensor<1xf32> {ttir.name = "input_1_multiply_799"} loc("LlamaForCausalLM":0:0), %arg104: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_800.1"} loc("LlamaForCausalLM":0:0), %arg105: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_812.1"} loc("LlamaForCausalLM":0:0), %arg106: tensor<1xf32> {ttir.name = "input_1_multiply_813"} loc("LlamaForCausalLM":0:0), %arg107: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_814.1"} loc("LlamaForCausalLM":0:0), %arg108: tensor<1xf32> {ttir.name = "input_1_multiply_822"} loc("LlamaForCausalLM":0:0), %arg109: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_823"} loc("LlamaForCausalLM":0:0), %arg110: tensor<1xf32> {ttir.name = "input_1_add_844"} loc("LlamaForCausalLM":0:0), %arg111: tensor<1xf32> {ttir.name = "input_1_add_864"} loc("LlamaForCausalLM":0:0), %arg112: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_874"} loc("LlamaForCausalLM":0:0), %arg113: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_884.1"} loc("LlamaForCausalLM":0:0), %arg114: tensor<1xf32> {ttir.name = "input_1_multiply_885"} loc("LlamaForCausalLM":0:0), %arg115: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_886.1"} loc("LlamaForCausalLM":0:0), %arg116: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_898.1"} loc("LlamaForCausalLM":0:0), %arg117: tensor<1xf32> {ttir.name = "input_1_multiply_899"} loc("LlamaForCausalLM":0:0), %arg118: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_900.1"} loc("LlamaForCausalLM":0:0), %arg119: tensor<1xf32> {ttir.name = "input_1_multiply_908"} loc("LlamaForCausalLM":0:0), %arg120: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_909"} loc("LlamaForCausalLM":0:0), %arg121: tensor<1xf32> {ttir.name = "input_1_add_930"} loc("LlamaForCausalLM":0:0), %arg122: tensor<1xf32> {ttir.name = "input_1_add_950"} loc("LlamaForCausalLM":0:0), %arg123: 
tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_960"} loc("LlamaForCausalLM":0:0), %arg124: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_970.1"} loc("LlamaForCausalLM":0:0), %arg125: tensor<1xf32> {ttir.name = "input_1_multiply_971"} loc("LlamaForCausalLM":0:0), %arg126: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_972.1"} loc("LlamaForCausalLM":0:0), %arg127: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_984.1"} loc("LlamaForCausalLM":0:0), %arg128: tensor<1xf32> {ttir.name = "input_1_multiply_985"} loc("LlamaForCausalLM":0:0), %arg129: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_986.1"} loc("LlamaForCausalLM":0:0), %arg130: tensor<1xf32> {ttir.name = "input_1_multiply_994"} loc("LlamaForCausalLM":0:0), %arg131: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_995"} loc("LlamaForCausalLM":0:0), %arg132: tensor<1xf32> {ttir.name = "input_1_add_1016"} loc("LlamaForCausalLM":0:0), %arg133: tensor<1xf32> {ttir.name = "input_1_add_1036"} loc("LlamaForCausalLM":0:0), %arg134: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1046"} loc("LlamaForCausalLM":0:0), %arg135: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1056.1"} loc("LlamaForCausalLM":0:0), %arg136: tensor<1xf32> {ttir.name = "input_1_multiply_1057"} loc("LlamaForCausalLM":0:0), %arg137: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1058.1"} loc("LlamaForCausalLM":0:0), %arg138: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1070.1"} loc("LlamaForCausalLM":0:0), %arg139: tensor<1xf32> {ttir.name = "input_1_multiply_1071"} loc("LlamaForCausalLM":0:0), %arg140: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1072.1"} loc("LlamaForCausalLM":0:0), %arg141: tensor<1xf32> {ttir.name = "input_1_multiply_1080"} loc("LlamaForCausalLM":0:0), %arg142: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1081"} loc("LlamaForCausalLM":0:0), %arg143: tensor<1xf32> {ttir.name = "input_1_add_1102"} loc("LlamaForCausalLM":0:0), %arg144: tensor<1xf32> {ttir.name = "input_1_add_1122"} loc("LlamaForCausalLM":0:0), %arg145: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1132"} loc("LlamaForCausalLM":0:0), %arg146: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1142.1"} loc("LlamaForCausalLM":0:0), %arg147: tensor<1xf32> {ttir.name = "input_1_multiply_1143"} loc("LlamaForCausalLM":0:0), %arg148: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1144.1"} loc("LlamaForCausalLM":0:0), %arg149: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1156.1"} loc("LlamaForCausalLM":0:0), %arg150: tensor<1xf32> {ttir.name = "input_1_multiply_1157"} loc("LlamaForCausalLM":0:0), %arg151: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1158.1"} loc("LlamaForCausalLM":0:0), %arg152: tensor<1xf32> {ttir.name = "input_1_multiply_1166"} loc("LlamaForCausalLM":0:0), %arg153: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1167"} loc("LlamaForCausalLM":0:0), %arg154: tensor<1xf32> {ttir.name = "input_1_add_1188"} loc("LlamaForCausalLM":0:0), %arg155: tensor<1xf32> {ttir.name = "input_1_add_1208"} loc("LlamaForCausalLM":0:0), %arg156: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1218"} loc("LlamaForCausalLM":0:0), %arg157: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1228.1"} loc("LlamaForCausalLM":0:0), %arg158: tensor<1xf32> {ttir.name = "input_1_multiply_1229"} loc("LlamaForCausalLM":0:0), %arg159: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1230.1"} 
loc("LlamaForCausalLM":0:0), %arg160: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1242.1"} loc("LlamaForCausalLM":0:0), %arg161: tensor<1xf32> {ttir.name = "input_1_multiply_1243"} loc("LlamaForCausalLM":0:0), %arg162: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1244.1"} loc("LlamaForCausalLM":0:0), %arg163: tensor<1xf32> {ttir.name = "input_1_multiply_1252"} loc("LlamaForCausalLM":0:0), %arg164: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1253"} loc("LlamaForCausalLM":0:0), %arg165: tensor<1xf32> {ttir.name = "input_1_add_1274"} loc("LlamaForCausalLM":0:0), %arg166: tensor<1xf32> {ttir.name = "input_1_add_1294"} loc("LlamaForCausalLM":0:0), %arg167: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1304"} loc("LlamaForCausalLM":0:0), %arg168: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1314.1"} loc("LlamaForCausalLM":0:0), %arg169: tensor<1xf32> {ttir.name = "input_1_multiply_1315"} loc("LlamaForCausalLM":0:0), %arg170: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1316.1"} loc("LlamaForCausalLM":0:0), %arg171: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1328.1"} loc("LlamaForCausalLM":0:0), %arg172: tensor<1xf32> {ttir.name = "input_1_multiply_1329"} loc("LlamaForCausalLM":0:0), %arg173: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1330.1"} loc("LlamaForCausalLM":0:0), %arg174: tensor<1xf32> {ttir.name = "input_1_multiply_1338"} loc("LlamaForCausalLM":0:0), %arg175: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1339"} loc("LlamaForCausalLM":0:0), %arg176: tensor<1xf32> {ttir.name = "input_1_add_1360"} loc("LlamaForCausalLM":0:0), %arg177: tensor<1xf32> {ttir.name = "input_1_add_1380"} loc("LlamaForCausalLM":0:0), %arg178: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1390"} loc("LlamaForCausalLM":0:0), %arg179: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1400.1"} loc("LlamaForCausalLM":0:0), %arg180: tensor<1xf32> {ttir.name = "input_1_multiply_1401"} loc("LlamaForCausalLM":0:0), %arg181: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1402.1"} loc("LlamaForCausalLM":0:0), %arg182: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1414.1"} loc("LlamaForCausalLM":0:0), %arg183: tensor<1xf32> {ttir.name = "input_1_multiply_1415"} loc("LlamaForCausalLM":0:0), %arg184: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1416.1"} loc("LlamaForCausalLM":0:0), %arg185: tensor<1xf32> {ttir.name = "input_1_multiply_1424"} loc("LlamaForCausalLM":0:0), %arg186: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1425"} loc("LlamaForCausalLM":0:0), %arg187: tensor<1xf32> {ttir.name = "input_1_add_1446"} loc("LlamaForCausalLM":0:0), %arg188: tensor<1xf32> {ttir.name = "input_1_add_1466"} loc("LlamaForCausalLM":0:0), %arg189: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1476"} loc("LlamaForCausalLM":0:0), %arg190: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1486.1"} loc("LlamaForCausalLM":0:0), %arg191: tensor<1xf32> {ttir.name = "input_1_multiply_1487"} loc("LlamaForCausalLM":0:0), %arg192: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1488.1"} loc("LlamaForCausalLM":0:0), %arg193: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1500.1"} loc("LlamaForCausalLM":0:0), %arg194: tensor<1xf32> {ttir.name = "input_1_multiply_1501"} loc("LlamaForCausalLM":0:0), %arg195: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1502.1"} loc("LlamaForCausalLM":0:0), %arg196: tensor<1xf32> {ttir.name = 
"input_1_multiply_1510"} loc("LlamaForCausalLM":0:0), %arg197: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1511"} loc("LlamaForCausalLM":0:0), %arg198: tensor<1xf32> {ttir.name = "input_1_add_1532"} loc("LlamaForCausalLM":0:0), %arg199: tensor<1xf32> {ttir.name = "input_1_add_1552"} loc("LlamaForCausalLM":0:0), %arg200: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1562"} loc("LlamaForCausalLM":0:0), %arg201: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1572.1"} loc("LlamaForCausalLM":0:0), %arg202: tensor<1xf32> {ttir.name = "input_1_multiply_1573"} loc("LlamaForCausalLM":0:0), %arg203: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1574.1"} loc("LlamaForCausalLM":0:0), %arg204: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1586.1"} loc("LlamaForCausalLM":0:0), %arg205: tensor<1xf32> {ttir.name = "input_1_multiply_1587"} loc("LlamaForCausalLM":0:0), %arg206: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1588.1"} loc("LlamaForCausalLM":0:0), %arg207: tensor<1xf32> {ttir.name = "input_1_multiply_1596"} loc("LlamaForCausalLM":0:0), %arg208: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1597"} loc("LlamaForCausalLM":0:0), %arg209: tensor<1xf32> {ttir.name = "input_1_add_1618"} loc("LlamaForCausalLM":0:0), %arg210: tensor<1xf32> {ttir.name = "input_1_add_1638"} loc("LlamaForCausalLM":0:0), %arg211: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1648"} loc("LlamaForCausalLM":0:0), %arg212: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1658.1"} loc("LlamaForCausalLM":0:0), %arg213: tensor<1xf32> {ttir.name = "input_1_multiply_1659"} loc("LlamaForCausalLM":0:0), %arg214: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1660.1"} loc("LlamaForCausalLM":0:0), %arg215: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1672.1"} loc("LlamaForCausalLM":0:0), %arg216: tensor<1xf32> {ttir.name = "input_1_multiply_1673"} loc("LlamaForCausalLM":0:0), %arg217: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1674.1"} loc("LlamaForCausalLM":0:0), %arg218: tensor<1xf32> {ttir.name = "input_1_multiply_1682"} loc("LlamaForCausalLM":0:0), %arg219: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1683"} loc("LlamaForCausalLM":0:0), %arg220: tensor<1xf32> {ttir.name = "input_1_add_1704"} loc("LlamaForCausalLM":0:0), %arg221: tensor<1xf32> {ttir.name = "input_1_add_1724"} loc("LlamaForCausalLM":0:0), %arg222: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1734"} loc("LlamaForCausalLM":0:0), %arg223: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1744.1"} loc("LlamaForCausalLM":0:0), %arg224: tensor<1xf32> {ttir.name = "input_1_multiply_1745"} loc("LlamaForCausalLM":0:0), %arg225: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1746.1"} loc("LlamaForCausalLM":0:0), %arg226: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1758.1"} loc("LlamaForCausalLM":0:0), %arg227: tensor<1xf32> {ttir.name = "input_1_multiply_1759"} loc("LlamaForCausalLM":0:0), %arg228: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1760.1"} loc("LlamaForCausalLM":0:0), %arg229: tensor<1xf32> {ttir.name = "input_1_multiply_1768"} loc("LlamaForCausalLM":0:0), %arg230: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1769"} loc("LlamaForCausalLM":0:0), %arg231: tensor<1xf32> {ttir.name = "input_1_add_1790"} loc("LlamaForCausalLM":0:0), %arg232: tensor<1xf32> {ttir.name = "input_1_add_1810"} loc("LlamaForCausalLM":0:0), %arg233: tensor<1x12x50xf32> {ttir.name = 
"input_0_unsqueeze_1820"} loc("LlamaForCausalLM":0:0), %arg234: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1830.1"} loc("LlamaForCausalLM":0:0), %arg235: tensor<1xf32> {ttir.name = "input_1_multiply_1831"} loc("LlamaForCausalLM":0:0), %arg236: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1832.1"} loc("LlamaForCausalLM":0:0), %arg237: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1844.1"} loc("LlamaForCausalLM":0:0), %arg238: tensor<1xf32> {ttir.name = "input_1_multiply_1845"} loc("LlamaForCausalLM":0:0), %arg239: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1846.1"} loc("LlamaForCausalLM":0:0), %arg240: tensor<1xf32> {ttir.name = "input_1_multiply_1854"} loc("LlamaForCausalLM":0:0), %arg241: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1855"} loc("LlamaForCausalLM":0:0), %arg242: tensor<1xf32> {ttir.name = "input_1_add_1876"} loc("LlamaForCausalLM":0:0), %arg243: tensor<1xf32> {ttir.name = "input_1_add_1896"} loc("LlamaForCausalLM":0:0), %arg244: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1906"} loc("LlamaForCausalLM":0:0), %arg245: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1916.1"} loc("LlamaForCausalLM":0:0), %arg246: tensor<1xf32> {ttir.name = "input_1_multiply_1917"} loc("LlamaForCausalLM":0:0), %arg247: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1918.1"} loc("LlamaForCausalLM":0:0), %arg248: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1930.1"} loc("LlamaForCausalLM":0:0), %arg249: tensor<1xf32> {ttir.name = "input_1_multiply_1931"} loc("LlamaForCausalLM":0:0), %arg250: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_1932.1"} loc("LlamaForCausalLM":0:0), %arg251: tensor<1xf32> {ttir.name = "input_1_multiply_1940"} loc("LlamaForCausalLM":0:0), %arg252: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_1941"} loc("LlamaForCausalLM":0:0), %arg253: tensor<1xf32> {ttir.name = "input_1_add_1962"} loc("LlamaForCausalLM":0:0), %arg254: tensor<1xf32> {ttir.name = "input_1_add_1982"} loc("LlamaForCausalLM":0:0), %arg255: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_1992"} loc("LlamaForCausalLM":0:0), %arg256: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2002.1"} loc("LlamaForCausalLM":0:0), %arg257: tensor<1xf32> {ttir.name = "input_1_multiply_2003"} loc("LlamaForCausalLM":0:0), %arg258: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2004.1"} loc("LlamaForCausalLM":0:0), %arg259: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2016.1"} loc("LlamaForCausalLM":0:0), %arg260: tensor<1xf32> {ttir.name = "input_1_multiply_2017"} loc("LlamaForCausalLM":0:0), %arg261: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2018.1"} loc("LlamaForCausalLM":0:0), %arg262: tensor<1xf32> {ttir.name = "input_1_multiply_2026"} loc("LlamaForCausalLM":0:0), %arg263: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_2027"} loc("LlamaForCausalLM":0:0), %arg264: tensor<1xf32> {ttir.name = "input_1_add_2048"} loc("LlamaForCausalLM":0:0), %arg265: tensor<1xf32> {ttir.name = "input_1_add_2068"} loc("LlamaForCausalLM":0:0), %arg266: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_2078"} loc("LlamaForCausalLM":0:0), %arg267: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2088.1"} loc("LlamaForCausalLM":0:0), %arg268: tensor<1xf32> {ttir.name = "input_1_multiply_2089"} loc("LlamaForCausalLM":0:0), %arg269: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2090.1"} loc("LlamaForCausalLM":0:0), %arg270: 
tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2102.1"} loc("LlamaForCausalLM":0:0), %arg271: tensor<1xf32> {ttir.name = "input_1_multiply_2103"} loc("LlamaForCausalLM":0:0), %arg272: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2104.1"} loc("LlamaForCausalLM":0:0), %arg273: tensor<1xf32> {ttir.name = "input_1_multiply_2112"} loc("LlamaForCausalLM":0:0), %arg274: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_2113"} loc("LlamaForCausalLM":0:0), %arg275: tensor<1xf32> {ttir.name = "input_1_add_2134"} loc("LlamaForCausalLM":0:0), %arg276: tensor<1xf32> {ttir.name = "input_1_add_2154"} loc("LlamaForCausalLM":0:0), %arg277: tensor<1x12x50xf32> {ttir.name = "input_0_unsqueeze_2164"} loc("LlamaForCausalLM":0:0), %arg278: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2174.1"} loc("LlamaForCausalLM":0:0), %arg279: tensor<1xf32> {ttir.name = "input_1_multiply_2175"} loc("LlamaForCausalLM":0:0), %arg280: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2176.1"} loc("LlamaForCausalLM":0:0), %arg281: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2188.1"} loc("LlamaForCausalLM":0:0), %arg282: tensor<1xf32> {ttir.name = "input_1_multiply_2189"} loc("LlamaForCausalLM":0:0), %arg283: tensor<1x32x50x100xf32> {ttir.name = "dc.input_tensor.index_2190.1"} loc("LlamaForCausalLM":0:0), %arg284: tensor<1xf32> {ttir.name = "input_1_multiply_2198"} loc("LlamaForCausalLM":0:0), %arg285: tensor<1x1x12x12xf32> {ttir.name = "input_1_add_2199"} loc("LlamaForCausalLM":0:0), %arg286: tensor<1xf32> {ttir.name = "input_1_add_2220"} loc("LlamaForCausalLM":0:0), %arg287: tensor<1xf32> {ttir.name = "input_1_add_2240"} loc("LlamaForCausalLM":0:0), %arg288: tensor<3200xf32> {ttir.name = "model.norm.weight"} loc("LlamaForCausalLM":0:0), %arg289: tensor<32000x3200xf32> {ttir.name = "model.embed_tokens.weight"} loc("LlamaForCausalLM":0:0), %arg290: tensor<3200xf32> {ttir.name = "model.layers.0.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg291: tensor<3200x3200xf32> {ttir.name = "model.layers.0.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg292: tensor<3200x3200xf32> {ttir.name = "model.layers.0.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg293: tensor<3200x3200xf32> {ttir.name = "model.layers.0.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg294: tensor<3200x3200xf32> {ttir.name = "model.layers.0.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg295: tensor<3200xf32> {ttir.name = "model.layers.0.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg296: tensor<3200x8640xf32> {ttir.name = "model.layers.0.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg297: tensor<3200x8640xf32> {ttir.name = "model.layers.0.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg298: tensor<8640x3200xf32> {ttir.name = "model.layers.0.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg299: tensor<3200xf32> {ttir.name = "model.layers.1.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg300: tensor<3200x3200xf32> {ttir.name = "model.layers.1.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg301: tensor<3200x3200xf32> {ttir.name = "model.layers.1.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg302: tensor<3200x3200xf32> {ttir.name = "model.layers.1.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg303: tensor<3200x3200xf32> {ttir.name = "model.layers.1.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg304: tensor<3200xf32> {ttir.name = 
"model.layers.1.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg305: tensor<3200x8640xf32> {ttir.name = "model.layers.1.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg306: tensor<3200x8640xf32> {ttir.name = "model.layers.1.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg307: tensor<8640x3200xf32> {ttir.name = "model.layers.1.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg308: tensor<3200xf32> {ttir.name = "model.layers.2.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg309: tensor<3200x3200xf32> {ttir.name = "model.layers.2.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg310: tensor<3200x3200xf32> {ttir.name = "model.layers.2.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg311: tensor<3200x3200xf32> {ttir.name = "model.layers.2.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg312: tensor<3200x3200xf32> {ttir.name = "model.layers.2.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg313: tensor<3200xf32> {ttir.name = "model.layers.2.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg314: tensor<3200x8640xf32> {ttir.name = "model.layers.2.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg315: tensor<3200x8640xf32> {ttir.name = "model.layers.2.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg316: tensor<8640x3200xf32> {ttir.name = "model.layers.2.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg317: tensor<3200xf32> {ttir.name = "model.layers.3.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg318: tensor<3200x3200xf32> {ttir.name = "model.layers.3.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg319: tensor<3200x3200xf32> {ttir.name = "model.layers.3.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg320: tensor<3200x3200xf32> {ttir.name = "model.layers.3.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg321: tensor<3200x3200xf32> {ttir.name = "model.layers.3.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg322: tensor<3200xf32> {ttir.name = "model.layers.3.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg323: tensor<3200x8640xf32> {ttir.name = "model.layers.3.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg324: tensor<3200x8640xf32> {ttir.name = "model.layers.3.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg325: tensor<8640x3200xf32> {ttir.name = "model.layers.3.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg326: tensor<3200xf32> {ttir.name = "model.layers.4.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg327: tensor<3200x3200xf32> {ttir.name = "model.layers.4.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg328: tensor<3200x3200xf32> {ttir.name = "model.layers.4.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg329: tensor<3200x3200xf32> {ttir.name = "model.layers.4.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg330: tensor<3200x3200xf32> {ttir.name = "model.layers.4.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg331: tensor<3200xf32> {ttir.name = "model.layers.4.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg332: tensor<3200x8640xf32> {ttir.name = "model.layers.4.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg333: tensor<3200x8640xf32> {ttir.name = "model.layers.4.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg334: tensor<8640x3200xf32> {ttir.name = "model.layers.4.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg335: tensor<3200xf32> {ttir.name = 
"model.layers.5.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg336: tensor<3200x3200xf32> {ttir.name = "model.layers.5.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg337: tensor<3200x3200xf32> {ttir.name = "model.layers.5.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg338: tensor<3200x3200xf32> {ttir.name = "model.layers.5.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg339: tensor<3200x3200xf32> {ttir.name = "model.layers.5.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg340: tensor<3200xf32> {ttir.name = "model.layers.5.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg341: tensor<3200x8640xf32> {ttir.name = "model.layers.5.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg342: tensor<3200x8640xf32> {ttir.name = "model.layers.5.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg343: tensor<8640x3200xf32> {ttir.name = "model.layers.5.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg344: tensor<3200xf32> {ttir.name = "model.layers.6.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg345: tensor<3200x3200xf32> {ttir.name = "model.layers.6.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg346: tensor<3200x3200xf32> {ttir.name = "model.layers.6.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg347: tensor<3200x3200xf32> {ttir.name = "model.layers.6.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg348: tensor<3200x3200xf32> {ttir.name = "model.layers.6.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg349: tensor<3200xf32> {ttir.name = "model.layers.6.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg350: tensor<3200x8640xf32> {ttir.name = "model.layers.6.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg351: tensor<3200x8640xf32> {ttir.name = "model.layers.6.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg352: tensor<8640x3200xf32> {ttir.name = "model.layers.6.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg353: tensor<3200xf32> {ttir.name = "model.layers.7.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg354: tensor<3200x3200xf32> {ttir.name = "model.layers.7.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg355: tensor<3200x3200xf32> {ttir.name = "model.layers.7.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg356: tensor<3200x3200xf32> {ttir.name = "model.layers.7.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg357: tensor<3200x3200xf32> {ttir.name = "model.layers.7.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg358: tensor<3200xf32> {ttir.name = "model.layers.7.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg359: tensor<3200x8640xf32> {ttir.name = "model.layers.7.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg360: tensor<3200x8640xf32> {ttir.name = "model.layers.7.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg361: tensor<8640x3200xf32> {ttir.name = "model.layers.7.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg362: tensor<3200xf32> {ttir.name = "model.layers.8.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg363: tensor<3200x3200xf32> {ttir.name = "model.layers.8.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg364: tensor<3200x3200xf32> {ttir.name = "model.layers.8.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg365: tensor<3200x3200xf32> {ttir.name = "model.layers.8.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg366: tensor<3200x3200xf32> {ttir.name = 
"model.layers.8.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg367: tensor<3200xf32> {ttir.name = "model.layers.8.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg368: tensor<3200x8640xf32> {ttir.name = "model.layers.8.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg369: tensor<3200x8640xf32> {ttir.name = "model.layers.8.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg370: tensor<8640x3200xf32> {ttir.name = "model.layers.8.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg371: tensor<3200xf32> {ttir.name = "model.layers.9.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg372: tensor<3200x3200xf32> {ttir.name = "model.layers.9.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg373: tensor<3200x3200xf32> {ttir.name = "model.layers.9.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg374: tensor<3200x3200xf32> {ttir.name = "model.layers.9.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg375: tensor<3200x3200xf32> {ttir.name = "model.layers.9.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg376: tensor<3200xf32> {ttir.name = "model.layers.9.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg377: tensor<3200x8640xf32> {ttir.name = "model.layers.9.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg378: tensor<3200x8640xf32> {ttir.name = "model.layers.9.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg379: tensor<8640x3200xf32> {ttir.name = "model.layers.9.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg380: tensor<3200xf32> {ttir.name = "model.layers.10.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg381: tensor<3200x3200xf32> {ttir.name = "model.layers.10.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg382: tensor<3200x3200xf32> {ttir.name = "model.layers.10.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg383: tensor<3200x3200xf32> {ttir.name = "model.layers.10.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg384: tensor<3200x3200xf32> {ttir.name = "model.layers.10.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg385: tensor<3200xf32> {ttir.name = "model.layers.10.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg386: tensor<3200x8640xf32> {ttir.name = "model.layers.10.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg387: tensor<3200x8640xf32> {ttir.name = "model.layers.10.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg388: tensor<8640x3200xf32> {ttir.name = "model.layers.10.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg389: tensor<3200xf32> {ttir.name = "model.layers.11.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg390: tensor<3200x3200xf32> {ttir.name = "model.layers.11.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg391: tensor<3200x3200xf32> {ttir.name = "model.layers.11.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg392: tensor<3200x3200xf32> {ttir.name = "model.layers.11.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg393: tensor<3200x3200xf32> {ttir.name = "model.layers.11.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg394: tensor<3200xf32> {ttir.name = "model.layers.11.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg395: tensor<3200x8640xf32> {ttir.name = "model.layers.11.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg396: tensor<3200x8640xf32> {ttir.name = "model.layers.11.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg397: tensor<8640x3200xf32> {ttir.name = 
"model.layers.11.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg398: tensor<3200xf32> {ttir.name = "model.layers.12.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg399: tensor<3200x3200xf32> {ttir.name = "model.layers.12.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg400: tensor<3200x3200xf32> {ttir.name = "model.layers.12.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg401: tensor<3200x3200xf32> {ttir.name = "model.layers.12.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg402: tensor<3200x3200xf32> {ttir.name = "model.layers.12.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg403: tensor<3200xf32> {ttir.name = "model.layers.12.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg404: tensor<3200x8640xf32> {ttir.name = "model.layers.12.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg405: tensor<3200x8640xf32> {ttir.name = "model.layers.12.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg406: tensor<8640x3200xf32> {ttir.name = "model.layers.12.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg407: tensor<3200xf32> {ttir.name = "model.layers.13.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg408: tensor<3200x3200xf32> {ttir.name = "model.layers.13.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg409: tensor<3200x3200xf32> {ttir.name = "model.layers.13.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg410: tensor<3200x3200xf32> {ttir.name = "model.layers.13.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg411: tensor<3200x3200xf32> {ttir.name = "model.layers.13.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg412: tensor<3200xf32> {ttir.name = "model.layers.13.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg413: tensor<3200x8640xf32> {ttir.name = "model.layers.13.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg414: tensor<3200x8640xf32> {ttir.name = "model.layers.13.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg415: tensor<8640x3200xf32> {ttir.name = "model.layers.13.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg416: tensor<3200xf32> {ttir.name = "model.layers.14.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg417: tensor<3200x3200xf32> {ttir.name = "model.layers.14.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg418: tensor<3200x3200xf32> {ttir.name = "model.layers.14.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg419: tensor<3200x3200xf32> {ttir.name = "model.layers.14.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg420: tensor<3200x3200xf32> {ttir.name = "model.layers.14.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg421: tensor<3200xf32> {ttir.name = "model.layers.14.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg422: tensor<3200x8640xf32> {ttir.name = "model.layers.14.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg423: tensor<3200x8640xf32> {ttir.name = "model.layers.14.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg424: tensor<8640x3200xf32> {ttir.name = "model.layers.14.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg425: tensor<3200xf32> {ttir.name = "model.layers.15.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg426: tensor<3200x3200xf32> {ttir.name = "model.layers.15.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg427: tensor<3200x3200xf32> {ttir.name = "model.layers.15.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg428: tensor<3200x3200xf32> {ttir.name = 
"model.layers.15.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg429: tensor<3200x3200xf32> {ttir.name = "model.layers.15.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg430: tensor<3200xf32> {ttir.name = "model.layers.15.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg431: tensor<3200x8640xf32> {ttir.name = "model.layers.15.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg432: tensor<3200x8640xf32> {ttir.name = "model.layers.15.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg433: tensor<8640x3200xf32> {ttir.name = "model.layers.15.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg434: tensor<3200xf32> {ttir.name = "model.layers.16.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg435: tensor<3200x3200xf32> {ttir.name = "model.layers.16.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg436: tensor<3200x3200xf32> {ttir.name = "model.layers.16.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg437: tensor<3200x3200xf32> {ttir.name = "model.layers.16.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg438: tensor<3200x3200xf32> {ttir.name = "model.layers.16.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg439: tensor<3200xf32> {ttir.name = "model.layers.16.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg440: tensor<3200x8640xf32> {ttir.name = "model.layers.16.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg441: tensor<3200x8640xf32> {ttir.name = "model.layers.16.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg442: tensor<8640x3200xf32> {ttir.name = "model.layers.16.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg443: tensor<3200xf32> {ttir.name = "model.layers.17.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg444: tensor<3200x3200xf32> {ttir.name = "model.layers.17.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg445: tensor<3200x3200xf32> {ttir.name = "model.layers.17.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg446: tensor<3200x3200xf32> {ttir.name = "model.layers.17.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg447: tensor<3200x3200xf32> {ttir.name = "model.layers.17.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg448: tensor<3200xf32> {ttir.name = "model.layers.17.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg449: tensor<3200x8640xf32> {ttir.name = "model.layers.17.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg450: tensor<3200x8640xf32> {ttir.name = "model.layers.17.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg451: tensor<8640x3200xf32> {ttir.name = "model.layers.17.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg452: tensor<3200xf32> {ttir.name = "model.layers.18.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg453: tensor<3200x3200xf32> {ttir.name = "model.layers.18.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg454: tensor<3200x3200xf32> {ttir.name = "model.layers.18.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg455: tensor<3200x3200xf32> {ttir.name = "model.layers.18.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg456: tensor<3200x3200xf32> {ttir.name = "model.layers.18.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg457: tensor<3200xf32> {ttir.name = "model.layers.18.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg458: tensor<3200x8640xf32> {ttir.name = "model.layers.18.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg459: tensor<3200x8640xf32> {ttir.name = 
"model.layers.18.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg460: tensor<8640x3200xf32> {ttir.name = "model.layers.18.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg461: tensor<3200xf32> {ttir.name = "model.layers.19.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg462: tensor<3200x3200xf32> {ttir.name = "model.layers.19.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg463: tensor<3200x3200xf32> {ttir.name = "model.layers.19.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg464: tensor<3200x3200xf32> {ttir.name = "model.layers.19.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg465: tensor<3200x3200xf32> {ttir.name = "model.layers.19.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg466: tensor<3200xf32> {ttir.name = "model.layers.19.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg467: tensor<3200x8640xf32> {ttir.name = "model.layers.19.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg468: tensor<3200x8640xf32> {ttir.name = "model.layers.19.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg469: tensor<8640x3200xf32> {ttir.name = "model.layers.19.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg470: tensor<3200xf32> {ttir.name = "model.layers.20.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg471: tensor<3200x3200xf32> {ttir.name = "model.layers.20.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg472: tensor<3200x3200xf32> {ttir.name = "model.layers.20.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg473: tensor<3200x3200xf32> {ttir.name = "model.layers.20.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg474: tensor<3200x3200xf32> {ttir.name = "model.layers.20.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg475: tensor<3200xf32> {ttir.name = "model.layers.20.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg476: tensor<3200x8640xf32> {ttir.name = "model.layers.20.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg477: tensor<3200x8640xf32> {ttir.name = "model.layers.20.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg478: tensor<8640x3200xf32> {ttir.name = "model.layers.20.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg479: tensor<3200xf32> {ttir.name = "model.layers.21.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg480: tensor<3200x3200xf32> {ttir.name = "model.layers.21.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg481: tensor<3200x3200xf32> {ttir.name = "model.layers.21.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg482: tensor<3200x3200xf32> {ttir.name = "model.layers.21.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg483: tensor<3200x3200xf32> {ttir.name = "model.layers.21.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg484: tensor<3200xf32> {ttir.name = "model.layers.21.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg485: tensor<3200x8640xf32> {ttir.name = "model.layers.21.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg486: tensor<3200x8640xf32> {ttir.name = "model.layers.21.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg487: tensor<8640x3200xf32> {ttir.name = "model.layers.21.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg488: tensor<3200xf32> {ttir.name = "model.layers.22.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg489: tensor<3200x3200xf32> {ttir.name = "model.layers.22.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg490: tensor<3200x3200xf32> {ttir.name = 
"model.layers.22.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg491: tensor<3200x3200xf32> {ttir.name = "model.layers.22.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg492: tensor<3200x3200xf32> {ttir.name = "model.layers.22.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg493: tensor<3200xf32> {ttir.name = "model.layers.22.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg494: tensor<3200x8640xf32> {ttir.name = "model.layers.22.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg495: tensor<3200x8640xf32> {ttir.name = "model.layers.22.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg496: tensor<8640x3200xf32> {ttir.name = "model.layers.22.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg497: tensor<3200xf32> {ttir.name = "model.layers.23.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg498: tensor<3200x3200xf32> {ttir.name = "model.layers.23.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg499: tensor<3200x3200xf32> {ttir.name = "model.layers.23.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg500: tensor<3200x3200xf32> {ttir.name = "model.layers.23.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg501: tensor<3200x3200xf32> {ttir.name = "model.layers.23.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg502: tensor<3200xf32> {ttir.name = "model.layers.23.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg503: tensor<3200x8640xf32> {ttir.name = "model.layers.23.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg504: tensor<3200x8640xf32> {ttir.name = "model.layers.23.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg505: tensor<8640x3200xf32> {ttir.name = "model.layers.23.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg506: tensor<3200xf32> {ttir.name = "model.layers.24.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg507: tensor<3200x3200xf32> {ttir.name = "model.layers.24.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg508: tensor<3200x3200xf32> {ttir.name = "model.layers.24.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg509: tensor<3200x3200xf32> {ttir.name = "model.layers.24.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg510: tensor<3200x3200xf32> {ttir.name = "model.layers.24.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg511: tensor<3200xf32> {ttir.name = "model.layers.24.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg512: tensor<3200x8640xf32> {ttir.name = "model.layers.24.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg513: tensor<3200x8640xf32> {ttir.name = "model.layers.24.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg514: tensor<8640x3200xf32> {ttir.name = "model.layers.24.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg515: tensor<3200xf32> {ttir.name = "model.layers.25.input_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg516: tensor<3200x3200xf32> {ttir.name = "model.layers.25.self_attn.q_proj.weight"} loc("LlamaForCausalLM":0:0), %arg517: tensor<3200x3200xf32> {ttir.name = "model.layers.25.self_attn.k_proj.weight"} loc("LlamaForCausalLM":0:0), %arg518: tensor<3200x3200xf32> {ttir.name = "model.layers.25.self_attn.v_proj.weight"} loc("LlamaForCausalLM":0:0), %arg519: tensor<3200x3200xf32> {ttir.name = "model.layers.25.self_attn.o_proj.weight"} loc("LlamaForCausalLM":0:0), %arg520: tensor<3200xf32> {ttir.name = "model.layers.25.post_attention_layernorm.weight"} loc("LlamaForCausalLM":0:0), %arg521: tensor<3200x8640xf32> {ttir.name 
= "model.layers.25.mlp.gate_proj.weight"} loc("LlamaForCausalLM":0:0), %arg522: tensor<3200x8640xf32> {ttir.name = "model.layers.25.mlp.up_proj.weight"} loc("LlamaForCausalLM":0:0), %arg523: tensor<8640x3200xf32> {ttir.name = "model.layers.25.mlp.down_proj.weight"} loc("LlamaForCausalLM":0:0), %arg524: tensor<3200x32000xf32> {ttir.name = "lm_head.weight"} loc("LlamaForCausalLM":0:0)) -> (tensor<1x12x3200xf32> {ttir.name = "LlamaForCausalLM.output_matmul_2246"}) { %0 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2091) - %1 = "ttir.embedding"(%arg0, %arg289, %0) <{operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12xi32>, tensor<32000x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2091) + %1 = "ttir.embedding"(%arg0, %arg289, %0) : (tensor<1x12xi32>, tensor<32000x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2091) %2 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2092) - %3 = "ttir.multiply"(%1, %1, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2092) + %3 = "ttir.multiply"(%1, %1, %2) <{operandSegmentSizes = array}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2092) %4 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2093) - %5 = "ttir.mean"(%3, %4) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2093) + %5 = "ttir.mean"(%3, %4) <{dim_arg = [-1 : i32], keep_dim = true}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2093) %6 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2094) - %7 = "ttir.add"(%5, %arg1, %6) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2094) + %7 = "ttir.add"(%5, %arg1, %6) <{operandSegmentSizes = array}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2094) %8 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2095) - %9 = "ttir.sqrt"(%7, %8) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2095) + %9 = "ttir.sqrt"(%7, %8) <{operandSegmentSizes = array}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2095) %10 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2096) - %11 = "ttir.reciprocal"(%9, %10) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2096) + %11 = "ttir.reciprocal"(%9, %10) <{operandSegmentSizes = array}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2096) %12 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2097) - %13 = "ttir.multiply"(%1, %11, %12) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2097) + %13 = "ttir.multiply"(%1, %11, %12) <{operandSegmentSizes = array}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2097) %14 = tensor.empty() : tensor<1x12x3200xf32> 
loc(#loc2098) - %15 = "ttir.multiply"(%arg290, %13, %14) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2098) + %15 = "ttir.multiply"(%arg290, %13, %14) <{operandSegmentSizes = array}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2098) %16 = tensor.empty() : tensor<12x3200xf32> loc(#loc2099) - %17 = "ttir.squeeze"(%15, %16) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2099) + %17 = "ttir.squeeze"(%15, %16) <{dim = 0 : si32}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2099) %18 = tensor.empty() : tensor<12x3200xf32> loc(#loc2100) - %19 = "ttir.matmul"(%17, %arg291, %18) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2100) + %19 = "ttir.matmul"(%17, %arg291, %18) : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2100) %20 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2101) - %21 = "ttir.reshape"(%19, %20) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2101) + %21 = "ttir.reshape"(%19, %20) <{shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2101) %22 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2102) - %23 = "ttir.transpose"(%21, %22) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2102) + %23 = "ttir.transpose"(%21, %22) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2102) %24 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2103) - %25 = "ttir.concat"(%arg2, %arg2, %24) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2103) + %25 = "ttir.concat"(%arg2, %arg2, %24) <{dim = -1 : si32}> : (tensor<1x12x50xf32>, tensor<1x12x50xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2103) %26 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2104) - %27 = "ttir.sin"(%25, %26) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2104) + %27 = "ttir.sin"(%25, %26) <{operandSegmentSizes = array}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2104) %28 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2105) - %29 = "ttir.unsqueeze"(%27, %28) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2105) + %29 = "ttir.unsqueeze"(%27, %28) <{dim = 1 : si32}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2105) %30 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2106) - %31 = 
"ttir.multiply"(%23, %29, %30) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2106) + %31 = "ttir.multiply"(%23, %29, %30) <{operandSegmentSizes = array}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2106) %32 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2107) - %33 = "ttir.transpose"(%23, %32) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2107) + %33 = "ttir.transpose"(%23, %32) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2107) %34 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2108) - %35 = "ttir.matmul"(%arg3, %33, %34) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2108) + %35 = "ttir.matmul"(%arg3, %33, %34) : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2108) %36 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2109) - %37 = "ttir.transpose"(%35, %36) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2109) + %37 = "ttir.transpose"(%35, %36) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2109) %38 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2110) - %39 = "ttir.multiply"(%37, %arg4, %38) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2110) + %39 = "ttir.multiply"(%37, %arg4, %38) <{operandSegmentSizes = array}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2110) %40 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2111) - %41 = "ttir.transpose"(%23, %40) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2111) + %41 = "ttir.transpose"(%23, %40) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2111) %42 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2112) - %43 = "ttir.matmul"(%arg5, %41, %42) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2112) + %43 = "ttir.matmul"(%arg5, %41, %42) : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2112) %44 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2113) - %45 = "ttir.transpose"(%43, %44) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2113) + %45 = "ttir.transpose"(%43, %44) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> 
loc(#loc2113) %46 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2114) - %47 = "ttir.concat"(%39, %45, %46) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2114) + %47 = "ttir.concat"(%39, %45, %46) <{dim = -1 : si32}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2114) %48 = tensor.empty() : tensor<1x12x100xf32> loc(#loc2115) - %49 = "ttir.cos"(%25, %48) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2115) + %49 = "ttir.cos"(%25, %48) <{operandSegmentSizes = array}> : (tensor<1x12x100xf32>, tensor<1x12x100xf32>) -> tensor<1x12x100xf32> loc(#loc2115) %50 = tensor.empty() : tensor<1x1x12x100xf32> loc(#loc2116) - %51 = "ttir.unsqueeze"(%49, %50) <{dim = 1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2116) + %51 = "ttir.unsqueeze"(%49, %50) <{dim = 1 : si32}> : (tensor<1x12x100xf32>, tensor<1x1x12x100xf32>) -> tensor<1x1x12x100xf32> loc(#loc2116) %52 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2117) - %53 = "ttir.multiply"(%47, %51, %52) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2117) + %53 = "ttir.multiply"(%47, %51, %52) <{operandSegmentSizes = array}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2117) %54 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2118) - %55 = "ttir.add"(%31, %53, %54) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2118) + %55 = "ttir.add"(%31, %53, %54) <{operandSegmentSizes = array}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2118) %56 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2119) - %57 = "ttir.squeeze"(%55, %56) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2119) + %57 = "ttir.squeeze"(%55, %56) <{dim = 0 : si32}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2119) %58 = tensor.empty() : tensor<12x3200xf32> loc(#loc2120) - %59 = "ttir.matmul"(%17, %arg292, %58) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2120) + %59 = "ttir.matmul"(%17, %arg292, %58) : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2120) %60 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2121) - %61 = "ttir.reshape"(%59, %60) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2121) + %61 = "ttir.reshape"(%59, %60) <{shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> 
+ %61 = "ttir.reshape"(%59, %60) <{shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2121)
%62 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2122)
- %63 = "ttir.transpose"(%61, %62) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2122)
+ %63 = "ttir.transpose"(%61, %62) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2122)
%64 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2123)
- %65 = "ttir.multiply"(%63, %29, %64) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2123)
+ %65 = "ttir.multiply"(%63, %29, %64) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2123)
%66 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2124)
- %67 = "ttir.transpose"(%63, %66) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2124)
+ %67 = "ttir.transpose"(%63, %66) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2124)
%68 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2125)
- %69 = "ttir.matmul"(%arg6, %67, %68) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2125)
+ %69 = "ttir.matmul"(%arg6, %67, %68) : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2125)
%70 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2126)
- %71 = "ttir.transpose"(%69, %70) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2126)
+ %71 = "ttir.transpose"(%69, %70) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2126)
%72 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2127)
- %73 = "ttir.multiply"(%71, %arg7, %72) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2127)
+ %73 = "ttir.multiply"(%71, %arg7, %72) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x32x12x50xf32>, tensor<1xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2127)
%74 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2128)
- %75 = "ttir.transpose"(%63, %74) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2128)
+ %75 = "ttir.transpose"(%63, %74) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2128)
%76 = tensor.empty() : tensor<1x32x50x12xf32> loc(#loc2129)
- %77 = "ttir.matmul"(%arg8, %75, %76) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2129)
+ %77 = "ttir.matmul"(%arg8, %75, %76) : (tensor<1x32x50x100xf32>, tensor<1x32x100x12xf32>, tensor<1x32x50x12xf32>) -> tensor<1x32x50x12xf32> loc(#loc2129)
%78 = tensor.empty() : tensor<1x32x12x50xf32> loc(#loc2130)
- %79 = "ttir.transpose"(%77, %78) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2130)
+ %79 = "ttir.transpose"(%77, %78) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x50x12xf32>, tensor<1x32x12x50xf32>) -> tensor<1x32x12x50xf32> loc(#loc2130)
%80 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2131)
- %81 = "ttir.concat"(%73, %79, %80) <{dim = -1 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2131)
+ %81 = "ttir.concat"(%73, %79, %80) <{dim = -1 : si32}> : (tensor<1x32x12x50xf32>, tensor<1x32x12x50xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2131)
%82 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2132)
- %83 = "ttir.multiply"(%81, %51, %82) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2132)
+ %83 = "ttir.multiply"(%81, %51, %82) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x32x12x100xf32>, tensor<1x1x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2132)
%84 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2133)
- %85 = "ttir.add"(%65, %83, %84) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2133)
+ %85 = "ttir.add"(%65, %83, %84) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2133)
%86 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2134)
- %87 = "ttir.squeeze"(%85, %86) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2134)
+ %87 = "ttir.squeeze"(%85, %86) <{dim = 0 : si32}> : (tensor<1x32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2134)
%88 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2135)
- %89 = "ttir.transpose"(%87, %88) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2135)
+ %89 = "ttir.transpose"(%87, %88) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2135)
%90 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2136)
- %91 = "ttir.matmul"(%57, %89, %90) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2136)
+ %91 = "ttir.matmul"(%57, %89, %90) : (tensor<32x12x100xf32>, tensor<32x100x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2136)
%92 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2137)
- %93 = "ttir.unsqueeze"(%91, %92) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2137)
"ttir.unsqueeze"(%91, %92) <{dim = 0 : si32}> : (tensor<32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2137) %94 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2138) - %95 = "ttir.multiply"(%93, %arg9, %94) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2138) + %95 = "ttir.multiply"(%93, %arg9, %94) <{operandSegmentSizes = array}> : (tensor<1x32x12x12xf32>, tensor<1xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2138) %96 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2139) - %97 = "ttir.add"(%95, %arg10, %96) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2139) + %97 = "ttir.add"(%95, %arg10, %96) <{operandSegmentSizes = array}> : (tensor<1x32x12x12xf32>, tensor<1x1x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2139) %98 = tensor.empty() : tensor<1x32x12x12xf32> loc(#loc2140) - %99 = "ttir.softmax"(%97, %98) <{dimension = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2140) + %99 = "ttir.softmax"(%97, %98) <{dimension = -1 : si32}> : (tensor<1x32x12x12xf32>, tensor<1x32x12x12xf32>) -> tensor<1x32x12x12xf32> loc(#loc2140) %100 = tensor.empty() : tensor<32x12x12xf32> loc(#loc2141) - %101 = "ttir.squeeze"(%99, %100) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2141) + %101 = "ttir.squeeze"(%99, %100) <{dim = 0 : si32}> : (tensor<1x32x12x12xf32>, tensor<32x12x12xf32>) -> tensor<32x12x12xf32> loc(#loc2141) %102 = tensor.empty() : tensor<12x3200xf32> loc(#loc2142) - %103 = "ttir.matmul"(%17, %arg293, %102) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2142) + %103 = "ttir.matmul"(%17, %arg293, %102) : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2142) %104 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2143) - %105 = "ttir.reshape"(%103, %104) <{operand_constraints = [#any_device, #any_device], shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2143) + %105 = "ttir.reshape"(%103, %104) <{shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2143) %106 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2144) - %107 = "ttir.transpose"(%105, %106) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2144) + %107 = "ttir.transpose"(%105, %106) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x12x32x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2144) %108 = tensor.empty() : tensor<1x32x100x12xf32> loc(#loc2145) - %109 = "ttir.transpose"(%107, %108) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2145) + %109 = 
"ttir.transpose"(%107, %108) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x12x100xf32>, tensor<1x32x100x12xf32>) -> tensor<1x32x100x12xf32> loc(#loc2145) %110 = tensor.empty() : tensor<32x100x12xf32> loc(#loc2146) - %111 = "ttir.squeeze"(%109, %110) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2146) + %111 = "ttir.squeeze"(%109, %110) <{dim = 0 : si32}> : (tensor<1x32x100x12xf32>, tensor<32x100x12xf32>) -> tensor<32x100x12xf32> loc(#loc2146) %112 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2147) - %113 = "ttir.transpose"(%111, %112) <{dim0 = -2 : si32, dim1 = -1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2147) + %113 = "ttir.transpose"(%111, %112) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<32x100x12xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2147) %114 = tensor.empty() : tensor<32x12x100xf32> loc(#loc2148) - %115 = "ttir.matmul"(%101, %113, %114) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2148) + %115 = "ttir.matmul"(%101, %113, %114) : (tensor<32x12x12xf32>, tensor<32x12x100xf32>, tensor<32x12x100xf32>) -> tensor<32x12x100xf32> loc(#loc2148) %116 = tensor.empty() : tensor<1x32x12x100xf32> loc(#loc2149) - %117 = "ttir.unsqueeze"(%115, %116) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2149) + %117 = "ttir.unsqueeze"(%115, %116) <{dim = 0 : si32}> : (tensor<32x12x100xf32>, tensor<1x32x12x100xf32>) -> tensor<1x32x12x100xf32> loc(#loc2149) %118 = tensor.empty() : tensor<1x12x32x100xf32> loc(#loc2150) - %119 = "ttir.transpose"(%117, %118) <{dim0 = -3 : si32, dim1 = -2 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2150) + %119 = "ttir.transpose"(%117, %118) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x32x12x100xf32>, tensor<1x12x32x100xf32>) -> tensor<1x12x32x100xf32> loc(#loc2150) %120 = tensor.empty() : tensor<12x3200xf32> loc(#loc2151) - %121 = "ttir.reshape"(%119, %120) <{operand_constraints = [#any_device, #any_device], shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2151) + %121 = "ttir.reshape"(%119, %120) <{shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2151) %122 = tensor.empty() : tensor<12x3200xf32> loc(#loc2152) - %123 = "ttir.matmul"(%121, %arg294, %122) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2152) + %123 = "ttir.matmul"(%121, %arg294, %122) : (tensor<12x3200xf32>, tensor<3200x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2152) %124 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2153) - %125 = "ttir.unsqueeze"(%123, %124) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2153) + %125 = "ttir.unsqueeze"(%123, %124) <{dim = 0 : si32}> : (tensor<12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2153) %126 = 
%126 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2154)
- %127 = "ttir.add"(%1, %125, %126) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2154)
+ %127 = "ttir.add"(%1, %125, %126) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2154)
%128 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2155)
- %129 = "ttir.multiply"(%127, %127, %128) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2155)
+ %129 = "ttir.multiply"(%127, %127, %128) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2155)
%130 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2156)
- %131 = "ttir.mean"(%129, %130) <{dim_arg = [-1 : i32], keep_dim = true, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2156)
+ %131 = "ttir.mean"(%129, %130) <{dim_arg = [-1 : i32], keep_dim = true}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2156)
%132 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2157)
- %133 = "ttir.add"(%131, %arg11, %132) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2157)
+ %133 = "ttir.add"(%131, %arg11, %132) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x12x1xf32>, tensor<1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2157)
%134 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2158)
- %135 = "ttir.sqrt"(%133, %134) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2158)
+ %135 = "ttir.sqrt"(%133, %134) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2158)
%136 = tensor.empty() : tensor<1x12x1xf32> loc(#loc2159)
- %137 = "ttir.reciprocal"(%135, %136) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2159)
+ %137 = "ttir.reciprocal"(%135, %136) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x12x1xf32>, tensor<1x12x1xf32>) -> tensor<1x12x1xf32> loc(#loc2159)
%138 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2160)
- %139 = "ttir.multiply"(%127, %137, %138) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2160)
+ %139 = "ttir.multiply"(%127, %137, %138) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x12x3200xf32>, tensor<1x12x1xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2160)
%140 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2161)
- %141 = "ttir.multiply"(%arg295, %139, %140) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2161)
+ %141 = "ttir.multiply"(%arg295, %139, %140) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2161)
%142 = tensor.empty() : tensor<12x3200xf32> loc(#loc2162)
- %143 = "ttir.squeeze"(%141, %142) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2162)
+ %143 = "ttir.squeeze"(%141, %142) <{dim = 0 : si32}> : (tensor<1x12x3200xf32>, tensor<12x3200xf32>) -> tensor<12x3200xf32> loc(#loc2162)
%144 = tensor.empty() : tensor<12x8640xf32> loc(#loc2163)
- %145 = "ttir.matmul"(%143, %arg296, %144) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2163)
+ %145 = "ttir.matmul"(%143, %arg296, %144) : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2163)
%146 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2164)
- %147 = "ttir.unsqueeze"(%145, %146) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2164)
+ %147 = "ttir.unsqueeze"(%145, %146) <{dim = 0 : si32}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2164)
%148 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2165)
- %149 = "ttir.sigmoid"(%147, %148) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2165)
+ %149 = "ttir.sigmoid"(%147, %148) <{operandSegmentSizes = array<i32: 1, 1>}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2165)
%150 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2166)
- %151 = "ttir.multiply"(%147, %149, %150) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2166)
+ %151 = "ttir.multiply"(%147, %149, %150) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2166)
%152 = tensor.empty() : tensor<12x8640xf32> loc(#loc2167)
- %153 = "ttir.matmul"(%143, %arg297, %152) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2167)
+ %153 = "ttir.matmul"(%143, %arg297, %152) : (tensor<12x3200xf32>, tensor<3200x8640xf32>, tensor<12x8640xf32>) -> tensor<12x8640xf32> loc(#loc2167)
%154 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2168)
- %155 = "ttir.unsqueeze"(%153, %154) <{dim = 0 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2168)
+ %155 = "ttir.unsqueeze"(%153, %154) <{dim = 0 : si32}> : (tensor<12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2168)
%156 = tensor.empty() : tensor<1x12x8640xf32> loc(#loc2169)
- %157 = "ttir.multiply"(%151, %155, %156) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2169)
+ %157 = "ttir.multiply"(%151, %155, %156) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x12x8640xf32>, tensor<1x12x8640xf32>, tensor<1x12x8640xf32>) -> tensor<1x12x8640xf32> loc(#loc2169)
%158 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2170)
- %159 = "ttir.matmul"(%157, %arg298, %158) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2170)
+ %159 = "ttir.matmul"(%157, %arg298, %158) : (tensor<1x12x8640xf32>, tensor<8640x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2170)
%160 = tensor.empty() : tensor<1x12x3200xf32> loc(#loc2171)
- %161 = "ttir.add"(%127, %159, %160) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device, #any_device, #any_device, #any_device]}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2171)
+ %161 = "ttir.add"(%127, %159, %160) <{operandSegmentSizes = array<i32: 2, 1>}> : (tensor<1x12x3200xf32>, tensor<1x12x3200xf32>, tensor<1x12x3200xf32>) -> tensor<1x12x3200xf32> loc(#loc2171)
return %161 : tensor<1x12x3200xf32> loc(#loc2090)
} loc(#loc)
} loc(#loc)