diff --git a/.gitignore b/.gitignore
index 8663a2ff0e..30c0a5f5eb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@ ttrt-artifacts/*
 query_results.json
 run_results.json
 ttrt_report.xml
+cluster_description.yaml
\ No newline at end of file
diff --git a/cluster_descriptor.yaml b/cluster_descriptor.yaml
new file mode 100644
index 0000000000..325a5cdb91
--- /dev/null
+++ b/cluster_descriptor.yaml
@@ -0,0 +1,24 @@
+arch: {
+   0: Wormhole,
+}
+
+chips: {
+   0: [0,0,0,0],
+}
+
+ethernet_connections: [
+]
+
+chips_with_mmio: [
+   0: 1,
+]
+
+# harvest_mask is a bitmask of harvested tensix rows: bit 0 = first tensix row, bit 1 = second tensix row, and so on.
+harvesting: {
+   0: {noc_translation: true, harvest_mask: 1},
+}
+
+# This value will be null if the board type is unknown. That should never happen in practice, but to be defensive it would be useful to throw an error in that case.
+boardtype: {
+   0: n150,
+}
\ No newline at end of file
diff --git a/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp b/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp
index 7f00f5024a..0a70381198 100644
--- a/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp
+++ b/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp
@@ -93,6 +93,7 @@ L1InterleavedPolicy::getOptimalOpConfig(
   llvm::DenseMap optimalConfig;
 
   optimalL1Usage = 0;
+  optimalConfigMask = 0;
   n = opsL1Usage.size();
 
   for (i = 0; i < (1 << n); i++) {
@@ -108,10 +109,12 @@
       j <<= 1;
     }
 
-    // Check if the current configuration is optimal.
+    // For now this constant is derived from execution data; it will be
+    // replaced by a value obtained through an API.
     //
+    constexpr float tensorL1UsageCap = 0.75;
     if (optimalL1Usage < currentL1Usage &&
-        currentL1Usage <= usableL1CacheSize) {
+        currentL1Usage <= tensorL1UsageCap * usableL1CacheSize) {
       optimalL1Usage = currentL1Usage;
       optimalConfigMask = i;
     }
diff --git a/out.mlir b/out.mlir
deleted file mode 100644
index d1165776f1..0000000000
--- a/out.mlir
+++ /dev/null
@@ -1,124 +0,0 @@
-#device = #tt.device (0, d0, d1)>, l1Map = (d0, d1)[s0, s1] -> (0, d0 floordiv s0, d1 floordiv s1, (d0 mod s0) * s1 + d1 mod s1), dramMap = (d0, d1)[s0, s1] -> (0, 0, ((((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 8192) mod 12, (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) floordiv 98304 + (((d0 floordiv s0) * 8 + d1 floordiv s1) * (s1 * s0) + (d0 mod s0) * s1 + d1 mod s1) mod 8192), meshShape = , chipIds = [0]>
-#dram = #tt.memory_space
-#l1_ = #tt.memory_space
-#system = #tt.memory_space
-#system_desc = #tt.system_desc<[{arch = , grid = 8x8, l1_size = 1499136, num_dram_channels = 12, dram_channel_size = 1073741824, noc_l1_address_align_bytes = 16, pcie_address_align_bytes = 32, noc_dram_address_align_bytes = 32, l1_unreserved_base = 1024, erisc_l1_unreserved_base = 1024, dram_unreserved_base = 1024, dram_unreserved_end = 1073741824, physical_cores = {worker = [ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 1x0, 1x1, 1x2, 1x3, 1x4, 1x5, 1x6, 1x7, 2x0, 2x1, 2x2, 2x3, 2x4, 2x5, 2x6, 2x7, 3x0, 3x1, 3x2, 3x3, 3x4, 3x5, 3x6, 3x7, 4x0, 4x1, 4x2, 4x3, 4x4, 4x5, 4x6, 4x7, 5x0, 5x1, 5x2, 5x3, 5x4, 5x5, 5x6, 5x7, 6x0, 6x1, 6x2, 6x3, 6x4, 6x5, 6x6, 6x7, 7x0, 7x1, 7x2, 7x3, 7x4, 7x5, 7x6, 7x7] dram = [ 8x0, 9x0, 10x0, 8x1, 9x1, 10x1, 8x2, 9x2, 10x2, 8x3, 9x3, 10x3]}, supported_data_types = [, , , , , , , , , , , ], supported_tile_sizes = [ 4x16, 16x16, 32x16, 4x32, 16x32, 32x32], num_cbs = 32}], [0], [3 : i32], [ 0x0x0x0]>
-#layout = #tt.layout<(d0, d1, d2) ->
(d0 * 12 + d1, d2), undef, <1x1>, memref<12x3200xf32, #system>> -#layout1 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 12 + d1 * 12 + d2, d3), undef, <1x1>, memref<12x12xf32, #system>> -#layout2 = #tt.layout<(d0) -> (0, d0), undef, <1x1>, memref<1x1xf32, #system>> -#layout3 = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<3200x3200xf32, #system>> -#layout4 = #tt.layout<(d0, d1, d2) -> (d0 * 12 + d1, d2), undef, <1x1>, memref<1x100x!tt.tile<32x32, f32>, #dram>, interleaved> -#layout5 = #tt.layout<(d0, d1) -> (d0, d1), undef, <8x8>, memref<2x400xf32, #dram>, interleaved> -#layout6 = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<100x100x!tt.tile<32x32, f32>, #dram>, interleaved> -#layout7 = #tt.layout<(d0, d1) -> (d0, d1), undef, <8x8>, memref<2x400xf32, #l1_>, interleaved> -#layout8 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 384 + d1 * 32 + d2, d3), undef, <8x8>, memref<48x13xf32, #l1_>, interleaved> -#layout9 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 384 + d1 * 12 + d2, d3), undef, <8x8>, memref<48x13xf32, #l1_>, interleaved> -#layout10 = #tt.layout<(d0, d1, d2) -> (d0 * 12 + d1, d2), undef, <8x8>, memref<48x13xf32, #l1_>, interleaved> -#layout11 = #tt.layout<(d0, d1, d2) -> (d0 * 100 + d1, d2), undef, <8x8>, memref<400x2xf32, #l1_>, interleaved> -#layout12 = #tt.layout<(d0, d1, d2) -> (d0 * 12 + d1, d2), undef, <8x8>, memref<48x2xf32, #l1_>, interleaved> -#layout13 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 384 + d1 * 12 + d2, d3), undef, <8x8>, memref<48x2xf32, #l1_>, interleaved> -#layout14 = #tt.layout<(d0) -> (0, d0), undef, <1x1>, memref<1x1x!tt.tile<32x32, f32>, #dram>, interleaved> -#layout15 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 12 + d1 * 12 + d2, d3), undef, <1x1>, memref<1x1x!tt.tile<32x32, f32>, #dram>, interleaved> -#layout16 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 3200 + d1 * 100 + d2, d3), undef, <8x8>, memref<400x2xf32, #l1_>, interleaved> -#layout17 = #tt.layout<(d0, d1, d2) -> (d0 * 12 + d1, d2), undef, <8x8>, memref<2x400xf32, #l1_>, interleaved> -module @SelfAttention attributes {tt.device = #device, tt.system_desc = #system_desc} { - func.func @forward(%arg0: tensor<1x12x3200xf32, #layout> {ttir.name = "hidden_states_1"}, %arg1: tensor<1x1x12x12xf32, #layout1> {ttir.name = "attention_mask"}, %arg2: tensor<1xf32, #layout2> {ttir.name = "input_1_multiply_20"}, %arg3: tensor<3200x3200xf32, #layout3> {ttir.name = "model.q_proj.weight"}, %arg4: tensor<3200x3200xf32, #layout3> {ttir.name = "model.k_proj.weight"}, %arg5: tensor<3200x3200xf32, #layout3> {ttir.name = "model.v_proj.weight"}, %arg6: tensor<3200x3200xf32, #layout3> {ttir.name = "model.o_proj.weight"}) -> (tensor<1x12x3200xf32, #layout> {ttir.name = "SelfAttention.output_reshape_38"}) { - %0 = "ttnn.get_device"() <{mesh_shape = #ttnn}> : () -> !tt.device<#device> - %1 = "ttnn.to_layout"(%arg0) <{layout = #ttnn.layout}> : (tensor<1x12x3200xf32, #layout>) -> tensor<1x12x3200xf32, #layout4> - %2 = "ttnn.to_device"(%1, %0) <{memory_config = #ttnn.memory_config<, , <<1x100>>>}> : (tensor<1x12x3200xf32, #layout4>, !tt.device<#device>) -> tensor<1x12x3200xf32, #layout4> - "ttnn.dealloc"(%1) : (tensor<1x12x3200xf32, #layout4>) -> () - %3 = "ttnn.reshape"(%2) <{shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x3200xf32, #layout4>) -> tensor<12x3200xf32, #layout5> - "ttnn.dealloc"(%2) : (tensor<1x12x3200xf32, #layout4>) -> () - %4 = "ttnn.to_layout"(%arg3) <{layout = #ttnn.layout}> : (tensor<3200x3200xf32, #layout3>) -> tensor<3200x3200xf32, #layout6> - %5 = "ttnn.to_device"(%4, %0) <{memory_config = #ttnn.memory_config<, , 
<<100x100>>>}> : (tensor<3200x3200xf32, #layout6>, !tt.device<#device>) -> tensor<3200x3200xf32, #layout6> - "ttnn.dealloc"(%4) : (tensor<3200x3200xf32, #layout6>) -> () - %6 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<2x400>>>, shape = #ttnn.shape<12x3200>}> : (!tt.device<#device>) -> tensor<12x3200xf32, #layout7> - %7 = "ttnn.matmul"(%3, %5, %6) : (tensor<12x3200xf32, #layout5>, tensor<3200x3200xf32, #layout6>, tensor<12x3200xf32, #layout7>) -> tensor<12x3200xf32, #layout7> - "ttnn.dealloc"(%5) : (tensor<3200x3200xf32, #layout6>) -> () - %8 = "ttnn.reshape"(%7) <{shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32, #layout7>) -> tensor<1x12x32x100xf32, #layout8> - "ttnn.dealloc"(%6) : (tensor<12x3200xf32, #layout7>) -> () - %9 = "ttnn.transpose"(%8) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x12x32x100xf32, #layout8>) -> tensor<1x32x12x100xf32, #layout9> - "ttnn.dealloc"(%8) : (tensor<1x12x32x100xf32, #layout8>) -> () - %10 = "ttnn.reshape"(%9) <{shape = [32 : i32, 12 : i32, 100 : i32]}> : (tensor<1x32x12x100xf32, #layout9>) -> tensor<32x12x100xf32, #layout10> - "ttnn.dealloc"(%9) : (tensor<1x32x12x100xf32, #layout9>) -> () - %11 = "ttnn.to_layout"(%arg4) <{layout = #ttnn.layout}> : (tensor<3200x3200xf32, #layout3>) -> tensor<3200x3200xf32, #layout6> - %12 = "ttnn.to_device"(%11, %0) <{memory_config = #ttnn.memory_config<, , <<100x100>>>}> : (tensor<3200x3200xf32, #layout6>, !tt.device<#device>) -> tensor<3200x3200xf32, #layout6> - "ttnn.dealloc"(%11) : (tensor<3200x3200xf32, #layout6>) -> () - %13 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<2x400>>>, shape = #ttnn.shape<12x3200>}> : (!tt.device<#device>) -> tensor<12x3200xf32, #layout7> - %14 = "ttnn.matmul"(%3, %12, %13) : (tensor<12x3200xf32, #layout5>, tensor<3200x3200xf32, #layout6>, tensor<12x3200xf32, #layout7>) -> tensor<12x3200xf32, #layout7> - "ttnn.dealloc"(%12) : (tensor<3200x3200xf32, #layout6>) -> () - %15 = "ttnn.reshape"(%14) <{shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32, #layout7>) -> tensor<1x12x32x100xf32, #layout8> - "ttnn.dealloc"(%13) : (tensor<12x3200xf32, #layout7>) -> () - %16 = "ttnn.transpose"(%15) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x12x32x100xf32, #layout8>) -> tensor<1x32x12x100xf32, #layout9> - "ttnn.dealloc"(%15) : (tensor<1x12x32x100xf32, #layout8>) -> () - %17 = "ttnn.reshape"(%16) <{shape = [32 : i32, 12 : i32, 100 : i32]}> : (tensor<1x32x12x100xf32, #layout9>) -> tensor<32x12x100xf32, #layout10> - "ttnn.dealloc"(%16) : (tensor<1x32x12x100xf32, #layout9>) -> () - %18 = "ttnn.transpose"(%17) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<32x12x100xf32, #layout10>) -> tensor<32x100x12xf32, #layout11> - "ttnn.dealloc"(%17) : (tensor<32x12x100xf32, #layout10>) -> () - %19 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<48x2>>>, shape = #ttnn.shape<32x12x12>}> : (!tt.device<#device>) -> tensor<32x12x12xf32, #layout12> - %20 = "ttnn.matmul"(%10, %18, %19) : (tensor<32x12x100xf32, #layout10>, tensor<32x100x12xf32, #layout11>, tensor<32x12x12xf32, #layout12>) -> tensor<32x12x12xf32, #layout12> - "ttnn.dealloc"(%18) : (tensor<32x100x12xf32, #layout11>) -> () - "ttnn.dealloc"(%10) : (tensor<32x12x100xf32, #layout10>) -> () - %21 = "ttnn.reshape"(%20) <{shape = [1 : i32, 32 : i32, 12 : i32, 12 : i32]}> : (tensor<32x12x12xf32, 
#layout12>) -> tensor<1x32x12x12xf32, #layout13> - "ttnn.dealloc"(%19) : (tensor<32x12x12xf32, #layout12>) -> () - %22 = "ttnn.to_layout"(%arg2) <{layout = #ttnn.layout}> : (tensor<1xf32, #layout2>) -> tensor<1xf32, #layout14> - %23 = "ttnn.to_device"(%22, %0) <{memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<1xf32, #layout14>, !tt.device<#device>) -> tensor<1xf32, #layout14> - "ttnn.dealloc"(%22) : (tensor<1xf32, #layout14>) -> () - %24 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<48x2>>>, shape = #ttnn.shape<1x32x12x12>}> : (!tt.device<#device>) -> tensor<1x32x12x12xf32, #layout13> - %25 = "ttnn.multiply"(%21, %23, %24) <{operandSegmentSizes = array}> : (tensor<1x32x12x12xf32, #layout13>, tensor<1xf32, #layout14>, tensor<1x32x12x12xf32, #layout13>) -> tensor<1x32x12x12xf32, #layout13> - "ttnn.dealloc"(%23) : (tensor<1xf32, #layout14>) -> () - "ttnn.dealloc"(%21) : (tensor<1x32x12x12xf32, #layout13>) -> () - %26 = "ttnn.to_layout"(%arg1) <{layout = #ttnn.layout}> : (tensor<1x1x12x12xf32, #layout1>) -> tensor<1x1x12x12xf32, #layout15> - %27 = "ttnn.to_device"(%26, %0) <{memory_config = #ttnn.memory_config<, , <<1x1>>>}> : (tensor<1x1x12x12xf32, #layout15>, !tt.device<#device>) -> tensor<1x1x12x12xf32, #layout15> - "ttnn.dealloc"(%26) : (tensor<1x1x12x12xf32, #layout15>) -> () - %28 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<48x2>>>, shape = #ttnn.shape<1x32x12x12>}> : (!tt.device<#device>) -> tensor<1x32x12x12xf32, #layout13> - %29 = "ttnn.add"(%25, %27, %28) <{operandSegmentSizes = array}> : (tensor<1x32x12x12xf32, #layout13>, tensor<1x1x12x12xf32, #layout15>, tensor<1x32x12x12xf32, #layout13>) -> tensor<1x32x12x12xf32, #layout13> - "ttnn.dealloc"(%27) : (tensor<1x1x12x12xf32, #layout15>) -> () - "ttnn.dealloc"(%24) : (tensor<1x32x12x12xf32, #layout13>) -> () - %30 = "ttnn.softmax"(%29) <{dimension = -1 : si32}> : (tensor<1x32x12x12xf32, #layout13>) -> tensor<1x32x12x12xf32, #layout13> - "ttnn.dealloc"(%28) : (tensor<1x32x12x12xf32, #layout13>) -> () - %31 = "ttnn.reshape"(%30) <{shape = [32 : i32, 12 : i32, 12 : i32]}> : (tensor<1x32x12x12xf32, #layout13>) -> tensor<32x12x12xf32, #layout12> - "ttnn.dealloc"(%30) : (tensor<1x32x12x12xf32, #layout13>) -> () - %32 = "ttnn.to_layout"(%arg5) <{layout = #ttnn.layout}> : (tensor<3200x3200xf32, #layout3>) -> tensor<3200x3200xf32, #layout6> - %33 = "ttnn.to_device"(%32, %0) <{memory_config = #ttnn.memory_config<, , <<100x100>>>}> : (tensor<3200x3200xf32, #layout6>, !tt.device<#device>) -> tensor<3200x3200xf32, #layout6> - "ttnn.dealloc"(%32) : (tensor<3200x3200xf32, #layout6>) -> () - %34 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<2x400>>>, shape = #ttnn.shape<12x3200>}> : (!tt.device<#device>) -> tensor<12x3200xf32, #layout7> - %35 = "ttnn.matmul"(%3, %33, %34) : (tensor<12x3200xf32, #layout5>, tensor<3200x3200xf32, #layout6>, tensor<12x3200xf32, #layout7>) -> tensor<12x3200xf32, #layout7> - "ttnn.dealloc"(%33) : (tensor<3200x3200xf32, #layout6>) -> () - "ttnn.dealloc"(%3) : (tensor<12x3200xf32, #layout5>) -> () - %36 = "ttnn.reshape"(%35) <{shape = [1 : i32, 12 : i32, 32 : i32, 100 : i32]}> : (tensor<12x3200xf32, #layout7>) -> tensor<1x12x32x100xf32, #layout8> - "ttnn.dealloc"(%34) : (tensor<12x3200xf32, #layout7>) -> () - %37 = "ttnn.transpose"(%36) <{dim0 = -3 : si32, dim1 = -2 : si32}> : 
(tensor<1x12x32x100xf32, #layout8>) -> tensor<1x32x12x100xf32, #layout9> - "ttnn.dealloc"(%36) : (tensor<1x12x32x100xf32, #layout8>) -> () - %38 = "ttnn.transpose"(%37) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<1x32x12x100xf32, #layout9>) -> tensor<1x32x100x12xf32, #layout16> - "ttnn.dealloc"(%37) : (tensor<1x32x12x100xf32, #layout9>) -> () - %39 = "ttnn.reshape"(%38) <{shape = [32 : i32, 100 : i32, 12 : i32]}> : (tensor<1x32x100x12xf32, #layout16>) -> tensor<32x100x12xf32, #layout11> - "ttnn.dealloc"(%38) : (tensor<1x32x100x12xf32, #layout16>) -> () - %40 = "ttnn.transpose"(%39) <{dim0 = -2 : si32, dim1 = -1 : si32}> : (tensor<32x100x12xf32, #layout11>) -> tensor<32x12x100xf32, #layout10> - "ttnn.dealloc"(%39) : (tensor<32x100x12xf32, #layout11>) -> () - %41 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<48x13>>>, shape = #ttnn.shape<32x12x100>}> : (!tt.device<#device>) -> tensor<32x12x100xf32, #layout10> - %42 = "ttnn.matmul"(%31, %40, %41) : (tensor<32x12x12xf32, #layout12>, tensor<32x12x100xf32, #layout10>, tensor<32x12x100xf32, #layout10>) -> tensor<32x12x100xf32, #layout10> - "ttnn.dealloc"(%40) : (tensor<32x12x100xf32, #layout10>) -> () - "ttnn.dealloc"(%31) : (tensor<32x12x12xf32, #layout12>) -> () - %43 = "ttnn.reshape"(%42) <{shape = [1 : i32, 32 : i32, 12 : i32, 100 : i32]}> : (tensor<32x12x100xf32, #layout10>) -> tensor<1x32x12x100xf32, #layout9> - "ttnn.dealloc"(%41) : (tensor<32x12x100xf32, #layout10>) -> () - %44 = "ttnn.transpose"(%43) <{dim0 = -3 : si32, dim1 = -2 : si32}> : (tensor<1x32x12x100xf32, #layout9>) -> tensor<1x12x32x100xf32, #layout8> - "ttnn.dealloc"(%43) : (tensor<1x32x12x100xf32, #layout9>) -> () - %45 = "ttnn.reshape"(%44) <{shape = [12 : i32, 3200 : i32]}> : (tensor<1x12x32x100xf32, #layout8>) -> tensor<12x3200xf32, #layout7> - "ttnn.dealloc"(%44) : (tensor<1x12x32x100xf32, #layout8>) -> () - %46 = "ttnn.to_layout"(%arg6) <{layout = #ttnn.layout}> : (tensor<3200x3200xf32, #layout3>) -> tensor<3200x3200xf32, #layout6> - %47 = "ttnn.to_device"(%46, %0) <{memory_config = #ttnn.memory_config<, , <<100x100>>>}> : (tensor<3200x3200xf32, #layout6>, !tt.device<#device>) -> tensor<3200x3200xf32, #layout6> - "ttnn.dealloc"(%46) : (tensor<3200x3200xf32, #layout6>) -> () - %48 = "ttnn.empty"(%0) <{dtype = #tt.supportedDataTypes, layout = #ttnn.layout, memory_config = #ttnn.memory_config<, , <<2x400>>>, shape = #ttnn.shape<12x3200>}> : (!tt.device<#device>) -> tensor<12x3200xf32, #layout7> - %49 = "ttnn.matmul"(%45, %47, %48) : (tensor<12x3200xf32, #layout7>, tensor<3200x3200xf32, #layout6>, tensor<12x3200xf32, #layout7>) -> tensor<12x3200xf32, #layout7> - "ttnn.dealloc"(%47) : (tensor<3200x3200xf32, #layout6>) -> () - "ttnn.dealloc"(%45) : (tensor<12x3200xf32, #layout7>) -> () - %50 = "ttnn.reshape"(%49) <{shape = [1 : i32, 12 : i32, 3200 : i32]}> : (tensor<12x3200xf32, #layout7>) -> tensor<1x12x3200xf32, #layout17> - "ttnn.dealloc"(%48) : (tensor<12x3200xf32, #layout7>) -> () - %51 = "ttnn.from_device"(%50) : (tensor<1x12x3200xf32, #layout17>) -> tensor<1x12x3200xf32, #layout> - "ttnn.dealloc"(%50) : (tensor<1x12x3200xf32, #layout17>) -> () - %52 = "ttnn.to_layout"(%51) <{layout = #ttnn.layout}> : (tensor<1x12x3200xf32, #layout>) -> tensor<1x12x3200xf32, #layout> - "ttnn.dealloc"(%51) : (tensor<1x12x3200xf32, #layout>) -> () - return %52 : tensor<1x12x3200xf32, #layout> - } -} diff --git a/test/ttmlir/Silicon/TTNN/l1_interleaved_policy/large_tensors.mlir 
b/test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_ABC_l1_None.mlir similarity index 51% rename from test/ttmlir/Silicon/TTNN/l1_interleaved_policy/large_tensors.mlir rename to test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_ABC_l1_None.mlir index b258435db9..408d3eda3b 100644 --- a/test/ttmlir/Silicon/TTNN/l1_interleaved_policy/large_tensors.mlir +++ b/test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_ABC_l1_None.mlir @@ -1,19 +1,30 @@ // RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s > %t.mlir // RUN: FileCheck %s --input-file=%t.mlir // RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// +// A B +// \ / +// C +// | +// D +// +// (A > L1) AND (B > L1) AND (C > L1) +// => +// DRAM: ABC; L1: None +// #any_device = #tt.operand_constraint module attributes {} { - func.func @forward(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x8192xbf16>, %arg2: tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16> { - // CHECK: #[[LAYOUT_2:layout2]] = #tt.layout<{{.*}}, memref<{{.*}}, #dram>, {{.*}}> + func.func @forward(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x8192xbf16>, %arg2: tensor<8192x8192xbf16>, %arg3: tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16> { + // CHECK: #[[LAYOUT_2:layout2]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #dram>, interleaved> %0 = tensor.empty() : tensor<8192x8192xbf16> - // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<8192x8192xbf16, #[[LAYOUT_2]]> + // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<8192x8192xbf16, #[[LAYOUT_2]]> %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<8192x8192xbf16>, tensor<8192x8192xbf16>, tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16> %2 = tensor.empty() : tensor<8192x8192xbf16> - // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<8192x8192xbf16, #[[LAYOUT_2]]> - %3 = "ttir.add"(%1, %arg2, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<8192x8192xbf16>, tensor<8192x8192xbf16>, tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<8192x8192xbf16, #[[LAYOUT_2]]> + %3 = "ttir.add"(%arg2, %arg3, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<8192x8192xbf16>, tensor<8192x8192xbf16>, tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16> %4 = tensor.empty() : tensor<8192x8192xbf16> - // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<8192x8192xbf16, #[[LAYOUT_2]]> - %7 = "ttir.relu"(%3, %4) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device]}> : (tensor<8192x8192xbf16>, tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16> - return %7 : tensor<8192x8192xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<8192x8192xbf16, #[[LAYOUT_2]]> + %5 = "ttir.matmul"(%1, %3, %4) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<8192x8192xbf16>, tensor<8192x8192xbf16>, tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16> + return %5 : tensor<8192x8192xbf16> } -} +} \ No newline at end of file diff --git a/test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_AB_l1_C.mlir b/test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_AB_l1_C.mlir new file mode 100644 index 0000000000..1250078312 --- 
/dev/null +++ b/test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_AB_l1_C.mlir @@ -0,0 +1,33 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s > %t.mlir +// RUN: FileCheck %s --input-file=%t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// +// A B +// \ / +// C +// | +// D +// +// (A + C > L1) AND (B + C > L1) AND (A + B > L1) AND (A < C) AND (B < C) AND (C <= L1) +// => +// DRAM: AB; L1: C +// +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<5120x4096xbf16>, %arg1: tensor<5120x4096xbf16>, %arg2: tensor<4096x5120xbf16>, %arg3: tensor<4096x5120xbf16>) -> tensor<5120x5120xbf16> { + // CHECK: #[[L1_:.*]] = #tt.memory_space + // CHECK: #[[LAYOUT_4:layout4]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #dram>, interleaved> + // CHECK: #[[LAYOUT_6:layout6]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #dram>, interleaved> + // CHECK: #[[LAYOUT_7:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #l1_>, interleaved> + %0 = tensor.empty() : tensor<5120x4096xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<5120x4096xbf16, #[[LAYOUT_4]]> + %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<5120x4096xbf16>, tensor<5120x4096xbf16>, tensor<5120x4096xbf16>) -> tensor<5120x4096xbf16> + %2 = tensor.empty() : tensor<4096x5120xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<4096x5120xbf16, #[[LAYOUT_6]]> + %3 = "ttir.add"(%arg2, %arg3, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<4096x5120xbf16>, tensor<4096x5120xbf16>, tensor<4096x5120xbf16>) -> tensor<4096x5120xbf16> + %4 = tensor.empty() : tensor<5120x5120xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<5120x5120xbf16, #[[LAYOUT_7]]> + %5 = "ttir.matmul"(%1, %3, %4) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<5120x4096xbf16>, tensor<4096x5120xbf16>, tensor<5120x5120xbf16>) -> tensor<5120x5120xbf16> + return %5 : tensor<5120x5120xbf16> + } +} \ No newline at end of file diff --git a/test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_AC_l1_B.mlir b/test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_AC_l1_B.mlir new file mode 100644 index 0000000000..3ce99f7cbb --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_AC_l1_B.mlir @@ -0,0 +1,32 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s > %t.mlir +// RUN: FileCheck %s --input-file=%t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// +// A B +// \ / +// C +// | +// D +// +// (A + C > L1) AND (B + C > L1) AND (A + B > L1) AND (A < B) AND (C < B) AND (B <= L1) +// => +// DRAM: AC; L1: B +// +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<4096x5120xbf16>, %arg1: tensor<4096x5120xbf16>, %arg2: tensor<5120x5120xbf16>, %arg3: tensor<5120x5120xbf16>) -> tensor<4096x5120xbf16> { + // CHECK: #[[L1_:.*]] = #tt.memory_space + // CHECK: #[[LAYOUT_3:layout3]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, 
memref<{{.*}}, #dram>, interleaved> + // CHECK: #[[LAYOUT_5:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #l1_>, interleaved> + %0 = tensor.empty() : tensor<4096x5120xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<4096x5120xbf16, #[[LAYOUT_3]]> + %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<4096x5120xbf16>, tensor<4096x5120xbf16>, tensor<4096x5120xbf16>) -> tensor<4096x5120xbf16> + %2 = tensor.empty() : tensor<5120x5120xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<5120x5120xbf16, #[[LAYOUT_5]]> + %3 = "ttir.add"(%arg2, %arg3, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<5120x5120xbf16>, tensor<5120x5120xbf16>, tensor<5120x5120xbf16>) -> tensor<5120x5120xbf16> + %4 = tensor.empty() : tensor<4096x5120xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<4096x5120xbf16, #[[LAYOUT_3]]> + %5 = "ttir.matmul"(%1, %3, %4) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<4096x5120xbf16>, tensor<5120x5120xbf16>, tensor<4096x5120xbf16>) -> tensor<4096x5120xbf16> + return %5 : tensor<4096x5120xbf16> + } +} \ No newline at end of file diff --git a/test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_A_l1_BC.mlir b/test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_A_l1_BC.mlir new file mode 100644 index 0000000000..578a15894f --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_A_l1_BC.mlir @@ -0,0 +1,32 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s > %t.mlir +// RUN: FileCheck %s --input-file=%t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// +// A B +// \ / +// C +// | +// D +// +// (A + B + C > L1) AND (A + C < B + C) AND (A + B < B + C) AND (B + C <= L1) +// => +// DRAM: A; L1: BC +// +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<2048x2048xbf16>, %arg1: tensor<2048x2048xbf16>, %arg2: tensor<2048x8192xbf16>, %arg3: tensor<2048x8192xbf16>) -> tensor<2048x8192xbf16> { + // CHECK: #[[L1_:.*]] = #tt.memory_space + // CHECK: #[[LAYOUT_3:layout3]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #dram>, interleaved> + // CHECK: #[[LAYOUT_5:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #l1_>, interleaved> + %0 = tensor.empty() : tensor<2048x2048xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<2048x2048xbf16, #[[LAYOUT_3]]> + %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<2048x2048xbf16>, tensor<2048x2048xbf16>, tensor<2048x2048xbf16>) -> tensor<2048x2048xbf16> + %2 = tensor.empty() : tensor<2048x8192xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<2048x8192xbf16, #[[LAYOUT_5]]> + %3 = "ttir.add"(%arg2, %arg3, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<2048x8192xbf16>, tensor<2048x8192xbf16>, tensor<2048x8192xbf16>) -> tensor<2048x8192xbf16> + %4 = tensor.empty() : tensor<2048x8192xbf16> + // CHECK-DAG: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<2048x8192xbf16, #[[LAYOUT_5]]> + %5 = "ttir.matmul"(%1, %3, %4) <{operandSegmentSizes = array, 
operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<2048x2048xbf16>, tensor<2048x8192xbf16>, tensor<2048x8192xbf16>) -> tensor<2048x8192xbf16> + return %5 : tensor<2048x8192xbf16> + } +} \ No newline at end of file diff --git a/test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_C_l1_AB.mlir b/test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_C_l1_AB.mlir new file mode 100644 index 0000000000..282034ad4d --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_C_l1_AB.mlir @@ -0,0 +1,33 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s > %t.mlir +// RUN: FileCheck %s --input-file=%t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// +// A B +// \ / +// C +// | +// D +// +// (A + B + C > L1) AND (A + C < A + B) AND (B + C < A + B) AND (A + B <= L1) +// => +// DRAM: C; L1: AB +// +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x8192xbf16>, %arg2: tensor<8192x2048xbf16>, %arg3: tensor<8192x2048xbf16>) -> tensor<2048x2048xbf16> { + // CHECK: #[[L1_:.*]] = #tt.memory_space + // CHECK: #[[LAYOUT_4:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #l1_>, interleaved> + // CHECK: #[[LAYOUT_6:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #l1_>, interleaved> + // CHECK: #[[LAYOUT_7:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #dram>, interleaved> + %0 = tensor.empty() : tensor<2048x8192xbf16> + // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<2048x8192xbf16, #[[LAYOUT_4]]> + %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<2048x8192xbf16>, tensor<2048x8192xbf16>, tensor<2048x8192xbf16>) -> tensor<2048x8192xbf16> + %2 = tensor.empty() : tensor<8192x2048xbf16> + // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<8192x2048xbf16, #[[LAYOUT_6]]> + %3 = "ttir.add"(%arg2, %arg3, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<8192x2048xbf16>, tensor<8192x2048xbf16>, tensor<8192x2048xbf16>) -> tensor<8192x2048xbf16> + %4 = tensor.empty() : tensor<2048x2048xbf16> + // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<2048x2048xbf16, #[[LAYOUT_7]]> + %5 = "ttir.matmul"(%1, %3, %4) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<2048x8192xbf16>, tensor<8192x2048xbf16>, tensor<2048x2048xbf16>) -> tensor<2048x2048xbf16> + return %5 : tensor<2048x2048xbf16> + } +} diff --git a/test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_None_l1_ABC.mlir b/test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_None_l1_ABC.mlir new file mode 100644 index 0000000000..a932ed1e21 --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/l1_interleaved_policy/unittests/dram_None_l1_ABC.mlir @@ -0,0 +1,31 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s > %t.mlir +// RUN: FileCheck %s --input-file=%t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// +// A B +// \ / +// C +// | +// D +// +// (A + B + C <= L1) +// => +// DRAM: None; L1: ABC +// +#any_device = 
#tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<32x32xbf16>, %arg1: tensor<32x32xbf16>, %arg2: tensor<32x32xbf16>, %arg3: tensor<32x32xbf16>) -> tensor<32x32xbf16> { + // CHECK: #[[L1_:.*]] = #tt.memory_space + // CHECK: #[[LAYOUT_2:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #l1_>, interleaved> + %0 = tensor.empty() : tensor<32x32xbf16> + // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<32x32xbf16, #[[LAYOUT_2]]> + %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x32xbf16>, tensor<32x32xbf16>, tensor<32x32xbf16>) -> tensor<32x32xbf16> + %2 = tensor.empty() : tensor<32x32xbf16> + // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<32x32xbf16, #[[LAYOUT_2]]> + %3 = "ttir.add"(%arg2, %arg3, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x32xbf16>, tensor<32x32xbf16>, tensor<32x32xbf16>) -> tensor<32x32xbf16> + %4 = tensor.empty() : tensor<32x32xbf16> + // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<32x32xbf16, #[[LAYOUT_2]]> + %5 = "ttir.add"(%1, %3, %4) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x32xbf16>, tensor<32x32xbf16>, tensor<32x32xbf16>) -> tensor<32x32xbf16> + return %5 : tensor<32x32xbf16> + } +} diff --git a/test/ttmlir/Silicon/TTNN/l1_interleaved_policy/simple_join.mlir b/test/ttmlir/Silicon/TTNN/l1_interleaved_policy/simple_join.mlir deleted file mode 100644 index 842580092e..0000000000 --- a/test/ttmlir/Silicon/TTNN/l1_interleaved_policy/simple_join.mlir +++ /dev/null @@ -1,23 +0,0 @@ -// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s > %t.mlir -// RUN: FileCheck %s --input-file=%t.mlir -// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn -#any_device = #tt.operand_constraint -module attributes {} { - func.func @forward(%arg0: tensor<64x128xbf16>, %arg1: tensor<64x128xbf16>, %arg2: tensor<64x128xbf16>, %arg3: tensor<64x128xbf16>) -> tensor<64x128xbf16> { - // CHECK: #[[L1_:.*]] = #tt.memory_space - // CHECK: #[[LAYOUT_2:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #l1_>, interleaved> - %0 = tensor.empty() : tensor<64x128xbf16> - // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<64x128xbf16, #[[LAYOUT_2]]> - %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xbf16>, tensor<64x128xbf16>, tensor<64x128xbf16>) -> tensor<64x128xbf16> - %2 = tensor.empty() : tensor<64x128xbf16> - // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<64x128xbf16, #[[LAYOUT_2]]> - %3 = "ttir.add"(%arg2, %arg3, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xbf16>, tensor<64x128xbf16>, tensor<64x128xbf16>) -> tensor<64x128xbf16> - %4 = tensor.empty() : tensor<64x128xbf16> - // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<64x128xbf16, #[[LAYOUT_2]]> - %5 = "ttir.add"(%1, %3, %4) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xbf16>, tensor<64x128xbf16>, tensor<64x128xbf16>) -> tensor<64x128xbf16> - %6 = tensor.empty() : tensor<64x128xbf16> - // CHECK-DAG: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<64x128xbf16, #[[LAYOUT_2]]> - %7 = 
"ttir.relu"(%5, %6) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device]}> : (tensor<64x128xbf16>, tensor<64x128xbf16>) -> tensor<64x128xbf16> - return %7 : tensor<64x128xbf16> - } -} diff --git a/test/ttmlir/Silicon/TTNN/l1_interleaved_policy/single_op.mlir b/test/ttmlir/Silicon/TTNN/l1_interleaved_policy/single_op.mlir index 3dfc66d77e..542e4c95c9 100644 --- a/test/ttmlir/Silicon/TTNN/l1_interleaved_policy/single_op.mlir +++ b/test/ttmlir/Silicon/TTNN/l1_interleaved_policy/single_op.mlir @@ -4,9 +4,9 @@ // UNSUPPORTED: true #any_device_tile = #tt.operand_constraint module attributes {} { - func.func @forward(%arg0: tensor<64x128xbf16>, %arg1: tensor<128x96xbf16>) -> tensor<64x96xbf16> { - %0 = tensor.empty() : tensor<64x96xbf16> - %1 = "ttir.matmul"(%arg0, %arg1, %0) <{operand_constraints = [#any_device_tile, #any_device_tile, #any_device_tile]}> : (tensor<64x128xbf16>, tensor<128x96xbf16>, tensor<64x96xbf16>) -> tensor<64x96xbf16> - return %1 : tensor<64x96xbf16> + func.func @forward(%arg0: tensor<5120x5120xbf16>) -> tensor<5120x5120xbf16> { + %0 = tensor.empty() : tensor<5120x5120xbf16> + %1 = "ttir.relu"(%arg0, %0) <{operandSegmentSizes = array, operand_constraints = [#any_device_tile, #any_device_tile]}> : (tensor<5120x5120xbf16>, tensor<5120x5120xbf16>) -> tensor<5120x5120xbf16> + return %1 : tensor<5120x5120xbf16> } }