diff --git a/include/aie/Dialect/AIE/IR/AIEOps.td b/include/aie/Dialect/AIE/IR/AIEOps.td index 25bde1647e..e876a9b4df 100644 --- a/include/aie/Dialect/AIE/IR/AIEOps.td +++ b/include/aie/Dialect/AIE/IR/AIEOps.td @@ -1538,7 +1538,9 @@ def AIE_ShimDMAAllocationOp : AIE_Op<"shim_dma_allocation", [HasParent<"DeviceOp ins FlatSymbolRefAttr:$sym_name, DMAChannelDir:$channel_dir, AIEI64Attr:$channel_index, - AIEI64Attr:$col + AIEI64Attr:$col, + // If this is set we are using the PLIO in this ShimTile + DefaultValuedAttr:$plio ); let results = (outs); @@ -1634,7 +1636,8 @@ def AIE_ObjectFifoCreateOp: AIE_Op<"objectfifo", [HasParent<"DeviceOp">, Symbol] TypeAttrOf:$elemType, BDDimLayoutArrayAttr:$dimensionsToStream, BDDimLayoutArrayArrayAttr:$dimensionsFromStreamPerConsumer, - DefaultValuedAttr:$via_DMA + DefaultValuedAttr:$via_DMA, + DefaultValuedAttr:$plio ); let assemblyFormat = [{ diff --git a/lib/Dialect/AIE/Transforms/AIECreatePathFindFlows.cpp b/lib/Dialect/AIE/Transforms/AIECreatePathFindFlows.cpp index c008620a9c..13d2092588 100644 --- a/lib/Dialect/AIE/Transforms/AIECreatePathFindFlows.cpp +++ b/lib/Dialect/AIE/Transforms/AIECreatePathFindFlows.cpp @@ -104,8 +104,8 @@ struct ConvertFlowsToInterconnect : OpConversionPattern { SwitchboxOp swOp = analyzer.getSwitchbox(rewriter, curr.col, curr.row); int shimCh = srcChannel; // TODO: must reserve N3, N7, S2, S3 for DMA connections - if (curr == srcSB && - analyzer.getTile(rewriter, srcSB.col, srcSB.row).isShimNOCTile()) { + if (curr == srcSB && analyzer.getTile(rewriter, srcSB.col, srcSB.row) + .isShimNOCorPLTile()) { // shim DMAs at start of flows if (srcBundle == WireBundle::DMA) { shimCh = srcChannel == 0 @@ -125,13 +125,10 @@ struct ConvertFlowsToInterconnect : OpConversionPattern { srcBundle, srcChannel, WireBundle::North, shimCh); } else if (srcBundle == WireBundle::PLIO) { // PLIO at start of flows with mux - if (srcChannel == 2 || srcChannel == 3 || srcChannel == 6 || - srcChannel == 7) { // Only some PLIO 
requrie mux - ShimMuxOp shimMuxOp = analyzer.getShimMux(rewriter, srcSB.col); - addConnection( - rewriter, cast(shimMuxOp.getOperation()), - flowOp, srcBundle, srcChannel, WireBundle::North, shimCh); - } + ShimMuxOp shimMuxOp = analyzer.getShimMux(rewriter, srcSB.col); + addConnection(rewriter, + cast(shimMuxOp.getOperation()), flowOp, + srcBundle, srcChannel, WireBundle::North, shimCh); } } for (const auto &[bundle, channel] : setting.dsts) { @@ -146,7 +143,7 @@ struct ConvertFlowsToInterconnect : OpConversionPattern { bundle == WireBundle::NOC)) { shimCh = channel; if (analyzer.getTile(rewriter, curr.col, curr.row) - .isShimNOCTile()) { + .isShimNOCorPLTile()) { // shim DMAs at end of flows if (bundle == WireBundle::DMA) { shimCh = channel == 0 @@ -162,8 +159,7 @@ struct ConvertFlowsToInterconnect : OpConversionPattern { addConnection( rewriter, cast(shimMuxOp.getOperation()), flowOp, WireBundle::North, shimCh, bundle, channel); - } else if (channel >= - 2) { // must be PLIO...only PLIO >= 2 require mux + } else if (bundle == WireBundle::PLIO) { ShimMuxOp shimMuxOp = analyzer.getShimMux(rewriter, curr.col); addConnection( rewriter, cast(shimMuxOp.getOperation()), diff --git a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp index c9bb260637..8a2dba3906 100644 --- a/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp +++ b/lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp @@ -973,11 +973,12 @@ struct AIEObjectFifoStatefulTransformPass void createObjectFifoAllocationInfo(OpBuilder &builder, MLIRContext *ctx, FlatSymbolRefAttr obj_fifo, int colIndex, DMAChannelDir channelDir, - int channelIndex) { + int channelIndex, bool plio) { builder.create(builder.getUnknownLoc(), obj_fifo, DMAChannelDirAttr::get(ctx, channelDir), builder.getI64IntegerAttr(channelIndex), - builder.getI64IntegerAttr(colIndex)); + builder.getI64IntegerAttr(colIndex), + builder.getBoolAttr(plio)); } 
void runOnOperation() override { @@ -986,6 +987,8 @@ struct AIEObjectFifoStatefulTransformPass DMAChannelAnalysis dmaAnalysis(device); OpBuilder builder = OpBuilder::atBlockEnd(device.getBody()); auto ctx = device->getContext(); + auto producerWireType = WireBundle::DMA; + auto consumerWireType = WireBundle::DMA; std::set objectFifoTiles; // track cores to check for loops during unrolling @@ -1125,13 +1128,15 @@ struct AIEObjectFifoStatefulTransformPass producerChan.channel, 0, producer.getDimensionsToStreamAttr()); // generate objectFifo allocation info builder.setInsertionPoint(&device.getBody()->back()); + if (producer.getProducerTileOp().isShimTile()) createObjectFifoAllocationInfo( builder, ctx, SymbolRefAttr::get(ctx, producer.getName()), producer.getProducerTileOp().colIndex(), producerChan.direction, - producerChan.channel); + producerChan.channel, producer.getPlio()); for (auto consumer : consumers) { + // create consumer tile DMA DMAChannel consumerChan = dmaAnalysis.getSlaveDMAChannel(consumer.getProducerTile()); @@ -1141,18 +1146,32 @@ struct AIEObjectFifoStatefulTransformPass consumerChan.channel, 1, consumerDims); // generate objectFifo allocation info builder.setInsertionPoint(&device.getBody()->back()); + + // If we have PLIO then figure out the direction and make that a PLIO + if (producer.getPlio()) { + producerWireType = producer.getProducerTileOp().isShimTile() + ? WireBundle::PLIO + : WireBundle::DMA; + consumerWireType = !(producer.getProducerTileOp().isShimTile()) + ? 
WireBundle::PLIO + : WireBundle::DMA; + } else { + producerWireType = WireBundle::DMA; + consumerWireType = WireBundle::DMA; + } + if (consumer.getProducerTileOp().isShimTile()) createObjectFifoAllocationInfo( builder, ctx, SymbolRefAttr::get(ctx, producer.getName()), consumer.getProducerTileOp().colIndex(), consumerChan.direction, - consumerChan.channel); + consumerChan.channel, producer.getPlio()); // create flow builder.setInsertionPointAfter(producer); builder.create(builder.getUnknownLoc(), - producer.getProducerTile(), WireBundle::DMA, + producer.getProducerTile(), producerWireType, producerChan.channel, consumer.getProducerTile(), - WireBundle::DMA, consumerChan.channel); + consumerWireType, consumerChan.channel); } } diff --git a/lib/Dialect/AIE/Transforms/AIEPathFinder.cpp b/lib/Dialect/AIE/Transforms/AIEPathFinder.cpp index 966aa6c5e5..dd1ca764e8 100644 --- a/lib/Dialect/AIE/Transforms/AIEPathFinder.cpp +++ b/lib/Dialect/AIE/Transforms/AIEPathFinder.cpp @@ -167,7 +167,7 @@ ShimMuxOp DynamicTileAnalysis::getShimMux(OpBuilder &builder, int col) { if (coordToShimMux.count({col, row})) { return coordToShimMux[{col, row}]; } - assert(getTile(builder, col, row).isShimNOCTile()); + assert(getTile(builder, col, row).isShimNOCorPLTile()); auto switchboxOp = builder.create(builder.getUnknownLoc(), getTile(builder, col, row)); SwitchboxOp::ensureTerminator(switchboxOp.getConnections(), builder, diff --git a/lib/Targets/AIETargetHSA.cpp b/lib/Targets/AIETargetHSA.cpp index 4711e11197..ff0ad1441d 100644 --- a/lib/Targets/AIETargetHSA.cpp +++ b/lib/Targets/AIETargetHSA.cpp @@ -14,7 +14,7 @@ #include "aie/Dialect/AIEX/IR/AIEXDialect.h" #include "aie/Targets/AIETargets.h" -#include "mlir/Dialect/Func/IR/FuncOps.h" // Eddie added to get the NPU func ops +#include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/IRMapping.h" #include "mlir/Pass/Pass.h" @@ -134,6 +134,7 @@ mlir::LogicalResult AIETranslateToHSA(ModuleOp module, raw_ostream 
&output) { uint32_t ChannelId = infoOp->getChannelIndex(); bool isMM2S = channelDir == AIE::DMAChannelDir::MM2S; int col = infoOp->getCol(); + bool isPlio = infoOp->getPlio(); llvm::SmallVector strides = llvm::map_to_vector( llvm::reverse(op.getMixedStrides()), @@ -182,7 +183,8 @@ mlir::LogicalResult AIETranslateToHSA(ModuleOp module, raw_ostream &output) { output << "\tmlir_aie_packet_nd_memcpy(&pkt" << op_count << ", 0 /* herd_id */, " << col << " /* col */, " << isMM2S << " /* dir */, " << ChannelId - << "/* channel */, 4 /* Burst length */, 2 /* Memory space */, " + << "/* channel */, 4 /* Burst length */, " << (isPlio ? 1 : 2) + << " /* Memory space */, " "(uint64_t)buf" << arg_idx << " + " << offset << " /* Address */, " << sizes[0] * 4 << " /* 1d_length */, " << (strides[1] ? sizes[1] : 1) diff --git a/lib/Targets/AIETargetXAIEV2.cpp b/lib/Targets/AIETargetXAIEV2.cpp index 0ca128de56..5099ed2345 100644 --- a/lib/Targets/AIETargetXAIEV2.cpp +++ b/lib/Targets/AIETargetXAIEV2.cpp @@ -725,24 +725,43 @@ mlir::LogicalResult AIETranslateToXAIEV2(ModuleOp module, raw_ostream &output) { } for (auto connectOp : b.getOps()) { - if (connectOp.getSourceBundle() == WireBundle::North) - // demux! - output - << "__mlir_aie_try(XAie_EnableAieToShimDmaStrmPort(" - << deviceInstRef << ", " << tileLocStr("x", "y") - << ", " - // << - // stringifyWireBundle(connectOp.sourceBundle()).upper() - << connectOp.sourceIndex() << "));\n"; - else if (connectOp.getDestBundle() == WireBundle::North) - // mux - output - << "__mlir_aie_try(XAie_EnableShimDmaToAieStrmPort(" - << deviceInstRef << ", " << tileLocStr("x", "y") - << ", " - // << - // stringifyWireBundle(connectOp.sourceBundle()).upper() - << connectOp.destIndex() << "));\n"; + + if (connectOp.getSourceBundle() == WireBundle::DMA || + connectOp.getDestBundle() == WireBundle::DMA) { + if (connectOp.getSourceBundle() == WireBundle::North) + // demux! 
+ output + << "__mlir_aie_try(XAie_EnableAieToShimDmaStrmPort(" + << deviceInstRef << ", " << tileLocStr("x", "y") + << ", " + // << + // stringifyWireBundle(connectOp.sourceBundle()).upper() + << connectOp.sourceIndex() << "));\n"; + else if (connectOp.getDestBundle() == WireBundle::North) + // mux + output + << "__mlir_aie_try(XAie_EnableShimDmaToAieStrmPort(" + << deviceInstRef << ", " << tileLocStr("x", "y") + << ", " + // << + // stringifyWireBundle(connectOp.sourceBundle()).upper() + << connectOp.destIndex() << "));\n"; + } + + else if (connectOp.getSourceBundle() == WireBundle::PLIO || + connectOp.getDestBundle() == WireBundle::PLIO) { + if (connectOp.getSourceBundle() == WireBundle::North) { + // mux + output << "__mlir_aie_try(XAie_AieToPlIntfEnable(" << deviceInstRef + << ", " << tileLocStr("x", "y") << ", " + << connectOp.destIndex() << ", PLIF_WIDTH_64));\n"; + } else if (connectOp.getDestBundle() == WireBundle::North) { + // mux + output << "__mlir_aie_try(XAie_PlToAieIntfEnable(" << deviceInstRef + << ", " << tileLocStr("x", "y") << ", " + << connectOp.destIndex() << ", PLIF_WIDTH_64));\n"; + } + } } } for (auto switchboxOp : targetOp.getOps()) { diff --git a/programming_examples/basic/passthrough_dmas_plio/CMakeLists.txt b/programming_examples/basic/passthrough_dmas_plio/CMakeLists.txt new file mode 100644 index 0000000000..c17d3d365b --- /dev/null +++ b/programming_examples/basic/passthrough_dmas_plio/CMakeLists.txt @@ -0,0 +1,75 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2023 Advanced Micro Devices, Inc. 
+ +# parameters +# -DBOOST_ROOT: Path to Boost install +# -DXRT_INC_DIR: Full path to src/runtime_src/core/include in XRT cloned repo +# -DXRT_LIB_DIR: Path to xrt_coreutil.lib +# -DTARGET_NAME: Target name to be built + +# cmake needs this line +cmake_minimum_required(VERSION 3.1) + +set(CMAKE_CXX_STANDARD 23) +set(CMAKE_CXX_STANDARD_REQUIRED YES) + +find_program(WSL NAMES powershell.exe) + +if (NOT WSL) + set(CMAKE_C_COMPILER gcc-13) + set(CMAKE_CXX_COMPILER g++-13) + set(BOOST_ROOT /usr/include/boost CACHE STRING "Path to Boost install") + set(XRT_INC_DIR /opt/xilinx/xrt/include CACHE STRING "Path to XRT cloned repo") + set(XRT_LIB_DIR /opt/xilinx/xrt/lib CACHE STRING "Path to xrt_coreutil.lib") +else() + set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install") + set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo") + set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib") +endif() + +set(TARGET_NAME test CACHE STRING "Target to be built") + +SET (ProjectName proj_${TARGET_NAME}) +SET (currentTarget ${TARGET_NAME}) + +if ( WSL ) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}) +endif () + +project(${ProjectName}) + +# Find packages +find_package(Boost REQUIRED) + +add_executable(${currentTarget} + ${CMAKE_CURRENT_SOURCE_DIR}/../../../runtime_lib/test_lib/test_utils.cpp + test.cpp +) + +target_compile_definitions(${currentTarget} PUBLIC DISABLE_ABI_CHECK=1) + +target_include_directories (${currentTarget} PUBLIC + ${XRT_INC_DIR} + ${Boost_INCLUDE_DIRS} + ${CMAKE_CURRENT_SOURCE_DIR}/../../../runtime_lib/test_lib +) + +target_link_directories(${currentTarget} PUBLIC + ${XRT_LIB_DIR} + ${Boost_LIBRARY_DIRS} +) + +if (NOT WSL) + target_link_libraries(${currentTarget} PUBLIC + xrt_coreutil + boost_program_options + boost_filesystem + ) +else() + target_link_libraries(${currentTarget} PUBLIC + xrt_coreutil + ) +endif() diff --git 
a/programming_examples/basic/passthrough_dmas_plio/Makefile b/programming_examples/basic/passthrough_dmas_plio/Makefile new file mode 100644 index 0000000000..a88e6c49d6 --- /dev/null +++ b/programming_examples/basic/passthrough_dmas_plio/Makefile @@ -0,0 +1,48 @@ +##===- Makefile -----------------------------------------------------------===## +# +# This file licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# +##===----------------------------------------------------------------------===## + +srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + +include ${srcdir}/../../makefile-common + +targetname = passThroughDMAs +LENGTH ?= 1024 + +all: input output + +build/aie-input-plio.mlir: ${srcdir}/aie2-input-plio.py + mkdir -p ${@D} + python3 $< ${LENGTH} > $@ + +build/aie-output-plio.mlir: ${srcdir}/aie2-output-plio.py + mkdir -p ${@D} + python3 $< ${LENGTH} > $@ + +input: build/aie-input-plio.mlir + aiecc.py --link_against_hsa --host-target=x86_64-amd-linux-gnu build/aie-input-plio.mlir \ + -I${srcdir}/../../../install/runtime_lib/x86_64-hsa/test_lib/include \ + -L/lib/x86_64-linux-gnu/ \ + ${srcdir}/test_vck5000.cpp \ + ${srcdir}/../../../install/runtime_lib/x86_64-hsa/test_lib/src/test_library.cpp \ + -Wl,--whole-archive -Wl,--no-whole-archive -lstdc++ -ldl -lelf -o input.elf + +output: build/aie-output-plio.mlir + aiecc.py --link_against_hsa --host-target=x86_64-amd-linux-gnu build/aie-output-plio.mlir \ + -I${srcdir}/../../../install/runtime_lib/x86_64-hsa/test_lib/include \ + -L/lib/x86_64-linux-gnu/ \ + ${srcdir}/test_vck5000.cpp \ + ${srcdir}/../../../install/runtime_lib/x86_64-hsa/test_lib/src/test_library.cpp \ + -Wl,--whole-archive -Wl,--no-whole-archive -lstdc++ -ldl -lelf -o output.elf +# Build, then run, both PLIO designs; the rules above only ever produce input.elf and output.elf. +run_vck5000: all + ./input.elf && ./output.elf + +clean: + rm -rf build 
aie-output-plio.mlir.prj aie-input-plio.mlir.prj core_* input.elf output.elf diff --git a/programming_examples/basic/passthrough_dmas_plio/README.md b/programming_examples/basic/passthrough_dmas_plio/README.md new file mode 100644 index 0000000000..182a1801c0 --- /dev/null +++ b/programming_examples/basic/passthrough_dmas_plio/README.md @@ -0,0 +1,27 @@ + + +# Passthrough DMAs with PLIO + +This reference design can be run on the VCK5000 Versal device. This design leverages the same data movement pattern as the [Passthrough DMAs](../passthrough-dmas) example design but it uses a soft DMA. Please see the [platforms repo](https://github.com/Xilinx/ROCm-air-platforms) for more information on how the programmable logic is integrated with the AIEs. This is meant to be an illustrative example to highlight how to integrate PL designs with AIE designs programmed using mlir-aie. + +In the platform, tile (26, 0) has PLIO connected to a DMA implemented in the programmable logic. There are two designs: `aie2-input-plio.py` uses the soft DMA to push data from DRAM into the AIEs, whereas `aie2-output-plio.py` uses the soft DMA to receive data from the AIEs and push it to DRAM. The soft DMA is programmed using the same mechanism as the ShimDMAs. + +In both designs ([input](./aie2-input-plio.py), [output](./aie2-output-plio.py)) data is brought from external memory to `ComputeTile2` and back, without modification from the tile, by using an implicit copy via the compute tile's Data Movement Accelerator (DMA). The data is read from and written to external memory through the Shim tiles (26, 0) and (30, 0). + +The implicit copy is performed using the `object_fifo_link` operation that specifies how input data arriving via `of_in` should be sent further via `of_out` by specifically leveraging the compute tile's DMA. This operation and its functionality are described in more depth in [Section-2b](../../../programming_guide/section-2/section-2b/03_Link_Distribute_Join/README.md#object-fifo-link) of the programming guide. 
+ + +To compile and run the design for VCK5000: +``` +make all +./output.elf // To run the kernel which outputs over PLIO +./input.elf // To run the kernel which inputs over PLIO +``` diff --git a/programming_examples/basic/passthrough_dmas_plio/aie2-input-plio.py b/programming_examples/basic/passthrough_dmas_plio/aie2-input-plio.py new file mode 100644 index 0000000000..f3242d4e20 --- /dev/null +++ b/programming_examples/basic/passthrough_dmas_plio/aie2-input-plio.py @@ -0,0 +1,62 @@ +# passthrough_dmas_plio/aie2-input-plio.py -*- Python -*- +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates + +import sys + +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.dialects.scf import * +from aie.extras.dialects.ext import memref, arith +from aie.extras.context import mlir_mod_ctx + +N = 1024 + +if len(sys.argv) > 1: + N = int(sys.argv[1]) + +dev = AIEDevice.xcvc1902 + + +def my_passthrough(): + with mlir_mod_ctx() as ctx: + + @device(dev) + def device_body(): + memRef_ty = T.memref(1024, T.i32()) + + # Tile declarations + ShimTile1 = tile(30, 0) + ShimTile2 = tile(26, 0) + ComputeTile2 = tile(30, 2) + + # AIE-array data movement with object fifos + of_in = object_fifo("in", ShimTile1, ComputeTile2, 2, memRef_ty, plio=True) + of_out = object_fifo("out", ComputeTile2, ShimTile2, 2, memRef_ty) + object_fifo_link(of_in, of_out) + + # Set up compute tiles + + # Compute tile 2 + @core(ComputeTile2) + def core_body(): + for _ in for_(sys.maxsize): + yield_([]) + + # To/from AIE-array data movement + tensor_ty = T.memref(N, T.i32()) + + @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty) + def sequence(A, B, C): + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, 
sizes=[1, 1, 1, N]) + npu_sync(column=0, row=0, direction=0, channel=0) + + print(ctx.module) + + +my_passthrough() diff --git a/programming_examples/basic/passthrough_dmas_plio/aie2-output-plio.py b/programming_examples/basic/passthrough_dmas_plio/aie2-output-plio.py new file mode 100644 index 0000000000..65763ee24c --- /dev/null +++ b/programming_examples/basic/passthrough_dmas_plio/aie2-output-plio.py @@ -0,0 +1,64 @@ +# passthrough_dmas_plio/aie2-output-plio.py -*- Python -*- +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates + +import sys + +from aie.dialects.aie import * +from aie.dialects.aiex import * +from aie.dialects.scf import * +from aie.extras.dialects.ext import memref, arith +from aie.extras.context import mlir_mod_ctx + +N = 1024 + +if len(sys.argv) > 1: + N = int(sys.argv[1]) + +dev = AIEDevice.xcvc1902 + + +def my_passthrough(): + with mlir_mod_ctx() as ctx: + + @device(dev) + def device_body(): + memRef_ty = T.memref(1024, T.i32()) + + # Tile declarations + ShimTile1 = tile(26, 0) + ShimTile2 = tile(30, 0) + ComputeTile2 = tile(30, 2) + + # AIE-array data movement with object fifos + of_in = object_fifo("in", ShimTile1, ComputeTile2, 2, memRef_ty) + of_out = object_fifo( + "out", ComputeTile2, ShimTile2, 2, memRef_ty, plio=True + ) + object_fifo_link(of_in, of_out) + + # Set up compute tiles + + # Compute tile 2 + @core(ComputeTile2) + def core_body(): + for _ in for_(sys.maxsize): + yield_([]) + + # To/from AIE-array data movement + tensor_ty = T.memref(N, T.i32()) + + @FuncOp.from_py_func(tensor_ty, tensor_ty, tensor_ty) + def sequence(A, B, C): + npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) + npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) + npu_sync(column=0, row=0, direction=0, 
channel=0) + + print(ctx.module) + + +my_passthrough() diff --git a/programming_examples/basic/passthrough_dmas_plio/run_vck5000.lit b/programming_examples/basic/passthrough_dmas_plio/run_vck5000.lit new file mode 100644 index 0000000000..0d392dd2ef --- /dev/null +++ b/programming_examples/basic/passthrough_dmas_plio/run_vck5000.lit @@ -0,0 +1,9 @@ +// (c) Copyright 2024 Advanced Micro Devices, Inc. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// REQUIRES: hsa +// +// RUN: make -f %S/Makefile clean +// RUN: make -f %S/Makefile all +// RUN: %run_on_vck5000 ./input.elf +// RUN: %run_on_vck5000 ./output.elf diff --git a/programming_examples/basic/passthrough_dmas_plio/test_vck5000.cpp b/programming_examples/basic/passthrough_dmas_plio/test_vck5000.cpp new file mode 100644 index 0000000000..7ab2cb3e7e --- /dev/null +++ b/programming_examples/basic/passthrough_dmas_plio/test_vck5000.cpp @@ -0,0 +1,134 @@ +//===- test_vck5000.cpp -----------------------------------000---*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2024, Advanced Micro Devices, Inc. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "memory_allocator.h" +#include "test_library.h" + +#include "aie_data_movement.cpp" +#include "aie_inc.cpp" + +#include "hsa/hsa.h" +#include "hsa/hsa_ext_amd.h" + +constexpr int DMA_COUNT = 1024; + +void hsa_check_status(const std::string func_name, hsa_status_t status) { + if (status != HSA_STATUS_SUCCESS) { + const char *status_string(new char[1024]); + hsa_status_string(status, &status_string); + std::cout << func_name << " failed: " << status_string << std::endl; + delete[] status_string; + } else { + std::cout << func_name << " success" << std::endl; + } +} + +int main(int argc, char *argv[]) { + uint64_t row = 0; + uint64_t col = 6; + + std::vector queues; + uint32_t aie_max_queue_size(0); + + aie_libxaie_ctx_t *xaie = mlir_aie_init_libxaie(); + + // This is going to initialize HSA, create a queue + // and get an agent + int ret = mlir_aie_init_device(xaie); + + if (ret) { + std::cout << "[ERROR] Error when calling mlir_aie_init_device)" + << std::endl; + return -1; + } + + // Getting access to all of the HSA agents + std::vector agents = xaie->agents; + + if (agents.empty()) { + std::cout << "No agents found. Exiting." 
<< std::endl; + return -1; + } + + std::cout << "Found " << agents.size() << " agents" << std::endl; + + hsa_queue_t *q = xaie->cmd_queue; + + // Adding to our vector of queues + queues.push_back(q); + assert(queues.size() > 0 && "No queues were sucesfully created!"); + + mlir_aie_configure_cores(xaie); + mlir_aie_configure_switchboxes(xaie); + mlir_aie_initialize_locks(xaie); + mlir_aie_configure_dmas(xaie); + mlir_aie_start_cores(xaie); + + // Allocating some device memory + ext_mem_model_t buf0, buf1, buf2; + uint32_t *in_a = (uint32_t *)mlir_aie_mem_alloc(xaie, buf0, DMA_COUNT); + uint32_t *in_b = (uint32_t *)mlir_aie_mem_alloc(xaie, buf1, DMA_COUNT); + uint32_t *out = (uint32_t *)mlir_aie_mem_alloc(xaie, buf2, DMA_COUNT); + mlir_aie_sync_mem_dev(buf0); + mlir_aie_sync_mem_dev(buf1); + mlir_aie_sync_mem_dev(buf2); + + if (in_a == nullptr || in_b == nullptr || out == nullptr) { + std::cout << "Could not allocate in device memory" << std::endl; + return -1; + } + + for (int i = 0; i < DMA_COUNT; i++) { + in_a[i] = i + 1; + in_b[i] = i + 1; + out[i] = 0xdeface; + } + + // Pass arguments in the order of dma_memcpys in the mlir + invoke_data_movement(queues[0], &agents[0], out, in_a); + + int errors = 0; + + for (int i = 0; i < DMA_COUNT; i++) { + uint32_t s = in_a[i]; + uint32_t d = out[i]; + if (d != s) { + errors++; + printf("mismatch %x != %x\n", d, s); + } + } + + // destroying the queue + hsa_queue_destroy(queues[0]); + + // Shutdown AIR and HSA + mlir_aie_deinit_libxaie(xaie); + + if (!errors) { + printf("PASS!\n"); + return 0; + } else { + printf("fail %d/%d.\n", errors, DMA_COUNT); + return -1; + } +} diff --git a/python/dialects/aie.py b/python/dialects/aie.py index a1b62a950f..0018c7c919 100644 --- a/python/dialects/aie.py +++ b/python/dialects/aie.py @@ -212,6 +212,7 @@ def __init__( dimensionsToStream=None, dimensionsFromStreamPerConsumer=None, via_DMA=None, + plio=None, ): self.datatype = datatype if not isinstance(consumerTiles, List): @@ -231,6 
+232,7 @@ def __init__( dimensionsToStream=dimensionsToStream, dimensionsFromStreamPerConsumer=dimensionsFromStreamPerConsumer, via_DMA=via_DMA, + plio=plio, ) def acquire(self, port, num_elem): diff --git a/test/Targets/AIEGenerateXAIE/plio_shimmux.mlir b/test/Targets/AIEGenerateXAIE/plio_shimmux.mlir index d8ee2023e0..415090dfac 100644 --- a/test/Targets/AIEGenerateXAIE/plio_shimmux.mlir +++ b/test/Targets/AIEGenerateXAIE/plio_shimmux.mlir @@ -26,7 +26,6 @@ // CHECK: __mlir_aie_try(XAie_StrmConnCctEnable(&(ctx->DevInst), XAie_TileLoc(x,y), NORTH, 0, SOUTH, 0)); // CHECK: x = 2; // CHECK: y = 0; -// CHECK: __mlir_aie_try(XAie_EnableAieToShimDmaStrmPort(&(ctx->DevInst), XAie_TileLoc(x,y), 2)); module { aie.device(xcvc1902) {