Skip to content

Commit

Permalink
Fixing up PLIO compiler support and creating an example
Browse files Browse the repository at this point in the history
  • Loading branch information
eddierichter-amd committed Jul 17, 2024
1 parent 9530ac4 commit d08090b
Show file tree
Hide file tree
Showing 13 changed files with 516 additions and 35 deletions.
7 changes: 5 additions & 2 deletions include/aie/Dialect/AIE/IR/AIEOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -1538,7 +1538,9 @@ def AIE_ShimDMAAllocationOp : AIE_Op<"shim_dma_allocation", [HasParent<"DeviceOp
ins FlatSymbolRefAttr:$sym_name,
DMAChannelDir:$channel_dir,
AIEI64Attr:$channel_index,
AIEI64Attr:$col
AIEI64Attr:$col,
// If this is set we are using the PLIO in this ShimTile
DefaultValuedAttr<BoolAttr, "false">:$plio
);

let results = (outs);
Expand Down Expand Up @@ -1634,7 +1636,8 @@ def AIE_ObjectFifoCreateOp: AIE_Op<"objectfifo", [HasParent<"DeviceOp">, Symbol]
TypeAttrOf<AIE_ObjectFifoType>:$elemType,
BDDimLayoutArrayAttr:$dimensionsToStream,
BDDimLayoutArrayArrayAttr:$dimensionsFromStreamPerConsumer,
DefaultValuedAttr<BoolAttr, "false">:$via_DMA
DefaultValuedAttr<BoolAttr, "false">:$via_DMA,
DefaultValuedAttr<BoolAttr, "false">:$plio
);

let assemblyFormat = [{
Expand Down
11 changes: 4 additions & 7 deletions lib/Dialect/AIE/Transforms/AIECreatePathFindFlows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,10 @@ struct ConvertFlowsToInterconnect : OpConversionPattern<FlowOp> {
srcBundle, srcChannel, WireBundle::North, shimCh);
} else if (srcBundle ==
WireBundle::PLIO) { // PLIO at start of flows with mux
if (srcChannel == 2 || srcChannel == 3 || srcChannel == 6 ||
srcChannel == 7) { // Only some PLIO requrie mux
ShimMuxOp shimMuxOp = analyzer.getShimMux(rewriter, srcSB.col);
addConnection(
rewriter, cast<Interconnect>(shimMuxOp.getOperation()),
flowOp, srcBundle, srcChannel, WireBundle::North, shimCh);
}
ShimMuxOp shimMuxOp = analyzer.getShimMux(rewriter, srcSB.col);
addConnection(
rewriter, cast<Interconnect>(shimMuxOp.getOperation()),
flowOp, srcBundle, srcChannel, WireBundle::North, shimCh);
}
}
for (const auto &[bundle, channel] : setting.dsts) {
Expand Down
29 changes: 23 additions & 6 deletions lib/Dialect/AIE/Transforms/AIEObjectFifoStatefulTransform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -973,11 +973,11 @@ struct AIEObjectFifoStatefulTransformPass
void createObjectFifoAllocationInfo(OpBuilder &builder, MLIRContext *ctx,
FlatSymbolRefAttr obj_fifo, int colIndex,
DMAChannelDir channelDir,
int channelIndex) {
int channelIndex, bool plio) {
builder.create<ShimDMAAllocationOp>(builder.getUnknownLoc(), obj_fifo,
DMAChannelDirAttr::get(ctx, channelDir),
builder.getI64IntegerAttr(channelIndex),
builder.getI64IntegerAttr(colIndex));
builder.getI64IntegerAttr(colIndex), builder.getBoolAttr(plio));
}

void runOnOperation() override {
Expand All @@ -986,6 +986,8 @@ struct AIEObjectFifoStatefulTransformPass
DMAChannelAnalysis dmaAnalysis(device);
OpBuilder builder = OpBuilder::atBlockEnd(device.getBody());
auto ctx = device->getContext();
auto producerWireType = WireBundle::DMA;
auto consumerWireType = WireBundle::DMA;
std::set<TileOp>
objectFifoTiles; // track cores to check for loops during unrolling

Expand Down Expand Up @@ -1125,13 +1127,15 @@ struct AIEObjectFifoStatefulTransformPass
producerChan.channel, 0, producer.getDimensionsToStreamAttr());
// generate objectFifo allocation info
builder.setInsertionPoint(&device.getBody()->back());

if (producer.getProducerTileOp().isShimTile())
createObjectFifoAllocationInfo(
builder, ctx, SymbolRefAttr::get(ctx, producer.getName()),
producer.getProducerTileOp().colIndex(), producerChan.direction,
producerChan.channel);
producerChan.channel, producer.getPlio());

for (auto consumer : consumers) {

// create consumer tile DMA
DMAChannel consumerChan =
dmaAnalysis.getSlaveDMAChannel(consumer.getProducerTile());
Expand All @@ -1141,18 +1145,31 @@ struct AIEObjectFifoStatefulTransformPass
consumerChan.channel, 1, consumerDims);
// generate objectFifo allocation info
builder.setInsertionPoint(&device.getBody()->back());

// If we have PLIO then figure out the direction and make that a PLIO
if(producer.getPlio()) {
producerWireType = producer.getProducerTileOp().isShimTile() ? WireBundle::PLIO : WireBundle::DMA;
consumerWireType = !(producer.getProducerTileOp().isShimTile()) ? WireBundle::PLIO : WireBundle::DMA;
}
else {
producerWireType = WireBundle::DMA;
consumerWireType = WireBundle::DMA;
}

if (consumer.getProducerTileOp().isShimTile())
createObjectFifoAllocationInfo(
builder, ctx, SymbolRefAttr::get(ctx, producer.getName()),
consumer.getProducerTileOp().colIndex(), consumerChan.direction,
consumerChan.channel);
consumerChan.channel, producer.getPlio());



// create flow
builder.setInsertionPointAfter(producer);
builder.create<FlowOp>(builder.getUnknownLoc(),
producer.getProducerTile(), WireBundle::DMA,
producer.getProducerTile(), producerWireType,
producerChan.channel, consumer.getProducerTile(),
WireBundle::DMA, consumerChan.channel);
consumerWireType, consumerChan.channel);
}
}

Expand Down
5 changes: 3 additions & 2 deletions lib/Targets/AIETargetHSA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#include "aie/Dialect/AIEX/IR/AIEXDialect.h"
#include "aie/Targets/AIETargets.h"

#include "mlir/Dialect/Func/IR/FuncOps.h" // Eddie added to get the NPU func ops
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Pass/Pass.h"
Expand Down Expand Up @@ -134,6 +134,7 @@ mlir::LogicalResult AIETranslateToHSA(ModuleOp module, raw_ostream &output) {
uint32_t ChannelId = infoOp->getChannelIndex();
bool isMM2S = channelDir == AIE::DMAChannelDir::MM2S;
int col = infoOp->getCol();
bool isPlio = infoOp->getPlio();

llvm::SmallVector<int64_t, 4> strides = llvm::map_to_vector(
llvm::reverse(op.getMixedStrides()),
Expand Down Expand Up @@ -182,7 +183,7 @@ mlir::LogicalResult AIETranslateToHSA(ModuleOp module, raw_ostream &output) {
output << "\tmlir_aie_packet_nd_memcpy(&pkt" << op_count
<< ", 0 /* herd_id */, " << col << " /* col */, " << isMM2S
<< " /* dir */, " << ChannelId
<< "/* channel */, 4 /* Burst length */, 2 /* Memory space */, "
<< "/* channel */, 4 /* Burst length */, " << (isPlio ? 1 : 2) << " /* Memory space */, "
"(uint64_t)buf"
<< arg_idx << " + " << offset << " /* Address */, " << sizes[0] * 4
<< " /* 1d_length */, " << (strides[1] ? sizes[1] : 1)
Expand Down
56 changes: 38 additions & 18 deletions lib/Targets/AIETargetXAIEV2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -725,24 +725,44 @@ mlir::LogicalResult AIETranslateToXAIEV2(ModuleOp module, raw_ostream &output) {
}

for (auto connectOp : b.getOps<ConnectOp>()) {
if (connectOp.getSourceBundle() == WireBundle::North)
// demux!
output
<< "__mlir_aie_try(XAie_EnableAieToShimDmaStrmPort("
<< deviceInstRef << ", " << tileLocStr("x", "y")
<< ", "
// <<
// stringifyWireBundle(connectOp.sourceBundle()).upper()
<< connectOp.sourceIndex() << "));\n";
else if (connectOp.getDestBundle() == WireBundle::North)
// mux
output
<< "__mlir_aie_try(XAie_EnableShimDmaToAieStrmPort("
<< deviceInstRef << ", " << tileLocStr("x", "y")
<< ", "
// <<
// stringifyWireBundle(connectOp.sourceBundle()).upper()
<< connectOp.destIndex() << "));\n";

if(connectOp.getSourceBundle() == WireBundle::DMA || connectOp.getDestBundle() == WireBundle::DMA) {
if (connectOp.getSourceBundle() == WireBundle::North)
// demux!
output
<< "__mlir_aie_try(XAie_EnableAieToShimDmaStrmPort("
<< deviceInstRef << ", " << tileLocStr("x", "y")
<< ", "
// <<
// stringifyWireBundle(connectOp.sourceBundle()).upper()
<< connectOp.sourceIndex() << "));\n";
else if (connectOp.getDestBundle() == WireBundle::North)
// mux
output
<< "__mlir_aie_try(XAie_EnableShimDmaToAieStrmPort("
<< deviceInstRef << ", " << tileLocStr("x", "y")
<< ", "
// <<
// stringifyWireBundle(connectOp.sourceBundle()).upper()
<< connectOp.destIndex() << "));\n";
}

else if(connectOp.getSourceBundle() == WireBundle::PLIO || connectOp.getDestBundle() == WireBundle::PLIO) {
// Note: Right now this just works with PLIO channel 0 and 1 as those don't require to program
// the shim mux
if(connectOp.destIndex() != 0 && connectOp.destIndex() != 1) {
return connectOp.emitOpError("Currently only PLIO channel 0 and 1 are supported.");
}

if (connectOp.getDestBundle() == WireBundle::North)
// mux
output
<< "__mlir_aie_try(XAie_PlToAieIntfEnable("
<< deviceInstRef << ", " << tileLocStr("x", "y")
<< ", "
<< connectOp.destIndex()
<< ", PLIF_WIDTH_64));\n";
}
}
}
for (auto switchboxOp : targetOp.getOps<ShimSwitchboxOp>()) {
Expand Down
75 changes: 75 additions & 0 deletions programming_examples/basic/passthrough_dmas_plio/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2023 Advanced Micro Devices, Inc.

# parameters
# -DBOOST_ROOT: Path to Boost install
# -DXRT_INC_DIR: Full path to src/runtime_src/core/include in XRT cloned repo
# -DXRT_LIB_DIR: Path to xrt_coreutil.lib
# -DTARGET_NAME: Target name to be built

# Minimum CMake version. This script uses target_link_directories() (added in
# CMake 3.13) and CMAKE_CXX_STANDARD 23 (accepted from CMake 3.20), so require
# 3.20 to fail early with a clear message on older installations.
cmake_minimum_required(VERSION 3.20)

# Build the host code as C++23 and refuse to silently fall back to an older
# standard.
set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_STANDARD_REQUIRED YES)

# powershell.exe being on the PATH is used as the marker for a WSL/Windows
# build; everything else is treated as a native Linux build.
find_program(WSL NAMES powershell.exe)

# Per-platform defaults. The paths are cache entries, so each can be
# overridden on the command line with -D<NAME>=<path>.
if (NOT WSL)
    set(CMAKE_C_COMPILER gcc-13)
    set(CMAKE_CXX_COMPILER g++-13)
    set(BOOST_ROOT /usr/include/boost CACHE STRING "Path to Boost install")
    set(XRT_INC_DIR /opt/xilinx/xrt/include CACHE STRING "Path to XRT cloned repo")
    set(XRT_LIB_DIR /opt/xilinx/xrt/lib CACHE STRING "Path to xrt_coreutil.lib")
else()
    set(BOOST_ROOT C:/Technical/thirdParty/boost_1_83_0 CACHE STRING "Path to Boost install")
    set(XRT_INC_DIR C:/Technical/XRT/src/runtime_src/core/include CACHE STRING "Path to XRT cloned repo")
    set(XRT_LIB_DIR C:/Technical/xrtNPUfromDLL CACHE STRING "Path to xrt_coreutil.lib")
endif()

set(TARGET_NAME test CACHE STRING "Target to be built")

# The project is named after the target; the executable uses the target name
# directly.
SET (ProjectName proj_${TARGET_NAME})
SET (currentTarget ${TARGET_NAME})

# On WSL, place Release executables directly in the build directory.
if ( WSL )
    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR})
endif ()

project(${ProjectName})

# Find packages
find_package(Boost REQUIRED)

# Host executable: the shared test utilities plus this example's test.cpp.
add_executable(${currentTarget}
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../runtime_lib/test_lib/test_utils.cpp
    test.cpp
)

target_compile_definitions(${currentTarget} PUBLIC DISABLE_ABI_CHECK=1)

target_include_directories (${currentTarget} PUBLIC
    ${XRT_INC_DIR}
    ${Boost_INCLUDE_DIRS}
    ${CMAKE_CURRENT_SOURCE_DIR}/../../../runtime_lib/test_lib
)

target_link_directories(${currentTarget} PUBLIC
    ${XRT_LIB_DIR}
    ${Boost_LIBRARY_DIRS}
)

# The Boost libraries are named explicitly only on the non-WSL path; the WSL
# path links xrt_coreutil alone.
if (NOT WSL)
    target_link_libraries(${currentTarget} PUBLIC
        xrt_coreutil
        boost_program_options
        boost_filesystem
    )
else()
    target_link_libraries(${currentTarget} PUBLIC
        xrt_coreutil
    )
endif()
50 changes: 50 additions & 0 deletions programming_examples/basic/passthrough_dmas_plio/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
##===- Makefile -----------------------------------------------------------===##
#
# This file licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# Copyright (C) 2024, Advanced Micro Devices, Inc.
#
##===----------------------------------------------------------------------===##

# Directory containing this Makefile, so the example can be built from any
# working directory.
srcdir := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))

include ${srcdir}/../../makefile-common

# devicename, col and LENGTH are forwarded to the design generators below;
# col is the column of the shim tile whose PLIO is used.
devicename ?= xcvc1902
col ?= 26
targetname = passThroughDMAs
LENGTH ?= 1024

# None of these targets is the name of a file they produce, so always run them.
.PHONY: all input output run_vck5000 clean

all: input output

# Generate the MLIR for the design that feeds data into the AIEs over PLIO.
build/aie-input-plio.mlir: ${srcdir}/aie2-input-plio.py
	mkdir -p ${@D}
	python3 $< ${LENGTH} ${devicename} ${col} > $@

# Generate the MLIR for the design that receives data from the AIEs over PLIO.
build/aie-output-plio.mlir: ${srcdir}/aie2-output-plio.py
	mkdir -p ${@D}
	python3 $< ${LENGTH} ${devicename} ${col} > $@

# Compile the input design and link it with the host test into input.elf.
input: build/aie-input-plio.mlir
	aiecc.py --link_against_hsa --host-target=x86_64-amd-linux-gnu build/aie-input-plio.mlir \
		-I${srcdir}/../../../install/runtime_lib/x86_64-hsa/test_lib/include \
		-L/lib/x86_64-linux-gnu/ \
		${srcdir}/test_vck5000.cpp \
		${srcdir}/../../../install/runtime_lib/x86_64-hsa/test_lib/src/test_library.cpp \
		-Wl,--whole-archive -Wl,--no-whole-archive -lstdc++ -ldl -lelf -o input.elf

# Compile the output design and link it with the host test into output.elf.
output: build/aie-output-plio.mlir
	aiecc.py --link_against_hsa --host-target=x86_64-amd-linux-gnu build/aie-output-plio.mlir \
		-I${srcdir}/../../../install/runtime_lib/x86_64-hsa/test_lib/include \
		-L/lib/x86_64-linux-gnu/ \
		${srcdir}/test_vck5000.cpp \
		${srcdir}/../../../install/runtime_lib/x86_64-hsa/test_lib/src/test_library.cpp \
		-Wl,--whole-archive -Wl,--no-whole-archive -lstdc++ -ldl -lelf -o output.elf

# Run the executables produced by the `output` and `input` targets. This file
# never builds a test.elf, so run the two binaries it does produce; build them
# first with `make all`.
run_vck5000:
	./output.elf
	./input.elf

clean:
	rm -rf build aie-output-plio.mlir.prj aie-input-plio.mlir.prj core_* input.elf output.elf
27 changes: 27 additions & 0 deletions programming_examples/basic/passthrough_dmas_plio/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<!---//===- README.md --------------------------*- Markdown -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Copyright (C) 2024, Advanced Micro Devices, Inc.
//
//===----------------------------------------------------------------------===//-->

# <ins>Passthrough DMAs with PLIO</ins>

This reference design can be run on the VCK5000 Versal device. This design leverages the same data movement pattern as the [Passthrough DMAs](../passthrough_dmas) example design, but it uses a soft DMA. Please see the [platforms repo](https://github.com/Xilinx/ROCm-air-platforms) for more information on how the programmable logic is integrated with the AIEs. This is meant to be an illustrative example to highlight how to integrate PL designs with AIE designs programmed using mlir-aie.

In the platform, tile (26, 0) has PLIO connected to a DMA implemented in the programmable logic. There are two designs: `aie2-input-plio.py` uses the soft DMA to push data from DRAM into the AIEs, whereas `aie2-output-plio.py` uses the soft DMA to receive data from the AIEs and push it to DRAM. The soft DMA is programmed using the same mechanism as the ShimDMAs.

In both designs ([input](./aie2-input-plio.py) and [output](./aie2-output-plio.py)), data is brought from external memory to `ComputeTile2` and back, without modification from the tile, by using an implicit copy via the compute tile's Data Movement Accelerator (DMA). The data is read from and written to external memory through the Shim tile (`col`, 0).

The implicit copy is performed using the `object_fifo_link` operation that specifies how input data arriving via `of_in` should be sent further via `of_out` by specifically leveraging the compute tile's DMA. This operation and its functionality are described in more depth in [Section-2b](../../../programming_guide/section-2/section-2b/03_Link_Distribute_Join/README.md#object-fifo-link) of the programming guide.


To compile and run the design for VCK5000:
```
make all
./output.elf // To run the kernel which outputs over PLIO
./input.elf // To run the kernel which inputs over PLIO
```
Loading

0 comments on commit d08090b

Please sign in to comment.