Skip to content

Commit

Permalink
add --aie-generate-txn to aiecc
Browse files Browse the repository at this point in the history
  • Loading branch information
fifield committed Aug 16, 2024
1 parent c3e8d8e commit 7e8fe92
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 20 deletions.
8 changes: 8 additions & 0 deletions python/compiler/aiecc/cl_arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,14 @@ def parse_args(args=None):
const=True,
help="Generate libxaie v2 for CDO",
)
parser.add_argument(
"--aie-generate-txn",
dest="txn",
default=False,
action="store_const",
const=True,
help="Generate txn binary for configuration",
)
parser.add_argument(
"--aie-generate-xclbin",
dest="xcl",
Expand Down
22 changes: 22 additions & 0 deletions python/compiler/aiecc/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,24 @@ async def process_cdo(self):
await read_file_async(self.prepend_tmp("input_physical.mlir"))
)
generate_cdo(input_physical.operation, self.tmpdirname, emit_unified=True)

async def process_txn(self):
    """Stage ELF artifacts in the temp directory and run ``generate_txn``.

    Copies every ``*.elf`` and ``*.elf.map`` file found in the current
    working directory into ``self.tmpdirname``, parses
    ``input_physical.mlir`` from the temp directory, and passes the parsed
    module's operation together with ``self.tmpdirname`` to
    ``generate_txn``.

    NOTE(review): the output written by ``generate_txn`` is not visible
    here; presumably it emits the txn binary into ``self.tmpdirname`` --
    confirm against the ``aie.dialects.aie`` implementation.
    """
    # Function-scope import, as in the original. Only generate_txn is
    # needed; the previously imported generate_cdo was never used here.
    from aie.dialects.aie import generate_txn

    with Context(), Location.unknown():
        # "*.elf" does not match "*.elf.map", so handling the patterns in
        # this order keeps the original copy order: ELFs first, then maps.
        for pattern in ("*.elf", "*.elf.map"):
            for artifact in glob.glob(pattern):
                try:
                    shutil.copy(artifact, self.tmpdirname)
                except shutil.SameFileError:
                    # Source and destination are the same file, so there
                    # is nothing to copy.
                    pass
        input_physical = Module.parse(
            await read_file_async(self.prepend_tmp("input_physical.mlir"))
        )
        generate_txn(input_physical.operation, self.tmpdirname)

async def process_xclbin_gen(self):
Expand Down Expand Up @@ -1091,9 +1109,13 @@ async def run_flow(self):
# Must have elfs, before we build the final binary assembly
if opts.cdo and opts.execute:
await self.process_cdo()

if opts.cdo or opts.xcl:
await self.process_xclbin_gen()

if opts.txn and opts.execute:
await self.process_txn()

def dumpprofile(self):
sortedruntimes = sorted(
self.runtimes.items(), key=lambda item: item[1], reverse=True
Expand Down
8 changes: 4 additions & 4 deletions test/npu-xrt/add_one_two_txn/run.lit
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
// REQUIRES: ryzen_ai
//
// RUN: clang-15 %S/test.cpp -o test.exe -std=c++11 -Wall %xrt_flags -lrt -lstdc++ -lboost_program_options -lboost_filesystem
// RUN: %python aiecc.py --xclbin-kernel-name=ADDONE --xclbin-kernel-id=0x901 --xclbin-instance-name=ADDONEINST --no-aiesim --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=add_one.xclbin --npu-insts-name=insts.txt %S/aie1.mlir
// RUN: %python aiecc.py --xclbin-kernel-name=ADDTWO --xclbin-kernel-id=0x902 --xclbin-instance-name=ADDTWOINST --no-aiesim --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=add_two.xclbin --npu-insts-name=insts.txt %S/aie2.mlir
// RUN: %python aiecc.py --xclbin-kernel-name=ADDONE --xclbin-kernel-id=0x901 --xclbin-instance-name=ADDONEINST --no-aiesim --aie-generate-cdo --aie-generate-npu --no-compile-host --xclbin-name=add_one.xclbin --npu-insts-name=add_one_insts.txt %S/aie1.mlir
// RUN: %python aiecc.py --no-aiesim --aie-generate-txn --aie-generate-npu --no-compile-host --npu-insts-name=add_two_insts.txt %S/aie2.mlir
// RUN: %python txn2mlir.py aie2.mlir.prj/txn.bin > add_two_cfg.mlir
// RUN: aie-translate -aie-npu-instgen -aie-npu-instgen-binary=true add_two_cfg.mlir -o txn.bin
// RUN: %run_on_npu ./test.exe -x add_one.xclbin -i insts.txt -c txn.bin | FileCheck %s
// RUN: aie-translate -aie-npu-instgen -aie-npu-instgen-binary=true add_two_cfg.mlir -o add_two_cfg.bin
// RUN: %run_on_npu ./test.exe -x add_one.xclbin -i add_one_insts.txt -c add_two_cfg.bin -j add_two_insts.txt | FileCheck %s
// CHECK: PASS!
34 changes: 18 additions & 16 deletions test/npu-xrt/add_one_two_txn/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,10 @@ int main(int argc, const char *argv[]) {
"the input xclbin path")("verbosity,v",
po::value<int>()->default_value(0),
"the verbosity of the output")(
"instr,i", po::value<std::string>()->required(),
"path of file containing userspace instructions to be sent to the LX6")(
"instr0,i", po::value<std::string>()->required(),
"path to instructions for kernel0")("instr1,j",
po::value<std::string>()->required(),
"path to instructions for kernel1")(
"cfg,c", po::value<std::string>()->required(), "txn binary path");
po::variables_map vm;

Expand All @@ -94,21 +96,21 @@ int main(int argc, const char *argv[]) {
return 1;
}

check_arg_file_exists(vm, "xclbin");
check_arg_file_exists(vm, "instr");
check_arg_file_exists(vm, "cfg");
std::vector<uint32_t> instr_0_v =
load_instr_sequence(vm["instr0"].as<std::string>());

std::vector<uint32_t> instr_v =
load_instr_sequence(vm["instr"].as<std::string>());
std::vector<uint32_t> instr_1_v =
load_instr_sequence(vm["instr1"].as<std::string>());

std::vector<uint32_t> cfg_1_v =
load_instr_binary(vm["cfg"].as<std::string>());

int verbosity = vm["verbosity"].as<int>();
if (verbosity >= 1)
std::cout << "Sequence instr count: " << instr_v.size() << "\n";
if (verbosity >= 1)
if (verbosity >= 1) {
std::cout << "Sequence instr 0 count: " << instr_0_v.size() << "\n";
std::cout << "Sequence instr 1 count: " << instr_1_v.size() << "\n";
std::cout << "Sequence cfg count: " << cfg_1_v.size() << "\n";
}

// Start the XRT test code
// Get a device handle
Expand Down Expand Up @@ -141,14 +143,14 @@ int main(int argc, const char *argv[]) {

auto kernel0 = xrt::kernel(context, kernelName0);

auto bo_instr_0 = xrt::bo(device, instr_v.size() * sizeof(int),
auto bo_instr_0 = xrt::bo(device, instr_0_v.size() * sizeof(int),
XCL_BO_FLAGS_CACHEABLE, kernel0.group_id(1));
auto bo_inA_0 = xrt::bo(device, IN_SIZE * sizeof(int32_t),
XRT_BO_FLAGS_HOST_ONLY, kernel0.group_id(3));
auto bo_out_0 = xrt::bo(device, OUT_SIZE * sizeof(int32_t),
XRT_BO_FLAGS_HOST_ONLY, kernel0.group_id(4));

auto bo_instr_1 = xrt::bo(device, instr_v.size() * sizeof(int),
auto bo_instr_1 = xrt::bo(device, instr_1_v.size() * sizeof(int),
XCL_BO_FLAGS_CACHEABLE, kernel0.group_id(1));
auto bo_cfg_1 = xrt::bo(device, cfg_1_v.size() * sizeof(int),
XCL_BO_FLAGS_CACHEABLE, kernel0.group_id(1));
Expand Down Expand Up @@ -179,8 +181,8 @@ int main(int argc, const char *argv[]) {
void *bufInstr_0 = bo_instr_0.map<void *>();
void *bufInstr_1 = bo_instr_1.map<void *>();
void *bufCfg_1 = bo_cfg_1.map<void *>();
memcpy(bufInstr_0, instr_v.data(), instr_v.size() * sizeof(int));
memcpy(bufInstr_1, instr_v.data(), instr_v.size() * sizeof(int));
memcpy(bufInstr_0, instr_0_v.data(), instr_0_v.size() * sizeof(int));
memcpy(bufInstr_1, instr_1_v.data(), instr_1_v.size() * sizeof(int));
memcpy(bufCfg_1, cfg_1_v.data(), cfg_1_v.size() * sizeof(int));

// Synchronizing BOs
Expand All @@ -199,7 +201,7 @@ int main(int argc, const char *argv[]) {
xrt::run run0 = xrt::run(kernel0);
run0.set_arg(0, opcode);
run0.set_arg(1, bo_instr_0);
run0.set_arg(2, instr_v.size());
run0.set_arg(2, instr_0_v.size());
run0.set_arg(3, bo_inA_0);
run0.set_arg(4, bo_out_0);
run0.set_arg(5, 0);
Expand All @@ -220,7 +222,7 @@ int main(int argc, const char *argv[]) {
xrt::run run1 = xrt::run(kernel0);
run1.set_arg(0, opcode);
run1.set_arg(1, bo_instr_1);
run1.set_arg(2, instr_v.size());
run1.set_arg(2, instr_1_v.size());
run1.set_arg(3, bo_inA_1);
run1.set_arg(4, bo_out_1);
run1.set_arg(5, 0);
Expand Down

0 comments on commit 7e8fe92

Please sign in to comment.