Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[API][Backend] Streaming and OpenCL Backends #138

Merged
merged 104 commits into from
Dec 10, 2019
Merged
Changes from 1 commit
Commits
Show all changes
104 commits
Select commit Hold shift + click to select a range
2b3b2b0
add sdaccel, aocl for heterocl
ybai62868 Jul 25, 2019
86525fc
fpga
ybai62868 Jul 25, 2019
1d8115f
Create codeanalys_openclc.cc
ybai62868 Jul 25, 2019
47026fe
Update target.py
ybai62868 Jul 25, 2019
5fba7cc
run
ybai62868 Jul 26, 2019
31d00e3
can run successfully
ybai62868 Jul 26, 2019
e0dc81f
Create codegen_opencl.cc
ybai62868 Jul 29, 2019
8c010da
now
ybai62868 Jul 29, 2019
843b6f4
all done
ybai62868 Jul 30, 2019
f0ac7a7
Update codegen_sdaccel.cc
ybai62868 Jul 30, 2019
8713bda
Update codegen_sdaccel.cc
ybai62868 Jul 30, 2019
e687bfb
modified: python/heterocl/tvm/target.py
hgyhungry Aug 7, 2019
b091758
new file: samples/ppac/gemm/csrcPrint.py
hgyhungry Aug 7, 2019
8e12d35
all
ybai62868 Aug 7, 2019
416ca43
remove tvm check code from kernel
hgyhungry Aug 9, 2019
e9a0a1c
opencl-backend
ybai62868 Aug 14, 2019
5695f2c
all
ybai62868 Aug 14, 2019
568555d
fix ppac module build
hgyhungry Aug 15, 2019
2518aeb
support ppac MVPb pragma
hgyhungry Aug 15, 2019
314994e
fix ignoring ppac pragma in cpu backend
hgyhungry Aug 16, 2019
a06f97b
opencl-backend
ybai62868 Aug 18, 2019
3f03b41
aocl-backend
ybai62868 Aug 18, 2019
ffb8673
move ppac codegen to ppac folder; fix argument name with merlinc anal…
hgyhungry Sep 2, 2019
8afdea8
discard the new for-loop type; include ppac in hlib
hgyhungry Sep 2, 2019
819eae8
discard some previous changes
hgyhungry Sep 2, 2019
4ee0a93
Use int64_t as return type of GeMM on ppac
hgyhungry Sep 3, 2019
66851f0
[add] codegenc kerneldef + stream init
hecmay Sep 3, 2019
63833c2
[add] var_shape_map
Sep 4, 2019
69fd36f
[update] kerneldef struct shape
hecmay Sep 5, 2019
32a522d
[update] use noderef and restore
hecmay Sep 5, 2019
171699c
[fix] return op
Sep 6, 2019
2cd15d2
[add] hcl device & kernelstmt printer
Sep 7, 2019
adb5af1
[fix] def workaround
Sep 9, 2019
3457773
[update] stream example
hecmay Sep 12, 2019
2330ea3
[add] stream expr & stmt ir
hecmay Sep 13, 2019
ae7bebf
[fix] kernel arg location for stream
hecmay Sep 14, 2019
c2dbf4c
Merge branch 'master' of https://github.com/Hecmay/heterocl
hecmay Sep 14, 2019
83e4d7e
opt1
ybai62868 Aug 18, 2019
df48ef9
opencl-general
ybai62868 Aug 18, 2019
d51970a
new-version
ybai62868 Aug 18, 2019
530ce5d
no bug
ybai62868 Aug 18, 2019
64d91e5
a
ybai62868 Aug 18, 2019
e123363
test+unroll+pipeline
ybai62868 Aug 19, 2019
0cdceb8
pragma
ybai62868 Aug 21, 2019
4fba56c
new
ybai62868 Aug 21, 2019
ccd58fd
type has fixed
ybai62868 Aug 22, 2019
1832760
new_test
ybai62868 Aug 23, 2019
c3d8f3c
test_reorder_split_fuse
ybai62868 Aug 23, 2019
ad27bcc
target
ybai62868 Aug 23, 2019
337db9a
order
ybai62868 Aug 23, 2019
3cb38bf
simplified by rui
ybai62868 Aug 23, 2019
c4562e5
analysis
ybai62868 Aug 23, 2019
51a4f77
bug fixed
ybai62868 Aug 27, 2019
6fe29d5
[delete] all of the code about opencl
ybai62868 Aug 27, 2019
a0611c6
[ADD] new opencl back-end including xilinx & intel
ybai62868 Aug 27, 2019
4852b98
fixed __local
ybai62868 Aug 28, 2019
ea0771f
fixed data_type for xilinx opencl
ybai62868 Sep 2, 2019
4ebdfb7
add makefile for SDAccel_runtime
ybai62868 Sep 2, 2019
05ff646
add the runtime for sdaccel
ybai62868 Sep 3, 2019
fa30a01
create the sdaccel host
ybai62868 Sep 3, 2019
51584e4
fixed the indent problem partly
ybai62868 Sep 3, 2019
c5239bf
test the zhang-05 server
ybai62868 Sep 4, 2019
7991a81
add indent to the host.cpp
ybai62868 Sep 4, 2019
104e5e6
automatically generate makefile
ybai62868 Sep 5, 2019
dad3e75
delete common folder from opencl
ybai62868 Sep 5, 2019
59f5f7e
add shmat to sdaccel runtime
ybai62868 Sep 5, 2019
e2dd3a2
fixed bug for sdaccel runtime seg fault
ybai62868 Sep 5, 2019
f405ec3
fixed the bug of host.cpp multiple
ybai62868 Sep 5, 2019
edfa9ce
fixed host.cpp multiple bug
ybai62868 Sep 5, 2019
4bb58fa
fixed endif for makefile
ybai62868 Sep 5, 2019
edf784f
modify sdaccel_sw_emu -> sdaccel_csim
ybai62868 Sep 6, 2019
aa67e48
fix the __local and __global for intel opencl back-end
ybai62868 Sep 12, 2019
c46b932
Fix the arbitrary integer precision for aocl
ybai62868 Sep 13, 2019
879da3c
[add] ir visitor & functor for codegen
Sep 15, 2019
245bffa
[add] aocl stream codegen
hecmay Sep 16, 2019
cc65e5d
[add] aocl stream support
hecmay Sep 18, 2019
786ccb7
[fix] aocl type conversion
hecmay Sep 18, 2019
1ec3fcd
[fix] aocl channel syntax
hecmay Sep 18, 2019
1ab0c8c
[add] sch.stream_to
hecmay Oct 1, 2019
8b8dea9
[fix] add stream annotation
hecmay Oct 4, 2019
23fa599
[add] host device codegen
hecmay Oct 6, 2019
a3f168a
[add] stream ir mutator
hecmay Oct 15, 2019
5a3112e
[Add] Interface pragma for SDx sim
hecmay Oct 16, 2019
52ffe80
[add] host xcel codegen
hecmay Oct 22, 2019
c5907cf
[update] build interface
hecmay Oct 22, 2019
99fe2b7
[update] new build interface
hecmay Oct 31, 2019
a18f24f
[fix] temp update
hecmay Oct 31, 2019
955974c
[add] ppac hlib & riscv codegen
hecmay Oct 31, 2019
76dec95
Merge branch 'ppac' into opencl
hecmay Oct 31, 2019
ed31f99
[update] stream example
hecmay Nov 8, 2019
73daf28
[add] rocc-ppac sim
hecmay Nov 10, 2019
e8fe221
[rm] submodule
hecmay Nov 13, 2019
c98d859
[update] rocc ppac hlib
hecmay Nov 13, 2019
6d9780a
[add] unified sim & kernel updater
hecmay Nov 25, 2019
85db48d
re-organize build common util
hecmay Nov 28, 2019
f85cfdd
merge with upstream
hecmay Nov 28, 2019
8c72a7a
[update] stream in codegen c
hecmay Dec 4, 2019
280ae2f
[update] codegen construct for streaming
hecmay Dec 7, 2019
96e388f
[update] code post-processing
hecmay Dec 8, 2019
1aabf4e
[fix] test cases
hecmay Dec 9, 2019
dd3e2a8
[fix] python compatibility
hecmay Dec 9, 2019
eefae89
[update] future
hecmay Dec 10, 2019
e53cb1e
[fix] metaclass
hecmay Dec 10, 2019
378069f
[fix] test import issue
hecmay Dec 10, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fixed the bug of host.cpp multiple
ybai62868 authored and hecmay committed Sep 14, 2019

Verified

This commit was signed with the committer’s verified signature.
webknjaz 🇺🇦 Sviatoslav Sydorenko (Святослав Сидоренко)
commit f405ec361296230a6d8717325b4709aab81db14d
55 changes: 55 additions & 0 deletions samples/lenet/common/common.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Shared build/run rules for an SDAccel host program and its OpenCL kernel.
# The including makefile is expected to set HOST_SRCS, HOST_EXE_DIR, HOST_EXE,
# HOST_ARGS, KERNEL_SRCS, KERNEL_NAME, KERNEL_DEFS, KERNEL_INCS, XDEVICE,
# XCLBIN, XCLBIN_NAME, SDA_FLOW and BOARD_SETUP_FILE before including this file.
SHELL = /bin/bash
VPATH = ./
CC = xcpp
CLCC = xocc

# Only pass a device repository path to the tools when one was provided.
ifeq ($(XDEVICE_REPO_PATH),)
DEVICE_REPO_OPT =
else
DEVICE_REPO_OPT = --xp prop:solution.device_repo_paths=${XDEVICE_REPO_PATH}
endif

HOST_CFLAGS += -I${XILINX_SDX}/runtime/include/1_2
HOST_LFLAGS += -L${XILINX_SDX}/runtime/lib/x86_64 -lxilinxopencl -lrt -pthread
CLCC_OPT += $(CLCC_OPT_LEVEL) ${DEVICE_REPO_OPT} --xdevice ${XDEVICE} -o ${XCLBIN} ${KERNEL_DEFS} ${KERNEL_INCS}
ifeq (${KEEP_TEMP},1)
CLCC_OPT += -s
endif
ifeq (${KERNEL_DEBUG},1)
CLCC_OPT += -g
endif
CLCC_OPT += --kernel ${KERNEL_NAME}

OBJECTS := $(HOST_SRCS:.cpp=.o)

# None of these targets name a file they produce.
.PHONY: all run host xbin xbin_cpu_em xbin_hw_em xbin_hw \
        run_cpu_em run_hw_em run_hw run_em run_hw_int \
        estimate xconfig clean cleanall

all: run

# `all` depends on `run`; route it to the runner that matches SDA_FLOW, the
# same pairing the run_cpu_em / run_hw_em / run_hw wrappers below use.
ifeq (${SDA_FLOW},hw)
run: run_hw_int
else
run: run_em
endif

host: ${HOST_EXE_DIR}/${HOST_EXE}

# The wrappers re-enter sdaccel.mk with SDA_FLOW fixed. $(MAKE) is used so
# that flags such as -j and -n propagate to the sub-make.
xbin_cpu_em:
	$(MAKE) SDA_FLOW=cpu_emu xbin -f sdaccel.mk

xbin_hw_em:
	$(MAKE) SDA_FLOW=hw_emu xbin -f sdaccel.mk

xbin_hw:
	$(MAKE) SDA_FLOW=hw xbin -f sdaccel.mk

xbin: ${XCLBIN}

run_cpu_em:
	$(MAKE) SDA_FLOW=cpu_emu run_em -f sdaccel.mk

run_hw_em:
	$(MAKE) SDA_FLOW=hw_emu run_em -f sdaccel.mk

run_hw:
	$(MAKE) SDA_FLOW=hw run_hw_int -f sdaccel.mk

# Emulation run: needs the emulation config, the host binary and the xclbin.
run_em: xconfig host xbin
	XCL_EMULATION_MODE=true ${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS}

# Hardware run: source the board setup script, then launch the host binary.
run_hw_int: host xbin_hw
	source ${BOARD_SETUP_FILE};${HOST_EXE_DIR}/${HOST_EXE} ${HOST_ARGS}

estimate:
	${CLCC} -c -t hw_emu --xdevice ${XDEVICE} --report estimate ${KERNEL_SRCS}

xconfig: emconfig.json

emconfig.json:
	emconfigutil --xdevice ${XDEVICE} ${DEVICE_REPO_OPT} --od .

${HOST_EXE_DIR}/${HOST_EXE}: ${OBJECTS}
	${CC} ${HOST_LFLAGS} ${OBJECTS} -o $@

${XCLBIN}:
	${CLCC} ${CLCC_OPT} ${KERNEL_SRCS}

%.o: %.cpp
	${CC} ${HOST_CFLAGS} -c $< -o $@

# Remove the host binary from the directory the link rule writes it to.
clean:
	${RM} -rf ${HOST_EXE_DIR}/${HOST_EXE} ${OBJECTS} ${XCLBIN} emconfig.json _xocc_${XCLBIN_NAME}_*.dir .Xil

cleanall: clean
	${RM} -rf *.xclbin sdaccel_profile_summary.* _xocc_* TempConfig *.log *.jou
23 changes: 23 additions & 0 deletions samples/lenet/lenet_sdaccel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import heterocl as hcl
import numpy as np
from lenet_main import *

batch_size = 50
# Number of MNIST test images this script evaluates (the loop bound used to be
# the literal 50).
num_images = 50
# The script originally asserted 9882 correct predictions next to a message
# about 10000 test images, so that reference count only applies to a full run.
full_test_set_size = 10000
full_test_set_correct = 9882

# f = build_lenet_inf(batch_size, 'vhls_csim')
f = build_lenet_inf(batch_size, 'sdaccel_sw_emu')

# NOTE(review): `mx`, the weight_*_hcl arrays, qtype1 and qtype2 are not
# defined in this file; presumably they come from the star import of
# lenet_main -- confirm.
mnist = mx.test_utils.get_mnist()
correct_sum = 0

for i in range(num_images // batch_size):
    # Slice out one batch of labels and images.
    label = mnist['test_label'][i*batch_size:(i+1)*batch_size]
    input_image_np = mnist['test_data'][i*batch_size:(i+1)*batch_size]
    input_image_hcl = hcl.asarray(input_image_np)
    output_hcl = hcl.asarray(np.zeros((batch_size,10)))
    f(input_image_hcl, weight_conv1_hcl, weight_conv2_hcl, weight_fc1_hcl, weight_fc2_hcl, output_hcl)
    # Predicted class is the index of the largest of the 10 outputs per image.
    prediction = np.argmax(output_hcl.asnumpy(), axis=1)
    correct_sum += np.sum(np.equal(prediction, label))

# Report accuracy over the images actually evaluated, not a fixed 10000.
print(str(qtype1) + ", " + str(qtype2) + ": Accuracy over {} test images is: {}".format(num_images, correct_sum / float(num_images)))
# The reference count can only be reached when the whole test set was run.
if num_images == full_test_set_size:
    assert correct_sum == full_test_set_correct
193 changes: 193 additions & 0 deletions samples/lenet/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
#define CL_HPP_CL_1_2_DEFAULT_BUILD
#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1
#include <CL/cl2.hpp>
#include <fstream>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <cstring>
#include <iostream>
#include <iomanip>
#include <cmath>
#include <sys/ipc.h>
#include <sys/shm.h>
#pragma once




int main(void) {
#if defined(SDX_PLATFORM) && !defined(TARGET_DEVICE)
#define STR_VALUE(arg) #arg
#define GET_STRING(name) STR_VALUE(name)
#define TARGET_DEVICE GET_STRING(SDX_PLATFORM)
#endif
char* xclbinFilename = argv[1];

std::vector<float> source_0(50 * 1 * 28 * 28);
std::vector<int> source_1(20 * 1 * 5 * 5);
std::vector<int> source_2(50 * 20 * 5 * 5);
std::vector<int> source_3(500 * 800);
std::vector<int> source_4(10 * 500);
std::vector<float> source_5(50 * 10);

size_t vector_size_bytes_0 = sizeof(float) * 50 * 1 * 28 * 28;
size_t vector_size_bytes_1 = sizeof(int) * 20 * 1 * 5 * 5;
size_t vector_size_bytes_2 = sizeof(int) * 50 * 20 * 5 * 5;
size_t vector_size_bytes_3 = sizeof(int) * 500 * 800;
size_t vector_size_bytes_4 = sizeof(int) * 10 * 500;
size_t vector_size_bytes_5 = sizeof(float) * 50 * 10;

float* arg_0 = (float*)shmat(2949125, nullptr, 0);
for (size_t i0 = 0; i0 < 50; i0++) {
for (size_t i1 = 0; i1 < 1; i1++) {
for (size_t i2 = 0; i2 < 28; i2++) {
for (size_t i3 = 0; i3 < 28; i3++) {
source_0[i3 + i2*28 + i1*784 + i0*784] = arg_0[i3 + i2*28 + i1*784 + i0*784];
}
}
}
}
int* arg_1 = (int*)shmat(3473408, nullptr, 0);
for (size_t i0 = 0; i0 < 20; i0++) {
for (size_t i1 = 0; i1 < 1; i1++) {
for (size_t i2 = 0; i2 < 5; i2++) {
for (size_t i3 = 0; i3 < 5; i3++) {
source_1[i3 + i2*5 + i1*25 + i0*25] = arg_1[i3 + i2*5 + i1*25 + i0*25] >> 14;
}
}
}
}
int* arg_2 = (int*)shmat(3473409, nullptr, 0);
for (size_t i0 = 0; i0 < 50; i0++) {
for (size_t i1 = 0; i1 < 20; i1++) {
for (size_t i2 = 0; i2 < 5; i2++) {
for (size_t i3 = 0; i3 < 5; i3++) {
source_2[i3 + i2*5 + i1*25 + i0*500] = arg_2[i3 + i2*5 + i1*25 + i0*500] >> 14;
}
}
}
}
int* arg_3 = (int*)shmat(2097154, nullptr, 0);
for (size_t i0 = 0; i0 < 500; i0++) {
for (size_t i1 = 0; i1 < 800; i1++) {
source_3[i1 + i0*800] = arg_3[i1 + i0*800] >> 14;
}
}
int* arg_4 = (int*)shmat(1835011, nullptr, 0);
for (size_t i0 = 0; i0 < 10; i0++) {
for (size_t i1 = 0; i1 < 500; i1++) {
source_4[i1 + i0*500] = arg_4[i1 + i0*500] >> 14;
}
}
float* arg_5 = (float*)shmat(1703940, nullptr, 0);
for (size_t i0 = 0; i0 < 50; i0++) {
for (size_t i1 = 0; i1 < 10; i1++) {
source_5[i1 + i0*10] = arg_5[i1 + i0*10];
}
}
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
cl::Platform platform = platforms[0];

std::vector<cl::Device> devices;
platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices);
cl::Device device = devices[0];

cl::Context context(device);
cl::CommandQueue q(context, device);

std::ifstream bin_file(xclbinFilename, std::ifstream::binary);
bin_file.seekg (0, bin_file.end);
unsigned nb = bin_file.tellg();
bin_file.seekg (0, bin_file.beg);
char *buf = new char [nb];
bin_file.read(buf, nb);

cl::Program::Binaries bins;
bins.push_back({buf,nb});
devices.resize(1);
cl::Program program(context, devices, bins);

int err1;
cl::Kernel kernel(program, "default_function", &err1);
auto default_function = cl::KernelFunctor<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&>(kernel);

cl::Buffer buffer_0(context, CL_MEM_READ_WRITE, vector_size_bytes_0);
cl::Buffer buffer_1(context, CL_MEM_READ_WRITE, vector_size_bytes_1);
cl::Buffer buffer_2(context, CL_MEM_READ_WRITE, vector_size_bytes_2);
cl::Buffer buffer_3(context, CL_MEM_READ_WRITE, vector_size_bytes_3);
cl::Buffer buffer_4(context, CL_MEM_READ_WRITE, vector_size_bytes_4);
cl::Buffer buffer_5(context, CL_MEM_READ_WRITE, vector_size_bytes_5);

q.enqueueWriteBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data());
q.enqueueWriteBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data());
q.enqueueWriteBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data());
q.enqueueWriteBuffer(buffer_3, CL_TRUE, 0, vector_size_bytes_3, source_3.data());
q.enqueueWriteBuffer(buffer_4, CL_TRUE, 0, vector_size_bytes_4, source_4.data());
q.enqueueWriteBuffer(buffer_5, CL_TRUE, 0, vector_size_bytes_5, source_5.data());

default_function(cl::EnqueueArgs(q, cl::NDRange(1,1,1), cl::NDRange(1,1,1)),buffer_0, buffer_1, buffer_2, buffer_3, buffer_4, buffer_5);
q.finish();

q.enqueueReadBuffer(buffer_0, CL_TRUE, 0, vector_size_bytes_0, source_0.data());
q.enqueueReadBuffer(buffer_1, CL_TRUE, 0, vector_size_bytes_1, source_1.data());
q.enqueueReadBuffer(buffer_2, CL_TRUE, 0, vector_size_bytes_2, source_2.data());
q.enqueueReadBuffer(buffer_3, CL_TRUE, 0, vector_size_bytes_3, source_3.data());
q.enqueueReadBuffer(buffer_4, CL_TRUE, 0, vector_size_bytes_4, source_4.data());
q.enqueueReadBuffer(buffer_5, CL_TRUE, 0, vector_size_bytes_5, source_5.data());

for (size_t i0 = 0; i0 < 50; i0++) {
for (size_t i1 = 0; i1 < 1; i1++) {
for (size_t i2 = 0; i2 < 28; i2++) {
for (size_t i3 = 0; i3 < 28; i3++) {
arg_0[i3 + i2*28 + i1*784 + i0*784] = source_0[i3 + i2*28 + i1*784 + i0*784];
}
}
}
}
shmdt(arg_0);
for (size_t i0 = 0; i0 < 20; i0++) {
for (size_t i1 = 0; i1 < 1; i1++) {
for (size_t i2 = 0; i2 < 5; i2++) {
for (size_t i3 = 0; i3 < 5; i3++) {
arg_1[i3 + i2*5 + i1*25 + i0*25] = source_1[i3 + i2*5 + i1*25 + i0*25] << 14;
}
}
}
}
shmdt(arg_1);
for (size_t i0 = 0; i0 < 50; i0++) {
for (size_t i1 = 0; i1 < 20; i1++) {
for (size_t i2 = 0; i2 < 5; i2++) {
for (size_t i3 = 0; i3 < 5; i3++) {
arg_2[i3 + i2*5 + i1*25 + i0*500] = source_2[i3 + i2*5 + i1*25 + i0*500] << 14;
}
}
}
}
shmdt(arg_2);
for (size_t i0 = 0; i0 < 500; i0++) {
for (size_t i1 = 0; i1 < 800; i1++) {
arg_3[i1 + i0*800] = source_3[i1 + i0*800] << 14;
}
}
shmdt(arg_3);
for (size_t i0 = 0; i0 < 10; i0++) {
for (size_t i1 = 0; i1 < 500; i1++) {
arg_4[i1 + i0*500] = source_4[i1 + i0*500] << 14;
}
}
shmdt(arg_4);
for (size_t i0 = 0; i0 < 50; i0++) {
for (size_t i1 = 0; i1 < 10; i1++) {
arg_5[i1 + i0*10] = source_5[i1 + i0*10];
}
}
shmdt(arg_5);
}
32 changes: 32 additions & 0 deletions samples/lenet/sdaccel.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Project settings for the SDAccel flow; the build and run rules themselves
# come from ${COMMON_DIR}/common.mk, included at the bottom.
ifndef XILINX_SDX
$(error Environment variable XILINX_SDX is required and should point to SDAccel install area)
endif
# Default flow; a command-line SDA_FLOW=... assignment overrides this value.
SDA_FLOW = cpu_emu
HOST_SRCS = host.cpp
HOST_EXE_DIR=.
HOST_EXE = host
HOST_CFLAGS = -g -Wall -DFPGA_DEVICE -DC_KERNEL
HOST_LFLAGS =
KERNEL_SRCS = default_function.cl
KERNEL_NAME = default_function
KERNEL_DEFS =
KERNEL_INCS =
XDEVICE=xilinx:adm-pcie-7v3:1ddr:3.0
XDEVICE_REPO_PATH=
KEEP_TEMP=1
KERNEL_DEBUG=
XCLBIN_NAME=bin_krnl
HOST_CFLAGS+=-DTARGET_DEVICE=\"${XDEVICE}\"
BOARD_SETUP_FILE=setup.sh
# Pick the compile target and the xclbin file name from the selected flow.
ifeq (${SDA_FLOW},cpu_emu)
CLCC_OPT += -t sw_emu
XCLBIN = ${XCLBIN_NAME}_cpu_emu.xclbin
else ifeq (${SDA_FLOW},hw_emu)
CLCC_OPT += -t hw_emu
XCLBIN = ${XCLBIN_NAME}_hw_emu.xclbin
else ifeq (${SDA_FLOW},hw)
XCLBIN = ${XCLBIN_NAME}_hw.xclbin
CLCC_OPT += -t hw
endif
# The host program receives the xclbin path as its only argument.
HOST_ARGS = ${XCLBIN}
COMMON_DIR = ./common
include ${COMMON_DIR}/common.mk
8 changes: 4 additions & 4 deletions samples/smith_waterman/main.cpp
Original file line number Diff line number Diff line change
@@ -39,25 +39,25 @@ int main(void) {
size_t vector_size_bytes_2 = sizeof(unsigned int) * 1024 * 256;
size_t vector_size_bytes_3 = sizeof(unsigned int) * 1024 * 256;

unsigned int* arg_0 = (unsigned int*)shmat(1507336, nullptr, 0);
unsigned int* arg_0 = (unsigned int*)shmat(1966082, nullptr, 0);
for (size_t i0 = 0; i0 < 1024; i0++) {
for (size_t i1 = 0; i1 < 128; i1++) {
source_0[i1 + i0*128] = arg_0[i1 + i0*128];
}
}
unsigned int* arg_1 = (unsigned int*)shmat(3145728, nullptr, 0);
unsigned int* arg_1 = (unsigned int*)shmat(3342336, nullptr, 0);
for (size_t i0 = 0; i0 < 1024; i0++) {
for (size_t i1 = 0; i1 < 128; i1++) {
source_1[i1 + i0*128] = arg_1[i1 + i0*128];
}
}
unsigned int* arg_2 = (unsigned int*)shmat(3145729, nullptr, 0);
unsigned int* arg_2 = (unsigned int*)shmat(3342337, nullptr, 0);
for (size_t i0 = 0; i0 < 1024; i0++) {
for (size_t i1 = 0; i1 < 256; i1++) {
source_2[i1 + i0*256] = arg_2[i1 + i0*256];
}
}
unsigned int* arg_3 = (unsigned int*)shmat(1769474, nullptr, 0);
unsigned int* arg_3 = (unsigned int*)shmat(1703939, nullptr, 0);
for (size_t i0 = 0; i0 < 1024; i0++) {
for (size_t i1 = 0; i1 < 256; i1++) {
source_3[i1 + i0*256] = arg_3[i1 + i0*256];
3 changes: 1 addition & 2 deletions samples/smith_waterman/smith_waterman_vhls.py
Original file line number Diff line number Diff line change
@@ -2,8 +2,7 @@
import numpy as np
from smith_waterman_main import *

# f = top("vhls_csim")
f = top("sdaccel_sw_emu")
f = top("vhls_csim")

# add a very simple test
_seqA_np = np.ones((num, lenA))
7 changes: 4 additions & 3 deletions tvm/src/codegen/opencl/sdaccel_module.cc
Original file line number Diff line number Diff line change
@@ -493,10 +493,11 @@ void GenHostCode(TVMArgs& args,
stream << "source_" << i << "(";
TVMArray* arr = args[i];
for (int j = 0;j < arr->ndim;j++) {
if (j == 0) {
stream << arr->shape[j];
if (j == arr->ndim-1) {
stream << arr->shape[j] << ")";
} else {
stream << " * " << arr->shape[j] << ")";
// stream << " * " << arr->shape[j] << ")";
stream << arr->shape[j] << " * ";
}
}
stream << ";\n";