diff --git a/bfs/bulk/.gitignore b/bfs/bulk/.gitignore new file mode 100644 index 0000000..b340692 --- /dev/null +++ b/bfs/bulk/.gitignore @@ -0,0 +1,2 @@ +build/ +.build/ diff --git a/bfs/bulk/.vscode/c_cpp_properties.json b/bfs/bulk/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..437972f --- /dev/null +++ b/bfs/bulk/.vscode/c_cpp_properties.json @@ -0,0 +1,13 @@ +{ + "configurations": [ + { + "name": "Linux", + "defines": [], + "compilerPath": "/usr/bin/gcc", + "cStandard": "c11", + "cppStandard": "c++17", + "intelliSenseMode": "clang-x64" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/bfs/bulk/.vscode/launch.json b/bfs/bulk/.vscode/launch.json new file mode 100644 index 0000000..af41ee8 --- /dev/null +++ b/bfs/bulk/.vscode/launch.json @@ -0,0 +1,37 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Build and run host", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/host", + "args": [ + "./input.data", + "./check.data" + ], + "stopAtEntry": false, + "cwd": "${workspaceFolder}", + /* + "environment": [ + { + "name": "LD_LIBRARY_PATH", + "value": "/tools/Xilinx/Vivado/2018.3/lnx64/tools/opencv/opencv_gcc/" + } + ], + */ + "externalConsole": false, //set to true to see output in cmd instead + "MIMode": "gdb", + "miDebuggerPath": "gdb", + "miDebuggerArgs": "--cd=${workspaceFolder}", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "preLaunchTask": "Biuld host" + }, + ] +} diff --git a/bfs/bulk/.vscode/settings.json b/bfs/bulk/.vscode/settings.json new file mode 100644 index 0000000..29af6e9 --- /dev/null +++ b/bfs/bulk/.vscode/settings.json @@ -0,0 +1,73 @@ +{ + "python.pythonPath": "/usr/include/python3.6", + "files.associations": { + "array": "cpp", + "chrono": "cpp", + "string": "cpp", + "*.tcc": "cpp", + "bitset": "cpp", + "cctype": "cpp", + "clocale": "cpp", + "cmath": "cpp", + 
"complex": "cpp", + "cstdint": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cstring": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "deque": "cpp", + "unordered_map": "cpp", + "vector": "cpp", + "exception": "cpp", + "fstream": "cpp", + "functional": "cpp", + "initializer_list": "cpp", + "iosfwd": "cpp", + "iostream": "cpp", + "istream": "cpp", + "limits": "cpp", + "memory": "cpp", + "new": "cpp", + "ostream": "cpp", + "numeric": "cpp", + "ratio": "cpp", + "sstream": "cpp", + "stdexcept": "cpp", + "streambuf": "cpp", + "system_error": "cpp", + "type_traits": "cpp", + "tuple": "cpp", + "typeinfo": "cpp", + "utility": "cpp", + "cinttypes": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "atomic": "cpp", + "strstream": "cpp", + "cfenv": "cpp", + "list": "cpp", + "iomanip": "cpp", + "mutex": "cpp", + "thread": "cpp", + "typeindex": "cpp", + "optional": "cpp", + "string_view": "cpp", + "valarray": "cpp", + "algorithm": "cpp", + "regex": "cpp", + "iterator": "cpp", + "map": "cpp", + "memory_resource": "cpp", + "random": "cpp", + "set": "cpp", + "condition_variable": "cpp", + "bfs.h": "c" + }, + "C_Cpp.default.includePath": [ + "${workspaceFolder}/", + "${workspaceFolder}/../../common" + ], + "C_Cpp.dimInactiveRegions": false +} diff --git a/bfs/bulk/.vscode/tasks.json b/bfs/bulk/.vscode/tasks.json new file mode 100644 index 0000000..104bc59 --- /dev/null +++ b/bfs/bulk/.vscode/tasks.json @@ -0,0 +1,35 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=733558 + // for the documentation about the tasks.json format + "version": "2.0.0", + "tasks": [ + { + "type": "shell", + "label": "Biuld host", + "command": "/usr/bin/g++", + "args": [ + "-g", + "-march=native", + "-O3", + "-opt-prefetch", +// "-std=c++98", + "-I/usr/include/", + "-I${workspaceFolder}/", + "-I${workspaceFolder}/../../common/", + "-I/tools/Xilinx/Vivado/2018.3/include/", + "${workspaceFolder}/bfs.c", + "${workspaceFolder}/local_support.c", + 
"${workspaceFolder}/../../common/support.c", + "${workspaceFolder}/../../common/harness.c", + "-o", + "${workspaceFolder}/build/host", + ], + "options": { + "cwd": "/usr/bin" + }, + "problemMatcher": [ + "$gcc" + ] + } + ] +} \ No newline at end of file diff --git a/bfs/bulk/.vscode/tasks.json.bak b/bfs/bulk/.vscode/tasks.json.bak new file mode 100755 index 0000000..db59994 --- /dev/null +++ b/bfs/bulk/.vscode/tasks.json.bak @@ -0,0 +1,37 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=733558 + // for the documentation about the tasks.json format + "version": "2.0.0", + "tasks": [ + { + "type": "shell", + "label": "Biuld host", + "command": "/usr/bin/g++", + "args": [ + "-g", + "-D__DEBUG__", + "-march=native", "-O3","-opt-prefetch", + "-std=c++11", + "-I/usr/include/", + "-I${workspaceFolder}/inc/", + "-I/tools/Xilinx/Vivado/2018.3/include/", + "${workspaceFolder}/src/aux.cpp", + "${workspaceFolder}/src/net_tail.cpp", + "${workspaceFolder}/src/IRB_Big_CU.cpp", + "${workspaceFolder}/src/net_head.cpp", + "${workspaceFolder}/src/net_pwc.cpp", + "${workspaceFolder}/src/host.cpp", + "-L/tools/Xilinx/Vivado/2018.3/lnx64/tools/opencv/opencv_gcc", + "-lopencv_core", "-lopencv_highgui", "-lopencv_features2d", "-lopencv_flann", "-lopencv_imgproc", + "-o", + "${workspaceFolder}/build/host", + ], + "options": { + "cwd": "/usr/bin" + }, + "problemMatcher": [ + "$gcc" + ] + } + ] +} diff --git a/bfs/bulk/Makefile b/bfs/bulk/Makefile index 12d5a2f..2df4b44 100644 --- a/bfs/bulk/Makefile +++ b/bfs/bulk/Makefile @@ -1,23 +1,182 @@ -KERN=bfs -ALG=bulk +#First check if the sysroot is defined. 
+# Run Target: +# hw - Compile for hardware +# emu - Compile for emulation (Default) +# cpu_emu - Quick compile for cpu emulation trating all HW functions as CPU functions +TARGET := emu -CFLAGS?=-O3 -Wall -Wno-unused-label +#ifndef SYSROOT +#ifeq ($(TARGET), hw) +# $(error SYSROOT is not set) +#endif +#endif -SRCS=$(KERN).c local_support.c ../../common/support.c -FILES=$(SRCS) $(KERN).h ../../common/support.h -$(KERN): $(FILES) ../../common/harness.c - $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c +DSA_PATH := ./DSA/ultra.dsa +OVERLAY := bfs_bulk +ELF_FILE := host +BOARD := ZCU102 +PROC := psu_cortexa53 +TARGET_OS := linux -run: $(KERN) input.data check.data - ./$(KERN) input.data check.data +#Head definition +TOP_FUNCTION = bfs +TOP_FILE = bfs.c -generate: $(FILES) generate.c - $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c - ./generate +#----------------- +# 0 -> 75 +# 1 -> 100 +# 2 -> 150 +# 3-> 200 +# 4-> 300 +CLKID = 1 -hls: $(KERN).c $(KERN).h - vivado_hls hls.tcl -clean: - rm -f $(KERN) generate output.data +HW_FLAGS := +ifneq ($(TARGET), cpu_emu) + HW_FLAGS += -sds-hw $(TOP_FUNCTION) $(CURDIR)/$(TOP_FILE) -clkid $(CLKID) -sds-end +endif + +BUILD_DIR := $(CURDIR)/build +TEST_DIR := $(BUILD_DIR)/$(BOARD)-$(OVERLAY) + +# Emulation Mode: +# debug - Include debug data +# optimized - Exclude debug data (Default) +EMU_MODE := optimized + + + +EMU_FLAGS := +ifneq ($(TARGET), hw) + EMU_FLAGS := -mno-bitstream -mno-boot-files -emulation $(EMU_MODE) +endif + + +#CFLAGS = -g -Wall -O3 -c -I$(CURDIR)/inc/ -fno-builtin -D__HW__ -Wno-unused-label #-D__DEBUG__ -D__CHECK_REULTS_PER_LAYER__ +CFLAGS = -Wall -O3 -c -I$(CURDIR)/../../common -I$(CURDIR)/ -fno-builtin -Wno-unused-label +CFLAGS += -MT"$@" -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@)" +LFLAGS = "$@" "$<" +#SDSFLAGS := -sds-pf $(CURDIR)/$(BOARD)/platforms/$(OVERLAY) -target-os linux +SDSFLAGS := -sds-pf zcu102 -target-os $(TARGET_OS) + +LDFLAGS := +#ifeq ($(TARGET), hw) +# LDFLAGS 
+= --sysroot=$(SYSROOT) -Wl,-rpath-link=$(SYSROOT)/lib/aarch64-linux-gnu/,-rpath-link=$(SYSROOT)/usr/lib/aarch64-linux-gnu/,-rpath-link=$(SYSROOT)/usr/lib/ +# LDFLAGS += -lopencv_core -lopencv_highgui -lopencv_imgproc -lGL -lGLU -lglut +#endif + +C_COMP := sdscc $(SDSFLAGS) +OBJECTS = $(TEST_DIR)/bfs.o +OBJECTS += $(TEST_DIR)/local_support.o +LOGFILE = $(TEST_DIR)/_sds/reports/sds.log + + +# Check Rule Builds the Sources and Executes on Specified Target +check: all +ifneq ($(TARGET), hw) +ifeq ($(TARGET_OS), linux) +ifeq ($(EMU_MODE), optimized) + cp $(CURDIR)/../../utility/emu_run_no_gui.sh $(TEST_DIR)/emu_run.sh +else + cp $(CURDIR)/../../utility/emu_run.sh $(TEST_DIR)/emu_run.sh +endif + cd $(TEST_DIR) ; ./emu_run.sh $(OVERLAY) +endif +else + $(info "This Release Doesn't Support Automated Hardware Execution") +endif + +#all: clean help platform exec +all: clean help exec + + +platform: + @mkdir -p ./$(BOARD)/hw + @mkdir -p ./$(BOARD)/platforms + @cp -rf $(DSA_PATH) ./$(BOARD)/hw/$(OVERLAY).dsa + xsct -sdx build_pfm.tcl $(OVERLAY) $(BOARD) $(PROC) + @cp -rf .build/$(OVERLAY)/export/$(OVERLAY) \ + $(BOARD)/platforms/$(OVERLAY) + @echo "Successfully finished building SDx platform." + @echo "SDx platform stored in $(BOARD)/platforms/$(OVERLAY)." + + +elf: $(OBJECTS) + @mkdir -p $(TEST_DIR) + @echo 'Building Target: $@' + @echo 'Trigerring: SDS++ Linker' + cd $(TEST_DIR) ; $(C_COMP) -I$(CURDIR)/../../common $(CURDIR)/../../common/support.c $(CURDIR)/../../common/harness.c -Wall -Wno-unused-label $(LDFLAGS) -o $(ELF_FILE) $(OBJECTS) $(EMU_FLAGS) + @echo 'SDx Completed Building Target: $@' + @echo + @tput setaf 2; \ + echo "PASS: Platform successfully tested."; \ + tput sgr0; + @echo + +help: + @echo "usage: make" + @echo + @echo "options:" + @echo "--------" + @echo "help: show help message." + @echo "all: make the SDx platform, and test it." + @echo "platform: make the SDx platform based on the input arguments." 
+ @echo "test: do a simple test after a given platform is made." + @echo "cleantest: clean the test folder." + @echo "clean: clean the test and SDx platforms for the given board." + @echo "cleanall: clean all the platforms for a fresh start." + @echo + @echo "arguments:" + @echo "----------" + @echo "DSA_PATH: path to the dsa file" + @echo " e.g., ./platform/hw/hdmi.dsa" + @echo "PROC: name of the processor that can be targeted" + @echo " e.g., psu_cortexa53" + @echo + @echo "current configuration:" + @echo "----------------------" + @echo "make DSA_PATH=$(DSA_PATH)" + @echo " OVERLAY=$(OVERLAY)" + @echo " BOARD=$(BOARD)" + @echo " PROC=$(PROC)" +# @echo " SYSROOT=$(SYSROOT)" + @echo + +exec: cleantest $(OBJECTS) + @mkdir -p $(TEST_DIR) + @echo 'Building Target: $@' + @echo 'Trigerring: SDS++ Linker' + cd $(TEST_DIR) ; $(C_COMP) -Wall $(LDFLAGS) -o $(ELF_FILE) $(OBJECTS) $(EMU_FLAGS) + @echo 'SDx Completed Building Target: $@' + @echo + @tput setaf 2; \ + echo "PASS: Platform successfully tested."; \ + tput sgr0; + @echo + + +$(TEST_DIR)/%.o: $(CURDIR)/%.c + @echo 'Building file: $<' + @echo 'Invoking: SDS++ Compiler' + @mkdir -p $(TEST_DIR) + cd $(TEST_DIR) ; $(C_COMP) $(CFLAGS) -o $(LFLAGS) $(HW_FLAGS) + @echo 'Finished building: $<' + @echo ' ' +ifeq ($(TARGET), cpu_emu) + @echo 'Ignore the warning which states that hw function is not a HW accelerator but has pragma applied for cpu_emu mode' + @echo ' ' +endif + + +cleantest: + @rm -rf $(BUILD_DIR) + +clean: cleantest + rm -rf .build + rm -rf $(BOARD)/platforms + rm -rf ./$(BOARD)/hw + +cleanall: clean + rm -rf ./*/platforms + rm -rf ./*/hw \ No newline at end of file diff --git a/bfs/bulk/Makefile_General b/bfs/bulk/Makefile_General new file mode 100644 index 0000000..12d5a2f --- /dev/null +++ b/bfs/bulk/Makefile_General @@ -0,0 +1,23 @@ +KERN=bfs +ALG=bulk + +CFLAGS?=-O3 -Wall -Wno-unused-label + +SRCS=$(KERN).c local_support.c ../../common/support.c +FILES=$(SRCS) $(KERN).h ../../common/support.h + +$(KERN): 
$(FILES) ../../common/harness.c + $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c + +run: $(KERN) input.data check.data + ./$(KERN) input.data check.data + +generate: $(FILES) generate.c + $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c + ./generate + +hls: $(KERN).c $(KERN).h + vivado_hls hls.tcl + +clean: + rm -f $(KERN) generate output.data diff --git a/bfs/bulk/bfs.c b/bfs/bulk/bfs.c index 689ba94..0370303 100644 --- a/bfs/bulk/bfs.c +++ b/bfs/bulk/bfs.c @@ -10,6 +10,10 @@ void bfs(node_t nodes[N_NODES], edge_t edges[N_EDGES], node_index_t starting_node, level_t level[N_NODES], edge_index_t level_counts[N_LEVELS]) { + + #pragma HLS data_pack variable=nodes + #pragma HLS data_pack variable=edges + node_index_t n; edge_index_t e; level_t horizon; diff --git a/bfs/bulk/bfs.h b/bfs/bulk/bfs.h index cb23edc..59da7b4 100644 --- a/bfs/bulk/bfs.h +++ b/bfs/bulk/bfs.h @@ -1,3 +1,5 @@ +#ifndef __BFS_BULK_HEADER__ +#define __BFS_BULK_HEADER__ /* Implementations based on: Harish and Narayanan. "Accelerating large graph algorithms on the GPU using CUDA." HiPC, 2007. @@ -15,8 +17,8 @@ Hong, Oguntebi, Olukotun. "Efficient Parallel Graph Exploration on Multi-Core CP #define SCALE 8 #define EDGE_FACTOR 16 -#define N_NODES (1LL< Starting the program...\n"); // Parse command line. 
- char *in_file; - #ifdef CHECK_OUTPUT - char *check_file; - #endif - assert( argc<4 && "Usage: ./benchmark " ); + const char *in_file; +#ifdef CHECK_OUTPUT + const char *check_file; +#endif + + if (argc != 3) + { + printf("x> Usage: sudo %s \n", argv[0]); + return -1; + } + in_file = "input.data"; - #ifdef CHECK_OUTPUT +#ifdef CHECK_OUTPUT check_file = "check.data"; - #endif - if( argc>1 ) +#endif + if (argc > 1) in_file = argv[1]; - #ifdef CHECK_OUTPUT - if( argc>2 ) +#ifdef CHECK_OUTPUT + if (argc > 2) check_file = argv[2]; - #endif +#endif // Load input data int in_fd; char *data; - data = malloc(INPUT_SIZE); - assert( data!=NULL && "Out of memory" ); - in_fd = open( in_file, O_RDONLY ); - assert( in_fd>0 && "Couldn't open input data file"); + data = generic_alloc(INPUT_SIZE); + assert(data != NULL && "Out of memory"); + in_fd = open(in_file, O_RDONLY); + assert(in_fd > 0 && "Couldn't open input data file"); input_to_data(in_fd, data); - + // Unpack and call - run_benchmark( data ); + run_benchmark(data); - #ifdef WRITE_OUTPUT +#ifdef WRITE_OUTPUT int out_fd; - out_fd = open("output.data", O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH); - assert( out_fd>0 && "Couldn't open output data file" ); + out_fd = open("output.data", O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); + assert(out_fd > 0 && "Couldn't open output data file"); data_to_output(out_fd, data); close(out_fd); - #endif +#endif - // Load check data - #ifdef CHECK_OUTPUT +// Load check data +#ifdef CHECK_OUTPUT int check_fd; char *ref; - ref = malloc(INPUT_SIZE); - assert( ref!=NULL && "Out of memory" ); - check_fd = open( check_file, O_RDONLY ); - assert( check_fd>0 && "Couldn't open check data file"); + ref = generic_alloc(INPUT_SIZE); + assert(ref != NULL && "Out of memory"); + check_fd = open(check_file, O_RDONLY); + assert(check_fd > 0 && "Couldn't open check data file"); output_to_data(check_fd, ref); - #endif +#endif - // 
Validate benchmark results - #ifdef CHECK_OUTPUT - if( !check_data(data, ref) ) { +// Validate benchmark results +#ifdef CHECK_OUTPUT + if (!check_data(data, ref)) + { fprintf(stderr, "Benchmark results are incorrect\n"); return -1; } - #endif - free(data); - free(ref); +#endif + generic_free(data); + generic_free(ref); printf("Success.\n"); return 0; diff --git a/common/utils/sds_utils.h b/common/utils/sds_utils.h new file mode 100644 index 0000000..5d1a00f --- /dev/null +++ b/common/utils/sds_utils.h @@ -0,0 +1,42 @@ +/********** +Copyright (c) 2018, Xilinx, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software +without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********/ +#ifndef SDS_UTILS_H_ +#define SDS_UTILS_H_ +#include <stdint.h> +#include "sds_lib.h" +static uint64_t tot, cnt, calls; /* tot: cycles accumulated by stop(); cnt: counter value sampled by the last start(); calls: number of start() calls */ +static inline void reset(void) { tot = cnt = calls = 0; } /* clear all three counters */ +static inline void start(void) /* sample the cycle counter and count one measured call */ +{ + cnt = sds_clock_counter(); + calls++; +} +static inline void stop(void) { tot += (sds_clock_counter() - cnt); } /* add the cycles elapsed since the last start() */ +static inline uint64_t avg_cpu_cycles(void) { return calls ? (tot / calls) : 0; } /* mean cycles per start()/stop() pair; returns 0 instead of dividing by zero when start() was never called */ +#endif diff --git a/fft/strided/.gitignore b/fft/strided/.gitignore new file mode 100644 index 0000000..c690061 --- /dev/null +++ b/fft/strided/.gitignore @@ -0,0 +1,2 @@ +build/ +.build/ \ No newline at end of file diff --git a/fft/strided/.vscode/c_cpp_properties.json b/fft/strided/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..437972f --- /dev/null +++ b/fft/strided/.vscode/c_cpp_properties.json @@ -0,0 +1,13 @@ +{ + "configurations": [ + { + "name": "Linux", + "defines": [], + "compilerPath": "/usr/bin/gcc", + "cStandard": "c11", + "cppStandard": "c++17", + "intelliSenseMode": "clang-x64" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/fft/strided/.vscode/launch.json b/fft/strided/.vscode/launch.json new file mode 100644 index 0000000..af41ee8 --- /dev/null +++ b/fft/strided/.vscode/launch.json @@ -0,0 +1,37 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Build and run host", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/host", + "args": [ + "./input.data", + "./check.data" + 
], + "stopAtEntry": false, + "cwd": "${workspaceFolder}", + /* + "environment": [ + { + "name": "LD_LIBRARY_PATH", + "value": "/tools/Xilinx/Vivado/2018.3/lnx64/tools/opencv/opencv_gcc/" + } + ], + */ + "externalConsole": false, //set to true to see output in cmd instead + "MIMode": "gdb", + "miDebuggerPath": "gdb", + "miDebuggerArgs": "--cd=${workspaceFolder}", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "preLaunchTask": "Biuld host" + }, + ] +} diff --git a/fft/strided/.vscode/settings.json b/fft/strided/.vscode/settings.json new file mode 100644 index 0000000..29af6e9 --- /dev/null +++ b/fft/strided/.vscode/settings.json @@ -0,0 +1,73 @@ +{ + "python.pythonPath": "/usr/include/python3.6", + "files.associations": { + "array": "cpp", + "chrono": "cpp", + "string": "cpp", + "*.tcc": "cpp", + "bitset": "cpp", + "cctype": "cpp", + "clocale": "cpp", + "cmath": "cpp", + "complex": "cpp", + "cstdint": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cstring": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "deque": "cpp", + "unordered_map": "cpp", + "vector": "cpp", + "exception": "cpp", + "fstream": "cpp", + "functional": "cpp", + "initializer_list": "cpp", + "iosfwd": "cpp", + "iostream": "cpp", + "istream": "cpp", + "limits": "cpp", + "memory": "cpp", + "new": "cpp", + "ostream": "cpp", + "numeric": "cpp", + "ratio": "cpp", + "sstream": "cpp", + "stdexcept": "cpp", + "streambuf": "cpp", + "system_error": "cpp", + "type_traits": "cpp", + "tuple": "cpp", + "typeinfo": "cpp", + "utility": "cpp", + "cinttypes": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "atomic": "cpp", + "strstream": "cpp", + "cfenv": "cpp", + "list": "cpp", + "iomanip": "cpp", + "mutex": "cpp", + "thread": "cpp", + "typeindex": "cpp", + "optional": "cpp", + "string_view": "cpp", + "valarray": "cpp", + "algorithm": "cpp", + "regex": "cpp", + "iterator": "cpp", + "map": "cpp", 
+ "memory_resource": "cpp", + "random": "cpp", + "set": "cpp", + "condition_variable": "cpp", + "bfs.h": "c" + }, + "C_Cpp.default.includePath": [ + "${workspaceFolder}/", + "${workspaceFolder}/../../common" + ], + "C_Cpp.dimInactiveRegions": false +} diff --git a/fft/strided/.vscode/tasks.json b/fft/strided/.vscode/tasks.json new file mode 100644 index 0000000..5fc847d --- /dev/null +++ b/fft/strided/.vscode/tasks.json @@ -0,0 +1,35 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=733558 + // for the documentation about the tasks.json format + "version": "2.0.0", + "tasks": [ + { + "type": "shell", + "label": "Biuld host", + "command": "/usr/bin/g++", + "args": [ + "-g", + "-march=native", + "-O3", + "-opt-prefetch", +// "-std=c++98", + "-I/usr/include/", + "-I${workspaceFolder}/", + "-I${workspaceFolder}/../../common/", + "-I/tools/Xilinx/Vivado/2018.3/include/", + "${workspaceFolder}/fft.c", + "${workspaceFolder}/local_support.c", + "${workspaceFolder}/../../common/support.c", + "${workspaceFolder}/../../common/harness.c", + "-o", + "${workspaceFolder}/build/host", + ], + "options": { + "cwd": "/usr/bin" + }, + "problemMatcher": [ + "$gcc" + ] + } + ] +} \ No newline at end of file diff --git a/fft/strided/.vscode/tasks.json.bak b/fft/strided/.vscode/tasks.json.bak new file mode 100755 index 0000000..db59994 --- /dev/null +++ b/fft/strided/.vscode/tasks.json.bak @@ -0,0 +1,37 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=733558 + // for the documentation about the tasks.json format + "version": "2.0.0", + "tasks": [ + { + "type": "shell", + "label": "Biuld host", + "command": "/usr/bin/g++", + "args": [ + "-g", + "-D__DEBUG__", + "-march=native", "-O3","-opt-prefetch", + "-std=c++11", + "-I/usr/include/", + "-I${workspaceFolder}/inc/", + "-I/tools/Xilinx/Vivado/2018.3/include/", + "${workspaceFolder}/src/aux.cpp", + "${workspaceFolder}/src/net_tail.cpp", + "${workspaceFolder}/src/IRB_Big_CU.cpp", + "${workspaceFolder}/src/net_head.cpp", + 
"${workspaceFolder}/src/net_pwc.cpp", + "${workspaceFolder}/src/host.cpp", + "-L/tools/Xilinx/Vivado/2018.3/lnx64/tools/opencv/opencv_gcc", + "-lopencv_core", "-lopencv_highgui", "-lopencv_features2d", "-lopencv_flann", "-lopencv_imgproc", + "-o", + "${workspaceFolder}/build/host", + ], + "options": { + "cwd": "/usr/bin" + }, + "problemMatcher": [ + "$gcc" + ] + } + ] +} diff --git a/fft/strided/Makefile b/fft/strided/Makefile index a8c150b..8621f9c 100644 --- a/fft/strided/Makefile +++ b/fft/strided/Makefile @@ -1,23 +1,159 @@ -KERN=fft -ALG=strided +#First check if the sysroot is defined. +# Run Target: +# hw - Compile for hardware +# emu - Compile for emulation (Default) +# cpu_emu - Quick compile for cpu emulation trating all HW functions as CPU functions +TARGET := emu -CFLAGS?=-O3 -Wall -Wno-unused-label +#ifndef SYSROOT +#ifeq ($(TARGET), hw) +# $(error SYSROOT is not set) +#endif +#endif -SRCS=$(KERN).c local_support.c ../../common/support.c -FILES=$(SRCS) $(KERN).h ../../common/support.h -$(KERN): $(FILES) ../../common/harness.c - $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c -lm +DSA_PATH := ./DSA/ultra.dsa +OVERLAY := fft_str +BOARD := ZCU102 +PROC := psu_cortexa53 +TARGET_OS := linux -run: $(KERN) input.data check.data - ./$(KERN) input.data check.data +#Head definition +TOP_FUNCTION = fft +TOP_FILE = fft.c -generate: $(FILES) generate.c - $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c -lm - ./generate +#----------------- +# 0 -> 75 +# 1 -> 100 +# 2 -> 150 +# 3-> 200 +# 4-> 300 +CLKID = 1 -hls: $(KERN).c $(KERN).h - vivado_hls hls.tcl -clean: - rm -f $(KERN) generate output.data +HW_FLAGS := +ifneq ($(TARGET), cpu_emu) + HW_FLAGS += -sds-hw $(TOP_FUNCTION) $(CURDIR)/$(TOP_FILE) -clkid $(CLKID) -sds-end +endif + +BUILD_DIR := $(CURDIR)/build +TEST_DIR := $(BUILD_DIR)/$(BOARD)-$(OVERLAY) + +# Emulation Mode: +# debug - Include debug data +# optimized - Exclude debug data (Default) +EMU_MODE := optimized + + + 
+EMU_FLAGS := +ifneq ($(TARGET), hw) + EMU_FLAGS := -mno-bitstream -mno-boot-files -emulation $(EMU_MODE) +endif + + +#CFLAGS = -g -Wall -O3 -c -I$(CURDIR)/inc/ -fno-builtin -D__HW__ -Wno-unused-label #-D__DEBUG__ -D__CHECK_REULTS_PER_LAYER__ +CFLAGS = -Wall -O3 -c -I$(CURDIR)/../../common -I$(CURDIR)/ -fno-builtin -Wno-unused-label +CFLAGS += -MT"$@" -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@)" +LFLAGS = "$@" "$<" +#SDSFLAGS := -sds-pf $(CURDIR)/$(BOARD)/platforms/$(OVERLAY) -target-os linux +SDSFLAGS := -sds-pf zcu102 -target-os $(TARGET_OS) + +LDFLAGS := +#ifeq ($(TARGET), hw) +# LDFLAGS += --sysroot=$(SYSROOT) -Wl,-rpath-link=$(SYSROOT)/lib/aarch64-linux-gnu/,-rpath-link=$(SYSROOT)/usr/lib/aarch64-linux-gnu/,-rpath-link=$(SYSROOT)/usr/lib/ +# LDFLAGS += -lopencv_core -lopencv_highgui -lopencv_imgproc -lGL -lGLU -lglut +#endif + +C_COMP := sdscc $(SDSFLAGS) +OBJECTS = $(TEST_DIR)/fft.o +OBJECTS += $(TEST_DIR)/local_support.o +LOGFILE = $(TEST_DIR)/_sds/reports/sds.log + +# Check Rule Builds the Sources and Executes on Specified Target +check: all +ifneq ($(TARGET), hw) +ifeq ($(TARGET_OS), linux) +ifeq ($(EMU_MODE), optimized) + cp $(CURDIR)/../../utility/emu_run_no_gui.sh $(TEST_DIR)/emu_run.sh +else + cp $(CURDIR)/../../utility/emu_run.sh $(TEST_DIR)/emu_run.sh +endif + cd $(TEST_DIR) ; ./emu_run.sh $(OVERLAY) +endif +else + $(info "This Release Doesn't Support Automated Hardware Execution") +endif + +all: clean help host exec + + +help: + @echo "usage: make" + @echo + @echo "options:" + @echo "--------" + @echo "help: show help message." + @echo "all: make the SDx platform, and test it." + @echo "platform: make the SDx platform based on the input arguments." + @echo "test: do a simple test after a given platform is made." + @echo "cleantest: clean the test folder." + @echo "clean: clean the test and SDx platforms for the given board." + @echo "cleanall: clean all the platforms for a fresh start." 
+ @echo + @echo "arguments:" + @echo "----------" + @echo "DSA_PATH: path to the dsa file" + @echo " e.g., ./platform/hw/hdmi.dsa" + @echo "PROC: name of the processor that can be targeted" + @echo " e.g., psu_cortexa53" + @echo + @echo "current configuration:" + @echo "----------------------" + @echo "make DSA_PATH=$(DSA_PATH)" + @echo " OVERLAY=$(OVERLAY)" + @echo " BOARD=$(BOARD)" + @echo " PROC=$(PROC)" +# @echo " SYSROOT=$(SYSROOT)" + @echo + +host: + $(MAKE) -C $(CURDIR)/../../common + +exec: cleantest $(OBJECTS) + @mkdir -p $(TEST_DIR) + @echo 'Building Target: $@' + @echo 'Trigerring: SDS++ Linker' + cd $(TEST_DIR) ; $(C_COMP) -Wall $(CURDIR)/../../common/build/support.o $(CURDIR)/../../common/build/harness.o $(LDFLAGS) -o $(OVERLAY) $(OBJECTS) $(EMU_FLAGS) + @echo 'SDx Completed Building Target: $@' + @echo + @tput setaf 2; \ + echo "PASS: Platform successfully tested."; \ + tput sgr0; + @echo + + +$(TEST_DIR)/%.o: $(CURDIR)/%.c + @echo 'Building file: $<' + @echo 'Invoking: SDSCC Compiler' + @mkdir -p $(TEST_DIR) + cd $(TEST_DIR) ; $(C_COMP) $(CFLAGS) -o $(LFLAGS) $(HW_FLAGS) + @echo 'Finished building: $<' + @echo ' ' +ifeq ($(TARGET), cpu_emu) + @echo 'Ignore the warning which states that hw function is not a HW accelerator but has pragma applied for cpu_emu mode' + @echo ' ' +endif + + +cleantest: + @rm -rf $(BUILD_DIR) + +clean: cleantest + rm -rf .build + rm -rf $(BOARD)/platforms + rm -rf ./$(BOARD)/hw + +cleanall: clean + rm -rf ./*/platforms + rm -rf ./*/hw diff --git a/fft/strided/Makefile_General b/fft/strided/Makefile_General new file mode 100644 index 0000000..a8c150b --- /dev/null +++ b/fft/strided/Makefile_General @@ -0,0 +1,23 @@ +KERN=fft +ALG=strided + +CFLAGS?=-O3 -Wall -Wno-unused-label + +SRCS=$(KERN).c local_support.c ../../common/support.c +FILES=$(SRCS) $(KERN).h ../../common/support.h + +$(KERN): $(FILES) ../../common/harness.c + $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c -lm + +run: $(KERN) input.data 
check.data + ./$(KERN) input.data check.data + +generate: $(FILES) generate.c + $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c -lm + ./generate + +hls: $(KERN).c $(KERN).h + vivado_hls hls.tcl + +clean: + rm -f $(KERN) generate output.data diff --git a/fft/strided/fft.h b/fft/strided/fft.h index a0bd0ac..8a91b5f 100644 --- a/fft/strided/fft.h +++ b/fft/strided/fft.h @@ -1,3 +1,6 @@ +#ifndef __FFT_STR_HEADER__ +#define __FFT_STR_HEADER__ + #include #include #include "support.h" @@ -5,15 +8,42 @@ #define FFT_SIZE 1024 #define twoPI 6.28318530717959 -void fft(double real[FFT_SIZE], double img[FFT_SIZE], double real_twid[FFT_SIZE/2], double img_twid[FFT_SIZE/2]); +#ifdef __cplusplus +extern "C" +{ +#endif + +#pragma SDS data zero_copy( \ + real [0:FFT_SIZE], \ + img [0:FFT_SIZE], \ + real_twid [0:FFT_SIZE / 2], \ + img_twid [0:FFT_SIZE / 2]) + +#pragma SDS data access_pattern( \ + real \ + : SEQUENTIAL, \ + img \ + : SEQUENTIAL, \ + real_twid \ + : SEQUENTIAL, \ + img_twid \ + : SEQUENTIAL) + void fft(double real[FFT_SIZE], double img[FFT_SIZE], double real_twid[FFT_SIZE / 2], double img_twid[FFT_SIZE / 2]); + +#ifdef __cplusplus +} +#endif //////////////////////////////////////////////////////////////////////////////// // Test harness interface code. 
-struct bench_args_t { +struct bench_args_t +{ double real[FFT_SIZE]; double img[FFT_SIZE]; - double real_twid[FFT_SIZE/2]; - double img_twid[FFT_SIZE/2]; + double real_twid[FFT_SIZE / 2]; + double img_twid[FFT_SIZE / 2]; }; + +#endif \ No newline at end of file diff --git a/fft/strided/local_support.c b/fft/strided/local_support.c index 6a361da..6fa26a5 100644 --- a/fft/strided/local_support.c +++ b/fft/strided/local_support.c @@ -1,13 +1,34 @@ #include "fft.h" #include +#ifdef __SDSCC__ +#include "utils/sds_utils.h" +#endif + int INPUT_SIZE = sizeof(struct bench_args_t); #define EPSILON ((double)1.0e-6) -void run_benchmark( void *vargs ) { +void run_benchmark(void *vargs) /* runs fft() on the unpacked bench_args_t; under __SDSCC__ it also times the call and prints the cycle count */ +{ struct bench_args_t *args = (struct bench_args_t *)vargs; - fft(args->real, args->img, args->real_twid, args->img_twid ); +#ifdef __SDSCC__ + reset(); + start(); +#endif + + fft(args->real, args->img, args->real_twid, args->img_twid); + +#ifdef __SDSCC__ + stop(); + + uint64_t compute_Total_avg = avg_cpu_cycles(); + double delay = (compute_Total_avg * (1000000.0 / (sds_clock_frequency()))); /* cycles * (1e6 / counter frequency) = microseconds */ + //AP freq is 1.2GHz + printf("-> Number of CPU cycles halted for kernel %llu \t~\t %f(uS).\n", (unsigned long long)compute_Total_avg, delay); /* cast so the argument matches %llu wherever uint64_t is unsigned long */ + printf("-> For this AP Thick/S is %llu.\n", (unsigned long long)sds_clock_frequency()); /* a 64-bit counter frequency must not be passed to %d */ +#endif + } /* Input format: @@ -21,27 +42,29 @@ double: twiddle factor (real part) double: twiddle factor (complex part) */ -void input_to_data(int fd, void *vdata) { +void input_to_data(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; char *p, *s; // Load input string p = readfile(fd); - s = find_section_start(p,1); + s = find_section_start(p, 1); parse_double_array(s, data->real, FFT_SIZE); - s = find_section_start(p,2); + s = find_section_start(p, 2); parse_double_array(s, data->img, FFT_SIZE); - s = find_section_start(p,3); - parse_double_array(s, data->real_twid, FFT_SIZE/2); + s = find_section_start(p, 3); + parse_double_array(s, data->real_twid, FFT_SIZE / 2); - s = 
find_section_start(p,4); - parse_double_array(s, data->img_twid, FFT_SIZE/2); + s = find_section_start(p, 4); + parse_double_array(s, data->img_twid, FFT_SIZE / 2); free(p); } -void data_to_input(int fd, void *vdata) { +void data_to_input(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; write_section_header(fd); @@ -51,10 +74,10 @@ void data_to_input(int fd, void *vdata) { write_double_array(fd, data->img, FFT_SIZE); write_section_header(fd); - write_double_array(fd, data->real_twid, FFT_SIZE/2); + write_double_array(fd, data->real_twid, FFT_SIZE / 2); write_section_header(fd); - write_double_array(fd, data->img_twid, FFT_SIZE/2); + write_double_array(fd, data->img_twid, FFT_SIZE / 2); } /* Output format: @@ -64,23 +87,25 @@ double: freq (real part) double: freq (complex part) */ -void output_to_data(int fd, void *vdata) { +void output_to_data(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; char *p, *s; // Zero-out everything. 
- memset(vdata,0,sizeof(struct bench_args_t)); + memset(vdata, 0, sizeof(struct bench_args_t)); // Load input string p = readfile(fd); - s = find_section_start(p,1); + s = find_section_start(p, 1); parse_double_array(s, data->real, FFT_SIZE); - s = find_section_start(p,2); + s = find_section_start(p, 2); parse_double_array(s, data->img, FFT_SIZE); free(p); } -void data_to_output(int fd, void *vdata) { +void data_to_output(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; write_section_header(fd); @@ -90,22 +115,24 @@ void data_to_output(int fd, void *vdata) { write_double_array(fd, data->img, FFT_SIZE); } -int check_data( void *vdata, void *vref ) { +int check_data(void *vdata, void *vref) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; struct bench_args_t *ref = (struct bench_args_t *)vref; int has_errors = 0; int i; double real_diff, img_diff; - for(i=0; ireal[i] - ref->real[i]; img_diff = data->img[i] - ref->img[i]; - has_errors |= (real_diff<-EPSILON) || (EPSILON 75 +# 1 -> 100 +# 2 -> 150 +# 3-> 200 +# 4-> 300 +CLKID = 1 -hls: $(KERN).c $(KERN).h - vivado_hls hls.tcl -clean: - rm -f $(KERN) generate output.data +HW_FLAGS := +ifneq ($(TARGET), cpu_emu) + HW_FLAGS += -sds-hw $(TOP_FUNCTION) $(CURDIR)/$(TOP_FILE) -clkid $(CLKID) -sds-end +endif + +BUILD_DIR := $(CURDIR)/build +TEST_DIR := $(BUILD_DIR)/$(BOARD)-$(OVERLAY) + +# Emulation Mode: +# debug - Include debug data +# optimized - Exclude debug data (Default) +EMU_MODE := optimized + + + +EMU_FLAGS := +ifneq ($(TARGET), hw) + EMU_FLAGS := -mno-bitstream -mno-boot-files -emulation $(EMU_MODE) +endif + + +#CFLAGS = -g -Wall -O3 -c -I$(CURDIR)/inc/ -fno-builtin -D__HW__ -Wno-unused-label #-D__DEBUG__ -D__CHECK_REULTS_PER_LAYER__ +CFLAGS = -Wall -O3 -c -I$(CURDIR)/../../common -I$(CURDIR)/ -fno-builtin -Wno-unused-label +CFLAGS += -MT"$@" -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@)" +LFLAGS = "$@" "$<" +#SDSFLAGS := -sds-pf $(CURDIR)/$(BOARD)/platforms/$(OVERLAY) -target-os 
linux +SDSFLAGS := -sds-pf zcu102 -target-os $(TARGET_OS) + +LDFLAGS := +#ifeq ($(TARGET), hw) +# LDFLAGS += --sysroot=$(SYSROOT) -Wl,-rpath-link=$(SYSROOT)/lib/aarch64-linux-gnu/,-rpath-link=$(SYSROOT)/usr/lib/aarch64-linux-gnu/,-rpath-link=$(SYSROOT)/usr/lib/ +# LDFLAGS += -lopencv_core -lopencv_highgui -lopencv_imgproc -lGL -lGLU -lglut +#endif + +C_COMP := sdscc $(SDSFLAGS) +OBJECTS = $(TEST_DIR)/gemm.o +OBJECTS += $(TEST_DIR)/local_support.o +LOGFILE = $(TEST_DIR)/_sds/reports/sds.log + +# Check Rule Builds the Sources and Executes on Specified Target +check: all +ifneq ($(TARGET), hw) +ifeq ($(TARGET_OS), linux) +ifeq ($(EMU_MODE), optimized) + cp $(CURDIR)/../../utility/emu_run_no_gui.sh $(TEST_DIR)/emu_run.sh +else + cp $(CURDIR)/../../utility/emu_run.sh $(TEST_DIR)/emu_run.sh +endif + cd $(TEST_DIR) ; ./emu_run.sh $(OVERLAY) +endif +else + $(info "This Release Doesn't Support Automated Hardware Execution") +endif + +all: clean help host exec + + +help: + @echo "usage: make" + @echo + @echo "options:" + @echo "--------" + @echo "help: show help message." + @echo "all: make the SDx platform, and test it." + @echo "platform: make the SDx platform based on the input arguments." + @echo "test: do a simple test after a given platform is made." + @echo "cleantest: clean the test folder." + @echo "clean: clean the test and SDx platforms for the given board." + @echo "cleanall: clean all the platforms for a fresh start." 
+ @echo + @echo "arguments:" + @echo "----------" + @echo "DSA_PATH: path to the dsa file" + @echo " e.g., ./platform/hw/hdmi.dsa" + @echo "PROC: name of the processor that can be targeted" + @echo " e.g., psu_cortexa53" + @echo + @echo "current configuration:" + @echo "----------------------" + @echo "make DSA_PATH=$(DSA_PATH)" + @echo " OVERLAY=$(OVERLAY)" + @echo " BOARD=$(BOARD)" + @echo " PROC=$(PROC)" +# @echo " SYSROOT=$(SYSROOT)" + @echo + +host: + $(MAKE) -C $(CURDIR)/../../common + +exec: cleantest $(OBJECTS) + @mkdir -p $(TEST_DIR) + @echo 'Building Target: $@' + @echo 'Trigerring: SDS++ Linker' + cd $(TEST_DIR) ; $(C_COMP) -Wall $(CURDIR)/../../common/build/support.o $(CURDIR)/../../common/build/harness.o $(LDFLAGS) -o $(OVERLAY) $(OBJECTS) $(EMU_FLAGS) + @echo 'SDx Completed Building Target: $@' + @echo + @tput setaf 2; \ + echo "PASS: Platform successfully tested."; \ + tput sgr0; + @echo + + +$(TEST_DIR)/%.o: $(CURDIR)/%.c + @echo 'Building file: $<' + @echo 'Invoking: SDSCC Compiler' + @mkdir -p $(TEST_DIR) + cd $(TEST_DIR) ; $(C_COMP) $(CFLAGS) -o $(LFLAGS) $(HW_FLAGS) + @echo 'Finished building: $<' + @echo ' ' +ifeq ($(TARGET), cpu_emu) + @echo 'Ignore the warning which states that hw function is not a HW accelerator but has pragma applied for cpu_emu mode' + @echo ' ' +endif + + +cleantest: + @rm -rf $(BUILD_DIR) + +clean: cleantest + rm -rf .build + rm -rf $(BOARD)/platforms + rm -rf ./$(BOARD)/hw + +cleanall: clean + rm -rf ./*/platforms + rm -rf ./*/hw diff --git a/gemm/ncubed/Makefile_General b/gemm/ncubed/Makefile_General new file mode 100644 index 0000000..6fef6c9 --- /dev/null +++ b/gemm/ncubed/Makefile_General @@ -0,0 +1,23 @@ +KERN=gemm +ALG=ncubed + +CFLAGS?=-O3 -Wall -Wno-unused-label + +SRCS=$(KERN).c local_support.c ../../common/support.c +FILES=$(SRCS) $(KERN).h ../../common/support.h + +$(KERN): $(FILES) ../../common/harness.c + $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c + +run: $(KERN) input.data 
check.data + ./$(KERN) input.data check.data + +generate: $(FILES) generate.c + $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c + ./generate + +hls: $(KERN).c $(KERN).h + vivado_hls hls.tcl + +clean: + rm -f $(KERN) generate output.data diff --git a/gemm/ncubed/gemm.h b/gemm/ncubed/gemm.h index 885e0b9..d254e55 100644 --- a/gemm/ncubed/gemm.h +++ b/gemm/ncubed/gemm.h @@ -9,7 +9,7 @@ //Specify row/column sizes #define row_size 64 #define col_size 64 -#define N row_size*col_size +#define N row_size *col_size //Define the input range to operate over #define MIN 0. @@ -18,11 +18,35 @@ //Set number of iterations to execute #define MAX_ITERATION 1 -void gemm(TYPE m1[N], TYPE m2[N], TYPE prod[N]); +#ifdef __cplusplus +extern "C" +{ +#endif + +#pragma SDS data zero_copy( \ + m1 [0:N], \ + m2 [0:N], \ + prod [0:N]) + +#pragma SDS data access_pattern( \ + m1 \ + : SEQUENTIAL, \ + m2 \ + : SEQUENTIAL, \ + prod \ + : SEQUENTIAL) + + void gemm(TYPE m1[N], TYPE m2[N], TYPE prod[N]); + +#ifdef __cplusplus +} +#endif + //////////////////////////////////////////////////////////////////////////////// // Test harness interface code. 
-struct bench_args_t { +struct bench_args_t +{ TYPE m1[N]; TYPE m2[N]; TYPE prod[N]; diff --git a/gemm/ncubed/hs_err_pid19132.log b/gemm/ncubed/hs_err_pid19132.log new file mode 100644 index 0000000..649fe51 --- /dev/null +++ b/gemm/ncubed/hs_err_pid19132.log @@ -0,0 +1,11 @@ +# +# An unexpected error has occurred (11) +# +Stack: +/tools/Xilinx/Vivado/2018.3/tps/lnx64/jre9.0.4/lib//server/libjvm.so(+0xb6aadb) [0x7f687f864adb] +/tools/Xilinx/Vivado/2018.3/tps/lnx64/jre9.0.4/lib//server/libjvm.so(JVM_handle_linux_signal+0xbb) [0x7f687f869e1b] +/tools/Xilinx/Vivado/2018.3/tps/lnx64/jre9.0.4/lib//server/libjvm.so(+0xb647b8) [0x7f687f85e7b8] +/lib/x86_64-linux-gnu/libc.so.6(+0x3ef20) [0x7f68c42b2f20] +/tools/Xilinx/Vivado/2018.3/lib/lnx64.o/librdi_common.so(HI18N::Convert::In(wchar_t const*, wchar_t const*, std::__cxx11::basic_string, std::allocator >&, std::codecvt const&)+0x36) [0x7f68c52d4376] +/tools/Xilinx/Vivado/2018.3/lib/lnx64.o/librdi_common.so(Java_ui_data_common_parami_ParamMgr_1getValueAsBool+0x159) [0x7f68c53b65c9] +[0x7f686fed2ca7] diff --git a/gemm/ncubed/local_support.c b/gemm/ncubed/local_support.c index 4d69fa9..52e9e54 100644 --- a/gemm/ncubed/local_support.c +++ b/gemm/ncubed/local_support.c @@ -1,13 +1,32 @@ #include "gemm.h" #include +#ifdef __SDSCC__ +#include "utils/sds_utils.h" +#endif + int INPUT_SIZE = sizeof(struct bench_args_t); #define EPSILON ((TYPE)1.0e-6) -void run_benchmark( void *vargs ) { +void run_benchmark(void *vargs) +{ struct bench_args_t *args = (struct bench_args_t *)vargs; - gemm( args->m1, args->m2, args->prod ); +#ifdef __SDSCC__ + reset(); + start(); +#endif + gemm(args->m1, args->m2, args->prod); + +#ifdef __SDSCC__ + stop(); + + uint64_t compute_Total_avg = avg_cpu_cycles(); + double delay = (compute_Total_avg * (1000000.0 / (sds_clock_frequency()))); + //AP freq is 1.2GHz + printf("-> Number of CPU cycles halted for kernel %llu \t~\t %f(uS).\n", compute_Total_avg, delay); + printf("-> For this AP Thick/S is %d.\n", 
sds_clock_frequency()); +#endif } /* Input format: @@ -17,31 +36,36 @@ TYPE[N]: matrix 1 TYPE[N]: matrix 2 */ -void input_to_data(int fd, void *vdata) { +void input_to_data(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; char *p, *s; // Zero-out everything. - memset(vdata,0,sizeof(struct bench_args_t)); + memset(vdata, 0, sizeof(struct bench_args_t)); // Load input string p = readfile(fd); - s = find_section_start(p,1); - STAC(parse_,TYPE,_array)(s, data->m1, N); + s = find_section_start(p, 1); + STAC(parse_, TYPE, _array) + (s, data->m1, N); - s = find_section_start(p,2); - STAC(parse_,TYPE,_array)(s, data->m2, N); + s = find_section_start(p, 2); + STAC(parse_, TYPE, _array) + (s, data->m2, N); free(p); - } -void data_to_input(int fd, void *vdata) { +void data_to_input(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; write_section_header(fd); - STAC(write_,TYPE,_array)(fd, data->m1, N); + STAC(write_, TYPE, _array) + (fd, data->m1, N); write_section_header(fd); - STAC(write_,TYPE,_array)(fd, data->m2, N); + STAC(write_, TYPE, _array) + (fd, data->m2, N); } /* Output format: @@ -49,35 +73,42 @@ void data_to_input(int fd, void *vdata) { TYPE[N]: output matrix */ -void output_to_data(int fd, void *vdata) { +void output_to_data(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; char *p, *s; // Load input string p = readfile(fd); - s = find_section_start(p,1); - STAC(parse_,TYPE,_array)(s, data->prod, N); + s = find_section_start(p, 1); + STAC(parse_, TYPE, _array) + (s, data->prod, N); free(p); } -void data_to_output(int fd, void *vdata) { +void data_to_output(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; write_section_header(fd); - STAC(write_,TYPE,_array)(fd, data->prod, N); + STAC(write_, TYPE, _array) + (fd, data->prod, N); } -int check_data( void *vdata, void *vref ) { +int check_data(void *vdata, void *vref) +{ struct bench_args_t 
*data = (struct bench_args_t *)vdata; struct bench_args_t *ref = (struct bench_args_t *)vref; int has_errors = 0; - int r,c; + int r, c; TYPE diff; - for( r=0; rprod[r*row_size + c] - ref->prod[r*row_size+c]; - has_errors |= (diff<-EPSILON) || (EPSILONprod[r * row_size + c] - ref->prod[r * row_size + c]; + has_errors |= (diff < -EPSILON) || (EPSILON < diff); } } diff --git a/gemm/ncubed/synth.sh b/gemm/ncubed/synth.sh new file mode 100755 index 0000000..cbb026d --- /dev/null +++ b/gemm/ncubed/synth.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +source /tools/Xilinx/Vivado/2018.3/settings64.sh + +/usr/bin/time -f "Took %E to synthesize the QNet Accl." make TARGET=hw + diff --git a/md/grid/.gitignore b/md/grid/.gitignore new file mode 100644 index 0000000..c690061 --- /dev/null +++ b/md/grid/.gitignore @@ -0,0 +1,2 @@ +build/ +.build/ \ No newline at end of file diff --git a/md/grid/.vscode/c_cpp_properties.json b/md/grid/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..437972f --- /dev/null +++ b/md/grid/.vscode/c_cpp_properties.json @@ -0,0 +1,13 @@ +{ + "configurations": [ + { + "name": "Linux", + "defines": [], + "compilerPath": "/usr/bin/gcc", + "cStandard": "c11", + "cppStandard": "c++17", + "intelliSenseMode": "clang-x64" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/md/grid/.vscode/launch.json b/md/grid/.vscode/launch.json new file mode 100644 index 0000000..af41ee8 --- /dev/null +++ b/md/grid/.vscode/launch.json @@ -0,0 +1,37 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Build and run host", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/host", + "args": [ + "./input.data", + "./check.data" + ], + "stopAtEntry": false, + "cwd": "${workspaceFolder}", + /* + "environment": [ + { + "name": "LD_LIBRARY_PATH", + "value": "/tools/Xilinx/Vivado/2018.3/lnx64/tools/opencv/opencv_gcc/" + } + ], + */ + "externalConsole": false, //set to true to see output in cmd instead + "MIMode": 
"gdb", + "miDebuggerPath": "gdb", + "miDebuggerArgs": "--cd=${workspaceFolder}", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "preLaunchTask": "Biuld host" + }, + ] +} diff --git a/md/grid/.vscode/settings.json b/md/grid/.vscode/settings.json new file mode 100644 index 0000000..29af6e9 --- /dev/null +++ b/md/grid/.vscode/settings.json @@ -0,0 +1,73 @@ +{ + "python.pythonPath": "/usr/include/python3.6", + "files.associations": { + "array": "cpp", + "chrono": "cpp", + "string": "cpp", + "*.tcc": "cpp", + "bitset": "cpp", + "cctype": "cpp", + "clocale": "cpp", + "cmath": "cpp", + "complex": "cpp", + "cstdint": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cstring": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "deque": "cpp", + "unordered_map": "cpp", + "vector": "cpp", + "exception": "cpp", + "fstream": "cpp", + "functional": "cpp", + "initializer_list": "cpp", + "iosfwd": "cpp", + "iostream": "cpp", + "istream": "cpp", + "limits": "cpp", + "memory": "cpp", + "new": "cpp", + "ostream": "cpp", + "numeric": "cpp", + "ratio": "cpp", + "sstream": "cpp", + "stdexcept": "cpp", + "streambuf": "cpp", + "system_error": "cpp", + "type_traits": "cpp", + "tuple": "cpp", + "typeinfo": "cpp", + "utility": "cpp", + "cinttypes": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "atomic": "cpp", + "strstream": "cpp", + "cfenv": "cpp", + "list": "cpp", + "iomanip": "cpp", + "mutex": "cpp", + "thread": "cpp", + "typeindex": "cpp", + "optional": "cpp", + "string_view": "cpp", + "valarray": "cpp", + "algorithm": "cpp", + "regex": "cpp", + "iterator": "cpp", + "map": "cpp", + "memory_resource": "cpp", + "random": "cpp", + "set": "cpp", + "condition_variable": "cpp", + "bfs.h": "c" + }, + "C_Cpp.default.includePath": [ + "${workspaceFolder}/", + "${workspaceFolder}/../../common" + ], + "C_Cpp.dimInactiveRegions": false +} diff --git a/md/grid/.vscode/tasks.json 
b/md/grid/.vscode/tasks.json new file mode 100644 index 0000000..5fc847d --- /dev/null +++ b/md/grid/.vscode/tasks.json @@ -0,0 +1,35 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=733558 + // for the documentation about the tasks.json format + "version": "2.0.0", + "tasks": [ + { + "type": "shell", + "label": "Biuld host", + "command": "/usr/bin/g++", + "args": [ + "-g", + "-march=native", + "-O3", + "-opt-prefetch", +// "-std=c++98", + "-I/usr/include/", + "-I${workspaceFolder}/", + "-I${workspaceFolder}/../../common/", + "-I/tools/Xilinx/Vivado/2018.3/include/", + "${workspaceFolder}/fft.c", + "${workspaceFolder}/local_support.c", + "${workspaceFolder}/../../common/support.c", + "${workspaceFolder}/../../common/harness.c", + "-o", + "${workspaceFolder}/build/host", + ], + "options": { + "cwd": "/usr/bin" + }, + "problemMatcher": [ + "$gcc" + ] + } + ] +} \ No newline at end of file diff --git a/md/grid/.vscode/tasks.json.bak b/md/grid/.vscode/tasks.json.bak new file mode 100755 index 0000000..db59994 --- /dev/null +++ b/md/grid/.vscode/tasks.json.bak @@ -0,0 +1,37 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=733558 + // for the documentation about the tasks.json format + "version": "2.0.0", + "tasks": [ + { + "type": "shell", + "label": "Biuld host", + "command": "/usr/bin/g++", + "args": [ + "-g", + "-D__DEBUG__", + "-march=native", "-O3","-opt-prefetch", + "-std=c++11", + "-I/usr/include/", + "-I${workspaceFolder}/inc/", + "-I/tools/Xilinx/Vivado/2018.3/include/", + "${workspaceFolder}/src/aux.cpp", + "${workspaceFolder}/src/net_tail.cpp", + "${workspaceFolder}/src/IRB_Big_CU.cpp", + "${workspaceFolder}/src/net_head.cpp", + "${workspaceFolder}/src/net_pwc.cpp", + "${workspaceFolder}/src/host.cpp", + "-L/tools/Xilinx/Vivado/2018.3/lnx64/tools/opencv/opencv_gcc", + "-lopencv_core", "-lopencv_highgui", "-lopencv_features2d", "-lopencv_flann", "-lopencv_imgproc", + "-o", + "${workspaceFolder}/build/host", + ], + "options": { + "cwd": "/usr/bin" + 
}, + "problemMatcher": [ + "$gcc" + ] + } + ] +} diff --git a/md/grid/Makefile b/md/grid/Makefile index c33e95d..885f65a 100644 --- a/md/grid/Makefile +++ b/md/grid/Makefile @@ -1,23 +1,159 @@ -KERN=md -ALG=grid +#First check if the sysroot is defined. +# Run Target: +# hw - Compile for hardware +# emu - Compile for emulation (Default) +# cpu_emu - Quick compile for cpu emulation trating all HW functions as CPU functions +TARGET := emu -CFLAGS?=-O3 -Wall -Wno-unused-label +#ifndef SYSROOT +#ifeq ($(TARGET), hw) +# $(error SYSROOT is not set) +#endif +#endif -SRCS=$(KERN).c local_support.c ../../common/support.c -FILES=$(SRCS) $(KERN).h ../../common/support.h -$(KERN): $(FILES) ../../common/harness.c - $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c +DSA_PATH := ./DSA/ultra.dsa +OVERLAY := md_grid +BOARD := ZCU102 +PROC := psu_cortexa53 +TARGET_OS := linux -run: $(KERN) input.data check.data - ./$(KERN) input.data check.data +#Head definition +TOP_FUNCTION = md +TOP_FILE = md.c -generate: $(FILES) generate.c - $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c - ./generate +#----------------- +# 0 -> 75 +# 1 -> 100 +# 2 -> 150 +# 3-> 200 +# 4-> 300 +CLKID = 1 -hls: $(KERN).c $(KERN).h - vivado_hls hls.tcl -clean: - rm -f $(KERN) generate output.data +HW_FLAGS := +ifneq ($(TARGET), cpu_emu) + HW_FLAGS += -sds-hw $(TOP_FUNCTION) $(CURDIR)/$(TOP_FILE) -clkid $(CLKID) -sds-end +endif + +BUILD_DIR := $(CURDIR)/build +TEST_DIR := $(BUILD_DIR)/$(BOARD)-$(OVERLAY) + +# Emulation Mode: +# debug - Include debug data +# optimized - Exclude debug data (Default) +EMU_MODE := optimized + + + +EMU_FLAGS := +ifneq ($(TARGET), hw) + EMU_FLAGS := -mno-bitstream -mno-boot-files -emulation $(EMU_MODE) +endif + + +#CFLAGS = -g -Wall -O3 -c -I$(CURDIR)/inc/ -fno-builtin -D__HW__ -Wno-unused-label #-D__DEBUG__ -D__CHECK_REULTS_PER_LAYER__ +CFLAGS = -Wall -O3 -c -I$(CURDIR)/../../common -I$(CURDIR)/ -fno-builtin -Wno-unused-label +CFLAGS += -MT"$@" -MMD 
-MP -MF"$(@:%.o=%.d)" -MT"$(@)" +LFLAGS = "$@" "$<" +#SDSFLAGS := -sds-pf $(CURDIR)/$(BOARD)/platforms/$(OVERLAY) -target-os linux +SDSFLAGS := -sds-pf zcu102 -target-os $(TARGET_OS) + +LDFLAGS := +#ifeq ($(TARGET), hw) +# LDFLAGS += --sysroot=$(SYSROOT) -Wl,-rpath-link=$(SYSROOT)/lib/aarch64-linux-gnu/,-rpath-link=$(SYSROOT)/usr/lib/aarch64-linux-gnu/,-rpath-link=$(SYSROOT)/usr/lib/ +# LDFLAGS += -lopencv_core -lopencv_highgui -lopencv_imgproc -lGL -lGLU -lglut +#endif + +C_COMP := sdscc $(SDSFLAGS) +OBJECTS = $(TEST_DIR)/md.o +OBJECTS += $(TEST_DIR)/local_support.o +LOGFILE = $(TEST_DIR)/_sds/reports/sds.log + +# Check Rule Builds the Sources and Executes on Specified Target +check: all +ifneq ($(TARGET), hw) +ifeq ($(TARGET_OS), linux) +ifeq ($(EMU_MODE), optimized) + cp $(CURDIR)/../../utility/emu_run_no_gui.sh $(TEST_DIR)/emu_run.sh +else + cp $(CURDIR)/../../utility/emu_run.sh $(TEST_DIR)/emu_run.sh +endif + cd $(TEST_DIR) ; ./emu_run.sh $(OVERLAY) +endif +else + $(info "This Release Doesn't Support Automated Hardware Execution") +endif + +all: clean help host exec + + +help: + @echo "usage: make" + @echo + @echo "options:" + @echo "--------" + @echo "help: show help message." + @echo "all: make the SDx platform, and test it." + @echo "platform: make the SDx platform based on the input arguments." + @echo "test: do a simple test after a given platform is made." + @echo "cleantest: clean the test folder." + @echo "clean: clean the test and SDx platforms for the given board." + @echo "cleanall: clean all the platforms for a fresh start." 
+ @echo + @echo "arguments:" + @echo "----------" + @echo "DSA_PATH: path to the dsa file" + @echo " e.g., ./platform/hw/hdmi.dsa" + @echo "PROC: name of the processor that can be targeted" + @echo " e.g., psu_cortexa53" + @echo + @echo "current configuration:" + @echo "----------------------" + @echo "make DSA_PATH=$(DSA_PATH)" + @echo " OVERLAY=$(OVERLAY)" + @echo " BOARD=$(BOARD)" + @echo " PROC=$(PROC)" +# @echo " SYSROOT=$(SYSROOT)" + @echo + +host: + $(MAKE) -C $(CURDIR)/../../common + +exec: cleantest $(OBJECTS) + @mkdir -p $(TEST_DIR) + @echo 'Building Target: $@' + @echo 'Trigerring: SDS++ Linker' + cd $(TEST_DIR) ; $(C_COMP) -Wall $(CURDIR)/../../common/build/support.o $(CURDIR)/../../common/build/harness.o $(LDFLAGS) -o $(OVERLAY) $(OBJECTS) $(EMU_FLAGS) + @echo 'SDx Completed Building Target: $@' + @echo + @tput setaf 2; \ + echo "PASS: Platform successfully tested."; \ + tput sgr0; + @echo + + +$(TEST_DIR)/%.o: $(CURDIR)/%.c + @echo 'Building file: $<' + @echo 'Invoking: SDSCC Compiler' + @mkdir -p $(TEST_DIR) + cd $(TEST_DIR) ; $(C_COMP) $(CFLAGS) -o $(LFLAGS) $(HW_FLAGS) + @echo 'Finished building: $<' + @echo ' ' +ifeq ($(TARGET), cpu_emu) + @echo 'Ignore the warning which states that hw function is not a HW accelerator but has pragma applied for cpu_emu mode' + @echo ' ' +endif + + +cleantest: + @rm -rf $(BUILD_DIR) + +clean: cleantest + rm -rf .build + rm -rf $(BOARD)/platforms + rm -rf ./$(BOARD)/hw + +cleanall: clean + rm -rf ./*/platforms + rm -rf ./*/hw diff --git a/md/grid/Makefile_General b/md/grid/Makefile_General new file mode 100644 index 0000000..c33e95d --- /dev/null +++ b/md/grid/Makefile_General @@ -0,0 +1,23 @@ +KERN=md +ALG=grid + +CFLAGS?=-O3 -Wall -Wno-unused-label + +SRCS=$(KERN).c local_support.c ../../common/support.c +FILES=$(SRCS) $(KERN).h ../../common/support.h + +$(KERN): $(FILES) ../../common/harness.c + $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c + +run: $(KERN) input.data check.data + 
./$(KERN) input.data check.data + +generate: $(FILES) generate.c + $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c + ./generate + +hls: $(KERN).c $(KERN).h + vivado_hls hls.tcl + +clean: + rm -f $(KERN) generate output.data diff --git a/md/grid/local_support.c b/md/grid/local_support.c index 7315b94..6e1079b 100644 --- a/md/grid/local_support.c +++ b/md/grid/local_support.c @@ -1,13 +1,35 @@ #include "md.h" #include +#ifdef __SDSCC__ +#include "utils/sds_utils.h" +#endif + int INPUT_SIZE = sizeof(struct bench_args_t); #define EPSILON ((TYPE)1.0e-6) -void run_benchmark( void *vargs ) { +void run_benchmark(void *vargs) +{ struct bench_args_t *args = (struct bench_args_t *)vargs; - md( args->n_points, args->force, args->position ); + +#ifdef __SDSCC__ + reset(); + start(); +#endif + + md(args->n_points, args->force, args->position); + +#ifdef __SDSCC__ + stop(); + + uint64_t compute_Total_avg = avg_cpu_cycles(); + double delay = (compute_Total_avg * (1000000.0 / (sds_clock_frequency()))); + //AP freq is 1.2GHz + printf("-> Number of CPU cycles halted for kernel %llu \t~\t %f(uS).\n", compute_Total_avg, delay); + printf("-> For this AP Thick/S is %d.\n", sds_clock_frequency()); +#endif + } /* Input format: @@ -17,31 +39,34 @@ int32_t[blockSide^3]: grid populations TYPE[blockSide^3*densityFactor]: positions */ -void input_to_data(int fd, void *vdata) { +void input_to_data(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; char *p, *s; // Zero-out everything. 
- memset(vdata,0,sizeof(struct bench_args_t)); + memset(vdata, 0, sizeof(struct bench_args_t)); // Load input string p = readfile(fd); - s = find_section_start(p,1); - parse_int32_t_array(s, (int32_t *)(data->n_points), blockSide*blockSide*blockSide); + s = find_section_start(p, 1); + parse_int32_t_array(s, (int32_t *)(data->n_points), blockSide * blockSide * blockSide); - s = find_section_start(p,2); - STAC(parse_,TYPE,_array)(s, (double *)(data->position), 3*blockSide*blockSide*blockSide*densityFactor); + s = find_section_start(p, 2); + STAC(parse_, TYPE, _array) + (s, (double *)(data->position), 3 * blockSide * blockSide * blockSide * densityFactor); free(p); } -void data_to_input(int fd, void *vdata) { +void data_to_input(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; write_section_header(fd); - write_int32_t_array(fd, (int32_t *)(data->n_points), blockSide*blockSide*blockSide); + write_int32_t_array(fd, (int32_t *)(data->n_points), blockSide * blockSide * blockSide); write_section_header(fd); - STAC(write_,TYPE,_array)(fd, (double *)(data->position), 3*blockSide*blockSide*blockSide*densityFactor); - + STAC(write_, TYPE, _array) + (fd, (double *)(data->position), 3 * blockSide * blockSide * blockSide * densityFactor); } /* Output format: @@ -49,43 +74,52 @@ void data_to_input(int fd, void *vdata) { TYPE[blockSide^3*densityFactor]: force */ -void output_to_data(int fd, void *vdata) { +void output_to_data(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; char *p, *s; // Zero-out everything. 
- memset(vdata,0,sizeof(struct bench_args_t)); + memset(vdata, 0, sizeof(struct bench_args_t)); // Load input string p = readfile(fd); - s = find_section_start(p,1); - STAC(parse_,TYPE,_array)(s, (double *)data->force, 3*blockSide*blockSide*blockSide*densityFactor); + s = find_section_start(p, 1); + STAC(parse_, TYPE, _array) + (s, (double *)data->force, 3 * blockSide * blockSide * blockSide * densityFactor); free(p); } -void data_to_output(int fd, void *vdata) { +void data_to_output(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; write_section_header(fd); - STAC(write_,TYPE,_array)(fd, (double *)data->force, 3*blockSide*blockSide*blockSide*densityFactor); + STAC(write_, TYPE, _array) + (fd, (double *)data->force, 3 * blockSide * blockSide * blockSide * densityFactor); } -int check_data( void *vdata, void *vref ) { +int check_data(void *vdata, void *vref) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; struct bench_args_t *ref = (struct bench_args_t *)vref; int has_errors = 0; int i, j, k, d; TYPE diff_x, diff_y, diff_z; - for(i=0; iforce[i][j][k][d].x - ref->force[i][j][k][d].x; diff_y = data->force[i][j][k][d].y - ref->force[i][j][k][d].y; diff_z = data->force[i][j][k][d].z - ref->force[i][j][k][d].z; - has_errors |= (diff_x<-EPSILON) || (EPSILON(y) ? (x) : (y) ) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) +#define MAX(x, y) ((x) > (y) ? 
(x) : (y)) -void md( int n_points[blockSide][blockSide][blockSide], - dvector_t force[blockSide][blockSide][blockSide][densityFactor], - dvector_t position[blockSide][blockSide][blockSide][densityFactor] ) +void md(int n_points[blockSide][blockSide][blockSide], + dvector_t force[blockSide][blockSide][blockSide][densityFactor], + dvector_t position[blockSide][blockSide][blockSide][densityFactor]) { ivector_t b0, b1; // b0 is the current block, b1 is b0 or a neighboring block - dvector_t p, q; // p is a point in b0, q is a point in either b0 or b1 + dvector_t p, q; // p is a point in b0, q is a point in either b0 or b1 int32_t p_idx, q_idx; TYPE dx, dy, dz, r2inv, r6inv, potential, f; - // Iterate over the grid, block by block - loop_grid0_x: for( b0.x=0; b0.x 75 +# 1 -> 100 +# 2 -> 150 +# 3-> 200 +# 4-> 300 +CLKID = 1 -hls: $(KERN).c $(KERN).h - vivado_hls hls.tcl -clean: - rm -f $(KERN) generate output.data +HW_FLAGS := +ifneq ($(TARGET), cpu_emu) + HW_FLAGS += -sds-hw $(TOP_FUNCTION) $(CURDIR)/$(TOP_FILE) -clkid $(CLKID) -sds-end +endif + +BUILD_DIR := $(CURDIR)/build +TEST_DIR := $(BUILD_DIR)/$(BOARD)-$(OVERLAY) + +# Emulation Mode: +# debug - Include debug data +# optimized - Exclude debug data (Default) +EMU_MODE := optimized + + + +EMU_FLAGS := +ifneq ($(TARGET), hw) + EMU_FLAGS := -mno-bitstream -mno-boot-files -emulation $(EMU_MODE) +endif + + +#CFLAGS = -g -Wall -O3 -c -I$(CURDIR)/inc/ -fno-builtin -D__HW__ -Wno-unused-label #-D__DEBUG__ -D__CHECK_REULTS_PER_LAYER__ +CFLAGS = -Wall -O3 -c -I$(CURDIR)/../../common -I$(CURDIR)/ -fno-builtin -Wno-unused-label +CFLAGS += -MT"$@" -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@)" +LFLAGS = "$@" "$<" +#SDSFLAGS := -sds-pf $(CURDIR)/$(BOARD)/platforms/$(OVERLAY) -target-os linux +SDSFLAGS := -sds-pf zcu102 -target-os $(TARGET_OS) + +LDFLAGS := +#ifeq ($(TARGET), hw) +# LDFLAGS += --sysroot=$(SYSROOT) 
-Wl,-rpath-link=$(SYSROOT)/lib/aarch64-linux-gnu/,-rpath-link=$(SYSROOT)/usr/lib/aarch64-linux-gnu/,-rpath-link=$(SYSROOT)/usr/lib/ +# LDFLAGS += -lopencv_core -lopencv_highgui -lopencv_imgproc -lGL -lGLU -lglut +#endif + +C_COMP := sdscc $(SDSFLAGS) +OBJECTS = $(TEST_DIR)/md.o +OBJECTS += $(TEST_DIR)/local_support.o +LOGFILE = $(TEST_DIR)/_sds/reports/sds.log + +# Check Rule Builds the Sources and Executes on Specified Target +check: all +ifneq ($(TARGET), hw) +ifeq ($(TARGET_OS), linux) +ifeq ($(EMU_MODE), optimized) + cp $(CURDIR)/../../utility/emu_run_no_gui.sh $(TEST_DIR)/emu_run.sh +else + cp $(CURDIR)/../../utility/emu_run.sh $(TEST_DIR)/emu_run.sh +endif + cd $(TEST_DIR) ; ./emu_run.sh $(OVERLAY) +endif +else + $(info "This Release Doesn't Support Automated Hardware Execution") +endif + +all: clean help host exec + + +help: + @echo "usage: make" + @echo + @echo "options:" + @echo "--------" + @echo "help: show help message." + @echo "all: make the SDx platform, and test it." + @echo "platform: make the SDx platform based on the input arguments." + @echo "test: do a simple test after a given platform is made." + @echo "cleantest: clean the test folder." + @echo "clean: clean the test and SDx platforms for the given board." + @echo "cleanall: clean all the platforms for a fresh start." 
+ @echo + @echo "arguments:" + @echo "----------" + @echo "DSA_PATH: path to the dsa file" + @echo " e.g., ./platform/hw/hdmi.dsa" + @echo "PROC: name of the processor that can be targeted" + @echo " e.g., psu_cortexa53" + @echo + @echo "current configuration:" + @echo "----------------------" + @echo "make DSA_PATH=$(DSA_PATH)" + @echo " OVERLAY=$(OVERLAY)" + @echo " BOARD=$(BOARD)" + @echo " PROC=$(PROC)" +# @echo " SYSROOT=$(SYSROOT)" + @echo + +host: + $(MAKE) -C $(CURDIR)/../../common + +exec: cleantest $(OBJECTS) + @mkdir -p $(TEST_DIR) + @echo 'Building Target: $@' + @echo 'Trigerring: SDS++ Linker' + cd $(TEST_DIR) ; $(C_COMP) -Wall $(CURDIR)/../../common/build/support.o $(CURDIR)/../../common/build/harness.o $(LDFLAGS) -o $(OVERLAY) $(OBJECTS) $(EMU_FLAGS) + @echo 'SDx Completed Building Target: $@' + @echo + @tput setaf 2; \ + echo "PASS: Platform successfully tested."; \ + tput sgr0; + @echo + + +$(TEST_DIR)/%.o: $(CURDIR)/%.c + @echo 'Building file: $<' + @echo 'Invoking: SDSCC Compiler' + @mkdir -p $(TEST_DIR) + cd $(TEST_DIR) ; $(C_COMP) $(CFLAGS) -o $(LFLAGS) $(HW_FLAGS) + @echo 'Finished building: $<' + @echo ' ' +ifeq ($(TARGET), cpu_emu) + @echo 'Ignore the warning which states that hw function is not a HW accelerator but has pragma applied for cpu_emu mode' + @echo ' ' +endif + + +cleantest: + @rm -rf $(BUILD_DIR) + +clean: cleantest + rm -rf .build + rm -rf $(BOARD)/platforms + rm -rf ./$(BOARD)/hw + +cleanall: clean + rm -rf ./*/platforms + rm -rf ./*/hw diff --git a/md/knn/Makefile_General b/md/knn/Makefile_General new file mode 100644 index 0000000..b130461 --- /dev/null +++ b/md/knn/Makefile_General @@ -0,0 +1,23 @@ +KERN=md +ALG=knn + +CFLAGS?=-O3 -Wall -Wno-unused-label + +SRCS=$(KERN).c local_support.c ../../common/support.c +FILES=$(SRCS) $(KERN).h ../../common/support.h + +$(KERN): $(FILES) ../../common/harness.c + $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c + +run: $(KERN) input.data check.data + ./$(KERN) 
input.data check.data + +generate: $(FILES) generate.c + $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c + ./generate + +hls: $(KERN).c $(KERN).h + vivado_hls hls.tcl + +clean: + rm -f $(KERN) generate output.data diff --git a/md/knn/local_support.c b/md/knn/local_support.c index eb92b37..bc486e5 100644 --- a/md/knn/local_support.c +++ b/md/knn/local_support.c @@ -1,15 +1,37 @@ #include "md.h" #include +#ifdef __SDSCC__ +#include "utils/sds_utils.h" +#endif + int INPUT_SIZE = sizeof(struct bench_args_t); #define EPSILON ((TYPE)1.0e-6) -void run_benchmark( void *vargs ) { +void run_benchmark(void *vargs) +{ struct bench_args_t *args = (struct bench_args_t *)vargs; - md_kernel( args->force_x, args->force_y, args->force_z, - args->position_x, args->position_y, args->position_z, - args->NL ); + +#ifdef __SDSCC__ + reset(); + start(); +#endif + + md_kernel(args->force_x, args->force_y, args->force_z, + args->position_x, args->position_y, args->position_z, + args->NL); + +#ifdef __SDSCC__ + stop(); + + uint64_t compute_Total_avg = avg_cpu_cycles(); + double delay = (compute_Total_avg * (1000000.0 / (sds_clock_frequency()))); + //AP freq is 1.2GHz + printf("-> Number of CPU cycles halted for kernel %llu \t~\t %f(uS).\n", compute_Total_avg, delay); + printf("-> For this AP Thick/S is %d.\n", sds_clock_frequency()); +#endif + } /* Input format: @@ -23,43 +45,50 @@ TYPE[nAtoms]: z positions int32_t[nAtoms*maxNeighbors]: neighbor list */ -void input_to_data(int fd, void *vdata) { +void input_to_data(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; char *p, *s; // Zero-out everything. 
- memset(vdata,0,sizeof(struct bench_args_t)); + memset(vdata, 0, sizeof(struct bench_args_t)); // Load input string p = readfile(fd); - s = find_section_start(p,1); - STAC(parse_,TYPE,_array)(s, data->position_x, nAtoms); + s = find_section_start(p, 1); + STAC(parse_, TYPE, _array) + (s, data->position_x, nAtoms); - s = find_section_start(p,2); - STAC(parse_,TYPE,_array)(s, data->position_y, nAtoms); + s = find_section_start(p, 2); + STAC(parse_, TYPE, _array) + (s, data->position_y, nAtoms); - s = find_section_start(p,3); - STAC(parse_,TYPE,_array)(s, data->position_z, nAtoms); + s = find_section_start(p, 3); + STAC(parse_, TYPE, _array) + (s, data->position_z, nAtoms); - s = find_section_start(p,4); - parse_int32_t_array(s, data->NL, nAtoms*maxNeighbors); + s = find_section_start(p, 4); + parse_int32_t_array(s, data->NL, nAtoms * maxNeighbors); free(p); } -void data_to_input(int fd, void *vdata) { +void data_to_input(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; write_section_header(fd); - STAC(write_,TYPE,_array)(fd, data->position_x, nAtoms); + STAC(write_, TYPE, _array) + (fd, data->position_x, nAtoms); write_section_header(fd); - STAC(write_,TYPE,_array)(fd, data->position_y, nAtoms); + STAC(write_, TYPE, _array) + (fd, data->position_y, nAtoms); write_section_header(fd); - STAC(write_,TYPE,_array)(fd, data->position_z, nAtoms); + STAC(write_, TYPE, _array) + (fd, data->position_z, nAtoms); write_section_header(fd); - write_int32_t_array(fd, data->NL, nAtoms*maxNeighbors); - + write_int32_t_array(fd, data->NL, nAtoms * maxNeighbors); } /* Output format: @@ -71,52 +100,62 @@ TYPE[nAtoms]: new y force TYPE[nAtoms]: new z force */ -void output_to_data(int fd, void *vdata) { +void output_to_data(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; char *p, *s; // Zero-out everything. 
- memset(vdata,0,sizeof(struct bench_args_t)); + memset(vdata, 0, sizeof(struct bench_args_t)); // Load input string p = readfile(fd); - s = find_section_start(p,1); - STAC(parse_,TYPE,_array)(s, data->force_x, nAtoms); + s = find_section_start(p, 1); + STAC(parse_, TYPE, _array) + (s, data->force_x, nAtoms); - s = find_section_start(p,2); - STAC(parse_,TYPE,_array)(s, data->force_y, nAtoms); + s = find_section_start(p, 2); + STAC(parse_, TYPE, _array) + (s, data->force_y, nAtoms); - s = find_section_start(p,3); - STAC(parse_,TYPE,_array)(s, data->force_z, nAtoms); + s = find_section_start(p, 3); + STAC(parse_, TYPE, _array) + (s, data->force_z, nAtoms); free(p); } -void data_to_output(int fd, void *vdata) { +void data_to_output(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; write_section_header(fd); - STAC(write_,TYPE,_array)(fd, data->force_x, nAtoms); + STAC(write_, TYPE, _array) + (fd, data->force_x, nAtoms); write_section_header(fd); - STAC(write_,TYPE,_array)(fd, data->force_y, nAtoms); + STAC(write_, TYPE, _array) + (fd, data->force_y, nAtoms); write_section_header(fd); - STAC(write_,TYPE,_array)(fd, data->force_z, nAtoms); + STAC(write_, TYPE, _array) + (fd, data->force_z, nAtoms); } -int check_data( void *vdata, void *vref ) { +int check_data(void *vdata, void *vref) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; struct bench_args_t *ref = (struct bench_args_t *)vref; int has_errors = 0; int i; TYPE diff_x, diff_y, diff_z; - for( i=0; iforce_x[i] - ref->force_x[i]; diff_y = data->force_y[i] - ref->force_y[i]; diff_z = data->force_z[i] - ref->force_z[i]; - has_errors |= (diff_x<-EPSILON) || (EPSILON 75 +# 1 -> 100 +# 2 -> 150 +# 3-> 200 +# 4-> 300 +CLKID = 1 -hls: $(KERN).c $(KERN).h - vivado_hls hls.tcl -clean: - rm -f $(KERN) generate output.data +HW_FLAGS := +ifneq ($(TARGET), cpu_emu) + HW_FLAGS += -sds-hw $(TOP_FUNCTION) $(CURDIR)/$(TOP_FILE) -clkid $(CLKID) -sds-end +endif + +BUILD_DIR := 
$(CURDIR)/build +TEST_DIR := $(BUILD_DIR)/$(BOARD)-$(OVERLAY) + +# Emulation Mode: +# debug - Include debug data +# optimized - Exclude debug data (Default) +EMU_MODE := optimized + + + +EMU_FLAGS := +ifneq ($(TARGET), hw) + EMU_FLAGS := -mno-bitstream -mno-boot-files -emulation $(EMU_MODE) +endif + + +#CFLAGS = -g -Wall -O3 -c -I$(CURDIR)/inc/ -fno-builtin -D__HW__ -Wno-unused-label #-D__DEBUG__ -D__CHECK_REULTS_PER_LAYER__ +CFLAGS = -Wall -O3 -c -I$(CURDIR)/../../common -I$(CURDIR)/ -fno-builtin -Wno-unused-label +CFLAGS += -MT"$@" -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@)" +LFLAGS = "$@" "$<" +#SDSFLAGS := -sds-pf $(CURDIR)/$(BOARD)/platforms/$(OVERLAY) -target-os linux +SDSFLAGS := -sds-pf zcu102 -target-os $(TARGET_OS) + +LDFLAGS := +#ifeq ($(TARGET), hw) +# LDFLAGS += --sysroot=$(SYSROOT) -Wl,-rpath-link=$(SYSROOT)/lib/aarch64-linux-gnu/,-rpath-link=$(SYSROOT)/usr/lib/aarch64-linux-gnu/,-rpath-link=$(SYSROOT)/usr/lib/ +# LDFLAGS += -lopencv_core -lopencv_highgui -lopencv_imgproc -lGL -lGLU -lglut +#endif + +C_COMP := sdscc $(SDSFLAGS) +OBJECTS = $(TEST_DIR)/nw.o +OBJECTS += $(TEST_DIR)/local_support.o +LOGFILE = $(TEST_DIR)/_sds/reports/sds.log + +# Check Rule Builds the Sources and Executes on Specified Target +check: all +ifneq ($(TARGET), hw) +ifeq ($(TARGET_OS), linux) +ifeq ($(EMU_MODE), optimized) + cp $(CURDIR)/../../utility/emu_run_no_gui.sh $(TEST_DIR)/emu_run.sh +else + cp $(CURDIR)/../../utility/emu_run.sh $(TEST_DIR)/emu_run.sh +endif + cd $(TEST_DIR) ; ./emu_run.sh $(OVERLAY) +endif +else + $(info "This Release Doesn't Support Automated Hardware Execution") +endif + +all: clean help host exec + + +help: + @echo "usage: make" + @echo + @echo "options:" + @echo "--------" + @echo "help: show help message." + @echo "all: make the SDx platform, and test it." + @echo "platform: make the SDx platform based on the input arguments." + @echo "test: do a simple test after a given platform is made." + @echo "cleantest: clean the test folder." 
+ @echo "clean: clean the test and SDx platforms for the given board." + @echo "cleanall: clean all the platforms for a fresh start." + @echo + @echo "arguments:" + @echo "----------" + @echo "DSA_PATH: path to the dsa file" + @echo " e.g., ./platform/hw/hdmi.dsa" + @echo "PROC: name of the processor that can be targeted" + @echo " e.g., psu_cortexa53" + @echo + @echo "current configuration:" + @echo "----------------------" + @echo "make DSA_PATH=$(DSA_PATH)" + @echo " OVERLAY=$(OVERLAY)" + @echo " BOARD=$(BOARD)" + @echo " PROC=$(PROC)" +# @echo " SYSROOT=$(SYSROOT)" + @echo + +host: + $(MAKE) -C $(CURDIR)/../../common + +exec: cleantest $(OBJECTS) + @mkdir -p $(TEST_DIR) + @echo 'Building Target: $@' + @echo 'Trigerring: SDS++ Linker' + cd $(TEST_DIR) ; $(C_COMP) -Wall $(CURDIR)/../../common/build/support.o $(CURDIR)/../../common/build/harness.o $(LDFLAGS) -o $(OVERLAY) $(OBJECTS) $(EMU_FLAGS) + @echo 'SDx Completed Building Target: $@' + @echo + @tput setaf 2; \ + echo "PASS: Platform successfully tested."; \ + tput sgr0; + @echo + + +$(TEST_DIR)/%.o: $(CURDIR)/%.c + @echo 'Building file: $<' + @echo 'Invoking: SDSCC Compiler' + @mkdir -p $(TEST_DIR) + cd $(TEST_DIR) ; $(C_COMP) $(CFLAGS) -o $(LFLAGS) $(HW_FLAGS) + @echo 'Finished building: $<' + @echo ' ' +ifeq ($(TARGET), cpu_emu) + @echo 'Ignore the warning which states that hw function is not a HW accelerator but has pragma applied for cpu_emu mode' + @echo ' ' +endif + + +cleantest: + @rm -rf $(BUILD_DIR) + +clean: cleantest + rm -rf .build + rm -rf $(BOARD)/platforms + rm -rf ./$(BOARD)/hw + +cleanall: clean + rm -rf ./*/platforms + rm -rf ./*/hw diff --git a/nw/nw/Makefile_General b/nw/nw/Makefile_General new file mode 100644 index 0000000..f2b0f67 --- /dev/null +++ b/nw/nw/Makefile_General @@ -0,0 +1,23 @@ +KERN=nw +ALG=nw + +CFLAGS?=-O3 -Wall -Wno-unused-label + +SRCS=$(KERN).c local_support.c ../../common/support.c +FILES=$(SRCS) $(KERN).h ../../common/support.h + +$(KERN): $(FILES) 
../../common/harness.c + $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c + +run: $(KERN) input.data check.data + ./$(KERN) input.data check.data + +generate: $(FILES) generate.c + $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c + ./generate + +hls: $(KERN).c $(KERN).h + vivado_hls hls.tcl + +clean: + rm -f $(KERN) generate output.data diff --git a/nw/nw/local_support.c b/nw/nw/local_support.c index 1d45871..fbeafde 100644 --- a/nw/nw/local_support.c +++ b/nw/nw/local_support.c @@ -1,11 +1,32 @@ #include "nw.h" #include +#ifdef __SDSCC__ +#include "utils/sds_utils.h" +#endif + int INPUT_SIZE = sizeof(struct bench_args_t); -void run_benchmark( void *vargs ) { +void run_benchmark(void *vargs) +{ struct bench_args_t *args = (struct bench_args_t *)vargs; - needwun( args->seqA, args->seqB, args->alignedA, args->alignedB, args->M, args->ptr); + +#ifdef __SDSCC__ + reset(); + start(); +#endif + + needwun(args->seqA, args->seqB, args->alignedA, args->alignedB, args->M, args->ptr); + +#ifdef __SDSCC__ + stop(); + + uint64_t compute_Total_avg = avg_cpu_cycles(); + double delay = (compute_Total_avg * (1000000.0 / (sds_clock_frequency()))); + //AP freq is 1.2GHz + printf("-> Number of CPU cycles halted for kernel %llu \t~\t %f(uS).\n", compute_Total_avg, delay); + printf("-> For this AP Thick/S is %d.\n", sds_clock_frequency()); +#endif } /* Input format: @@ -15,24 +36,25 @@ char[]: sequence A char[]: sequence B */ -void input_to_data(int fd, void *vdata) { +void input_to_data(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; char *p, *s; // Zero-out everything. 
- memset(vdata,0,sizeof(struct bench_args_t)); + memset(vdata, 0, sizeof(struct bench_args_t)); // Load input string p = readfile(fd); - s = find_section_start(p,1); + s = find_section_start(p, 1); parse_string(s, data->seqA, ALEN); - s = find_section_start(p,2); + s = find_section_start(p, 2); parse_string(s, data->seqB, BLEN); free(p); - } -void data_to_input(int fd, void *vdata) { +void data_to_input(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; write_section_header(fd); @@ -51,41 +73,44 @@ char[sum_size]: aligned sequence A char[sum_size]: aligned sequence B */ -void output_to_data(int fd, void *vdata) { +void output_to_data(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; char *p, *s; // Zero-out everything. - memset(vdata,0,sizeof(struct bench_args_t)); + memset(vdata, 0, sizeof(struct bench_args_t)); // Load input string p = readfile(fd); - s = find_section_start(p,1); - parse_string(s, data->alignedA, ALEN+BLEN); + s = find_section_start(p, 1); + parse_string(s, data->alignedA, ALEN + BLEN); - s = find_section_start(p,2); - parse_string(s, data->alignedB, ALEN+BLEN); + s = find_section_start(p, 2); + parse_string(s, data->alignedB, ALEN + BLEN); free(p); } -void data_to_output(int fd, void *vdata) { +void data_to_output(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; write_section_header(fd); - write_string(fd, data->alignedA, ALEN+BLEN); + write_string(fd, data->alignedA, ALEN + BLEN); write_section_header(fd); - write_string(fd, data->alignedB, ALEN+BLEN); + write_string(fd, data->alignedB, ALEN + BLEN); write_section_header(fd); } -int check_data( void *vdata, void *vref ) { +int check_data(void *vdata, void *vref) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; struct bench_args_t *ref = (struct bench_args_t *)vref; int has_errors = 0; - has_errors |= memcmp(data->alignedA, ref->alignedA, ALEN+BLEN); - has_errors |= memcmp(data->alignedB, 
ref->alignedB, ALEN+BLEN); + has_errors |= memcmp(data->alignedA, ref->alignedA, ALEN + BLEN); + has_errors |= memcmp(data->alignedB, ref->alignedB, ALEN + BLEN); // Return true if it's correct. return !has_errors; diff --git a/nw/nw/nw.c b/nw/nw/nw.c index 3d21a6a..3b47cf0 100644 --- a/nw/nw/nw.c +++ b/nw/nw/nw.c @@ -8,48 +8,65 @@ #define SKIPA '^' #define SKIPB '<' -#define MAX(A,B) ( ((A)>(B))?(A):(B) ) +#define MAX(A, B) (((A) > (B)) ? (A) : (B)) void needwun(char SEQA[ALEN], char SEQB[BLEN], - char alignedA[ALEN+BLEN], char alignedB[ALEN+BLEN], - int M[(ALEN+1)*(BLEN+1)], char ptr[(ALEN+1)*(BLEN+1)]){ + char alignedA[ALEN + BLEN], char alignedB[ALEN + BLEN], + int M[(ALEN + 1) * (BLEN + 1)], char ptr[(ALEN + 1) * (BLEN + 1)]) +{ int score, up_left, up, left, max; int row, row_up, r; int a_idx, b_idx; int a_str_idx, b_str_idx; - init_row: for(a_idx=0; a_idx<(ALEN+1); a_idx++){ +init_row: + for (a_idx = 0; a_idx < (ALEN + 1); a_idx++) + { M[a_idx] = a_idx * GAP_SCORE; } - init_col: for(b_idx=0; b_idx<(BLEN+1); b_idx++){ - M[b_idx*(ALEN+1)] = b_idx * GAP_SCORE; +init_col: + for (b_idx = 0; b_idx < (BLEN + 1); b_idx++) + { + M[b_idx * (ALEN + 1)] = b_idx * GAP_SCORE; } - // Matrix filling loop - fill_out: for(b_idx=1; b_idx<(BLEN+1); b_idx++){ - fill_in: for(a_idx=1; a_idx<(ALEN+1); a_idx++){ - if(SEQA[a_idx-1] == SEQB[b_idx-1]){ +// Matrix filling loop +fill_out: + for (b_idx = 1; b_idx < (BLEN + 1); b_idx++) + { + fill_in: + for (a_idx = 1; a_idx < (ALEN + 1); a_idx++) + { + if (SEQA[a_idx - 1] == SEQB[b_idx - 1]) + { score = MATCH_SCORE; - } else { + } + else + { score = MISMATCH_SCORE; } - row_up = (b_idx-1)*(ALEN+1); - row = (b_idx)*(ALEN+1); + row_up = (b_idx - 1) * (ALEN + 1); + row = (b_idx) * (ALEN + 1); - up_left = M[row_up + (a_idx-1)] + score; - up = M[row_up + (a_idx )] + GAP_SCORE; - left = M[row + (a_idx-1)] + GAP_SCORE; + up_left = M[row_up + (a_idx - 1)] + score; + up = M[row_up + (a_idx)] + GAP_SCORE; + left = M[row + (a_idx - 1)] + GAP_SCORE; 
max = MAX(up_left, MAX(up, left)); M[row + a_idx] = max; - if(max == left){ + if (max == left) + { ptr[row + a_idx] = SKIPB; - } else if(max == up){ + } + else if (max == up) + { ptr[row + a_idx] = SKIPA; - } else{ + } + else + { ptr[row + a_idx] = ALIGN; } } @@ -61,31 +78,40 @@ void needwun(char SEQA[ALEN], char SEQB[BLEN], a_str_idx = 0; b_str_idx = 0; - trace: while(a_idx>0 || b_idx>0) { - r = b_idx*(ALEN+1); - if (ptr[r + a_idx] == ALIGN){ - alignedA[a_str_idx++] = SEQA[a_idx-1]; - alignedB[b_str_idx++] = SEQB[b_idx-1]; +trace: + while (a_idx > 0 || b_idx > 0) + { + r = b_idx * (ALEN + 1); + if (ptr[r + a_idx] == ALIGN) + { + alignedA[a_str_idx++] = SEQA[a_idx - 1]; + alignedB[b_str_idx++] = SEQB[b_idx - 1]; a_idx--; b_idx--; } - else if (ptr[r + a_idx] == SKIPB){ - alignedA[a_str_idx++] = SEQA[a_idx-1]; + else if (ptr[r + a_idx] == SKIPB) + { + alignedA[a_str_idx++] = SEQA[a_idx - 1]; alignedB[b_str_idx++] = '-'; a_idx--; } - else{ // SKIPA + else + { // SKIPA alignedA[a_str_idx++] = '-'; - alignedB[b_str_idx++] = SEQB[b_idx-1]; + alignedB[b_str_idx++] = SEQB[b_idx - 1]; b_idx--; } } - // Pad the result - pad_a: for( ; a_str_idx 75 +# 1 -> 100 +# 2 -> 150 +# 3-> 200 +# 4-> 300 +CLKID = 1 -hls: $(KERN).c $(KERN).h - vivado_hls hls.tcl -clean: - rm -f $(KERN) generate output.data +HW_FLAGS := +ifneq ($(TARGET), cpu_emu) + HW_FLAGS += -sds-hw $(TOP_FUNCTION) $(CURDIR)/$(TOP_FILE) -clkid $(CLKID) -sds-end +endif + +BUILD_DIR := $(CURDIR)/build +TEST_DIR := $(BUILD_DIR)/$(BOARD)-$(OVERLAY) + +# Emulation Mode: +# debug - Include debug data +# optimized - Exclude debug data (Default) +EMU_MODE := optimized + + + +EMU_FLAGS := +ifneq ($(TARGET), hw) + EMU_FLAGS := -mno-bitstream -mno-boot-files -emulation $(EMU_MODE) +endif + + +#CFLAGS = -g -Wall -O3 -c -I$(CURDIR)/inc/ -fno-builtin -D__HW__ -Wno-unused-label #-D__DEBUG__ -D__CHECK_REULTS_PER_LAYER__ +CFLAGS = -Wall -O3 -c -I$(CURDIR)/../../common -I$(CURDIR)/ -fno-builtin -Wno-unused-label +CFLAGS += -MT"$@" -MMD 
-MP -MF"$(@:%.o=%.d)" -MT"$(@)" +LFLAGS = "$@" "$<" +#SDSFLAGS := -sds-pf $(CURDIR)/$(BOARD)/platforms/$(OVERLAY) -target-os linux +SDSFLAGS := -sds-pf zcu102 -target-os $(TARGET_OS) + +LDFLAGS := +#ifeq ($(TARGET), hw) +# LDFLAGS += --sysroot=$(SYSROOT) -Wl,-rpath-link=$(SYSROOT)/lib/aarch64-linux-gnu/,-rpath-link=$(SYSROOT)/usr/lib/aarch64-linux-gnu/,-rpath-link=$(SYSROOT)/usr/lib/ +# LDFLAGS += -lopencv_core -lopencv_highgui -lopencv_imgproc -lGL -lGLU -lglut +#endif + +C_COMP := sdscc $(SDSFLAGS) +OBJECTS = $(TEST_DIR)/stencil.o +OBJECTS += $(TEST_DIR)/local_support.o +LOGFILE = $(TEST_DIR)/_sds/reports/sds.log + +# Check Rule Builds the Sources and Executes on Specified Target +check: all +ifneq ($(TARGET), hw) +ifeq ($(TARGET_OS), linux) +ifeq ($(EMU_MODE), optimized) + cp $(CURDIR)/../../utility/emu_run_no_gui.sh $(TEST_DIR)/emu_run.sh +else + cp $(CURDIR)/../../utility/emu_run.sh $(TEST_DIR)/emu_run.sh +endif + cd $(TEST_DIR) ; ./emu_run.sh $(OVERLAY) +endif +else + $(info "This Release Doesn't Support Automated Hardware Execution") +endif + +all: clean help host exec + + +help: + @echo "usage: make" + @echo + @echo "options:" + @echo "--------" + @echo "help: show help message." + @echo "all: make the SDx platform, and test it." + @echo "platform: make the SDx platform based on the input arguments." + @echo "test: do a simple test after a given platform is made." + @echo "cleantest: clean the test folder." + @echo "clean: clean the test and SDx platforms for the given board." + @echo "cleanall: clean all the platforms for a fresh start." 
+ @echo + @echo "arguments:" + @echo "----------" + @echo "DSA_PATH: path to the dsa file" + @echo " e.g., ./platform/hw/hdmi.dsa" + @echo "PROC: name of the processor that can be targeted" + @echo " e.g., psu_cortexa53" + @echo + @echo "current configuration:" + @echo "----------------------" + @echo "make DSA_PATH=$(DSA_PATH)" + @echo " OVERLAY=$(OVERLAY)" + @echo " BOARD=$(BOARD)" + @echo " PROC=$(PROC)" +# @echo " SYSROOT=$(SYSROOT)" + @echo + +host: + $(MAKE) -C $(CURDIR)/../../common + +exec: cleantest $(OBJECTS) + @mkdir -p $(TEST_DIR) + @echo 'Building Target: $@' + @echo 'Trigerring: SDS++ Linker' + cd $(TEST_DIR) ; $(C_COMP) -Wall $(CURDIR)/../../common/build/support.o $(CURDIR)/../../common/build/harness.o $(LDFLAGS) -o $(OVERLAY) $(OBJECTS) $(EMU_FLAGS) + @echo 'SDx Completed Building Target: $@' + @echo + @tput setaf 2; \ + echo "PASS: Platform successfully tested."; \ + tput sgr0; + @echo + + +$(TEST_DIR)/%.o: $(CURDIR)/%.c + @echo 'Building file: $<' + @echo 'Invoking: SDSCC Compiler' + @mkdir -p $(TEST_DIR) + cd $(TEST_DIR) ; $(C_COMP) $(CFLAGS) -o $(LFLAGS) $(HW_FLAGS) + @echo 'Finished building: $<' + @echo ' ' +ifeq ($(TARGET), cpu_emu) + @echo 'Ignore the warning which states that hw function is not a HW accelerator but has pragma applied for cpu_emu mode' + @echo ' ' +endif + + +cleantest: + @rm -rf $(BUILD_DIR) + +clean: cleantest + rm -rf .build + rm -rf $(BOARD)/platforms + rm -rf ./$(BOARD)/hw + +cleanall: clean + rm -rf ./*/platforms + rm -rf ./*/hw diff --git a/stencil/stencil2d/Makefile_General b/stencil/stencil2d/Makefile_General new file mode 100644 index 0000000..66a6052 --- /dev/null +++ b/stencil/stencil2d/Makefile_General @@ -0,0 +1,23 @@ +KERN=stencil +ALG=stencil2d + +CFLAGS?=-O3 -Wall -Wno-unused-label + +SRCS=$(KERN).c local_support.c ../../common/support.c +FILES=$(SRCS) $(KERN).h ../../common/support.h + +$(KERN): $(FILES) ../../common/harness.c + $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c + 
+run: $(KERN) input.data check.data + ./$(KERN) input.data check.data + +generate: $(FILES) generate.c + $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c + ./generate + +hls: $(KERN).c $(KERN).h + vivado_hls hls.tcl + +clean: + rm -f $(KERN) generate output.data diff --git a/stencil/stencil2d/local_support.c b/stencil/stencil2d/local_support.c index 1e44c1e..7af2d7e 100644 --- a/stencil/stencil2d/local_support.c +++ b/stencil/stencil2d/local_support.c @@ -1,13 +1,34 @@ #include "stencil.h" #include +#ifdef __SDSCC__ +#include "utils/sds_utils.h" +#endif + int INPUT_SIZE = sizeof(struct bench_args_t); #define EPSILON (1.0e-6) -void run_benchmark( void *vargs ) { +void run_benchmark(void *vargs) +{ struct bench_args_t *args = (struct bench_args_t *)vargs; - stencil( args->orig, args->sol, args->filter ); +#ifdef __SDSCC__ + reset(); + start(); +#endif + + stencil(args->orig, args->sol, args->filter); + +#ifdef __SDSCC__ + stop(); + + uint64_t compute_Total_avg = avg_cpu_cycles(); + double delay = (compute_Total_avg * (1000000.0 / (sds_clock_frequency()))); + //AP freq is 1.2GHz + printf("-> Number of CPU cycles halted for kernel %llu \t~\t %f(uS).\n", compute_Total_avg, delay); + printf("-> For this AP Thick/S is %d.\n", sds_clock_frequency()); +#endif + } /* Input format: @@ -17,30 +38,36 @@ TYPE[row_size*col_size]: input matrix TYPE[f_size]: filter coefficients */ -void input_to_data(int fd, void *vdata) { +void input_to_data(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; char *p, *s; // Zero-out everything. 
- memset(vdata,0,sizeof(struct bench_args_t)); + memset(vdata, 0, sizeof(struct bench_args_t)); // Load input string p = readfile(fd); - s = find_section_start(p,1); - STAC(parse_,TYPE,_array)(s, data->orig, row_size*col_size); + s = find_section_start(p, 1); + STAC(parse_, TYPE, _array) + (s, data->orig, row_size * col_size); - s = find_section_start(p,2); - STAC(parse_,TYPE,_array)(s, data->filter, f_size); + s = find_section_start(p, 2); + STAC(parse_, TYPE, _array) + (s, data->filter, f_size); free(p); } -void data_to_input(int fd, void *vdata) { +void data_to_input(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; write_section_header(fd); - STAC(write_,TYPE,_array)(fd, data->orig, row_size*col_size); + STAC(write_, TYPE, _array) + (fd, data->orig, row_size * col_size); write_section_header(fd); - STAC(write_,TYPE,_array)(fd, data->filter, f_size); + STAC(write_, TYPE, _array) + (fd, data->filter, f_size); } /* Output format: @@ -48,37 +75,44 @@ void data_to_input(int fd, void *vdata) { TYPE[row_size*col_size]: solution matrix */ -void output_to_data(int fd, void *vdata) { +void output_to_data(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; char *p, *s; // Zero-out everything. 
- memset(vdata,0,sizeof(struct bench_args_t)); + memset(vdata, 0, sizeof(struct bench_args_t)); // Load input string p = readfile(fd); - s = find_section_start(p,1); - STAC(parse_,TYPE,_array)(s, data->sol, row_size*col_size); + s = find_section_start(p, 1); + STAC(parse_, TYPE, _array) + (s, data->sol, row_size * col_size); free(p); } -void data_to_output(int fd, void *vdata) { +void data_to_output(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; write_section_header(fd); - STAC(write_,TYPE,_array)(fd, data->sol, row_size*col_size); + STAC(write_, TYPE, _array) + (fd, data->sol, row_size * col_size); } -int check_data( void *vdata, void *vref ) { +int check_data(void *vdata, void *vref) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; struct bench_args_t *ref = (struct bench_args_t *)vref; int has_errors = 0; int row, col; TYPE diff; - for(row=0; rowsol[row*col_size + col] - ref->sol[row*col_size + col]; - has_errors |= (diff<-EPSILON) || (EPSILONsol[row * col_size + col] - ref->sol[row * col_size + col]; + has_errors |= (diff < -EPSILON) || (EPSILON < diff); } } diff --git a/stencil/stencil2d/stencil.h b/stencil/stencil2d/stencil.h index a1532da..4bfce8b 100644 --- a/stencil/stencil2d/stencil.h +++ b/stencil/stencil2d/stencil.h @@ -15,15 +15,29 @@ //Set number of iterations to execute #define MAX_ITERATION 1 -void stencil( TYPE orig[row_size * col_size], - TYPE sol[row_size * col_size], - TYPE filter[f_size] ); +#pragma SDS data zero_copy( \ + orig [0:row_size * col_size], \ + sol [0:row_size * col_size], \ + filter [0:f_size]) + +#pragma SDS data access_pattern( \ + orig \ + : SEQUENTIAL, \ + sol \ + : SEQUENTIAL, \ + filter \ + : SEQUENTIAL) + +void stencil(TYPE orig[row_size * col_size], + TYPE sol[row_size * col_size], + TYPE filter[f_size]); //////////////////////////////////////////////////////////////////////////////// // Test harness interface code. 
-struct bench_args_t { - TYPE orig[row_size*col_size]; - TYPE sol[row_size*col_size]; +struct bench_args_t +{ + TYPE orig[row_size * col_size]; + TYPE sol[row_size * col_size]; TYPE filter[f_size]; }; diff --git a/stencil/stencil2d/synth.sh b/stencil/stencil2d/synth.sh new file mode 100755 index 0000000..cbb026d --- /dev/null +++ b/stencil/stencil2d/synth.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +source /tools/Xilinx/Vivado/2018.3/settings64.sh + +/usr/bin/time -f "Took %E to synthesize the QNet Accl." make TARGET=hw + diff --git a/stencil/stencil3d/.gitignore b/stencil/stencil3d/.gitignore new file mode 100644 index 0000000..c690061 --- /dev/null +++ b/stencil/stencil3d/.gitignore @@ -0,0 +1,2 @@ +build/ +.build/ \ No newline at end of file diff --git a/stencil/stencil3d/.vscode/c_cpp_properties.json b/stencil/stencil3d/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..437972f --- /dev/null +++ b/stencil/stencil3d/.vscode/c_cpp_properties.json @@ -0,0 +1,13 @@ +{ + "configurations": [ + { + "name": "Linux", + "defines": [], + "compilerPath": "/usr/bin/gcc", + "cStandard": "c11", + "cppStandard": "c++17", + "intelliSenseMode": "clang-x64" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/stencil/stencil3d/.vscode/launch.json b/stencil/stencil3d/.vscode/launch.json new file mode 100644 index 0000000..af41ee8 --- /dev/null +++ b/stencil/stencil3d/.vscode/launch.json @@ -0,0 +1,37 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Build and run host", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/host", + "args": [ + "./input.data", + "./check.data" + ], + "stopAtEntry": false, + "cwd": "${workspaceFolder}", + /* + "environment": [ + { + "name": "LD_LIBRARY_PATH", + "value": "/tools/Xilinx/Vivado/2018.3/lnx64/tools/opencv/opencv_gcc/" + } + ], + */ + "externalConsole": false, //set to true to see output in cmd instead + "MIMode": "gdb", + "miDebuggerPath": "gdb", + "miDebuggerArgs": 
"--cd=${workspaceFolder}", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "preLaunchTask": "Biuld host" + }, + ] +} diff --git a/stencil/stencil3d/.vscode/settings.json b/stencil/stencil3d/.vscode/settings.json new file mode 100644 index 0000000..29af6e9 --- /dev/null +++ b/stencil/stencil3d/.vscode/settings.json @@ -0,0 +1,73 @@ +{ + "python.pythonPath": "/usr/include/python3.6", + "files.associations": { + "array": "cpp", + "chrono": "cpp", + "string": "cpp", + "*.tcc": "cpp", + "bitset": "cpp", + "cctype": "cpp", + "clocale": "cpp", + "cmath": "cpp", + "complex": "cpp", + "cstdint": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cstring": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "deque": "cpp", + "unordered_map": "cpp", + "vector": "cpp", + "exception": "cpp", + "fstream": "cpp", + "functional": "cpp", + "initializer_list": "cpp", + "iosfwd": "cpp", + "iostream": "cpp", + "istream": "cpp", + "limits": "cpp", + "memory": "cpp", + "new": "cpp", + "ostream": "cpp", + "numeric": "cpp", + "ratio": "cpp", + "sstream": "cpp", + "stdexcept": "cpp", + "streambuf": "cpp", + "system_error": "cpp", + "type_traits": "cpp", + "tuple": "cpp", + "typeinfo": "cpp", + "utility": "cpp", + "cinttypes": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "atomic": "cpp", + "strstream": "cpp", + "cfenv": "cpp", + "list": "cpp", + "iomanip": "cpp", + "mutex": "cpp", + "thread": "cpp", + "typeindex": "cpp", + "optional": "cpp", + "string_view": "cpp", + "valarray": "cpp", + "algorithm": "cpp", + "regex": "cpp", + "iterator": "cpp", + "map": "cpp", + "memory_resource": "cpp", + "random": "cpp", + "set": "cpp", + "condition_variable": "cpp", + "bfs.h": "c" + }, + "C_Cpp.default.includePath": [ + "${workspaceFolder}/", + "${workspaceFolder}/../../common" + ], + "C_Cpp.dimInactiveRegions": false +} diff --git a/stencil/stencil3d/.vscode/tasks.json 
b/stencil/stencil3d/.vscode/tasks.json new file mode 100644 index 0000000..5fc847d --- /dev/null +++ b/stencil/stencil3d/.vscode/tasks.json @@ -0,0 +1,35 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=733558 + // for the documentation about the tasks.json format + "version": "2.0.0", + "tasks": [ + { + "type": "shell", + "label": "Biuld host", + "command": "/usr/bin/g++", + "args": [ + "-g", + "-march=native", + "-O3", + "-opt-prefetch", +// "-std=c++98", + "-I/usr/include/", + "-I${workspaceFolder}/", + "-I${workspaceFolder}/../../common/", + "-I/tools/Xilinx/Vivado/2018.3/include/", + "${workspaceFolder}/fft.c", + "${workspaceFolder}/local_support.c", + "${workspaceFolder}/../../common/support.c", + "${workspaceFolder}/../../common/harness.c", + "-o", + "${workspaceFolder}/build/host", + ], + "options": { + "cwd": "/usr/bin" + }, + "problemMatcher": [ + "$gcc" + ] + } + ] +} \ No newline at end of file diff --git a/stencil/stencil3d/.vscode/tasks.json.bak b/stencil/stencil3d/.vscode/tasks.json.bak new file mode 100755 index 0000000..db59994 --- /dev/null +++ b/stencil/stencil3d/.vscode/tasks.json.bak @@ -0,0 +1,37 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=733558 + // for the documentation about the tasks.json format + "version": "2.0.0", + "tasks": [ + { + "type": "shell", + "label": "Biuld host", + "command": "/usr/bin/g++", + "args": [ + "-g", + "-D__DEBUG__", + "-march=native", "-O3","-opt-prefetch", + "-std=c++11", + "-I/usr/include/", + "-I${workspaceFolder}/inc/", + "-I/tools/Xilinx/Vivado/2018.3/include/", + "${workspaceFolder}/src/aux.cpp", + "${workspaceFolder}/src/net_tail.cpp", + "${workspaceFolder}/src/IRB_Big_CU.cpp", + "${workspaceFolder}/src/net_head.cpp", + "${workspaceFolder}/src/net_pwc.cpp", + "${workspaceFolder}/src/host.cpp", + "-L/tools/Xilinx/Vivado/2018.3/lnx64/tools/opencv/opencv_gcc", + "-lopencv_core", "-lopencv_highgui", "-lopencv_features2d", "-lopencv_flann", "-lopencv_imgproc", + "-o", + 
"${workspaceFolder}/build/host", + ], + "options": { + "cwd": "/usr/bin" + }, + "problemMatcher": [ + "$gcc" + ] + } + ] +} diff --git a/stencil/stencil3d/Makefile b/stencil/stencil3d/Makefile index b93b14f..11ef7f0 100644 --- a/stencil/stencil3d/Makefile +++ b/stencil/stencil3d/Makefile @@ -1,23 +1,159 @@ -KERN=stencil -ALG=stencil3d +#First check if the sysroot is defined. +# Run Target: +# hw - Compile for hardware +# emu - Compile for emulation (Default) +# cpu_emu - Quick compile for cpu emulation trating all HW functions as CPU functions +TARGET := emu -CFLAGS?=-O3 -Wall -Wno-unused-label +#ifndef SYSROOT +#ifeq ($(TARGET), hw) +# $(error SYSROOT is not set) +#endif +#endif -SRCS=$(KERN).c local_support.c ../../common/support.c -FILES=$(SRCS) $(KERN).h ../../common/support.h -$(KERN): $(FILES) ../../common/harness.c - $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c +DSA_PATH := ./DSA/ultra.dsa +OVERLAY := stencil_3d +BOARD := ZCU102 +PROC := psu_cortexa53 +TARGET_OS := linux -run: $(KERN) input.data check.data - ./$(KERN) input.data check.data +#Head definition +TOP_FUNCTION = stencil3d +TOP_FILE = stencil.c -generate: $(FILES) generate.c - $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c - ./generate +#----------------- +# 0 -> 75 +# 1 -> 100 +# 2 -> 150 +# 3-> 200 +# 4-> 300 +CLKID = 1 -hls: $(KERN).c $(KERN).h - vivado_hls hls.tcl -clean: - rm -f $(KERN) generate output.data +HW_FLAGS := +ifneq ($(TARGET), cpu_emu) + HW_FLAGS += -sds-hw $(TOP_FUNCTION) $(CURDIR)/$(TOP_FILE) -clkid $(CLKID) -sds-end +endif + +BUILD_DIR := $(CURDIR)/build +TEST_DIR := $(BUILD_DIR)/$(BOARD)-$(OVERLAY) + +# Emulation Mode: +# debug - Include debug data +# optimized - Exclude debug data (Default) +EMU_MODE := optimized + + + +EMU_FLAGS := +ifneq ($(TARGET), hw) + EMU_FLAGS := -mno-bitstream -mno-boot-files -emulation $(EMU_MODE) +endif + + +#CFLAGS = -g -Wall -O3 -c -I$(CURDIR)/inc/ -fno-builtin -D__HW__ -Wno-unused-label #-D__DEBUG__ 
-D__CHECK_REULTS_PER_LAYER__ +CFLAGS = -Wall -O3 -c -I$(CURDIR)/../../common -I$(CURDIR)/ -fno-builtin -Wno-unused-label +CFLAGS += -MT"$@" -MMD -MP -MF"$(@:%.o=%.d)" -MT"$(@)" +LFLAGS = "$@" "$<" +#SDSFLAGS := -sds-pf $(CURDIR)/$(BOARD)/platforms/$(OVERLAY) -target-os linux +SDSFLAGS := -sds-pf zcu102 -target-os $(TARGET_OS) + +LDFLAGS := +#ifeq ($(TARGET), hw) +# LDFLAGS += --sysroot=$(SYSROOT) -Wl,-rpath-link=$(SYSROOT)/lib/aarch64-linux-gnu/,-rpath-link=$(SYSROOT)/usr/lib/aarch64-linux-gnu/,-rpath-link=$(SYSROOT)/usr/lib/ +# LDFLAGS += -lopencv_core -lopencv_highgui -lopencv_imgproc -lGL -lGLU -lglut +#endif + +C_COMP := sdscc $(SDSFLAGS) +OBJECTS = $(TEST_DIR)/stencil.o +OBJECTS += $(TEST_DIR)/local_support.o +LOGFILE = $(TEST_DIR)/_sds/reports/sds.log + +# Check Rule Builds the Sources and Executes on Specified Target +check: all +ifneq ($(TARGET), hw) +ifeq ($(TARGET_OS), linux) +ifeq ($(EMU_MODE), optimized) + cp $(CURDIR)/../../utility/emu_run_no_gui.sh $(TEST_DIR)/emu_run.sh +else + cp $(CURDIR)/../../utility/emu_run.sh $(TEST_DIR)/emu_run.sh +endif + cd $(TEST_DIR) ; ./emu_run.sh $(OVERLAY) +endif +else + $(info "This Release Doesn't Support Automated Hardware Execution") +endif + +all: clean help host exec + + +help: + @echo "usage: make" + @echo + @echo "options:" + @echo "--------" + @echo "help: show help message." + @echo "all: make the SDx platform, and test it." + @echo "platform: make the SDx platform based on the input arguments." + @echo "test: do a simple test after a given platform is made." + @echo "cleantest: clean the test folder." + @echo "clean: clean the test and SDx platforms for the given board." + @echo "cleanall: clean all the platforms for a fresh start." 
+ @echo + @echo "arguments:" + @echo "----------" + @echo "DSA_PATH: path to the dsa file" + @echo " e.g., ./platform/hw/hdmi.dsa" + @echo "PROC: name of the processor that can be targeted" + @echo " e.g., psu_cortexa53" + @echo + @echo "current configuration:" + @echo "----------------------" + @echo "make DSA_PATH=$(DSA_PATH)" + @echo " OVERLAY=$(OVERLAY)" + @echo " BOARD=$(BOARD)" + @echo " PROC=$(PROC)" +# @echo " SYSROOT=$(SYSROOT)" + @echo + +host: + $(MAKE) -C $(CURDIR)/../../common + +exec: cleantest $(OBJECTS) + @mkdir -p $(TEST_DIR) + @echo 'Building Target: $@' + @echo 'Trigerring: SDS++ Linker' + cd $(TEST_DIR) ; $(C_COMP) -Wall $(CURDIR)/../../common/build/support.o $(CURDIR)/../../common/build/harness.o $(LDFLAGS) -o $(OVERLAY) $(OBJECTS) $(EMU_FLAGS) + @echo 'SDx Completed Building Target: $@' + @echo + @tput setaf 2; \ + echo "PASS: Platform successfully tested."; \ + tput sgr0; + @echo + + +$(TEST_DIR)/%.o: $(CURDIR)/%.c + @echo 'Building file: $<' + @echo 'Invoking: SDSCC Compiler' + @mkdir -p $(TEST_DIR) + cd $(TEST_DIR) ; $(C_COMP) $(CFLAGS) -o $(LFLAGS) $(HW_FLAGS) + @echo 'Finished building: $<' + @echo ' ' +ifeq ($(TARGET), cpu_emu) + @echo 'Ignore the warning which states that hw function is not a HW accelerator but has pragma applied for cpu_emu mode' + @echo ' ' +endif + + +cleantest: + @rm -rf $(BUILD_DIR) + +clean: cleantest + rm -rf .build + rm -rf $(BOARD)/platforms + rm -rf ./$(BOARD)/hw + +cleanall: clean + rm -rf ./*/platforms + rm -rf ./*/hw diff --git a/stencil/stencil3d/Makefile_General b/stencil/stencil3d/Makefile_General new file mode 100644 index 0000000..b93b14f --- /dev/null +++ b/stencil/stencil3d/Makefile_General @@ -0,0 +1,23 @@ +KERN=stencil +ALG=stencil3d + +CFLAGS?=-O3 -Wall -Wno-unused-label + +SRCS=$(KERN).c local_support.c ../../common/support.c +FILES=$(SRCS) $(KERN).h ../../common/support.h + +$(KERN): $(FILES) ../../common/harness.c + $(CC) $(CFLAGS) -I../../common -o $(KERN) $(SRCS) ../../common/harness.c + 
+run: $(KERN) input.data check.data + ./$(KERN) input.data check.data + +generate: $(FILES) generate.c + $(CC) $(CFLAGS) -I../../common -o generate $(SRCS) generate.c + ./generate + +hls: $(KERN).c $(KERN).h + vivado_hls hls.tcl + +clean: + rm -f $(KERN) generate output.data diff --git a/stencil/stencil3d/local_support.c b/stencil/stencil3d/local_support.c index 0c7c5f4..b03d044 100644 --- a/stencil/stencil3d/local_support.c +++ b/stencil/stencil3d/local_support.c @@ -1,13 +1,33 @@ #include "stencil.h" #include +#ifdef __SDSCC__ +#include "utils/sds_utils.h" +#endif + int INPUT_SIZE = sizeof(struct bench_args_t); #define EPSILON (1.0e-6) -void run_benchmark( void *vargs ) { +void run_benchmark(void *vargs) +{ struct bench_args_t *args = (struct bench_args_t *)vargs; - stencil3d( args->C, args->orig, args->sol ); +#ifdef __SDSCC__ + reset(); + start(); +#endif + stencil3d(args->C, args->orig, args->sol); + +#ifdef __SDSCC__ + stop(); + + uint64_t compute_Total_avg = avg_cpu_cycles(); + double delay = (compute_Total_avg * (1000000.0 / (sds_clock_frequency()))); + //AP freq is 1.2GHz + printf("-> Number of CPU cycles halted for kernel %llu \t~\t %f(uS).\n", compute_Total_avg, delay); + printf("-> For this AP Thick/S is %d.\n", sds_clock_frequency()); +#endif + } /* Input format: @@ -17,28 +37,34 @@ TYPE[2]: stencil coefficients (inner/outer) TYPE[SIZE]: input matrix */ -void input_to_data(int fd, void *vdata) { +void input_to_data(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; char *p, *s; // Load input string p = readfile(fd); - s = find_section_start(p,1); - STAC(parse_,TYPE,_array)(s, data->C, 2); + s = find_section_start(p, 1); + STAC(parse_, TYPE, _array) + (s, data->C, 2); - s = find_section_start(p,2); - STAC(parse_,TYPE,_array)(s, data->orig, SIZE); + s = find_section_start(p, 2); + STAC(parse_, TYPE, _array) + (s, data->orig, SIZE); free(p); } -void data_to_input(int fd, void *vdata) { +void data_to_input(int fd, void 
*vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; write_section_header(fd); - STAC(write_,TYPE,_array)(fd, data->C, 2); + STAC(write_, TYPE, _array) + (fd, data->C, 2); write_section_header(fd); - STAC(write_,TYPE,_array)(fd, data->orig, SIZE); + STAC(write_, TYPE, _array) + (fd, data->orig, SIZE); } /* Output format: @@ -46,34 +72,40 @@ void data_to_input(int fd, void *vdata) { TYPE[SIZE]: solution matrix */ -void output_to_data(int fd, void *vdata) { +void output_to_data(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; char *p, *s; // Load input string p = readfile(fd); - s = find_section_start(p,1); - STAC(parse_,TYPE,_array)(s, data->sol, SIZE); + s = find_section_start(p, 1); + STAC(parse_, TYPE, _array) + (s, data->sol, SIZE); free(p); } -void data_to_output(int fd, void *vdata) { +void data_to_output(int fd, void *vdata) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; write_section_header(fd); - STAC(write_,TYPE,_array)(fd, data->sol, SIZE); + STAC(write_, TYPE, _array) + (fd, data->sol, SIZE); } -int check_data( void *vdata, void *vref ) { +int check_data(void *vdata, void *vref) +{ struct bench_args_t *data = (struct bench_args_t *)vdata; struct bench_args_t *ref = (struct bench_args_t *)vref; int has_errors = 0; int i; TYPE diff; - for(i=0; isol[i] - ref->sol[i]; - has_errors |= (diff<-EPSILON) || (EPSILON "init.sh" <> "_sds/emulation/sd_card.manifest" +echo $PWD/../../input.data >> "_sds/emulation/sd_card.manifest" +echo $PWD/../../check.data >> "_sds/emulation/sd_card.manifest" +mv init.sh _sds +sdsoc_emulator -graphic-xsim -no-reboot |tee emulator.log +fi diff --git a/utility/emu_run_no_gui.sh b/utility/emu_run_no_gui.sh new file mode 100755 index 0000000..f87fc75 --- /dev/null +++ b/utility/emu_run_no_gui.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +if [ -f "$PWD/_sds/init.sh" ] +then +rm -rf $PWD/_sds/emulation/sd_card +sdsoc_emulator -no-reboot |tee emulator.log +else +cat > "init.sh" <> 
"_sds/emulation/sd_card.manifest" +echo $PWD/../../input.data >> "_sds/emulation/sd_card.manifest" +echo $PWD/../../check.data >> "_sds/emulation/sd_card.manifest" +mv init.sh _sds +sdsoc_emulator -no-reboot |tee emulator.log +fi