From 94311b306a3964dd044412c9b9f7583175f0454b Mon Sep 17 00:00:00 2001
From: Liangfu Chen
Date: Fri, 6 Sep 2019 00:52:10 +0800
Subject: [PATCH] [VTA] de10-nano driver (#3394)

* rework;
* `de10-nano` -> `de10nano`;
* fix compilation error;
* bug fix;
* Update install.md
* Update install.md
* Update install.md
* update with current runtime;
* add debug messages;
* bug fix in cma kernel module;
---
 config/vta_config.py            |   4 +-
 python/vta/environment.py       |   6 +-
 python/vta/testing/util.py      |   2 +-
 src/de10nano/cma_api.cc         | 210 ++++++++++++++++++++++++++++++++
 src/de10nano/cma_api.h          |  95 +++++++++++++++
 src/de10nano/de10nano_driver.cc | 169 +++++++++++++++++++++++++
 src/de10nano/de10nano_driver.h  |  63 ++++++++++
 7 files changed, 544 insertions(+), 5 deletions(-)
 create mode 100644 src/de10nano/cma_api.cc
 create mode 100644 src/de10nano/cma_api.h
 create mode 100644 src/de10nano/de10nano_driver.cc
 create mode 100644 src/de10nano/de10nano_driver.h

diff --git a/config/vta_config.py b/config/vta_config.py
index b925bf5fe4df..8c1f0af880d5 100644
--- a/config/vta_config.py
+++ b/config/vta_config.py
@@ -133,7 +133,9 @@ def main():
         cflags_str = " ".join(pkg.cflags)
         if pkg.TARGET == "pynq":
             cflags_str += " -DVTA_TARGET_PYNQ"
-        if pkg.TARGET == "ultra96":
+        elif pkg.TARGET == "de10nano":
+            cflags_str += " -DVTA_TARGET_DE10_NANO"
+        elif pkg.TARGET == "ultra96":
             cflags_str += " -DVTA_TARGET_ULTRA96"
         print(cflags_str)
 
diff --git a/python/vta/environment.py b/python/vta/environment.py
index ee2428be828b..3a3323ed8493 100644
--- a/python/vta/environment.py
+++ b/python/vta/environment.py
@@ -229,11 +229,11 @@ def target(self):
     @property
     def target_host(self):
         """The target host"""
-        if self.TARGET == "pynq":
+        if self.TARGET in ["pynq", "de10nano"]:
             return "llvm -target=armv7-none-linux-gnueabihf"
-        if self.TARGET == "ultra96":
+        elif self.TARGET == "ultra96":
             return "llvm -target=aarch64-linux-gnu"
-        if self.TARGET == "sim" or self.TARGET == "tsim":
+        elif self.TARGET in ["sim", "tsim"]:
             return "llvm"
         raise ValueError("Unknown target %s" % self.TARGET)
 
diff --git a/python/vta/testing/util.py b/python/vta/testing/util.py
index 67fc6b275b79..9a324fb7926b 100644
--- a/python/vta/testing/util.py
+++ b/python/vta/testing/util.py
@@ -52,7 +52,7 @@ def run(run_func):
             assert simulator.enabled()
             run_func(env, rpc.LocalSession())
 
-    elif env.TARGET in ["pynq", "ultra96"]:
+    elif env.TARGET in ["pynq", "ultra96", "de10nano"]:
         # The environment variables below should be set if we are using
         # a tracker to obtain a remote for a test device
         tracker_host = os.environ.get("TVM_TRACKER_HOST", None)
diff --git a/src/de10nano/cma_api.cc b/src/de10nano/cma_api.cc
new file mode 100644
index 000000000000..d6d24cb9a02a
--- /dev/null
+++ b/src/de10nano/cma_api.cc
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * The MIT License (MIT)
+ *
+ * COPYRIGHT (C) 2017 Institute of Electronics and Computer Science (EDI), Latvia.
+ * AUTHOR: Rihards Novickis (rihards.novickis@edi.lv)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ */
+
+/*!
+ *  Copyright (c) 2018 by Contributors
+ * \file cma_api.cc
+ * \brief Application layer implementation for contiguous memory allocation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include "cma_api.h"
+
+#ifndef CMA_IOCTL_MAGIC
+#define CMA_IOCTL_MAGIC 0xf2
+#endif
+
+#define CMA_ALLOC_CACHED _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 1, 4)
+#define CMA_ALLOC_NONCACHED _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 2, 4)
+#define CMA_FREE _IOC(_IOC_WRITE, CMA_IOCTL_MAGIC, 3, 4)
+#define CMA_GET_PHY_ADDR _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 4, 4)
+#define CMA_GET_SIZE _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 5, 4)
+
+#define CMA_IOCTL_MAXNR 5
+
+#ifndef CMA_DEBUG
+  #define CMA_DEBUG 0
+#endif
+#ifndef DRIVER_NODE_NAME
+  #define DRIVER_NODE_NAME "cma"
+#endif
+
+#if CMA_DEBUG == 1
+  #define __DEBUG(fmt, args...) printf("CMA_API_DEBUG: " fmt, ##args)
+#else
+  #define __DEBUG(fmt, args...)
+#endif
+
+/* Round N up to the nearest multiple of S */
+#define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S))
+
+
+/* Private functions */
+void *cma_alloc(size_t size, unsigned ioctl_cmd);
+
+/* Global file descriptor of the CMA device node; -1 while it is not open */
+int cma_fd = -1;
+
+int cma_init(void) {
+  __DEBUG("Opening \"/dev/" DRIVER_NODE_NAME "\" file\n");
+
+  cma_fd = open("/dev/" DRIVER_NODE_NAME, O_RDWR);
+  if (cma_fd == -1) {
+    __DEBUG("Failed to initialize api - \"%s\"\n", strerror(errno));
+    return -1;
+  }
+
+  return 0;
+}
+
+int cma_release(void) {
+  __DEBUG("Closing \"/dev/" DRIVER_NODE_NAME "\" file\n");
+
+  if (close(cma_fd) == -1) {
+    __DEBUG("Failed to finilize api - \"%s\"\n", strerror(errno));
+    return -1;
+  }
+  /* forget the closed descriptor so later calls fail instead of reusing it */
+  cma_fd = -1;
+
+  return 0;
+}
+
+void *cma_alloc_cached(size_t size) {
+  return cma_alloc(size, CMA_ALLOC_CACHED);
+}
+
+void *cma_alloc_noncached(size_t size) {
+  return cma_alloc(size, CMA_ALLOC_NONCACHED);
+}
+
+int cma_free(void *mem) {
+  __DEBUG("Releasing contigous memory from 0x%x\n", (unsigned)mem);
+  unsigned data, v_addr;
+
+  /* save user space pointer value (passed to the driver as unsigned) */
+  data = (unsigned)mem;
+  v_addr = (unsigned)mem;
+
+  if ( ioctl(cma_fd, CMA_GET_SIZE, &data) == -1 ) {
+    __DEBUG("cma_free - ioctl command unsuccsessful - 0\n");
+    return -1;
+  }
+  /* data now contains size */
+
+  /* unmap memory; keep the cma entry if the mapping could not be removed */
+  if ( munmap(mem, data) == -1 ) {
+    __DEBUG("cma_free - munmap unsuccessful - \"%s\"\n", strerror(errno));
+    return -1;
+  }
+
+  /* free cma entry */
+  if ( ioctl(cma_fd, CMA_FREE, &v_addr) == -1 ) {
+    __DEBUG("cma_free - ioctl command unsuccsessful - 1\n");
+    return -1;
+  }
+
+  return 0;
+}
+
+unsigned cma_get_phy_addr(void *mem) {
+  unsigned data;
+  __DEBUG("Getting physical address from 0x%x\n", (unsigned)mem);
+
+  /* save user space pointer value */
+  data = (unsigned)mem;
+
+  /* get physical address */
+  if ( ioctl(cma_fd, CMA_GET_PHY_ADDR, &data) == -1 ) {
+    __DEBUG("cma_get_phy_addr - ioctl command unsuccsessful\n");
+    return 0;
+  }
+  /* data now contains physical address */
+
+  return data;
+}
+
+
+/*
+ * Allocate size bytes, rounded up to whole pages, with the given allocation
+ * ioctl and map the result into the calling process.
+ * Returns NULL when either the ioctl or the mmap fails.
+ */
+void *cma_alloc(size_t size, unsigned ioctl_cmd) {
+  unsigned data;
+  void *mem;
+  __DEBUG("Allocating 0x%zx bytes of contigous memory\n", size);
+
+  /* Page align size */
+  size = ROUND_UP(size, getpagesize());
+
+  /* ioctl cmd to allocate contigous memory */
+  data = (unsigned)size;
+  if ( ioctl(cma_fd, ioctl_cmd, &data) == -1 ) {
+    __DEBUG("cma_alloc - ioctl command unsuccsessful\n");
+    return NULL;
+  }
+
+  /* at this point phy_addr is written to data */
+
+  /* mmap memory */
+  mem = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, cma_fd, data);
+  if (mem == MAP_FAILED) {
+    /* NOTE(review): the entry allocated by the ioctl above is not released here */
+    __DEBUG("cma_alloc - mmap unsuccsessful\n");
+    return NULL;
+  }
+
+  return mem;
+}
diff --git a/src/de10nano/cma_api.h b/src/de10nano/cma_api.h
new file mode 100644
index 000000000000..f20939d52b63
--- /dev/null
+++ b/src/de10nano/cma_api.h
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * \file cma_api.h
+ * \brief API for contiguous memory allocation driver.
+ */
+
+#ifndef VTA_DE10NANO_CMA_API_H_
+#define VTA_DE10NANO_CMA_API_H_
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * \brief Initialize CMA api (basically perform open() syscall).
+ *
+ * \return Returns 0 on SUCCESS. On FAILURE returns -1 and errno is set
+ * accordingly.
+ */
+int cma_init(void);
+
+
+/**
+ * \brief Release CMA api (basically perform close() syscall).
+ *
+ * \return Returns 0 on SUCCESS. On FAILURE returns -1 and errno is set
+ * accordingly.
+ */
+int cma_release(void);
+
+
+/**
+ * \brief Allocate cached, physically contiguous memory.
+ *
+ * \param size Size in bytes.
+ *
+ * \return Returns NULL on FAILURE. Otherwise pointer to valid userspace
+ * memory.
+ */
+void *cma_alloc_cached(size_t size);
+
+
+/**
+ * \brief Allocate noncached, physically contiguous memory.
+ *
+ * \param size Size in bytes.
+ *
+ * \return Returns NULL on FAILURE. Otherwise pointer to valid userspace
+ * memory.
+ */
+void *cma_alloc_noncached(size_t size);
+
+
+/**
+ * \brief Release physically contiguous memory.
+ *
+ * \param mem Pointer to previously allocated contiguous memory.
+ *
+ * \return Returns 0 on SUCCESS, -1 on FAILURE.
+ */
+int cma_free(void *mem);
+
+
+/**
+ * \brief Get physical address of a cma memory block (should be used for DMA).
+ *
+ * \param mem Pointer to previously allocated contiguous memory.
+ *
+ * \return Returns address on SUCCESS, 0 on FAILURE.
+ */
+unsigned cma_get_phy_addr(void *mem);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // VTA_DE10NANO_CMA_API_H_
diff --git a/src/de10nano/de10nano_driver.cc b/src/de10nano/de10nano_driver.cc
new file mode 100644
index 000000000000..97607f536051
--- /dev/null
+++ b/src/de10nano/de10nano_driver.cc
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * \file de10nano_driver.cc
+ * \brief VTA driver for DE10_Nano board.
+ */
+
+#include "de10nano_driver.h"
+
+#include <string.h>
+#include <vta/driver.h>
+#include <dmlc/logging.h>
+#include <thread>
+#include "cma_api.h"
+
+void* VTAMemAlloc(size_t size, int cached) {
+  // Open the CMA device once, on first use; nothing can be allocated without it
+  static int cma_status = cma_init();
+  if (cma_status != 0) {
+    return NULL;
+  }
+  if (cached) {
+    return cma_alloc_cached(size);
+  } else {
+    return cma_alloc_noncached(size);
+  }
+}
+
+void VTAMemFree(void* buf) {
+  cma_free(buf);
+}
+
+vta_phy_addr_t VTAMemGetPhyAddr(void* buf) {
+  // NOTE(review): cma_get_phy_addr() reports failure as 0, i.e. 0x80000000 here
+  return cma_get_phy_addr(buf) + 0x80000000;
+}
+
+void VTAMemCopyFromHost(void* dst, const void* src, size_t size) {
+  // For SoC-based FPGAs that used shared memory with the CPU, use memcpy()
+  memcpy(dst, src, size);
+}
+
+void VTAMemCopyToHost(void* dst, const void* src, size_t size) {
+  // For SoC-based FPGAs that used shared memory with the CPU, use memcpy()
+  memcpy(dst, src, size);
+}
+
+void VTAFlushCache(void * offset, vta_phy_addr_t buf, int size) {
+  // CHECK(false) never returns, so no further reporting is needed
+  CHECK(false) << "VTAFlushCache not implemented for de10nano";
+}
+
+void VTAInvalidateCache(void * offset, vta_phy_addr_t buf, int size) {
+  // CHECK(false) never returns, so no further reporting is needed
+  CHECK(false) << "VTAInvalidateCache not implemented for de10nano";
+}
+
+void *VTAMapRegister(uint32_t addr) {
+  // Align the base address with the pages
+  uint32_t virt_base = addr & ~(getpagesize() - 1);
+  // Calculate base address offset w.r.t the base address
+  uint32_t virt_offset = addr - virt_base;
+  // Open file and mmap
+  int mmap_file = open("/dev/mem", O_RDWR|O_SYNC);
+  if (mmap_file == -1) {
+    // Same value the mmap() below yields when handed an invalid descriptor
+    return MAP_FAILED;
+  }
+  void *vta = mmap(NULL,
+                   (VTA_IP_REG_MAP_RANGE + virt_offset),
+                   PROT_READ|PROT_WRITE,
+                   MAP_SHARED,
+                   mmap_file,
+                   virt_base);
+  // The mapping outlives the descriptor, so close it instead of leaking it
+  close(mmap_file);
+  return vta;
+}
+
+void VTAUnmapRegister(void *vta) {
+  // Unmap memory
+  int status = munmap(vta, VTA_IP_REG_MAP_RANGE);
+  assert(status == 0);
+  (void)status;  // status is otherwise unused when assert() is compiled out
+}
+
+void VTAWriteMappedReg(void* base_addr, uint32_t offset, uint32_t val) {
+  *((volatile uint32_t *) (reinterpret_cast<char *>(base_addr) + offset)) = val;
+}
+
+uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset) {
+  return *((volatile uint32_t *) (reinterpret_cast<char *>(base_addr) + offset));
+}
+
+class VTADevice {
+ public:
+  VTADevice() {
+    // VTA stage handles
+    vta_host_handle_ = VTAMapRegister(VTA_HOST_ADDR);
+    CHECK(vta_host_handle_ != MAP_FAILED)
+        << "VTADevice: failed to map the VTA registers at VTA_HOST_ADDR";
+  }
+
+  ~VTADevice() {
+    // Close VTA stage handle
+    VTAUnmapRegister(vta_host_handle_);
+  }
+
+  int Run(vta_phy_addr_t insn_phy_addr,
+          uint32_t insn_count,
+          uint32_t wait_cycles) {
+    VTAWriteMappedReg(vta_host_handle_, 0x04, 0);
+    VTAWriteMappedReg(vta_host_handle_, 0x08, insn_count);
+    VTAWriteMappedReg(vta_host_handle_, 0x0c, insn_phy_addr);
+
+    // VTA start
+    VTAWriteMappedReg(vta_host_handle_, 0x0, VTA_START);
+
+    // Loop until the VTA is done
+    unsigned t, flag = 0;
+    for (t = 0; t < wait_cycles; ++t) {
+      flag = VTAReadMappedReg(vta_host_handle_, 0x00);
+      flag &= 0x2;
+      if (flag == 0x2) break;
+      std::this_thread::yield();
+    }
+    // Report error if timeout
+    return t < wait_cycles ? 0 : 1;
+  }
+
+ private:
+  // VTA handles (register maps)
+  void* vta_host_handle_{nullptr};
+};
+
+VTADeviceHandle VTADeviceAlloc() {
+  return new VTADevice();
+}
+
+void VTADeviceFree(VTADeviceHandle handle) {
+  delete static_cast<VTADevice*>(handle);
+}
+
+int VTADeviceRun(VTADeviceHandle handle,
+                 vta_phy_addr_t insn_phy_addr,
+                 uint32_t insn_count,
+                 uint32_t wait_cycles) {
+  return static_cast<VTADevice*>(handle)->Run(
+      insn_phy_addr, insn_count, wait_cycles);
+}
+
+void VTAProgram(const char* bitstream) {
+  CHECK(false) << "VTAProgram not implemented for de10nano";
+}
diff --git a/src/de10nano/de10nano_driver.h b/src/de10nano/de10nano_driver.h
new file mode 100644
index 000000000000..0009e7574b02
--- /dev/null
+++ b/src/de10nano/de10nano_driver.h
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * \file de10nano_driver.h
+ * \brief VTA driver for DE10_Nano board.
+ */
+
+#ifndef VTA_DE10NANO_DE10NANO_DRIVER_H_
+#define VTA_DE10NANO_DE10NANO_DRIVER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include <assert.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+void *VTAMapRegister(uint32_t addr);
+void VTAUnmapRegister(void *vta);
+void VTAWriteMappedReg(void* base_addr, uint32_t offset, uint32_t val);
+uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset);
+void VTAProgram(const char* bitstream);
+
+/*! \brief VTA configuration register address range */
+#define VTA_RANGE 0x400
+/*! \brief VTA configuration register start value */
+#define VTA_START 0x1
+/*! \brief VTA configuration register auto-restart value */
+#define VTA_AUTORESTART 0x81
+/*! \brief VTA configuration register done value */
+#define VTA_DONE 0x2
+
+/*! \brief VTA fetch stage configuration register address
+*/
+#define VTA_HOST_ADDR 0xFF220000
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // VTA_DE10NANO_DE10NANO_DRIVER_H_