diff --git a/3rdparty/cma/Makefile b/3rdparty/cma/Makefile
new file mode 100644
index 000000000000..0f19b0cb6431
--- /dev/null
+++ b/3rdparty/cma/Makefile
@@ -0,0 +1,39 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+PATH_SETTINGS?=$(PWD)/settings.mk
+
+# Driver configuration file
+include $(PATH_SETTINGS)
+
+ifeq ($(KERNELRELEASE),)
+# Invoked directly from the module directory: hand the build over to kbuild.
+
+.PHONY: default clean
+
+default:
+	$(MAKE) ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) -C $(KSOURCE_DIR) M=`pwd` modules
+
+clean:
+	$(MAKE) ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) -C $(KSOURCE_DIR) M=`pwd` clean
+
+else
+# Re-read by kbuild (KERNELRELEASE is set): describe the module itself.
+
+obj-m     := cma.o
+# cma.h keys its default on CMA_IOCTL_MAGIC, so the CMA_IOC_MAGIC setting has
+# to be exported under that name for the configured value to take effect.
+ccflags-y := -DDRIVER_NODE_NAME="\"$(DRIVER_NODE_NAME)\"" \
+             -DCMA_DEBUG=$(CMA_DEBUG) \
+             -DCMA_IOCTL_MAGIC=$(CMA_IOC_MAGIC)
+endif
diff --git a/3rdparty/cma/cma.c b/3rdparty/cma/cma.c
new file mode 100644
index 000000000000..72d291344546
--- /dev/null
+++ b/3rdparty/cma/cma.c
@@ -0,0 +1,555 @@
+/* cma.c - ARM specific Linux driver for allocating physically contiguous memory.
+ *
+ * The MIT License (MIT)
+ *
+ * COPYRIGHT (C) 2017 Institute of Electronics and Computer Science (EDI), Latvia.
+ * AUTHOR: Rihards Novickis (rihards.novickis@edi.lv)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *
+ * DESCRIPTION:
+ * In some DMA use cases it is necessary, or simply more efficient, to use large
+ * physically contiguous memory regions. When the Linux kernel is compiled with
+ * the CMA (Contiguous Memory Allocator) feature, this module allows such
+ * contiguous memory to be allocated and passed to user space. Memory can be
+ * either cached or uncached.
+ *
+ * For api description, see "cma_api.h" header file.
+ *
+ */
+
+/* Linux driver includes */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/mutex.h>
+
+/* CMA specific includes */
+#include <linux/dma-mapping.h>
+#include <linux/dma-contiguous.h>
+
+/* IOCTL description */
+#include "cma.h"
+
+
+/* Handle defines */
+#ifndef CONFIG_DMA_CMA
+  #error "CMA configuration not set in kernel!"
+#endif
+#ifndef CMA_DEBUG
+  #define CMA_DEBUG       0
+#endif
+#ifndef DRIVER_NODE_NAME
+  #define DRIVER_NODE_NAME   "cma"
+#endif
+
+
+/* Commonly used printk statements.
+ * __ERROR logs at KERN_ERR and __INFO at KERN_INFO, each with a fixed
+ * "CMA_..." prefix. __DEBUG logs at KERN_INFO only when CMA_DEBUG == 1;
+ * otherwise it expands to nothing, so its arguments are never evaluated
+ * in non-debug builds. */
+#define __ERROR(fmt, args...)    printk(KERN_ERR "CMA_ERROR: " fmt, ##args)
+#define __INFO(fmt, args...)      printk(KERN_INFO "CMA_INFO: " fmt, ##args)
+
+#if CMA_DEBUG == 1
+  #define __DEBUG(fmt, args...)  printk(KERN_INFO "CMA_DEBUG: " fmt, ##args)
+#else
+  #define __DEBUG(fmt, args...)
+#endif
+
+
+/* cma entry flags (bit mask stored in cma_entry.flags).
+ * CMA_ENTRY_CACHED is 0, i.e. simply the absence of CMA_ENTRY_NONCACHED.
+ * CMA_ENTRY_MAPPED is set by cma_mmap() and cleared by cma_mmap_close();
+ * cma_entry_release() refuses to free an entry while it is set.
+ * CMA_ENTRY_NONCACHED makes cma_mmap() map the region with
+ * pgprot_noncached(). */
+#define CMA_ENTRY_CACHED     0
+#define CMA_ENTRY_MAPPED    (1<<0)
+#define CMA_ENTRY_NONCACHED   (1<<1)
+
+
+/* fops declarations: entry points installed in the file_operations table */
+static int cma_ioctl(struct file *filp, unsigned int cmd, unsigned int arg);
+static int cma_mmap(struct file *filp, struct vm_area_struct *vm_area_dscr);
+
+/* ioctl interface commands: one handler per command dispatched by cma_ioctl();
+ * in every handler "arg" is a user-space address that is read and/or written */
+static int cma_ioctl_alloc(struct file *filp, unsigned int cmd, unsigned int arg, int cached_flag);
+static int cma_ioctl_free(struct file *filp, unsigned int cmd, unsigned int arg);
+static int cma_ioctl_get_phy_addr(struct file *filp, unsigned int cmd, unsigned int arg);
+static int cma_ioctl_get_size(struct file *filp, unsigned int cmd, unsigned int arg);
+
+/* CMA entry specific functions: lookup, insertion and removal on the global
+ * singly linked list headed by cma_start; each takes mutex_cma_list_modify */
+struct cma_entry *cma_entry_get_by_phy_addr(unsigned int phy_addr);
+struct cma_entry *cma_entry_get_by_v_usr_addr(unsigned v_usr_addr);
+static int cma_entry_add(struct cma_entry *entry);
+static int cma_entry_release(unsigned v_usr_addr);
+
+/* mmap ops declarations: close callback installed on mappings by cma_mmap() */
+void cma_mmap_close(struct vm_area_struct *vma);
+
+
+/* File operations registered for the character device in cma_init().
+ * Only ioctl and mmap are provided.
+ * NOTE(review): cma_ioctl is declared as returning int and taking an
+ * unsigned int argument, whereas the kernel's unlocked_ioctl slot is
+ * long (*)(struct file *, unsigned int, unsigned long) - confirm this is
+ * acceptable for the 32-bit target this driver is built for. */
+struct file_operations fops = {
+  .owner         = THIS_MODULE,
+  .unlocked_ioctl   = cma_ioctl,
+  .mmap         = cma_mmap
+};
+
+/* mmap operation structure attached to each mapping by cma_mmap(); only the
+ * close callback is set, which clears the entry's CMA_ENTRY_MAPPED flag */
+static struct vm_operations_struct cma_ops = {
+  .close     = cma_mmap_close
+};
+
+
+/* List structure for containing memory allocation information.
+ * One node is created per successful allocation ioctl and linked into the
+ * singly linked list headed by cma_start. */
+struct cma_entry{
+  struct cma_entry *next;
+  pid_t     pid;    /* current->pid of the allocating caller; checked again in mmap */
+  unsigned   size;    /* size of allocation in bytes, as requested from user space */
+  dma_addr_t  phy_addr;  /* address returned by dma_alloc_coherent(); lookup key for mmap */
+  void      *v_ptr;    /* kernel-space pointer returned by dma_alloc_coherent() */
+  unsigned   v_usr_addr; /* user-space address recorded by cma_mmap(); lookup key for free/get ioctls */
+  int     flags;    /* CMA_ENTRY_* bit mask */
+};
+
+
+/* Global variables.
+ * major:  character device major number obtained from register_chrdev().
+ * class / device: sysfs class and device node created in cma_init() and
+ *         destroyed in cma_exit().
+ * cma_start: head of the allocation list (NULL when the list is empty).
+ * mutex_cma_list_modify: taken by the list lookup/add/release helpers. */
+int major;
+static struct class   *class;
+static struct device   *device;
+struct cma_entry     *cma_start = NULL;
+static struct mutex   mutex_cma_list_modify;
+
+
+/* Look up the list entry whose physical address equals phy_addr.
+ * Returns the matching entry, or NULL when there is no match or when the
+ * wait for the list mutex was interrupted. The mutex is held only for the
+ * duration of the search. */
+struct cma_entry *cma_entry_get_by_phy_addr(unsigned int phy_addr) {
+  struct cma_entry *cursor;
+
+  __DEBUG("cma_entry_get_by_phy_addr()\n");
+
+  if (mutex_lock_interruptible(&mutex_cma_list_modify) != 0)
+    return NULL;
+
+  /* stop at the first match; cursor ends up NULL if none is found */
+  for (cursor = cma_start; cursor != NULL; cursor = cursor->next) {
+    if (cursor->phy_addr == phy_addr)
+      break;
+  }
+
+  mutex_unlock(&mutex_cma_list_modify);
+  return cursor;
+}
+
+
+/* Look up the list entry whose recorded user-space address equals v_usr_addr.
+ * Returns the matching entry, or NULL when there is no match or when the
+ * wait for the list mutex was interrupted. */
+struct cma_entry *cma_entry_get_by_v_usr_addr(unsigned v_usr_addr) {
+  struct cma_entry *cursor;
+
+  __DEBUG("cma_entry_get_by_v_usr_addr()\n");
+
+  if (mutex_lock_interruptible(&mutex_cma_list_modify) != 0) {
+    __DEBUG("cma_entry_get_by_v_usr_addr: failed to call mutex_lock_interruptible().\n");
+    return NULL;
+  }
+
+  /* stop at the first match; cursor ends up NULL if none is found */
+  for (cursor = cma_start; cursor != NULL; cursor = cursor->next) {
+    if (cursor->v_usr_addr == v_usr_addr) {
+      __DEBUG("found an entry with v_usr_addr (0x%x).\n", v_usr_addr);
+      break;
+    }
+    __DEBUG("> walk->v_usr_addr=(0x%x), expected v_usr_addr=(0x%x).\n",
+            cursor->v_usr_addr, v_usr_addr);
+  }
+
+  if (cursor == NULL) {
+    __DEBUG("failed to find an entry with v_usr_addr (0x%x).\n", v_usr_addr);
+  }
+
+  mutex_unlock(&mutex_cma_list_modify);
+  return cursor;
+}
+
+
+/* Push entry onto the front of the global allocation list.
+ * Returns 0 on success, or -EAGAIN when the wait for the list mutex was
+ * interrupted (the entry is then not linked). */
+static int cma_entry_add(struct cma_entry *entry) {
+  struct cma_entry *it;
+  __DEBUG("cma_entry_add() - phy_addr 0x%x; pid 0x%x\n", entry->phy_addr, entry->pid);
+
+  if (mutex_lock_interruptible(&mutex_cma_list_modify) != 0)
+    return -EAGAIN;
+
+  /* head insertion: constant time, no traversal needed */
+  entry->next = cma_start;
+  cma_start = entry;
+
+  /* dump the whole list (debug builds only) */
+  for (it = cma_start; it != NULL; it = it->next) {
+    __DEBUG("> walk->phy_addr=(0x%x).\n", it->phy_addr);
+  }
+
+  mutex_unlock(&mutex_cma_list_modify);
+
+  return 0;
+}
+
+
+/* Unlink and free the first entry whose recorded user-space address equals
+ * v_usr_addr, releasing both its DMA buffer and the list node.
+ *
+ * Returns 0 on success, -EAGAIN when the wait for the list mutex was
+ * interrupted, and -1 when no entry matches or when the first matching entry
+ * is still mapped (CMA_ENTRY_MAPPED set).
+ *
+ * The list is walked through a pointer to the link that references the
+ * current node, so unlinking is one assignment whether the node is the head
+ * or not. The former extra "cma_start = NULL" branch could never fire and
+ * has been removed. */
+static int cma_entry_release(unsigned v_usr_addr) {
+  int err = -1;
+  struct cma_entry **link;
+
+  /* print entry list for debugging */
+  struct cma_entry *walk;
+  __DEBUG("cma_entry_release() - v_usr_addr 0x%x; pid 0x%x\n", v_usr_addr, current->pid);
+
+  if (mutex_lock_interruptible(&mutex_cma_list_modify))
+    return -EAGAIN;
+
+  for (link = &cma_start; *link != NULL; link = &(*link)->next) {
+    struct cma_entry *victim = *link;
+
+    if (victim->v_usr_addr != v_usr_addr) {
+      __DEBUG("skip entry with v_usr_addr (0x%x).\n", victim->v_usr_addr);
+      continue;
+    }
+
+    /* a region that is still mapped into user space must not be freed */
+    if (victim->flags & CMA_ENTRY_MAPPED) {
+      __DEBUG("failed to find a valid entry with v_usr_addr(0x%x), entry mapped.\n", v_usr_addr);
+      goto leave;
+    }
+
+    /* unlink: works for the head (link == &cma_start) and for inner nodes */
+    *link = victim->next;
+
+    __DEBUG("found an entry with v_usr_addr=0x%x, phy_addr=0x%x, next=0x%x\n",
+            v_usr_addr, victim->phy_addr, (int)victim->next);
+    dma_free_coherent(NULL, victim->size, victim->v_ptr, victim->phy_addr);
+    kfree(victim);
+    err = 0;
+    goto leave;
+  }
+
+  __DEBUG("failed to find an entry with v_usr_addr (0x%x).\n", v_usr_addr);
+
+leave:
+
+  /* print entry list for debugging */
+  for (walk = cma_start; walk != NULL; walk = walk->next) {
+    __DEBUG("> walk->v_usr_addr=(0x%x), walk->next=0x%x\n", walk->v_usr_addr, (int)walk->next);
+  }
+
+  mutex_unlock(&mutex_cma_list_modify);
+  return err;
+}
+
+
+/* inline function for readability */
+inline int check_entry_accordance(struct cma_entry *entry, struct vm_area_struct *vma) {
+  /* The mapping request must name an existing entry and cover exactly that
+   * entry: same physical start (taken from the page offset) and same length.
+   * The NULL test comes first so the later operands never dereference NULL. */
+  if ( entry == NULL ||
+       entry->phy_addr != vma->vm_pgoff << PAGE_SHIFT ||
+       entry->size != vma->vm_end-vma->vm_start )
+    return -EFAULT;
+
+  /* only the caller that allocated the region may map it */
+  return ( entry->pid != current->pid ) ? -EACCES : 0;
+}
+
+/* mmap handler: map a previously allocated CMA region into the caller.
+ *
+ * The region is selected by the mmap offset, which must be the physical
+ * address handed out by the allocation ioctl; the request must also match
+ * the entry's size and come from the allocating pid (see
+ * check_entry_accordance()).
+ *
+ * Returns 0 on success, -EFAULT / -EACCES when the request does not match an
+ * entry, and -EAGAIN when remap_pfn_range() fails.
+ *
+ * This handler does not take mmap_sem itself, so it must not release it
+ * either: the former up_write() on the failure path has been removed. The
+ * user address and the MAPPED flag are now recorded only after the mapping
+ * succeeded, so a failed mmap leaves the entry unchanged. */
+static int cma_mmap(struct file *filp, struct vm_area_struct *vma) {
+  int err;
+  struct cma_entry *entry;
+
+  __DEBUG("cma_mmap() - phy_addr 0x%lx, v_user_addr 0x%lx\n",
+          vma->vm_pgoff << PAGE_SHIFT, vma->vm_start);
+
+  entry = cma_entry_get_by_phy_addr(vma->vm_pgoff << PAGE_SHIFT);
+
+  /* check if mmap is aligned with the according entry */
+  err = check_entry_accordance(entry, vma);
+  if (err)
+    return err;
+
+  /* should memory be uncached? */
+  if ( entry->flags & CMA_ENTRY_NONCACHED )
+    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+  /* map memory to user space */
+  if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+                      vma->vm_end-vma->vm_start, vma->vm_page_prot))
+    return -EAGAIN;
+
+  /* remember the user address (used when freeing the memory), install the
+   * close callback and mark the entry as mapped */
+  entry->v_usr_addr = vma->vm_start;
+  vma->vm_ops = &cma_ops;
+  entry->flags |= CMA_ENTRY_MAPPED;
+
+  return 0;
+}
+
+
+/* vm_operations close callback: clear the MAPPED flag of the entry whose
+ * physical address matches the mapping's page offset, if such an entry
+ * exists. */
+void cma_mmap_close(struct vm_area_struct *vma) {
+  struct cma_entry *owner;
+
+  __DEBUG("cma_mmap_close()\n");
+
+  owner = cma_entry_get_by_phy_addr(vma->vm_pgoff << PAGE_SHIFT);
+  if ( owner == NULL )
+    return;
+
+  /* remove custom mapped flag */
+  owner->flags &= (~CMA_ENTRY_MAPPED);
+}
+
+
+/* ioctl entry point: validate the command word and dispatch to the handler.
+ *
+ * Returns the handler's result, or -ENOTTY when the magic number or command
+ * number is out of range, or when the command word matches none of the
+ * CMA_* commands (for example a valid number combined with different
+ * direction or size bits). The last case used to fall through and report
+ * success (0) without doing anything. */
+static int cma_ioctl(struct file *filp, unsigned int cmd, unsigned int arg) {
+  /* routine check */
+  __DEBUG("IOCTL command issued\n");
+
+  /* check validity of the cmd */
+  if (_IOC_TYPE(cmd) != CMA_IOCTL_MAGIC) {
+    __ERROR("IOCTL Incorrect magic number\n");
+    return -ENOTTY;
+  }
+  if (_IOC_NR(cmd) > CMA_IOCTL_MAXNR) {
+    __ERROR("IOCTL Command is not valid\n");
+    return -ENOTTY;
+  }
+
+  /* dispatch on the complete command word (direction, size, magic, number) */
+  switch (cmd) {
+    case CMA_ALLOC_CACHED:
+      return cma_ioctl_alloc(filp, cmd, arg, CMA_ENTRY_CACHED);
+    case CMA_ALLOC_NONCACHED:
+      return cma_ioctl_alloc(filp, cmd, arg, CMA_ENTRY_NONCACHED);
+    case CMA_FREE:
+      return cma_ioctl_free(filp, cmd, arg);
+    case CMA_GET_PHY_ADDR:
+      return cma_ioctl_get_phy_addr(filp, cmd, arg);
+    case CMA_GET_SIZE:
+      return cma_ioctl_get_size(filp, cmd, arg);
+    default:
+      __DEBUG("This should never happen!\n");
+      break;
+  }
+
+  /* unknown command word: report it instead of pretending success */
+  return -ENOTTY;
+}
+
+
+/* Allocation ioctl: read the requested size from *arg, allocate a coherent
+ * DMA buffer of that size, write its physical address back to *arg and
+ * record the allocation in the global list.
+ *
+ * cached_flag is CMA_ENTRY_CACHED or CMA_ENTRY_NONCACHED and is stored as
+ * the entry's initial flags.
+ *
+ * Returns the physical address (converted to int, as before) on success;
+ * -EFAULT when user memory cannot be accessed, -EINVAL for a zero size,
+ * -ENOMEM when either allocation fails, or the error from cma_entry_add().
+ *
+ * Fixes compared to the previous version:
+ *  - the debug messages no longer dereference "entry" before it is assigned;
+ *  - the list node allocation is checked and zero-initialised, so v_usr_addr
+ *    and next never hold heap garbage;
+ *  - user copies are checked and always transfer 4 bytes (the ioctl's
+ *    declared size) regardless of the width of dma_addr_t;
+ *  - the address is copied out before the entry becomes visible on the list,
+ *    so a copy failure can be unwound completely. */
+static int cma_ioctl_alloc(struct file *filp, unsigned int cmd, unsigned int arg, int cached_flag) {
+  int err;
+  int phy_addr_ret;
+  unsigned size;
+  struct cma_entry *entry;
+  __DEBUG("cma_ioctl_alloc() called!\n");
+
+  if (!access_ok(VERIFY_READ, (void __user*) arg, _IOC_SIZE(cmd))) {
+    __DEBUG("fail to get read access to %u bytes of memory.\n", _IOC_SIZE(cmd));
+    return -EFAULT;
+  }
+  if (!access_ok(VERIFY_WRITE, (void __user*) arg, _IOC_SIZE(cmd))) {
+    __DEBUG("fail to get write access to %u bytes of memory.\n", _IOC_SIZE(cmd));
+    return -EFAULT;
+  }
+
+  /* get requested size from user space */
+  if (__get_user(size, (unsigned __user *)arg))
+    return -EFAULT;
+  if (size == 0)
+    return -EINVAL;
+
+  /* create new cma entry  */
+  entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+  if (entry == NULL)
+    return -ENOMEM;
+
+  /* set entry params */
+  entry->size    = size;
+  entry->pid     = current->pid;
+  entry->flags   = cached_flag;
+
+  /* allocate contiguous memory */
+  entry->v_ptr = dma_alloc_coherent(NULL, entry->size, &entry->phy_addr, GFP_KERNEL);
+  if ( entry->v_ptr == NULL ) {
+    err = -ENOMEM;
+    __DEBUG("==== FAILED TO ALLOCATE 0x%X BYTES OF COHERENT MEMORY ====\n", entry->size);
+    goto error_free_entry;
+  }
+
+  /* keep a private copy: once the entry is on the list it may be released
+   * by another caller, so it must not be touched after cma_entry_add() */
+  phy_addr_ret = entry->phy_addr;
+
+  /* put physical address to user space */
+  if (__put_user((unsigned)entry->phy_addr, (unsigned __user *)arg)) {
+    err = -EFAULT;
+    goto error_free_dma;
+  }
+
+  /* add entry */
+  err = cma_entry_add(entry);
+  if (err)
+    goto error_free_dma;
+
+  __DEBUG("allocated 0x%x bytes of coherent memory at phy_addr=0x%x\n", size, phy_addr_ret);
+  return phy_addr_ret;
+
+
+error_free_dma:
+  dma_free_coherent(NULL, entry->size, entry->v_ptr, entry->phy_addr);
+
+error_free_entry:
+  kfree(entry);
+
+  return err;
+}
+
+
+/* Free ioctl: read a user-space virtual address from *arg and release the
+ * allocation that was mapped at that address.
+ *
+ * Returns the result of cma_entry_release(), or -EFAULT when *arg cannot be
+ * read.
+ *
+ * The address is read as a 4-byte unsigned value, matching both the ioctl's
+ * declared size and the type cma_entry_release() takes; it was previously
+ * read through a dma_addr_t, whose width depends on the kernel configuration,
+ * and a failed read went unnoticed. */
+static int cma_ioctl_free(struct file *filp, unsigned int cmd, unsigned int arg) {
+  unsigned v_usr_addr;
+  __DEBUG("cma_ioctl_free() called!\n");
+
+  if (!access_ok(VERIFY_READ, (void __user*) arg, _IOC_SIZE(cmd)))
+    return -EFAULT;
+
+  if (__get_user(v_usr_addr, (unsigned __user *)arg))
+    return -EFAULT;
+
+  return cma_entry_release(v_usr_addr);
+}
+
+
+/* Shared helper for the "get" ioctls: read a user-space virtual address from
+ * *arg and return the list entry recorded for that address.
+ *
+ * Returns NULL when *arg is not readable and writable (the callers write
+ * their answer back to the same location), when reading it faults, or when
+ * no entry matches. A faulting read is now reported instead of continuing
+ * with whatever value was left in the local variable. */
+static struct cma_entry *cma_ioctl_get_entry_from_v_usr_addr(unsigned int cmd, unsigned int arg) {
+  unsigned v_usr_addr;
+
+  __DEBUG("cma_ioctl_get_entry_from_v_usr_addr() called!\n");
+
+  /* routine check */
+  if (!access_ok(VERIFY_READ, (void __user*) arg, _IOC_SIZE(cmd))) {
+    __DEBUG("failed to get read access to virtual user address: 0x%x\n", arg);
+    return NULL;
+  }
+  if (!access_ok(VERIFY_WRITE, (void __user*) arg, _IOC_SIZE(cmd))) {
+    __DEBUG("failed to get write access to virtual user address: 0x%x\n", arg);
+    return NULL;
+  }
+
+  /* get process user address */
+  if (__get_user(v_usr_addr, (unsigned __user *)arg))
+    return NULL;
+
+  /* search for appropriate entry */
+  return cma_entry_get_by_v_usr_addr(v_usr_addr);
+}
+
+
+/* Get-physical-address ioctl: *arg holds a user-space virtual address on
+ * entry and is overwritten with the physical address of the matching
+ * allocation.
+ *
+ * Returns 0 on success and -EFAULT when no entry matches or user memory
+ * cannot be accessed.
+ *
+ * The address is written as a 4-byte unsigned value, the size declared by
+ * the ioctl and checked by access_ok(); writing through a dma_addr_t pointer
+ * would store more bytes on kernels where dma_addr_t is 64 bits wide. The
+ * copy result is now checked. */
+static int cma_ioctl_get_phy_addr(struct file *filp, unsigned int cmd, unsigned int arg) {
+  struct cma_entry *entry;
+
+  __DEBUG("cma_ioctl_get_phy_addr() called!\n");
+
+  /* get entry */
+  entry = cma_ioctl_get_entry_from_v_usr_addr(cmd, arg);
+  if (entry == NULL) {
+    __DEBUG("cma entry has not been found.\n");
+    return -EFAULT;
+  }
+
+  /* put physical address into user space */
+  if (__put_user((unsigned)entry->phy_addr, (unsigned __user *)arg))
+    return -EFAULT;
+
+  return 0;
+}
+
+
+/* Get-size ioctl: *arg holds a user-space virtual address on entry and is
+ * overwritten with the size in bytes of the matching allocation.
+ *
+ * Returns 0 on success and -EFAULT when no entry matches or user memory
+ * cannot be accessed (the copy result used to be ignored). */
+static int cma_ioctl_get_size(struct file *filp, unsigned int cmd, unsigned int arg) {
+  struct cma_entry *entry;
+
+  __DEBUG("cma_ioctl_get_size() called!\n");
+
+  /* get entry */
+  entry = cma_ioctl_get_entry_from_v_usr_addr(cmd, arg);
+  if (entry == NULL) {
+    __DEBUG("cma_ioctl_get_size: failed to get_entry_from_v_usr_addr.\n");
+    return -EFAULT;
+  }
+
+  /* put size into user space */
+  if (__put_user(entry->size, (typeof(&entry->size))arg))
+    return -EFAULT;
+
+  return 0;
+}
+
+
+/* Module init: register the character device, create its class and device
+ * node.
+ *
+ * Returns 0 on success or a negative errno. A register_chrdev() failure is
+ * now returned unchanged: "major" is already negative there, so the former
+ * "return -major" handed a positive value to the module loader. The list
+ * mutex is initialised before anything is registered, because the ioctl and
+ * mmap handlers can be called as soon as the device exists. */
+static int cma_init(void) {
+  int err;
+  __INFO("Initializeing Contigous Memory Allocator module\n");
+
+  /* must be ready before the first file operation can run */
+  mutex_init(&mutex_cma_list_modify);
+
+  /* obtain major number */
+  major = register_chrdev(0, DRIVER_NODE_NAME, &fops);
+  if ( major < 0 ) {
+    __ERROR("Failed to allocate major number\n");
+    return major;
+  }
+
+  /* create class */
+  class = class_create(THIS_MODULE, DRIVER_NODE_NAME);
+  if ( IS_ERR(class) ) {
+    __ERROR("Failed to create class\n");
+    err = PTR_ERR(class);
+    goto error_class_create;
+  }
+
+  /* create device node */
+  device = device_create(class, NULL, MKDEV(major, 0), NULL, DRIVER_NODE_NAME);
+  if ( IS_ERR(device) ) {
+    __ERROR("Failed to create device\n");
+    err = PTR_ERR(device);
+    goto error_device_create;
+  }
+
+  return 0;
+
+
+error_device_create:
+  class_destroy(class);
+
+error_class_create:
+  unregister_chrdev(major, DRIVER_NODE_NAME);
+
+  return err;
+}
+
+
+/* Module exit: tear down the device node, class and character device, then
+ * release every allocation still on the list.
+ *
+ * Entries remain on the list whenever a caller never issued CMA_FREE for
+ * them; without the final loop their DMA buffers and list nodes would be
+ * lost when the module is unloaded.
+ * NOTE(review): the loop runs without the list mutex on the assumption that
+ * no file operation can be in flight once the module is being removed -
+ * confirm. */
+static void cma_exit(void) {
+  struct cma_entry *entry;
+
+  __INFO("Releasing Contigous Memory Allocator module\n");
+
+  device_destroy(class, MKDEV(major, 0));
+
+  class_destroy(class);
+
+  unregister_chrdev(major, DRIVER_NODE_NAME);
+
+  /* free whatever user space left behind */
+  while (cma_start != NULL) {
+    entry = cma_start;
+    cma_start = entry->next;
+    dma_free_coherent(NULL, entry->size, entry->v_ptr, entry->phy_addr);
+    kfree(entry);
+  }
+}
+
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Driver for allocating cached and noncached physically contigous memory. "
+                   "Exploits kernel CMA feature.");
+module_init(cma_init);
+module_exit(cma_exit);
diff --git a/3rdparty/cma/cma.h b/3rdparty/cma/cma.h
new file mode 100644
index 000000000000..f005b3065c3a
--- /dev/null
+++ b/3rdparty/cma/cma.h
@@ -0,0 +1,46 @@
+/* cma.h
+ *
+ * The MIT License (MIT)
+ *
+ * COPYRIGHT (C) 2017 Institute of Electronics and Computer Science (EDI), Latvia.
+ * AUTHOR: Rihards Novickis (rihards.novickis@edi.lv)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef VTA_DE10_NANO_KERNEL_MODULE_CMA_H_
+#define VTA_DE10_NANO_KERNEL_MODULE_CMA_H_
+
+
+/* ioctl magic number shared by the driver and its user-space clients.
+ * It may be supplied at build time with -DCMA_IOCTL_MAGIC=<value>; when it is
+ * not, the default below is used. */
+#ifndef CMA_IOCTL_MAGIC
+#define CMA_IOCTL_MAGIC  0xf2
+#endif
+
+
+#define CMA_ALLOC_CACHED      _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 1, 4)  /* in: size in bytes; out: physical address; cached mapping */
+#define CMA_ALLOC_NONCACHED   _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 2, 4)  /* as above, but mmap uses a non-cached mapping */
+#define CMA_FREE              _IOC(_IOC_WRITE,           CMA_IOCTL_MAGIC, 3, 4)  /* in: user virtual address of the region to free */
+#define CMA_GET_PHY_ADDR      _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 4, 4)  /* in: user virtual address; out: physical address */
+#define CMA_GET_SIZE          _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 5, 4)  /* in: user virtual address; out: size in bytes */
+
+#define CMA_IOCTL_MAXNR                 5  /* highest command number above; larger numbers are rejected by the driver */
+
+
+#endif  // VTA_DE10_NANO_KERNEL_MODULE_CMA_H_
diff --git a/3rdparty/cma/settings.mk b/3rdparty/cma/settings.mk
new file mode 100644
index 000000000000..7403845023d5
--- /dev/null
+++ b/3rdparty/cma/settings.mk
@@ -0,0 +1,19 @@
+# ==================== COMPILATION RELATED SETTINGS ====================
+# Path to the kernel sources (from "./driver", if relative path is used)
+KSOURCE_DIR=/opt/intel/linux-socfpga-rel_socfpga-4.9.78-ltsi_18.08.02_pr
+
+# Cross-compiler prefix, passed to the kernel build as CROSS_COMPILE
+CROSS_COMPILE=arm-linux-gnueabihf-
+
+# Target architecture, passed to the kernel build as ARCH
+ARCH=arm
+
+# Driver debug logging: 1 enables the CMA_DEBUG printk messages, any other value disables them
+CMA_DEBUG?=0
+
+# ==================== DRIVER RELATED SETTINGS ====================
+# Node name used in "/dev" folder (the driver also uses it as chrdev and class name)
+DRIVER_NODE_NAME="cma"
+
+# Unique (across system) ioctl magic number. Every ioctl interface should have one.
+CMA_IOC_MAGIC=0xf2
diff --git a/cmake/modules/VTA.cmake b/cmake/modules/VTA.cmake
index 0de9e72f3cff..b8be98a53a74 100644
--- a/cmake/modules/VTA.cmake
+++ b/cmake/modules/VTA.cmake
@@ -82,7 +82,11 @@ elseif(PYTHON)
     # Rules for Zynq-class FPGAs with pynq OS support (see pynq.io)
     if(${VTA_TARGET} STREQUAL "pynq" OR
        ${VTA_TARGET} STREQUAL "ultra96")
-      list(APPEND FPGA_RUNTIME_SRCS vta/src/pynq/pynq_driver.cc)
+      file(GLOB FPGA_RUNTIME_SRCS vta/src/pynq/pynq_driver.cc)
+      # Rules for Pynq v2.4
+      find_library(__cma_lib NAMES cma PATH /usr/lib)
+    elseif(${VTA_TARGET} STREQUAL "de10nano")  # DE10-Nano rules
+      file(GLOB FPGA_RUNTIME_SRCS vta/src/de10nano/*.cc vta/src/*.cc)
     endif()
     # Target lib: vta
     add_library(vta SHARED ${FPGA_RUNTIME_SRCS})
@@ -91,11 +95,17 @@ elseif(PYTHON)
       string(SUBSTRING ${__def} 3 -1 __strip_def)
       target_compile_definitions(vta PUBLIC ${__strip_def})
     endforeach()
-    # Rules for Pynq v2.4
-    find_library(__cma_lib NAMES cma PATH /usr/lib)
-    target_link_libraries(vta ${__cma_lib})
+    if(${VTA_TARGET} STREQUAL "pynq" OR
+       ${VTA_TARGET} STREQUAL "ultra96")
+      target_link_libraries(vta ${__cma_lib})
+    elseif(${VTA_TARGET} STREQUAL "de10nano")  # DE10-Nano rules
+      target_compile_definitions(vta PUBLIC VTA_MAX_XFER=2097152) # (1<<21)
+      target_include_directories(vta PUBLIC
+        "/usr/local/intelFPGA_lite/18.1/embedded/ds-5/sw/gcc/arm-linux-gnueabihf/include")
+    endif()
   endif()
 
+
 else()
   message(STATUS "Cannot found python in env, VTA build is skipped..")
 endif()
diff --git a/docs/vta/install.md b/docs/vta/install.md
index 4493edb527fd..44248c16279d 100644
--- a/docs/vta/install.md
+++ b/docs/vta/install.md
@@ -20,8 +20,9 @@ VTA Installation Guide
 
 We present three installation guides, each extending on the previous one:
 1. [Simulator installation](#vta-simulator-installation)
-2. [Hardware test setup](#vta-pynq-based-test-setup)
-3. [FPGA toolchain installation](#vta-fpga-toolchain-installation)
+2. [PYNQ-based test setup](#vta-pynq-based-test-setup)
+3. [Custom test setup for Intel FPGA](#vta-custom-test-setup-for-intel-fpga)
+4. [FPGA toolchain installation](#vta-fpga-toolchain-installation)
 
 ## VTA Simulator Installation
 
@@ -182,9 +183,80 @@ The performance metrics measured on the Pynq board will be reported for each con
 
 You can also try out our [VTA programming tutorials](https://docs.tvm.ai/vta/tutorials/index.html).
 
+## VTA Custom Test Setup for Intel FPGA
+
+Similar to the PYNQ-side setup steps, this third guide details how to set up the Linux environment for Intel FPGA boards such as the DE10-Nano.
+
+In terms of hardware components, you will need the [DE10-Nano Development Kit](https://www.terasic.com.tw/cgi-bin/page/archive.pl?Language=English&No=1046), which can be acquired for $130, or $100 for academics, from [Terasic](https://www.terasic.com.tw/). A microSD card is included with the kit, as are the power and USB cables. However, an additional Ethernet cable is needed to connect the board to your LAN.
+
+The rest of this guide provides the steps for the following:
+
+* Flash the microSD card with latest Angstrom Linux image
+* Cross compilation setup
+* Device-side RPC server setup and deployment
+
+### DE10-Nano Board Setup
+
+Before powering up the device, we need to flash the microSD card image with latest Angstrom Linux image.
+
+#### Flash SD Card and Boot Angstrom Linux
+
+To flash SD card and boot Linux on DE10-Nano, it is recommended to navigate to the [Resource](https://www.terasic.com.tw/cgi-bin/page/archive.pl?Language=English&CategoryNo=167&No=1046&PartNo=4) tab of the DE10-Nano product page from Terasic Inc.
+After registering and logging in on the webpage, the prebuilt Angstrom Linux image will be available for download.
+Specifically, to flash the downloaded Linux SD card image into your physical SD card:
+
+First, extract the gzipped archive file.
+
+``` bash
+tar xf de10-nano-image-Angstrom-v2016.12.socfpga-sdimg.2017.03.31.tgz
+```
+
+This would produce a single SD card image named `de10-nano-image-Angstrom-v2016.12.socfpga-sdimg` (approx. 2.4 GB), it contains all the file systems to boot Angstrom Linux.
+
+Second, plug an SD card that is ready to be flashed into your PC, and identify the device id for the disk with `fdisk -l`, or with `gparted` if you prefer a GUI. The device id for your disk is likely to be `/dev/sdb`.
+
+Then, flash the disk image into your physical SD card with the following command:
+
+``` bash
+# NOTE: root privilege is typically required to run the following command.
+dd if=de10-nano-image-Angstrom-v2016.12.socfpga-sdimg of=/dev/sdb status=progress
+```
+This would take a few minutes for your PC to write the whole file systems into the SD card.
+After this process completes, you are ready to unmount the SD card and insert it into your DE10-Nano board.
+Now you can connect the power cable and serial port to boot the Angstrom Linux.
+
+> Note: When booting from the microSD card, you might find that the Linux kernel `zImage` on the card is incompatible.
+> In this case, you might need to build the `zImage` file of your own from [socfpga-4.9.78-ltsi](https://github.com/altera-opensource/linux-socfpga/tree/socfpga-4.9.78-ltsi) branch of the [linux-socfpga](https://github.com/altera-opensource/linux-socfpga) repository. 
+> For a quick fix, you can also download a prebuilt version of the `zImage` file [here](https://raw.githubusercontent.com/liangfu/de10-nano-supplement/master/zImage).
+
+After connecting the USB cables to the DE10-Nano board, power on the board by connecting the power cable. You may then connect to the serial port of the device by using `minicom` on your host PC:
+
+``` bash
+# NOTE: root privilege is typically required to run the following command.
+minicom -D /dev/ttyUSB0
+```
+
+The default user name for the device would be `root`, and the password is empty for the default user.
+
+You may now start to install supporting Python3 packages (TVM has dropped the support for Python2), specifically, they are `numpy`, `attrs` and `decorator`.
+
+> Note: You might fail to install `numpy` by using `pip3` on the DE10-Nano device. 
+> In that case, you have the option to either build your own filesystem image for the board from [meta-de10-nano](https://github.com/intel/meta-de10-nano) repository; 
+> an alternative option is to download prebuilt packages from existing Linux distributions, e.g. Debian.
+> For a quick fix, we have concatenated the supplementary binary files [here](https://raw.githubusercontent.com/liangfu/de10-nano-supplement/master/rootfs_supplement.tgz), and you can extract the files into the root filesystem.
+
+#### Install Required Python Packages
+
+After accessing bash terminal from the serial port, we need to install required Python packages before building and installing TVM and VTA programs.
+
+#### Build Additional Components to Use VTA Bitstream
+
+To use the above built bitstream on DE10-Nano hardware, several additional components need to be compiled for the system. 
+Specifically, to compile application executables for the system, you need to download and install [SoCEDS](http://fpgasoftware.intel.com/soceds/18.1/?edition=standard&download_manager=dlm3&platform=linux) (recommended), or alternatively install the `g++-arm-linux-gnueabihf` package on your host machine. You would also need a `cma` kernel module to allocate contigous memory, and a driver for communicating with the VTA subsystem. 
+
 ## VTA FPGA Toolchain Installation
 
-This third and last guide allows users to generate custom VTA bitstreams using free-to-use Xilinx or Intel compilation toolchains.
+This last guide allows users to generate custom VTA bitstreams using free-to-use Xilinx or Intel compilation toolchains.
 
 ### Xilinx Toolchain Installation
 
@@ -323,50 +395,6 @@ This process might be a bit lengthy, and might take up to half an hour to comple
 
 Once the compilation completes, the generated bistream can be found under `<tvmroot>/vta/build/hardware/intel/quartus/<configuration>/export/vta.rbf`. You can also open the Quartus project file (.qpf) available at `<tvmroot>/vta/build/hardware/intel/quartus/<configuration>/de10_nano_top.qpf` to look around the generated reports.
 
-#### Flash SD Card and Boot Angstrom Linux
-
-To flash SD card and boot Linux on DE10-Nano, it is recommended to navigate to the [Resource](https://www.terasic.com.tw/cgi-bin/page/archive.pl?Language=English&CategoryNo=167&No=1046&PartNo=4) tab of the DE10-Nano product page from Terasic Inc.
-After registeration and login on the webpage, the prebuild Angstrom Linux image would be available for downloading and flashing.
-Specifically, to flash the downloaded Linux SD card image into your physical SD card:
-
-First, extract the gzipped archive file.
-
-``` bash
-tar xf de10-nano-image-Angstrom-v2016.12.socfpga-sdimg.2017.03.31.tgz
-```
-
-This would produce a single SD card image named `de10-nano-image-Angstrom-v2016.12.socfpga-sdimg` (approx. 2.4 GB), it contains all the file systems to boot Angstrom Linux.
-
-Second, plugin a SD card that is ready to flash in your PC, and identify the device id for the disk with `fdisk -l`, or `gparted` if you feel better to use GUI. The typical device id for your disk would likely to be `/dev/sdb`. 
-
-Then, flash the disk image into your physical SD card with the following command:
-
-``` bash
-# NOTE: root privilege is typically required to run the following command.
-dd if=de10-nano-image-Angstrom-v2016.12.socfpga-sdimg of=/dev/sdb status=progress
-```
-This would take a few minutes for your PC to write the whole file systems into the SD card.
-After this process completes, you are ready to unmount the SD card and insert it into your DE10-Nano board.
-Now you can connect the power cable and serial port to boot the Angstrom Linux.
-
-#### Build Additional Components to Use VTA Bitstream
-
-To use the above built bitstream on DE10-Nano hardware, several additional components need to be compiled for the system. 
-Specifically, to compile application executables for the system, you need to download and install [SoCEDS](http://fpgasoftware.intel.com/soceds/18.1/?edition=standard&download_manager=dlm3&platform=linux), or alternatively install the `g++-arm-linux-gnueabihf` package on your host machine. You would also need a `cma` kernel module to allocate contigous memory, and a driver for communicating with the VTA subsystem. 
-
-For easier program debugging (e.g. `metal_test` program at `vta/tests/hardware/metal_test`), it is also recommended to install `gdbserver` on you device. For instance, you can start your program on the device by runninng:
-
-``` bash
-gdbserver localhost:4444 ./metal_test
-```
-, and then you can set break points and print values of desired varilables on the host:
-``` bash
-gdb-multiarch --fullname metal_test
-(gdb) target remote <device-ip>:4444
-```
-
-In addition, to enable fully featured VTA for DE10-Nano, you would also need `python3-numpy`, `python3-decorate`, `python3-attrs` to be cross-compiled.
-
 ### Use the Custom Bitstream
 
 We can program the new VTA FPGA bitstream by setting the bitstream path of the `vta.program_fpga()` function in the tutorial examples, or in the `test_program_rpc.py` script.
diff --git a/vta/config/vta_config.py b/vta/config/vta_config.py
index b925bf5fe4df..8c1f0af880d5 100644
--- a/vta/config/vta_config.py
+++ b/vta/config/vta_config.py
@@ -133,7 +133,9 @@ def main():
         cflags_str = " ".join(pkg.cflags)
         if pkg.TARGET == "pynq":
             cflags_str += " -DVTA_TARGET_PYNQ"
-        if pkg.TARGET == "ultra96":
+        elif pkg.TARGET == "de10nano":
+            cflags_str += " -DVTA_TARGET_DE10_NANO"
+        elif pkg.TARGET == "ultra96":
             cflags_str += " -DVTA_TARGET_ULTRA96"
         print(cflags_str)
 
diff --git a/vta/python/vta/environment.py b/vta/python/vta/environment.py
index ee2428be828b..3a3323ed8493 100644
--- a/vta/python/vta/environment.py
+++ b/vta/python/vta/environment.py
@@ -229,11 +229,11 @@ def target(self):
     @property
     def target_host(self):
         """The target host"""
-        if self.TARGET == "pynq":
+        if self.TARGET in ["pynq", "de10nano"]:
             return "llvm -target=armv7-none-linux-gnueabihf"
-        if self.TARGET == "ultra96":
+        elif self.TARGET == "ultra96":
             return "llvm -target=aarch64-linux-gnu"
-        if self.TARGET == "sim" or self.TARGET == "tsim":
+        elif self.TARGET in ["sim", "tsim"]:
             return "llvm"
         raise ValueError("Unknown target %s" % self.TARGET)
 
diff --git a/vta/python/vta/testing/util.py b/vta/python/vta/testing/util.py
index 67fc6b275b79..9a324fb7926b 100644
--- a/vta/python/vta/testing/util.py
+++ b/vta/python/vta/testing/util.py
@@ -52,7 +52,7 @@ def run(run_func):
                 assert simulator.enabled()
             run_func(env, rpc.LocalSession())
 
-    elif env.TARGET in ["pynq", "ultra96"]:
+    elif env.TARGET in ["pynq", "ultra96", "de10nano"]:
         # The environment variables below should be set if we are using
         # a tracker to obtain a remote for a test device
         tracker_host = os.environ.get("TVM_TRACKER_HOST", None)
diff --git a/vta/src/de10nano/cma_api.cc b/vta/src/de10nano/cma_api.cc
new file mode 100644
index 000000000000..d6d24cb9a02a
--- /dev/null
+++ b/vta/src/de10nano/cma_api.cc
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * The MIT License (MIT)
+ *
+ * COPYRIGHT (C) 2017 Institute of Electronics and Computer Science (EDI), Latvia.
+ * AUTHOR: Rihards Novickis (rihards.novickis@edi.lv)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ */
+
+/*!
+ *  Copyright (c) 2018 by Contributors
+ * \file cma_api.cc
+ * \brief Application layer implementation for contiguous memory allocation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include "cma_api.h"
+
+#ifndef CMA_IOCTL_MAGIC
+#define CMA_IOCTL_MAGIC       0xf2
+#endif
+/* ioctl request codes: _IOC(direction, magic, command number, argument size) */
+#define CMA_ALLOC_CACHED      _IOC(_IOC_WRITE|_IOC_READ,  CMA_IOCTL_MAGIC, 1, 4)
+#define CMA_ALLOC_NONCACHED   _IOC(_IOC_WRITE|_IOC_READ,  CMA_IOCTL_MAGIC, 2, 4)
+#define CMA_FREE              _IOC(_IOC_WRITE,            CMA_IOCTL_MAGIC, 3, 4)
+#define CMA_GET_PHY_ADDR      _IOC(_IOC_WRITE|_IOC_READ,  CMA_IOCTL_MAGIC, 4, 4)
+#define CMA_GET_SIZE          _IOC(_IOC_WRITE|_IOC_READ,  CMA_IOCTL_MAGIC, 5, 4)
+
+#define CMA_IOCTL_MAXNR       5
+
+#ifndef CMA_DEBUG
+  #define CMA_DEBUG           0
+#endif
+#ifndef DRIVER_NODE_NAME
+  #define DRIVER_NODE_NAME    "cma"
+#endif
+/* __DEBUG prints via printf only when CMA_DEBUG == 1; otherwise it expands to nothing */
+#if CMA_DEBUG == 1
+  #define __DEBUG(fmt, args...)  printf("CMA_API_DEBUG: " fmt, ##args)
+#else
+  #define __DEBUG(fmt, args...)
+#endif
+/* Round N up to the nearest multiple of S */
+#define ROUND_UP(N, S)     ((((N) + (S) - 1) / (S)) * (S))
+
+
+/* Private functions */
+void *cma_alloc(size_t size, unsigned ioctl_cmd);
+
+/* Driver node descriptor; -1 (never a valid fd) until cma_init() opens the node */
+int cma_fd = -1;
+
+int cma_init(void) {
+  /* Open the driver node read/write and keep the descriptor in the global cma_fd */
+  __DEBUG("Opening \"/dev/" DRIVER_NODE_NAME "\" file\n");
+  cma_fd = open("/dev/" DRIVER_NODE_NAME, O_RDWR);
+
+  if (cma_fd != -1) {
+    return 0;
+  }
+  __DEBUG("Failed to initialize api - \"%s\"\n", strerror(errno));
+  return -1;
+}
+
+int cma_release(void) {
+  /* Close the descriptor held in cma_fd; a close() failure is reported as -1 */
+  __DEBUG("Closing \"/dev/" DRIVER_NODE_NAME "\" file\n");
+
+  if (close(cma_fd) != -1) {
+    return 0;
+  }
+  __DEBUG("Failed to finilize api - \"%s\"\n", strerror(errno));
+  return -1;
+}
+
+void *cma_alloc_cached(size_t size) {  /* cma_alloc() with the CMA_ALLOC_CACHED request */
+  return cma_alloc(size, CMA_ALLOC_CACHED);
+}
+
+void *cma_alloc_noncached(size_t size) {  /* cma_alloc() with the CMA_ALLOC_NONCACHED request */
+  return cma_alloc(size, CMA_ALLOC_NONCACHED);
+}
+
+int cma_free(void *mem) {
+  __DEBUG("Releasing contigous memory from 0x%x\n", (unsigned)mem);
+  /* both ioctls take the user space address; CMA_GET_SIZE overwrites it with the size */
+  unsigned data   = (unsigned)mem;
+  unsigned v_addr = (unsigned)mem;
+
+  if ( ioctl(cma_fd, CMA_GET_SIZE, &data) == -1 ) {
+    __DEBUG("cma_free - ioctl command unsuccsessful - 0\n");
+    return -1;
+  }
+  /* data now contains size */
+
+  /* unmap first; never release the block while it is still mapped */
+  if ( munmap(mem, data) == -1 ) {
+    __DEBUG("cma_free - munmap unsuccsessful\n");
+    return -1;
+  }
+
+  /* free cma entry */
+  if ( ioctl(cma_fd, CMA_FREE, &v_addr) == -1 ) {
+    __DEBUG("cma_free - ioctl command unsuccsessful - 1\n");
+    return -1;
+  }
+  return 0;
+}
+
+unsigned cma_get_phy_addr(void *mem) {
+  unsigned data;
+  __DEBUG("Getting physical address from 0x%x\n", (unsigned)mem);
+
+  /* the ioctl takes the user space pointer value in data ... */
+  data = (unsigned)mem;
+
+  /* ... and overwrites it with the physical address; 0 is returned on failure */
+  if ( ioctl(cma_fd, CMA_GET_PHY_ADDR, &data) == -1 ) {
+    __DEBUG("cma_get_phy_addr - ioctl command unsuccsessful\n");
+    return 0;
+  }
+  /* data now contains physical address */
+
+  return data;
+}
+
+
+void *cma_alloc(size_t size, unsigned ioctl_cmd) {
+  unsigned data;
+  void   *mem;
+  __DEBUG("Allocating 0x%zx bytes of contigous memory\n", size);
+
+  /* Page align size */
+  size = ROUND_UP(size, getpagesize());
+
+  /* ioctl cmd to allocate contigous memory: the size goes in through data */
+  data = (unsigned)size;
+  if ( ioctl(cma_fd, ioctl_cmd, &data) == -1 ) {
+    __DEBUG("cma_alloc - ioctl command unsuccsessful\n");
+    return NULL;
+  }
+
+  /* at this point phy_addr is written to data */
+
+  /* mmap memory; NOTE(review): the block is not released if this fails - confirm */
+  mem = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, cma_fd, data);
+  if (mem == MAP_FAILED) {
+    __DEBUG("cma_alloc - mmap unsuccsessful\n");
+    return NULL;
+  }
+
+  return mem;
+}
diff --git a/vta/src/de10nano/cma_api.h b/vta/src/de10nano/cma_api.h
new file mode 100644
index 000000000000..f20939d52b63
--- /dev/null
+++ b/vta/src/de10nano/cma_api.h
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * \file cma_api.h
+ * \brief API for the contiguous memory allocation driver.
+ */
+
+#ifndef VTA_DE10NANO_CMA_API_H_
+#define VTA_DE10NANO_CMA_API_H_
+#include <stddef.h>  /* size_t, so this header is usable on its own */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * \brief Initialize CMA api (basically perform open() syscall).
+ *
+ * \return Returns 0 on SUCCESS. On FAILURE returns -1 and errno is set
+ * accordingly.
+ */
+int cma_init(void);
+
+
+/**
+ * \brief Release CMA api (basically perform close() syscall).
+ *
+ * \return Returns 0 on SUCCESS. On FAILURE returns -1 and errno is set
+ * accordingly.
+ */
+int cma_release(void);
+
+
+/**
+ * \brief Allocate cached, physically contiguous memory.
+ *
+ * \param size Size in bytes.
+ *
+ * \return Returns NULL on FAILURE. Otherwise pointer to valid userspace
+ * memory.
+ */
+void *cma_alloc_cached(size_t size);
+
+
+/**
+ * \brief Allocate noncached, physically contiguous memory.
+ *
+ * \param size Size in bytes.
+ *
+ * \return Returns NULL on FAILURE. Otherwise pointer to valid userspace
+ * memory.
+ */
+void *cma_alloc_noncached(size_t size);
+
+
+/**
+ * \brief Release physically contiguous memory.
+ *
+ * \param mem Pointer to previously allocated contiguous memory.
+ *
+ * \return Returns 0 on SUCCESS, -1 on FAILURE.
+ */
+int cma_free(void *mem);
+
+
+/**
+ * \brief Get physical address of cma memory block (should be used for DMA).
+ *
+ * \param mem Pointer to previously allocated contiguous memory.
+ *
+ * \return Returns address on SUCCESS, 0 on FAILURE.
+ */
+unsigned cma_get_phy_addr(void *mem);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // VTA_DE10NANO_CMA_API_H_
diff --git a/vta/src/de10nano/de10nano_driver.cc b/vta/src/de10nano/de10nano_driver.cc
new file mode 100644
index 000000000000..97607f536051
--- /dev/null
+++ b/vta/src/de10nano/de10nano_driver.cc
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * \file de10nano_driver.cc
+ * \brief VTA driver for DE10_Nano board.
+ */
+
+#include "de10nano_driver.h"
+
+#include <string.h>
+#include <vta/driver.h>
+#include <dmlc/logging.h>
+#include <thread>
+#include "cma_api.h"
+
+void* VTAMemAlloc(size_t size, int cached) {
+  // Function-local static: cma_init() is invoked once, on the first allocation
+  static int cma_ready = cma_init();
+  (void)cma_ready;
+  // Hand the request to the allocator flavour the caller asked for
+  return cached ? cma_alloc_cached(size)
+                : cma_alloc_noncached(size);
+}
+
+void VTAMemFree(void* buf) {
+  cma_free(buf);  // the int status returned by cma_free() is not propagated
+}
+
+vta_phy_addr_t VTAMemGetPhyAddr(void* buf) {
+  return cma_get_phy_addr(buf) + 0x80000000;  // NOTE(review): offset not explained here - confirm
+}
+
+void VTAMemCopyFromHost(void* dst, const void* src, size_t size) {
+  // SoC-based FPGAs share memory with the CPU, so a plain memcpy() of size bytes suffices
+  memcpy(dst, src, size);
+}
+
+void VTAMemCopyToHost(void* dst, const void* src, size_t size) {
+  // SoC-based FPGAs share memory with the CPU, so a plain memcpy() of size bytes suffices
+  memcpy(dst, src, size);
+}
+
+void VTAFlushCache(void * offset, vta_phy_addr_t buf, int size) {
+  // Unsupported on this target: CHECK(false) is fatal, nothing after it would run
+  CHECK(false) << "VTAFlushCache not implemented for de10nano";
+}
+
+void VTAInvalidateCache(void * offset, vta_phy_addr_t buf, int size) {
+  // Unsupported on this target: CHECK(false) is fatal, nothing after it would run
+  CHECK(false) << "VTAInvalidateCache not implemented for de10nano";
+}
+
+// Maps the /dev/mem page that contains addr; returns MAP_FAILED on any failure
+void *VTAMapRegister(uint32_t addr) {
+  // Split addr into its page-aligned base and the offset inside that page
+  uint32_t virt_base = addr & ~(getpagesize() - 1);
+  uint32_t virt_offset = addr - virt_base;
+  // A failed open() reports the same MAP_FAILED a failed mmap() would
+  int mmap_file = open("/dev/mem", O_RDWR|O_SYNC);
+  if (mmap_file < 0) return MAP_FAILED;
+  void *vta = mmap(NULL, (VTA_IP_REG_MAP_RANGE + virt_offset),
+                   PROT_READ|PROT_WRITE, MAP_SHARED, mmap_file, virt_base);
+  // The mapping stays valid after close(), so do not leak the descriptor
+  close(mmap_file);
+  return vta;
+}
+
+void VTAUnmapRegister(void *vta) {
+  // Tear down the register window; a failing munmap() trips the assert
+  const int rc = munmap(vta, VTA_IP_REG_MAP_RANGE);
+  assert(rc == 0);
+}
+
+void VTAWriteMappedReg(void* base_addr, uint32_t offset, uint32_t val) {
+  *reinterpret_cast<volatile uint32_t *>(static_cast<char *>(base_addr) + offset) = val;
+}
+
+uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset) {
+  return *reinterpret_cast<volatile uint32_t *>(static_cast<char *>(base_addr) + offset);
+}
+
+class VTADevice {
+ public:
+  VTADevice() {
+    // Map the host control registers; fail here rather than write through MAP_FAILED
+    vta_host_handle_ = VTAMapRegister(VTA_HOST_ADDR);
+    CHECK(vta_host_handle_ != MAP_FAILED) << "Failed to map VTA host registers";
+  }
+
+  ~VTADevice() {
+    // Close VTA stage handle
+    VTAUnmapRegister(vta_host_handle_);
+  }
+
+  // Programs one run and polls for completion: returns 0 when done, 1 on timeout
+  int Run(vta_phy_addr_t insn_phy_addr,
+          uint32_t insn_count,
+          uint32_t wait_cycles) {
+    VTAWriteMappedReg(vta_host_handle_, 0x04, 0);
+    VTAWriteMappedReg(vta_host_handle_, 0x08, insn_count);
+    VTAWriteMappedReg(vta_host_handle_, 0x0c, insn_phy_addr);
+
+    // VTA start
+    VTAWriteMappedReg(vta_host_handle_, 0x0, VTA_START);
+
+    // Poll the done bit at most wait_cycles times, yielding the CPU between reads
+    unsigned t;
+    for (t = 0; t < wait_cycles; ++t) {
+      if (VTAReadMappedReg(vta_host_handle_, 0x00) & VTA_DONE) break;
+      std::this_thread::yield();
+    }
+    // Report error if timeout
+    return t < wait_cycles ? 0 : 1;
+  }
+
+ private:
+  // VTA handles (register maps)
+  void* vta_host_handle_{nullptr};
+};
+
+VTADeviceHandle VTADeviceAlloc() {  // heap-allocates a VTADevice; VTADeviceFree() deletes it
+  return new VTADevice();
+}
+
+void VTADeviceFree(VTADeviceHandle handle) {  // handle is treated as a VTADevice* and deleted
+  delete static_cast<VTADevice*>(handle);
+}
+
+int VTADeviceRun(VTADeviceHandle handle,
+                 vta_phy_addr_t insn_phy_addr,
+                 uint32_t insn_count,
+                 uint32_t wait_cycles) {
+  VTADevice* device = static_cast<VTADevice*>(handle);
+  return device->Run(insn_phy_addr, insn_count, wait_cycles);
+}
+
+void VTAProgram(const char* bitstream) {  // bitstream is unused: the CHECK always fails
+  CHECK(false) << "VTAProgram not implemented for de10nano";
+}
diff --git a/vta/src/de10nano/de10nano_driver.h b/vta/src/de10nano/de10nano_driver.h
new file mode 100644
index 000000000000..0009e7574b02
--- /dev/null
+++ b/vta/src/de10nano/de10nano_driver.h
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * \file de10nano_driver.h
+ * \brief VTA driver for DE10_Nano board.
+ */
+
+#ifndef VTA_DE10NANO_DE10NANO_DRIVER_H_
+#define VTA_DE10NANO_DE10NANO_DRIVER_H_
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+void *VTAMapRegister(uint32_t addr);  // mmap()s the /dev/mem page holding addr
+void VTAUnmapRegister(void *vta);  // munmap()s a VTAMapRegister() mapping
+void VTAWriteMappedReg(void* base_addr, uint32_t offset, uint32_t val);  // 32-bit write
+uint32_t VTAReadMappedReg(void* base_addr, uint32_t offset);  // 32-bit read
+void VTAProgram(const char* bitstream);  // not implemented for de10nano
+
+/*! \brief VTA configuration register address range */
+#define VTA_RANGE 0x400
+/*! \brief VTA configuration register start value */
+#define VTA_START 0x1
+/*! \brief VTA configuration register auto-restart value */
+#define VTA_AUTORESTART 0x81
+/*! \brief VTA configuration register done value */
+#define VTA_DONE 0x2
+
+/*! \brief VTA fetch stage configuration register address
+*/
+#define VTA_HOST_ADDR    0xFF220000
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // VTA_DE10NANO_DE10NANO_DRIVER_H_