Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge master to spop branch #4

Merged
merged 30 commits into from
May 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
e54e760
fix restructured text (#5541)
tmoreau89 May 8, 2020
95540d2
[CRT]fix to reduce RAM size during loading model (#5507)
siju-samuel May 8, 2020
15a4218
Load platform specific lib for tvmdsoop instead of only so (#5542)
tobegit3hub May 8, 2020
2175f6b
[RPC] Improve RPCServer AsyncIO support. (#5544)
tqchen May 8, 2020
aded92d
[Rust] Add first stage of updating and rewriting Rust bindings. (#5526)
jroesch May 8, 2020
06efa7f
[TE] Fix MakeLoopNest for warp memory (#5382)
roastduck May 9, 2020
ae7e0a1
[TIR][Printer] text format printer considering future parsing use (#5…
spectrometerHBH May 9, 2020
64c6795
[Optimization] Warp level reduction support for CUDA (#5498)
wpan11nv May 9, 2020
f939c61
A clone of test/python/unittest/test_runtime_micro.py, however (#5546)
tom-gall May 9, 2020
47ea99c
[CI] Install wasmtime for WebAssembly tests (#5494)
kazum May 9, 2020
0c43fa0
Apparently, ONNX Conv with no 'pads' defaults to zero padding (#5548)
May 9, 2020
cdc7ae4
[WEB] WebGPU support (#5545)
tqchen May 9, 2020
28057b8
[TOPI][RELAY][TENSORFLOW]Math ops added (#5502)
siju-samuel May 11, 2020
29ae608
[RUNTIME] Hexagon driver for offloading kernels to simulator (#5492)
May 11, 2020
7bad56b
[LINT] clang-format the h,cc,m files. (#5557)
tqchen May 11, 2020
0e87752
[BYOC, MergeComposite] Add additional check before re-using the cache…
masahi May 11, 2020
de866b0
[WEB] Setup lint, doc, test (#5556)
tqchen May 11, 2020
a4eae2b
[CI] Update ci-cpu to bionic (#5555)
tqchen May 11, 2020
4196726
[CI] Update ci-cpu to bionic (#5554)
tqchen May 11, 2020
7f5e37a
[Fix] Fix conv2d alter op for arm cpu (#5532)
icemelon May 11, 2020
621a61d
[FRONTEND]onnx, mxnet, pytorch mathops added (#5561)
siju-samuel May 11, 2020
ad2ee97
Fix topi test for tensorcore (#5563)
May 11, 2020
1a0f44d
[Refactor][std::string --> String] IR is updated with String (#5547)
May 11, 2020
39b2db4
[DOCKER] Fix vulkansdk in the ci-gpu (#5566)
tqchen May 11, 2020
5cd8bb3
[CI] reintroduce docker stage for wasm tests (#5565)
tqchen May 11, 2020
4d713d6
[CI] Update ci-lint to use the latest image that contains clang-forma…
tqchen May 12, 2020
5428b54
[DOCKER] Add clang-format and nodejs to ci-lint (#5567)
tqchen May 12, 2020
579da6b
[TARGET] Phase out WebGL (#5570)
tqchen May 12, 2020
46787c8
[LINT] Enable clang-format. (#5572)
tqchen May 12, 2020
ec2eb89
Merge pull request #3 from apache/master
deepakbabel23 May 12, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ __pycache__/
# C extensions
*.so
*.ll

.npm
# Distribution / packaging
.Python
env/
Expand Down Expand Up @@ -225,7 +225,7 @@ Pipfile.lock
# conda package artifacts
conda/Dockerfile.cuda*
conda/pkg

.node_repl_history
# nix files
.envrc
*.nix
Expand Down
4 changes: 2 additions & 2 deletions 3rdparty/bfloat16/bfloat16.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
==============================================================================*/

#include <tvm/runtime/c_runtime_api.h>

#include <cstddef>
#include <cstdint>

Expand Down Expand Up @@ -50,8 +51,7 @@ void BFloat16ToFloat(const uint16_t* src, float* dst, size_t size) {
#endif
}

void BFloat16Add(const uint16_t* a, const uint16_t* b, uint16_t* dst,
size_t size) {
void BFloat16Add(const uint16_t* a, const uint16_t* b, uint16_t* dst, size_t size) {
float a_f, b_f;
BFloat16ToFloat(a, &a_f, 1);
BFloat16ToFloat(b, &b_f, 1);
Expand Down
17 changes: 7 additions & 10 deletions 3rdparty/cma/cma.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,17 @@
#ifndef VTA_DE10_NANO_KERNEL_MODULE_CMA_H_
#define VTA_DE10_NANO_KERNEL_MODULE_CMA_H_


/* Should be defined in settings.mk file */
#ifndef CMA_IOCTL_MAGIC
#define CMA_IOCTL_MAGIC 0xf2
#define CMA_IOCTL_MAGIC 0xf2
#endif

#define CMA_ALLOC_CACHED _IOC(_IOC_WRITE | _IOC_READ, CMA_IOCTL_MAGIC, 1, 4)
#define CMA_ALLOC_NONCACHED _IOC(_IOC_WRITE | _IOC_READ, CMA_IOCTL_MAGIC, 2, 4)
#define CMA_FREE _IOC(_IOC_WRITE, CMA_IOCTL_MAGIC, 3, 4)
#define CMA_GET_PHY_ADDR _IOC(_IOC_WRITE | _IOC_READ, CMA_IOCTL_MAGIC, 4, 4)
#define CMA_GET_SIZE _IOC(_IOC_WRITE | _IOC_READ, CMA_IOCTL_MAGIC, 5, 4)

#define CMA_ALLOC_CACHED _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 1, 4)
#define CMA_ALLOC_NONCACHED _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 2, 4)
#define CMA_FREE _IOC(_IOC_WRITE, CMA_IOCTL_MAGIC, 3, 4)
#define CMA_GET_PHY_ADDR _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 4, 4)
#define CMA_GET_SIZE _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 5, 4)

#define CMA_IOCTL_MAXNR 5

#define CMA_IOCTL_MAXNR 5

#endif // VTA_DE10_NANO_KERNEL_MODULE_CMA_H_
62 changes: 28 additions & 34 deletions 3rdparty/cma/cma_api_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,48 +30,47 @@
* \brief Application layer implementation for contiguous memory allocation.
*/

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>

#include "cma_api.h"

#ifndef CMA_IOCTL_MAGIC
#define CMA_IOCTL_MAGIC 0xf2
#define CMA_IOCTL_MAGIC 0xf2
#endif

#define CMA_ALLOC_CACHED _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 1, 4)
#define CMA_ALLOC_NONCACHED _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 2, 4)
#define CMA_FREE _IOC(_IOC_WRITE, CMA_IOCTL_MAGIC, 3, 4)
#define CMA_GET_PHY_ADDR _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 4, 4)
#define CMA_GET_SIZE _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 5, 4)
#define CMA_ALLOC_CACHED _IOC(_IOC_WRITE | _IOC_READ, CMA_IOCTL_MAGIC, 1, 4)
#define CMA_ALLOC_NONCACHED _IOC(_IOC_WRITE | _IOC_READ, CMA_IOCTL_MAGIC, 2, 4)
#define CMA_FREE _IOC(_IOC_WRITE, CMA_IOCTL_MAGIC, 3, 4)
#define CMA_GET_PHY_ADDR _IOC(_IOC_WRITE | _IOC_READ, CMA_IOCTL_MAGIC, 4, 4)
#define CMA_GET_SIZE _IOC(_IOC_WRITE | _IOC_READ, CMA_IOCTL_MAGIC, 5, 4)

#define CMA_IOCTL_MAXNR 5
#define CMA_IOCTL_MAXNR 5

#ifndef CMA_DEBUG
#define CMA_DEBUG 0
#define CMA_DEBUG 0
#endif
#ifndef DRIVER_NODE_NAME
#define DRIVER_NODE_NAME "cma"
#define DRIVER_NODE_NAME "cma"
#endif

#if CMA_DEBUG == 1
#define __DEBUG(fmt, args...) printf("CMA_API_DEBUG: " fmt, ##args)
#define __DEBUG(fmt, args...) printf("CMA_API_DEBUG: " fmt, ##args)
#else
#define __DEBUG(fmt, args...)
#define __DEBUG(fmt, args...)
#endif

#define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S))

#define ROUND_UP(N, S) ((((N) + (S)-1) / (S)) * (S))

/* Private functions */
void *cma_alloc(size_t size, unsigned ioctl_cmd);
void* cma_alloc(size_t size, unsigned ioctl_cmd);

/* Global file descriptor */
int cma_fd = 0;
Expand Down Expand Up @@ -99,23 +98,19 @@ int cma_release(void) {
return 0;
}

void *cma_alloc_cached(size_t size) {
return cma_alloc(size, CMA_ALLOC_CACHED);
}
void* cma_alloc_cached(size_t size) { return cma_alloc(size, CMA_ALLOC_CACHED); }

void *cma_alloc_noncached(size_t size) {
return cma_alloc(size, CMA_ALLOC_NONCACHED);
}
void* cma_alloc_noncached(size_t size) { return cma_alloc(size, CMA_ALLOC_NONCACHED); }

int cma_free(void *mem) {
int cma_free(void* mem) {
__DEBUG("Releasing contigous memory from 0x%x\n", (unsigned)mem);
unsigned data, v_addr;

/* save user space pointer value */
data = (unsigned)mem;
data = (unsigned)mem;
v_addr = (unsigned)mem;

if ( ioctl(cma_fd, CMA_GET_SIZE, &data) == -1 ) {
if (ioctl(cma_fd, CMA_GET_SIZE, &data) == -1) {
__DEBUG("cma_free - ioctl command unsuccsessful - 0\n");
return -1;
}
Expand All @@ -125,23 +120,23 @@ int cma_free(void *mem) {
munmap(mem, data);

/* free cma entry */
if ( ioctl(cma_fd, CMA_FREE, &v_addr) == -1 ) {
if (ioctl(cma_fd, CMA_FREE, &v_addr) == -1) {
__DEBUG("cma_free - ioctl command unsuccsessful - 1\n");
return -1;
}

return 0;
}

unsigned cma_get_phy_addr(void *mem) {
unsigned cma_get_phy_addr(void* mem) {
unsigned data;
__DEBUG("Getting physical address from 0x%x\n", (unsigned)mem);

/* save user space pointer value */
data = (unsigned)mem;

/* get physical address */
if ( ioctl(cma_fd, CMA_GET_PHY_ADDR, &data) == -1 ) {
if (ioctl(cma_fd, CMA_GET_PHY_ADDR, &data) == -1) {
__DEBUG("cma_free - ioctl command unsuccsessful\n");
return 0;
}
Expand All @@ -150,18 +145,17 @@ unsigned cma_get_phy_addr(void *mem) {
return data;
}


void *cma_alloc(size_t size, unsigned ioctl_cmd) {
void* cma_alloc(size_t size, unsigned ioctl_cmd) {
unsigned data;
void *mem;
void* mem;
__DEBUG("Allocating 0x%x bytes of contigous memory\n", size);

/* Page align size */
size = ROUND_UP(size, getpagesize());

/* ioctl cmd to allocate contiguous memory */
data = (unsigned)size;
if ( ioctl(cma_fd, ioctl_cmd, &data) == -1 ) {
if (ioctl(cma_fd, ioctl_cmd, &data) == -1) {
__DEBUG("cma_alloc - ioctl command unsuccsessful\n");
return NULL;
}
Expand Down
82 changes: 52 additions & 30 deletions 3rdparty/compiler-rt/builtin_fp16.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,33 @@ static inline uint32_t __clz(uint32_t x) {
int n = 32;
uint32_t y;

y = x >>16; if (y) { n = n -16; x = y; }
y = x >> 8; if (y) { n = n - 8; x = y; }
y = x >> 4; if (y) { n = n - 4; x = y; }
y = x >> 2; if (y) { n = n - 2; x = y; }
y = x >> 1; if (y) return n - 2;
y = x >> 16;
if (y) {
n = n - 16;
x = y;
}
y = x >> 8;
if (y) {
n = n - 8;
x = y;
}
y = x >> 4;
if (y) {
n = n - 4;
x = y;
}
y = x >> 2;
if (y) {
n = n - 2;
x = y;
}
y = x >> 1;
if (y) return n - 2;
return n - x;
}

template <typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS,
typename DST_T, typename DST_REP_T, int DST_SIG_BITS>
template <typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS, typename DST_T, typename DST_REP_T,
int DST_SIG_BITS>
static inline DST_T __truncXfYf2__(SRC_T a) {
// Various constants whose values follow from the type parameters.
// Any reasonable optimizer will fold and propagate all of these.
Expand Down Expand Up @@ -71,7 +88,10 @@ static inline DST_T __truncXfYf2__(SRC_T a) {
const DST_REP_T dstNaNCode = dstQNaN - 1;

// Break a into a sign and representation of the absolute value
union SrcExchangeType { SRC_T f; SRC_REP_T i; };
union SrcExchangeType {
SRC_T f;
SRC_REP_T i;
};
SrcExchangeType src_rep;
src_rep.f = a;
const SRC_REP_T aRep = src_rep.i;
Expand All @@ -88,25 +108,21 @@ static inline DST_T __truncXfYf2__(SRC_T a) {

const SRC_REP_T roundBits = aAbs & roundMask;
// Round to nearest
if (roundBits > halfway)
absResult++;
// Ties to even
if (roundBits > halfway) absResult++;
// Ties to even
else if (roundBits == halfway)
absResult += absResult & 1;
}
else if (aAbs > srcInfinity) {
} else if (aAbs > srcInfinity) {
// a is NaN.
// Conjure the result by beginning with infinity, setting the qNaN
// bit and inserting the (truncated) trailing NaN field.
absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS;
absResult |= dstQNaN;
absResult |= ((aAbs & srcNaNCode) >> (SRC_SIG_BITS - DST_SIG_BITS)) & dstNaNCode;
}
else if (aAbs >= overflow) {
} else if (aAbs >= overflow) {
// a overflows to infinity.
absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS;
}
else {
} else {
// a underflows on conversion to the destination type or is an exact
// zero. The result may be a denormal or zero. Extract the exponent
// to get the shift amount for the denormalization.
Expand All @@ -124,24 +140,26 @@ static inline DST_T __truncXfYf2__(SRC_T a) {
absResult = denormalizedSignificand >> (SRC_SIG_BITS - DST_SIG_BITS);
const SRC_REP_T roundBits = denormalizedSignificand & roundMask;
// Round to nearest
if (roundBits > halfway)
absResult++;
// Ties to even
if (roundBits > halfway) absResult++;
// Ties to even
else if (roundBits == halfway)
absResult += absResult & 1;
}
}

// Apply the signbit to (DST_T)abs(a).
const DST_REP_T result = absResult | sign >> (srcBits - dstBits);
union DstExchangeType { DST_T f; DST_REP_T i; };
union DstExchangeType {
DST_T f;
DST_REP_T i;
};
DstExchangeType dst_rep;
dst_rep.i = result;
return dst_rep.f;
}

template<typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS,
typename DST_T, typename DST_REP_T, int DST_SIG_BITS>
template <typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS, typename DST_T, typename DST_REP_T,
int DST_SIG_BITS>
static inline DST_T __extendXfYf2__(SRC_T a) {
// Various constants whose values follow from the type parameters.
// Any reasonable optimizer will fold and propagate all of these.
Expand All @@ -157,15 +175,18 @@ static inline DST_T __extendXfYf2__(SRC_T a) {
const SRC_REP_T srcQNaN = SRC_REP_T(1) << (SRC_SIG_BITS - 1);
const SRC_REP_T srcNaNCode = srcQNaN - 1;

const int dstBits = sizeof(DST_T)*8;
const int dstBits = sizeof(DST_T) * 8;
const int dstExpBits = dstBits - DST_SIG_BITS - 1;
const int dstInfExp = (1 << dstExpBits) - 1;
const int dstExpBias = dstInfExp >> 1;

const DST_REP_T dstMinNormal = DST_REP_T(1) << DST_SIG_BITS;

// Break a into a sign and representation of the absolute value
union SrcExchangeType { SRC_T f; SRC_REP_T i; };
union SrcExchangeType {
SRC_T f;
SRC_REP_T i;
};
SrcExchangeType src_rep;
src_rep.f = a;
const SRC_REP_T aRep = src_rep.i;
Expand All @@ -191,8 +212,7 @@ static inline DST_T __extendXfYf2__(SRC_T a) {
absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS;
absResult |= (DST_REP_T)(aAbs & srcQNaN) << (DST_SIG_BITS - SRC_SIG_BITS);
absResult |= (DST_REP_T)(aAbs & srcNaNCode) << (DST_SIG_BITS - SRC_SIG_BITS);
}
else if (aAbs) {
} else if (aAbs) {
// a is denormal.
// renormalize the significand and clear the leading bit, then insert
// the correct adjusted exponent in the destination type.
Expand All @@ -201,15 +221,17 @@ static inline DST_T __extendXfYf2__(SRC_T a) {
absResult ^= dstMinNormal;
const int resultExponent = dstExpBias - srcExpBias - scale + 1;
absResult |= (DST_REP_T)resultExponent << DST_SIG_BITS;
}
else {
} else {
// a is zero.
absResult = 0;
}

// Apply the signbit to (DST_T)abs(a).
const DST_REP_T result = absResult | (DST_REP_T)sign << (dstBits - srcBits);
union DstExchangeType { DST_T f; DST_REP_T i; };
union DstExchangeType {
DST_T f;
DST_REP_T i;
};
DstExchangeType dst_rep;
dst_rep.i = result;
return dst_rep.f;
Expand Down
2 changes: 0 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ endif()
tvm_option(USE_CUDA "Build with CUDA" OFF)
tvm_option(USE_OPENCL "Build with OpenCL" OFF)
tvm_option(USE_VULKAN "Build with Vulkan" OFF)
tvm_option(USE_OPENGL "Build with OpenGL" OFF)
tvm_option(USE_METAL "Build with Metal" OFF)
tvm_option(USE_ROCM "Build with ROCM" OFF)
tvm_option(ROCM_PATH "The path to rocm" /opt/rocm)
Expand Down Expand Up @@ -308,7 +307,6 @@ include(cmake/modules/VTA.cmake)
include(cmake/modules/CUDA.cmake)
include(cmake/modules/Hexagon.cmake)
include(cmake/modules/OpenCL.cmake)
include(cmake/modules/OpenGL.cmake)
include(cmake/modules/OpenMP.cmake)
include(cmake/modules/Vulkan.cmake)
include(cmake/modules/Metal.cmake)
Expand Down
Loading