Skip to content

Commit

Permalink
Revert "Sync with apache/incubator-tvm 6/15/2020 (#116)"
Browse files Browse the repository at this point in the history
This reverts commit c3c1472.
  • Loading branch information
Trevor Morris authored and trevor-m committed Jun 18, 2020
1 parent c3c1472 commit 4e01034
Show file tree
Hide file tree
Showing 1,434 changed files with 55,547 additions and 82,066 deletions.
7 changes: 3 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,10 @@
__pycache__/
*.py[cod]
*$py.class
*.S

# C extensions
*.so
*.ll
.npm

# Distribution / packaging
.Python
env/
Expand Down Expand Up @@ -225,7 +224,7 @@ Pipfile.lock
# conda package artifacts
conda/Dockerfile.cuda*
conda/pkg
.node_repl_history

# nix files
.envrc
*.nix
Expand Down
4 changes: 2 additions & 2 deletions 3rdparty/bfloat16/bfloat16.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
==============================================================================*/

#include <tvm/runtime/c_runtime_api.h>

#include <cstddef>
#include <cstdint>

Expand Down Expand Up @@ -51,7 +50,8 @@ void BFloat16ToFloat(const uint16_t* src, float* dst, size_t size) {
#endif
}

void BFloat16Add(const uint16_t* a, const uint16_t* b, uint16_t* dst, size_t size) {
void BFloat16Add(const uint16_t* a, const uint16_t* b, uint16_t* dst,
size_t size) {
float a_f, b_f;
BFloat16ToFloat(a, &a_f, 1);
BFloat16ToFloat(b, &b_f, 1);
Expand Down
17 changes: 10 additions & 7 deletions 3rdparty/cma/cma.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,20 @@
#ifndef VTA_DE10_NANO_KERNEL_MODULE_CMA_H_
#define VTA_DE10_NANO_KERNEL_MODULE_CMA_H_


/* Should be defined in settings.mk file */
#ifndef CMA_IOCTL_MAGIC
#define CMA_IOCTL_MAGIC 0xf2
#define CMA_IOCTL_MAGIC 0xf2
#endif

#define CMA_ALLOC_CACHED _IOC(_IOC_WRITE | _IOC_READ, CMA_IOCTL_MAGIC, 1, 4)
#define CMA_ALLOC_NONCACHED _IOC(_IOC_WRITE | _IOC_READ, CMA_IOCTL_MAGIC, 2, 4)
#define CMA_FREE _IOC(_IOC_WRITE, CMA_IOCTL_MAGIC, 3, 4)
#define CMA_GET_PHY_ADDR _IOC(_IOC_WRITE | _IOC_READ, CMA_IOCTL_MAGIC, 4, 4)
#define CMA_GET_SIZE _IOC(_IOC_WRITE | _IOC_READ, CMA_IOCTL_MAGIC, 5, 4)

#define CMA_IOCTL_MAXNR 5
#define CMA_ALLOC_CACHED _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 1, 4)
#define CMA_ALLOC_NONCACHED _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 2, 4)
#define CMA_FREE _IOC(_IOC_WRITE, CMA_IOCTL_MAGIC, 3, 4)
#define CMA_GET_PHY_ADDR _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 4, 4)
#define CMA_GET_SIZE _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 5, 4)

#define CMA_IOCTL_MAXNR 5


#endif // VTA_DE10_NANO_KERNEL_MODULE_CMA_H_
62 changes: 34 additions & 28 deletions 3rdparty/cma/cma_api_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,47 +30,48 @@
* \brief Application layer implementation for contiguous memory allocation.
*/

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>

#include "cma_api.h"

#ifndef CMA_IOCTL_MAGIC
#define CMA_IOCTL_MAGIC 0xf2
#define CMA_IOCTL_MAGIC 0xf2
#endif

#define CMA_ALLOC_CACHED _IOC(_IOC_WRITE | _IOC_READ, CMA_IOCTL_MAGIC, 1, 4)
#define CMA_ALLOC_NONCACHED _IOC(_IOC_WRITE | _IOC_READ, CMA_IOCTL_MAGIC, 2, 4)
#define CMA_FREE _IOC(_IOC_WRITE, CMA_IOCTL_MAGIC, 3, 4)
#define CMA_GET_PHY_ADDR _IOC(_IOC_WRITE | _IOC_READ, CMA_IOCTL_MAGIC, 4, 4)
#define CMA_GET_SIZE _IOC(_IOC_WRITE | _IOC_READ, CMA_IOCTL_MAGIC, 5, 4)
#define CMA_ALLOC_CACHED _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 1, 4)
#define CMA_ALLOC_NONCACHED _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 2, 4)
#define CMA_FREE _IOC(_IOC_WRITE, CMA_IOCTL_MAGIC, 3, 4)
#define CMA_GET_PHY_ADDR _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 4, 4)
#define CMA_GET_SIZE _IOC(_IOC_WRITE|_IOC_READ, CMA_IOCTL_MAGIC, 5, 4)

#define CMA_IOCTL_MAXNR 5
#define CMA_IOCTL_MAXNR 5

#ifndef CMA_DEBUG
#define CMA_DEBUG 0
#define CMA_DEBUG 0
#endif
#ifndef DRIVER_NODE_NAME
#define DRIVER_NODE_NAME "cma"
#define DRIVER_NODE_NAME "cma"
#endif

#if CMA_DEBUG == 1
#define __DEBUG(fmt, args...) printf("CMA_API_DEBUG: " fmt, ##args)
#define __DEBUG(fmt, args...) printf("CMA_API_DEBUG: " fmt, ##args)
#else
#define __DEBUG(fmt, args...)
#define __DEBUG(fmt, args...)
#endif

#define ROUND_UP(N, S) ((((N) + (S)-1) / (S)) * (S))
#define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S))


/* Private functions */
void* cma_alloc(size_t size, unsigned ioctl_cmd);
void *cma_alloc(size_t size, unsigned ioctl_cmd);

/* Global file descriptor */
int cma_fd = 0;
Expand Down Expand Up @@ -98,19 +99,23 @@ int cma_release(void) {
return 0;
}

void* cma_alloc_cached(size_t size) { return cma_alloc(size, CMA_ALLOC_CACHED); }
void *cma_alloc_cached(size_t size) {
return cma_alloc(size, CMA_ALLOC_CACHED);
}

void* cma_alloc_noncached(size_t size) { return cma_alloc(size, CMA_ALLOC_NONCACHED); }
void *cma_alloc_noncached(size_t size) {
return cma_alloc(size, CMA_ALLOC_NONCACHED);
}

int cma_free(void* mem) {
int cma_free(void *mem) {
__DEBUG("Releasing contigous memory from 0x%x\n", (unsigned)mem);
unsigned data, v_addr;

/* save user space pointer value */
data = (unsigned)mem;
data = (unsigned)mem;
v_addr = (unsigned)mem;

if (ioctl(cma_fd, CMA_GET_SIZE, &data) == -1) {
if ( ioctl(cma_fd, CMA_GET_SIZE, &data) == -1 ) {
__DEBUG("cma_free - ioctl command unsuccsessful - 0\n");
return -1;
}
Expand All @@ -120,23 +125,23 @@ int cma_free(void* mem) {
munmap(mem, data);

/* free cma entry */
if (ioctl(cma_fd, CMA_FREE, &v_addr) == -1) {
if ( ioctl(cma_fd, CMA_FREE, &v_addr) == -1 ) {
__DEBUG("cma_free - ioctl command unsuccsessful - 1\n");
return -1;
}

return 0;
}

unsigned cma_get_phy_addr(void* mem) {
unsigned cma_get_phy_addr(void *mem) {
unsigned data;
__DEBUG("Getting physical address from 0x%x\n", (unsigned)mem);

/* save user space pointer value */
data = (unsigned)mem;

/* get physical address */
if (ioctl(cma_fd, CMA_GET_PHY_ADDR, &data) == -1) {
if ( ioctl(cma_fd, CMA_GET_PHY_ADDR, &data) == -1 ) {
__DEBUG("cma_free - ioctl command unsuccsessful\n");
return 0;
}
Expand All @@ -145,17 +150,18 @@ unsigned cma_get_phy_addr(void* mem) {
return data;
}

void* cma_alloc(size_t size, unsigned ioctl_cmd) {

void *cma_alloc(size_t size, unsigned ioctl_cmd) {
unsigned data;
void* mem;
void *mem;
__DEBUG("Allocating 0x%x bytes of contigous memory\n", size);

/* Page align size */
size = ROUND_UP(size, getpagesize());

/* ioctl cmd to allocate contiguous memory */
data = (unsigned)size;
if (ioctl(cma_fd, ioctl_cmd, &data) == -1) {
if ( ioctl(cma_fd, ioctl_cmd, &data) == -1 ) {
__DEBUG("cma_alloc - ioctl command unsuccsessful\n");
return NULL;
}
Expand Down
82 changes: 30 additions & 52 deletions 3rdparty/compiler-rt/builtin_fp16.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,33 +29,16 @@ static inline uint32_t __clz(uint32_t x) {
int n = 32;
uint32_t y;

y = x >> 16;
if (y) {
n = n - 16;
x = y;
}
y = x >> 8;
if (y) {
n = n - 8;
x = y;
}
y = x >> 4;
if (y) {
n = n - 4;
x = y;
}
y = x >> 2;
if (y) {
n = n - 2;
x = y;
}
y = x >> 1;
if (y) return n - 2;
y = x >>16; if (y) { n = n -16; x = y; }
y = x >> 8; if (y) { n = n - 8; x = y; }
y = x >> 4; if (y) { n = n - 4; x = y; }
y = x >> 2; if (y) { n = n - 2; x = y; }
y = x >> 1; if (y) return n - 2;
return n - x;
}

template <typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS, typename DST_T, typename DST_REP_T,
int DST_SIG_BITS>
template <typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS,
typename DST_T, typename DST_REP_T, int DST_SIG_BITS>
static inline DST_T __truncXfYf2__(SRC_T a) {
// Various constants whose values follow from the type parameters.
// Any reasonable optimizer will fold and propagate all of these.
Expand Down Expand Up @@ -88,10 +71,7 @@ static inline DST_T __truncXfYf2__(SRC_T a) {
const DST_REP_T dstNaNCode = dstQNaN - 1;

// Break a into a sign and representation of the absolute value
union SrcExchangeType {
SRC_T f;
SRC_REP_T i;
};
union SrcExchangeType { SRC_T f; SRC_REP_T i; };
SrcExchangeType src_rep;
src_rep.f = a;
const SRC_REP_T aRep = src_rep.i;
Expand All @@ -108,21 +88,25 @@ static inline DST_T __truncXfYf2__(SRC_T a) {

const SRC_REP_T roundBits = aAbs & roundMask;
// Round to nearest
if (roundBits > halfway) absResult++;
// Ties to even
if (roundBits > halfway)
absResult++;
// Ties to even
else if (roundBits == halfway)
absResult += absResult & 1;
} else if (aAbs > srcInfinity) {
}
else if (aAbs > srcInfinity) {
// a is NaN.
// Conjure the result by beginning with infinity, setting the qNaN
// bit and inserting the (truncated) trailing NaN field.
absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS;
absResult |= dstQNaN;
absResult |= ((aAbs & srcNaNCode) >> (SRC_SIG_BITS - DST_SIG_BITS)) & dstNaNCode;
} else if (aAbs >= overflow) {
}
else if (aAbs >= overflow) {
// a overflows to infinity.
absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS;
} else {
}
else {
// a underflows on conversion to the destination type or is an exact
// zero. The result may be a denormal or zero. Extract the exponent
// to get the shift amount for the denormalization.
Expand All @@ -140,26 +124,24 @@ static inline DST_T __truncXfYf2__(SRC_T a) {
absResult = denormalizedSignificand >> (SRC_SIG_BITS - DST_SIG_BITS);
const SRC_REP_T roundBits = denormalizedSignificand & roundMask;
// Round to nearest
if (roundBits > halfway) absResult++;
// Ties to even
if (roundBits > halfway)
absResult++;
// Ties to even
else if (roundBits == halfway)
absResult += absResult & 1;
}
}

// Apply the signbit to (DST_T)abs(a).
const DST_REP_T result = absResult | sign >> (srcBits - dstBits);
union DstExchangeType {
DST_T f;
DST_REP_T i;
};
union DstExchangeType { DST_T f; DST_REP_T i; };
DstExchangeType dst_rep;
dst_rep.i = result;
return dst_rep.f;
}

template <typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS, typename DST_T, typename DST_REP_T,
int DST_SIG_BITS>
template<typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS,
typename DST_T, typename DST_REP_T, int DST_SIG_BITS>
static inline DST_T __extendXfYf2__(SRC_T a) {
// Various constants whose values follow from the type parameters.
// Any reasonable optimizer will fold and propagate all of these.
Expand All @@ -175,18 +157,15 @@ static inline DST_T __extendXfYf2__(SRC_T a) {
const SRC_REP_T srcQNaN = SRC_REP_T(1) << (SRC_SIG_BITS - 1);
const SRC_REP_T srcNaNCode = srcQNaN - 1;

const int dstBits = sizeof(DST_T) * 8;
const int dstBits = sizeof(DST_T)*8;
const int dstExpBits = dstBits - DST_SIG_BITS - 1;
const int dstInfExp = (1 << dstExpBits) - 1;
const int dstExpBias = dstInfExp >> 1;

const DST_REP_T dstMinNormal = DST_REP_T(1) << DST_SIG_BITS;

// Break a into a sign and representation of the absolute value
union SrcExchangeType {
SRC_T f;
SRC_REP_T i;
};
union SrcExchangeType { SRC_T f; SRC_REP_T i; };
SrcExchangeType src_rep;
src_rep.f = a;
const SRC_REP_T aRep = src_rep.i;
Expand All @@ -212,7 +191,8 @@ static inline DST_T __extendXfYf2__(SRC_T a) {
absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS;
absResult |= (DST_REP_T)(aAbs & srcQNaN) << (DST_SIG_BITS - SRC_SIG_BITS);
absResult |= (DST_REP_T)(aAbs & srcNaNCode) << (DST_SIG_BITS - SRC_SIG_BITS);
} else if (aAbs) {
}
else if (aAbs) {
// a is denormal.
// renormalize the significand and clear the leading bit, then insert
// the correct adjusted exponent in the destination type.
Expand All @@ -221,17 +201,15 @@ static inline DST_T __extendXfYf2__(SRC_T a) {
absResult ^= dstMinNormal;
const int resultExponent = dstExpBias - srcExpBias - scale + 1;
absResult |= (DST_REP_T)resultExponent << DST_SIG_BITS;
} else {
}
else {
// a is zero.
absResult = 0;
}

// Apply the signbit to (DST_T)abs(a).
const DST_REP_T result = absResult | (DST_REP_T)sign << (dstBits - srcBits);
union DstExchangeType {
DST_T f;
DST_REP_T i;
};
union DstExchangeType { DST_T f; DST_REP_T i; };
DstExchangeType dst_rep;
dst_rep.i = result;
return dst_rep.f;
Expand Down
Loading

0 comments on commit 4e01034

Please sign in to comment.