Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Revert PR 17767 for fixing GPU memory usage regression #18283

Merged
merged 2 commits into from
May 13, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions 3rdparty/mshadow/mshadow/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ extern "C" {
}

#include "./half.h"
#include "./half2.h"
#include "./bfloat.h"
#define MSHADOW_HALF_BF_OPERATOR(RTYPE, OP) \
MSHADOW_XINLINE RTYPE operator OP(mshadow::half::half_t a, mshadow::bfloat::bf16_t b) { \
Expand Down Expand Up @@ -386,6 +387,11 @@ struct DataType<half::half_t> {
#endif
};
/*! \brief DataType traits specialization for the two-lane half2_t type */
template<>
struct DataType<half::half2_t> {
  /*! \brief number of values packed into one half2_t */
  static const int kLanes = 2;
  /*! \brief type flag associated with half2_t */
  static const int kFlag = kFloat16;
};
template<>
struct DataType<bfloat::bf16_t> {
static const int kFlag = kBfloat16;
static const int kLanes = 1;
Expand Down Expand Up @@ -1138,6 +1144,48 @@ struct minimum {
}
#endif

/*!
 * \brief Dispatch on a runtime type flag and run the trailing statements with
 *        DType bound (via typedef) to the C++ type matching that flag.
 *
 * Handled flags: kFloat32 (float), kFloat64 (double), kFloat16
 * (mshadow::half::half2_t), kUint8 (uint8_t), kInt32 (int32_t) and
 * kInt64 (int64_t). Any other value is reported through LOG(FATAL).
 *
 * \param type  expression evaluating to the type flag to switch on
 * \param DType name of the typedef made visible to the statements
 * \param ...   statements executed inside the selected case
 */
#define MSHADOW_TYPE_SWITCH_WITH_HALF2(type, DType, ...) \
  switch (type) { \
    case mshadow::kFloat32: { \
      typedef float DType; \
      {__VA_ARGS__} \
    } break; \
    case mshadow::kFloat64: { \
      typedef double DType; \
      {__VA_ARGS__} \
    } break; \
    case mshadow::kFloat16: { \
      typedef mshadow::half::half2_t DType; \
      {__VA_ARGS__} \
    } break; \
    case mshadow::kUint8: { \
      typedef uint8_t DType; \
      {__VA_ARGS__} \
    } break; \
    case mshadow::kInt32: { \
      typedef int32_t DType; \
      {__VA_ARGS__} \
    } break; \
    case mshadow::kInt64: { \
      typedef int64_t DType; \
      {__VA_ARGS__} \
    } break; \
    default: \
      LOG(FATAL) << "Unknown type enum " << type; \
  }

#define MSHADOW_SGL_DBL_TYPE_SWITCH(type, DType, ...) \
switch (type) { \
case mshadow::kFloat32: \
Expand Down
143 changes: 143 additions & 0 deletions 3rdparty/mshadow/mshadow/half2.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/*!
* Copyright (c) 2017 by Contributors
* \file half2.h
* \brief definition of vector float16, half2 type.
*
* \author Antti-Pekka Hynninen
*/
#ifndef MSHADOW_HALF2_H_
#define MSHADOW_HALF2_H_

/*!
 * \brief MSHADOW_CUDA_HALF2 selects the implementation used in this header.
 *
 * It is defined as 1 (and <cuda_fp16.h> is included) only when all of the
 * following hold: __CUDACC__ is defined, __CUDA_ARCH__ >= 530,
 * MSHADOW_USE_CUDA is non-zero and CUDA_VERSION >= 7050. In every other
 * configuration it is defined as 0.
 */
#if (defined(__CUDACC__) && __CUDA_ARCH__ >= 530 && MSHADOW_USE_CUDA && CUDA_VERSION >= 7050)
#define MSHADOW_CUDA_HALF2 1
#include <cuda_fp16.h>
#else
#define MSHADOW_CUDA_HALF2 0
#endif

#include<math.h>

/*! \brief namespace for mshadow */
namespace mshadow {
/*! \brief namespace for host/device portable half-precision floats */
namespace half {

/*!
 * \brief Generates a compound-assignment member operator for half2_t.
 *
 * The generated operator evaluates `*this OP rhs`, converts the result to
 * half2_t, stores it back into *this and returns the updated value.
 *
 * \param AOP compound-assignment operator token to define (e.g. +=)
 * \param OP  binary operator token used to compute the new value (e.g. +)
 */
#define MSHADOW_HALF2_ASSIGNOP(AOP, OP) \
  template<typename T> \
  MSHADOW_XINLINE half2_t operator AOP (const T& rhs) { \
    *this = half2_t(*this OP rhs);  /* NOLINT(*)*/ \
    return *this; \
  }

/*!
 * \brief pair of half-precision values that is stored and operated on together.
 *
 * When MSHADOW_CUDA_HALF2 is enabled the two lanes are kept in a single CUDA
 * half2 (member half2_); otherwise they are kept as two half_t elements
 * (member half_t2).
 */
class MSHADOW_ALIGNED(4) half2_t {
 public:
#if MSHADOW_CUDA_HALF2
  /*! \brief packed storage used when MSHADOW_CUDA_HALF2 is enabled */
  half2 half2_;
#else
  /*! \brief fallback storage: one half_t per lane */
  half_t half_t2[2];
#endif

  /*! \brief default constructor; the body is empty and sets no lane explicitly */
  MSHADOW_XINLINE half2_t() {}

#if MSHADOW_CUDA_HALF2
  /*! \brief wrap an existing CUDA half2 value */
  MSHADOW_XINLINE explicit half2_t(half2 a) : half2_(a) {}
#else
  /*!
   * \brief build from two separate lanes
   * \param a value stored in lane 0
   * \param b value stored in lane 1
   */
  MSHADOW_XINLINE explicit half2_t(half_t a, half_t b) {
    half_t2[0] = a;
    half_t2[1] = b;
  }
#endif

  /*!
   * \brief broadcast an integer into both lanes
   * \param a integer converted to half precision and copied to each lane
   */
  MSHADOW_XINLINE explicit half2_t(int a) {
#if MSHADOW_CUDA_HALF2
    half2_ = __half2half2(__int2half_rz(a));
#else
    half_t2[0] = static_cast<half_t>(a);
    half_t2[1] = static_cast<half_t>(a);
#endif
  }

  /*! \brief unary plus; returns a copy of this value */
  MSHADOW_XINLINE half2_t operator+() const {
    return *this;
  }

  /*! \brief unary minus; returns a value with both lanes negated */
  MSHADOW_XINLINE half2_t operator-() const {
#if MSHADOW_CUDA_HALF2
    return half2_t(__hneg2(half2_));
#else
    // Negate mutable copies of the lanes so that this const member does not
    // rely on half_t's unary minus being const-qualified.
    half_t lane0 = half_t2[0];
    half_t lane1 = half_t2[1];
    return half2_t(-lane0, -lane1);
#endif
  }

  /*!
   * \brief copy assignment
   * \param a value whose lanes are copied into this object
   * \return reference to this object, so assignments can be chained
   */
  MSHADOW_XINLINE half2_t& operator=(const half2_t& a) {
#if MSHADOW_CUDA_HALF2
    half2_ = a.half2_;
#else
    half_t2[0] = a.half_t2[0];
    half_t2[1] = a.half_t2[1];
#endif
    return *this;
  }

  MSHADOW_HALF2_ASSIGNOP(+=, +)
  MSHADOW_HALF2_ASSIGNOP(-=, -)
  MSHADOW_HALF2_ASSIGNOP(*=, *)
  MSHADOW_HALF2_ASSIGNOP(/=, /)
};

/*!
 * \brief lane-wise addition of two half2_t values
 * \param a left operand
 * \param b right operand
 * \return half2_t whose lanes are the sums of the corresponding lanes
 */
MSHADOW_XINLINE half2_t operator+(half2_t a, half2_t b) {
#if MSHADOW_CUDA_HALF2
  // Each lane is computed in float and the pair is packed back into a half2.
  const float low_sum = __low2float(a.half2_) + __low2float(b.half2_);
  const float high_sum = __high2float(a.half2_) + __high2float(b.half2_);
  return half2_t(__floats2half2_rn(low_sum, high_sum));
#else
  const half_t lane0 = a.half_t2[0] + b.half_t2[0];
  const half_t lane1 = a.half_t2[1] + b.half_t2[1];
  return half2_t(lane0, lane1);
#endif
}
/*!
 * \brief lane-wise subtraction of two half2_t values
 * \param a left operand (minuend)
 * \param b right operand (subtrahend)
 * \return half2_t whose lanes are the differences of the corresponding lanes
 */
MSHADOW_XINLINE half2_t operator-(half2_t a, half2_t b) {
#if MSHADOW_CUDA_HALF2
  // Each lane is computed in float and the pair is packed back into a half2.
  const float low_diff = __low2float(a.half2_) - __low2float(b.half2_);
  const float high_diff = __high2float(a.half2_) - __high2float(b.half2_);
  return half2_t(__floats2half2_rn(low_diff, high_diff));
#else
  const half_t lane0 = a.half_t2[0] - b.half_t2[0];
  const half_t lane1 = a.half_t2[1] - b.half_t2[1];
  return half2_t(lane0, lane1);
#endif
}
/*!
 * \brief lane-wise multiplication of two half2_t values
 * \param a left operand
 * \param b right operand
 * \return half2_t whose lanes are the products of the corresponding lanes
 */
MSHADOW_XINLINE half2_t operator*(half2_t a, half2_t b) {
#if MSHADOW_CUDA_HALF2
  // Each lane is computed in float and the pair is packed back into a half2.
  const float low_prod = __low2float(a.half2_) * __low2float(b.half2_);
  const float high_prod = __high2float(a.half2_) * __high2float(b.half2_);
  return half2_t(__floats2half2_rn(low_prod, high_prod));
#else
  const half_t lane0 = a.half_t2[0] * b.half_t2[0];
  const half_t lane1 = a.half_t2[1] * b.half_t2[1];
  return half2_t(lane0, lane1);
#endif
}
/*!
 * \brief lane-wise division of two half2_t values
 * \param a left operand (dividend)
 * \param b right operand (divisor)
 * \return half2_t whose lanes are the quotients of the corresponding lanes
 */
MSHADOW_XINLINE half2_t operator/(half2_t a, half2_t b) {
#if MSHADOW_CUDA_HALF2
  // Each lane is computed in float and the pair is packed back into a half2.
  const float low_quot = __low2float(a.half2_) / __low2float(b.half2_);
  const float high_quot = __high2float(a.half2_) / __high2float(b.half2_);
  return half2_t(__floats2half2_rn(low_quot, high_quot));
#else
  const half_t lane0 = a.half_t2[0] / b.half_t2[0];
  const half_t lane1 = a.half_t2[1] / b.half_t2[1];
  return half2_t(lane0, lane1);
#endif
}
/*!
 * \brief lane-wise remainder of two half2_t values, computed with ::fmod
 * \param a left operand (dividend)
 * \param b right operand (divisor)
 * \return half2_t whose lanes are ::fmod of the corresponding lanes
 */
MSHADOW_XINLINE half2_t operator%(half2_t a, half2_t b) {
#if MSHADOW_CUDA_HALF2
  // Each lane is computed in float and the pair is packed back into a half2.
  const float low_rem = ::fmod(__low2float(a.half2_), __low2float(b.half2_));
  const float high_rem = ::fmod(__high2float(a.half2_), __high2float(b.half2_));
  return half2_t(__floats2half2_rn(low_rem, high_rem));
#else
  const half_t lane0 = ::fmod(a.half_t2[0], b.half_t2[0]);
  const half_t lane1 = ::fmod(a.half_t2[1], b.half_t2[1]);
  return half2_t(lane0, lane1);
#endif
}
/*!
 * \brief equality comparison of two half2_t values
 * \param a left operand
 * \param b right operand
 * \return true only when both lanes of a compare equal to the matching lanes of b
 */
MSHADOW_XINLINE bool operator==(half2_t a, half2_t b) {
#if MSHADOW_CUDA_HALF2
  const bool both_lanes_equal = __hbeq2(a.half2_, b.half2_);
  return both_lanes_equal;
#else
  // A mismatch in lane 0 decides the result before lane 1 is examined.
  if (!(a.half_t2[0] == b.half_t2[0])) {
    return false;
  }
  return a.half_t2[1] == b.half_t2[1];
#endif
}

} // namespace half
} // namespace mshadow
#endif // MSHADOW_HALF2_H_
Loading