From fe394be02b97df48f03278c43e38e0d05ac028a0 Mon Sep 17 00:00:00 2001
From: Joe Evans <joeev@amazon.com>
Date: Thu, 12 Nov 2020 19:47:48 -0800
Subject: [PATCH] Don't use the std:: namespace for pow(), since it is built
 into the CUDA math library, and cast the second argument so that an
 acceptable overload is found.

Remove the Thrust library override and use the default from CUDA 11.0.

Fix lint.
---
 ci/build_windows.py                | 17 +----------------
 src/operator/contrib/multi_lamb.cu |  8 ++++----
 2 files changed, 5 insertions(+), 20 deletions(-)
diff --git a/ci/build_windows.py b/ci/build_windows.py
index c8d3af515b5a..3d9dbfc2d734 100755
--- a/ci/build_windows.py
+++ b/ci/build_windows.py
@@ -157,20 +157,6 @@ def windows_build(args):
     mxnet_root = get_mxnet_root()
     logging.info("Found MXNet root: {}".format(mxnet_root))
 
-    if 'GPU' in args.flavour:
-        # Get Thrust version to be shipped in Cuda 11, due to flakyness of
-        # older Thrust versions with MSVC 19 compiler
-        with remember_cwd():
-            tmpdirname = tempfile.mkdtemp()
-            os.chdir(tmpdirname)
-            r = requests.get('https://github.com/thrust/thrust/archive/1.9.8.zip', allow_redirects=True)
-            with open('thrust.zip', 'wb') as f:
-                f.write(r.content)
-            with zipfile.ZipFile('thrust.zip', 'r') as zip_ref:
-                zip_ref.extractall('.')
-            thrust_path = os.path.join(tmpdirname, "thrust-1.9.8")
-
-
     # cuda thrust / CUB + VS 2019 is flaky: try multiple times if fail
     MAXIMUM_TRY = 5
     build_try = 0
@@ -184,8 +170,7 @@ def windows_build(args):
             os.chdir(path)
             env = os.environ.copy()
             if 'GPU' in args.flavour:
-                env["CXXFLAGS"] = '/FS /MD /O2 /Ob2 /I {}'.format(thrust_path)
-                env["CUDAFLAGS"] = '-I {}'.format(thrust_path)
+                env["CXXFLAGS"] = '/FS /MD /O2 /Ob2'
             cmd = "\"{}\" && cmake -GNinja {} {}".format(args.vcvars,
                                                          CMAKE_FLAGS[args.flavour],
                                                          mxnet_root)
diff --git a/src/operator/contrib/multi_lamb.cu b/src/operator/contrib/multi_lamb.cu
index 6415bfbda015..0a55b89bc501 100644
--- a/src/operator/contrib/multi_lamb.cu
+++ b/src/operator/contrib/multi_lamb.cu
@@ -50,10 +50,10 @@ __global__ void KernelStep1(const MultiLAMBKernelParam<DType, MPDType> kernel_pa
 
   MPDType biascorrection1, biascorrection2;
   if (bias_correction) {
-    biascorrection1 = 1.0 -
-                      static_cast<MPDType>(std::pow(beta1, kernel_params.step_count[tensor_id]));
-    biascorrection2 = 1.0 -
-                      static_cast<MPDType>(std::pow(beta2, kernel_params.step_count[tensor_id]));
+    biascorrection1 = 1.0 - static_cast<MPDType>(
+                      pow(beta1, static_cast<float>(kernel_params.step_count[tensor_id])));
+    biascorrection2 = 1.0 - static_cast<MPDType>(
+                      pow(beta2, static_cast<float>(kernel_params.step_count[tensor_id])));
   } else {
     biascorrection1 = static_cast<MPDType>(1.0);
     biascorrection2 = static_cast<MPDType>(1.0);