deepmodeling · amcadmus · Sep 16, 2020 · Sep 7, 2020 · Sep 7, 2020 · Sep 10, 2020
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
@@ -125,7 +125,7 @@ else()
   endif()
 endif()
 if (USE_CUDA_TOOLKIT)
-  add_definitions("-DUSE_CUDA_TOOLKIT")
+  add_definitions("-DGOOGLE_CUDA")  # no space after -D, so CMake can treat it as a preprocessor definition rather than a raw flag
 endif()
 
 # define USE_TTM

diff --git a/source/lib/include/CustomeOperation.h b/source/lib/include/CustomeOperation.h
diff --git a/source/lib/include/DeviceFunctor.h b/source/lib/include/DeviceFunctor.h
@@ -0,0 +1,62 @@
+#pragma once
+#include <vector>
+#include <climits>
+#include <stdio.h>
+#include <iostream>
+#include <cuda_runtime.h>
+
+typedef unsigned long long int_64;  // NOTE: despite the name, this alias is an *unsigned* long long
+#define SQRT_2_PI 0.7978845608028654  // numerically sqrt(2/pi), not sqrt(2*pi); written as a double literal
+
+#define cudaErrcheck(res) {cudaAssert((res), __FILE__, __LINE__);}
+inline void cudaAssert(cudaError_t code, const char *file, int line, bool abort=true) {  // reports a failed CUDA call; does nothing when code == cudaSuccess
+    if (code != cudaSuccess) {
+        fprintf(stderr,"cuda assert: %s %s %d\n", cudaGetErrorString(code), file, line);  // prints the CUDA error string, then the file and line passed in
+        if (abort) exit(code);  // exit status is the raw cudaError_t value; NOTE(review): exit() is used without a direct <cstdlib>/<stdlib.h> include in this header
+    }
+}
+
+template<typename FPTYPE>  // FPTYPE: element type of the FPTYPE* buffers in the call operator
+struct DescrptSeAGPUExecuteFunctor {  // declaration-only functor: operator() has no body in this header
+    void operator()(const FPTYPE * coord, const int * type, const int * ilist, const int * jrange, const int * jlist, int * array_int, unsigned long long * array_longlong, const FPTYPE * avg, const FPTYPE * std, FPTYPE * descript, FPTYPE * descript_deriv, FPTYPE * rij, int * nlist, const int nloc, const int nall, const int nnei, const int ndescrpt, const float rcut_r, const float rcut_r_smth, const std::vector<int> sec_a, const bool fill_nei_a, const int MAGIC_NUMBER);  // NOTE(review): rcut_r/rcut_r_smth are float regardless of FPTYPE, and sec_a is taken by value (copied per call) — confirm both are intended
+};
+
+template<typename FPTYPE>  // FPTYPE: element type of the FPTYPE* buffers in the call operator
+struct DescrptSeRGPUExecuteFunctor {  // declaration-only functor: operator() has no body in this header
+    void operator()(const FPTYPE * coord, const int * type, const int * ilist, const int * jrange, const int * jlist, int * array_int, unsigned long long * array_longlong, const FPTYPE * avg, const FPTYPE * std, FPTYPE * descript, FPTYPE * descript_deriv, FPTYPE * rij, int * nlist, const int nloc, const int nall, const int nnei, const int ndescrpt, const float rcut_r, const float rcut_r_smth, const std::vector<int> sec_a, const bool fill_nei_a, const int MAGIC_NUMBER);  // same parameter list as the DescrptSeA functor's operator(); NOTE(review): float cut-offs and by-value sec_a — confirm intended
+};
+
+template<typename FPTYPE>  // FPTYPE: element type of the FPTYPE* buffers in the call operator
+struct ProdForceSeAGPUExecuteFunctor {  // declaration-only functor: operator() has no body in this header
+    void operator()(FPTYPE * force, const FPTYPE * net_derive, const FPTYPE * in_deriv, const int * nlist, const int nloc, const int nall, const int nnei, const int ndescrpt, const int n_a_sel, const int n_a_shift);  // force is the only non-const buffer, i.e. the output; the other pointers are read-only inputs
+};
+
+template<typename FPTYPE>  // FPTYPE: element type of the FPTYPE* buffers in the call operator
+struct ProdForceSeRGPUExecuteFunctor {  // declaration-only functor: operator() has no body in this header
+    void operator()(FPTYPE * force, const FPTYPE * net_derive, const FPTYPE * in_deriv, const int * nlist, const int nloc, const int nall, const int nnei, const int ndescrpt);  // force is the only non-const buffer, i.e. the output; takes no n_a_sel/n_a_shift, unlike the SeA force functor
+};
+
+template<typename FPTYPE>  // FPTYPE: element type of the FPTYPE* buffers in the call operator
+struct ProdVirialSeAGPUExecuteFunctor {  // declaration-only functor: operator() has no body in this header
+    void operator()(FPTYPE * virial, FPTYPE * atom_virial, const FPTYPE * net_deriv, const FPTYPE * in_deriv, const FPTYPE * rij, const int * nlist, const int nloc, const int nall, const int nnei, const int ndescrpt, const int n_a_sel, const int n_a_shift);  // virial and atom_virial are the non-const (output) buffers; the other pointers are read-only inputs
+};
+
+template<typename FPTYPE>  // FPTYPE: element type of the FPTYPE* buffers in the call operator
+struct ProdVirialSeRGPUExecuteFunctor {  // declaration-only functor: operator() has no body in this header
+    void operator()(FPTYPE * virial, FPTYPE * atom_virial, const FPTYPE * net_deriv, const FPTYPE * in_deriv, const FPTYPE * rij, const int * nlist, const int nloc, const int nall, const int nnei, const int ndescrpt);  // virial and atom_virial are the non-const (output) buffers; takes no n_a_sel/n_a_shift, unlike the SeA virial functor
+};
+
+template<typename FPTYPE>  // FPTYPE: element type of the in/out buffers
+struct GeluGPUExecuteFunctor {  // declaration-only functor: operator() has no body in this header
+    void operator()(const FPTYPE * in, FPTYPE * out, const int size);  // reads in, writes out; presumably size is the element count of both buffers — TODO confirm against the definition
+};
+
+template<typename FPTYPE>  // FPTYPE: element type of the dy/in/out buffers
+struct GeluGradGPUExecuteFunctor {  // declaration-only functor: operator() has no body in this header
+    void operator()(const FPTYPE * dy, const FPTYPE * in, FPTYPE * out, const int size);  // reads dy and in, writes out; presumably size is the element count of each buffer — TODO confirm against the definition
+};
+
+template<typename FPTYPE>  // FPTYPE: element type of the dy/dy_/in/out buffers
+struct GeluGradGradGPUExecuteFunctor {  // declaration-only functor: operator() has no body in this header
+    void operator()(const FPTYPE * dy, const FPTYPE * dy_, const FPTYPE * in, FPTYPE * out, const int size);  // reads dy, dy_ and in, writes out; presumably size is the element count of each buffer — TODO confirm against the definition
+};
diff --git a/source/lib/include/NNPInter.h b/source/lib/include/NNPInter.h
@@ -98,9 +98,6 @@ class NNPInter
 
   // function used for neighbor list copy
   vector<int> get_sel_a() const;
-#ifdef USE_CUDA_TOOLKIT
-  void update_nbor(const InternalNeighborList & nlist, const int nloc);
-#endif
 };
 
 class NNPInterModelDevi
@@ -195,9 +192,6 @@ class NNPInterModelDevi
   // function used for nborlist copy
   vector<vector<int> > get_sel() const;
   void cum_sum(const std::vector<std::vector<int32> > n_sel);
-#ifdef USE_CUDA_TOOLKIT
-  void update_nbor(const InternalNeighborList & nlist, const int nloc);
-#endif
 };
 
 
diff --git a/source/lib/include/common.h b/source/lib/include/common.h
@@ -8,11 +8,17 @@
 
 using namespace tensorflow;
 using namespace std;
+#include <tensorflow/core/graph/default_device.h>
+#include <tensorflow/core/graph/graph_def_builder.h>
 
 #include "NNPAtomMap.h"
 #include <vector>
+#include <string>
+#include <iostream>
 #include "version.h"
 
+using CPUDevice = Eigen::ThreadPoolDevice;  // shorthand for Eigen's thread-pool device type
+using GPUDevice = Eigen::GpuDevice;  // shorthand for Eigen's GPU device type
 #ifdef HIGH_PREC
 typedef double VALUETYPE;
 typedef double ENERGYTYPE;
@@ -122,6 +128,20 @@ session_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
 		       const int			nghost = 0,
 		       const string			scope = "");
 
+int  // meaning of the returned int is not visible from this declaration
+session_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,  // non-const reference, i.e. filled/modified by the call
+		       const vector<VALUETYPE> &	dcoord_,
+		       const int &			ntypes,
+		       const vector<int> &		datype_,
+		       const vector<VALUETYPE> &	dbox,		    
+		       InternalNeighborList &		dlist,  // taken by non-const reference
+		       const vector<VALUETYPE> &	fparam_,
+		       const vector<VALUETYPE> &	aparam_,
+		       const NNPAtomMap<VALUETYPE>&	nnpmap,
+		       const int			nghost,  // no default value in this overload
+		       const int			ago,  // no default value in this overload
+		       const string			scope = "");
+
 int
 session_input_tensors (std::vector<std::pair<string, Tensor>> & input_tensors,
 		       const vector<VALUETYPE> &	dcoord_,