diff --git a/src/examples/FuseTExamples/AddEx.cc b/src/examples/FuseTExamples/AddEx.cc new file mode 100644 index 00000000000..4cfa683b7d0 --- /dev/null +++ b/src/examples/FuseTExamples/AddEx.cc @@ -0,0 +1,152 @@ +/* + This file is part of MADNESS. + + Copyright (C) 2007,2010 Oak Ridge National Laboratory + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + For more information please contact: + + Robert J. Harrison + Oak Ridge National Laboratory + One Bethel Valley Road + P.O. 
Box 2008, MS-6367 + + email: harrisonrj@ornl.gov + tel: 865-241-3937 + fax: 865-572-0680 + + $Id$ +*/ +//#define WORLD_INSTANTIATE_STATIC_TEMPLATES +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +// Initial Gaussian with exponent alpha +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+y*y+z*z))*pow(constants::pi/alpha,-1.5); +} + +static double Vp(const coord_3d& r) { + return VVV; +} + +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +int main(int argc, char** argv) +{ + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + + real_function_3d u0 = real_factory_3d(world).f(uinitial); + real_function_3d u1 = real_factory_3d(world).f(uinitial); + u0.truncate(); + u1.truncate(); + + double u0_norm = u0.norm2(); + double u0_trace = 
u0.trace(); + double u1_norm = u1.norm2(); + double u1_trace = u1.trace(); + + if (world.rank() == 0) print("u0 Initial norm", u0_norm,"trace", u0_trace); + if (world.rank() == 0) print("u1 Initial norm", u1_norm,"trace", u1_trace); + world.gop.fence(); + + // Make exponential of Vp + real_function_3d result_factory = real_factory_3d(world); + real_function_3d result_factory1 = real_factory_3d(world); + real_function_3d result(result_factory); + real_function_3d result1(result_factory); + + double result_init_norm = result.norm2(); + double result_init_trace = result.trace(); + + if (world.rank() == 0) print("Initial Result norm", result_init_norm,"trace", result_init_trace); + world.gop.fence(); + + AddOp op1("Add",&result,&u0,&u1); + OpExecutor exe(world); + exe.execute(&op1, false); + world.gop.fence(); + + u0_norm = u0.norm2(); + u0_trace = u0.trace(); + u1_norm = u1.norm2(); + u1_trace = u1.trace(); + double result_norm = result.norm2(); + double result_trace = result.trace(); + + result1 = u0 + u1; + + double result1_norm = result1.norm2(); + double result1_trace = result1.trace(); + if (world.rank() == 0) print("u0 norm", u0_norm," u0 trace", u0_trace); + if (world.rank() == 0) print("u1 norm", u1_norm," u1 trace", u1_trace); + if (world.rank() == 0) print("Result norm", result_norm," result trace", result_trace); + if (world.rank() == 0) print("Result1 norm", result1_norm," result1 trace", result1_trace); + world.gop.fence(); + + finalize(); + return 0; +} + diff --git a/src/examples/FuseTExamples/CopyEx.cc b/src/examples/FuseTExamples/CopyEx.cc new file mode 100644 index 00000000000..05b34c1e627 --- /dev/null +++ b/src/examples/FuseTExamples/CopyEx.cc @@ -0,0 +1,207 @@ +/* + This file is part of MADNESS. 
+ + Copyright (C) 2007,2010 Oak Ridge National Laboratory + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + For more information please contact: + + Robert J. Harrison + Oak Ridge National Laboratory + One Bethel Valley Road + P.O. Box 2008, MS-6367 + + email: harrisonrj@ornl.gov + tel: 865-241-3937 + fax: 865-572-0680 + + $Id$ +*/ +//#define WORLD_INSTANTIATE_STATIC_TEMPLATES +#include +#include +#include +#include +#include +#include +/*! +\file heat2.cc +\brief Example Green function for the 3D heat equation with a linear term +\defgroup heatex2 Evolve in time 3D heat equation with a linear term +\ingroup examples + +The source is here. + +\par Points of interest + - application of a function of a function to exponentiate the potential + - use of a functor to compute the solution at an arbitrary future time + - convolution with the Green's function + + +\par Background + +This adds to the complexity of the other \ref exampleheat "heat equation example" +by including a linear term. 
Specifically, we solve +\f[ + \frac{\partial u(x,t)}{\partial t} = c \nabla^2 u(x,t) + V_p(x,t) u(x,t) +\f] +If \f$ V_p = 0 \f$ time evolution operator is +\f[ + G_0(x,t) = \frac{1}{\sqrt{4 \pi c t}} \exp \frac{-x^2}{4 c t} +\f] +For non-zero \f$ V_p \f$ the time evolution is performed using the Trotter splitting +\f[ + G(x,t) = G_0(x,t/2) * \exp(V_p t) * G_0(x,t/2) + O(t^3) +\f] +In order to form an exact solution for testing, we choose \f$ V_p(x,t)=\mbox{constant} \f$ +but the solution method is not limited to this choice. + +*/ +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +// Initial Gaussian with exponent alpha +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+y*y+z*z))*pow(constants::pi/alpha,-1.5); +} + +static double Vp(const coord_3d& r) { + return VVV; +} + +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +int main(int argc, char** argv) +{ + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + 
FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + + real_function_3d u0 = real_factory_3d(world).f(uinitial); + u0.truncate(); + + double u0_norm = u0.norm2(); + double u0_trace = u0.trace(); + + if (world.rank() == 0) print("Initial norm", u0_norm,"trace", u0_trace); + world.gop.fence(); + + // Make exponential of Vp + real_function_3d result_factory = real_factory_3d(world); + real_function_3d result(result_factory); + real_function_3d result_factory1 = real_factory_3d(world); + real_function_3d result1(result_factory1); + real_function_3d result_factory2 = real_factory_3d(world); + real_function_3d result2(result_factory2); + real_function_3d result_factory3 = real_factory_3d(world); + real_function_3d result3(result_factory3); + real_function_3d result_factory4 = real_factory_3d(world); + real_function_3d result4(result_factory4); + real_function_3d result_factory5 = real_factory_3d(world); + real_function_3d result5(result_factory5); + real_function_3d result_factory6 = real_factory_3d(world); + real_function_3d result6(result_factory6); + real_function_3d result_factory7 = real_factory_3d(world); + real_function_3d result7(result_factory7); + real_function_3d result_factory8 = real_factory_3d(world); + real_function_3d result8(result_factory8); + real_function_3d result_factory9 = real_factory_3d(world); + real_function_3d result9(result_factory9); + real_function_3d result_factory10 = real_factory_3d(world); + real_function_3d result10(result_factory10); + + double result_init_norm = result.norm2(); + double result_init_trace = result.trace(); + + if (world.rank() == 0) print("Initial Result norm", result_init_norm,"trace", result_init_trace); + world.gop.fence(); + + CopyOp op1("Copy",&result,&u0); + CopyOp op2("Copy",&result1,&u0); + CopyOp op3("Copy",&result2,&u0); + CopyOp op4("Copy",&result3,&u0); + CopyOp op5("Copy",&result4,&u0); + CopyOp op6("Copy",&result5,&u0); + CopyOp op7("Copy",&result6,&u0); + CopyOp 
op8("Copy",&result7,&u0); + CopyOp op9("Copy",&result8,&u0); + CopyOp op10("Copy",&result9,&u0); + CopyOp op11("Copy",&result10,&u0); + OpExecutor exe(world); + exe.execute(&op1, false); + exe.execute(&op2, false); + exe.execute(&op3, false); + exe.execute(&op4, false); + exe.execute(&op5, false); + exe.execute(&op6, false); + exe.execute(&op7, false); + exe.execute(&op8, false); + exe.execute(&op9, false); + exe.execute(&op10, false); + exe.execute(&op11, false); + world.gop.fence(); + + u0_norm = u0.norm2(); + u0_trace = u0.trace(); + double result_norm = result.norm2(); + double result_trace = result.trace(); + + if (world.rank() == 0) print("u0 norm", u0_norm," u0 trace", u0_trace); + world.gop.fence(); + if (world.rank() == 0) print("Result norm", result_norm," result trace", result_trace); + world.gop.fence(); + + finalize(); + return 0; +} + diff --git a/src/examples/FuseTExamples/DerivativeEx.cc b/src/examples/FuseTExamples/DerivativeEx.cc new file mode 100644 index 00000000000..f1a67779580 --- /dev/null +++ b/src/examples/FuseTExamples/DerivativeEx.cc @@ -0,0 +1,385 @@ +/* + This file is part of MADNESS. + + Copyright (C) 2007,2010 Oak Ridge National Laboratory + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + For more information please contact: + + Robert J. 
Harrison + Oak Ridge National Laboratory + One Bethel Valley Road + P.O. Box 2008, MS-6367 + + email: harrisonrj@ornl.gov + tel: 865-241-3937 + fax: 865-572-0680 +*/ + + +/// \file examples/hello.cc +/// \brief Simplest example program for MADNESS +/// \defgroup hellowworldmad Hello world MADNESS style +/// \ingroup examples +/// +/// Simplest program that initializes the MADNESS parallel runtime +/// using initialize(), makes a madness::World object, prints +/// a greeting, and then cleans up. +/// +/// To initialize the MADNESS numerical environment you also need +/// \c startup(world,argc,argv) and should include mra/mra.h rather +/// than world/MADworld.h . + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace madness;; + +static const double L = 20; +static const long k = 8; +static const double thresh = 1e-12; +static const double c = 2.0; +static const double alpha = 1.9; // Exponent +#define FUNC_SIZE 20 +#define PI 3.1415926535897932385 +#define LO 0.0000000000 +#define HI 4.0000000000 + +static double sin_amp = 1.0; +static double cos_amp = 1.0; +static double sin_freq = 1.0; +static double cos_freq = 1.0; +static double sigma_x = 1.0; +static double sigma_y = 1.0; +static double sigma_z = 1.0; +static double center_x = 0.0; +static double center_y = 0.0; +static double center_z = 0.0; +static double gaussian_amp = 1.0; +static double sigma_sq_x = sigma_x*sigma_x; +static double sigma_sq_y = sigma_y*sigma_y; +static double sigma_sq_z = sigma_z*sigma_z; + +double rtclock(); + +static double uinitial(const coord_3d& r) +{ + const double x=r[0], y=r[1], z=r[2]; + return -2.0/(sqrt(x*x+y*y+z*z+1e-8)); +} + +static double random_function(const coord_3d& r) +{ + const double x=r[0], y=r[1], z=r[2]; + + const double dx = x - center_x; + const double dy = y - center_y; + const double dz = z - center_z; + + const double periodic_part = sin_amp * sin(sin_freq*(dx+dy+dz)) + + cos_amp * 
cos(cos_freq*(dx+dy+dz)); + + const double x_comp = dx*dx/sigma_sq_x; + const double y_comp = dy*dy/sigma_sq_y; + const double z_comp = dz*dz/sigma_sq_z; + + const double gaussian_part = -gaussian_amp/exp(sqrt(x_comp+y_comp+z_comp)); + + return gaussian_part*gaussian_part; +} + +static double get_rand() { + double r3 = LO + static_cast(rand())/(static_cast(RAND_MAX/(HI-LO))); + return r3; +} + +static void randomizer() +{ + sin_amp = get_rand(); + cos_amp = get_rand(); + sin_freq = get_rand(); + cos_freq = get_rand(); + sigma_x = get_rand(); + sigma_y = get_rand(); + sigma_z = get_rand(); + center_x = get_rand()*L/(2.0*HI); + center_y = get_rand()*L/(2.0*HI); + center_z = get_rand()*L/(2.0*HI); + gaussian_amp = get_rand(); + sigma_sq_x = sigma_x*sigma_x; + sigma_sq_y = sigma_y*sigma_y; + sigma_sq_z = sigma_z*sigma_z; +} + +int main(int argc, char** argv) +{ + initialize(argc,argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + + FunctionDefaults<3>::set_max_refine_level(30); + + + // 2 * N Functions + real_function_3d f[FUNC_SIZE]; + real_function_3d fac0[FUNC_SIZE]; + real_function_3d fac1[FUNC_SIZE]; + real_function_3d fac2[FUNC_SIZE]; + real_function_3d* df0[FUNC_SIZE]; + real_function_3d* df1[FUNC_SIZE]; + real_function_3d* df2[FUNC_SIZE]; + real_function_3d dm0[FUNC_SIZE]; + real_function_3d dm1[FUNC_SIZE]; + real_function_3d dm2[FUNC_SIZE]; + + std::cout<<"Differentiation of "<* op0[FUNC_SIZE]; + DerivativeOp* op1[FUNC_SIZE]; + DerivativeOp* op2[FUNC_SIZE]; + + real_derivative_3d D0 = free_space_derivative(world, 0); + real_derivative_3d D1 = free_space_derivative(world, 1); + real_derivative_3d D2 = free_space_derivative(world, 2); + + for (i=0; i("Derivative0",df0[i],&f[i],world,&D0); + op1[i] = new 
DerivativeOp("Derivative1",df1[i],df0[i],world,&D1); + op2[i] = new DerivativeOp("Derivative2",df2[i],df1[i],world,&D2); + + } + + vector*> sequence; + + for (i=0; i odag(sequence); + odag.processSequence(); + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + fexecuter.execute(); + + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); + + + + if (world.rank() == 0) print ("====================================================="); + if (world.rank() == 0) print (" MADNESS "); + if (world.rank() == 0) print ("====================================================="); + + clkbegin = rtclock(); + + + for (i=0; inorm2(); + double FuseTnorm1 = df1[FUNC_SIZE-1]->norm2(); + double FuseTnorm2 = df2[FUNC_SIZE-1]->norm2(); + + + if (world.rank() == 0) print("[Result MADNESS] Norm0 : ", MADnorm0 ); + if (world.rank() == 0) print("[Result Fuset] Norm0 : ", FuseTnorm0); + + if (world.rank() == 0) print("[Result MADNESS] Norm1 : ", MADnorm1); + if (world.rank() == 0) print("[Result Fuset] Norm1 : ", FuseTnorm1); + + if (world.rank() == 0) print("[Result MADNESS] Norm2 : ", MADnorm2); + if (world.rank() == 0) print("[Result Fuset] Norm2 : ", FuseTnorm2); + + +/* + double MADtrace0 = df0.trace(); + double MADtrace1 = df1.trace(); + double MADtrace2 = df2.trace(); + + if (world.rank() == 0) print ("====================================================="); + if (world.rank() == 0) print (" FuseT "); + if (world.rank() == 0) print ("====================================================="); + + clkbegin = rtclock(); + + //f1.reconstruct(); + //world.gop.fence(); + DerivativeOp derivative_op_0("Derivative",&i0,&f1,world,&D0); // i5 <-- i1 + DerivativeOp derivative_op_1("Derivative",&i1,&f1,world,&D1); // i6 <-- f3 + DerivativeOp derivative_op_2("Derivative",&i2,&f1,world,&D2); // i7 <-- f4 + bool fused =false; + + if(fused){ + + vector*> sequence; + 
sequence.push_back(&derivative_op_0); + sequence.push_back(&derivative_op_1); + sequence.push_back(&derivative_op_2); + + FuseT odag(sequence); + odag.processSequence(); + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + if (world.rank() == 0) print ("==before exe================================================"); + + clkbegin = rtclock(); + + fexecuter.execute(); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + + }else{ + + OpExecutor exe(world); + clkbegin = rtclock(); + + exe.execute(&derivative_op_0, false); + exe.execute(&derivative_op_1, false); + exe.execute(&derivative_op_2, false); + world.gop.fence(); + } + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("OpExecutor Running Time: %f\n", clkend); + + double FuseTnorm0 = i0.norm2(); + double FuseTnorm1 = i1.norm2(); + double FuseTnorm2 = i2.norm2(); + + double FuseTtrace0 = i0.trace(); + double FuseTtrace1 = i1.trace(); + double FuseTtrace2 = i2.trace(); + + + real_function_3d dif0 = i0 - df0; + real_function_3d dif1 = i1 - df1; + real_function_3d dif2 = i2 - df2; + world.gop.fence(); + + double Difnorm0 = dif0.norm2(); + double Difnorm1 = dif1.norm2(); + double Difnorm2 = dif2.norm2(); + + + if (world.rank() == 0) print("[Result MADNESS] Norm0 : ", MADnorm0," Trace : ", MADtrace0); + if (world.rank() == 0) print("[Result Fuset] Norm0 : ", FuseTnorm0," Trace : ", FuseTtrace0); + + if (world.rank() == 0) print("[Result MADNESS] Norm1 : ", MADnorm1," Trace : ", MADtrace1); + if (world.rank() == 0) print("[Result Fuset] Norm1 : ", FuseTnorm1," Trace : ", FuseTtrace1); + + if (world.rank() == 0) print("[Result MADNESS] Norm2 : ", MADnorm2," Trace : ", MADtrace2); + if (world.rank() == 0) print("[Result Fuset] Norm2 : ", FuseTnorm2," Trace : ", FuseTtrace2); + + if (world.rank() == 0) print("[Difference] Norm2 : ", Difnorm0); + if (world.rank() == 0) print("[Difference] Norm2 : ", Difnorm1); + 
if (world.rank() == 0) print("[Difference] Norm2 : ", Difnorm2); +*/ + + finalize(); + return 0; +} +
+// Returns the current wall-clock time in seconds (microsecond resolution)
+// as reported by gettimeofday(). Elapsed time is obtained by subtracting
+// two readings. If gettimeofday() fails, a message is printed and 0.0 is
+// returned.
+double rtclock()
+{
+    // Zero-initialise so a failed call yields 0.0 instead of reading
+    // indeterminate fields.
+    struct timeval Tp = {0, 0};
+    // The timezone argument is obsolete; POSIX leaves the behaviour
+    // unspecified when it is non-null, so a null pointer is passed.
+    const int stat = gettimeofday(&Tp, NULL);
+    if (stat != 0)
+        printf("Error return from gettimeofday: %d\n", stat);
+    return (Tp.tv_sec + Tp.tv_usec * 1.0e-6);
+}
+
diff --git a/src/examples/FuseTExamples/FusedEx.cc b/src/examples/FuseTExamples/FusedEx.cc new file mode 100644 index 00000000000..3e41b2fee9f --- /dev/null +++ b/src/examples/FuseTExamples/FusedEx.cc @@ -0,0 +1,251 @@ +/* + This file is part of MADNESS. + + Copyright (C) 2007,2010 Oak Ridge National Laboratory + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + For more information please contact: + + Robert J. Harrison + Oak Ridge National Laboratory + One Bethel Valley Road + P.O. Box 2008, MS-6367 + + email: harrisonrj@ornl.gov + tel: 865-241-3937 + fax: 865-572-0680 + + $Id$ +*/ +//#define WORLD_INSTANTIATE_STATIC_TEMPLATES +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/*! +\file heat2.cc +\brief Example Green function for the 3D heat equation with a linear term +\defgroup heatex2 Evolve in time 3D heat equation with a linear term +\ingroup examples + +The source is here. 
+ +\par Points of interest + - application of a function of a function to exponentiate the potential + - use of a functor to compute the solution at an arbitrary future time + - convolution with the Green's function + + +\par Background + +This adds to the complexity of the other \ref exampleheat "heat equation example" +by including a linear term. Specifically, we solve +\f[ + \frac{\partial u(x,t)}{\partial t} = c \nabla^2 u(x,t) + V_p(x,t) u(x,t) +\f] +If \f$ V_p = 0 \f$ time evolution operator is +\f[ + G_0(x,t) = \frac{1}{\sqrt{4 \pi c t}} \exp \frac{-x^2}{4 c t} +\f] +For non-zero \f$ V_p \f$ the time evolution is performed using the Trotter splitting +\f[ + G(x,t) = G_0(x,t/2) * \exp(V_p t) * G_0(x,t/2) + O(t^3) +\f] +In order to form an exact solution for testing, we choose \f$ V_p(x,t)=\mbox{constant} \f$ +but the solution method is not limited to this choice. + +*/ + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +// Initial Gaussian with exponent alpha +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+3.2*y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5); +} + + + +static double uinitial2(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(5*x*x+y*y+z*z))*pow(constants::pi/alpha,-1.5); +} + + +static double Vp(const coord_3d& r) { + return VVV; +} + + +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * 
pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +int main(int argc, char** argv) +{ + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + FunctionDefaults<3>::set_max_refine_level(4); + real_function_3d u0 = real_factory_3d(world).f(uinitial); + u0._treeName = "u0"; + u0.truncate(); + real_function_3d u1 = real_factory_3d(world).f(uinitial2); + u1._treeName = "u1"; + u1.truncate(); + + double u0_norm = u0.norm2(); + double u0_trace = u0.trace(); + double u1_norm = u1.norm2(); + double u1_trace = u1.trace(); + + if (world.rank() == 0) print("Initial norm", u0_norm,"trace", u0_trace); + if (world.rank() == 0) print("Initial norm", u1_norm,"trace", u1_trace); + u0.compress(); + u1.compress(); + double in = u0.inner(u1); + u1.reconstruct(); + world.gop.fence(); + + // Make exponential of Vp + real_function_3d rreconstruct_factory = real_factory_3d(world); + real_function_3d rreconstruct(rreconstruct_factory); + rreconstruct._treeName="RReconstruct"; + real_function_3d rcopy_factory = real_factory_3d(world); + real_function_3d rcopy(rcopy_factory); + rcopy._treeName="RCopy"; + real_function_3d rcmp_factory = real_factory_3d(world); + real_function_3d rcmp(rcmp_factory); + rcmp._treeName ="RCompress"; + real_function_3d rcmp1_factory = real_factory_3d(world); + real_function_3d rcmp1(rcmp1_factory); + rcmp1._treeName ="RCompress1"; + real_function_3d rinner_factory = real_factory_3d(world); + real_function_3d rinner(rinner_factory); + rinner._treeName ="RInner"; + + 
+ ReconstructOp op0("Reconstruct",&rreconstruct,&u0); + CopyOp op1("Copy",&rcopy,&rreconstruct); + CompressOp op2("Compress_rcopy",&rcmp,&rcopy); + CompressOp op3("Compress_u1",&rcmp1,&u1); + InnerOp op4("Inner",&rinner,&rcmp,&rcmp1); + //InnerOp op4("Inner",&rinner,&u0,&u1); + + bool fused = true; + bool printInfo = true; + + if(fused){ + vector* > sequence; + sequence.push_back(&op0); + sequence.push_back(&op1); + sequence.push_back(&op2); + sequence.push_back(&op3); + sequence.push_back(&op4); + FuseT odag(sequence); + odag.processSequence(); + + if(printInfo && world.rank() == 0){ + odag.printOpsAndTrees(); + odag.printValidSequences(); + } + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + fexecuter.execute(); + + double rreconstruct_norm =rreconstruct.norm2(); + double rcopy_norm =rcopy.norm2(); + double rcmp_norm =rcmp.norm2(); + double rcmp1_norm =rcmp1.norm2(); + double rreconstruct_trace =rreconstruct.trace(); + double rcopy_trace =rcopy.trace(); + double rcmp_trace =rcmp.trace(); + double rcmp1_trace =rcmp1.trace(); + + //double in = u0.inner(u1); + if(world.rank() == 0){ + cout< exe(world); + exe.execute(&op2, false); + world.gop.fence(); + + } + +// double result_norm = result.norm2(); +// double result_trace = u0.trace(); + + //if (world.rank() == 0) print("Result norm", result_norm," result trace", result_trace); + world.gop.fence(); + finalize(); + return 0; +} + diff --git a/src/examples/FuseTExamples/InnerEx.cc b/src/examples/FuseTExamples/InnerEx.cc new file mode 100644 index 00000000000..05952e68dbd --- /dev/null +++ b/src/examples/FuseTExamples/InnerEx.cc @@ -0,0 +1,392 @@ +/* + This file is part of MADNESS. 
+ + Copyright (C) 2007,2010 Oak Ridge National Laboratory + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + For more information please contact: + + Robert J. Harrison + Oak Ridge National Laboratory + One Bethel Valley Road + P.O. Box 2008, MS-6367 + + email: harrisonrj@ornl.gov + tel: 865-241-3937 + fax: 865-572-0680 + + $Id$ +*/ +//#define WORLD_INSTANTIATE_STATIC_TEMPLATES +#include +#include +#include +#include +#include +#include +#include +#include +/*! + \file heat2.cc + \brief Example Green function for the 3D heat equation with a linear term + \defgroup heatex2 Evolve in time 3D heat equation with a linear term + \ingroup examples + + The source is here. + + \par Points of interest + - application of a function of a function to exponentiate the potential + - use of a functor to compute the solution at an arbitrary future time + - convolution with the Green's function + + + \par Background + + This adds to the complexity of the other \ref exampleheat "heat equation example" + by including a linear term. 
Specifically, we solve + \f[ + \frac{\partial u(x,t)}{\partial t} = c \nabla^2 u(x,t) + V_p(x,t) u(x,t) + \f] + If \f$ V_p = 0 \f$ time evolution operator is + \f[ + G_0(x,t) = \frac{1}{\sqrt{4 \pi c t}} \exp \frac{-x^2}{4 c t} + \f] + For non-zero \f$ V_p \f$ the time evolution is performed using the Trotter splitting + \f[ + G(x,t) = G_0(x,t/2) * \exp(V_p t) * G_0(x,t/2) + O(t^3) + \f] + In order to form an exact solution for testing, we choose \f$ V_p(x,t)=\mbox{constant} \f$ + but the solution method is not limited to this choice. + +*/ + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision // w/o diff. and 1e-12 -> 64 x 64 +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +#define PI 3.1415926535897932385 +#define LO 0.0000000000 +#define HI 4.0000000000 + +static double sin_amp = 1.0; +static double cos_amp = 1.0; +static double sin_freq = 1.0; +static double cos_freq = 1.0; +static double sigma_x = 1.0; +static double sigma_y = 1.0; +static double sigma_z = 1.0; +static double center_x = 0.0; +static double center_y = 0.0; +static double center_z = 0.0; +static double gaussian_amp = 1.0; +static double sigma_sq_x = sigma_x*sigma_x; +static double sigma_sq_y = sigma_y*sigma_y; +static double sigma_sq_z = sigma_z*sigma_z; + +#define FUNC_SIZE 8 +#define FUNC_SIZE_M 8 + +double rtclock(); +
+// Anisotropic Gaussian exp(-alpha*(6.4*x^2 + y^2 + 1.7*z^2)) scaled by
+// (pi/alpha)^(-1.5).
+// NOTE(review): the x term is written 2*x*3.2*x (= 6.4*x^2). FusedEx.cc uses
+// 2*x*x + 3.2*y*y + 1.7*z*z, so this may be a typo -- confirm before changing.
+static double uinitial(const coord_3d& r) {
+    const double x=r[0], y=r[1], z=r[2];
+    return exp(-alpha*(2*x*3.2*x+y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5);
+}
+
+// Anisotropic Gaussian exp(-alpha*(5*x^2 + y^2 + z^2)) scaled by
+// (pi/alpha)^(-1.5).
+// The srand(time(0)) call that used to sit here was removed. The function
+// never draws a random number, so the call only reset the global rand()
+// sequence (the one randomizer() consumes) on every point evaluation.
+static double uinitial2(const coord_3d& r) {
+    const double x=r[0], y=r[1], z=r[2];
+    return exp(-alpha*(5*x*x+y*y+z*z))*pow(constants::pi/alpha,-1.5);
+}
+
+// Anisotropic Gaussian exp(-alpha*(2*x^2 + 1.4*y^2 + z^2)) scaled by
+// (pi/alpha)^(-1.5).
+static double uinitial1(const coord_3d& r) {
+    const double x=r[0], y=r[1], z=r[2];
+    return exp(-alpha*(2*x*x+1.4*y*y+z*z))*pow(constants::pi/alpha,-1.5);
+}
+
+// Test function whose shape is controlled by the file-level parameters that
+// randomizer() assigns. Returns the square of
+// gaussian_amp * exp(-sqrt(dx^2/sigma_sq_x + dy^2/sigma_sq_y + dz^2/sigma_sq_z)),
+// where (dx,dy,dz) is r relative to (center_x, center_y, center_z).
+static double random_function(const coord_3d& r) {
+    const double x=r[0], y=r[1], z=r[2];
+
+    const double dx = x - center_x;
+    const double dy = y - center_y;
+    const double dz = z - center_z;
+
+    // NOTE(review): periodic_part is computed but never used in the return
+    // value, so sin_amp/cos_amp/sin_freq/cos_freq currently have no effect.
+    // Confirm whether it was meant to modulate the result.
+    const double periodic_part = sin_amp * sin(sin_freq*(dx+dy+dz)) +
+                                 cos_amp * cos(cos_freq*(dx+dy+dz));
+
+    const double x_comp = dx*dx/sigma_sq_x;
+    const double y_comp = dy*dy/sigma_sq_y;
+    const double z_comp = dz*dz/sigma_sq_z;
+
+    const double gaussian_part = -gaussian_amp/exp(sqrt(x_comp+y_comp+z_comp));
+
+    return gaussian_part*gaussian_part;
+}
+
+// Returns a pseudo-random value LO + rand()/(RAND_MAX/(HI-LO)), i.e. in the
+// range [LO, HI].
+// NOTE(review): the target types of both static_cast expressions are missing
+// in this copy of the patch; they are left exactly as found.
+static double get_rand() {
+    double r3 = LO + static_cast(rand())/(static_cast(RAND_MAX/(HI-LO)));
+    return r3;
+}
+
+// Draws a fresh set of shape parameters for random_function() from
+// get_rand() and refreshes the cached squared widths. The centres are scaled
+// by L/(2*HI).
+// NOTE(review): get_rand() can return LO (0.0), which would make a sigma zero
+// and random_function() divide by zero -- confirm whether that matters here.
+static void randomizer()
+{
+    sin_amp = get_rand();
+    cos_amp = get_rand();
+    sin_freq = get_rand();
+    cos_freq = get_rand();
+    sigma_x = get_rand();
+    sigma_y = get_rand();
+    sigma_z = get_rand();
+    center_x = get_rand()*L/(2.0*HI);
+    center_y = get_rand()*L/(2.0*HI);
+    center_z = get_rand()*L/(2.0*HI);
+    gaussian_amp = get_rand();
+    sigma_sq_x = sigma_x*sigma_x;
+    sigma_sq_y = sigma_y*sigma_y;
+    sigma_sq_z = sigma_z*sigma_z;
+}
+
+// Produces the offset used by ghaly(): seeds the C RNG from the wall clock
+// and returns one sample divided by 1e9. This is the same expression ghaly()
+// previously evaluated on every call.
+static double ghaly_offset() {
+    std::srand(time(NULL));
+    return std::rand()/1000000000.0;
+}
+
+// Returns 3*exp(-2*sqrt(x^2 + y^2 + z^2 + randVal^2 + 1e-4)), where randVal
+// is a random offset.
+// The offset is now drawn once, on the first call, and reused. Previously
+// the RNG was reseeded from time() and redrawn for every point, so the value
+// (and therefore the function) changed each time the clock advanced.
+// NOTE(review): each process draws its own offset -- confirm that ranks are
+// not expected to agree on it.
+static double ghaly(const coord_3d& r) {
+    static const double randVal = ghaly_offset();
+    const double x=r[0], y=r[1], z=r[2];
+    return 3.0*exp(-2.0*sqrt(x*x + randVal*randVal + y*y + z*z + 1e-4));
+}
+
+// Constant potential: returns VVV regardless of position.
+static double Vp(const coord_3d& r) {
+    return VVV;
+}
+
+// Functor returning coeff * s * sin(s) with s = x^2 + y^2 + z^2.
+// NOTE(review): the template arguments of the base class are missing in this
+// copy of the patch; the declaration is left exactly as found.
+class alpha_functor : public FunctionFunctorInterface {
+private:
+    double coeff;   // multiplicative prefactor
+public:
+    alpha_functor(double coeff=1.0) : coeff(coeff) {}
+
+    virtual double operator()(const coord_3d& r) const {
+        const double x=r[0], y=r[1], z=r[2];
+        return (coeff * (x*x + y*y + z*z) * sin(x*x + y*y + z*z));
+    }
+};
+// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double 
x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +int main(int argc, char** argv) +{ + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + + + FunctionDefaults<3>::set_max_refine_level(14); + + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) printf(" Initializing Functions\n"); + if (world.rank() == 0) printf(" %d Functions, %d Functions\n", FUNC_SIZE, FUNC_SIZE_M); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // 2 * N Functions + real_function_3d h[FUNC_SIZE]; + real_function_3d g[FUNC_SIZE_M]; + + real_function_3d temp_factory_h[FUNC_SIZE]; + real_function_3d temp_factory_g[FUNC_SIZE_M]; + real_function_3d* temp_h[FUNC_SIZE]; + real_function_3d* temp_g[FUNC_SIZE_M]; + + // N * N Results Functions by Inner-Product + real_function_3d temp_factory[FUNC_SIZE][FUNC_SIZE_M]; + real_function_3d* temp[FUNC_SIZE][FUNC_SIZE_M]; + + int i, j; + double clkbegin, clkend; + clkbegin = rtclock(); + + for (i=0; i* c_op_h[FUNC_SIZE]; + CompressOp* c_op_g[FUNC_SIZE_M]; + + for (i=0; i("Compress",temp_h[i],&h[i]); + + for (j=0; j("Compress",temp_g[j],&g[j]); + + InnerOp* inner_op_ug[FUNC_SIZE][FUNC_SIZE_M]; + for (i=0; i("Inner",temp[i][j],temp_h[i],temp_g[j]); + + vector*> sequence; + + for (i=0; i odag(sequence); + 
odag.processSequence(); + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + fexecuter.execute(); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); + + for (i=0; i_sum); + } + +// +// +// + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("== MADNESS =============================="); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + clkbegin = rtclock(); + double resultInner[FUNC_SIZE][FUNC_SIZE_M] = {0.0,}; + + for (i=0; i +#include +#include +#include +#include +#include +#include +#include + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision // w/o diff. and 1e-12 -> 64 x 64 +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +#define FUNC_SIZE 64 +#define FUNC_SIZE_M 64 +#define MIN_NODES 4 +#define SCALE MIN_NODES/4 + +double rtclock(); + +// Initial Gaussian with exponent alpha +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*3.2*x+y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5); +} + +static double uinitial2(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + srand(time(0)); + return exp(-alpha*(5*x*x+y*y+z*z))*pow(constants::pi/alpha,-1.5); +} +static double uinitial1(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+1.4*y*y+z*z))*pow(constants::pi/alpha,-1.5); +}; + +static double ghaly(const coord_3d& r) { + std::srand(time(NULL)); + const double randVal = std::rand()/1000000000.0; + const double x=r[0], y=r[1], z=r[2]; + return 3.0*exp(-2.0*sqrt(x*x + randVal*randVal + y*y + 
z*z + 1e-4)); +} +static double Vp(const coord_3d& r) { + return VVV; +} + +class alpha_functor : public FunctionFunctorInterface { +private: + double coeff; +public: + alpha_functor(double coeff=1.0) : coeff(coeff) {} + + virtual double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + return (coeff * (x*x + y*y + z*z) * sin(x*x + y*y + z*z)); + } +}; +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +int main(int argc, char** argv) +{ + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + FunctionDefaults<3>::set_max_refine_level(14); + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) printf(" Initializing Functions\n"); + if (world.rank() == 0) printf(" %d Functions, %d Functions\n", FUNC_SIZE, FUNC_SIZE_M); + if (world.rank() == 0) printf(" Max-refine-level: %d Functions\n", 14); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // N and M Functions + real_function_3d h[FUNC_SIZE]; + real_function_3d g[FUNC_SIZE_M]; + + // N and M (Result) Functions + real_function_3d temp_factory_h[FUNC_SIZE]; + 
real_function_3d temp_factory_g[FUNC_SIZE_M]; + real_function_3d* temp_h[FUNC_SIZE]; + real_function_3d* temp_g[FUNC_SIZE_M]; + + // (SCALE) * N * M Results Functions by Inner-Product + real_function_3d temp_factory[SCALE][FUNC_SIZE][FUNC_SIZE_M]; + real_function_3d* temp[SCALE][FUNC_SIZE][FUNC_SIZE_M]; + + int i, j, k; + double clkbegin, clkend; + clkbegin = rtclock(); + + for (i=0; i* c_op_h[FUNC_SIZE]; + CompressOp* c_op_g[FUNC_SIZE_M]; + InnerOp* inner_op_ug[SCALE][FUNC_SIZE][FUNC_SIZE_M]; + vector*> sequence; + + for (i=0; i("Compress",temp_h[i],&h[i]); + for (j=0; j("Compress",temp_g[j],&g[j]); + + for (k=0; k("Inner",temp[i][j],temp_h[i],temp_g[j]); + + for (i=0; i odag(sequence); + odag.processSequence(); + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + fexecuter.execute(); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("[the fused version by FuseT] Running Time: %f\n", clkend); + world.gop.fence(); + + for (k=0; k_sum); +// +// +// + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("== MADNESS =============================="); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + clkbegin = rtclock(); + double resultInner[SCALE][FUNC_SIZE][FUNC_SIZE_M] = {0.0,}; + + for (i=0; i +#include +#include +#include +#include +/*! + \file heat2.cc + \brief Example Green function for the 3D heat equation with a linear term + \defgroup heatex2 Evolve in time 3D heat equation with a linear term + \ingroup examples + + The source is here. 
+ + \par Points of interest + - application of a function of a function to exponentiate the potential + - use of a functor to compute the solution at an arbitrary future time + - convolution with the Green's function + + + \par Background + + This adds to the complexity of the other \ref exampleheat "heat equation example" + by including a linear term. Specifically, we solve + \f[ + \frac{\partial u(x,t)}{\partial t} = c \nabla^2 u(x,t) + V_p(x,t) u(x,t) + \f] + If \f$ V_p = 0 \f$ time evolution operator is + \f[ + G_0(x,t) = \frac{1}{\sqrt{4 \pi c t}} \exp \frac{-x^2}{4 c t} + \f] + For non-zero \f$ V_p \f$ the time evolution is performed using the Trotter splitting + \f[ + G(x,t) = G_0(x,t/2) * \exp(V_p t) * G_0(x,t/2) + O(t^3) + \f] + In order to form an exact solution for testing, we choose \f$ V_p(x,t)=\mbox{constant} \f$ + but the solution method is not limited to this choice. + +*/ + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +// Initial Gaussian with exponent alpha +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*3.2*x+y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5); +} + +static double uinitial2(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(5*x*x+y*y+z*z))*pow(constants::pi/alpha,-1.5); +} +static double alpha_func(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return ((x*x + y*y + z*z) * sin(x*x + y*y + z*z)); +}; + +static double Vp(const coord_3d& r) { + return VVV; +} + +class alpha_functor : public FunctionFunctorInterface { +private: + double coeff; +public: + alpha_functor(double coeff=1.0) : coeff(coeff) {} + + virtual double operator()(const coord_3d& r) 
const { + const double x=r[0], y=r[1], z=r[2]; + return (coeff * (x*x + y*y + z*z) * sin(x*x + y*y + z*z)); + } +}; +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +int main(int argc, char** argv) +{ + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + + if (world.rank() == 0) printf ("Initializing Two Functions -- uinitial & alpha_func\n"); + world.gop.fence(); + real_function_3d u0 = real_factory_3d(world).f(uinitial); + real_function_3d u1 = real_factory_3d(world).f(uinitial2); + u0.truncate(); + u1.truncate(); + + double u0_norm = u0.norm2(); + double u0_trace = u0.trace(); + + real_function_3d result_factory = real_factory_3d(world); + real_function_3d result(result_factory); + + if (world.rank() == 0) print("Initial norm", u0_norm,"trace", u0_trace); + world.gop.fence(); + + if (world.rank() == 0) printf (" Inner Product by MADNESS ==========\n"); + world.gop.fence(); + double u0_inner = u0.inner(u0); + + if (world.rank() == 0) print (" Inner Product by MADNESS: ", u0_inner); + world.gop.fence(); + + // Make exponential of Vp + real_function_3d mimic_u0 = real_factory_3d(world).f(uinitial); + mimic_u0.truncate(); + + mimic_u0.compress(); + 
u1.compress(); + + if (world.rank() == 0) printf (" Inner Product by OpExecutor ==========\n"); + world.gop.fence(); + InnerOp op1("Inner",&result,&u1,&mimic_u0); + OpExecutor exe(world); + exe.execute(&op1, true); + + if (world.rank() == 0 ) printf ("SUM: %7.10f\n", op1._sum); + world.gop.fence(); + + double hello = mimic_u0.inner(u1); + if (world.rank() == 0) printf ("inner by MADNESS --same functions: %7.10f\n", hello); + + + if (world.rank() == 0) print (" Inner Product by OpExecutor ", 0, "not yet"); + world.gop.fence(); + + double result_u0_norm = u0.norm2(); + double result_u0_trace = u0.trace(); + if (world.rank() == 0) print("Result norm", result_u0_norm,"trace", result_u0_trace); + world.gop.fence(); + + finalize(); + return 0; +} + diff --git a/src/examples/FuseTExamples/Makefile.am b/src/examples/FuseTExamples/Makefile.am new file mode 100644 index 00000000000..ed7695f0600 --- /dev/null +++ b/src/examples/FuseTExamples/Makefile.am @@ -0,0 +1,27 @@ +include $(top_srcdir)/config/MakeGlobal.am +EXTRA_DIST = CMakeLists.txt + +# specify the git commit version in the info functions +GITREV="MADNESS_GITREVISION=\"`git --git-dir=$(top_srcdir)/.git rev-parse HEAD`\" " + +# default location for basis sets etc +AM_CPPFLAGS += -D$(GITREV) + + +noinst_PROGRAMS = CopyEx CompressEx InnerEx Test_FusedEx FusedEx ReconstructEx InnerExWeak multiplyEx + +thisincludedir = $(includedir)/examples/FuseTExamples/ +thisinclude_HEADERS = molecularmask.h nonlinsol.h spectralprop.h + +AUTOMAKE_OPTIONS = subdir-objects + +CopyEx_SOURCES = CopyEx.cc +CompressEx_SOURCES = compress.cc +InnerEx_SOURCES = InnerEx.cc +ReconstructEx_SOURCES = ReconstructEx.cc +FusedEx_SOURCES = FusedEx.cc +testFusedEx_SOURCES = Test_FusedEx.cc +InnerExWeak_SOURCES = InnerEx_Weak.cc + +LDADD = $(LIBCHEM) $(MRALIBS) + diff --git a/src/examples/FuseTExamples/MatrixInnerEx.cc b/src/examples/FuseTExamples/MatrixInnerEx.cc new file mode 100644 index 00000000000..1368225c855 --- /dev/null +++ 
b/src/examples/FuseTExamples/MatrixInnerEx.cc @@ -0,0 +1,136 @@ +/* + This file is part of MADNESS. + + Copyright (C) 2007,2010 Oak Ridge National Laboratory + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + For more information please contact: + + Robert J. Harrison + Oak Ridge National Laboratory + One Bethel Valley Road + P.O. Box 2008, MS-6367 + + email: harrisonrj@ornl.gov + tel: 865-241-3937 + fax: 865-572-0680 + + $Id$ +*/ +//#define WORLD_INSTANTIATE_STATIC_TEMPLATES +#include +#include +#include +#include +#include +#include +#include + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +// Initial Gaussian with exponent alpha +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+y*y+z*z))*pow(constants::pi/alpha,-1.5); +} + +static double Vp(const coord_3d& r) { + return VVV; +} + +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], 
y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +int main(int argc, char** argv) +{ + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + + real_function_3d u0 = real_factory_3d(world).f(uinitial); + u0.truncate(); + + double u0_norm = u0.norm2(); + double u0_trace = u0.trace(); + + if (world.rank() == 0) print("Initial norm", u0_norm,"trace", u0_trace); + world.gop.fence(); + + // Make exponential of Vp + real_function_3d result_factory = real_factory_3d(world); + real_function_3d result(result_factory); + + double result_init_norm = result.norm2(); + double result_init_trace = result.trace(); + + if (world.rank() == 0) print("Initial Result norm", result_init_norm,"trace", result_init_trace); + world.gop.fence(); + + CopyOp op1("Copy",&result,&u0); + OpExecutor exe(world); + exe.execute(&op1, false); + world.gop.fence(); + + u0_norm = u0.norm2(); + u0_trace = u0.trace(); + double result_norm = result.norm2(); + double result_trace = result.trace(); + + if (world.rank() == 0) print("u0 norm", u0_norm," u0 trace", u0_trace); + world.gop.fence(); + if (world.rank() == 0) print("Result norm", result_norm," result trace", result_trace); + world.gop.fence(); + + finalize(); + return 0; +} + diff --git a/src/examples/FuseTExamples/MultiplyEx.cc b/src/examples/FuseTExamples/MultiplyEx.cc new file mode 100644 index 00000000000..91738c69c7f --- /dev/null +++ 
b/src/examples/FuseTExamples/MultiplyEx.cc @@ -0,0 +1,139 @@ +/* + This file is part of MADNESS. + + Copyright (C) 2007,2010 Oak Ridge National Laboratory + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + For more information please contact: + + Robert J. Harrison + Oak Ridge National Laboratory + One Bethel Valley Road + P.O. Box 2008, MS-6367 + + email: harrisonrj@ornl.gov + tel: 865-241-3937 + fax: 865-572-0680 +*/ + + +/// \file examples/hello.cc +/// \brief Simplest example program for MADNESS +/// \defgroup hellowworldmad Hello world MADNESS style +/// \ingroup examples +/// +/// Simplest program that initializes the MADNESS parallel runtime +/// using initialize(), makes a madness::World object, prints +/// a greeting, and then cleans up. +/// +/// To initialize the MADNESS numerical environment you also need +/// \c startup(world,argc,argv) and should include mra/mra.h rather +/// than world/MADworld.h . 
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace madness;; + +static const double L = 20; +static const long k = 8; +static const double thresh = 1e-6; +//static const double thresh = 1e-12; +static const double c = 2.0; +static const double alpha = 1.9; // Exponent + +inline static double uinitial(const coord_3d& r) +{ + const double x=r[0], y=r[1], z=r[2]; + return -2.0/(sqrt(x*x+1.2*y*y+z*z+1e-8)); + +} +inline static double uinitial1(const coord_3d& r) +{ + const double x=r[0], y=r[1], z=r[2]; + return -3.0/(sqrt(x*x/0.7+y*y*2.8+z*z*1.2+1e-8)); + +} +inline static double uinitial12(const coord_3d& r) +{ + return uinitial(r)*uinitial1(r); +} + +int main(int argc, char** argv) +{ + initialize(argc,argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_defaults(world); + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_initial_level(5); + FunctionDefaults<3>::set_truncate_mode(1); + FunctionDefaults<3>::set_cubic_cell(-L/2, L/2); + + + real_function_3d u0 = real_factory_3d(world).f(uinitial); + real_function_3d u1 = real_factory_3d(world).f(uinitial1); + real_function_3d u12 = real_factory_3d(world).f(uinitial12); + //u0.truncate(); + //u1.truncate(); + + double u12_norm = u12.norm2(); + double u12_trace = u12.trace(); + + if (world.rank() == 0) print("[Analytical Product] Initial norm", u12_norm,"trace", u12_trace); + + real_function_3d result_factory = real_factory_3d(world); + real_function_3d result_fuset(result_factory); + real_function_3d result_factory1 = real_factory_3d(world); + real_function_3d result_fuset1(result_factory); + + MultiplyOp op1("Multiply",&result_fuset, &u0, &u1, 0.0); + MultiplyOp op2("Multiply",&result_fuset1, &u0, &u1, 0.0); + OpExecutor exe(world); + exe.execute(&op1, false); + + vector*> sequence; + 
sequence.push_back(&op2); + FuseT odag(sequence); + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + fexecuter.execute(); + + + double result_fuset_norm = result_fuset.norm2(); + double result_fuset_trace = result_fuset.trace(); + double result_fuset1_norm = result_fuset1.norm2(); + double result_fuset1_trace= result_fuset1.trace(); + if (world.rank() == 0) print("[Result Fuset] Norm", result_fuset_norm,"trace", result_fuset_trace); + if (world.rank() == 0) print("[Result Fuset] Norm", result_fuset1_norm,"trace", result_fuset1_trace); + + finalize(); + return 0; +} diff --git a/src/examples/FuseTExamples/ReconstructEx.cc b/src/examples/FuseTExamples/ReconstructEx.cc new file mode 100644 index 00000000000..9c97ff56988 --- /dev/null +++ b/src/examples/FuseTExamples/ReconstructEx.cc @@ -0,0 +1,276 @@ +/* + This file is part of MADNESS. + + Copyright (C) 2007,2010 Oak Ridge National Laboratory + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + For more information please contact: + + Robert J. Harrison + Oak Ridge National Laboratory + One Bethel Valley Road + P.O. 
Box 2008, MS-6367 + + email: harrisonrj@ornl.gov + tel: 865-241-3937 + fax: 865-572-0680 +*/ + + +/// \file examples/hello.cc +/// \brief Simplest example program for MADNESS +/// \defgroup hellowworldmad Hello world MADNESS style +/// \ingroup examples +/// +/// Simplest program that initializes the MADNESS parallel runtime +/// using initialize(), makes a madness::World object, prints +/// a greeting, and then cleans up. +/// +/// To initialize the MADNESS numerical environment you also need +/// \c startup(world,argc,argv) and should include mra/mra.h rather +/// than world/MADworld.h . + +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include + +using namespace madness;; + +static const double L = 20; +static const long k = 8; +static const double thresh = 1e-6; +static const double c = 2.0; +static const double alpha = 1.9; // Exponent + +#define PI 3.1415926535897932385 +#define LO 0.0000000000 +#define HI 4.0000000000 + +static double sin_amp = 1.0; +static double cos_amp = 1.0; +static double sin_freq = 1.0; +static double cos_freq = 1.0; +static double sigma_x = 1.0; +static double sigma_y = 1.0; +static double sigma_z = 1.0; +static double center_x = 0.0; +static double center_y = 0.0; +static double center_z = 0.0; +static double gaussian_amp = 1.0; +static double sigma_sq_x = sigma_x*sigma_x; +static double sigma_sq_y = sigma_y*sigma_y; +static double sigma_sq_z = sigma_z*sigma_z; + +double rtclock(); + +static double uinitial(const coord_3d& r) +{ + const double x=r[0], y=r[1], z=r[2]; + return -2.0/(sqrt(x*x+y*y+z*z+1e-8)); +} + +static double random_function(const coord_3d& r) +{ + const double x=r[0], y=r[1], z=r[2]; + + const double dx = x - center_x; + const double dy = y - center_y; + const double dz = z - center_z; + + const double periodic_part = sin_amp * sin(sin_freq*(dx+dy+dz)) + + cos_amp * cos(cos_freq*(dx+dy+dz)); + + const double x_comp = dx*dx/sigma_sq_x; + const double y_comp = 
dy*dy/sigma_sq_y; + const double z_comp = dz*dz/sigma_sq_z; + + const double gaussian_part = -gaussian_amp/exp(sqrt(x_comp+y_comp+z_comp)); + + return gaussian_part*gaussian_part; +} + +static double get_rand() { + double r3 = LO + static_cast(rand())/(static_cast(RAND_MAX/(HI-LO))); + return r3; +} + +static void randomizer() +{ + sin_amp = get_rand(); + cos_amp = get_rand(); + sin_freq = get_rand(); + cos_freq = get_rand(); + sigma_x = get_rand(); + sigma_y = get_rand(); + sigma_z = get_rand(); + center_x = get_rand()*L/(2.0*HI); + center_y = get_rand()*L/(2.0*HI); + center_z = get_rand()*L/(2.0*HI); + gaussian_amp = get_rand(); + sigma_sq_x = sigma_x*sigma_x; + sigma_sq_y = sigma_y*sigma_y; + sigma_sq_z = sigma_z*sigma_z; +} + +int main(int argc, char** argv) +{ + initialize(argc,argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + FunctionDefaults<3>::set_max_refine_level(14); + + real_function_3d f1 = real_factory_3d(world).f(random_function); + real_function_3d f2 = real_factory_3d(world).f(random_function); + real_function_3d f3 = real_factory_3d(world).f(random_function); + real_function_3d f4 = real_factory_3d(world).f(random_function); + real_function_3d f5 = real_factory_3d(world).f(random_function); + + real_function_3d i1_factory = real_factory_3d(world); + real_function_3d i2_factory = real_factory_3d(world); + real_function_3d i3_factory = real_factory_3d(world); + real_function_3d i4_factory = real_factory_3d(world); + real_function_3d i5_factory = real_factory_3d(world); + real_function_3d i6_factory = real_factory_3d(world); + real_function_3d i7_factory = real_factory_3d(world); + real_function_3d i8_factory = real_factory_3d(world); + real_function_3d i9_factory = real_factory_3d(world); + real_function_3d 
i1(i1_factory); + real_function_3d i2(i2_factory); + real_function_3d i3(i3_factory); + real_function_3d i4(i4_factory); + real_function_3d i5(i5_factory); + real_function_3d i6(i6_factory); + real_function_3d i7(i7_factory); + real_function_3d i8(i8_factory); + real_function_3d i9(i9_factory); + + f1.truncate(); + f2.truncate(); + f3.truncate(); + f4.truncate(); + f5.truncate(); + +// i1 = f1 * f2; +// i4 = f2 * f3; +// compress--- i1, f3, f4, f5 +// i3 = f5 - i1 // minus? +// i2 = f3 + f4 +// double r = inner (i2, i3); + + double clkbegin, clkend; + if (world.rank() == 0) print ("====================================================="); + if (world.rank() == 0) print (" FuseT "); + if (world.rank() == 0) print ("====================================================="); + + world.gop.fence(); + clkbegin = rtclock(); + + MultiplyOp mul_op_1("Multiply",&i1,&f1,&f2,0.0); // i1 + MultiplyOp mul_op_2("Multiply",&i4,&f2,&f3,0.0); // i4 + CompressOp compress_op_1("Compress",&i5,&i1); // i5 <-- i1 + CompressOp compress_op_2("Compress",&i6,&f3); // i6 <-- f3 + CompressOp compress_op_3("Compress",&i7,&f4); // i7 <-- f4 + CompressOp compress_op_4("Compress",&i8,&f5); // i8 <-- f5 + AddOp add_op_1("Add",&i3,&i8,&i5); // f5(i8) + i1(i5) + AddOp add_op_2("Add",&i2,&i6,&i7); // f3(i6) + f4(i7) + InnerOp inner_op_1("Inner",&i9,&i2,&i3); + + if (world.rank() == 0) print ("==after Ops...================================================="); + vector*> sequence; + sequence.push_back(&mul_op_1); + sequence.push_back(&mul_op_2); + sequence.push_back(&compress_op_1); + sequence.push_back(&compress_op_2); + sequence.push_back(&compress_op_3); + sequence.push_back(&compress_op_4); + sequence.push_back(&add_op_1); + sequence.push_back(&add_op_2); + sequence.push_back(&inner_op_1); + + FuseT odag(sequence); + odag.processSequence(); + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + if (world.rank() == 0) print ("==before 
exe================================================"); + fexecuter.execute(); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + + if (world.rank() == 0) print ("====================================================="); + if (world.rank() == 0) print (" MADNESS "); + if (world.rank() == 0) print ("====================================================="); + world.gop.fence(); + + real_function_3d j1_factory = real_factory_3d(world); + real_function_3d j2_factory = real_factory_3d(world); + real_function_3d j3_factory = real_factory_3d(world); + real_function_3d j4_factory = real_factory_3d(world); + real_function_3d j1(j1_factory); + real_function_3d j2(j4_factory); + real_function_3d j3(j1_factory); + real_function_3d j4(j4_factory); + + clkbegin = rtclock(); + + j1 = f1 * f2; + j4 = f2 * f3; + + j1.compress(); + f3.compress(); + f4.compress(); + f5.compress(); + + j3 = f5 + j1; + j2 = f3 + f4; + + double r = j2.inner(j3); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + + if (world.rank() == 0) printf("[MADNESS]r: %f, [FuseT]i9: %f\n",r, inner_op_1._sum); + world.gop.fence(); + + finalize(); + return 0; +} + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday (&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + diff --git a/src/examples/FuseTExamples/SC2016/compress_only_matrixinner.cc b/src/examples/FuseTExamples/SC2016/compress_only_matrixinner.cc new file mode 100644 index 00000000000..3e3c4a6e16b --- /dev/null +++ b/src/examples/FuseTExamples/SC2016/compress_only_matrixinner.cc @@ -0,0 +1,488 @@ +/* + This file is part of MADNESS. 
+ + Copyright (C) 2007,2010 Oak Ridge National Laboratory + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + For more information please contact: + + Robert J. Harrison + Oak Ridge National Laboratory + One Bethel Valley Road + P.O. Box 2008, MS-6367 + + email: harrisonrj@ornl.gov + tel: 865-241-3937 + fax: 865-572-0680 + + $Id$ +*/ +//#define WORLD_INSTANTIATE_STATIC_TEMPLATES +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/*! + \file heat2.cc + \brief Example Green function for the 3D heat equation with a linear term + \defgroup heatex2 Evolve in time 3D heat equation with a linear term + \ingroup examples + + The source is here. + + \par Points of interest + - application of a function of a function to exponentiate the potential + - use of a functor to compute the solution at an arbitrary future time + - convolution with the Green's function + + + \par Background + + This adds to the complexity of the other \ref exampleheat "heat equation example" + by including a linear term. 
Specifically, we solve + \f[ + \frac{\partial u(x,t)}{\partial t} = c \nabla^2 u(x,t) + V_p(x,t) u(x,t) + \f] + If \f$ V_p = 0 \f$ time evolution operator is + \f[ + G_0(x,t) = \frac{1}{\sqrt{4 \pi c t}} \exp \frac{-x^2}{4 c t} + \f] + For non-zero \f$ V_p \f$ the time evolution is performed using the Trotter splitting + \f[ + G(x,t) = G_0(x,t/2) * \exp(V_p t) * G_0(x,t/2) + O(t^3) + \f] + In order to form an exact solution for testing, we choose \f$ V_p(x,t)=\mbox{constant} \f$ + but the solution method is not limited to this choice. + +*/ + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision // w/o diff. and 1e-12 -> 64 x 64 +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +#define PI 3.1415926535897932385 +#define LO 0.0000000000 +#define HI 4.0000000000 + +static double sin_amp = 1.0; +static double cos_amp = 1.0; +static double sin_freq = 1.0; +static double cos_freq = 1.0; +static double sigma_x = 1.0; +static double sigma_y = 1.0; +static double sigma_z = 1.0; +static double center_x = 0.0; +static double center_y = 0.0; +static double center_z = 0.0; +static double gaussian_amp = 1.0; +static double sigma_sq_x = sigma_x*sigma_x; +static double sigma_sq_y = sigma_y*sigma_y; +static double sigma_sq_z = sigma_z*sigma_z; + +#define FUNC_SIZE 32 +#define FUNC_SIZE_M 32 + +double rtclock(); + +// Initial Gaussian with exponent alpha +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*3.2*x+y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5); +} + +static double uinitial2(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + srand(time(0)); + return exp(-alpha*(5*x*x+y*y+z*z))*pow(constants::pi/alpha,-1.5); +} +static double uinitial1(const coord_3d& r) { + const double x=r[0], 
y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+1.4*y*y+z*z))*pow(constants::pi/alpha,-1.5); +}; + +static double random_function(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + + const double dx = x - center_x; + const double dy = y - center_y; + const double dz = z - center_z; + + const double periodic_part = sin_amp * sin(sin_freq*(dx+dy+dz)) + + cos_amp * cos(cos_freq*(dx+dy+dz)); + + const double x_comp = dx*dx/sigma_sq_x; + const double y_comp = dy*dy/sigma_sq_y; + const double z_comp = dz*dz/sigma_sq_z; + + const double gaussian_part = -gaussian_amp/exp(sqrt(x_comp+y_comp+z_comp)); + + return gaussian_part*gaussian_part; +} + +static double get_rand() { + double r3 = LO + static_cast(rand())/(static_cast(RAND_MAX/(HI-LO))); + return r3; +} + +static void randomizer() +{ + sin_amp = get_rand(); + cos_amp = get_rand(); + sin_freq = get_rand(); + cos_freq = get_rand(); + sigma_x = get_rand(); + sigma_y = get_rand(); + sigma_z = get_rand(); + center_x = get_rand()*L/(2.0*HI); + center_y = get_rand()*L/(2.0*HI); + center_z = get_rand()*L/(2.0*HI); + gaussian_amp = get_rand(); + sigma_sq_x = sigma_x*sigma_x; + sigma_sq_y = sigma_y*sigma_y; + sigma_sq_z = sigma_z*sigma_z; +} + +static double ghaly(const coord_3d& r) { + std::srand(time(NULL)); + const double randVal = std::rand()/1000000000.0; + const double x=r[0], y=r[1], z=r[2]; + return 3.0*exp(-2.0*sqrt(x*x + randVal*randVal + y*y + z*z + 1e-4)); +} + +static double Vp(const coord_3d& r) { + return VVV; +} + +class alpha_functor : public FunctionFunctorInterface { +private: + double coeff; +public: + alpha_functor(double coeff=1.0) : coeff(coeff) {} + + virtual double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + return (coeff * (x*x + y*y + z*z) * sin(x*x + y*y + z*z)); + } +}; +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double 
x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + + +typedef DistributedMatrix distmatT; +typedef Function functionT; +typedef std::vector vecfuncT; + +int main(int argc, char** argv) +{ + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + + FunctionDefaults<3>::set_max_refine_level(14); + //FunctionDefaults<3>::set_max_refine_level(8); + + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) printf(" Initializing Functions\n"); + if (world.rank() == 0) printf(" %d Functions, %d Functions\n", FUNC_SIZE, FUNC_SIZE_M); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // 2 * N Functions + real_function_3d h[FUNC_SIZE]; + real_function_3d g[FUNC_SIZE_M]; + real_function_3d output[FUNC_SIZE*FUNC_SIZE_M]; + + real_function_3d temp_factory_h[FUNC_SIZE]; + real_function_3d temp_factory_g[FUNC_SIZE_M]; + real_function_3d* temp_h[FUNC_SIZE]; + real_function_3d* temp_g[FUNC_SIZE_M]; + + // N * N Results Functions by Inner-Product + real_function_3d temp_factory[FUNC_SIZE][FUNC_SIZE_M]; + real_function_3d* temp[FUNC_SIZE][FUNC_SIZE_M]; + + int i, j; + double clkbegin, clkend; + clkbegin = rtclock(); + + for (i=0; i* matrix_inner_op = new MatrixInnerOp("MatrixInner", temp_h[0], fs, gs, false); + + OpExecutor exe(world); + 
exe.execute(matrix_inner_op, false); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); + + + if (world.rank() == 0) + for (i=0; i_r)(i, j)); + printf ("(%d,%d): %f\n", i, j, (*matrix_inner_op->_r)(i, j)); + } + world.gop.fence(); + +// +// +// + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("== MADNESS - individual inner ============"); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + vecfuncT v_f; + vecfuncT v_g; +/* + for (i=0; i ghaly = matrix_inner(world, v_f, v_g); + //Tensor ghaly = matrix_inner_old(world, v_f, v_g); + +/* + double resultInner[FUNC_SIZE][FUNC_SIZE_M] = {0.0, }; + for (i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +//static const double thresh = 1e-6; // precision // w/o diff. 
and 1e-12 -> 64 x 64 +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +#define VMRA +#define PI 3.1415926535897932385 +#define LO 0.0000000000 +#define HI 4.0000000000 + +static double sin_amp = 1.0; +static double cos_amp = 1.0; +static double sin_freq = 1.0; +static double cos_freq = 1.0; +static double sigma_x = 1.0; +static double sigma_y = 1.0; +static double sigma_z = 1.0; +static double center_x = 0.0; +static double center_y = 0.0; +static double center_z = 0.0; +static double gaussian_amp = 1.0; +static double sigma_sq_x = sigma_x*sigma_x; +static double sigma_sq_y = sigma_y*sigma_y; +static double sigma_sq_z = sigma_z*sigma_z; + +double rtclock(); + +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+3.2*y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5); +} +static double random_function(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + + const double dx = x - center_x; + const double dy = y - center_y; + const double dz = z - center_z; + + const double periodic_part = sin_amp * sin(sin_freq*(dx+dy+dz)) + + cos_amp * cos(cos_freq*(dx+dy+dz)); + + const double x_comp = dx*dx/sigma_sq_x; + const double y_comp = dy*dy/sigma_sq_y; + const double z_comp = dz*dz/sigma_sq_z; + + const double gaussian_part = -gaussian_amp/exp(sqrt(x_comp+y_comp+z_comp)); + + return gaussian_part*gaussian_part; +} + +static double get_rand() { + double r3 = LO + static_cast(rand())/(static_cast(RAND_MAX/(HI-LO))); + return r3; +} + +static void randomizer() +{ + sin_amp = get_rand(); + cos_amp = get_rand(); + sin_freq = get_rand(); + cos_freq = get_rand(); + sigma_x = get_rand(); + sigma_y = get_rand(); + sigma_z = get_rand(); + center_x = get_rand()*L/(2.0*HI); + center_y = get_rand()*L/(2.0*HI); + center_z = get_rand()*L/(2.0*HI); + gaussian_amp = get_rand(); + sigma_sq_x = sigma_x*sigma_x; + 
sigma_sq_y = sigma_y*sigma_y; + sigma_sq_z = sigma_z*sigma_z; +} + +class alpha_functor : public FunctionFunctorInterface { +private: + double coeff; +public: + alpha_functor(double coeff=1.0) : coeff(coeff) {} + + virtual double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + return (coeff * (x*x + y*y + z*z) * sin(x*x + y*y + z*z)); + } +}; +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +typedef Function functionT; +typedef std::vector vecfuncT; + +void checkCorrectness(World& world, vecfuncT &f, vecfuncT &g) +{ + double f_norm = 0.0; + double f_trace = 0.0; + double g_norm = 0.0; + double g_trace = 0.0; + + for (int i=0; i 64 x 64 + int FUNC_SIZE = 2; + int FUNC_SIZE_M = 2; + int type = 0; + + if (argc == 5) + { + FUNC_SIZE = atoi(argv[1]); + FUNC_SIZE_M = atoi(argv[2]); + thresh = atof(argv[3]); + max_refine_level = atoi(argv[4]); + } + + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) printf(" Micro Benchmark #2 \n"); + if (world.rank() == 0) printf(" %d functions based on %d and %d random functions\n", FUNC_SIZE*FUNC_SIZE_M, FUNC_SIZE, FUNC_SIZE_M); + if (world.rank() == 0) printf(" threshold: %13.4g, max_refine_level: %d\n", thresh, max_refine_level); + if (world.rank() == 0) print 
("===================================================="); + world.gop.fence(); + + // Setting FunctionDefaults + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + FunctionDefaults<3>::set_max_refine_level(max_refine_level); + + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("== Initializing Functions ======================"); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // 2 * N Functions + real_function_3d h[FUNC_SIZE]; + real_function_3d g[FUNC_SIZE_M]; + real_function_3d output[FUNC_SIZE*FUNC_SIZE_M]; + real_function_3d output2[FUNC_SIZE*FUNC_SIZE_M]; + + real_function_3d result_factory(world); + real_function_3d result(result_factory); + + int i, j; + double clkbegin, clkend; + double clkReconstruct, clkDerivative, clkCompress, clkMatrixInner; + clkbegin = rtclock(); + + // M functions + for (i=0; i r_2= Tensor(FUNC_SIZE*FUNC_SIZE_M/2, FUNC_SIZE*FUNC_SIZE_M/2); + clkbegin = rtclock(); + + std::vector< std::shared_ptr > gradop; + gradop = gradient_operator(world); + + //distmatT r = column_distributed_matrix(world, n, n); + reconstruct(world, v_f); + reconstruct(world, v_g); + clkReconstruct = rtclock(); + vecfuncT dvx_bra = apply(world, *(gradop[0]), v_f, false); + vecfuncT dvy_bra = apply(world, *(gradop[1]), v_f, false); + vecfuncT dvz_bra = apply(world, *(gradop[2]), v_f, false); + vecfuncT dvx_ket = apply(world, *(gradop[0]), v_g, false); + vecfuncT dvy_ket = apply(world, *(gradop[1]), v_g, false); + vecfuncT dvz_ket = apply(world, *(gradop[2]), v_g, false); + world.gop.fence(); + clkDerivative = rtclock(); + compress(world,dvx_bra,false); + compress(world,dvy_bra,false); + compress(world,dvz_bra,false); + compress(world,dvx_ket,false); + 
compress(world,dvy_ket,false); + compress(world,dvz_ket,false); + world.gop.fence(); + clkCompress = rtclock(); + r_2 += matrix_inner(world, dvx_bra, dvx_ket); + r_2 += matrix_inner(world, dvy_bra, dvy_ket); + r_2 += matrix_inner(world, dvz_bra, dvz_ket); + r_2 *= 0.5; + clkMatrixInner = rtclock(); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) + { + printf("Running Time--- Reconstruct: %f\n", clkReconstruct - clkbegin); + printf("Running Time--- Derivative: %f\n", clkDerivative - clkReconstruct); + printf("Running Time--- Compress: %f\n", clkCompress - clkDerivative); + printf("Running Time--- MatrixInner: %f\n", clkMatrixInner - clkCompress); + printf("Running Time--- Overall: %f\n", clkend); + } + world.gop.fence(); + + if (world.rank() == 0) + { + for (i=0; i r= Tensor(FUNC_SIZE*FUNC_SIZE_M/2, FUNC_SIZE*FUNC_SIZE_M/2); + + // Results for ReconstructOp + real_function_3d reconstruct_factory_h[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d reconstruct_factory_g[FUNC_SIZE_M*FUNC_SIZE/2]; + real_function_3d* reconstruct_h[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d* reconstruct_g[FUNC_SIZE_M*FUNC_SIZE/2]; + + for (i=0; i(world,0); + real_derivative_3d D_h_y = free_space_derivative(world,1); + real_derivative_3d D_h_z = free_space_derivative(world,2); + real_derivative_3d D_g_x = free_space_derivative(world,0); + real_derivative_3d D_g_y = free_space_derivative(world,1); + real_derivative_3d D_g_z = free_space_derivative(world,2); + + // Results for CompressOp + real_function_3d compress_factory_h_x[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_factory_h_y[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_factory_h_z[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_factory_g_x[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_factory_g_y[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_factory_g_z[FUNC_SIZE*FUNC_SIZE_M/2]; + + real_function_3d compress_h_x[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d 
compress_h_y[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_h_z[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_g_x[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_g_y[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_g_z[FUNC_SIZE*FUNC_SIZE_M/2]; + + for (i=0; i* reconstruct_op_h[FUNC_SIZE*FUNC_SIZE_M/2]; // vbra + ReconstructOp* reconstruct_op_g[FUNC_SIZE*FUNC_SIZE_M/2]; // bket + + for (i=0; i("ReconstructOp", reconstruct_h[i], &output[i]); + reconstruct_op_g[i] = new ReconstructOp("ReconstructOp", reconstruct_g[i], &output[i + (FUNC_SIZE*FUNC_SIZE_M/2)]); + } + + + // Derivative Op + DerivativeOp* derivative_op_x_b[FUNC_SIZE*FUNC_SIZE_M/2]; + DerivativeOp* derivative_op_y_b[FUNC_SIZE*FUNC_SIZE_M/2]; + DerivativeOp* derivative_op_z_b[FUNC_SIZE*FUNC_SIZE_M/2]; + DerivativeOp* derivative_op_x_k[FUNC_SIZE*FUNC_SIZE_M/2]; + DerivativeOp* derivative_op_y_k[FUNC_SIZE*FUNC_SIZE_M/2]; + DerivativeOp* derivative_op_z_k[FUNC_SIZE*FUNC_SIZE_M/2]; + + //vecfuncT dvx_bra = apply(world, *(gradop[0]), v_f, false); + for (i=0; i("Derivative00",derivative_h_x[i],reconstruct_h[i], world,&D_h_x); + derivative_op_y_b[i] = new DerivativeOp("Derivative01",derivative_h_y[i],reconstruct_h[i],world,&D_h_y); + derivative_op_z_b[i] = new DerivativeOp("Derivative02",derivative_h_z[i],reconstruct_h[i],world,&D_h_z); + } + + for (i=0; i("Derivative10",derivative_g_x[i],reconstruct_g[i], world,&D_g_x); + derivative_op_y_k[i] = new DerivativeOp("Derivative11",derivative_g_y[i],reconstruct_g[i],world,&D_g_y); + derivative_op_z_k[i] = new DerivativeOp("Derivative12",derivative_g_z[i],reconstruct_g[i],world,&D_g_z); + } + + + // Compress Op + CompressOp* compress_op_x_b[FUNC_SIZE*FUNC_SIZE_M/2]; + CompressOp* compress_op_y_b[FUNC_SIZE*FUNC_SIZE_M/2]; + CompressOp* compress_op_z_b[FUNC_SIZE*FUNC_SIZE_M/2]; + CompressOp* compress_op_x_k[FUNC_SIZE*FUNC_SIZE_M/2]; + CompressOp* compress_op_y_k[FUNC_SIZE*FUNC_SIZE_M/2]; + CompressOp* compress_op_z_k[FUNC_SIZE*FUNC_SIZE_M/2]; + + 
for (i=0; i("CompressOp",&compress_h_x[i],derivative_h_x[i]); + compress_op_y_b[i] = new CompressOp("CompressOp",&compress_h_y[i],derivative_h_y[i]); + compress_op_z_b[i] = new CompressOp("CompressOp",&compress_h_z[i],derivative_h_z[i]); + compress_op_x_k[i] = new CompressOp("CompressOp",&compress_g_x[i],derivative_g_x[i]); + compress_op_y_k[i] = new CompressOp("CompressOp",&compress_g_y[i],derivative_g_y[i]); + compress_op_z_k[i] = new CompressOp("CompressOp",&compress_g_z[i],derivative_g_z[i]); + } + + + // MatrixInner Op + vecfuncT h_x; + vecfuncT h_y; + vecfuncT h_z; + vecfuncT g_x; + vecfuncT g_y; + vecfuncT g_z; + + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); +// +// + //clkbegin = rtclock(); + vector*> sequence; + vector*> sequenceRight; + vector*> sequenceOverall; + + // Pushing ReconstructOp + for (i=0; i* matrixinner_op_a = new MatrixInnerOp("MatrixInner", &matrixinner_x, h_x, g_x, false, false); + MatrixInnerOp* matrixinner_op_b = new MatrixInnerOp("MatrixInner", &matrixinner_y, h_y, g_y, false, false); + MatrixInnerOp* matrixinner_op_c = new MatrixInnerOp("MatrixInner", &matrixinner_z, h_z, g_z, false, false); + + // Pushing MatrixInnerOp + sequenceOverall.push_back(matrixinner_op_a); + sequenceOverall.push_back(matrixinner_op_b); + sequenceOverall.push_back(matrixinner_op_c); + + // Processing a sequence of Operators + FuseT odag(sequence); + FuseT odagRight(sequenceRight); + FuseT odagOverall(sequenceOverall); + odag.processSequence(); + odagRight.processSequence(); + odagOverall.processSequence(); + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedOpSequence fsequenceRight = odagRight.getFusedOpSequence(); + FusedOpSequence fsequenceOverall = odagOverall.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + FusedExecutor fexecuterRight(world, &fsequenceRight); + FusedExecutor fexecuterOverall(world, &fsequenceOverall); + clkbegin = rtclock(); + 
fexecuter.execute(); + fexecuterRight.execute(); + fexecuterOverall.execute(); + + r += (*matrixinner_op_a->_r); + r += (*matrixinner_op_b->_r); + r += (*matrixinner_op_c->_r); + r *= 0.5; + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); + + //printf ("1: %f\n", (*matrixinner_op_a->_r)); + + +/* + vecfuncT abc; + vecfuncT bcd; + + for (i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +//static const double thresh = 1e-6; // precision // w/o diff. and 1e-12 -> 64 x 64 +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +#define VMRA +#define PI 3.1415926535897932385 +#define LO 0.0000000000 +#define HI 4.0000000000 + +static double sin_amp = 1.0; +static double cos_amp = 1.0; +static double sin_freq = 1.0; +static double cos_freq = 1.0; +static double sigma_x = 1.0; +static double sigma_y = 1.0; +static double sigma_z = 1.0; +static double center_x = 0.0; +static double center_y = 0.0; +static double center_z = 0.0; +static double gaussian_amp = 1.0; +static double sigma_sq_x = sigma_x*sigma_x; +static double sigma_sq_y = sigma_y*sigma_y; +static double sigma_sq_z = sigma_z*sigma_z; + +double rtclock(); + +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+3.2*y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5); +} +static double random_function(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + + const double dx = x - center_x; + const double dy = y - center_y; + const double dz = z - center_z; + + const double periodic_part = sin_amp * sin(sin_freq*(dx+dy+dz)) + + cos_amp * cos(cos_freq*(dx+dy+dz)); + + 
const double x_comp = dx*dx/sigma_sq_x; + const double y_comp = dy*dy/sigma_sq_y; + const double z_comp = dz*dz/sigma_sq_z; + + const double gaussian_part = -gaussian_amp/exp(sqrt(x_comp+y_comp+z_comp)); + + return gaussian_part*gaussian_part; +} + +static double get_rand() { + double r3 = LO + static_cast(rand())/(static_cast(RAND_MAX/(HI-LO))); + return r3; +} + +static void randomizer() +{ + sin_amp = get_rand(); + cos_amp = get_rand(); + sin_freq = get_rand(); + cos_freq = get_rand(); + sigma_x = get_rand(); + sigma_y = get_rand(); + sigma_z = get_rand(); + center_x = get_rand()*L/(2.0*HI); + center_y = get_rand()*L/(2.0*HI); + center_z = get_rand()*L/(2.0*HI); + gaussian_amp = get_rand(); + sigma_sq_x = sigma_x*sigma_x; + sigma_sq_y = sigma_y*sigma_y; + sigma_sq_z = sigma_z*sigma_z; +} + +class alpha_functor : public FunctionFunctorInterface { +private: + double coeff; +public: + alpha_functor(double coeff=1.0) : coeff(coeff) {} + + virtual double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + return (coeff * (x*x + y*y + z*z) * sin(x*x + y*y + z*z)); + } +}; +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +typedef Function functionT; +typedef std::vector vecfuncT; + +void checkCorrectness(World& world, vecfuncT &f, vecfuncT &g) +{ + double f_norm = 0.0; + double f_trace = 0.0; + double g_norm = 0.0; + double g_trace = 0.0; + + for (int i=0; i 64 x 64 + int FUNC_SIZE = 2; + 
int FUNC_SIZE_M = 2; + int type = 0; + + if (argc == 5) + { + FUNC_SIZE = atoi(argv[1]); + FUNC_SIZE_M = atoi(argv[2]); + thresh = atof(argv[3]); + max_refine_level = atoi(argv[4]); + } + + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) printf(" Micro Benchmark #2 \n"); + if (world.rank() == 0) printf(" %d functions based on %d and %d random functions\n", FUNC_SIZE*FUNC_SIZE_M, FUNC_SIZE, FUNC_SIZE_M); + if (world.rank() == 0) printf(" threshold: %13.4g, max_refine_level: %d\n", thresh, max_refine_level); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // Setting FunctionDefaults + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + FunctionDefaults<3>::set_max_refine_level(max_refine_level); + + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("== Initializing Functions ======================"); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // 2 * N Functions + real_function_3d h[FUNC_SIZE]; + real_function_3d g[FUNC_SIZE_M]; + real_function_3d output[FUNC_SIZE*FUNC_SIZE_M]; + real_function_3d output2[FUNC_SIZE*FUNC_SIZE_M]; + + real_function_3d result_factory(world); + real_function_3d result(result_factory); + + int i, j; + double clkbegin, clkend; + double clkReconstruct, clkDerivative, clkCompress, clkMatrixInner; + clkbegin = rtclock(); + + // M functions + for (i=0; i r_2= Tensor(FUNC_SIZE*FUNC_SIZE_M/2, FUNC_SIZE*FUNC_SIZE_M/2); + clkbegin = rtclock(); + + std::vector< std::shared_ptr > gradop; + gradop = gradient_operator(world); + + 
//distmatT r = column_distributed_matrix(world, n, n); + reconstruct(world, v_f); + reconstruct(world, v_g); + clkReconstruct = rtclock(); + vecfuncT dvx_bra = apply(world, *(gradop[0]), v_f, false); + vecfuncT dvy_bra = apply(world, *(gradop[1]), v_f, false); + vecfuncT dvz_bra = apply(world, *(gradop[2]), v_f, false); + vecfuncT dvx_ket = apply(world, *(gradop[0]), v_g, false); + vecfuncT dvy_ket = apply(world, *(gradop[1]), v_g, false); + vecfuncT dvz_ket = apply(world, *(gradop[2]), v_g, false); + world.gop.fence(); + clkDerivative = rtclock(); + compress(world,dvx_bra,false); + compress(world,dvy_bra,false); + compress(world,dvz_bra,false); + compress(world,dvx_ket,false); + compress(world,dvy_ket,false); + compress(world,dvz_ket,false); + world.gop.fence(); + clkCompress = rtclock(); + r_2 += matrix_inner(world, dvx_bra, dvx_ket); + r_2 += matrix_inner(world, dvy_bra, dvy_ket); + r_2 += matrix_inner(world, dvz_bra, dvz_ket); + r_2 *= 0.5; + clkMatrixInner = rtclock(); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) + { + printf("Running Time--- Reconstruct: %f\n", clkReconstruct - clkbegin); + printf("Running Time--- Derivative: %f\n", clkDerivative - clkReconstruct); + printf("Running Time--- Compress: %f\n", clkCompress - clkDerivative); + printf("Running Time--- MatrixInner: %f\n", clkMatrixInner - clkCompress); + printf("Running Time--- Overall: %f\n", clkend); + } + world.gop.fence(); +/* + if (world.rank() == 0) + { + for (i=0; i r= Tensor(FUNC_SIZE*FUNC_SIZE_M/2, FUNC_SIZE*FUNC_SIZE_M/2); + + // Results for ReconstructOp + real_function_3d reconstruct_factory_h[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d reconstruct_factory_g[FUNC_SIZE_M*FUNC_SIZE/2]; + real_function_3d* reconstruct_h[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d* reconstruct_g[FUNC_SIZE_M*FUNC_SIZE/2]; + + for (i=0; i(world,0); + real_derivative_3d D_h_y = free_space_derivative(world,1); + real_derivative_3d D_h_z = free_space_derivative(world,2); + real_derivative_3d 
D_g_x = free_space_derivative(world,0); + real_derivative_3d D_g_y = free_space_derivative(world,1); + real_derivative_3d D_g_z = free_space_derivative(world,2); + + // Results for CompressOp + real_function_3d compress_factory_h_x[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_factory_h_y[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_factory_h_z[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_factory_g_x[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_factory_g_y[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_factory_g_z[FUNC_SIZE*FUNC_SIZE_M/2]; + + real_function_3d compress_h_x[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_h_y[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_h_z[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_g_x[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_g_y[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_g_z[FUNC_SIZE*FUNC_SIZE_M/2]; + + for (i=0; i* reconstruct_op_h[FUNC_SIZE*FUNC_SIZE_M/2]; // vbra + ReconstructOp* reconstruct_op_g[FUNC_SIZE*FUNC_SIZE_M/2]; // bket + + for (i=0; i("ReconstructOp", reconstruct_h[i], &output[i]); + reconstruct_op_g[i] = new ReconstructOp("ReconstructOp", reconstruct_g[i], &output[i + (FUNC_SIZE*FUNC_SIZE_M/2)]); + } + + + // Derivative Op + DerivativeOp* derivative_op_x_b[FUNC_SIZE*FUNC_SIZE_M/2]; + DerivativeOp* derivative_op_y_b[FUNC_SIZE*FUNC_SIZE_M/2]; + DerivativeOp* derivative_op_z_b[FUNC_SIZE*FUNC_SIZE_M/2]; + DerivativeOp* derivative_op_x_k[FUNC_SIZE*FUNC_SIZE_M/2]; + DerivativeOp* derivative_op_y_k[FUNC_SIZE*FUNC_SIZE_M/2]; + DerivativeOp* derivative_op_z_k[FUNC_SIZE*FUNC_SIZE_M/2]; + + //vecfuncT dvx_bra = apply(world, *(gradop[0]), v_f, false); + for (i=0; i("Derivative00",derivative_h_x[i],reconstruct_h[i], world,&D_h_x); + derivative_op_y_b[i] = new DerivativeOp("Derivative01",derivative_h_y[i],reconstruct_h[i],world,&D_h_y); + derivative_op_z_b[i] = new 
DerivativeOp("Derivative02",derivative_h_z[i],reconstruct_h[i],world,&D_h_z); + //derivative_op_y_b[i] = new DerivativeOp("Derivative01",derivative_h_y[i],derivative_h_x[i],world,&D_h_y); + //derivative_op_z_b[i] = new DerivativeOp("Derivative02",derivative_h_z[i],derivative_h_y[i],world,&D_h_z); + } + + for (i=0; i("Derivative10",derivative_g_x[i],reconstruct_g[i], world,&D_g_x); + derivative_op_y_k[i] = new DerivativeOp("Derivative11",derivative_g_y[i],reconstruct_g[i],world,&D_g_y); + derivative_op_z_k[i] = new DerivativeOp("Derivative12",derivative_g_z[i],reconstruct_g[i],world,&D_g_z); + //derivative_op_y_k[i] = new DerivativeOp("Derivative11",derivative_g_y[i],derivative_g_x[i],world,&D_g_y); + //derivative_op_z_k[i] = new DerivativeOp("Derivative12",derivative_g_z[i],derivative_g_y[i],world,&D_g_z); + } + + + // Compress Op + CompressOp* compress_op_x_b[FUNC_SIZE*FUNC_SIZE_M/2]; + CompressOp* compress_op_y_b[FUNC_SIZE*FUNC_SIZE_M/2]; + CompressOp* compress_op_z_b[FUNC_SIZE*FUNC_SIZE_M/2]; + CompressOp* compress_op_x_k[FUNC_SIZE*FUNC_SIZE_M/2]; + CompressOp* compress_op_y_k[FUNC_SIZE*FUNC_SIZE_M/2]; + CompressOp* compress_op_z_k[FUNC_SIZE*FUNC_SIZE_M/2]; + + for (i=0; i("CompressOp",&compress_h_x[i],derivative_h_x[i]); + compress_op_y_b[i] = new CompressOp("CompressOp",&compress_h_y[i],derivative_h_y[i]); + compress_op_z_b[i] = new CompressOp("CompressOp",&compress_h_z[i],derivative_h_z[i]); + compress_op_x_k[i] = new CompressOp("CompressOp",&compress_g_x[i],derivative_g_x[i]); + compress_op_y_k[i] = new CompressOp("CompressOp",&compress_g_y[i],derivative_g_y[i]); + compress_op_z_k[i] = new CompressOp("CompressOp",&compress_g_z[i],derivative_g_z[i]); + } + + + // MatrixInner Op + vecfuncT h_x; + vecfuncT h_y; + vecfuncT h_z; + vecfuncT g_x; + vecfuncT g_y; + vecfuncT g_z; + + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); +// +// + //clkbegin = rtclock(); + vector*> sequence; + + // Pushing 
ReconstructOp + for (i=0; i* matrixinner_op_a = new MatrixInnerOp("MatrixInner", &matrixinner_x, h_x, g_x, false, false); + MatrixInnerOp* matrixinner_op_b = new MatrixInnerOp("MatrixInner", &matrixinner_y, h_y, g_y, false, false); + MatrixInnerOp* matrixinner_op_c = new MatrixInnerOp("MatrixInner", &matrixinner_z, h_z, g_z, false, false); + + // Pushing MatrixInnerOp + sequence.push_back(matrixinner_op_a); + sequence.push_back(matrixinner_op_b); + sequence.push_back(matrixinner_op_c); + + // Processing a sequence of Operators + FuseT odag(sequence); + odag.processSequence(); + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + clkbegin = rtclock(); + fexecuter.execute(); + + r += (*matrixinner_op_a->_r); + r += (*matrixinner_op_b->_r); + r += (*matrixinner_op_c->_r); + r *= 0.5; + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); + + //printf ("1: %f\n", (*matrixinner_op_a->_r)); + + +/* + vecfuncT abc; + vecfuncT bcd; + + for (i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +//static const double thresh = 1e-6; // precision // w/o diff. 
and 1e-12 -> 64 x 64 +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +#define VMRA_1 +#define PI 3.1415926535897932385 +#define LO 0.0000000000 +#define HI 4.0000000000 + +static double sin_amp = 1.0; +static double cos_amp = 1.0; +static double sin_freq = 1.0; +static double cos_freq = 1.0; +static double sigma_x = 1.0; +static double sigma_y = 1.0; +static double sigma_z = 1.0; +static double center_x = 0.0; +static double center_y = 0.0; +static double center_z = 0.0; +static double gaussian_amp = 1.0; +static double sigma_sq_x = sigma_x*sigma_x; +static double sigma_sq_y = sigma_y*sigma_y; +static double sigma_sq_z = sigma_z*sigma_z; + +double rtclock(); + +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+3.2*y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5); +} +static double random_function(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + + const double dx = x - center_x; + const double dy = y - center_y; + const double dz = z - center_z; + + const double periodic_part = sin_amp * sin(sin_freq*(dx+dy+dz)) + + cos_amp * cos(cos_freq*(dx+dy+dz)); + + const double x_comp = dx*dx/sigma_sq_x; + const double y_comp = dy*dy/sigma_sq_y; + const double z_comp = dz*dz/sigma_sq_z; + + const double gaussian_part = -gaussian_amp/exp(sqrt(x_comp+y_comp+z_comp)); + + return gaussian_part*gaussian_part; +} + +static double get_rand() { + double r3 = LO + static_cast(rand())/(static_cast(RAND_MAX/(HI-LO))); + return r3; +} + +static void randomizer() +{ + sin_amp = get_rand(); + cos_amp = get_rand(); + sin_freq = get_rand(); + cos_freq = get_rand(); + sigma_x = get_rand(); + sigma_y = get_rand(); + sigma_z = get_rand(); + center_x = get_rand()*L/(2.0*HI); + center_y = get_rand()*L/(2.0*HI); + center_z = get_rand()*L/(2.0*HI); + gaussian_amp = get_rand(); + sigma_sq_x = sigma_x*sigma_x; + 
sigma_sq_y = sigma_y*sigma_y; + sigma_sq_z = sigma_z*sigma_z; +} + +class alpha_functor : public FunctionFunctorInterface { +private: + double coeff; +public: + alpha_functor(double coeff=1.0) : coeff(coeff) {} + + virtual double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + return (coeff * (x*x + y*y + z*z) * sin(x*x + y*y + z*z)); + } +}; +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +typedef Function functionT; +typedef std::vector vecfuncT; + +struct write_test_input { + + double eprec=FunctionDefaults<3>::get_thresh()*0.1; + + std::string filename_; + write_test_input(std::string mol="lih") : filename_("test_input") { + std::ofstream of(filename_); + of << "dft\n"; + of << "xc hf\n"; + of << "no_orient\n"; + of << "k 8\n"; + of << "protocol 1.e-5 \n"; + of << "nuclear_corrfac slater 2.0\n"; + of << "end\n"; + + if (mol=="lih") { + of << "geometry\n"; + of << "eprec " << eprec << std::endl; + of << "Li 0.0 0.0 0.0\n"; + of << "H 1.4375 0.0 0.0\n"; + of << "end\n"; + } else if (mol=="hf") { + double eprec=1.e-5; + of << "geometry\n"; + of << "eprec " << eprec << std::endl; + of << "F 0.1 0.0 0.2\n"; + of << "H 1.4375 0.0 0.0\n"; + of << "end\n"; + } + of.close(); + } + + ~write_test_input() { + std::remove(filename_.c_str()); + } + + std::string filename() const {return filename_;} +}; + +int main(int argc, char** argv) +{ + // input + // (1) M -- M and M functions + // 
(2) thresh -- threshold + // (3) max-refine -- max-refine-level + // (4) type -- 0: all, 1: FuseT, 2: vmra, 3: OpExecutor + + // M1. Kinetic Energy Matrix Calculation : vmra.h vs FusedExecutor (Reconstruct + DerivativeOp + CompressOp + InnerMatrixOp) + int max_refine_level = 14; // + double thresh = 1e-12; // precision // w/o diff. and 1e-12 -> 64 x 64 + int FUNC_SIZE = 4; + int FUNC_SIZE_M = 4; + int type = 0; + + if (argc == 5) + { + FUNC_SIZE = atoi(argv[1]); + FUNC_SIZE_M = FUNC_SIZE; + max_refine_level = atoi(argv[3]); + thresh = atof(argv[2]); + type = atoi(argv[4]); + } + + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) printf(" Micro Benchmark #2 \n"); + if (world.rank() == 0) printf(" %d functions based on %d and %d random functions\n", FUNC_SIZE*FUNC_SIZE_M, FUNC_SIZE, FUNC_SIZE_M); + if (world.rank() == 0) printf(" threshold: %13.4g, max_refine_level: %d\n", thresh, max_refine_level); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // Setting FunctionDefaults + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + FunctionDefaults<3>::set_max_refine_level(max_refine_level); + + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("== Initializing Functions ======================"); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // 2 * N Functions + real_function_3d h[FUNC_SIZE]; + real_function_3d g[FUNC_SIZE_M]; + real_function_3d output[FUNC_SIZE*FUNC_SIZE_M]; + + real_function_3d result_factory(world); + real_function_3d result(result_factory); 
+ + int i, j; + double clkbegin, clkend; + clkbegin = rtclock(); + + // M functions + for (i=0; i(world, n, n); + reconstruct(world, vbra); + reconstruct(world, vket); + } +*/ + // Results for ReconstructOp + real_function_3d reconstruct_factory_h[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d reconstruct_factory_g[FUNC_SIZE_M*FUNC_SIZE/2]; + real_function_3d* reconstruct_h[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d* reconstruct_g[FUNC_SIZE_M*FUNC_SIZE/2]; + + for (i=0; i(world,0); + real_derivative_3d D_h_y = free_space_derivative(world,1); + real_derivative_3d D_h_z = free_space_derivative(world,2); + //real_derivative_3d D_g_x = free_space_derivative(world,0); + //real_derivative_3d D_g_y = free_space_derivative(world,1); + //real_derivative_3d D_g_z = free_space_derivative(world,2); + + // Results for CompressOp + real_function_3d compress_factory_h_x[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_factory_h_y[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_factory_h_z[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_factory_g_x[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_factory_g_y[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d compress_factory_g_z[FUNC_SIZE*FUNC_SIZE_M/2]; + + real_function_3d* compress_h_x[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d* compress_h_y[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d* compress_h_z[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d* compress_g_x[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d* compress_g_y[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d* compress_g_z[FUNC_SIZE*FUNC_SIZE_M/2]; + + for (i=0; i* reconstruct_op_h[FUNC_SIZE*FUNC_SIZE_M/2]; // vbra + ReconstructOp* reconstruct_op_g[FUNC_SIZE*FUNC_SIZE_M/2]; // bket + + for (i=0; i("ReconstructOp", reconstruct_h[i], &output[i]); + reconstruct_op_g[i] = new ReconstructOp("ReconstructOp", reconstruct_g[i], &output[i + (FUNC_SIZE*FUNC_SIZE_M/2)]); + } + +/* + vecfuncT dvx_bra = apply(world, *(gradop[0]), vbra, false); + vecfuncT dvy_bra = 
apply(world, *(gradop[1]), vbra, false); + vecfuncT dvz_bra = apply(world, *(gradop[2]), vbra, false); + vecfuncT dvx_ket = apply(world, *(gradop[0]), vket, false); + vecfuncT dvy_ket = apply(world, *(gradop[1]), vket, false); + vecfuncT dvz_ket = apply(world, *(gradop[2]), vket, false); + world.gop.fence(); +*/ + + // Derivative Op + DerivativeOp* derivative_op_x_b[FUNC_SIZE*FUNC_SIZE_M/2]; + DerivativeOp* derivative_op_y_b[FUNC_SIZE*FUNC_SIZE_M/2]; + DerivativeOp* derivative_op_z_b[FUNC_SIZE*FUNC_SIZE_M/2]; + DerivativeOp* derivative_op_x_k[FUNC_SIZE*FUNC_SIZE_M/2]; + DerivativeOp* derivative_op_y_k[FUNC_SIZE*FUNC_SIZE_M/2]; + DerivativeOp* derivative_op_z_k[FUNC_SIZE*FUNC_SIZE_M/2]; + + for (i=0; i("Derivative00",derivative_h_x[i],reconstruct_h[i], world,&D_h_x); + derivative_op_y_b[i] = new DerivativeOp("Derivative01",derivative_h_y[i],derivative_h_x[i],world,&D_h_y); + derivative_op_z_b[i] = new DerivativeOp("Derivative02",derivative_h_z[i],derivative_h_y[i],world,&D_h_z); + } + + for (i=0; i("Derivative10",derivative_g_x[i],reconstruct_g[i], world,&D_h_x); + derivative_op_y_k[i] = new DerivativeOp("Derivative11",derivative_g_y[i],derivative_g_x[i],world,&D_h_y); + derivative_op_z_k[i] = new DerivativeOp("Derivative12",derivative_g_z[i],derivative_g_y[i],world,&D_h_z); + } + + + // Compress Op + CompressOp* compress_op_x_b[FUNC_SIZE]; + CompressOp* compress_op_y_b[FUNC_SIZE]; + CompressOp* compress_op_z_b[FUNC_SIZE]; + CompressOp* compress_op_x_k[FUNC_SIZE]; + CompressOp* compress_op_y_k[FUNC_SIZE]; + CompressOp* compress_op_z_k[FUNC_SIZE]; + + for (i=0; i("CompressOp",compress_h_x[i],derivative_h_x[i]); + compress_op_y_b[i] = new CompressOp("CompressOp",compress_h_y[i],derivative_h_y[i]); + compress_op_z_b[i] = new CompressOp("CompressOp",compress_h_z[i],derivative_h_z[i]); + compress_op_x_k[i] = new CompressOp("CompressOp",compress_g_x[i],derivative_g_x[i]); + compress_op_y_k[i] = new CompressOp("CompressOp",compress_g_y[i],derivative_g_y[i]); + 
compress_op_z_k[i] = new CompressOp("CompressOp",compress_g_z[i],derivative_g_z[i]); + } + +/* + r += matrix_inner(r.distribution(), dvx_bra, dvx_ket, true); + r += matrix_inner(r.distribution(), dvy_bra, dvy_ket, true); + r += matrix_inner(r.distribution(), dvz_bra, dvz_ket, true); + r *= 0.5; + return r; +*/ + + // MatrixInner Op + vecfuncT h_x; + vecfuncT h_y; + vecfuncT h_z; + vecfuncT g_x; + vecfuncT g_y; + vecfuncT g_z; + + for (i=0; i* matrixinner_op_a = new MatrixInnerOp("MatrixInner", &matrixinner_x, h_x, g_x, true, false); + MatrixInnerOp* matrixinner_op_b = new MatrixInnerOp("MatrixInner", &matrixinner_y, h_y, g_y, true, false); + MatrixInnerOp* matrixinner_op_c = new MatrixInnerOp("MatrixInner", &matrixinner_z, h_z, g_z, true, false); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); +// +// + clkbegin = rtclock(); + vector*> sequence; + + // Pushing ReconstructOp + for (i=0; i* matrixinner_op_a = new MatrixInnerOp("MatrixInner", &matrixinner_x, h_x, g_x, true); + //MatrixInnerOp* matrixinner_op_b = new MatrixInnerOp("MatrixInner", &matrixinner_y, h_y, g_y, true); + //MatrixInnerOp* matrixinner_op_c = new MatrixInnerOp("MatrixInner", &matrixinner_z, h_z, g_z, true); + + sequence.push_back(matrixinner_op_a); + sequence.push_back(matrixinner_op_b); + sequence.push_back(matrixinner_op_c); +*/ + + + // Processing a sequence of Operators + FuseT odag(sequence); + odag.processSequence(); + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + fexecuter.execute(); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf ("Done!\n"); + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); +/* + if (world.rank() == 0) + for (i=0; i_sum); + } + } + world.gop.fence(); +*/ + + finalize(); + return 0; +} + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday (&Tp, 
&Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + diff --git a/src/examples/FuseTExamples/SC2016/sc_m1.cc b/src/examples/FuseTExamples/SC2016/sc_m1.cc new file mode 100644 index 00000000000..273693eab09 --- /dev/null +++ b/src/examples/FuseTExamples/SC2016/sc_m1.cc @@ -0,0 +1,403 @@ +/* + This file is part of MADNESS. + + Copyright (C) 2007,2010 Oak Ridge National Laboratory + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + For more information please contact: + + Robert J. Harrison + Oak Ridge National Laboratory + One Bethel Valley Road + P.O. Box 2008, MS-6367 + + email: harrisonrj@ornl.gov + tel: 865-241-3937 + fax: 865-572-0680 + + $Id$ +*/ +//#define WORLD_INSTANTIATE_STATIC_TEMPLATES +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +//static const double thresh = 1e-6; // precision // w/o diff. 
and 1e-12 -> 64 x 64 +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +#define PI 3.1415926535897932385 +#define LO 0.0000000000 +#define HI 4.0000000000 + +static double sin_amp = 1.0; +static double cos_amp = 1.0; +static double sin_freq = 1.0; +static double cos_freq = 1.0; +static double sigma_x = 1.0; +static double sigma_y = 1.0; +static double sigma_z = 1.0; +static double center_x = 0.0; +static double center_y = 0.0; +static double center_z = 0.0; +static double gaussian_amp = 1.0; +static double sigma_sq_x = sigma_x*sigma_x; +static double sigma_sq_y = sigma_y*sigma_y; +static double sigma_sq_z = sigma_z*sigma_z; + +double rtclock(); + +static double random_function(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + + const double dx = x - center_x; + const double dy = y - center_y; + const double dz = z - center_z; + + const double periodic_part = sin_amp * sin(sin_freq*(dx+dy+dz)) + + cos_amp * cos(cos_freq*(dx+dy+dz)); + + const double x_comp = dx*dx/sigma_sq_x; + const double y_comp = dy*dy/sigma_sq_y; + const double z_comp = dz*dz/sigma_sq_z; + + const double gaussian_part = -gaussian_amp/exp(sqrt(x_comp+y_comp+z_comp)); + + return gaussian_part*gaussian_part; +} + +static double get_rand() { + double r3 = LO + static_cast(rand())/(static_cast(RAND_MAX/(HI-LO))); + return r3; +} + +static void randomizer() +{ + sin_amp = get_rand(); + cos_amp = get_rand(); + sin_freq = get_rand(); + cos_freq = get_rand(); + sigma_x = get_rand(); + sigma_y = get_rand(); + sigma_z = get_rand(); + center_x = get_rand()*L/(2.0*HI); + center_y = get_rand()*L/(2.0*HI); + center_z = get_rand()*L/(2.0*HI); + gaussian_amp = get_rand(); + sigma_sq_x = sigma_x*sigma_x; + sigma_sq_y = sigma_y*sigma_y; + sigma_sq_z = sigma_z*sigma_z; +} + +class alpha_functor : public FunctionFunctorInterface { +private: + double coeff; +public: + alpha_functor(double 
coeff=1.0) : coeff(coeff) {} + + virtual double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + return (coeff * (x*x + y*y + z*z) * sin(x*x + y*y + z*z)); + } +}; +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +typedef Function functionT; +typedef std::vector vecfuncT; + +int main(int argc, char** argv) +{ + // input + // (1) M -- M and M functions + // (2) thresh -- threshold + // (3) max-refine -- max-refine-level + // (4) type -- 0: all, 1: FuseT, 2: vmra, 3: OpExecutor + + // m1. MatrixInner : MatrixInnerOp-DGEMM (OpExecutor) vs MatrixInner-MADNESS (vmra.h) vs MatrixInner using lots of InnerOp (FusedExecutor) vs Matrix Inner using funcimpl.inner (MADNESS) + int max_refine_level = 14; + double thresh = 1e-06; // precision // w/o diff. 
and 1e-12 -> 64 x 64 + int FUNC_SIZE = 32; + int FUNC_SIZE_M = 32; + int type = 0; + + if (argc == 5) + { + FUNC_SIZE = atoi(argv[1]); + FUNC_SIZE_M = atoi(argv[2]); + thresh = atof(argv[3]); + max_refine_level = atoi(argv[4]); + } + + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) printf(" Micro Benchmark #1 \n"); + if (world.rank() == 0) printf(" %d functions based on %d and %d random functions\n", FUNC_SIZE*FUNC_SIZE_M, FUNC_SIZE, FUNC_SIZE_M); + if (world.rank() == 0) printf(" threshold: %13.4g, max_refine_level: %d\n", thresh, max_refine_level); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // Setting FunctionDefaults + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + FunctionDefaults<3>::set_max_refine_level(max_refine_level); + + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("== Initializing Functions ======================"); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // 2 * N Functions + real_function_3d h[FUNC_SIZE]; + real_function_3d g[FUNC_SIZE_M]; + real_function_3d output[FUNC_SIZE*FUNC_SIZE_M]; + + real_function_3d result_factory(world); + real_function_3d result(result_factory); + + int i, j; + double clkbegin, clkend; + clkbegin = rtclock(); + + for (i=0; i* matrix_inner_op = new MatrixInnerOp("MatrixInner", &result, fs, gs, false, false); + OpExecutor exe(world); + exe.execute(matrix_inner_op, false); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); +/* 
+ if (world.rank() == 0) + for (i=0; i_r)(i, j)); + } + world.gop.fence(); +*/ + +// +// +// + + // m1. MatrixInner : MatrixInnerOp-DGEMM (OpExecutor) vs MatrixInner-MADNESS (vmra.h) vs MatrixInner using lots of InnerOp (FusedExecutor) vs Matrix Inner using funcimpl.inner (MADNESS) + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("=== MatrixInner-MADNESS (vmra.h) ==================="); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + vecfuncT v_f; + vecfuncT v_g; + + clkbegin = rtclock(); + for (i=0; i ghaly = matrix_inner(world, v_f, v_g); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); + +/* + if (world.rank() == 0) + for (i=0; i* inner_op_ug[FUNC_SIZE*FUNC_SIZE_M/2][FUNC_SIZE_M*FUNC_SIZE/2]; + + for (i=0; i("Inner",&result,&output[i],&output[(FUNC_SIZE*FUNC_SIZE_M/2) + j]); + + vector*> sequence; + + for (i=0; i odag(sequence); + odag.processSequence(); + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + fexecuter.execute(); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); + +/* + if (world.rank() == 0) + for (i=0; i_sum); + } + } + world.gop.fence(); +*/ +/* + // m1. 
MatrixInner : MatrixInnerOp-DGEMM (OpExecutor) vs MatrixInner-MADNESS (vmra.h) vs MatrixInner using lots of InnerOp (FusedExecutor) vs Matrix Inner using funcimpl.inner (MADNESS) + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("=== MatrixInner using funcimpl.inner (MADNESS) ====="); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + double resultInner[FUNC_SIZE*FUNC_SIZE_M/2][FUNC_SIZE_M*FUNC_SIZE/2]; + + for (i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace madness;; + +static const double L = 20; +static const long k = 8; +//static const double thresh = 1e-12; +static const double c = 2.0; +static const double alpha = 1.9; // Exponent +#define PI 3.1415926535897932385 +#define LO 0.0000000000 +#define HI 4.0000000000 + +static double sin_amp = 1.0; +static double cos_amp = 1.0; +static double sin_freq = 1.0; +static double cos_freq = 1.0; +static double sigma_x = 1.0; +static double sigma_y = 1.0; +static double sigma_z = 1.0; +static double center_x = 0.0; +static double center_y = 0.0; +static double center_z = 0.0; +static double gaussian_amp = 1.0; +static double sigma_sq_x = sigma_x*sigma_x; +static double sigma_sq_y = sigma_y*sigma_y; +static double sigma_sq_z = sigma_z*sigma_z; + +double rtclock(); + +static double uinitial(const coord_3d& r) +{ + const double x=r[0], y=r[1], z=r[2]; + return -2.0/(sqrt(x*x+y*y+z*z+1e-8)); +} + +static double random_function(const coord_3d& r) +{ + const double x=r[0], y=r[1], z=r[2]; + + const double dx = x - center_x; + const double dy = y - center_y; + const double dz = z - center_z; + + const double periodic_part = sin_amp * sin(sin_freq*(dx+dy+dz)) + + cos_amp * cos(cos_freq*(dx+dy+dz)); + + const double x_comp = dx*dx/sigma_sq_x; + const double y_comp = dy*dy/sigma_sq_y; + const double z_comp = 
dz*dz/sigma_sq_z; + + const double gaussian_part = -gaussian_amp/exp(sqrt(x_comp+y_comp+z_comp)); + + return gaussian_part*gaussian_part; +} + +static double get_rand() { + double r3 = LO + static_cast(rand())/(static_cast(RAND_MAX/(HI-LO))); + return r3; +} + +static void randomizer() +{ + sin_amp = get_rand(); + cos_amp = get_rand(); + sin_freq = get_rand(); + cos_freq = get_rand(); + sigma_x = get_rand(); + sigma_y = get_rand(); + sigma_z = get_rand(); + center_x = get_rand()*L/(2.0*HI); + center_y = get_rand()*L/(2.0*HI); + center_z = get_rand()*L/(2.0*HI); + gaussian_amp = get_rand(); + sigma_sq_x = sigma_x*sigma_x; + sigma_sq_y = sigma_y*sigma_y; + sigma_sq_z = sigma_z*sigma_z; +} + +int main(int argc, char** argv) +{ + initialize(argc,argv); + World world(SafeMPI::COMM_WORLD); + + // m2. Sequence of Derivative Ops : FuseT vs MADNESS + int max_refine_level = 30; + double thresh = 1e-12; // precision // w/o diff. and 1e-12 -> 64 x 64 + int FUNC_SIZE = 20; + int FUNC_SIZE_M = 32; + int type = 0; + + if (argc == 5) + { + FUNC_SIZE = atoi(argv[1]); + FUNC_SIZE_M = atoi(argv[2]); + thresh = atof(argv[3]); + max_refine_level = atoi(argv[4]); + } + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) printf(" Macro Benchmark #2 \n"); + if (world.rank() == 0) printf(" %d functions\n", FUNC_SIZE); + if (world.rank() == 0) printf(" threshold: %13.4g, max_refine_level: %d\n", thresh, max_refine_level); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + FunctionDefaults<3>::set_max_refine_level(max_refine_level); + + // 2 * N Functions + real_function_3d f[FUNC_SIZE]; + real_function_3d fac0[FUNC_SIZE]; + 
real_function_3d fac1[FUNC_SIZE]; + real_function_3d fac2[FUNC_SIZE]; + real_function_3d* df0[FUNC_SIZE]; + real_function_3d* df1[FUNC_SIZE]; + real_function_3d* df2[FUNC_SIZE]; + real_function_3d dm0[FUNC_SIZE]; + real_function_3d dm1[FUNC_SIZE]; + real_function_3d dm2[FUNC_SIZE]; + + + + int i, j; + double clkbegin, clkend; + clkbegin = rtclock(); + + for (i=0; i* op0[FUNC_SIZE]; + DerivativeOp* op1[FUNC_SIZE]; + DerivativeOp* op2[FUNC_SIZE]; + + real_derivative_3d D0 = free_space_derivative(world, 0); + real_derivative_3d D1 = free_space_derivative(world, 1); + real_derivative_3d D2 = free_space_derivative(world, 2); + + for (i=0; i("Derivative0",df0[i],&f[i],world,&D0); + op1[i] = new DerivativeOp("Derivative1",df1[i],df0[i],world,&D1); + op2[i] = new DerivativeOp("Derivative2",df2[i],df1[i],world,&D2); + + } + + vector*> sequence; + + for (i=0; i odag(sequence); + odag.processSequence(); + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + fexecuter.execute(); + + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); + + + + if (world.rank() == 0) print ("====================================================="); + if (world.rank() == 0) print (" MADNESS "); + if (world.rank() == 0) print ("====================================================="); + + clkbegin = rtclock(); + + for (i=0; inorm2(); + double FuseTnorm1 = df1[FUNC_SIZE-1]->norm2(); + double FuseTnorm2 = df2[FUNC_SIZE-1]->norm2(); + + + if (world.rank() == 0) print("[Result MADNESS] Norm0 : ", MADnorm0 ); + if (world.rank() == 0) print("[Result Fuset] Norm0 : ", FuseTnorm0); + + if (world.rank() == 0) print("[Result MADNESS] Norm1 : ", MADnorm1); + if (world.rank() == 0) print("[Result Fuset] Norm1 : ", FuseTnorm1); + + if (world.rank() == 0) print("[Result MADNESS] Norm2 : ", MADnorm2); + if (world.rank() == 0) print("[Result Fuset] Norm2 : ", FuseTnorm2); + + finalize(); + return 0; 
+} + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday (&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + diff --git a/src/examples/FuseTExamples/SC2016/sc_m3.cc b/src/examples/FuseTExamples/SC2016/sc_m3.cc new file mode 100644 index 00000000000..a84ff756237 --- /dev/null +++ b/src/examples/FuseTExamples/SC2016/sc_m3.cc @@ -0,0 +1,408 @@ +/* + This file is part of MADNESS. + + Copyright (C) 2007,2010 Oak Ridge National Laboratory + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + For more information please contact: + + Robert J. Harrison + Oak Ridge National Laboratory + One Bethel Valley Road + P.O. Box 2008, MS-6367 + + email: harrisonrj@ornl.gov + tel: 865-241-3937 + fax: 865-572-0680 + + $Id$ +*/ +//#define WORLD_INSTANTIATE_STATIC_TEMPLATES +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +//static const double thresh = 1e-6; // precision // w/o diff. 
and 1e-12 -> 64 x 64 +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +#define PI 3.1415926535897932385 +#define LO 0.0000000000 +#define HI 4.0000000000 + +static double sin_amp = 1.0; +static double cos_amp = 1.0; +static double sin_freq = 1.0; +static double cos_freq = 1.0; +static double sigma_x = 1.0; +static double sigma_y = 1.0; +static double sigma_z = 1.0; +static double center_x = 0.0; +static double center_y = 0.0; +static double center_z = 0.0; +static double gaussian_amp = 1.0; +static double sigma_sq_x = sigma_x*sigma_x; +static double sigma_sq_y = sigma_y*sigma_y; +static double sigma_sq_z = sigma_z*sigma_z; + +double rtclock(); + +static double random_function(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + + const double dx = x - center_x; + const double dy = y - center_y; + const double dz = z - center_z; + + const double periodic_part = sin_amp * sin(sin_freq*(dx+dy+dz)) + + cos_amp * cos(cos_freq*(dx+dy+dz)); + + const double x_comp = dx*dx/sigma_sq_x; + const double y_comp = dy*dy/sigma_sq_y; + const double z_comp = dz*dz/sigma_sq_z; + + const double gaussian_part = -gaussian_amp/exp(sqrt(x_comp+y_comp+z_comp)); + + return gaussian_part*gaussian_part; +} + +static double get_rand() { + double r3 = LO + static_cast(rand())/(static_cast(RAND_MAX/(HI-LO))); + return r3; +} + +static void randomizer() +{ + sin_amp = get_rand(); + cos_amp = get_rand(); + sin_freq = get_rand(); + cos_freq = get_rand(); + sigma_x = get_rand(); + sigma_y = get_rand(); + sigma_z = get_rand(); + center_x = get_rand()*L/(2.0*HI); + center_y = get_rand()*L/(2.0*HI); + center_z = get_rand()*L/(2.0*HI); + gaussian_amp = get_rand(); + sigma_sq_x = sigma_x*sigma_x; + sigma_sq_y = sigma_y*sigma_y; + sigma_sq_z = sigma_z*sigma_z; +} + +class alpha_functor : public FunctionFunctorInterface { +private: + double coeff; +public: + alpha_functor(double 
coeff=1.0) : coeff(coeff) {} + + virtual double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + return (coeff * (x*x + y*y + z*z) * sin(x*x + y*y + z*z)); + } +}; +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +typedef Function functionT; +typedef std::vector vecfuncT; + +int main(int argc, char** argv) +{ + // input + // (1) M -- M and M functions + // (2) thresh -- threshold + // (3) max-refine -- max-refine-level + // (4) type -- 0: all, 1: FuseT, 2: vmra, 3: OpExecutor + + // m1. MatrixInner : MatrixInnerOp-DGEMM (OpExecutor) vs MatrixInner-MADNESS (vmra.h) vs MatrixInner using lots of InnerOp (FusedExecutor) vs Matrix Inner using funcimpl.inner (MADNESS) + int max_refine_level = 14; + double thresh = 1e-12; // precision // w/o diff. 
and 1e-12 -> 64 x 64 + int FUNC_SIZE = 16; + int FUNC_SIZE_M = 16; + int type = 0; + + if (argc == 5) + { + FUNC_SIZE = atoi(argv[1]); + FUNC_SIZE_M = FUNC_SIZE; + max_refine_level = atoi(argv[3]); + thresh = atof(argv[2]); + type = atoi(argv[4]); + } + + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) printf(" Micro Benchmark #1 \n"); + if (world.rank() == 0) printf(" %d functions based on %d and %d random functions\n", FUNC_SIZE*FUNC_SIZE_M, FUNC_SIZE, FUNC_SIZE_M); + if (world.rank() == 0) printf(" threshold: %13.4g, max_refine_level: %d\n", thresh, max_refine_level); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // Setting FunctionDefaults + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + FunctionDefaults<3>::set_max_refine_level(max_refine_level); + + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("== Initializing Functions ======================"); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // 2 * N Functions + real_function_3d h[FUNC_SIZE]; + real_function_3d g[FUNC_SIZE_M]; + real_function_3d output[FUNC_SIZE*FUNC_SIZE_M]; + + real_function_3d result_factory(world); + real_function_3d result(result_factory); + + int i, j; + double clkbegin, clkend; + clkbegin = rtclock(); + + for (i=0; i* matrix_inner_op = new MatrixInnerOp("MatrixInner", &result, fs, gs, false, true); + + OpExecutor exe(world); + exe.execute(matrix_inner_op, false); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + 
world.gop.fence(); + +/* + if (world.rank() == 0) + for (i=0; i_r)(i, j)); + } + world.gop.fence(); +*/ +// +// +// + + // m1. MatrixInner : MatrixInnerOp-DGEMM (OpExecutor) vs MatrixInner-MADNESS (vmra.h) vs MatrixInner using lots of InnerOp (FusedExecutor) vs Matrix Inner using funcimpl.inner (MADNESS) + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("=== MatrixInner-MADNESS (vmra.h) ==================="); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + vecfuncT v_f; + vecfuncT v_g; + + clkbegin = rtclock(); + for (i=0; i ghaly = matrix_inner(world, v_f, v_g); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); + +/* + if (world.rank() == 0) + for (i=0; i* inner_op_ug[FUNC_SIZE*FUNC_SIZE_M/2][FUNC_SIZE_M*FUNC_SIZE/2]; + + for (i=0; i("Inner",&result,&output[i],&output[(FUNC_SIZE*FUNC_SIZE_M/2) + j]); + + vector*> sequence; + + for (i=0; i odag(sequence); + odag.processSequence(); + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + fexecuter.execute(); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); + +/* + if (world.rank() == 0) + for (i=0; i_sum); + } + } + world.gop.fence(); +*/ + + // m1. 
MatrixInner : MatrixInnerOp-DGEMM (OpExecutor) vs MatrixInner-MADNESS (vmra.h) vs MatrixInner using lots of InnerOp (FusedExecutor) vs Matrix Inner using funcimpl.inner (MADNESS) + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("=== MatrixInner using funcimpl.inner (MADNESS) ====="); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + double resultInner[FUNC_SIZE*FUNC_SIZE_M/2][FUNC_SIZE_M*FUNC_SIZE/2]; + + for (i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +//#define FUSET_UNFUSED +#define FUSET_FUSED +#define MADNESS_VER + +using namespace madness; + +#define FUNC_SIZE_N 3 +#define FUNC_SIZE_M 3 +double rtclock(); + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +// Initial Gaussian with exponent alpha +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+3.2*y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5); +} + +static double uinitial2(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(5*x*x+y*y+z*z))*pow(constants::pi/alpha,-1.5); +} + +static double Vp(const coord_3d& r) { + return VVV; +} + +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template 
+struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +int main(int argc, char** argv) +{ + int i, j; + double clkbegin, clkend; + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + //FunctionDefaults<3>::set_max_refine_level(4); + + if (world.rank() == 0) print("Creating (N + M) functions"); + real_function_3d u[FUNC_SIZE_N]; + real_function_3d g[FUNC_SIZE_M]; + + for (i=0; i exe(world); + CompressOp* op_u[FUNC_SIZE_N]; + CompressOp* op_g[FUNC_SIZE_M]; + InnerOp* op_ug[FUNC_SIZE_N][FUNC_SIZE_M]; + + clkbegin = rtclock(); + + for (i=0; i("CompressOp",u_result[i], &u[i]); + for (i=0; i("CompressOp",g_result[i], &g[i]); + + for (i=0; i("InnerOp",ug_result[i][j],u_result[i],g_result[j]); + + for (i=0; i_sum); + world.gop.fence(); +*/ +#endif + + +#ifdef FUSET_FUSED + if (world.rank() == 0) print("================================================"); + if (world.rank() == 0) print("By FuseT, FUSED"); + if (world.rank() == 0) print("================================================"); + + CompressOp* op_u_fused[FUNC_SIZE_N]; + CompressOp* op_g_fused[FUNC_SIZE_M]; + InnerOp* op_ug_fused[FUNC_SIZE_N][FUNC_SIZE_M]; + vector*> sequence; + FuseT* odag; + + for (i=0; i("CompressOp-u",u_result_fused[i], &u[i]); + for (i=0; i("CompressOp-g",g_result_fused[i], &g[i]); + + for (i=0; i("InnerOp",ug_result_fused[i][j],u_result_fused[i],g_result_fused[j]); + + for (i=0; i(sequence); + odag->processSequence(); + + FusedOpSequence fsequence = odag->getFusedOpSequence(); + FusedExecutor fexecutor(world, &fsequence); + fexecutor.execute(); + world.gop.fence(); + + clkend = rtclock() - clkbegin; + if (world.rank() == 
0) print ("================================================"); + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + if (world.rank() == 0) print ("================================================"); + world.gop.fence(); +/* + if (world.rank() == 0) print("After Inner-Product Operators - u*g"); + for (i=0; i_sum); + world.gop.fence(); +*/ +#endif + + + +#ifdef MADNESS_VER + if (world.rank() == 0) print("================================================"); + if (world.rank() == 0) print("By MADNESS"); + if (world.rank() == 0) print("================================================"); + + double omg[FUNC_SIZE_N][FUNC_SIZE_M]; + + clkbegin = rtclock(); + + for (i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define FUSET_UNFUSED +#define FUSET_FUSED +#define MADNESS_VER +#define SIZE_FUNC 8 + +using namespace madness; + +double rtclock(); + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision +//static const double thresh = 1e-12; // precision +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +// Initial Gaussian with exponent alpha +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+3.2*y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5); +}; + +static double uinitial2(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(5*x*x+y*y+z*z))*pow(constants::pi/alpha,-1.5); +}; + +static double alpha_func_rand(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return ( (2*x*x + 3*y*y + 4*z*z) * sin(x*x + y*y + z*z) ); +}; + +static double alpha_func(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return ( (x*x + y*y + z*z) * sin(x*x + y*y + z*z) ); +}; + +static double beta_func(const 
coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return (exp(- x*x - y*y - z*z)); +}; + +static double Vp(const coord_3d& r) { + return VVV; +}; + +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +int main(int argc, char** argv) +{ + int i, j; + double clkbegin, clkend; + + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + // FunctionDefaults<3>::set_max_refine_level(4); + + if (world.rank() == 0) print("================================================"); + if (world.rank() == 0) print(" Initializing Functions"); + if (world.rank() == 0) print("================================================"); + + real_function_3d f_0 = real_factory_3d(world).f(uinitial); + real_function_3d f_1 = real_factory_3d(world).f(uinitial2); + real_function_3d f_2 = real_factory_3d(world).f(uinitial); + real_function_3d g_0 = real_factory_3d(world).f(uinitial2); + real_function_3d g_1 = real_factory_3d(world).f(uinitial); + real_function_3d g_2 = real_factory_3d(world).f(uinitial2); + + f_0._treeName = "f_0"; + f_1._treeName = "f_1"; + f_2._treeName = "f_2"; + g_0._treeName = "g_0"; + g_1._treeName = "g_1"; + g_2._treeName = "g_2"; + + f_0.truncate(); + f_1.truncate(); + 
f_2.truncate(); + g_0.truncate(); + g_1.truncate(); + g_2.truncate(); + + real_function_3d rf_0_factory = real_factory_3d(world); + real_function_3d rf_0(rf_0_factory); + real_function_3d rf_1_factory = real_factory_3d(world); + real_function_3d rf_1(rf_1_factory); + real_function_3d rf_2_factory = real_factory_3d(world); + real_function_3d rf_2(rf_2_factory); + real_function_3d rg_0_factory = real_factory_3d(world); + real_function_3d rg_0(rg_0_factory); + real_function_3d rg_1_factory = real_factory_3d(world); + real_function_3d rg_1(rg_1_factory); + real_function_3d rg_2_factory = real_factory_3d(world); + real_function_3d rg_2(rg_2_factory); + + real_function_3d rfg_0_factory = real_factory_3d(world); + real_function_3d rfg_0(rfg_0_factory); + real_function_3d rfg_1_factory = real_factory_3d(world); + real_function_3d rfg_1(rfg_1_factory); + real_function_3d rfg_2_factory = real_factory_3d(world); + real_function_3d rfg_2(rfg_2_factory); + real_function_3d rfg_3_factory = real_factory_3d(world); + real_function_3d rfg_3(rfg_3_factory); + real_function_3d rfg_4_factory = real_factory_3d(world); + real_function_3d rfg_4(rfg_4_factory); + real_function_3d rfg_5_factory = real_factory_3d(world); + real_function_3d rfg_5(rfg_5_factory); + real_function_3d rfg_6_factory = real_factory_3d(world); + real_function_3d rfg_6(rfg_6_factory); + real_function_3d rfg_7_factory = real_factory_3d(world); + real_function_3d rfg_7(rfg_7_factory); + real_function_3d rfg_8_factory = real_factory_3d(world); + real_function_3d rfg_8(rfg_8_factory); + + // Creating Operators + CompressOp op_compress_f_0("Compress", &rf_0, &f_0); + CompressOp op_compress_f_1("Compress", &rf_1, &f_1); + CompressOp op_compress_f_2("Compress", &rf_2, &f_2); + CompressOp op_compress_g_0("Compress", &rg_0, &g_0); + CompressOp op_compress_g_1("Compress", &rg_1, &g_1); + CompressOp op_compress_g_2("Compress", &rg_2, &g_2); + + InnerOp op_inner_f_g_0_0("Inner", &rfg_0, &rf_0, &rg_0); + InnerOp 
op_inner_f_g_0_1("Inner", &rfg_1, &rf_0, &rg_1); + InnerOp op_inner_f_g_0_2("Inner", &rfg_2, &rf_0, &rg_2); + InnerOp op_inner_f_g_1_0("Inner", &rfg_3, &rf_1, &rg_0); + InnerOp op_inner_f_g_1_1("Inner", &rfg_4, &rf_1, &rg_1); + InnerOp op_inner_f_g_1_2("Inner", &rfg_5, &rf_1, &rg_2); + InnerOp op_inner_f_g_2_0("Inner", &rfg_6, &rf_2, &rg_0); + InnerOp op_inner_f_g_2_1("Inner", &rfg_7, &rf_2, &rg_1); + InnerOp op_inner_f_g_2_2("Inner", &rfg_8, &rf_2, &rg_2); + + // OpExecutor!!!!!! + OpExecutor exe(world); + if (world.rank() == 0) print("================================================"); + if (world.rank() == 0) print("== By FuseT, UNFUSED"); + if (world.rank() == 0) print("================================================"); + clkbegin = rtclock(); + + // Compress + if (world.rank() == 0) print("== [Running Compress Operations] ========================"); + world.gop.fence(); + + exe.execute(&op_compress_f_0,true); + exe.execute(&op_compress_f_1,true); + exe.execute(&op_compress_f_2,true); + exe.execute(&op_compress_g_0,true); + exe.execute(&op_compress_g_1,true); + exe.execute(&op_compress_g_2,true); + + // Inner + if (world.rank() == 0) print("== [Running Inner Operations] ========================"); + world.gop.fence(); + + exe.execute(&op_inner_f_g_0_0,true); + exe.execute(&op_inner_f_g_0_1,true); + exe.execute(&op_inner_f_g_0_2,true); + exe.execute(&op_inner_f_g_1_0,true); + exe.execute(&op_inner_f_g_1_1,true); + exe.execute(&op_inner_f_g_1_2,true); + exe.execute(&op_inner_f_g_2_0,true); + exe.execute(&op_inner_f_g_2_1,true); + exe.execute(&op_inner_f_g_2_2,true); + + // + // + // + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + + if (world.rank() == 0) + { + printf ("Inner-Product [%d] = %f\n", 0, op_inner_f_g_0_0._sum); + printf ("Inner-Product [%d] = %f\n", 1, op_inner_f_g_0_1._sum); + printf ("Inner-Product [%d] = %f\n", 2, op_inner_f_g_0_2._sum); + printf ("Inner-Product [%d] = %f\n", 3, 
op_inner_f_g_1_0._sum); + printf ("Inner-Product [%d] = %f\n", 4, op_inner_f_g_1_1._sum); + printf ("Inner-Product [%d] = %f\n", 5, op_inner_f_g_1_2._sum); + printf ("Inner-Product [%d] = %f\n", 6, op_inner_f_g_2_0._sum); + printf ("Inner-Product [%d] = %f\n", 7, op_inner_f_g_2_1._sum); + printf ("Inner-Product [%d] = %f\n", 8, op_inner_f_g_2_2._sum); + } + +// +// +// +// +// + if (world.rank() == 0) print("================================================"); + if (world.rank() == 0) print("By FuseT, FUSED"); + if (world.rank() == 0) print("================================================"); + world.gop.fence(); + + vector*> sequence; + + sequence.push_back(&op_compress_f_0); + sequence.push_back(&op_compress_f_1); + sequence.push_back(&op_compress_f_2); + sequence.push_back(&op_compress_g_0); + sequence.push_back(&op_compress_g_1); + sequence.push_back(&op_compress_g_2); + + sequence.push_back(&op_inner_f_g_0_0); + sequence.push_back(&op_inner_f_g_0_1); + sequence.push_back(&op_inner_f_g_0_2); + sequence.push_back(&op_inner_f_g_1_0); + sequence.push_back(&op_inner_f_g_1_1); + sequence.push_back(&op_inner_f_g_1_2); + sequence.push_back(&op_inner_f_g_2_0); + sequence.push_back(&op_inner_f_g_2_1); + sequence.push_back(&op_inner_f_g_2_2); + + op_inner_f_g_0_0._sum = 0.0; + op_inner_f_g_0_1._sum = 0.0; + op_inner_f_g_0_2._sum = 0.0; + op_inner_f_g_1_0._sum = 0.0; + op_inner_f_g_1_1._sum = 0.0; + op_inner_f_g_1_2._sum = 0.0; + op_inner_f_g_2_0._sum = 0.0; + op_inner_f_g_2_1._sum = 0.0; + op_inner_f_g_2_2._sum = 0.0; + + if (world.rank() == 0) print("== [Running Operations] ================================"); + world.gop.fence(); + clkbegin = rtclock(); + + FuseT odag(sequence); + odag.processSequence(); + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + + fexecuter.execute(); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + + if (world.rank() == 0) + { + printf 
("Inner-Product [%d] = %f\n", 0, op_inner_f_g_0_0._sum); + printf ("Inner-Product [%d] = %f\n", 1, op_inner_f_g_0_1._sum); + printf ("Inner-Product [%d] = %f\n", 2, op_inner_f_g_0_2._sum); + printf ("Inner-Product [%d] = %f\n", 3, op_inner_f_g_1_0._sum); + printf ("Inner-Product [%d] = %f\n", 4, op_inner_f_g_1_1._sum); + printf ("Inner-Product [%d] = %f\n", 5, op_inner_f_g_1_2._sum); + printf ("Inner-Product [%d] = %f\n", 6, op_inner_f_g_2_0._sum); + printf ("Inner-Product [%d] = %f\n", 7, op_inner_f_g_2_1._sum); + printf ("Inner-Product [%d] = %f\n", 8, op_inner_f_g_2_2._sum); + } + +// +// +// +// +// + if (world.rank() == 0) print("================================================"); + if (world.rank() == 0) print("By MADNESS"); + if (world.rank() == 0) print("================================================"); + + if (world.rank() == 0) print("== [Running Operations] ================================"); + world.gop.fence(); + clkbegin = rtclock(); + f_0.compress(); + f_1.compress(); + f_2.compress(); + g_0.compress(); + g_1.compress(); + g_2.compress(); + + double MAD_results[9]; + + MAD_results[0] = f_0.inner(g_0); + MAD_results[1] = f_0.inner(g_1); + MAD_results[2] = f_0.inner(g_2); + MAD_results[3] = f_1.inner(g_0); + MAD_results[4] = f_1.inner(g_1); + MAD_results[5] = f_1.inner(g_2); + MAD_results[6] = f_2.inner(g_0); + MAD_results[7] = f_2.inner(g_1); + MAD_results[8] = f_2.inner(g_2); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + if (world.rank() == 0) + { + for (i=0; i<9; i++) + printf ("Inner-Product [%d] = %f\n", i, MAD_results[i]); + } + + world.gop.fence(); + finalize(); + return 0; +} + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday (&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + diff --git a/src/examples/FuseTExamples/Test_FusedEx_only_Fused.cc 
b/src/examples/FuseTExamples/Test_FusedEx_only_Fused.cc new file mode 100644 index 00000000000..c8d11d94266 --- /dev/null +++ b/src/examples/FuseTExamples/Test_FusedEx_only_Fused.cc @@ -0,0 +1,283 @@ +/* + This file is part of MADNESS. + + Copyright (C) 2007,2010 Oak Ridge National Laboratory + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + For more information please contact: + + Robert J. Harrison + Oak Ridge National Laboratory + One Bethel Valley Road + P.O. 
Box 2008, MS-6367 + + email: harrisonrj@ornl.gov + tel: 865-241-3937 + fax: 865-572-0680 + + $Id$ +*/ +//#define WORLD_INSTANTIATE_STATIC_TEMPLATES +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define FUSET_UNFUSED +#define FUSET_FUSED +#define MADNESS_VER +#define SIZE_FUNC 8 + +using namespace madness; + +double rtclock(); + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +//static const double thresh = 1e-6; // precision +static const double thresh = 1e-12; // precision +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +// Initial Gaussian with exponent alpha +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+3.2*y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5); +} + +static double uinitial2(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(5*x*x+y*y+z*z))*pow(constants::pi/alpha,-1.5); +} + +static double Vp(const coord_3d& r) { + return VVV; +} + +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +int main(int argc, char** argv) +{ + int i, j; + double clkbegin, clkend; + + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + 
FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + // FunctionDefaults<3>::set_max_refine_level(4); + + if (world.rank() == 0) print("================================================"); + if (world.rank() == 0) print(" Initializing Functions"); + if (world.rank() == 0) print("================================================"); + + real_function_3d f_0 = real_factory_3d(world).f(uinitial); + real_function_3d f_1 = real_factory_3d(world).f(uinitial2); + real_function_3d f_2 = real_factory_3d(world).f(uinitial); + real_function_3d g_0 = real_factory_3d(world).f(uinitial2); + real_function_3d g_1 = real_factory_3d(world).f(uinitial); + real_function_3d g_2 = real_factory_3d(world).f(uinitial2); + + f_0._treeName = "f_0"; + f_1._treeName = "f_1"; + f_2._treeName = "f_2"; + g_0._treeName = "g_0"; + g_1._treeName = "g_1"; + g_2._treeName = "g_2"; + + f_0.truncate(); + f_1.truncate(); + f_2.truncate(); + g_0.truncate(); + g_1.truncate(); + g_2.truncate(); + + real_function_3d rf_0_factory = real_factory_3d(world); + real_function_3d rf_0(rf_0_factory); + real_function_3d rf_1_factory = real_factory_3d(world); + real_function_3d rf_1(rf_1_factory); + real_function_3d rf_2_factory = real_factory_3d(world); + real_function_3d rf_2(rf_2_factory); + real_function_3d rg_0_factory = real_factory_3d(world); + real_function_3d rg_0(rg_0_factory); + real_function_3d rg_1_factory = real_factory_3d(world); + real_function_3d rg_1(rg_1_factory); + real_function_3d rg_2_factory = real_factory_3d(world); + real_function_3d rg_2(rg_2_factory); + + real_function_3d rfg_0_factory = real_factory_3d(world); + real_function_3d rfg_0(rfg_0_factory); + real_function_3d rfg_1_factory = real_factory_3d(world); + real_function_3d rfg_1(rfg_1_factory); + real_function_3d rfg_2_factory = real_factory_3d(world); + real_function_3d 
rfg_2(rfg_2_factory); + real_function_3d rfg_3_factory = real_factory_3d(world); + real_function_3d rfg_3(rfg_3_factory); + real_function_3d rfg_4_factory = real_factory_3d(world); + real_function_3d rfg_4(rfg_4_factory); + real_function_3d rfg_5_factory = real_factory_3d(world); + real_function_3d rfg_5(rfg_5_factory); + real_function_3d rfg_6_factory = real_factory_3d(world); + real_function_3d rfg_6(rfg_6_factory); + real_function_3d rfg_7_factory = real_factory_3d(world); + real_function_3d rfg_7(rfg_7_factory); + real_function_3d rfg_8_factory = real_factory_3d(world); + real_function_3d rfg_8(rfg_8_factory); + + // Creating Operators + CompressOp op_compress_f_0("Compress", &rf_0, &f_0); + CompressOp op_compress_f_1("Compress", &rf_1, &f_1); + CompressOp op_compress_f_2("Compress", &rf_2, &f_2); + CompressOp op_compress_g_0("Compress", &rg_0, &g_0); + CompressOp op_compress_g_1("Compress", &rg_1, &g_1); + CompressOp op_compress_g_2("Compress", &rg_2, &g_2); + + InnerOp op_inner_f_g_0_0("Inner", &rfg_0, &rf_0, &rg_0); + InnerOp op_inner_f_g_0_1("Inner", &rfg_1, &rf_0, &rg_1); + InnerOp op_inner_f_g_0_2("Inner", &rfg_2, &rf_0, &rg_2); + InnerOp op_inner_f_g_1_0("Inner", &rfg_3, &rf_1, &rg_0); + InnerOp op_inner_f_g_1_1("Inner", &rfg_4, &rf_1, &rg_1); + InnerOp op_inner_f_g_1_2("Inner", &rfg_5, &rf_1, &rg_2); + InnerOp op_inner_f_g_2_0("Inner", &rfg_6, &rf_2, &rg_0); + InnerOp op_inner_f_g_2_1("Inner", &rfg_7, &rf_2, &rg_1); + InnerOp op_inner_f_g_2_2("Inner", &rfg_8, &rf_2, &rg_2); + +// +// +// +// +// + if (world.rank() == 0) print("================================================"); + if (world.rank() == 0) print("By FuseT, FUSED"); + if (world.rank() == 0) print("================================================"); + world.gop.fence(); + + vector*> sequence; + + sequence.push_back(&op_compress_f_0); + sequence.push_back(&op_compress_f_1); + sequence.push_back(&op_compress_f_2); + sequence.push_back(&op_compress_g_0); + sequence.push_back(&op_compress_g_1); + 
sequence.push_back(&op_compress_g_2); + + sequence.push_back(&op_inner_f_g_0_0); + sequence.push_back(&op_inner_f_g_0_1); + sequence.push_back(&op_inner_f_g_0_2); + sequence.push_back(&op_inner_f_g_1_0); + sequence.push_back(&op_inner_f_g_1_1); + sequence.push_back(&op_inner_f_g_1_2); + sequence.push_back(&op_inner_f_g_2_0); + sequence.push_back(&op_inner_f_g_2_1); + sequence.push_back(&op_inner_f_g_2_2); + + op_inner_f_g_0_0._sum = 0.0; + op_inner_f_g_0_1._sum = 0.0; + op_inner_f_g_0_2._sum = 0.0; + op_inner_f_g_1_0._sum = 0.0; + op_inner_f_g_1_1._sum = 0.0; + op_inner_f_g_1_2._sum = 0.0; + op_inner_f_g_2_0._sum = 0.0; + op_inner_f_g_2_1._sum = 0.0; + op_inner_f_g_2_2._sum = 0.0; + + if (world.rank() == 0) print("== [Running Operations] ================================"); + world.gop.fence(); + clkbegin = rtclock(); + + FuseT odag(sequence); + odag.processSequence(); + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + + fexecuter.execute(); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + + if (world.rank() == 0) + { + printf ("Inner-Product [%d] = %f\n", 0, op_inner_f_g_0_0._sum); + printf ("Inner-Product [%d] = %f\n", 1, op_inner_f_g_0_1._sum); + printf ("Inner-Product [%d] = %f\n", 2, op_inner_f_g_0_2._sum); + printf ("Inner-Product [%d] = %f\n", 3, op_inner_f_g_1_0._sum); + printf ("Inner-Product [%d] = %f\n", 4, op_inner_f_g_1_1._sum); + printf ("Inner-Product [%d] = %f\n", 5, op_inner_f_g_1_2._sum); + printf ("Inner-Product [%d] = %f\n", 6, op_inner_f_g_2_0._sum); + printf ("Inner-Product [%d] = %f\n", 7, op_inner_f_g_2_1._sum); + printf ("Inner-Product [%d] = %f\n", 8, op_inner_f_g_2_2._sum); + } +// +// +// +// +// + world.gop.fence(); + finalize(); + return 0; +} + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday (&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + 
return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + diff --git a/src/examples/FuseTExamples/Test_FusedEx_only_MADNESS.cc b/src/examples/FuseTExamples/Test_FusedEx_only_MADNESS.cc new file mode 100644 index 00000000000..79ca6a668d1 --- /dev/null +++ b/src/examples/FuseTExamples/Test_FusedEx_only_MADNESS.cc @@ -0,0 +1,201 @@ +/* + This file is part of MADNESS. + + Copyright (C) 2007,2010 Oak Ridge National Laboratory + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + For more information please contact: + + Robert J. Harrison + Oak Ridge National Laboratory + One Bethel Valley Road + P.O. 
Box 2008, MS-6367 + + email: harrisonrj@ornl.gov + tel: 865-241-3937 + fax: 865-572-0680 + + $Id$ +*/ +//#define WORLD_INSTANTIATE_STATIC_TEMPLATES +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define FUSET_UNFUSED +#define FUSET_FUSED +#define MADNESS_VER +#define SIZE_FUNC 8 + +using namespace madness; + +double rtclock(); + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +//static const double thresh = 1e-6; // precision +static const double thresh = 1e-12; // precision +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +// Initial Gaussian with exponent alpha +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+3.2*y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5); +} + +static double uinitial2(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(5*x*x+y*y+z*z))*pow(constants::pi/alpha,-1.5); +} + +static double Vp(const coord_3d& r) { + return VVV; +} + +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +int main(int argc, char** argv) +{ + int i, j; + double clkbegin, clkend; + + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + 
FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + // FunctionDefaults<3>::set_max_refine_level(4); + + if (world.rank() == 0) print("================================================"); + if (world.rank() == 0) print(" Initializing Functions"); + if (world.rank() == 0) print("================================================"); + + real_function_3d f_0 = real_factory_3d(world).f(uinitial); + real_function_3d f_1 = real_factory_3d(world).f(uinitial2); + real_function_3d f_2 = real_factory_3d(world).f(uinitial); + real_function_3d g_0 = real_factory_3d(world).f(uinitial2); + real_function_3d g_1 = real_factory_3d(world).f(uinitial); + real_function_3d g_2 = real_factory_3d(world).f(uinitial2); + + f_0._treeName = "f_0"; + f_1._treeName = "f_1"; + f_2._treeName = "f_2"; + g_0._treeName = "g_0"; + g_1._treeName = "g_1"; + g_2._treeName = "g_2"; + + f_0.truncate(); + f_1.truncate(); + f_2.truncate(); + g_0.truncate(); + g_1.truncate(); + g_2.truncate(); + +// +// +// +// +// + if (world.rank() == 0) print("================================================"); + if (world.rank() == 0) print("By MADNESS"); + if (world.rank() == 0) print("================================================"); + + if (world.rank() == 0) print("== [Running Operations] ================================"); + world.gop.fence(); + clkbegin = rtclock(); + f_0.compress(); + f_1.compress(); + f_2.compress(); + g_0.compress(); + g_1.compress(); + g_2.compress(); + + double MAD_results[9]; + + MAD_results[0] = f_0.inner(g_0); + MAD_results[1] = f_0.inner(g_1); + MAD_results[2] = f_0.inner(g_2); + MAD_results[3] = f_1.inner(g_0); + MAD_results[4] = f_1.inner(g_1); + MAD_results[5] = f_1.inner(g_2); + MAD_results[6] = f_2.inner(g_0); + MAD_results[7] = f_2.inner(g_1); + MAD_results[8] = f_2.inner(g_2); + + clkend = rtclock() - clkbegin; + if 
(world.rank() == 0) printf("Running Time: %f\n", clkend); + if (world.rank() == 0) + { + for (i=0; i<9; i++) + printf ("Inner-Product [%d] = %f\n", i, MAD_results[i]); + } + + world.gop.fence(); + finalize(); + return 0; +} + +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday (&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + diff --git a/src/examples/FuseTExamples/Test_FusedEx_only_Unfused.cc b/src/examples/FuseTExamples/Test_FusedEx_only_Unfused.cc new file mode 100644 index 00000000000..eedd2c41504 --- /dev/null +++ b/src/examples/FuseTExamples/Test_FusedEx_only_Unfused.cc @@ -0,0 +1,274 @@ +/* + This file is part of MADNESS. + + Copyright (C) 2007,2010 Oak Ridge National Laboratory + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + For more information please contact: + + Robert J. Harrison + Oak Ridge National Laboratory + One Bethel Valley Road + P.O. 
Box 2008, MS-6367 + + email: harrisonrj@ornl.gov + tel: 865-241-3937 + fax: 865-572-0680 + + $Id$ +*/ +//#define WORLD_INSTANTIATE_STATIC_TEMPLATES +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define FUSET_UNFUSED +#define FUSET_FUSED +#define MADNESS_VER +#define SIZE_FUNC 8 + +using namespace madness; + +double rtclock(); + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +//static const double thresh = 1e-6; // precision +static const double thresh = 1e-12; // precision +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +// Initial Gaussian with exponent alpha +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+3.2*y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5); +} + +static double uinitial2(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(5*x*x+y*y+z*z))*pow(constants::pi/alpha,-1.5); +} + +static double Vp(const coord_3d& r) { + return VVV; +} + +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +int main(int argc, char** argv) +{ + int i, j; + double clkbegin, clkend; + + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + 
FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + // FunctionDefaults<3>::set_max_refine_level(4); + + if (world.rank() == 0) print("================================================"); + if (world.rank() == 0) print(" Initializing Functions"); + if (world.rank() == 0) print("================================================"); + + real_function_3d f_0 = real_factory_3d(world).f(uinitial); + real_function_3d f_1 = real_factory_3d(world).f(uinitial2); + real_function_3d f_2 = real_factory_3d(world).f(uinitial); + real_function_3d g_0 = real_factory_3d(world).f(uinitial2); + real_function_3d g_1 = real_factory_3d(world).f(uinitial); + real_function_3d g_2 = real_factory_3d(world).f(uinitial2); + + f_0._treeName = "f_0"; + f_1._treeName = "f_1"; + f_2._treeName = "f_2"; + g_0._treeName = "g_0"; + g_1._treeName = "g_1"; + g_2._treeName = "g_2"; + + f_0.truncate(); + f_1.truncate(); + f_2.truncate(); + g_0.truncate(); + g_1.truncate(); + g_2.truncate(); + + real_function_3d rf_0_factory = real_factory_3d(world); + real_function_3d rf_0(rf_0_factory); + real_function_3d rf_1_factory = real_factory_3d(world); + real_function_3d rf_1(rf_1_factory); + real_function_3d rf_2_factory = real_factory_3d(world); + real_function_3d rf_2(rf_2_factory); + real_function_3d rg_0_factory = real_factory_3d(world); + real_function_3d rg_0(rg_0_factory); + real_function_3d rg_1_factory = real_factory_3d(world); + real_function_3d rg_1(rg_1_factory); + real_function_3d rg_2_factory = real_factory_3d(world); + real_function_3d rg_2(rg_2_factory); + + real_function_3d rfg_0_factory = real_factory_3d(world); + real_function_3d rfg_0(rfg_0_factory); + real_function_3d rfg_1_factory = real_factory_3d(world); + real_function_3d rfg_1(rfg_1_factory); + real_function_3d rfg_2_factory = real_factory_3d(world); + real_function_3d 
rfg_2(rfg_2_factory); + real_function_3d rfg_3_factory = real_factory_3d(world); + real_function_3d rfg_3(rfg_3_factory); + real_function_3d rfg_4_factory = real_factory_3d(world); + real_function_3d rfg_4(rfg_4_factory); + real_function_3d rfg_5_factory = real_factory_3d(world); + real_function_3d rfg_5(rfg_5_factory); + real_function_3d rfg_6_factory = real_factory_3d(world); + real_function_3d rfg_6(rfg_6_factory); + real_function_3d rfg_7_factory = real_factory_3d(world); + real_function_3d rfg_7(rfg_7_factory); + real_function_3d rfg_8_factory = real_factory_3d(world); + real_function_3d rfg_8(rfg_8_factory); + + // Creating Operators + CompressOp op_compress_f_0("Compress", &rf_0, &f_0); + CompressOp op_compress_f_1("Compress", &rf_1, &f_1); + CompressOp op_compress_f_2("Compress", &rf_2, &f_2); + CompressOp op_compress_g_0("Compress", &rg_0, &g_0); + CompressOp op_compress_g_1("Compress", &rg_1, &g_1); + CompressOp op_compress_g_2("Compress", &rg_2, &g_2); + + InnerOp op_inner_f_g_0_0("Inner", &rfg_0, &rf_0, &rg_0); + InnerOp op_inner_f_g_0_1("Inner", &rfg_1, &rf_0, &rg_1); + InnerOp op_inner_f_g_0_2("Inner", &rfg_2, &rf_0, &rg_2); + InnerOp op_inner_f_g_1_0("Inner", &rfg_3, &rf_1, &rg_0); + InnerOp op_inner_f_g_1_1("Inner", &rfg_4, &rf_1, &rg_1); + InnerOp op_inner_f_g_1_2("Inner", &rfg_5, &rf_1, &rg_2); + InnerOp op_inner_f_g_2_0("Inner", &rfg_6, &rf_2, &rg_0); + InnerOp op_inner_f_g_2_1("Inner", &rfg_7, &rf_2, &rg_1); + InnerOp op_inner_f_g_2_2("Inner", &rfg_8, &rf_2, &rg_2); + + // OpExecutor!!!!!! 
+ OpExecutor exe(world); + if (world.rank() == 0) print("================================================"); + if (world.rank() == 0) print("== By FuseT, UNFUSED"); + if (world.rank() == 0) print("================================================"); + clkbegin = rtclock(); + + // Compress + if (world.rank() == 0) print("== [Running Compress Operations] ========================"); + world.gop.fence(); + + exe.execute(&op_compress_f_0,true); + exe.execute(&op_compress_f_1,true); + exe.execute(&op_compress_f_2,true); + exe.execute(&op_compress_g_0,true); + exe.execute(&op_compress_g_1,true); + exe.execute(&op_compress_g_2,true); + + // Inner + if (world.rank() == 0) print("== [Running Inner Operations] ========================"); + world.gop.fence(); + + exe.execute(&op_inner_f_g_0_0,true); + exe.execute(&op_inner_f_g_0_1,true); + exe.execute(&op_inner_f_g_0_2,true); + exe.execute(&op_inner_f_g_1_0,true); + exe.execute(&op_inner_f_g_1_1,true); + exe.execute(&op_inner_f_g_1_2,true); + exe.execute(&op_inner_f_g_2_0,true); + exe.execute(&op_inner_f_g_2_1,true); + exe.execute(&op_inner_f_g_2_2,true); + + // + // + // + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + + if (world.rank() == 0) + { + printf ("Inner-Product [%d] = %f\n", 0, op_inner_f_g_0_0._sum); + printf ("Inner-Product [%d] = %f\n", 1, op_inner_f_g_0_1._sum); + printf ("Inner-Product [%d] = %f\n", 2, op_inner_f_g_0_2._sum); + printf ("Inner-Product [%d] = %f\n", 3, op_inner_f_g_1_0._sum); + printf ("Inner-Product [%d] = %f\n", 4, op_inner_f_g_1_1._sum); + printf ("Inner-Product [%d] = %f\n", 5, op_inner_f_g_1_2._sum); + printf ("Inner-Product [%d] = %f\n", 6, op_inner_f_g_2_0._sum); + printf ("Inner-Product [%d] = %f\n", 7, op_inner_f_g_2_1._sum); + printf ("Inner-Product [%d] = %f\n", 8, op_inner_f_g_2_2._sum); + } + +// +// +// +// +// + +// +// +// +// +// + world.gop.fence(); + finalize(); + return 0; +} + +double rtclock() +{ + struct timezone Tzp; + 
struct timeval Tp; + int stat; + stat = gettimeofday (&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + diff --git a/src/examples/FuseTExamples/Test_FusedEx_original.cc b/src/examples/FuseTExamples/Test_FusedEx_original.cc new file mode 100644 index 00000000000..b5369fbcfee --- /dev/null +++ b/src/examples/FuseTExamples/Test_FusedEx_original.cc @@ -0,0 +1,394 @@ +/* + This file is part of MADNESS. + + Copyright (C) 2007,2010 Oak Ridge National Laboratory + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + For more information please contact: + + Robert J. Harrison + Oak Ridge National Laboratory + One Bethel Valley Road + P.O. Box 2008, MS-6367 + + email: harrisonrj@ornl.gov + tel: 865-241-3937 + fax: 865-572-0680 + + $Id$ +*/ +//#define WORLD_INSTANTIATE_STATIC_TEMPLATES +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define FUSET_UNFUSED +//#define FUSET_FUSED +//#define MADNESS_VER + +/*! +\file heat2.cc +\brief Example Green function for the 3D heat equation with a linear term +\defgroup heatex2 Evolve in time 3D heat equation with a linear term +\ingroup examples + +The source is here. 
+ +\par Points of interest + - application of a function of a function to exponentiate the potential + - use of a functor to compute the solution at an arbitrary future time + - convolution with the Green's function + + +\par Background + +This adds to the complexity of the other \ref exampleheat "heat equation example" +by including a linear term. Specifically, we solve +\f[ + \frac{\partial u(x,t)}{\partial t} = c \nabla^2 u(x,t) + V_p(x,t) u(x,t) +\f] +If \f$ V_p = 0 \f$ time evolution operator is +\f[ + G_0(x,t) = \frac{1}{\sqrt{4 \pi c t}} \exp \frac{-x^2}{4 c t} +\f] +For non-zero \f$ V_p \f$ the time evolution is performed using the Trotter splitting +\f[ + G(x,t) = G_0(x,t/2) * \exp(V_p t) * G_0(x,t/2) + O(t^3) +\f] +In order to form an exact solution for testing, we choose \f$ V_p(x,t)=\mbox{constant} \f$ +but the solution method is not limited to this choice. + +*/ + +using namespace madness; + +#define FUNC_SIZE 10 +double rtclock(); + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +// Initial Gaussian with exponent alpha +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+3.2*y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5); +} + +static double uinitial2(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(5*x*x+y*y+z*z))*pow(constants::pi/alpha,-1.5); +} + +static double Vp(const coord_3d& r) { + return VVV; +} + +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * 
pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +int main(int argc, char** argv) +{ + int i, j; + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + FunctionDefaults<3>::set_max_refine_level(4); + + // Step: 1 + // for i = 0 to N-1 + // u[i].create(); + if (world.rank() == 0) print("Creating functions"); + real_function_3d u[FUNC_SIZE]; + real_function_3d u_result_factory[FUNC_SIZE]; + real_function_3d* u_result[FUNC_SIZE]; + real_function_3d* u_result_fused[FUNC_SIZE]; + real_function_3d g[FUNC_SIZE]; + real_function_3d g_result_factory[FUNC_SIZE]; + real_function_3d* g_result[FUNC_SIZE]; + real_function_3d* g_result_fused[FUNC_SIZE]; + + //real_function_3d temp_factory = real_factory_3d(world); + //real_function_3d temp(temp_factory); + real_function_3d temp_factory[FUNC_SIZE][FUNC_SIZE]; + real_function_3d* temp[FUNC_SIZE][FUNC_SIZE]; + + for (i=0; i* op_u[FUNC_SIZE]; + CompressOp* op_g[FUNC_SIZE]; + + if (world.rank() == 0) print("Creating Compress Operators - u & g"); + for (i=0; i("CompressOp-u",u_result[i], &u[i]); + op_g[i] = new CompressOp("CompressOp-g",g_result[i], &g[i]); + } + + OpExecutor exe(world); + if (world.rank() == 0) print("Running Compress Operators - u"); + for (i=0; i* op_ug[FUNC_SIZE][FUNC_SIZE]; + for (i=0; i("InnerOp-u*g",&temp,u_result[i],g_result[j]); + op_ug[i][j] = new InnerOp("InnerOp-u*g",temp[i][j],u_result[i],g_result[j]); + } + } + + if (world.rank() == 0) print("Running Inner-Product Operators - u*g"); + for (i=0; i_sum); + 
// Correctness by using MADNESS +#endif + + +#ifdef FUSET_FUSED + if (world.rank() == 0) print("================================================"); + if (world.rank() == 0) print("By FuseT, FUSED"); + if (world.rank() == 0) print("================================================"); + + clkbegin = rtclock(); + CompressOp* op_u_fused[FUNC_SIZE]; + CompressOp* op_g_fused[FUNC_SIZE]; + for (i=0; i("CompressOp-u",u_result_fused[i], &u[i]); + op_g_fused[i] = new CompressOp("CompressOp-g",g_result_fused[i], &g[i]); + } + + real_function_3d temp_factory_fused = real_factory_3d(world); + real_function_3d temp_fused(temp_factory_fused); + InnerOp* op_ug_fused[FUNC_SIZE][FUNC_SIZE]; + for (i=0; i("InnerOp-u*g",&temp_fused,u_result_fused[i],g_result_fused[j]); + } + } + + vector*> sequence; + for (i=0; i odag(sequence); + odag.processSequence(); + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecutor(world, &fsequence); + fexecutor.execute(); + world.gop.fence(); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + + if (world.rank() == 0) print("After Inner-Product Operators - u*g"); + for (i=0; i_sum); + } + } + // Correctness by using MADNESS +#endif +/* + for (i=0; i_sum - op_ug[i][j]->_sum); + } + if (world.rank() == 0) printf("\n"); + } +*/ +#ifdef MADNESS_VER + if (world.rank() == 0) print("================================================"); + if (world.rank() == 0) print("By MADNESS"); + if (world.rank() == 0) print("================================================"); + + clkbegin = rtclock(); + for (i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/*! + \file heat2.cc + \brief Example Green function for the 3D heat equation with a linear term + \defgroup heatex2 Evolve in time 3D heat equation with a linear term + \ingroup examples + + The source is here. 
+ + \par Points of interest + - application of a function of a function to exponentiate the potential + - use of a functor to compute the solution at an arbitrary future time + - convolution with the Green's function + + + \par Background + + This adds to the complexity of the other \ref exampleheat "heat equation example" + by including a linear term. Specifically, we solve + \f[ + \frac{\partial u(x,t)}{\partial t} = c \nabla^2 u(x,t) + V_p(x,t) u(x,t) + \f] + If \f$ V_p = 0 \f$ time evolution operator is + \f[ + G_0(x,t) = \frac{1}{\sqrt{4 \pi c t}} \exp \frac{-x^2}{4 c t} + \f] + For non-zero \f$ V_p \f$ the time evolution is performed using the Trotter splitting + \f[ + G(x,t) = G_0(x,t/2) * \exp(V_p t) * G_0(x,t/2) + O(t^3) + \f] + In order to form an exact solution for testing, we choose \f$ V_p(x,t)=\mbox{constant} \f$ + but the solution method is not limited to this choice. + +*/ + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision // w/o diff. 
and 1e-12 -> 64 x 64 +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +#define PI 3.1415926535897932385 +#define LO 0.0000000000 +#define HI 4.0000000000 + +static double sin_amp = 1.0; +static double cos_amp = 1.0; +static double sin_freq = 1.0; +static double cos_freq = 1.0; +static double sigma_x = 1.0; +static double sigma_y = 1.0; +static double sigma_z = 1.0; +static double center_x = 0.0; +static double center_y = 0.0; +static double center_z = 0.0; +static double gaussian_amp = 1.0; +static double sigma_sq_x = sigma_x*sigma_x; +static double sigma_sq_y = sigma_y*sigma_y; +static double sigma_sq_z = sigma_z*sigma_z; + +#define FUNC_SIZE 2 +#define FUNC_SIZE_M 2 + +double rtclock(); + +// Initial Gaussian with exponent alpha +static double random_function(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + + const double dx = x - center_x; + const double dy = y - center_y; + const double dz = z - center_z; + + const double periodic_part = sin_amp * sin(sin_freq*(dx+dy+dz)) + + cos_amp * cos(cos_freq*(dx+dy+dz)); + + const double x_comp = dx*dx/sigma_sq_x; + const double y_comp = dy*dy/sigma_sq_y; + const double z_comp = dz*dz/sigma_sq_z; + + const double gaussian_part = -gaussian_amp/exp(sqrt(x_comp+y_comp+z_comp)); + + return gaussian_part*gaussian_part; +} + +static double get_rand() { + double r3 = LO + static_cast(rand())/(static_cast(RAND_MAX/(HI-LO))); + return r3; +} + +static void randomizer() +{ + sin_amp = get_rand(); + cos_amp = get_rand(); + sin_freq = get_rand(); + cos_freq = get_rand(); + sigma_x = get_rand(); + sigma_y = get_rand(); + sigma_z = get_rand(); + center_x = get_rand()*L/(2.0*HI); + center_y = get_rand()*L/(2.0*HI); + center_z = get_rand()*L/(2.0*HI); + gaussian_amp = get_rand(); + sigma_sq_x = sigma_x*sigma_x; + sigma_sq_y = sigma_y*sigma_y; + sigma_sq_z = sigma_z*sigma_z; +} + +class alpha_functor : 
public FunctionFunctorInterface { +private: + double coeff; +public: + alpha_functor(double coeff=1.0) : coeff(coeff) {} + + virtual double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + return (coeff * (x*x + y*y + z*z) * sin(x*x + y*y + z*z)); + } +}; +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + + +typedef Function functionT; +typedef std::vector vecfuncT; + +int main(int argc, char** argv) +{ + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + + FunctionDefaults<3>::set_max_refine_level(14); + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) printf(" Initializing Functions\n"); + if (world.rank() == 0) printf(" %d Functions, %d Functions\n", FUNC_SIZE, FUNC_SIZE_M); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // M and N functions (i.e., 32 and 32 functions.) + real_function_3d h[FUNC_SIZE]; + real_function_3d g[FUNC_SIZE_M]; + + // M*N functions (i.e., 1024 functions.) 
+ real_function_3d output[FUNC_SIZE*FUNC_SIZE_M]; + real_function_3d output2[FUNC_SIZE*FUNC_SIZE_M]; + + // M*N output functions for compress operator + real_function_3d comp_factory_h[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d* comp_h[FUNC_SIZE*FUNC_SIZE_M/2]; + + real_function_3d comp_factory_g[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d* comp_g[FUNC_SIZE*FUNC_SIZE_M/2]; + + // Matrix_inner + real_function_3d result_factory = real_factory_3d(world); + real_function_3d result(result_factory); + + int i, j; + double clkbegin, clkend; + clkbegin = rtclock(); + + for (i=0; i* compress_op_h[FUNC_SIZE*FUNC_SIZE_M/2]; + CompressOp* compress_op_g[FUNC_SIZE*FUNC_SIZE_M/2]; + + for (i=0; i("Compress",comp_h[i],&output[i]); + compress_op_g[i] = new CompressOp("Compress",comp_g[i],&output[i+(FUNC_SIZE*FUNC_SIZE_M/2)]); + } + + // OpExecutor +// OpExecutor exe(world); + + + if (world.rank() == 0) print ("after"); + for (i=0; i*> sequence; + + for (i=0; i* matrix_inner_op = new MatrixInnerOp("MatrixInner", &result, fs, gs, false, true); + + sequence.push_back(matrix_inner_op); + + FuseT odag(sequence); + odag.processSequence(); + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecutor(world, &fsequence); + if (world.rank() == 0) printf("Checkpoint before execution\n"); + fexecutor.execute(); + + // OpExecutor + //OpExecutor exe(world); + //exe.execute(matrix_inner_op, false); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); + +#ifdef DEBUG_OUTPUT + if (world.rank() == 0) + for (i=0; i_r)(i, j)); + } + world.gop.fence(); +#endif +// +// +// + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("== MADNESS ============"); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + clkbegin = rtclock(); + + // Compress Operations -- M*N + for (i=0; i ghaly = 
matrix_inner(world, v_f, v_g); + //Tensor ghaly = matrix_inner_old(world, v_f, v_g); + + if (world.rank() == 0) + { + clkend = rtclock() - clkbegin; + printf("Running Time: %f\n", clkend); + } + world.gop.fence(); + +#ifdef DEBUG_OUTPUT + if (world.rank() == 0) + for (i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +/*! + \file heat2.cc + \brief Example Green function for the 3D heat equation with a linear term + \defgroup heatex2 Evolve in time 3D heat equation with a linear term + \ingroup examples + + The source is here. + + \par Points of interest + - application of a function of a function to exponentiate the potential + - use of a functor to compute the solution at an arbitrary future time + - convolution with the Green's function + + + \par Background + + This adds to the complexity of the other \ref exampleheat "heat equation example" + by including a linear term. Specifically, we solve + \f[ + \frac{\partial u(x,t)}{\partial t} = c \nabla^2 u(x,t) + V_p(x,t) u(x,t) + \f] + If \f$ V_p = 0 \f$ time evolution operator is + \f[ + G_0(x,t) = \frac{1}{\sqrt{4 \pi c t}} \exp \frac{-x^2}{4 c t} + \f] + For non-zero \f$ V_p \f$ the time evolution is performed using the Trotter splitting + \f[ + G(x,t) = G_0(x,t/2) * \exp(V_p t) * G_0(x,t/2) + O(t^3) + \f] + In order to form an exact solution for testing, we choose \f$ V_p(x,t)=\mbox{constant} \f$ + but the solution method is not limited to this choice. + +*/ + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision // w/o diff. 
and 1e-12 -> 64 x 64 +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +#define PI 3.1415926535897932385 +#define LO 0.0000000000 +#define HI 4.0000000000 + +static double sin_amp = 1.0; +static double cos_amp = 1.0; +static double sin_freq = 1.0; +static double cos_freq = 1.0; +static double sigma_x = 1.0; +static double sigma_y = 1.0; +static double sigma_z = 1.0; +static double center_x = 0.0; +static double center_y = 0.0; +static double center_z = 0.0; +static double gaussian_amp = 1.0; +static double sigma_sq_x = sigma_x*sigma_x; +static double sigma_sq_y = sigma_y*sigma_y; +static double sigma_sq_z = sigma_z*sigma_z; + +#define FUNC_SIZE 4 +#define FUNC_SIZE_M 4 + +double rtclock(); + +// Initial Gaussian with exponent alpha +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*3.2*x+y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5); +} + +static double uinitial2(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + srand(time(0)); + return exp(-alpha*(5*x*x+y*y+z*z))*pow(constants::pi/alpha,-1.5); +} +static double uinitial1(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+1.4*y*y+z*z))*pow(constants::pi/alpha,-1.5); +}; + +static double random_function(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + + const double dx = x - center_x; + const double dy = y - center_y; + const double dz = z - center_z; + + const double periodic_part = sin_amp * sin(sin_freq*(dx+dy+dz)) + + cos_amp * cos(cos_freq*(dx+dy+dz)); + + const double x_comp = dx*dx/sigma_sq_x; + const double y_comp = dy*dy/sigma_sq_y; + const double z_comp = dz*dz/sigma_sq_z; + + const double gaussian_part = -gaussian_amp/exp(sqrt(x_comp+y_comp+z_comp)); + + return gaussian_part*gaussian_part; +} + +static double get_rand() { + double r3 = LO + 
static_cast(rand())/(static_cast(RAND_MAX/(HI-LO))); + return r3; +} + +static void randomizer() +{ + sin_amp = get_rand(); + cos_amp = get_rand(); + sin_freq = get_rand(); + cos_freq = get_rand(); + sigma_x = get_rand(); + sigma_y = get_rand(); + sigma_z = get_rand(); + center_x = get_rand()*L/(2.0*HI); + center_y = get_rand()*L/(2.0*HI); + center_z = get_rand()*L/(2.0*HI); + gaussian_amp = get_rand(); + sigma_sq_x = sigma_x*sigma_x; + sigma_sq_y = sigma_y*sigma_y; + sigma_sq_z = sigma_z*sigma_z; +} + +static double ghaly(const coord_3d& r) { + std::srand(time(NULL)); + const double randVal = std::rand()/1000000000.0; + const double x=r[0], y=r[1], z=r[2]; + return 3.0*exp(-2.0*sqrt(x*x + randVal*randVal + y*y + z*z + 1e-4)); +} + +static double Vp(const coord_3d& r) { + return VVV; +} + +class alpha_functor : public FunctionFunctorInterface { +private: + double coeff; +public: + alpha_functor(double coeff=1.0) : coeff(coeff) {} + + virtual double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + return (coeff * (x*x + y*y + z*z) * sin(x*x + y*y + z*z)); + } +}; +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + + +typedef DistributedMatrix distmatT; +typedef Function functionT; +typedef std::vector vecfuncT; + +int main(int argc, char** argv) +{ + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + 
FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + + FunctionDefaults<3>::set_max_refine_level(14); + + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) printf(" Initializing Functions\n"); + if (world.rank() == 0) printf(" %d Functions, %d Functions\n", FUNC_SIZE, FUNC_SIZE_M); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // 2 * N Functions + real_function_3d h[FUNC_SIZE]; + real_function_3d g[FUNC_SIZE_M]; + + real_function_3d temp_factory_h[FUNC_SIZE]; + real_function_3d temp_factory_g[FUNC_SIZE_M]; + real_function_3d* temp_h[FUNC_SIZE]; + real_function_3d* temp_g[FUNC_SIZE_M]; + + // N * N Results Functions by Inner-Product + real_function_3d temp_factory[FUNC_SIZE][FUNC_SIZE_M]; + real_function_3d* temp[FUNC_SIZE][FUNC_SIZE_M]; + + int i, j; + double clkbegin, clkend; + clkbegin = rtclock(); + + for (i=0; i* c_op_h[FUNC_SIZE]; + CompressOp* c_op_g[FUNC_SIZE_M]; + + for (i=0; i("Compress",temp_h[i],&h[i]); + + for (j=0; j("Compress",temp_g[j],&g[j]); + + + /*InnerOp* inner_op_ug[FUNC_SIZE][FUNC_SIZE_M]; + for (i=0; i("Inner",temp[i][j], temp_h[i], temp_g[j]); +*/ + vecfuncT fs; + vecfuncT gs; + + for (i=0; i* matrix_inner_op = new MatrixInnerOp("MatrixInner", fs, gs, false); + + vector*> sequence; + for (i=0; i odag(sequence); + odag.processSequence(); + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + fexecuter.execute(); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); + + for (i=0; i_sum); + } + world.gop.fence(); + +// +// +// + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("== MADNESS - matrix_inner 
===================="); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + vecfuncT v_f; + vecfuncT v_g; + + for (i=0; i ghaly = matrix_inner_old(world, v_f, v_g); + //Tensor ghaly = matrix_inner(world, v_f, v_g); + +/* double resultInner[FUNC_SIZE][FUNC_SIZE_M] = {0.0, }; + for (i=0; i +#include +#include +#include +#include +#include +#include +#include +/*! + \file heat2.cc + \brief Example Green function for the 3D heat equation with a linear term + \defgroup heatex2 Evolve in time 3D heat equation with a linear term + \ingroup examples + + The source is here. + + \par Points of interest + - application of a function of a function to exponentiate the potential + - use of a functor to compute the solution at an arbitrary future time + - convolution with the Green's function + + + \par Background + + This adds to the complexity of the other \ref exampleheat "heat equation example" + by including a linear term. Specifically, we solve + \f[ + \frac{\partial u(x,t)}{\partial t} = c \nabla^2 u(x,t) + V_p(x,t) u(x,t) + \f] + If \f$ V_p = 0 \f$ time evolution operator is + \f[ + G_0(x,t) = \frac{1}{\sqrt{4 \pi c t}} \exp \frac{-x^2}{4 c t} + \f] + For non-zero \f$ V_p \f$ the time evolution is performed using the Trotter splitting + \f[ + G(x,t) = G_0(x,t/2) * \exp(V_p t) * G_0(x,t/2) + O(t^3) + \f] + In order to form an exact solution for testing, we choose \f$ V_p(x,t)=\mbox{constant} \f$ + but the solution method is not limited to this choice. 
+ +*/ + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +#define FUNC_SIZE 2 +#define FUNC_SIZE_M 2 + +double rtclock(); + +// Initial Gaussian with exponent alpha +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*3.2*x+y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5); +} + +static double guess(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return 6.0*exp(-2.0*sqrt(x*x+y*y+z*z+1e-4)); +} + +static double ghaly(const coord_3d& r) { + std::srand(time(NULL)); + const double randVal = std::rand()/1000000000.0; + const double x=r[0], y=r[1], z=r[2]; + return 3.0*exp(-2.0*sqrt(x*x + randVal*randVal + y*y + z*z + 1e-4)); +} +static double uinitial1(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+1.4*y*y+z*z))*pow(constants::pi/alpha,-1.5); +}; + +static double Vp(const coord_3d& r) { + return VVV; +} + +class alpha_functor : public FunctionFunctorInterface { +private: + double coeff; +public: + alpha_functor(double coeff=1.0) : coeff(coeff) {} + + virtual double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + return (coeff * (x*x + y*y + z*z) * sin(x*x + y*y + z*z)); + } +}; +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, 
Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +int main(int argc, char** argv) +{ + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + FunctionDefaults<3>::set_max_refine_level(14); + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) printf(" Initializing Functions\n"); + if (world.rank() == 0) printf(" %d Functions, %d Functions\n", FUNC_SIZE, FUNC_SIZE_M); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // 2 * N Functions + real_function_3d h[FUNC_SIZE]; + real_function_3d g[FUNC_SIZE_M]; + + // N * N Results Functions by Inner-Product + real_function_3d temp_factory_h[FUNC_SIZE]; + real_function_3d temp_factory_g[FUNC_SIZE_M]; + real_function_3d* temp_h[FUNC_SIZE]; + real_function_3d* temp_g[FUNC_SIZE_M]; + + real_function_3d temp_factory_hg[FUNC_SIZE][FUNC_SIZE_M]; + real_function_3d* temp_hg[FUNC_SIZE][FUNC_SIZE_M]; + + int i, j; + double clkbegin, clkend; + clkbegin = rtclock(); + for (i=0; i::set_max_refine_level(30); + for (i=0; i* inner_op_h[FUNC_SIZE]; + CompressOp* inner_op_g[FUNC_SIZE_M]; + clkbegin = rtclock(); + for (i=0; i("Inner",temp_h[i],&h[i]); + + for (j=0; j("Inner",temp_g[j],&g[j]); + + // + if (world.rank() == 0) print ("======= OpExecutor ========================================="); + world.gop.fence(); + OpExecutor exe(world); + for (i=0; i* inner_op_hg[FUNC_SIZE][FUNC_SIZE_M]; +/* + if (world.rank() == 0) print ("======= OpExecutor ========================================="); + for (i=0; i("Inner", temp_hg[i][j], temp_h[i], temp_g[j]); + + for (i=0; i*> sequence; + for (i=0; i 
odag(sequence); + odag.processSequence(); + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + fexecuter.execute(); +*/ + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); + + for (i=0; inorm2(); + result_h_trace = temp_h[i]->trace(); + if (world.rank() == 0) print (i,"norm:", result_h_norm, " trace", result_h_trace); + } + + for (j=0; jnorm2(); + result_g_trace = temp_g[j]->trace(); + if (world.rank() == 0) print (i,"norm:", result_g_norm, " trace", result_g_trace); + } + + for (i=0; i_sum); + world.gop.fence(); +// +// +// + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("== MADNESS =============================="); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + + clkbegin = rtclock(); + + for (i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +using namespace madness;; + +static const double L = 20; +static const long k = 8; +//static const double thresh = 1e-6; +static const double thresh = 1e-6; +static const double c = 2.0; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +static double uinitial(const coord_3d& r) +{ + const double x=r[0], y=r[1], z=r[2]; + //std::srand(std::time(0)); + int random_variable = 3; + return exp(-alpha*(x*x + y*y + random_variable*z*z)) * pow(constants::pi/alpha, -1.5); +} + +static double Vp(const coord_3d& r) { + return VVV; +} + +int main(int argc, char** argv) +{ + initialize(argc,argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + + if (world.rank() == 
0) printf("after FunctionDefaults\n"); + world.gop.fence(); + + real_function_3d u0 = real_factory_3d(world).f(uinitial); + real_function_3d u1 = real_factory_3d(world).f(uinitial); + u0.truncate(); + u1.truncate(); + + double u0_norm = u0.norm2(); + double u0_trace = u0.trace(); + + if (world.rank() == 0) print("Initial u0 norm", u0_norm,"trace", u0_trace); + world.gop.fence(); + + // Make exponential of Vp + real_function_3d result_factory = real_factory_3d(world); + real_function_3d result(result_factory); + + OpExecutor exe(world); + + // Nothing for compressed u1 + if (world.rank() == 0) printf ("Before u0 is executed by OpExecutor\n"); + world.gop.fence(); + + // + // Compress Operation by FuseT + // + CompressOp op1("Compress",&result, &u0); + exe.execute(&op1, true); + world.gop.fence(); + + // + // Compress Operation by MADNESS + // + if (world.rank() == 0) + { + printf ("======================================\n"); + printf ("Before u1.compress() by MADNESS\n"); + } + world.gop.fence(); + + u1.compress(); + + double result_n1_norm = u1.norm2(); + double result_n1_trace = u1.trace(); + double result_n0_norm = u0.norm2(); + double result_n0_trace = u0.trace(); + double result_result_norm = result.norm2(); + double result_result_trace = result.trace(); + + if (world.rank() == 0) print("By MADNESS u1: norm", result_n1_norm," trace", result_n1_trace); + if (world.rank() == 0) print("Input u0: norm", result_n0_norm," trace", result_n0_trace); + if (world.rank() == 0) print("Output result: norm", result_result_norm," trace", result_result_trace); + world.gop.fence(); + + finalize(); + return 0; +} + + diff --git a/src/examples/FuseTExamples/compress_only_matrixinner.cc b/src/examples/FuseTExamples/compress_only_matrixinner.cc new file mode 100644 index 00000000000..3e3c4a6e16b --- /dev/null +++ b/src/examples/FuseTExamples/compress_only_matrixinner.cc @@ -0,0 +1,488 @@ +/* + This file is part of MADNESS. 
+ + Copyright (C) 2007,2010 Oak Ridge National Laboratory + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + For more information please contact: + + Robert J. Harrison + Oak Ridge National Laboratory + One Bethel Valley Road + P.O. Box 2008, MS-6367 + + email: harrisonrj@ornl.gov + tel: 865-241-3937 + fax: 865-572-0680 + + $Id$ +*/ +//#define WORLD_INSTANTIATE_STATIC_TEMPLATES +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/*! + \file heat2.cc + \brief Example Green function for the 3D heat equation with a linear term + \defgroup heatex2 Evolve in time 3D heat equation with a linear term + \ingroup examples + + The source is here. + + \par Points of interest + - application of a function of a function to exponentiate the potential + - use of a functor to compute the solution at an arbitrary future time + - convolution with the Green's function + + + \par Background + + This adds to the complexity of the other \ref exampleheat "heat equation example" + by including a linear term. 
Specifically, we solve + \f[ + \frac{\partial u(x,t)}{\partial t} = c \nabla^2 u(x,t) + V_p(x,t) u(x,t) + \f] + If \f$ V_p = 0 \f$ time evolution operator is + \f[ + G_0(x,t) = \frac{1}{\sqrt{4 \pi c t}} \exp \frac{-x^2}{4 c t} + \f] + For non-zero \f$ V_p \f$ the time evolution is performed using the Trotter splitting + \f[ + G(x,t) = G_0(x,t/2) * \exp(V_p t) * G_0(x,t/2) + O(t^3) + \f] + In order to form an exact solution for testing, we choose \f$ V_p(x,t)=\mbox{constant} \f$ + but the solution method is not limited to this choice. + +*/ + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision // w/o diff. and 1e-12 -> 64 x 64 +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +#define PI 3.1415926535897932385 +#define LO 0.0000000000 +#define HI 4.0000000000 + +static double sin_amp = 1.0; +static double cos_amp = 1.0; +static double sin_freq = 1.0; +static double cos_freq = 1.0; +static double sigma_x = 1.0; +static double sigma_y = 1.0; +static double sigma_z = 1.0; +static double center_x = 0.0; +static double center_y = 0.0; +static double center_z = 0.0; +static double gaussian_amp = 1.0; +static double sigma_sq_x = sigma_x*sigma_x; +static double sigma_sq_y = sigma_y*sigma_y; +static double sigma_sq_z = sigma_z*sigma_z; + +#define FUNC_SIZE 32 +#define FUNC_SIZE_M 32 + +double rtclock(); + +// Initial Gaussian with exponent alpha +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*3.2*x+y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5); +} + +static double uinitial2(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + srand(time(0)); + return exp(-alpha*(5*x*x+y*y+z*z))*pow(constants::pi/alpha,-1.5); +} +static double uinitial1(const coord_3d& r) { + const double x=r[0], 
y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+1.4*y*y+z*z))*pow(constants::pi/alpha,-1.5); +}; + +static double random_function(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + + const double dx = x - center_x; + const double dy = y - center_y; + const double dz = z - center_z; + + const double periodic_part = sin_amp * sin(sin_freq*(dx+dy+dz)) + + cos_amp * cos(cos_freq*(dx+dy+dz)); + + const double x_comp = dx*dx/sigma_sq_x; + const double y_comp = dy*dy/sigma_sq_y; + const double z_comp = dz*dz/sigma_sq_z; + + const double gaussian_part = -gaussian_amp/exp(sqrt(x_comp+y_comp+z_comp)); + + return gaussian_part*gaussian_part; +} + +static double get_rand() { + double r3 = LO + static_cast(rand())/(static_cast(RAND_MAX/(HI-LO))); + return r3; +} + +static void randomizer() +{ + sin_amp = get_rand(); + cos_amp = get_rand(); + sin_freq = get_rand(); + cos_freq = get_rand(); + sigma_x = get_rand(); + sigma_y = get_rand(); + sigma_z = get_rand(); + center_x = get_rand()*L/(2.0*HI); + center_y = get_rand()*L/(2.0*HI); + center_z = get_rand()*L/(2.0*HI); + gaussian_amp = get_rand(); + sigma_sq_x = sigma_x*sigma_x; + sigma_sq_y = sigma_y*sigma_y; + sigma_sq_z = sigma_z*sigma_z; +} + +static double ghaly(const coord_3d& r) { + std::srand(time(NULL)); + const double randVal = std::rand()/1000000000.0; + const double x=r[0], y=r[1], z=r[2]; + return 3.0*exp(-2.0*sqrt(x*x + randVal*randVal + y*y + z*z + 1e-4)); +} + +static double Vp(const coord_3d& r) { + return VVV; +} + +class alpha_functor : public FunctionFunctorInterface { +private: + double coeff; +public: + alpha_functor(double coeff=1.0) : coeff(coeff) {} + + virtual double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + return (coeff * (x*x + y*y + z*z) * sin(x*x + y*y + z*z)); + } +}; +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double 
x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + + +typedef DistributedMatrix distmatT; +typedef Function functionT; +typedef std::vector vecfuncT; + +int main(int argc, char** argv) +{ + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + + FunctionDefaults<3>::set_max_refine_level(14); + //FunctionDefaults<3>::set_max_refine_level(8); + + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) printf(" Initializing Functions\n"); + if (world.rank() == 0) printf(" %d Functions, %d Functions\n", FUNC_SIZE, FUNC_SIZE_M); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // 2 * N Functions + real_function_3d h[FUNC_SIZE]; + real_function_3d g[FUNC_SIZE_M]; + real_function_3d output[FUNC_SIZE*FUNC_SIZE_M]; + + real_function_3d temp_factory_h[FUNC_SIZE]; + real_function_3d temp_factory_g[FUNC_SIZE_M]; + real_function_3d* temp_h[FUNC_SIZE]; + real_function_3d* temp_g[FUNC_SIZE_M]; + + // N * N Results Functions by Inner-Product + real_function_3d temp_factory[FUNC_SIZE][FUNC_SIZE_M]; + real_function_3d* temp[FUNC_SIZE][FUNC_SIZE_M]; + + int i, j; + double clkbegin, clkend; + clkbegin = rtclock(); + + for (i=0; i* matrix_inner_op = new MatrixInnerOp("MatrixInner", temp_h[0], fs, gs, false); + + OpExecutor exe(world); + 
exe.execute(matrix_inner_op, false); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); + + + if (world.rank() == 0) + for (i=0; i_r)(i, j)); + printf ("(%d,%d): %f\n", i, j, (*matrix_inner_op->_r)(i, j)); + } + world.gop.fence(); + +// +// +// + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("== MADNESS - individual inner ============"); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + vecfuncT v_f; + vecfuncT v_g; +/* + for (i=0; i ghaly = matrix_inner(world, v_f, v_g); + //Tensor ghaly = matrix_inner_old(world, v_f, v_g); + +/* + double resultInner[FUNC_SIZE][FUNC_SIZE_M] = {0.0, }; + for (i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/*! + \file heat2.cc + \brief Example Green function for the 3D heat equation with a linear term + \defgroup heatex2 Evolve in time 3D heat equation with a linear term + \ingroup examples + + The source is here. + + \par Points of interest + - application of a function of a function to exponentiate the potential + - use of a functor to compute the solution at an arbitrary future time + - convolution with the Green's function + + + \par Background + + This adds to the complexity of the other \ref exampleheat "heat equation example" + by including a linear term. 
Specifically, we solve + \f[ + \frac{\partial u(x,t)}{\partial t} = c \nabla^2 u(x,t) + V_p(x,t) u(x,t) + \f] + If \f$ V_p = 0 \f$ time evolution operator is + \f[ + G_0(x,t) = \frac{1}{\sqrt{4 \pi c t}} \exp \frac{-x^2}{4 c t} + \f] + For non-zero \f$ V_p \f$ the time evolution is performed using the Trotter splitting + \f[ + G(x,t) = G_0(x,t/2) * \exp(V_p t) * G_0(x,t/2) + O(t^3) + \f] + In order to form an exact solution for testing, we choose \f$ V_p(x,t)=\mbox{constant} \f$ + but the solution method is not limited to this choice. + +*/ + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-3; // precision // w/o diff. and 1e-12 -> 64 x 64 +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +#define PI 3.1415926535897932385 +#define LO 0.0000000000 +#define HI 4.0000000000 + +static double sin_amp = 1.0; +static double cos_amp = 1.0; +static double sin_freq = 1.0; +static double cos_freq = 1.0; +static double sigma_x = 1.0; +static double sigma_y = 1.0; +static double sigma_z = 1.0; +static double center_x = 0.0; +static double center_y = 0.0; +static double center_z = 0.0; +static double gaussian_amp = 1.0; +static double sigma_sq_x = sigma_x*sigma_x; +static double sigma_sq_y = sigma_y*sigma_y; +static double sigma_sq_z = sigma_z*sigma_z; + +#define FUNC_SIZE 4 +#define FUNC_SIZE_M 4 + +double rtclock(); + +// Initial Gaussian with exponent alpha +static double random_function(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + + const double dx = x - center_x; + const double dy = y - center_y; + const double dz = z - center_z; + + const double periodic_part = sin_amp * sin(sin_freq*(dx+dy+dz)) + + cos_amp * cos(cos_freq*(dx+dy+dz)); + + const double x_comp = dx*dx/sigma_sq_x; + const double y_comp = dy*dy/sigma_sq_y; + const double z_comp 
= dz*dz/sigma_sq_z; + + const double gaussian_part = -gaussian_amp/exp(sqrt(x_comp+y_comp+z_comp)); + + return gaussian_part*gaussian_part; +} + +static double get_rand() { + double r3 = LO + static_cast(rand())/(static_cast(RAND_MAX/(HI-LO))); + return r3; +} + +static void randomizer() +{ + sin_amp = get_rand(); + cos_amp = get_rand(); + sin_freq = get_rand(); + cos_freq = get_rand(); + sigma_x = get_rand(); + sigma_y = get_rand(); + sigma_z = get_rand(); + center_x = get_rand()*L/(2.0*HI); + center_y = get_rand()*L/(2.0*HI); + center_z = get_rand()*L/(2.0*HI); + gaussian_amp = get_rand(); + sigma_sq_x = sigma_x*sigma_x; + sigma_sq_y = sigma_y*sigma_y; + sigma_sq_z = sigma_z*sigma_z; +} + +class alpha_functor : public FunctionFunctorInterface { +private: + double coeff; +public: + alpha_functor(double coeff=1.0) : coeff(coeff) {} + + virtual double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + return (coeff * (x*x + y*y + z*z) * sin(x*x + y*y + z*z)); + } +}; +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + + +typedef Function functionT; +typedef std::vector vecfuncT; + +int main(int argc, char** argv) +{ + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + 
FunctionDefaults<3>::set_cubic_cell(-L, L); + + FunctionDefaults<3>::set_max_refine_level(14); + //FunctionDefaults<3>::set_max_refine_level(8); + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) printf(" Initializing Functions\n"); + if (world.rank() == 0) printf(" %d Functions, %d Functions\n", FUNC_SIZE, FUNC_SIZE_M); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // M and N functions (i.e., 32 and 32 functions.) + real_function_3d h[FUNC_SIZE]; + real_function_3d g[FUNC_SIZE_M]; + + // M*N functions (i.e., 1024 functions.) + real_function_3d output[FUNC_SIZE*FUNC_SIZE_M]; + + // M*N output functions for compress operator + real_function_3d comp_factory_h[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d* comp_h[FUNC_SIZE*FUNC_SIZE_M/2]; + + real_function_3d comp_factory_g[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d* comp_g[FUNC_SIZE*FUNC_SIZE_M/2]; + + // Matrix_inner + real_function_3d result_factory = real_factory_3d(world); + real_function_3d result(result_factory); + + int i, j; + double clkbegin, clkend; + clkbegin = rtclock(); + + for (i=0; i compress_op_1("Compress",comp_h[0],&output[0]); + CompressOp compress_op_2("Compress",comp_h[1],&output[1]); + CompressOp compress_op_3("Compress",comp_h[2],&output[2]); + CompressOp compress_op_4("Compress",comp_h[3],&output[3]); + CompressOp compress_op_5("Compress",comp_h[4],&output[4]); + CompressOp compress_op_6("Compress",comp_h[5],&output[5]); + CompressOp compress_op_7("Compress",comp_h[6],&output[6]); + CompressOp compress_op_8("Compress",comp_h[7],&output[7]); + CompressOp compress_op_9("Compress",comp_h[8],&output[8]); + CompressOp compress_op_10("Compress",comp_h[9],&output[9]); + + + + + + // Creating Compress Operators + CompressOp* compress_op_h[FUNC_SIZE*FUNC_SIZE_M/2]; + CompressOp* compress_op_g[FUNC_SIZE*FUNC_SIZE_M/2]; + + for (i=0; i("Compress",comp_h[i],&output[i]); + 
compress_op_g[i] = new CompressOp("Compress",comp_g[i],&output[i+(FUNC_SIZE*FUNC_SIZE_M/2)]); + } + + // Creating Matrix-Inner Operator + vecfuncT fs; + vecfuncT gs; + + for (i=0; i* matrix_inner_op = new MatrixInnerOp("MatrixInner", &result, fs, gs, false); + + // FuseT + vector*> sequence; + vector*> sequence1; + + for (i=0; i odag(sequence); + odag.processSequence(); + + if(world.rank() == 0){ + odag.printOpsAndTrees(); + odag.printValidSequences(); + } + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecutor(world, &fsequence); + fexecutor.execute(); + + + // OpExecutor + //OpExecutor exe(world); + //exe.execute(matrix_inner_op, false); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); + + finalize(); + exit(0); +#ifdef DEBUG_OUTPUT + if (world.rank() == 0) + for (i=0; i_r)(i, j)); + } + world.gop.fence(); +#endif +// +// +// +/* if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("== MADNESS ============"); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + clkbegin = rtclock(); + + // Compress Operations -- M*N + for (i=0; i ghaly = matrix_inner(world, v_f, v_g); + //Tensor ghaly = matrix_inner_old(world, v_f, v_g); + + if (world.rank() == 0) + { + clkend = rtclock() - clkbegin; + printf("Running Time: %f\n", clkend); + } + world.gop.fence(); + +#ifdef DEBUG_OUTPUT + if (world.rank() == 0) + for (i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/*! + \file heat2.cc + \brief Example Green function for the 3D heat equation with a linear term + \defgroup heatex2 Evolve in time 3D heat equation with a linear term + \ingroup examples + + The source is here. 
+ + \par Points of interest + - application of a function of a function to exponentiate the potential + - use of a functor to compute the solution at an arbitrary future time + - convolution with the Green's function + + + \par Background + + This adds to the complexity of the other \ref exampleheat "heat equation example" + by including a linear term. Specifically, we solve + \f[ + \frac{\partial u(x,t)}{\partial t} = c \nabla^2 u(x,t) + V_p(x,t) u(x,t) + \f] + If \f$ V_p = 0 \f$ time evolution operator is + \f[ + G_0(x,t) = \frac{1}{\sqrt{4 \pi c t}} \exp \frac{-x^2}{4 c t} + \f] + For non-zero \f$ V_p \f$ the time evolution is performed using the Trotter splitting + \f[ + G(x,t) = G_0(x,t/2) * \exp(V_p t) * G_0(x,t/2) + O(t^3) + \f] + In order to form an exact solution for testing, we choose \f$ V_p(x,t)=\mbox{constant} \f$ + but the solution method is not limited to this choice. + +*/ + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision // w/o diff. 
and 1e-12 -> 64 x 64 +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +#define PI 3.1415926535897932385 +#define LO 0.0000000000 +#define HI 4.0000000000 + +static double sin_amp = 1.0; +static double cos_amp = 1.0; +static double sin_freq = 1.0; +static double cos_freq = 1.0; +static double sigma_x = 1.0; +static double sigma_y = 1.0; +static double sigma_z = 1.0; +static double center_x = 0.0; +static double center_y = 0.0; +static double center_z = 0.0; +static double gaussian_amp = 1.0; +static double sigma_sq_x = sigma_x*sigma_x; +static double sigma_sq_y = sigma_y*sigma_y; +static double sigma_sq_z = sigma_z*sigma_z; + +#define FUNC_SIZE 2 +#define FUNC_SIZE_M 2 + +double rtclock(); + +// Initial Gaussian with exponent alpha +static double random_function(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + + const double dx = x - center_x; + const double dy = y - center_y; + const double dz = z - center_z; + + const double periodic_part = sin_amp * sin(sin_freq*(dx+dy+dz)) + + cos_amp * cos(cos_freq*(dx+dy+dz)); + + const double x_comp = dx*dx/sigma_sq_x; + const double y_comp = dy*dy/sigma_sq_y; + const double z_comp = dz*dz/sigma_sq_z; + + const double gaussian_part = -gaussian_amp/exp(sqrt(x_comp+y_comp+z_comp)); + + return gaussian_part*gaussian_part; +} + +static double get_rand() { + double r3 = LO + static_cast(rand())/(static_cast(RAND_MAX/(HI-LO))); + return r3; +} + +static void randomizer() +{ + sin_amp = get_rand(); + cos_amp = get_rand(); + sin_freq = get_rand(); + cos_freq = get_rand(); + sigma_x = get_rand(); + sigma_y = get_rand(); + sigma_z = get_rand(); + center_x = get_rand()*L/(2.0*HI); + center_y = get_rand()*L/(2.0*HI); + center_z = get_rand()*L/(2.0*HI); + gaussian_amp = get_rand(); + sigma_sq_x = sigma_x*sigma_x; + sigma_sq_y = sigma_y*sigma_y; + sigma_sq_z = sigma_z*sigma_z; +} + +class alpha_functor : 
public FunctionFunctorInterface { +private: + double coeff; +public: + alpha_functor(double coeff=1.0) : coeff(coeff) {} + + virtual double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + return (coeff * (x*x + y*y + z*z) * sin(x*x + y*y + z*z)); + } +}; +// Exact solution at time t +class uexact : public FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + + +typedef Function functionT; +typedef std::vector vecfuncT; + +int main(int argc, char** argv) +{ + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + + FunctionDefaults<3>::set_max_refine_level(14); + //FunctionDefaults<3>::set_max_refine_level(8); + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) printf(" Initializing Functions\n"); + if (world.rank() == 0) printf(" %d Functions, %d Functions\n", FUNC_SIZE, FUNC_SIZE_M); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // M and N functions (i.e., 32 and 32 functions.) + real_function_3d h[FUNC_SIZE]; + real_function_3d g[FUNC_SIZE_M]; + + // M*N functions (i.e., 1024 functions.) 
+ real_function_3d output[FUNC_SIZE*FUNC_SIZE_M]; + real_function_3d output2[FUNC_SIZE*FUNC_SIZE_M]; + + // M*N output functions for compress operator + real_function_3d comp_factory_h[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d comp_h[FUNC_SIZE*FUNC_SIZE_M/2]; + + real_function_3d comp_factory_g[FUNC_SIZE*FUNC_SIZE_M/2]; + real_function_3d comp_g[FUNC_SIZE*FUNC_SIZE_M/2]; + + // Matrix_inner + real_function_3d result_factory = real_factory_3d(world); + real_function_3d result(result_factory); + + int i, j; + double clkbegin, clkend; + clkbegin = rtclock(); + + for (i=0; i* compress_op_h[FUNC_SIZE*FUNC_SIZE_M/2]; + CompressOp* compress_op_g[FUNC_SIZE*FUNC_SIZE_M/2]; + + for (i=0; i("Compress",&comp_h[i],&output[i]); + compress_op_g[i] = new CompressOp("Compress",&comp_g[i],&output[i+(FUNC_SIZE*FUNC_SIZE_M/2)]); + } + + // OpExecutor +// OpExecutor exe(world); + + + if (world.rank() == 0) print ("after"); + for (i=0; i*> sequence; + + for (i=0; i* matrix_inner_op = new MatrixInnerOp("MatrixInner", &result, fs, gs, false); + + sequence.push_back(matrix_inner_op); + + FuseT odag(sequence); + odag.processSequence(); + + if (world.rank() == 0) + { + odag.printOpsAndTrees(); + odag.printValidSequences(); + } + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecutor(world, &fsequence); + fexecutor.execute(); + + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("Running Time: %f\n", clkend); + world.gop.fence(); + +#ifdef DEBUG_OUTPUT + if (world.rank() == 0) + for (i=0; i_r)(i, j)); + } + world.gop.fence(); +#endif +// +// +// + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("== MADNESS ============"); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + clkbegin = rtclock(); + + // Compress Operations -- M*N + for (i=0; i ghaly = matrix_inner(world, v_f, v_g); + //Tensor ghaly = 
matrix_inner_old(world, v_f, v_g); + + if (world.rank() == 0) + { + clkend = rtclock() - clkbegin; + printf("Running Time: %f\n", clkend); + } + world.gop.fence(); + +#ifdef DEBUG_OUTPUT + if (world.rank() == 0) + for (i=0; i +#include +#include +#include +#include +#include +#include +#include + +using namespace madness; + +static const double L = 20; // Half box size +static const long k = 8; // wavelet order +static const double thresh = 1e-6; // precision // w/o diff. and 1e-12 -> 64 x 64 +static const double c = 2.0; // +static const double tstep = 0.1; +static const double alpha = 1.9; // Exponent +static const double VVV = 0.2; // Vp constant value + +#define FUNC_SIZE 64 +#define FUNC_SIZE_M 64 +#define MIN_NODES 16 +#define SCALE MIN_NODES/4 + +double rtclock(); + +// Initial Gaussian with exponent alpha +static double uinitial(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*3.2*x+y*y+1.7*z*z))*pow(constants::pi/alpha,-1.5); +} + +static double uinitial2(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + srand(time(0)); + return exp(-alpha*(5*x*x+y*y+z*z))*pow(constants::pi/alpha,-1.5); +} +static double uinitial1(const coord_3d& r) { + const double x=r[0], y=r[1], z=r[2]; + return exp(-alpha*(2*x*x+1.4*y*y+z*z))*pow(constants::pi/alpha,-1.5); +}; + +static double ghaly(const coord_3d& r) { + std::srand(time(NULL)); + const double randVal = std::rand()/1000000000.0; + const double x=r[0], y=r[1], z=r[2]; + return 3.0*exp(-2.0*sqrt(x*x + randVal*randVal + y*y + z*z + 1e-4)); +} +static double Vp(const coord_3d& r) { + return VVV; +} + +class alpha_functor : public FunctionFunctorInterface { +private: + double coeff; +public: + alpha_functor(double coeff=1.0) : coeff(coeff) {} + + virtual double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + return (coeff * (x*x + y*y + z*z) * sin(x*x + y*y + z*z)); + } +}; +// Exact solution at time t +class uexact : public 
FunctionFunctorInterface { + double t; +public: + uexact(double t) : t(t) {} + + double operator()(const coord_3d& r) const { + const double x=r[0], y=r[1], z=r[2]; + double rsq = (x*x+y*y+z*z); + + return exp(VVV*t)*exp(-rsq*alpha/(1.0+4.0*alpha*t*c)) * pow(alpha/((1+4*alpha*t*c)*constants::pi),1.5); + } +}; + + +// Functor to compute exp(f) where f is a madness function +template +struct unaryexp { + void operator()(const Key& key, Tensor& t) const { + UNARY_OPTIMIZED_ITERATOR(T, t, *_p0 = exp(*_p0);); + } + template + void serialize(Archive& ar) {} +}; + +int main(int argc, char** argv) +{ + initialize(argc, argv); + World world(SafeMPI::COMM_WORLD); + + startup(world, argc, argv); + + FunctionDefaults<3>::set_k(k); + FunctionDefaults<3>::set_thresh(thresh); + FunctionDefaults<3>::set_refine(true); + FunctionDefaults<3>::set_autorefine(false); + FunctionDefaults<3>::set_cubic_cell(-L, L); + FunctionDefaults<3>::set_max_refine_level(14); + + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) printf(" Initializing Functions\n"); + if (world.rank() == 0) printf(" %d Functions, %d Functions\n", FUNC_SIZE, FUNC_SIZE_M); + if (world.rank() == 0) printf(" Max-refine-level: %d Functions\n", 14); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + // N and M Functions + real_function_3d h[FUNC_SIZE]; + real_function_3d g[FUNC_SIZE_M]; + + // N and M (Result) Functions + real_function_3d temp_factory_h[FUNC_SIZE]; + real_function_3d temp_factory_g[FUNC_SIZE_M]; + real_function_3d* temp_h[FUNC_SIZE]; + real_function_3d* temp_g[FUNC_SIZE_M]; + + // (SCALE) * N * M Results Functions by Inner-Product + real_function_3d temp_factory[SCALE][FUNC_SIZE][FUNC_SIZE_M]; + real_function_3d* temp[SCALE][FUNC_SIZE][FUNC_SIZE_M]; + + int i, j, k; + double clkbegin, clkend; + clkbegin = rtclock(); + + for (i=0; i* c_op_h[FUNC_SIZE]; + CompressOp* c_op_g[FUNC_SIZE_M]; 
+ InnerOp* inner_op_ug[SCALE][FUNC_SIZE][FUNC_SIZE_M]; + vector*> sequence; + + for (i=0; i("Compress",temp_h[i],&h[i]); + for (j=0; j("Compress",temp_g[j],&g[j]); + + for (k=0; k("Inner",temp[k][i][j],temp_h[i],temp_g[j]); + + for (i=0; i odag(sequence); + odag.processSequence(); + + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(world, &fsequence); + fexecuter.execute(); + + clkend = rtclock() - clkbegin; + if (world.rank() == 0) printf("[the fused version by FuseT] Running Time: %f\n", clkend); + world.gop.fence(); + + for (k=0; k_sum); +// +// +// + if (world.rank() == 0) print ("===================================================="); + if (world.rank() == 0) print ("== MADNESS =============================="); + if (world.rank() == 0) print ("===================================================="); + world.gop.fence(); + + clkbegin = rtclock(); + double resultInner[SCALE][FUNC_SIZE][FUNC_SIZE_M] = {0.0,}; + + for (i=0; i + class MatrixInnerOp : public PrimitiveOp + { + typedef Function KTREE; + typedef FunctionNode KNODE; + typedef Key keyT; + typedef WorldContainer,FunctionNode> dcT; + typedef WorldObject> woT; + typedef GenTensor coeffT; + typedef Tensor tensorT; + + public: + MatrixInnerOp (string opName, KTREE* output, const std::vector& f, const std::vector& g, bool sym); + FuseTContainer compute (const keyT& key, const FuseTContainer &s); + + bool notEmpty(map& notEmptyMap) const + { + unsigned long treeID = _i1->get_impl()->id().get_obj_id(); + return notEmptyMap[treeID]; + } + + bool isDone (const keyT& key) const; + bool isPre () const { return true; } // false does not work. but It should be false. 
+ bool needsParameter () const { return true; } + void reduce (World& world); + + public: + // MatrixInnerOpp + Tensor* _r; + + private: + //!Points to operand trees + const KTREE* _i1; + + //!Points to operand nodes of the tree + KNODE *_t1, *_t2; + + //!Variables for MatrixInnerOp + std::vector _left_v_coeffs; + std::vector _right_v_coeffs; + //dcT& _coeffs; + //dcT& _coeffs_target; + + bool _sym; + std::vector* > _left; + std::vector* > _right; + + std::map checkKeyDoneLeft; + std::map checkKeyDoneRight; + + std::map candidatesLeft; + std::map candidatesRight; + + int _k; // Wavelet order + }; + + // Constructor + // World is needed for communication in the "compute" function + template + MatrixInnerOp::MatrixInnerOp(string opName, KTREE* output, const std::vector& f, const std::vector& g, bool sym) + : PrimitiveOp(opName, output, false, true) + , _sym(sym) + { + this->_r = new Tensor(f.size(), g.size()); + + for (unsigned int i=0; i_r)(i,j) = 0.0; + + for (unsigned int i=0; iget_coeffs() ); + for (unsigned int j=0; jget_coeffs() ); + + // dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + this->_OpID = output->get_impl()->id().get_obj_id(); + + for (unsigned int i=0; i_dInfoVec.push_back(DependencyInfo(&f[i], true, true, false, false)); + + for (unsigned int i=0; i_dInfoVec.push_back(DependencyInfo(&g[i], true, true, false, false)); + + this->_dInfoVec.push_back(DependencyInfo(output,true,true,false,false)); + + woT(f[0].world()); + } + + // + // it should hangle both a parent and a leaf node. 
+ // + template + FuseTContainer + MatrixInnerOp::compute(const keyT& key, const FuseTContainer &s) + { + FuseT_VParameter* inheritedWhole; + FuseT_VType* inheritedLeft; + FuseT_VType* inheritedRight; + + // Processing for Paramter + if (s.get() == 0) + { + inheritedLeft = new FuseT_VType; + inheritedRight = new FuseT_VType; + + for (unsigned int i=0; i<_left.size(); i++) + inheritedLeft->value.push_back(i); + for (unsigned int i=0; i<_right.size(); i++) + inheritedRight->value.push_back(i); + } + else + { + inheritedWhole = new FuseT_VParameter( ((FuseT_VParameter*)s.get())->value ); + + inheritedLeft = new FuseT_VType(((FuseT_VType*)(((inheritedWhole->value[0]).get())))->value); + inheritedRight = new FuseT_VType(((FuseT_VType*)(((inheritedWhole->value[1]).get())))->value); + } + + // Main --- O(M*N) + int indexLeft; + int indexRight; + FuseT_VType whichNodesLeft; // value = std::vector + FuseT_VType whichNodesRight; + + double* A = (double*)malloc(sizeof(double)*16*16*16); + double* B = (double*)malloc(sizeof(double)*16*16*16); + double C = 0.0; + int l, m; + + for (unsigned int i=0; ivalue.size(); i++) + { + indexLeft = inheritedLeft->value[i]; + const KNODE& fnode = _left_v_coeffs[indexLeft].find(key).get()->second; + if (_left_v_coeffs[indexLeft].find(key).get()->second.has_children()) + whichNodesLeft.value.push_back(indexLeft); + + if (fnode.has_coeff()) + { + for (unsigned int j=0; jvalue.size(); j++) + { + indexRight = inheritedRight->value[j]; + + const KNODE& gnode = _right_v_coeffs[indexRight].find(key).get()->second; + if (i==0) + { + if (_right_v_coeffs[indexRight].find(key).get()->second.has_children()) + whichNodesRight.value.push_back(indexRight); + } + + // the actuall computation + if (gnode.has_coeff()) + { + int k; + + for (l=0; l<16; l++){ + for (m=0; m<16; m++) { + for (k=0; k<16; k++){ + A[l*16*16 + m*16 + k] = (fnode.coeff())(l, m, k); + B[l*16*16 + m*16 + k] = (gnode.coeff())(l, m, k); + } + } + } + C = 0.0; + 
cblas::gemm(cblas::CBLAS_TRANSPOSE::Trans, cblas::CBLAS_TRANSPOSE::NoTrans, 1, 1, 16*16*16, 1, A, 16*16*16, B, 16*16*16, 1, &C, 1); + + (*this->_r)(indexLeft, indexRight) += C; + // (*this->_r)(indexLeft, indexRight) += fnode.coeff().trace_conj(gnode.coeff()); + + } + } + } + else + { + if (i==0) + { + for (unsigned int j=0; jvalue.size(); j++) + { + indexRight = inheritedRight->value[j]; + if (_right_v_coeffs[indexRight].find(key).get()->second.has_children()) + whichNodesRight.value.push_back(indexRight); + } + } + } + } + + delete A; + delete B; + + if (whichNodesLeft.value.size() == 0) + checkKeyDoneLeft.insert(std::pair(key,true)); + else + checkKeyDoneLeft.insert(std::pair(key,false)); + + if (whichNodesRight.value.size() == 0) + checkKeyDoneRight.insert(std::pair(key,true)); + else + checkKeyDoneRight.insert(std::pair(key,false)); + + + // + FuseT_VParameter v_parameter; + FuseT_VParameter inner_parameter; + + FuseTContainer candiParameter_L(static_cast*> (new FuseT_VType(whichNodesLeft.value))); + FuseTContainer candiParameter_R(static_cast*> (new FuseT_VType(whichNodesRight.value))); + inner_parameter.value.push_back(candiParameter_L); + inner_parameter.value.push_back(candiParameter_R); + + for (KeyChildIterator kit(key); kit; ++kit) + { + FuseTContainer wrapper(static_cast*>(new FuseT_VParameter(inner_parameter.value))); + v_parameter.value.push_back(wrapper); + } + + // Return Parameters + FuseTContainer targets(static_cast*>(new FuseT_VParameter(v_parameter.value))); + return targets; + } + + // isDone + template + bool + MatrixInnerOp::isDone(const keyT& key) const + { + bool isE1; + bool isE2; + + // O(M + N) + for (unsigned int i=0; i<_left.size(); i++) + { + isE1 = _left[i]->get_coeffs().probe(key) || isE1; + } + if (!isE1) { std::cout<get_coeffs().probe(key) || isE2; + } + if (!isE2) { std::cout<second) return true; + if (checkKeyDoneRight.find(key)->second) return true; + + return false; + } + + template + void + MatrixInnerOp::reduce(World& 
world){ + world.gop.sum(_r->ptr(),_left.size()*_right.size()); + } + +}; /*fuset*/ + +#endif /* __fuset_MatrixInnerOp_h__ */ diff --git a/src/madness/mra/FuseT/AddOp.h b/src/madness/mra/FuseT/AddOp.h new file mode 100644 index 00000000000..1c7dce3fc03 --- /dev/null +++ b/src/madness/mra/FuseT/AddOp.h @@ -0,0 +1,175 @@ +// +// Ghaly +// +// Compresses the function, transforming into wavelet basis. +// Possible non-blocking comm. +// +// By default fence=true meaning that this oepration completes before returning, +// othewise if fence=false it returns without fencing and the user must invoke +// workd.gop.fence() to assure global completion before using the function +// for other purposes. +// +// Noop if already compressed or if not initialized. +// +// Since reconstruction/compression do not discard information we define them +// as const ... "logical constness" not "bitwise contness". +// +#ifndef __MADNESS_MRA_FUSET_ADD_OP__INCLUDED__ +#define __MADNESS_MRA_FUSET_ADD_OP__INCLUDED__ + +#include "PrimitiveOp.h" +#include "../mra.h" +#include "../function_common_data.h" +#include "../../world/MADworld.h" +#include "../../tensor/tensor.h" +#include "../../tensor/gentensor.h" + +namespace madness +{ + template + class AddOp : public PrimitiveOp + { + typedef Function KTREE; + typedef FunctionNode KNODE; // identical to nodeT + typedef FunctionImpl implT; + typedef Key keyT; + typedef WorldContainer,FunctionNode> dcT; + typedef WorldObject> woT; ///< Base class world object type + typedef GenTensor coeffT; //Type of tensor used to hold coeffs + typedef Tensor tensorT; + + public: + AddOp(string opName, KTREE* output, const KTREE* i1, const KTREE*i2); + FuseTContainer compute(const keyT& key, const FuseTContainer &s); + + bool notEmpty(map& notEmptyMap) const{ + unsigned long treeID = _i1->get_impl()->id().get_obj_id(); + unsigned long treeID2 = _i2->get_impl()->id().get_obj_id(); + return notEmptyMap[treeID2] && notEmptyMap[treeID]; + } + + // + bool isDone(const 
keyT& key) const; + bool isPre() const { return true; } + bool needsParameter() const { return false; } + void reduce(World& world) { } + public: // for Add Product (specific) + //void do_add() { } + + private: + //!Points to operand trees + const KTREE* _i1, *_i2, *_result; + + //!Points to operand nodes of the tree + KNODE *_t1, *_t2; + + //!Variables for CompressOp + dcT& _coeffs_left; + dcT& _coeffs_right; + dcT& _coeffs_target; + const FunctionCommonData& _cdata; + TensorArgs _targs; + + // Wavelet order + int _k; + }; + + + // Constructor + // World is needed for communication in the "compute" function + template + AddOp::AddOp(string opName, KTREE* output, const KTREE* i1, const KTREE* i2) + : PrimitiveOp(opName, output, false,true) + , _i1(i1) + , _i2(i2) + , _result(output) + , _cdata(FunctionCommonData::get(i1->k())) + , _targs(i1->get_impl()->get_tensor_args()) + , _coeffs_left(i1->get_impl()->get_coeffs()) + , _coeffs_right(i2->get_impl()->get_coeffs()) + , _coeffs_target(output->get_impl()->get_coeffs()) + , _k(i1->get_impl()->get_k()) + { + //dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + this->_OpID = output->get_impl()->id().get_obj_id(); + this->_dInfoVec.push_back(DependencyInfo(i1,true,false,false,false)); + this->_dInfoVec.push_back(DependencyInfo(i2,true,false,false,false)); + this->_dInfoVec.push_back(DependencyInfo(output,true,false,false,false)); + + woT(i1->world()); + } + + // + template + FuseTContainer + AddOp::compute(const keyT& key, const FuseTContainer &s) + { + // other is node + KNODE* target_node; + TensorArgs targs2 = this->_targs; + bool isLeft = _coeffs_left.probe(key); + bool isRight = _coeffs_right.probe(key); + T alpha = 1.0; + T beta = 1.0; + coeffT target_coeff = coeffT(); + + if (isLeft == true) + { // Left has a node with the key + const KNODE& node_left = _coeffs_left.find(key).get()->second; + coeffT coeff_left = node_left.coeff().full_tensor_copy(); + //target_coeff.copy(coeff_left); + + if (isRight == true) + { // 
Right also has a node with the key + const KNODE& node_right = _coeffs_right.find(key).get()->second; + coeffT coeff_right = node_right.coeff().full_tensor_copy(); + target_coeff = coeff_left + coeff_right; // can I use "+" operator directly?? + target_node = new KNODE(target_coeff, true); + } + else + { // Only Left has a node with the key + target_node = new KNODE(coeff_left, true); + } + } + else + { + if (isRight == true) + { + // only Right has a node with the key + const KNODE& node_right = _coeffs_right.find(key).get()->second; + coeffT coeff_right = node_right.coeff().full_tensor_copy(); + target_node = new KNODE(coeff_right, true); + } + else + { + std::cout<<"ERROR!!!! should not be happend"<_result->get_impl()->get_coeffs().replace(key, *target_node); + + FuseTContainer result; + return result; + } + + // isDone + template + bool + AddOp::isDone(const keyT& key) const + { + bool isE1 = _i1->get_impl()->get_coeffs().probe(key); + if(!isE1) return isE1; + + bool isE2 = _i2->get_impl()->get_coeffs().probe(key); + if(!isE2) return isE2; + + bool isLeaf = !_i1->get_impl()->get_coeffs().find(key).get()->second.has_children(); + if (isLeaf) return isLeaf; + + bool isLeaf2 = !_i2->get_impl()->get_coeffs().find(key).get()->second.has_children(); + return isLeaf2; + } + +}; /*fuset*/ + +#endif /* __fuset_AddOp_h__ */ diff --git a/src/madness/mra/FuseT/AddOp_prev.h b/src/madness/mra/FuseT/AddOp_prev.h new file mode 100644 index 00000000000..c3fecc4eae0 --- /dev/null +++ b/src/madness/mra/FuseT/AddOp_prev.h @@ -0,0 +1,90 @@ +#ifndef __fuset_AddOp_h__ +#define __fuset_AddOp_h__ + +#include "PrimitiveOp.h" + +namespace fuset { + + template + class AddOp : public PrimitiveOp{ + typedef KaryTree KTREE; + typedef Node KNODE; + + public: + /*!Adds trees i1 and i2 and creates result tree*/ + AddOp(string name, KTREE *result, const KTREE *i1, const KTREE *i2); + + /*!Adds two nodes from i1 and i2. 
+ If one exists and another is empty, the it just replaces the values at the result node with data from the node that exists*/ + void compute(int n, TRANS l); + + /*!Checks to see if the add operation needs to recurse further down*/ + bool isDone(int n, TRANS l) const; + + bool isPre() const { return true; } + + bool notEmpty(map &emptyMap) const{ + bool retValue = (emptyMap[_i1->_treeID] || emptyMap[_i2->_treeID]); + //cout<<"Add Not Empty : "< + AddOp::AddOp(string name, KTREE *output, const KTREE *i1, const KTREE *i2) + : PrimitiveOp(name, output,false), + _i1(i1), + _i2(i2) + { + //dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + this->_OpID = output->_treeID; + this->_dInfoVec.push_back(DependencyInfo(i1,true,false,false,false)); + this->_dInfoVec.push_back(DependencyInfo(i2,true,false,false,false)); + } + + template + void + AddOp::compute(int n, TRANS l) { + // std::cout<<"Entering n : "<getNode(n,l,isE1); + _t2 = _i2->getNode(n,l,isE2); + // std::cout<<"Before if n : "<_result->createNode(n,l); + t3->setZero(); + if(!isE1) + for(int i =0; i< _t1->getLength(); i++) + t3->getData()[i] += _t1->getData()[i]; + // std::cout<<"Before isE2 n : "<getLength(); i++) + t3->getData()[i] += _t2->getData()[i]; + // std::cout<<"Before isDone n : "<setType(LEAF); + // std::cout<<"After isDone n : "<deleteEmptyNode(); + _t2->deleteEmptyNode(); + } + + template + bool + AddOp::isDone(int n, TRANS l) const { + if(_t1->getType() !=INTERIOR + && _t2->getType() != INTERIOR) + return true; + return false; + } + +}; /*fuset*/ + +#endif /* __fuset_AddOp_h__ */ diff --git a/src/madness/mra/FuseT/CcOp.h b/src/madness/mra/FuseT/CcOp.h new file mode 100644 index 00000000000..fa6ceea5f78 --- /dev/null +++ b/src/madness/mra/FuseT/CcOp.h @@ -0,0 +1,105 @@ +#ifndef __fuset_CcOp_h__ +#define __fuset_CcOp_h__ + +#include "PrimitiveOp.h" + +namespace fuset { + + template + class CcOp : public PrimitiveOp { + typedef KaryTree KTREE; + typedef Node KNODE; + private: + //!Points to operand trees + 
const KTREE *_i1; + + int _numChild; + //!Points to operand nodes of the tree + KNODE *_t1, *_t2; + + public: + CcOp(string name, KTREE *output, const KTREE *i1); + + void compute(int n, TRANS l); + + bool isDone(int n, TRANS l) const; + + bool isPre() const { return false; } + + bool notEmpty(map& emptyMap) const{ + //cout<<"Not Empty i1 : "<_treeID]<_treeID]; + } + + + }; + + template + CcOp::CcOp(string name, KTREE *output, const KTREE *i1) + : PrimitiveOp(name, output,false), + _i1(i1), + _numChild(output->getNumChild()) + { + //dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + this->_OpID = output->_treeID; + this->_dInfoVec.push_back(DependencyInfo(i1,true,false,false,false)); + this->_dInfoVec.push_back(DependencyInfo(output,false,false,true,false)); + + + } + + template + void CcOp::compute(int n, TRANS l) { + bool isE1; + _t1 = _i1->getNode(n,l,isE1); + KNODE* rNode = NULL; + + if(!isE1){ + rNode = this->_result->createNode(n,l); + rNode->setZero(); + + for(int i =0; i< _t1->getLength(); i++) + rNode->getData()[i] += _t1->getData()[i]; + } else { + std::cerr<<"Not supposed to be empty "<_result->childTranslation(l,childID); + + KNODE* temp = this->_result->getNode(n_child,l_child,isE1); + + if(isE1){ + std::cerr<<"Not supposed to be empty"<getLength(); i++) + rNode->getData()[i] += temp->getData()[i]; + } + + }else{ + rNode->setType(LEAF); + } + } + + template + bool CcOp::isDone(int n, TRANS l) const { + //cout<<"Is done "<* temp = _i1->getNode(n,l,isE1); + if(temp->getType() == LEAF) + return true; + return false; + } + +}; /*fuset*/ + +#endif /* __fuset_CcOp_h__ */ + diff --git a/src/madness/mra/FuseT/CompressOp.h b/src/madness/mra/FuseT/CompressOp.h new file mode 100644 index 00000000000..76c6122254e --- /dev/null +++ b/src/madness/mra/FuseT/CompressOp.h @@ -0,0 +1,268 @@ +// +// Ghaly +// +// Compresses the function, transforming into wavelet basis. +// Possible non-blocking comm. 
+// +// By default fence=true meaning that this oepration completes before returning, +// othewise if fence=false it returns without fencing and the user must invoke +// workd.gop.fence() to assure global completion before using the function +// for other purposes. +// +// Noop if already compressed or if not initialized. +// +// Since reconstruction/compression do not discard information we define them +// as const ... "logical constness" not "bitwise contness". +// +#ifndef __MADNESS_MRA_FUSET_COMPRESS_OP__INCLUDED__ +#define __MADNESS_MRA_FUSET_COMPRESS_OP__INCLUDED__ + +#include "PrimitiveOp.h" +#include "FuseTContainer.h" +#include "../mra.h" +#include "../function_common_data.h" +#include "../../world/MADworld.h" +#include "../../tensor/tensor.h" +#include "../../tensor/gentensor.h" + +namespace madness +{ + template + class CompressOp : public PrimitiveOp + { + typedef Function KTREE; + typedef FunctionNode KNODE; + typedef Key keyT; + typedef WorldContainer,FunctionNode> dcT; + typedef WorldObject> woT; + typedef GenTensor coeffT; + typedef Tensor tensorT; + + public: + CompressOp (string opName, KTREE* output, const KTREE* i1); + FuseTContainer compute (const keyT& key, const FuseTContainer &s); + //Future> postCompute (const keyT& key, const std::vector>> &s); + + bool notEmpty(map& notEmptyMap) const + { + unsigned long treeID = _i1->get_impl()->id().get_obj_id(); + //cout<<"Checking for treeID : "< child_patch (const keyT& child) const; + + private: + //!Points to operand trees + const KTREE* _i1; + + //!Points to operand nodes of the tree + KNODE *_t1, *_t2; + + //!Variables for CompressOp + dcT& _coeffs; + dcT& _coeffs_target; + const FunctionCommonData& _cdata; + TensorArgs _targs; + + int _k; // Wavelet order + bool _nonstandard; + bool _keepleaves; + bool _redundant; + bool _root; + }; + + // Constructor + // World is needed for communication in the "compute" function + template + CompressOp::CompressOp(string opName, KTREE* output, const KTREE* 
i1) + : PrimitiveOp(opName, output, false) + , _i1(i1) + , _cdata(FunctionCommonData::get(i1->k())) + , _targs(i1->get_impl()->get_tensor_args()) + , _nonstandard(false) + , _keepleaves(false) + , _redundant(false) + , _root(false) + , _coeffs(i1->get_impl()->get_coeffs()) + , _coeffs_target(output->get_impl()->get_coeffs()) + , _k(i1->get_impl()->get_k()) + { + // dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + this->_OpID = output->get_impl()->id().get_obj_id(); + this->_dInfoVec.push_back(DependencyInfo(i1,true,false,true,false)); + this->_dInfoVec.push_back(DependencyInfo(output,true,false,true,false)); + + woT(i1->world()); + } + + // + // it should hangle both a parent and a leaf node. + // + template + FuseTContainer + CompressOp::compute(const keyT& key, const FuseTContainer &s) + { + // For Root + if ( key == keyT(0) ) // need to be changed. + { + this->_result->get_impl()->compressed = true; + this->_result->get_impl()->nonstandard = this->_nonstandard; + this->_result->get_impl()->redundant = this->_redundant; + } + + // get fetches remote data (here actually local) + FuseT_CoeffT* s_coeff; + FuseT_VParameter* v_parameter; + KNODE& node = _coeffs.find(key).get()->second; + if (node.has_children()) + { + // + // Intermediate Nodes + // + KNODE temp; // new parent which should be connected to its children. + s_coeff = new FuseT_CoeffT(); + v_parameter = new FuseT_VParameter(); + v_parameter->value = ((FuseT_VParameter*)s.get())->value; + tensorT d(this->_cdata.v2k); + int i = 0; + + // This could be a potential issue related to the children + // What if a parent is executed before its all children are not done? + // Besides, if I think the below way creates all possible children + // even if the original function has some of them. 
+ for (KeyChildIterator kit(key); kit; ++kit, ++i) + { + d(child_patch(kit.key())) += ((FuseT_CoeffT*)((v_parameter->value[i].get())))->value.full_tensor_copy(); + delete v_parameter->value[i].data; + temp.set_has_children_recursive(_coeffs_target, kit.key()); // children + } + + + d = filter(d); + + //this->_result->get_impl()->get_coeffs().replace(key,temp); + _coeffs_target.replace(key, temp); + + // can we use MADNESS_ASSERT?? + typename dcT::accessor acc; + typename dcT::accessor acc_t; + _coeffs.find(acc, key); + _coeffs_target.find(acc_t, key); + + // acc = parent + if (acc->second.has_coeff()) + { + const tensorT c = acc->second.coeff().full_tensor_copy(); + if (c.dim(0) == _k) + d(_cdata.s0) += c; + else + d += c; + } + + // tighter thresh for intenal nodes + TensorArgs targs2 = this->_targs; + targs2.thresh*=0.1; + + // need the deep copy for contiguity + coeffT ss = coeffT(copy(d(_cdata.s0)), targs2); + + if (key.level() > 0 && !_nonstandard) + d(_cdata.s0) = 0.0; + + // insert either sum or difference coefficients + if (this->_redundant) + { + acc_t->second.set_coeff(ss); + } + else + { + coeffT dd = coeffT(d, targs2); + acc_t->second.set_coeff(dd); + } + + // Making Future form + s_coeff->value = ss.full_tensor_copy(); + FuseTContainer result2(static_cast*>(s_coeff)); + return result2; + } + else + { + // + // Leaf Nodes + // + KNODE temp; + s_coeff = new FuseT_CoeffT(); + + if (!_i1->get_impl()->get_coeffs().probe(key)) + { + print (node); + } + + s_coeff->value = node.coeff().full_tensor_copy(); + + + + + if (!_keepleaves) + { + temp.clear_coeff(); + this->_result->get_impl()->get_coeffs().replace(key,temp); + } + FuseTContainer result(static_cast*>(s_coeff)); + return result; + } + + // Create a Return + cout<<"This should not have happenned"< resultV; + return resultV; + } + + // + // helper functions for compressOp + // + template + std::vector + CompressOp::child_patch(const keyT& child) const + { + std::vector s(NDIM); + const Vector& l = 
child.translation(); + for (std::size_t i = 0; i + GenTensor + CompressOp::filter(const coeffT& s) const + { + coeffT result = transform(s, _cdata.hgT); + return result; + } + + // isDone + template + bool CompressOp::isDone(const keyT& key) const + { + bool isE1 = _i1->get_impl()->get_coeffs().probe(key); + if(!isE1) { + printf ("possible?: %d\n", isE1); + return isE1; + } + + bool isLeaf = !_i1->get_impl()->get_coeffs().find(key).get()->second.has_children(); + return isLeaf; + } + + +}; /*fuset*/ + +#endif /* __fuset_CompressOp_h__ */ diff --git a/src/madness/mra/FuseT/CopyOp.h b/src/madness/mra/FuseT/CopyOp.h new file mode 100644 index 00000000000..dcf910506b5 --- /dev/null +++ b/src/madness/mra/FuseT/CopyOp.h @@ -0,0 +1,97 @@ + +#ifndef __MADNESS_MRA_FUSET_COPY_OP__INCLUDED__ +#define __MADNESS_MRA_FUSET_COPY_OP__INCLUDED__ + +#include "PrimitiveOp.h" + +namespace madness +{ + template + class CopyOp : public PrimitiveOp + { + typedef Function KTREE; + typedef FunctionNode KNODE; + typedef Key keyT; + typedef WorldContainer , FunctionNode > dcT; + + ///< Type of container holding the nodes + + public: + CopyOp(string opName, KTREE* output, const KTREE* i1); + + // + void compute(const keyT& key); + + // Non-blocking + FuseTContainer compute(const keyT& key, const FuseTContainer &s); + + // Blocking + //Future> postCompute(const keyT& key, const std::vector>> &s) { } + + bool isDone(const keyT& key) const; + bool isPre() const { return true; } + bool needsParameter() const { return false; } + void reduce(World& world){} + private: + //!Points to operand trees + const KTREE* _i1; + + //!Points to operand nodes of the tree + KNODE *_t1, *_t2; + }; + + template + CopyOp::CopyOp(string opName, KTREE* output, const KTREE* i1) + : PrimitiveOp(opName, output, false), + _i1(i1) + { + //dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + this->_OpID = output->get_impl()->id().get_obj_id(); + this->_dInfoVec.push_back(DependencyInfo(i1,true,false,false,false)); + 
this->_dInfoVec.push_back(DependencyInfo(output,true,false,false,false)); + } + + template + void + CopyOp::compute(const keyT& key) + { + typename dcT::iterator it= _i1->get_impl()->get_coeffs().find(key).get(); + if (it == _i1->get_impl()->get_coeffs().end()) + { + cerr<<"This should not have happenned"<second; + this->_result->get_impl()->get_coeffs().replace(key,node); + } + + template + FuseTContainer + CopyOp::compute(const keyT& key, const FuseTContainer &s) + { + typename dcT::iterator it= _i1->get_impl()->get_coeffs().find(key).get(); + if (it == _i1->get_impl()->get_coeffs().end()) + { + cerr<<"This should not have happenned"<second; + this->_result->get_impl()->get_coeffs().replace(key,node); + + FuseTContainer temp; + return temp; + } + + template + bool CopyOp::isDone(const keyT& key) const + { + bool isE1 = _i1->get_impl()->get_coeffs().probe(key); + if(!isE1) return isE1; + bool isLeaf = !_i1->get_impl()->get_coeffs().find(key).get()->second.has_children(); + return isLeaf; + } + +}; /*fuset*/ + +#endif /* __fuset_CopyOp_h__ */ diff --git a/src/madness/mra/FuseT/CpOp.h b/src/madness/mra/FuseT/CpOp.h new file mode 100644 index 00000000000..15d74dde7af --- /dev/null +++ b/src/madness/mra/FuseT/CpOp.h @@ -0,0 +1,89 @@ +#ifndef __fuset_CpOp_h__ +#define __fuset_CpOp_h__ + +#include "PrimitiveOp.h" + +namespace fuset { + template + class CpOp : public PrimitiveOp { + typedef KaryTree KTREE; + typedef Node KNODE; + public: + CpOp(string opName, KTREE *output, const KTREE *i1); + + void compute(int n, TRANS l); + + bool isDone(int n, TRANS l) const; + + bool isPre() const { return true; } + + private: + //!Points to operand trees + const KTREE *_i1; + + //!Points to operand nodes of the tree + KNODE *_t1, *_t2; + }; + + template + CpOp::CpOp(string opName, KTREE *output, const KTREE *i1) + : PrimitiveOp(opName, output, false), + _i1(i1) + { + //dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + this->_OpID = output->_treeID; + + 
this->_dInfoVec.push_back(DependencyInfo(i1,true,false,false,false)); + this->_dInfoVec.push_back(DependencyInfo(output,false,true,false,false)); + + } + + template + void CpOp::compute(int n, TRANS l) { + bool isE1; + _t1 = _i1->getNode(n,l,isE1); + + if(!isE1){ + + KNODE* rNode = this->_result->createNode(n,l); + rNode->setZero(); + + if(n !=0){ + int n_parent = n-1; + TRANS l_parent = this->_result->parentTranslation(l); + bool isEmpty; + _t2 = this->_result->getNode(n_parent, l_parent, isEmpty); + if(!isEmpty) { + rNode->setData(_t2->getData()); + /* for(int i =0; i< _t2->getLength(); i++) */ + /* rNode->getData()[i] = _t2->getData()[i]; */ + } + else{ + + std::cerr<<"Not supposed to be empty (n,l) :"<setData(_t1->getData()); + for(int i =0; i< _t1->getLength(); i++) + rNode->getData()[i] += _t1->getData()[i]; + + if(isDone(n,l)) + rNode->setType(LEAF); + } + } + + template + bool CpOp::isDone(int n, TRANS l) const { + bool isE1; + Node* temp = _i1->getNode(n,l,isE1); + if(temp->getType() != INTERIOR) + return true; + return false; + } + +}; /*fuset*/ + +#endif /* __fuset_CpOp_h__ */ diff --git a/src/madness/mra/FuseT/DerivativeOp.h b/src/madness/mra/FuseT/DerivativeOp.h new file mode 100644 index 00000000000..384100457e5 --- /dev/null +++ b/src/madness/mra/FuseT/DerivativeOp.h @@ -0,0 +1,311 @@ + +// Ghaly +// +// Derivatives the function, transforming into scaling function basis. Possible non-blocking comm. 
+// +#ifndef __MADNESS_MRA_FUSET_DERIVATIVE_OP__INCLUDED__ +#define __MADNESS_MRA_FUSET_DERIVATIVE_OP__INCLUDED___OP__INCLUDED__ + +#include "PrimitiveOp.h" +#include "../function_common_data.h" +#define LEFT 0 +#define CENTER 1 +#define RIGHT 2 +namespace madness +{ + template + class DerivativeOp : public PrimitiveOp, public WorldObject> + { + public: + typedef Function KTREE; + typedef FunctionNode KNODE; + typedef GenTensor coeffT; + typedef Key keyT; + typedef std::pair argT; + typedef WorldContainer,FunctionNode> dcT; + typedef WorldContainer > daT; + typedef Tensor tensorT; + typedef DerivativeOp dOp; + typedef WorldObject woT; + int pending_task, pending_from_left, pending_from_right; + public: + DerivativeOp(string opName, KTREE* output, const KTREE* i1, World& world, Derivative* dOp); + Future > computeFuture(const keyT& key, const FuseTContainer &s); + bool isDone(const keyT& key) const; + bool isPre() const { return true; } + bool needsParameter() const { return true; } + void reduce(World& world){} + bool returnsFuture(){return true;} + + private: + + void createFuture(const keyT& key); + void pushFromLeft(const keyT& key, const coeffT& coeff); + void pushFromRight(const keyT& key, const coeffT& coeff); + //FuseTContainer computeDerivative(const keyT& key, Future& left, argT& center, Future& right); + FuseTContainer computeDerivative(const keyT& key, argT& left, argT& center, argT& right); + + //!Points to operand trees + const KTREE* _i1; + const Derivative* _D; + + //!Points to operand nodes of the tree + KNODE *_t1, *_t2; + + daT _fromLeft, _fromRight; + + World& _world; + //!Variables for DerivativeOp + const FunctionCommonData& _cdata; + + }; + + // Constructor + template + DerivativeOp::DerivativeOp(string opName, KTREE* output, const KTREE* i1, World& world, Derivative* dOp) + : PrimitiveOp(opName, output, false) + , _i1(i1) + , _D(dOp) + , _cdata(FunctionCommonData::get(i1->k())) + , woT(world) + , _world(world) + + { + pending_from_left = 0; + 
pending_from_right = 0; + pending_task = 0; + woT::process_pending(); + const std::shared_ptr > > _pmap(FunctionDefaults::get_pmap()); + _fromLeft = daT(world,_pmap,false); + _fromRight = daT(world,_pmap,false); + _fromLeft.process_pending(); + _fromRight.process_pending(); + + //dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + this->_OpID = output->get_impl()->id().get_obj_id(); + this->_dInfoVec.push_back(DependencyInfo(i1,true,true,false,false)); + this->_dInfoVec.push_back(DependencyInfo(output,true,true,false,false)); + } + + + + template + Future > + DerivativeOp::computeFuture(const keyT& key, const FuseTContainer &s) + { + //cout<*)s.get())->value)[LEFT]; + center = (((FuseT_VArgT*)s.get())->value)[CENTER]; + right = (((FuseT_VArgT*)s.get())->value)[RIGHT]; + }else{ + cerr<<"This should not have happenned"<get_impl()->get_coeffs().find(key).get(); + KNODE& node = it->second; // source + if(!node.has_children()){ + //cout<neighbor(key,-1); + rightKey = _D->neighbor(key,+1); + + if(!leftKey.is_invalid()){ + pending_from_right++; + woT::task(_fromRight.owner(leftKey),&DerivativeOp::pushFromRight, leftKey, node.coeff()); + + } else + left.first = leftKey; + + if(!rightKey.is_invalid()){ + pending_from_left++; + woT::task(_fromLeft.owner(rightKey),&DerivativeOp::pushFromLeft, rightKey, node.coeff()); + + } else + right.first = rightKey; + + } + + + //creates futures for this key from left and right + //incase it is not created by fromLeft or fromRight yet + createFuture(key); + + /*************************************************************** + Set up the futures that the actual computation depends upon + ***************************************************************/ + + Future leftPara; + Future rightPara; + + if(left.second.has_data() || left.first.is_invalid()) + leftPara = Future(left); + else{ + typename daT::accessor acc; + if(_fromLeft.find(acc,key)) + leftPara = acc->second; + else + cerr<<"This should not have happenned"<(right); + else{ + typename 
daT::accessor acc; + if(_fromRight.find(acc,key)) + rightPara = acc->second; + else + cerr<<"This should not have happenned"< > returnParameter; + pending_task++; + returnParameter = woT::task(_world.rank(),&DerivativeOp::computeDerivative, key, leftPara, center, rightPara); + return returnParameter; + } + + template + FuseTContainer + //DerivativeOp::computeDerivative(const keyT& key, Future& left, argT& center, Future& right) + DerivativeOp::computeDerivative(const keyT& key, argT& left, argT& center, argT& right) + { + + //std:://cout<do_diff2i(_i1->get_impl().get(), this->_result->get_impl().get(), key, left.get(), center, right.get()); + _D->do_diff2i(_i1->get_impl().get(), this->_result->get_impl().get(), key, left, center, right); + + //All Coefficients are available, but the key is a boundary, so use boundary calculations + }else if (c && ( (rInv && lInv) || (l&&rInv) || (r&&lInv) ) ){ + //_D->do_diff2b(_i1->get_impl().get(), this->_result->get_impl().get(), key, left.get(), center, right.get()); + //cout<do_diff2b(_i1->get_impl().get(), this->_result->get_impl().get(), key, left, center, right); + + //Some coefficients are not available, so this is an empy interior node + }else{ + //cout<_result->get_impl()->get_coeffs().replace(key,KNODE(coeffT(),true)); // Empty internal node + } + pending_task--; + if(!isInterior){ + return FuseTContainer(); + } + + //Construct Parameters if recursive call is required + FuseT_VParameter* v_parameter = new FuseT_VParameter(); + for (KeyChildIterator kit(key); kit; ++kit) { + const keyT& child = kit.key(); + FuseT_VArgT* tvec = new FuseT_VArgT(); + if ((child.translation()[_D->get_axis()]&1) == 0) { + //tvec->value.push_back(left.get()); + tvec->value.push_back(left); + tvec->value.push_back(center); + tvec->value.push_back(center); + }else{ + tvec->value.push_back(center); + tvec->value.push_back(center); + //tvec->value.push_back(right.get()); + tvec->value.push_back(right); + } + FuseTContainer 
wrapper(static_cast*>(tvec)); + v_parameter->value.push_back(wrapper); + } + + FuseTContainer temp(static_cast*>(v_parameter)); + return temp; + } + + template + void + DerivativeOp::createFuture(const keyT& key){ + typename daT::accessor acc; + if(!_fromLeft.find(acc,key)) + _fromLeft.insert(acc,key); + + if(!_fromRight.find(acc,key)) + _fromRight.insert(acc,key); + } + + + template + void + DerivativeOp::pushFromLeft(const keyT& key, const coeffT& coeff){ + if(_fromLeft.owner(key) == _world.rank()){ + typename daT::accessor acc; + if(!_fromLeft.find(acc,key)) + _fromLeft.insert(acc,key); + acc->second = Future(std::make_pair(_D->neighbor(key,-1),coeff)); + }else{ + cerr<<"Should not have happened. "< + void + DerivativeOp::pushFromRight(const keyT& key, const coeffT& coeff){ + if(_fromLeft.owner(key) == _world.rank()){ + typename daT::accessor acc; + if(!_fromRight.find(acc,key)) + _fromRight.insert(acc,key); + acc->second = Future(std::make_pair(_D->neighbor(key,+1),coeff)); + }else{ + cerr<<"Should not have happened. 
"< + bool DerivativeOp::isDone(const keyT& key) const + { + + bool isLeaf = !this->_result->get_impl()->get_coeffs().find(key).get()->second.has_children(); + //cout< + class DiffOp : public PrimitiveOp + { + typedef Function KTREE; + typedef FunctionNode KNODE; // identical to nodeT + typedef Key keyT; + typedef WorldContainer, FunctionNode> dcT; + typedef WorldObject> woT; ///< Base class world object type + + typedef GenTensor coeffT; //Type of tensor used to hold coeffs + typedef Tensor tensorT; + typedef std::pair argT; + + + ///< Type of container holding the nodes + + public: + DiffOp (string opName, KTREE* output, const KTREE* i1); + void compute (const keyT& key) { } + FuseTContainer compute (const keyT& key, const FuseTContainer &s) { } + + // + //Future> postCompute(const keyT& key, const std::vector>> &s) { } + bool isDone (const keyT& key) const; + bool isPre () const { return true; } + bool needsParameter () const { return false; } + + public: // for DiffOp + + Key neighbor (const keyT& key, int step) const; + argT find_neighbor (const Key& key, int step) const; + + private: + //!Points to operand trees + const KTREE* _i1; + + //!Points to operand nodes of the tree + KNODE *_t1, *_t2; + + //!Variables for CompressOp + dcT& _coeffs; + const FunctionCommonData& _cdata; + TensorArgs _targs; + bool _temp; + int _num; + + int _k; // Wavelet order + }; + + + // Constructor + // World is needed for communication in the "compute" function + template + DiffOp::DiffOp(string opName, KTREE* output, const KTREE* i1) + : PrimitiveOp(opName, output, false) + , _i1(i1) + , _cdata(FunctionCommonData::get(i1->k())) + , _targs(i1->get_impl()->get_tensor_args()) + , _coeffs(i1->get_impl()->get_coeffs()) + , _k(i1->get_impl()->get_k()) + { + // output is itself. 
+ + //dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + this->_OpID = output->get_impl()->id().get_obj_id(); + this->_dInfoVec.push_back(DependencyInfo(i1,true,false,false,false)); + this->_dInfoVec.push_back(DependencyInfo(output,true,false,false,false)); + + this->_num = 0; + this->_temp = false; + woT(i1->world()); + } + +/* + template + void FunctionImpl::do_diff1(const DerivativeBase* D, + const implT* f, + const keyT& key, + const std::pair& left, + const std::pair& center, + const std::pair& right) { + D->do_diff1(f,this,key,left,center,right); + } +*/ +/* + void do_diff1(const implT* f, implT* df, const keyT& key, + const argT& left, + const argT& center, + const argT& right) const { + MADNESS_ASSERT(axisget_coeffs().replace(key,nodeT(coeffT(),true)); + for (KeyChildIterator kit(key); kit; ++kit) { + const keyT& child = kit.key(); + if ((child.translation()[axis]&1) == 0) { + // leftmost child automatically has right sibling + forward_do_diff1(f, df, child, left, center, center); + } + else { + // rightmost child automatically has left sibling + forward_do_diff1(f, df, child, center, center, right); + } + } + } + else { + forward_do_diff1(f, df, key, left, center, right); + } + } +*/ +/* + void forward_do_diff1(const implT* f, implT* df, const keyT& key, + const argT& left, + const argT& center, + const argT& right) const { + + const dcT& coeffs = f->get_coeffs(); + ProcessID owner = coeffs.owner(key); + + if (owner == world.rank()) { + if (!left.second.has_data()) { + woT::task(owner, &madness::DerivativeBase::do_diff1, + f, df, key, find_neighbor(f, key,-1), center, right, + TaskAttributes::hipri()); + } + else if (!right.second.has_data()) { + woT::task(owner, &madness::DerivativeBase::do_diff1, + f, df, key, left, center, find_neighbor(f, key,1), + TaskAttributes::hipri()); + } + // Boundary node + else if (left.first.is_invalid() || right.first.is_invalid()) { + woT::task(owner, &madness::DerivativeBase::do_diff2b, + f, df, key, left, center, right); + } + 
// Interior node + else { + woT::task(owner, &madness::DerivativeBase::do_diff2i, + f, df, key, left, center, right); + } + } + else { + df->task(owner, &madness::FunctionImpl::forward_do_diff1, + this, f, key, left, center, right, TaskAttributes::hipri()); + } + } +*/ +/* + Key neighbor(const keyT& key, int step) const { + Vector l = key.translation(); + l[axis] += step; + if (!enforce_bc(bc(axis,0), bc(axis,1), key.level(), l[axis])) { + return keyT::invalid(); + } + else { + return keyT(key.level(),l); + } + } + + Future + find_neighbor(const implT* f, const Key& key, int step) const { + keyT neigh = neighbor(key, step); + if (neigh.is_invalid()) { + return Future(argT(neigh,coeffT(vk,f->get_tensor_args()))); // Zero bc + } + else { + Future result; + f->task(f->get_coeffs().owner(neigh), &implT::sock_it_to_me, neigh, result.remote_ref(world), TaskAttributes::hipri()); + return result; + } + } +*/ + + // neighbor from Derivative + template + Key + DiffOp::neighbor(const keyT& key, int step) const + { + Vector l = key.translation(); + // l[axis] += step; + // enforce_bc(??) + + + return keyT::invalid(); + } + + // find_neighbor from Derivative + template + std::pair,GenTensor> + DiffOp::find_neighbor(const Key& key, int step) const + { + keyT neigh = neighbor(key, step); + + if (neigh.is_invalid()) + { + argT temp; + return temp; + // + } + else + { + argT result; + // f-> + // sock_it_to_me!!! 
+ return result; + } + } + + + // isDone + template + bool + DiffOp::isDone(const keyT& key) const + { + bool isE1 = _i1->get_impl()->get_coeffs().probe(key); + if(!isE1) return isE1; + bool isLeaf = !_i1->get_impl()->get_coeffs().find(key).get()->second.has_children(); + return isLeaf; + } + +}; /*fuset*/ + +#endif /* __fuset_DiffOp_h__ */ diff --git a/src/madness/mra/FuseT/FuseT.h b/src/madness/mra/FuseT/FuseT.h new file mode 100644 index 00000000000..f9e71a5fd13 --- /dev/null +++ b/src/madness/mra/FuseT/FuseT.h @@ -0,0 +1,442 @@ +#ifndef __fuset_FuseT_h__ +#define __fuset_FuseT_h__ +#define DEBUG 0 + +#include "PrimitiveOp.h" +#include +namespace madness { + using namespace std; + + struct ValidOpDag{ + vector _preOps; + vector _postOps; + vector _postOperands; + }; + + /*! A Node in the OpDAG for _sequence*/ + template + struct FusedOpSequence{ + vector*> _sequence; + vector*> _trees; + vector _validOpDags; + vector > _operandTrees; + + FusedOpSequence(vector*> sequence, vector*> trees){ + _sequence = sequence; + _trees = trees; + } + }; + + + //vector _consumerList; + // + ////helper variables + //bool _isCompleted, _isVisitd, _isPre, _isPost; + //}; + + template + class FuseT{ + typedef DependencyInfo DInfo; + private: + //!creates an opDAG that represents the sequence + //!of operators in _sequence + void generateOpDAG(); + + //!generates a sequence of valid OpDAGs from the opDAG + //!for sequence of operators in _sequence + void getValidOpDAGSequence(); + + /*! This method bins operators in each validOpDAG + into pre or post compute ops*/ + void getPrePostOps(); + + //! Once a preOp is identified, this methods adds + //!all the producers ops in the chain in preOps + void addPreOps(int opIndex, int validOpDagID); + + //! Once a postOp is identified, this method adds + //!all the consumers ops in this chain in postOps + void addPostOps(int opIndex, int validOpDagID); + + //!Helper Method for getPrePostOps. 
+ //!Identifies if an oprator should be in preOps + inline bool isPostOp(int opIndex, int validOpDagID) + { + if(DEBUG) cout<*> _sequence; + + /*! Pointer to all the trees in the program. _sequence[i]->result + is the same pointer as _trees[i]. If _sequence[i] is does not exist, then the tree is not created by any operators in the opDAG */ + + vector*> _trees; + + /*!maps the operator/tree id to the index value in _sequence. + For example, if (*_sequence)[10]->_OpID = i, then _opIdToIndex[i] = 10*/ + std::map _opIdToIndex; + + /*!This will hold the output of the the algorithm for generating seqeuence of valid OpDAGs from _sequence. + This provides a sequence of OpDAGs that can be fused together*/ + vector> _validOpDAGs; + + /*!stores pre Operators and post Operators for each of the valid opDAGs*/ + vector > _preOps; + vector > _postOps; + + /* The following set of variables describe attributes + for each operator or opNode in the OpDag. + ///////////////////////////////////////////////////////////////////// + /////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////// + + ! for each operator this stores that validOpDagID that it belongs to + this is redundant information, that can be obtained from _validOpDags, + stored just for convienence in isPreOp and isPostOp methods*/ + vector _idValidOpDAG; + + /*! This is a directed acyclic graph where each node contains a vector containing all its producers, and the type of dependency it has with that producer. _producerList[3]={0,1} implies that Operator (*_sequence)[3] depends on operators (*_sequence)[0] (*_sequence)[1] */ + vector > _producerList; + + /*! 
Similar to _producerList but contains all the consumers*/ + vector > _consumerList; + + //!during generation of preOps and postOps, isCompleted is used to indentify if a operator has already been completed + vector _isCompleted; + + //!for computing preOPs and postOps + vector _isVisited; + + //!stores if an operator is in pre or post compute + vector _isPre; + vector _isPost; + + ////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////// + + //true after processing is complete. Used by print to + //figure out if the analysis is complete + bool _isSequenceProcessed; + + public: + + FuseT(vector*> sequence){ + _sequence = sequence; + _numOps = _sequence.size(); + _isSequenceProcessed = false; + } + + void processSequence(){ + + generateOpDAG(); + getValidOpDAGSequence(); + getPrePostOps(); + _isSequenceProcessed = true; + + } + + void printValidSequences(); + + void printOpsAndTrees(); + + FusedOpSequence getFusedOpSequence(); + + + }; + + + template + FusedOpSequence FuseT::getFusedOpSequence(){ + + //set _sequence and _trees + FusedOpSequence temp(_sequence,_trees); + + //set _validOpDags + int numValidOpDags = _validOpDAGs.size(); + for(int i = 0; i< numValidOpDags; i++){ + ValidOpDag vdag; + vdag._preOps = _preOps[i]; + vdag._postOps = _postOps[i]; + + for(int op : vdag._postOps) + for(auto di : _producerList[op]) + vdag._postOperands.push_back(di._producerIndex); + + //sort and eleminate redundancy + auto *pOps = &vdag._postOperands; + sort(pOps->begin(), pOps->end()); + auto it = unique(pOps->begin(),pOps->end()); + pOps->resize(distance(pOps->begin(),it)); + + //adding valid opDAGs + temp._validOpDags.push_back(vdag); + + } + + //set OperandTrees + for(int i =0; i<_producerList.size();i++){ + temp._operandTrees.push_back(vector ()); + for(auto di: _producerList[i]) + 
temp._operandTrees[i].push_back(di._producerIndex); + } + + return temp; + + } + + + template + void FuseT::addPreOps(int opIndex, int validOpDagID){ + if (DEBUG) cout<<"Checking for adding to Pre ID : "< + void FuseT::addPostOps(int opIndex, int validOpDagID){ + if (DEBUG) cout<<"Checking for adding to Post ID : "< + void FuseT::getPrePostOps(){ + + _isCompleted.assign(_numOps,false); + _isVisited.assign(_numOps,false); + _isPre.assign(_numOps,false); + _isPost.assign(_numOps,false); + + for(int i =0; i< _validOpDAGs.size(); i++) + { + vector validOpDAG = _validOpDAGs[i]; +// vector source; +// +// for(int op : validOpDAG) +// if (isSource(op)) +// source.push_back(op); + + for (int op : validOpDAG) + if (!_isVisited[op] && isPreOp(op,i)) + addPreOps(op,i); + + for (int op : validOpDAG) + _isVisited[op] = false; + + for (int op : validOpDAG) + if (!_isVisited[op] && isPostOp(op,i)) + addPostOps(op,i); + + for (int op : validOpDAG) + if (_isPost[op] && _isPre[op]) + std::cerr<<"Should not be happening"< + void FuseT::generateOpDAG(){ + + //set up _opIdToIndex + for(int i =0;i<_sequence.size();i++){ + _opIdToIndex[_sequence[i]->_OpID] = i; + _producerList.push_back(vector()); + _consumerList.push_back(vector()); + _trees.push_back(_sequence[i]->_result); + } + + for(int i =0; i<_sequence.size(); i++){ + + int numProducers = _sequence[i]->_dInfoVec.size(); + + for(int j = 0; j_dInfoVec[j]; + + /*!treeID and opID are the same for the op that produces the tree + if the treeID does not exist in the sequence of operators + then it must mean that the tree was not produced by any + operator in the program. It might be a true input.*/ + if(_opIdToIndex.find(treeInfo._treeID) == _opIdToIndex.end()){ + _trees.push_back(treeInfo._producerTree); + _opIdToIndex[treeInfo._treeID] = _trees.size() - 1 ; + } + + treeInfo._producerIndex = _opIdToIndex[treeInfo._treeID]; + treeInfo._consumerIndex = i; + + + //pure inputs are also inserted as part of the producerList. 
+ //This is used by postOps for identifying which ops are done + //and which are not + _producerList[i].push_back(treeInfo); + //cout< + void FuseT::getValidOpDAGSequence(){ + + _validOpDAGs.push_back(vector()); + _idValidOpDAG.assign(_sequence.size(), -1); + for(int i = 0; i< _sequence.size(); i++){ + _validOpDAGs[0].push_back(i); + _idValidOpDAG[i] = 0; + } + + //initialize _preOps and _postOps + for(int i =0; i< _validOpDAGs.size(); i++) + { + _preOps.push_back(vector()); + _postOps.push_back(vector()); + + } + + } + + template + void FuseT::printValidSequences(){ + if(!_isSequenceProcessed){ + cout<<"Sequence not processed!"<_opName<<" OpID : "<<_sequence[i]->_OpID<< endl; + cout<<"________________________"<_opName<_opName< + void FuseT::printOpsAndTrees(){ + if(!_isSequenceProcessed){ + cout<<"Sequence not processed!"<_opName<<" OpID : "<<_sequence[i]->_OpID<<" TreeID :"<<_trees[i]->get_impl()->id().get_obj_id()<<" TreeName : "<<_trees[i]->_treeName<< endl; + cout<<"________________________"<get_impl()->id().get_obj_id()<<" TreeName : "<<_trees[i]->_treeName<< endl; + cout<<"________________________"< +#include +#include +#define DIM 3 +using namespace madness; + +// +// +// +template +class FuseTContainer; + +template +class Base; + +template +class FuseT_Type; + +template +class FuseT_VType; + +template +class FuseT_CoeffT; + +template +class FuseT_VArgT; + +template +class FuseT_VParameter; + +template +class FuseT_VCoeffT; + +// Map types to integers + enum class WHAT_AM_I : int {FuseT_VCoeffT, FuseT_CoeffT, FuseT_VArgT, FuseT_VParameter, FuseT_Type, FuseT_VType, EMPTY}; + + +// +// +// +template +struct WhatAmI {}; + +template +struct WhatAmI> {static const WHAT_AM_I t=WHAT_AM_I::FuseT_Type;}; + +template +struct WhatAmI> {static const WHAT_AM_I t=WHAT_AM_I::FuseT_VType;}; + +template +struct WhatAmI> {static const WHAT_AM_I t=WHAT_AM_I::FuseT_CoeffT;}; + +template +struct WhatAmI> {static const WHAT_AM_I t=WHAT_AM_I::FuseT_VCoeffT;}; + + +template 
+struct WhatAmI> {static const WHAT_AM_I t=WHAT_AM_I::FuseT_VArgT;}; + +template +struct WhatAmI> {static const WHAT_AM_I t=WHAT_AM_I::FuseT_VParameter;}; + +// Simple base class +// Do I have to add get() methods for a variety of return types? +template +class Base +{ +public: + virtual WHAT_AM_I what() const = 0; + virtual ~Base() {}; + + template + void serialize(Archive& ar) { } +}; + +// +template +class FuseT_Type : public Base +{ +public: + T value; + + FuseT_Type() { } + FuseT_Type(const FuseT_Type& other) : value (other.value) { } + FuseT_Type(T &v) { value = v; } + ~FuseT_Type() { } + + WHAT_AM_I what() const { return WhatAmI::t; }; + + template + void serialize(Archive& ar) { ar & value; } +}; + +template +class FuseT_VType: public Base +{ +public: + std::vector value; + + FuseT_VType() { value = std::vector(); } + FuseT_VType(int size) {value = std::vector(size); } + FuseT_VType(const FuseT_VType& other) : value (other.value) { } + FuseT_VType(std::vector &v) { value = v; } + ~FuseT_VType() { } + + WHAT_AM_I what() const { return WhatAmI::t; }; + + template + void serialize(Archive& ar) { ar & value; } +}; + +// +template +class FuseT_CoeffT : public Base +{ + typedef GenTensor coeffT; +public: + coeffT value; + + FuseT_CoeffT() { } + FuseT_CoeffT(const FuseT_CoeffT& other) : value (other.value) { } + FuseT_CoeffT(coeffT &v) { value = v; } + ~FuseT_CoeffT() { } + + WHAT_AM_I what() const { return WhatAmI::t; }; + + template + void serialize(Archive& ar) { ar & value; } +}; + +// +template +class FuseT_VCoeffT : public Base +{ + typedef GenTensor coeffT; +public: + std::vector value; + + FuseT_VCoeffT() { value = std::vector(); } + FuseT_VCoeffT(int size) { value = std::vector(size); } + FuseT_VCoeffT(const FuseT_VCoeffT& other) : value (other.value) { } + FuseT_VCoeffT(std::vector &v) { value = v; } + ~FuseT_VCoeffT() { value.clear();} + + WHAT_AM_I what() const { return WhatAmI::t; }; + + template + void serialize(Archive& ar) { ar & value; } +}; + 
+//Operands to use for differentiation +template +class FuseT_VArgT : public Base +{ + + typedef GenTensor coeffT; + typedef Key keyT; + typedef std::pair argT; +public: + std::vector value; + + FuseT_VArgT() { value = std::vector(); } + FuseT_VArgT(int size) { value = std::vector(size); } + FuseT_VArgT(const FuseT_VArgT& other) : value (other.value) { } + FuseT_VArgT(std::vector &v) { value = v; } + argT& operator[](int idx){return value[idx];} + ~FuseT_VArgT() { value.clear();} + + WHAT_AM_I what() const { return WhatAmI::t; }; + + template + void serialize(Archive& ar) { ar & value; } +}; + + +//Parameter for Post Operands +template +class FuseT_VParameter : public Base +{ +public: + std::vector> value; + + FuseT_VParameter() { value = std::vector>(); } + FuseT_VParameter(int size) { value = std::vector>(size); } + FuseT_VParameter(const FuseT_VParameter& other) : value (other.value) { } + FuseT_VParameter(std::vector > &v) { value = v; } + ~FuseT_VParameter() { value.clear();} + FuseTContainer operator[](int i){return value[i];} + WHAT_AM_I what() const { return WhatAmI::t; }; + + template + void serialize(Archive& ar) { ar & value; } +}; + +// +// FuseTContainer +// +template +class FuseTContainer +{ + void allocate(WHAT_AM_I t) + { + if (t == WHAT_AM_I::FuseT_CoeffT) + data = static_cast*>(new FuseT_CoeffT); + else if (t == WHAT_AM_I::FuseT_VCoeffT) + data = static_cast*>(new FuseT_VCoeffT); + else if (t == WHAT_AM_I::FuseT_VArgT) + data = static_cast*>(new FuseT_VArgT); + else if (t == WHAT_AM_I::FuseT_VParameter) + data = static_cast*>(new FuseT_VParameter); + else if (t == WHAT_AM_I::FuseT_Type) + data = static_cast*>(new FuseT_Type); + else if (t == WHAT_AM_I::FuseT_VType) + data = static_cast*>(new FuseT_VType); + else + data = 0; + } + +public: + Base* data; + + // Default constructor makes an empty wrapper + FuseTContainer() : data(0) {} + + // Need to handle "data" pointer + FuseTContainer& operator=(FuseTContainer other) + { + FuseT_Type* 
copiedFuseTType; + FuseT_VType* copiedFuseTVType; + FuseT_CoeffT* copiedFuseTCoeffT; + FuseT_VCoeffT* copiedFuseTVCoeffT; + FuseT_VArgT* copiedFuseTVArgT; + FuseT_VParameter* copiedFuseTVParameter; + + switch(other.what()) + { + case WHAT_AM_I::FuseT_Type: + //std::cout<<"=Type"<(); + copiedFuseTType->value = ((FuseT_Type*)(other.data))->value; + this->data = static_cast*>(copiedFuseTType); + break; + case WHAT_AM_I::FuseT_VType: + //std::cout<<"=Type"<(); + copiedFuseTVType->value = ((FuseT_VType*)(other.data))->value; + this->data = static_cast*>(copiedFuseTVType); + break; + case WHAT_AM_I::FuseT_CoeffT: + //std::cout<<"=CoeffT"<(); + copiedFuseTCoeffT->value = ((FuseT_CoeffT*)(other.data))->value; + this->data = static_cast*>(copiedFuseTCoeffT); + break; + case WHAT_AM_I::FuseT_VCoeffT: + //std::cout<<"=VCoeffT"<(); + copiedFuseTVCoeffT->value = ((FuseT_VCoeffT*)(other.data))->value; + this->data = static_cast*>(copiedFuseTVCoeffT); + break; + case WHAT_AM_I::FuseT_VArgT: + //std::cout<<"=VCoeffT"<(); + copiedFuseTVArgT->value = ((FuseT_VArgT*)(other.data))->value; + this->data = static_cast*>(copiedFuseTVArgT); + break; + case WHAT_AM_I::FuseT_VParameter: + //std::cout<<"=VParameter"<(); + copiedFuseTVParameter->value = ((FuseT_VParameter*)(other.data))->value; + this->data = static_cast*>(copiedFuseTVParameter); + break; + case WHAT_AM_I::EMPTY: + //std::cout<<"=EMPTY"<* obj) : data(obj) {} // !!! TAKES OWNERSHIP OF POINTER + // should use shared_ptr + + // Returns type identity + WHAT_AM_I what() const + { + if (data) + return data->what(); + else + return WHAT_AM_I::EMPTY; + } + + Base* get() const + { + return data; + } + + void set(Base* p) + { + data = p; + } + + ~FuseTContainer() + { /////////////////////////////////////////////////////// + // should be memory leak. + // please check this out + //if (data != 0) + // delet data; + }//delete data;} // what about this? 
+ + template + static void do_serialize(const Archive& ar, FuseTContainer& w, bool deserialize) + { + int t = static_cast(w.what()); + ar & t; + if (deserialize) w.allocate(static_cast(t)); + + if (w.what() == WHAT_AM_I::FuseT_CoeffT) ar & *static_cast*>(w.data); + else if (w.what() == WHAT_AM_I::FuseT_VCoeffT) ar & *static_cast*>(w.data); + else if (w.what() == WHAT_AM_I::FuseT_VArgT) ar & *static_cast*>(w.data); + else if (w.what() == WHAT_AM_I::FuseT_VParameter) ar & *static_cast*>(w.data); + else if (w.what() == WHAT_AM_I::FuseT_Type) ar & *static_cast*>(w.data); + else if (w.what() == WHAT_AM_I::FuseT_VType) ar & *static_cast*>(w.data); + else if (w.what() == WHAT_AM_I::EMPTY) ar & *static_cast*>(w.data); + else + std::cout<<__func__<<" wft"< + struct ArchiveStoreImpl > + { + static void store(const Archive& ar, const FuseTContainer& w) + { + w.do_serialize(ar, const_cast&>(w), false); + } + }; + + template + struct ArchiveLoadImpl > + { + static void load(const Archive& ar, FuseTContainer& w) + { + w.do_serialize(ar, w, true); + } + }; + } +} + +#endif // diff --git a/src/madness/mra/FuseT/FusedExecutor.h b/src/madness/mra/FuseT/FusedExecutor.h new file mode 100644 index 00000000000..61639c3ab58 --- /dev/null +++ b/src/madness/mra/FuseT/FusedExecutor.h @@ -0,0 +1,314 @@ + +#ifndef __fuset_FusedExecutor_h__ +#define __fuset_FusedExecutor_h__ + +//#include "PrimitiveOp.h" +#include +#include "FuseT.h" +#include +#define DEBUG 0 +#define DEBUG1 0 +namespace madness { + + //!this is local information passed on to each recursive call. 
+ //!It can be thought of as the data structure holding information + //!about what needs to computed and what does not + template + struct LocalFuseInfo{ + + map _notEmpty; + //input operands for postCompute Ops + vector _postOperands; + + vector _preCompute; + vector _postCompute; + + template + void serialize(Archive& ar) { ar & _notEmpty & _postOperands & _preCompute & _postCompute; } + }; + + /*The FusedExecutor takes a fusedOpSequence and computes it by + doing the appropriate traversal. The traversal is done using + fusedTraversal method. The fusedTraversal method is a wrapper + for continueTraversal that actually does the + work. ContinueTraversal consists of two parts :1) Self + Recusion : If an operator returns a future, then continue + traversal calls itself to wait for the future to be + available. It then continues from where it left off. 2) Calls + to FusedTraversal: After it computes all the preOps, it calls + the fusedTraversal at the children nodes. + */ + template + class FusedExecutor:public WorldObject >{ + public: + typedef FusedExecutor feT; + typedef KeykeyT; + typedef WorldObject woT; + typedef WorldContainer,FunctionNode>dcT; + typedef FunctionNodeKNODE; + typedef GenTensorcoeffT; + typedef map > paraMap; + typedef vector > postParameter; + + private: + World& _world; + Future fusedTraversal(keyT key, const LocalFuseInfo& lInfo, paraMap& pmap); + Future continueTraversal(keyT key, const LocalFuseInfo& lInfo, paraMap& pMap, LocalFuseInfo& newlInfo, vector &pMapVec, int lastIdx, FuseTContainer& lastReturn); + + FusedOpSequence* _fOps; + map > fusedPostCompute(keyT Key, const vector postComputeOps, vector >& v); + dcT _coeffs; + + public: + /*passes world, fOp and coeffs. 
The last parameters is used to figure out + the distribution of keys amoung multiple MPI ranks*/ + FusedExecutor(World& world, FusedOpSequence* fOp): woT(world), _world(world){ + + woT::process_pending(); + const std::shared_ptr > > _pmap(FunctionDefaults::get_pmap()); + _coeffs = dcT(world,_pmap,false); + _coeffs.process_pending(); + _fOps = fOp; + + } + void execute(); + }; + + + template + void FusedExecutor::execute() + { + for(int i = 0; i< _fOps->_validOpDags.size(); i++) + { + //cout<<"Beginning"< lInfo; + for(int j : _fOps->_validOpDags[i]._preOps) + lInfo._preCompute.push_back(j); + //cout<<"Middle"<_validOpDags[i]._postOps) + lInfo._postCompute.push_back(j); + //cout<<"Second Middle"<_validOpDags[i]._postOperands) + lInfo._postOperands.push_back(j); + //contains a mapping of parameters for each operation + //the key is the order in which the operations appear in _fOps->sequence + //cout<<"Second Execution Time"<_validOpDags[i]._preOps) + if(_fOps->_sequence[j]->_isReduce) + _fOps->_sequence[j]->reduce(_world); + + for(int j : _fOps->_validOpDags[i]._postOps) + if(_fOps->_sequence[j]->_isReduce) + _fOps->_sequence[j]->reduce(_world); + + } + } + + //typedef map > paraMap; + template + Future > > + FusedExecutor::continueTraversal(keyT key, const LocalFuseInfo& lInfo, paraMap& pMap, LocalFuseInfo &newlInfo, vector &pMapVec, int lastIdx, FuseTContainer& lastReturn){ + + //process the preCompute Op that was computed before this call + if(lastIdx != -1){ + int i = lInfo._preCompute[lastIdx]; + + PrimitiveOp * preOp = _fOps->_sequence[i]; + //Control Code for identifying if this operator needs + //to be executed in the next executive call + if(!preOp->isDone(key)){ + newlInfo._preCompute.push_back(i); + + //creating parameters for the recursive call + //creates pMapVec[j][i] represents the parameters for operator i + //anf the jth child node + int j =0; + for (KeyChildIterator kit(key); kit; ++kit,j++) + { + //parameter must be passed so the return type of 
preOp must ne FuseT_VParameter + if (preOp->needsParameter()) + pMapVec[j][i] = ((FuseT_VParameter*)lastReturn.get())->value[j]; + //parameter doesnt have to be passed so create an empty Container + else + pMapVec[j][i] = FuseTContainer(); + } + } + } + + //process the remainder of the preCompute Operators + for(int idx = lastIdx+1; idx * preOp = _fOps->_sequence[i]; + + if(preOp->returnsFuture()){ + + Future > temp = preOp->computeFuture(key,pMap[i]); + return woT::task(_world.rank(), &feT::continueTraversal, key, lInfo, pMap, newlInfo, pMapVec, idx, temp); + } + + //Real Work + FuseTContainer temp; + temp = preOp->compute(key,pMap[i]); + + //Control Code for identifying if this operator needs + //to be executed in the next executive call + if(!preOp->isDone(key)){ + newlInfo._preCompute.push_back(i); + + //creating parameters for the recursive call + //creates pMapVec[j][i] represents the parameters for operator i + //anf the jth child node + int j =0; + for (KeyChildIterator kit(key); kit; ++kit,j++) + { + //parameter must be passed so the return type of preOp must ne FuseT_VParameter + if (preOp->needsParameter()) + pMapVec[j][i] = ((FuseT_VParameter*)temp.get())->value[j]; + //parameter doesnt have to be passed so create an empty Container + else + pMapVec[j][i] = FuseTContainer(); + } + } + } + + if(DEBUG) cout<<"After PreCompute"<* source = _fOps->_trees[i]; + unsigned long treeID = source->get_impl()->id().get_obj_id(); + + + + bool exists, notEmpty; + exists = source->get_impl()->get_coeffs().probe(key); + if(exists){ + notEmpty = source->get_impl()->get_coeffs().find(key).get()->second.has_children(); + }else + notEmpty = false; + + + + newlInfo._notEmpty[treeID] = notEmpty; + if(DEBUG) cout<<"Source Tree : "<_treeName<<" Tree ID "<* postOp = _fOps->_sequence[i]; + if(postOp->notEmpty(newlInfo._notEmpty)){ + if(DEBUG1) cout<_OpID<_OpID] = true; + newlInfo._postCompute.push_back(i); + } + } + + if(DEBUG) cout<_operandTrees[i]) + 
newlInfo._postOperands.push_back(j); + + if(DEBUG) cout<<"After Finding Post Operands"<(1< kit(key); kit; ++kit, i++) { + const keyT& child = kit.key(); + v[i] = woT::task(_coeffs.owner(child), &feT::fusedTraversal, child, newlInfo, pMapVec[i]); + } + } + + if(DEBUG) cout<<"After Recursive Call"< 0){ + if(DEBUG) cout<<"Calling Post Compute"<(temp); + + } + + + //typedef map > paraMap; + template + Future > > + FusedExecutor::fusedTraversal(keyT key, const LocalFuseInfo& lInfo, paraMap& pMap) + { + if(DEBUG1){ cout<<" Key : "< newlInfo; + //will hold new parameters + vector pMapVec = vector(1< temp = FuseTContainer(); + //cout<<"About to enter continue traversal"< + map > FusedExecutor::fusedPostCompute(keyT key, const vector postComputeOps, vector >& v){ + + if(DEBUG) cout<<"Entering Post Compute"< > temp; + //the first condition makes sure that v is not empty + //the second condition makes sure that operator i produces coeffs at a child node + if(v.size() > 0 && v[0].get().find(i) != v[0].get().end()){ + int j = 0; + for (KeyChildIterator kit(key); kit; ++kit, j++) { + FuseTContainer t = v[j].get()[i]; + temp.push_back(t); + } + } + FuseTContainer para(static_cast*>(new FuseT_VParameter(temp))); + PrimitiveOp* postOp = _fOps->_sequence[i]; + if(DEBUG) cout<<"Computing in Post Compute"<compute(key,para); + } + if(DEBUG) cout<<"Exiting Post Compute"< + class InnerOp : public PrimitiveOp + { + typedef Function KTREE; + typedef FunctionNode KNODE; // identical to nodeT + typedef Key keyT; + typedef WorldContainer,FunctionNode> dcT; + typedef WorldObject> woT; ///< Base class world object type + typedef GenTensor coeffT; //Type of tensor used to hold coeffs + typedef Tensor tensorT; + + public: + InnerOp(string opName, KTREE* output, const KTREE* i1, const KTREE*i2); + FuseTContainer compute(const keyT& key, const FuseTContainer &s); + + bool notEmpty(map& notEmptyMap) const{ + unsigned long treeID = _i1->get_impl()->id().get_obj_id(); + unsigned long treeID2 = 
_i2->get_impl()->id().get_obj_id(); + //cout<<"Checking for treeID : "<& _cdata; + TensorArgs _targs; + + // Wavelet order + int _k; + }; + + + // Constructor + // World is needed for communication in the "compute" function + template + InnerOp::InnerOp(string opName, KTREE* output, const KTREE* i1, const KTREE* i2) + : PrimitiveOp(opName, output, false,true) + , _i1(i1) + , _i2(i2) + , _cdata(FunctionCommonData::get(i1->k())) + , _targs(i1->get_impl()->get_tensor_args()) + , _coeffs(i1->get_impl()->get_coeffs()) + , _coeffs2(i2->get_impl()->get_coeffs()) + , _k(i1->get_impl()->get_k()) + , _sum(0.0) + { + + // output is itself. + //dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + this->_OpID = output->get_impl()->id().get_obj_id(); + this->_dInfoVec.push_back(DependencyInfo(i1,true,false,false,false)); + this->_dInfoVec.push_back(DependencyInfo(i2,true,false,false,false)); + this->_dInfoVec.push_back(DependencyInfo(output,true,false,false,false)); + + woT(i1->world()); + } + + // + template + FuseTContainer + InnerOp::compute(const keyT& key, const FuseTContainer &s) + { + KNODE& fnode = _coeffs.find(key).get()->second; + + if (fnode.has_coeff()) + { + //if (_i2->get_impl()->get_coeffs().probe(key)) + if (_coeffs.probe(key)) + { + //const KNODE& gnode = _i2->get_impl()->get_coeffs().find(key).get()->second; + const KNODE& gnode = _coeffs2.find(key).get()->second; + if (gnode.has_coeff()) + { + if (gnode.coeff().dim(0) != fnode.coeff().dim(0)) + { + cerr<<"functions have different k or compress/reconstruct error"<_sum += fnode.coeff().trace_conj(gnode.coeff()); + } + } + } + + //FuseTContainer result(static_cast*>(new FuseT_Type(this->_sum))); + FuseTContainer result; + return result; + } + + // isDone + template + bool + InnerOp::isDone(const keyT& key) const + { + bool isE1 = _i1->get_impl()->get_coeffs().probe(key); + if(!isE1){ std::cout<<"ouch!"<get_impl()->get_coeffs().probe(key); + if(!isE2) { std::cout<<"oops!" 
<get_impl()->get_coeffs().find(key).get()->second.has_children(); + if (isLeaf) + return isLeaf; + + bool isLeaf2 = !_i2->get_impl()->get_coeffs().find(key).get()->second.has_children(); + return isLeaf2; + } + + // Reduction + template + void + InnerOp::reduce(World& world){ + world.gop.sum(_sum); + } +}; /*fuset*/ + +#endif /* __fuset_InnerOp_h__ */ diff --git a/src/madness/mra/FuseT/KaryTree.cpp b/src/madness/mra/FuseT/KaryTree.cpp new file mode 100644 index 00000000000..a7dc9f4e3d8 --- /dev/null +++ b/src/madness/mra/FuseT/KaryTree.cpp @@ -0,0 +1,102 @@ +#include "KaryTree.h" +#include +using namespace std; +template +KaryTree::~KaryTree(){ + if(isInitialized){ + for(int i =0; i< _maxDepth; i++){ + for(typename TreeDS::iterator it = _mapKT[i].begin(); it!=_mapKT[i].end(); it++) + { + it->second->DeleteData(); + } + _mapKT[i].clear(); + } + delete[] _mapKT; + } +} + + +template +void KaryTree::DeleteTreeOnly(){ + if(isInitialized){ + for(int i =0; i< _maxDepth; i++) + _mapKT[i].clear(); + delete[] _mapKT; + } +} + +template +KaryTree::KaryTree(int maxDepth,int numChild, int dataLength){ + _maxDepth = maxDepth; + _mapKT = new TreeDS[_maxDepth]; + _dataLength = dataLength; + _numChild = numChild; + isInitialized = true; + _rseed = 0; +} + + +//KaryTree::KaryTree(KaryTree kt); +template +void KaryTree::GenerateRandom(GeneratedData gd, DTYPE d){ + //delete the tree if it exists + //this->~KaryTree(); + //recreate a clean tree + //KaryTree(_maxDepth,_numChild,_dataLength); + int seed= std::time(NULL)%100 + KaryTree::_rseed; + srand(seed); + std::cout<<"Seed : "<::_rseed+=8; + RecurseRandom(0,0,gd,d); +} + + +template +void KaryTree::RecurseRandom(int n, int l, GeneratedData gd, DTYPE d){ + Node* newNode = CreateNode(n,l); + if(gd == ZERO) + newNode->SetZero(); + if(gd == SOME_NUM) + newNode->SetNum(d); + else + newNode->SetRandom(); + + double r = (double)(rand() % 1000); + + //probabilty is high at low depth and low at high depth + double prob = 
1.0-pow((double)n/double(_maxDepth),0.5); + //std::cout<<"("<ChildTranslation(l,childId), gd,d); + else + newNode->SetType(LEAF); +} + + + +template +void KaryTree::PrintTree(int n, int l){ + Node* node = _mapKT[n][l]; + + for(int i =0; i"<<"Node( "<GetType()<<" Data : (" + <GetData()[0]<<", " + <GetData()[1]<<",... )"<GetType()==INTERIOR) + for(int childId = 0; childId < _numChild; childId++) + PrintTree(n+1,this->ChildTranslation(l,childId)); + + +} + + +template class KaryTree; +//template int KaryTree::ChildTranslation(int a, int b); + + diff --git a/src/madness/mra/FuseT/KaryTree.h b/src/madness/mra/FuseT/KaryTree.h new file mode 100644 index 00000000000..61678487409 --- /dev/null +++ b/src/madness/mra/FuseT/KaryTree.h @@ -0,0 +1,202 @@ +#ifndef __fuset_KaryTree_h__ +#define __fuset_KaryTree_h__ +#include +#include "Node.h" +namespace fuset { + + //#define DEBUG 1 + using namespace std; + enum GeneratedData {ZERO, SOME_NUM, RANDOM}; + typedef unsigned long long TRANS; + template + class KaryTree { + typedef std::map*> TreeDS; + private: + + TreeDS* _mapKT; + int _dataLength; + int _maxDepth; + int _numChild; + bool isInitialized; + void DeleteTreeOnly(); + //delete data at a tree level from all nodes + void DeleteData(int level); + void recurseRandom(int n, TRANS l, GeneratedData gd, Type d); + + public: + string _treeName; + static int _rseed; + static int _numTrees; + + int _treeID; + + inline TRANS childTranslation(TRANS l, int childId) const{ + return l*_numChild + childId; + } + + inline TRANS parentTranslation(TRANS l) const { + return l/_numChild; + } + + //!Constructor + KaryTree(string name, int maxDepth,int numChild, int dataLength); + + //!Delete Tree including the data + ~KaryTree(); + + //!generates a random tree with the parameters defined by the constructor + void generateRandom(GeneratedData gd, Type d); + + //!print out the child tree starting at depth n and translation l + void printTree(int n, TRANS l); + + Type getNorm(); + + inline bool 
hasNode(int depth, TRANS translation){ + return (_mapKT[depth].find(translation) != _mapKT[depth].end()) ; + } + inline Node* getNode(int depth, TRANS translation, bool& isEmpty) const{ + isEmpty = (_mapKT[depth].find(translation) == _mapKT[depth].end()); + if(!isEmpty) + return (*this)(depth, translation); + + Node* temp = new Node(); + return temp; + + } + + + inline Node* setNode(int depth, TRANS translation, Node* n){ + _mapKT[depth][translation] = n; + return _mapKT[depth][translation]; + } + + //creates a new node and allocates memory for data at that node + inline Node* createNode(int depth, TRANS translation){ + Node* temp = new Node(INTERIOR,_dataLength); + _mapKT[depth][translation] = temp; + return _mapKT[depth][translation]; + } + + //!returns the node at given depth and translation + inline Node* operator()(int depth, TRANS translation) const{ + bool isEmpty = (_mapKT[depth].find(translation) == _mapKT[depth].end()); + if(!isEmpty) + return _mapKT[depth][translation]; + std::cerr<<"The node is empty"<(); + } + + inline int getNumChild() const {return _numChild;} + }; + + + template + KaryTree::~KaryTree(){ + if(isInitialized){ + for(int i =0; i< _maxDepth; i++){ + for(typename TreeDS::iterator it = _mapKT[i].begin(); it!=_mapKT[i].end(); it++) + { + it->second->deleteData(); + } + _mapKT[i].clear(); + } + delete[] _mapKT; + } + } + + + template + void KaryTree::DeleteTreeOnly(){ + if(isInitialized){ + for(int i =0; i< _maxDepth; i++) + _mapKT[i].clear(); + delete[] _mapKT; + } + } + + template + KaryTree::KaryTree(string name,int maxDepth,int numChild, int dataLength){ + _treeName = name; + _maxDepth = maxDepth; + _mapKT = new TreeDS[_maxDepth]; + _dataLength = dataLength; + _numChild = numChild; + isInitialized = true; + _rseed = 0; + _treeID = KaryTree::_numTrees++; + } + + + //KaryTree::KaryTree(KaryTree kt); + template + void KaryTree::generateRandom(GeneratedData gd, Type d){ + //delete the tree if it exists + //this->~KaryTree(); + //recreate a 
clean tree + //KaryTree(_maxDepth,_numChild,_dataLength); + //int seed= std::time(NULL)%100 + KaryTree::_rseed; + int seed= KaryTree::_rseed; + srand(seed); + std::cout<<"Seed : "<::_rseed+=8; + recurseRandom(0,0,gd,d); + } + + + template + void KaryTree::recurseRandom(int n, TRANS l, GeneratedData gd, Type d){ + Node* newNode = createNode(n,l); + if(gd == ZERO) + newNode->setZero(); + if(gd == SOME_NUM) + newNode->setVal(d); + else + newNode->setRandom(); + + double r = (double)(rand() % 1000); + + //probabilty is high at low depth and low at high depth + double prob = 1.0-pow((double)n/double(_maxDepth),0.5); + //std::cout<<"("<childTranslation(l,childId), gd,d); + else + newNode->setType(LEAF); + } + + template + Type KaryTree::getNorm(){ + Type retValue = 0; + for(int d = 0; d<_maxDepth; d++) + for(auto p : _mapKT[d]) + retValue+=p.second->innerLocal(); + return pow(retValue,0.5); + } + + template + void KaryTree::printTree(int n, TRANS l){ + Node* node = _mapKT[n][l]; + + for(int i =0; i"<<"Node( "<getType()<<" Data : (" + <getData()[0]<<", " + <getData()[1]<<",... )"<getType()==INTERIOR) + for(int childId = 0; childId < _numChild; childId++) + printTree(n+1,this->childTranslation(l,childId)); + } + + + template <> int KaryTree::_rseed = 0; + template <> int KaryTree::_numTrees = 0; + +}; /*fuset*/ + +#endif /*__fuset_KarrayTree_h__*/ diff --git a/src/madness/mra/FuseT/MatrixInnerOp.h b/src/madness/mra/FuseT/MatrixInnerOp.h new file mode 100644 index 00000000000..5841fbf4db0 --- /dev/null +++ b/src/madness/mra/FuseT/MatrixInnerOp.h @@ -0,0 +1,288 @@ +// +// Ghaly +// +// Compresses the function, transforming into wavelet basis. +// Possible non-blocking comm. +// +// By default fence=true meaning that this oepration completes before returning, +// othewise if fence=false it returns without fencing and the user must invoke +// workd.gop.fence() to assure global completion before using the function +// for other purposes. 
+// +// Noop if already compressed or if not initialized. +// +// Since reconstruction/compression do not discard information we define them +// as const ... "logical constness" not "bitwise contness". +// +#ifndef __MADNESS_MRA_FUSET_MATRIXINNER_OP__INCLUDED__ +#define __MADNESS_MRA_FUSET_MATRIXINNER_OP__INCLUDED__ + +#include "PrimitiveOp.h" +#include "FuseTContainer.h" +#include "../mra.h" +#include "../function_common_data.h" +#include "../../world/MADworld.h" +#include "../../tensor/tensor.h" +#include "../../tensor/gentensor.h" + +namespace madness +{ + template + class MatrixInnerOp : public PrimitiveOp + { + typedef Function KTREE; + typedef FunctionNode KNODE; + typedef Key keyT; + typedef WorldContainer,FunctionNode> dcT; + typedef WorldObject> woT; + typedef GenTensor coeffT; + typedef Tensor tensorT; + + public: + MatrixInnerOp (string opName, KTREE* output, const std::vector& f, const std::vector& g, bool sym, bool dgemm); + FuseTContainer compute (const keyT& key, const FuseTContainer &s); + + bool notEmpty(map& notEmptyMap) const + { + for(auto a:_left) + { + if(notEmptyMap[a->id().get_obj_id()]){ + for(auto b:_right){ + if(notEmptyMap[b->id().get_obj_id()]){ + return true; + } + } + } + } + return false; + //unsigned long treeID = _i1->get_impl()->id().get_obj_id(); + //return notEmptyMap[treeID]; + } + + bool isDone (const keyT& key) const; + bool isPre () const { return true; } // false does not work. but It should be false. 
+ bool needsParameter () const { return false; } + void reduce (World& world); + + public: + // MatrixInnerOpp + Tensor* _r; + + private: + //!Points to operand trees + const KTREE* _i1; + + //!Points to operand nodes of the tree + KNODE *_t1, *_t2; + + //!Variables for MatrixInnerOp + std::vector _left_v_coeffs; + std::vector _right_v_coeffs; + //dcT& _coeffs; + //dcT& _coeffs_target; + + bool _sym; + bool _dgemm; + std::vector* > _left; + std::vector* > _right; + + + int _k; // Wavelet order + }; + + // Constructor + // World is needed for communication in the "compute" function + template + MatrixInnerOp::MatrixInnerOp(string opName, KTREE* output, const std::vector& f, const std::vector& g, bool sym, bool dgemm) + : PrimitiveOp(opName, output, false, true) + , _sym(sym) + , _dgemm(dgemm) + { + + this->_r = new Tensor(f.size(), g.size()); + + for (unsigned int i=0; i_r)(i,j) = 0.0; + + for (unsigned int i=0; iget_coeffs() ); + for (unsigned int j=0; jget_coeffs() ); + + // dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + this->_OpID = output->get_impl()->id().get_obj_id(); + + for (unsigned int i=0; i_dInfoVec.push_back(DependencyInfo(&f[i], true, false, false, false)); + + for (unsigned int i=0; i_dInfoVec.push_back(DependencyInfo(&g[i], true, false, false, false)); + + this->_dInfoVec.push_back(DependencyInfo(output,true,false,false,false)); + + woT(f[0].world()); + } + + // + // it should hangle both a parent and a leaf node. 
+ // + template + FuseTContainer + MatrixInnerOp::compute(const keyT& key, const FuseTContainer &s) + { + // std::cout<<__func__<<", key: "<* inheritedLeft; + FuseT_VType* inheritedRight; + + inheritedLeft = new FuseT_VType; + inheritedRight = new FuseT_VType; + + for (unsigned int i=0; i<_left.size(); i++) { + if(_left[i]->get_coeffs().probe(key)) { + inheritedLeft->value.push_back(i); + } + } + + for (unsigned int i=0; i<_right.size(); i++) { + if(_right[i]->get_coeffs().probe(key)) { + inheritedRight->value.push_back(i); + } + } + + if(inheritedLeft->value.empty()) + { + return FuseTContainer(); + } + + if(inheritedRight->value.empty()) + { + return FuseTContainer(); + } + + + unsigned int indexLeft; + unsigned int indexRight; + unsigned int leftSize = inheritedLeft->value.size(); + unsigned int rightSize = inheritedRight->value.size(); + + // The Pre-Computatio + // Assumption: the size of coefficient --> 16*16*16 = 4096 + double* A = (double*)malloc(sizeof(double)*16*16*16*leftSize); + double* B = (double*)malloc(sizeof(double)*16*16*16*rightSize); + double* C = (double*)malloc(sizeof(double)*leftSize*rightSize); + unsigned int k,l,m; + + // + // + for (unsigned int i=0; ivalue[i]; + const KNODE& fnode = _left_v_coeffs[indexLeft].find(key).get()->second; + + + // 3D array to 1D array with i for fnode and j for gnode + if (fnode.has_coeff()) + { + for (k=0; k<16; k++) + for (l=0; l<16; l++) + for (m=0; m<16; m++) + A[i*16*16*16 + k*16*16 + l*16 + m] = (fnode.coeff())(k,l,m); + } + else + { + for (k=0; k<16; k++) + for (l=0; l<16; l++) + for (m=0; m<16; m++) + A[i*16*16*16 + k*16*16 + l*16 + m] = 0.0; + } + } + + + // + for (unsigned int i=0; ivalue[i]; + const KNODE& gnode = _right_v_coeffs[indexRight].find(key).get()->second; + + + // 3D array to 1D array with i for fnode and j for gnode + if (gnode.has_coeff()) + { + for (k=0; k<16; k++) + for (l=0; l<16; l++) + for (m=0; m<16; m++) + B[i*16*16*16 + k*16*16 + l*16 + m] = (gnode.coeff())(k,l,m); + } + else + { 
+ for (k=0; k<16; k++) + for (l=0; l<16; l++) + for (m=0; m<16; m++) + B[i*16*16*16 + k*16*16 + l*16 + m] = 0.0; + } + } + + // + for (k=0; kvalue[k]; + for (l=0; lvalue[l]; + (*this->_r)(indexLeft, indexRight) += C[k + l*leftSize]; // k*rightSize + l --> row-major + } + } + + delete A; + delete B; + delete C; + + + return FuseTContainer(); + } + + // isDone + template + bool + MatrixInnerOp::isDone(const keyT& key) const + { + bool isE1= false; + bool isE2 =false; + + + for (unsigned int i=0; i<_left.size(); i++) { + if(_left[i]->get_coeffs().probe(key)) + isE1 = isE1 || _left_v_coeffs[i].find(key).get()->second.has_children(); + } + for (unsigned int i=0; i<_right.size(); i++) { + if(_right[i]->get_coeffs().probe(key)) + isE2 = isE2 || _right_v_coeffs[i].find(key).get()->second.has_children(); + } + + return !(isE1 && isE2); + + } + + template + void + MatrixInnerOp::reduce(World& world){ + world.gop.sum(_r->ptr(),_left.size()*_right.size()); + } + +}; /*fuset*/ + +#endif /* __fuset_MatrixInnerOp_h__ */ diff --git a/src/madness/mra/FuseT/MatrixInnerPreOp.h b/src/madness/mra/FuseT/MatrixInnerPreOp.h new file mode 100644 index 00000000000..99203a8211c --- /dev/null +++ b/src/madness/mra/FuseT/MatrixInnerPreOp.h @@ -0,0 +1,346 @@ +// +// Ghaly +// +// Compresses the function, transforming into wavelet basis. +// Possible non-blocking comm. +// +// By default fence=true meaning that this oepration completes before returning, +// othewise if fence=false it returns without fencing and the user must invoke +// workd.gop.fence() to assure global completion before using the function +// for other purposes. +// +// Noop if already compressed or if not initialized. +// +// Since reconstruction/compression do not discard information we define them +// as const ... "logical constness" not "bitwise contness". 
+// +#ifndef __MADNESS_MRA_FUSET_MATRIXINNERPRE_OP__INCLUDED__ +#define __MADNESS_MRA_FUSET_MATRIXINNERPRE_OP__INCLUDED__ + +#include "PrimitiveOp.h" +#include "FuseTContainer.h" +#include "../mra.h" +#include "../function_common_data.h" +#include "../../world/MADworld.h" +#include "../../tensor/tensor.h" +#include "../../tensor/gentensor.h" + +namespace madness +{ + template + class MatrixInnerPreOp : public PrimitiveOp + { + typedef Function KTREE; + typedef FunctionNode KNODE; + typedef Key keyT; + typedef WorldContainer,FunctionNode> dcT; + typedef WorldObject> woT; + typedef GenTensor coeffT; + typedef Tensor tensorT; + + public: + MatrixInnerPreOp (string opName, KTREE* output, const std::vector& f, const std::vector& g, bool sym); + FuseTContainer compute (const keyT& key, const FuseTContainer &s); + + bool notEmpty(map& notEmptyMap) const + { + for(auto a:_left) + { + if(notEmptyMap[a->id().get_obj_id()]){ + for(auto b:_right){ + if(notEmptyMap[b->id().get_obj_id()]){ + return true; + } + } + } + } + return false; + //unsigned long treeID = _i1->get_impl()->id().get_obj_id(); + //return notEmptyMap[treeID]; + } + + bool isDone (const keyT& key) const; + bool isPre () const { return true; } // false does not work. but It should be false. 
+ bool needsParameter () const { return true; } + void reduce (World& world); + + public: + // MatrixInnerPreOpp + Tensor* _r; + + private: + //!Points to operand trees + const KTREE* _i1; + + //!Points to operand nodes of the tree + KNODE *_t1, *_t2; + + //!Variables for MatrixInnerPreOp + std::vector _left_v_coeffs; + std::vector _right_v_coeffs; + //dcT& _coeffs; + //dcT& _coeffs_target; + + bool _sym; + std::vector* > _left; + std::vector* > _right; + + std::map checkKeyDoneLeft; + std::map checkKeyDoneRight; + + std::map candidatesLeft; + std::map candidatesRight; + + int _k; // Wavelet order + }; + + // Constructor + // World is needed for communication in the "compute" function + template + MatrixInnerPreOp::MatrixInnerPreOp(string opName, KTREE* output, const std::vector& f, const std::vector& g, bool sym) + : PrimitiveOp(opName, output, false, true) + , _sym(sym) + { + this->_r = new Tensor(f.size(), g.size()); + + for (unsigned int i=0; i_r)(i,j) = 0.0; + + for (unsigned int i=0; iget_coeffs() ); + for (unsigned int j=0; jget_coeffs() ); + + // dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + this->_OpID = output->get_impl()->id().get_obj_id(); + + for (unsigned int i=0; i_dInfoVec.push_back(DependencyInfo(&f[i], true, false, false, false)); + + for (unsigned int i=0; i_dInfoVec.push_back(DependencyInfo(&g[i], true, false, false, false)); + + this->_dInfoVec.push_back(DependencyInfo(output,true,true,false,false)); + + woT(f[0].world()); + } + + // + // it should hangle both a parent and a leaf node. 
+ // + template + FuseTContainer + MatrixInnerPreOp::compute(const keyT& key, const FuseTContainer &s) + { + FuseT_VParameter* inheritedWhole; + FuseT_VType* inheritedLeft; + FuseT_VType* inheritedRight; + + inheritedLeft = new FuseT_VType; + inheritedRight = new FuseT_VType; + + //cout<<"Running middle"<; + inheritedRight = new FuseT_VType; + + for (unsigned int i=0; i<_left.size(); i++) + inheritedLeft->value.push_back(i); + for (unsigned int i=0; i<_right.size(); i++) + inheritedRight->value.push_back(i); + } + else + { + inheritedWhole = new FuseT_VParameter( ((FuseT_VParameter*)s.get())->value ); + + for (unsigned int i=0; i<_left.size(); i++){ + if(_left[i]->get_coeffs().probe(key)){ + + inheritedRight->value.push_back(i); + } + } + + for (unsigned int i=0; i<_right.size(); i++){ + if(_right[i]->get_coeffs().probe(key)){ + inheritedRight->value.push_back(i); + } + + } + + + inheritedLeft = new FuseT_VType(((FuseT_VType*)(((inheritedWhole->value[0]).get())))->value); + inheritedRight = new FuseT_VType(((FuseT_VType*)(((inheritedWhole->value[1]).get())))->value); + } + + FuseT_VType whichNodesLeft; // value = std::vector + FuseT_VType whichNodesRight; + + unsigned int indexLeft; + unsigned int indexRight; + unsigned int leftSize = inheritedLeft->value.size(); + unsigned int rightSize = inheritedRight->value.size(); + + // The Pre-Computatio + // Assumption: the size of coefficient --> 16*16*16 = 4096 + double* A = (double*)malloc(sizeof(double)*16*16*16*leftSize); + double* B = (double*)malloc(sizeof(double)*16*16*16*rightSize); + double* C = (double*)malloc(sizeof(double)*leftSize*rightSize); + unsigned int k,l,m; + + // + for (unsigned int i=0; ivalue[i]; + const KNODE& fnode = _left_v_coeffs[indexLeft].find(key).get()->second; + + if (_left_v_coeffs[indexLeft].find(key).get()->second.has_children()) + whichNodesLeft.value.push_back(indexLeft); + + // 3D array to 1D array with i for fnode and j for gnode + if (fnode.has_coeff()) + { + for (k=0; k<16; k++) + 
for (l=0; l<16; l++) + for (m=0; m<16; m++) + A[i*16*16*16 + k*16*16 + l*16 + m] = (fnode.coeff())(k,l,m); + } + else + { + for (k=0; k<16; k++) + for (l=0; l<16; l++) + for (m=0; m<16; m++) + A[i*16*16*16 + k*16*16 + l*16 + m] = 0.0; + } + } + + + // + for (unsigned int i=0; ivalue[i]; + const KNODE& gnode = _right_v_coeffs[indexRight].find(key).get()->second; + + if (_right_v_coeffs[indexRight].find(key).get()->second.has_children()) + whichNodesRight.value.push_back(indexRight); + + // 3D array to 1D array with i for fnode and j for gnode + if (gnode.has_coeff()) + { + for (k=0; k<16; k++) + for (l=0; l<16; l++) + for (m=0; m<16; m++) + B[i*16*16*16 + k*16*16 + l*16 + m] = (gnode.coeff())(k,l,m); + } + else + { + for (k=0; k<16; k++) + for (l=0; l<16; l++) + for (m=0; m<16; m++) + B[i*16*16*16 + k*16*16 + l*16 + m] = 0.0; + } + } + + // + for (k=0; kvalue[k]; + for (l=0; lvalue[l]; + (*this->_r)(indexLeft, indexRight) += C[k + l*leftSize]; // k*rightSize + l --> row-major + } + } + + delete A; + delete B; + delete C; + + + + if (whichNodesLeft.value.size() == 0) + checkKeyDoneLeft.insert(std::pair(key,true)); + else + checkKeyDoneLeft.insert(std::pair(key,false)); + + if (whichNodesRight.value.size() == 0) + checkKeyDoneRight.insert(std::pair(key,true)); + else + checkKeyDoneRight.insert(std::pair(key,false)); + + + // + FuseT_VParameter v_parameter; + FuseT_VParameter inner_parameter; + + FuseTContainer candiParameter_L(static_cast*> (new FuseT_VType(whichNodesLeft.value))); + FuseTContainer candiParameter_R(static_cast*> (new FuseT_VType(whichNodesRight.value))); + inner_parameter.value.push_back(candiParameter_L); + inner_parameter.value.push_back(candiParameter_R); + + for (KeyChildIterator kit(key); kit; ++kit) + { + FuseTContainer wrapper(static_cast*>(new FuseT_VParameter(inner_parameter.value))); + v_parameter.value.push_back(wrapper); + } + + // Return Parameters + FuseTContainer targets(static_cast*>(new FuseT_VParameter(v_parameter.value))); + 
//cout<<"exiting compute"< + bool + MatrixInnerPreOp::isDone(const keyT& key) const + { + bool isE1; + bool isE2; + + // O(M + N) + for (unsigned int i=0; i<_left.size(); i++) + { + isE1 = _left[i]->get_coeffs().probe(key) || isE1; + } + if (!isE1) { std::cout<get_coeffs().probe(key) || isE2; + } + if (!isE2) { std::cout<second) return true; + if (checkKeyDoneRight.find(key)->second) return true; + + return false; + } + + template + void + MatrixInnerPreOp::reduce(World& world){ + world.gop.sum(_r->ptr(),_left.size()*_right.size()); + } + +}; /*fuset*/ + +#endif /* __fuset_MatrixInnerPreOp_h__ */ diff --git a/src/madness/mra/FuseT/MultiplyOp.h b/src/madness/mra/FuseT/MultiplyOp.h new file mode 100644 index 00000000000..2efbc477fe7 --- /dev/null +++ b/src/madness/mra/FuseT/MultiplyOp.h @@ -0,0 +1,245 @@ + +// Ghaly +// +// Reconstructs the function, transforming into scaling function basis. Possible non-blocking comm. +// +#ifndef __MADNESS_MRA_FUSET_MULTIPLY_OP__INCLUDED__ +#define __MADNESS_MRA_FUSET_MULTIPLY_OP__INCLUDED__ + +#include "PrimitiveOp.h" +#include "../function_common_data.h" +#define DEBUG 0 + +namespace madness +{ + + template + class MultiplyOp : public PrimitiveOp + { + public: + typedef Function KTREE; + typedef FunctionNode KNODE; + typedef Key keyT; + typedef WorldContainer,FunctionNode> dcT; + + typedef GenTensor coeffT; + typedef Tensor tensorT; + + public: + MultiplyOp(string opName, KTREE* output, const KTREE* i1, const KTREE* i2, double tol); + FuseTContainer compute(const keyT& key, const FuseTContainer &s); + + bool isDone(const keyT& key) const; + bool isPre() const { return true; } + bool needsParameter() const { return true; } + void reduce(World& world){} + public: + std::vector child_patch(const keyT& child) const; + void do_mul(const keyT& key, const Tensor& left, const std::pair< keyT, Tensor >& arg); + + double truncate_tol(double tol, const keyT& key) const; + + private: + //!Points to operand trees + const KTREE* _i1; + const 
KTREE* _i2; + double _tol; + //!Variables for MultiplyOp + const FunctionCommonData& _cdata; + TensorArgs _targs; + }; + + // Constructor + template + MultiplyOp::MultiplyOp(string opName, KTREE* output, const KTREE* i1, const KTREE* i2, double tol) + : PrimitiveOp(opName, output, false) + , _i1(i1) + , _i2(i2) + , _cdata(FunctionCommonData::get(i1->k())) + , _targs(i1->get_impl()->get_tensor_args()) + , _tol(tol) + { + //dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + this->_OpID = output->get_impl()->id().get_obj_id(); + this->_dInfoVec.push_back(DependencyInfo(i1,true,true,false,false)); + this->_dInfoVec.push_back(DependencyInfo(i2,true,true,false,false)); + this->_dInfoVec.push_back(DependencyInfo(output,true,true,false,false)); + } + + template + FuseTContainer + MultiplyOp::compute(const keyT& key, const FuseTContainer &s) + { + typedef typename FunctionImpl::dcT::const_iterator literT; + typedef typename FunctionImpl::dcT::const_iterator riterT; + double lnorm=1e99, rnorm=1e99; + std::vector parameter; + + if(s.get() == 0){ + parameter = std::vector(2); + + }else if (s.what() == WHAT_AM_I::FuseT_VCoeffT) { + parameter = ((FuseT_VCoeffT*)s.get())->value; + + delete (s.data); + + //printf ("target: %p vs. 
parameter: %p\n", ¶meter, &(((FuseT_VCoeffT*)s.get())->value)); + + }else{ + cerr<<"This should not have happenned"< lc = parameter[0]; + if (lc.size() == 0) { + literT it = _i1->get_impl()->get_coeffs().find(key).get(); + MADNESS_ASSERT(it != _i1->get_impl()->get_coeffs().end()); + lnorm = it->second.get_norm_tree(); + if (it->second.has_coeff()) + lc = it->second.coeff().full_tensor_copy(); + } + + Tensor rc = parameter[1]; + if (rc.size() == 0) { + riterT it = _i2->get_impl()->get_coeffs().find(key).get(); + MADNESS_ASSERT(it != _i2->get_impl()->get_coeffs().end()); + rnorm = it->second.get_norm_tree(); + if (it->second.has_coeff()) + rc = it->second.coeff().full_tensor_copy(); + } + + // both nodes are leaf nodes: multiply and return + if (rc.size() && lc.size()) { // Yipee! + do_mul(key, lc, std::make_pair(key,rc)); + return FuseTContainer(); + } + + if (_tol) { + if (lc.size()) + lnorm = lc.normf(); // Otherwise got from norm tree above + if (rc.size()) + rnorm = rc.normf(); + if (lnorm*rnorm < truncate_tol(_tol, key)) { + this->_result->get_impl()->get_coeffs().replace(key, KNODE(coeffT(_cdata.vk,_targs),false)); // Zero leaf node + return FuseTContainer(); + } + } + + // Recur down + this->_result->get_impl()->get_coeffs().replace(key, KNODE(coeffT(),true)); // Interior node + + Tensor lss; + if (lc.size()) { + Tensor ld(_cdata.v2k); + ld(_cdata.s0) = lc(___); + lss = _i1->get_impl()->unfilter(ld); + } + + Tensor rss; + if (rc.size()) { + Tensor rd(_cdata.v2k); + rd(_cdata.s0) = rc(___); + rss = _i2->get_impl()->unfilter(rd); + } + + + + FuseT_VParameter* v_parameter = new FuseT_VParameter(); + + for (KeyChildIterator kit(key); kit; ++kit) { + const keyT& child = kit.key(); + FuseT_VCoeffT* tvec = new FuseT_VCoeffT(2); + if (lc.size()) + tvec->value[0] = copy(lss(child_patch(child))); + if (rc.size()) + tvec->value[1] = copy(rss(child_patch(child))); + + FuseTContainer wrapper(static_cast*>(tvec)); + v_parameter->value.push_back(wrapper); + + } + 
FuseTContainer temp(static_cast*>(v_parameter)); + return temp; + + + } + + + template + std::vector + MultiplyOp::child_patch(const keyT& child) const + { + std::vector s(NDIM); + const Vector& l = child.translation(); + for (std::size_t i = 0; i + bool MultiplyOp::isDone(const keyT& key) const + { + bool isE1 = this->_result->get_impl()->get_coeffs().probe(key); + if(!isE1) return isE1; + bool isLeaf = !this->_result->get_impl()->get_coeffs().find(key).get()->second.has_children(); + return isLeaf; + } + + + template + void MultiplyOp::do_mul(const keyT& key, const Tensor& left, const std::pair< keyT, Tensor >& arg) { + PROFILE_MEMBER_FUNC(FunctionImpl); + const keyT& rkey = arg.first; + const Tensor& rcoeff = arg.second; + + Tensor rcube = this->_result->get_impl()->fcube_for_mul(key, rkey, rcoeff); + //madness::print("do_mul: l", key, left.size()); + Tensor lcube = this->_result->get_impl()->fcube_for_mul(key, key, left); + + + Tensor tcube(_cdata.vk,false); + TERNARY_OPTIMIZED_ITERATOR(T, tcube, T, lcube, T, rcube, *_p0 = *_p1 * *_p2;); + double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults::get_cell_volume()); + tcube = transform(tcube,_cdata.quad_phiw).scale(scale); + if(DEBUG && key == keyT(0)){ + std::cout<<"Computed Coeff at "<_result->get_impl()->get_coeffs().replace(key, KNODE(coeffT(tcube),false)); + return; + } + + template + double MultiplyOp::truncate_tol(double tol, const keyT& key) const { + const static double fac=1.0/std::pow(2,NDIM*0.5); + tol*=fac; + + // RJH ... 
introduced max level here to avoid runaway + // refinement due to truncation threshold going down to + // intrinsic numerical error + const int MAXLEVEL1 = 20; // 0.5**20 ~= 1e-6 + const int MAXLEVEL2 = 10; // 0.25**10 ~= 1e-6 + int truncate_mode = 1; + if (truncate_mode == 0) { + return tol; + } + else if (truncate_mode == 1) { + double L = FunctionDefaults::get_cell_min_width(); + return tol*std::min(1.0,pow(0.5,double(std::min(key.level(),MAXLEVEL1)))*L); + } + else if (truncate_mode == 2) { + double L = FunctionDefaults::get_cell_min_width(); + return tol*std::min(1.0,pow(0.25,double(std::min(key.level(),MAXLEVEL2)))*L*L); + } + else { + MADNESS_EXCEPTION("truncate_mode invalid",truncate_mode); + } + } + + + +}; /*fuset*/ + +#endif /* __fuset_MultiplyOp_h__ */ diff --git a/src/madness/mra/FuseT/Node.h b/src/madness/mra/FuseT/Node.h new file mode 100644 index 00000000000..aaeaf6c56bb --- /dev/null +++ b/src/madness/mra/FuseT/Node.h @@ -0,0 +1,139 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace fuset { + + + //#define DEBUG 1 + using namespace std; + enum NodeType {EMPTY, INTERIOR, LEAF}; + + + template + class Node { + public: + Node(); + + Node(const Node& node); + + Node(NodeType nt, const std::vector &data); + + Node(NodeType nt, int length); + + //FIXME: not the right way to delete. 
object maybe stack allocated + void deleteEmptyNode(); + + //!sets the data at this node to zero + void setZero(); + + void setVal(Type val); + + //!set random data + void setRandom(); + + + inline NodeType getType() { return _nt; } + // const std::vector &data(); + // //Type* operator()(){return _data;} + // //int getLength(){return _dataLength;} + // size_t size(); + + // void deleteData(); + // void setType(NodeType nt); + // void setData(const std::vector& data); + // void setNode(NodeType nt, const std::vector& data); + // Node& operator=(Node& node); + + inline const std::vector& getData() const { return _data;} + inline std::vector& getData() { return _data;} + //Type* operator()(){return _data;} + inline int getLength(){return _data.size();} + + inline void deleteData() { _data.clear(); } + + inline void setType(NodeType nt) {_nt = nt;} + inline void setData(const std::vector& data) {_data = data;} + inline void setNode(NodeType nt, const std::vector& data){_nt = nt; _data = data;} + inline Node& operator=(Node& node) { swap(node); return *this; } + + inline Type innerLocal(){ + Type retValue = (Type)0; + if(_nt != EMPTY) + for(auto d : _data) + retValue+=d*d; + else + cerr<<"Error at inner Local"< _data; + }; + + + template + Node::Node() + : _nt(EMPTY) {} + + template + Node::Node(const Node& node) + : _nt(node.nt), + _data(node.data) {} + + template + Node::Node(NodeType nt, const std::vector &data) + : _nt(nt), + _data(data) {} + + /*!creates a node and allocates memory for data + also initialized the data to 0.0*/ + template + Node::Node(NodeType nt, int length) + : _nt(nt), + _data(length) {} + + //not the right way to delete. 
object maybe stack allocated + template + void + Node::deleteEmptyNode() { + if(_nt == EMPTY) + delete this; + } + + //!sets the data at this node to zero + template + void + Node::setZero() { + setVal(Type(0)); + } + + template + void + Node::setVal(Type val){ + if(_data.empty()) + std::cerr<<"Writing to NULL data in setZero"< + void + Node::setRandom() { + std::generate(_data.begin(), _data.end(), RandomNumber); + if(_data.empty()) + std::cerr<<"Writing to NULL data in SetRandom"< + class NothingOp : public PrimitiveOp { + + typedef Function KTREE; + typedef FunctionNode KNODE; + typedef Key keyT; + typedef WorldContainer , FunctionNode > dcT; + +///< Type of container holding the nodes + + public: + NothingOp(string opName, KTREE* output, const KTREE* i1); + + void compute (const keyT& key) { } + FuseTContainer compute(const keyT& key, const FuseTContainer &s); + + //Future> afterComputeB(const keyT& key, const std::vector>> &v) { } + //Future> postCompute(const keyT& key, const std::vector>> &s) { } + + bool isDone(const keyT& key) const; + bool isPre() const { return true; } + bool needsParameter() const { return false; } + + private: + //!Points to operand trees + const KTREE* _i1; + dcT& _coeffs; + + //!Points to operand nodes of the tree + KNODE *_t1, *_t2; + }; + + template + NothingOp::NothingOp(string opName, KTREE* output, const KTREE* i1) + : PrimitiveOp(opName, output, false), + _i1(i1) + , _coeffs(i1->get_impl()->get_coeffs()) + { + //dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + this->_OpID = output->get_impl()->id().get_obj_id(); + this->_dInfoVec.push_back(DependencyInfo(i1,true,false,false,false)); + this->_dInfoVec.push_back(DependencyInfo(output,true,false,false,false)); + + } + + template + FuseTContainer + NothingOp::compute(const keyT& key, const FuseTContainer &s) + { + std::cout<<"["<<__func__<<"] "<get_impl()->get_coeffs().find(key).get(); + + KNODE& node = _coeffs.find(key).get()->second; + FuseTContainer temp; + return temp; + } + + + 
template + bool NothingOp::isDone(const keyT& key) const { + + bool isE1 = _i1->get_impl()->get_coeffs().probe(key); + if(!isE1) return isE1; + bool isLeaf = !_i1->get_impl()->get_coeffs().find(key).get()->second.has_children(); + return isLeaf; + + } + +}; /*fuset*/ + +#endif /* __fuset_CopyOp_h__ */ diff --git a/src/madness/mra/FuseT/OpExecutor.h b/src/madness/mra/FuseT/OpExecutor.h new file mode 100644 index 00000000000..65aafb182ae --- /dev/null +++ b/src/madness/mra/FuseT/OpExecutor.h @@ -0,0 +1,178 @@ +#ifndef __MADNESS_MRA_FUSET_OPEXECUTOR_H__INCLUDED__ +#define __MADNESS_MRA_FUSET_OPEXECUTOR_H__INCLUDED__ + +#define DEBUG_1 +#define DEBUG 0 + +#include +#include +#include "PrimitiveOp.h" + +namespace madness +{ + /*!This class takes a primitive Op and computes the result tree*/ + template + class OpExecutor: public WorldObject> + { + typedef OpExecutor oeT; + typedef Key keyT; + typedef WorldObject woT; + typedef WorldContainer,FunctionNode> dcT; + typedef FunctionNode KNODE; + typedef GenTensor coeffT; + + public: + OpExecutor(World& world) + : woT(world) + ,_world(world) + { woT::process_pending(); } + + Future> traverseTree (const keyT key, const FuseTContainer &s); + FuseTContainer PostCompute (const keyT key, const std::vector>> &v); + void execute (PrimitiveOp* pOp, bool isBlocking); + + Future< FuseTContainer > traverseTreeFuture(const keyT key, const FuseTContainer &s, bool skipPre); + + private: + World& _world; + PrimitiveOp* _pOp; + dcT* coeffs; + Function* _result; + }; + + // + template + Future< FuseTContainer > + OpExecutor::traverseTree(const keyT key, const FuseTContainer &s) + { + // std::cout <<__func__<<", key: "< temp; + + // Pre-Computation + if (_pOp->isPre()) + temp = _pOp->compute(key, s); + + std::vector< Future > > v; + if (!_pOp->isDone(key)) + { + if (!_pOp->isPre() && _pOp->needsParameter()) + v = future_vector_factory >(1< kit(key); kit; ++kit, ++i) + { + const keyT& child = kit.key(); + + if (_pOp->needsParameter()) + { + if 
(_pOp->isPre()) + woT::task(coeffs->owner(child), &OpExecutor::traverseTree, child, ((FuseT_VParameter*)(temp.get()))->value[i]); + else + v[i] = woT::task(coeffs->owner(child), &OpExecutor::traverseTree, child, temp); + } + else + woT::task(coeffs->owner(child), &OpExecutor::traverseTree, child, temp); + } + } + + // Post-Computation + if (!_pOp->isPre()) + { + Future> returnVal = woT::task(_world.rank(), &OpExecutor::PostCompute, key, v); + + if (!v.empty()) + { + v.clear(); + } + + return returnVal; + } + + // for Pre-Computation (temporal) + Future> retValue(temp); + return retValue; + } + + + // + template + FuseTContainer + OpExecutor::PostCompute(const keyT key, const std::vector> > &v) + { + vector > temp; + for(auto a: v) + temp.push_back(a.get()); + return _pOp->compute(key, FuseTContainer(static_cast*>(new FuseT_VParameter(temp)))); + } + + + //implementation to handle only the derivative operator + template + Future< FuseTContainer > + OpExecutor::traverseTreeFuture(const keyT key, const FuseTContainer &s, bool skipPre) + { + Future > retPreCompute; + FuseTContainer temp; + // Pre-Computation + if (!skipPre){ + retPreCompute = _pOp->computeFuture(key, s); + return woT::task(_world.rank(), &OpExecutor::traverseTreeFuture, key, retPreCompute, true); + }else{ + temp = s; + } + + + if (!_pOp->isDone(key)) + { + int i = 0; + for (KeyChildIterator kit(key); kit; ++kit, ++i) + { + const keyT& child = kit.key(); + + if (_pOp->needsParameter()) + woT::task(coeffs->owner(child), &OpExecutor::traverseTreeFuture, child, ((FuseT_VParameter*)(temp.get()))->value[i],false); + else + woT::task(coeffs->owner(child), &OpExecutor::traverseTreeFuture, child, temp,false); + } + } + + // for Pre-Computation (temporal) + Future> retValue(temp); + return retValue; + } + + + + + // execute with operators + template + void + OpExecutor::execute(PrimitiveOp* pOp, bool hasParameters) + { + _pOp = pOp; + _result = _pOp->_result; + coeffs = &_result->get_impl()->get_coeffs(); + + if 
(_world.rank() == coeffs->owner(keyT(0)) ) + { + FuseTContainer initParameter; + FuseTContainer root; + + if(pOp->returnsFuture()){ + root = traverseTreeFuture(keyT(0), initParameter, false); + }else{ + root = traverseTree(keyT(0), initParameter); + } + } + + pOp->setComplete(true); + _world.gop.fence(); + + //for operators that require a reduction at the end + if(pOp->_isReduce) + pOp->reduce(_world); + } + +}; /*fuset*/ + +#endif /*__fuset_OpExecutor_h__*/ + diff --git a/src/madness/mra/FuseT/OpExecutor_new.h b/src/madness/mra/FuseT/OpExecutor_new.h new file mode 100644 index 00000000000..43fc7196838 --- /dev/null +++ b/src/madness/mra/FuseT/OpExecutor_new.h @@ -0,0 +1,150 @@ +#ifndef __MADNESS_MRA_FUSET_OPEXECUTOR_H__INCLUDED__ +#define __MADNESS_MRA_FUSET_OPEXECUTOR_H__INCLUDED__ + +#define DEBUG_1 +#define DEBUG 0 + +#include +#include +#include "PrimitiveOp.h" + +namespace madness +{ + /*!This class takes a primitive Op and computes the result tree*/ + template + class OpExecutor: public WorldObject> + { + typedef OpExecutor oeT; + typedef Key keyT; + typedef WorldObject woT; + typedef WorldContainer,FunctionNode> dcT; + typedef FunctionNode KNODE; + typedef GenTensor coeffT; + + public: + OpExecutor(World& world) + : woT(world) + ,_world(world) + { + woT::process_pending(); + } + + Future> traverseTree (const keyT key, const FuseTContainer &s); + FuseTContainer PostCompute (const keyT key, const std::vector< Future < FuseTContainer > > &v); + + //Starts the computation of pOp + void execute (PrimitiveOp* pOp, bool isBlocking); + + private: + World& _world; + PrimitiveOp* _pOp; + dcT* coeffs; + + //pointer to the resulting kary tree + Function* _result; + }; + + // + // + template + Future< FuseTContainer > + OpExecutor::traverseTree(const keyT key, const FuseTContainer &s) + { +/* + if(DEBUG){ + cout<<" Key : "< temp; + + // Pre-Computation + if (_pOp->isPre()) + { + temp = _pOp->compute(key, s); + } + + //std::vector< Future > > v;// = future_vector_factory 
>(1< > > v;// = future_vector_factory >(1<isDone(key)) + { + if (!_pOp->isPre()) + v = future_vector_factory >(1< kit(key); kit; ++kit, ++i) + { + const keyT& child = kit.key(); + + if (_pOp->needsParameter()) + { + if (_pOp->isPre()) + { + woT::task(coeffs->owner(child), &OpExecutor::traverseTree, child, ((FuseT_VParameter*)(temp.get()))->value[i]); + } + else + { + v[i] = woT::task(coeffs->owner(child), &OpExecutor::traverseTree, child, temp); + } + } + else + { + woT::task(coeffs->owner(child), &OpExecutor::traverseTree, child, temp); + } + } + } + + // Post-Computation + if (!_pOp->isPre()) + { + //std::cout<<"Running again"<> sum(static_cast*>(new FuseT_VParameter(k.value))); + return woT::task(_world.rank(), &OpExecutor::PostCompute, key, v); + // _world.gop.fence(); + } + + // for Pre-Computation (temporal) + Future> retValue(temp); + return retValue; + } + + // + template + FuseTContainer + OpExecutor::PostCompute(const keyT key, const std::vector> > &v) + { + vector > temp; + for(auto a: v) + temp.push_back(a.get()); + + return _pOp->compute(key, FuseTContainer(static_cast*>(new FuseT_VParameter(temp)))); + } + + // execute with operators + template + void + OpExecutor::execute(PrimitiveOp* pOp, bool hasParameters) + { + _pOp = pOp; + _result = _pOp->_result; + coeffs = &_result->get_impl()->get_coeffs(); + + if (_world.rank() == coeffs->owner(keyT(0)) ) + { + FuseTContainer initParameter; + FuseTContainer root = traverseTree(keyT(0), initParameter); + } + + pOp->setComplete(true); + _world.gop.fence(); + + //for operators that require a reduction at the end + if(pOp->_isReduce) + pOp->reduce(_world); + } + +}; /*fuset*/ + +#endif /*__fuset_OpExecutor_h__*/ + diff --git a/src/madness/mra/FuseT/OpExecutor_prev.h b/src/madness/mra/FuseT/OpExecutor_prev.h new file mode 100644 index 00000000000..efc8373459b --- /dev/null +++ b/src/madness/mra/FuseT/OpExecutor_prev.h @@ -0,0 +1,121 @@ +#ifndef __MADNESS_MRA_FUSET_OPEXECUTOR_H__INCLUDED__ +#define 
__MADNESS_MRA_FUSET_OPEXECUTOR_H__INCLUDED__ + +#define DEBUG_1 + +#include +#include "PrimitiveOp.h" + +namespace madness +{ + /*!This class takes a primitive Op and computes the result tree*/ + template + class OpExecutor: public WorldObject> + { + typedef OpExecutor oeT; + typedef Key keyT; + typedef WorldObject woT; + typedef WorldContainer,FunctionNode> dcT; + typedef FunctionNode KNODE; + typedef GenTensor coeffT; + + public: + OpExecutor(World& world) + : woT(world) + ,_world(world) + { woT::process_pending();} + + FuseTContainer traverseTree (const keyT key, FuseTContainer &s); + FuseTContainer PostCompute (const keyT key, FuseTContainer &v); + + //Starts the computation of pOp + void execute (PrimitiveOp* pOp, bool isBlocking); + + private: + World& _world; + PrimitiveOp* _pOp; + dcT* coeffs; + + //pointer to the resulting kary tree + Function* _result; + }; + + // + // + template + FuseTContainer + OpExecutor::traverseTree(const keyT key, FuseTContainer &s) + { + FuseTContainer temp; + + // Pre-Computation + if (_pOp->isPre()) + { + temp = _pOp->compute(key, s); + } + + FuseT_VParameter k; + if (!_pOp->isDone(key)) + { + int i = 0; + for (KeyChildIterator kit(key); kit; ++kit, ++i) + { + const keyT& child = kit.key(); + if (temp.what() == WHAT_AM_I::FuseT_VCoeffT) + { + FuseTContainer single(static_cast*>(new FuseT_CoeffT( ((FuseT_VCoeffT*)temp.get())->value[i] ))); + woT::task(coeffs->owner(child), &OpExecutor::traverseTree, child, single); + } + else + { + if (!_pOp->isPre()) + k.value.push_back(woT::task(coeffs->owner(child), &OpExecutor::traverseTree, child, temp)); + else + woT::task(coeffs->owner(child), &OpExecutor::traverseTree, child, temp); + } + } + } + + // Post-Computation + if (!_pOp->isPre()) + { + Future> sum(static_cast*>(new FuseT_VParameter(k.value))); + return woT::task(_world.rank(), &OpExecutor::PostCompute, key, sum); + _world.gop.fence(); + } + + // for Pre-Computation (temporal) + return temp; + } + + // + template + 
FuseTContainer + OpExecutor::PostCompute(const keyT key, FuseTContainer &v) + { + return _pOp->compute(key, v); + } + + // execute with operators + template + void + OpExecutor::execute(PrimitiveOp* pOp, bool hasParameters) + { + _pOp = pOp; + _result = _pOp->_result; + coeffs = &_result->get_impl()->get_coeffs(); + + if (_world.rank() == coeffs->owner(keyT(0)) ) + { + FuseTContainer initParameter; + FuseTContainer root = traverseTree(keyT(0), initParameter); + } + + pOp->setComplete(true); + _world.gop.fence(); + } + +}; /*fuset*/ + +#endif /*__fuset_OpExecutor_h__*/ + diff --git a/src/madness/mra/FuseT/OpExecutor_prev_revised.h b/src/madness/mra/FuseT/OpExecutor_prev_revised.h new file mode 100644 index 00000000000..4ac7f5903ad --- /dev/null +++ b/src/madness/mra/FuseT/OpExecutor_prev_revised.h @@ -0,0 +1,116 @@ +#ifndef __MADNESS_MRA_FUSET_OPEXECUTOR_H__INCLUDED__ +#define __MADNESS_MRA_FUSET_OPEXECUTOR_H__INCLUDED__ + +#define DEBUG_1 + +#include +#include "PrimitiveOp.h" + +namespace madness +{ + /*!This class takes a primitive Op and computes the result tree*/ + template + class OpExecutor: public WorldObject> + { + typedef OpExecutor oeT; + typedef Key keyT; + typedef WorldObject woT; + typedef WorldContainer,FunctionNode> dcT; + typedef FunctionNode KNODE; + typedef GenTensor coeffT; + + public: + OpExecutor(World& world) + : woT(world) + ,_world(world) + { woT::process_pending();} + + FuseTContainer traverseTree (const keyT key, FuseTContainer &s); + FuseTContainer PostCompute (const keyT key, FuseTContainer &v); + + //Starts the computation of pOp + void execute (PrimitiveOp* pOp, bool isBlocking); + + private: + World& _world; + PrimitiveOp* _pOp; + dcT* coeffs; + + //pointer to the resulting kary tree + Function* _result; + }; + + // + // + template + FuseTContainer + OpExecutor::traverseTree(const keyT key, FuseTContainer &s) + { + FuseTContainer temp; + + // Pre-Computation + if (_pOp->isPre()) + temp = _pOp->compute(key, s); + + FuseT_VParameter k; + 
if (!_pOp->isDone(key)) + { + int i = 0; + for (KeyChildIterator kit(key); kit; ++kit, ++i) + { + const keyT& child = kit.key(); + + if (_pOp->needsParameter()) + { + if (_pOp->isPre()) + woT::task(coeffs->owner(child), &OpExecutor::traverseTree, child, ((FuseT_VParameter*)(temp.get()))->value[i]); + else + k.value.push_back(woT::task(coeffs->owner(child), &OpExecutor::traverseTree, child, temp)); + } + else + woT::task(coeffs->owner(child), &OpExecutor::traverseTree, child, temp); + } + } + + // Post-Computation + if (!_pOp->isPre()) + { + Future> sum(static_cast*>(new FuseT_VParameter(k.value))); + return woT::task(_world.rank(), &OpExecutor::PostCompute, key, sum); + } + + // for Pre-Computation (temporal) + return temp; + } + + // + template + FuseTContainer + OpExecutor::PostCompute(const keyT key, FuseTContainer &v) + { + return _pOp->compute(key, v); + } + + // execute with operators + template + void + OpExecutor::execute(PrimitiveOp* pOp, bool hasParameters) + { + _pOp = pOp; + _result = _pOp->_result; + coeffs = &_result->get_impl()->get_coeffs(); + + if (_world.rank() == coeffs->owner(keyT(0)) ) + { + FuseTContainer initParameter; + FuseTContainer root = traverseTree(keyT(0), initParameter); + } + + pOp->setComplete(true); + _world.gop.fence(); + } + +}; /*fuset*/ + +#endif /*__fuset_OpExecutor_h__*/ + diff --git a/src/madness/mra/FuseT/PrimitiveOp.h b/src/madness/mra/FuseT/PrimitiveOp.h new file mode 100644 index 00000000000..e35c628c253 --- /dev/null +++ b/src/madness/mra/FuseT/PrimitiveOp.h @@ -0,0 +1,107 @@ +#ifndef __MADNESS_MRA_FUSET_PRIMITIVE_OP_H__INCLUDED__ +#define __MADNESS_MRA_FUSET_PRIMITIVE_OP_H__INCLUDED__ + +#include "madness/mra/mra.h" +#include "madness/world/archive.h" +#include +#include "FuseTContainer.h" + +namespace madness +{ + using namespace std; +/* !spatial relation, Same Node Relation (PSI), Some Sibling Relation (SIGMA), Ancestor Relation (ALPHA), + * Descendent Relation (DELTA). 
Dependency Info contains information about type of dependency to parameters*/ + template + struct DependencyInfo + { + const Function* _producerTree; + string _treeName; + unsigned long _treeID; + + bool _psi; + bool _alpha; + bool _delta; + bool _sigma; + +/* !this is used by FuseT. _producerIndex stores the index of operator that produces this tree/ + * _consumerIndex stores the index of the operator that consumes this tree*/ + int _producerIndex; + int _consumerIndex; + + DependencyInfo(const Function* producerTree, bool psi, bool alpha, bool delta, bool sigma) + { + _psi = psi; + _alpha = alpha; + _delta = delta; + _sigma = sigma; + + _producerTree = producerTree; + _treeID = _producerTree->get_impl()->id().get_obj_id(); + _treeName = _producerTree->_treeName; + } + }; + +/*The _OpID is used by the fusion compiler to analyze dependencies relating to this operation. + It "MUST" be set equal to the _treeID of the result tree. + In MADNESS, the tree ID of a Function is given by func->get_impl()->id().get_obj_id()*/ + template class PrimitiveOp { + typedef Function KTREE; + typedef FunctionNode KNODE; + typedef Key keyT; + typedef WorldContainer, FunctionNode > dcT; +///< Type of container holding the nodes + + public: + PrimitiveOp(string opName="Unknown",KTREE* result=NULL, bool isComplete=false, bool isReduce=false) + :_opName(opName), + _result(result), + _isComplete(isComplete), + _isReduce(isReduce) + {} + + virtual ~PrimitiveOp() {} + + virtual FuseTContainer compute (const keyT& key, const FuseTContainer &s) { return FuseTContainer();} + virtual Future< FuseTContainer > computeFuture (const keyT& key, const FuseTContainer &s) { return Future >();} + + virtual bool isDone(const keyT& key) const = 0; + virtual bool isPre() const = 0; + virtual bool needsParameter() const = 0; + virtual void reduce(World& world) = 0; + virtual bool returnsFuture(){ return false;} + + //!used for postCompute ops to see if it needs to be pushed to the compute stack or not + 
virtual bool notEmpty(map& notEmptyMap) const{return true;} + void setComplete(bool isComplete) { _isComplete=isComplete; } + + void PrintDependencyInfo(){ + + std::cout< > _dInfoVec; + bool _isReduce; + + }; + +} /*fuset*/ + +#endif /* __MADNESS_MRA_PRIMITIVE_OP_H__INCLUDED__ */ diff --git a/src/madness/mra/FuseT/ReconstructOp.h b/src/madness/mra/FuseT/ReconstructOp.h new file mode 100644 index 00000000000..9bf3504e793 --- /dev/null +++ b/src/madness/mra/FuseT/ReconstructOp.h @@ -0,0 +1,211 @@ + +// Ghaly +// +// Reconstructs the function, transforming into scaling function basis. Possible non-blocking comm. +// +#ifndef __MADNESS_MRA_FUSET_RECONSTRUCT_OP__INCLUDED__ +#define __MADNESS_MRA_FUSET_RECONSTRUCT_OP__INCLUDED__ + +#include "PrimitiveOp.h" +#include "../function_common_data.h" + +namespace madness +{ + template + class ReconstructOp : public PrimitiveOp + { + public: + typedef Function KTREE; + typedef FunctionNode KNODE; + typedef Key keyT; + typedef WorldContainer,FunctionNode> dcT; + + typedef GenTensor coeffT; + typedef Tensor tensorT; + + public: + ReconstructOp (string opName, KTREE* output, const KTREE* i1); + FuseTContainer compute (const keyT& key, const FuseTContainer &s); + + bool isDone (const keyT& key) const; + bool isPre () const { return true; } + bool needsParameter () const { return true; } + void reduce(World& world){} + public: + std::vector child_patch (const keyT& child) const; + + private: + //!Points to operand trees + const KTREE* _i1; + + //!Points to operand nodes of the tree + KNODE *_t1, *_t2; + + //!Variables for ReconstructOp + const FunctionCommonData& _cdata; + TensorArgs _targs; + }; + + // Constructor + template + ReconstructOp::ReconstructOp(string opName, KTREE* output, const KTREE* i1) + : PrimitiveOp(opName, output, false) + , _i1(i1) + , _cdata(FunctionCommonData::get(i1->k())) + , _targs(i1->get_impl()->get_tensor_args()) + { + //dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + this->_OpID = 
output->get_impl()->id().get_obj_id(); + this->_dInfoVec.push_back(DependencyInfo(i1,true,true,false,false)); + this->_dInfoVec.push_back(DependencyInfo(output,true,true,false,false)); + } + + template + FuseTContainer + ReconstructOp::compute(const keyT& key, const FuseTContainer &s) + { + FuseT_CoeffT* s_coeff; + if (s.get() == 0) + { + // it should be called initially + s_coeff = new FuseT_CoeffT(); + s_coeff->value = coeffT(); + } + else + { + s_coeff = new FuseT_CoeffT(); + if (s.what() == WHAT_AM_I::FuseT_CoeffT) + { + s_coeff->value = (((FuseT_CoeffT*)s.get())->value).full_tensor_copy(); + delete s.data; + } + else + { + cerr<<"This should not have happenned"<_result->get_impl()->compressed = false; + this->_result->get_impl()->nonstandard = false; + this->_result->get_impl()->redundant = false; + } + + typename dcT::iterator it= _i1->get_impl()->get_coeffs().find(key).get(); + if (it == _i1->get_impl()->get_coeffs().end()) + { + _i1->get_impl()->get_coeffs().replace(key,KNODE(coeffT(), false)); + it = _i1->get_impl()->get_coeffs().find(key).get(); + } + + // The integral operator will correctly connect interior nodes + // to children but may leave interior nodes without coefficients + // ... but they still need to sum down so just give them zeros + KNODE& node = it->second; // source + KNODE targetNode; // target (if target is initially copied from source, it should be perfect.) + FuseTContainer returnP; + if (node.has_children() && !node.has_coeff()) + { + targetNode.set_coeff(coeffT(_cdata.v2k, _targs)); + this->_result->get_impl()->get_coeffs().replace(key,targetNode); + } + + if (node.has_children() || node.has_coeff()) + { + coeffT d = copy(node.coeff()); + + if (!d.has_data()) { + d = coeffT(_cdata.v2k, _targs); + } + if (key.level() > 0) + { + d(_cdata.s0) += s_coeff->value; // this is the problem!!! 
+ } + delete s_coeff; + + if (d.dim(0) == 2*(_i1->get_impl()->get_k())) + { + FuseT_VParameter v_parameter; + + d = _i1->get_impl()->unfilter(d); + targetNode.clear_coeff(); + targetNode.set_has_children(true); + this->_result->get_impl()->get_coeffs().replace(key, targetNode); + + for (KeyChildIterator kit(key); kit; ++kit) + { + FuseT_CoeffT s_coeff_s; + + const keyT& child = kit.key(); + coeffT ss = copy(d(child_patch(child))); + ss.reduce_rank(_i1->thresh()); + s_coeff_s.value = ss; + + FuseTContainer wrapper(static_cast*>(new FuseT_CoeffT(s_coeff_s.value))); + v_parameter.value.push_back(wrapper); + } + + // Wrapping ReconstructV to FuseTContainer + FuseTContainer temp(static_cast*>(new FuseT_VParameter(v_parameter.value))); + return temp; + } + else + { + MADNESS_ASSERT(node.is_leaf()); //??? + targetNode.coeff().reduce_rank(_targs.thresh); + this->_result->get_impl()->get_coeffs().replace(key, targetNode); + } + } + else + { + coeffT ss = s_coeff->value; + if (s_coeff->value.has_no_data()) + { + ss = coeffT(_cdata.vk, _targs); + } + + if (key.level()) + { + targetNode.set_coeff(copy(ss)); + this->_result->get_impl()->get_coeffs().replace(key, targetNode); + } + else + { + // my example--ReconstructEx does visit this else statement. + targetNode.set_coeff(ss); + this->_result->get_impl()->get_coeffs().replace(key, targetNode); + } + } + // + // nothing... 
+ delete s_coeff; + return returnP; + } + + template + std::vector + ReconstructOp::child_patch(const keyT& child) const + { + std::vector s(NDIM); + const Vector& l = child.translation(); + for (std::size_t i = 0; i + bool ReconstructOp::isDone(const keyT& key) const + { + bool isE1 = _i1->get_impl()->get_coeffs().probe(key); + if(!isE1) return isE1; + bool isLeaf = !_i1->get_impl()->get_coeffs().find(key).get()->second.has_children(); + return isLeaf; + } + +}; /*fuset*/ + +#endif /* __fuset_ReconstructOp_h__ */ diff --git a/src/madness/mra/FuseT/TransformOp.h b/src/madness/mra/FuseT/TransformOp.h new file mode 100644 index 00000000000..26d7ae3e4f2 --- /dev/null +++ b/src/madness/mra/FuseT/TransformOp.h @@ -0,0 +1,325 @@ +// +// Ghaly +// +// Compresses the function, transforming into wavelet basis. +// Possible non-blocking comm. +// +// By default fence=true meaning that this oepration completes before returning, +// othewise if fence=false it returns without fencing and the user must invoke +// workd.gop.fence() to assure global completion before using the function +// for other purposes. +// +// Noop if already compressed or if not initialized. +// +// Since reconstruction/compression do not discard information we define them +// as const ... "logical constness" not "bitwise contness". 
+// +#ifndef __MADNESS_MRA_FUSET_TRANSFORM_OP__INCLUDED__ +#define __MADNESS_MRA_FUSET_TRANSFORM_OP__INCLUDED__ + +#include "PrimitiveOp.h" +#include "FuseTContainer.h" +#include "../mra.h" +#include "../function_common_data.h" +#include "../../world/MADworld.h" +#include "../../tensor/tensor.h" +#include "../../tensor/gentensor.h" + +namespace madness +{ + template + class TransformOp : public PrimitiveOp + { + typedef Function KTREE; + typedef FunctionNode KNODE; + typedef Key keyT; + typedef WorldContainer,FunctionNode> dcT; + typedef WorldObject> woT; + typedef GenTensor coeffT; + typedef Tensor tensorT; + + public: + TransformOp (string opName, std::vector& output, const std::vector& v, const DistributedMatrix& g, bool sym); + FuseTContainer compute (const keyT& key, const FuseTContainer &s); + + bool notEmpty(map& notEmptyMap) const + { + unsigned long treeID = _i1->get_impl()->id().get_obj_id(); + return notEmptyMap[treeID]; + } + + bool isDone (const keyT& key) const; + bool isPre () const { return true; } // false does not work. but It should be false. 
+ bool needsParameter () const { return true; } + void reduce (World& world); + + public: + // MatrixInnerOpp + Tensor* _r; + + private: + //!Points to operand trees + const KTREE* _i1; + + //!Points to operand nodes of the tree + KNODE *_t1, *_t2; + + //!Variables for MatrixInnerOp + std::vector _left_v_coeffs; + std::vector _right_v_coeffs; + //dcT& _coeffs; + //dcT& _coeffs_target; + + bool _sym; + std::vector* > _left; + std::vector* > _right; + + std::map checkKeyDoneLeft; + std::map checkKeyDoneRight; + + std::map candidatesLeft; + std::map candidatesRight; + + int _k; // Wavelet order + }; + + // Constructor + // World is needed for communication in the "compute" function + template + TransformOp::TransformOp(string opName, std::vector& output, const std::vector& v, const DistributedMatrix& c, bool sym) + : PrimitiveOp(opName, output, false, true) + , _sym(sym) + { + long n = v.size(); + long m = c.rowdim(); + + MADNESS_ASSERT(n == c.columndim()); + + + output = zero_functions_compressed(v[0].world(), m); + +// this->_r = new Tensor(f.size(), g.size()); + +// for (unsigned int i=0; i_r)(i,j) = 0.0; + +// for (unsigned int i=0; iget_coeffs() ); + // for (unsigned int j=0; jget_coeffs() ); + + // dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + // this->_OpID = output->get_impl()->id().get_obj_id(); + + for (unsigned int i=0; i_dInfoVec.push_back(DependencyInfo(&v[i], true, true, false, false)); + + // for (unsigned int i=0; i_dInfoVec.push_back(DependencyInfo(&g[i], true, true, false, false)); + + //this->_dInfoVec.push_back(DependencyInfo(output,true,true,false,false)); + + woT(v[0].world()); + } + + // + // it should hangle both a parent and a leaf node. 
+ // + template + FuseTContainer + TransformOp::compute(const keyT& key, const FuseTContainer &s) + { +/* + FuseT_VParameter* inheritedWhole; + FuseT_VType* inheritedLeft; + FuseT_VType* inheritedRight; + + // Processing for Paramter + if (s.get() == 0) + { + inheritedLeft = new FuseT_VType; + inheritedRight = new FuseT_VType; + + for (unsigned int i=0; i<_left.size(); i++) + inheritedLeft->value.push_back(i); + for (unsigned int i=0; i<_right.size(); i++) + inheritedRight->value.push_back(i); + } + else + { + inheritedWhole = new FuseT_VParameter( ((FuseT_VParameter*)s.get())->value ); + + inheritedLeft = new FuseT_VType(((FuseT_VType*)(((inheritedWhole->value[0]).get())))->value); + inheritedRight = new FuseT_VType(((FuseT_VType*)(((inheritedWhole->value[1]).get())))->value); + } + + FuseT_VType whichNodesLeft; // value = std::vector + FuseT_VType whichNodesRight; + + unsigned int indexLeft; + unsigned int indexRight; + unsigned int leftSize = inheritedLeft->value.size(); + unsigned int rightSize = inheritedRight->value.size(); + + // The Pre-Computatio + // Assumption: the size of coefficient --> 16*16*16 = 4096 + double* A = (double*)malloc(sizeof(double)*16*16*16*leftSize); + double* B = (double*)malloc(sizeof(double)*16*16*16*rightSize); + double* C = (double*)malloc(sizeof(double)*leftSize*rightSize); + unsigned int k,l,m; + + // + for (unsigned int i=0; ivalue[i]; + const KNODE& fnode = _left_v_coeffs[indexLeft].find(key).get()->second; + + if (_left_v_coeffs[indexLeft].find(key).get()->second.has_children()) + whichNodesLeft.value.push_back(indexLeft); + + // 3D array to 1D array with i for fnode and j for gnode + if (fnode.has_coeff()) + { + for (k=0; k<16; k++) + for (l=0; l<16; l++) + for (m=0; m<16; m++) + A[i*16*16*16 + k*16*16 + l*16 + m] = (fnode.coeff())(k,l,m); + } + else + { + for (k=0; k<16; k++) + for (l=0; l<16; l++) + for (m=0; m<16; m++) + A[i*16*16*16 + k*16*16 + l*16 + m] = 0.0; + } + } + + + // + for (unsigned int i=0; ivalue[i]; + const 
KNODE& gnode = _right_v_coeffs[indexRight].find(key).get()->second; + + if (_right_v_coeffs[indexRight].find(key).get()->second.has_children()) + whichNodesRight.value.push_back(indexRight); + + // 3D array to 1D array with i for fnode and j for gnode + if (gnode.has_coeff()) + { + for (k=0; k<16; k++) + for (l=0; l<16; l++) + for (m=0; m<16; m++) + B[i*16*16*16 + k*16*16 + l*16 + m] = (gnode.coeff())(k,l,m); + } + else + { + for (k=0; k<16; k++) + for (l=0; l<16; l++) + for (m=0; m<16; m++) + B[i*16*16*16 + k*16*16 + l*16 + m] = 0.0; + } + } + + // + for (k=0; kvalue[k]; + for (l=0; lvalue[l]; + (*this->_r)(indexLeft, indexRight) += C[k + l*leftSize]; // k*rightSize + l --> row-major + } + } + + delete A; + delete B; + delete C; + + + + if (whichNodesLeft.value.size() == 0) + checkKeyDoneLeft.insert(std::pair(key,true)); + else + checkKeyDoneLeft.insert(std::pair(key,false)); + + if (whichNodesRight.value.size() == 0) + checkKeyDoneRight.insert(std::pair(key,true)); + else + checkKeyDoneRight.insert(std::pair(key,false)); + + + // + FuseT_VParameter v_parameter; + FuseT_VParameter inner_parameter; + + FuseTContainer candiParameter_L(static_cast*> (new FuseT_VType(whichNodesLeft.value))); + FuseTContainer candiParameter_R(static_cast*> (new FuseT_VType(whichNodesRight.value))); + inner_parameter.value.push_back(candiParameter_L); + inner_parameter.value.push_back(candiParameter_R); + + for (KeyChildIterator kit(key); kit; ++kit) + { + FuseTContainer wrapper(static_cast*>(new FuseT_VParameter(inner_parameter.value))); + v_parameter.value.push_back(wrapper); + } + + // Return Parameters + FuseTContainer targets(static_cast*>(new FuseT_VParameter(v_parameter.value))); + return targets; +*/ + } + + // isDone + template + bool + TransformOp::isDone(const keyT& key) const + { +/* + bool isE1; + bool isE2; + + // O(M + N) + for (unsigned int i=0; i<_left.size(); i++) + { + isE1 = _left[i]->get_coeffs().probe(key) || isE1; + } + if (!isE1) { 
std::cout<get_coeffs().probe(key) || isE2; + } + if (!isE2) { std::cout<second) return true; + if (checkKeyDoneRight.find(key)->second) return true; +*/ + return false; + } + + template + void + TransformOp::reduce(World& world){ + // world.gop.sum(_r->ptr(),_left.size()*_right.size()); + } + +}; /*fuset*/ + +#endif /* __fuset_TransformOp_h__ */ diff --git a/src/madness/mra/FuseT/existing_MatrixInnerOp.h b/src/madness/mra/FuseT/existing_MatrixInnerOp.h new file mode 100644 index 00000000000..096efbc2c89 --- /dev/null +++ b/src/madness/mra/FuseT/existing_MatrixInnerOp.h @@ -0,0 +1,280 @@ +// +// Ghaly +// +// Compresses the function, transforming into wavelet basis. +// Possible non-blocking comm. +// +// By default fence=true meaning that this oepration completes before returning, +// othewise if fence=false it returns without fencing and the user must invoke +// workd.gop.fence() to assure global completion before using the function +// for other purposes. +// +// Noop if already compressed or if not initialized. +// +// Since reconstruction/compression do not discard information we define them +// as const ... "logical constness" not "bitwise contness". 
+// +#ifndef __MADNESS_MRA_FUSET_MATRIXINNER_OP__INCLUDED__ +#define __MADNESS_MRA_FUSET_MATRIXINNER_OP__INCLUDED__ + +#include "PrimitiveOp.h" +#include "FuseTContainer.h" +#include "../mra.h" +#include "../function_common_data.h" +#include "../../world/MADworld.h" +#include "../../tensor/tensor.h" +#include "../../tensor/gentensor.h" + +namespace madness +{ + template + class MatrixInnerOp : public PrimitiveOp + { + typedef Function KTREE; + typedef FunctionNode KNODE; + typedef Key keyT; + typedef WorldContainer,FunctionNode> dcT; + typedef WorldObject> woT; + typedef GenTensor coeffT; + typedef Tensor tensorT; + + public: + MatrixInnerOp (string opName, KTREE* output, const std::vector& f, const std::vector& g, bool sym); + FuseTContainer compute (const keyT& key, const FuseTContainer &s); + + bool notEmpty(map& notEmptyMap) const + { + unsigned long treeID = _i1->get_impl()->id().get_obj_id(); + return notEmptyMap[treeID]; + } + + bool isDone (const keyT& key) const; + bool isPre () const { return true; } // false does not work. but It should be false. 
+ bool needsParameter () const { return true; } + void reduce (World& world); + + public: + // MatrixInnerOpp + Tensor* _r; + + private: + //!Points to operand trees + const KTREE* _i1; + + //!Points to operand nodes of the tree + KNODE *_t1, *_t2; + + //!Variables for MatrixInnerOp + std::vector _left_v_coeffs; + std::vector _right_v_coeffs; + //dcT& _coeffs; + //dcT& _coeffs_target; + + bool _sym; + std::vector* > _left; + std::vector* > _right; + + std::map checkKeyDoneLeft; + std::map checkKeyDoneRight; + + bool overallL; + bool overallR; + + int _k; // Wavelet order + }; + + // Constructor + // World is needed for communication in the "compute" function + template + MatrixInnerOp::MatrixInnerOp(string opName, KTREE* output, const std::vector& f, const std::vector& g, bool sym) + : PrimitiveOp(opName, output, false, true) + , _sym(sym) + { + this->_r = new Tensor(f.size(), g.size()); + + for (unsigned int i=0; i_r)(i,j) = 0.0; + + for (unsigned int i=0; iget_coeffs() ); + for (unsigned int j=0; jget_coeffs() ); + + // dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + this->_OpID = output->get_impl()->id().get_obj_id(); + + for (unsigned int i=0; i_dInfoVec.push_back(DependencyInfo(&f[i], true, true, false, false)); + + for (unsigned int i=0; i_dInfoVec.push_back(DependencyInfo(&g[i], true, true, false, false)); + + this->_dInfoVec.push_back(DependencyInfo(output,true,true,false,false)); + + woT(f[0].world()); + } + + // + // it should hangle both a parent and a leaf node. 
+ // + template + FuseTContainer + MatrixInnerOp::compute(const keyT& key, const FuseTContainer &s) + { + FuseT_VType* possibleLists; + + // Processing for Paramter + if (s.get() == 0) + { + possibleLists = new FuseT_VType; + for (int i=0; i<_left.size() + _right.size(); i++) + possibleLists->value.push_back(1); + } + else + { + possibleLists = new FuseT_VType( ((FuseT_VType*)s.get())->value ); + } + + // Main --- O(M*N) + bool tempLeft; + bool tempRight; + bool overallLeft = true; + bool overallRight = true; + FuseT_VType whichNodesLeft; + FuseT_VType whichNodesRight; + for (int i=0; i<_left.size(); i++) + { + if (possibleLists->value[i] != 0) + { + const KNODE& fnode = _left_v_coeffs[i].find(key).get()->second; + tempLeft = _left_v_coeffs[i].find(key).get()->second.has_children(); + overallLeft = overallLeft && !tempLeft; + + if (fnode.has_coeff()) + { + for (int j=0; j<_right.size(); j++) + { + if (possibleLists->value[_left.size() + j] != 0) + { + if (i == 0) + { + tempRight = _right_v_coeffs[j].find(key).get()->second.has_children(); + overallRight = overallRight && !tempRight; + } + + const KNODE& gnode = _right_v_coeffs[j].find(key).get()->second; + if (gnode.has_coeff()) //{} + (*this->_r)(i, j) += fnode.coeff().trace_conj(gnode.coeff()); + } + else + { + if(i == 0) + { + tempRight = false; + overallRight = overallRight && true; + } + } + if (i == 0) + whichNodesRight.value.push_back(tempRight); + } + } + else + { + if (i == 0) { + for (int j=0; j<_right.size(); j++) + { + if (possibleLists->value[_left.size() + j] != 0) + { + tempRight = _right_v_coeffs[j].find(key).get()->second.has_children(); + overallRight = overallRight && !tempRight; + } + else + { + tempRight = false; + overallRight = overallRight && true; + } + whichNodesRight.value.push_back(tempRight); + } + } + } + } + else + { + tempLeft = false; + overallLeft = overallLeft && true; + + if (i == 0) + { + for (int j=0; j<_right.size(); j++) + { + if (possibleLists->value[_left.size() + j] != 0) 
+ { + tempRight = _right_v_coeffs[j].find(key).get()->second.has_children(); + overallRight = overallRight && !tempRight; + } + else + { + tempRight = false; + overallRight = overallRight && true; + } + whichNodesRight.value.push_back(tempRight); + } + } + } + whichNodesLeft.value.push_back(tempLeft); + } + checkKeyDoneLeft.insert( std::pair(key,overallLeft) ); + checkKeyDoneRight.insert( std::pair(key,overallRight) ); + + whichNodesLeft.value.insert(whichNodesLeft.value.end(), whichNodesRight.value.begin(), whichNodesRight.value.end()); + + // + FuseT_VParameter v_parameter; + for (KeyChildIterator kit(key); kit; ++kit) + { + FuseTContainer wrapper(static_cast*>(new FuseT_VType(whichNodesLeft.value))); + v_parameter.value.push_back(wrapper); + } + + FuseTContainer targets(static_cast*>(new FuseT_VParameter(v_parameter.value))); + return targets; + } + + // isDone + template + bool + MatrixInnerOp::isDone(const keyT& key) const + { + bool isE1; + bool isE2; + + // O(M + N) + for (int i=0; i<_left.size(); i++) + { + isE1 = _left[i]->get_coeffs().probe(key) || isE1; + } + if (!isE1) { std::cout<get_coeffs().probe(key) || isE2; + } + if (!isE2) { std::cout<second) return true; + if (checkKeyDoneRight.find(key)->second) return true; + + return false; + } + + template + void + MatrixInnerOp::reduce(World& world){ + world.gop.sum(_r->ptr(),_left.size()*_right.size()); + } + +}; /*fuset*/ + +#endif /* __fuset_MatrixInnerOp_h__ */ diff --git a/src/madness/mra/FuseT/helper.cc b/src/madness/mra/FuseT/helper.cc new file mode 100644 index 00000000000..e3fe55d367e --- /dev/null +++ b/src/madness/mra/FuseT/helper.cc @@ -0,0 +1,14 @@ +#include "helper.h" + +using namespace std; +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday (&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + diff --git a/src/madness/mra/FuseT/helper.cpp 
b/src/madness/mra/FuseT/helper.cpp new file mode 100644 index 00000000000..e3fe55d367e --- /dev/null +++ b/src/madness/mra/FuseT/helper.cpp @@ -0,0 +1,14 @@ +#include "helper.h" + +using namespace std; +double rtclock() +{ + struct timezone Tzp; + struct timeval Tp; + int stat; + stat = gettimeofday (&Tp, &Tzp); + if (stat != 0) + printf("Error return from gettimeofday: %d", stat); + return (Tp.tv_sec + Tp.tv_usec * 1.0e-6); +} + diff --git a/src/madness/mra/FuseT/helper.h b/src/madness/mra/FuseT/helper.h new file mode 100644 index 00000000000..277cfea4109 --- /dev/null +++ b/src/madness/mra/FuseT/helper.h @@ -0,0 +1,9 @@ +#include +#include +#include +#include +#include +#include +#include +double rtclock(); + diff --git a/src/madness/mra/FuseT/main.cpp b/src/madness/mra/FuseT/main.cpp new file mode 100644 index 00000000000..7d445c79cb4 --- /dev/null +++ b/src/madness/mra/FuseT/main.cpp @@ -0,0 +1,140 @@ +#include "KaryTree.h" +#include "CpOp.h" +#include "AddOp.h" +#include "CcOp.h" +#include "OpExecutor.h" +#include "FuseT.h" +#include "FusedExecutor.h" +#include "helper.h" +#define RESULTTREE 0 +#define MAXDEPTH 12 +#define NUMCHILD 8 +#define DATALENGTH 1000 +//template class KaryTree; + +using namespace fuset; +using namespace std; + +typedef double dtype; + +int main(int argc, char* argv[]) { + int isFused = 0; + if ( argc != 2 ){ + cout<<"Incorrect command line parameters"< t1("t1",MAXDEPTH,NUMCHILD,DATALENGTH), t2("t2",MAXDEPTH,NUMCHILD,DATALENGTH), t3("t3",MAXDEPTH,NUMCHILD,DATALENGTH), t4("t4",MAXDEPTH,NUMCHILD,DATALENGTH); + + /*!the seed for srand is the time(NULL), this adding the sleep + * to generate different trees*/ + t1.generateRandom(SOME_NUM,1.0); + t2.generateRandom(SOME_NUM,2.0); + t3.generateRandom(SOME_NUM,1.5); + t4.generateRandom(SOME_NUM,2.0); + + //results + KaryTree rCp0("rCp0",MAXDEPTH,NUMCHILD,DATALENGTH); + KaryTree rAdd0("rAdd0",MAXDEPTH,NUMCHILD,DATALENGTH); + KaryTree rCc0("rCc0",MAXDEPTH,NUMCHILD,DATALENGTH); + KaryTree 
rCp1("rCp1",MAXDEPTH,NUMCHILD,DATALENGTH); + KaryTree rAdd1("rAdd1",MAXDEPTH,NUMCHILD,DATALENGTH); + KaryTree rCc1("rCc1",MAXDEPTH,NUMCHILD,DATALENGTH); + + //initializing operators + + AddOp opAdd0("opAdd0", &rAdd0, &t2, &t3); + CpOp opCp0("opCp0", &rCp0, &rAdd0); + CcOp opCc0("opCc0", &rCc0,&t4); + CpOp opCp1("opCp1", &rCp1, &t1); + AddOp opAdd1("opAdd1", &rAdd1, &rCc0, &t3); + CcOp opCc1("opCc1", &rCc1,&rAdd1); + + opAdd0.PrintDependencyInfo(); + opCp0.PrintDependencyInfo(); + opCc0.PrintDependencyInfo(); + opCp1.PrintDependencyInfo(); + opAdd1.PrintDependencyInfo(); + opCc1.PrintDependencyInfo(); + + std::vector*> sequence; + sequence.push_back(&opAdd0); + sequence.push_back(&opCp0); + sequence.push_back(&opCc0); +// sequence.push_back(&opCp1); +// sequence.push_back(&opAdd1); +// sequence.push_back(&opCc1); + + + double start,end; + + if(isFused){ + cout<<"***********************"< odag(sequence); + odag.processSequence(); + + if(printInfo){ + odag.printOpsAndTrees(); + odag.printValidSequences(); + } + FusedOpSequence fsequence = odag.getFusedOpSequence(); + FusedExecutor fexecuter(&fsequence); + start = rtclock(); + fexecuter.execute(); + end = rtclock(); + }else{ + cout<<"***********************"< opx; + start = rtclock(); + for(auto op : sequence) + opx.execute(op, false); + end =rtclock(); + } + + cout<<"Total Run time is "<_result->getNorm(); + cout<<"Norm is "< + class MatrixInnerOp : public PrimitiveOp + { + typedef Function KTREE; + typedef FunctionNode KNODE; + typedef Key keyT; + typedef WorldContainer,FunctionNode> dcT; + typedef WorldObject> woT; + typedef GenTensor coeffT; + typedef Tensor tensorT; + + public: + MatrixInnerOp (string opName, KTREE* output, const std::vector& f, const std::vector& g, bool sym); + FuseTContainer compute (const keyT& key, const FuseTContainer &s); + + bool notEmpty(map& notEmptyMap) const + { + unsigned long treeID = _i1->get_impl()->id().get_obj_id(); + return notEmptyMap[treeID]; + } + + bool isDone (const keyT& 
key) const; + bool isPre () const { return true; } // false does not work. but It should be false. + bool needsParameter () const { return true; } + void reduce (World& world); + + public: + // MatrixInnerOpp + Tensor* _r; + + private: + //!Points to operand trees + const KTREE* _i1; + + //!Points to operand nodes of the tree + KNODE *_t1, *_t2; + + //!Variables for MatrixInnerOp + std::vector _left_v_coeffs; + std::vector _right_v_coeffs; + //dcT& _coeffs; + //dcT& _coeffs_target; + + bool _sym; + std::vector* > _left; + std::vector* > _right; + + std::map checkKeyDoneLeft; + std::map checkKeyDoneRight; + + std::map candidatesLeft; + std::map candidatesRight; + + bool overallL; + bool overallR; + + int _k; // Wavelet order + }; + + // Constructor + // World is needed for communication in the "compute" function + template + MatrixInnerOp::MatrixInnerOp(string opName, KTREE* output, const std::vector& f, const std::vector& g, bool sym) + : PrimitiveOp(opName, output, false, true) + , _sym(sym) + { + this->_r = new Tensor(f.size(), g.size()); + + for (unsigned int i=0; i_r)(i,j) = 0.0; + + for (unsigned int i=0; iget_coeffs() ); + for (unsigned int j=0; jget_coeffs() ); + + // dependnecy Info PSI, ALPHA, DELTA,SIGMA, ID + this->_OpID = output->get_impl()->id().get_obj_id(); + + for (unsigned int i=0; i_dInfoVec.push_back(DependencyInfo(&f[i], true, true, false, false)); + + for (unsigned int i=0; i_dInfoVec.push_back(DependencyInfo(&g[i], true, true, false, false)); + + this->_dInfoVec.push_back(DependencyInfo(output,true,true,false,false)); + + woT(f[0].world()); + } + + // + // it should hangle both a parent and a leaf node. 
+ // + template + FuseTContainer + MatrixInnerOp::compute(const keyT& key, const FuseTContainer &s) + { + FuseT_VParameter* inheritedWhole; + FuseT_VType* inheritedLeft; + FuseT_VType* inheritedRight; + + // Processing for Paramter + if (s.get() == 0) + { + inheritedLeft = new FuseT_VType; + inheritedRight = new FuseT_VType; + + for (unsigned int i=0; i<_left.size(); i++) + inheritedLeft->value.push_back(i); + for (unsigned int i=0; i<_right.size(); i++) + inheritedRight->value.push_back(i); + } + else + { + inheritedWhole = new FuseT_VParameter( ((FuseT_VParameter*)s.get())->value ); + + inheritedLeft = new FuseT_VType(((FuseT_VType*)(((inheritedWhole->value[0]).get())))->value); + inheritedRight = new FuseT_VType(((FuseT_VType*)(((inheritedWhole->value[1]).get())))->value); + } + + // std::cout<<"key: "<< key <<",left: "<value.size()<<", right: "<value.size()< whichNodesLeft; // value = std::vector + FuseT_VType whichNodesRight; + + for (unsigned int i=0; ivalue.size(); i++) + { + indexLeft = inheritedLeft->value[i]; + const KNODE& fnode = _left_v_coeffs[indexLeft].find(key).get()->second; + if (_left_v_coeffs[indexLeft].find(key).get()->second.has_children()) + whichNodesLeft.value.push_back(indexLeft); + + if (fnode.has_coeff()) + { + for (unsigned int j=0; jvalue.size(); j++) + { + indexRight = inheritedRight->value[j]; + + const KNODE& gnode = _right_v_coeffs[indexRight].find(key).get()->second; + if (i==0) + if (_right_v_coeffs[indexRight].find(key).get()->second.has_children()) + whichNodesRight.value.push_back(indexRight); + + if (gnode.has_coeff()) + (*this->_r)(indexLeft, indexRight) += fnode.coeff().trace_conj(gnode.coeff()); + } + } + else + { + if (i==0) + { + for (unsigned int j=0; jvalue.size(); j++) + { + indexRight = inheritedRight->value[j]; + if (_right_v_coeffs[indexRight].find(key).get()->second.has_children()) + whichNodesRight.value.push_back(indexRight); + } + } + } + } + + //std::cout<<"-key: "<< key <<",left: "<(key,true)); + else + 
checkKeyDoneLeft.insert(std::pair(key,false)); + + if (whichNodesRight.value.size() == 0) + checkKeyDoneRight.insert(std::pair(key,true)); + else + checkKeyDoneRight.insert(std::pair(key,false)); + + + // + FuseT_VParameter v_parameter; + FuseT_VParameter inner_parameter; + + FuseTContainer candiParameter_L(static_cast*> (new FuseT_VType(whichNodesLeft.value))); + FuseTContainer candiParameter_R(static_cast*> (new FuseT_VType(whichNodesRight.value))); + inner_parameter.value.push_back(candiParameter_L); + inner_parameter.value.push_back(candiParameter_R); + + for (KeyChildIterator kit(key); kit; ++kit) + { + FuseTContainer wrapper(static_cast*>(new FuseT_VParameter(inner_parameter.value))); + v_parameter.value.push_back(wrapper); + } + + // Return Parameters + FuseTContainer targets(static_cast*>(new FuseT_VParameter(v_parameter.value))); + return targets; + } + + // isDone + template + bool + MatrixInnerOp::isDone(const keyT& key) const + { + bool isE1; + bool isE2; + + // O(M + N) + for (unsigned int i=0; i<_left.size(); i++) + { + isE1 = _left[i]->get_coeffs().probe(key) || isE1; + } + if (!isE1) { std::cout<get_coeffs().probe(key) || isE2; + } + if (!isE2) { std::cout<second) return true; + if (checkKeyDoneRight.find(key)->second) return true; + + return false; + } + + template + void + MatrixInnerOp::reduce(World& world){ + world.gop.sum(_r->ptr(),_left.size()*_right.size()); + } + +}; /*fuset*/ + +#endif /* __fuset_MatrixInnerOp_h__ */ diff --git a/src/madness/mra/Makefile.am b/src/madness/mra/Makefile.am index cb45016745c..e5e09f3837f 100644 --- a/src/madness/mra/Makefile.am +++ b/src/madness/mra/Makefile.am @@ -45,7 +45,12 @@ thisinclude_HEADERS = adquad.h funcimpl.h indexit.h legendre.h operator.h v function_factory.h function_interface.h gfit.h convolution1d.h \ simplecache.h derivative.h displacements.h functypedefs.h \ sdf_shape_3D.h sdf_domainmask.h vmra1.h - + FuseT/PrimitiveOp.h FuseT/CompressOp.h FuseT/CopyOp.h \ + FuseT/FusedExecutor.h 
FuseT/FuseTContainer.h \ + FuseT/InnerOp.h FuseT/OpExecutor.h FuseT/ReconstructOp.h \ + FuseT/FuseT.h FuseT/AddOp.h FuseT/DerivativeOp.h \ + FuseT/MultiplyOp.h FuseT/FuseT.h FuseT/MatrixInnerOp.h \ + FuseT/TransformOp.h FuseT/MatrixInnerPreOp.h LDADD = libMADmra.la $(LIBLINALG) $(LIBTENSOR) $(LIBMISC) $(LIBMUPARSER) $(LIBWORLD) diff --git a/src/madness/mra/derivative.h b/src/madness/mra/derivative.h index 99995a81785..7cc96d12b79 100644 --- a/src/madness/mra/derivative.h +++ b/src/madness/mra/derivative.h @@ -57,6 +57,9 @@ namespace madness { template class Function; + template + class DerivativeOp; + } @@ -291,6 +294,11 @@ namespace madness { Tensor right_r0t, right_rpt; ///< Blocks of the derivative for the right boundary Tensor bv_left, bv_right ; ///< Blocks of the derivative operator for the boundary contribution + public: + const std::size_t get_axis() const{ + return this->axis; + } + void do_diff2b(const implT* f, implT* df, const keyT& key, const argT& left, const argT& center, diff --git a/src/madness/mra/funcimpl.h b/src/madness/mra/funcimpl.h index 3438166c2c2..a107b61e12c 100644 --- a/src/madness/mra/funcimpl.h +++ b/src/madness/mra/funcimpl.h @@ -917,7 +917,7 @@ namespace madness { int truncate_mode; ///< 0=default=(|d|& cdata; @@ -925,8 +925,6 @@ namespace madness { std::shared_ptr< FunctionFunctorInterface > functor; bool on_demand; ///< does this function have an additional functor? 
- bool compressed; ///< Compression status - bool redundant; ///< If true, function keeps sum coefficients on all levels dcT coeffs; ///< The coefficients @@ -934,6 +932,10 @@ namespace madness { FunctionImpl(const FunctionImpl& p); public: + bool nonstandard; ///< If true, compress keeps scaling coeff + bool compressed; ///< Compression status + bool redundant; ///< If true, function keeps sum coefficients on all levels + Timer timer_accumulate; Timer timer_lr_result; Timer timer_filter; diff --git a/src/madness/mra/mra.h b/src/madness/mra/mra.h index 97b14269219..785a5d1db6d 100644 --- a/src/madness/mra/mra.h +++ b/src/madness/mra/mra.h @@ -127,6 +127,13 @@ namespace madness { std::shared_ptr< FunctionImpl > impl; public: + std::string _treeName; + unsigned long _treeID; + + void setTreeName(std::string name){ + _treeName = name; + } + bool impl_initialized()const{ if(impl==NULL) return false; else return true; diff --git a/src/madness/mra/mra1.cc b/src/madness/mra/mra1.cc index f4375cb7237..d6cf21432cf 100644 --- a/src/madness/mra/mra1.cc +++ b/src/madness/mra/mra1.cc @@ -36,6 +36,10 @@ #include #include #include +#include +#include +#include +#include namespace madness { @@ -95,6 +99,24 @@ namespace madness { template class Displacements<1>; template class DerivativeBase; template class DerivativeBase; + + + //FuseT explicit Instantiations + template <> volatile std::list WorldObject >::pending = std::list(); + template <> Spinlock WorldObject >::pending_mutex(0); + template <> volatile std::list WorldObject >::pending = std::list(); + template <> Spinlock WorldObject >::pending_mutex(0); + + template class PrimitiveOp; + template class OpExecutor; + template class FusedExecutor; + template class CopyOp; + template class WorldObject >; + template class WorldObject >; + + + + #endif } diff --git a/src/madness/mra/mra2.cc b/src/madness/mra/mra2.cc index df3be2d0552..86ab211ad20 100644 --- a/src/madness/mra/mra2.cc +++ b/src/madness/mra/mra2.cc @@ -35,6 +35,10 @@ 
#include #include #include +#include +#include +#include +#include #ifdef FUNCTION_INSTANTIATE_2 namespace madness { @@ -84,5 +88,25 @@ namespace madness { template class DerivativeBase; template class DerivativeBase; + + //FuseT explicit Instantiations + template <> volatile std::list WorldObject >::pending = std::list(); + template <> Spinlock WorldObject >::pending_mutex(0); + template <> volatile std::list WorldObject >::pending = std::list(); + template <> Spinlock WorldObject >::pending_mutex(0); + + template class PrimitiveOp; + template class OpExecutor; + template class FusedExecutor; + template class CopyOp; + template class WorldObject >; + template class WorldObject >; + + + + + + + } #endif diff --git a/src/madness/mra/mra3.cc b/src/madness/mra/mra3.cc index 761578aa5ba..a3c47244e8d 100644 --- a/src/madness/mra/mra3.cc +++ b/src/madness/mra/mra3.cc @@ -35,6 +35,11 @@ #include #include #include +#include +#include +#include +#include +#include #ifdef FUNCTION_INSTANTIATE_3 @@ -60,6 +65,11 @@ namespace madness { template <> volatile std::list WorldObject, FunctionNode, 3>, Hash > > >::pending = std::list(); template <> Spinlock WorldObject, FunctionNode, 3>, Hash > > >::pending_mutex(0); + //For derivative Operator + typedef Future, GenTensor > > argT; + template <> volatile std::list WorldObject, argT, Hash > > >::pending = std::list(); + template <> Spinlock WorldObject, argT, Hash > > >::pending_mutex(0); + template <> volatile std::list WorldObject >::pending = std::list(); template <> Spinlock WorldObject >::pending_mutex(0); template <> volatile std::list WorldObject,3> >::pending = std::list(); @@ -86,6 +96,26 @@ namespace madness { template class DerivativeBase; template class DerivativeBase; + //FuseT explicit Instantiations + template <> volatile std::list WorldObject >::pending = std::list(); + template <> Spinlock WorldObject >::pending_mutex(0); + template <> volatile std::list WorldObject >::pending = std::list(); + template <> Spinlock 
WorldObject >::pending_mutex(0); + + template <> volatile std::list WorldObject >::pending = std::list(); + template <> Spinlock WorldObject >::pending_mutex(0); + + template class PrimitiveOp; + template class OpExecutor; + template class FusedExecutor; + template class DerivativeOp; + template class CopyOp; + template class WorldObject >; + template class WorldObject >; + template class WorldObject >; + + + } #endif