From 7e72a47460510617c5bf3ced2b8dabe5a2bf5355 Mon Sep 17 00:00:00 2001 From: stgeke Date: Thu, 26 Mar 2020 10:01:37 +0100 Subject: [PATCH] Fix multiscalar bug (#83) --- okl/core/cdsAdvectionHex3D.okl | 30 +- okl/core/cdsSumMakefHex3D.okl | 4 +- okl/core/math.okl | 13 + okl/plugins/RANSktau.okl | 317 ++++++++++++++++++ okl/{avg => plugins}/avg.okl | 0 .../velRecycling.okl | 0 src/core/cds.cpp | 1 - src/core/cds.h | 2 +- src/core/ins.h | 1 + src/core/insSetup.cpp | 14 +- src/core/parReader.cpp | 82 ++--- src/core/runTime.cpp | 38 ++- src/nekInterface/nekInterface.f | 11 +- src/{core => }/nrs.hpp | 0 src/udf/udf.hpp | 4 - 15 files changed, 433 insertions(+), 84 deletions(-) create mode 100644 okl/plugins/RANSktau.okl rename okl/{avg => plugins}/avg.okl (100%) rename okl/{velRecycling => plugins}/velRecycling.okl (100%) rename src/{core => }/nrs.hpp (100%) diff --git a/okl/core/cdsAdvectionHex3D.okl b/okl/core/cdsAdvectionHex3D.okl index ea29d84ba..8596e4052 100644 --- a/okl/core/cdsAdvectionHex3D.okl +++ b/okl/core/cdsAdvectionHex3D.okl @@ -40,16 +40,16 @@ SOFTWARE. for(int k=0;ks_wxx[1]) ? s_wxx[0]:s_wxx[1]; } } + +@kernel void scalarScaledAdd(const dlong N, + const dfloat a, + const dfloat b, + @restrict const dfloat * X, + @restrict dfloat * Y){ + + for(dlong n=0;n 0) itau = 1/tau; + + dfloat sigd = p_sigd_min; + dfloat f_beta_str = 1.0; + if (xk > 0) { + const dfloat xk3 = xk*xk*tau*tau; + sigd = p_sigd_max; + f_beta_str = (1.0 + p_fb_c1st*xk3)/(1.0 + p_fb_c2st*xk3); + } + + // compute source term for k + const dfloat Y_k = rho*p_betainf_str*f_beta_str*itau; + const dfloat kSrc = mu_t*stMag2; + const dfloat kDiag = Y_k; + + // compute rource term for omega + const dfloat x_w = abs(OiOjSk)*(tau*tau*tau*p_ibetainf_str3); + const dfloat f_b = (1.0 + p_fb_c1*x_w)/(1.0 + p_fb_c2*x_w); + dfloat tauSrc = rho*(p_beta0*f_b - sigd*xk*tau); + dfloat tauDiag = rho*tau*p_alp_inf*stMag2 + + 8.0*rho*p_alpinf_str*kk*xtq*p_sigma_tau; + + // apply correction + const dfloat S_tau = 8.0*mue*xtq; + if (tau <= p_tiny) + tauSrc -= S_tau; + else + tauDiag += S_tau*itau; + + SRC[id + 0*offset] = kSrc; + SRC[id + 1*offset] = tauSrc; + SRCDIAG[id + 0*offset] = kDiag; + SRCDIAG[id + 1*offset] = tauDiag; + } + } + } + } +} + +@kernel void SijOijHex3D(const dlong Nelements, + const dlong offset, + @restrict const dfloat * vgeo, + @restrict const dfloat * D, + @restrict const dfloat * U, + @restrict dfloat * SO){ + + for(dlong e=0; eo_wrk0.copyFrom(cds->o_S, cds->Ntotal*sizeof(dfloat), 0, is*cds->fieldOffset*sizeof(dfloat)); if (solver->Nmasked) mesh->maskKernel(solver->Nmasked, solver->o_maskIds, cds->o_wrk0); - const dfloat lambda = 1; // dummy value not used if coeff is variable cds->Niter[is] = ellipticSolve(solver, cds->TOL, cds->o_wrk1, cds->o_wrk0); cds->helmholtzAddBCKernel(mesh->Nelements, diff --git a/src/core/cds.h b/src/core/cds.h index 2a2d74656..6db2cf0e3 100644 --- a/src/core/cds.h +++ b/src/core/cds.h @@ -100,7 +100,7 @@ typedef struct { occa::memory o_rho, o_diff; dfloat *cU, *cSd, *cS, *FS, *BF; - occa::memory o_cU, o_cSd, o_cS, o_FS, o_BF; + occa::memory o_cU, o_cSd, o_cS, o_FS, o_BF, o_BFDiag; occa::memory o_wrk0, o_wrk1, o_wrk2, o_wrk3, o_wrk4, o_wrk5, o_wrk6; diff --git a/src/core/ins.h b/src/core/ins.h index 4c28f7c2d..dfa9c50b6 100644 --- a/src/core/ins.h +++ b/src/core/ins.h @@ -126,6 +126,7 @@ typedef struct { occa::kernel pqKernel; occa::kernel ncKernel; + occa::kernel scalarScaledAddKernel; occa::kernel scaledAddKernel; occa::kernel subCycleVolumeKernel, subCycleCubatureVolumeKernel ; occa::kernel subCycleSurfaceKernel, subCycleCubatureSurfaceKernel;; diff --git a/src/core/insSetup.cpp b/src/core/insSetup.cpp index 7a3e59c15..dbaf16c91 100644 --- a/src/core/insSetup.cpp +++ b/src/core/insSetup.cpp @@ -634,6 +634,10 @@ ins_t *insSetup(MPI_Comm comm, setupAide &options, int buildOnly) ins->maxKernel = mesh->device.buildKernel(fileName.c_str(), kernelName.c_str(), kernelInfo); + kernelName = "scalarScaledAdd"; + ins->scalarScaledAddKernel = + mesh->device.buildKernel(fileName.c_str(), kernelName.c_str(), kernelInfo); + // =========================================================================== fileName = oklpath + "insFilterRT" + suffix + ".okl"; @@ -974,12 +978,12 @@ cds_t *cdsSetup(ins_t *ins, mesh_t *mesh, setupAide options, occa::properties &k cds->filterRTKernel = mesh->device.buildKernel(fileName.c_str(), kernelName.c_str(), kernelInfo); - if(cds->Nsubsteps){ - fileName = install_dir + "/libparanumal/okl/scaledAdd.okl"; - kernelName = "scaledAddwOffset"; - cds->scaledAddKernel = - mesh->device.buildKernel(fileName.c_str(), kernelName.c_str(), kernelInfo); + fileName = install_dir + "/libparanumal/okl/scaledAdd.okl"; + kernelName = "scaledAddwOffset"; + cds->scaledAddKernel = + mesh->device.buildKernel(fileName.c_str(), kernelName.c_str(), kernelInfo); + if(cds->Nsubsteps){ fileName = oklpath + "cdsSubCycle" + suffix + ".okl"; kernelName = "cdsSubCycleStrongCubatureVolume" + suffix; cds->subCycleStrongCubatureVolumeKernel = mesh->device.buildKernel(fileName, kernelName, kernelInfo); diff --git a/src/core/parReader.cpp b/src/core/parReader.cpp index 70cecae89..88c72eddc 100644 --- a/src/core/parReader.cpp +++ b/src/core/parReader.cpp @@ -378,47 +378,53 @@ libParanumal::setupAide parRead(std::string &setupFile, MPI_Comm comm) nscal++; isStart++; - options.setArgs("SCALAR00 PRECONDITIONER", "JACOBI"); - - double s_residualTol; - if(ini.extract("temperature", "residualtol", s_residualTol)) { - options.setArgs("SCALAR00 SOLVER TOLERANCE", to_string_f(s_residualTol)); - } - - if(ini.extract("temperature", "conductivity", sbuf)) { - int err = 0; - double diffusivity = te_interp(sbuf.c_str(), &err); - if(err) ABORT("Invalid expression for conductivity!"); - if(diffusivity < 0) diffusivity = fabs(1/diffusivity); - options.setArgs("SCALAR00 DIFFUSIVITY", to_string_f(diffusivity)); - } else { - if(!variableProperties) - ABORT("Cannot find mandatory parameter TEMPERATURE::conductivity!"); - } - - if(ini.extract("temperature", "rhocp", sbuf)) { - int err = 0; - double rhoCp = te_interp(sbuf.c_str(), &err); - if(err) ABORT("Invalid expression for rhoCp!"); - options.setArgs("SCALAR00 DENSITY", to_string_f(rhoCp)); + string solver; + ini.extract("temperature", "solver", solver); + if(solver == "none") { + options.setArgs("SCALAR00 SOLVER", "NONE"); } else { - if(!variableProperties) - ABORT("Cannot find mandatory parameter TEMPERATURE::rhoCp!"); + options.setArgs("SCALAR00 PRECONDITIONER", "JACOBI"); + + double s_residualTol; + if(ini.extract("temperature", "residualtol", s_residualTol)) { + options.setArgs("SCALAR00 SOLVER TOLERANCE", to_string_f(s_residualTol)); + } + + if(ini.extract("temperature", "conductivity", sbuf)) { + int err = 0; + double diffusivity = te_interp(sbuf.c_str(), &err); + if(err) ABORT("Invalid expression for conductivity!"); + if(diffusivity < 0) diffusivity = fabs(1/diffusivity); + options.setArgs("SCALAR00 DIFFUSIVITY", to_string_f(diffusivity)); + } else { + if(!variableProperties) + ABORT("Cannot find mandatory parameter TEMPERATURE::conductivity!"); + } + + if(ini.extract("temperature", "rhocp", sbuf)) { + int err = 0; + double rhoCp = te_interp(sbuf.c_str(), &err); + if(err) ABORT("Invalid expression for rhoCp!"); + options.setArgs("SCALAR00 DENSITY", to_string_f(rhoCp)); + } else { + if(!variableProperties) + ABORT("Cannot find mandatory parameter TEMPERATURE::rhoCp!"); + } + + string s_bcMap; + if(ini.extract("temperature", "boundarytypemap", s_bcMap)) { + std::vector sList; + sList = serializeString(s_bcMap); + bcMap::setup(sList, "scalar00"); + } else { + ABORT("Cannot find mandatory parameter TEMPERATURE::boundaryTypeMap!"); + } } - - string s_bcMap; - if(ini.extract("temperature", "boundarytypemap", s_bcMap)) { - std::vector sList; - sList = serializeString(s_bcMap); - bcMap::setup(sList, "scalar00"); - } else { - ABORT("Cannot find mandatory parameter TEMPERATURE::boundaryTypeMap!"); - } - - } else { - if(equation == "lowmachns") - ABORT("PROBLEMTYPE::equation = lowMachNS requires solving for temperature!"); } + + if(equation == "lowmachns" && ini.sections.count("temperature") == 0) + ABORT("PROBLEMTYPE::equation = lowMachNS requires solving for temperature!"); + // for (auto & sec : ini.sections) { string key = sec.first; diff --git a/src/core/runTime.cpp b/src/core/runTime.cpp index aed2bf62c..3261bebf5 100644 --- a/src/core/runTime.cpp +++ b/src/core/runTime.cpp @@ -175,22 +175,27 @@ void makeq(ins_t *ins, dfloat time, occa::memory o_BF) mesh_t *mesh; (is) ? mesh = cds->meshV : mesh = cds->mesh; + const dlong sOffset = is*cds->fieldOffset; + occa::memory o_adv = cds->o_wrk0; if(cds->options.compareArgs("FILTER STABILIZATION", "RELAXATION")) cds->filterRTKernel( cds->meshV->Nelements, ins->o_filterMT, ins->filterS, - is*cds->fieldOffset, + sOffset, cds->o_rho, cds->o_S, cds->o_FS); - occa::memory o_adv = cds->o_wrk0; if(cds->Nsubsteps) { + o_adv = scalarStrongSubCycle(cds, time, is, cds->o_U, cds->o_S); + } else { - if(cds->options.compareArgs("ADVECTION TYPE", "CUBATURE")) { + + //ins->setScalarKernel(cds->fieldOffset, 1.0, cds->o_rho); + if(cds->options.compareArgs("ADVECTION TYPE", "CUBATURE")) { cds->advectionStrongCubatureVolumeKernel( cds->meshV->Nelements, mesh->o_vgeo, @@ -199,7 +204,7 @@ void makeq(ins_t *ins, dfloat time, occa::memory o_BF) mesh->o_cubInterpT, mesh->o_cubProjectT, cds->vFieldOffset, - is*cds->fieldOffset, + sOffset, cds->o_Ue, cds->o_S, cds->o_rho, @@ -210,20 +215,22 @@ void makeq(ins_t *ins, dfloat time, occa::memory o_BF) mesh->o_vgeo, mesh->o_Dmatrices, cds->vFieldOffset, - is*cds->fieldOffset, + sOffset, cds->o_Ue, cds->o_S, cds->o_rho, cds->o_wrk0); } + ins->scaledAddKernel( cds->meshV->Nelements*cds->meshV->Np, -1.0, 0*cds->fieldOffset, cds->o_wrk0, 1.0, - is*cds->fieldOffset, + sOffset, cds->o_FS); + } cds->sumMakefKernel( @@ -235,7 +242,7 @@ void makeq(ins_t *ins, dfloat time, occa::memory o_BF) cds->o_extbdfB, cds->o_extbdfC, cds->fieldOffset*cds->NSfields, - is*cds->fieldOffset, + sOffset, cds->o_S, o_adv, cds->o_FS, @@ -276,9 +283,15 @@ void scalarSolve(ins_t *ins, dfloat time, dfloat dt, occa::memory o_S) cds->o_rho, cds->o_ellipticCoeff); - if(udf.sEqnAddToLhs) { - occa::memory o_LHS = cds->o_ellipticCoeff + cds->fieldOffset*sizeof(dfloat); - udf.sEqnAddToLhs(ins, is, time+dt, o_LHS); + if(cds->o_BFDiag.ptr()) { + cds->scaledAddKernel( + cds->Nlocal, + 1.0, + is*cds->fieldOffset, + cds->o_BFDiag, + 1.0, + cds->fieldOffset, + cds->o_ellipticCoeff); } occa::memory o_Snew = cdsSolve(is, cds, time+dt); @@ -394,10 +407,7 @@ void fluidSolve(ins_t *ins, dfloat time, dfloat dt, occa::memory o_U) ins->o_mue, ins->o_rho, ins->o_ellipticCoeff); - if(udf.uEqnAddToLhs) { - occa::memory o_LHS = ins->o_ellipticCoeff + ins->fieldOffset*sizeof(dfloat); - udf.uEqnAddToLhs(ins, time+dt, o_LHS); - } + occa::memory o_Unew = tombo::velocitySolve(ins, time+dt); for (int s=ins->Nstages;s>1;s--) { o_U.copyFrom( diff --git a/src/nekInterface/nekInterface.f b/src/nekInterface/nekInterface.f index 273faaf45..604a0c1b2 100644 --- a/src/nekInterface/nekInterface.f +++ b/src/nekInterface/nekInterface.f @@ -256,8 +256,9 @@ subroutine nekf_resetio() real ts integer npscals, p63s logical ifxyos, ifvos, ifpos, iftos, ifpscos(ldimt1) - common /ifos/ fxyos, ifvos, ifpos, iftos, ifpscos, npscals, p63s, - $ ts + common /ros/ ts + common /ios/ npscals, p63s + common /ifos/ ifxyos, ifvos, ifpos, iftos, ifpscos time = ts @@ -287,8 +288,10 @@ subroutine nekf_setio(ttime, xo, vo, po, so, ns, fp64) real ts integer npscals, p63s logical ifxyos, ifvos, ifpos, iftos, ifpscos(ldimt1) - common /ifos/ fxyos, ifvos, ifpos, iftos, ifpscos, npscals, p63s, - $ ts + + common /ros/ ts + common /ios/ npscals, p63s + common /ifos/ ifxyos, ifvos, ifpos, iftos, ifpscos if(ns.gt.ldimt) call exitti('nekf_setifo: ns > ldimt$',ns) diff --git a/src/core/nrs.hpp b/src/nrs.hpp similarity index 100% rename from src/core/nrs.hpp rename to src/nrs.hpp diff --git a/src/udf/udf.hpp b/src/udf/udf.hpp index 2eb84bf04..3f7eafae7 100644 --- a/src/udf/udf.hpp +++ b/src/udf/udf.hpp @@ -17,8 +17,6 @@ typedef void (*udfsetup)(ins_t *ins); typedef void (*udfloadKernels)(ins_t *ins); typedef void (*udfexecuteStep)(ins_t *ins, dfloat time, int tstep); -typedef void (*udfuEqnAddToLhs)(ins_t *ins, dfloat time, occa::memory o_LHS); -typedef void (*udfsEqnAddToLhs)(ins_t *ins, int i, dfloat time, occa::memory o_LHS); typedef void (*udfuEqnSource)(ins_t *ins, dfloat time, occa::memory o_U, occa::memory o_FU); typedef void (*udfsEqnSource)(ins_t *ins, dfloat time, occa::memory o_S, occa::memory o_SU); typedef void (*udfproperties)(ins_t *ins, dfloat time, occa::memory o_U, @@ -32,8 +30,6 @@ typedef struct udfsetup setup; udfloadKernels loadKernels; udfexecuteStep executeStep; - udfuEqnAddToLhs uEqnAddToLhs; - udfsEqnAddToLhs sEqnAddToLhs; udfuEqnSource uEqnSource; udfsEqnSource sEqnSource; udfproperties properties;