From 774580883e625e952bfcf94a57f165eda879e253 Mon Sep 17 00:00:00 2001
From: "Marc A. Suchard"
Date: Thu, 7 Nov 2024 19:25:30 -0800
Subject: [PATCH] oops

---
 src/cyclops/engine/ModelSpecifics.hpp.new | 2405 ----------------------
 src/cyclops/engine/ModelSpecifics.hpp.old | 2363 ----------------------
 2 files changed, 4768 deletions(-)
 delete mode 100644 src/cyclops/engine/ModelSpecifics.hpp.new
 delete mode 100644 src/cyclops/engine/ModelSpecifics.hpp.old

diff --git a/src/cyclops/engine/ModelSpecifics.hpp.new b/src/cyclops/engine/ModelSpecifics.hpp.new
deleted file mode 100644
index 29e791d8..00000000
--- a/src/cyclops/engine/ModelSpecifics.hpp.new
+++ /dev/null
@@ -1,2405 +0,0 @@
-/*
- * ModelSpecifics.hpp
- *
- *  Created on: Jul 13, 2012
- *      Author: msuchard
- */
-
-#ifndef MODELSPECIFICS_HPP_
-#define MODELSPECIFICS_HPP_
-
-#include <cmath>
-#include <cstdlib>
-#include <algorithm>
-#include <limits>
-#include <numeric>
-
-#include "ModelSpecifics.h"
-#include "Iterators.h"
-
-#include "Recursions.hpp"
-#include "ParallelLoops.h"
-// #include "Ranges.h"
-
-#ifdef USE_RCPP_PARALLEL
-#include <tbb/parallel_for.h>
-#include <tbb/combinable.h>
-#endif
-
-//#include "R.h"
-//#include "Rcpp.h" // TODO Remove
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    #include "Timing.h"
-#endif
-
-namespace bsccs {
-
-template <typename RealType>
-const std::string DenseIterator<RealType>::name = "Den";
-
-template <typename RealType>
-const std::string IndicatorIterator<RealType>::name = "Ind";
-
-template <typename RealType>
-const std::string SparseIterator<RealType>::name = "Spa";
-
-template <typename RealType>
-const std::string InterceptIterator<RealType>::name = "Icp";
-
-} // namespace bsccs
-
-//#define OLD_WAY
-//#define NEW_WAY1
-#define NEW_WAY2
-
-//#define USE_BIGNUM
-#define USE_LONG_DOUBLE
-
-namespace bsccs {
-
-#ifdef USE_BIGNUM
-    typedef bigNum DDouble;
-#else
-    #ifdef USE_LONG_DOUBLE
-        typedef long double DDouble;
-    #else
-        typedef double DDouble;
-    #endif
-#endif
-
-#if defined(DEBUG_COX) || defined(DEBUG_COX_MIN)
-    using std::cerr;
-    using std::endl;
-#endif
-
-#define NEW_LOOPS
-
-template <class BaseModel, typename RealType>
-ModelSpecifics<BaseModel,RealType>::ModelSpecifics(const ModelData<RealType>& input)
-    : AbstractModelSpecifics(input), BaseModel(input.getYVectorRef(), input.getTimeVectorRef()),
-      modelData(input),
-      hX(modelData.getX())
-      // hY(input.getYVectorRef()),
-      // hOffs(input.getTimeVectorRef())
-      // hPidOriginal(input.getPidVectorRef()), hPid(const_cast<int*>(hPidOriginal.data())),
-      // boundType(MmBoundType::METHOD_2)
-      // threadPool(4,4,1000)
-      // threadPool(0,0,10)
-    {
-    // Do nothing
-    // TODO Memory allocation here
-    // std::cerr << "ctor ModelSpecifics \n";
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto now = bsccs::chrono::system_clock::now();
-    auto now_c = bsccs::chrono::system_clock::to_time_t(now);
-    std::cout << std::endl << "Start: " << std::ctime(&now_c) << std::endl;
-#endif
-
-}
-
-template <class BaseModel, typename RealType>
-AbstractModelSpecifics* ModelSpecifics<BaseModel,RealType>::clone(ComputeDeviceArguments computeDevice) const {
-    return new ModelSpecifics<BaseModel,RealType>(modelData);
-}
-
-template <class BaseModel, typename RealType>
-double ModelSpecifics<BaseModel,RealType>::getGradientObjective(bool useCrossValidation) {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-
-    auto& xBeta = getXBeta();
-
-    RealType criterion = 0;
-    if (useCrossValidation) {
-        for (int i = 0; i < K; i++) {
-            criterion += xBeta[i] * hY[i] * hKWeight[i];
-        }
-    } else {
-        for (int i = 0; i < K; i++) {
-            criterion += xBeta[i] * hY[i];
-        }
-    }
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    duration["getGradObj "] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-
-    return static_cast<double>(criterion);
-}
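The `CYCLOPS_DEBUG_TIMING` sections in this file bracket each routine with explicit start/end pairs that accumulate elapsed time into the `duration` map. A scoped RAII timer is one conventional way to express that pattern once; a minimal self-contained sketch with hypothetical names that are not part of Cyclops:

```cpp
#include <chrono>
#include <map>
#include <string>
#include <utility>

// Accumulates the elapsed time of its enclosing scope into a named bucket.
class ScopedTimer {
public:
    ScopedTimer(std::map<std::string, long long>& durations, std::string name)
        : durations_(durations), name_(std::move(name)),
          start_(std::chrono::steady_clock::now()) {}
    ~ScopedTimer() {
        const auto end = std::chrono::steady_clock::now();
        durations_[name_] +=
            std::chrono::duration_cast<std::chrono::nanoseconds>(end - start_).count();
    }
private:
    std::map<std::string, long long>& durations_;
    std::string name_;
    std::chrono::steady_clock::time_point start_;
};

// Usage: { ScopedTimer t(duration, "getGradObj"); /* timed work */ }
```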
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::printTiming() {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-
-    std::cout << std::endl;
-    for (auto& d : duration) {
-        std::cout << d.first << " " << d.second << std::endl;
-    }
-    std::cout << "NEW LOOPS" << std::endl;
-
-#endif
-}
-
-template <class BaseModel, typename RealType>
-ModelSpecifics<BaseModel,RealType>::~ModelSpecifics() {
-    // TODO Memory release here
-    // std::cerr << "dtor ModelSpecifics \n";
-
-#ifdef CYCLOPS_DEBUG_TIMING
-
-    printTiming();
-
-    auto now = bsccs::chrono::system_clock::now();
-    auto now_c = bsccs::chrono::system_clock::to_time_t(now);
-    std::cout << std::endl << "End: " << std::ctime(&now_c) << std::endl;
-#endif
-
-}
-
-template <class BaseModel, typename RealType> template <class IteratorType>
-void ModelSpecifics<BaseModel,RealType>::incrementNormsImpl(int index) {
-
-    // TODO should use row-access
-    IteratorType it(hX, index);
-    for (; it; ++it) {
-        const int k = it.index();
-        const RealType x = it.value();
-
-        norm[k] += std::abs(x);
-    }
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::initializeMmXt() {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-
-    hXt = hX.transpose();
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    duration["transpose "] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::initializeMM(
-        MmBoundType boundType,
-        const std::vector<bool>& fixBeta
-    ) {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-
-    // std::cout << "Computing norms..." << std::endl;
-    norm.resize(K);
-    zeroVector(&norm[0], K);
-
-    for (int j = 0; j < J; ++j) {
-        if (
-            true
-            // !fixBeta[j]
-        ) {
-            switch (hX.getFormatType(j)) {
-                case INDICATOR :
-                    incrementNormsImpl<IndicatorIterator<RealType>>(j);
-                    break;
-                case SPARSE :
-                    incrementNormsImpl<SparseIterator<RealType>>(j);
-                    break;
-                case DENSE :
-                    incrementNormsImpl<DenseIterator<RealType>>(j);
-                    break;
-                case INTERCEPT :
-                    incrementNormsImpl<InterceptIterator<RealType>>(j);
-                    break;
-            }
-        }
-    }
-
-    if (boundType == MmBoundType::METHOD_1) {
-        // std::cerr << "boundType: METHOD_1" << std::endl;
-
-    } else if (boundType == MmBoundType::METHOD_2) {
-        // std::cerr << "boundType: METHOD_2" << std::endl;
-
-        double total = 0;
-
-        for (int j = 0; j < J; ++j) {
-            if (!fixBeta[j]) {
-                typedef std::pair<double, double> Range;
-
-                std::vector<Range> range(modelData.getNumberOfPatients(), Range(
-                    -std::numeric_limits<double>::max(),
-                    std::numeric_limits<double>::max()
-                ));
-
-                modelData.binaryReductionByStratum(range, j, [](Range& r, double x) {
-                    return Range(
-                        std::max(x, r.first),
-                        std::min(x, r.second)
-                    );
-                });
-
-                double curvature = 0.0;
-                for (Range r : range) {
-                    curvature += r.first - r.second;
-                }
-                curvature /= 4;
-
-                total += curvature;
-            }
-        }
-
-        if (curvature.size() == 0) {
-            curvature.resize(J);
-        }
-
-        for (int j = 0; j < J; ++j) {
-            curvature[j] = total;
-        }
-    }
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    duration["norms "] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-
-// struct timeval time1, time2;
-// gettimeofday(&time1, NULL);
-//
-// std::cout << "Constructing Xt..." << std::endl;
-
-// if (!hXt) {
-//     initializeMmXt();
-// }
-
-// gettimeofday(&time2, NULL);
-// double duration = //calculateSeconds(time1, time2);
-//     time2.tv_sec - time1.tv_sec +
-//     (double)(time2.tv_usec - time1.tv_usec) / 1000000.0;
-//
-// std::cout << "Done with MM initialization" << std::endl;
-// std::cout << duration << std::endl;
-    // Rcpp::stop("out");
-
-}
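`METHOD_2` above bounds the curvature by accumulating, per stratum, the range of each covariate and summing `(max - min) / 4` into one scalar. A standalone sketch of that reduction, assuming every stratum contains at least one observation and replacing `binaryReductionByStratum` with an explicit loop over a stratum map:

```cpp
#include <algorithm>
#include <cstddef>
#include <limits>
#include <utility>
#include <vector>

// Sum of per-stratum covariate ranges divided by four, as in METHOD_2.
double method2Curvature(const std::vector<double>& x,
                        const std::vector<int>& stratum, // stratum[k] in [0, numStrata)
                        int numStrata) {
    using Range = std::pair<double, double>; // (running max, running min)
    std::vector<Range> range(numStrata, Range(-std::numeric_limits<double>::max(),
                                              std::numeric_limits<double>::max()));
    for (std::size_t k = 0; k < x.size(); ++k) {
        auto& r = range[stratum[k]];
        r.first = std::max(x[k], r.first);
        r.second = std::min(x[k], r.second);
    }
    double curvature = 0.0;
    for (const auto& r : range) {
        curvature += r.first - r.second;
    }
    return curvature / 4.0;
}
```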
-
-template <class BaseModel, typename RealType>
-const std::vector<double> ModelSpecifics<BaseModel,RealType>::getXBeta() {
-    return std::vector<double>(std::begin(hXBeta), std::end(hXBeta));
-}
-
-template <class BaseModel, typename RealType>
-const std::vector<double> ModelSpecifics<BaseModel,RealType>::getXBetaSave() {
-    return std::vector<double>(std::begin(hXBetaSave), std::end(hXBetaSave));
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::zeroXBeta() {
-    std::fill(std::begin(hXBeta), std::end(hXBeta), 0.0);
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::saveXBeta() {
-    auto& xBeta = getXBeta();
-    if (hXBetaSave.size() < xBeta.size()) {
-        hXBetaSave.resize(xBeta.size());
-    }
-    std::copy(std::begin(xBeta), std::end(xBeta), std::begin(hXBetaSave));
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeXBeta(double* beta, bool useWeights) {
-
-    if (!hXt) {
-        initializeMmXt();
-    }
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-
-    switch (hXt->getFormatType(0)) {
-        case INDICATOR :
-            computeXBetaImpl<IndicatorIterator<RealType>>(beta);
-            break;
-        case SPARSE :
-            computeXBetaImpl<SparseIterator<RealType>>(beta);
-            break;
-        case DENSE :
-            computeXBetaImpl<DenseIterator<RealType>>(beta);
-            break;
-        case INTERCEPT:
-            break;
-    }
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    duration["computeXBeta "] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-
-}
-
-template <class BaseModel, typename RealType> template <class IteratorType>
-void ModelSpecifics<BaseModel,RealType>::computeXBetaImpl(double *beta) {
-
-    for (int k = 0; k < K; ++k) {
-        RealType sum = 0.0;
-        IteratorType it(*hXt, k);
-        for (; it; ++it) {
-            const auto j = it.index();
-            sum += it.value() * beta[j];
-        }
-        hXBeta[k] = sum;
-        // TODO Add back in for fastest LR
-        // const auto exb = std::exp(sum);
-        // offsExpXBeta[k] = exb;
-        // denomPid[k] = 1.0 + exb;
-    }
-
-    // std::cerr << "did weird stuff to denomPid" << std::endl;
-}
-
-template <class BaseModel, typename RealType>
-bool ModelSpecifics<BaseModel,RealType>::allocateXjY(void) { return BaseModel::precomputeGradient; }
-
-template <class BaseModel, typename RealType>
-bool ModelSpecifics<BaseModel,RealType>::allocateXjX(void) { return BaseModel::precomputeHessian; }
-
-template <class BaseModel, typename RealType>
-bool ModelSpecifics<BaseModel,RealType>::sortPid(void) { return BaseModel::sortPid; }
-
-template <class BaseModel, typename RealType>
-bool ModelSpecifics<BaseModel,RealType>::initializeAccumulationVectors(void) { return BaseModel::cumulativeGradientAndHessian; }
-
-template <class BaseModel, typename RealType>
-bool ModelSpecifics<BaseModel,RealType>::allocateNtoKIndices(void) { return BaseModel::hasNtoKIndices; }
-
-template <class BaseModel, typename RealType>
-bool ModelSpecifics<BaseModel,RealType>::hasResetableAccumulators(void) { return BaseModel::hasResetableAccumulators; }
-
-template <class BaseModel, typename RealType> template <class IteratorType>
-void ModelSpecifics<BaseModel,RealType>::axpy(RealType* y, const RealType alpha, const int index) {
-    IteratorType it(hX, index);
-    for (; it; ++it) {
-        const int k = it.index();
-        y[k] += alpha * it.value();
-    }
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::axpyXBeta(const double beta, const int j) {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-
-    if (beta != 0.0) {
-        switch (hX.getFormatType(j)) {
-            case INDICATOR:
-                axpy<IndicatorIterator<RealType>>(hXBeta.data(), beta, j);
-                break;
-            case INTERCEPT:
-                axpy<InterceptIterator<RealType>>(hXBeta.data(), beta, j);
-                break;
-            case DENSE:
-                axpy<DenseIterator<RealType>>(hXBeta.data(), beta, j);
-                break;
-            case SPARSE:
-                axpy<SparseIterator<RealType>>(hXBeta.data(), beta, j);
-                break;
-        }
-    }
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    duration["axpy "] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-
-}
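`axpyXBeta` dispatches on the column format so that `y += alpha * X[:, j]` only touches stored non-zeros. A minimal stand-in for the iterator machinery, where `values == nullptr` plays the role of an INDICATOR column whose non-zero entries are all one:

```cpp
// Sparse column 'axpy': y[indices[n]] += alpha * values[n] over nnz entries.
void axpyColumn(double* y, double alpha,
                const int* indices, const double* values, int nnz) {
    for (int n = 0; n < nnz; ++n) {
        y[indices[n]] += alpha * (values != nullptr ? values[n] : 1.0);
    }
}
```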
-
-// ESK: Added cWeights (censoring weights) as an input
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::setWeights(double* inWeights, double* cenWeights, bool useCrossValidation) {
-    // Set K weights
-    if (hKWeight.size() != K) {
-        hKWeight.resize(K);
-    }
-
-    if (useCrossValidation) {
-        for (size_t k = 0; k < K; ++k) {
-            hKWeight[k] = inWeights[k];
-        }
-    } else {
-        std::fill(hKWeight.begin(), hKWeight.end(), static_cast<RealType>(1));
-    }
-
-    if (initializeAccumulationVectors()) {
-        setPidForAccumulation(inWeights);
-    }
-
-    // Set N weights (these are the same for independent data models)
-    if (hNWeight.size() < N + 1) { // Add +1 for extra (zero-weight stratum)
-        hNWeight.resize(N + 1);
-    }
-
-    std::fill(hNWeight.begin(), hNWeight.end(), static_cast<RealType>(0));
-    for (size_t k = 0; k < K; ++k) {
-        RealType event = BaseModel::observationCount(hY[k]) * hKWeight[k];
-        incrementByGroup(hNWeight.data(), hPid, k, event);
-    }
-
-    // ESK: Compute hNtoK indices manually and create censoring weights hYWeight if isTwoWayScan
-    if (hYWeight.size() != K) {
-        hYWeight.resize(K);
-    }
-    if (hYWeightDouble.size() != K) {
-        hYWeightDouble.resize(K);
-    }
-    if (BaseModel::isTwoWayScan) {
-        hNtoK.resize(N + 1);
-        int n = 0;
-        for (size_t k = 0; k < K;) {
-            while (hKWeight[k] == static_cast<RealType>(0)) {
-                ++k;
-            }
-            hNtoK[n] = k;
-            int currentPid = hPid[k];
-            do {
-                ++k;
-            } while (k < K && (currentPid == hPid[k] || hKWeight[k] == static_cast<RealType>(0)));
-            ++n;
-        }
-        hNtoK[n] = K;
-
-        for (size_t k = 0; k < K; ++k) {
-            hYWeight[k] = cenWeights[k];
-            hYWeightDouble[k] = cenWeights[k];
-        }
-    }
-
-#ifdef DEBUG_COX
-    cerr << "Done with set weights" << endl;
-#endif
-
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeXjY(bool useCrossValidation) {
-    for (size_t j = 0; j < J; ++j) {
-        hXjY[j] = 0;
-
-        GenericIterator<RealType> it(hX, j);
-
-        if (useCrossValidation) {
-            for (; it; ++it) {
-                const int k = it.index();
-                if (BaseModel::exactTies && hNWeight[BaseModel::getGroup(hPid, k)] > 1) {
-                    // Do not precompute
-                    hXjY[j] += it.value() * hY[k] * hKWeight[k]; // TODO Remove
-                } else if (BaseModel::isSurvivalModel) {
-                    // ESK: Modified for survival data
-                    hXjY[j] += it.value() * BaseModel::observationCount(hY[k]) * hKWeight[k];
-                } else {
-                    hXjY[j] += it.value() * hY[k] * hKWeight[k];
-                }
-            }
-        } else {
-            for (; it; ++it) {
-                const int k = it.index();
-                if (BaseModel::exactTies && hNWeight[BaseModel::getGroup(hPid, k)] > 1) {
-                    // Do not precompute
-                    hXjY[j] += it.value() * hY[k];
-                } else if (BaseModel::isSurvivalModel) {
-                    // ESK: Modified for survival data
-                    hXjY[j] += it.value() * BaseModel::observationCount(hY[k]);
-                } else {
-                    hXjY[j] += it.value() * hY[k];
-                }
-            }
-        }
-#ifdef DEBUG_COX
-        cerr << "j: " << j << " = " << hXjY[j] << endl;
-#endif
-    }
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeXjX(bool useCrossValidation) {
-    for (size_t j = 0; j < J; ++j) {
-        hXjX[j] = 0;
-        GenericIterator<RealType> it(hX, j);
-
-        if (useCrossValidation) {
-            for (; it; ++it) {
-                const int k = it.index();
-                if (BaseModel::exactTies && hNWeight[BaseModel::getGroup(hPid, k)] > 1) {
-                    // Do not precompute
-                } else {
-                    hXjX[j] += it.value() * it.value() * hKWeight[k];
-                }
-            }
-        } else {
-            for (; it; ++it) {
-                const int k = it.index();
-                if (BaseModel::exactTies && hNWeight[BaseModel::getGroup(hPid, k)] > 1) {
-                    // Do not precompute
-                } else {
-                    hXjX[j] += it.value() * it.value();
-                }
-            }
-        }
-    }
-}
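The two-way-scan branch of `setWeights` above builds `hNtoK`-style stratum start indices while skipping zero-weight rows. The same loop in isolation, assuming `pid` is sorted by stratum and ends with at least one positive weight:

```cpp
#include <vector>

// Entry n is the first row of stratum n (zero-weight rows skipped);
// a final sentinel entry holds the total row count K.
std::vector<int> buildStrataStarts(const std::vector<int>& pid,
                                   const std::vector<double>& w) {
    const int K = static_cast<int>(pid.size());
    std::vector<int> starts;
    int k = 0;
    while (k < K) {
        while (k < K && w[k] == 0.0) ++k;  // skip zero-weight rows
        if (k == K) break;
        starts.push_back(k);               // first live row of this stratum
        const int current = pid[k];
        do { ++k; } while (k < K && (pid[k] == current || w[k] == 0.0));
    }
    starts.push_back(K);                   // sentinel
    return starts;
}
```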
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeNtoKIndices(bool useCrossValidation) {
-
-    hNtoK.resize(N + 1);
-    int n = 0;
-    for (size_t k = 0; k < K;) {
-        hNtoK[n] = k;
-        int currentPid = hPid[k];
-        do {
-            ++k;
-        } while (k < K && currentPid == hPid[k]);
-        ++n;
-    }
-    hNtoK[n] = K;
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeFixedTermsInLogLikelihood(bool useCrossValidation) {
-    if (BaseModel::likelihoodHasFixedTerms) {
-        logLikelihoodFixedTerm = static_cast<RealType>(0);
-        bool hasOffs = hOffs.size() > 0;
-        if (useCrossValidation) {
-            for (size_t i = 0; i < K; i++) {
-                auto offs = hasOffs ? hOffs[i] : static_cast<RealType>(0);
-                logLikelihoodFixedTerm += BaseModel::logLikeFixedTermsContrib(hY[i], offs, offs) * hKWeight[i];
-            }
-        } else {
-            for (size_t i = 0; i < K; i++) {
-                auto offs = hasOffs ? hOffs[i] : static_cast<RealType>(0);
-                logLikelihoodFixedTerm += BaseModel::logLikeFixedTermsContrib(hY[i], offs, offs); // TODO SEGV in Poisson model
-            }
-        }
-    }
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeFixedTermsInGradientAndHessian(bool useCrossValidation) {
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-    if (sortPid()) {
-        doSortPid(useCrossValidation);
-    }
-    if (allocateXjY()) {
-        computeXjY(useCrossValidation);
-    }
-    if (allocateXjX()) {
-        computeXjX(useCrossValidation);
-    }
-    if (allocateNtoKIndices()) {
-        computeNtoKIndices(useCrossValidation);
-    }
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    auto name = "compFixedGH ";
-    duration[name] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-}
-
-template <class BaseModel, typename RealType>
-double ModelSpecifics<BaseModel,RealType>::getLogLikelihood(bool useCrossValidation) {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-
-    RealType logLikelihood = static_cast<RealType>(0.0);
-    if (useCrossValidation) {
-        for (size_t i = 0; i < K; i++) {
-            logLikelihood += BaseModel::logLikeNumeratorContrib(hY[i], hXBeta[i]) * hKWeight[i];
-        }
-    } else {
-        for (size_t i = 0; i < K; i++) {
-            logLikelihood += BaseModel::logLikeNumeratorContrib(hY[i], hXBeta[i]);
-        }
-    }
-
-    if (BaseModel::likelihoodHasDenominator) { // Compile-time switch
-        if (BaseModel::cumulativeGradientAndHessian) {
-            for (size_t i = 0; i < N; i++) {
-                // Weights modified in computeNEvents()
-                logLikelihood -= BaseModel::logLikeDenominatorContrib(hNWeight[i], accDenomPid[i]);
-            }
-        } else { // TODO Unnecessary code duplication
-            for (size_t i = 0; i < N; i++) {
-                // Weights modified in computeNEvents()
-                logLikelihood -= BaseModel::logLikeDenominatorContrib(hNWeight[i], denomPid[i]);
-            }
-        }
-    }
-
-    // RANGE
-
-    if (BaseModel::likelihoodHasFixedTerms) {
-        logLikelihood += logLikelihoodFixedTerm;
-    }
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    duration["compLogLike "] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-
-    return static_cast<double>(logLikelihood);
-}
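`getLogLikelihood` assembles the objective from per-row numerator contributions minus per-group denominator contributions. For the plain logistic case, where every row is its own group, the same decomposition reduces to the following self-contained sketch:

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// sum_i w_i * y_i * xbeta_i - sum_i w_i * log(1 + exp(xbeta_i))
double logisticLogLikelihood(const std::vector<double>& xBeta,
                             const std::vector<double>& y,
                             const std::vector<double>& w) {
    double logLike = 0.0;
    for (std::size_t i = 0; i < xBeta.size(); ++i) {
        logLike += w[i] * y[i] * xBeta[i];                 // numerator contribution
        logLike -= w[i] * std::log1p(std::exp(xBeta[i]));  // denominator contribution
    }
    return logLike;
}
```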
-
-template <class BaseModel, typename RealType>
-double ModelSpecifics<BaseModel,RealType>::getPredictiveLogLikelihood(double* weights) {
-
-    std::vector<double> saveKWeight;
-    if (BaseModel::cumulativeGradientAndHessian) {
-
-        // saveKWeight = hKWeight; // make copy
-        if (saveKWeight.size() != K) {
-            saveKWeight.resize(K);
-        }
-        for (size_t k = 0; k < K; ++k) {
-            saveKWeight[k] = hKWeight[k]; // make copy to a double vector
-        }
-
-        setPidForAccumulation(weights);
-        setWeights(weights, BaseModel::isTwoWayScan ? hYWeightDouble.data() : nullptr, true); // set new weights // TODO Possible error for gfr
-        computeRemainingStatistics(true); // compute accDenomPid
-    }
-
-    RealType logLikelihood = static_cast<RealType>(0.0);
-
-    if (BaseModel::cumulativeGradientAndHessian) {
-        for (size_t k = 0; k < K; ++k) {
-            logLikelihood += BaseModel::logPredLikeContrib(hY[k], weights[k], hXBeta[k], &accDenomPid[0], hPid, k); // TODO Going to crash with ties
-        }
-    } else { // TODO Unnecessary code duplication
-        for (size_t k = 0; k < K; ++k) { // TODO Is index of K correct?
-            logLikelihood += BaseModel::logPredLikeContrib(hY[k], weights[k], hXBeta[k], &denomPid[0], hPid, k);
-        }
-    }
-
-    if (BaseModel::cumulativeGradientAndHessian) {
-        setPidForAccumulation(&saveKWeight[0]);
-        setWeights(saveKWeight.data(), BaseModel::isTwoWayScan ? hYWeightDouble.data() : nullptr, true); // set old weights // TODO Possible error for gfr
-        computeRemainingStatistics(true);
-    }
-
-    return static_cast<double>(logLikelihood);
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::getPredictiveEstimates(double* y, double* weights) {
-
-    if (weights) {
-        for (size_t k = 0; k < K; ++k) {
-            if (weights[k]) {
-                y[k] = BaseModel::predictEstimate(hXBeta[k]);
-            }
-        }
-    } else {
-        for (size_t k = 0; k < K; ++k) {
-            y[k] = BaseModel::predictEstimate(hXBeta[k]);
-        }
-    }
-    // TODO How to remove code duplication above?
-}
-
-template <typename RealType>
-void makeDenseCrossProduct(const CompressedDataMatrix<RealType>& x,
-        int index1, int index2) {
-
-}
-
-template <class BaseModel, typename RealType> template <class IteratorType, class Weights>
-void ModelSpecifics<BaseModel,RealType>::getSchoenfeldResidualsImpl(int index,
-        std::vector<double>* residuals,
-        std::vector<double>* times,
-        double* covariate,
-        double* score,
-        Weights w) {
-
-    const bool hasResiduals = residuals != nullptr;
-    const bool hasTimes = times != nullptr;
-    const bool hasScore = score != nullptr && covariate != nullptr;
-
-    if (hasResiduals) {
-        residuals->clear();
-    }
-    if (hasTimes) {
-        times->clear();
-    }
-
-    RealType uGradient = static_cast<RealType>(0); // unweighted
-    RealType uHessian = static_cast<RealType>(0);
-
-    RealType wGradient = static_cast<RealType>(0); // weighted
-    RealType wHessian = static_cast<RealType>(0);
-
-    RealType xHessian = static_cast<RealType>(0);
-
-    // TODO: only written for accumulative models (Cox, Fine/Gray)
-
-    IteratorType it(hX, index);
-
-    RealType resNumerator = static_cast<RealType>(0);
-    RealType resDenominator = static_cast<RealType>(0);
-
-    RealType scoreNumerator1 = static_cast<RealType>(0);
-    RealType scoreNumerator2 = static_cast<RealType>(0);
-    RealType scoreDenominator = static_cast<RealType>(0);
-
-    // find start of relevant accumulator reset point
-    auto reset = begin(accReset);
-    while (*reset < it.index()) {
-        ++reset;
-    }
-
-    for (; it; ) {
-        int i = it.index();
-        const RealType x = it.value();
-
-        if (*reset <= i) {
-            resNumerator = static_cast<RealType>(0);
-            resDenominator = static_cast<RealType>(0);
-
-            scoreNumerator1 = static_cast<RealType>(0);
-            scoreNumerator2 = static_cast<RealType>(0);
-            scoreDenominator = static_cast<RealType>(0);
-
-            ++reset;
-        }
-
-        const auto expXBeta = offsExpXBeta[i]; // std::exp(hXBeta[i]);
-
-        resNumerator += expXBeta * x;
-        resDenominator += expXBeta;
-
-        if (hY[i] == 1 && hasResiduals) {
-            residuals->push_back(it.value() - resNumerator / resDenominator);
-            if (hasTimes) {
-                times->push_back(hOffs[i]);
-            }
-        }
-
-        if (hasScore) {
-            const auto weight = covariate[i];
-            // MAS does not believe reweighing is correct, but is matching cox.zph
-
-            // const auto xt = x * covariate[i];
-            // MAS believes covariate should be adjusted
-            const auto numerator1 = expXBeta * x; // TODO not xt?
-            const auto numerator2 = expXBeta * x * x; // TODO not xt?
-
-            if (hY[i] == 1) {
-                uGradient += x;
-                wGradient += x * weight; // TODO not xt and no weight?
-            }
-
-            scoreNumerator1 += numerator1;
-            scoreNumerator2 += numerator2;
-
-            const auto t = scoreNumerator1 / resDenominator;
-            const auto gradient = hNWeight[i] * t;
-            const auto hessian = hNWeight[i] * (scoreNumerator2 / resDenominator - t * t);
-
-            uGradient -= gradient;
-            wGradient -= gradient * weight;
-
-            uHessian += hessian;
-            wHessian += hessian * weight * weight;
-            xHessian += hessian * weight;
-        }
-
-        ++it;
-
-        if (IteratorType::isSparse) {
-
-            const int next = it ? it.index() : N;
-            for (++i; i < next; ++i) {
-
-                if (*reset <= i) {
-                    resNumerator = static_cast<RealType>(0);
-                    resDenominator = static_cast<RealType>(0);
-
-                    scoreNumerator1 = static_cast<RealType>(0);
-                    scoreNumerator2 = static_cast<RealType>(0);
-                    scoreDenominator = static_cast<RealType>(0);
-
-                    ++reset;
-                }
-
-                const auto expXBeta = offsExpXBeta[i]; // std::exp(hXBeta[i]);
-
-                resDenominator += expXBeta;
-
-                if (hY[i] == 1 && hasResiduals) {
-                    residuals->push_back(-resNumerator / resDenominator);
-                    if (hasTimes) {
-                        times->push_back(hOffs[i]);
-                    }
-                }
-
-                if (hasScore) {
-                    const auto weight = covariate[i];
-                    // MAS does not believe reweighing is correct, but is matching cox.zph
-
-                    // const auto xt = x * covariate[i];
-                    // MAS believes covariate should be adjusted
-                    const auto numerator1 = expXBeta * x; // TODO not xt?
-                    const auto numerator2 = expXBeta * x * x; // TODO not xt?
-
-                    if (hY[i] == 1) {
-                        uGradient += x;
-                        wGradient += x * weight; // TODO not xt and no weight?
-                    }
-
-                    scoreNumerator1 += numerator1;
-                    scoreNumerator2 += numerator2;
-
-                    const auto t = scoreNumerator1 / resDenominator;
-                    const auto gradient = hNWeight[i] * t;
-                    const auto hessian = hNWeight[i] * (scoreNumerator2 / resDenominator - t * t);
-
-                    uGradient -= gradient;
-                    wGradient -= gradient * weight;
-
-                    uHessian += hessian;
-                    wHessian += hessian * weight * weight;
-                }
-            }
-        }
-    }
-
-    if (hasScore) {
-        score[0] = static_cast<double>(uGradient);
-        score[1] = static_cast<double>(uHessian);
-        score[2] = static_cast<double>(wGradient);
-        score[3] = static_cast<double>(wHessian);
-        score[4] = static_cast<double>(xHessian);
-        // *score = static_cast<double>(gradient / hessian);
-        std::cerr << "score = " << wGradient << " / " << wHessian << "\n";
-        // std::cerr << "hess2 = " << hess2 << " or " << static_cast<RealType>(2.0) * hXjX[index] << "\n";
-    }
-}
-
-// TODO The following function is an example of a double-dispatch, rewrite without need for virtual function
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeGradientAndHessian(int index, double *ogradient,
-        double *ohessian, bool useWeights) {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifndef CYCLOPS_DEBUG_TIMING_LOW
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-#endif
-
-    if (hX.getNumberOfNonZeroEntries(index) == 0) {
-        *ogradient = 0.0; *ohessian = 0.0;
-        return;
-    }
-
-    // Run-time dispatch, so virtual call should not affect speed
-    if (useWeights) {
-        switch (hX.getFormatType(index)) {
-            case INDICATOR :
-                computeGradientAndHessianImpl<IndicatorIterator<RealType>>(index, ogradient, ohessian, weighted);
-                break;
-            case SPARSE :
-                computeGradientAndHessianImpl<SparseIterator<RealType>>(index, ogradient, ohessian, weighted);
-                break;
-            case DENSE :
-                computeGradientAndHessianImpl<DenseIterator<RealType>>(index, ogradient, ohessian, weighted);
-                break;
-            case INTERCEPT :
-                computeGradientAndHessianImpl<InterceptIterator<RealType>>(index, ogradient, ohessian, weighted);
-                break;
-        }
-    } else {
-        switch (hX.getFormatType(index)) {
-            case INDICATOR :
-                computeGradientAndHessianImpl<IndicatorIterator<RealType>>(index, ogradient, ohessian, unweighted);
-                break;
-            case SPARSE :
-                computeGradientAndHessianImpl<SparseIterator<RealType>>(index, ogradient, ohessian, unweighted);
-                break;
-            case DENSE :
-                computeGradientAndHessianImpl<DenseIterator<RealType>>(index, ogradient, ohessian, unweighted);
-                break;
-            case INTERCEPT :
-                computeGradientAndHessianImpl<InterceptIterator<RealType>>(index, ogradient, ohessian, unweighted);
-                break;
-        }
-    }
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifndef CYCLOPS_DEBUG_TIMING_LOW
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    duration["compGradAndHess "] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-#endif
-
-#ifdef CYCLOPS_DEBUG_TIMING_GPU_COX
-    duration["CPU GH "] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-}
-
-template <typename RealType>
-std::pair<RealType, RealType> operator+(
-        const std::pair<RealType, RealType>& lhs,
-        const std::pair<RealType, RealType>& rhs) {
-
-    return { lhs.first + rhs.first, lhs.second + rhs.second };
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeMMGradientAndHessian(
-        std::vector<GradientHessian>& gh,
-        const std::vector<bool>& fixBeta,
-        bool useWeights) {
-
-    if (norm.size() == 0) {
-        initializeMM(boundType, fixBeta);
-    }
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifndef CYCLOPS_DEBUG_TIMING_LOW
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-#endif
-
-    for (int index = 0; index < J; ++index) {
-        double *ogradient = &(gh[index].first);
-        double *ohessian = &(gh[index].second);
-
-        if (fixBeta[index]) {
-            *ogradient = 0.0; *ohessian = 0.0;
-        } else {
-
-            // Run-time dispatch, so virtual call should not affect speed
-            if (useWeights) {
-                switch (hX.getFormatType(index)) {
-                    case INDICATOR :
-                        computeMMGradientAndHessianImpl<IndicatorIterator<RealType>>(index, ogradient, ohessian, weighted);
-                        break;
-                    case SPARSE :
-                        computeMMGradientAndHessianImpl<SparseIterator<RealType>>(index, ogradient, ohessian, weighted);
-                        break;
-                    case DENSE :
-                        computeMMGradientAndHessianImpl<DenseIterator<RealType>>(index, ogradient, ohessian, weighted);
-                        break;
-                    case INTERCEPT :
-                        computeMMGradientAndHessianImpl<InterceptIterator<RealType>>(index, ogradient, ohessian, weighted);
-                        break;
-                }
-            } else {
-                switch (hX.getFormatType(index)) {
-                    case INDICATOR :
-                        computeMMGradientAndHessianImpl<IndicatorIterator<RealType>>(index, ogradient, ohessian, unweighted);
-                        break;
-                    case SPARSE :
-                        computeMMGradientAndHessianImpl<SparseIterator<RealType>>(index, ogradient, ohessian, unweighted);
-                        break;
-                    case DENSE :
-                        computeMMGradientAndHessianImpl<DenseIterator<RealType>>(index, ogradient, ohessian, unweighted);
-                        break;
-                    case INTERCEPT :
-                        computeMMGradientAndHessianImpl<InterceptIterator<RealType>>(index, ogradient, ohessian, unweighted);
-                        break;
-                }
-            }
-        }
-    }
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifndef CYCLOPS_DEBUG_TIMING_LOW
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    duration["compMMGradAndHess"] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-#endif
-
-}
-
-template <class BaseModel, typename RealType> template <class IteratorType, class Weights>
-void ModelSpecifics<BaseModel,RealType>::computeMMGradientAndHessianImpl(int index, double *ogradient,
-        double *ohessian, Weights w) {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifdef CYCLOPS_DEBUG_TIMING_LOW
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-#endif
-
-    RealType gradient = static_cast<RealType>(0);
-    RealType hessian = static_cast<RealType>(0);
-
-    IteratorType it(hX, index);
-    for (; it; ++it) {
-        const int k = it.index();
-
-        BaseModel::template incrementMMGradientAndHessian<IteratorType, Weights>(
-            gradient, hessian, offsExpXBeta[k],
-            denomPid[BaseModel::getGroup(hPid, k)], hNWeight[BaseModel::getGroup(hPid, k)],
-            it.value(), hXBeta[k], hY[k], norm[k]);
-    }
-
-    if (BaseModel::precomputeGradient) { // Compile-time switch
-        gradient -= hXjY[index];
-    }
-
-    if (BaseModel::precomputeHessian) { // Compile-time switch
-        hessian += static_cast<RealType>(2.0) * hXjX[index];
-    }
-
-    *ogradient = static_cast<double>(gradient);
-    *ohessian = static_cast<double>(hessian);
-
-    std::cerr << index << " " << gradient << " " << hessian << std::endl;
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifdef CYCLOPS_DEBUG_TIMING_LOW
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    auto name = "compGradHessMM" + IteratorType::name + "";
-    duration[name] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-#endif
-}
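The `operator+` on pairs defined above exists so that (gradient, hessian) contributions can be folded with standard reduction algorithms. A self-contained illustration of that use, defining its own addition so the example does not rely on operator lookup across namespaces:

```cpp
#include <numeric>
#include <utility>
#include <vector>

using GradHess = std::pair<double, double>;

// Element-wise pair sum, mirroring the operator+ above.
static GradHess addGradHess(const GradHess& lhs, const GradHess& rhs) {
    return { lhs.first + rhs.first, lhs.second + rhs.second };
}

// Fold per-term (gradient, hessian) contributions into a single pair.
GradHess reduceGradHess(const std::vector<GradHess>& contribs) {
    return std::accumulate(contribs.begin(), contribs.end(),
                           GradHess{0.0, 0.0}, addGradHess);
}
```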
-
-template <class BaseModel, typename RealType> template <class IteratorType, class Weights>
-void ModelSpecifics<BaseModel,RealType>::computeGradientAndHessianImpl(int index, double *ogradient,
-        double *ohessian, Weights w) {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifdef CYCLOPS_DEBUG_TIMING_LOW
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-#endif
-
-    RealType gradient = static_cast<RealType>(0);
-    RealType hessian = static_cast<RealType>(0);
-
-    if (BaseModel::cumulativeGradientAndHessian) { // Compile-time switch
-
-        if (sparseIndices[index] == nullptr || sparseIndices[index]->size() > 0) {
-
-            IteratorType it(sparseIndices[index].get(), N);
-
-            RealType accNumerPid = static_cast<RealType>(0);
-            RealType accNumerPid2 = static_cast<RealType>(0);
-            RealType decNumerPid = static_cast<RealType>(0); // ESK: Competing risks contrib to gradient
-            RealType decNumerPid2 = static_cast<RealType>(0); // ESK: Competing risks contrib to hessian
-
-            // computeBackwardAccumlatedNumerator(it, Weights::isWeighted); // Perform inside loop rather than outside.
-
-            // find start of relevant accumulator reset point
-            auto reset = begin(accReset);
-            while (*reset < it.index()) {
-                ++reset;
-            }
-
-            for (; it; ) {
-                int i = it.index();
-
-                if (*reset <= i) {
-                    accNumerPid = static_cast<RealType>(0.0);
-                    accNumerPid2 = static_cast<RealType>(0.0);
-                    ++reset;
-                }
-
-                const auto numerator1 = numerPid[i];
-                const auto numerator2 = numerPid2[i];
-
-                accNumerPid += numerator1;
-                accNumerPid2 += numerator2;
-
-                // Compile-time delegation
-                BaseModel::incrementGradientAndHessian(it,
-                    w, // Signature-only, for iterator-type specialization
-                    &gradient, &hessian, accNumerPid, accNumerPid2,
-                    accDenomPid[i], hNWeight[i], 0.0, hXBeta[i], hY[i]); // When function is in-lined, compiler will only use necessary arguments
-                ++it;
-
-                if (IteratorType::isSparse) {
-
-                    const int next = it ? it.index() : N;
-                    for (++i; i < next; ++i) {
-
-                        if (*reset <= i) {
-                            accNumerPid = static_cast<RealType>(0.0);
-                            accNumerPid2 = static_cast<RealType>(0.0);
-                            ++reset;
-                        }
-
-                        BaseModel::incrementGradientAndHessian(it,
-                            w, // Signature-only, for iterator-type specialization
-                            &gradient, &hessian, accNumerPid, accNumerPid2,
-                            accDenomPid[i], hNWeight[i], static_cast<RealType>(0), hXBeta[i], hY[i]); // When function is in-lined, compiler will only use necessary arguments
-                    }
-                }
-            }
-
-            if (BaseModel::isTwoWayScan) {
-                // Manually perform backwards accumulation here instead of a separate function.
-                auto revIt = it.reverse();
-
-                // segmented prefix-scan
-                RealType totalNumer = static_cast<RealType>(0);
-                RealType totalNumer2 = static_cast<RealType>(0);
-
-                auto backReset = end(accReset) - 1;
-
-                for ( ; revIt; ) {
-
-                    int i = revIt.index();
-
-                    if (static_cast<int>(*backReset) == i) {
-                        totalNumer = static_cast<RealType>(0);
-                        totalNumer2 = static_cast<RealType>(0);
-                        --backReset;
-                    }
-
-                    totalNumer += (hY[hNtoK[i]] > static_cast<RealType>(1)) ? numerPid[i] / hYWeight[hNtoK[i]] : 0;
-                    totalNumer2 += (hY[hNtoK[i]] > static_cast<RealType>(1)) ? numerPid2[i] / hYWeight[hNtoK[i]] : 0;
-                    decNumerPid = (hY[hNtoK[i]] == static_cast<RealType>(1)) ? hYWeight[hNtoK[i]] * totalNumer : 0;
-                    decNumerPid2 = (hY[hNtoK[i]] == static_cast<RealType>(1)) ? hYWeight[hNtoK[i]] * totalNumer2 : 0;
-
-                    // Increase gradient and hessian by competing risks contribution
-                    BaseModel::incrementGradientAndHessian(it,
-                        w, // Signature-only, for iterator-type specialization
-                        &gradient, &hessian, decNumerPid, decNumerPid2,
-                        accDenomPid[i], hNWeight[i], static_cast<RealType>(0), hXBeta[i],
-                        hY[i]); // When function is in-lined, compiler will only use necessary arguments
-                    --revIt;
-
-                    if (IteratorType::isSparse) {
-
-                        const int next = revIt ? revIt.index() : -1;
-                        for (--i; i > next; --i) {
-                            // if (*reset <= i) { // TODO MAS: This is not correct (only needed for stratified models)
-                            //     accNumerPid = static_cast<RealType>(0);
-                            //     accNumerPid2 = static_cast<RealType>(0);
-                            //     ++reset;
-                            // }
-
-                            decNumerPid = (hY[hNtoK[i]] == static_cast<RealType>(1)) ? hYWeight[hNtoK[i]] * totalNumer : 0;
-                            decNumerPid2 = (hY[hNtoK[i]] == static_cast<RealType>(1)) ? hYWeight[hNtoK[i]] * totalNumer2 : 0;
-                            BaseModel::incrementGradientAndHessian(it, w, // Signature-only, for iterator-type specialization
-                                &gradient, &hessian, decNumerPid, decNumerPid2,
-                                accDenomPid[i], hNWeight[i], static_cast<RealType>(0),
-                                hXBeta[i],
-                                hY[i]); // When function is in-lined, compiler will only use necessary arguments
-                        }
-                    }
-                } // End two-way scan
-            } // End backwards iterator
-        }
-
-    } else if (BaseModel::hasIndependentRows) {
-
-        IteratorType it(hX, index);
-
-        for (; it; ++it) {
-            const int i = it.index();
-
-            RealType numerator1 = BaseModel::gradientNumeratorContrib(it.value(), offsExpXBeta[i], hXBeta[i], hY[i]);
-            RealType numerator2 = (!IteratorType::isIndicator && BaseModel::hasTwoNumeratorTerms) ?
-                BaseModel::gradientNumerator2Contrib(it.value(), offsExpXBeta[i]) : static_cast<RealType>(0);
-
-            // Compile-time delegation
-            BaseModel::incrementGradientAndHessian(it,
-                w, // Signature-only, for iterator-type specialization
-                &gradient, &hessian, numerator1, numerator2,
-                denomPid[i], hNWeight[i], it.value(), hXBeta[i], hY[i]); // When function is in-lined, compiler will only use necessary arguments
-        }
-
-    } else if (BaseModel::exactCLR) {
-        // tbb::mutex mutex0;
-
-#ifdef USE_RCPP_PARALLEL
-
-        tbb::combinable<RealType> newGrad(static_cast<RealType>(0));
-        tbb::combinable<RealType> newHess(static_cast<RealType>(0));
-
-        auto func = [&,index](const tbb::blocked_range<int>& range) {
-
-            using std::isinf;
-
-            for (int i = range.begin(); i < range.end(); ++i) {
-                DenseView<IteratorType> x(IteratorType(hX, index), hNtoK[i], hNtoK[i+1]);
-                int numSubjects = hNtoK[i+1] - hNtoK[i];
-                int numCases = hNWeight[i];
-                std::vector<RealType> value = computeHowardRecursion<RealType>(offsExpXBeta.begin() + hNtoK[i], x, numSubjects, numCases);
-                if (value[0] == 0 || value[1] == 0 || value[2] == 0 || isinf(value[0]) || isinf(value[1]) || isinf(value[2])) {
-                    DenseView<IteratorType> newX(IteratorType(hX, index), hNtoK[i], hNtoK[i+1]);
-                    std::vector<DDouble> value = computeHowardRecursion<DDouble>(offsExpXBeta.begin() + hNtoK[i], newX, numSubjects, numCases); //, threadPool); //, hY.begin() + hNtoK[i]);
-                    using namespace sugar;
-                    // mutex0.lock();
-                    newGrad.local() -= (RealType)(-value[1]/value[0]);
-                    newHess.local() -= (RealType)((value[1]/value[0]) * (value[1]/value[0]) - value[2]/value[0]);
-                    // mutex0.unlock();
-                    continue;
-                }
-                // mutex0.lock();
-                newGrad.local() -= (RealType)(-value[1]/value[0]);
-                newHess.local() -= (RealType)((value[1]/value[0]) * (value[1]/value[0]) - value[2]/value[0]);
-                // mutex0.unlock();
-            }
-        };
-        tbb::parallel_for(tbb::blocked_range<int>(0, N), func);
-        gradient += newGrad.combine([](const RealType& x, const RealType& y) { return x + y; });
-        hessian += newHess.combine([](const RealType& x, const RealType& y) { return x + y; });
-#else
-
-        using std::isinf;
-
-        for (int i = 0; i < N; i++) {
-            DenseView<IteratorType> x(IteratorType(hX, index), hNtoK[i], hNtoK[i+1]);
-            int numSubjects = hNtoK[i+1] - hNtoK[i];
-            int numCases = hNWeight[i];
-
-            std::vector<RealType> value = computeHowardRecursion<RealType>(offsExpXBeta.begin() + hNtoK[i], x, numSubjects, numCases); //, threadPool); //, hY.begin() + hNtoK[i]);
-            // std::cout << " values" << i << ": " << ...
-            if (value[0] == 0 || value[1] == 0 || value[2] == 0 || isinf(value[0]) || isinf(value[1]) || isinf(value[2])) {
-                DenseView<IteratorType> newX(IteratorType(hX, index), hNtoK[i], hNtoK[i+1]);
-                std::vector<DDouble> value = computeHowardRecursion<DDouble>(offsExpXBeta.begin() + hNtoK[i], newX, numSubjects, numCases); //, threadPool); //, hY.begin() + hNtoK[i]);
-                using namespace sugar;
-                gradient -= (RealType)(-value[1]/value[0]);
-                hessian -= (RealType)((value[1]/value[0]) * (value[1]/value[0]) - value[2]/value[0]);
-                continue;
-            }
-            // gradient -= (RealType)(value[3] - value[1]/value[0]);
-            gradient -= (RealType)(-value[1]/value[0]);
-            hessian -= (RealType)((value[1]/value[0]) * (value[1]/value[0]) - value[2]/value[0]);
-        }
-#endif // USE_RCPP_PARALLEL
-
-    } else {
-
-        // auto rangeKey = helper::dependent::getRangeKey(
-        //         hX, index, hPid,
-        //         typename IteratorType::tag());
-        //
-        // auto rangeXNumerator = helper::dependent::getRangeX(
-        //         hX, index, offsExpXBeta,
-        //         typename IteratorType::tag());
-        //
-        // auto rangeGradient = helper::dependent::getRangeGradient(
-        //         sparseIndices[index].get(), N, // runtime error: reference binding to null pointer of type 'struct vector'
-        //         denomPid, hNWeight,
-        //         typename IteratorType::tag());
-        //
-        // const auto result = variants::trial::nested_reduce(
-        //         rangeKey.begin(), rangeKey.end(), // key
-        //         rangeXNumerator.begin(), // inner
-        //         rangeGradient.begin(), // outer
-        //         std::pair<RealType,RealType>{0,0}, Fraction<RealType>{0,0},
-        //         TestNumeratorKernel<BaseModel>(), // Inner transform-reduce
-        //         TestGradientKernel<BaseModel,IteratorType,Weights>()); // Outer transform-reduce
-        //
-        // gradient = result.real();
-        // hessian = result.imag();
-
-        IteratorType it(hX, index);
-        const int end = it.size() - 1;
-
-        RealType numerator1 = static_cast<RealType>(0);
-        RealType numerator2 = static_cast<RealType>(0);
-        int key = hPid[it.index()];
-
-        // auto incrementNumerators2 = [this](const typename IteratorType::Index i, const typename IteratorType::ValueType x,
-        //         const RealTypePair lhs) -> RealTypePair {
-        //
-        //     const auto linearPredictor = offsExpXBeta[i];
-        //     return {
-        //         lhs.first + BaseModel::gradientNumeratorContrib(x, linearPredictor, static_cast<RealType>(0), static_cast<RealType>(0)),
-        //         lhs.second + (!IteratorType::isIndicator && BaseModel::hasTwoNumeratorTerms ?
-        //             BaseModel::gradientNumerator2Contrib(x, linearPredictor) :
-        //             static_cast<RealType>(0))
-        //     };
-        // };
-
-        auto incrementNumerators = [this, &it, &numerator1, &numerator2]() {
-            const int i = it.index();
-
-            numerator1 += BaseModel::gradientNumeratorContrib(it.value(), offsExpXBeta[i], static_cast<RealType>(0), static_cast<RealType>(0));
-            numerator2 += (!IteratorType::isIndicator && BaseModel::hasTwoNumeratorTerms) ?
-                BaseModel::gradientNumerator2Contrib(it.value(), offsExpXBeta[i]) :
-                static_cast<RealType>(0);
-        };
-
-        for ( ; it.inRange(end); ++it) {
-            incrementNumerators();
-
-            const int nextKey = hPid[it.nextIndex()];
-            if (key != nextKey) {
-
-                BaseModel::incrementGradientAndHessian(it, w, // Signature-only, for iterator-type specialization
-                    &gradient, &hessian, numerator1, numerator2,
-                    denomPid[key], hNWeight[key], 0, 0, 0); // When function is in-lined, compiler will only use necessary arguments
-
-                numerator1 = static_cast<RealType>(0);
-                numerator2 = static_cast<RealType>(0);
-
-                key = nextKey;
-            }
-        }
-
-        incrementNumerators();
-
-        BaseModel::incrementGradientAndHessian(it, w, // Signature-only, for iterator-type specialization
-            &gradient, &hessian, numerator1, numerator2,
-            denomPid[key], hNWeight[key], 0, 0, 0); // When function is in-lined, compiler will only use necessary arguments
-    }
-
-    if (BaseModel::precomputeGradient) { // Compile-time switch
-        gradient -= hXjY[index];
-    }
-
-    if (BaseModel::precomputeHessian) { // Compile-time switch
-        hessian += static_cast<RealType>(2.0) * hXjX[index];
-    }
-
-    std::cerr << "gradient = " << gradient << " @ " << index << "\n";
-    std::cerr << "hessian = " << hessian << " @ " << index << "\n";
-
-    *ogradient = static_cast<double>(gradient);
-    *ohessian = static_cast<double>(hessian);
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifdef CYCLOPS_DEBUG_TIMING_LOW
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    auto name = "compGradHess" + IteratorType::name + " ";
-    duration[name] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-#endif
-
-}
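The cumulative branch above is a segmented prefix scan: running numerator totals reset whenever the iterator crosses an `accReset` boundary (a stratum start). The scan pattern in isolation, assuming `resets` is ascending and terminated by a sentinel no smaller than `x.size()`:

```cpp
#include <vector>

// Segmented prefix sum with reset points, as the accumulator loops above use.
std::vector<double> segmentedPrefixSum(const std::vector<double>& x,
                                       const std::vector<int>& resets) {
    std::vector<double> acc(x.size());
    double total = 0.0;
    auto reset = resets.begin();
    for (int i = 0; i < static_cast<int>(x.size()); ++i) {
        if (*reset <= i) {  // crossed a segment boundary: clear the accumulator
            total = 0.0;
            ++reset;
        }
        total += x[i];
        acc[i] = total;
    }
    return acc;
}
```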
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeThirdDerivative(int index, double *othird, bool useWeights) {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifndef CYCLOPS_DEBUG_TIMING_LOW
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-#endif
-
-    if (hX.getNumberOfNonZeroEntries(index) == 0) {
-        *othird = 0.0;
-        return;
-    }
-
-    // Run-time dispatch, so virtual call should not affect speed
-    if (useWeights) {
-        switch (hX.getFormatType(index)) {
-            case INDICATOR :
-                computeThirdDerivativeImpl<IndicatorIterator<RealType>>(index, othird, weighted);
-                break;
-            case SPARSE :
-                computeThirdDerivativeImpl<SparseIterator<RealType>>(index, othird, weighted);
-                break;
-            case DENSE :
-                computeThirdDerivativeImpl<DenseIterator<RealType>>(index, othird, weighted);
-                break;
-            case INTERCEPT :
-                computeThirdDerivativeImpl<InterceptIterator<RealType>>(index, othird, weighted);
-                break;
-        }
-    } else {
-        switch (hX.getFormatType(index)) {
-            case INDICATOR :
-                computeThirdDerivativeImpl<IndicatorIterator<RealType>>(index, othird, unweighted);
-                break;
-            case SPARSE :
-                computeThirdDerivativeImpl<SparseIterator<RealType>>(index, othird, unweighted);
-                break;
-            case DENSE :
-                computeThirdDerivativeImpl<DenseIterator<RealType>>(index, othird, unweighted);
-                break;
-            case INTERCEPT :
-                computeThirdDerivativeImpl<InterceptIterator<RealType>>(index, othird, unweighted);
-                break;
-        }
-    }
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifndef CYCLOPS_DEBUG_TIMING_LOW
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    duration["compThird "] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-#endif
-}
-
-template <class BaseModel, typename RealType> template <class IteratorType, class Weights>
-void ModelSpecifics<BaseModel,RealType>::computeThirdDerivativeImpl(int index, double *othird, Weights w) {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifdef CYCLOPS_DEBUG_TIMING_LOW
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-#endif
-
-    RealType third = static_cast<RealType>(0);
-
-    if (BaseModel::cumulativeGradientAndHessian) { // Compile-time switch
-
-        if (sparseIndices[index] == nullptr || sparseIndices[index]->size() > 0) {
-
-            IteratorType it(sparseIndices[index].get(), N);
-
-            RealType accNumerPid = static_cast<RealType>(0);
-            RealType accNumerPid2 = static_cast<RealType>(0);
-
-            // find start of relevant accumulator reset point
-            auto reset = begin(accReset);
-            while (*reset < it.index()) {
-                ++reset;
-            }
-
-            for (; it; ) {
-                int i = it.index();
-
-                if (*reset <= i) {
-                    accNumerPid = static_cast<RealType>(0.0);
-                    accNumerPid2 = static_cast<RealType>(0.0);
-                    ++reset;
-                }
-
-                const auto numerator1 = numerPid[i];
-                const auto numerator2 = numerPid2[i];
-
-                accNumerPid += numerator1;
-                accNumerPid2 += numerator2;
-
-                // Compile-time delegation
-                BaseModel::incrementThirdDerivative(it,
-                    w, // Signature-only, for iterator-type specialization
-                    &third, accNumerPid, accNumerPid2,
-                    accDenomPid[i], hNWeight[i], 0.0, hXBeta[i], hY[i]); // When function is in-lined, compiler will only use necessary arguments
-                ++it;
-
-                if (IteratorType::isSparse) {
-
-                    const int next = it ? it.index() : N;
-                    for (++i; i < next; ++i) {
-
-                        if (*reset <= i) {
-                            accNumerPid = static_cast<RealType>(0.0);
-                            accNumerPid2 = static_cast<RealType>(0.0);
-                            ++reset;
-                        }
-
-                        BaseModel::incrementThirdDerivative(it,
-                            w, // Signature-only, for iterator-type specialization
-                            &third, accNumerPid, accNumerPid2,
-                            accDenomPid[i], hNWeight[i], static_cast<RealType>(0), hXBeta[i], hY[i]); // When function is in-lined, compiler will only use necessary arguments
-                    }
-                }
-            }
-        } else {
-            throw new std::logic_error("Not yet supported");
-        }
-    }
-
-    *othird = static_cast<double>(third);
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifdef CYCLOPS_DEBUG_TIMING_LOW
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    auto name = "compThird" + IteratorType::name + " ";
-    duration[name] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-#endif
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeSchoenfeldResiduals(int indexOne,
-        std::vector<double>* residuals,
-        std::vector<double>* otimes,
-        double* covariate,
-        double* score,
-        bool useWeights) {
-    if (useWeights) {
-        throw new std::logic_error("Weights are not yet implemented in Schoenfeld residual calculations");
-    } else { // no weights
-        switch (hX.getFormatType(indexOne)) {
-            case INDICATOR :
-                getSchoenfeldResidualsImpl<IndicatorIterator<RealType>>(indexOne, residuals, otimes, covariate, score, weighted);
-                break;
-            case SPARSE :
-                getSchoenfeldResidualsImpl<SparseIterator<RealType>>(indexOne, residuals, otimes, covariate, score, weighted);
-                break;
-            case DENSE :
-                getSchoenfeldResidualsImpl<DenseIterator<RealType>>(indexOne, residuals, otimes, covariate, score, weighted);
-                break;
-            case INTERCEPT :
-                getSchoenfeldResidualsImpl<InterceptIterator<RealType>>(indexOne, residuals, otimes, covariate, score, weighted);
-                break;
-        }
-    }
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeFisherInformation(int indexOne, int indexTwo,
-        double *oinfo, bool useWeights) {
-
-    if (useWeights) {
-        throw new std::logic_error("Weights are not yet implemented in Fisher Information calculations");
-    } else { // no weights
-        switch (hX.getFormatType(indexOne)) {
-            case INDICATOR :
-                dispatchFisherInformation<IndicatorIterator<RealType>>(indexOne, indexTwo, oinfo, weighted);
-                break;
-            case SPARSE :
-                dispatchFisherInformation<SparseIterator<RealType>>(indexOne, indexTwo, oinfo, weighted);
-                break;
-            case DENSE :
-                dispatchFisherInformation<DenseIterator<RealType>>(indexOne, indexTwo, oinfo, weighted);
-                break;
-            case INTERCEPT :
-                dispatchFisherInformation<InterceptIterator<RealType>>(indexOne, indexTwo, oinfo, weighted);
-                break;
-        }
-    }
-}
-
-template <class BaseModel, typename RealType> template <typename IteratorTypeOne, class Weights>
-void ModelSpecifics<BaseModel,RealType>::dispatchFisherInformation(int indexOne, int indexTwo, double *oinfo, Weights w) {
-    switch (hX.getFormatType(indexTwo)) {
-        case INDICATOR :
-            computeFisherInformationImpl<IteratorTypeOne,IndicatorIterator<RealType>>(indexOne, indexTwo, oinfo, w);
-            break;
-        case SPARSE :
-            computeFisherInformationImpl<IteratorTypeOne,SparseIterator<RealType>>(indexOne, indexTwo, oinfo, w);
-            break;
-        case DENSE :
-            computeFisherInformationImpl<IteratorTypeOne,DenseIterator<RealType>>(indexOne, indexTwo, oinfo, w);
-            break;
-        case INTERCEPT :
-            computeFisherInformationImpl<IteratorTypeOne,InterceptIterator<RealType>>(indexOne, indexTwo, oinfo, w);
-            break;
-    }
-    // std::cerr << "End of dispatch" << std::endl;
-}
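`computeFisherInformation` and `dispatchFisherInformation` together turn two run-time `FormatType` tags into one doubly-templated call, one switch per tag. The shape of that double dispatch, reduced to its essentials with hypothetical tag types:

```cpp
#include <cstdio>

enum Format { DenseFormat, SparseFormat };
struct DenseTag {};
struct SparseTag {};

template <class One, class Two>
void fisherImpl(int indexOne, int indexTwo) {
    std::printf("impl(%d, %d)\n", indexOne, indexTwo);
}

// Second switch: the first iterator type is already fixed at compile time.
template <class One>
void dispatchSecond(Format two, int indexOne, int indexTwo) {
    switch (two) {
    case DenseFormat:  fisherImpl<One, DenseTag>(indexOne, indexTwo); break;
    case SparseFormat: fisherImpl<One, SparseTag>(indexOne, indexTwo); break;
    }
}

// First switch: pick the first iterator type, then dispatch again.
void dispatchBoth(Format one, Format two, int indexOne, int indexTwo) {
    switch (one) {
    case DenseFormat:  dispatchSecond<DenseTag>(two, indexOne, indexTwo); break;
    case SparseFormat: dispatchSecond<SparseTag>(two, indexOne, indexTwo); break;
    }
}
```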
-
-template <class BaseModel, typename RealType> template <class IteratorType>
-SparseIterator<RealType> ModelSpecifics<BaseModel,RealType>::getSubjectSpecificHessianIterator(int index) {
-
-    if (hessianSparseCrossTerms.find(index) == hessianSparseCrossTerms.end()) {
-
-        auto indices = make_shared<std::vector<int> >();
-        auto values = make_shared<std::vector<RealType> >();
-
-        CDCPtr column = bsccs::make_shared<CompressedDataColumn<RealType>>(indices, values, SPARSE);
-        hessianSparseCrossTerms.insert(std::make_pair(index, column));
-
-        IteratorType itCross(hX, index);
-        for (; itCross; ) {
-            RealType value = 0.0;
-            int currentPid = hPid[itCross.index()]; // TODO Need to fix for stratified Cox
-            do {
-                const int k = itCross.index();
-                value += BaseModel::gradientNumeratorContrib(itCross.value(),
-                    offsExpXBeta[k], hXBeta[k], hY[k]);
-                ++itCross;
-            } while (itCross && currentPid == hPid[itCross.index()]); // TODO Need to fix for stratified Cox
-            indices->push_back(currentPid);
-            values->push_back(value);
-        }
-    }
-    return SparseIterator<RealType>(*hessianSparseCrossTerms[index]);
-
-}
-
-template <class BaseModel, typename RealType> template <class IteratorTypeOne, class IteratorTypeTwo, class Weights>
-void ModelSpecifics<BaseModel,RealType>::computeFisherInformationImpl(int indexOne, int indexTwo, double *oinfo, Weights w) {
-
-    IteratorTypeOne itOne(hX, indexOne);
-    IteratorTypeTwo itTwo(hX, indexTwo);
-    PairProductIterator<IteratorTypeOne,IteratorTypeTwo,RealType> it(itOne, itTwo);
-
-    RealType information = static_cast<RealType>(0);
-    for (; it.valid(); ++it) {
-        const int k = it.index();
-        // Compile-time delegation
-
-        BaseModel::incrementFisherInformation(it,
-            w, // Signature-only, for iterator-type specialization
-            &information,
-            offsExpXBeta[k],
-            0.0, 0.0, // numerPid[k], numerPid2[k], // remove
-            denomPid[BaseModel::getGroup(hPid, k)],
-            hKWeight[k], it.value(), hXBeta[k], hY[k]); // When function is in-lined, compiler will only use necessary arguments
-    }
-
-    if (BaseModel::hasStrataCrossTerms) {
-
-        // Check if index is pre-computed
-//#define USE_DENSE
-#ifdef USE_DENSE
-        if (hessianCrossTerms.find(indexOne) == hessianCrossTerms.end()) {
-            // Make new
-            std::vector<RealType> crossOneTerms(N);
-            IteratorTypeOne crossOne(modelData, indexOne);
-            for (; crossOne; ++crossOne) {
-                const int k = crossOne.index();
-                incrementByGroup(crossOneTerms.data(), hPid, k,
-                    BaseModel::gradientNumeratorContrib(crossOne.value(), offsExpXBeta[k], hXBeta[k], hY[k]));
-            }
-            hessianCrossTerms[indexOne];
-            hessianCrossTerms[indexOne].swap(crossOneTerms);
-        }
-        std::vector<RealType>& crossOneTerms = hessianCrossTerms[indexOne];
-
-        // TODO Remove code duplication
-        if (hessianCrossTerms.find(indexTwo) == hessianCrossTerms.end()) {
-            std::vector<RealType> crossTwoTerms(N);
-            IteratorTypeTwo crossTwo(modelData, indexTwo);
-            for (; crossTwo; ++crossTwo) {
-                const int k = crossTwo.index();
-                incrementByGroup(crossTwoTerms.data(), hPid, k,
-                    BaseModel::gradientNumeratorContrib(crossTwo.value(), offsExpXBeta[k], hXBeta[k], hY[k]));
-            }
-            hessianCrossTerms[indexTwo];
-            hessianCrossTerms[indexTwo].swap(crossTwoTerms);
-        }
-        std::vector<RealType>& crossTwoTerms = hessianCrossTerms[indexTwo];
-
-        // TODO Sparse loop
-        real cross = 0.0;
-        for (int n = 0; n < N; ++n) {
-            cross += crossOneTerms[n] * crossTwoTerms[n] / (denomPid[n] * denomPid[n]);
-        }
-        information -= cross;
-#else
-        SparseIterator<RealType> sparseCrossOneTerms = getSubjectSpecificHessianIterator<IteratorTypeOne>(indexOne);
-        SparseIterator<RealType> sparseCrossTwoTerms = getSubjectSpecificHessianIterator<IteratorTypeTwo>(indexTwo);
-        PairProductIterator<SparseIterator<RealType>,SparseIterator<RealType>,RealType> itSparseCross(sparseCrossOneTerms, sparseCrossTwoTerms);
-
-        RealType sparseCross = static_cast<RealType>(0);
-        for (; itSparseCross.valid(); ++itSparseCross) {
-            const int n = itSparseCross.index();
-            sparseCross += itSparseCross.value() / (denomPid[n] * denomPid[n]);
-        }
-        information -= sparseCross;
-#endif
-    }
-
-    *oinfo = static_cast<double>(information);
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeNumeratorForGradient(int index, bool useWeights) {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifndef CYCLOPS_DEBUG_TIMING_LOW
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-#endif
-
-    if (BaseModel::hasNtoKIndices || BaseModel::cumulativeGradientAndHessian) {
-
-        switch (hX.getFormatType(index)) {
-            case INDICATOR : {
-                IndicatorIterator<RealType> itI(*(sparseIndices)[index]);
-                for (; itI; ++itI) { // Only affected entries
-                    numerPid[itI.index()] = static_cast<RealType>(0.0);
-                }
-/*
-                zeroVector(numerPid.data(), N);
-                if (BaseModel::efron) {
-                    zeroVector(numerPid3.data(), N);
-                }
-*/
-                if (useWeights) {
-                    incrementNumeratorForGradientImpl<IndicatorIterator<RealType>, WeightedOperation>(index);
-                } else {
-                    incrementNumeratorForGradientImpl<IndicatorIterator<RealType>, UnweightedOperation>(index);
-                }
-                break;
-            }
-            case SPARSE : {
-                SparseIterator<RealType> itS(*(sparseIndices)[index]);
-                for (; itS; ++itS) { // Only affected entries
-                    numerPid[itS.index()] = static_cast<RealType>(0.0);
-                    if (BaseModel::hasTwoNumeratorTerms) { // Compile-time switch
-                        numerPid2[itS.index()] = static_cast<RealType>(0.0); // TODO Does this invalidate the cache line too much?
-                    }
-                }
-/*
-                zeroVector(numerPid.data(), N);
-                if (BaseModel::hasTwoNumeratorTerms) { // Compile-time switch
-                    zeroVector(numerPid2.data(), N);
-                }
-                if (BaseModel::efron) {
-                    zeroVector(numerPid3.data(), N);
-                    zeroVector(numerPid4.data(), N);
-                }
-*/
-                if (useWeights) {
-                    incrementNumeratorForGradientImpl<SparseIterator<RealType>, WeightedOperation>(index);
-                } else {
-                    incrementNumeratorForGradientImpl<SparseIterator<RealType>, UnweightedOperation>(index);
-                }
-                break;
-            }
-            case DENSE : {
-                zeroVector(numerPid.data(), N);
-                if (BaseModel::hasTwoNumeratorTerms) { // Compile-time switch
-                    zeroVector(numerPid2.data(), N);
-                }
-/*
-                if (BaseModel::efron) {
-                    zeroVector(numerPid3.data(), N);
-                    zeroVector(numerPid4.data(), N);
-                }
-*/
-                if (useWeights) {
-                    incrementNumeratorForGradientImpl<DenseIterator<RealType>, WeightedOperation>(index);
-                } else {
-                    incrementNumeratorForGradientImpl<DenseIterator<RealType>, UnweightedOperation>(index);
-                }
-                break;
-            }
-            case INTERCEPT : {
-                zeroVector(numerPid.data(), N);
-                if (BaseModel::hasTwoNumeratorTerms) { // Compile-time switch
-                    zeroVector(numerPid2.data(), N);
-                }
-/*
-                if (BaseModel::efron) {
-                    zeroVector(numerPid3.data(), N);
-                    zeroVector(numerPid4.data(), N);
-                }
-*/
-                if (useWeights) {
-                    incrementNumeratorForGradientImpl<InterceptIterator<RealType>, WeightedOperation>(index);
-                } else {
-                    incrementNumeratorForGradientImpl<InterceptIterator<RealType>, UnweightedOperation>(index);
-                }
-                break;
-            }
-            default : break;
-        }
-    }
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifndef CYCLOPS_DEBUG_TIMING_LOW
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    duration["compNumForGrad "] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-#endif
-
-#ifdef CYCLOPS_DEBUG_TIMING_GPU_COX
-    duration["CPU GH "] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-}
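`computeNumeratorForGradient` clears only the per-group slots a sparse column touches, then scatter-adds its contributions by group. Both halves of that pattern as plain loops, with stand-ins for `sparseIndices` and `incrementByGroup`:

```cpp
#include <cstddef>
#include <vector>

// Clear only the group slots this covariate can touch.
void zeroTouched(std::vector<double>& numer, const std::vector<int>& touchedGroups) {
    for (int g : touchedGroups) {
        numer[g] = 0.0;
    }
}

// Scatter-add per-row contributions into per-group numerators.
void scatterAddByGroup(std::vector<double>& numer,
                       const std::vector<int>& groupOf,   // row -> group id
                       const std::vector<int>& rows,      // non-zero rows of the column
                       const std::vector<double>& contrib) {
    for (std::size_t n = 0; n < rows.size(); ++n) {
        numer[groupOf[rows[n]]] += contrib[n];
    }
}
```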
-
-template <class BaseModel, typename RealType> template <class IteratorType, class Weights>
-void ModelSpecifics<BaseModel,RealType>::incrementNumeratorForGradientImpl(int index) {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifdef CYCLOPS_DEBUG_TIMING_LOW
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-#endif
-
-    IteratorType it(hX, index);
-    for (; it; ++it) {
-        const int k = it.index();
-        incrementByGroup(numerPid.data(), hPid, k,
-            Weights::isWeighted ?
-                hKWeight[k] * BaseModel::gradientNumeratorContrib(it.value(), offsExpXBeta[k], hXBeta[k], hY[k]) :
-                BaseModel::gradientNumeratorContrib(it.value(), offsExpXBeta[k], hXBeta[k], hY[k])
-        );
-        if (!IteratorType::isIndicator && BaseModel::hasTwoNumeratorTerms) {
-            incrementByGroup(numerPid2.data(), hPid, k,
-                Weights::isWeighted ?
-                    hKWeight[k] * BaseModel::gradientNumerator2Contrib(it.value(), offsExpXBeta[k]) :
-                    BaseModel::gradientNumerator2Contrib(it.value(), offsExpXBeta[k])
-            );
-        }
-/*
-        if (BaseModel::efron) {
-            incrementByGroup(numerPid3.data(), hPid, k,
-                Weights::isWeighted ?
-                    hKWeight[k] * hY[k] * BaseModel::gradientNumeratorContrib(it.value(), offsExpXBeta[k], hXBeta[k], hY[k]) :
-                    hY[k] * BaseModel::gradientNumeratorContrib(it.value(), offsExpXBeta[k], hXBeta[k], hY[k])
-            );
-        }
-        if (!IteratorType::isIndicator && BaseModel::efron) {
-            incrementByGroup(numerPid4.data(), hPid, k,
-                Weights::isWeighted ?
-                    hKWeight[k] * hY[k] * BaseModel::gradientNumerator2Contrib(it.value(), offsExpXBeta[k]) :
-                    hY[k] * BaseModel::gradientNumerator2Contrib(it.value(), offsExpXBeta[k])
-            );
-        }
-*/
-    }
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifdef CYCLOPS_DEBUG_TIMING_LOW
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    auto name = "compNumGrad" + IteratorType::name + " ";
-    duration[name] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-#endif
-
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::updateXBeta(double delta, int index, bool useWeights) {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifndef CYCLOPS_DEBUG_TIMING_LOW
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-#endif
-
-    RealType realDelta = static_cast<RealType>(delta);
-
-    // Run-time dispatch to implementation depending on covariate FormatType
-    switch (hX.getFormatType(index)) {
-        case INDICATOR : {
-            if (useWeights) {
-                updateXBetaImpl<IndicatorIterator<RealType>, WeightedOperation>(realDelta, index);
-            } else {
-                updateXBetaImpl<IndicatorIterator<RealType>, UnweightedOperation>(realDelta, index);
-            }
-            break;
-        }
-        case SPARSE : {
-            if (useWeights) {
-                updateXBetaImpl<SparseIterator<RealType>, WeightedOperation>(realDelta, index);
-            } else {
-                updateXBetaImpl<SparseIterator<RealType>, UnweightedOperation>(realDelta, index);
-            }
-            break;
-        }
-        case DENSE : {
-            if (useWeights) {
-                updateXBetaImpl<DenseIterator<RealType>, WeightedOperation>(realDelta, index);
-            } else {
-                updateXBetaImpl<DenseIterator<RealType>, UnweightedOperation>(realDelta, index);
-            }
-            break;
-        }
-        case INTERCEPT : {
-            if (useWeights) {
-                updateXBetaImpl<InterceptIterator<RealType>, WeightedOperation>(realDelta, index);
-            } else {
-                updateXBetaImpl<InterceptIterator<RealType>, UnweightedOperation>(realDelta, index);
-            }
-            break;
-        }
-        default : break;
-    }
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifndef CYCLOPS_DEBUG_TIMING_LOW
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    duration["updateXBeta "] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-#endif
-
-#ifdef CYCLOPS_DEBUG_TIMING_GPU_COX
-    duration["CPU GH "] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-
-}
-
-template <class BaseModel, typename RealType> template <class IteratorType, class Weights>
-inline void ModelSpecifics<BaseModel,RealType>::updateXBetaImpl(RealType realDelta, int index) {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifdef CYCLOPS_DEBUG_TIMING_LOW
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-#endif
-
-    IteratorType it(hX, index);
-
-    if (BaseModel::cumulativeGradientAndHessian) { // cox
-        for (; it; ++it) {
-            const int k = it.index();
-            hXBeta[k] += realDelta * it.value(); // TODO Check optimization with indicator and intercept
-
-            if (BaseModel::likelihoodHasDenominator) { // Compile-time switch
-                RealType oldEntry = Weights::isWeighted ?
-                    hKWeight[k] * offsExpXBeta[k] : offsExpXBeta[k]; // TODO Delegate condition to forming offExpXBeta
-                offsExpXBeta[k] = BaseModel::getOffsExpXBeta(hOffs.data(), hXBeta[k], hY[k], k); // Update offsExpXBeta
-                RealType newEntry = Weights::isWeighted ?
-                    hKWeight[k] * offsExpXBeta[k] : offsExpXBeta[k]; // TODO Delegate condition
-                incrementByGroup(denomPid.data(), hPid, k, (newEntry - oldEntry)); // Update denominators
-            }
-        }
-    } else {
-        for (; it; ++it) {
-            const int k = it.index();
-            hXBeta[k] += realDelta * it.value(); // TODO Check optimization with indicator and intercept
-
-            if (BaseModel::likelihoodHasDenominator) { // Compile-time switch
-                RealType oldEntry = offsExpXBeta[k];
-                RealType newEntry = offsExpXBeta[k] = BaseModel::getOffsExpXBeta(hOffs.data(), hXBeta[k], hY[k], k);
-                incrementByGroup(denomPid.data(), hPid, k, (newEntry - oldEntry));
-/*
-                if (BaseModel::efron) {
-                    incrementByGroup(denomPid2.data(), hPid, k, hY[k]*(newEntry - oldEntry)); // Update denominators
-                }
-*/
-            }
-        }
-    }
-
-    computeAccumlatedDenominator(Weights::isWeighted);
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifdef CYCLOPS_DEBUG_TIMING_LOW
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    auto name = "updateXBeta" + IteratorType::name + " ";
-    duration[name] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-#endif
-
-}
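`updateXBetaImpl` exploits that a single-coordinate update touches only the non-zero rows of column `j`: it patches `xBeta`, recomputes the affected `exp` terms, and adjusts each group denominator by the new-minus-old difference. The same bookkeeping for an exponential-link model, stripped of the iterator and weighting machinery:

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// After beta[j] += delta, refresh xBeta, expXBeta and group denominators
// in O(nnz(column j)) instead of O(K).
void updateColumn(double delta,
                  const std::vector<int>& rows, const std::vector<double>& values,
                  const std::vector<int>& groupOf,
                  std::vector<double>& xBeta,
                  std::vector<double>& expXBeta,
                  std::vector<double>& denom) {
    for (std::size_t n = 0; n < rows.size(); ++n) {
        const int k = rows[n];
        xBeta[k] += delta * values[n];
        const double oldEntry = expXBeta[k];
        const double newEntry = std::exp(xBeta[k]);
        expXBeta[k] = newEntry;
        denom[groupOf[k]] += newEntry - oldEntry;  // incremental denominator patch
    }
}
```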
-
-template <class BaseModel, typename RealType> template <class Weights>
-void ModelSpecifics<BaseModel,RealType>::computeRemainingStatisticsImpl() {
-
-    auto& xBeta = getXBeta();
-
-    if (BaseModel::likelihoodHasDenominator) {
-        fillVector(denomPid.data(), N, BaseModel::getDenomNullValue());
-/*
-        if (BaseModel::efron) {
-            fillVector(denomPid2.data(), N, BaseModel::getDenomNullValue());
-        }
-*/
-        if (BaseModel::cumulativeGradientAndHessian) { // cox
-            for (size_t k = 0; k < K; ++k) {
-                offsExpXBeta[k] = BaseModel::getOffsExpXBeta(hOffs.data(), xBeta[k], hY[k], k);
-                RealType weightoffsExpXBeta = Weights::isWeighted ?
-                    hKWeight[k] * BaseModel::getOffsExpXBeta(hOffs.data(), xBeta[k], hY[k], k) :
-                    BaseModel::getOffsExpXBeta(hOffs.data(), xBeta[k], hY[k], k);
-                incrementByGroup(denomPid.data(), hPid, k, weightoffsExpXBeta); // Update denominators
-            }
-        } else {
-            for (size_t k = 0; k < K; ++k) {
-                offsExpXBeta[k] = BaseModel::getOffsExpXBeta(hOffs.data(), xBeta[k], hY[k], k);
-                incrementByGroup(denomPid.data(), hPid, k, offsExpXBeta[k]);
-/*
-                if (BaseModel::efron) {
-                    incrementByGroup(denomPid2.data(), hPid, k, hY[k]*offsExpXBeta[k]); // Update denominators
-                }
-*/
-            }
-        }
-        computeAccumlatedDenominator(Weights::isWeighted);
-    }
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeRemainingStatistics(bool useWeights) {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-
-    if (useWeights) {
-        computeRemainingStatisticsImpl<WeightedOperation>();
-    } else {
-        computeRemainingStatisticsImpl<UnweightedOperation>();
-    }
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    duration["compRS "] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeAccumlatedNumerator(bool useWeights) {
-
-    if (BaseModel::likelihoodHasDenominator && // The two switches should ideally be separated
-        BaseModel::cumulativeGradientAndHessian) { // Compile-time switch
-
-        if (accNumerPid.size() != N) {
-            accNumerPid.resize(N, static_cast<RealType>(0));
-        }
-        if (accNumerPid2.size() != N) {
-            accNumerPid2.resize(N, static_cast<RealType>(0));
-        }
-
-        // segmented prefix-scan
-        RealType totalNumer = static_cast<RealType>(0);
-        RealType totalNumer2 = static_cast<RealType>(0);
-
-        auto reset = begin(accReset);
-
-        for (size_t i = 0; i < N; ++i) {
-
-            if (static_cast<size_t>(*reset) == i) {
-                totalNumer = static_cast<RealType>(0);
-                totalNumer2 = static_cast<RealType>(0);
-                ++reset;
-            }
-
-            totalNumer += numerPid[i];
-            totalNumer2 += numerPid2[i];
-            accNumerPid[i] = totalNumer;
-            accNumerPid2[i] = totalNumer2;
-        }
-    }
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeAccumlatedDenominator(bool useWeights) {
-
-    if (BaseModel::likelihoodHasDenominator && // The two switches should ideally be separated
-        BaseModel::cumulativeGradientAndHessian) { // Compile-time switch
-
-        if (accDenomPid.size() != (N + 1)) {
-            accDenomPid.resize(N + 1, static_cast<RealType>(0));
-        }
-
-        // segmented prefix-scan
-        RealType totalDenom = static_cast<RealType>(0);
-
-        auto reset = begin(accReset);
-
-        for (size_t i = 0; i < N; ++i) {
-
-            if (static_cast<size_t>(*reset) == i) { // TODO Check with SPARSE
-                totalDenom = static_cast<RealType>(0);
-                ++reset;
-            }
-
-            totalDenom += denomPid[i];
-            accDenomPid[i] = totalDenom;
-        }
-
-        // ESK: Incorporate backwards scan here:
-        if (BaseModel::isTwoWayScan) {
-            // RealType backDenom = static_cast<RealType>(0);
-            RealType totalDenom = static_cast<RealType>(0);
-
-            auto reset = begin(accReset);
-
-            // Q: How can we change int to size_t w/o errors
-            for (int i = (N - 1); i >= 0; i--) {
-
-                if (static_cast<int>(*reset) == i) { // TODO Check with SPARSE
-                    totalDenom = static_cast<RealType>(0);
-                    ++reset;
-                }
-
-                totalDenom += (hY[hNtoK[i]] > static_cast<RealType>(1)) ? denomPid[i] / hYWeight[hNtoK[i]] : 0;
-                accDenomPid[i] += (hY[hNtoK[i]] == static_cast<RealType>(1)) ? hYWeight[hNtoK[i]] * totalDenom : 0;
-            }
-        } // End two-way scan
-    }
-
-}
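The two-way scan in `computeAccumlatedDenominator` adds a backward pass over the forward prefix sums for the competing-risks (Fine-Gray) terms. The backward half in isolation, assuming `resets` lists ascending segment starts:

```cpp
#include <vector>

// Reverse segmented sum: accumulate from the end down, clearing the running
// total each time a segment start is passed.
std::vector<double> reverseSegmentedSum(const std::vector<double>& x,
                                        const std::vector<int>& resets) {
    std::vector<double> acc(x.size());
    double total = 0.0;
    auto reset = resets.rbegin();  // largest segment start first
    for (int i = static_cast<int>(x.size()) - 1; i >= 0; --i) {
        total += x[i];
        acc[i] = total;
        if (reset != resets.rend() && *reset == i) {  // leaving this segment
            total = 0.0;
            ++reset;
        }
    }
    return acc;
}
```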
Incorporated in incrementGradientAndHessian -template template -void ModelSpecifics::computeBackwardAccumlatedNumerator( - IteratorType it, - bool useWeights) { - - if (decNumerPid.size() != N) { - decNumerPid.resize(N, static_cast(0)); - } - if (decNumerPid2.size() != N) { - decNumerPid2.resize(N, static_cast(0)); - } - auto revIt = it.reverse(); - - // segmented prefix-scan - RealType totalNumer = static_cast(0); - RealType totalNumer2 = static_cast(0); - - auto reset = end(accReset) - 1; - - for ( ; revIt; ) { - - int i = revIt.index(); - - if (static_cast(*reset) == i) { - totalNumer = static_cast(0); - totalNumer2 = static_cast(0); - --reset; - } - - totalNumer += (hY[hNtoK[i]] > static_cast(1)) ? numerPid[i] / hYWeight[hNtoK[i]] : 0; - totalNumer2 += (hY[hNtoK[i]] > static_cast(1)) ? numerPid2[i] / hYWeight[hNtoK[i]] : 0; - decNumerPid[i] = (hY[hNtoK[i]] == static_cast(1)) ? hYWeight[hNtoK[i]] * totalNumer : 0; - decNumerPid2[i] = (hY[hNtoK[i]] == static_cast(1)) ? hYWeight[hNtoK[i]] * totalNumer2 : 0; - --revIt; - - if (IteratorType::isSparse) { - - const int next = revIt ? revIt.index() : -1; - for (--i; i > next; --i) { // TODO MAS: This may be incorrect - //if (*reset <= i) { // TODO MAS: This is not correct (only need for stratifed models) - // accNumerPid = static_cast(0); - // accNumerPid2 = static_cast(0); - // ++reset; - //} - - // ESK: Start implementing sparse - decNumerPid[i] = (hY[hNtoK[i]] == static_cast(1)) ? hYWeight[hNtoK[i]] * totalNumer : 0; - decNumerPid2[i] = (hY[hNtoK[i]] == static_cast(1)) ? hYWeight[hNtoK[i]] * totalNumer2 : 0; - } - } - } -} - -// ESK: This is no longer needed. Incorporated in computeAccumlatedDenominator -template -void ModelSpecifics::computeBackwardAccumlatedDenominator(bool useWeights) { - - if (BaseModel::likelihoodHasDenominator && //The two switches should ideally be separated - BaseModel::cumulativeGradientAndHessian) { // Compile-time switch - - // segmented prefix-scan - RealType backDenom = static_cast(0); - RealType totalDenom = static_cast(0); - - auto reset = begin(accReset); - - //Q: How can we change int to size_t w/o errors - for (int i = (N - 1); i >= 0; i--) { - - if (static_cast(*reset) == i) { // TODO Check with SPARSE - totalDenom = static_cast(0); - ++reset; - } - - totalDenom += (hY[hNtoK[i]] > static_cast(1)) ? denomPid[i] / hYWeight[hNtoK[i]] : 0; - accDenomPid[i] += (hY[hNtoK[i]] == static_cast(1)) ? hYWeight[hNtoK[i]] * totalDenom : 0; - } - } -} - - -template -void ModelSpecifics::doSortPid(bool useCrossValidation) { -/* For Cox model: - * - * We currently assume that hZ[k] are sorted in decreasing order by k. 
- * - */ -} - -template -void ModelSpecifics::getOriginalPid() { - - hPidInternal = hPidOriginal; // Make copy - -} -template -void ModelSpecifics::setPidForAccumulation(const double* weights) { - setPidForAccumulationImpl(weights); -} - -template template -void ModelSpecifics::setPidForAccumulationImpl(const AnyRealType* weights) { - - hPidInternal = hPidOriginal; // Make copy - hPid = hPidInternal.data(); // Point to copy - hPidSize = hPidInternal.size(); - accReset.clear(); - - const int ignore = -1; - - // Find first non-zero weight - size_t index = 0; - while(weights != nullptr && weights[index] == 0.0 && index < K) { - hPid[index] = ignore; - index++; - } - - int lastPid = hPid[index]; - AnyRealType lastTime = hOffs[index]; - AnyRealType lastEvent = hY[index]; - - int pid = hPid[index] = 0; - - for (size_t k = index + 1; k < K; ++k) { - if (weights == nullptr || weights[k] != 0.0) { - int nextPid = hPid[k]; - - if (nextPid != lastPid) { // start new strata - pid++; - accReset.push_back(pid); - lastPid = nextPid; - } else { - - if (lastEvent == 1.0 && lastTime == hOffs[k] && lastEvent == hY[k]) { - // In a tie, do not increment denominator - } else { - pid++; - } - } - lastTime = hOffs[k]; - lastEvent = hY[k]; - - hPid[k] = pid; - } else { - hPid[k] = ignore; - } - } - pid++; - accReset.push_back(pid); - - // Save number of denominators - N = pid; - - if (weights != nullptr) { - for (size_t i = 0; i < K; ++i) { - if (hPid[i] == ignore) hPid[i] = N; // do NOT accumulate, since loops use: i < N - } - } - setupSparseIndices(N); // ignore pid == N (pointing to removed data strata) -} - -template -void ModelSpecifics::setupSparseIndices(const int max) { - sparseIndices.clear(); // empty if full! - - for (size_t j = 0; j < J; ++j) { - if (hX.getFormatType(j) == DENSE || hX.getFormatType(j) == INTERCEPT) { - sparseIndices.push_back(NULL); - } else { - std::set unique; - const size_t n = hX.getNumberOfEntries(j); - const int* indicators = hX.getCompressedColumnVector(j); - for (size_t j = 0; j < n; j++) { // Loop through non-zero entries only - const int k = indicators[j]; - const int i = (k < hPidSize) ? hPid[k] : k; - if (i < max) { - unique.insert(i); - } - } - auto indices = bsccs::make_shared(unique.begin(), unique.end()); - sparseIndices.push_back(indices); - } - } -} - -template -void ModelSpecifics::deviceInitialization() { - // Do nothing -} - -template -void ModelSpecifics::initialize( - int iN, - int iK, - int iJ, - const void*, - double* iNumerPid, - double* iNumerPid2, - double* iDenomPid, - double* iXjY, - std::vector* >* iSparseIndices, - const int* iPid_unused, - double* iOffsExpXBeta, - double* iXBeta, - double* iOffs, - double* iBeta, - const double* iY_unused) { - - N = iN; - K = iK; - J = iJ; - offsExpXBeta.resize(K); - hXBeta.resize(K); - - if (allocateXjY()) { - hXjY.resize(J); - } - - if (allocateXjX()) { - hXjX.resize(J); - } -#ifdef CYCLOPS_DEBUG_TIMING_GPU_COX - auto start = std::chrono::steady_clock::now(); -#endif - if (initializeAccumulationVectors()) { - setPidForAccumulation(static_cast(nullptr)); // calls setupSparseIndices() before returning - } else { - // TODO Suspect below is not necessary for non-grouped data. - // If true, then fill with pointers to CompressedDataColumn and do not delete in destructor - setupSparseIndices(N); // Need to be recomputed when hPid change! 
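Editor's note: the remapping that setPidForAccumulationImpl performs above can be read in isolation. Rows are walked in order, tied event times with an event share a single denominator index, and zero-weight rows are parked past the end so that every loop bounded by i < N skips them. The following is a minimal standalone sketch of that idea only; all names are illustrative, and the strata handling and accReset bookkeeping of the real routine are omitted. Note the sketch also checks the bound before reading the weight, which the deleted code does in the opposite order.

    #include <vector>

    // Hypothetical helper, not the Cyclops API: collapse tied event times
    // into shared denominator indices, mapping ignored rows to N.
    std::vector<int> collapseTiesIntoDenominators(const std::vector<double>& time,
                                                  const std::vector<double>& event,
                                                  const std::vector<double>* weights) {
        const size_t K = time.size();
        std::vector<int> pid(K);
        int current = -1;
        double lastTime = 0.0, lastEvent = 0.0;
        bool first = true;
        for (size_t k = 0; k < K; ++k) {
            if (weights != nullptr && (*weights)[k] == 0.0) {
                pid[k] = -1;                 // marked; remapped to N below
                continue;
            }
            if (first) {
                current = 0; first = false;  // first usable row gets index 0
            } else if (lastEvent == 1.0 && lastTime == time[k] && event[k] == lastEvent) {
                // tied event times share one denominator entry: do not advance
            } else {
                ++current;
            }
            lastTime = time[k]; lastEvent = event[k];
            pid[k] = current;
        }
        const int N = current + 1;           // number of denominators
        for (size_t k = 0; k < K; ++k) {
            if (pid[k] == -1) pid[k] = N;    // ignored rows point past the data
        }
        return pid;
    }

Mapping ignored rows to N rather than erasing them keeps pid aligned with the raw data while accumulation loops over i < N never touch them, which matches the comment above about loops using i < N.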
- } -#ifdef CYCLOPS_DEBUG_TIMING_GPU_COX - auto end = std::chrono::steady_clock::now(); - double timerPid = std::chrono::duration(end - start).count(); - std::cout << " OVERHEAD CCD setPid: " << timerPid << " s \n"; -#endif - - - size_t alignedLength = getAlignedLength(N + 1); - // numerDenomPidCache.resize(3 * alignedLength, 0); - // numerPid = numerDenomPidCache.data(); - // denomPid = numerPid + alignedLength; // Nested in denomPid allocation - // numerPid2 = numerPid + 2 * alignedLength; - denomPid.resize(alignedLength); - denomPid2.resize(alignedLength); - numerPid.resize(alignedLength); - numerPid2.resize(alignedLength); - numerPid3.resize(alignedLength); - numerPid4.resize(alignedLength); - - deviceInitialization(); - -} - -} // namespace - -#endif /* MODELSPECIFICS_HPP_ */ diff --git a/src/cyclops/engine/ModelSpecifics.hpp.old b/src/cyclops/engine/ModelSpecifics.hpp.old deleted file mode 100644 index 6e3a02f1..00000000 --- a/src/cyclops/engine/ModelSpecifics.hpp.old +++ /dev/null @@ -1,2363 +0,0 @@ -/* - * ModelSpecifics.hpp - * - * Created on: Jul 13, 2012 - * Author: msuchard - */ - -#ifndef MODELSPECIFICS_HPP_ -#define MODELSPECIFICS_HPP_ - -#include -#include -#include -#include -#include - -#include "ModelSpecifics.h" -#include "Iterators.h" - -#include "Recursions.hpp" -#include "ParallelLoops.h" -// #include "Ranges.h" - -#ifdef USE_RCPP_PARALLEL -#include -#include -#endif - - -//#include "R.h" -//#include "Rcpp.h" // TODO Remove - -#ifdef CYCLOPS_DEBUG_TIMING - #include "Timing.h" -#endif - namespace bsccs { - template - const std::string DenseIterator::name = "Den"; - - template - const std::string IndicatorIterator::name = "Ind"; - - template - const std::string SparseIterator::name = "Spa"; - - template - const std::string InterceptIterator::name = "Icp"; - } -//#define OLD_WAY -//#define NEW_WAY1 -#define NEW_WAY2 - -//#define USE_BIGNUM -#define USE_LONG_DOUBLE - -namespace bsccs { - -#ifdef USE_BIGNUM - typedef bigNum DDouble; -#else - #ifdef USE_LONG_DOUBLE - typedef long double DDouble; - #else - typedef double DDouble; - #endif -#endif - -#if defined(DEBUG_COX) || defined(DEBUG_COX_MIN) - using std::cerr; - using std::endl; -#endif - -#define NEW_LOOPS - -template -ModelSpecifics::ModelSpecifics(const ModelData& input) - : AbstractModelSpecifics(input), BaseModel(input.getYVectorRef(), input.getTimeVectorRef()), - modelData(input), - hX(modelData.getX()) - // hY(input.getYVectorRef()), - // hOffs(input.getTimeVectorRef()) - // hPidOriginal(input.getPidVectorRef()), hPid(const_cast(hPidOriginal.data())), - // boundType(MmBoundType::METHOD_2) -// threadPool(4,4,1000) -// threadPool(0,0,10) - { - // Do nothing - // TODO Memory allocation here - // std::cerr << "ctor ModelSpecifics \n"; - -#ifdef CYCLOPS_DEBUG_TIMING - auto now = bsccs::chrono::system_clock::now(); - auto now_c = bsccs::chrono::system_clock::to_time_t(now); - std::cout << std::endl << "Start: " << std::ctime(&now_c) << std::endl; -#endif - - -} - -template -AbstractModelSpecifics* ModelSpecifics::clone(ComputeDeviceArguments computeDevice) const { - return new ModelSpecifics(modelData); -} - -template -double ModelSpecifics::getGradientObjective(bool useCrossValidation) { - -#ifdef CYCLOPS_DEBUG_TIMING - auto start = bsccs::chrono::steady_clock::now(); -#endif - - auto& xBeta = getXBeta(); - - RealType criterion = 0; - if (useCrossValidation) { - for (int i = 0; i < K; i++) { - criterion += xBeta[i] * hY[i] * hKWeight[i]; - } - } else { - for (int i = 0; i < K; i++) { - criterion += xBeta[i] * hY[i]; 
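Editor's note: the two loops of getGradientObjective above differ only in whether hKWeight scales each term, with the weights selecting the training fold under cross-validation. A hedged, self-contained restatement (illustrative names, not the class method):

    #include <vector>

    // Sum of xBeta[i] * y[i], optionally weighted per row; kWeight == nullptr
    // reproduces the unweighted branch with w = 1.
    double gradientObjective(const std::vector<double>& xBeta,
                             const std::vector<double>& y,
                             const std::vector<double>* kWeight) {
        double criterion = 0.0;
        for (size_t i = 0; i < xBeta.size(); ++i) {
            const double w = (kWeight != nullptr) ? (*kWeight)[i] : 1.0;
            criterion += xBeta[i] * y[i] * w;
        }
        return criterion;
    }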
- } - } -#ifdef CYCLOPS_DEBUG_TIMING - auto end = bsccs::chrono::steady_clock::now(); - ///////////////////////////" - duration["getGradObj "] += bsccs::chrono::duration_cast(end - start).count(); -#endif - return static_cast (criterion); - } - -template -void ModelSpecifics::printTiming() { - -#ifdef CYCLOPS_DEBUG_TIMING - - std::cout << std::endl; - for (auto& d : duration) { - std::cout << d.first << " " << d.second << std::endl; - } - std::cout << "NEW LOOPS" << std::endl; - -#endif -} - -template -ModelSpecifics::~ModelSpecifics() { - // TODO Memory release here - // std::cerr << "dtor ModelSpecifics \n"; - -#ifdef CYCLOPS_DEBUG_TIMING - - printTiming(); - - auto now = bsccs::chrono::system_clock::now(); - auto now_c = bsccs::chrono::system_clock::to_time_t(now); - std::cout << std::endl << "End: " << std::ctime(&now_c) << std::endl; -#endif - -} - -template template -void ModelSpecifics::incrementNormsImpl(int index) { - - // TODO should use row-access - IteratorType it(hX, index); - for (; it; ++it) { - const int k = it.index(); - const RealType x = it.value(); - - norm[k] += std::abs(x); - } -} - -template -void ModelSpecifics::initializeMmXt() { - -#ifdef CYCLOPS_DEBUG_TIMING - auto start = bsccs::chrono::steady_clock::now(); -#endif - - hXt = hX.transpose(); - -#ifdef CYCLOPS_DEBUG_TIMING -auto end = bsccs::chrono::steady_clock::now(); -///////////////////////////" -duration["transpose "] += bsccs::chrono::duration_cast(end - start).count(); -#endif -} - -template -void ModelSpecifics::initializeMM( - MmBoundType boundType, - const std::vector& fixBeta - ) { - -#ifdef CYCLOPS_DEBUG_TIMING - auto start = bsccs::chrono::steady_clock::now(); -#endif - - // std::cout << "Computing norms..." << std::endl; - norm.resize(K); - zeroVector(&norm[0], K); - - for (int j = 0; j < J; ++j) { - if ( - true - // !fixBeta[j] - ) { - switch(hX.getFormatType(j)) { - case INDICATOR : - incrementNormsImpl>(j); - break; - case SPARSE : - incrementNormsImpl>(j); - break; - case DENSE : - incrementNormsImpl>(j); - break; - case INTERCEPT : - incrementNormsImpl>(j); - break; - } - } - } - - if (boundType == MmBoundType::METHOD_1) { - // std::cerr << "boundType: METHOD_1" << std::endl; - - - } else if (boundType == MmBoundType::METHOD_2) { - // std::cerr << "boundType: METHOD_2" << std::endl; - - double total = 0; - - for (int j = 0; j < J; ++j) { - if (!fixBeta[j]) { - typedef std::pair Range; - - std::vector range(modelData.getNumberOfPatients(), Range( - -std::numeric_limits::max(), - std::numeric_limits::max() - )); - - modelData.binaryReductionByStratum(range, j, [](Range& r, double x) { - return Range( - std::max(x, r.first), - std::min(x, r.second) - ); - }); - - double curvature = 0.0; - for (Range r : range) { - curvature += r.first - r.second; - } - curvature /= 4; - - total += curvature; - } - } - - if (curvature.size() == 0) { - curvature.resize(J); - } - - for (int j = 0; j < J; ++j) { - curvature[j] = total; - } - } - -#ifdef CYCLOPS_DEBUG_TIMING - auto end = bsccs::chrono::steady_clock::now(); - ///////////////////////////" - duration["norms "] += bsccs::chrono::duration_cast(end - start).count(); -#endif - -// struct timeval time1, time2; -// gettimeofday(&time1, NULL); -// -// std::cout << "Constructing Xt..." 
<< std::endl; - -// if (!hXt) { -// initializeMmXt(); -// } - -// gettimeofday(&time2, NULL); -// double duration = //calculateSeconds(time1, time2); -// time2.tv_sec - time1.tv_sec + -// (double)(time2.tv_usec - time1.tv_usec) / 1000000.0; -// -// std::cout << "Done with MM initialization" << std::endl; -// std::cout << duration << std::endl; - // Rcpp::stop("out"); - -} - -template -const std::vector ModelSpecifics::getXBeta() { - return std::vector(std::begin(hXBeta), std::end(hXBeta)); -} - -template -const std::vector ModelSpecifics::getXBetaSave() { - return std::vector(std::begin(hXBetaSave), std::end(hXBetaSave)); -} - -template -void ModelSpecifics::zeroXBeta() { - std::fill(std::begin(hXBeta), std::end(hXBeta), 0.0); -} - -template -void ModelSpecifics::saveXBeta() { - auto& xBeta = getXBeta(); - if (hXBetaSave.size() < xBeta.size()) { - hXBetaSave.resize(xBeta.size()); - } - std::copy(std::begin(xBeta), std::end(xBeta), std::begin(hXBetaSave)); -} - - - -template -void ModelSpecifics::computeXBeta(double* beta, bool useWeights) { - - if (!hXt) { - initializeMmXt(); - } - -#ifdef CYCLOPS_DEBUG_TIMING - auto start = bsccs::chrono::steady_clock::now(); -#endif - - switch(hXt->getFormatType(0)) { - case INDICATOR : - computeXBetaImpl>(beta); - break; - case SPARSE : - computeXBetaImpl>(beta); - break; - case DENSE : - computeXBetaImpl>(beta); - break; - case INTERCEPT: - break; -} - - -#ifdef CYCLOPS_DEBUG_TIMING -auto end = bsccs::chrono::steady_clock::now(); -///////////////////////////" -duration["computeXBeta "] += bsccs::chrono::duration_cast(end - start).count(); -#endif - -} - -template template -void ModelSpecifics::computeXBetaImpl(double *beta) { - - for (int k = 0; k < K; ++k) { - RealType sum = 0.0; - IteratorType it(*hXt, k); - for (; it; ++it) { - const auto j = it.index(); - sum += it.value() * beta[j]; - } - hXBeta[k] = sum; - // TODO Add back in for fastest LR -// const auto exb = std::exp(sum); -// offsExpXBeta[k] = exb; -// denomPid[k] = 1.0 + exb; - } - - // std::cerr << "did weird stuff to denomPid" << std::endl; -} - -template -bool ModelSpecifics::allocateXjY(void) { return BaseModel::precomputeGradient; } - -template -bool ModelSpecifics::allocateXjX(void) { return BaseModel::precomputeHessian; } - -template -bool ModelSpecifics::sortPid(void) { return BaseModel::sortPid; } - -template -bool ModelSpecifics::initializeAccumulationVectors(void) { return BaseModel::cumulativeGradientAndHessian; } - -template -bool ModelSpecifics::allocateNtoKIndices(void) { return BaseModel::hasNtoKIndices; } - -template -bool ModelSpecifics::hasResetableAccumulators(void) { return BaseModel::hasResetableAccumulators; } - -template template -void ModelSpecifics::axpy(RealType* y, const RealType alpha, const int index) { - IteratorType it(hX, index); - for (; it; ++it) { - const int k = it.index(); - y[k] += alpha * it.value(); - } -} - -template -void ModelSpecifics::axpyXBeta(const double beta, const int j) { - -#ifdef CYCLOPS_DEBUG_TIMING - auto start = bsccs::chrono::steady_clock::now(); -#endif - - if (beta != 0.0) { - switch (hX.getFormatType(j)) { - case INDICATOR: - axpy < IndicatorIterator > (hXBeta.data(), beta, j); - break; - case INTERCEPT: - axpy < InterceptIterator > (hXBeta.data(), beta, j); - break; - case DENSE: - axpy < DenseIterator > (hXBeta.data(), beta, j); - break; - case SPARSE: - axpy < SparseIterator > (hXBeta.data(), beta, j); - break; - } - } - -#ifdef CYCLOPS_DEBUG_TIMING - auto end = bsccs::chrono::steady_clock::now(); - ///////////////////////////" 
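Editor's note: the format dispatch in axpyXBeta above bottoms out in a plain axpy over one covariate column, y[k] += alpha * x[k] for the column's nonzero rows. A minimal sparse sketch, assuming CSC-style index/value arrays for a single column (hypothetical names, not the iterator machinery used here):

    #include <vector>

    // y += alpha * x for one sparse column; indicator columns would simply
    // skip the value[] lookup, treating every stored entry as 1.
    void sparseAxpy(std::vector<double>& y, double alpha,
                    const std::vector<int>& rowIndex,
                    const std::vector<double>& value) {
        if (alpha == 0.0) return;                // mirrors the beta != 0.0 guard above
        for (size_t n = 0; n < rowIndex.size(); ++n) {
            y[rowIndex[n]] += alpha * value[n];
        }
    }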
- duration["axpy "] += bsccs::chrono::duration_cast(end - start).count(); -#endif - -} - - -// ESK: Added cWeights (censoring weights) as an input -template -void ModelSpecifics::setWeights(double* inWeights, double *cenWeights, bool useCrossValidation) { - // Set K weights - if (hKWeight.size() != K) { - hKWeight.resize(K); - } - - if (useCrossValidation) { - for (size_t k = 0; k < K; ++k) { - hKWeight[k] = inWeights[k]; - } - } else { - std::fill(hKWeight.begin(), hKWeight.end(), static_cast(1)); - } - - if (initializeAccumulationVectors()) { - setPidForAccumulation(inWeights); - } - - // Set N weights (these are the same for independent data models) - if (hNWeight.size() < N + 1) { // Add +1 for extra (zero-weight stratum) - hNWeight.resize(N + 1); - } - - std::fill(hNWeight.begin(), hNWeight.end(), static_cast(0)); - for (size_t k = 0; k < K; ++k) { - RealType event = BaseModel::observationCount(hY[k]) * hKWeight[k]; - incrementByGroup(hNWeight.data(), hPid, k, event); - } - - //ESK: Compute hNtoK indices manually and create censoring weights hYWeight is isTwoWayScan - if (hYWeight.size() != K) { - hYWeight.resize(K); - } - if (hYWeightDouble.size() != K) { - hYWeightDouble.resize(K); - } - if (BaseModel::isTwoWayScan) { - hNtoK.resize(N + 1); - int n = 0; - for (size_t k = 0; k < K;) { - while (hKWeight[k] == static_cast(0)) { - ++k; - } - hNtoK[n] = k; - int currentPid = hPid[k]; - do { - ++k; - } while (k < K && (currentPid == hPid[k] || hKWeight[k] == static_cast(0))); - ++n; - } - hNtoK[n] = K; - - for (size_t k = 0; k < K; ++k) { - hYWeight[k] = cenWeights[k]; - hYWeightDouble[k] = cenWeights[k]; - } - } - -#ifdef DEBUG_COX - cerr << "Done with set weights" << endl; -#endif - -} - -template -void ModelSpecifics::computeXjY(bool useCrossValidation) { - for (size_t j = 0; j < J; ++j) { - hXjY[j] = 0; - - GenericIterator it(hX, j); - - if (useCrossValidation) { - for (; it; ++it) { - const int k = it.index(); - if (BaseModel::exactTies && hNWeight[BaseModel::getGroup(hPid, k)] > 1) { - // Do not precompute - hXjY[j] += it.value() * hY[k] * hKWeight[k]; // TODO Remove - } else if (BaseModel::isSurvivalModel) { - // ESK: Modified for suvival data - hXjY[j] += it.value() * BaseModel::observationCount(hY[k]) * hKWeight[k]; - } else { - hXjY[j] += it.value() * hY[k] * hKWeight[k]; - } - } - } else { - for (; it; ++it) { - const int k = it.index(); - if (BaseModel::exactTies && hNWeight[BaseModel::getGroup(hPid, k)] > 1) { - // Do not precompute - hXjY[j] += it.value() * hY[k]; - } else if (BaseModel::isSurvivalModel) { - // ESK: Modified for survival data - hXjY[j] += it.value() * BaseModel::observationCount(hY[k]); - } else { - hXjY[j] += it.value() * hY[k]; - } - } - } -#ifdef DEBUG_COX - cerr << "j: " << j << " = " << hXjY[j]<< endl; -#endif - } -} - -template -void ModelSpecifics::computeXjX(bool useCrossValidation) { - for (size_t j = 0; j < J; ++j) { - hXjX[j] = 0; - GenericIterator it(hX, j); - - if (useCrossValidation) { - for (; it; ++it) { - const int k = it.index(); - if (BaseModel::exactTies && hNWeight[BaseModel::getGroup(hPid, k)] > 1) { - // Do not precompute - } else { - hXjX[j] += it.value() * it.value() * hKWeight[k]; - } - } - } else { - for (; it; ++it) { - const int k = it.index(); - if (BaseModel::exactTies && hNWeight[BaseModel::getGroup(hPid, k)] > 1) { - // Do not precompute - } else { - hXjX[j] += it.value() * it.value(); - } - } - } - } -} - -template -void ModelSpecifics::computeNtoKIndices(bool useCrossValidation) { - - hNtoK.resize(N+1); - int n = 0; - for 
(size_t k = 0; k < K;) { - hNtoK[n] = k; - int currentPid = hPid[k]; - do { - ++k; - } while (k < K && currentPid == hPid[k]); - ++n; - } - hNtoK[n] = K; -} - -template -void ModelSpecifics::computeFixedTermsInLogLikelihood(bool useCrossValidation) { - if(BaseModel::likelihoodHasFixedTerms) { - logLikelihoodFixedTerm = static_cast(0); - bool hasOffs = hOffs.size() > 0; - if (useCrossValidation) { - for(size_t i = 0; i < K; i++) { - auto offs = hasOffs ? hOffs[i] : static_cast(0); - logLikelihoodFixedTerm += BaseModel::logLikeFixedTermsContrib(hY[i], offs, offs) * hKWeight[i]; - } - } else { - for(size_t i = 0; i < K; i++) { - auto offs = hasOffs ? hOffs[i] : static_cast(0); - logLikelihoodFixedTerm += BaseModel::logLikeFixedTermsContrib(hY[i], offs, offs); // TODO SEGV in Poisson model - } - } - } -} - -template -void ModelSpecifics::computeFixedTermsInGradientAndHessian(bool useCrossValidation) { -#ifdef CYCLOPS_DEBUG_TIMING - auto start = bsccs::chrono::steady_clock::now(); -#endif - if (sortPid()) { - doSortPid(useCrossValidation); - } - if (allocateXjY()) { - computeXjY(useCrossValidation); - } - if (allocateXjX()) { - computeXjX(useCrossValidation); - } - if (allocateNtoKIndices()) { - computeNtoKIndices(useCrossValidation); - } -#ifdef CYCLOPS_DEBUG_TIMING - auto end = bsccs::chrono::steady_clock::now(); - ///////////////////////////" - auto name = "compFixedGH "; - duration[name] += bsccs::chrono::duration_cast(end - start).count(); -#endif -} - -template -double ModelSpecifics::getLogLikelihood(bool useCrossValidation) { - -#ifdef CYCLOPS_DEBUG_TIMING - auto start = bsccs::chrono::steady_clock::now(); -#endif - - RealType logLikelihood = static_cast(0.0); - if (useCrossValidation) { - for (size_t i = 0; i < K; i++) { - logLikelihood += BaseModel::logLikeNumeratorContrib(hY[i], hXBeta[i]) * hKWeight[i]; - } - } else { - for (size_t i = 0; i < K; i++) { - logLikelihood += BaseModel::logLikeNumeratorContrib(hY[i], hXBeta[i]); - } - } - - if (BaseModel::likelihoodHasDenominator) { // Compile-time switch - if(BaseModel::cumulativeGradientAndHessian) { - for (size_t i = 0; i < N; i++) { - // Weights modified in computeNEvents() - logLikelihood -= BaseModel::logLikeDenominatorContrib(hNWeight[i], accDenomPid[i]); - } - } else { // TODO Unnecessary code duplication - for (size_t i = 0; i < N; i++) { - // Weights modified in computeNEvents() - logLikelihood -= BaseModel::logLikeDenominatorContrib(hNWeight[i], denomPid[i]); - } - } - } - - // RANGE - - if (BaseModel::likelihoodHasFixedTerms) { - logLikelihood += logLikelihoodFixedTerm; - } - -#ifdef CYCLOPS_DEBUG_TIMING - auto end = bsccs::chrono::steady_clock::now(); - ///////////////////////////" - duration["compLogLike "] += bsccs::chrono::duration_cast(end - start).count(); -#endif - - return static_cast(logLikelihood); -} - -template -double ModelSpecifics::getPredictiveLogLikelihood(double* weights) { - - std::vector saveKWeight; - if (BaseModel::cumulativeGradientAndHessian) { - - // saveKWeight = hKWeight; // make copy - if (saveKWeight.size() != K) { - saveKWeight.resize(K); - } - for (size_t k = 0; k < K; ++k) { - saveKWeight[k] = hKWeight[k]; // make copy to a double vector - } - - setPidForAccumulation(weights); - setWeights(weights, BaseModel::isTwoWayScan ? 
hYWeightDouble.data() : nullptr, true); // set new weights // TODO Possible error for gfr - computeRemainingStatistics(true); // compute accDenomPid - } - - RealType logLikelihood = static_cast(0.0); - - if (BaseModel::cumulativeGradientAndHessian) { - for (size_t k = 0; k < K; ++k) { - logLikelihood += BaseModel::logPredLikeContrib(hY[k], weights[k], hXBeta[k], &accDenomPid[0], hPid, k); // TODO Going to crash with ties - } - } else { // TODO Unnecessary code duplication - for (size_t k = 0; k < K; ++k) { // TODO Is index of K correct? - logLikelihood += BaseModel::logPredLikeContrib(hY[k], weights[k], hXBeta[k], &denomPid[0], hPid, k); - } - } - - if (BaseModel::cumulativeGradientAndHessian) { - setPidForAccumulation(&saveKWeight[0]); - setWeights(saveKWeight.data(), BaseModel::isTwoWayScan ? hYWeightDouble.data() : nullptr, true); // set old weights // TODO Possible error for gfr - computeRemainingStatistics(true); - } - - return static_cast(logLikelihood); -} - -template -void ModelSpecifics::getPredictiveEstimates(double* y, double* weights){ - - if (weights) { - for (size_t k = 0; k < K; ++k) { - if (weights[k]) { - y[k] = BaseModel::predictEstimate(hXBeta[k]); - } - } - } else { - for (size_t k = 0; k < K; ++k) { - y[k] = BaseModel::predictEstimate(hXBeta[k]); - } - } - // TODO How to remove code duplication above? -} - -template template -void ModelSpecifics::getSchoenfeldResidualsImpl(int index, - std::vector* residuals, - std::vector* times, - double* covariate, - double* score, - Weights w) { - - const bool hasResiduals = residuals != nullptr; - const bool hasTimes = times != nullptr; - const bool hasScore = covariate != nullptr && score != nullptr; - - if (hasResiduals) { - residuals->clear(); - } - if (hasTimes) { - times->clear(); - } - - RealType gradient = static_cast(0); - RealType hessian = static_cast(0); - - // TODO: only written for accummulive models (Cox, Fine/Grey) - - // TODO: can center covariate to avoid overflow in numerators and denominator - - // if (sparseIndices[index] == nullptr || sparseIndices[index]->size() > 0) { - - // IteratorType it(sparseIndices[index].get(), N); - IteratorType it(hX, index); - - RealType resNumerator = static_cast(0); - RealType resDenominator = static_cast(0); - - RealType scoreNumerator1 = static_cast(0); - RealType scoreNumerator2 = static_cast(0); - RealType scoreDenominator = static_cast(0); - - // find start relavent accumulator reset point - auto reset = begin(accReset); - while( *reset < it.index() ) { - ++reset; - } - - for (; it; ) { - int i = it.index(); - const RealType x = it.value(); - - if (*reset <= i) { - resNumerator = static_cast(0); - resDenominator = static_cast(0); - - scoreNumerator1 = static_cast(0); - scoreNumerator2 = static_cast(0); - scoreDenominator = static_cast(0); - - ++reset; - } - - const auto expXBeta = offsExpXBeta[i]; // std::exp(hXBeta[i]); - - resNumerator += expXBeta * x; - resDenominator += expXBeta; - - if (hY[i] == 1 && hasResiduals) { - residuals->push_back(it.value() - resNumerator / resDenominator); - if (hasTimes) { - times->push_back(hOffs[i]); - } - } - - if (hasScore) { - const auto weight = covariate[i]; - // MAS does not believe reweighing is correct, but is matching cox.zph - - // const auto xt = x * covariate[i]; - // MAS believes covariate should be adjusted - const auto numerator1 = expXBeta * x; // TODO not xt? - const auto numerator2 = expXBeta * x * x; // TODO not xt? - - if (hY[i] == 1) { - gradient += x * weight; // TODO not xt and no weight? 
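Editor's note: the residual being collected in getSchoenfeldResidualsImpl is the covariate minus its risk-set-weighted mean at each event time,

    r_i = x_i - sum_{j in R(t_i)} x_j exp(xbeta_j) / sum_{j in R(t_i)} exp(xbeta_j).

A stripped-down sketch under the same sort order the accumulators above assume, where running sums over rows 0..i form the risk set; names are illustrative, and strata, ties, and the score terms are omitted:

    #include <cmath>
    #include <vector>

    // One residual per event row, given rows sorted by decreasing time.
    std::vector<double> schoenfeldResiduals(const std::vector<double>& x,
                                            const std::vector<double>& xBeta,
                                            const std::vector<int>& event) {
        std::vector<double> residuals;
        double num = 0.0, den = 0.0;             // running risk-set sums
        for (size_t i = 0; i < x.size(); ++i) {
            const double w = std::exp(xBeta[i]);
            num += w * x[i];                     // resNumerator above
            den += w;                            // resDenominator above
            if (event[i] == 1) {
                residuals.push_back(x[i] - num / den);
            }
        }
        return residuals;
    }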
- } - - scoreNumerator1 += numerator1; - scoreNumerator2 += numerator2; - - const auto t = scoreNumerator1 / resDenominator; - gradient -= hNWeight[i] * weight * t; - hessian += hNWeight[i] * weight * weight * (scoreNumerator2 / resDenominator - t * t); - } - - ++it; - - if (IteratorType::isSparse) { - - const int next = it ? it.index() : N; - for (++i; i < next; ++i) { - - if (*reset <= i) { - resNumerator = static_cast(0); - resDenominator = static_cast(0); - - scoreNumerator1 = static_cast(0); - scoreNumerator2 = static_cast(0); - scoreDenominator = static_cast(0); - - ++reset; - } - - const auto expXBeta = offsExpXBeta[i]; // std::exp(hXBeta[i]); - - resDenominator += expXBeta; - - if (hY[i] == 1 && hasResiduals) { - residuals->push_back(-resNumerator / resDenominator); - if (hasTimes) { - times->push_back(hOffs[i]); - } - } - - if (hasScore) { - // TODO incr gradient / Hessian - } - } - } - } - - if (hasScore) { - score[0] = static_cast(gradient); - score[1] = static_cast(hessian); - // *score = static_cast(gradient / hessian); - std::cerr << "score = " << gradient << " / " << hessian << "\n"; - // std::cerr << "hess2 = " << hess2 << " or " << static_cast(2.0) * hXjX[index] << "\n"; - } -} - -// TODO The following function is an example of a double-dispatch, rewrite without need for virtual function -template -void ModelSpecifics::computeGradientAndHessian(int index, double *ogradient, - double *ohessian, bool useWeights) { - -#ifdef CYCLOPS_DEBUG_TIMING -#ifndef CYCLOPS_DEBUG_TIMING_LOW - auto start = bsccs::chrono::steady_clock::now(); -#endif -#endif - - if (hX.getNumberOfNonZeroEntries(index) == 0) { - *ogradient = 0.0; *ohessian = 0.0; - return; - } - - // Run-time dispatch, so virtual call should not effect speed - if (useWeights) { - switch (hX.getFormatType(index)) { - case INDICATOR : - computeGradientAndHessianImpl>(index, ogradient, ohessian, weighted); - break; - case SPARSE : - computeGradientAndHessianImpl>(index, ogradient, ohessian, weighted); - break; - case DENSE : - computeGradientAndHessianImpl>(index, ogradient, ohessian, weighted); - break; - case INTERCEPT : - computeGradientAndHessianImpl>(index, ogradient, ohessian, weighted); - break; - } - } else { - switch (hX.getFormatType(index)) { - case INDICATOR : - computeGradientAndHessianImpl>(index, ogradient, ohessian, unweighted); - break; - case SPARSE : - computeGradientAndHessianImpl>(index, ogradient, ohessian, unweighted); - break; - case DENSE : - computeGradientAndHessianImpl>(index, ogradient, ohessian, unweighted); - break; - case INTERCEPT : - computeGradientAndHessianImpl>(index, ogradient, ohessian, unweighted); - break; - } - } - -#ifdef CYCLOPS_DEBUG_TIMING -#ifndef CYCLOPS_DEBUG_TIMING_LOW - auto end = bsccs::chrono::steady_clock::now(); - ///////////////////////////" - duration["compGradAndHess "] += bsccs::chrono::duration_cast(end - start).count(); -#endif -#endif - -#ifdef CYCLOPS_DEBUG_TIMING_GPU_COX - duration["CPU GH "] += bsccs::chrono::duration_cast(end - start).count(); -#endif -} - -template -std::pair operator+( - const std::pair& lhs, - const std::pair& rhs) { - - return { lhs.first + rhs.first, lhs.second + rhs.second }; -} - -template -void ModelSpecifics::computeMMGradientAndHessian( - std::vector& gh, - const std::vector& fixBeta, - bool useWeights) { - - if (norm.size() == 0) { - initializeMM(boundType, fixBeta); - } - -#ifdef CYCLOPS_DEBUG_TIMING -#ifndef CYCLOPS_DEBUG_TIMING_LOW - auto start = bsccs::chrono::steady_clock::now(); -#endif -#endif - - for (int index = 0; index < J; 
++index) { - double *ogradient = &(gh[index].first); - double *ohessian = &(gh[index].second); - - if (fixBeta[index]) { - *ogradient = 0.0; *ohessian = 0.0; - } else { - - // Run-time dispatch, so virtual call should not effect speed - if (useWeights) { - switch (hX.getFormatType(index)) { - case INDICATOR : - computeMMGradientAndHessianImpl>(index, ogradient, ohessian, weighted); - break; - case SPARSE : - computeMMGradientAndHessianImpl>(index, ogradient, ohessian, weighted); - break; - case DENSE : - computeMMGradientAndHessianImpl>(index, ogradient, ohessian, weighted); - break; - case INTERCEPT : - computeMMGradientAndHessianImpl>(index, ogradient, ohessian, weighted); - break; - } - } else { - switch (hX.getFormatType(index)) { - case INDICATOR : - computeMMGradientAndHessianImpl>(index, ogradient, ohessian, unweighted); - break; - case SPARSE : - computeMMGradientAndHessianImpl>(index, ogradient, ohessian, unweighted); - break; - case DENSE : - computeMMGradientAndHessianImpl>(index, ogradient, ohessian, unweighted); - break; - case INTERCEPT : - computeMMGradientAndHessianImpl>(index, ogradient, ohessian, unweighted); - break; - } - } - } - } - -#ifdef CYCLOPS_DEBUG_TIMING -#ifndef CYCLOPS_DEBUG_TIMING_LOW - auto end = bsccs::chrono::steady_clock::now(); - ///////////////////////////" - duration["compMMGradAndHess"] += bsccs::chrono::duration_cast(end - start).count(); -#endif -#endif - -} - -template template -void ModelSpecifics::computeMMGradientAndHessianImpl(int index, double *ogradient, - double *ohessian, Weights w) { - -#ifdef CYCLOPS_DEBUG_TIMING -#ifdef CYCLOPS_DEBUG_TIMING_LOW - auto start = bsccs::chrono::steady_clock::now(); -#endif -#endif - - RealType gradient = static_cast(0); - RealType hessian = static_cast(0); - - IteratorType it(hX, index); - for (; it; ++it) { - const int k = it.index(); - - BaseModel::template incrementMMGradientAndHessian( - gradient, hessian, offsExpXBeta[k], - denomPid[BaseModel::getGroup(hPid, k)], hNWeight[BaseModel::getGroup(hPid, k)], - it.value(), hXBeta[k], hY[k], norm[k]); - } - - if (BaseModel::precomputeGradient) { // Compile-time switch - gradient -= hXjY[index]; - } - - if (BaseModel::precomputeHessian) { // Compile-time switch - hessian += static_cast(2.0) * hXjX[index]; - } - - *ogradient = static_cast(gradient); - *ohessian = static_cast(hessian); - - std::cerr << index << " " << gradient << " " << hessian << std::endl; - -#ifdef CYCLOPS_DEBUG_TIMING -#ifdef CYCLOPS_DEBUG_TIMING_LOW - auto end = bsccs::chrono::steady_clock::now(); - ///////////////////////////" - auto name = "compGradHessMM" + IteratorType::name + ""; - duration[name] += bsccs::chrono::duration_cast(end - start).count(); -#endif -#endif -} - -template template -void ModelSpecifics::computeGradientAndHessianImpl(int index, double *ogradient, - double *ohessian, Weights w) { - -#ifdef CYCLOPS_DEBUG_TIMING -#ifdef CYCLOPS_DEBUG_TIMING_LOW - auto start = bsccs::chrono::steady_clock::now(); -#endif -#endif - - RealType gradient = static_cast(0); - RealType hessian = static_cast(0); - - if (BaseModel::cumulativeGradientAndHessian) { // Compile-time switch - - if (sparseIndices[index] == nullptr || sparseIndices[index]->size() > 0) { - - IteratorType it(sparseIndices[index].get(), N); - - RealType accNumerPid = static_cast(0); - RealType accNumerPid2 = static_cast(0); - RealType decNumerPid = static_cast(0); // ESK: Competing risks contrib to gradient - RealType decNumerPid2 = static_cast(0); // ESK: Competing risks contrib to hessian - - 
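Editor's note: the loop that follows is, at heart, a segmented prefix scan: running numerator and denominator sums that restart at the stratum boundaries recorded in accReset. A minimal standalone version of that primitive (illustrative only; it adds an explicit end() guard, whereas the class code relies on the trailing sentinel entry pushed by setPidForAccumulation):

    #include <vector>

    // Prefix sums of values[], restarting whenever i hits the next entry of
    // resetPoints (assumed sorted ascending).
    std::vector<double> segmentedPrefixScan(const std::vector<double>& values,
                                            const std::vector<int>& resetPoints) {
        std::vector<double> out(values.size());
        double total = 0.0;
        auto reset = resetPoints.begin();
        for (size_t i = 0; i < values.size(); ++i) {
            if (reset != resetPoints.end() && static_cast<size_t>(*reset) == i) {
                total = 0.0;                     // new stratum: restart the sum
                ++reset;
            }
            total += values[i];
            out[i] = total;
        }
        return out;
    }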
//computeBackwardAccumlatedNumerator(it, Weights::isWeighted); // Perform inside loop rather than outside. - - // find start relavent accumulator reset point - auto reset = begin(accReset); - while( *reset < it.index() ) { - ++reset; - } - - for (; it; ) { - int i = it.index(); - - if (*reset <= i) { - accNumerPid = static_cast(0.0); - accNumerPid2 = static_cast(0.0); - ++reset; - } - - const auto numerator1 = numerPid[i]; - const auto numerator2 = numerPid2[i]; - - accNumerPid += numerator1; - accNumerPid2 += numerator2; - - // Compile-time delegation - BaseModel::incrementGradientAndHessian(it, - w, // Signature-only, for iterator-type specialization - &gradient, &hessian, accNumerPid, accNumerPid2, - accDenomPid[i], hNWeight[i], 0.0, hXBeta[i], hY[i]); // When function is in-lined, compiler will only use necessary arguments - ++it; - - if (IteratorType::isSparse) { - - const int next = it ? it.index() : N; - for (++i; i < next; ++i) { - - if (*reset <= i) { - accNumerPid = static_cast(0.0); - accNumerPid2 = static_cast(0.0); - ++reset; - } - - BaseModel::incrementGradientAndHessian(it, - w, // Signature-only, for iterator-type specialization - &gradient, &hessian, accNumerPid, accNumerPid2, - accDenomPid[i], hNWeight[i], static_cast(0), hXBeta[i], hY[i]); // When function is in-lined, compiler will only use necessary arguments - } - } - } - if (BaseModel::isTwoWayScan) { - // Manually perform backwards accumulation here instead of a separate function. - auto revIt = it.reverse(); - - // segmented prefix-scan - RealType totalNumer = static_cast(0); - RealType totalNumer2 = static_cast(0); - - auto backReset = end(accReset) - 1; - - for ( ; revIt; ) { - - int i = revIt.index(); - - if (static_cast(*backReset) == i) { - totalNumer = static_cast(0); - totalNumer2 = static_cast(0); - --backReset; - } - - totalNumer += (hY[hNtoK[i]] > static_cast(1)) ? numerPid[i] / hYWeight[hNtoK[i]] : 0; - totalNumer2 += (hY[hNtoK[i]] > static_cast(1)) ? numerPid2[i] / hYWeight[hNtoK[i]]: 0; - decNumerPid = (hY[hNtoK[i]] == static_cast(1)) ? - hYWeight[hNtoK[i]] * totalNumer : 0; - decNumerPid2 = (hY[hNtoK[i]] == static_cast(1)) ? - hYWeight[hNtoK[i]] * totalNumer2 : 0; - - - // Increase gradient and hessian by competing risks contribution - BaseModel::incrementGradientAndHessian(it, - w, // Signature-only, for iterator-type specialization - &gradient, &hessian, decNumerPid, decNumerPid2, - accDenomPid[i], hNWeight[i], static_cast(0), hXBeta[i], - hY[i]); // When function is in-lined, compiler will only use necess - --revIt; - - if (IteratorType::isSparse) { - - const int next = revIt ? revIt.index() : -1; - for (--i; i > next; --i) { - //if (*reset <= i) { // TODO MAS: This is not correct (only need for stratified models) - // accNumerPid = static_cast(0); - // accNumerPid2 = static_cast(0); - // ++reset; - //} - - decNumerPid = (hY[hNtoK[i]] == static_cast(1)) ? hYWeight[hNtoK[i]] * totalNumer : 0; - decNumerPid2 = (hY[hNtoK[i]] == static_cast(1)) ? 
hYWeight[hNtoK[i]] * totalNumer2 : 0; - BaseModel::incrementGradientAndHessian(it, w, // Signature-only, for iterator-type specialization - &gradient, &hessian, decNumerPid, decNumerPid2, - accDenomPid[i], hNWeight[i], static_cast(0), - hXBeta[i], - hY[i]); // When function is in-lined, compiler will only use necess - } - } - } // End two-way scan - } // End backwards iterator - } - - } else if (BaseModel::hasIndependentRows) { - - IteratorType it(hX, index); - - for (; it; ++it) { - const int i = it.index(); - - RealType numerator1 = BaseModel::gradientNumeratorContrib(it.value(), offsExpXBeta[i], hXBeta[i], hY[i]); - RealType numerator2 = (!IteratorType::isIndicator && BaseModel::hasTwoNumeratorTerms) ? - BaseModel::gradientNumerator2Contrib(it.value(), offsExpXBeta[i]) : static_cast(0); - - // Compile-time delegation - BaseModel::incrementGradientAndHessian(it, - w, // Signature-only, for iterator-type specialization - &gradient, &hessian, numerator1, numerator2, - denomPid[i], hNWeight[i], it.value(), hXBeta[i], hY[i]); // When function is in-lined, compiler will only use necessary arguments - } - - } else if (BaseModel::exactCLR) { - //tbb::mutex mutex0; - -#ifdef USE_RCPP_PARALLEL - - tbb::combinable newGrad(static_cast(0)); - tbb::combinable newHess(static_cast(0)); - - auto func = [&,index](const tbb::blocked_range& range){ - - using std::isinf; - - for (int i = range.begin(); i < range.end(); ++i) { - DenseView x(IteratorType(hX, index), hNtoK[i], hNtoK[i+1]); - int numSubjects = hNtoK[i+1] - hNtoK[i]; - int numCases = hNWeight[i]; - std::vector value = computeHowardRecursion(offsExpXBeta.begin() + hNtoK[i], x, numSubjects, numCases); - if (value[0]==0 || value[1] == 0 || value[2] == 0 || isinf(value[0]) || isinf(value[1]) || isinf(value[2])) { - DenseView newX(IteratorType(hX, index), hNtoK[i], hNtoK[i+1]); - std::vector value = computeHowardRecursion(offsExpXBeta.begin() + hNtoK[i], newX, numSubjects, numCases);//, threadPool);//, hY.begin() + hNtoK[i]); - using namespace sugar; - //mutex0.lock(); - newGrad.local() -= (RealType)(-value[1]/value[0]); - newHess.local() -= (RealType)((value[1]/value[0]) * (value[1]/value[0]) - value[2]/value[0]); - //mutex0.unlock(); - continue; - } - //mutex0.lock(); - newGrad.local() -= (RealType)(-value[1]/value[0]); - newHess.local() -= (RealType)((value[1]/value[0]) * (value[1]/value[0]) - value[2]/value[0]); - //mutex0.unlock(); - } - }; - tbb::parallel_for(tbb::blocked_range(0,N),func); - gradient += newGrad.combine([](const RealType& x, const RealType& y) {return x+y;}); - hessian += newHess.combine([](const RealType& x, const RealType& y) {return x+y;}); -#else - - using std::isinf; - - for (int i=0; i x(IteratorType(hX, index), hNtoK[i], hNtoK[i+1]); - int numSubjects = hNtoK[i+1] - hNtoK[i]; - int numCases = hNWeight[i]; - - std::vector value = computeHowardRecursion(offsExpXBeta.begin() + hNtoK[i], x, numSubjects, numCases);//, threadPool);//, hY.begin() + hNtoK[i]); - //std::cout<<" values" << i <<": "< newX(IteratorType(hX, index), hNtoK[i], hNtoK[i+1]); - std::vector value = computeHowardRecursion(offsExpXBeta.begin() + hNtoK[i], newX, numSubjects, numCases);//, threadPool);//, hY.begin() + hNtoK[i]); - using namespace sugar; - gradient -= (RealType)(-value[1]/value[0]); - hessian -= (RealType)((value[1]/value[0]) * (value[1]/value[0]) - value[2]/value[0]); - continue; - } - //gradient -= (RealType)(value[3] - value[1]/value[0]); - gradient -= (RealType)(-value[1]/value[0]); - hessian -= (RealType)((value[1]/value[0]) * 
(value[1]/value[0]) - value[2]/value[0]); - } -#endif // USE_RCPP_PARALLEL - - } else { - -// auto rangeKey = helper::dependent::getRangeKey( -// hX, index, hPid, -// typename IteratorType::tag()); -// -// auto rangeXNumerator = helper::dependent::getRangeX( -// hX, index, offsExpXBeta, -// typename IteratorType::tag()); -// -// auto rangeGradient = helper::dependent::getRangeGradient( -// sparseIndices[index].get(), N, // runtime error: reference binding to null pointer of type 'struct vector' -// denomPid, hNWeight, -// typename IteratorType::tag()); -// -// const auto result = variants::trial::nested_reduce( -// rangeKey.begin(), rangeKey.end(), // key -// rangeXNumerator.begin(), // inner -// rangeGradient.begin(), // outer -// std::pair{0,0}, Fraction{0,0}, -// TestNumeratorKernel(), // Inner transform-reduce -// TestGradientKernel()); // Outer transform-reduce -// -// gradient = result.real(); -// hessian = result.imag(); - - IteratorType it(hX, index); - const int end = it.size() - 1; - - RealType numerator1 = static_cast(0); - RealType numerator2 = static_cast(0); - int key = hPid[it.index()]; - -// auto incrementNumerators2 = [this](const typename IteratorType::Index i, const typename IteratorType::ValueType x, -// const RealTypePair lhs) -> RealTypePair { -// -// const auto linearPredictor = offsExpXBeta[i]; -// return { -// lhs.first + BaseModel::gradientNumeratorContrib(x, linearPredictor, static_cast(0), static_cast(0)), -// lhs.second + (!IteratorType::isIndicator && BaseModel::hasTwoNumeratorTerms ? -// BaseModel::gradientNumerator2Contrib(x, linearPredictor) : -// static_cast(0)) -// }; -// }; - - auto incrementNumerators = [this,&it,&numerator1,&numerator2]() { - const int i = it.index(); - - numerator1 += BaseModel::gradientNumeratorContrib(it.value(), offsExpXBeta[i], static_cast(0), static_cast(0)); - numerator2 += (!IteratorType::isIndicator && BaseModel::hasTwoNumeratorTerms) ? 
- BaseModel::gradientNumerator2Contrib(it.value(), offsExpXBeta[i]) : - static_cast(0); - }; - - for ( ; it.inRange(end); ++it) { - incrementNumerators(); - - const int nextKey = hPid[it.nextIndex()]; - if (key != nextKey) { - - BaseModel::incrementGradientAndHessian(it, w, // Signature-only, for iterator-type specialization - &gradient, &hessian, numerator1, numerator2, - denomPid[key], hNWeight[key], 0, 0, 0); // When function is in-lined, compiler will only use necessary arguments - - numerator1 = static_cast(0); - numerator2 = static_cast(0); - - key = nextKey; - } - } - - incrementNumerators(); - - BaseModel::incrementGradientAndHessian(it, w, // Signature-only, for iterator-type specialization - &gradient, &hessian, numerator1, numerator2, - denomPid[key], hNWeight[key], 0, 0, 0); // When function is in-lined, compiler will only use necessary arguments - } - - if (BaseModel::precomputeGradient) { // Compile-time switch - gradient -= hXjY[index]; - } - - if (BaseModel::precomputeHessian) { // Compile-time switch - hessian += static_cast(2.0) * hXjX[index]; - } - - std::cerr << "gradient = " << gradient << " @ " << index << "\n"; - std::cerr << "hessian = " << hessian << " @ " << index << "\n"; - - *ogradient = static_cast(gradient); - *ohessian = static_cast(hessian); - -#ifdef CYCLOPS_DEBUG_TIMING -#ifdef CYCLOPS_DEBUG_TIMING_LOW - auto end = bsccs::chrono::steady_clock::now(); - ///////////////////////////" - auto name = "compGradHess" + IteratorType::name + " "; - duration[name] += bsccs::chrono::duration_cast(end - start).count(); -#endif -#endif - -} - -template -void ModelSpecifics::computeThirdDerivative(int index, double *othird, bool useWeights) { - -#ifdef CYCLOPS_DEBUG_TIMING -#ifndef CYCLOPS_DEBUG_TIMING_LOW - auto start = bsccs::chrono::steady_clock::now(); -#endif -#endif - - if (hX.getNumberOfNonZeroEntries(index) == 0) { - *othird = 0.0; - return; - } - - // Run-time dispatch, so virtual call should not effect speed - if (useWeights) { - switch (hX.getFormatType(index)) { - case INDICATOR : - computeThirdDerivativeImpl>(index, othird, weighted); - break; - case SPARSE : - computeThirdDerivativeImpl>(index, othird, weighted); - break; - case DENSE : - computeThirdDerivativeImpl>(index, othird, weighted); - break; - case INTERCEPT : - computeThirdDerivativeImpl>(index, othird, weighted); - break; - } - } else { - switch (hX.getFormatType(index)) { - case INDICATOR : - computeThirdDerivativeImpl>(index, othird, unweighted); - break; - case SPARSE : - computeThirdDerivativeImpl>(index, othird, unweighted); - break; - case DENSE : - computeThirdDerivativeImpl>(index, othird, unweighted); - break; - case INTERCEPT : - computeThirdDerivativeImpl>(index, othird, unweighted); - break; - } - } - -#ifdef CYCLOPS_DEBUG_TIMING -#ifndef CYCLOPS_DEBUG_TIMING_LOW - auto end = bsccs::chrono::steady_clock::now(); - ///////////////////////////" - duration["compThird "] += bsccs::chrono::duration_cast(end - start).count(); -#endif -#endif -} - -template template -void ModelSpecifics::computeThirdDerivativeImpl(int index, double *othird, Weights w) { - -#ifdef CYCLOPS_DEBUG_TIMING -#ifdef CYCLOPS_DEBUG_TIMING_LOW - auto start = bsccs::chrono::steady_clock::now(); -#endif -#endif - - RealType third = static_cast(0); - - if (BaseModel::cumulativeGradientAndHessian) { // Compile-time switch - - if (sparseIndices[index] == nullptr || sparseIndices[index]->size() > 0) { - - IteratorType it(sparseIndices[index].get(), N); - - RealType accNumerPid = static_cast(0); - RealType accNumerPid2 = 
static_cast(0); - - // find start relavent accumulator reset point - auto reset = begin(accReset); - while( *reset < it.index() ) { - ++reset; - } - - for (; it; ) { - int i = it.index(); - - if (*reset <= i) { - accNumerPid = static_cast(0.0); - accNumerPid2 = static_cast(0.0); - ++reset; - } - - const auto numerator1 = numerPid[i]; - const auto numerator2 = numerPid2[i]; - - accNumerPid += numerator1; - accNumerPid2 += numerator2; - - // Compile-time delegation - BaseModel::incrementThirdDerivative(it, - w, // Signature-only, for iterator-type specialization - &third, accNumerPid, accNumerPid2, - accDenomPid[i], hNWeight[i], 0.0, hXBeta[i], hY[i]); // When function is in-lined, compiler will only use necessary arguments - ++it; - - if (IteratorType::isSparse) { - - const int next = it ? it.index() : N; - for (++i; i < next; ++i) { - - if (*reset <= i) { - accNumerPid = static_cast(0.0); - accNumerPid2 = static_cast(0.0); - ++reset; - } - - BaseModel::incrementThirdDerivative(it, - w, // Signature-only, for iterator-type specialization - &third, accNumerPid, accNumerPid2, - accDenomPid[i], hNWeight[i], static_cast(0), hXBeta[i], hY[i]); // When function is in-lined, compiler will only use necessary arguments - } - } - } - } else { - throw new std::logic_error("Not yet support"); - } - } - - *othird = static_cast(third); - -#ifdef CYCLOPS_DEBUG_TIMING -#ifdef CYCLOPS_DEBUG_TIMING_LOW - auto end = bsccs::chrono::steady_clock::now(); - ///////////////////////////" - auto name = "compThird" + IteratorType::name + " "; - duration[name] += bsccs::chrono::duration_cast(end - start).count(); -#endif -#endif -} - -template -void ModelSpecifics::computeSchoenfeldResiduals(int indexOne, - std::vector* residuals, - std::vector* otimes, - double* covariate, - double* score, - bool useWeights) { - if (useWeights) { - throw new std::logic_error("Weights are not yet implemented in Schoenfeld residual calculations"); - } else { // no weights - switch (hX.getFormatType(indexOne)) { - case INDICATOR : - getSchoenfeldResidualsImpl>(indexOne, residuals, otimes, covariate, score, weighted); - break; - case SPARSE : - getSchoenfeldResidualsImpl>(indexOne, residuals, otimes, covariate, score, weighted); - break; - case DENSE : - getSchoenfeldResidualsImpl>(indexOne, residuals, otimes, covariate, score, weighted); - break; - case INTERCEPT : - getSchoenfeldResidualsImpl>(indexOne, residuals, otimes, covariate, score, weighted); - break; - } - } -} - - -template -void ModelSpecifics::computeFisherInformation(int indexOne, int indexTwo, - double *oinfo, bool useWeights) { - - if (useWeights) { - throw new std::logic_error("Weights are not yet implemented in Fisher Information calculations"); - } else { // no weights - switch (hX.getFormatType(indexOne)) { - case INDICATOR : - dispatchFisherInformation>(indexOne, indexTwo, oinfo, weighted); - break; - case SPARSE : - dispatchFisherInformation>(indexOne, indexTwo, oinfo, weighted); - break; - case DENSE : - dispatchFisherInformation>(indexOne, indexTwo, oinfo, weighted); - break; - case INTERCEPT : - dispatchFisherInformation>(indexOne, indexTwo, oinfo, weighted); - break; - } - } -} - -template template -void ModelSpecifics::dispatchFisherInformation(int indexOne, int indexTwo, double *oinfo, Weights w) { - switch (hX.getFormatType(indexTwo)) { - case INDICATOR : - computeFisherInformationImpl>(indexOne, indexTwo, oinfo, w); - break; - case SPARSE : - computeFisherInformationImpl>(indexOne, indexTwo, oinfo, w); - break; - case DENSE : - 
computeFisherInformationImpl>(indexOne, indexTwo, oinfo, w); - break; - case INTERCEPT : - computeFisherInformationImpl>(indexOne, indexTwo, oinfo, w); - break; - } -// std::cerr << "End of dispatch" << std::endl; -} - - -template template -SparseIterator ModelSpecifics::getSubjectSpecificHessianIterator(int index) { - - if (hessianSparseCrossTerms.find(index) == hessianSparseCrossTerms.end()) { - - auto indices = make_shared >(); - auto values = make_shared >(); - - CDCPtr column = bsccs::make_shared>(indices, values, SPARSE); - hessianSparseCrossTerms.insert(std::make_pair(index, column)); - - IteratorType itCross(hX, index); - for (; itCross;) { - RealType value = 0.0; - int currentPid = hPid[itCross.index()]; // TODO Need to fix for stratified Cox - do { - const int k = itCross.index(); - value += BaseModel::gradientNumeratorContrib(itCross.value(), - offsExpXBeta[k], hXBeta[k], hY[k]); - ++itCross; - } while (itCross && currentPid == hPid[itCross.index()]); // TODO Need to fix for stratified Cox - indices->push_back(currentPid); - values->push_back(value); - } - } - return SparseIterator(*hessianSparseCrossTerms[index]); - -} - -template template -void ModelSpecifics::computeFisherInformationImpl(int indexOne, int indexTwo, double *oinfo, Weights w) { - - IteratorTypeOne itOne(hX, indexOne); - IteratorTypeTwo itTwo(hX, indexTwo); - PairProductIterator it(itOne, itTwo); - - RealType information = static_cast(0); - for (; it.valid(); ++it) { - const int k = it.index(); - // Compile-time delegation - - BaseModel::incrementFisherInformation(it, - w, // Signature-only, for iterator-type specialization - &information, - offsExpXBeta[k], - 0.0, 0.0, // numerPid[k], numerPid2[k], // remove - denomPid[BaseModel::getGroup(hPid, k)], - hKWeight[k], it.value(), hXBeta[k], hY[k]); // When function is in-lined, compiler will only use necessary arguments - } - - if (BaseModel::hasStrataCrossTerms) { - - // Check if index is pre-computed -//#define USE_DENSE -#ifdef USE_DENSE - if (hessianCrossTerms.find(indexOne) == hessianCrossTerms.end()) { - // Make new - std::vector crossOneTerms(N); - IteratorTypeOne crossOne(modelData, indexOne); - for (; crossOne; ++crossOne) { - const int k = crossOne.index(); - incrementByGroup(crossOneTerms.data(), hPid, k, - BaseModel::gradientNumeratorContrib(crossOne.value(), offsExpXBeta[k], hXBeta[k], hY[k])); - } - hessianCrossTerms[indexOne]; - hessianCrossTerms[indexOne].swap(crossOneTerms); - } - std::vector& crossOneTerms = hessianCrossTerms[indexOne]; - - // TODO Remove code duplication - if (hessianCrossTerms.find(indexTwo) == hessianCrossTerms.end()) { - std::vector crossTwoTerms(N); - IteratorTypeTwo crossTwo(modelData, indexTwo); - for (; crossTwo; ++crossTwo) { - const int k = crossTwo.index(); - incrementByGroup(crossTwoTerms.data(), hPid, k, - BaseModel::gradientNumeratorContrib(crossTwo.value(), offsExpXBeta[k], hXBeta[k], hY[k])); - } - hessianCrossTerms[indexTwo]; - hessianCrossTerms[indexTwo].swap(crossTwoTerms); - } - std::vector& crossTwoTerms = hessianCrossTerms[indexTwo]; - - // TODO Sparse loop - real cross = 0.0; - for (int n = 0; n < N; ++n) { - cross += crossOneTerms[n] * crossTwoTerms[n] / (denomPid[n] * denomPid[n]); - } - information -= cross; -#else - SparseIterator sparseCrossOneTerms = getSubjectSpecificHessianIterator(indexOne); - SparseIterator sparseCrossTwoTerms = getSubjectSpecificHessianIterator(indexTwo); - PairProductIterator,SparseIterator,RealType> itSparseCross(sparseCrossOneTerms, sparseCrossTwoTerms); - - RealType 
sparseCross = static_cast(0); - for (; itSparseCross.valid(); ++itSparseCross) { - const int n = itSparseCross.index(); - sparseCross += itSparseCross.value() / (denomPid[n] * denomPid[n]); - } - information -= sparseCross; -#endif - } - - *oinfo = static_cast(information); -} - -template -void ModelSpecifics::computeNumeratorForGradient(int index, bool useWeights) { - -#ifdef CYCLOPS_DEBUG_TIMING -#ifndef CYCLOPS_DEBUG_TIMING_LOW - auto start = bsccs::chrono::steady_clock::now(); -#endif -#endif - - if (BaseModel::hasNtoKIndices || BaseModel::cumulativeGradientAndHessian) { - - switch (hX.getFormatType(index)) { - case INDICATOR : { - IndicatorIterator itI(*(sparseIndices)[index]); - for (; itI; ++itI) { // Only affected entries - numerPid[itI.index()] = static_cast(0.0); - } -/* - zeroVector(numerPid.data(), N); - if (BaseModel::efron) { - zeroVector(numerPid3.data(), N); - } -*/ - if (useWeights) { - incrementNumeratorForGradientImpl, WeightedOperation>(index); - } else { - incrementNumeratorForGradientImpl, UnweightedOperation>(index); - } - break; - } - case SPARSE : { - SparseIterator itS(*(sparseIndices)[index]); - for (; itS; ++itS) { // Only affected entries - numerPid[itS.index()] = static_cast(0.0); - if (BaseModel::hasTwoNumeratorTerms) { // Compile-time switch - numerPid2[itS.index()] = static_cast(0.0); // TODO Does this invalid the cache line too much? - } - } -/* - zeroVector(numerPid.data(), N); - if (BaseModel::hasTwoNumeratorTerms) { // Compile-time switch - zeroVector(numerPid2.data(), N); - } - if (BaseModel::efron) { - zeroVector(numerPid3.data(), N); - zeroVector(numerPid4.data(), N); - } -*/ - if (useWeights) { - incrementNumeratorForGradientImpl, WeightedOperation>(index); - } else { - incrementNumeratorForGradientImpl, UnweightedOperation>(index); - } - break; - } - case DENSE : { - zeroVector(numerPid.data(), N); - if (BaseModel::hasTwoNumeratorTerms) { // Compile-time switch - zeroVector(numerPid2.data(), N); - } -/* - if (BaseModel::efron) { - zeroVector(numerPid3.data(), N); - zeroVector(numerPid4.data(), N); - } -*/ - if (useWeights) { - incrementNumeratorForGradientImpl, WeightedOperation>(index); - } else { - incrementNumeratorForGradientImpl, UnweightedOperation>(index); - } - break; - } - case INTERCEPT : { - zeroVector(numerPid.data(), N); - if (BaseModel::hasTwoNumeratorTerms) { // Compile-time switch - zeroVector(numerPid2.data(), N); - } -/* - if (BaseModel::efron) { - zeroVector(numerPid3.data(), N); - zeroVector(numerPid4.data(), N); - } -*/ - if (useWeights) { - incrementNumeratorForGradientImpl, WeightedOperation>(index); - } else { - incrementNumeratorForGradientImpl, UnweightedOperation>(index); - } - break; - } - default : break; - } - } - -#ifdef CYCLOPS_DEBUG_TIMING -#ifndef CYCLOPS_DEBUG_TIMING_LOW - auto end = bsccs::chrono::steady_clock::now(); - ///////////////////////////" - duration["compNumForGrad "] += bsccs::chrono::duration_cast(end - start).count();; -#endif -#endif - -#ifdef CYCLOPS_DEBUG_TIMING_GPU_COX - duration["CPU GH "] += bsccs::chrono::duration_cast(end - start).count(); -#endif -} - -template template -void ModelSpecifics::incrementNumeratorForGradientImpl(int index) { - -#ifdef CYCLOPS_DEBUG_TIMING -#ifdef CYCLOPS_DEBUG_TIMING_LOW - auto start = bsccs::chrono::steady_clock::now(); -#endif -#endif - - IteratorType it(hX, index); - for (; it; ++it) { - const int k = it.index(); - incrementByGroup(numerPid.data(), hPid, k, - Weights::isWeighted ? 
- hKWeight[k] * BaseModel::gradientNumeratorContrib(it.value(), offsExpXBeta[k], hXBeta[k], hY[k]) : - BaseModel::gradientNumeratorContrib(it.value(), offsExpXBeta[k], hXBeta[k], hY[k]) - ); - if (!IteratorType::isIndicator && BaseModel::hasTwoNumeratorTerms) { - incrementByGroup(numerPid2.data(), hPid, k, - Weights::isWeighted ? - hKWeight[k] * BaseModel::gradientNumerator2Contrib(it.value(), offsExpXBeta[k]) : - BaseModel::gradientNumerator2Contrib(it.value(), offsExpXBeta[k]) - ); - } -/* - if (BaseModel::efron) { - incrementByGroup(numerPid3.data(), hPid, k, - Weights::isWeighted ? - hKWeight[k] * hY[k] * BaseModel::gradientNumeratorContrib(it.value(), offsExpXBeta[k], hXBeta[k], hY[k]) : - hY[k] * BaseModel::gradientNumeratorContrib(it.value(), offsExpXBeta[k], hXBeta[k], hY[k]) - ); - } - if (!IteratorType::isIndicator && BaseModel::efron) { - incrementByGroup(numerPid4.data(), hPid, k, - Weights::isWeighted ? - hKWeight[k] * hY[k] * BaseModel::gradientNumerator2Contrib(it.value(), offsExpXBeta[k]) : - hY[k] * BaseModel::gradientNumerator2Contrib(it.value(), offsExpXBeta[k]) - ); - } -*/ - } -#ifdef CYCLOPS_DEBUG_TIMING -#ifdef CYCLOPS_DEBUG_TIMING_LOW - auto end = bsccs::chrono::steady_clock::now(); - ///////////////////////////" - auto name = "compNumGrad" + IteratorType::name + " "; - duration[name] += bsccs::chrono::duration_cast(end - start).count(); -#endif -#endif - -} - -template -void ModelSpecifics::updateXBeta(double delta, int index, bool useWeights) { - -#ifdef CYCLOPS_DEBUG_TIMING -#ifndef CYCLOPS_DEBUG_TIMING_LOW - auto start = bsccs::chrono::steady_clock::now(); -#endif -#endif - - RealType realDelta = static_cast(delta); - - // Run-time dispatch to implementation depending on covariate FormatType - switch(hX.getFormatType(index)) { - case INDICATOR : { - if (useWeights) { - updateXBetaImpl, WeightedOperation>(realDelta, index); - } else { - updateXBetaImpl, UnweightedOperation>(realDelta, index); - } - break; - } - case SPARSE : { - if (useWeights) { - updateXBetaImpl, WeightedOperation>(realDelta, index); - } else { - updateXBetaImpl, UnweightedOperation>(realDelta, index); - } - break; - } - case DENSE : { - if (useWeights) { - updateXBetaImpl, WeightedOperation>(realDelta, index); - } else { - updateXBetaImpl, UnweightedOperation>(realDelta, index); - } - break; - } - case INTERCEPT : { - if (useWeights) { - updateXBetaImpl, WeightedOperation>(realDelta, index); - } else { - updateXBetaImpl, UnweightedOperation>(realDelta, index); - } - break; - } - default : break; - } - -#ifdef CYCLOPS_DEBUG_TIMING -#ifndef CYCLOPS_DEBUG_TIMING_LOW - auto end = bsccs::chrono::steady_clock::now(); - ///////////////////////////" - duration["updateXBeta "] += bsccs::chrono::duration_cast(end - start).count(); -#endif -#endif - -#ifdef CYCLOPS_DEBUG_TIMING_GPU_COX - duration["CPU GH "] += bsccs::chrono::duration_cast(end - start).count(); -#endif - -} - -template template -inline void ModelSpecifics::updateXBetaImpl(RealType realDelta, int index) { - -#ifdef CYCLOPS_DEBUG_TIMING -#ifdef CYCLOPS_DEBUG_TIMING_LOW - auto start = bsccs::chrono::steady_clock::now(); -#endif -#endif - - IteratorType it(hX, index); - - if (BaseModel::cumulativeGradientAndHessian) { // cox - for (; it; ++it) { - const int k = it.index(); - hXBeta[k] += realDelta * it.value(); // TODO Check optimization with indicator and intercept - - if (BaseModel::likelihoodHasDenominator) { // Compile-time switch - RealType oldEntry = Weights::isWeighted ? 
-template <class BaseModel, typename RealType> template <class IteratorType, class Weights>
-inline void ModelSpecifics<BaseModel,RealType>::updateXBetaImpl(RealType realDelta, int index) {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifdef CYCLOPS_DEBUG_TIMING_LOW
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-#endif
-
-    IteratorType it(hX, index);
-
-    if (BaseModel::cumulativeGradientAndHessian) { // cox
-        for (; it; ++it) {
-            const int k = it.index();
-            hXBeta[k] += realDelta * it.value(); // TODO Check optimization with indicator and intercept
-
-            if (BaseModel::likelihoodHasDenominator) { // Compile-time switch
-                RealType oldEntry = Weights::isWeighted ?
-                    hKWeight[k] * offsExpXBeta[k] : offsExpXBeta[k]; // TODO Delegate condition to forming offsExpXBeta
-                offsExpXBeta[k] = BaseModel::getOffsExpXBeta(hOffs.data(), hXBeta[k], hY[k], k); // Update offsExpXBeta
-                RealType newEntry = Weights::isWeighted ?
-                    hKWeight[k] * offsExpXBeta[k] : offsExpXBeta[k]; // TODO Delegate condition
-                incrementByGroup(denomPid.data(), hPid, k, (newEntry - oldEntry)); // Update denominators
-            }
-        }
-    } else {
-        for (; it; ++it) {
-            const int k = it.index();
-            hXBeta[k] += realDelta * it.value(); // TODO Check optimization with indicator and intercept
-
-            if (BaseModel::likelihoodHasDenominator) { // Compile-time switch
-                RealType oldEntry = offsExpXBeta[k];
-                RealType newEntry = offsExpXBeta[k] = BaseModel::getOffsExpXBeta(hOffs.data(), hXBeta[k], hY[k], k);
-                incrementByGroup(denomPid.data(), hPid, k, (newEntry - oldEntry));
-/*
-                if (BaseModel::efron) {
-                    incrementByGroup(denomPid2.data(), hPid, k, hY[k]*(newEntry - oldEntry)); // Update denominators
-                }
-*/
-            }
-        }
-    }
-
-    computeAccumlatedDenominator(Weights::isWeighted);
-
-#ifdef CYCLOPS_DEBUG_TIMING
-#ifdef CYCLOPS_DEBUG_TIMING_LOW
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    auto name = "updateXBeta" + IteratorType::name + " ";
-    duration[name] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-#endif
-
-}
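[Editor's note] The key idea above is the incremental denominator update: when a single beta moves, each group's denominator changes only by (newEntry - oldEntry) for the rows it owns, so the sum never has to be rebuilt from scratch. A minimal sketch of that delta update, assuming an exponential link and dense x; all names (pid, applyDelta, ...) are hypothetical:

    #include <cmath>
    #include <vector>

    void applyDelta(double delta, const std::vector<int>& rows,
                    const std::vector<double>& x, const std::vector<int>& pid,
                    std::vector<double>& xBeta, std::vector<double>& expXBeta,
                    std::vector<double>& denom) {
        for (const int k : rows) {
            xBeta[k] += delta * x[k];
            const double oldEntry = expXBeta[k];
            expXBeta[k] = std::exp(xBeta[k]);
            denom[pid[k]] += expXBeta[k] - oldEntry; // O(1) per touched row
        }
    }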
-template <class BaseModel, typename RealType> template <class Weights>
-void ModelSpecifics<BaseModel,RealType>::computeRemainingStatisticsImpl() {
-
-    auto& xBeta = getXBeta();
-
-    if (BaseModel::likelihoodHasDenominator) {
-        fillVector(denomPid.data(), N, BaseModel::getDenomNullValue());
-/*
-        if (BaseModel::efron) {
-            fillVector(denomPid2.data(), N, BaseModel::getDenomNullValue());
-        }
-*/
-        if (BaseModel::cumulativeGradientAndHessian) { // cox
-            for (size_t k = 0; k < K; ++k) {
-                offsExpXBeta[k] = BaseModel::getOffsExpXBeta(hOffs.data(), xBeta[k], hY[k], k);
-                RealType weightoffsExpXBeta = Weights::isWeighted ?
-                    hKWeight[k] * BaseModel::getOffsExpXBeta(hOffs.data(), xBeta[k], hY[k], k) :
-                    BaseModel::getOffsExpXBeta(hOffs.data(), xBeta[k], hY[k], k);
-                incrementByGroup(denomPid.data(), hPid, k, weightoffsExpXBeta); // Update denominators
-            }
-        } else {
-            for (size_t k = 0; k < K; ++k) {
-                offsExpXBeta[k] = BaseModel::getOffsExpXBeta(hOffs.data(), xBeta[k], hY[k], k);
-                incrementByGroup(denomPid.data(), hPid, k, offsExpXBeta[k]);
-/*
-                if (BaseModel::efron) {
-                    incrementByGroup(denomPid2.data(), hPid, k, hY[k]*offsExpXBeta[k]); // Update denominators
-                }
-*/
-            }
-        }
-        computeAccumlatedDenominator(Weights::isWeighted);
-    }
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeRemainingStatistics(bool useWeights) {
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto start = bsccs::chrono::steady_clock::now();
-#endif
-
-    if (useWeights) {
-        computeRemainingStatisticsImpl<WeightedOperation>();
-    } else {
-        computeRemainingStatisticsImpl<UnweightedOperation>();
-    }
-
-#ifdef CYCLOPS_DEBUG_TIMING
-    auto end = bsccs::chrono::steady_clock::now();
-    ///////////////////////////"
-    duration["compRS "] += bsccs::chrono::duration_cast<chrono::TimingUnits>(end - start).count();
-#endif
-
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeAccumlatedNumerator(bool useWeights) {
-
-    if (BaseModel::likelihoodHasDenominator && // The two switches should ideally be separated
-        BaseModel::cumulativeGradientAndHessian) { // Compile-time switch
-
-        if (accNumerPid.size() != N) {
-            accNumerPid.resize(N, static_cast<RealType>(0));
-        }
-        if (accNumerPid2.size() != N) {
-            accNumerPid2.resize(N, static_cast<RealType>(0));
-        }
-
-        // segmented prefix-scan
-        RealType totalNumer = static_cast<RealType>(0);
-        RealType totalNumer2 = static_cast<RealType>(0);
-
-        auto reset = begin(accReset);
-
-        for (size_t i = 0; i < N; ++i) {
-
-            if (static_cast<size_t>(*reset) == i) {
-                totalNumer = static_cast<RealType>(0);
-                totalNumer2 = static_cast<RealType>(0);
-                ++reset;
-            }
-
-            totalNumer += numerPid[i];
-            totalNumer2 += numerPid2[i];
-            accNumerPid[i] = totalNumer;
-            accNumerPid2[i] = totalNumer2;
-        }
-    }
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeAccumlatedDenominator(bool useWeights) {
-
-    if (BaseModel::likelihoodHasDenominator && // The two switches should ideally be separated
-        BaseModel::cumulativeGradientAndHessian) { // Compile-time switch
-
-        if (accDenomPid.size() != (N + 1)) {
-            accDenomPid.resize(N + 1, static_cast<RealType>(0));
-        }
-
-        // segmented prefix-scan
-        RealType totalDenom = static_cast<RealType>(0);
-
-        auto reset = begin(accReset);
-
-        for (size_t i = 0; i < N; ++i) {
-
-            if (static_cast<size_t>(*reset) == i) { // TODO Check with SPARSE
-                totalDenom = static_cast<RealType>(0);
-                ++reset;
-            }
-
-            totalDenom += denomPid[i];
-            accDenomPid[i] = totalDenom;
-        }
-
-        // ESK: Incorporate backwards scan here:
-        if (BaseModel::isTwoWayScan) {
-            // RealType backDenom = static_cast<RealType>(0);
-            RealType totalDenom = static_cast<RealType>(0);
-
-            auto reset = begin(accReset);
-
-            // Q: How can we change int to size_t w/o errors
-            for (int i = (N - 1); i >= 0; i--) {
-
-                if (static_cast<int>(*reset) == i) { // TODO Check with SPARSE
-                    totalDenom = static_cast<RealType>(0);
-                    ++reset;
-                }
-
-                totalDenom += (hY[hNtoK[i]] > static_cast<RealType>(1)) ? denomPid[i] / hYWeight[hNtoK[i]] : 0;
-                accDenomPid[i] += (hY[hNtoK[i]] == static_cast<RealType>(1)) ? hYWeight[hNtoK[i]] * totalDenom : 0;
-            }
-        } // End two-way scan
-    }
-}
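[Editor's note] The forward pass above is a segmented prefix-scan: a running sum over the denominators that restarts at every position recorded in accReset, so accumulation never crosses a stratum boundary. A minimal self-contained sketch of the same scan (resets holds the start positions of new strata; names are illustrative):

    #include <cstddef>
    #include <vector>

    std::vector<double> segmentedScan(const std::vector<double>& values,
                                      const std::vector<std::size_t>& resets) {
        std::vector<double> acc(values.size());
        double total = 0.0;
        auto reset = resets.begin();
        for (std::size_t i = 0; i < values.size(); ++i) {
            if (reset != resets.end() && *reset == i) {
                total = 0.0; // new stratum: restart the accumulation
                ++reset;
            }
            total += values[i];
            acc[i] = total;
        }
        return acc;
    }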
-// ESK: This is no longer needed. Incorporated in incrementGradientAndHessian
-template <class BaseModel, typename RealType> template <class IteratorType>
-void ModelSpecifics<BaseModel,RealType>::computeBackwardAccumlatedNumerator(
-        IteratorType it,
-        bool useWeights) {
-
-    if (decNumerPid.size() != N) {
-        decNumerPid.resize(N, static_cast<RealType>(0));
-    }
-    if (decNumerPid2.size() != N) {
-        decNumerPid2.resize(N, static_cast<RealType>(0));
-    }
-    auto revIt = it.reverse();
-
-    // segmented prefix-scan
-    RealType totalNumer = static_cast<RealType>(0);
-    RealType totalNumer2 = static_cast<RealType>(0);
-
-    auto reset = end(accReset) - 1;
-
-    for ( ; revIt; ) {
-
-        int i = revIt.index();
-
-        if (static_cast<int>(*reset) == i) {
-            totalNumer = static_cast<RealType>(0);
-            totalNumer2 = static_cast<RealType>(0);
-            --reset;
-        }
-
-        totalNumer += (hY[hNtoK[i]] > static_cast<RealType>(1)) ? numerPid[i] / hYWeight[hNtoK[i]] : 0;
-        totalNumer2 += (hY[hNtoK[i]] > static_cast<RealType>(1)) ? numerPid2[i] / hYWeight[hNtoK[i]] : 0;
-        decNumerPid[i] = (hY[hNtoK[i]] == static_cast<RealType>(1)) ? hYWeight[hNtoK[i]] * totalNumer : 0;
-        decNumerPid2[i] = (hY[hNtoK[i]] == static_cast<RealType>(1)) ? hYWeight[hNtoK[i]] * totalNumer2 : 0;
-        --revIt;
-
-        if (IteratorType::isSparse) {
-
-            const int next = revIt ? revIt.index() : -1;
-            for (--i; i > next; --i) { // TODO MAS: This may be incorrect
-                //if (*reset <= i) { // TODO MAS: This is not correct (only needed for stratified models)
-                //    accNumerPid = static_cast<RealType>(0);
-                //    accNumerPid2 = static_cast<RealType>(0);
-                //    ++reset;
-                //}
-
-                // ESK: Start implementing sparse
-                decNumerPid[i] = (hY[hNtoK[i]] == static_cast<RealType>(1)) ? hYWeight[hNtoK[i]] * totalNumer : 0;
-                decNumerPid2[i] = (hY[hNtoK[i]] == static_cast<RealType>(1)) ? hYWeight[hNtoK[i]] * totalNumer2 : 0;
-            }
-        }
-    }
-}
-
-// ESK: This is no longer needed. Incorporated in computeAccumlatedDenominator
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::computeBackwardAccumlatedDenominator(bool useWeights) {
-
-    if (BaseModel::likelihoodHasDenominator && // The two switches should ideally be separated
-        BaseModel::cumulativeGradientAndHessian) { // Compile-time switch
-
-        // segmented prefix-scan
-        RealType backDenom = static_cast<RealType>(0);
-        RealType totalDenom = static_cast<RealType>(0);
-
-        auto reset = begin(accReset);
-
-        // Q: How can we change int to size_t w/o errors
-        for (int i = (N - 1); i >= 0; i--) {
-
-            if (static_cast<int>(*reset) == i) { // TODO Check with SPARSE
-                totalDenom = static_cast<RealType>(0);
-                ++reset;
-            }
-
-            totalDenom += (hY[hNtoK[i]] > static_cast<RealType>(1)) ? denomPid[i] / hYWeight[hNtoK[i]] : 0;
-            accDenomPid[i] += (hY[hNtoK[i]] == static_cast<RealType>(1)) ? hYWeight[hNtoK[i]] * totalDenom : 0;
-        }
-    }
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::doSortPid(bool useCrossValidation) {
-/* For Cox model:
- *
- * We currently assume that hZ[k] are sorted in decreasing order by k.
- *
- */
-}
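[Editor's note] Reading the hY tests above: rows with hY > 1 appear to feed a weighted running sum while scanning backwards, and rows with hY == 1 harvest it, which is the shape of a two-way (forward plus backward) scan for weighted risk sets with competing events. That reading is an interpretation, and the sketch below is a simplified, unstratified illustration of it; the 'code' encoding and all names are hypothetical:

    #include <vector>

    // code: 2 = competing event (feeds), 1 = main event (harvests), 0 = censored
    void backwardScan(const std::vector<double>& denom,
                      const std::vector<int>& code,
                      const std::vector<double>& weight,
                      std::vector<double>& accDenom) {
        double total = 0.0;
        for (int i = static_cast<int>(denom.size()) - 1; i >= 0; --i) {
            if (code[i] > 1) {
                total += denom[i] / weight[i];    // competing event feeds the sum
            } else if (code[i] == 1) {
                accDenom[i] += weight[i] * total; // main event harvests it
            }
        }
    }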
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::getOriginalPid() {
-
-    hPidInternal = hPidOriginal; // Make copy
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::setPidForAccumulation(const double* weights) {
-    setPidForAccumulationImpl(weights);
-}
-
-template <class BaseModel, typename RealType> template <typename AnyRealType>
-void ModelSpecifics<BaseModel,RealType>::setPidForAccumulationImpl(const AnyRealType* weights) {
-
-    hPidInternal = hPidOriginal; // Make copy
-    hPid = hPidInternal.data(); // Point to copy
-    hPidSize = hPidInternal.size();
-    accReset.clear();
-
-    const int ignore = -1;
-
-    // Find first non-zero weight; check the bound before dereferencing
-    size_t index = 0;
-    while (weights != nullptr && index < K && weights[index] == 0.0) {
-        hPid[index] = ignore;
-        index++;
-    }
-
-    int lastPid = hPid[index];
-    AnyRealType lastTime = hOffs[index];
-    AnyRealType lastEvent = hY[index];
-
-    int pid = hPid[index] = 0;
-
-    for (size_t k = index + 1; k < K; ++k) {
-        if (weights == nullptr || weights[k] != 0.0) {
-            int nextPid = hPid[k];
-
-            if (nextPid != lastPid) { // start new strata
-                pid++;
-                accReset.push_back(pid);
-                lastPid = nextPid;
-            } else {
-                if (lastEvent == 1.0 && lastTime == hOffs[k] && lastEvent == hY[k]) {
-                    // In a tie, do not increment denominator
-                } else {
-                    pid++;
-                }
-            }
-            lastTime = hOffs[k];
-            lastEvent = hY[k];
-
-            hPid[k] = pid;
-        } else {
-            hPid[k] = ignore;
-        }
-    }
-    pid++;
-    accReset.push_back(pid);
-
-    // Save number of denominators
-    N = pid;
-
-    if (weights != nullptr) {
-        for (size_t i = 0; i < K; ++i) {
-            if (hPid[i] == ignore) hPid[i] = N; // do NOT accumulate, since loops use: i < N
-        }
-    }
-    setupSparseIndices(N); // ignore pid == N (pointing to removed data strata)
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::setupSparseIndices(const int max) {
-    sparseIndices.clear(); // empty if full!
-
-    for (size_t j = 0; j < J; ++j) {
-        if (hX.getFormatType(j) == DENSE || hX.getFormatType(j) == INTERCEPT) {
-            sparseIndices.push_back(NULL);
-        } else {
-            std::set<int> unique;
-            const size_t n = hX.getNumberOfEntries(j);
-            const int* indicators = hX.getCompressedColumnVector(j);
-            for (size_t i = 0; i < n; i++) { // Loop through non-zero entries only
-                const int k = indicators[i];
-                const int p = (k < hPidSize) ? hPid[k] : k;
-                if (p < max) {
-                    unique.insert(p);
-                }
-            }
-            auto indices = bsccs::make_shared<std::vector<int> >(unique.begin(), unique.end());
-            sparseIndices.push_back(indices);
-        }
-    }
-}
-
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::deviceInitialization() {
-    // Do nothing
-}
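[Editor's note] The remapping above collapses tied event rows onto the same denominator slot: within a stratum, the risk-set index only advances when the (event, time) pair changes, so ties share a denominator. A minimal single-stratum sketch of that rule, with hypothetical names:

    #include <cstddef>
    #include <vector>

    std::vector<int> mapRowsToDenominators(const std::vector<double>& time,
                                           const std::vector<double>& event) {
        std::vector<int> pid(time.size(), 0);
        int slot = 0;
        for (std::size_t k = 1; k < time.size(); ++k) {
            const bool tied = event[k - 1] == 1.0 && event[k] == 1.0
                    && time[k - 1] == time[k];
            if (!tied) ++slot; // a tie keeps the previous slot
            pid[k] = slot;
        }
        return pid;
    }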
-template <class BaseModel, typename RealType>
-void ModelSpecifics<BaseModel,RealType>::initialize(
-        int iN,
-        int iK,
-        int iJ,
-        const void*,
-        double* iNumerPid,
-        double* iNumerPid2,
-        double* iDenomPid,
-        double* iXjY,
-        std::vector<std::vector<int>* >* iSparseIndices,
-        const int* iPid_unused,
-        double* iOffsExpXBeta,
-        double* iXBeta,
-        double* iOffs,
-        double* iBeta,
-        const double* iY_unused) {
-
-    N = iN;
-    K = iK;
-    J = iJ;
-    offsExpXBeta.resize(K);
-    hXBeta.resize(K);
-
-    if (allocateXjY()) {
-        hXjY.resize(J);
-    }
-
-    if (allocateXjX()) {
-        hXjX.resize(J);
-    }
-
-#ifdef CYCLOPS_DEBUG_TIMING_GPU_COX
-    auto start = std::chrono::steady_clock::now();
-#endif
-    if (initializeAccumulationVectors()) {
-        setPidForAccumulation(static_cast<double*>(nullptr)); // calls setupSparseIndices() before returning
-    } else {
-        // TODO Suspect below is not necessary for non-grouped data.
-        // If true, then fill with pointers to CompressedDataColumn and do not delete in destructor
-        setupSparseIndices(N); // Need to be recomputed when hPid change!
-    }
-#ifdef CYCLOPS_DEBUG_TIMING_GPU_COX
-    auto end = std::chrono::steady_clock::now();
-    double timerPid = std::chrono::duration<double>(end - start).count();
-    std::cout << " OVERHEAD CCD setPid: " << timerPid << " s \n";
-#endif
-
-    size_t alignedLength = getAlignedLength(N + 1);
-    // numerDenomPidCache.resize(3 * alignedLength, 0);
-    // numerPid = numerDenomPidCache.data();
-    // denomPid = numerPid + alignedLength; // Nested in denomPid allocation
-    // numerPid2 = numerPid + 2 * alignedLength;
-    denomPid.resize(alignedLength);
-    denomPid2.resize(alignedLength);
-    numerPid.resize(alignedLength);
-    numerPid2.resize(alignedLength);
-    numerPid3.resize(alignedLength);
-    numerPid4.resize(alignedLength);
-
-    deviceInitialization();
-}
-
-} // namespace
-
-#endif /* MODELSPECIFICS_HPP_ */
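[Editor's note] The accumulator buffers above are sized with getAlignedLength(N + 1) rather than N + 1 so that each vector's length lands on an alignment-friendly boundary. A hedged sketch of such a helper; the granularity of 16 is illustrative, since the real helper's multiple is defined elsewhere in Cyclops:

    #include <cstddef>

    // Round n up to the next multiple (padding for SIMD-friendly access).
    std::size_t alignedLength(std::size_t n, std::size_t multiple = 16) {
        return ((n + multiple - 1) / multiple) * multiple;
    }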