From 99005a6f7624bc204c0ba921fa5a74e97d514c18 Mon Sep 17 00:00:00 2001 From: "Marc A. Suchard" Date: Thu, 7 Nov 2024 08:12:03 -0800 Subject: [PATCH] first working impl of Schoenfeld residuals and PH test (for no strata) --- NAMESPACE | 2 + R/Residuals.R | 60 +++++++++++++++--- man/residuals.cyclopsFit.Rd | 2 +- man/testProportionality.Rd | 2 +- src/RcppCyclopsInterface.cpp | 21 +++++-- src/cyclops/engine/ModelSpecifics.hpp | 88 +++++++++++++++++++++------ tests/testthat/test-residuals.R | 20 +++--- 7 files changed, 150 insertions(+), 45 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 9aa3f4ab..0a06d10e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ S3method(logLik,cyclopsFit) S3method(predict,cyclopsFit) S3method(print,cyclopsData) S3method(print,cyclopsFit) +S3method(print,cyclopsZph) S3method(residuals,cyclopsFit) S3method(summary,cyclopsData) S3method(survfit,cyclopsFit) @@ -69,6 +70,7 @@ importFrom(stats,model.response) importFrom(stats,pchisq) importFrom(stats,poisson) importFrom(stats,predict) +importFrom(stats,printCoefmat) importFrom(stats,qchisq) importFrom(stats,qnorm) importFrom(stats,rbinom) diff --git a/R/Residuals.R b/R/Residuals.R index 66faec1c..438dc4a4 100644 --- a/R/Residuals.R +++ b/R/Residuals.R @@ -32,16 +32,20 @@ #' @importFrom stats residuals #' #' @export -residuals.cyclopsFit <- function(object, parm, type = "schoenfeld", ...) { - modelType <- object$cyclopsData$modelType - if (modelType != "cox") { - stop("Residuals for only Cox models are implemented") +residuals.cyclopsFit <- function(object, parm = NULL, type = "schoenfeld", ...) { + + .checkInterface(object$cyclopsData, testOnly = TRUE) + + if (object$cyclopsData$modelType != "cox") { + stop("Residuals for only Cox models are currently implemented") } if (type != "schoenfeld") { - stop("Only Schoenfeld residuals are implemented") + stop("Only Schoenfeld residuals are currently implemented") } - .checkInterface(object$cyclopsData, testOnly = TRUE) + if (getNumberOfCovariates(object$cyclopsData) != 1) { + warning("Only single-covariate models are currently implemented") # TODO change to stop + } res <- .cyclopsGetSchoenfeldResiduals(object$interface, NULL) @@ -63,22 +67,60 @@ residuals.cyclopsFit <- function(object, parm, type = "schoenfeld", ...) { #' @param transformedTimes Vector of transformed time #' #' @export -testProportionality <- function(object, parm, transformedTimes) { +testProportionality <- function(object, parm = NULL, transformedTimes) { .checkInterface(object$cyclopsData, testOnly = TRUE) if (object$cyclopsData$modelType != "cox") { - stop("Proportionality test for only Cox models are implemented") + stop("Proportionality test for only Cox models are currently implemented") } + nCovariates <- getNumberOfCovariates(object$cyclopsData) + if (nCovariates != 1) { + warning("Only single-covariate models are currently implemented") # TODO change to stop + } + + if (getNumberOfRows(object$cyclopsData) != length(transformedTimes)) { stop("Incorrect 'transformedTime' length") } + # transformedTimes <- transformedTimes - mean(transformedTimes) transformedTimes <- transformedTimes[object$cyclopsData$sortOrder] - message("TODO: permute transformedTimes") res <- .cyclopsTestProportionality(object$interface, NULL, transformedTimes) + nCovariates <- 1 # TODO Remove + res$hessian <- matrix(res$hessian, nrow = (nCovariates + 1)) + + if (any(abs(res$gradient[1:nCovariates]) > 1E-5)) { + stop("Internal state of Cyclops 'object' is not at its mode") + } + + u <- c(rep(0, nCovariates), res$gradient[nCovariates + 1]) + test <- drop(solve(res$hessian, u) %*% u) + df <- 1 + + tbl <- cbind(test, df, pchisq(test, df, lower.tail = FALSE)) + + names <- as.character(getCovariateIds(object$cyclopsData)[1]) + if (!is.null(object$cyclopsData$coefficientNames)) { + names <- object$coefficientNames[1] + } + + dimnames(tbl) <- list(names, c("chisq", "df", "p")) + res$table <- tbl + + class(res) <- "cyclopsZph" return(res) } + +#' @method print cyclopsZph +#' @importFrom stats printCoefmat +#' +#' @export +print.cyclopsZph <- function(x, digits = max(options()$digits - 4, 3), + signif.stars = FALSE, ...) { + invisible(printCoefmat(x$table, digits=digits, signif.stars=signif.stars, + P.values=TRUE, has.Pvalue=TRUE, ...)) +} diff --git a/man/residuals.cyclopsFit.Rd b/man/residuals.cyclopsFit.Rd index 174d5015..3df56494 100644 --- a/man/residuals.cyclopsFit.Rd +++ b/man/residuals.cyclopsFit.Rd @@ -4,7 +4,7 @@ \alias{residuals.cyclopsFit} \title{Model residuals} \usage{ -\method{residuals}{cyclopsFit}(object, parm, type = "schoenfeld", ...) +\method{residuals}{cyclopsFit}(object, parm = NULL, type = "schoenfeld", ...) } \arguments{ \item{object}{A Cyclops model fit object} diff --git a/man/testProportionality.Rd b/man/testProportionality.Rd index 3949e40d..8027f52b 100644 --- a/man/testProportionality.Rd +++ b/man/testProportionality.Rd @@ -4,7 +4,7 @@ \alias{testProportionality} \title{Test hazard ratio proportionality assumption} \usage{ -testProportionality(object, parm, transformedTimes) +testProportionality(object, parm = NULL, transformedTimes) } \arguments{ \item{object}{A Cyclops model fit object} diff --git a/src/RcppCyclopsInterface.cpp b/src/RcppCyclopsInterface.cpp index e5998b44..e60c532a 100644 --- a/src/RcppCyclopsInterface.cpp +++ b/src/RcppCyclopsInterface.cpp @@ -246,16 +246,27 @@ Rcpp::List cyclopsTestProportionality(SEXP inRcppCcdInterface, std::vector residuals; std::vector times; - std::vector score; - score.resize(2); - // double score = 0.0; + const int nCovariates = 1; // TODO generalize for multiple covariates + const int dim = nCovariates + 1; + std::vector score(dim * (dim + 1)); interface->getCcd().getSchoenfeldResiduals(indices[0], &residuals, ×, &covariate, &score[0]); + std::vector gradient(dim); + std::vector hessian(dim * dim); + + for (int i = 0; i < dim; ++i) { + gradient[i] = score[i]; + for (int j = 0; j < dim; ++j) { + hessian[i * dim + j] = score[dim + i * dim + j]; + } + } + return List::create( - Named("transformed") = covariate, - Named("score") = score, + Named("weights") = covariate, + Named("gradient") = gradient, + Named("hessian") = hessian, Named("residuals") = residuals, Named("times") = times ); diff --git a/src/cyclops/engine/ModelSpecifics.hpp b/src/cyclops/engine/ModelSpecifics.hpp index 9b20197f..6aadcbb1 100644 --- a/src/cyclops/engine/ModelSpecifics.hpp +++ b/src/cyclops/engine/ModelSpecifics.hpp @@ -732,6 +732,13 @@ void ModelSpecifics::getPredictiveEstimates(double* y, doubl // TODO How to remove code duplication above? } + +template +void makeDenseCrossProduct(const CompressedDataMatrix& x, + int index1, int index2) { + +} + template template void ModelSpecifics::getSchoenfeldResidualsImpl(int index, std::vector* residuals, @@ -742,7 +749,7 @@ void ModelSpecifics::getSchoenfeldResidualsImpl(int index, const bool hasResiduals = residuals != nullptr; const bool hasTimes = times != nullptr; - const bool hasScore = covariate != nullptr && score != nullptr; + const bool hasScore = score != nullptr && covariate != nullptr; if (hasResiduals) { residuals->clear(); @@ -751,14 +758,16 @@ void ModelSpecifics::getSchoenfeldResidualsImpl(int index, times->clear(); } - RealType gradient = static_cast(0); - RealType hessian = static_cast(0); + RealType uGradient = static_cast(0); // unweighted + RealType uHessian = static_cast(0); - // TODO: only written for accummulive models (Cox, Fine/Grey) + RealType wGradient = static_cast(0); // weighted + RealType wHessian = static_cast(0); + + RealType xHessian = static_cast(0); - // if (sparseIndices[index] == nullptr || sparseIndices[index]->size() > 0) { + // TODO: only written for accummulive models (Cox, Fine/Grey) - // IteratorType it(sparseIndices[index].get(), N); IteratorType it(hX, index); RealType resNumerator = static_cast(0); @@ -802,21 +811,32 @@ void ModelSpecifics::getSchoenfeldResidualsImpl(int index, } if (hasScore) { + const auto weight = covariate[i]; + // MAS does not believe reweighing is correct, but is matching cox.zph - const auto xt = x * covariate[i]; - const auto numerator1 = expXBeta * xt; - const auto numerator2 = expXBeta * xt * xt; + // const auto xt = x * covariate[i]; + // MAS believes covariate should be adjusted + const auto numerator1 = expXBeta * x; // TODO not xt? + const auto numerator2 = expXBeta * x * x; // TODO not xt? + + if (hY[i] == 1) { + uGradient += x; + wGradient += x * weight; // TODO not xt and no weight? + } scoreNumerator1 += numerator1; scoreNumerator2 += numerator2; const auto t = scoreNumerator1 / resDenominator; - gradient += hNWeight[i] * t; - hessian += hNWeight[i] * (scoreNumerator2 / resDenominator - t * t); + const auto gradient = hNWeight[i] * t; + const auto hessian = hNWeight[i] * (scoreNumerator2 / resDenominator - t * t); - if (hY[i] == 1) { - gradient -= xt; - } + uGradient -= gradient; + wGradient -= gradient * weight; + + uHessian += hessian; + wHessian += hessian * weight * weight; + xHessian += hessian * weight; } ++it; @@ -849,17 +869,47 @@ void ModelSpecifics::getSchoenfeldResidualsImpl(int index, } if (hasScore) { - // TODO incr gradient / Hessian + const auto weight = covariate[i]; + // MAS does not believe reweighing is correct, but is matching cox.zph + + // const auto xt = x * covariate[i]; + // MAS believes covariate should be adjusted + const auto numerator1 = expXBeta * x; // TODO not xt? + const auto numerator2 = expXBeta * x * x; // TODO not xt? + + if (hY[i] == 1) { + uGradient += x; + wGradient += x * weight; // TODO not xt and no weight? + } + + scoreNumerator1 += numerator1; + scoreNumerator2 += numerator2; + + const auto t = scoreNumerator1 / resDenominator; + const auto gradient = hNWeight[i] * t; + const auto hessian = hNWeight[i] * (scoreNumerator2 / resDenominator - t * t); + + uGradient -= gradient; + wGradient -= gradient * weight; + + uHessian += hessian; + wHessian += hessian * weight * weight; } } } } if (hasScore) { - score[0] = static_cast(gradient); - score[1] = static_cast(hessian); + score[0] = static_cast(uGradient); + score[1] = static_cast(wGradient); + + score[2] = static_cast(uHessian); + score[3] = static_cast(xHessian); + score[4] = static_cast(xHessian); + score[5] = static_cast(wHessian); + // *score = static_cast(gradient / hessian); - std::cerr << "score = " << gradient << " / " << hessian << "\n"; + // std::cerr << "score = " << wGradient << " / " << wHessian << "\n"; // std::cerr << "hess2 = " << hess2 << " or " << static_cast(2.0) * hXjX[index] << "\n"; } } @@ -1345,8 +1395,6 @@ void ModelSpecifics::computeGradientAndHessianImpl(int index hessian += static_cast(2.0) * hXjX[index]; } - std::cerr << "hessian = " << hessian << " @ " << index << "\n"; - *ogradient = static_cast(gradient); *ohessian = static_cast(hessian); diff --git a/tests/testthat/test-residuals.R b/tests/testthat/test-residuals.R index d7043645..7e58190f 100644 --- a/tests/testthat/test-residuals.R +++ b/tests/testthat/test-residuals.R @@ -3,24 +3,26 @@ library("survival") suppressWarnings(RNGversion("3.5.0")) -test_that("Check Schoenfeld residual tests", { - gfit <- coxph(Surv(futime, fustat) ~ age + ecog.ps, - data=ovarian) - gres <- residuals(gfit, "schoenfeld")[,1] +test_that("Check Schoenfeld residuals and PH test, no strata", { + gfit <- coxph(Surv(futime, fustat) ~ age, + data=ovarian, method = "breslow") + gres <- residuals(gfit, "schoenfeld") - data <- createCyclopsData(Surv(futime, fustat) ~ age + ecog.ps, + ovarian$mage <- ovarian$age - mean(ovarian$age) + + data <- createCyclopsData(Surv(futime, fustat) ~ age, data=ovarian, modelType = "cox") cfit <- fitCyclopsModel(data) cres <- residuals(cfit, "schoenfeld") expect_equal(cres, gres) - gold <- cox.zph(gfit, transform = "identity") + gtest <- cox.zph(gfit, transform = "identity", global = FALSE) - summary(lm(cres ~ as.numeric(names(cres)) - 1)) - logLik(lm(cres ~ as.numeric(names(cres)) - 1)) + ttimes <- ovarian$futime - mean(ovarian$futime[ovarian$fustat == 1]) - testProportionality(cfit, parm = NULL, transformedTimes = ovarian$futime) + ctest <- testProportionality(cfit, parm = NULL, transformedTimes = ttimes) + expect_equal(ctest$table, gtest$table) })