Skip to content

Commit

Permalink
commit code for bootstrapping differences
Browse files Browse the repository at this point in the history
  • Loading branch information
jianxiaoyang committed Oct 19, 2023
1 parent 8d52884 commit fee07f5
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 30 deletions.
5 changes: 3 additions & 2 deletions R/NewDataConversion.R
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ convertToCyclopsData.data.frame <- function(outcomes,
name = covarNames)

if (modelType == "cox_time" && !is.null(timeEffectMap)) {
if (!all(timeEffectMap$covariateId %in% covariates$covariateId)) stop("Invalid covariateId for time effects.")
if (!all(timeEffectMap$covariateId %in% covarNames)) stop("Invalid covariateId for time effects.")
loadNewSqlCyclopsDataStratTimeEffects(object = dataPtr,
stratumId = outcomes$stratumId,
rowId = outcomes$rowId,
Expand Down Expand Up @@ -440,7 +440,8 @@ convertToCyclopsData.tbl_dbi <- function(outcomes,
batchSize = 100000) # TODO Pick magic number

if (modelType == "cox_time" && !is.null(timeEffectMap)) {
if (!all(timeEffectMap$covariateId %in% covariates$covariateId)) stop("Invalid covariateId for time effects.")
covarNames <- unique(pull(covariates, covariateId))
if (!all(timeEffectMap$covariateId %in% covarNames)) stop("Invalid covariateId for time effects.")
loadNewSqlCyclopsDataStratTimeEffects(object = dataPtr,
stratumId = outcomes$stratumId,
rowId = outcomes$rowId,
Expand Down
1 change: 1 addition & 0 deletions src/cyclops/CcdInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ void CcdInterface::setDefaultArguments(void) {
arguments.doBootstrap = false;
arguments.replicates = 100;
arguments.reportRawEstimates = false;
arguments.reportDifference = false;
arguments.modelName = "sccs";
arguments.fileFormat = "generic";
//arguments.outputFormat = "estimates";
Expand Down
1 change: 1 addition & 0 deletions src/cyclops/CcdInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ struct CCDArguments {
// Needed for boot-strapping
bool doBootstrap;
bool reportRawEstimates;
bool reportDifference;
int replicates;
std::string bsFileName;
bool doPartial;
Expand Down
7 changes: 6 additions & 1 deletion src/cyclops/ModelData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -465,7 +465,12 @@ std::vector<std::string> ModelData<RealType>::loadStratTimeEffects(
IdType timeCovId = ++maxCovariateId;
X.getColumn(index).add_label(timeCovId); // TODO return label to user or automatically exclude this column from L1 regularization
timeEffectCovariates.push_back({st+1, timeEffectCovariateIds[j]}); // (stratum, timeEffectCovariateName)
//std::cout << "Create a new column with label [" << timeCovId << "] at stratum [" << st+1 << "] from cov [" << timeEffectCovariateIds[j] << "]\n";
// std::cout << "Create a new column with label [" << timeCovId << "] at stratum [" << st+1 << "] from cov [" << timeEffectCovariateIds[j] << "]\n";
std::ostringstream stream;
stream << "Created a new column with label " << timeCovId
<< " at stratum " << st+1
<< " from cov " << timeEffectCovariateIds[j];
log->writeLine(stream);
}

i++;
Expand Down
81 changes: 54 additions & 27 deletions src/cyclops/drivers/BootstrapDriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@ void BootstrapDriver::logResults(const CCDArguments& arguments, std::vector<doub

void BootstrapDriver::logHR(const CCDArguments& arguments, std::vector<double>& savedBeta, std::string treatmentId) {

// int j = J-1;
int tId = 0;
while (modelData->getColumnLabel(tId) != treatmentId) tId++;

Expand All @@ -146,43 +145,71 @@ void BootstrapDriver::logHR(const CCDArguments& arguments, std::vector<double>&

string sep(","); // TODO Make option

// Raw estimates
// for(rvector::iterator it = estimates[j]->begin(); it != estimates[j]->end(); ++it) outLog << *it << endl;

// Stats
outLog << "Drug_concept_id" << sep << "Condition_concept_id" << sep <<
"score" << sep << "standard_error" << sep << "bs_mean" << sep << "bs_lower" << sep <<
"bs_upper" << sep << "bs_prob0" << endl;

for (int j = tId; j < J; ++j) {
outLog << modelData->getColumnLabel(j) <<
sep << treatmentId << sep;

double mean = 0.0;
double var = 0.0;
double prob0 = 0.0;
for (rvector::iterator it = estimates[j]->begin(); it != estimates[j]->end(); ++it) {
mean += *it;
var += *it * *it;
if (*it == 0.0) {
prob0 += 1.0;
outLog << modelData->getColumnLabel(j) <<
sep << treatmentId << sep;

double mean = 0.0;
double var = 0.0;
double prob0 = 0.0;
for (rvector::iterator it = estimates[j]->begin(); it != estimates[j]->end(); ++it) {
mean += *it;
var += *it * *it;
if (*it == 0.0) {
prob0 += 1.0;
}
}
}

double size = static_cast<double>(estimates[j]->size());
mean /= size;
var = (var / size) - (mean * mean);
prob0 /= size;
double size = static_cast<double>(estimates[j]->size());
mean /= size;
var = (var / size) - (mean * mean);
prob0 /= size;

sort(estimates[j]->begin(), estimates[j]->end());
int offsetLower = static_cast<int>(size * 0.025);
int offsetUpper = static_cast<int>(size * 0.975);

sort(estimates[j]->begin(), estimates[j]->end());
int offsetLower = static_cast<int>(size * 0.025);
int offsetUpper = static_cast<int>(size * 0.975);
double lower = *(estimates[j]->begin() + offsetLower);
double upper = *(estimates[j]->begin() + offsetUpper);

double lower = *(estimates[j]->begin() + offsetLower);
double upper = *(estimates[j]->begin() + offsetUpper);
outLog << savedBeta[j] << sep;
outLog << std::sqrt(var) << sep << mean << sep << lower << sep << upper << sep << prob0 << endl;
}

outLog << savedBeta[j] << sep;
outLog << std::sqrt(var) << sep << mean << sep << lower << sep << upper << sep << prob0 << endl;
if (arguments.reportDifference) {
double mean = 0.0;
double var = 0.0;
double prob0 = 0.0;

double size = static_cast<double>(estimates[tId]->size());
std::vector<double> diff(size, static_cast<double>(0));
for (int i = 0; i < size; i++) {
diff[i] = *(estimates[tId]->begin() + i) - *(estimates[tId+1]->begin() + i);
mean += diff[i];
var += diff[i] * diff[i];
if (diff[i] == 0.0) {
prob0 += 1.0;
}
}

mean /= size;
var = (var / size) - (mean * mean);
prob0 /= size;
sort(diff.begin(), diff.end());

int offsetLower = static_cast<int>(size * 0.025);
int offsetUpper = static_cast<int>(size * 0.975);

double lower = diff[offsetLower];
double upper = diff[offsetUpper];

outLog << savedBeta[tId] - savedBeta[tId+1] << sep;
outLog << std::sqrt(var) << sep << mean << sep << lower << sep << upper << sep << prob0 << endl;
}
outLog.close();
}
Expand Down

0 comments on commit fee07f5

Please sign in to comment.