diff --git a/R/MVP.r b/R/MVP.r index 99d3b65..6e20baf 100755 --- a/R/MVP.r +++ b/R/MVP.r @@ -190,9 +190,6 @@ function(phe, geno, map, K=NULL, nPC.GLM=NULL, nPC.MLM=NULL, nPC.FarmCPU=NULL, } } - logging.log("Check if NAs exist in genotype", "\n", verbose = verbose) - if(hasNA(geno@address, mrkbycol = MrkByCol, geno_ind = seqTaxa, threads = ncpus)) stop("NA is not allowed in genotype, use 'MVP.Data.impute' to impute.") - logging.log("Calculate allele frequency...", "\n", verbose = verbose) marker_freq <- BigRowMean(geno@address, MrkByCol, threads = ncpus, geno_ind = seqTaxa) / 2 map$MAF <- ifelse(marker_freq > 0.5, 1 - marker_freq, marker_freq) diff --git a/src/assoc.cpp b/src/assoc.cpp index fc12a1c..7078f8f 100755 --- a/src/assoc.cpp +++ b/src/assoc.cpp @@ -124,14 +124,14 @@ SEXP glm_c(const arma::vec &y, const arma::mat &X, const arma::mat & iXX, XPtr -bool hasNA(XPtr pMat, double NA_C, bool mrkbycol = true, const Nullable geno_ind = R_NilValue, const Nullable marker_ind = R_NilValue, const int threads = 1) { +bool hasNA(XPtr pMat, bool mrkbycol = true, const Nullable geno_ind = R_NilValue, const Nullable marker_ind = R_NilValue, const int threads = 1) { omp_setup(threads); @@ -124,7 +124,8 @@ bool hasNA(XPtr pMat, double NA_C, bool mrkbycol = true, const Nullab for (int j = 0; j < m; j++) { if(HasNA) continue; for (int i = 0; i < n; i++) { - if (mat[_marker_ind[j]][_geno_ind[i]] == NA_C) { + // if (mat[_marker_ind[j]][_geno_ind[i]] == NA_C) { + if (isna(mat[_marker_ind[j]][_geno_ind[i]])) { HasNA = true; } } @@ -134,7 +135,8 @@ bool hasNA(XPtr pMat, double NA_C, bool mrkbycol = true, const Nullab for (int j = 0; j < n; j++) { if(HasNA) continue; for (int i = 0; i < m; i++) { - if (mat[_geno_ind[j]][_marker_ind[i]] == NA_C) { + // if (mat[_geno_ind[j]][_marker_ind[i]] == NA_C) { + if (isna(mat[_geno_ind[j]][_marker_ind[i]])) { HasNA = true; } } @@ -146,7 +148,8 @@ bool hasNA(XPtr pMat, double NA_C, bool mrkbycol = true, const Nullab for (int j = 0; j < pMat->ncol(); j++) { if(HasNA) continue; for (int i = 0; i < n; i++) { - if (mat[j][_geno_ind[i]] == NA_C) { + // if (mat[j][_geno_ind[i]] == NA_C) { + if (isna(mat[j][_geno_ind[i]])) { HasNA = true; } } @@ -156,7 +159,8 @@ bool hasNA(XPtr pMat, double NA_C, bool mrkbycol = true, const Nullab for (int j = 0; j < n; j++) { if(HasNA) continue; for (int i = 0; i < pMat->nrow(); i++) { - if (mat[_geno_ind[j]][i] == NA_C) { + // if (mat[_geno_ind[j]][i] == NA_C) { + if (isna(mat[_geno_ind[j]][i])) { HasNA = true; } } @@ -172,7 +176,8 @@ bool hasNA(XPtr pMat, double NA_C, bool mrkbycol = true, const Nullab for (int j = 0; j < m; j++) { if(HasNA) continue; for (int i = 0; i < pMat->nrow(); i++) { - if (mat[_marker_ind[j]][i] == NA_C) { + // if (mat[_marker_ind[j]][i] == NA_C) { + if (isna(mat[_marker_ind[j]][i])) { HasNA = true; } } @@ -182,7 +187,8 @@ bool hasNA(XPtr pMat, double NA_C, bool mrkbycol = true, const Nullab for (int j = 0; j < pMat->ncol(); j++) { if(HasNA) continue; for (int i = 0; i < m; i++) { - if (mat[j][_marker_ind[i]] == NA_C) { + // if (mat[j][_marker_ind[i]] == NA_C) { + if (isna(mat[j][_marker_ind[i]])) { HasNA = true; } } @@ -193,14 +199,14 @@ bool hasNA(XPtr pMat, double NA_C, bool mrkbycol = true, const Nullab for (int j = 0; j < pMat->ncol(); j++) { if(HasNA) continue; for (int i = 0; i < pMat->nrow(); i++) { - if (mat[j][i] == NA_C) { + // if (mat[j][i] == NA_C) { + if (isna(mat[j][i])) { HasNA = true; } } } } } - return HasNA; } @@ -210,13 +216,13 @@ bool hasNA(SEXP pBigMat, bool mrkbycol = true, const Nullable geno_i switch(xpMat->matrix_type()) { case 1: - return hasNA(xpMat, NA_CHAR, mrkbycol, geno_ind, marker_ind, threads); + return hasNA(xpMat, mrkbycol, geno_ind, marker_ind, threads); case 2: - return hasNA(xpMat, NA_SHORT, mrkbycol, geno_ind, marker_ind, threads); + return hasNA(xpMat, mrkbycol, geno_ind, marker_ind, threads); case 4: - return hasNA(xpMat, NA_INTEGER, mrkbycol, geno_ind, marker_ind, threads); + return hasNA(xpMat, mrkbycol, geno_ind, marker_ind, threads); case 8: - return hasNA(xpMat, NA_REAL, mrkbycol, geno_ind, marker_ind, threads); + return hasNA(xpMat, mrkbycol, geno_ind, marker_ind, threads); default: throw Rcpp::exception("unknown type detected for big.matrix object!"); } diff --git a/src/kinship.cpp b/src/kinship.cpp index e375665..2595058 100755 --- a/src/kinship.cpp +++ b/src/kinship.cpp @@ -21,7 +21,7 @@ arma::vec BigRowMean(XPtr pMat, bool marker_bycol = true, size_t step int n; int m = marker_bycol ? pMat->ncol() : pMat->nrow(); - arma::vec mean(m); + arma::vec mean(m, fill::zeros); uvec _geno_ind; if(geno_ind.isNotNull()){ @@ -54,14 +54,16 @@ arma::vec BigRowMean(XPtr pMat, bool marker_bycol = true, size_t step #pragma omp parallel for for(int l = 0; l < cnt; l++){ for(int k = 0; k < n; k++){ - Z_buffer(k, l) = bigm[(i_marker + l)][k]; + T elem = bigm[(i_marker + l)][k]; + Z_buffer(k, l) = (isna(elem) ? datum::nan : (double)elem); } } }else{ #pragma omp parallel for for(int k = 0; k < n; k++){ for(int l = 0; l < cnt; l++){ - Z_buffer(k, l) = bigm[k][(i_marker + l)]; + T elem = bigm[k][(i_marker + l)]; + Z_buffer(k, l) = (isna(elem) ? datum::nan : (double)elem); } } } @@ -70,14 +72,16 @@ arma::vec BigRowMean(XPtr pMat, bool marker_bycol = true, size_t step #pragma omp parallel for for(int l = 0; l < cnt; l++){ for(int k = 0; k < n; k++){ - Z_buffer(k, l) = bigm[(i_marker + l)][_geno_ind[k]]; + T elem = bigm[(i_marker + l)][_geno_ind[k]]; + Z_buffer(k, l) = (isna(elem) ? datum::nan : (double)elem); } } }else{ #pragma omp parallel for for(int k = 0; k < n; k++){ for(int l = 0; l < cnt; l++){ - Z_buffer(k, l) = bigm[_geno_ind[k]][(i_marker + l)]; + T elem = bigm[_geno_ind[k]][(i_marker + l)]; + Z_buffer(k, l) = (isna(elem) ? datum::nan : (double)elem); } } } @@ -87,6 +91,7 @@ arma::vec BigRowMean(XPtr pMat, bool marker_bycol = true, size_t step for(int l = 0; l < cnt; l++){ mean[i_marker + l] = arma::mean(Z_buffer.col(l)); } + if(mean.subvec(i_marker, i_marker + cnt - 1).has_nan()) throw Rcpp::exception("NA is not allowed in genotype, use 'MVP.Data.impute' to impute!"); i = j; i_marker += cnt; @@ -348,94 +353,7 @@ SEXP kin_cal(XPtr pMat, const Nullable geno_ind = R_NilVa vec means; if(marker_freq.isNotNull()){ means = as(marker_freq) * 2; - }else{ - means.resize(m); - if(_geno_ind.is_empty()){ - if(_marker_ind.is_empty()){ - if(marker_bycol){ - #pragma omp parallel for - for(int j = 0; j < m; j++){ - double p1 = 0.0; - for(int k = 0; k < n; k++){ - p1 += bigm[j][k]; - } - means[j] = p1 / n; - } - }else{ - #pragma omp parallel for - for(int j = 0; j < m; j++){ - double p1 = 0.0; - for(int k = 0; k < n; k++){ - p1 += bigm[k][j]; - } - means[j] = p1 / n; - } - } - }else{ - if(marker_bycol){ - #pragma omp parallel for - for(int j = 0; j < m; j++){ - double p1 = 0.0; - for(int k = 0; k < n; k++){ - p1 += bigm[_marker_ind[j]][k]; - } - means[j] = p1 / n; - } - }else{ - #pragma omp parallel for - for(int j = 0; j < m; j++){ - double p1 = 0.0; - for(int k = 0; k < n; k++){ - p1 += bigm[k][_marker_ind[j]]; - } - means[j] = p1 / n; - } - } - } - }else{ - if(_marker_ind.is_empty()){ - if(marker_bycol){ - #pragma omp parallel for - for(int j = 0; j < m; j++){ - double p1 = 0.0; - for(int k = 0; k < n; k++){ - p1 += bigm[j][_geno_ind[k]]; - } - means[j] = p1 / n; - } - }else{ - #pragma omp parallel for - for(int j = 0; j < m; j++){ - double p1 = 0.0; - for(int k = 0; k < n; k++){ - p1 += bigm[_geno_ind[k]][j]; - } - means[j] = p1 / n; - } - } - }else{ - if(marker_bycol){ - #pragma omp parallel for - for(int j = 0; j < m; j++){ - double p1 = 0.0; - for(int k = 0; k < n; k++){ - p1 += bigm[_marker_ind[j]][_geno_ind[k]]; - } - means[j] = p1 / n; - } - }else{ - #pragma omp parallel for - for(int j = 0; j < m; j++){ - double p1 = 0.0; - for(int k = 0; k < n; k++){ - p1 += bigm[_geno_ind[k]][_marker_ind[j]]; - } - means[j] = p1 / n; - } - } - } - } - if(means.has_nan()) throw Rcpp::exception("NA is not allowed in genotype, use 'MVP.Data.impute' to impute!"); + if(means.has_nan()) throw Rcpp::exception("NA is not allowed in allele frequency!"); } arma::mat kin = zeros(n, n); @@ -453,7 +371,6 @@ SEXP kin_cal(XPtr pMat, const Nullable geno_ind = R_NilVa { cnt++; } - if (cnt != step) { Z_buffer.set_size(cnt, n); } @@ -464,14 +381,14 @@ SEXP kin_cal(XPtr pMat, const Nullable geno_ind = R_NilVa #pragma omp parallel for for(int l = 0; l < cnt; l++){ for(int k = 0; k < n; k++){ - Z_buffer(l, k) = bigm[(i_marker + l)][k] - means[(i_marker + l)]; + Z_buffer(l, k) = (double)bigm[(i_marker + l)][k]; } } }else{ #pragma omp parallel for for(int k = 0; k < n; k++){ for(int l = 0; l < cnt; l++){ - Z_buffer(l, k) = bigm[k][(i_marker + l)] - means[(i_marker + l)]; + Z_buffer(l, k) = (double)bigm[k][(i_marker + l)]; } } } @@ -480,14 +397,14 @@ SEXP kin_cal(XPtr pMat, const Nullable geno_ind = R_NilVa #pragma omp parallel for for(int l = 0; l < cnt; l++){ for(int k = 0; k < n; k++){ - Z_buffer(l, k) = bigm[_marker_ind[(i_marker + l)]][k] - means[(i_marker + l)]; + Z_buffer(l, k) = (double)bigm[_marker_ind[(i_marker + l)]][k]; } } }else{ #pragma omp parallel for for(int k = 0; k < n; k++){ for(int l = 0; l < cnt; l++){ - Z_buffer(l, k) = bigm[k][_marker_ind[(i_marker + l)]] - means[(i_marker + l)]; + Z_buffer(l, k) = (double)bigm[k][_marker_ind[(i_marker + l)]]; } } } @@ -498,14 +415,14 @@ SEXP kin_cal(XPtr pMat, const Nullable geno_ind = R_NilVa #pragma omp parallel for for(int l = 0; l < cnt; l++){ for(int k = 0; k < n; k++){ - Z_buffer(l, k) = bigm[(i_marker + l)][_geno_ind[k]] - means[(i_marker + l)]; + Z_buffer(l, k) = (double)bigm[(i_marker + l)][_geno_ind[k]]; } } }else{ #pragma omp parallel for for(int k = 0; k < n; k++){ for(int l = 0; l < cnt; l++){ - Z_buffer(l, k) = bigm[_geno_ind[k]][(i_marker + l)] - means[(i_marker + l)]; + Z_buffer(l, k) = (double)bigm[_geno_ind[k]][(i_marker + l)]; } } } @@ -514,20 +431,28 @@ SEXP kin_cal(XPtr pMat, const Nullable geno_ind = R_NilVa #pragma omp parallel for for(int l = 0; l < cnt; l++){ for(int k = 0; k < n; k++){ - Z_buffer(l, k) = bigm[_marker_ind[(i_marker + l)]][_geno_ind[k]] - means[(i_marker + l)]; + Z_buffer(l, k) = (double)bigm[_marker_ind[(i_marker + l)]][_geno_ind[k]]; } } }else{ #pragma omp parallel for for(int k = 0; k < n; k++){ for(int l = 0; l < cnt; l++){ - Z_buffer(l, k) = bigm[_geno_ind[k]][_marker_ind[(i_marker + l)]] - means[(i_marker + l)]; + Z_buffer(l, k) = (double)bigm[_geno_ind[k]][_marker_ind[(i_marker + l)]]; } } } } } + if(marker_freq.isNotNull()){ + Z_buffer.each_col() -= means.subvec(i_marker, i_marker + cnt - 1); + }else{ + means = mean(Z_buffer, 1); + if(means.has_nan()) throw Rcpp::exception("NA is not allowed in genotype, use 'MVP.Data.impute' to impute!"); + Z_buffer.each_col() -= means; + } + if(mkl){ double alp = 1.0; double beta = 1.0; diff --git a/src/rMVP.h b/src/rMVP.h index 53ac0cd..b988697 100644 --- a/src/rMVP.h +++ b/src/rMVP.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include "progress_bar.hpp"