Three ways of producing predictions in validation data using BGLR

(0) Generating a testing set

  library(BGLR)
  data(wheat)
  X=scale(wheat.X)
  y=wheat.Y[,2]
  n=length(y)
  nTST=150
  tst=sample(1:n,size=nTST)

(1) Setting the entries of the testing set in the response (y) to NA

  yNA=y
  yNA[tst]=NA
  fm=BGLR(y=yNA,ETA=list(list(X=X,model='BRR')),nIter=6000,burnIn=1000)
  yHat_1=fm$yHat[tst]

(2) Splitting both X and y into training and testing sets

  yTRN=y[-tst]; yTST=y[tst]
  X.TRN=X[-tst,]; X.TST=X[tst,]
  
  fm=BGLR(y=yTRN,ETA=list(list(X=X.TRN,model='BRR')),nIter=6000,burnIn=1000)
  yHat_2=fm$mu+as.vector(X.TST%*%fm$ETA[[1]]$b)

(3) Using G-matrix (only valid for GBLUP)

  G=tcrossprod(X)/ncol(X)
  
  G11=G[-tst,-tst] # genomic relationships in the training data
  G21=G[tst,-tst]
  
  fm=BGLR(y=yTRN,ETA=list(list(K=G11,model='RKHS')),nIter=6000,burnIn=1000)
  yHat_3=fm$mu+as.vector(G21%*%solve(G11)%*%fm$ETA[[1]]$u)
  cor(cbind(yHat_1,yHat_2,yHat_3))

(4) Cross-validation using a loop

 folds=sample(1:5,size=n,replace=T)
 yHatCV=rep(NA,n)
 
 timeIn=proc.time()[3]
  for(i in 1:max(folds)){
  	tst=which(folds==i)
  	yNA=y
    yNA[tst]=NA
    fm=BGLR(y=yNA,ETA=list(list(X=X,model='BRR')),nIter=6000,burnIn=1000)
    yHatCV[tst]=fm$yHat[tst]
  }
  
 proc.time()[3]-timeIn

(5) Cross-validation in parallel at multiple cores

  # First create a function that fits one fold
  fitFold=function(y,W,folds,fold){
  	tst=which(folds==fold)
  	yNA=y
    yNA[tst]=NA
    fm=BGLR(y=yNA,ETA=list(list(X=W,model='BRR')),nIter=6000,burnIn=1000,verbose=F)
    tmp=fm$yHat[tst]
    names(tmp)=tst
    return(tmp)
  }
  
  library(parallel)
   
   system.time( tmp<-mclapply(FUN=fitFold,y=y,W=X,folds=folds,X=1:5,mc.cores=3))
   yHatCV2=unlist(tmp)
   yHatCV2=yHatCV2[order(as.integer(names(yHatCV2)))] # need to order the vector
   cor(yHatCV,yHatCV2)

Back to examples

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Validation.md

Validation.md

Three ways of producing predictions in validation data using BGLR

Files

Validation.md

Latest commit

History

Validation.md

File metadata and controls

Three ways of producing predictions in validation data using BGLR