diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2b1c9a5 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +desktop.ini diff --git a/.github/workflows/.gitignore b/.github/workflows/.gitignore new file mode 100644 index 0000000..2b1c9a5 --- /dev/null +++ b/.github/workflows/.gitignore @@ -0,0 +1 @@ +desktop.ini diff --git a/.gitignore b/.gitignore index 74e1958..e6b3712 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ .RData .Ruserdata output -*.lnk \ No newline at end of file +*.lnk +*/desktop.ini +desktop.ini diff --git a/R/.gitignore b/R/.gitignore new file mode 100644 index 0000000..2b1c9a5 --- /dev/null +++ b/R/.gitignore @@ -0,0 +1 @@ +desktop.ini diff --git a/R/28_generateRandVectors.R b/R/28_generateRandVectors.R index 4c68ee1..f2d1f16 100644 --- a/R/28_generateRandVectors.R +++ b/R/28_generateRandVectors.R @@ -3,7 +3,7 @@ # Project: n-TARP clustering # https://github.com/SEED-research-lab/n-tarp # -# Copyright 2017-2020 Taylor Williams +# Copyright 2017-2021 Taylor Williams # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,65 +22,86 @@ # Authors: Taylor Williams # Affiliation: Purdue University # +# Input stack: +# CLEAN PROBABILITY MATRIX (feature vector) file + +# # Description: # -# Package dependancies: +# Package dependencies: # # Changelog: # 2019.09.13. forked from other SEED lab projects # 2020.02.05. allow user to choose number of random vectors +# 2021.07.30. Continuing effort to save progress every thousand vectors +# 2021.07.2x. commented out the file loading code--a bug with grepl is suspected. Workaround: manually load the 3 RData files # -# Feature wishlist: (*: planned but not complete) -# * +# Feature wishlist: (*: planned; /: started; x: complete) +# [*] re-enable file loading ## ===================================================== ## -## Clean the environment ########## + +######### Clean the environment ########## +# rm(list=ls()) +# ## Clean the environment except required variables varsToRetain <- c("filenameFV", "dataFolderPath") rm(list=setdiff(ls(), varsToRetain)) -## Required libraries ########## -require("readr") -require("tcltk") -require("tidyr") -require("dplyr") -require("tibble") -require("beepr") - -#Read data from files #### -## Check for pre-defined starting directory and course prefix #### -if(!exists("filenamePrefix")) filenamePrefix <- NULL -if(!exists("dataFolderPath")) dataFolderPath <- NULL -if(!exists("filenameFV")) filenameFV <- NULL - - -## get data file locations from user #### -#Locate the CLEAN probability matrix (feature vector) file -if(!exists("filenameFV")){ - #read the CLEAN probability matrix (feature vector) file - prompt <- "*****Select the CLEAN PROBABILITY MATRIX (feature vector) file*****\n (The file picker window may have opened in the background. Check behind this window if you do not see it.)\n" - cat("\n", prompt) - filenameFV <- tcltk::tk_choose.files(caption = prompt, - default = file.path(getwd(), "output", ""), - filter = matrix(c("CSV", ".csv", - "RData", ".RData", - "All files", ".*"), - 3, 2, byrow = TRUE), - multi = FALSE) -} - -#load in the data based on the type of data file provided -if(grepl(x = filenameFV, pattern = "\\.RData$")){ - load(file = filenameFV) - probMatrix <- stu_LO_FV -}else if(grepl(x = filenameFV, pattern = "\\.(csv|CSV)$")){ - probMatrix <- read_csv(file = filenameFV) -}else { - message("Invalid Data Filetype.") - break -} +######### Internal functions ########## + +######### Setup ########## +#load required packages +require(tidyverse) +require(readxl) +require(beepr) + +#Load functions + + +######### Read Data ########## + +## WORKAROUND +probMatrix <- stu_LO_FV #run this and insert the appropriate clean feature vector (aka, probability matrix) + +# ## Check for pre-defined starting directory and course prefix #### +# if(!exists("filenamePrefix")) filenamePrefix <- NULL +# if(!exists("dataFolderPath")) dataFolderPath <- NULL +# if(!exists("filenameFV")) filenameFV <- NULL +# +# +# ## get data file locations from user #### +# #Locate the CLEAN probability matrix (feature vector) file +# if(!exists("filenameFV")){ +# #read the CLEAN probability matrix (feature vector) file +# prompt <- "*****Select the CLEAN PROBABILITY MATRIX (feature vector) file*****\n (The file picker window may have opened in the background. Check behind this window if you do not see it.)\n" +# cat("\n", prompt) +# filenameFV <- tcltk::tk_choose.files(caption = prompt, +# default = file.path(getwd(), "output", ""), +# filter = matrix(c("CSV", ".csv", +# "RData", ".RData", +# "All files", ".*"), +# 3, 2, byrow = TRUE), +# multi = FALSE) +# } +# +# #load in the data based on the type of data file provided +# if(grepl(x = filenameFV, pattern = "\\.RData$")){ +# load(file = filenameFV) +# probMatrix <- stu_LO_FV +# }else if(grepl(x = filenameFV, pattern = "\\.(csv|CSV)$")){ +# probMatrix <- read_csv(file = filenameFV) +# }else { +# message("Invalid Data Filetype.") +# break +# } + + + + +######### Main ########## ##Generate random vectors #### diff --git a/R/40_findBestThreshold.R b/R/40_findBestThreshold.R index 160fb28..b6d60d0 100644 --- a/R/40_findBestThreshold.R +++ b/R/40_findBestThreshold.R @@ -50,6 +50,11 @@ source(file.path(getwd(), "R", "functions", "ExtractRVnumsAndNames.R")) source(file.path(getwd(), "R", "functions", "DisplayPercentComplete.R")) source(file.path(getwd(), "R", "functions", "file-structure-functions.R")) + +#start a timer to track how long the script takes to execute +start40 <- proc.time() #save the time + + # ## Read data from file(s) #### # #read the PROJECTIONS data file # prompt <- "*****Select the PROJECTIONS data file*****\n (The file picker window may have opened in the background. Check behind this window if you do not see it.)\n" @@ -80,6 +85,9 @@ source(file.path(getwd(), "R", "functions", "file-structure-functions.R")) #Read data from files #### +cat("Loading data from files\n") +proc.time() - start40 + ## Check for pre-defined starting directory and course prefix #### if(!exists("filenamePrefix")) filenamePrefix <- NULL if(!exists("dataFolderPath")) dataFolderPath <- NULL @@ -112,35 +120,81 @@ if(grepl(x = filenameProj, pattern = "\\.RData$")){ break } +cat("Loading data complete\n\n") +proc.time() - start40 - -#get the string values for the random vectors -numsAndNames <- ExtractRVnumsAndNames(RP_names = names(projection)) - -#split the returned list into two seperate variables. Convert to matricies for use in the next step -RV_nums <- as.matrix(numsAndNames$nums) -RV_names <- as.matrix(numsAndNames$names) - +## Extract string values for the random vectors #### +cat("Extracting string values for the random vectors\n") +proc.time() - start40 +# check if already complete and saved +fileExistCheck <- FileExistCheck_workingDir(filename = "40_RVnumsAndNames.RData", subDir = "output") +if(fileExistCheck != FALSE){ + load(fileExistCheck) +}else{ + + #get the string values for the random vectors + numsAndNames <- ExtractRVnumsAndNames(RP_names = names(projection)) + + #split the returned list into two separate variables. Convert to matrices for use in the next step + RV_nums <- as.matrix(numsAndNames$nums) + RV_names <- as.matrix(numsAndNames$names) + + #save String extraction to a RData file + save(numsAndNames, file = file.path("output", "40_RVnumsAndNames.RData"), + precheck = TRUE, compress = TRUE) +} +cat("String extraction complete\n\n") +proc.time() - start40 ## Sort all of the projection columns #### +cat("Sorting projection columns\n") +# check if the projections have already been sorted and saved +fileExistCheck <- FileExistCheck_workingDir(filename = "40_projectionSort.RData", subDir = "output") +if(fileExistCheck != FALSE){ + load(fileExistCheck) +}else{ + #duplicate the projection data frame to store results of sorted projections + projectionSort <- projection + #replace the row names with an ascending integer + # (after sorting, the rows will no longer refer to a single user's projection) + rownames(projectionSort) <- c(1:nrow(projectionSort)) + + #sort each column + for(i in 1:ncol(projection)) + { + projectionSort[,i] <- projection[order(projection[i]),i] + + + #| print completion progress to console #### + #during first iteration, create progress status variables for main processing loop + if(i==1){ + iCount <- 0 #loop counter for completion updates + pct <- 0 #percentage complete tracker + } + #print function + updateVars <- DisplayPercentComplete(projectionSort, iCount, pct, displayText = "Sorting projections: ") + #update status variables + iCount <- updateVars$iCount + pct <- updateVars$pct + #print update + cat(updateVars$toPrint) + } + + #save sorted projections to a RData file + save(projectionSort, file = file.path("output", "40_projectionSort.RData"), + precheck = TRUE, compress = TRUE) +} +cat("Sorting projection columns complete\n\n") +proc.time() - start40 -#duplicate the projection data frame to store results of sorted projections -projectionSort <- projection -#replace the row names with an assending integer -# (after sorting, the rows will no longer refer to a single user's projection) -rownames(projectionSort) <- c(1:nrow(projectionSort)) -#sort each column -for(i in 1:ncol(projection)) -{ - projectionSort[,i] <- projection[order(projection[i]),i] -} ## Test each projection value as the threshold to find best threshold value (minimizing withinSS, W) #### +cat("Finding best threshold value for each projection (minimizing withinSS, W)") #create empty data frame to store results of matrix multiplication minW_RandVec <- data.frame(matrix(nrow = 2, ncol = length(projection))) #set names @@ -162,6 +216,8 @@ colnames(minW_RandVec) <- minW_RandVecColNames # set row names rownames(minW_RandVec) <- c("Min WithinSS (W)", "Group Threshold") + +## loop through all the projections for(i in 1:ncol(projectionSort)) { #build current column name @@ -170,6 +226,23 @@ for(i in 1:ncol(projectionSort)) #reset the minimimum withinSS value to an abserdly large value minW <- 1e10 + + #| print completion progress to console #### + #durring first iteration, create progress status variables for main processing loop + if(i==1){ + iCount <- 0 #loop counter for completion updates + pct <- 0 #percentage complete tracker + } + #print function + updateVars <- DisplayPercentComplete(projectionSort, iCount, pct, displayText = "Locating best threshold values: ") + #update status variables + iCount <- updateVars$iCount + pct <- updateVars$pct + #print update + cat(updateVars$toPrint) + + + ## brute-force search for the global minimum for(j in 1:nrow(projectionSort)) { #test each of the projection values (x_j) as the threshold @@ -207,25 +280,16 @@ for(i in 1:ncol(projectionSort)) minW_RandVec["Group 1 Count", curColName] <- sum(projectionSort[,curColName] < as.numeric(bestThresh)) minW_RandVec["Group 2 Count", curColName] <- sum(projectionSort[,curColName] >= as.numeric(bestThresh)) - - - #| print completion progress to console #### - #durring first iteration, create progress status variables for main processing loop - if(i==1) - { - iCount <- 0 #loop counter for completion updates - pct <- 0 #percentage complete tracker + if (i %% 250==0) { + #write to a RData file + save(minW_RandVec, file = file.path("output", paste0("40_minW_and_threshold-",i,".RData")), + precheck = TRUE, compress = TRUE) + cat("\nSaving 250 file") + proc.time() - start40 + } - #print function - updateVars <- DisplayPercentComplete(projectionSort, iCount, pct, displayText = "Locating best threshold values: ") - - #update status variables - iCount <- updateVars$iCount - pct <- updateVars$pct - - #print update - cat(updateVars$toPrint) + } #transpose and sort the min. withinSS and group threshold data @@ -236,10 +300,12 @@ minW_RandVec_sort <- minW_RandVec_sort[order(minW_RandVec_sort$`Min WithinSS (W) ## |Save min. withinSS and group threshold data to file #### #write a CSV file cat("\nSaving CSV file.") -write.csv(file = file.path("output", "40_minW_and_threshold.csv"), +proc.time() - start40 + +write.csv(file = file.path("output", "40_minW_and_threshold_sorted.csv"), x = minW_RandVec_sort) #write to a RData file -save(minW_RandVec_sort, file = file.path("output", "40_minW_and_threshold.RData"), +save(minW_RandVec_sort, file = file.path("output", "40_minW_and_threshold_sorted.RData"), precheck = TRUE, compress = TRUE) @@ -251,8 +317,10 @@ clusterWThreshold <- 0.36 clusterCandidates <- minW_RandVec[1,minW_RandVec["Min WithinSS (W)",] < clusterWThreshold] sortedCandidates <- clusterCandidates[1,order(clusterCandidates)] + #report percentage of promising projections -length(clusterCandidates)/length(minW_RandVec) +cat("percentage of promising projections:", length(clusterCandidates)/length(minW_RandVec)) + #save the names of the cluster candidates sortedCandidateNames <- names(sortedCandidates) @@ -260,9 +328,12 @@ sortedCandidateNames <- names(sortedCandidates) ##|Save data to file #### #write to a CSV file cat("\nSaving files.") -write.csv(file = file.path("output", "40_best_RP_names.csv"), +write.csv(file = file.path("output", + paste0("40_best_RP_names (W-lt-", clusterWThreshold, ").csv")), x = sortedCandidateNames) #write to a RData file -save(sortedCandidateNames, file = file.path("output", "40_best_RP_names.RData"), +save(sortedCandidateNames, + file = file.path("output", + paste0("40_best_RP_names (W-lt-", clusterWThreshold, ").RData")), precheck = TRUE, compress = TRUE) diff --git a/R/45_plotProjectionHistograms.R b/R/45_plotProjectionHistograms.R index 7892d91..fd7ced7 100644 --- a/R/45_plotProjectionHistograms.R +++ b/R/45_plotProjectionHistograms.R @@ -138,7 +138,8 @@ repeat{ beepr::beep(sound = 10) #notify user to provide input histogramSet <- readline(prompt=paste0("Which set of histograms would you like to generate? (1) All ", ncol(projection), " projections or - (2) only the ", length(sortedCandidateNames)," projections that cluster?: ")) + (2) only the ", length(sortedCandidateNames)," projections that cluster or + (3) a custom number of top projections: ")) #set the names of the projections to plot based on user selection #if user selected ALL projections @@ -151,8 +152,14 @@ repeat{ histogramSetNames <- sortedCandidateNames break + #if the user selected CUSTOM number of projections + }else if(histogramSet == "3" || histogramSet == "(3)"){ + histogramNum <- as.numeric(readline(prompt=paste0("How many histograms would you like to generate?: "))) + histogramSetNames <- rownames(minW_RandVec_sort[1:histogramNum, ]) + break + }else{ - message("Please enter either '1' or '2'.\n") + message("Please enter either '1' '2' or '3'.\n") } #repeat *OR* end if user requested @@ -181,7 +188,7 @@ for(i in 1:length(histogramSetNames)) # "aes(fill=..count..)" will color each bar a shade of blue according to its count # "guides(fill=FALSE)" turns off the legend ggHist <- ggHist + geom_histogram(binwidth = binWidth, aes(fill=..count..)) + - guides(fill=FALSE) + guides(fill="none") #set x-axis values, adjust to match bin boundaries ggHist <- ggHist + scale_x_continuous(breaks = seq(from = binWidth/2, to = 3, by = binWidth)) @@ -206,7 +213,7 @@ for(i in 1:length(histogramSetNames)) #| print completion progress to console #### # if more than 10 histograms will be created if (length(histogramSetNames) > 10) { - #durring first iteration, create progress status variables for main processing loop + #during first iteration, create progress status variables for main processing loop if(i==1) { iCount <- 0 #loop counter for completion updates diff --git a/R/50_seperateGroups.R b/R/50_seperateGroups.R index 6179484..2429cad 100644 --- a/R/50_seperateGroups.R +++ b/R/50_seperateGroups.R @@ -23,11 +23,19 @@ # Affiliation: Purdue University # # Description: +# +# Input stack: +# 30_projections.RData +# 40_best_RP_names.RData +# 40_minW_and_threshold.RData +# 110v2_stuFeatureVector-*.RData (CLEAN PROBABILITY MATRIX (feature vector)) # -# Package dependancies: +# Package dependencies: # # Changelog: # 2019.09.19. forked from other SEED lab projects +# 2021.08.02. commented out the file loading code--a bug with grepl is suspected. Workaround: manually load the 3 RData files + # # Feature wishlist: (*: planned but not complete) # *need a robust way to select what object in the loaded PROJECTIONS file is @@ -56,112 +64,115 @@ source(file.path(getwd(), "R", "functions", "file-structure-functions.R")) #Read data from files #### -## Check for pre-defined starting directory and course prefix #### -if(!exists("filenamePrefix")) filenamePrefix <- NULL -if(!exists("dataFolderPath")) dataFolderPath <- NULL -if(!exists("filenameFV")) filenameFV <- NULL - - -## get data file locations from user #### -#Locate the CLEAN probability matrix (feature vector) file -if(!exists("filenameFV")){ - #read the CLEAN probability matrix (feature vector) file - prompt <- "*****Select the CLEAN PROBABILITY MATRIX (feature vector) file*****\n (The file picker window may have opened in the background. Check behind this window if you do not see it.)\n" - cat("\n", prompt) - filenameFV <- tcltk::tk_choose.files(caption = prompt, - default = file.path(getwd(), "output", ""), - filter = matrix(c("CSV", ".csv", - "RData", ".RData", - "All files", ".*"), - 3, 2, byrow = TRUE), - multi = FALSE) -} - -#load in the data based on the type of data file provided -if(grepl(x = filenameFV, pattern = "\\.RData$")){ - load(file = filenameFV) - probMatrix <- stu_LO_FV -}else if(grepl(x = filenameFV, pattern = "\\.(csv|CSV)$")){ - probMatrix <- read_csv(file = filenameFV) -}else { - message("Invalid Data Filetype.") - break -} - - - - - -#read the MIN_W and THRESHOLD data file -filenameMinW <- - SelectFile(prompt = "*****Select the MIN_W and THRESHOLD data file*****\n (The file picker window may have opened in the background. Check behind this window if you do not see it.)\n", - defaultFilename = "40_minW_and_threshold.RData", - # filenamePrefix = ifelse(exists("filenamePrefix") & !is.null(filenamePrefix), - # yes = filenamePrefix, no = ""), - fileTypeMatrix = matrix(c("RData", ".RData", "CSV", ".csv", "All files", ".*"), - 3, 2, byrow = TRUE), - dataFolderPath = ifelse(exists("dataFolderPath") & !is.null(dataFolderPath), - yes = dataFolderPath, no = "")) - - -#load in the data based on the type of data file provided -if(grepl(x = filenameMinW, pattern = "\\.RData$")){ - load(file = filenameMinW) -}else if(grepl(x = filenameMinW, pattern = "\\.(csv|CSV)$")){ - minW_RandVec_sort <- read_csv(file = filenameMinW) -}else{ - message("Invalid Data Filetype.") - return -} - - - -#read the BEST RANDOM PROJECTIONS data file -filenameBestRP <- - SelectFile(prompt = "*****Select the BEST RANDOM PROJECTIONS data file*****\n (The file picker window may have opened in the background. Check behind this window if you do not see it.)\n", - defaultFilename = "40_best_RP_names.RData", - # filenamePrefix = ifelse(exists("filenamePrefix") & !is.null(filenamePrefix), - # yes = filenamePrefix, no = ""), - fileTypeMatrix = matrix(c("RData", ".RData", "CSV", ".csv", "All files", ".*"), - 3, 2, byrow = TRUE), - dataFolderPath = ifelse(exists("dataFolderPath") & !is.null(dataFolderPath), - yes = dataFolderPath, no = "")) - - -#load in the data based on the type of data file provided -if(grepl(x = filenameBestRP, pattern = "\\.RData$")){ - load(file = filenameBestRP) -}else if(grepl(x = filenameBestRP, pattern = "\\.(csv|CSV)$")){ - sortedCandidateNames <- read_csv(file = filenameBestRP) -}else{ - message("Invalid Data Filetype.") - return -} - - - - -#read the PROJECTIONS data file -filenameProj <- - SelectFile(prompt = "*****Select the PROJECTIONS data file*****\n (The file picker window may have opened in the background. Check behind this window if you do not see it.)\n", - defaultFilename = "30_projections.RData", - # filenamePrefix = ifelse(exists("filenamePrefix") & !is.null(filenamePrefix), - # yes = filenamePrefix, no = ""), - fileTypeMatrix = matrix(c("RData", ".RData", "CSV", ".csv", "All files", ".*"), - 3, 2, byrow = TRUE), - dataFolderPath = ifelse(exists("dataFolderPath") & !is.null(dataFolderPath), - yes = dataFolderPath, no = "")) - - -#load in the data based on the type of data file provided -if(grepl(x = filenameProj, pattern = "\\.RData$")){ - load(file = filenameProj) -}else if(grepl(x = filenameProj, pattern = "\\.(csv|CSV)$")){ - projections <- read_csv(file = filename) -}else{ - message("Invalid Data Filetype.") - break -} +## WORKAROUND +probMatrix <- stu_LO_FV #run this and insert the appropriate clean feature vector (aka, probability matrix) +# +# ## Check for pre-defined starting directory and course prefix #### +# if(!exists("filenamePrefix")) filenamePrefix <- NULL +# if(!exists("dataFolderPath")) dataFolderPath <- NULL +# if(!exists("filenameFV")) filenameFV <- NULL +# +# +# ## get data file locations from user #### +# #Locate the CLEAN probability matrix (feature vector) file +# if(!exists("filenameFV")){ +# #read the CLEAN probability matrix (feature vector) file +# prompt <- "*****Select the CLEAN PROBABILITY MATRIX (feature vector) file*****\n (The file picker window may have opened in the background. Check behind this window if you do not see it.)\n" +# cat("\n", prompt) +# filenameFV <- tcltk::tk_choose.files(caption = prompt, +# default = file.path(getwd(), "output", ""), +# filter = matrix(c("CSV", ".csv", +# "RData", ".RData", +# "All files", ".*"), +# 3, 2, byrow = TRUE), +# multi = FALSE) +# } +# +# #load in the data based on the type of data file provided +# if(grepl(x = filenameFV, pattern = "\\.RData$")){ +# load(file = filenameFV) +# probMatrix <- stu_LO_FV +# }else if(grepl(x = filenameFV, pattern = "\\.(csv|CSV)$")){ +# probMatrix <- read_csv(file = filenameFV) +# }else { +# message("Invalid Data Filetype.") +# break +# } +# +# +# +# +# +# #read the MIN_W and THRESHOLD data file +# filenameMinW <- +# SelectFile(prompt = "*****Select the MIN_W and THRESHOLD data file*****\n (The file picker window may have opened in the background. Check behind this window if you do not see it.)\n", +# defaultFilename = "40_minW_and_threshold.RData", +# # filenamePrefix = ifelse(exists("filenamePrefix") & !is.null(filenamePrefix), +# # yes = filenamePrefix, no = ""), +# fileTypeMatrix = matrix(c("RData", ".RData", "CSV", ".csv", "All files", ".*"), +# 3, 2, byrow = TRUE), +# dataFolderPath = ifelse(exists("dataFolderPath") & !is.null(dataFolderPath), +# yes = dataFolderPath, no = "")) +# +# +# #load in the data based on the type of data file provided +# if(grepl(x = filenameMinW, pattern = "\\.RData$")){ +# load(file = filenameMinW) +# }else if(grepl(x = filenameMinW, pattern = "\\.(csv|CSV)$")){ +# minW_RandVec_sort <- read_csv(file = filenameMinW) +# }else{ +# message("Invalid Data Filetype.") +# return +# } +# +# +# +# #read the BEST RANDOM PROJECTIONS data file +# filenameBestRP <- +# SelectFile(prompt = "*****Select the BEST RANDOM PROJECTIONS data file*****\n (The file picker window may have opened in the background. Check behind this window if you do not see it.)\n", +# defaultFilename = "40_best_RP_names.RData", +# # filenamePrefix = ifelse(exists("filenamePrefix") & !is.null(filenamePrefix), +# # yes = filenamePrefix, no = ""), +# fileTypeMatrix = matrix(c("RData", ".RData", "CSV", ".csv", "All files", ".*"), +# 3, 2, byrow = TRUE), +# dataFolderPath = ifelse(exists("dataFolderPath") & !is.null(dataFolderPath), +# yes = dataFolderPath, no = "")) +# +# +# #load in the data based on the type of data file provided +# if(grepl(x = filenameBestRP, pattern = "\\.RData$")){ +# load(file = filenameBestRP) +# }else if(grepl(x = filenameBestRP, pattern = "\\.(csv|CSV)$")){ +# sortedCandidateNames <- read_csv(file = filenameBestRP) +# }else{ +# message("Invalid Data Filetype.") +# return +# } +# +# +# +# +# #read the PROJECTIONS data file +# filenameProj <- +# SelectFile(prompt = "*****Select the PROJECTIONS data file*****\n (The file picker window may have opened in the background. Check behind this window if you do not see it.)\n", +# defaultFilename = "30_projections.RData", +# # filenamePrefix = ifelse(exists("filenamePrefix") & !is.null(filenamePrefix), +# # yes = filenamePrefix, no = ""), +# fileTypeMatrix = matrix(c("RData", ".RData", "CSV", ".csv", "All files", ".*"), +# 3, 2, byrow = TRUE), +# dataFolderPath = ifelse(exists("dataFolderPath") & !is.null(dataFolderPath), +# yes = dataFolderPath, no = "")) +# +# +# #load in the data based on the type of data file provided +# if(grepl(x = filenameProj, pattern = "\\.RData$")){ +# load(file = filenameProj) +# }else if(grepl(x = filenameProj, pattern = "\\.(csv|CSV)$")){ +# projections <- read_csv(file = filename) +# }else{ +# message("Invalid Data Filetype.") +# break +# } @@ -178,7 +189,7 @@ if(length(sortedCandidateNames) == 0) }else { #loop through all of the candidate projections and add them to a new data frame - CandidateProjections <- data.frame(userID = probMatrix$userID) + CandidateProjections <- data.frame(userID = probMatrix$'User ID') for(i in 1:length(sortedCandidateNames)) { #add projection values to the data frame diff --git a/R/functions/.gitignore b/R/functions/.gitignore new file mode 100644 index 0000000..2b1c9a5 --- /dev/null +++ b/R/functions/.gitignore @@ -0,0 +1 @@ +desktop.ini