Skip to content

Commit

Permalink
Added function transform_vars_omv applying transformations (SQRT, LOG…
Browse files Browse the repository at this point in the history
…, INV) to make a skewed variable conform to a normal distribution
  • Loading branch information
sjentsch committed Jul 9, 2024
1 parent 0dc180b commit 1abb4ae
Show file tree
Hide file tree
Showing 4 changed files with 328 additions and 0 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export(read_omv)
export(replace_omv)
export(search_omv)
export(sort_omv)
export(transform_vars_omv)
export(transpose_omv)
export(wide2long_omv)
export(write_omv)
94 changes: 94 additions & 0 deletions R/transform_vars_omv.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#' Transform skewed variables (so that they conform more closely to a normal distribution) in .omv-files for the statistical spreadsheet 'jamovi' (<https://www.jamovi.org>)
#'
#' @param dtaInp Either a data frame or the name of a data file to be read (including the path, if required; "FILENAME.ext"; default: NULL); files can be of
#' any supported file type, see Details below
#' @param fleOut Name of the data file to be written (including the path, if required; "FILE_OUT.omv"; default: ""); if empty, the resulting data frame is
#' returned instead
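#' @param varXfm Named list that determines which transformation is applied to which variables (default: NULL); each name indicates a transformation (or the
#'   kind and degree of skewness to be corrected), and each entry is a character vector with the names of the variables to be transformed in that way.
#'   See Details for more information.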
#' @param psvAnl Whether analyses that are contained in the input file shall be transferred to the output file (default: FALSE)
#' @param usePkg Name of the package: "foreign" or "haven" that shall be used to read SPSS, Stata and SAS files; "foreign" is the default (it comes with
#' base R), but "haven" is newer and more comprehensive
#' @param selSet Name of the data set that is to be selected from the workspace (only applies when reading .RData-files)
#' @param ... Additional arguments passed on to methods; see Details below
#'
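#' @return a data frame (only returned if `fleOut` is empty) containing the input data set and, for each transformed variable, an added column with the
#'   transformed values (named like the original variable, followed by the suffix `_SQR`, `_LOG` or `_INV`)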
#'
#' @details
#' * `varXfm` has to be a named list variable where the names can either indicate the type of transformation or the kind and degree of skewness that shall be
#'   corrected. For the type of transformation, the following names are valid: posSqr, negSqr, posLog, negLog, posInv, negInv; where the second part of the
#'   name indicates the transformation to be carried out (...Sqr - square root, ...Log - logarithm to the base 10, ...Inv - inversion, i.e., 1 / original
#'   value), and where the first part of the name indicates whether the original value is used (pos...) or whether the original value is subtracted from the
#'   maximum value of that variable (neg...; a constant of 1 is added to the maximum value for Log and Inv). For pos..., the variable is shifted before it is
#'   transformed if it contains values that are too small: for Sqr, the minimum is subtracted if it is below 0; for Log and Inv, the minimum is subtracted and
#'   a constant of 1 is added if the minimum is below 1. For the degree and kind of skewness, the following names are valid: mdrPos, strPos, svrPos, mdrNeg,
#'   strNeg, svrNeg (degree: moderate, strong, severe; kind: positive or negative); they correspond to posSqr, posLog, posInv, negSqr, negLog and negInv.
#' * The ellipsis-parameter (`...`) can be used to submit arguments / parameters to the functions that are used for reading and writing the data. By clicking
#' on the respective function under “See also”, you can get a more detailed overview over which parameters each of those functions take. The functions are:
#' `read_omv` and `write_omv` (for jamovi-files), `read.table` (for CSV / TSV files; using similar defaults as `read.csv` for CSV and `read.delim` for TSV
#' which both are based upon `read.table`), `load` (for .RData-files), `readRDS` (for .rds-files), `read_sav` (needs the R-package `haven`) or `read.spss`
#' (needs the R-package `foreign`) for SPSS-files, `read_dta` (`haven`) / `read.dta` (`foreign`) for Stata-files, `read_sas` (`haven`) for SAS-data-files,
#' and `read_xpt` (`haven`) / `read.xport` (`foreign`) for SAS-transport-files. If you would like to use `haven`, you may need to install it using
#' `install.packages("haven", dep = TRUE)`.
#'
#' @seealso `transform_vars_omv` internally uses the following functions for reading and writing data files in different formats: [jmvReadWrite::read_omv()] and
#' [jmvReadWrite::write_omv()] for jamovi-files, [utils::read.table()] for CSV / TSV files, [load()] for reading .RData-files, [readRDS()] for .rds-files,
#' [haven::read_sav()] or [foreign::read.spss()] for SPSS-files, [haven::read_dta()] or [foreign::read.dta()] for Stata-files, [haven::read_sas()] for
#' SAS-data-files, and [haven::read_xpt()] or [foreign::read.xport()] for SAS-transport-files.
#'
#' @examples
#' \dontrun{
#' # generate skewed variables
#' set.seed(335)
#' dtaInp <- data.frame(MP = rnorm(1000) * 1e-1 + rexp(1000, 2) * (1 - 1e-1), MN = rnorm(1000) * 1e-1 - rexp(1000, 2) * (1 - 1e-1),
#' SP = rnorm(1000) * 1e-2 + rexp(1000, 2) * (1 - 1e-2), SN = rnorm(1000) * 1e-2 - rexp(1000, 2) * (1 - 1e-2),
#' EP = rnorm(1000) * 1e-4 + rexp(1000, 2) * (1 - 1e-4), EN = rnorm(1000) * 1e-4 - rexp(1000, 2) * (1 - 1e-4))
#' jmv::descriptives(data = dtaInp, skew = TRUE, sw = TRUE)
#'
#' crrXfm <- list(posSqr = c("MP"), negSqr = c("MN"), posLog = c("MP", "SP"), negLog = c("SN"), posInv = c("MP", "SP", "EP"), negInv = c("EN"))
#' dtaOut <- jmvReadWrite::transform_vars_omv(dtaInp = dtaInp, varXfm = crrXfm)
#' jmv::descriptives(data = dtaOut, skew = TRUE, sw = TRUE)
#'
#' crrXfm <- list(mdrPos = c("MP"), mdrNeg = c("MN"), strPos = c("SP"), strNeg = c("SN"), svrPos = c("EP"), svrNeg = c("EN"))
#' dtaOut <- jmvReadWrite::transform_vars_omv(dtaInp = dtaInp, varXfm = crrXfm)
#' jmv::descriptives(data = dtaOut, skew = TRUE, sw = TRUE)
#'
#' }
#'
#' @export transform_vars_omv
#'
transform_vars_omv <- function(dtaInp = NULL, fleOut = "", varXfm = NULL, psvAnl = FALSE, usePkg = c("foreign", "haven"), selSet = "", ...) {
    # Adds, for each variable listed in varXfm, a column with the transformed values (square root, log10 or inverse); the new
    # column is named <variable>_SQR / _LOG / _INV and carries the attributes columnType ("Computed") and formula (the
    # equivalent jamovi formula); the result is handed over to rtnDta (which either writes or returns the data frame)

    # check the input parameter: varXfm needs to be given (as a non-empty list)
    if (length(varXfm) < 1 || !is.list(varXfm)) {
        stop("Calling transform_vars_omv requires the parameter varXfm, using the correct format (see Details in help).")
    }
    # translate names that describe the skewness into the name of the transformation that corrects it; the translated names
    # are kept in a separate vector (parallel to varXfm), so that entries which end up with the same name - e.g., posSqr and
    # mdrPos - can still be told apart and are all processed
    skwXfm <- c(mdrPos = "posSqr", strPos = "posLog", svrPos = "posInv", mdrNeg = "negSqr", strNeg = "negLog", svrNeg = "negInv")
    xfmNme <- names(varXfm)
    if (is.null(xfmNme)) xfmNme <- rep("", length(varXfm))
    selSkw <- xfmNme %in% names(skwXfm)
    xfmNme[selSkw] <- unname(skwXfm[xfmNme[selSkw]])
    if (!all(xfmNme %in% skwXfm)) {
        stop("The parameter varXfm has an invalid entry (wrong name), please use the correct format (see Details in help).")
    }

    # check and import input data set (either as data frame or from a file)
    if (!is.null(list(...)[["fleInp"]])) stop("Please use the argument dtaInp instead of fleInp.")
    dtaFrm <- inp2DF(dtaInp = dtaInp, usePkg = usePkg, selSet = selSet, ...)

    # all variables that shall be transformed need to exist in the data set
    mssVar <- setdiff(unique(as.character(unlist(varXfm, use.names = FALSE))), names(dtaFrm))
    if (length(mssVar) > 0) {
        stop(sprintf("The parameter varXfm contains variable(s) that are not contained in the input data set: %s", paste(mssVar, collapse = ", ")))
    }

    # TO-DO: replace the functionality underneath with compute_omv once implemented
    for (i in seq_along(varXfm)) {
        crrDir <- substr(xfmNme[i], 1, 3)          # "pos" or "neg"
        crrXfm <- substr(xfmNme[i], 4, 6)          # "Sqr", "Log" or "Inv"
        crrSfx <- toupper(crrXfm)
        # the transformation as R function (applied to the values) and as parts of the formula (stored for jamovi)
        fncXfm <- switch(crrXfm, Sqr = sqrt, Log = log10, Inv = function(x) 1 / x)
        jmvPfx <- switch(crrXfm, Sqr = "SQRT(", Log = "LOG10(", Inv = "1 / (")
        for (crrVar in varXfm[[i]]) {
            crrVal <- dtaFrm[[crrVar]]
            if (crrDir == "pos") {
                # missing values are disregarded when determining the minimum, they remain missing in the result
                crrMin <- min(crrVal, na.rm = TRUE)
                if (crrXfm != "Sqr" && crrMin < 1) {
                    # LOG10 and 1 / x: shift the variable so that its minimum becomes 1
                    jmvArg <- paste0(crrVar, " - VMIN(", crrVar, ") + 1")
                    xfmArg <- crrVal - crrMin + 1
                } else if (crrMin < 0) {
                    # SQRT: shift the variable so that its minimum becomes 0
                    jmvArg <- paste0(crrVar, " - VMIN(", crrVar, ")")
                    xfmArg <- crrVal - crrMin
                } else {
                    jmvArg <- crrVar
                    xfmArg <- crrVal
                }
            } else {
                # reflect the variable at its maximum; for LOG10 and 1 / x, 1 is added so that the minimum becomes 1
                crrMax <- max(crrVal, na.rm = TRUE)
                if (crrXfm != "Sqr") {
                    jmvArg <- paste0("VMAX(", crrVar, ") + 1 - ", crrVar)
                    xfmArg <- crrMax + 1 - crrVal
                } else {
                    jmvArg <- paste0("VMAX(", crrVar, ") - ", crrVar)
                    xfmArg <- crrMax - crrVal
                }
            }
            tgtVar <- paste0(crrVar, "_", crrSfx)
            tgtAtt <- list(columnType = "Computed", formula = paste0(jmvPfx, jmvArg, ")"))
            dtaFrm[[tgtVar]] <- fncXfm(xfmArg)
            dtaFrm[tgtVar] <- setAtt(names(tgtAtt), inpObj = tgtAtt, outObj = dtaFrm[tgtVar])
        }
    }

    # rtnDta in globals.R (unified function to either write the data frame, open it in a new jamovi session or return it)
    rtnDta(dtaFrm = dtaFrm, fleOut = fleOut, dtaTtl = jmvTtl("_transform_vars"), psvAnl = psvAnl, dtaInp = dtaInp, ...)
}
83 changes: 83 additions & 0 deletions man/transform_vars_omv.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 1abb4ae

Please sign in to comment.