Updating the ipd wrapper function to include the argument n_t (training set size) as a named argument for the postpi methods.
ipd-tools · Nov 15, 2024 · cc66fa6 · cc66fa6
1 parent 920dc41
commit cc66fa6
Show file tree

Hide file tree

Showing 10 changed files with 61 additions and 36 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: ipd
 Title: Inference on Predicted Data
-Version: 0.1.1
+Version: 0.1.2
 Authors@R: c(
       person(given = "Stephen", family = "Salerno",
         email = "[email protected]", role = c("aut", "cre", "cph"),

diff --git a/LICENSE b/LICENSE
@@ -1,2 +1,2 @@
 YEAR: 2024
-COPYRIGHT HOLDER: IPD authors
+COPYRIGHT HOLDER: ipd authors
diff --git a/R/ipd.R b/R/ipd.R
@@ -52,6 +52,10 @@
 #' @param alternative A string specifying the alternative hypothesis. Must be
 #' one of \code{"two-sided"}, \code{"less"}, or \code{"greater"}.
 #'
+#' @param n_t (integer, optional) Size of the dataset used to train the
+#' prediction function (necessary for the \code{"postpi"} methods if \code{n_t} <
+#' \code{nrow(X_l)}). Defaults to \code{Inf}.
+#'
 #' @param ... Additional arguments to be passed to the fitting function. See
 #' the \code{Details} section for more information.
 #'
@@ -212,9 +216,9 @@
 
 ipd <- function(formula, method, model, data,
 
-  label = NULL, unlabeled_data = NULL, seed = NULL,
+  label = NULL, unlabeled_data = NULL, seed = NULL, intercept = TRUE,
 
-  intercept = TRUE, alpha = 0.05, alternative = "two-sided", ...) {
+  alpha = 0.05, alternative = "two-sided", n_t = Inf, ...) {
 
   #--- CHECKS & ASSERTIONS -----------------------------------------------------
 
@@ -428,7 +432,14 @@ ipd <- function(formula, method, model, data,
 
   func <- get(paste(method, model, sep = "_"))
 
-  fit <- func(X_l, Y_l, f_l, X_u, f_u, ...)
+  if(grepl("postpi", method) && model == "ols") {
+
+    fit <- func(X_l, Y_l, f_l, X_u, f_u, n_t = n_t, ...)
+
+  } else {
+
+    fit <- func(X_l, Y_l, f_l, X_u, f_u, ...)
+  }
 
   names(fit$est) <- colnames(X_u)
 

diff --git a/R/postpi_boot_logistic.R b/R/postpi_boot_logistic.R
@@ -30,6 +30,8 @@
 #' Options include "par" (parametric) or "npar" (nonparametric).
 #' Defaults to "par".
 #'
+#' @param seed (optional) An \code{integer} seed for random number generation.
+#'
 #' @return A list of outputs: estimate of inference model parameters and
 #' corresponding standard error based on both parametric and non-parametric
 #' bootstrap methods.
@@ -60,7 +62,7 @@
 
 postpi_boot_logistic <- function(X_l, Y_l, f_l, X_u, f_u,
 
-  nboot = 100, se_type = "par") {
+  nboot = 100, se_type = "par", seed = NULL) {
 
   #-- 1. Estimate Prediction Model (Done in Data Step)
 
@@ -74,7 +76,7 @@ postpi_boot_logistic <- function(X_l, Y_l, f_l, X_u, f_u,
 
   #-- 3. Bootstrap
 
-  set.seed(12345)
+  if (!is.null(seed)) set.seed(seed)
 
   n <- nrow(X_l)
 

diff --git a/R/pspa_poisson.R b/R/pspa_poisson.R
@@ -32,21 +32,21 @@
 #'
 #' @examples
 #'
-#' # dat <- simdat(model = "poisson")
+#' dat <- simdat(model = "poisson")
 #'
-#' # form <- Y - f ~ X1
+#' form <- Y - f ~ X1
 #'
-#' # X_l <- model.matrix(form, data = dat[dat$set == "labeled",])
+#' X_l <- model.matrix(form, data = dat[dat$set == "labeled",])
 #'
-#' # Y_l <- dat[dat$set == "labeled", all.vars(form)[1]] |> matrix(ncol = 1)
+#' Y_l <- dat[dat$set == "labeled", all.vars(form)[1]] |> matrix(ncol = 1)
 #'
-#' # f_l <- dat[dat$set == "labeled", all.vars(form)[2]] |> matrix(ncol = 1)
+#' f_l <- dat[dat$set == "labeled", all.vars(form)[2]] |> matrix(ncol = 1)
 #'
-#' # X_u <- model.matrix(form, data = dat[dat$set == "unlabeled",])
+#' X_u <- model.matrix(form, data = dat[dat$set == "unlabeled",])
 #'
-#' # f_u <- dat[dat$set == "unlabeled", all.vars(form)[2]] |> matrix(ncol = 1)
+#' f_u <- dat[dat$set == "unlabeled", all.vars(form)[2]] |> matrix(ncol = 1)
 #'
-#' # pspa_poisson(X_l, Y_l, f_l, X_u, f_u)
+#' pspa_poisson(X_l, Y_l, f_l, X_u, f_u)
 #'
 #' @import stats
 #'

diff --git a/inst/paper.R b/inst/paper.R
@@ -6,20 +6,8 @@
 
 #--- LOAD NECESSARY PACKAGES ---------------------------------------------------
 
-#-- Install devtools if it is not already installed
-
-install.packages("devtools")
-
-#-- Install the IPD package from GitHub
-
-devtools::install_github("awanafiaz/ipd")
-
-#-- Load the IPD library
-
 library(ipd)
 
-#-- Load additional libraries
-
 library(tidyverse)
 
 library(patchwork)

diff --git a/man/ipd.Rd b/man/ipd.Rd
diff --git a/man/postpi_boot_logistic.Rd b/man/postpi_boot_logistic.Rd
diff --git a/man/pspa_poisson.Rd b/man/pspa_poisson.Rd
diff --git a/vignettes/ipd.Rmd b/vignettes/ipd.Rmd
@@ -19,6 +19,10 @@ knitr::opts_chunk$set(
 )
 ```
 
+```{r, echo=FALSE}
+default_options <- options()
+```
+
 # Introduction
 
 ## Background
@@ -530,6 +534,10 @@ augmented_df <- augment(fit_postpi)
 head(augmented_df)
 ```
 
+```{r, echo=FALSE}
+options(default_options)
+```
+
 # Conclusions
 
 The `ipd` package offers a suite of functions for conducting inference on predicted data. With custom methods for printing, summarizing, tidying, glancing, and augmenting model outputs, `ipd` streamlines the process of IPD-based inference in `R`. We will continue to develop this package to include more targets of inference and IPD methods as they are developed, as well as additional functionality for analyzing such data. For further information and detailed documentation, please refer to the function help pages within the package, e.g.,