Merge pull request #41 from mattheaphy/split

Split
mattheaphy · Nov 19, 2023 · 351e5fb · 351e5fb
2 parents 1a86aa1 + 6a08307
commit 351e5fb
Show file tree

Hide file tree

Showing 23 changed files with 892 additions and 86 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: actxps
 Title: Create Actuarial Experience Studies: Prepare Data, Summarize Results, and Create Reports
-Version: 1.3.0
+Version: 1.3.0.9000
 Authors@R: 
     person("Matt", "Heaphy", email = "[email protected]", role = c("aut", "cre"))
 Maintainer: Matt Heaphy <[email protected]>

diff --git a/NAMESPACE b/NAMESPACE
@@ -27,23 +27,42 @@ S3method(select,exposed_df)
 S3method(semi_join,exposed_df)
 S3method(slice,exposed_df)
 S3method(summary,exp_df)
+S3method(summary,exposed_df)
 S3method(summary,trx_df)
 S3method(tidy,step_expose)
 S3method(ungroup,exposed_df)
 S3method(vec_cast,data.frame.exposed_df)
+S3method(vec_cast,data.frame.split_exposed_df)
 S3method(vec_cast,exposed_df.data.frame)
 S3method(vec_cast,exposed_df.exposed_df)
 S3method(vec_cast,exposed_df.grouped_df)
+S3method(vec_cast,exposed_df.split_exposed_df)
 S3method(vec_cast,exposed_df.tbl_df)
 S3method(vec_cast,grouped_df.exposed_df)
+S3method(vec_cast,grouped_df.split_exposed_df)
+S3method(vec_cast,split_exposed_df.data.frame)
+S3method(vec_cast,split_exposed_df.exposed_df)
+S3method(vec_cast,split_exposed_df.grouped_df)
+S3method(vec_cast,split_exposed_df.split_exposed_df)
+S3method(vec_cast,split_exposed_df.tbl_df)
 S3method(vec_cast,tbl_df.exposed_df)
+S3method(vec_cast,tbl_df.split_exposed_df)
 S3method(vec_ptype2,data.frame.exposed_df)
+S3method(vec_ptype2,data.frame.split_exposed_df)
 S3method(vec_ptype2,exposed_df.data.frame)
 S3method(vec_ptype2,exposed_df.exposed_df)
 S3method(vec_ptype2,exposed_df.grouped_df)
+S3method(vec_ptype2,exposed_df.split_exposed_df)
 S3method(vec_ptype2,exposed_df.tbl_df)
 S3method(vec_ptype2,grouped_df.exposed_df)
+S3method(vec_ptype2,grouped_df.split_exposed_df)
+S3method(vec_ptype2,split_exposed_df.data.frame)
+S3method(vec_ptype2,split_exposed_df.exposed_df)
+S3method(vec_ptype2,split_exposed_df.grouped_df)
+S3method(vec_ptype2,split_exposed_df.split_exposed_df)
+S3method(vec_ptype2,split_exposed_df.tbl_df)
 S3method(vec_ptype2,tbl_df.exposed_df)
+S3method(vec_ptype2,tbl_df.split_exposed_df)
 export(add_predictions)
 export(add_transactions)
 export(anti_join)
@@ -63,12 +82,14 @@ export(expose_pm)
 export(expose_pq)
 export(expose_pw)
 export(expose_py)
+export(expose_split)
 export(filter)
 export(full_join)
 export(group_by)
 export(groups)
 export(inner_join)
 export(is_exposed_df)
+export(is_split_exposed_df)
 export(left_join)
 export(mutate)
 export(plot_actual_to_expected)
@@ -110,6 +131,7 @@ importFrom(dplyr,ungroup)
 importFrom(generics,tidy)
 importFrom(ggplot2,autoplot)
 importFrom(lubridate,"%m+%")
+importFrom(lubridate,"%m-%")
 importFrom(recipes,bake)
 importFrom(recipes,prep)
 importFrom(rlang,":=")

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,27 @@
+# actxps (development version)
+
+- actxps now supports split exposures that divide calendar periods crossing 
+policy anniversaries into pre-anniversary and post-anniversary records. The 
+function `expose_split()` can convert any `exposed_df` object with calendar 
+period exposures (yearly, quarterly, monthly, or weekly) into a 
+`split_exposed_df` object. Split exposure data frames contain columns for 
+exposures both on a calendar period and policy year basis.
+- `exp_stats()` and `exp_shiny()` now require clarification as to which exposure
+basis should be used when passed a `split_exposed_df` object.
+- All `expose_df` objects now contains a `default_status` attribute.
+- `autotable()` functions now contain the arguments `decimals_amt` and 
+`suffix_amt`. The former allows one to specify the number of decimals appearing
+after amount columns. The latter is used to automatically scale large numbers 
+into by thousands, millions, billions, or trillions.
+- Corrected an error in the calculation of the standard deviations of claims 
+when `exp_stats()` is passed a weighting variable.
+- Added a `summary()` method for `exposed_df` objects that calls `exp_stats()`.
+- The assumed default status in `expose()` functions was changed from the first
+observed status to the most common status.
+
 # actxps 1.3.0
 
-- A new `conf_int` argument was added to `exp_stats()` and that creates confidence 
+- A new `conf_int` argument was added to `exp_stats()` that creates confidence 
 intervals around observed termination rates, credibility-weighted termination 
 rates, and any actual-to-expected ratios.
 - Similarly, `conf_int` was added to `trx_stats()` to create confidence intervals around utilization rates and any "percentage of" output columns. A `conf_level`

diff --git a/R/exp_shiny.R b/R/exp_shiny.R
@@ -120,6 +120,10 @@
 #' @param theme The name of a theme passed to the `preset` argument of
 #' `bslib::bs_theme()`. Alternatively, a complete Bootstrap theme created using
 #' `bslib::bs_theme()`.
+#' @param col_exposure Name of the column in `dat` containing exposures. This
+#' input is only used to clarify the exposure basis when `dat` is a
+#' `split_exposed_df` object. For more information on split exposures, see
+#' [expose_split()].
 #'
 #' @inheritParams exp_stats
 #'
@@ -151,13 +155,28 @@ exp_shiny <- function(dat,
                       credibility = TRUE,
                       conf_level = 0.95,
                       cred_r = 0.05,
-                      theme = "shiny") {
+                      theme = "shiny",
+                      col_exposure = "exposure") {
 
   rlang::check_installed("shiny")
   rlang::check_installed("bslib")
   rlang::check_installed("thematic")
 
   verify_exposed_df(dat)
+
+  # special logic required for split exposed data frames
+  if (is_split_exposed_df(dat)) {
+    check_split_expose_basis(dat, col_exposure)
+    dat <- rename(dat,
+                  exposure = {{col_exposure}})
+
+    if (col_exposure == "exposure_cal") {
+      dat$exposure_pol <- NULL
+    } else {
+      dat$exposure_cal <- NULL
+    }
+  }
+
   # check for presence of transactions
   all_trx_types <- verify_get_trx_types(dat, required = FALSE)
   has_trx <- !is.null(all_trx_types)
@@ -743,9 +762,20 @@ exp_shiny <- function(dat,
         shiny::need(input$play, "Paused")
       )
 
-      keep <- purrr::imap_lgl(preds$predictors,
-                              ~ length(setdiff(preds$scope[[.y]],
-                                               input[[paste0("i_", .x)]])) > 0)
+      keep <- preds |> select(predictors, scope, is_number) |>
+        purrr::pmap_lgl(\(predictors, scope, is_number) {
+
+          selected <- input[[paste0("i_", predictors)]]
+
+          if (is_number) {
+            !dplyr::near(scope[[1]], selected[[1]]) ||
+              !dplyr::near(scope[[2]], selected[[2]])
+          } else {
+            length(setdiff(scope, selected)) > 0
+          }
+
+        })
+
       preds$predictors[keep]
     })
 

diff --git a/R/exp_stats.R b/R/exp_stats.R
@@ -94,7 +94,7 @@
 #' (`.weight_qs`), and the number of records (`.weight_n`).
 #'
 #' @examples
-#' toy_census |> expose("2020-12-31", target_status = "Surrender") |>
+#' toy_census |> expose("2022-12-31", target_status = "Surrender") |>
 #'     exp_stats()
 #'
 #' exp_res <- census_dat |>
@@ -131,6 +131,8 @@ exp_stats <- function(.data, target_status = attr(.data, "target_status"),
     rlang::abort(c(x = glue::glue("Only 1 column can be passed to `wt`. You supplied {length(wt)} values.")))
   }
 
+  check_split_expose_basis(.data, col_exposure)
+
   res <- .data |>
     rename(exposure = {{col_exposure}},
            status = {{col_status}}) |>
@@ -280,7 +282,7 @@ finish_exp_stats <- function(.data, target_status, expected,
       ci <- rlang::exprs(
         # For binomial N
         # Var(S) = n * p * (Var(X) + E(X)^2 * (1 - p))
-        sd_agg = (claims * (
+        sd_agg = (n_claims * (
           (ex2_wt - ex_wt ^ 2) + ex_wt ^ 2 * (1 - q_obs))) ^ 0.5,
         q_obs_lower = stats::qnorm(p[[1]], claims, sd_agg) / exposure,
         q_obs_upper = stats::qnorm(p[[2]], claims, sd_agg) / exposure

diff --git a/R/expose.R b/R/expose.R
@@ -20,8 +20,7 @@
 #' always applied regardless of status.
 #'
 #' `default_status` is used to indicate the default active status that
-#' should be used when exposure records are created. If left blank, then the
-#' first status level will be assumed to be the default active status.
+#' should be used when exposure records are created.
 #'
 #' # Policy period and calendar period variations
 #'
@@ -46,14 +45,18 @@
 #' @param .data A data frame with census-level records
 #' @param end_date Experience study end date
 #' @param start_date Experience study start date. Default value = 1900-01-01.
-#' @param target_status Character vector of target status values. Default value = `NULL`.
-#' @param cal_expo Set to TRUE for calendar year exposures. Otherwise policy year exposures are assumed.
+#' @param target_status Character vector of target status values. Default value
+#'  = `NULL`.
+#' @param cal_expo Set to TRUE for calendar year exposures. Otherwise policy
+#' year exposures are assumed.
 #' @param expo_length Exposure period length
 #' @param col_pol_num Name of the column in `.data` containing the policy number
 #' @param col_status Name of the column in `.data` containing the policy status
 #' @param col_issue_date Name of the column in `.data` containing the issue date
-#' @param col_term_date Name of the column in `.data` containing the termination date
-#' @param default_status Optional scalar character representing the default active status code
+#' @param col_term_date Name of the column in `.data` containing the termination
+#' date
+#' @param default_status Optional scalar character representing the default
+#' active status code. If not provided, the most common status is assumed.
 #' @param ... Arguments passed to `expose()`
 #'
 #' @return A tibble with class `exposed_df`, `tbl_df`, `tbl`,
@@ -72,10 +75,13 @@
 #'
 #' census_dat |> expose_py("2019-12-31", target_status = "Surrender")
 #'
+#' @seealso [expose_split()] for information on splitting calendar year
+#' exposures by policy year.
+#'
 #' @references Atkinson and McGarry (2016). Experience Study Calculations.
 #' <https://www.soa.org/49378a/globalassets/assets/files/research/experience-study-calculations.pdf>
 #'
-#' @importFrom lubridate %m+%
+#' @importFrom lubridate %m+% %m-%
 #'
 #' @export
 expose <- function(.data,
@@ -102,32 +108,22 @@ expose <- function(.data,
 
   # set up exposure period lengths
   expo_length <- rlang::arg_match(expo_length)
-  expo_step <- switch(expo_length,
-                      "year" = lubridate::years(1),
-                      "quarter" = months(3),
-                      "month" = months(1),
-                      "week" = lubridate::days(7))
-
-  cal_frac <- switch(expo_length,
-                     "year" = year_frac,
-                     "quarter" = quarter_frac,
-                     "month" = month_frac,
-                     'week' = week_frac)
+  expo_step <- expo_step(expo_length)
+  cal_frac <- cal_frac(expo_length)
 
   # column renames and name conflicts
   .data <- .data |>
     rename(pol_num = {{col_pol_num}},
-                  status = {{col_status}},
-                  issue_date = {{col_issue_date}},
-                  term_date = {{col_term_date}}) |>
+           status = {{col_status}},
+           issue_date = {{col_issue_date}},
+           term_date = {{col_term_date}}) |>
     .expo_name_conflict(cal_expo, expo_length)
 
   # set up statuses
   if (!is.factor(.data$status)) .data$status <- factor(.data$status)
 
   if (missing(default_status)) {
-    default_status <- factor(levels(.data$status)[[1]],
-                             levels = levels(.data$status))
+    default_status <- most_common(.data$status)
   } else {
     status_levels <- union(levels(.data$status), default_status)
     default_status <- factor(default_status,
@@ -138,7 +134,7 @@ expose <- function(.data,
   # pre-exposure updates
   res <- .data |>
     filter(issue_date < end_date,
-                  is.na(term_date) | term_date > start_date) |>
+           is.na(term_date) | term_date > start_date) |>
     mutate(
       term_date = dplyr::if_else(term_date > end_date,
                                  lubridate::NA_Date_, term_date),
@@ -176,17 +172,18 @@ expose <- function(.data,
   if (cal_expo) {
     res <- res |>
       mutate(first_per = .time == 1,
-                    cal_e = cal_b %m+% (expo_step * .time) - 1,
-                    cal_b = cal_b %m+% (expo_step * (.time - 1)),
-                    exposure = dplyr::case_when(
-                      status %in% target_status ~ 1,
-                      first_per & last_per ~ cal_frac(last_date) - cal_frac(first_date, 1),
-                      first_per ~ 1 - cal_frac(first_date, 1),
-                      last_per ~ cal_frac(last_date),
-                      TRUE ~ 1)
+             cal_e = cal_b %m+% (expo_step * .time) - 1,
+             cal_b = cal_b %m+% (expo_step * (.time - 1)),
+             exposure = dplyr::case_when(
+               status %in% target_status ~ 1,
+               first_per & last_per ~ cal_frac(last_date) -
+                 cal_frac(first_date, 1),
+               first_per ~ 1 - cal_frac(first_date, 1),
+               last_per ~ cal_frac(last_date),
+               TRUE ~ 1)
       ) |>
       select(-rep_n, -first_date, -last_date, -first_per, -last_per,
-                    -.time, -tot_per) |>
+             -.time, -tot_per) |>
       relocate(cal_e, .after = cal_b) |>
       dplyr::rename_with(.fn = rename_col, .cols = cal_b, prefix = "cal") |>
       dplyr::rename_with(.fn = rename_col, .cols = cal_e, prefix = "cal",
@@ -214,7 +211,7 @@ expose <- function(.data,
   # set up S3 object
   new_exposed_df(res, end_date, start_date,
                  target_status, cal_expo, expo_length,
-                 trx_types = NULL)
+                 trx_types = NULL, default_status)
 
 }
 
@@ -267,6 +264,7 @@ expose_cw <- function(...) {
   expose(cal_expo = TRUE, expo_length = "week", ...)
 }
 
+# helper functions for calendar year fractions - do not export
 year_frac <- function(x, .offset = 0) {
   (lubridate::yday(x) - .offset) / (365 + lubridate::leap_year(x))
 }
@@ -285,6 +283,14 @@ week_frac <- function(x, .offset = 0) {
   (lubridate::wday(x) - .offset) / 7
 }
 
+cal_frac <- function(x) {
+  switch(x,
+         "year" = year_frac,
+         "quarter" = quarter_frac,
+         "month" = month_frac,
+         'week' = week_frac)
+}
+
 # helper function to handle name conflicts
 .expo_name_conflict <- function(.data, cal_expo, expo_length) {
 
@@ -314,3 +320,18 @@ abbr_period <- function(x) {
          "month" = "mth",
          'week' = "wk")
 }
+
+# determine the most common status
+most_common <- function(x) {
+  y <- table(x) |> sort(decreasing = TRUE) |> names()
+  factor(y[[1]], levels(x))
+}
+
+# helper function for determining exposure step lengths
+expo_step <- function(x) {
+  switch(x,
+         "year" = lubridate::years(1),
+         "quarter" = months(3),
+         "month" = months(1),
+         "week" = lubridate::days(7))
+}