From 6fb423ce6d79f95b314159f6265b63cdc10f213a Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sun, 17 Sep 2023 08:53:56 -0400 Subject: [PATCH 01/25] dev version --- DESCRIPTION | 2 +- NEWS.md | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e530715..db521a5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: actxps Title: Create Actuarial Experience Studies: Prepare Data, Summarize Results, and Create Reports -Version: 1.3.0 +Version: 1.3.0.9000 Authors@R: person("Matt", "Heaphy", email = "mattrmattrs@gmail.com", role = c("aut", "cre")) Maintainer: Matt Heaphy diff --git a/NEWS.md b/NEWS.md index e6857cb..1e58da9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,8 @@ +# actxps (development version) + # actxps 1.3.0 -- A new `conf_int` argument was added to `exp_stats()` and that creates confidence +- A new `conf_int` argument was added to `exp_stats()` that creates confidence intervals around observed termination rates, credibility-weighted termination rates, and any actual-to-expected ratios. - Similarly, `conf_int` was added to `trx_stats()` to create confidence intervals around utilization rates and any "percentage of" output columns. A `conf_level` From 624988c79cce405ed55d657ae15b83d3a9566c07 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Tue, 31 Oct 2023 08:42:13 -0400 Subject: [PATCH 02/25] Added default status property to expose_df objects --- R/expose.R | 70 ++++++++++++++---------- R/exposed_df_helpers.R | 21 +++++-- man/expose.Rd | 14 +++-- man/is_exposed_df.Rd | 12 +++- man/step_expose.Rd | 3 +- tests/testthat/test-exposed_df_helpers.R | 11 ++++ 6 files changed, 88 insertions(+), 43 deletions(-) diff --git a/R/expose.R b/R/expose.R index ad75538..4bd73dd 100644 --- a/R/expose.R +++ b/R/expose.R @@ -20,8 +20,7 @@ #' always applied regardless of status. #' #' `default_status` is used to indicate the default active status that -#' should be used when exposure records are created. If left blank, then the -#' first status level will be assumed to be the default active status. +#' should be used when exposure records are created. #' #' # Policy period and calendar period variations #' @@ -46,14 +45,18 @@ #' @param .data A data frame with census-level records #' @param end_date Experience study end date #' @param start_date Experience study start date. Default value = 1900-01-01. -#' @param target_status Character vector of target status values. Default value = `NULL`. -#' @param cal_expo Set to TRUE for calendar year exposures. Otherwise policy year exposures are assumed. +#' @param target_status Character vector of target status values. Default value +#' = `NULL`. +#' @param cal_expo Set to TRUE for calendar year exposures. Otherwise policy +#' year exposures are assumed. #' @param expo_length Exposure period length #' @param col_pol_num Name of the column in `.data` containing the policy number #' @param col_status Name of the column in `.data` containing the policy status #' @param col_issue_date Name of the column in `.data` containing the issue date -#' @param col_term_date Name of the column in `.data` containing the termination date -#' @param default_status Optional scalar character representing the default active status code +#' @param col_term_date Name of the column in `.data` containing the termination +#' date +#' @param default_status Optional scalar character representing the default +#' active status code. If not provided, the most common status is assumed. #' @param ... Arguments passed to `expose()` #' #' @return A tibble with class `exposed_df`, `tbl_df`, `tbl`, @@ -75,7 +78,7 @@ #' @references Atkinson and McGarry (2016). Experience Study Calculations. #' #' -#' @importFrom lubridate %m+% +#' @importFrom lubridate %m+% %m-% #' #' @export expose <- function(.data, @@ -108,26 +111,21 @@ expose <- function(.data, "month" = months(1), "week" = lubridate::days(7)) - cal_frac <- switch(expo_length, - "year" = year_frac, - "quarter" = quarter_frac, - "month" = month_frac, - 'week' = week_frac) + cal_frac <- cal_frac(expo_length) # column renames and name conflicts .data <- .data |> rename(pol_num = {{col_pol_num}}, - status = {{col_status}}, - issue_date = {{col_issue_date}}, - term_date = {{col_term_date}}) |> + status = {{col_status}}, + issue_date = {{col_issue_date}}, + term_date = {{col_term_date}}) |> .expo_name_conflict(cal_expo, expo_length) # set up statuses if (!is.factor(.data$status)) .data$status <- factor(.data$status) if (missing(default_status)) { - default_status <- factor(levels(.data$status)[[1]], - levels = levels(.data$status)) + default_status <- most_common(.data$status) } else { status_levels <- union(levels(.data$status), default_status) default_status <- factor(default_status, @@ -138,7 +136,7 @@ expose <- function(.data, # pre-exposure updates res <- .data |> filter(issue_date < end_date, - is.na(term_date) | term_date > start_date) |> + is.na(term_date) | term_date > start_date) |> mutate( term_date = dplyr::if_else(term_date > end_date, lubridate::NA_Date_, term_date), @@ -176,17 +174,18 @@ expose <- function(.data, if (cal_expo) { res <- res |> mutate(first_per = .time == 1, - cal_e = cal_b %m+% (expo_step * .time) - 1, - cal_b = cal_b %m+% (expo_step * (.time - 1)), - exposure = dplyr::case_when( - status %in% target_status ~ 1, - first_per & last_per ~ cal_frac(last_date) - cal_frac(first_date, 1), - first_per ~ 1 - cal_frac(first_date, 1), - last_per ~ cal_frac(last_date), - TRUE ~ 1) + cal_e = cal_b %m+% (expo_step * .time) - 1, + cal_b = cal_b %m+% (expo_step * (.time - 1)), + exposure = dplyr::case_when( + status %in% target_status ~ 1, + first_per & last_per ~ cal_frac(last_date) - + cal_frac(first_date, 1), + first_per ~ 1 - cal_frac(first_date, 1), + last_per ~ cal_frac(last_date), + TRUE ~ 1) ) |> select(-rep_n, -first_date, -last_date, -first_per, -last_per, - -.time, -tot_per) |> + -.time, -tot_per) |> relocate(cal_e, .after = cal_b) |> dplyr::rename_with(.fn = rename_col, .cols = cal_b, prefix = "cal") |> dplyr::rename_with(.fn = rename_col, .cols = cal_e, prefix = "cal", @@ -214,7 +213,7 @@ expose <- function(.data, # set up S3 object new_exposed_df(res, end_date, start_date, target_status, cal_expo, expo_length, - trx_types = NULL) + trx_types = NULL, default_status) } @@ -267,6 +266,7 @@ expose_cw <- function(...) { expose(cal_expo = TRUE, expo_length = "week", ...) } +# helper functions for calendar year fractions - do not export year_frac <- function(x, .offset = 0) { (lubridate::yday(x) - .offset) / (365 + lubridate::leap_year(x)) } @@ -285,6 +285,14 @@ week_frac <- function(x, .offset = 0) { (lubridate::wday(x) - .offset) / 7 } +cal_frac <- function(x) { + switch(x, + "year" = year_frac, + "quarter" = quarter_frac, + "month" = month_frac, + 'week' = week_frac) +} + # helper function to handle name conflicts .expo_name_conflict <- function(.data, cal_expo, expo_length) { @@ -314,3 +322,9 @@ abbr_period <- function(x) { "month" = "mth", 'week' = "wk") } + +# determine the most common status +most_common <- function(x) { + y <- table(x) |> sort(decreasing = TRUE) |> names() + factor(y[[1]], levels(x)) +} diff --git a/R/exposed_df_helpers.R b/R/exposed_df_helpers.R index 2e2372b..70d505e 100644 --- a/R/exposed_df_helpers.R +++ b/R/exposed_df_helpers.R @@ -60,7 +60,8 @@ as_exposed_df <- function(x, end_date, start_date = as.Date("1900-01-01"), col_pol_per, cols_dates, col_trx_n_ = "trx_n_", - col_trx_amt_ = "trx_amt_") { + col_trx_amt_ = "trx_amt_", + default_status) { if (is_exposed_df(x)) return(x) @@ -129,14 +130,19 @@ as_exposed_df <- function(x, end_date, start_date = as.Date("1900-01-01"), i = "Hint: create these columns or use the `col_*` arguments to specify existing columns that should be mapped to these elements.")) } + if (missing(default_status)) { + default_status <- most_common(x$status) + } + new_exposed_df(x, end_date, start_date, target_status, cal_expo, - expo_length, trx_types) + expo_length, trx_types, default_status) } # low-level class constructor new_exposed_df <- function(x, end_date, start_date, target_status, - cal_expo, expo_length, trx_types = NULL) { + cal_expo, expo_length, trx_types = NULL, + default_status) { date_cols <- make_date_col_names(cal_expo, expo_length) @@ -149,7 +155,8 @@ new_exposed_df <- function(x, end_date, start_date, target_status, start_date = start_date, end_date = end_date, date_cols = date_cols, - trx_types = trx_types) + trx_types = trx_types, + default_status = as.character(default_status)) } @@ -328,9 +335,10 @@ exposed_df_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "") { split_type <- strsplit(expo_type, "_")[[1]] cal_expo <- split_type[[1]] == "calendar" expo_length <- split_type[[2]] + default_status <- attr(x, "default_status") new_exposed_df(out, end_date, start_date, target_status, cal_expo, - expo_length, trx_types) + expo_length, trx_types, default_status) } @@ -357,9 +365,10 @@ exposed_df_cast <- function(x, to, ..., x_arg = "", to_arg = "") { split_type <- strsplit(expo_type, "_")[[1]] cal_expo <- split_type[[1]] == "calendar" expo_length <- split_type[[2]] + default_status <- attr(to, "default_status") new_exposed_df(out, end_date, start_date, target_status, cal_expo, - expo_length, trx_types) + expo_length, trx_types, default_status) } # exposed_df | exposed_df diff --git a/man/expose.Rd b/man/expose.Rd index 0315c54..1cf087d 100644 --- a/man/expose.Rd +++ b/man/expose.Rd @@ -49,9 +49,11 @@ expose_cw(...) \item{start_date}{Experience study start date. Default value = 1900-01-01.} -\item{target_status}{Character vector of target status values. Default value = \code{NULL}.} +\item{target_status}{Character vector of target status values. Default value += \code{NULL}.} -\item{cal_expo}{Set to TRUE for calendar year exposures. Otherwise policy year exposures are assumed.} +\item{cal_expo}{Set to TRUE for calendar year exposures. Otherwise policy +year exposures are assumed.} \item{expo_length}{Exposure period length} @@ -61,9 +63,11 @@ expose_cw(...) \item{col_issue_date}{Name of the column in \code{.data} containing the issue date} -\item{col_term_date}{Name of the column in \code{.data} containing the termination date} +\item{col_term_date}{Name of the column in \code{.data} containing the termination +date} -\item{default_status}{Optional scalar character representing the default active status code} +\item{default_status}{Optional scalar character representing the default +active status code. If not provided, the most common status is assumed.} \item{...}{Arguments passed to \code{expose()}} } @@ -102,7 +106,7 @@ always applied regardless of status. \code{default_status} is used to indicate the default active status that should be used when exposure records are created. If left blank, then the -first status level will be assumed to be the default active status. +most common status level will be assumed as the default active status. } \section{Policy period and calendar period variations}{ The functions \code{expose_py()}, \code{expose_pq()}, \code{expose_pm()}, diff --git a/man/is_exposed_df.Rd b/man/is_exposed_df.Rd index 7227c89..7c34770 100644 --- a/man/is_exposed_df.Rd +++ b/man/is_exposed_df.Rd @@ -21,7 +21,8 @@ as_exposed_df( col_pol_per, cols_dates, col_trx_n_ = "trx_n_", - col_trx_amt_ = "trx_amt_" + col_trx_amt_ = "trx_amt_", + default_status ) } \arguments{ @@ -31,9 +32,11 @@ as_exposed_df( \item{start_date}{Experience study start date. Default value = 1900-01-01.} -\item{target_status}{Character vector of target status values. Default value = \code{NULL}.} +\item{target_status}{Character vector of target status values. Default value += \code{NULL}.} -\item{cal_expo}{Set to TRUE for calendar year exposures. Otherwise policy year exposures are assumed.} +\item{cal_expo}{Set to TRUE for calendar year exposures. Otherwise policy +year exposures are assumed.} \item{expo_length}{Exposure period length} @@ -69,6 +72,9 @@ counts.} \item{col_trx_amt_}{Optional. Prefix to use for columns containing transaction amounts.} + +\item{default_status}{Optional scalar character representing the default +active status code. If not provided, the most common status is assumed.} } \value{ For \code{is_exposed_df()}, a length-1 logical vector. For diff --git a/man/step_expose.Rd b/man/step_expose.Rd index d92fbae..fb9f159 100644 --- a/man/step_expose.Rd +++ b/man/step_expose.Rd @@ -35,7 +35,8 @@ preprocessing have been estimated.} \item{start_date}{Experience study start date. Default value = 1900-01-01.} -\item{target_status}{Character vector of target status values. Default value = \code{NULL}.} +\item{target_status}{Character vector of target status values. Default value += \code{NULL}.} \item{options}{A named list of additional arguments passed to \code{\link[=expose]{expose()}}.} diff --git a/tests/testthat/test-exposed_df_helpers.R b/tests/testthat/test-exposed_df_helpers.R index db942f8..10ab3d7 100644 --- a/tests/testthat/test-exposed_df_helpers.R +++ b/tests/testthat/test-exposed_df_helpers.R @@ -132,3 +132,14 @@ test_that("exposed_df persists in a grouped and ungrouped context after using dp expect_s3_class(anti_join(grouped, join_frame, by = "pol_num"), "exposed_df") }) + +test_that("as_exposed_df default_status works", { + + expect_equal(as_exposed_df(expo2, "2022-12-31") |> attr("default_status"), + "Active") + expect_equal(as_exposed_df(expo2, "2022-12-31", default_status = "Inforce") |> + attr("default_status"), + "Inforce") + expect_equal(attr(expo, "default_status"), "Active") + +}) From e81e2b7e03016ee164a47cea63ecce5060f362e5 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Fri, 3 Nov 2023 07:58:52 -0400 Subject: [PATCH 03/25] namespace catch-up --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index 86828f9..24d7762 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -110,6 +110,7 @@ importFrom(dplyr,ungroup) importFrom(generics,tidy) importFrom(ggplot2,autoplot) importFrom(lubridate,"%m+%") +importFrom(lubridate,"%m-%") importFrom(recipes,bake) importFrom(recipes,prep) importFrom(rlang,":=") From 1803c10562354d6d5c053029005718fa458a8ce1 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sat, 4 Nov 2023 08:28:37 -0400 Subject: [PATCH 04/25] first draft of expose_split function --- R/expose_split.R | 89 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 R/expose_split.R diff --git a/R/expose_split.R b/R/expose_split.R new file mode 100644 index 0000000..d7aded7 --- /dev/null +++ b/R/expose_split.R @@ -0,0 +1,89 @@ +expose_split <- function(dat) { + + verify_exposed_df(dat) + if (attr(dat, "exposure_type") != "calendar_year") { + rlang::abort(c(x = "`dat` must contain calendar year exposures.", + i = "Hint: Try passing an `exposed_df` object that was created by `expose_cy()`.")) + } + + cal_frac <- cal_frac("year") + target_status <- attr(dat, "target_status") + default_status <- attr(dat, "default_status") + + pol_frac <- function(x, start, end) { + as.integer(x - start + 1) / as.integer(end - start + 1) + } + + # time fractions + # h = yearfrac from boy to anniv + # v = yearfrac from boy to term + + dat <- dat |> mutate( + anniv = issue_date %m+% + (lubridate::years(1) * + (lubridate::year(cal_yr) - lubridate::year(issue_date))), + split = between(anniv, cal_yr, cal_yr_end), + h = cal_frac(anniv, 1), + v = cal_frac(term_date) + ) + + pre_anniv <- dat |> + filter(split) |> + mutate(piece = 1L, + cal_yr = pmax(issue_date, cal_yr), + cal_yr_end = anniv - 1, + exposure = h, + exposure_py = 1 - pol_frac(cal_yr - 1L, + anniv %m-% lubridate::years(1), + anniv - 1L) + ) + + post_anniv <- dat |> + mutate(piece = 2L, + cal_yr = anniv, + exposure = 1 - h, + exposure_py = pol_frac(cal_yr_end, + anniv, + anniv %m+% lubridate::years(1) - 1L)) + + dat <- dplyr::bind_rows(pre_anniv, post_anniv) |> + filter(cal_yr <= cal_yr_end, + is.na(term_date) | term_date >= cal_yr) |> + mutate(term_date = dplyr::if_else(between(term_date, cal_yr, cal_yr_end), + term_date, lubridate::NA_Date_), + pol_yr = lubridate::year(cal_yr) - lubridate::year(issue_date) + + piece - 1L, + status = dplyr::if_else(is.na(term_date), + factor(default_status, + levels = levels(dat$status)), + status), + claims = status %in% target_status, + exposure_cy = dplyr::case_when( + claims ~ dplyr::if_else(piece == 1 | cal_yr == issue_date, + 1, 1 - h), + is.na(term_date) ~ exposure, + piece == 1 ~ v, + .default = v - h + ), + exposure_py = dplyr::case_when( + claims ~ dplyr::if_else(piece == 2, 1, exposure_py), + is.na(term_date) ~ exposure_py, + piece == 1 ~ pol_frac(term_date, + anniv %m-% lubridate::years(1), + anniv - 1L) - (1 - exposure_py), + .default = pol_frac(term_date, + anniv, + anniv %m+% lubridate::years(1) - 1L) + ), + piece = dplyr::if_else(piece == 2, "post_anniv", "pre_anniv") |> + factor() + ) |> + arrange(pol_num, cal_yr, piece) |> + select(-h, -v, -split, -anniv, -claims, -exposure) + + class(dat) <- c("split_exposed_df", class(dat)) + attr(dat, "exposure_type") <- "split_year" + + dat + +} From 3a29db7bb5cfd7666322f8b476e73336f6967ac2 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sat, 4 Nov 2023 09:35:20 -0400 Subject: [PATCH 05/25] roxygen for expose_split --- NAMESPACE | 1 + R/expose_split.R | 49 +++++++++++++++++++++++++++++++++++---------- R/globals.R | 3 ++- man/expose.Rd | 3 +-- man/expose_split.Rd | 39 ++++++++++++++++++++++++++++++++++++ 5 files changed, 81 insertions(+), 14 deletions(-) create mode 100644 man/expose_split.Rd diff --git a/NAMESPACE b/NAMESPACE index 24d7762..fee3dff 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -63,6 +63,7 @@ export(expose_pm) export(expose_pq) export(expose_pw) export(expose_py) +export(expose_split) export(filter) export(full_join) export(group_by) diff --git a/R/expose_split.R b/R/expose_split.R index d7aded7..544e0b6 100644 --- a/R/expose_split.R +++ b/R/expose_split.R @@ -1,3 +1,30 @@ +#' Split calendar year exposures by policy year +#' +#' @description Convert a calendar year exposed data frame into a split exposed +#' data frame that divides each calendar year into two pieces: a pre-anniversary +#' record and a post-anniversary record. +#' +#' @details `dat` must be an `exposed_df` with calendar year exposure records. +#' Calendar year exposures are created by the function [expose_cy()] (or +#' [expose()] when `expo_length = "year"` and `cal_expo = TRUE`). +#' +#' @param dat An `exposed_df` object with calendar year exposures. +#' +#' @return A tibble with class `split_exposed_df`, `exposed_df`, `tbl_df`, +#' `tbl`, and `data.frame`. The results include all columns in `dat` except that +#' `exposure` has been renamed to `exposure_cal`. Additional columns include: +#' +#' - `exposure_pol` - policy year exposures +#' - `pol_yr` - policy year +#' - `piece` - a factor containing 2 levels: "pre_anniv" (pre-anniversary +#' records) and "post_anniv" (post-anniversary records) +#' +#' @examples +#' toy_census |> expose_cy("2022-12-31") |> expose_split() +#' +#' @seealso [expose()] +#' +#' @export expose_split <- function(dat) { verify_exposed_df(dat) @@ -33,18 +60,18 @@ expose_split <- function(dat) { cal_yr = pmax(issue_date, cal_yr), cal_yr_end = anniv - 1, exposure = h, - exposure_py = 1 - pol_frac(cal_yr - 1L, - anniv %m-% lubridate::years(1), - anniv - 1L) + exposure_pol = 1 - pol_frac(cal_yr - 1L, + anniv %m-% lubridate::years(1), + anniv - 1L) ) post_anniv <- dat |> mutate(piece = 2L, cal_yr = anniv, exposure = 1 - h, - exposure_py = pol_frac(cal_yr_end, - anniv, - anniv %m+% lubridate::years(1) - 1L)) + exposure_pol = pol_frac(cal_yr_end, + anniv, + anniv %m+% lubridate::years(1) - 1L)) dat <- dplyr::bind_rows(pre_anniv, post_anniv) |> filter(cal_yr <= cal_yr_end, @@ -58,19 +85,19 @@ expose_split <- function(dat) { levels = levels(dat$status)), status), claims = status %in% target_status, - exposure_cy = dplyr::case_when( + exposure_cal = dplyr::case_when( claims ~ dplyr::if_else(piece == 1 | cal_yr == issue_date, 1, 1 - h), is.na(term_date) ~ exposure, piece == 1 ~ v, .default = v - h ), - exposure_py = dplyr::case_when( - claims ~ dplyr::if_else(piece == 2, 1, exposure_py), - is.na(term_date) ~ exposure_py, + exposure_pol = dplyr::case_when( + claims ~ dplyr::if_else(piece == 2, 1, exposure_pol), + is.na(term_date) ~ exposure_pol, piece == 1 ~ pol_frac(term_date, anniv %m-% lubridate::years(1), - anniv - 1L) - (1 - exposure_py), + anniv - 1L) - (1 - exposure_pol), .default = pol_frac(term_date, anniv, anniv %m+% lubridate::years(1) - 1L) diff --git a/R/globals.R b/R/globals.R index 867accc..6a83dc4 100644 --- a/R/globals.R +++ b/R/globals.R @@ -10,4 +10,5 @@ utils::globalVariables(c("issue_date", "term_date", "last_date", "q_obs_lower", "q_obs_upper", "trx_util_lower", "trx_util_upper", "sd_agg", "sd_all", "sd_trx", "trx_amt_sq", - "n", "name", "ymax", "ymin")) + "n", "name", "ymax", "ymin", + "cal_yr", "anniv", "cal_yr_end", "h", "piece", "v")) diff --git a/man/expose.Rd b/man/expose.Rd index 1cf087d..04638b9 100644 --- a/man/expose.Rd +++ b/man/expose.Rd @@ -105,8 +105,7 @@ in the observation period. If the annual exposure method isn't desired, always applied regardless of status. \code{default_status} is used to indicate the default active status that -should be used when exposure records are created. If left blank, then the -most common status level will be assumed as the default active status. +should be used when exposure records are created. } \section{Policy period and calendar period variations}{ The functions \code{expose_py()}, \code{expose_pq()}, \code{expose_pm()}, diff --git a/man/expose_split.Rd b/man/expose_split.Rd new file mode 100644 index 0000000..08de3ed --- /dev/null +++ b/man/expose_split.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/expose_split.R +\name{expose_split} +\alias{expose_split} +\title{Split calendar year exposures by policy year} +\usage{ +expose_split(dat) +} +\arguments{ +\item{dat}{An \code{exposed_df} object with calendar year exposures.} +} +\value{ +A tibble with class \code{split_exposed_df}, \code{exposed_df}, \code{tbl_df}, +\code{tbl}, and \code{data.frame}. The results include all columns in \code{dat} except that +\code{exposure} has been renamed to \code{exposure_cal}. Additional columns include: +\itemize{ +\item \code{exposure_pol} - policy year exposures +\item \code{pol_yr} - policy year +\item \code{piece} - a factor containing 2 levels: "pre_anniv" (pre-anniversary +records) and "post_anniv" (post-anniversary records) +} +} +\description{ +Convert a calendar year exposed data frame into a split exposed +data frame that divides each calendar year into two pieces: a pre-anniversary +record and a post-anniversary record. +} +\details{ +\code{dat} must be an \code{exposed_df} with calendar year exposure records. +Calendar year exposures are created by the function \code{\link[=expose_cy]{expose_cy()}} (or +\code{\link[=expose]{expose()}} when \code{expo_length = "year"} and \code{cal_expo = TRUE}). +} +\examples{ +toy_census |> expose_cy("2022-12-31") |> expose_split() + +} +\seealso{ +\code{\link[=expose]{expose()}} +} From 1854d640156df55d77d8fd2eeaccd6289fd02228 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sat, 4 Nov 2023 09:41:23 -0400 Subject: [PATCH 06/25] split exposure tests --- tests/testthat/test-expose.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/testthat/test-expose.R b/tests/testthat/test-expose.R index 323e8ac..aeb5862 100644 --- a/tests/testthat/test-expose.R +++ b/tests/testthat/test-expose.R @@ -104,3 +104,26 @@ test_that("Renaming and name conflict warnings work", { expect_warning(expose_cy(toy_census |> mutate(cal_yr = 1), "2020-12-31")) expect_warning(expose_cy(toy_census |> mutate(cal_yr_end = 1), "2020-12-31")) }) + +# split exposure tests + +test_that("expose_split() fails when passed non-CY exposures", { + expect_error(expose_split(1, regexp = "must be an `exposed_df`")) + expect_error(expose_py(toy_census, "2022-12-31") |> expose_split(), + regexp = "must contain calendar year exposures") + expect_no_error(expose_cy(toy_census, "2022-12-31") |> expose_split()) +}) + + +study_split <- expose_split(study_cy) |> add_transactions(withdrawals) +study_cy <- add_transactions(study_cy, withdrawals) + +test_that("expose_split() is consistent with expose_cy()", { + expect_equal(sum(study_cy$exposure), sum(study_split$exposure_cal)) + expect_equal(sum(study_cy$status != "Active"), + sum(study_split$status != "Active")) + expect_equal(sum(study_cy$trx_amt_Base), + sum(study_split$trx_amt_Base)) + expect_equal(sum(study_cy$trx_amt_Rider), + sum(study_split$trx_amt_Rider)) +}) From dabcaa2a1aee689d04fd2d8fe33e7914fcfa1952 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sat, 4 Nov 2023 10:02:27 -0400 Subject: [PATCH 07/25] warning for calling expose_split with transactions attached --- R/expose_split.R | 6 ++++++ tests/testthat/test-expose.R | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/R/expose_split.R b/R/expose_split.R index 544e0b6..822dcf5 100644 --- a/R/expose_split.R +++ b/R/expose_split.R @@ -33,6 +33,12 @@ expose_split <- function(dat) { i = "Hint: Try passing an `exposed_df` object that was created by `expose_cy()`.")) } + if (!is.null(attr(dat, "trx_types"))) { + rlang::warn(c("!" = "`dat` has transactions attached. This will lead to duplication of transactions after exposures are split.", + "i" = "Try calling `add_transactions()` after calling `expose_split()` instead of beforehand.") + ) + } + cal_frac <- cal_frac("year") target_status <- attr(dat, "target_status") default_status <- attr(dat, "default_status") diff --git a/tests/testthat/test-expose.R b/tests/testthat/test-expose.R index aeb5862..8bc4140 100644 --- a/tests/testthat/test-expose.R +++ b/tests/testthat/test-expose.R @@ -127,3 +127,9 @@ test_that("expose_split() is consistent with expose_cy()", { expect_equal(sum(study_cy$trx_amt_Rider), sum(study_split$trx_amt_Rider)) }) + +test_that("expose_split() warns about transactions attached too early", { + expect_warning( + expose_split(study_cy), + regexp = "This will lead to duplication of transactions") +}) From c1d2b033cca370cb27667ac7859042020fc87736 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sat, 4 Nov 2023 10:16:54 -0400 Subject: [PATCH 08/25] informative error when split exposures are passed to summary function without clarifying the exposure basis --- R/exp_stats.R | 2 ++ R/expose_split.R | 9 +++++++++ R/trx_stats.R | 2 ++ tests/testthat/test-expose.R | 7 +++++++ 4 files changed, 20 insertions(+) diff --git a/R/exp_stats.R b/R/exp_stats.R index 9e71fa8..4e90ac5 100644 --- a/R/exp_stats.R +++ b/R/exp_stats.R @@ -131,6 +131,8 @@ exp_stats <- function(.data, target_status = attr(.data, "target_status"), rlang::abort(c(x = glue::glue("Only 1 column can be passed to `wt`. You supplied {length(wt)} values."))) } + check_warn_split_expose(.data, col_exposure) + res <- .data |> rename(exposure = {{col_exposure}}, status = {{col_status}}) |> diff --git a/R/expose_split.R b/R/expose_split.R index 822dcf5..2db0892 100644 --- a/R/expose_split.R +++ b/R/expose_split.R @@ -120,3 +120,12 @@ expose_split <- function(dat) { dat } + +# This internal function sends a warning if a `split_exposed_df` is passed +# without clarifying which exposure basis should be used. +check_warn_split_expose <- function(dat, col_exposure) { + if (inherits(dat, "split_exposed_df") && col_exposure == "exposure") { + rlang::abort(c(x = "A `split_exposed_df` was passed without clarifying whether policy or calendar year exposures should be used to summarize results.", + i = 'Pass "exposure_pol" (for policy year exposures) or "exposure_cal" (for calendar year exposures) to the `col_exposure` argument.')) + } +} diff --git a/R/trx_stats.R b/R/trx_stats.R index f4918a3..242d3b0 100644 --- a/R/trx_stats.R +++ b/R/trx_stats.R @@ -170,6 +170,8 @@ trx_stats <- function(.data, } } + check_warn_split_expose(.data, col_exposure) + start_date <- attr(.data, "start_date") end_date <- attr(.data, "end_date") diff --git a/tests/testthat/test-expose.R b/tests/testthat/test-expose.R index 8bc4140..76fd383 100644 --- a/tests/testthat/test-expose.R +++ b/tests/testthat/test-expose.R @@ -133,3 +133,10 @@ test_that("expose_split() warns about transactions attached too early", { expose_split(study_cy), regexp = "This will lead to duplication of transactions") }) + +test_that("split exposures error when passed to summary functions without clarifying the exposure basis", { + expect_error(exp_stats(study_split), + regexp = "A `split_exposed_df` was passed without clarifying") + expect_error(trx_stats(study_split), + regexp = "A `split_exposed_df` was passed without clarifying") +}) From 214a9ac824f0b08a95296afbd5f9ca4eaa31a01a Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sun, 5 Nov 2023 09:49:41 -0500 Subject: [PATCH 09/25] additional split exposure tests --- tests/testthat/test-expose.R | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/testthat/test-expose.R b/tests/testthat/test-expose.R index 76fd383..989b727 100644 --- a/tests/testthat/test-expose.R +++ b/tests/testthat/test-expose.R @@ -140,3 +140,19 @@ test_that("split exposures error when passed to summary functions without clarif expect_error(trx_stats(study_split), regexp = "A `split_exposed_df` was passed without clarifying") }) + +check_period_end_split <- expose_cy(toy_census, "2020-12-31", + target_status = "Surrender") |> + expose_split() |> + select(pol_num, cal_yr, cal_yr_end) |> + group_by(pol_num) |> + mutate(x = dplyr::lead(cal_yr)) |> + ungroup() |> + na.omit() |> + filter(x != cal_yr_end + 1) |> + nrow() + +test_that("Split period start and end dates roll", { + expect_true(all(study_split$cal_yr <= study_split$cal_yr_end)) + expect_equal(check_period_end_split, 0) +}) From 3c5d42397d8a1154cd0a39828ead326e2dfa1143 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sun, 5 Nov 2023 20:32:09 -0500 Subject: [PATCH 10/25] - split exposure treatment for shiny apps - renamed internal split exposure checking function --- R/exp_shiny.R | 13 ++++++++++++- R/exp_stats.R | 2 +- R/expose_split.R | 12 +++++++----- R/trx_stats.R | 2 +- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/R/exp_shiny.R b/R/exp_shiny.R index b0cb9eb..7189356 100644 --- a/R/exp_shiny.R +++ b/R/exp_shiny.R @@ -120,6 +120,10 @@ #' @param theme The name of a theme passed to the `preset` argument of #' `bslib::bs_theme()`. Alternatively, a complete Bootstrap theme created using #' `bslib::bs_theme()`. +#' @param col_exposure Name of the column in `dat` containing exposures. This +#' input is only used to clarify the exposure basis when `dat` is a +#' `split_exposed_df` object. For more information on split exposures, see +#' [expose_split()]. #' #' @inheritParams exp_stats #' @@ -151,13 +155,20 @@ exp_shiny <- function(dat, credibility = TRUE, conf_level = 0.95, cred_r = 0.05, - theme = "shiny") { + theme = "shiny", + col_exposure = "exposure") { rlang::check_installed("shiny") rlang::check_installed("bslib") rlang::check_installed("thematic") verify_exposed_df(dat) + check_split_expose_basis(dat, col_exposure) + if (inherits(dat, "split_exposed_df")) { + dat <- rename(dat, + exposure = {{col_exposure}}) + } + # check for presence of transactions all_trx_types <- verify_get_trx_types(dat, required = FALSE) has_trx <- !is.null(all_trx_types) diff --git a/R/exp_stats.R b/R/exp_stats.R index 4e90ac5..363ddfb 100644 --- a/R/exp_stats.R +++ b/R/exp_stats.R @@ -131,7 +131,7 @@ exp_stats <- function(.data, target_status = attr(.data, "target_status"), rlang::abort(c(x = glue::glue("Only 1 column can be passed to `wt`. You supplied {length(wt)} values."))) } - check_warn_split_expose(.data, col_exposure) + check_split_expose_basis(.data, col_exposure) res <- .data |> rename(exposure = {{col_exposure}}, diff --git a/R/expose_split.R b/R/expose_split.R index 2db0892..fa12437 100644 --- a/R/expose_split.R +++ b/R/expose_split.R @@ -121,11 +121,13 @@ expose_split <- function(dat) { } -# This internal function sends a warning if a `split_exposed_df` is passed +# This internal function sends an error if a `split_exposed_df` is passed # without clarifying which exposure basis should be used. -check_warn_split_expose <- function(dat, col_exposure) { - if (inherits(dat, "split_exposed_df") && col_exposure == "exposure") { - rlang::abort(c(x = "A `split_exposed_df` was passed without clarifying whether policy or calendar year exposures should be used to summarize results.", - i = 'Pass "exposure_pol" (for policy year exposures) or "exposure_cal" (for calendar year exposures) to the `col_exposure` argument.')) +check_split_expose_basis <- function(dat, col_exposure) { + if (inherits(dat, "split_exposed_df") && + !col_exposure %in% c("exposure_cal", "exposure_pol")) { + rlang::abort(c(x = "A `split_exposed_df` was passed without clarifying which exposure basis should be used to summarize results.", + i = 'Pass "exposure_pol" to `col_exposure` for policy year exposures.', + i = 'Pass "exposure_cal" to `col_exposure` for calendar year exposures.')) } } diff --git a/R/trx_stats.R b/R/trx_stats.R index 242d3b0..829f0e0 100644 --- a/R/trx_stats.R +++ b/R/trx_stats.R @@ -170,7 +170,7 @@ trx_stats <- function(.data, } } - check_warn_split_expose(.data, col_exposure) + check_split_expose_basis(.data, col_exposure) start_date <- attr(.data, "start_date") end_date <- attr(.data, "end_date") From 9691867de9284bd06f60fb1c6cb1d2087952b81f Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sun, 5 Nov 2023 21:05:03 -0500 Subject: [PATCH 11/25] cleaned up extraneous filter description appearing for numeric fields caused by floating point representation --- R/exp_shiny.R | 17 ++++++++++++++--- man/exp_shiny.Rd | 8 +++++++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/R/exp_shiny.R b/R/exp_shiny.R index 7189356..3a7cebd 100644 --- a/R/exp_shiny.R +++ b/R/exp_shiny.R @@ -754,9 +754,20 @@ exp_shiny <- function(dat, shiny::need(input$play, "Paused") ) - keep <- purrr::imap_lgl(preds$predictors, - ~ length(setdiff(preds$scope[[.y]], - input[[paste0("i_", .x)]])) > 0) + keep <- preds |> select(predictors, scope, is_number) |> + purrr::pmap_lgl(\(predictors, scope, is_number) { + + selected <- input[[paste0("i_", predictors)]] + + if (is_number) { + !dplyr::near(scope[[1]], selected[[1]]) || + !dplyr::near(scope[[2]], selected[[2]]) + } else { + length(setdiff(scope, selected)) > 0 + } + + }) + preds$predictors[keep] }) diff --git a/man/exp_shiny.Rd b/man/exp_shiny.Rd index 74f6e82..bfabbc3 100644 --- a/man/exp_shiny.Rd +++ b/man/exp_shiny.Rd @@ -13,7 +13,8 @@ exp_shiny( credibility = TRUE, conf_level = 0.95, cred_r = 0.05, - theme = "shiny" + theme = "shiny", + col_exposure = "exposure" ) } \arguments{ @@ -44,6 +45,11 @@ method} \item{theme}{The name of a theme passed to the \code{preset} argument of \code{bslib::bs_theme()}. Alternatively, a complete Bootstrap theme created using \code{bslib::bs_theme()}.} + +\item{col_exposure}{Name of the column in \code{dat} containing exposures. This +input is only used to clarify the exposure basis when \code{dat} is a +\code{split_exposed_df} object. For more information on split exposures, see +\code{\link[=expose_split]{expose_split()}}.} } \value{ No return value. This function is called for the side effect of From 4e46e2a885c2b9f12041164cd28a6dcd1ea6ff7a Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sun, 5 Nov 2023 21:08:16 -0500 Subject: [PATCH 12/25] shiny - hide unused exposure basis --- R/exp_shiny.R | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/R/exp_shiny.R b/R/exp_shiny.R index 3a7cebd..6bc0cec 100644 --- a/R/exp_shiny.R +++ b/R/exp_shiny.R @@ -163,10 +163,18 @@ exp_shiny <- function(dat, rlang::check_installed("thematic") verify_exposed_df(dat) - check_split_expose_basis(dat, col_exposure) + + # special logic required for split exposed data frames if (inherits(dat, "split_exposed_df")) { + check_split_expose_basis(dat, col_exposure) dat <- rename(dat, exposure = {{col_exposure}}) + + if (col_exposure == "exposure_cal") { + dat$exposure_pol <- NULL + } else { + dat$exposure_cal <- NULL + } } # check for presence of transactions From c6d10f77c48fd94d61d77b0c50b965002a35331e Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sun, 5 Nov 2023 21:16:31 -0500 Subject: [PATCH 13/25] documentation cleanup --- R/expose.R | 3 +++ R/expose_split.R | 7 ++++--- R/globals.R | 3 ++- man/expose.Rd | 4 ++++ man/expose_split.Rd | 7 ++++--- 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/R/expose.R b/R/expose.R index 4bd73dd..b785003 100644 --- a/R/expose.R +++ b/R/expose.R @@ -75,6 +75,9 @@ #' #' census_dat |> expose_py("2019-12-31", target_status = "Surrender") #' +#' @seealso [expose_split()] for information on splitting calendar year +#' exposures by policy year. +#' #' @references Atkinson and McGarry (2016). Experience Study Calculations. #' #' diff --git a/R/expose_split.R b/R/expose_split.R index fa12437..b4ea2fe 100644 --- a/R/expose_split.R +++ b/R/expose_split.R @@ -1,8 +1,9 @@ #' Split calendar year exposures by policy year #' -#' @description Convert a calendar year exposed data frame into a split exposed -#' data frame that divides each calendar year into two pieces: a pre-anniversary -#' record and a post-anniversary record. +#' @description Convert a data frame of calendar year exposure-level records +#' into a split exposed data frame that divides each calendar year into two +#' pieces based on policy years: a pre-anniversary record and a post-anniversary +#' record. #' #' @details `dat` must be an `exposed_df` with calendar year exposure records. #' Calendar year exposures are created by the function [expose_cy()] (or diff --git a/R/globals.R b/R/globals.R index 6a83dc4..16bf57c 100644 --- a/R/globals.R +++ b/R/globals.R @@ -11,4 +11,5 @@ utils::globalVariables(c("issue_date", "term_date", "last_date", "trx_util_lower", "trx_util_upper", "sd_agg", "sd_all", "sd_trx", "trx_amt_sq", "n", "name", "ymax", "ymin", - "cal_yr", "anniv", "cal_yr_end", "h", "piece", "v")) + "cal_yr", "anniv", "cal_yr_end", "h", "piece", "v", + "scope")) diff --git a/man/expose.Rd b/man/expose.Rd index 04638b9..9f10b7f 100644 --- a/man/expose.Rd +++ b/man/expose.Rd @@ -139,3 +139,7 @@ census_dat |> expose_py("2019-12-31", target_status = "Surrender") Atkinson and McGarry (2016). Experience Study Calculations. \url{https://www.soa.org/49378a/globalassets/assets/files/research/experience-study-calculations.pdf} } +\seealso{ +\code{\link[=expose_split]{expose_split()}} for information on splitting calendar year +exposures by policy year. +} diff --git a/man/expose_split.Rd b/man/expose_split.Rd index 08de3ed..00c92d7 100644 --- a/man/expose_split.Rd +++ b/man/expose_split.Rd @@ -21,9 +21,10 @@ records) and "post_anniv" (post-anniversary records) } } \description{ -Convert a calendar year exposed data frame into a split exposed -data frame that divides each calendar year into two pieces: a pre-anniversary -record and a post-anniversary record. +Convert a data frame of calendar year exposure-level records +into a split exposed data frame that divides each calendar year into two +pieces based on policy years: a pre-anniversary record and a post-anniversary +record. } \details{ \code{dat} must be an \code{exposed_df} with calendar year exposure records. From a2d61d8847a05d57b5a9ae65c9e171eb28d74664 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sat, 11 Nov 2023 21:40:15 -0500 Subject: [PATCH 14/25] New autotable arguments for auto-scaling amount columns and specifying the number of decimals displayed. --- R/tables.R | 18 ++++++++++++++++-- man/autotable.Rd | 15 +++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/R/tables.R b/R/tables.R index 24536a4..1e703c7 100644 --- a/R/tables.R +++ b/R/tables.R @@ -15,6 +15,15 @@ #' [trx_stats()] function. #' @param fontsize Font size percentage multiplier. #' @param decimals Number of decimals to display for percentages +#' @param decimals_amt Number of decimals to display for amount columns (number +#' of claims, claim amounts, exposures, transaction counts, total transactions, +#' and average transactions) +#' @param suffix_amt This argument has the same meaning as the `suffixing` +#' argument in [gt::fmt_number()] for amount columns. If `FALSE` (the default), +#' no scaling or suffixing are applied to amount columns. If `TRUE`, all amount +#' columns are automatically scaled and suffixed by "K" (thousands), "M" +#' (millions), "B" (billions), or "T" (trillions). See [gt::fmt_number()] for +#' more information. #' @param colorful If `TRUE`, color will be added to the the observed #' termination rate and actual-to-expected columns for termination studies, and #' the utilization rate and "percentage of" columns for transaction studies. @@ -80,6 +89,8 @@ autotable.exp_df <- function(object, fontsize = 100, decimals = 1, rename_cols = rlang::list2(...), show_conf_int = FALSE, show_cred_adj = FALSE, + decimals_amt = 0, + suffix_amt = FALSE, ...) { rlang::check_installed("RColorBrewer") @@ -110,7 +121,8 @@ autotable.exp_df <- function(object, fontsize = 100, decimals = 1, tab <- object |> select(-dplyr::starts_with(".weight")) |> gt::gt(...) |> - gt::fmt_number(c(claims, exposure), decimals = 0) |> + gt::fmt_number(c(n_claims, claims, exposure), + decimals = decimals_amt, suffixing = suffix_amt) |> gt::fmt_percent(c(q_obs, dplyr::ends_with("_lower"), dplyr::ends_with("_upper"), @@ -211,6 +223,8 @@ autotable.trx_df <- function(object, fontsize = 100, decimals = 1, color_pct_of = "RColorBrewer::RdBu", rename_cols = rlang::list2(...), show_conf_int = FALSE, + decimals_amt = 0, + suffix_amt = FALSE, ...) { rlang::check_installed("RColorBrewer") @@ -240,7 +254,7 @@ autotable.trx_df <- function(object, fontsize = 100, decimals = 1, arrange(trx_type) |> gt::gt(groupname_col = "trx_type") |> gt::fmt_number(c(trx_n, trx_amt, trx_flag, avg_trx, avg_all), - decimals = 0) |> + decimals = decimals_amt, suffixing = suffix_amt) |> gt::fmt_number(trx_freq, decimals = 1) |> gt::fmt_percent(c(dplyr::starts_with("trx_util"), dplyr::starts_with("pct_of_")), diff --git a/man/autotable.Rd b/man/autotable.Rd index ecc2ba3..5ae1c02 100644 --- a/man/autotable.Rd +++ b/man/autotable.Rd @@ -18,6 +18,8 @@ autotable(object, ...) rename_cols = rlang::list2(...), show_conf_int = FALSE, show_cred_adj = FALSE, + decimals_amt = 0, + suffix_amt = FALSE, ... ) @@ -30,6 +32,8 @@ autotable(object, ...) color_pct_of = "RColorBrewer::RdBu", rename_cols = rlang::list2(...), show_conf_int = FALSE, + decimals_amt = 0, + suffix_amt = FALSE, ... ) } @@ -64,6 +68,17 @@ assuming they are available on \code{object}.} \item{show_cred_adj}{If \code{TRUE} credibility-weighted termination rates will be displayed assuming they are available on \code{object}.} +\item{decimals_amt}{Number of decimals to display for amount columns (number +of claims, claim amounts, exposures, transaction counts, total transactions, +and average transactions)} + +\item{suffix_amt}{This argument has the same meaning as the \code{suffixing} +argument in \code{\link[gt:fmt_number]{gt::fmt_number()}} for amount columns. If \code{FALSE} (the default), +no scaling or suffixing are applied to amount columns. If \code{TRUE}, all amount +columns are automatically scaled and suffixed by "K" (thousands), "M" +(millions), "B" (billions), or "T" (trillions). See \code{\link[gt:fmt_number]{gt::fmt_number()}} for +more information.} + \item{color_util}{Color palette used for utilization rates.} \item{color_pct_of}{Color palette used for "percentage of" columns.} From aa5bea9681b97c374940a4961c33be58b42514d3 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sat, 11 Nov 2023 21:43:39 -0500 Subject: [PATCH 15/25] bug fix for standard deviation of claims in weighted exp_stats call --- R/exp_stats.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/exp_stats.R b/R/exp_stats.R index 363ddfb..5b7e6d7 100644 --- a/R/exp_stats.R +++ b/R/exp_stats.R @@ -282,7 +282,7 @@ finish_exp_stats <- function(.data, target_status, expected, ci <- rlang::exprs( # For binomial N # Var(S) = n * p * (Var(X) + E(X)^2 * (1 - p)) - sd_agg = (claims * ( + sd_agg = (n_claims * ( (ex2_wt - ex_wt ^ 2) + ex_wt ^ 2 * (1 - q_obs))) ^ 0.5, q_obs_lower = stats::qnorm(p[[1]], claims, sd_agg) / exposure, q_obs_upper = stats::qnorm(p[[2]], claims, sd_agg) / exposure From 1ba4f31a9ba35d8d44348add9cfa1d7620fb568c Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Thu, 16 Nov 2023 08:37:18 -0500 Subject: [PATCH 16/25] moved expo_step out of expose for use in other functions --- R/expose.R | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/R/expose.R b/R/expose.R index b785003..554c433 100644 --- a/R/expose.R +++ b/R/expose.R @@ -108,12 +108,7 @@ expose <- function(.data, # set up exposure period lengths expo_length <- rlang::arg_match(expo_length) - expo_step <- switch(expo_length, - "year" = lubridate::years(1), - "quarter" = months(3), - "month" = months(1), - "week" = lubridate::days(7)) - + expo_step <- expo_step(expo_length) cal_frac <- cal_frac(expo_length) # column renames and name conflicts @@ -331,3 +326,12 @@ most_common <- function(x) { y <- table(x) |> sort(decreasing = TRUE) |> names() factor(y[[1]], levels(x)) } + +# helper function for determining exposure step lengths +expo_step <- function(x) { + switch(x, + "year" = lubridate::years(1), + "quarter" = months(3), + "month" = months(1), + "week" = lubridate::days(7)) +} From 2b33c93a8b017d9ebe8d1b081d4f844082c59333 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Thu, 16 Nov 2023 08:41:05 -0500 Subject: [PATCH 17/25] generalized expose_split() to work with all calendar exposure types --- R/expose_split.R | 129 ++++++++++++++++++++++------------- man/expose_split.Rd | 29 +++++--- tests/testthat/test-expose.R | 2 +- 3 files changed, 100 insertions(+), 60 deletions(-) diff --git a/R/expose_split.R b/R/expose_split.R index b4ea2fe..cd99995 100644 --- a/R/expose_split.R +++ b/R/expose_split.R @@ -1,15 +1,24 @@ -#' Split calendar year exposures by policy year +#' Split calendar exposures by policy year #' -#' @description Convert a data frame of calendar year exposure-level records -#' into a split exposed data frame that divides each calendar year into two -#' pieces based on policy years: a pre-anniversary record and a post-anniversary -#' record. +#' @description Split calendar period exposures that cross a policy anniversary +#' into a pre-anniversary record and a post-anniversary record. #' -#' @details `dat` must be an `exposed_df` with calendar year exposure records. -#' Calendar year exposures are created by the function [expose_cy()] (or -#' [expose()] when `expo_length = "year"` and `cal_expo = TRUE`). +#' After splitting the data, the resulting data frame will contain both calendar +#' exposures and policy year exposures. These columns will be named +#' `exposure_cal` and `exposure_pol`, respectively. Calendar exposures will be +#' in the original units passed to `expose_split()`. Policy exposures will +#' always be expressed in years. #' -#' @param dat An `exposed_df` object with calendar year exposures. +#' After splitting exposures, downstream functions like `exp_stats()` and +#' `exp_shiny()` will require clarification as to which exposure basis should +#' be used to summarize results. +#' +#' @details `dat` must be an `exposed_df` with calendar year, quarter, month, +#' or week exposure records. Calendar year exposures are created by the +#' functions [expose_cy()], [expose_cq()], [expose_cm()], or [expose_cw()], (or +#' [expose()] when `cal_expo = TRUE`). +#' +#' @param dat An `exposed_df` object with calendar period exposures. #' #' @return A tibble with class `split_exposed_df`, `exposed_df`, `tbl_df`, #' `tbl`, and `data.frame`. The results include all columns in `dat` except that @@ -17,8 +26,6 @@ #' #' - `exposure_pol` - policy year exposures #' - `pol_yr` - policy year -#' - `piece` - a factor containing 2 levels: "pre_anniv" (pre-anniversary -#' records) and "post_anniv" (post-anniversary records) #' #' @examples #' toy_census |> expose_cy("2022-12-31") |> expose_split() @@ -29,9 +36,11 @@ expose_split <- function(dat) { verify_exposed_df(dat) - if (attr(dat, "exposure_type") != "calendar_year") { - rlang::abort(c(x = "`dat` must contain calendar year exposures.", - i = "Hint: Try passing an `exposed_df` object that was created by `expose_cy()`.")) + expo_type <- strsplit(attr(dat, "exposure_type"), "_")[[1]] + + if (expo_type[[1]] != "calendar") { + rlang::abort(c(x = "`dat` must contain calendar exposures.", + i = "Hint: Try passing an `exposed_df` object that was created by `expose_cy()`, `expose_cq()`, `expose_cm()`, or `expose_cw()`.")) } if (!is.null(attr(dat, "trx_types"))) { @@ -40,52 +49,68 @@ expose_split <- function(dat) { ) } - cal_frac <- cal_frac("year") target_status <- attr(dat, "target_status") default_status <- attr(dat, "default_status") + date_cols <- attr(dat, "date_cols") |> rlang::parse_exprs() + expo_length <- expo_type[[2]] + + pol_frac <- function(x, start, end, y) { + if (missing(y)) { + as.integer(x - start + 1) / as.integer(end - start + 1) + } else { + as.integer(x - y) / as.integer(end - start + 1) + } - pol_frac <- function(x, start, end) { - as.integer(x - start + 1) / as.integer(end - start + 1) } + cal_frac <- cal_frac(expo_length) # time fractions # h = yearfrac from boy to anniv # v = yearfrac from boy to term - dat <- dat |> mutate( - anniv = issue_date %m+% - (lubridate::years(1) * - (lubridate::year(cal_yr) - lubridate::year(issue_date))), - split = between(anniv, cal_yr, cal_yr_end), - h = cal_frac(anniv, 1), - v = cal_frac(term_date) - ) + dat <- dat |> + # temporary generic date column names + rename(cal_b = !!date_cols[[1]], + cal_e = !!date_cols[[2]]) |> + mutate( + anniv = issue_date %m+% + (lubridate::years(1) * + (lubridate::year(cal_b) - lubridate::year(issue_date))), + split = between(anniv, cal_b, cal_e), + h = cal_frac(anniv, 1), + v = cal_frac(term_date) + ) pre_anniv <- dat |> filter(split) |> mutate(piece = 1L, - cal_yr = pmax(issue_date, cal_yr), - cal_yr_end = anniv - 1, + cal_b = pmax(issue_date, cal_b), + cal_e = anniv - 1, exposure = h, - exposure_pol = 1 - pol_frac(cal_yr - 1L, + exposure_pol = 1 - pol_frac(cal_b - 1L, anniv %m-% lubridate::years(1), anniv - 1L) ) post_anniv <- dat |> mutate(piece = 2L, - cal_yr = anniv, - exposure = 1 - h, - exposure_pol = pol_frac(cal_yr_end, + cal_b = dplyr::if_else(split, anniv, cal_b), + exposure = dplyr::if_else(split, 1 - h, 1), + anniv = dplyr::if_else(anniv > cal_e, + anniv %m-% lubridate::years(1), + anniv), + exposure_pol = pol_frac(cal_e, anniv, - anniv %m+% lubridate::years(1) - 1L)) + anniv %m+% lubridate::years(1) - 1L, + cal_b - 1L) + ) dat <- dplyr::bind_rows(pre_anniv, post_anniv) |> - filter(cal_yr <= cal_yr_end, - is.na(term_date) | term_date >= cal_yr) |> - mutate(term_date = dplyr::if_else(between(term_date, cal_yr, cal_yr_end), + filter(cal_b <= cal_e, + is.na(term_date) | term_date >= cal_b) |> + mutate(term_date = dplyr::if_else(between(term_date, cal_b, cal_e), term_date, lubridate::NA_Date_), - pol_yr = lubridate::year(cal_yr) - lubridate::year(issue_date) + + pol_yr = lubridate::year(anniv) - lubridate::year(issue_date) + piece - 1L, status = dplyr::if_else(is.na(term_date), factor(default_status, @@ -93,30 +118,38 @@ expose_split <- function(dat) { status), claims = status %in% target_status, exposure_cal = dplyr::case_when( - claims ~ dplyr::if_else(piece == 1 | cal_yr == issue_date, + claims ~ dplyr::if_else(piece == 1 | cal_b == issue_date, 1, 1 - h), is.na(term_date) ~ exposure, piece == 1 ~ v, .default = v - h ), exposure_pol = dplyr::case_when( - claims ~ dplyr::if_else(piece == 2, 1, exposure_pol), + claims ~ dplyr::case_when( + piece == 1 ~ exposure_pol, + split ~ 1, + TRUE ~ 1 - pol_frac(cal_b - 1L, + anniv, + anniv %m+% lubridate::years(1) - 1L) + ), is.na(term_date) ~ exposure_pol, piece == 1 ~ pol_frac(term_date, anniv %m-% lubridate::years(1), anniv - 1L) - (1 - exposure_pol), - .default = pol_frac(term_date, - anniv, - anniv %m+% lubridate::years(1) - 1L) - ), - piece = dplyr::if_else(piece == 2, "post_anniv", "pre_anniv") |> - factor() + TRUE ~ pol_frac(term_date, + anniv, + anniv %m+% lubridate::years(1) - 1L) + ) ) |> - arrange(pol_num, cal_yr, piece) |> - select(-h, -v, -split, -anniv, -claims, -exposure) + arrange(pol_num, cal_b, piece) |> + select(-h, -v, -split, -anniv, -claims, -exposure, -piece) |> + # restore date column names + rename(!!date_cols[[1]] := cal_b, + !!date_cols[[2]] := cal_e) + # update exposure type and update class class(dat) <- c("split_exposed_df", class(dat)) - attr(dat, "exposure_type") <- "split_year" + attr(dat, "exposure_type") <- paste0("split_", expo_length) dat @@ -129,6 +162,6 @@ check_split_expose_basis <- function(dat, col_exposure) { !col_exposure %in% c("exposure_cal", "exposure_pol")) { rlang::abort(c(x = "A `split_exposed_df` was passed without clarifying which exposure basis should be used to summarize results.", i = 'Pass "exposure_pol" to `col_exposure` for policy year exposures.', - i = 'Pass "exposure_cal" to `col_exposure` for calendar year exposures.')) + i = 'Pass "exposure_cal" to `col_exposure` for calendar exposures.')) } } diff --git a/man/expose_split.Rd b/man/expose_split.Rd index 00c92d7..ed33066 100644 --- a/man/expose_split.Rd +++ b/man/expose_split.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/expose_split.R \name{expose_split} \alias{expose_split} -\title{Split calendar year exposures by policy year} +\title{Split calendar exposures by policy year} \usage{ expose_split(dat) } \arguments{ -\item{dat}{An \code{exposed_df} object with calendar year exposures.} +\item{dat}{An \code{exposed_df} object with calendar period exposures.} } \value{ A tibble with class \code{split_exposed_df}, \code{exposed_df}, \code{tbl_df}, @@ -16,20 +16,27 @@ A tibble with class \code{split_exposed_df}, \code{exposed_df}, \code{tbl_df}, \itemize{ \item \code{exposure_pol} - policy year exposures \item \code{pol_yr} - policy year -\item \code{piece} - a factor containing 2 levels: "pre_anniv" (pre-anniversary -records) and "post_anniv" (post-anniversary records) } } \description{ -Convert a data frame of calendar year exposure-level records -into a split exposed data frame that divides each calendar year into two -pieces based on policy years: a pre-anniversary record and a post-anniversary -record. +Split calendar period exposures that cross a policy anniversary +into a pre-anniversary record and a post-anniversary record. + +After splitting the data, the resulting data frame will contain both calendar +exposures and policy year exposures. These columns will be named +\code{exposure_cal} and \code{exposure_pol}, respectively. Calendar exposures will be +in the original units passed to \code{expose_split()}. Policy exposures will +always be expressed in years. + +After splitting exposures, downstream functions like \code{exp_stats()} and +\code{exp_shiny()} will require clarification as to which exposure basis should +be used to summarize results. } \details{ -\code{dat} must be an \code{exposed_df} with calendar year exposure records. -Calendar year exposures are created by the function \code{\link[=expose_cy]{expose_cy()}} (or -\code{\link[=expose]{expose()}} when \code{expo_length = "year"} and \code{cal_expo = TRUE}). +\code{dat} must be an \code{exposed_df} with calendar year, quarter, month, +or week exposure records. Calendar year exposures are created by the +functions \code{\link[=expose_cy]{expose_cy()}}, \code{\link[=expose_cq]{expose_cq()}}, \code{\link[=expose_cm]{expose_cm()}}, or \code{\link[=expose_cw]{expose_cw()}}, (or +\code{\link[=expose]{expose()}} when \code{cal_expo = TRUE}). } \examples{ toy_census |> expose_cy("2022-12-31") |> expose_split() diff --git a/tests/testthat/test-expose.R b/tests/testthat/test-expose.R index 989b727..1e9b4ac 100644 --- a/tests/testthat/test-expose.R +++ b/tests/testthat/test-expose.R @@ -110,7 +110,7 @@ test_that("Renaming and name conflict warnings work", { test_that("expose_split() fails when passed non-CY exposures", { expect_error(expose_split(1, regexp = "must be an `exposed_df`")) expect_error(expose_py(toy_census, "2022-12-31") |> expose_split(), - regexp = "must contain calendar year exposures") + regexp = "must contain calendar exposures") expect_no_error(expose_cy(toy_census, "2022-12-31") |> expose_split()) }) From 40b1458f3949d3ade11af5e0bf3d15a54d2f1829 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sat, 18 Nov 2023 09:07:04 -0500 Subject: [PATCH 18/25] - Added cast and ptype methods for split_exposed_df. - Added is_exposed_df function. --- NAMESPACE | 17 ++++++ R/exp_shiny.R | 2 +- R/expose_split.R | 21 ++++++-- R/exposed_df_helpers.R | 120 ++++++++++++++++++++++++++++++++++++++--- man/expose_split.Rd | 15 ++++-- 5 files changed, 160 insertions(+), 15 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index fee3dff..4335fae 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,19 +31,35 @@ S3method(summary,trx_df) S3method(tidy,step_expose) S3method(ungroup,exposed_df) S3method(vec_cast,data.frame.exposed_df) +S3method(vec_cast,data.frame.split_exposed_df) S3method(vec_cast,exposed_df.data.frame) S3method(vec_cast,exposed_df.exposed_df) S3method(vec_cast,exposed_df.grouped_df) +S3method(vec_cast,exposed_df.split_exposed_df) S3method(vec_cast,exposed_df.tbl_df) S3method(vec_cast,grouped_df.exposed_df) +S3method(vec_cast,grouped_df.split_exposed_df) +S3method(vec_cast,split_exposed_df.data.frame) +S3method(vec_cast,split_exposed_df.exposed_df) +S3method(vec_cast,split_exposed_df.grouped_df) +S3method(vec_cast,split_exposed_df.tbl_df) S3method(vec_cast,tbl_df.exposed_df) +S3method(vec_cast,tbl_df.split_exposed_df) S3method(vec_ptype2,data.frame.exposed_df) +S3method(vec_ptype2,data.frame.split_exposed_df) S3method(vec_ptype2,exposed_df.data.frame) S3method(vec_ptype2,exposed_df.exposed_df) S3method(vec_ptype2,exposed_df.grouped_df) +S3method(vec_ptype2,exposed_df.split_exposed_df) S3method(vec_ptype2,exposed_df.tbl_df) S3method(vec_ptype2,grouped_df.exposed_df) +S3method(vec_ptype2,grouped_df.split_exposed_df) +S3method(vec_ptype2,split_exposed_df.data.frame) +S3method(vec_ptype2,split_exposed_df.exposed_df) +S3method(vec_ptype2,split_exposed_df.grouped_df) +S3method(vec_ptype2,split_exposed_df.tbl_df) S3method(vec_ptype2,tbl_df.exposed_df) +S3method(vec_ptype2,tbl_df.split_exposed_df) export(add_predictions) export(add_transactions) export(anti_join) @@ -70,6 +86,7 @@ export(group_by) export(groups) export(inner_join) export(is_exposed_df) +export(is_split_exposed_df) export(left_join) export(mutate) export(plot_actual_to_expected) diff --git a/R/exp_shiny.R b/R/exp_shiny.R index 6bc0cec..84a2353 100644 --- a/R/exp_shiny.R +++ b/R/exp_shiny.R @@ -165,7 +165,7 @@ exp_shiny <- function(dat, verify_exposed_df(dat) # special logic required for split exposed data frames - if (inherits(dat, "split_exposed_df")) { + if (is_split_exposed_df(dat)) { check_split_expose_basis(dat, col_exposure) dat <- rename(dat, exposure = {{col_exposure}}) diff --git a/R/expose_split.R b/R/expose_split.R index cd99995..b4a3227 100644 --- a/R/expose_split.R +++ b/R/expose_split.R @@ -13,6 +13,9 @@ #' `exp_shiny()` will require clarification as to which exposure basis should #' be used to summarize results. #' +#' `is_split_exposed_df()` will return `TRUE` if `x` is a `split_exposed_df` +#' object. +#' #' @details `dat` must be an `exposed_df` with calendar year, quarter, month, #' or week exposure records. Calendar year exposures are created by the #' functions [expose_cy()], [expose_cq()], [expose_cm()], or [expose_cw()], (or @@ -20,13 +23,16 @@ #' #' @param dat An `exposed_df` object with calendar period exposures. #' -#' @return A tibble with class `split_exposed_df`, `exposed_df`, `tbl_df`, -#' `tbl`, and `data.frame`. The results include all columns in `dat` except that -#' `exposure` has been renamed to `exposure_cal`. Additional columns include: +#' @return For `expose_split()`, a tibble with class `split_exposed_df`, +#' `exposed_df`, `tbl_df`, `tbl`, and `data.frame`. The results include all +#' columns in `dat` except that `exposure` has been renamed to `exposure_cal`. +#' Additional columns include: #' #' - `exposure_pol` - policy year exposures #' - `pol_yr` - policy year #' +#' For `is_split_exposed_df()`, a length-1 logical vector. +#' #' @examples #' toy_census |> expose_cy("2022-12-31") |> expose_split() #' @@ -155,10 +161,17 @@ expose_split <- function(dat) { } +#' @rdname expose_split +#' @export +is_split_exposed_df <- function(x) { + inherits(x, "split_exposed_df") +} + + # This internal function sends an error if a `split_exposed_df` is passed # without clarifying which exposure basis should be used. check_split_expose_basis <- function(dat, col_exposure) { - if (inherits(dat, "split_exposed_df") && + if (is_split_exposed_df(dat) && !col_exposure %in% c("exposure_cal", "exposure_pol")) { rlang::abort(c(x = "A `split_exposed_df` was passed without clarifying which exposure basis should be used to summarize results.", i = 'Pass "exposure_pol" to `col_exposure` for policy year exposures.', diff --git a/R/exposed_df_helpers.R b/R/exposed_df_helpers.R index 70d505e..67d5967 100644 --- a/R/exposed_df_helpers.R +++ b/R/exposed_df_helpers.R @@ -142,16 +142,25 @@ as_exposed_df <- function(x, end_date, start_date = as.Date("1900-01-01"), # low-level class constructor new_exposed_df <- function(x, end_date, start_date, target_status, cal_expo, expo_length, trx_types = NULL, - default_status) { + default_status, split = FALSE) { date_cols <- make_date_col_names(cal_expo, expo_length) + exposure_type <- if (cal_expo) { + if (split) { + "split" + } else { + "calendar" + } + } else { + "policy" + } tibble::new_tibble(x, class = "exposed_df", target_status = target_status, - exposure_type = glue::glue("{if (cal_expo) 'calendar' else 'policy'}_{expo_length}") |> - as.character(), + exposure_type = paste(exposure_type, expo_length, + sep = "_"), start_date = start_date, end_date = end_date, date_cols = date_cols, @@ -333,12 +342,13 @@ exposed_df_ptype2 <- function(x, y, ..., x_arg = "", y_arg = "") { expo_type <- attr(x, "exposure_type") %||% attr(y, "exposure_type") split_type <- strsplit(expo_type, "_")[[1]] - cal_expo <- split_type[[1]] == "calendar" + cal_expo <- split_type[[1]] %in% c("calendar", "split") expo_length <- split_type[[2]] default_status <- attr(x, "default_status") new_exposed_df(out, end_date, start_date, target_status, cal_expo, - expo_length, trx_types, default_status) + expo_length, trx_types, default_status, + is_split_exposed_df(x)) } @@ -363,14 +373,16 @@ exposed_df_cast <- function(x, to, ..., x_arg = "", to_arg = "") { expo_type <- attr(to, "exposure_type") split_type <- strsplit(expo_type, "_")[[1]] - cal_expo <- split_type[[1]] == "calendar" + cal_expo <- split_type[[1]] %in% c("calendar", "split") expo_length <- split_type[[2]] default_status <- attr(to, "default_status") new_exposed_df(out, end_date, start_date, target_status, cal_expo, - expo_length, trx_types, default_status) + expo_length, trx_types, default_status, + is_split_exposed_df(x)) } + # exposed_df | exposed_df #' @export vec_ptype2.exposed_df.exposed_df <- function(x, y, ...) { @@ -382,6 +394,29 @@ vec_cast.exposed_df.exposed_df <- function(x, to, ...) { exposed_df_cast(x, to, ...) } + +# exposed_df | split_exposed_df +#' @export +vec_ptype2.split_exposed_df.exposed_df <- function(x, y, ...) { + exposed_df_ptype2(x, y, ...) +} + +#' @export +vec_cast.split_exposed_df.exposed_df <- function(x, to, ...) { + exposed_df_cast(x, to, ...) +} + +#' @export +vec_ptype2.exposed_df.split_exposed_df <- function(x, y, ...) { + exposed_df_ptype2(x, y, ...) +} + +#' @export +vec_cast.exposed_df.split_exposed_df <- function(x, to, ...) { + exposed_df_cast(x, to, ...) +} + + # exposed_df | tbl_df #' @export vec_ptype2.exposed_df.tbl_df <- function(x, y, ...) { @@ -403,6 +438,28 @@ vec_cast.tbl_df.exposed_df <- function(x, to, ...) { vctrs::tib_cast(x, to, ...) } +# split_exposed_df | tbl_df +#' @export +vec_ptype2.split_exposed_df.tbl_df <- function(x, y, ...) { + exposed_df_ptype2(x, y, ...) +} + +#' @export +vec_ptype2.tbl_df.split_exposed_df <- function(x, y, ...) { + exposed_df_ptype2(x, y, ...) +} + +#' @export +vec_cast.split_exposed_df.tbl_df <- function(x, to, ...) { + exposed_df_cast(x, to, ...) +} + +#' @export +vec_cast.tbl_df.split_exposed_df <- function(x, to, ...) { + vctrs::tib_cast(x, to, ...) +} + + # exposed_df | data.frame #' @export vec_ptype2.exposed_df.data.frame <- function(x, y, ...) { @@ -424,6 +481,27 @@ vec_cast.data.frame.exposed_df <- function(x, to, ...) { vctrs::df_cast(x, to, ...) } +# split_exposed_df | data.frame +#' @export +vec_ptype2.split_exposed_df.data.frame <- function(x, y, ...) { + exposed_df_ptype2(x, y, ...) +} + +#' @export +vec_ptype2.data.frame.split_exposed_df <- function(x, y, ...) { + exposed_df_ptype2(x, y, ...) +} + +#' @export +vec_cast.split_exposed_df.data.frame <- function(x, to, ...) { + exposed_df_cast(x, to, ...) +} + +#' @export +vec_cast.data.frame.split_exposed_df <- function(x, to, ...) { + vctrs::df_cast(x, to, ...) +} + # exposed_df | grouped_df #' @export @@ -453,6 +531,34 @@ vec_cast.grouped_df.exposed_df <- function(x, to, ...) { } +# split_exposed_df | grouped_df +#' @export +vec_ptype2.split_exposed_df.grouped_df <- function(x, y, ...) { + g <- union(dplyr::group_vars(x), dplyr::group_vars(y)) + exposed_df_ptype2(x, y, ...) |> + group_by(dplyr::across(dplyr::all_of(g))) +} + +#' @export +vec_ptype2.grouped_df.split_exposed_df <- function(x, y, ...) { + g <- union(dplyr::group_vars(x), dplyr::group_vars(y)) + exposed_df_ptype2(x, y, ...) |> + group_by(dplyr::across(dplyr::all_of(g))) +} + +#' @export +vec_cast.split_exposed_df.grouped_df <- function(x, to, ...) { + exposed_df_cast(x, to, ...) |> + group_by(!!!groups(to)) +} + +#' @export +vec_cast.grouped_df.split_exposed_df <- function(x, to, ...) { + vctrs::tib_cast(x, to, ...) |> + group_by(!!!groups(to)) +} + + # helper for determining date columns make_date_col_names <- function(cal_expo, expo_length) { abbrev <- abbr_period(expo_length) diff --git a/man/expose_split.Rd b/man/expose_split.Rd index ed33066..500feff 100644 --- a/man/expose_split.Rd +++ b/man/expose_split.Rd @@ -2,21 +2,27 @@ % Please edit documentation in R/expose_split.R \name{expose_split} \alias{expose_split} +\alias{is_split_exposed_df} \title{Split calendar exposures by policy year} \usage{ expose_split(dat) + +is_split_exposed_df(x) } \arguments{ \item{dat}{An \code{exposed_df} object with calendar period exposures.} } \value{ -A tibble with class \code{split_exposed_df}, \code{exposed_df}, \code{tbl_df}, -\code{tbl}, and \code{data.frame}. The results include all columns in \code{dat} except that -\code{exposure} has been renamed to \code{exposure_cal}. Additional columns include: +For \code{expose_split()}, a tibble with class \code{split_exposed_df}, +\code{exposed_df}, \code{tbl_df}, \code{tbl}, and \code{data.frame}. The results include all +columns in \code{dat} except that \code{exposure} has been renamed to \code{exposure_cal}. +Additional columns include: \itemize{ \item \code{exposure_pol} - policy year exposures \item \code{pol_yr} - policy year } + +For \code{is_split_exposed_df()}, a length-1 logical vector. } \description{ Split calendar period exposures that cross a policy anniversary @@ -31,6 +37,9 @@ always be expressed in years. After splitting exposures, downstream functions like \code{exp_stats()} and \code{exp_shiny()} will require clarification as to which exposure basis should be used to summarize results. + +\code{is_split_exposed_df()} will return \code{TRUE} if \code{x} is a \code{split_exposed_df} +object. } \details{ \code{dat} must be an \code{exposed_df} with calendar year, quarter, month, From c09f02f6fc35cec9222139ad325690084eb84d97 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sat, 18 Nov 2023 10:16:40 -0500 Subject: [PATCH 19/25] typing tests for split_exposed_df objects --- NAMESPACE | 2 + R/exposed_df_helpers.R | 28 +++++- tests/testthat/test-expose.R | 2 +- tests/testthat/test-exposed_df_helpers.R | 109 +++++++++++++++++++++-- 4 files changed, 132 insertions(+), 9 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 4335fae..6c256e4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,6 +42,7 @@ S3method(vec_cast,grouped_df.split_exposed_df) S3method(vec_cast,split_exposed_df.data.frame) S3method(vec_cast,split_exposed_df.exposed_df) S3method(vec_cast,split_exposed_df.grouped_df) +S3method(vec_cast,split_exposed_df.split_exposed_df) S3method(vec_cast,split_exposed_df.tbl_df) S3method(vec_cast,tbl_df.exposed_df) S3method(vec_cast,tbl_df.split_exposed_df) @@ -57,6 +58,7 @@ S3method(vec_ptype2,grouped_df.split_exposed_df) S3method(vec_ptype2,split_exposed_df.data.frame) S3method(vec_ptype2,split_exposed_df.exposed_df) S3method(vec_ptype2,split_exposed_df.grouped_df) +S3method(vec_ptype2,split_exposed_df.split_exposed_df) S3method(vec_ptype2,split_exposed_df.tbl_df) S3method(vec_ptype2,tbl_df.exposed_df) S3method(vec_ptype2,tbl_df.split_exposed_df) diff --git a/R/exposed_df_helpers.R b/R/exposed_df_helpers.R index 67d5967..dfce5a9 100644 --- a/R/exposed_df_helpers.R +++ b/R/exposed_df_helpers.R @@ -156,8 +156,14 @@ new_exposed_df <- function(x, end_date, start_date, target_status, "policy" } + new_class <- if (exposure_type == "split") { + c("split_exposed_df", "exposed_df") + } else { + "exposed_df" + } + tibble::new_tibble(x, - class = "exposed_df", + class = new_class, target_status = target_status, exposure_type = paste(exposure_type, expo_length, sep = "_"), @@ -189,7 +195,12 @@ print.exposed_df <- function(x, ...) { #' @export group_by.exposed_df <- function(.data, ..., .add, .drop) { x <- NextMethod() - class(x) <- c("exposed_df", class(x)) + if (is_split_exposed_df(.data)) { + class(x) <- c("split_exposed_df", "exposed_df", class(x)) + } else { + class(x) <- c("exposed_df", class(x)) + } + x } @@ -379,7 +390,7 @@ exposed_df_cast <- function(x, to, ..., x_arg = "", to_arg = "") { new_exposed_df(out, end_date, start_date, target_status, cal_expo, expo_length, trx_types, default_status, - is_split_exposed_df(x)) + is_split_exposed_df(to)) } @@ -394,6 +405,17 @@ vec_cast.exposed_df.exposed_df <- function(x, to, ...) { exposed_df_cast(x, to, ...) } +# split_exposed_df | split_exposed_df +#' @export +vec_ptype2.split_exposed_df.split_exposed_df <- function(x, y, ...) { + exposed_df_ptype2(x, y, ...) +} + +#' @export +vec_cast.split_exposed_df.split_exposed_df <- function(x, to, ...) { + exposed_df_cast(x, to, ...) +} + # exposed_df | split_exposed_df #' @export diff --git a/tests/testthat/test-expose.R b/tests/testthat/test-expose.R index 1e9b4ac..8cc3a3a 100644 --- a/tests/testthat/test-expose.R +++ b/tests/testthat/test-expose.R @@ -107,7 +107,7 @@ test_that("Renaming and name conflict warnings work", { # split exposure tests -test_that("expose_split() fails when passed non-CY exposures", { +test_that("expose_split() fails when passed non-calendar exposures", { expect_error(expose_split(1, regexp = "must be an `exposed_df`")) expect_error(expose_py(toy_census, "2022-12-31") |> expose_split(), regexp = "must contain calendar exposures") diff --git a/tests/testthat/test-exposed_df_helpers.R b/tests/testthat/test-exposed_df_helpers.R index 10ab3d7..cbfa49d 100644 --- a/tests/testthat/test-exposed_df_helpers.R +++ b/tests/testthat/test-exposed_df_helpers.R @@ -15,10 +15,10 @@ test_that("as_exposed_df works", { rename(pnum = pol_num) expo5 <- expo4 |> rename(pstat = status, - expo = exposure, - py = pol_yr, - start = pol_date_yr, - end = pol_date_yr_end) + expo = exposure, + py = pol_yr, + start = pol_date_yr, + end = pol_date_yr_end) expect_error(as_exposed_df(data.frame(a = 1:3), Sys.Date())) @@ -57,7 +57,7 @@ test_that("as_exposed_df works with transactions", { expo7 <- expo6 |> rename(n_A = trx_n_A, n_B = trx_n_B, - amt_A = trx_amt_A, amt_B = trx_amt_B) + amt_A = trx_amt_A, amt_B = trx_amt_B) expect_error(as_exposed_df(expo7, "2022-12-31", trx_types = c("A", "B"))) expect_no_error(as_exposed_df(expo7, "2022-12-31", trx_types = c("A", "B"), col_trx_n_ = "n_", @@ -143,3 +143,102 @@ test_that("as_exposed_df default_status works", { expect_equal(attr(expo, "default_status"), "Active") }) + + +# split exposure tests ---------------------------------------------------- + +split <- expose_cy(toy_census, "2020-12-31", target_status = "Surrender") |> + expose_split() + +test_that("is_split_exposed_df works", { + expect_true(is_split_exposed_df(split)) + expect_false(is_split_exposed_df(mtcars)) +}) + +split2 <- as.data.frame(split) + +test_that("split_exposed_df class persists after grouping and ungrouping", { + expect_s3_class(split, "split_exposed_df") + expect_s3_class(split |> group_by(pol_num), "split_exposed_df") + expect_s3_class(split |> group_by(pol_num) |> ungroup(), "split_exposed_df") + expect_identical(split, ungroup(split)) +}) + +test_that("split_exposed_df casting and coercion works with tibble and data.frame", { + expect_s3_class(dplyr::bind_rows(split, data.frame(x = 1)), + "split_exposed_df") + expect_s3_class(dplyr::bind_rows(split, tibble::tibble(x = 1)), + "split_exposed_df") + + split8 <- dplyr::bind_rows(split, split) + expect_s3_class(split8, "split_exposed_df") + expect_identical(attr(split8, "target_status"), + attr(split8, "target_status") |> + unique()) + + split9 <- vctrs::vec_rbind( + split, + expose_cy(toy_census, "2022-12-31", start_date = "1890-01-01", + target_status = "Death") |> + expose_split()) + + expect_identical(attr(split9, "end_date"), as.Date("2022-12-31")) + expect_identical(attr(split9, "start_date"), as.Date("1890-01-01")) + expect_identical(attr(split9, "target_status"), c("Surrender", "Death")) + + split10 <- expose_cq(toy_census, "2020-12-31") |> expose_split() + expect_error(dplyr::bind_rows(split, split10)) + +}) + +test_that("split_exposed_df persists in a grouped and ungrouped context after using dplyr verbs", { + + # rename, relocate + grouped <- split |> mutate(x = ifelse(pol_num == 1, "A", "B")) |> group_by(x) + + expect_s3_class(grouped, "split_exposed_df") + expect_s3_class(ungroup(split), "split_exposed_df") + expect_s3_class(ungroup(grouped), "split_exposed_df") + expect_s3_class(filter(split, pol_num == 1), "split_exposed_df") + expect_s3_class(filter(grouped, pol_num == 1), "split_exposed_df") + expect_s3_class(mutate(split, z = 1), "split_exposed_df") + expect_s3_class(mutate(grouped, z = 1), "split_exposed_df") + expect_s3_class(select(split, pol_num), "split_exposed_df") + expect_s3_class(select(grouped, pol_num, x), "split_exposed_df") + expect_s3_class(slice(split, 1:2), "split_exposed_df") + expect_s3_class(slice(grouped, 1:2), "split_exposed_df") + expect_s3_class(arrange(split, pol_yr), "split_exposed_df") + expect_s3_class(arrange(grouped, pol_yr), "split_exposed_df") + expect_s3_class(rename(split, abc = pol_num), "split_exposed_df") + expect_s3_class(rename(grouped, abc = pol_num), "split_exposed_df") + expect_s3_class(relocate(split, pol_num, .after = status), "split_exposed_df") + expect_s3_class(relocate(grouped, pol_num, .after = status), + "split_exposed_df") + + join_frame <- data.frame(pol_num = 1, zzz = 2L) + expect_s3_class(left_join(split, join_frame, by = "pol_num"), + "split_exposed_df") + expect_s3_class(left_join(grouped, join_frame, by = "pol_num"), + "split_exposed_df") + expect_s3_class(right_join(split, join_frame, by = "pol_num"), + "split_exposed_df") + expect_s3_class(right_join(grouped, join_frame, by = "pol_num", + multiple = "all"), "split_exposed_df") + expect_s3_class(inner_join(split, join_frame, by = "pol_num"), + "split_exposed_df") + expect_s3_class(inner_join(grouped, join_frame, by = "pol_num"), + "split_exposed_df") + expect_s3_class(full_join(split, join_frame, by = "pol_num"), + "split_exposed_df") + expect_s3_class(full_join(grouped, join_frame, by = "pol_num"), + "split_exposed_df") + expect_s3_class(semi_join(split, join_frame, by = "pol_num"), + "split_exposed_df") + expect_s3_class(semi_join(grouped, join_frame, by = "pol_num"), + "split_exposed_df") + expect_s3_class(anti_join(split, join_frame, by = "pol_num"), + "split_exposed_df") + expect_s3_class(anti_join(grouped, join_frame, by = "pol_num"), + "split_exposed_df") + +}) From d8d5b4165a7718b633a36ffcefe384a7c1035038 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sat, 18 Nov 2023 10:29:18 -0500 Subject: [PATCH 20/25] news update --- NEWS.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/NEWS.md b/NEWS.md index 1e58da9..9a7679e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,21 @@ # actxps (development version) +- actxps now supports split exposures that divide calendar periods crossing +policy anniversaries into pre-anniversary and post-anniversary records. The +function `expose_split()` can convert any `exposed_df` object with calendar +period exposures (yearly, quarterly, monthly, or weekly) into a +`split_exposed_df` object. Split exposure data frames contain columns for +exposures both on a calendar period and policy year basis. +- `exp_stats()` and `exp_shiny()` now require clarification as to which exposure +basis should be used when passed a `split_exposed_df` object. +- All `expose_df` objects now contains a `default_status` attribute. +- `autotable()` functions now contain the arguments `decimals_amt` and +`suffix_amt`. The former allows one to specify the number of decimals appearing +after amount columns. The latter is used to automatically scale large numbers +into by thousands, millions, billions, or trillions. +- Corrected an error in the calculation of the standard deviations of claims +when `exp_stats()` is passed a weighting variable. + # actxps 1.3.0 - A new `conf_int` argument was added to `exp_stats()` that creates confidence From d748d364ae7a0eefdebc3537e77f48e4e13af7a0 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sat, 18 Nov 2023 10:58:39 -0500 Subject: [PATCH 21/25] Summary method for exposure data frames that calls exp_stats --- NAMESPACE | 1 + NEWS.md | 1 + R/exp_stats.R | 2 +- R/exposed_df_summary.R | 26 ++++++++++++++++++++++++++ man/exp_stats.Rd | 2 +- man/summary.exposed_df.Rd | 33 +++++++++++++++++++++++++++++++++ 6 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 R/exposed_df_summary.R create mode 100644 man/summary.exposed_df.Rd diff --git a/NAMESPACE b/NAMESPACE index 6c256e4..c1cd702 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ S3method(select,exposed_df) S3method(semi_join,exposed_df) S3method(slice,exposed_df) S3method(summary,exp_df) +S3method(summary,exposed_df) S3method(summary,trx_df) S3method(tidy,step_expose) S3method(ungroup,exposed_df) diff --git a/NEWS.md b/NEWS.md index 9a7679e..991334c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -15,6 +15,7 @@ after amount columns. The latter is used to automatically scale large numbers into by thousands, millions, billions, or trillions. - Corrected an error in the calculation of the standard deviations of claims when `exp_stats()` is passed a weighting variable. +- Added a `summary()` method for `exposed_df` objects that calls `exp_stats()`. # actxps 1.3.0 diff --git a/R/exp_stats.R b/R/exp_stats.R index 5b7e6d7..6256587 100644 --- a/R/exp_stats.R +++ b/R/exp_stats.R @@ -94,7 +94,7 @@ #' (`.weight_qs`), and the number of records (`.weight_n`). #' #' @examples -#' toy_census |> expose("2020-12-31", target_status = "Surrender") |> +#' toy_census |> expose("2022-12-31", target_status = "Surrender") |> #' exp_stats() #' #' exp_res <- census_dat |> diff --git a/R/exposed_df_summary.R b/R/exposed_df_summary.R new file mode 100644 index 0000000..075d792 --- /dev/null +++ b/R/exposed_df_summary.R @@ -0,0 +1,26 @@ +#' Summarize experience study records +#' +#' Create a summary data frame of termination experience for a given target +#' status. +#' +#' @details +#' Calling [summary()] on an `exposed_df` object will summarize results using +#' [exp_stats()]. See [exp_stats()] for more information. +#' +#' @param object A data frame with exposure-level records +#' +#' @param ... Additional arguments passed to [exp_stats()] +#' +#' @return A tibble with class `exp_df`, `tbl_df`, `tbl`, +#' and `data.frame`. +#' +#' @examples +#' toy_census |> expose("2022-12-31", target_status = "Surrender") |> +#' summary() +#' +#' @seealso [exp_stats()] +#' +#' @export +summary.exposed_df <- function(object, ...) { + exp_stats(object, ...) +} diff --git a/man/exp_stats.Rd b/man/exp_stats.Rd index 08a1f91..184ea05 100644 --- a/man/exp_stats.Rd +++ b/man/exp_stats.Rd @@ -130,7 +130,7 @@ data while retaining any grouping variables passed to the "dots" } \examples{ -toy_census |> expose("2020-12-31", target_status = "Surrender") |> +toy_census |> expose("2022-12-31", target_status = "Surrender") |> exp_stats() exp_res <- census_dat |> diff --git a/man/summary.exposed_df.Rd b/man/summary.exposed_df.Rd new file mode 100644 index 0000000..8fdbd21 --- /dev/null +++ b/man/summary.exposed_df.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/exposed_df_summary.R +\name{summary.exposed_df} +\alias{summary.exposed_df} +\title{Summarize experience study records} +\usage{ +\method{summary}{exposed_df}(object, ...) +} +\arguments{ +\item{object}{A data frame with exposure-level records} + +\item{...}{Additional arguments passed to \code{\link[=exp_stats]{exp_stats()}}} +} +\value{ +A tibble with class \code{exp_df}, \code{tbl_df}, \code{tbl}, +and \code{data.frame}. +} +\description{ +Create a summary data frame of termination experience for a given target +status. +} +\details{ +Calling \code{\link[=summary]{summary()}} on an \code{exposed_df} object will summarize results using +\code{\link[=exp_stats]{exp_stats()}}. See \code{\link[=exp_stats]{exp_stats()}} for more information. +} +\examples{ +toy_census |> expose("2022-12-31", target_status = "Surrender") |> + summary() + +} +\seealso{ +\code{\link[=exp_stats]{exp_stats()}} +} From b59e4fe3ec4d8675993d5bdae9beb8d037dce455 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sat, 18 Nov 2023 11:00:43 -0500 Subject: [PATCH 22/25] Documentation fix --- R/expose_split.R | 1 + man/expose_split.Rd | 2 ++ 2 files changed, 3 insertions(+) diff --git a/R/expose_split.R b/R/expose_split.R index b4a3227..d35bc10 100644 --- a/R/expose_split.R +++ b/R/expose_split.R @@ -22,6 +22,7 @@ #' [expose()] when `cal_expo = TRUE`). #' #' @param dat An `exposed_df` object with calendar period exposures. +#' @param x Any object #' #' @return For `expose_split()`, a tibble with class `split_exposed_df`, #' `exposed_df`, `tbl_df`, `tbl`, and `data.frame`. The results include all diff --git a/man/expose_split.Rd b/man/expose_split.Rd index 500feff..c60f2c9 100644 --- a/man/expose_split.Rd +++ b/man/expose_split.Rd @@ -11,6 +11,8 @@ is_split_exposed_df(x) } \arguments{ \item{dat}{An \code{exposed_df} object with calendar period exposures.} + +\item{x}{Any object} } \value{ For \code{expose_split()}, a tibble with class \code{split_exposed_df}, From d27836e4688b601a0ee9a0f4a9140f06bc231943 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sun, 19 Nov 2023 10:00:08 -0500 Subject: [PATCH 23/25] Argument rename --- R/expose_split.R | 41 +++++++++++++++++++++-------------------- man/expose_split.Rd | 8 ++++---- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/R/expose_split.R b/R/expose_split.R index d35bc10..7892cc4 100644 --- a/R/expose_split.R +++ b/R/expose_split.R @@ -16,17 +16,17 @@ #' `is_split_exposed_df()` will return `TRUE` if `x` is a `split_exposed_df` #' object. #' -#' @details `dat` must be an `exposed_df` with calendar year, quarter, month, +#' @details `.data` must be an `exposed_df` with calendar year, quarter, month, #' or week exposure records. Calendar year exposures are created by the #' functions [expose_cy()], [expose_cq()], [expose_cm()], or [expose_cw()], (or #' [expose()] when `cal_expo = TRUE`). #' -#' @param dat An `exposed_df` object with calendar period exposures. +#' @param .data An `exposed_df` object with calendar period exposures. #' @param x Any object #' #' @return For `expose_split()`, a tibble with class `split_exposed_df`, #' `exposed_df`, `tbl_df`, `tbl`, and `data.frame`. The results include all -#' columns in `dat` except that `exposure` has been renamed to `exposure_cal`. +#' columns in `.data` except that `exposure` has been renamed to `exposure_cal`. #' Additional columns include: #' #' - `exposure_pol` - policy year exposures @@ -40,25 +40,25 @@ #' @seealso [expose()] #' #' @export -expose_split <- function(dat) { +expose_split <- function(.data) { - verify_exposed_df(dat) - expo_type <- strsplit(attr(dat, "exposure_type"), "_")[[1]] + verify_exposed_df(.data) + expo_type <- strsplit(attr(.data, "exposure_type"), "_")[[1]] if (expo_type[[1]] != "calendar") { - rlang::abort(c(x = "`dat` must contain calendar exposures.", + rlang::abort(c(x = "`.data` must contain calendar exposures.", i = "Hint: Try passing an `exposed_df` object that was created by `expose_cy()`, `expose_cq()`, `expose_cm()`, or `expose_cw()`.")) } - if (!is.null(attr(dat, "trx_types"))) { - rlang::warn(c("!" = "`dat` has transactions attached. This will lead to duplication of transactions after exposures are split.", + if (!is.null(attr(.data, "trx_types"))) { + rlang::warn(c("!" = "`.data` has transactions attached. This will lead to duplication of transactions after exposures are split.", "i" = "Try calling `add_transactions()` after calling `expose_split()` instead of beforehand.") ) } - target_status <- attr(dat, "target_status") - default_status <- attr(dat, "default_status") - date_cols <- attr(dat, "date_cols") |> rlang::parse_exprs() + target_status <- attr(.data, "target_status") + default_status <- attr(.data, "default_status") + date_cols <- attr(.data, "date_cols") |> rlang::parse_exprs() expo_length <- expo_type[[2]] pol_frac <- function(x, start, end, y) { @@ -75,7 +75,7 @@ expose_split <- function(dat) { # h = yearfrac from boy to anniv # v = yearfrac from boy to term - dat <- dat |> + .data <- .data |> # temporary generic date column names rename(cal_b = !!date_cols[[1]], cal_e = !!date_cols[[2]]) |> @@ -88,7 +88,7 @@ expose_split <- function(dat) { v = cal_frac(term_date) ) - pre_anniv <- dat |> + pre_anniv <- .data |> filter(split) |> mutate(piece = 1L, cal_b = pmax(issue_date, cal_b), @@ -99,7 +99,7 @@ expose_split <- function(dat) { anniv - 1L) ) - post_anniv <- dat |> + post_anniv <- .data |> mutate(piece = 2L, cal_b = dplyr::if_else(split, anniv, cal_b), exposure = dplyr::if_else(split, 1 - h, 1), @@ -112,7 +112,7 @@ expose_split <- function(dat) { cal_b - 1L) ) - dat <- dplyr::bind_rows(pre_anniv, post_anniv) |> + .data <- dplyr::bind_rows(pre_anniv, post_anniv) |> filter(cal_b <= cal_e, is.na(term_date) | term_date >= cal_b) |> mutate(term_date = dplyr::if_else(between(term_date, cal_b, cal_e), @@ -121,7 +121,7 @@ expose_split <- function(dat) { piece - 1L, status = dplyr::if_else(is.na(term_date), factor(default_status, - levels = levels(dat$status)), + levels = levels(.data$status)), status), claims = status %in% target_status, exposure_cal = dplyr::case_when( @@ -150,15 +150,16 @@ expose_split <- function(dat) { ) |> arrange(pol_num, cal_b, piece) |> select(-h, -v, -split, -anniv, -claims, -exposure, -piece) |> + relocate(pol_yr, .after = cal_e) |> # restore date column names rename(!!date_cols[[1]] := cal_b, !!date_cols[[2]] := cal_e) # update exposure type and update class - class(dat) <- c("split_exposed_df", class(dat)) - attr(dat, "exposure_type") <- paste0("split_", expo_length) + class(.data) <- c("split_exposed_df", class(.data)) + attr(.data, "exposure_type") <- paste0("split_", expo_length) - dat + .data } diff --git a/man/expose_split.Rd b/man/expose_split.Rd index c60f2c9..abafe87 100644 --- a/man/expose_split.Rd +++ b/man/expose_split.Rd @@ -5,19 +5,19 @@ \alias{is_split_exposed_df} \title{Split calendar exposures by policy year} \usage{ -expose_split(dat) +expose_split(.data) is_split_exposed_df(x) } \arguments{ -\item{dat}{An \code{exposed_df} object with calendar period exposures.} +\item{.data}{An \code{exposed_df} object with calendar period exposures.} \item{x}{Any object} } \value{ For \code{expose_split()}, a tibble with class \code{split_exposed_df}, \code{exposed_df}, \code{tbl_df}, \code{tbl}, and \code{data.frame}. The results include all -columns in \code{dat} except that \code{exposure} has been renamed to \code{exposure_cal}. +columns in \code{.data} except that \code{exposure} has been renamed to \code{exposure_cal}. Additional columns include: \itemize{ \item \code{exposure_pol} - policy year exposures @@ -44,7 +44,7 @@ be used to summarize results. object. } \details{ -\code{dat} must be an \code{exposed_df} with calendar year, quarter, month, +\code{.data} must be an \code{exposed_df} with calendar year, quarter, month, or week exposure records. Calendar year exposures are created by the functions \code{\link[=expose_cy]{expose_cy()}}, \code{\link[=expose_cq]{expose_cq()}}, \code{\link[=expose_cm]{expose_cm()}}, or \code{\link[=expose_cw]{expose_cw()}}, (or \code{\link[=expose]{expose()}} when \code{cal_expo = TRUE}). From 61afe39f9f4f3f3c088e95de034020e44c04cbda Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sun, 19 Nov 2023 10:06:00 -0500 Subject: [PATCH 24/25] Updated exposures vignette for expose_split --- vignettes/exposures.Rmd | 83 +++++++++++++++++++++++++++++++++++------ 1 file changed, 71 insertions(+), 12 deletions(-) diff --git a/vignettes/exposures.Rmd b/vignettes/exposures.Rmd index 7b7a020..6d53e0f 100644 --- a/vignettes/exposures.Rmd +++ b/vignettes/exposures.Rmd @@ -140,7 +140,7 @@ exposed_data2 |> ``` -## Other exposures periods +## Other exposure periods The default exposure basis used by `expose()` is policy years. Using the arguments `cal_expo` and `expo_length` other exposure periods can be used. @@ -152,9 +152,11 @@ Looking at the second policy, we can see that the first year is left-censored be ```{r expo-cal} -toy_census[2, ] |> +exposed_cal <- toy_census |> expose(end_date = "2022-12-31", cal_expo = TRUE, target_status = "Surrender") +exposed_cal |> filter(pol_num == 2) + ``` @@ -164,25 +166,82 @@ toy_census[2, ] |> The length of the exposure period can be decreased by passing `"quarter"`, `"month"`, or `"week"` to the `expo_length` argument. This can be used with policy or calendar-based exposures. ```{r expo-mth} -toy_census[2, ] |> +toy_census |> expose(end_date = "2022-12-31", cal_expo = TRUE, expo_length = "quarter", - target_status = "Surrender") + target_status = "Surrender") |> + filter(pol_num == 2) ``` ### Convenience functions The following functions are convenience wrappers around `expose()` that target a specific exposure type without specifying `cal_expo` and `expo_length`. -- `expose_py` = exposures by policy year -- `expose_pq` = exposures by policy quarter -- `expose_pm` = exposures by policy month -- `expose_pw` = exposures by policy week -- `expose_cy` = exposures by calendar year -- `expose_cq` = exposures by calendar quarter -- `expose_cm` = exposures by calendar month -- `expose_cw` = exposures by calendar week +- `expose_py()` = exposures by policy year +- `expose_pq()` = exposures by policy quarter +- `expose_pm()` = exposures by policy month +- `expose_pw()` = exposures by policy week +- `expose_cy()` = exposures by calendar year +- `expose_cq()` = exposures by calendar quarter +- `expose_cm()` = exposures by calendar month +- `expose_cw()` = exposures by calendar week + +## Split exposures by calendar period and policy year + +A common technique used in experience studies is to split calendar years into two records: a pre-anniversary record and a post-anniversary record. In actxps, this can be accomplished using the `expose_split()` function. + +Let's continue examining the second policy. `exposed_cal`, which contains calendar year exposures, is passed into `expose_split()`. The resulting data frame now contains 19 records instead of 10. There is one record for 2011 and 2 records for all other years. The year 2011 only has a single record because the policy was issued in this year, so there can only be a post-anniversary record. + +```{r expo-split} +split <- expose_split(exposed_cal) + +split |> filter(pol_num == 2) |> + select(cal_yr, cal_yr_end, pol_yr, exposure_pol, exposure_cal) +``` + +The output of `expose_split()` contains two exposure columns. + +- `exposure_pol` contains policy year exposures +- `exposure_cal` contains calendar year exposures + +The two exposure bases will often not match for two reasons: + +1. Calendar years and policy years have different start and end dates that may or may not include a leap day. In the first row, the calendar year exposure is 0.6 years of the year 2011, which does not include a leap day. In the second row, the policy year exposure is 0.5984 years of the policy year spanning 2011-05-27 to 2012-05-26, which does include a leap day. + +2. Application of the annual exposure method. If the termination event of interest appears on a post-anniversary record, policy exposures will be 1 and calendar exposures will be the fraction of the year spanning the anniversary to December 31st. Conversely, if the termination event of interest appears on a pre-anniversary record, calendar exposures will be 1 and policy exposures will be the fraction of the policy year from January 1st to the last day of the current policy year. While it may sound confusing at first, these rules are important to ensure that the termination event of interest always has an exposure of 1 when the data is grouped on a calendar year or policy year basis. + +Some downstream functions like `exp_stats()` expect `exposed_df` objects to have a single column for exposures. For split exposures, the exposure basis must be specified using the `col_exposure` argument. + +```{r, split-stats-unclear, eval = FALSE} + +exp_stats(split) +``` + + +```{r split-stats-unclear-cat, echo = FALSE} + +tryCatch(exp_stats(split), + error = function(e) cat(e$message)) +``` + + +```{r, split-stats-clear, eval = FALSE} + +exp_stats(split, col_exposure = "exposure_pol") +``` + +`expose_split()` doesn't just work with calendar year exposures. Calendar quarters, months, or weeks can also be split. For periods shorter than a year, a record is only split into pre- and post-anniversary segments if a policy anniversary appears in the middle of the period. + +```{r, split-qtr} + +expose_cq(toy_census, "2022-12-31", target_status = "Surrender") |> + expose_split() |> + filter(pol_num == 2) |> + select(cal_qtr, cal_qtr_end, pol_yr, exposure_pol, exposure_cal) +``` + +Note, however, that calendar period exposures will always be expressed in the original units and policy exposures will always be expressed in years. Above, calendar exposures are quarters whereas policy exposures are years. ## Tidymodels recipe step From 6a0830751178c98a5808b6c3a07631ce49c43689 Mon Sep 17 00:00:00 2001 From: Matt Heaphy Date: Sun, 19 Nov 2023 10:21:30 -0500 Subject: [PATCH 25/25] news update --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 991334c..3d1d0d0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -16,6 +16,8 @@ into by thousands, millions, billions, or trillions. - Corrected an error in the calculation of the standard deviations of claims when `exp_stats()` is passed a weighting variable. - Added a `summary()` method for `exposed_df` objects that calls `exp_stats()`. +- The assumed default status in `expose()` functions was changed from the first +observed status to the most common status. # actxps 1.3.0