Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split #41

Merged
merged 26 commits into from
Nov 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
6fb423c
dev version
mattheaphy Sep 17, 2023
022649a
Merge branch 'main' into split
mattheaphy Sep 17, 2023
624988c
Added default status property to expose_df objects
mattheaphy Oct 31, 2023
e81e2b7
namespace catch-up
mattheaphy Nov 3, 2023
1803c10
first draft of expose_split function
mattheaphy Nov 4, 2023
3a29db7
roxygen for expose_split
mattheaphy Nov 4, 2023
1854d64
split exposure tests
mattheaphy Nov 4, 2023
dabcaa2
warning for calling expose_split with transactions attached
mattheaphy Nov 4, 2023
c1d2b03
informative error when split exposures are passed to summary function…
mattheaphy Nov 4, 2023
214a9ac
additional split exposure tests
mattheaphy Nov 5, 2023
3c5d423
- split exposure treatment for shiny apps
mattheaphy Nov 6, 2023
9691867
cleaned up extraneous filter description appearing for numeric fields…
mattheaphy Nov 6, 2023
4e46e2a
shiny - hide unused exposure basis
mattheaphy Nov 6, 2023
c6d10f7
documentation cleanup
mattheaphy Nov 6, 2023
a2d61d8
New autotable arguments for auto-scaling amount columns and specifyin…
mattheaphy Nov 12, 2023
aa5bea9
bug fix for standard deviation of claims in weighted exp_stats call
mattheaphy Nov 12, 2023
1ba4f31
moved expo_step out of expose for use in other functions
mattheaphy Nov 16, 2023
2b33c93
generalized expose_split() to work with all calendar exposure types
mattheaphy Nov 16, 2023
40b1458
- Added cast and ptype methods for split_exposed_df.
mattheaphy Nov 18, 2023
c09f02f
typing tests for split_exposed_df objects
mattheaphy Nov 18, 2023
d8d5b41
news update
mattheaphy Nov 18, 2023
d748d36
Summary method for exposure data frames that calls exp_stats
mattheaphy Nov 18, 2023
b59e4fe
Documentation fix
mattheaphy Nov 18, 2023
d27836e
Argument rename
mattheaphy Nov 19, 2023
61afe39
Updated exposures vignette for expose_split
mattheaphy Nov 19, 2023
6a08307
news update
mattheaphy Nov 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: actxps
Title: Create Actuarial Experience Studies: Prepare Data, Summarize Results, and Create Reports
Version: 1.3.0
Version: 1.3.0.9000
Authors@R:
person("Matt", "Heaphy", email = "[email protected]", role = c("aut", "cre"))
Maintainer: Matt Heaphy <[email protected]>
Expand Down
22 changes: 22 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,42 @@ S3method(select,exposed_df)
S3method(semi_join,exposed_df)
S3method(slice,exposed_df)
S3method(summary,exp_df)
S3method(summary,exposed_df)
S3method(summary,trx_df)
S3method(tidy,step_expose)
S3method(ungroup,exposed_df)
S3method(vec_cast,data.frame.exposed_df)
S3method(vec_cast,data.frame.split_exposed_df)
S3method(vec_cast,exposed_df.data.frame)
S3method(vec_cast,exposed_df.exposed_df)
S3method(vec_cast,exposed_df.grouped_df)
S3method(vec_cast,exposed_df.split_exposed_df)
S3method(vec_cast,exposed_df.tbl_df)
S3method(vec_cast,grouped_df.exposed_df)
S3method(vec_cast,grouped_df.split_exposed_df)
S3method(vec_cast,split_exposed_df.data.frame)
S3method(vec_cast,split_exposed_df.exposed_df)
S3method(vec_cast,split_exposed_df.grouped_df)
S3method(vec_cast,split_exposed_df.split_exposed_df)
S3method(vec_cast,split_exposed_df.tbl_df)
S3method(vec_cast,tbl_df.exposed_df)
S3method(vec_cast,tbl_df.split_exposed_df)
S3method(vec_ptype2,data.frame.exposed_df)
S3method(vec_ptype2,data.frame.split_exposed_df)
S3method(vec_ptype2,exposed_df.data.frame)
S3method(vec_ptype2,exposed_df.exposed_df)
S3method(vec_ptype2,exposed_df.grouped_df)
S3method(vec_ptype2,exposed_df.split_exposed_df)
S3method(vec_ptype2,exposed_df.tbl_df)
S3method(vec_ptype2,grouped_df.exposed_df)
S3method(vec_ptype2,grouped_df.split_exposed_df)
S3method(vec_ptype2,split_exposed_df.data.frame)
S3method(vec_ptype2,split_exposed_df.exposed_df)
S3method(vec_ptype2,split_exposed_df.grouped_df)
S3method(vec_ptype2,split_exposed_df.split_exposed_df)
S3method(vec_ptype2,split_exposed_df.tbl_df)
S3method(vec_ptype2,tbl_df.exposed_df)
S3method(vec_ptype2,tbl_df.split_exposed_df)
export(add_predictions)
export(add_transactions)
export(anti_join)
Expand All @@ -63,12 +82,14 @@ export(expose_pm)
export(expose_pq)
export(expose_pw)
export(expose_py)
export(expose_split)
export(filter)
export(full_join)
export(group_by)
export(groups)
export(inner_join)
export(is_exposed_df)
export(is_split_exposed_df)
export(left_join)
export(mutate)
export(plot_actual_to_expected)
Expand Down Expand Up @@ -110,6 +131,7 @@ importFrom(dplyr,ungroup)
importFrom(generics,tidy)
importFrom(ggplot2,autoplot)
importFrom(lubridate,"%m+%")
importFrom(lubridate,"%m-%")
importFrom(recipes,bake)
importFrom(recipes,prep)
importFrom(rlang,":=")
Expand Down
23 changes: 22 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,27 @@
# actxps (development version)

- actxps now supports split exposures that divide calendar periods crossing
policy anniversaries into pre-anniversary and post-anniversary records. The
function `expose_split()` can convert any `exposed_df` object with calendar
period exposures (yearly, quarterly, monthly, or weekly) into a
`split_exposed_df` object. Split exposure data frames contain columns for
exposures both on a calendar period and policy year basis.
- `exp_stats()` and `exp_shiny()` now require clarification as to which exposure
basis should be used when passed a `split_exposed_df` object.
- All `expose_df` objects now contains a `default_status` attribute.
- `autotable()` functions now contain the arguments `decimals_amt` and
`suffix_amt`. The former allows one to specify the number of decimals appearing
after amount columns. The latter is used to automatically scale large numbers
into by thousands, millions, billions, or trillions.
- Corrected an error in the calculation of the standard deviations of claims
when `exp_stats()` is passed a weighting variable.
- Added a `summary()` method for `exposed_df` objects that calls `exp_stats()`.
- The assumed default status in `expose()` functions was changed from the first
observed status to the most common status.

# actxps 1.3.0

- A new `conf_int` argument was added to `exp_stats()` and that creates confidence
- A new `conf_int` argument was added to `exp_stats()` that creates confidence
intervals around observed termination rates, credibility-weighted termination
rates, and any actual-to-expected ratios.
- Similarly, `conf_int` was added to `trx_stats()` to create confidence intervals around utilization rates and any "percentage of" output columns. A `conf_level`
Expand Down
38 changes: 34 additions & 4 deletions R/exp_shiny.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@
#' @param theme The name of a theme passed to the `preset` argument of
#' `bslib::bs_theme()`. Alternatively, a complete Bootstrap theme created using
#' `bslib::bs_theme()`.
#' @param col_exposure Name of the column in `dat` containing exposures. This
#' input is only used to clarify the exposure basis when `dat` is a
#' `split_exposed_df` object. For more information on split exposures, see
#' [expose_split()].
#'
#' @inheritParams exp_stats
#'
Expand Down Expand Up @@ -151,13 +155,28 @@ exp_shiny <- function(dat,
credibility = TRUE,
conf_level = 0.95,
cred_r = 0.05,
theme = "shiny") {
theme = "shiny",
col_exposure = "exposure") {

rlang::check_installed("shiny")
rlang::check_installed("bslib")
rlang::check_installed("thematic")

verify_exposed_df(dat)

# special logic required for split exposed data frames
if (is_split_exposed_df(dat)) {
check_split_expose_basis(dat, col_exposure)
dat <- rename(dat,
exposure = {{col_exposure}})

if (col_exposure == "exposure_cal") {
dat$exposure_pol <- NULL
} else {
dat$exposure_cal <- NULL
}
}

# check for presence of transactions
all_trx_types <- verify_get_trx_types(dat, required = FALSE)
has_trx <- !is.null(all_trx_types)
Expand Down Expand Up @@ -743,9 +762,20 @@ exp_shiny <- function(dat,
shiny::need(input$play, "Paused")
)

keep <- purrr::imap_lgl(preds$predictors,
~ length(setdiff(preds$scope[[.y]],
input[[paste0("i_", .x)]])) > 0)
keep <- preds |> select(predictors, scope, is_number) |>
purrr::pmap_lgl(\(predictors, scope, is_number) {

selected <- input[[paste0("i_", predictors)]]

if (is_number) {
!dplyr::near(scope[[1]], selected[[1]]) ||
!dplyr::near(scope[[2]], selected[[2]])
} else {
length(setdiff(scope, selected)) > 0
}

})

preds$predictors[keep]
})

Expand Down
6 changes: 4 additions & 2 deletions R/exp_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
#' (`.weight_qs`), and the number of records (`.weight_n`).
#'
#' @examples
#' toy_census |> expose("2020-12-31", target_status = "Surrender") |>
#' toy_census |> expose("2022-12-31", target_status = "Surrender") |>
#' exp_stats()
#'
#' exp_res <- census_dat |>
Expand Down Expand Up @@ -131,6 +131,8 @@ exp_stats <- function(.data, target_status = attr(.data, "target_status"),
rlang::abort(c(x = glue::glue("Only 1 column can be passed to `wt`. You supplied {length(wt)} values.")))
}

check_split_expose_basis(.data, col_exposure)

res <- .data |>
rename(exposure = {{col_exposure}},
status = {{col_status}}) |>
Expand Down Expand Up @@ -280,7 +282,7 @@ finish_exp_stats <- function(.data, target_status, expected,
ci <- rlang::exprs(
# For binomial N
# Var(S) = n * p * (Var(X) + E(X)^2 * (1 - p))
sd_agg = (claims * (
sd_agg = (n_claims * (
(ex2_wt - ex_wt ^ 2) + ex_wt ^ 2 * (1 - q_obs))) ^ 0.5,
q_obs_lower = stats::qnorm(p[[1]], claims, sd_agg) / exposure,
q_obs_upper = stats::qnorm(p[[2]], claims, sd_agg) / exposure
Expand Down
89 changes: 55 additions & 34 deletions R/expose.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
#' always applied regardless of status.
#'
#' `default_status` is used to indicate the default active status that
#' should be used when exposure records are created. If left blank, then the
#' first status level will be assumed to be the default active status.
#' should be used when exposure records are created.
#'
#' # Policy period and calendar period variations
#'
Expand All @@ -46,14 +45,18 @@
#' @param .data A data frame with census-level records
#' @param end_date Experience study end date
#' @param start_date Experience study start date. Default value = 1900-01-01.
#' @param target_status Character vector of target status values. Default value = `NULL`.
#' @param cal_expo Set to TRUE for calendar year exposures. Otherwise policy year exposures are assumed.
#' @param target_status Character vector of target status values. Default value
#' = `NULL`.
#' @param cal_expo Set to TRUE for calendar year exposures. Otherwise policy
#' year exposures are assumed.
#' @param expo_length Exposure period length
#' @param col_pol_num Name of the column in `.data` containing the policy number
#' @param col_status Name of the column in `.data` containing the policy status
#' @param col_issue_date Name of the column in `.data` containing the issue date
#' @param col_term_date Name of the column in `.data` containing the termination date
#' @param default_status Optional scalar character representing the default active status code
#' @param col_term_date Name of the column in `.data` containing the termination
#' date
#' @param default_status Optional scalar character representing the default
#' active status code. If not provided, the most common status is assumed.
#' @param ... Arguments passed to `expose()`
#'
#' @return A tibble with class `exposed_df`, `tbl_df`, `tbl`,
Expand All @@ -72,10 +75,13 @@
#'
#' census_dat |> expose_py("2019-12-31", target_status = "Surrender")
#'
#' @seealso [expose_split()] for information on splitting calendar year
#' exposures by policy year.
#'
#' @references Atkinson and McGarry (2016). Experience Study Calculations.
#' <https://www.soa.org/49378a/globalassets/assets/files/research/experience-study-calculations.pdf>
#'
#' @importFrom lubridate %m+%
#' @importFrom lubridate %m+% %m-%
#'
#' @export
expose <- function(.data,
Expand All @@ -102,32 +108,22 @@ expose <- function(.data,

# set up exposure period lengths
expo_length <- rlang::arg_match(expo_length)
expo_step <- switch(expo_length,
"year" = lubridate::years(1),
"quarter" = months(3),
"month" = months(1),
"week" = lubridate::days(7))

cal_frac <- switch(expo_length,
"year" = year_frac,
"quarter" = quarter_frac,
"month" = month_frac,
'week' = week_frac)
expo_step <- expo_step(expo_length)
cal_frac <- cal_frac(expo_length)

# column renames and name conflicts
.data <- .data |>
rename(pol_num = {{col_pol_num}},
status = {{col_status}},
issue_date = {{col_issue_date}},
term_date = {{col_term_date}}) |>
status = {{col_status}},
issue_date = {{col_issue_date}},
term_date = {{col_term_date}}) |>
.expo_name_conflict(cal_expo, expo_length)

# set up statuses
if (!is.factor(.data$status)) .data$status <- factor(.data$status)

if (missing(default_status)) {
default_status <- factor(levels(.data$status)[[1]],
levels = levels(.data$status))
default_status <- most_common(.data$status)
} else {
status_levels <- union(levels(.data$status), default_status)
default_status <- factor(default_status,
Expand All @@ -138,7 +134,7 @@ expose <- function(.data,
# pre-exposure updates
res <- .data |>
filter(issue_date < end_date,
is.na(term_date) | term_date > start_date) |>
is.na(term_date) | term_date > start_date) |>
mutate(
term_date = dplyr::if_else(term_date > end_date,
lubridate::NA_Date_, term_date),
Expand Down Expand Up @@ -176,17 +172,18 @@ expose <- function(.data,
if (cal_expo) {
res <- res |>
mutate(first_per = .time == 1,
cal_e = cal_b %m+% (expo_step * .time) - 1,
cal_b = cal_b %m+% (expo_step * (.time - 1)),
exposure = dplyr::case_when(
status %in% target_status ~ 1,
first_per & last_per ~ cal_frac(last_date) - cal_frac(first_date, 1),
first_per ~ 1 - cal_frac(first_date, 1),
last_per ~ cal_frac(last_date),
TRUE ~ 1)
cal_e = cal_b %m+% (expo_step * .time) - 1,
cal_b = cal_b %m+% (expo_step * (.time - 1)),
exposure = dplyr::case_when(
status %in% target_status ~ 1,
first_per & last_per ~ cal_frac(last_date) -
cal_frac(first_date, 1),
first_per ~ 1 - cal_frac(first_date, 1),
last_per ~ cal_frac(last_date),
TRUE ~ 1)
) |>
select(-rep_n, -first_date, -last_date, -first_per, -last_per,
-.time, -tot_per) |>
-.time, -tot_per) |>
relocate(cal_e, .after = cal_b) |>
dplyr::rename_with(.fn = rename_col, .cols = cal_b, prefix = "cal") |>
dplyr::rename_with(.fn = rename_col, .cols = cal_e, prefix = "cal",
Expand Down Expand Up @@ -214,7 +211,7 @@ expose <- function(.data,
# set up S3 object
new_exposed_df(res, end_date, start_date,
target_status, cal_expo, expo_length,
trx_types = NULL)
trx_types = NULL, default_status)

}

Expand Down Expand Up @@ -267,6 +264,7 @@ expose_cw <- function(...) {
expose(cal_expo = TRUE, expo_length = "week", ...)
}

# helper functions for calendar year fractions - do not export
year_frac <- function(x, .offset = 0) {
(lubridate::yday(x) - .offset) / (365 + lubridate::leap_year(x))
}
Expand All @@ -285,6 +283,14 @@ week_frac <- function(x, .offset = 0) {
(lubridate::wday(x) - .offset) / 7
}

cal_frac <- function(x) {
switch(x,
"year" = year_frac,
"quarter" = quarter_frac,
"month" = month_frac,
'week' = week_frac)
}

# helper function to handle name conflicts
.expo_name_conflict <- function(.data, cal_expo, expo_length) {

Expand Down Expand Up @@ -314,3 +320,18 @@ abbr_period <- function(x) {
"month" = "mth",
'week' = "wk")
}

# determine the most common status
most_common <- function(x) {
y <- table(x) |> sort(decreasing = TRUE) |> names()
factor(y[[1]], levels(x))
}

# helper function for determining exposure step lengths
expo_step <- function(x) {
switch(x,
"year" = lubridate::years(1),
"quarter" = months(3),
"month" = months(1),
"week" = lubridate::days(7))
}
Loading
Loading