Skip to content

Commit

Permalink
Merge pull request #42 from mattheaphy/dev
Browse files Browse the repository at this point in the history
exp_df and trx_df coercion
  • Loading branch information
mattheaphy authored Nov 25, 2023
2 parents 351e5fb + 5093881 commit 4854843
Show file tree
Hide file tree
Showing 28 changed files with 961 additions and 49 deletions.
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ export(add_predictions)
export(add_transactions)
export(anti_join)
export(arrange)
export(as_exp_df)
export(as_exposed_df)
export(as_trx_df)
export(autoplot)
export(autotable)
export(bake)
Expand All @@ -88,8 +90,10 @@ export(full_join)
export(group_by)
export(groups)
export(inner_join)
export(is_exp_df)
export(is_exposed_df)
export(is_split_exposed_df)
export(is_trx_df)
export(left_join)
export(mutate)
export(plot_actual_to_expected)
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@ when `exp_stats()` is passed a weighting variable.
- Added a `summary()` method for `exposed_df` objects that calls `exp_stats()`.
- The assumed default status in `expose()` functions was changed from the first
observed status to the most common status.
- The functions `as_exp_df()` and `as_trx_df()` were added to convert
pre-aggregated experience studies to the `exp_df` and `trx_df` formats,
respectively.
- `agg_sim_dat` - a new simulated data set of pre-aggregated experience was
added for testing `as_exp_df()` and `as_trx_df()`.
- `is_exp_df()` and `as_trx_df()` were added to test for the `exp_df` and
`trx_df` classes.

# actxps 1.3.0

Expand Down
40 changes: 40 additions & 0 deletions R/agg_sim_dat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#' Aggregate simulated annuity data
#'
#' A pre-aggregated version of surrender and withdrawal experience from the
#' simulated data sets `census_dat`, `withdrawals`, and `account_vals`. This
#' data is theoretical only and does not represent the experience on any
#' specific product.
#'
#' @format A data frame containing summarized experience study results grouped
#' by policy year, income guarantee presence, tax-qualified status, and product.
#'
#' @details
#'
#' \describe{
#' \item{pol_yr}{Policy year}
#' \item{inc_guar}{Indicates whether the policy was issued with an income
#' guarantee}
#' \item{qual}{Indicates whether the policy was purchased with tax-qualified
#' funds}
#' \item{product}{Product: a, b, or c}
#' \item{exposure_n}{Sum of policy year exposures by count}
#' \item{claims_n}{Sum of claim counts}
#' \item{av}{Sum of account value}
#' \item{exposure_amt}{Sum of policy year exposures weighted by account value}
#' \item{claims_amt}{Sum of claims weighted by account value}
#' \item{av_sq}{Sum of squared account values}
#' \item{n}{Number of exposure records}
#' \item{wd}{Sum of partial withdrawal transactions}
#' \item{wd_n}{Count of partial withdrawal transactions}
#' \item{wd_flag}{Count of exposure records with partial withdrawal
#' transactions}
#' \item{wd_sq}{Sum of squared partial withdrawal transactions}
#' \item{av_w_wd}{Sum of account value for exposure records with partial
#' withdrawal transactions}
#' }
#' @seealso [census_dat]
#' @name agg_sim_dat

NULL
#' @rdname agg_sim_dat
"agg_sim_dat"
160 changes: 160 additions & 0 deletions R/exp_df_helpers.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#' Termination summary helper functions
#'
#' Convert aggregate termination experience studies to the `exp_df` class.
#'
#' `is_exp_df()` will return `TRUE` if `x` is an `exp_df` object.
#'
#' `as_exp_df()` will coerce a data frame to an `exp_df` object if that
#' data frame has columns for exposures and claims.
#'
#' `as_exp_df()` is most useful for working with aggregate summaries of
#' experience that were not created by actxps where individual policy
#' information is not available. After converting the data to the `exp_df`
#' class, [summary()] can be used to summarize data by any grouping variables,
#' and [autoplot()] and [autotable()] are available for reporting.
#'
#' If nothing is passed to `wt`, the data frame `x` must include columns
#' containing:
#'
#' - Exposures (`exposure`)
#' - Claim counts (`claims`)
#'
#' If `wt` is passed, the data must include columns containing:
#'
#' - Weighted exposures (`exposure`)
#' - Weighted claims (`claims`)
#' - Claim counts (`n_claims`)
#' - The raw sum of weights **NOT** multiplied by exposures
#' - Exposure record counts (`.weight_n`)
#' - The raw sum of squared weights (`.weight_sq`)
#'
#' The names in parentheses above are expected column names. If the data
#' frame passed to `as_exp_df()` uses different column names, these can be
#' specified using the `col_*` arguments.
#'
#' When a column name is passed to `wt`, the columns `.weight`, `.weight_n`,
#' and `.weight_sq` are used to calculate credibility and confidence intervals.
#' If credibility and confidence intervals aren't required, then it is not
#' necessary to pass anything to `wt`. The results of `as_exp_df()` and any
#' downstream summaries will still be weighted as long as the exposures and
#' claims are pre-weighted.
#'
#' `target_status`, `start_date`, and `end_date` are optional arguments that are
#' only used for printing the resulting `exp_df` object.
#'
#' @param x An object. For `as_exp_df()`, `x` must be a data frame.
#' @param expected A character vector containing column names in x with
#' expected values
#' @param wt Optional. Length 1 character vector. Name of the column in `x`
#' containing weights to use in the calculation of claims, exposures, partial
#' credibility, and confidence intervals.
#' @param col_claims Optional. Name of the column in `x` containing claims. The
#' assumed default is "claims".
#' @param col_exposure Optional. Name of the column in `x` containing exposures.
#' The assumed default is "exposure".
#' @param col_n_claims Optional and only used used when `wt` is passed. Name of
#' the column in `x` containing the number of claims.
#' @param col_weight_sq Optional and only used used when `wt` is passed. Name of
#' the column in `x` containing the sum of squared weights.
#' @param col_weight_n Optional and only used used when `wt` is passed. Name of
#' the column in `x` containing exposure record counts.
#' @param credibility If `TRUE`, future calls to [summary()] will include
#' partial credibility weights and credibility-weighted termination rates.
#' @param conf_level Confidence level used for the Limited Fluctuation
#' credibility method and confidence intervals
#' @param cred_r Error tolerance under the Limited Fluctuation credibility
#' method
#' @param conf_int If `TRUE`, future calls to [summary()] will include
#' confidence intervals around the observed termination rates and any
#' actual-to-expected ratios.
#' @inheritParams expose
#'
#' @return For `is_exp_df()`, a length-1 logical vector. For `as_exp_df()`,
#' an `exp_df` object.
#'
#' @seealso [exp_stats()] for information on how `exp_df` objects are typically
#' created from individual exposure records.
#'
#' @examples
#' # convert pre-aggregated experience into an exp_df object
#' dat <- as_exp_df(agg_sim_dat, col_exposure = "exposure_n",
#' col_claims = "claims_n",
#' target_status = "Surrender",
#' start_date = 2005, end_date = 2019,
#' conf_int = TRUE)
#' dat
#' is_exp_df(dat)
#'
#' # summary by policy year
#' summary(dat, pol_yr)
#'
#' # repeat the prior exercise on a weighted basis
#' dat_wt <- as_exp_df(agg_sim_dat, wt = "av",
#' col_exposure = "exposure_amt",
#' col_claims = "claims_amt",
#' col_n_claims = "claims_n",
#' col_weight_sq = "av_sq",
#' col_weight_n = "n",
#' target_status = "Surrender",
#' start_date = 2005, end_date = 2019,
#' conf_int = TRUE)
#' dat_wt
#'
#' # summary by policy year
#' summary(dat_wt, pol_yr)
#'
#'
#' @export
as_exp_df <- function(x, expected = NULL, wt = NULL,
col_claims, col_exposure,
col_n_claims, col_weight_sq, col_weight_n,
target_status = NULL,
start_date = as.Date("1900-01-01"), end_date = NULL,
credibility = FALSE,
conf_level = 0.95, cred_r = 0.05, conf_int = FALSE) {

if (is_exp_df(x)) return(x)

if (!is.data.frame(x)) {
rlang::abort("`x` must be a data frame.")
}

# column name alignment
if (!missing(col_exposure)) x <- x |> rename(exposure = {{col_exposure}})
if (!missing(col_claims)) x <- x |> rename(claims = {{col_claims}})

if (is.null(wt)) {
req_names <- c("exposure", "claims")
} else {
req_names <- c("exposure", "claims", "n_claims", ".weight",
".weight_sq", ".weight_n")
if (!missing(col_n_claims)) x <- x |> rename(n_claims = {{col_n_claims}})
x <- x |> rename(.weight = {{wt}})
if (!missing(col_weight_sq)) x <- x |>
rename(.weight_sq = {{col_weight_sq}})
if (!missing(col_weight_n)) x <- x |> rename(.weight_n = {{col_weight_n}})
}

# check required columns
verify_col_names(names(x), req_names)

if (is.null(wt)) x$n_claims <- x$claims

new_exp_df(x,
.groups = list(),
target_status = target_status,
start_date = start_date,
expected = expected,
end_date = end_date,
wt = wt,
credibility = credibility,
conf_level = conf_level, cred_r = cred_r,
conf_int = conf_int)

}

#' @export
#' @rdname as_exp_df
is_exp_df <- function(x) {
inherits(x, "exp_df")
}
21 changes: 19 additions & 2 deletions R/exp_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,23 @@ finish_exp_stats <- function(.data, target_status, expected,
.after = dplyr::last_col())
}

tibble::new_tibble(res,
new_exp_df(res,
.groups = .groups,
target_status = target_status,
start_date = start_date,
expected = expected,
end_date = end_date,
wt = wt,
credibility = credibility,
conf_level = conf_level, cred_r = cred_r,
conf_int = conf_int)
}

# low level class constructor
new_exp_df <- function(x, .groups, target_status, start_date, expected,
end_date, wt, credibility, conf_level,
cred_r = cred_r, conf_int) {
tibble::new_tibble(x,
class = "exp_df",
groups = .groups,
target_status = target_status,
Expand All @@ -341,7 +357,8 @@ finish_exp_stats <- function(.data, target_status, expected,
end_date = end_date,
wt = wt,
xp_params = list(credibility = credibility,
conf_level = conf_level, cred_r = cred_r,
conf_level = conf_level,
cred_r = cred_r,
conf_int = conf_int))
}

Expand Down
3 changes: 2 additions & 1 deletion R/expose_split.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@
#' @examples
#' toy_census |> expose_cy("2022-12-31") |> expose_split()
#'
#' @seealso [expose()]
#' @seealso [expose()] for information on creating exposure records from census
#' data.
#'
#' @export
expose_split <- function(.data) {
Expand Down
21 changes: 14 additions & 7 deletions R/exposed_df_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
#' `as_exposed_df()`, an `exposed_df` object.
#'
#' @importFrom vctrs vec_ptype2 vec_cast
#' @seealso [expose()] for information on how `exposed_df` objects are typically
#' created from census data.
#'
#' @export
is_exposed_df <- function(x) {
Expand Down Expand Up @@ -119,16 +121,11 @@ as_exposed_df <- function(x, end_date, start_date = as.Date("1900-01-01"),

# check required columns
# pol_num, status, exposure, 2 date cols, policy period (policy expo only)
unmatched <- c("pol_num", "status", "exposure",
req_names <- c("pol_num", "status", "exposure",
exp_col_pol_per,
exp_cols_dates,
exp_cols_trx)
unmatched <- setdiff(unmatched, names(x))

if (length(unmatched) > 0) {
rlang::abort(c(x = glue::glue("The following columns are missing from `x`: {paste(unmatched, collapse = ', ')}."),
i = "Hint: create these columns or use the `col_*` arguments to specify existing columns that should be mapped to these elements."))
}
verify_col_names(names(x), req_names)

if (missing(default_status)) {
default_status <- most_common(x$status)
Expand Down Expand Up @@ -612,3 +609,13 @@ verify_get_trx_types <- function(.data, required = TRUE) {
}
trx_types
}

# function to verify that required names exist and to send an error if not
verify_col_names <- function(x_names, required) {
unmatched <- setdiff(required, x_names)

if (length(unmatched) > 0) {
rlang::abort(c(x = glue::glue("The following columns are missing: {paste(unmatched, collapse = ', ')}."),
i = "Hint: create these columns or use the `col_*` arguments to specify existing columns that should be mapped to these elements."))
}
}
2 changes: 1 addition & 1 deletion R/sim_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
#' \item{av_anniv}{Account value on the policy anniversary date}
#' }
#'

#' @seealso [census_dat]
#' @name sim_data

NULL
Expand Down
Loading

0 comments on commit 4854843

Please sign in to comment.