Merge pull request #47 from mattheaphy/dev

Dev
mattheaphy · Jun 24, 2024 · d518aa9 · d518aa9
2 parents 079d1ef + 4f74f79
commit d518aa9
Show file tree

Hide file tree

Showing 11 changed files with 209 additions and 66 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: actxps
 Title: Create Actuarial Experience Studies: Prepare Data, Summarize Results, and Create Reports
-Version: 1.4.0.9000
+Version: 1.5.0
 Authors@R: 
     person("Matt", "Heaphy", email = "[email protected]", role = c("aut", "cre"))
 Maintainer: Matt Heaphy <[email protected]>

diff --git a/NEWS.md b/NEWS.md
@@ -1,7 +1,24 @@
-# actxps 1.4.0.9000
-
-- To improve the speed of date calculations, lubridate was replaced with the clock package. Lubridate is no longer included in Imports.
-- **Breaking change** - The `pol_interval()` function is no longer exported. As part of the removal of lubridate, this function's `dur_length` argument only accepts, "year", "quarter", "month", or "week".
+# actxps 1.5.0
+
+- `expose_split()` bug fixes: 
+
+  - `expose_split()` was updated to respect the values of `start_date` 
+    and `end_date` originally passed to the `expose()` function.
+  - Future policy anniversary dates falling on February 29th leap days are now
+    consistent with `expose()`
+  - New tests were added to verify that the sum of policy year exposures
+    (`exposure_pol`) after calling `expose_split()` match exposures produced by
+    `expose_py()`.
+
+- The `expose()` family of functions and `add_transactions()` now allow date 
+  columns to be passed as character vectors in YYYY-MM-DD format. Any character 
+  vectors are converted to dates behind-the-scenes, and any missing values will
+  results in an error message. 
+- To improve the speed of date calculations, lubridate was replaced with the 
+  clock package. Lubridate is no longer included in Imports.
+- **Breaking change** - The `pol_interval()` function is no longer exported. 
+  As part of the removal of lubridate, this function's `dur_length` argument 
+  only accepts, "year", "quarter", "month", or "week".
 - Shiny app layout updates
 - Small vignette and documentation clean-ups
 

diff --git a/R/exp_shiny.R b/R/exp_shiny.R
@@ -256,10 +256,6 @@ exp_shiny <- function(dat,
     yVar_trx <- NULL
   }
 
-  is.Date <- function(x) {
-    inherits(x, "Date")
-  }
-
   # function to make input widgets
   widget <- function(x,
                      checkbox_limit = 8) {

diff --git a/R/expose.R b/R/expose.R
@@ -42,6 +42,8 @@
 #' - `m` = months
 #' - `w` = weeks
 #'
+#' All columns containing dates must be in YYYY-MM-DD format.
+#'
 #' @param .data A data frame with census-level records
 #' @param end_date Experience study end date
 #' @param start_date Experience study start date. Default value = 1900-01-01.
@@ -109,13 +111,26 @@ expose <- function(.data,
   cal_floor <- cal_floor(expo_length)
   add_period <- add_period(expo_length)
 
+  n_term_dates <- sum(!is.na(.data[[col_term_date]]))
+
   # column renames and name conflicts
   .data <- .data |>
     rename(pol_num = {{col_pol_num}},
            status = {{col_status}},
            issue_date = {{col_issue_date}},
            term_date = {{col_term_date}}) |>
-    .expo_name_conflict(cal_expo, expo_length)
+    .expo_name_conflict(cal_expo, expo_length) |>
+    # convert to dates if needed
+    mutate(dplyr::across(c(issue_date, term_date), .convert_date))
+
+  .check_missing_dates(.data$issue_date, "issue_date")
+
+  if (n_term_dates != sum(!is.na(.data$term_date))) {
+    rlang::abort(c(
+      "Bad termination date formats were detected.",
+      i = "Make sure all dates are in YYYY-MM-DD format.")
+    )
+  }
 
   # set up statuses
   if (!is.factor(.data$status)) .data$status <- factor(.data$status)
@@ -129,6 +144,12 @@ expose <- function(.data,
     levels(.data$status) <- status_levels
   }
 
+  if (default_status %in% target_status) {
+    rlang::abort(
+      "`default_status` is not allowed to be the same as `target_status`"
+    )
+  }
+
   # pre-exposure updates
   res <- .data |>
     filter(issue_date < end_date,
@@ -369,3 +390,21 @@ most_common <- function(x) {
   y <- table(x) |> sort(decreasing = TRUE) |> names()
   factor(y[[1]], levels(x))
 }
+
+is.Date <- function(x) {
+  inherits(x, "Date")
+}
+
+.convert_date <- function(x) {
+  if (is.Date(x)) return(x)
+  clock::date_parse(x, format = "%Y-%m-%d")
+}
+
+.check_missing_dates <- function(x, name) {
+  if (any(is.na(x))) {
+    rlang::abort(c(
+      glue::glue("Missing values are not allowed in the `{name}` column."),
+      i = "Make sure all dates are in YYYY-MM-DD format.")
+    )
+  }
+}
diff --git a/R/expose_split.R b/R/expose_split.R
@@ -60,100 +60,88 @@ expose_split <- function(.data) {
   target_status <- attr(.data, "target_status")
   default_status <- attr(.data, "default_status")
   date_cols <- attr(.data, "date_cols") |> rlang::parse_exprs()
+  start_date <- attr(.data, "start_date")
+  end_date <- attr(.data, "end_date")
   expo_length <- expo_type[[2]]
 
-  pol_frac <- function(x, start, end, y) {
-    if (missing(y)) {
-      as.integer(x - start + 1) / as.integer(end - start + 1)
-    } else {
-      as.integer(x - y) / as.integer(end - start + 1)
-    }
-
+  pol_frac <- function(x, start, end, y = start - 1) {
+    as.integer(x - y) / as.integer(end - start + 1)
   }
   cal_frac <- cal_frac(expo_length)
 
   # clock::add_years with invalid pre-populated
   add_years <- \(x, n) clock::add_years(x, n, invalid = "previous")
 
   # time fractions
-  # h = yearfrac from boy to anniv
-  # v = yearfrac from boy to term
+  # b = fraction from boy to cal_b
+  #     - usually zero except for new contracts and a truncated start date
+  # h = fraction from boy to anniv
+  # v = fraction from boy to the earlier of termination and cal_e
 
   .data <- .data |>
     # temporary generic date column names
     rename(cal_b = !!date_cols[[1]],
            cal_e = !!date_cols[[2]]) |>
     mutate(
-      anniv = add_years(
-        issue_date,
-        clock::get_year(cal_b) - clock::get_year(issue_date)),
+      pol_yr = clock::get_year(cal_b) - clock::get_year(issue_date),
+      anniv = add_years(issue_date, pol_yr),
       split = between(anniv, cal_b, cal_e),
-      h = cal_frac(anniv, 1),
-      v = cal_frac(term_date)
+      cal_b = pmax(start_date, issue_date, cal_b),
+      cal_e = pmin(end_date, cal_e),
+      b = cal_frac(cal_b, 1),
+      h = dplyr::if_else(split, cal_frac(anniv, 1), 0),
+      v = dplyr::if_else(is.na(term_date), cal_e, term_date) |> cal_frac()
     )
 
   pre_anniv <- .data |>
     filter(split) |>
     mutate(piece = 1L,
-           cal_b = pmax(issue_date, cal_b),
-           cal_e = anniv - 1,
-           exposure = h,
-           exposure_pol = 1 - pol_frac(cal_b - 1L,
-                                       add_years(anniv, -1),
-                                       anniv - 1L)
+           next_anniv = anniv,
+           cal_e = pmin(end_date, next_anniv - 1),
+           exposure = pmin(h, v) - b
     )
 
   post_anniv <- .data |>
     mutate(piece = 2L,
-           cal_b = dplyr::if_else(split, anniv, cal_b),
-           exposure = dplyr::if_else(split, 1 - h, 1),
-           anniv = dplyr::if_else(anniv > cal_e,
-                                  add_years(anniv, -1),
-                                  anniv),
-           exposure_pol = pol_frac(cal_e,
-                                   anniv,
-                                   add_years(anniv, 1) - 1L,
-                                   cal_b - 1L)
+           cal_b = dplyr::if_else(split, pmax(anniv, start_date), cal_b),
+           pol_yr = pol_yr + (cal_b >= anniv),
+           exposure = v - pmax(h, b),
+           next_anniv = add_years(issue_date, pol_yr)
     )
 
   .data <- dplyr::bind_rows(pre_anniv, post_anniv) |>
     filter(cal_b <= cal_e,
            is.na(term_date) | term_date >= cal_b) |>
-    mutate(term_date = dplyr::if_else(between(term_date, cal_b, cal_e),
+    mutate(anniv = add_years(issue_date, pol_yr - 1L),
+           term_date = dplyr::if_else(between(term_date, cal_b, cal_e),
                                       term_date, as.Date(NA)),
-           pol_yr = clock::get_year(anniv) - clock::get_year(issue_date) +
-             piece - 1L,
            status = dplyr::if_else(is.na(term_date),
                                    factor(default_status,
                                           levels = levels(.data$status)),
                                    status),
            claims = status %in% target_status,
            exposure_cal = dplyr::case_when(
-             claims ~ dplyr::if_else(piece == 1 | cal_b == issue_date,
-                                     1, 1 - h),
+             claims ~ dplyr::if_else(piece == 1 | cal_b == issue_date |
+                                       cal_b == start_date,
+                                     1, 1 - (h - b)),
              is.na(term_date) ~ exposure,
-             piece == 1 ~ v,
-             .default = v - h
+             piece == 1 ~ v - b,
+             .default = v - pmax(h, b)
            ),
-           exposure_pol = dplyr::case_when(
-             claims ~ dplyr::case_when(
-               piece == 1 ~ exposure_pol,
-               split ~ 1,
-               TRUE ~ 1 - pol_frac(cal_b - 1L,
-                                   anniv,
-                                   add_years(anniv, 1) - 1L)
-             ),
-             is.na(term_date) ~ exposure_pol,
-             piece == 1 ~ pol_frac(term_date,
-                                   add_years(anniv, -1),
-                                   anniv - 1L) - (1 - exposure_pol),
-             TRUE ~ pol_frac(term_date,
-                             anniv,
-                             add_years(anniv, 1) - 1L)
+           exposure_pol = dplyr::if_else(
+             claims,
+             1 - pol_frac(cal_b - 1L,
+                          anniv,
+                          next_anniv - 1L),
+             pol_frac(pmin(cal_e, term_date, na.rm = TRUE),
+                      anniv,
+                      next_anniv - 1L,
+                      cal_b - 1L)
            )
     ) |>
     arrange(pol_num, cal_b, piece) |>
-    select(-h, -v, -split, -anniv, -claims, -exposure, -piece) |>
+    select(-b, -h, -v, -split, -anniv, -next_anniv,
+           -claims, -exposure, -piece) |>
     relocate(pol_yr, .after = cal_e) |>
     # restore date column names
     rename(!!date_cols[[1]] := cal_b,

diff --git a/R/globals.R b/R/globals.R
@@ -12,4 +12,4 @@ utils::globalVariables(c("issue_date", "term_date", "last_date",
                          "sd_all", "sd_trx", "trx_amt_sq",
                          "n", "name", "ymax", "ymin",
                          "cal_yr", "anniv", "cal_yr_end", "h", "piece", "v",
-                         "scope"))
+                         "scope", "next_anniv", "b"))
diff --git a/R/transactions.R b/R/transactions.R
@@ -12,6 +12,8 @@
 #' Transactions are associated with the `exposed_df` object by matching
 #' transactions dates with exposure dates ranges found in `exposed_df`.
 #'
+#' All columns containing dates must be in YYYY-MM-DD format.
+#'
 #' @param .data A data frame with exposure-level records with the class
 #' `exposed_df`. Use [as_exposed_df()] to convert a data frame to an
 #' `exposed_df` object if necessary.
@@ -64,8 +66,10 @@ add_transactions <- function(.data, trx_data,
     rename(pol_num = {{col_pol_num}},
            trx_date = {{col_trx_date}},
            trx_type = {{col_trx_type}},
-           trx_amt = {{col_trx_amt}})
+           trx_amt = {{col_trx_amt}}) |>
+    mutate(trx_date = .convert_date(trx_date))
 
+  .check_missing_dates(trx_data$trx_date, "trx_date")
 
   # check for conflicting transaction types
   existing_trx_types <- attr(.data, "trx_types")

diff --git a/man/add_transactions.Rd b/man/add_transactions.Rd
diff --git a/man/expose.Rd b/man/expose.Rd