Merge pull request #72 from StatistikStadtZuerich/run2024

run2024
StatistikStadtZuerich · Jul 15, 2024 · a19df55 · a19df55
2 parents 3edf4aa + 4503d46
commit a19df55
Show file tree

Hide file tree

Showing 27 changed files with 892 additions and 957 deletions.
diff --git a/1_Code/0000_General/0001_model_control-flow.r b/1_Code/0000_General/0001_model_control-flow.r
diff --git a/1_Code/0000_General/general_init.R b/1_Code/0000_General/general_init.R
@@ -10,40 +10,44 @@
 #' @export
 #'
 #' @examples util_gf("lower")
+
+
 init <- function(i_scen = "middle", var_file = "/2_Data/3_Parameter/variables.yml"){
+
   # packages
-  require(tidyverse)
+  require(ckanr) # download of open data directly from ckan  
   require(dvmisc) # for expand_grid
-  require(gridExtra) # for ggplot on multiple pages
-  require(nlme) # needed for mgcv, see below
-  require(mgcv) # for gam
-  require(flexclust) # weighted kmeans clustering
-  require(ggrepel) # to label lines (when too many for a normal legend)
-  require(zoo) # moving average (no confusion with filter function in base R)
-  require(ckanr) # download of open data directly from ckan
-  require(scales) # for pretty axis breaks
-  require(rlang) # for functions: symbols, quasiquotation (!!, !!!, :=)
-  require(gtools) # invalid function (to check for NA, NULL, NaN), e.g. in constrained regression
-  # require(this.path) # to extract the current file name
-  require(modelr) # add_predictions
+  require(flexclust) # weighted kmeans clustering  
+  require(ggrepel) # to label lines (when too many for a normal legend)  
+  require(gridExtra) # for ggplot on multiple pages  
+  require(gtools) # invalid function (to check for NA, NULL, NaN), e.g. in constrained regression  
+  require(mgcv) # for gam   
+  require(modelr) # add_predictions  
+  require(nlme) # needed for mgcv  
   require(quarto) #for programmatically rendering quarto
+  require(rlang) # for functions: symbols, quasiquotation (!!, !!!, :=)  
+  require(scales) # for pretty axis breaks  
+  require(this.path) #for this.dir function
+  require(tidyverse) #tidyverse functionality
+  require(zoo) # moving average (no confusion with filter function in base R)  
+
 
   # no scientic notation
   options(scipen = 999)
 
-  # general stuff (without dependency on parameters) ----------------------------------
+  # general objects without dependency on parameters ----------------------------------
   # read general variables for common use
   vars <- yaml::read_yaml(paste0(here::here(), var_file), eval.expr = TRUE, fileEncoding = "UTF-8")
   vars$i_scen <- i_scen
 
   # import parameters
   para <- read_delim(paste0(here::here(), vars$para_file), ";", lazy = FALSE) %>%
-    select(parameter, lower, middle, upper) %>%
+    select(parameter, lower, middle, middle_birth_lower, middle_birth_upper, upper) %>%
     pivot_longer(cols = lower:upper, names_to = "scenario") %>%
     filter(scenario == i_scen) %>%
     select(parameter, value)
 
-  # read parameters (depend on scenario)
+  # read parameters (depending on scenario)
   for (i_para in 1:nrow(para)) {
     assign(para$parameter[i_para], para$value[i_para], envir = .GlobalEnv)
   }
@@ -53,11 +57,11 @@ init <- function(i_scen = "middle", var_file = "/2_Data/3_Parameter/variables.ym
 
 
   # create variables --------------------------------------------------------
+
   # lookup tables
   vars$look_dis <- read_csv2(paste0(here::here(), vars$dis_file), lazy = FALSE) %>%
     select(QuarCd, distr)
 
-
   vars$look_reg <- mutate(vars$look_dis, distnum = if_else(distr == "Kreis 1", 10, as.numeric(QuarCd))) %>%
     select(distnum, distr) %>%
     unique() %>%
@@ -105,6 +109,16 @@ init <- function(i_scen = "middle", var_file = "/2_Data/3_Parameter/variables.ym
       age < vars$age_4[5] ~ vars$age_4t[4],
       TRUE ~ vars$age_4t[5]
     ), levels = vars$age_4t))
+
+  vars$look_a5 <- tibble(age = 0:120) %>%
+    mutate(age_5 = factor(case_when(
+      age < vars$age_5[1] ~ vars$age_5t[1],
+      age < vars$age_5[2] ~ vars$age_5t[2],
+      age < vars$age_5[3] ~ vars$age_5t[3],
+      TRUE ~ vars$age_5t[4]
+    ), levels = vars$age_5t))  
+
+
 
   # unique levels
   vars$text_d <- unique(vars$look_dis$distr)
@@ -118,6 +132,7 @@ init <- function(i_scen = "middle", var_file = "/2_Data/3_Parameter/variables.ym
   vars$uni_t <- factor(vars$pro_category, levels = vars$pro_category)
   vars$uni_i <- factor(vars$text_i, levels = vars$text_i)
   vars$uni_c <- factor(vars$text_c, levels = vars$text_c)
+  vars$uni_cb <- factor(vars$text_cb, levels = vars$text_cb) 
 
   vars$look_own <- tibble(EigentumGrundstkCd = as.integer(labels(vars$uni_w)), owner = levels(vars$uni_w))
 
@@ -165,7 +180,10 @@ init <- function(i_scen = "middle", var_file = "/2_Data/3_Parameter/variables.ym
   # relocation functions
   source(paste0(vars$code_path, "/0000_general/general_relocation-function.r"))
 
-  # general stuff (with dependency on parameters) ----------------------------------
+
+
+
+  # general objects with dependency on parameters ----------------------------------
 
   # path for results
   vars$res_path <- paste0(vars$res_path, i_scen)
@@ -202,7 +220,7 @@ init <- function(i_scen = "middle", var_file = "/2_Data/3_Parameter/variables.ym
   # color for year (base period)
   vars$col_y_base <- colorRampPalette(vars$col_6)(length(vars$uniy_bir_base))
 
-  # colour for time distributions
+  # colors for time distributions
   vars$col_time <- c(
     rep(vars$col_grey, length(vars$uniy_bir_base)),
     colorRampPalette(vars$col_6[1:5])(length(vars$uniy_scen))

diff --git a/1_Code/0000_General/general_utils.R b/1_Code/0000_General/general_utils.R
@@ -18,7 +18,6 @@
 #' spa (living space)  
 #' aca (allocation)  
 #' pro (projects)  
-#' own (ownership)  
 #' hou (housing model)  
 #' deh (demography and housing model)  
 #' out (model outputs)  
@@ -104,11 +103,6 @@ run_scen <- function(scenarios, modules, keep_log = TRUE) {
       source(paste0(code_path, "1100_Projects/1100_projects.r"))
     }
 
-    # ownership
-    if (modules %in% c("all", "alw", "how", "hom", "own")) {
-      source(paste0(code_path, "1200_Ownership/1200_ownership.r"))
-    }
-
     # housing model
     if (modules %in% c("all", "alw", "how", "hom", "hou")) {
       source(paste0(code_path, "1300_Housing-Model/1300_housing-model.r"))
@@ -246,20 +240,20 @@ dir_ex_create <- function(path){
 }
 
 
-#' check if 3 files are present
+#' check if 5 files are present
 #' 
-#' @description checks if the input contains 3 values and stops execution if this is not the case
+#' @description checks if the input contains 5 values and stops execution if this is not the case
 #'
 #' @param files character vector
 #'
 #' @return stops execution if condition is not met
 #' @export
 #'
-#' @examples stop_3(c("a.csv", "b.csv", "c.csv"))
-stop_3 <- function(files){
+#' @examples stop_5(c("a.csv", "b.csv", "c.csv", "d.csv", "e.csv"))
+stop_5 <- function(files){
   stopifnot("missing files;\
             make sure to run the whole model (0001_model_control-flow.r) beforehand" = 
-              length(read_files) == 3)
+              length(read_files) == 5)
 }
 
 

diff --git a/1_Code/0000_General/model_control-flow.r b/1_Code/0000_General/model_control-flow.r
@@ -0,0 +1,19 @@
+source("1_code/0000_general/general_init.R")
+params <- init("middle")
+
+time_start <- Sys.time()
+
+# run_scen(
+#     scenarios = c("lower", "middle", "middle_birth_lower", "middle_birth_upper", "upper"),
+#     modules = c("alw"))
+
+# run_scen(
+#     scenarios = c("lower", "middle", "middle_birth_lower", "middle_birth_upper", "upper"),
+#     modules = c("all"))
+
+Sys.time() - time_start
+
+
+render_book(cache_refresh = TRUE)
+
+
diff --git a/1_Code/0100_Birth/0100_birth-fertility.r b/1_Code/0100_Birth/0100_birth-fertility.r
@@ -146,7 +146,7 @@ fer_fit <- arrange(fer_tail, district, year, origin, age) %>%
 # constrained regression
 # (proportion of linear model and mean, within bandwidth)
 
-fer_pred <- con_reg(
+fer_pred_temp <- con_reg(
   data = fer_fit, x = "year", y = "fer_fit",
   group_cols = c("district", "age", "origin"),
   window = bir_window_thres, base_t0 = bir_base_begin,
@@ -162,7 +162,30 @@ fer_pred <- con_reg(
   left_join(select(fer_dyao, district, year, age, origin, fer_dyao),
     by = c("district", "year", "age", "origin")
   ) %>%
-  mutate(fer_all = if_else(year <= bir_base_end, fer_dyao, pred_roll))
+  mutate(fer_all_temp = if_else(year <= bir_base_end, fer_dyao, pred_roll))
+
+
+# fertility multiplier ----------------------------------------------------
+
+# multiplier by year
+fer_mult_dat <- tibble(year = c(scen_begin, scen_end),
+                       mult = c(bir_mult_begin, bir_mult_end)) |> 
+  mutate(year_new = (year - scen_begin + 1)^bir_mult_exp)
+
+fer_mult <- tibble(year = scen_begin:scen_end) |> 
+  mutate(year_new = (year - scen_begin + 1)^bir_mult_exp) |> 
+  add_predictions(lm(mult ~ year_new, data = fer_mult_dat), var = "mult")
+
+# ggplot(fer_mult) + 
+#   geom_point(aes(x = year, y = mult))
+
+
+
+# fertility rates with multiplier
+fer_pred <- fer_pred_temp |> 
+  left_join(fer_mult, by = "year") |> 
+  mutate(fer_all = fer_all_temp * mult)
+
 
 # plot 0140: the predictions: age distribution by district and year
 
@@ -175,7 +198,7 @@ pred_fit <- filter(fer_pred, year >= scen_begin) %>%
   arrange(district, year, origin, age) %>%
   group_by(district, year, origin) %>%
   mutate(pred_fit = pmax(0, predict(
-    loess(pred_roll ~ age, span = bir_fer_span_pred, degree = 1, na.action = na.aggregate)
+    loess(fer_all ~ age, span = bir_fer_span_pred, degree = 1, na.action = na.aggregate)
   ))) %>%
   ungroup()
 

diff --git a/1_Code/0200_Death/0200_death.r b/1_Code/0200_Death/0200_death.r
@@ -51,7 +51,7 @@ pop <- read_csv(pop_od) %>%
 # FSO data (used in the prediction)
 # rate is converted to percent
 mor_fso <- read_csv(dea_fso_od) %>%
-  rename(year = EreignisDatJahr, age = AlterVCd) %>%
+  rename(year = EreignisDatJahr, age = AlterVCd, KategorieCd = KategorieDatenBFSCd) %>%
   filter(HerkunftCd == 0) %>%
   mutate(
     sex = fact_if(SexCd, uni_s),
@@ -316,6 +316,18 @@ mor_zh_yas_past_future <- select(mor_zh_yas, year, age, sex, mor_yas) %>%
 
 # plot 0210
 
+# ZH and CH: past and future
+mor_zh_ch_yas_past_future <- mor_zh_yas_past_future |> 
+  mutate(region = factor(text_r[1], uni_r)) |> 
+  right_join(mor_yasr, by = c("year", "age", "sex", "region")) |> 
+  mutate(mor_new = if_else((year >= scen_begin) & (region == uni_r[1]), 
+                           mor_yas, mor_yasr)) |> 
+  select(year, age, sex, region, mor_new) |> 
+  rename(mor_yasr = mor_new) |> 
+  arrange(year, age, sex, region)
+
+# plot 0210a
+
 # export mortality rates --------------------------------------------------
 
 # prepare the export data

diff --git a/1_Code/0900_Living-Space/0900_living-space.r b/1_Code/0900_Living-Space/0900_living-space.r
@@ -16,7 +16,7 @@ spa_dat <- read_csv(spa_od) %>%
   rename(year = StichtagDatJahr, area = Wohnflaeche, apartments = AnzWhgStat, people = AnzBestWir) %>%
   left_join(look_dis, by = "QuarCd") %>%
   mutate(
-    owner = fact_if(EigentuemerSSZPubl3Cd_noDM , uni_w),
+    owner = fact_if(EigentuemerSSZPubl3Cd_noDM, uni_w),
     district = factor(distr, uni_d)
   ) %>%
   group_by(year, district, owner) %>% 

diff --git a/1_Code/1100_Projects/1100_projects.r b/1_Code/1100_Projects/1100_projects.r
@@ -100,7 +100,7 @@ pro_delay <- as_tibble(expand_grid(
   owner = uni_w,
   status = pro_category,
   indicator = uni_i,
-  delay = as.double(0:5)
+  delay = as.double(0:pro_max_delay)
 )) %>%
   left_join(select(pro_not, c(district, year, owner, status, indicator, realized)),
     by = c("district", "year", "owner", "status", "indicator")