-
Notifications
You must be signed in to change notification settings - Fork 190
/
Titanic_full.R
60 lines (53 loc) · 1.48 KB
/
Titanic_full.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# loading the needed libraries
library(datasets)
library(tibble)
library(dplyr)
library(purrr)
# Quick structural overview of the raw `Titanic` table from `datasets`
dplyr::glimpse(Titanic)
# Preview of the same table coerced to a tibble (the result is not stored)
tibble::as_tibble(Titanic)
# A custom function to repeat the rows of a dataframe, used here to expand
# count data into one row per observation.
#
# Arguments:
#   df   A dataframe (or tibble).
#   rep  Repetition count(s), handed to `base::rep()` as its second argument:
#        a single number repeats the whole block of rows that many times; a
#        vector with one entry per row repeats each row by its own count (a
#        count of 0 drops the row).
#
# Returns `df` indexed by the repeated row positions.
rep_df <- function(df, rep) {
  # `seq_len()` rather than `1:nrow(df)`: for a zero-row `df` the latter is
  # `c(1, 0)`, which would index a non-existent row and yield a row of `NA`s
  df[rep(seq_len(nrow(df)), rep), ]
}
# Converting the count table to full length: every row of the tibble built from
# `datasets::Titanic` is repeated `n` times, so the result has one row per
# counted observation (rows with a count of 0 contribute nothing).
Titanic_full <-
  tibble::as_tibble(x = datasets::Titanic) %>%
  # `rep()` with one count per row repeats each row in place, so a single
  # vectorised call keeps the original row order without splitting by row
  rep_df(df = ., rep = .$n) %>%
  # the count column is no longer needed once the rows are expanded
  dplyr::select(-n) %>%
  # character columns become factors; the levels are built from the values
  # actually present, so there are no unused levels left to drop
  dplyr::mutate(
    dplyr::across(dplyr::where(is.character), ~ as.factor(.x))
  ) %>%
  # running row identifier as the first column, stored as a double
  tibble::rowid_to_column(var = "id") %>%
  dplyr::mutate(id = as.numeric(id))
# Recoding `Class` as an ordered factor with levels 1st < 2nd < 3rd < Crew.
# `ordered = TRUE` must be an argument of `factor()` itself: placed inside
# `c()` it is just another element of the levels vector, which adds a spurious
# fifth level "TRUE" and leaves the factor unordered.
Titanic_full$Class <-
  factor(
    x = Titanic_full$Class,
    levels = c("1st", "2nd", "3rd", "Crew"),
    ordered = TRUE
  )
# looking at the final dataset
dplyr::glimpse(Titanic_full)
# saving the files (both paths are relative to the current working directory).
# The csv destination is passed positionally: readr 1.4.0 renamed this argument
# from `path` to `file` and deprecated the old name, while the positional form
# works with either version.
readr::write_csv(Titanic_full, "data-raw/Titanic_full.csv")
save(Titanic_full, file = "data/Titanic_full.rdata")