-
Notifications
You must be signed in to change notification settings - Fork 0
/
load_data.R
25 lines (17 loc) · 867 Bytes
/
load_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
library(readxl)
# Load the heart-failure clinical records dataset from the Excel workbook.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file exists.
heart_dataset <- read_excel(
  "C:/Users/Alexander/Spring_22/predictive_analytics/final_project/heart_failure_clinical_records_dataset.xlsx"
)

# Randomly sample the row indices that make up the training set.
train_percentage <- .8
# Use an explicit whole-number sample size instead of handing a fractional
# product (e.g. 0.8 * nrow) to sample().
samplesize <- floor(train_percentage * nrow(heart_dataset))
set.seed(80) # fixed seed so the split is reproducible
index <- sample(seq_len(nrow(heart_dataset)), size = samplesize)
# Rows not drawn for training. Computed with setdiff() rather than a negative
# subscript because x[-integer(0), ] selects *no* rows, which would leave the
# test set empty (instead of holding every row) whenever `index` is empty.
test_index <- setdiff(seq_len(nrow(heart_dataset)), index)

# Create the training and test split.
train <- heart_dataset[index, ]
test <- heart_dataset[test_index, ]

# Min-max scale a single column to the [0, 1] range.
#   x: a numeric vector (one column of the dataset).
# Returns a vector of the same length. NA entries stay NA but no longer turn
# the whole column into NA; a constant column maps to 0 rather than NaN (0 / 0).
min_max_scale <- function(x) {
  bounds <- range(x, na.rm = TRUE)
  span <- bounds[2] - bounds[1]
  if (isTRUE(span == 0)) {
    return(x - bounds[1])
  }
  (x - bounds[1]) / span
}

# Create a normalized copy of the dataset for neural networks.
# NOTE(review): the min/max of each column are taken over all rows, including
# the rows that end up in the test set, so test data influences the scaling of
# the training data. Consider computing the range on the training rows only.
heart_dataset_normalized <- as.data.frame(lapply(heart_dataset, min_max_scale))

# Normalized train and test sets, using the same row split as above.
train_normalized <- heart_dataset_normalized[index, ]
test_normalized <- heart_dataset_normalized[test_index, ]
# TODO: in the future we could make the train and test sets have equal numbers of each class