-
Notifications
You must be signed in to change notification settings - Fork 1
/
.Rhistory
512 lines (512 loc) · 22.2 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
# Faceted overview of every column whose name contains "Gas": one panel per
# column, drawn as a line over `date`.
# Compared with the earlier attempts this (a) uses geom_line() -- there is no
# geom_lines() in ggplot2, (b) supplies both x and y aesthetics, and (c) keeps
# `date` out of the wide-to-long reshape so it is still available as the x axis.
# NOTE(review): assumes all selected "Gas" columns share one type (numeric).
all_variables_combined %>%
  select(date, contains("Gas")) %>%
  pivot_longer(-date, names_to = "key", values_to = "value") %>%
  ggplot(aes(x = date, y = value)) +
  facet_wrap(~ key, scales = "free") +
  geom_line()
# Line plot of one column of `data` against its `date` column.
#   data   : data frame with a `date` column
#   column : name of the column to plot, given as a string
# Layers are added with `+`; piping a ggplot object into geom_line() with
# `%>%` (as in the first attempt) is an error.
plot_gas_series <- function(data, column) {
  ggplot(data, aes(x = date, y = .data[[column]])) +
    geom_line() +
    labs(y = column)
}

# The relative gas columns inspected in this session.
gas_rel_columns <- c(
  "HGasSLPsyn_rel", "LGasSLPsyn_rel",
  "HGasSLPana_rel", "LGasSLPana_rel",
  "HGasRLMmT_rel", "LGasRLMmT_rel",
  "HGasRLMoT_rel", "LGasRLMoT_rel"
)

# Full date range, one plot per column (print() is required inside a loop).
for (gas_column in gas_rel_columns) {
  print(plot_gas_series(all_variables_combined, gas_column))
}

# Single look at the period after 2022-01-01.
all_variables_combined %>%
  filter(date > "2022-01-01") %>%
  plot_gas_series("HGasSLPsyn_rel")

# Same eight columns restricted to the period after 2021-01-01.
recent_variables <- all_variables_combined %>% filter(date > "2021-01-01")
for (gas_column in gas_rel_columns) {
  print(plot_gas_series(recent_variables, gas_column))
}

# First row: all columns, the "Gas" columns, and the "Gas" columns without
# the ones whose name contains "price".
head(all_variables_combined, 1)
head(all_variables_combined, 1) %>% select(contains("Gas"))
head(all_variables_combined, 1) %>% select(contains("Gas")) %>% select(-contains("price"))
# Column-selection experiments on the combined table.
# contains() takes one pattern; its second positional argument is
# `ignore.case`, so several patterns have to be combined with `&` / `|`.
all_variables_combined %>% select(contains("rel") & contains("LGas"))
all_variables_combined %>% select(contains("y") & contains("lag"))
# Add the lag columns (suffix "____lag7") and look at the y-related ones.
all_variables_combined %<>% transform_lag(7)
all_variables_combined %>% select(contains("y_") & contains("____lag"))
# select() needs the data frame as its first argument, hence the pipe.
# Mid-term table: drop columns whose name contains both "y_" and "____lag".
all_variables_combined_midterm <- all_variables_combined %>%
  select(-(contains("y_") & contains("____lag")))
# AR table: drop non-lagged columns whose name contains "Gas" (case-sensitive).
all_variables_combined_ar <- all_variables_combined %>%
  select(-(contains("Gas", ignore.case = FALSE) & !contains("____lag")))
library(tidyverse)
library(hablar)
library(lubridate)
library(magrittr)
library(imputeTS)
# Infix concatenation helper: `x %.% y` pastes x and y together with no
# separator (vectorised like paste0).
"%.%" <- function(x, y) {
  paste0(x, y)
}
# NOTE(review): hard-coded absolute path; the relative 'data-constr/...' paths
# used by later commands are resolved against this directory.
setwd('/Users/finnkruger/Documents/GitHub/gasprices')
library(tidyverse)
library(hablar)
library(lubridate)
library(magrittr)
library(imputeTS)
# Infix concatenation helper: `x %.% y` pastes x and y together with no
# separator (vectorised like paste0).
"%.%" <- function(x, y) {
  paste0(x, y)
}
#setwd('/Users/finnkruger/Documents/GitHub/gasprices')
# Load the saved input objects, in this order. The loop runs at top level, so
# load() restores the objects into the same environment as one call per file.
# NOTE(review): presumably these files provide the tables joined later
# (final_weather_data, DAX_data, ...) -- confirm against the files.
input_files <- c(
  'data-constr/final_weather_data.RData',
  'data-constr/DAX_data.RData',
  'data-constr/the_combined.Rdata',
  'data-constr/the_combined_rel.Rdata',
  'data-constr/Quarterly_GDP_data.Rdata',
  'data-constr/gas_price_data.Rdata',
  'data-constr/el_price_data.Rdata',
  'data-constr/co2_price_data.Rdata'
)
for (input_file in input_files) {
  load(input_file)
}
###### DATES & CYCLIC BEHAVIOR
# Daily calendar from the fixed start date up to yesterday, with one `dd_`
# column per calendar component (day of month, month, year, weekday, week).
startdate <- as.Date("2017-11-01")
currentDate <- Sys.Date() - 1
date_full <- data.frame(date = seq(startdate, currentDate, by = "days")) %>%
  mutate(
    dd_day = day(date),
    dd_month = month(date),
    dd_year = year(date),
    dd_weekday = wday(date),
    dd_week = week(date)
  )
# Sine encoding of every numeric column.
#   data : data frame
# Returns `data` with one extra column per numeric column, named
# "<column>_sin" and holding sin(2 * pi * value / max(value)); the new columns
# are placed in front of the original ones.
transform_sin <- function(data) {
  numeric_cols <- select_if(data, is.numeric)
  renamed <- rename_all(numeric_cols, function(nm) nm %.% "_sin")
  encoded <- mutate_all(renamed, function(v) sin(2 * pi * v / max(v)))
  cbind(encoded, data)
}
# Cosine encoding of every numeric column that is not already a "_sin" column.
#   data : data frame
# Returns `data` with one extra column per eligible numeric column, named
# "<column>_cos" and holding cos(2 * pi * value / max(value)); the new columns
# are placed in front of the original ones.
transform_cos <- function(data) {
  numeric_cols <- select_if(select(data, -contains("_sin")), is.numeric)
  renamed <- rename_all(numeric_cols, function(nm) nm %.% "_cos")
  encoded <- mutate_all(renamed, function(v) cos(2 * pi * v / max(v)))
  cbind(encoded, data)
}
# Add the sine and cosine encodings to the calendar, then drop every column
# whose name contains "year_".
date_full <- date_full %>%
  transform_sin() %>%
  transform_cos() %>%
  select(-contains("year_"))
####### MASTER JOINTZ, y, na omit
# Left-join each source table onto the calendar by `date` (in the listed
# order), rename `total` to `y`, and keep only rows without any NA.
all_variables_combined <- Reduce(
  function(joined, source_table) left_join(joined, source_table, by = 'date'),
  list(
    final_weather_data,
    DAX_data,
    data_the_combined,
    data_the_combined_rel,
    Quarterly_GDP,
    gas_data,
    el_data,
    co2_data
  ),
  date_full
) %>%
  rename(y = total) %>%
  na.omit()
###### FEATURE ENGINEERING: ln,square,elasticity,infinity
# Shifted-log features.
#   data : data frame with `date` and `y` columns
# For every numeric column except `date`, `y` and columns whose name contains
# "__", adds "<column>__ln" = log(value + 1 - min(value)). The new columns are
# placed in front of the original ones.
transform_ln <- function(data) {
  candidates <- select(data, -contains("__"), -date, -y)
  renamed <- rename_all(select_if(candidates, is.numeric), function(nm) nm %.% "__ln")
  logged <- mutate_all(renamed, function(v) log(v + 1 - min(v)))
  cbind(logged, data)
}
# Squared features.
#   data : data frame with `date` and `y` columns
# For every numeric column except `date`, `y` and columns whose name contains
# "__", adds "<column>__sqrd" = value^2. The new columns are placed in front of
# the original ones.
transform_sqrd <- function(data) {
  candidates <- select(data, -contains("__"), -date, -y)
  renamed <- rename_all(select_if(candidates, is.numeric), function(nm) nm %.% "__sqrd")
  squared <- mutate_all(renamed, function(v) v^2)
  cbind(squared, data)
}
# Inverse-square ("elas") features.
#   data : data frame with `date` and `y` columns
# For every numeric column except `date`, `y` and columns whose name contains
# "__", adds "<column>__elas" = 1 / value^2 (Inf where the value is 0). The new
# columns are placed in front of the original ones.
transform_elas <- function(data) {
  candidates <- select(data, -contains("__"), -date, -y)
  renamed <- rename_all(select_if(candidates, is.numeric), function(nm) nm %.% "__elas")
  inverted <- mutate_all(renamed, function(v) 1 / v^2)
  cbind(inverted, data)
}
# Add the __ln, __sqrd and __elas feature columns, then replace infinite values
# (e.g. 1 / 0^2 from transform_elas) by 1000 times the column maximum.
# The replacement is limited to numeric columns: ifelse() drops attributes, so
# running it over everything() would turn the Date column `date` into plain
# numbers.
# NOTE(review): s() is not defined in this file -- presumably hablar::s(),
# used here so that max() ignores the infinite entries; confirm.
all_variables_combined <- all_variables_combined %>%
  transform_ln() %>%
  transform_sqrd() %>%
  transform_elas() %>%
  mutate(across(where(is.numeric), ~ ifelse(is.infinite(.x), max(s(.x)) * 1000, .x)))
######### LAG: 7 day w/ na interpolation
# Lagged copies of the numeric feature columns.
#   data : data frame with a `date` column
#   lag  : number of rows (ordered by date) to shift by; default 1
# For every numeric column except `date`, `dd_` columns and columns that already
# contain "____", adds "<column>____lag<lag>" holding the value `lag` rows
# earlier in date order. The new columns are placed in front of the original
# ones and every column is then passed through na_interpolation(), which fills
# the NAs created at the start of each lagged column.
# Fix: the shift is now passed to dplyr::lag() as `n = lag`; previously only
# the column name used `lag` and the shift was always 1.
transform_lag <- function(data, lag = 1) {
  lagged <- data %>%
    select(-contains("____"), -date, -contains("dd_")) %>%
    select_if(is.numeric) %>%
    rename_all(~ . %.% "____lag" %.% lag) %>%
    mutate_all(~ dplyr::lag(., n = lag, order_by = data$date))
  cbind(lagged, data) %>%
    mutate_all(~ na_interpolation(.))
}
# Add the "____lag7" columns via transform_lag(lag = 7).
all_variables_combined <- transform_lag(all_variables_combined, 7)
# Mid-term table: without columns whose name contains both "y_" and "____lag".
all_variables_combined_midterm <- select(
  all_variables_combined,
  -(contains("y_") & contains("____lag"))
)
# AR table: without non-lagged columns whose name contains "y_" or
# (case-sensitive) "Gas".
# NOTE(review): contains("y_") also matches names such as "dd_day_sin" --
# confirm that dropping those here is intended.
all_variables_combined_ar <- select(
  all_variables_combined,
  -((contains("y_") | contains("Gas", ignore.case = FALSE)) & !contains("____lag"))
)
# Both tables are written under the object name `all_variables_combined`,
# first the mid-term one, then the AR one (which stays assigned afterwards).
all_variables_combined <- all_variables_combined_midterm
save(all_variables_combined, file = "data-constr/masters_jointz.RData")
write_csv(all_variables_combined, "data-constr/masters_jointz.csv")
all_variables_combined <- all_variables_combined_ar
save(all_variables_combined, file = "data-constr/masters_jointz_ar.RData")
write_csv(all_variables_combined, "data-constr/masters_jointz_ar.csv")
# ARIMA input: `y` and `date`, extended by calling transform_lag() successively
# with lag = 1, 2, ..., 7 (each call adds a "y____lag<k>" column).
# The trailing select(-date) stays disabled, so `date` is kept in the output.
arima <- Reduce(
  transform_lag,
  seq_len(7),
  select(all_variables_combined, y, date)
)
save(arima, file = "data-constr/masters_arima.RData")
write_csv(arima, "data-constr/masters_arima.csv")
# SARIMA input: `y`, `date` and the `dd_` calendar columns, extended by calling
# transform_lag() successively with lag = 1, 2, ..., 7.
# The trailing select(-date) stays disabled, so `date` is kept in the output.
sarima <- Reduce(
  transform_lag,
  seq_len(7),
  select(all_variables_combined, y, date, contains("dd_"))
)
save(sarima, file = "data-constr/masters_sarima.RData")
write_csv(sarima, "data-constr/masters_sarima.csv")
### Actual_Arima
# Plain target series (`y` and `date` only), exported as CSV.
actual_sarima <- select(all_variables_combined, y, date)
write_csv(actual_sarima, "data-constr/actual_sarima.csv")
# ##### IGNORE EVERYTHING BELOW UNLESS OTHERWISE TOLD
# data_constructed = read_csv("../data-constr/the_base.csv", col_types = cols(Gasday = col_date(format = "%d.%m.%y")))
# data_weather = read_csv("../data-orig/weather_changes/export.csv")
#
# importIntoEnv()
#
# joinable_data <- data_weather %>%
# select(date, tavg, tmin, tmax)%>%
# rename(Gasday = date)
#
#
#
# including_weather <- data_constructed %>%
# left_join(joinable_data)
#
# final_weather_data <- df_hamburg%>%
# left_join(df_berlin, by = 'date')%>%
# left_join(df_munich, by = 'date')%>%
# left_join(df_cologne, by = 'date')%>%
# left_join(df_frankfurt, by = 'date')
#
#
#
#
#
#
#
#
#
# ############## Quartiles
#
# data_gas_price = read_csv("../data-constr/gasprice_imputed.csv")
#
#
# including_weather_price = including_weather %>%
# left_join(data_gas_price)
#
#
# #dont_work <- including_weather %>%
# # mutate(Quater =
# # case_when(
# ## Gasday <= 2022-01-01 ~ "Q1",
# # Gasday >= 2022-01-01 ~ "Q2",
# # ))
#
#
#
# including_weather_price %>%
# write_csv("../data-constr/masters_jointz.csv")
# all_variables_combined %>% filter(date > "2021-01-01") %>% ggplot(aes(x=date,y=HGasSLPsyn_rel)) + geom_line()
# all_variables_combined %>% filter(date > "2021-01-01") %>% ggplot(aes(x=date,y=LGasSLPsyn_rel)) + geom_line()
# all_variables_combined %>% filter(date > "2021-01-01") %>% ggplot(aes(x=date,y=HGasSLPana_rel)) + geom_line()
# all_variables_combined %>% filter(date > "2021-01-01") %>% ggplot(aes(x=date,y=LGasSLPana_rel)) + geom_line()
# all_variables_combined %>% filter(date > "2021-01-01") %>% ggplot(aes(x=date,y=HGasRLMmT_rel)) + geom_line()
# all_variables_combined %>% filter(date > "2021-01-01") %>% ggplot(aes(x=date,y=LGasRLMmT_rel)) + geom_line()
# all_variables_combined %>% filter(date > "2021-01-01") %>% ggplot(aes(x=date,y=HGasRLMoT_rel)) + geom_line()
# all_variables_combined %>% filter(date > "2021-01-01") %>% ggplot(aes(x=date,y=LGasRLMoT_rel)) + geom_line()
# AR table: drop every non-lagged column whose name contains "Gas"
# (case-sensitive). Running the assignment once gives the same result as the
# repeated command.
all_variables_combined_ar <- select(
  all_variables_combined,
  -(contains("Gas", ignore.case = FALSE) & !contains("____lag"))
)
# Master table: left-join each source table onto the calendar by `date` (in
# the listed order), rename `total` to `y`, and keep only rows without any NA.
all_variables_combined <- Reduce(
  function(joined, source_table) left_join(joined, source_table, by = 'date'),
  list(
    final_weather_data,
    DAX_data,
    data_the_combined,
    data_the_combined_rel,
    Quarterly_GDP,
    gas_data,
    el_data,
    co2_data
  ),
  date_full
) %>%
  rename(y = total) %>%
  na.omit()
###### FEATURE ENGINEERING: ln,square,elasticity,infinity
# Shifted-log features.
#   data : data frame with `date` and `y` columns
# For every numeric column except `date`, `y` and columns whose name contains
# "__", adds "<column>__ln" = log(value + 1 - min(value)). The new columns are
# placed in front of the original ones.
transform_ln <- function(data) {
  candidates <- select(data, -contains("__"), -date, -y)
  renamed <- rename_all(select_if(candidates, is.numeric), function(nm) nm %.% "__ln")
  logged <- mutate_all(renamed, function(v) log(v + 1 - min(v)))
  cbind(logged, data)
}
# Squared features.
#   data : data frame with `date` and `y` columns
# For every numeric column except `date`, `y` and columns whose name contains
# "__", adds "<column>__sqrd" = value^2. The new columns are placed in front of
# the original ones.
transform_sqrd <- function(data) {
  candidates <- select(data, -contains("__"), -date, -y)
  renamed <- rename_all(select_if(candidates, is.numeric), function(nm) nm %.% "__sqrd")
  squared <- mutate_all(renamed, function(v) v^2)
  cbind(squared, data)
}
# Inverse-square ("elas") features.
#   data : data frame with `date` and `y` columns
# For every numeric column except `date`, `y` and columns whose name contains
# "__", adds "<column>__elas" = 1 / value^2 (Inf where the value is 0). The new
# columns are placed in front of the original ones.
transform_elas <- function(data) {
  candidates <- select(data, -contains("__"), -date, -y)
  renamed <- rename_all(select_if(candidates, is.numeric), function(nm) nm %.% "__elas")
  inverted <- mutate_all(renamed, function(v) 1 / v^2)
  cbind(inverted, data)
}
# Add the __ln, __sqrd and __elas feature columns, then replace infinite values
# (e.g. 1 / 0^2 from transform_elas) by 1000 times the column maximum.
# The replacement is limited to numeric columns: ifelse() drops attributes, so
# running it over everything() would turn the Date column `date` into plain
# numbers.
# NOTE(review): s() is not defined in this file -- presumably hablar::s(),
# used here so that max() ignores the infinite entries; confirm.
all_variables_combined <- all_variables_combined %>%
  transform_ln() %>%
  transform_sqrd() %>%
  transform_elas() %>%
  mutate(across(where(is.numeric), ~ ifelse(is.infinite(.x), max(s(.x)) * 1000, .x)))
######### LAG: 7 day w/ na interpolation
# Lagged copies of the numeric feature columns.
#   data : data frame with a `date` column
#   lag  : number of rows (ordered by date) to shift by; default 1
# For every numeric column except `date`, `dd_` columns and columns that already
# contain "____", adds "<column>____lag<lag>" holding the value `lag` rows
# earlier in date order. The new columns are placed in front of the original
# ones and every column is then passed through na_interpolation(), which fills
# the NAs created at the start of each lagged column.
# Fix: the shift is now passed to dplyr::lag() as `n = lag`; previously only
# the column name used `lag` and the shift was always 1.
transform_lag <- function(data, lag = 1) {
  lagged <- data %>%
    select(-contains("____"), -date, -contains("dd_")) %>%
    select_if(is.numeric) %>%
    rename_all(~ . %.% "____lag" %.% lag) %>%
    mutate_all(~ dplyr::lag(., n = lag, order_by = data$date))
  cbind(lagged, data) %>%
    mutate_all(~ na_interpolation(.))
}
# Add the "____lag7" columns via transform_lag(lag = 7).
all_variables_combined <- transform_lag(all_variables_combined, 7)
# Mid-term table: without columns whose name contains both "y_" and "____lag".
all_variables_combined_midterm <- select(
  all_variables_combined,
  -(contains("y_") & contains("____lag"))
)
# AR table: without non-lagged columns whose name contains (case-sensitive)
# "Gas" or "y_". Only this final definition is kept; the intermediate
# "Gas"-only assignment was overwritten immediately.
all_variables_combined_ar <- select(
  all_variables_combined,
  -((contains("Gas", ignore.case = FALSE) | contains("y_")) & !contains("____lag"))
)
library(tidyverse)
library(hablar)
library(lubridate)
library(magrittr)
library(imputeTS)
# Infix concatenation helper: `x %.% y` pastes x and y together with no
# separator (vectorised like paste0).
"%.%" <- function(x, y) {
  paste0(x, y)
}
#setwd('/Users/finnkruger/Documents/GitHub/gasprices')
# Load the saved input objects, in this order. The loop runs at top level, so
# load() restores the objects into the same environment as one call per file.
# NOTE(review): presumably these files provide the tables joined later
# (final_weather_data, DAX_data, ...) -- confirm against the files.
input_files <- c(
  'data-constr/final_weather_data.RData',
  'data-constr/DAX_data.RData',
  'data-constr/the_combined.Rdata',
  'data-constr/the_combined_rel.Rdata',
  'data-constr/Quarterly_GDP_data.Rdata',
  'data-constr/gas_price_data.Rdata',
  'data-constr/el_price_data.Rdata',
  'data-constr/co2_price_data.Rdata'
)
for (input_file in input_files) {
  load(input_file)
}
###### DATES & CYCLIC BEHAVIOR
# Daily calendar from the fixed start date up to yesterday, with one `dd_`
# column per calendar component (day of month, month, year, weekday, week).
startdate <- as.Date("2017-11-01")
currentDate <- Sys.Date() - 1
date_full <- data.frame(date = seq(startdate, currentDate, by = "days")) %>%
  mutate(
    dd_day = day(date),
    dd_month = month(date),
    dd_year = year(date),
    dd_weekday = wday(date),
    dd_week = week(date)
  )
# Sine encoding of every numeric column.
#   data : data frame
# Returns `data` with one extra column per numeric column, named
# "<column>_sin" and holding sin(2 * pi * value / max(value)); the new columns
# are placed in front of the original ones.
transform_sin <- function(data) {
  numeric_cols <- select_if(data, is.numeric)
  renamed <- rename_all(numeric_cols, function(nm) nm %.% "_sin")
  encoded <- mutate_all(renamed, function(v) sin(2 * pi * v / max(v)))
  cbind(encoded, data)
}
# Cosine encoding of every numeric column that is not already a "_sin" column.
#   data : data frame
# Returns `data` with one extra column per eligible numeric column, named
# "<column>_cos" and holding cos(2 * pi * value / max(value)); the new columns
# are placed in front of the original ones.
transform_cos <- function(data) {
  numeric_cols <- select_if(select(data, -contains("_sin")), is.numeric)
  renamed <- rename_all(numeric_cols, function(nm) nm %.% "_cos")
  encoded <- mutate_all(renamed, function(v) cos(2 * pi * v / max(v)))
  cbind(encoded, data)
}
# Add the sine and cosine encodings to the calendar, then drop every column
# whose name contains "year_".
date_full <- date_full %>%
  transform_sin() %>%
  transform_cos() %>%
  select(-contains("year_"))
####### MASTER JOINTZ, y, na omit
# Left-join each source table onto the calendar by `date` (in the listed
# order), rename `total` to `y`, and keep only rows without any NA.
all_variables_combined <- Reduce(
  function(joined, source_table) left_join(joined, source_table, by = 'date'),
  list(
    final_weather_data,
    DAX_data,
    data_the_combined,
    data_the_combined_rel,
    Quarterly_GDP,
    gas_data,
    el_data,
    co2_data
  ),
  date_full
) %>%
  rename(y = total) %>%
  na.omit()
###### FEATURE ENGINEERING: ln,square,elasticity,infinity
# Shifted-log features.
#   data : data frame with `date` and `y` columns
# For every numeric column except `date`, `y` and columns whose name contains
# "__", adds "<column>__ln" = log(value + 1 - min(value)). The new columns are
# placed in front of the original ones.
transform_ln <- function(data) {
  candidates <- select(data, -contains("__"), -date, -y)
  renamed <- rename_all(select_if(candidates, is.numeric), function(nm) nm %.% "__ln")
  logged <- mutate_all(renamed, function(v) log(v + 1 - min(v)))
  cbind(logged, data)
}
# Squared features.
#   data : data frame with `date` and `y` columns
# For every numeric column except `date`, `y` and columns whose name contains
# "__", adds "<column>__sqrd" = value^2. The new columns are placed in front of
# the original ones.
transform_sqrd <- function(data) {
  candidates <- select(data, -contains("__"), -date, -y)
  renamed <- rename_all(select_if(candidates, is.numeric), function(nm) nm %.% "__sqrd")
  squared <- mutate_all(renamed, function(v) v^2)
  cbind(squared, data)
}
# Inverse-square ("elas") features.
#   data : data frame with `date` and `y` columns
# For every numeric column except `date`, `y` and columns whose name contains
# "__", adds "<column>__elas" = 1 / value^2 (Inf where the value is 0). The new
# columns are placed in front of the original ones.
transform_elas <- function(data) {
  candidates <- select(data, -contains("__"), -date, -y)
  renamed <- rename_all(select_if(candidates, is.numeric), function(nm) nm %.% "__elas")
  inverted <- mutate_all(renamed, function(v) 1 / v^2)
  cbind(inverted, data)
}
# Add the __ln, __sqrd and __elas feature columns, then replace infinite values
# (e.g. 1 / 0^2 from transform_elas) by 1000 times the column maximum.
# The replacement is limited to numeric columns: ifelse() drops attributes, so
# running it over everything() would turn the Date column `date` into plain
# numbers.
# NOTE(review): s() is not defined in this file -- presumably hablar::s(),
# used here so that max() ignores the infinite entries; confirm.
all_variables_combined <- all_variables_combined %>%
  transform_ln() %>%
  transform_sqrd() %>%
  transform_elas() %>%
  mutate(across(where(is.numeric), ~ ifelse(is.infinite(.x), max(s(.x)) * 1000, .x)))
######### LAG: 7 day w/ na interpolation
# Lagged copies of the numeric feature columns.
#   data : data frame with a `date` column
#   lag  : number of rows (ordered by date) to shift by; default 1
# For every numeric column except `date`, `dd_` columns and columns that already
# contain "____", adds "<column>____lag<lag>" holding the value `lag` rows
# earlier in date order. The new columns are placed in front of the original
# ones and every column is then passed through na_interpolation(), which fills
# the NAs created at the start of each lagged column.
# Fix: the shift is now passed to dplyr::lag() as `n = lag`; previously only
# the column name used `lag` and the shift was always 1.
transform_lag <- function(data, lag = 1) {
  lagged <- data %>%
    select(-contains("____"), -date, -contains("dd_")) %>%
    select_if(is.numeric) %>%
    rename_all(~ . %.% "____lag" %.% lag) %>%
    mutate_all(~ dplyr::lag(., n = lag, order_by = data$date))
  cbind(lagged, data) %>%
    mutate_all(~ na_interpolation(.))
}
# Add the "____lag7" columns via transform_lag(lag = 7).
all_variables_combined %<>% transform_lag(7)
# Mid-term table: without columns whose name contains both "y_" and "____lag".
all_variables_combined_midterm <- all_variables_combined %>%
  select(-(contains("y_") & contains("____lag")))
# AR table: without non-lagged columns whose name contains (case-sensitive)
# "Gas" or "y_".
# NOTE(review): contains("y_") also matches names such as "dd_day_sin" --
# confirm that dropping those here is intended.
all_variables_combined_ar <- all_variables_combined %>%
  select(-((contains("Gas", ignore.case = FALSE) | contains("y_")) & !contains("____lag")))
# Each output pair (.RData + .csv) now holds the same table. Before, the
# mid-term CSV received the AR table and the AR .RData received the
# unfiltered table.
save(all_variables_combined_midterm, file = "data-constr/masters_jointz.RData")
all_variables_combined_midterm %>% write_csv("data-constr/masters_jointz.csv")
# The AR .RData keeps the stored object name `all_variables_combined`; local()
# stops the rename from overwriting the full table, which is still used below.
local({
  all_variables_combined <- all_variables_combined_ar
  save(all_variables_combined, file = "data-constr/masters_jointz_ar.RData")
})
all_variables_combined_ar %>% write_csv("data-constr/masters_jointz_ar.csv")
# ARIMA input: `y` and `date`, extended by calling transform_lag() successively
# with lag = 1, 2, ..., 7 (each call adds a "y____lag<k>" column).
# The trailing select(-date) stays disabled, so `date` is kept in the output.
arima <- Reduce(
  transform_lag,
  seq_len(7),
  select(all_variables_combined, y, date)
)
save(arima, file = "data-constr/masters_arima.RData")
write_csv(arima, "data-constr/masters_arima.csv")
# SARIMA input: `y`, `date` and the `dd_` calendar columns, extended by calling
# transform_lag() successively with lag = 1, 2, ..., 7.
# The trailing select(-date) stays disabled, so `date` is kept in the output.
sarima <- Reduce(
  transform_lag,
  seq_len(7),
  select(all_variables_combined, y, date, contains("dd_"))
)
save(sarima, file = "data-constr/masters_sarima.RData")
write_csv(sarima, "data-constr/masters_sarima.csv")
### Actual_Arima
# Plain target series (`y` and `date` only), exported as CSV.
actual_sarima <- select(all_variables_combined, y, date)
write_csv(actual_sarima, "data-constr/actual_sarima.csv")
# ##### IGNORE EVERYTHING BELOW UNLESS OTHERWISE TOLD
# data_constructed = read_csv("../data-constr/the_base.csv", col_types = cols(Gasday = col_date(format = "%d.%m.%y")))
# data_weather = read_csv("../data-orig/weather_changes/export.csv")
#
# importIntoEnv()
#
# joinable_data <- data_weather %>%
# select(date, tavg, tmin, tmax)%>%
# rename(Gasday = date)
#
#
#
# including_weather <- data_constructed %>%
# left_join(joinable_data)
#
# final_weather_data <- df_hamburg%>%
# left_join(df_berlin, by = 'date')%>%
# left_join(df_munich, by = 'date')%>%
# left_join(df_cologne, by = 'date')%>%
# left_join(df_frankfurt, by = 'date')
#
#
#
#
#
#
#
#
#
# ############## Quartiles
#
# data_gas_price = read_csv("../data-constr/gasprice_imputed.csv")
#
#
# including_weather_price = including_weather %>%
# left_join(data_gas_price)
#
#
# #dont_work <- including_weather %>%
# # mutate(Quater =
# # case_when(
# ## Gasday <= 2022-01-01 ~ "Q1",
# # Gasday >= 2022-01-01 ~ "Q2",
# # ))
#
#
#
# including_weather_price %>%
# write_csv("../data-constr/masters_jointz.csv")
# all_variables_combined %>% filter(date > "2021-01-01") %>% ggplot(aes(x=date,y=HGasSLPsyn_rel)) + geom_line()
# all_variables_combined %>% filter(date > "2021-01-01") %>% ggplot(aes(x=date,y=LGasSLPsyn_rel)) + geom_line()
# all_variables_combined %>% filter(date > "2021-01-01") %>% ggplot(aes(x=date,y=HGasSLPana_rel)) + geom_line()
# all_variables_combined %>% filter(date > "2021-01-01") %>% ggplot(aes(x=date,y=LGasSLPana_rel)) + geom_line()
# all_variables_combined %>% filter(date > "2021-01-01") %>% ggplot(aes(x=date,y=HGasRLMmT_rel)) + geom_line()
# all_variables_combined %>% filter(date > "2021-01-01") %>% ggplot(aes(x=date,y=LGasRLMmT_rel)) + geom_line()
# all_variables_combined %>% filter(date > "2021-01-01") %>% ggplot(aes(x=date,y=HGasRLMoT_rel)) + geom_line()
# all_variables_combined %>% filter(date > "2021-01-01") %>% ggplot(aes(x=date,y=LGasRLMoT_rel)) + geom_line()
# Write the mid-term table to the mid-term CSV.
write_csv(all_variables_combined_midterm, "data-constr/masters_jointz.csv")
# Rebuild the calendar as a single `date` column (no `dd_` columns).
date_full <- data.frame(date = seq(startdate, currentDate, by = "days"))
# Inspect the `date` column of the mid-term table.
select(all_variables_combined_midterm, date)
# Master table: left-join each source table onto the calendar by `date` (in
# the listed order), rename `total` to `y`, and keep only rows without any NA.
all_variables_combined <- Reduce(
  function(joined, source_table) left_join(joined, source_table, by = 'date'),
  list(
    final_weather_data,
    DAX_data,
    data_the_combined,
    data_the_combined_rel,
    Quarterly_GDP,
    gas_data,
    el_data,
    co2_data
  ),
  date_full
) %>%
  rename(y = total) %>%
  na.omit()
###### FEATURE ENGINEERING: ln,square,elasticity,infinity
# Shifted-log features.
#   data : data frame with `date` and `y` columns
# For every numeric column except `date`, `y` and columns whose name contains
# "__", adds "<column>__ln" = log(value + 1 - min(value)). The new columns are
# placed in front of the original ones.
transform_ln <- function(data) {
  candidates <- select(data, -contains("__"), -date, -y)
  renamed <- rename_all(select_if(candidates, is.numeric), function(nm) nm %.% "__ln")
  logged <- mutate_all(renamed, function(v) log(v + 1 - min(v)))
  cbind(logged, data)
}
# Squared features.
#   data : data frame with `date` and `y` columns
# For every numeric column except `date`, `y` and columns whose name contains
# "__", adds "<column>__sqrd" = value^2. The new columns are placed in front of
# the original ones.
transform_sqrd <- function(data) {
  candidates <- select(data, -contains("__"), -date, -y)
  renamed <- rename_all(select_if(candidates, is.numeric), function(nm) nm %.% "__sqrd")
  squared <- mutate_all(renamed, function(v) v^2)
  cbind(squared, data)
}
# Inverse-square ("elas") features.
#   data : data frame with `date` and `y` columns
# For every numeric column except `date`, `y` and columns whose name contains
# "__", adds "<column>__elas" = 1 / value^2 (Inf where the value is 0). The new
# columns are placed in front of the original ones.
transform_elas <- function(data) {
  candidates <- select(data, -contains("__"), -date, -y)
  renamed <- rename_all(select_if(candidates, is.numeric), function(nm) nm %.% "__elas")
  inverted <- mutate_all(renamed, function(v) 1 / v^2)
  cbind(inverted, data)
}
# Add the __ln, __sqrd and __elas feature columns, then, in numeric columns
# only, replace infinite values by 1000 times max(s(column)).
# NOTE(review): s() is not defined in this file -- presumably hablar::s();
# confirm.
all_variables_combined <- all_variables_combined %>%
  transform_ln() %>%
  transform_sqrd() %>%
  transform_elas() %>%
  mutate(across(where(is.numeric), function(column) {
    ifelse(is.infinite(column), max(s(column)) * 1000, column)
  }))
######### LAG: 7 day w/ na interpolation
# Lagged copies of the numeric feature columns.
#   data : data frame with a `date` column
#   lag  : number of rows (ordered by date) to shift by; default 1
# For every numeric column except `date`, `dd_` columns and columns that already
# contain "____", adds "<column>____lag<lag>" holding the value `lag` rows
# earlier in date order. The new columns are placed in front of the original
# ones and every column is then passed through na_interpolation(), which fills
# the NAs created at the start of each lagged column.
# Fix: the shift is now passed to dplyr::lag() as `n = lag`; previously only
# the column name used `lag` and the shift was always 1.
transform_lag <- function(data, lag = 1) {
  lagged <- data %>%
    select(-contains("____"), -date, -contains("dd_")) %>%
    select_if(is.numeric) %>%
    rename_all(~ . %.% "____lag" %.% lag) %>%
    mutate_all(~ dplyr::lag(., n = lag, order_by = data$date))
  cbind(lagged, data) %>%
    mutate_all(~ na_interpolation(.))
}
# Add the "____lag7" columns via transform_lag(lag = 7).
all_variables_combined %<>% transform_lag(7)
# Mid-term table: without columns whose name contains both "y_" and "____lag".
all_variables_combined_midterm <- all_variables_combined %>%
  select(-(contains("y_") & contains("____lag")))
# AR table: without non-lagged columns whose name contains (case-sensitive)
# "Gas" or "y_".
# NOTE(review): contains("y_") also matches names such as "dd_day_sin" --
# confirm that dropping those here is intended.
all_variables_combined_ar <- all_variables_combined %>%
  select(-((contains("Gas", ignore.case = FALSE) | contains("y_")) & !contains("____lag")))
# Check that `date` is still a Date column; as.numeric(Sys.Date()) shows what
# a Date looks like once it has been converted to a number.
all_variables_combined_midterm %>% select(date)
as.numeric(Sys.Date())
save(all_variables_combined_midterm, file = "data-constr/masters_jointz.RData")
all_variables_combined_midterm %>% write_csv("data-constr/masters_jointz.csv")
# Fix: the AR .RData now holds the AR table (as the AR CSV does) instead of
# the unfiltered table. It keeps the stored object name
# `all_variables_combined`; local() stops the rename from overwriting the full
# table, which is still used below.
local({
  all_variables_combined <- all_variables_combined_ar
  save(all_variables_combined, file = "data-constr/masters_jointz_ar.RData")
})
all_variables_combined_ar %>% write_csv("data-constr/masters_jointz_ar.csv")
# ARIMA input: `y` and `date`, extended by calling transform_lag() successively
# with lag = 1, 2, ..., 7 (each call adds a "y____lag<k>" column).
# The trailing select(-date) stays disabled, so `date` is kept in the output.
arima <- Reduce(
  transform_lag,
  seq_len(7),
  select(all_variables_combined, y, date)
)
save(arima, file = "data-constr/masters_arima.RData")
write_csv(arima, "data-constr/masters_arima.csv")
# SARIMA input: `y`, `date` and the `dd_` calendar columns, extended by calling
# transform_lag() successively with lag = 1, 2, ..., 7.
# The trailing select(-date) stays disabled, so `date` is kept in the output.
sarima <- Reduce(
  transform_lag,
  seq_len(7),
  select(all_variables_combined, y, date, contains("dd_"))
)
save(sarima, file = "data-constr/masters_sarima.RData")
write_csv(sarima, "data-constr/masters_sarima.csv")
### Actual_Arima
# Plain target series for export: `y` plus `date` converted to its numeric
# value. The column is called `date` (lower case); the earlier attempt with
# as.numeric(Date) referred to an object that does not exist and is dropped.
actual_sarima <- all_variables_combined %>%
  select(y, date) %>%
  mutate(date = as.numeric(date))
actual_sarima %>% write_csv("data-constr/actual_sarima.csv")
# Print the column names of the absolute and the relative gas tables.
names(data_the_combined)
names(data_the_combined_rel)