-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
mixed-models.qmd
2650 lines (2263 loc) · 101 KB
/
mixed-models.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Mixed Models {#sec-mixedModels}
## Getting Started {#sec-mixedModelsGettingStarted}
### Load Packages {#sec-mixedModelsLoadPackages}
```{r}
library("lme4")
library("lmerTest")
library("MuMIn")
library("emmeans")
library("sjstats")
library("mgcv")
library("AICcmodavg")
library("bbmle")
library("rstan")
library("brms")
library("cmdstanr") # todo: install.packages("cmdstanr", repos = c('https://stan-dev.r-universe.dev', getOption("repos"))); check_cmdstan_toolchain(); cmdstanr::install_cmdstan()
library("fitdistrplus")
library("parallel")
library("parallelly")
library("plotly")
library("viridis")
library("tidyverse")
```
### Specify Package Options {#sec-mixedModelsPackageOptions}
```{r}
emm_options(lmerTest.limit = 100000)
emm_options(pbkrtest.limit = 100000)
```
### Load Data {#sec-mixedModelsLoadData}
```{r}
#| eval: false
#| include: false
load(file = file.path(path, "/OneDrive - University of Iowa/Teaching/Courses/Fantasy Football/Data/nfl_depthCharts.RData", fsep = ""))
load(file = file.path(path, "/OneDrive - University of Iowa/Teaching/Courses/Fantasy Football/Data/player_stats_weekly.RData", fsep = ""))
load(file = file.path(path, "/OneDrive - University of Iowa/Teaching/Courses/Fantasy Football/Data/player_stats_seasonal.RData", fsep = ""))
load(file = file.path(path, "/OneDrive - University of Iowa/Teaching/Courses/Fantasy Football/Data/bayesianMixedModelFit.RData", fsep = ""))
```
```{r}
load(file = "./data/nfl_depthCharts.RData")
load(file = "./data/player_stats_weekly.RData")
load(file = "./data/player_stats_seasonal.RData")
```
We created the `player_stats_weekly.RData` and `player_stats_seasonal.RData` objects in @sec-calculatePlayerAge.
## Overview of Mixed Models {#sec-mixedModelsOverview}
We will discuss a modeling framework that goes by many terms, including mixed models, mixed-effects models, multilevel models, hierarchical linear models.
They are sometimes called multilevel models and hierarchical linear models, whose name emphasizes the hierarchical structure of the data because the data are nonindependent.
When observations (i.e., data points) are collected from multiple lower-level units (e.g., people) in an upper-level unit (e.g., married couple, family, classroom, school, neighborhood, team), the data from the lower-level units are considered "nested" within the upper-level unit.
In this context, *nested data* refers to multiple observations from the same upper-level unit.
For instance, longitudinal data are nested within the same participant.
Students are nested within classrooms, which are nested within schools.
Players are nested within teams.
When data are nested, the data from the lower-level unit are likely to be correlated, to some degree, because they come from the same upper-level unit.
For example, multiple players may come from the same team, and the players' performance on that team is likely interrelated because they share common experiences and influence one another.
Thus, data from multiple players on a given team are considered nested within that team.
Longitudinal data can also be considered nested data, in which time points are nested within the person (i.e., the same player provides an observation across multiple time points).
As we will discuss, it is important to account for levels of nesting when the observations are nonindependent.
These models are also sometimes called mixed models or mixed-effects models because the models can include a mix of fixed and random effects.
Fixed effects are effects that are constant across individuals (i.e., upper-level units).
Random effects are effects that vary across individuals (i.e., upper-level units).
For instance, consider a longitudinal study of fantasy performance as a function of age.
If we have longitudinal data for multiple players, the time points are nested within players.
Examining the association between age as a fixed effect in relation to fantasy performace would examine the assocication between a player's age and their fantasy performance while holding the association between age and fantasy performance constant across all players.
That is, it would assume that all players show the same trajectory such as increase for 4 years then decrease.
Examining the association between age as a random effect in relation to fantasy performace would examine the assocication between a player's age and their fantasy performance while allowing the association between age and fantasy performance to vary across players.
That is, it would allow the possibility that some players improve with age, whereas other players decline in performance with age.
When including random effects of a variable (e.g., age) in a mixed model, it is also important to include fixed effects of that variable in the model.
This is because random effects have a mean of zero.
Fixed effects allow the mean to differ from zero.
Thus, inclusion of random effects without the corresponding fixed effect can lead to bias in estimation of the association between the predictor variables and the outcome variable.
### Ecological Fallacy {#sec-ecologicalFallacy}
As described in @sec-fallaciesEcological, the ecological fallacy is the error of drawing inferences about an individual from group-level data.
An type of ecological fallacy is [Simpson's paradox](#sec-simpsonsParadox).
### Simpson's Paradox {#sec-simpsonsParadox}
Examples of Simpson's Paradox are depicted in Figures [-@fig-simpsonParadox] and [-@fig-simpsonsParadox].
In the example below, there is a positive association between the predictor variable and outcome variable for each group.
However, when the groups are combined, there is a *negative* association between predictor variable and outcome variable.
That is, the sign of an association can differ at different levels of analysis (e.g., group level versus person level).
::: {#fig-simpsonsParadox}
![](images/simpsonsParadox.png){fig-alt="An Example of Simpson's Paradox. In this example, there is a positive association between `x` and `y` within each group (i.e., red or blue), but there is a negative association between `x` and `y` when the groups are combined. (Figure retrieved from <https://en.wikipedia.org/wiki/File:Simpson%27s_paradox_continuous.svg>)"}
An Example of Simpson's Paradox. In this example, there is a positive association between `x` and `y` within each group (i.e., red or blue), but there is a negative association between `x` and `y` when the groups are combined. (Figure retrieved from <https://en.wikipedia.org/wiki/File:Simpson%27s_paradox_continuous.svg>)
:::
Consider that we observe a between-person association between a predictor, for example, how much sports drink a player drinks and their performance, such that players who drink more sports drink before games tend to perform better during games.
However, based on this association, if we draw the inference that sports drink consumption leads to better performance, this could be a faulty inference.
It is possible that, at the *within*-person level, there is no association or even a negative association between sports drink consumption and performance.
For helping to approximate causality, a much stronger test than relying on the between-person association would be to examine the association within the individual.
That is, examining the association within the individual would examine: when a player consumes more sports drink, whether they perform better than when the same player consumes less sports drink.
We describe within-person analyses to approximate causal inferences in @sec-causalInferenceWithinSubject.
In short, we often need to account for the groups or "levels" within which people are nested.
When multiple observations are nested within the same upper-level unit, they are often correlated.
That is, some variance is attributable to the lower-level unit (e.g., the player) and some variance is attributable to the upper-level unit (e.g., the team).
It is important to evaluate how much variance is attributable to the upper-level unit.
If substantial variance is attributable to the upper-level unit, it is important to account for the nested structure of the data.
Mixed models are a useful approach to account for data with a nested structure so you can avoid committing the ecological fallacy.
## Fantasy Points Per Season by Position, Age, and Experience {#sec-fantasyPointsByAgeExperience}
```{r}
player_stats_seasonal_offense_subset <- player_stats_seasonal %>%
dplyr::filter(position_group %in% c("QB","RB","WR","TE") | position %in% c("K"))
player_stats_seasonal_offense_subset$position[which(player_stats_seasonal_offense_subset$position == "HB")] <- "RB"
player_stats_seasonal_offense_subset$player_idFactor <- factor(player_stats_seasonal_offense_subset$player_id)
player_stats_seasonal_offense_subset$positionFactor <- factor(player_stats_seasonal_offense_subset$position)
```
```{r}
#| eval: false
#| include: false
for(i in unique(nfl_depthCharts$season)){
print(i)
nfl_depthCharts %>%
filter(season == i) %>%
select(week) %>%
arrange(week) %>%
unique() %>%
pull() %>%
print()
}
```
```{r}
seasons17week <- 2001:2020
seasons18week <- 2021:max(nfl_depthCharts$season, na.rm = TRUE)
endOfSeasonDepthCharts <- nfl_depthCharts %>%
filter((season %in% seasons17week & week == 18) | (season %in% seasons18week & week == 19)) # get end-of-season depth charts
qb1s <- endOfSeasonDepthCharts %>%
filter(position == "QB", depth_team == 1)
fb1s <- endOfSeasonDepthCharts %>%
filter(position == "FB", depth_team == 1)
k1s <- endOfSeasonDepthCharts %>%
filter(position == "K", depth_team == 1)
rb1s <- endOfSeasonDepthCharts %>%
filter(position == "RB", depth_team == 1)
wr1s <- endOfSeasonDepthCharts %>%
filter(position == "WR", depth_team == 1)
te1s <- endOfSeasonDepthCharts %>%
filter(position == "TE", depth_team == 1)
player_stats_seasonal_offense_subsetDepth <- player_stats_seasonal_offense_subset %>%
filter(player_id %in% c(
qb1s$gsis_id,
fb1s$gsis_id,
k1s$gsis_id,
rb1s$gsis_id,
wr1s$gsis_id,
te1s$gsis_id
))
```
Create a `newdata` object for generating the plots of model-implied fantasy points by age and position:
```{r}
pointsPerSeason_positionAge_newData <- expand.grid(
positionFactor = factor(c("FB","QB","RB","TE","WR")), #,"K"
age = seq(from = 20, to = 40, length.out = 10000)
)
pointsPerSeason_positionAge_newData$ageCentered20 <- pointsPerSeason_positionAge_newData$age - 20
pointsPerSeason_positionAge_newData$ageCentered20Quadratic <- pointsPerSeason_positionAge_newData$ageCentered20 ^ 2
pointsPerSeason_positionAge_newData$years_of_experience <- floor(pointsPerSeason_positionAge_newData$age - 22) # assuming that most players start at age 22 (i.e., rookie year) and thus have 1 year of experience at age 23
pointsPerSeason_positionAge_newData$years_of_experience[which(pointsPerSeason_positionAge_newData$years_of_experience < 0)] <- 0
```
Create an object with complete cases for generating the plots of individuals' model-implied fantasy points by age and position:
```{r}
player_stats_seasonal_offense_subsetCC <- player_stats_seasonal_offense_subset %>%
filter(
!is.na(player_idFactor),
!is.na(fantasyPoints),
!is.na(positionFactor),
!is.na(ageCentered20),
!is.na(ageCentered20Quadratic),
!is.na(years_of_experience))
```
### Scatterplots of Fantasy Points by Age and Position {#sec-fantasyPointsByAgeScatterplot}
[Scatterplots](#sec-scatterplot) are a helpful tool for quickly examining the association between two variables.
However, [scatterplots](#sec-scatterplot)—as well as correlation and multiple regression—can hide meaningful associations that differ across units of analysis.
#### Quarterbacks {#sec-fantasyPointsByAgeScatterplotQB}
A [scatterplot](#sec-scatterplot) of Quarterbacks' fantasy points by age is in @fig-fantasyPointsByAgeScatterplotQB.
```{r}
#| label: fig-fantasyPointsByAgeScatterplotQB
#| fig-cap: "Scatterplot of Fantasy Points by Age for Quarterbacks."
#| fig-alt: "Scatterplot of Fantasy Points by Age for Quarterbacks."
#| code-fold: true
plot_scatterplotFantasyPointsByAgeQB <- ggplot(
data = player_stats_seasonal_offense_subset %>%
filter(position == "QB") %>%
mutate(
age = round(age, 2),
fantasyPoints = round(fantasyPoints, 2)
),
mapping = aes(
x = age,
y = fantasyPoints,
color = player_id)) +
geom_point(
aes(
text = player_display_name, # add player name for mouse over tooltip
label = season # add season for mouse over tooltip
)) +
geom_smooth(
mapping = aes(
x = age,
y = fantasyPoints),
inherit.aes = FALSE
) +
scale_color_viridis(discrete = TRUE) +
labs(
x = "Player Age (years)",
y = "Fantasy Points (Season)",
title = "Fantasy Points (Season) by Player Age: Quarterbacks"
) +
theme_classic() +
theme(legend.position = "none")
ggplotly(
plot_scatterplotFantasyPointsByAgeQB,
tooltip = c("age","fantasyPoints","text","label"))
```
Based on the [scatterplot](#sec-scatterplot) (and the bivariate association below), Quarterbacks' fantasy points appear to increase with age.
```{r}
cor.test(
formula = ~ age + fantasyPoints,
data = player_stats_seasonal_offense_subset %>% filter(position == "QB")
)
```
#### Fullbacks {#sec-fantasyPointsByAgeScatterplotFB}
A [scatterplot](#sec-scatterplot) of Fullbacks' fantasy points by age is in @fig-fantasyPointsByAgeScatterplotFB.
```{r}
#| label: fig-fantasyPointsByAgeScatterplotFB
#| fig-cap: "Scatterplot of Fantasy Points by Age for Fullbacks."
#| fig-alt: "Scatterplot of Fantasy Points by Age for Fullbacks."
#| code-fold: true
plot_scatterplotFantasyPointsByAgeFB <- ggplot(
data = player_stats_seasonal_offense_subset %>%
filter(position == "FB") %>%
mutate(
age = round(age, 2),
fantasyPoints = round(fantasyPoints, 2)
),
mapping = aes(
x = age,
y = fantasyPoints,
color = player_id)) +
geom_point(
aes(
text = player_display_name, # add player name for mouse over tooltip
label = season # add season for mouse over tooltip
)) +
geom_smooth(
mapping = aes(
x = age,
y = fantasyPoints),
inherit.aes = FALSE
) +
scale_color_viridis(discrete = TRUE) +
labs(
x = "Player Age (years)",
y = "Fantasy Points (Season)",
title = "Fantasy Points (Season) by Player Age: Fullbacks"
) +
theme_classic() +
theme(legend.position = "none")
ggplotly(
plot_scatterplotFantasyPointsByAgeFB,
tooltip = c("age","fantasyPoints","text","label"))
```
Based on the [scatterplot](#sec-scatterplot), Fullbacks' fantasy points appear to be relatively stable across ages.
#### Running Backs {#sec-fantasyPointsByAgeScatterplotRB}
A [scatterplot](#sec-scatterplot) of Running Backs' fantasy points by age is in @fig-fantasyPointsByAgeScatterplotRB.
```{r}
#| label: fig-fantasyPointsByAgeScatterplotRB
#| fig-cap: "Scatterplot of Fantasy Points by Age for Running Backs."
#| fig-alt: "Scatterplot of Fantasy Points by Age for Running Backs."
#| code-fold: true
plot_scatterplotFantasyPointsByAgeRB <- ggplot(
data = player_stats_seasonal_offense_subset %>%
filter(position == "RB") %>%
mutate(
age = round(age, 2),
fantasyPoints = round(fantasyPoints, 2)
),
mapping = aes(
x = age,
y = fantasyPoints,
color = player_id)) +
geom_point(
aes(
text = player_display_name, # add player name for mouse over tooltip
label = season # add season for mouse over tooltip
)) +
geom_smooth(
mapping = aes(
x = age,
y = fantasyPoints),
inherit.aes = FALSE
) +
scale_color_viridis(discrete = TRUE) +
labs(
x = "Player Age (years)",
y = "Fantasy Points (Season)",
title = "Fantasy Points (Season) by Player Age: Running Backs"
) +
theme_classic() +
theme(legend.position = "none")
ggplotly(
plot_scatterplotFantasyPointsByAgeRB,
tooltip = c("age","fantasyPoints","text","label"))
```
Based on the [scatterplot](#sec-scatterplot), Running Backs' fantasy points appear to be relatively stable across ages.
#### Wide Receivers {#sec-fantasyPointsByAgeScatterplotWR}
A [scatterplot](#sec-scatterplot) of Wide Receivers' fantasy points by age is in @fig-fantasyPointsByAgeScatterplotWR.
```{r}
#| label: fig-fantasyPointsByAgeScatterplotWR
#| fig-cap: "Scatterplot of Fantasy Points by Age for Wide Receivers."
#| fig-alt: "Scatterplot of Fantasy Points by Age for Wide Receivers."
#| code-fold: true
plot_scatterplotFantasyPointsByAgeWR <- ggplot(
data = player_stats_seasonal_offense_subset %>%
filter(position == "WR") %>%
mutate(
age = round(age, 2),
fantasyPoints = round(fantasyPoints, 2)
),
mapping = aes(
x = age,
y = fantasyPoints,
color = player_id)) +
geom_point(
aes(
text = player_display_name, # add player name for mouse over tooltip
label = season # add season for mouse over tooltip
)) +
geom_smooth(
mapping = aes(
x = age,
y = fantasyPoints),
inherit.aes = FALSE
) +
scale_color_viridis(discrete = TRUE) +
labs(
x = "Player Age (years)",
y = "Fantasy Points (Season)",
title = "Fantasy Points (Season) by Player Age: Wide Receivers"
) +
theme_classic() +
theme(legend.position = "none")
ggplotly(
plot_scatterplotFantasyPointsByAgeWR,
tooltip = c("age","fantasyPoints","text","label"))
```
Based on the [scatterplot](#sec-scatterplot), Wide Receivers' fantasy points appear to be relatively stable across ages.
#### Tight Ends {#sec-fantasyPointsByAgeScatterplotTE}
A [scatterplot](#sec-scatterplot) of Tight Ends' fantasy points by age is in @fig-fantasyPointsByAgeScatterplotTE.
```{r}
#| label: fig-fantasyPointsByAgeScatterplotTE
#| fig-cap: "Scatterplot of Fantasy Points by Age for Tight Ends."
#| fig-alt: "Scatterplot of Fantasy Points by Age for Tight Ends."
#| code-fold: true
plot_scatterplotFantasyPointsByAgeTE <- ggplot(
data = player_stats_seasonal_offense_subset %>%
filter(position == "TE") %>%
mutate(
age = round(age, 2),
fantasyPoints = round(fantasyPoints, 2)
),
mapping = aes(
x = age,
y = fantasyPoints,
color = player_id)) +
geom_point(
aes(
text = player_display_name, # add player name for mouse over tooltip
label = season # add season for mouse over tooltip
)) +
geom_smooth(
mapping = aes(
x = age,
y = fantasyPoints),
inherit.aes = FALSE
) +
scale_color_viridis(discrete = TRUE) +
labs(
x = "Player Age (years)",
y = "Fantasy Points (Season)",
title = "Fantasy Points (Season) by Player Age: Tight Ends"
) +
theme_classic() +
theme(legend.position = "none")
ggplotly(
plot_scatterplotFantasyPointsByAgeTE,
tooltip = c("age","fantasyPoints","text","label"))
```
Based on the [scatterplot](#sec-scatterplot) (and the bivariate association below), Tight Ends' fantasy points appear to increase with age.
```{r}
cor.test(
formula = ~ age + fantasyPoints,
data = player_stats_seasonal_offense_subset %>% filter(position == "TE")
)
```
### Plots of Raw Trajectories of Fantasy Points By Age and Player {#sec-fantasyPointsByAgeExperienceRawData}
[Scatterplots](#sec-scatterplot) can be helpful for quickly visualizing the association between two variables.
However, as mentioned earlier, [scatterplots](#sec-scatterplot) can hide the association between variables at different units of analysis.
For instance, consider that we are trying to predict how a player will perform based on their age.
We are interested not only in what the association is between age and fantasy points between players (i.e., a between-person association).
We are also interested in what the association is between age and fantasy points *within* a given player (and within each player; i.e., a within-individual association).
Arguably, the within-individual association between age and fantasy points is more relevant to the prediction of performance than the association between age and fantasy points between players.
Assuming that the between-player association between age and fantasy points is the same as the within-player association when it is not is an example of the [ecological fallacy](#sec-fallaciesEcological).
Below, we depict players' raw trajectories of fantasy points as a function of age.
These are known as spaghetti plots.
By examining the trajectory for each player, we can get a better understanding of hor performance changes (within an individual) as a function of age.
#### Quarterbacks {#sec-fantasyPointsByAgeExperienceRawDataQB}
A plot of Quarterbacks' raw fantasy points data by age is in @fig-rawFantasyPointsByAgeQB.
```{r}
#| label: fig-rawFantasyPointsByAgeQB
#| fig-cap: "Plot of Raw Trajectories of Fantasy Points by Age for Quarterbacks."
#| fig-alt: "Plot of Raw Trajectories of Fantasy Points by Age for Quarterbacks."
#| code-fold: true
plot_rawFantasyPointsByAgeQB <- ggplot(
data = player_stats_seasonal_offense_subset %>%
filter(position == "QB") %>%
mutate(
age = round(age, 2),
fantasyPoints = round(fantasyPoints, 2)
),
mapping = aes(
x = age,
y = fantasyPoints,
color = player_id)) +
geom_line(
aes(
text = player_display_name, # add player name for mouse over tooltip
label = season # add season for mouse over tooltip
)) +
scale_color_viridis(discrete = TRUE) +
labs(
x = "Player Age (years)",
y = "Fantasy Points (Season)",
title = "Fantasy Points (Season) by Player Age: Quarterbacks"
) +
theme_classic() +
theme(legend.position = "none")
ggplotly(
plot_rawFantasyPointsByAgeQB,
tooltip = c("age","fantasyPoints","text","label"))
```
#### Fullbacks {#sec-fantasyPointsByAgeExperienceRawDataFB}
A plot of Fullbacks' raw fantasy points data by age is in @fig-rawFantasyPointsByAgeFB.
```{r}
#| label: fig-rawFantasyPointsByAgeFB
#| fig-cap: "Plot of Raw Trajectories of Fantasy Points by Age for Fullbacks."
#| fig-alt: "Plot of Raw Trajectories of Fantasy Points by Age for Fullbacks."
#| code-fold: true
plot_rawFantasyPointsByAgeFB <- ggplot(
data = player_stats_seasonal_offense_subset %>%
filter(position == "FB") %>%
mutate(
age = round(age, 2),
fantasyPoints = round(fantasyPoints, 2)
),
mapping = aes(
x = age,
y = fantasyPoints,
color = player_id)) +
geom_line(
aes(
text = player_display_name, # add player name for mouse over tooltip
label = season # add season for mouse over tooltip
)) +
scale_color_viridis(discrete = TRUE) +
labs(
x = "Player Age (years)",
y = "Fantasy Points (Season)",
title = "Fantasy Points (Season) by Player Age: Fullbacks"
) +
theme_classic() +
theme(legend.position = "none")
ggplotly(
plot_rawFantasyPointsByAgeFB,
tooltip = c("age","fantasyPoints","text","label"))
```
#### Running Backs {#sec-fantasyPointsByAgeExperienceRawDataRB}
A plot of Running Backs' raw fantasy points data by age is in @fig-rawFantasyPointsByAgeRB.
```{r}
#| label: fig-rawFantasyPointsByAgeRB
#| fig-cap: "Plot of Raw Trajectories of Fantasy Points by Age for Running Backs."
#| fig-alt: "Plot of Raw Trajectories of Fantasy Points by Age for Running Backs."
#| code-fold: true
plot_rawFantasyPointsByAgeRB <- ggplot(
data = player_stats_seasonal_offense_subset %>%
filter(position == "RB") %>%
mutate(
age = round(age, 2),
fantasyPoints = round(fantasyPoints, 2)
),
mapping = aes(
x = age,
y = fantasyPoints,
color = player_id)) +
geom_line(
aes(
text = player_display_name, # add player name for mouse over tooltip
label = season # add season for mouse over tooltip
)) +
scale_color_viridis(discrete = TRUE) +
labs(
x = "Player Age (years)",
y = "Fantasy Points (Season)",
title = "Fantasy Points (Season) by Player Age: Running Backs"
) +
theme_classic() +
theme(legend.position = "none")
ggplotly(
plot_rawFantasyPointsByAgeRB,
tooltip = c("age","fantasyPoints","text","label"))
```
#### Wide Receivers {#sec-fantasyPointsByAgeExperienceRawDataWR}
A plot of Wide Receivers' raw fantasy points data by age is in @fig-rawFantasyPointsByAgeWR.
```{r}
#| label: fig-rawFantasyPointsByAgeWR
#| fig-cap: "Plot of Raw Trajectories of Fantasy Points by Age for Wide Receivers."
#| fig-alt: "Plot of Raw Trajectories of Fantasy Points by Age for Wide Receivers."
#| code-fold: true
plot_rawFantasyPointsByAgeWR <- ggplot(
data = player_stats_seasonal_offense_subset %>%
filter(position == "WR") %>%
mutate(
age = round(age, 2),
fantasyPoints = round(fantasyPoints, 2)
),
mapping = aes(
x = age,
y = fantasyPoints,
color = player_id)) +
geom_line(
aes(
text = player_display_name, # add player name for mouse over tooltip
label = season # add season for mouse over tooltip
)) +
scale_color_viridis(discrete = TRUE) +
labs(
x = "Player Age (years)",
y = "Fantasy Points (Season)",
title = "Fantasy Points (Season) by Player Age: Wide Receivers"
) +
theme_classic() +
theme(legend.position = "none")
ggplotly(
plot_rawFantasyPointsByAgeWR,
tooltip = c("age","fantasyPoints","text","label"))
```
#### Tight Ends {#sec-fantasyPointsByAgeExperienceRawDataTE}
A plot of Tight Ends' raw fantasy points data by age is in @fig-rawFantasyPointsByAgeTE.
```{r}
#| label: fig-rawFantasyPointsByAgeTE
#| fig-cap: "Plot of Raw Trajectories of Fantasy Points by Age for Tight Ends."
#| fig-alt: "Plot of Raw Trajectories of Fantasy Points by Age for Tight Ends."
#| code-fold: true
plot_rawFantasyPointsByAgeTE <- ggplot(
data = player_stats_seasonal_offense_subset %>%
filter(position == "TE") %>%
mutate(
age = round(age, 2),
fantasyPoints = round(fantasyPoints, 2)
),
mapping = aes(
x = age,
y = fantasyPoints,
color = player_id)) +
geom_line(
aes(
text = player_display_name, # add player name for mouse over tooltip
label = season # add season for mouse over tooltip
)) +
scale_color_viridis(discrete = TRUE) +
labs(
x = "Player Age (years)",
y = "Fantasy Points (Season)",
title = "Fantasy Points (Season) by Player Age: Tight Ends"
) +
theme_classic() +
theme(legend.position = "none")
ggplotly(
plot_rawFantasyPointsByAgeTE,
tooltip = c("age","fantasyPoints","text","label"))
```
### Linear Regression Models {#sec-fantasyPointsByAgeExperienceModelsRegression}
#### Null Model {#sec-fantasyPointsByAgeExperienceModelsRegressionNull}
```{r}
pointsPerSeason_nullModel <- lm(
fantasyPoints ~ 1,
data = player_stats_seasonal_offense_subset,
na.action = "na.exclude"
)
summary(pointsPerSeason_nullModel)
summary(pointsPerSeason_nullModel)$r.squared
AIC(pointsPerSeason_nullModel)
MuMIn::AICc(pointsPerSeason_nullModel)
```
A plot of the model-implied trajectories of fantasy points by age from the null model is in @fig-fantasyPointsNull.
```{r}
#| label: fig-fantasyPointsNull
#| fig-cap: "Plot of Model-Implied Trajectories of Fantasy Points by Age in Null Model."
#| fig-alt: "Plot of Model-Implied Trajectories of Fantasy Points by Age in Null Model."
#| code-fold: true
pointsPerSeason_positionAge_newData$fantasyPoints_nullModel <- predict(
object = pointsPerSeason_nullModel,
newdata = pointsPerSeason_positionAge_newData
)
ggplot2::ggplot(
data = pointsPerSeason_positionAge_newData,
mapping = aes(
x = age,
y = fantasyPoints_nullModel
)
) +
geom_line(linewidth = 2) +
labs(
x = "Player Age (years)",
y = "Fantasy Points (Season)",
title = "Fantasy Points (Season) by Player Age and Position",
subtitle = "Null Model"
) +
theme_classic()
```
#### Linear Model {#sec-fantasyPointsByAgeExperienceModelsRegressionLinear}
```{r}
pointsPerSeason_linearRegression <- lm(
fantasyPoints ~ positionFactor + ageCentered20 + positionFactor:ageCentered20,
data = player_stats_seasonal_offense_subset,
na.action = "na.exclude"
)
summary(pointsPerSeason_linearRegression)
summary(pointsPerSeason_linearRegression)$r.squared
AIC(pointsPerSeason_linearRegression)
MuMIn::AICc(pointsPerSeason_linearRegression)
```
A plot of the model-implied trajectories of fantasy points by age from the linear regression model is in @fig-fantasyPointsLinearRegression.
```{r}
#| label: fig-fantasyPointsLinearRegression
#| fig-cap: "Plot of Model-Implied Trajectories of Fantasy Points by Age in Linear Regression Model."
#| fig-alt: "Plot of Model-Implied Trajectories of Fantasy Points by Age in Linear Regression Model."
#| code-fold: true
pointsPerSeason_positionAge_newData$fantasyPoints_linearRegression <- predict(
object = pointsPerSeason_linearRegression,
newdata = pointsPerSeason_positionAge_newData
)
ggplot2::ggplot(
data = pointsPerSeason_positionAge_newData,
mapping = aes(
x = age,
y = fantasyPoints_linearRegression,
color = positionFactor
)
) +
geom_line(linewidth = 2) +
labs(
x = "Player Age (years)",
y = "Fantasy Points (Season)",
title = "Fantasy Points (Season) by Player Age and Position",
subtitle = "Linear Regression Model",
color = "Position"
) +
theme_classic()
```
#### Quadratic Model {#sec-fantasyPointsByAgeExperienceModelsRegressionQuadratic}
```{r}
pointsPerSeason_quadraticRegression <- lm(
fantasyPoints ~ positionFactor + ageCentered20 + ageCentered20Quadratic + positionFactor:ageCentered20 + positionFactor:ageCentered20Quadratic,
data = player_stats_seasonal_offense_subset,
na.action = "na.exclude"
)
summary(pointsPerSeason_quadraticRegression)
summary(pointsPerSeason_quadraticRegression)$r.squared
AIC(pointsPerSeason_quadraticRegression)
MuMIn::AICc(pointsPerSeason_quadraticRegression)
```
A plot of the model-implied trajectories of fantasy points by age from the regression model with a quadratic term for age is in @fig-fantasyPointsQuadraticRegression.
```{r}
#| label: fig-fantasyPointsQuadraticRegression
#| fig-cap: "Plot of Model-Implied Trajectories of Fantasy Points by Age in Quadratic Regression Model."
#| fig-alt: "Plot of Model-Implied Trajectories of Fantasy Points by Age in Quadratic Regression Model."
#| code-fold: true
pointsPerSeason_positionAge_newData$fantasyPoints_quadraticRegression <- predict(
object = pointsPerSeason_quadraticRegression,
newdata = pointsPerSeason_positionAge_newData
)
ggplot2::ggplot(
data = pointsPerSeason_positionAge_newData,
mapping = aes(
x = age,
y = fantasyPoints_quadraticRegression,
color = positionFactor
)
) +
geom_line(linewidth = 2) +
labs(
x = "Player Age (years)",
y = "Fantasy Points (Season)",
title = "Fantasy Points (Season) by Player Age and Position",
subtitle = "Quadratic Regression Model",
color = "Position"
) +
theme_classic()
```
#### Compare Models {#sec-fantasyPointsByAgeExperienceModelsRegressionCompare}
```{r}
anova(
pointsPerSeason_linearRegression,
pointsPerSeason_quadraticRegression
)
AIC(
pointsPerSeason_nullModel,
pointsPerSeason_linearRegression,
pointsPerSeason_quadraticRegression
)
lmModels <- list(
"nullModel" = pointsPerSeason_nullModel,
"linearRegression" = pointsPerSeason_linearRegression,
"quadraticRegression" = pointsPerSeason_quadraticRegression
)
bbmle::AICtab(lmModels)
MuMIn::AICc(
pointsPerSeason_nullModel,
pointsPerSeason_linearRegression,
pointsPerSeason_quadraticRegression
)
summary(pointsPerSeason_nullModel)$r.squared
summary(pointsPerSeason_linearRegression)$r.squared
summary(pointsPerSeason_quadraticRegression)$r.squared
deviance(pointsPerSeason_nullModel)
deviance(pointsPerSeason_linearRegression)
deviance(pointsPerSeason_quadraticRegression)
logLik(pointsPerSeason_nullModel)
logLik(pointsPerSeason_linearRegression)
logLik(pointsPerSeason_quadraticRegression)
```
### Mixed Models {#sec-fantasyPointsByAgeExperienceModels}
By accounting for which player each observation comes from using mixed models, we can examine the association between age and fantasy points in a more meaningful way, without violating the [assumption in multiple regression](#sec-assumptionsRegression) that the observations are independent (i.e., that the residuals are uncorrelated).
#### Random Intercepts Model {#sec-fantasyPointsByAgeExperienceModelsIntercepts}
```{r}
pointsPerSeason_randomIntercepts <- lmerTest::lmer(
fantasyPoints ~ 1 + (1 | player_idFactor),
data = player_stats_seasonal_offense_subset,
REML = FALSE,
control = lmerControl(optimizer = "bobyqa")
)
summary(pointsPerSeason_randomIntercepts)
MuMIn::r.squaredGLMM(pointsPerSeason_randomIntercepts)
performance::icc(pointsPerSeason_randomIntercepts)
AIC(pointsPerSeason_randomIntercepts)
AICc(pointsPerSeason_randomIntercepts)
```
A plot of the model-implied trajectories of fantasy points by age from the mixed model with random intercepts is in @fig-fantasyPointsRandomIntercepts.
```{r}
#| label: fig-fantasyPointsRandomIntercepts
#| fig-cap: "Plot of Model-Implied Trajectories of Fantasy Points by Age in Random Intercepts Mixed Model."
#| fig-alt: "Plot of Model-Implied Trajectories of Fantasy Points by Age in Random Intercepts Mixed Model."
#| code-fold: true
pointsPerSeason_positionAge_newData$fantasyPoints_randomIntercepts <- predict(
object = pointsPerSeason_randomIntercepts,
newdata = pointsPerSeason_positionAge_newData,
re.form = NA
)
ggplot2::ggplot(
data = pointsPerSeason_positionAge_newData,
mapping = aes(
x = age,
y = fantasyPoints_randomIntercepts
)
) +
geom_line(linewidth = 2) +
labs(
x = "Player Age (years)",
y = "Fantasy Points (Season)",
title = "Fantasy Points (Season) by Player Age",
subtitle = "Random Intercepts Model"
) +
theme_classic()
```
A plot of individuals' model-implied trajectories of fantasy points by age from the mixed model with random intercepts is in @fig-individualFantasyPointsRandomIntercepts.
```{r}
#| label: fig-individualFantasyPointsRandomIntercepts
#| fig-cap: "Plot of Individuals' Implied Trajectories of Fantasy Points by Age, from a Mixed Model with Random Intercepts. Overlaid with the Model-Implied Trajectory."
#| fig-alt: "Plot of Individuals' Implied Trajectories of Fantasy Points by Age, from a Mixed Model with Random Intercepts. Overlaid with the Model-Implied Trajectory."
#| code-fold: true
player_stats_seasonal_offense_subsetCC$fantasyPoints_randomIntercepts <- predict(
object = pointsPerSeason_randomIntercepts,
newdata = player_stats_seasonal_offense_subsetCC
)
plot_individualFantasyPointsRandomIntercepts <- ggplot(
data = player_stats_seasonal_offense_subsetCC %>%
mutate(
age = round(age, 2),
fantasyPoints_randomIntercepts = round(fantasyPoints_randomIntercepts, 2)
),
mapping = aes(
x = age,
y = fantasyPoints_randomIntercepts,
group = player_id)) +
geom_line(
aes(
x = age,
y = fantasyPoints_randomIntercepts,
text = player_display_name, # add player name for mouse over tooltip
label = season # add season for mouse over tooltip
),
linewidth = 0.5,
color = "black") +
geom_line(
mapping = aes(
x = age,
y = fantasyPoints_randomIntercepts
),
data = pointsPerSeason_positionAge_newData %>%
mutate(
age = round(age, 2),
fantasyPoints = round(fantasyPoints_randomIntercepts, 2)
),
inherit.aes = FALSE,
se = TRUE,
color = "#3366FF",
linewidth = 2
) +
labs(
x = "Player Age (years)",
y = "Fantasy Points (Season)",
title = "Fantasy Points (Season) by Player Age and Position: Random Intercepts Model",
#color = "Position"
) +
theme_classic()
ggplotly(
plot_individualFantasyPointsRandomIntercepts,
tooltip = c("age","fantasyPoints_randomIntercepts","text","label")
)
```
#### Random Intercepts Model with Position as Fixed-Effect Predictor {#sec-fantasyPointsByAgeExperienceModelsPosition}
```{r}
pointsPerSeason_position <- lmerTest::lmer(
fantasyPoints ~ positionFactor + (1 | player_idFactor),
data = player_stats_seasonal_offense_subset,
REML = FALSE,
control = lmerControl(optimizer = "bobyqa")
)
summary(pointsPerSeason_position)
MuMIn::r.squaredGLMM(pointsPerSeason_position)
emmeans::emmeans(pointsPerSeason_position, "positionFactor")
performance::icc(pointsPerSeason_position)
AIC(pointsPerSeason_position)
```
A plot of the model-implied trajectories of fantasy points by age and position from the mixed model with random intercepts and a fixed effect of position is in @fig-fantasyPointsPosition.
```{r}
#| label: fig-fantasyPointsPosition
#| fig-cap: "Plot of Model-Implied Trajectories of Fantasy Points by Age in Random Intercepts Mixed Model With Position as a Fixed-Effect Predictor."
#| fig-alt: "Plot of Model-Implied Trajectories of Fantasy Points by Age in Random Intercepts Mixed Model With Position as a Fixed-Effect Predictor."
#| code-fold: true