diff --git a/404.html b/404.html index 8bfac0a8..e48d3358 100644 --- a/404.html +++ b/404.html @@ -34,7 +34,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/LICENSE-text.html b/LICENSE-text.html index 4ea40f54..b9cbd00e 100644 --- a/LICENSE-text.html +++ b/LICENSE-text.html @@ -7,7 +7,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/LICENSE.html b/LICENSE.html index 7c17cd65..8e8252f0 100644 --- a/LICENSE.html +++ b/LICENSE.html @@ -7,7 +7,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/articles/beginners_guide.html b/articles/beginners_guide.html index b082f96c..dfcf7788 100644 --- a/articles/beginners_guide.html +++ b/articles/beginners_guide.html @@ -29,7 +29,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 @@ -668,7 +668,7 @@ Figures with QB stats load_teams() #> ── nflverse team graphics ────────────────────────────────────────────────────── -#> ℹ Data updated: 2024-08-02 17:34:42 UTC +#> ℹ Data updated: 2024-08-12 18:41:37 UTC #> # A tibble: 32 × 16 #> team_abbr team_name team_id team_nick team_conf team_division team_color #> <chr> <chr> <chr> <chr> <chr> <chr> <chr> @@ -918,7 +918,7 @@ Get team wins each season#> $ stadium_id : chr [1:6978] "ATL00" "CHI98" "CLE00" "GNB00" ... #> $ stadium : chr [1:6978] "Georgia Dome" "Soldier Field" "Cleveland Browns Stadium" "Lambeau Field" ... #> - attr(*, "nflverse_type")= chr "games and schedules" -#> - attr(*, "nflverse_timestamp")= POSIXct[1:1], format: "2024-08-02 17:35:13" +#> - attr(*, "nflverse_timestamp")= POSIXct[1:1], format: "2024-08-12 18:42:06" To start, we want to create a dataframe where each row is a team-season observation, listing how many games they won. There are multiple ways to do this, but I’m going to just take the home and away @@ -931,7 +931,7 @@ Get team wins each season rename(team = home_team) home %>% head(5) #> ── nflverse games and schedules ──────────────────────────────────────────────── -#> ℹ Data updated: 2024-08-02 17:35:13 UTC +#> ℹ Data updated: 2024-08-12 18:42:06 UTC #> # A tibble: 5 × 4 #> season week team result #> <int> <int> <chr> <int> @@ -950,7 +950,7 @@ Get team wins each season mutate(result = -result) away %>% head(5) #> ── nflverse games and schedules ──────────────────────────────────────────────── -#> ℹ Data updated: 2024-08-02 17:35:13 UTC +#> ℹ Data updated: 2024-08-12 18:42:06 UTC #> # A tibble: 5 × 4 #> season week team result #> <int> <int> <chr> <int> @@ -975,7 +975,7 @@ Get team wins each season results %>% filter(season == 2019 & team == 'SEA') #> ── nflverse games and schedules ──────────────────────────────────────────────── -#> ℹ Data updated: 2024-08-02 17:35:13 UTC +#> ℹ Data updated: 2024-08-12 18:42:06 UTC #> # A tibble: 16 × 5 #> season week team result win #> <int> <int> <chr> <int> <dbl> diff --git a/articles/field_descriptions.html b/articles/field_descriptions.html index 4a5593ce..5da90fa1 100644 --- a/articles/field_descriptions.html +++ b/articles/field_descriptions.html @@ -29,7 +29,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/articles/index.html b/articles/index.html index 0e5e14ae..7db9fe3e 100644 --- a/articles/index.html +++ b/articles/index.html @@ -7,7 +7,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/articles/nflfastR.html b/articles/nflfastR.html index 8eef379f..0ea69fde 100644 --- a/articles/nflfastR.html +++ b/articles/nflfastR.html @@ -29,7 +29,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 @@ -124,27 +124,27 @@ The Main Functionsdplyr::filter(game_type == "SB") %>% dplyr::pull(game_id) pbp <- nflfastR::build_nflfastR_pbp(ids) -#> ── Build nflfastR Play-by-Play Data ───────────── nflfastR version 4.6.1.9011 ── -#> • 17:36:21 | Start download of 3 games... -#> ✔ 17:36:26 | Download finished. Adding variables... -#> ✔ 17:36:26 | added game variables -#> ✔ 17:36:27 | added nflscrapR variables -#> ✔ 17:36:27 | added ep variables -#> ✔ 17:36:27 | added air_yac_ep variables -#> ✔ 17:36:28 | added wp variables -#> ✔ 17:36:28 | added air_yac_wp variables -#> ✔ 17:36:28 | added cp and cpoe -#> ✔ 17:36:28 | added fixed drive variables -#> ✔ 17:36:28 | added series variables -#> • 17:36:28 | Cleaning up play-by-play... -#> ✔ 17:36:28 | Cleaning completed -#> ✔ 17:36:28 | added qb_epa -#> • 17:36:28 | Computing xyac... -#> ✔ 17:36:30 | added xyac variables -#> • 17:36:30 | Computing xpass... -#> ✔ 17:36:31 | added xpass and pass_oe -#> • 17:36:31 | Decode player ids... -#> ✔ 17:36:32 | Decoding of player ids completed +#> ── Build nflfastR Play-by-Play Data ───────────── nflfastR version 4.6.1.9012 ── +#> • 18:43:07 | Start download of 3 games... +#> ✔ 18:43:12 | Download finished. Adding variables... +#> ✔ 18:43:12 | added game variables +#> ✔ 18:43:13 | added nflscrapR variables +#> ✔ 18:43:13 | added ep variables +#> ✔ 18:43:13 | added air_yac_ep variables +#> ✔ 18:43:14 | added wp variables +#> ✔ 18:43:14 | added air_yac_wp variables +#> ✔ 18:43:14 | added cp and cpoe +#> ✔ 18:43:14 | added fixed drive variables +#> ✔ 18:43:14 | added series variables +#> • 18:43:14 | Cleaning up play-by-play... +#> ✔ 18:43:14 | Cleaning completed +#> ✔ 18:43:14 | added qb_epa +#> • 18:43:14 | Computing xyac... +#> ✔ 18:43:16 | added xyac variables +#> • 18:43:17 | Computing xpass... +#> ✔ 18:43:17 | added xpass and pass_oe +#> • 18:43:17 | Decode player ids... +#> ✔ 18:43:18 | Decoding of player ids completed #> ── DONE ──────────────────────────────────────────────────────────────────────── In most cases, however, it is not necessary to use this function for individual games, because nflfastR provides both a data repository and @@ -359,7 +359,7 @@ Exampl tictoc::tic(glue::glue("{length(games_2019)} games with nflfastR:")) f <- nflfastR::fast_scraper(games_2019) tictoc::toc() -#> 10 games with nflfastR:: 8.106 sec elapsed +#> 10 games with nflfastR:: 8.25 sec elapsed Example 3: Completion Percentage Over Expected (CPOE) @@ -375,7 +375,7 @@ Example 3: Completio tictoc::tic("loading all games from 2009") games_2009 <- nflfastR::load_pbp(2009) %>% dplyr::filter(season_type == "REG") tictoc::toc() -#> loading all games from 2009: 2.198 sec elapsed +#> loading all games from 2009: 2.088 sec elapsed games_2009 %>% dplyr::filter(!is.na(cpoe)) %>% dplyr::group_by(passer_player_name) %>% @@ -864,15 +864,15 @@ Build database nflfastR::update_db() -#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9011 ── -#> ℹ 17:37:20 | Can't find the data table "nflfastR_pbp" +#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9012 ── +#> ℹ 18:44:02 | Can't find the data table "nflfastR_pbp" #> in your database. Will load the play by play data from #> scratch. -#> • 17:37:20 | Starting download of 25 seasons between 1999 and 2023... -#> • 17:38:45 | Checking for missing completed games... -#> ℹ 17:38:47 | You have 6703 games and are missing 0. -#> ✔ 17:38:47 | Database update completed -#> ℹ 17:38:47 | Path to your db: ./pbp_db +#> • 18:44:02 | Starting download of 25 seasons between 1999 and 2023... +#> • 18:45:21 | Checking for missing completed games... +#> ℹ 18:45:22 | You have 6703 games and are missing 0. +#> ✔ 18:45:23 | Database update completed +#> ℹ 18:45:23 | Path to your db: ./pbp_db #> ── DONE ──────────────────────────────────────────────────────────────────────── This created a database in the current directory called pbp_db. @@ -883,25 +883,25 @@ Build database nflfastR::update_db() -#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9011 ── -#> • 17:38:47 | Checking for missing completed games... -#> ℹ 17:38:48 | You have 6703 games and are missing 0. -#> ✔ 17:38:49 | Database update completed -#> ℹ 17:38:49 | Path to your db: /home/runner/work/nflfastR/nflfastR/vignettes/pbp_db +#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9012 ── +#> • 18:45:23 | Checking for missing completed games... +#> ℹ 18:45:23 | You have 6703 games and are missing 0. +#> ✔ 18:45:24 | Database update completed +#> ℹ 18:45:24 | Path to your db: /home/runner/work/nflfastR/nflfastR/vignettes/pbp_db #> ── DONE ──────────────────────────────────────────────────────────────────────── If it’s partway through a season and you want to re-build a season to allow for data corrections from the NFL to propagate into your database, you can specify one season to be rebuilt: nflfastR::update_db(force_rebuild = 2020) -#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9011 ── -#> • 17:38:49 | Purging season 2020 from the data table "nflfastR_pbp" in your +#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9012 ── +#> • 18:45:24 | Purging season 2020 from the data table "nflfastR_pbp" in your #> connected database... -#> • 17:38:50 | Starting download of the 1 season 2020 -#> • 17:38:53 | Checking for missing completed games... -#> ℹ 17:38:54 | You have 6703 games and are missing 0. -#> ✔ 17:38:54 | Database update completed -#> ℹ 17:38:54 | Path to your db: /home/runner/work/nflfastR/nflfastR/vignettes/pbp_db +#> • 18:45:25 | Starting download of the 1 season 2020 +#> • 18:45:28 | Checking for missing completed games... +#> ℹ 18:45:28 | You have 6703 games and are missing 0. +#> ✔ 18:45:29 | Database update completed +#> ℹ 18:45:29 | Path to your db: /home/runner/work/nflfastR/nflfastR/vignettes/pbp_db #> ── DONE ──────────────────────────────────────────────────────────────────────── diff --git a/authors.html b/authors.html index 2b8f34b9..5fa7b6ce 100644 --- a/authors.html +++ b/authors.html @@ -7,7 +7,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 @@ -100,14 +100,14 @@ Citation Carl S, Baldwin B (2024). nflfastR: Functions to Efficiently Access NFL Play by Play Data. -R package version 4.6.1.9011, +R package version 4.6.1.9012, https://github.com/nflverse/nflfastR, https://www.nflfastr.com/. @Manual{, title = {nflfastR: Functions to Efficiently Access NFL Play by Play Data}, author = {Sebastian Carl and Ben Baldwin}, year = {2024}, - note = {R package version 4.6.1.9011, + note = {R package version 4.6.1.9012, https://github.com/nflverse/nflfastR}, url = {https://www.nflfastr.com/}, } diff --git a/index.html b/index.html index cc4b4680..2c248dad 100644 --- a/index.html +++ b/index.html @@ -36,7 +36,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/news/index.html b/news/index.html index 40c2e093..35bd93e2 100644 --- a/news/index.html +++ b/news/index.html @@ -7,7 +7,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 @@ -73,7 +73,8 @@ #463) Fixed an issue affecting scores of 2022 games involving a return touchdown (#466) Added identification of scrambles from 1999 through 2004 with thank to Aaron Schatz (#468) -nflfastR tried to fix bugs in the underlying pbp data of JAX home games prior to the 2016 season. An update of the raw pbp data resolved those bugs so nflfastR needs to remove the hard coded adjustments. This means that nflfastR <= v4.6.1 will return incorrect pbp data for all Jacksonville home games prior to the 2016 season! +nflfastR tried to fix bugs in the underlying pbp data of JAX home games prior to the 2016 season. An update of the raw pbp data resolved those bugs so nflfastR needs to remove the hard coded adjustments. This means that nflfastR <= v4.6.1 will return incorrect pbp data for all Jacksonville home games prior to the 2016 season! (#478) +Fixed a problem where clean_pbp() returned pass = 1 in actual rush plays in very rare cases. (#479) nflfastR 4.6.1CRAN release: 2024-01-09 diff --git a/pkgdown.yml b/pkgdown.yml index 21ee61a4..41403ac3 100644 --- a/pkgdown.yml +++ b/pkgdown.yml @@ -1,11 +1,11 @@ pandoc: 3.1.11 pkgdown: 2.1.0.9000 -pkgdown_sha: 502c3e2ce0df6c7236d87098637073eaba61c5f4 +pkgdown_sha: 1d40a80e6b3564a6d7da0ce467b0a4570aa5665e articles: beginners_guide: beginners_guide.html field_descriptions: field_descriptions.html nflfastR: nflfastR.html -last_built: 2024-08-02T17:32Z +last_built: 2024-08-12T18:39Z urls: reference: https://www.nflfastr.com/reference article: https://www.nflfastr.com/articles diff --git a/reference/add_qb_epa.html b/reference/add_qb_epa.html index 0abe926b..2135bb61 100644 --- a/reference/add_qb_epa.html +++ b/reference/add_qb_epa.html @@ -7,7 +7,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/add_xpass.html b/reference/add_xpass.html index 9b638485..890a6915 100644 --- a/reference/add_xpass.html +++ b/reference/add_xpass.html @@ -15,7 +15,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/add_xyac.html b/reference/add_xyac.html index e27d2e28..937b1ad3 100644 --- a/reference/add_xyac.html +++ b/reference/add_xyac.html @@ -7,7 +7,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/build_nflfastR_pbp.html b/reference/build_nflfastR_pbp.html index 95dd9da8..c7cf9b93 100644 --- a/reference/build_nflfastR_pbp.html +++ b/reference/build_nflfastR_pbp.html @@ -31,7 +31,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 @@ -155,44 +155,44 @@ Examplestry({# to avoid CRAN test problems build_nflfastR_pbp(c("2018_21_NE_LA", "2019_21_SF_KC")) }) -#> ── Build nflfastR Play-by-Play Data ───────────── nflfastR version 4.6.1.9011 ── -#> • 17:32:50 | Start download of 2 games... +#> ── Build nflfastR Play-by-Play Data ───────────── nflfastR version 4.6.1.9012 ── +#> • 18:39:49 | Start download of 2 games... #> ℹ It is recommended to use parallel processing when trying to load multiple games.Please consider running `future::plan("multisession")`! Will go on sequentially... -#> ✔ 17:32:52 | Download finished. Adding variables... -#> ✔ 17:32:53 | added game variables -#> ✔ 17:32:53 | added nflscrapR variables -#> ✔ 17:32:54 | added ep variables -#> ✔ 17:32:54 | added air_yac_ep variables -#> ✔ 17:32:54 | added wp variables -#> ✔ 17:32:54 | added air_yac_wp variables -#> ✔ 17:32:54 | added cp and cpoe -#> ✔ 17:32:54 | added fixed drive variables -#> ✔ 17:32:54 | added series variables -#> • 17:32:54 | Cleaning up play-by-play... -#> ✔ 17:32:54 | Cleaning completed -#> ✔ 17:32:55 | added qb_epa -#> • 17:32:55 | Computing xyac... -#> ✔ 17:32:57 | added xyac variables -#> • 17:32:57 | Computing xpass... -#> ✔ 17:32:57 | added xpass and pass_oe -#> • 17:32:57 | Decode player ids... -#> ✔ 17:32:58 | Decoding of player ids completed +#> ✔ 18:39:52 | Download finished. Adding variables... +#> ✔ 18:39:52 | added game variables +#> ✔ 18:39:52 | added nflscrapR variables +#> ✔ 18:39:53 | added ep variables +#> ✔ 18:39:53 | added air_yac_ep variables +#> ✔ 18:39:53 | added wp variables +#> ✔ 18:39:53 | added air_yac_wp variables +#> ✔ 18:39:53 | added cp and cpoe +#> ✔ 18:39:53 | added fixed drive variables +#> ✔ 18:39:54 | added series variables +#> • 18:39:54 | Cleaning up play-by-play... +#> ✔ 18:39:54 | Cleaning completed +#> ✔ 18:39:54 | added qb_epa +#> • 18:39:54 | Computing xyac... +#> ✔ 18:39:56 | added xyac variables +#> • 18:39:56 | Computing xpass... +#> ✔ 18:39:56 | added xpass and pass_oe +#> • 18:39:56 | Decode player ids... +#> ✔ 18:39:57 | Decoding of player ids completed #> ── DONE ──────────────────────────────────────────────────────────────────────── #> ── nflverse play by play ─────────────────────────────────────────────────────── -#> ℹ Data updated: 2024-08-02 17:32:58 UTC +#> ℹ Data updated: 2024-08-12 18:39:57 UTC #> # A tibble: 349 × 372 #> play_id game_id old_game_id home_team away_team season_type week posteam #> <dbl> <chr> <chr> <chr> <chr> <chr> <int> <chr> #> 1 1 2018_21_NE… 2019020300 LA NE POST 21 NA -#> 2 38 2018_21_NE… 2019020300 LA NE POST 21 NE +#> 2 38 2018_21_NE… 2019020300 LA NE POST 21 LAR #> 3 67 2018_21_NE… 2019020300 LA NE POST 21 NE #> 4 89 2018_21_NE… 2019020300 LA NE POST 21 NE #> 5 111 2018_21_NE… 2019020300 LA NE POST 21 NE #> 6 133 2018_21_NE… 2019020300 LA NE POST 21 NE #> 7 155 2018_21_NE… 2019020300 LA NE POST 21 NE -#> 8 182 2018_21_NE… 2019020300 LA NE POST 21 LA -#> 9 204 2018_21_NE… 2019020300 LA NE POST 21 LA -#> 10 226 2018_21_NE… 2019020300 LA NE POST 21 LA +#> 8 182 2018_21_NE… 2019020300 LA NE POST 21 LAR +#> 9 204 2018_21_NE… 2019020300 LA NE POST 21 LAR +#> 10 226 2018_21_NE… 2019020300 LA NE POST 21 LAR #> # ℹ 339 more rows #> # ℹ 364 more variables: posteam_type <chr>, defteam <chr>, side_of_field <chr>, #> # yardline_100 <dbl>, game_date <chr>, quarter_seconds_remaining <dbl>, @@ -209,31 +209,31 @@ Examples slice_tail(n = 3) %>% build_nflfastR_pbp() }) -#> ── Build nflfastR Play-by-Play Data ───────────── nflfastR version 4.6.1.9011 ── -#> • 17:32:58 | Start download of 3 games... +#> ── Build nflfastR Play-by-Play Data ───────────── nflfastR version 4.6.1.9012 ── +#> • 18:39:57 | Start download of 3 games... #> ℹ It is recommended to use parallel processing when trying to load multiple games.Please consider running `future::plan("multisession")`! Will go on sequentially... -#> ✔ 17:33:01 | Download finished. Adding variables... -#> ✔ 17:33:01 | added game variables -#> ✔ 17:33:01 | added nflscrapR variables -#> ✔ 17:33:01 | added ep variables -#> ✔ 17:33:01 | added air_yac_ep variables -#> ✔ 17:33:02 | added wp variables -#> ✔ 17:33:02 | added air_yac_wp variables -#> ✔ 17:33:02 | added cp and cpoe -#> ✔ 17:33:02 | added fixed drive variables -#> ✔ 17:33:02 | added series variables -#> • 17:33:02 | Cleaning up play-by-play... -#> ✔ 17:33:02 | Cleaning completed -#> ✔ 17:33:02 | added qb_epa -#> • 17:33:02 | Computing xyac... -#> ✔ 17:33:03 | added xyac variables -#> • 17:33:03 | Computing xpass... -#> ✔ 17:33:03 | added xpass and pass_oe -#> • 17:33:03 | Decode player ids... -#> ✔ 17:33:03 | Decoding of player ids completed +#> ✔ 18:40:00 | Download finished. Adding variables... +#> ✔ 18:40:00 | added game variables +#> ✔ 18:40:00 | added nflscrapR variables +#> ✔ 18:40:00 | added ep variables +#> ✔ 18:40:00 | added air_yac_ep variables +#> ✔ 18:40:00 | added wp variables +#> ✔ 18:40:01 | added air_yac_wp variables +#> ✔ 18:40:01 | added cp and cpoe +#> ✔ 18:40:01 | added fixed drive variables +#> ✔ 18:40:01 | added series variables +#> • 18:40:01 | Cleaning up play-by-play... +#> ✔ 18:40:01 | Cleaning completed +#> ✔ 18:40:01 | added qb_epa +#> • 18:40:01 | Computing xyac... +#> ✔ 18:40:02 | added xyac variables +#> • 18:40:02 | Computing xpass... +#> ✔ 18:40:02 | added xpass and pass_oe +#> • 18:40:02 | Decode player ids... +#> ✔ 18:40:02 | Decoding of player ids completed #> ── DONE ──────────────────────────────────────────────────────────────────────── #> ── nflverse play by play ─────────────────────────────────────────────────────── -#> ℹ Data updated: 2024-08-02 17:33:03 UTC +#> ℹ Data updated: 2024-08-12 18:40:02 UTC #> # A tibble: 539 × 372 #> play_id game_id old_game_id home_team away_team season_type week posteam #> <dbl> <chr> <chr> <chr> <chr> <chr> <int> <chr> diff --git a/reference/calculate_expected_points.html b/reference/calculate_expected_points.html index 80b007a2..759f3626 100644 --- a/reference/calculate_expected_points.html +++ b/reference/calculate_expected_points.html @@ -15,7 +15,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/calculate_player_stats.html b/reference/calculate_player_stats.html index 9fde4f08..b640ded1 100644 --- a/reference/calculate_player_stats.html +++ b/reference/calculate_player_stats.html @@ -9,7 +9,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/calculate_player_stats_def.html b/reference/calculate_player_stats_def.html index cb089c88..5ca7c22d 100644 --- a/reference/calculate_player_stats_def.html +++ b/reference/calculate_player_stats_def.html @@ -9,7 +9,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/calculate_player_stats_kicking.html b/reference/calculate_player_stats_kicking.html index 4dcaf9b2..26f8a24c 100644 --- a/reference/calculate_player_stats_kicking.html +++ b/reference/calculate_player_stats_kicking.html @@ -7,7 +7,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/calculate_series_conversion_rates.html b/reference/calculate_series_conversion_rates.html index 683dd95c..1ab34f3e 100644 --- a/reference/calculate_series_conversion_rates.html +++ b/reference/calculate_series_conversion_rates.html @@ -19,7 +19,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/calculate_standings.html b/reference/calculate_standings.html index a9582eee..2901a3a9 100644 --- a/reference/calculate_standings.html +++ b/reference/calculate_standings.html @@ -11,7 +11,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/calculate_win_probability.html b/reference/calculate_win_probability.html index 291bfaee..463d1599 100644 --- a/reference/calculate_win_probability.html +++ b/reference/calculate_win_probability.html @@ -19,7 +19,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/clean_pbp.html b/reference/clean_pbp.html index aa026b64..98a56265 100644 --- a/reference/clean_pbp.html +++ b/reference/clean_pbp.html @@ -7,7 +7,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/decode_player_ids.html b/reference/decode_player_ids.html index 188a38e7..3d1672f8 100644 --- a/reference/decode_player_ids.html +++ b/reference/decode_player_ids.html @@ -25,7 +25,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 @@ -134,8 +134,8 @@ Examples "32013030-2d30-3032-3739-3434d4d3846d" ) )) -#> • 17:33:48 | Decode player ids... -#> ✔ 17:33:48 | Decoding of player ids completed +#> • 18:40:46 | Decode player ids... +#> ✔ 18:40:46 | Decoding of player ids completed #> name id #> 1 P.Mahomes 00-0033873 #> 2 B.Baldwin <NA> diff --git a/reference/fast_scraper.html b/reference/fast_scraper.html index 254a9e09..450f178b 100644 --- a/reference/fast_scraper.html +++ b/reference/fast_scraper.html @@ -13,7 +13,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 @@ -1162,24 +1162,24 @@ Examplesfast_scraper(c("2019_01_GB_CHI", "2013_21_SEA_DEN")) }) #> ℹ It is recommended to use parallel processing when trying to load multiple games.Please consider running `future::plan("multisession")`! Will go on sequentially... -#> ✔ 17:33:50 | Download finished. Adding variables... -#> ✔ 17:33:50 | added game variables -#> ✔ 17:33:50 | added nflscrapR variables -#> ✔ 17:33:50 | added ep variables -#> ✔ 17:33:50 | added air_yac_ep variables -#> ✔ 17:33:51 | added wp variables -#> ✔ 17:33:51 | added air_yac_wp variables -#> ✔ 17:33:51 | added cp and cpoe -#> ✔ 17:33:51 | added fixed drive variables -#> ✔ 17:33:51 | added series variables -#> ✔ 17:33:51 | Procedure completed. +#> ✔ 18:40:48 | Download finished. Adding variables... +#> ✔ 18:40:49 | added game variables +#> ✔ 18:40:49 | added nflscrapR variables +#> ✔ 18:40:49 | added ep variables +#> ✔ 18:40:49 | added air_yac_ep variables +#> ✔ 18:40:49 | added wp variables +#> ✔ 18:40:49 | added air_yac_wp variables +#> ✔ 18:40:49 | added cp and cpoe +#> ✔ 18:40:49 | added fixed drive variables +#> ✔ 18:40:50 | added series variables +#> ✔ 18:40:50 | Procedure completed. #> ── nflverse play by play ─────────────────────────────────────────────────────── -#> ℹ Data updated: 2024-08-02 17:33:51 UTC +#> ℹ Data updated: 2024-08-12 18:40:50 UTC #> # A tibble: 337 × 339 #> play_id game_id old_game_id home_team away_team season_type week posteam #> <dbl> <chr> <chr> <chr> <chr> <chr> <int> <chr> #> 1 1 2013_21_SE… 2014020200 DEN SEA POST 21 NA -#> 2 37 2013_21_SE… 2014020200 DEN SEA POST 21 NA +#> 2 37 2013_21_SE… 2014020200 DEN SEA POST 21 DEN #> 3 61 2013_21_SE… 2014020200 DEN SEA POST 21 DEN #> 4 83 2013_21_SE… 2014020200 DEN SEA POST 21 DEN #> 5 119 2013_21_SE… 2014020200 DEN SEA POST 21 SEA @@ -1206,19 +1206,19 @@ Examples fast_scraper() }) #> ℹ It is recommended to use parallel processing when trying to load multiple games.Please consider running `future::plan("multisession")`! Will go on sequentially... -#> ✔ 17:33:53 | Download finished. Adding variables... -#> ✔ 17:33:53 | added game variables -#> ✔ 17:33:54 | added nflscrapR variables -#> ✔ 17:33:54 | added ep variables -#> ✔ 17:33:54 | added air_yac_ep variables -#> ✔ 17:33:54 | added wp variables -#> ✔ 17:33:54 | added air_yac_wp variables -#> ✔ 17:33:54 | added cp and cpoe -#> ✔ 17:33:54 | added fixed drive variables -#> ✔ 17:33:55 | added series variables -#> ✔ 17:33:55 | Procedure completed. +#> ✔ 18:40:52 | Download finished. Adding variables... +#> ✔ 18:40:52 | added game variables +#> ✔ 18:40:53 | added nflscrapR variables +#> ✔ 18:40:53 | added ep variables +#> ✔ 18:40:53 | added air_yac_ep variables +#> ✔ 18:40:53 | added wp variables +#> ✔ 18:40:53 | added air_yac_wp variables +#> ✔ 18:40:53 | added cp and cpoe +#> ✔ 18:40:53 | added fixed drive variables +#> ✔ 18:40:53 | added series variables +#> ✔ 18:40:53 | Procedure completed. #> ── nflverse play by play ─────────────────────────────────────────────────────── -#> ℹ Data updated: 2024-08-02 17:33:55 UTC +#> ℹ Data updated: 2024-08-12 18:40:53 UTC #> # A tibble: 539 × 339 #> play_id game_id old_game_id home_team away_team season_type week posteam #> <dbl> <chr> <chr> <chr> <chr> <chr> <int> <chr> diff --git a/reference/fast_scraper_roster.html b/reference/fast_scraper_roster.html index f2153786..619908c5 100644 --- a/reference/fast_scraper_roster.html +++ b/reference/fast_scraper_roster.html @@ -7,7 +7,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/fast_scraper_schedules.html b/reference/fast_scraper_schedules.html index 9f772b51..abfe0c35 100644 --- a/reference/fast_scraper_schedules.html +++ b/reference/fast_scraper_schedules.html @@ -7,7 +7,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 @@ -102,7 +102,7 @@ Examplesfast_scraper_schedules(2015:2018) }) #> ── nflverse games and schedules ──────────────────────────────────────────────── -#> ℹ Data updated: 2024-08-02 17:33:57 UTC +#> ℹ Data updated: 2024-08-12 18:40:55 UTC #> # A tibble: 1,068 × 46 #> game_id season game_type week gameday weekday gametime away_team away_score #> <chr> <int> <chr> <int> <chr> <chr> <chr> <chr> <int> diff --git a/reference/field_descriptions.html b/reference/field_descriptions.html index 4187467e..963fd8a1 100644 --- a/reference/field_descriptions.html +++ b/reference/field_descriptions.html @@ -7,7 +7,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/index.html b/reference/index.html index 1fc1bb03..a0580e89 100644 --- a/reference/index.html +++ b/reference/index.html @@ -7,7 +7,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/load_pbp.html b/reference/load_pbp.html index 561b6017..eb656b23 100644 --- a/reference/load_pbp.html +++ b/reference/load_pbp.html @@ -7,7 +7,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/load_player_stats.html b/reference/load_player_stats.html index 6f757b22..21600794 100644 --- a/reference/load_player_stats.html +++ b/reference/load_player_stats.html @@ -7,7 +7,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/missing_raw_pbp.html b/reference/missing_raw_pbp.html index 8840cc55..05d1ea61 100644 --- a/reference/missing_raw_pbp.html +++ b/reference/missing_raw_pbp.html @@ -11,7 +11,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/nflfastR-package.html b/reference/nflfastR-package.html index 27cf8783..df95c5d6 100644 --- a/reference/nflfastR-package.html +++ b/reference/nflfastR-package.html @@ -9,7 +9,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/report.html b/reference/report.html index 23515dbf..56f09e23 100644 --- a/reference/report.html +++ b/reference/report.html @@ -13,7 +13,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 @@ -118,7 +118,7 @@ Examples#> • R version 4.4.1 (2024-06-14) • Running under: Ubuntu 22.04.4 LTS #> ── Package Status ────────────────────────────────────────────────────────────── #> package installed cran dev behind -#> 1 nflfastR 4.6.1.9011 4.6.1 4.6.1.9010 +#> 1 nflfastR 4.6.1.9012 4.6.1 4.6.1.9011 #> 2 nflplotR 1.3.1 1.3.1 1.3.1 #> 3 nflreadr 1.4.1.00 1.4.1 1.4.1.00 #> 4 nflseedR 1.2.0 1.2.0 1.2.0.9001 dev diff --git a/reference/save_raw_pbp.html b/reference/save_raw_pbp.html index b5fd723b..c058862f 100644 --- a/reference/save_raw_pbp.html +++ b/reference/save_raw_pbp.html @@ -13,7 +13,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/stat_ids.html b/reference/stat_ids.html index 4b8db4e2..a4538e11 100644 --- a/reference/stat_ids.html +++ b/reference/stat_ids.html @@ -7,7 +7,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/teams_colors_logos.html b/reference/teams_colors_logos.html index 784bdf1e..7b2e4069 100644 --- a/reference/teams_colors_logos.html +++ b/reference/teams_colors_logos.html @@ -7,7 +7,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/reference/update_db.html b/reference/update_db.html index 3d07df90..13b5bf0c 100644 --- a/reference/update_db.html +++ b/reference/update_db.html @@ -9,7 +9,7 @@ nflfastR - 4.6.1.9011 + 4.6.1.9012 diff --git a/search.json b/search.json index 7aa3b256..e468d654 100644 --- a/search.json +++ b/search.json @@ -1 +1 @@ -[{"path":"https://www.nflfastr.com/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2020 Sebastian Carl; Ben Baldwin Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"A beginner's guide to nflfastR","text":"following guide assume R installed. also highly recommend working RStudio. need help getting installed unfamiliar RStudio laid , please see section Lee Sharpe’s guide. quick word ’re new programming: happening R. Obviously, need install R computer . Make sure save ’re script (RStudio, File –> New File –> R script) can save work run multiple lines code . run code script, highlight want, press control + enter press Run button top editor (see Lee’s guide). don’t highlight anything press control + enter, currently selected line run. go R journey, might get stuck google bunch things, ’s totally okay normal. ’s got started!","code":""},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"setup","dir":"Articles","previous_headings":"","what":"Setup","title":"A beginner's guide to nflfastR","text":"First, need install magic packages. need run step given computer. can just type RStudio console (look Console pane RStudio) directly since ’re never going .","code":""},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"install-packages","dir":"Articles","previous_headings":"Setup","what":"Install packages","title":"A beginner's guide to nflfastR","text":"","code":"install.packages(\"tidyverse\", type = \"binary\") install.packages(\"ggrepel\", type = \"binary\") install.packages(\"nflreadr\", type = \"binary\") install.packages(\"nflplotR\", type = \"binary\")"},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"load-packages","dir":"Articles","previous_headings":"Setup","what":"Load packages","title":"A beginner's guide to nflfastR","text":"Okay, now ’s stuff ’re going want start putting R script. following loads tidyverse, contains lot helper functions working data ggrepel making figures, along nflreadr (allows one quickly download nflfastR data, along lot data). Finally, nflplotR makes plotting easier. one optional makes R prefer display numbers scientific notation, find annoying:","code":"library(tidyverse) library(ggrepel) library(nflreadr) library(nflplotR) options(scipen = 9999)"},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"load-data","dir":"Articles","previous_headings":"Setup","what":"Load data","title":"A beginner's guide to nflfastR","text":"load full play play 2019 season (including playoffs). ’ll get get seasons later. Note downloading pre-cleaned data nflfastR data repository using load_pbp() function included nflreadr, much faster building pbp scratch.","code":"data <- load_pbp(2019)"},{"path":[]},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"dimensions","dir":"Articles","previous_headings":"Basics: how to look at your data","what":"Dimensions","title":"A beginner's guide to nflfastR","text":"moving forward, ways get sense ’s dataframe. can check dimensions data, tells us 47260 rows (.e., plays) data 372 columns (variables): str displays structure dataframe: , ’ve added [1:10], selects first 10 columns, otherwise list extremely long (remember 372 columns!). Normally, just type str(data). can similarly take glimpse data: ’m showing first 10 columns. usual command glimpse(data).","code":"dim(data) #> [1] 47260 372 str(data[1:10]) #> nflvrs_d [47,260 × 10] (S3: nflverse_data/tbl_df/tbl/data.table/data.frame) #> $ play_id : num [1:47260] 1 36 51 79 100 121 148 185 214 239 ... #> $ game_id : chr [1:47260] \"2019_01_ATL_MIN\" \"2019_01_ATL_MIN\" \"2019_01_ATL_MIN\" \"2019_01_ATL_MIN\" ... #> $ old_game_id : chr [1:47260] \"2019090804\" \"2019090804\" \"2019090804\" \"2019090804\" ... #> $ home_team : chr [1:47260] \"MIN\" \"MIN\" \"MIN\" \"MIN\" ... #> $ away_team : chr [1:47260] \"ATL\" \"ATL\" \"ATL\" \"ATL\" ... #> $ season_type : chr [1:47260] \"REG\" \"REG\" \"REG\" \"REG\" ... #> $ week : int [1:47260] 1 1 1 1 1 1 1 1 1 1 ... #> $ posteam : chr [1:47260] NA \"ATL\" \"ATL\" \"ATL\" ... #> $ posteam_type: chr [1:47260] NA \"away\" \"away\" \"away\" ... #> $ defteam : chr [1:47260] NA \"MIN\" \"MIN\" \"MIN\" ... #> - attr(*, \"nflverse_timestamp\")= POSIXct[1:1], format: \"2024-03-07 14:39:28\" #> - attr(*, \"nflverse_type\")= chr \"play by play data\" #> - attr(*, \"nflfastR_version\")=Classes 'package_version', 'numeric_version' hidden list of 1 #> ..$ : int [1:4] 4 6 1 9007 glimpse(data[1:10]) #> Rows: 47,260 #> Columns: 10 #> $ play_id 1, 36, 51, 79, 100, 121, 148, 185, 214, 239, 255, 277, 29… #> $ game_id \"2019_01_ATL_MIN\", \"2019_01_ATL_MIN\", \"2019_01_ATL_MIN\", … #> $ old_game_id \"2019090804\", \"2019090804\", \"2019090804\", \"2019090804\", \"… #> $ home_team \"MIN\", \"MIN\", \"MIN\", \"MIN\", \"MIN\", \"MIN\", \"MIN\", \"MIN\", \"… #> $ away_team \"ATL\", \"ATL\", \"ATL\", \"ATL\", \"ATL\", \"ATL\", \"ATL\", \"ATL\", \"… #> $ season_type \"REG\", \"REG\", \"REG\", \"REG\", \"REG\", \"REG\", \"REG\", \"REG\", \"… #> $ week 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, … #> $ posteam NA, \"ATL\", \"ATL\", \"ATL\", \"ATL\", \"ATL\", \"MIN\", \"MIN\", \"MIN… #> $ posteam_type NA, \"away\", \"away\", \"away\", \"away\", \"away\", \"home\", \"home… #> $ defteam NA, \"MIN\", \"MIN\", \"MIN\", \"MIN\", \"MIN\", \"ATL\", \"ATL\", \"ATL…"},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"variable-names","dir":"Articles","previous_headings":"Basics: how to look at your data","what":"Variable names","title":"A beginner's guide to nflfastR","text":"Another useful command get names variables data, get entering names(data) (won’t show , , 372 columns). lot work !","code":""},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"viewer","dir":"Articles","previous_headings":"Basics: how to look at your data","what":"Viewer","title":"A beginner's guide to nflfastR","text":"One way look data View() function. ’re coming Excel background, help feel home way see ’s data. open viewer RStudio new panel. Try ! Since many columns, Viewer won’t show . pick columns view, can select : %>% thing lets pipe together bunch different commands. ’re taking data, “select”ing variables want look , Viewing. , can’t display results , try !","code":"View(data) data %>% select(home_team, away_team, posteam, desc) %>% View()"},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"head-manipulation","dir":"Articles","previous_headings":"Basics: how to look at your data","what":"Head + manipulation","title":"A beginner's guide to nflfastR","text":"start, let’s just look first rows (“head”) data. couple things. “desc” important variable lists description happened play, head says show first rows (“head” data). Since already sorted game, first 6 rows week 1 game, ATL @ MIN. make code easier read, people often put part pipe new line, useful working complicated functions. run: return exact output one written multiple lines, code isn’t easy read. ’ve covered select, next important function learn filter, lets filter data want. following returns plays run plays pass plays; .e., punts, kickoffs, field goals, dead ball penalties (e.g. false starts) don’t know attempted play . Compared first time , opening line start game, kickoff, punt now gone. Note ’re checking whether variable equal something, need use double equals sign == like . ’s probably technical reason [shrug emoji]. Also, character | used “”, & “”. rush == 1 | pass == 1 means “rush pass”. Note rush, pass, name, passer, rusher, receiver columns nflfastR creations, provided make working data easier. can see , passer filled dropbacks (including sacks scrambles, also pass = 1), name equal passer pass plays rusher rush plays. Think primary player involved play. wanted view special teams plays? , can use filter: Fourth plays? Fourth plays aren’t special teams plays? far, ’ve just taking look initial dataset downloaded, none results preserved. save new dataframe just plays want, need use <- assign new dataframe. Let’s save new dataframe ’s just run plays pass plays non-missing EPA, called pbp_rp. , !.na(epa) means exclude plays missing (na) EPA. ! symbol often used computer folk negate something, .na(epa) means “EPA missing” !.na(epa) means “EPA missing”, used .","code":"data %>% select(posteam, defteam, desc, rush, pass) %>% head() #> ── nflverse play by play data ────────────────────────────────────────────────── #> ℹ Data updated: 2024-03-07 14:39:28 UTC #> # A tibble: 6 × 5 #> posteam defteam desc rush pass #> #> 1 NA NA GAME 0 0 #> 2 ATL MIN 5-D.Bailey kicks 65 yards from MIN 35 to end zone… 0 0 #> 3 ATL MIN (15:00) 2-M.Ryan sacked at ATL 17 for -8 yards (5… 0 1 #> 4 ATL MIN (14:20) 24-D.Freeman right tackle to ATL 21 for 4… 1 0 #> 5 ATL MIN (13:41) (Shotgun) 2-M.Ryan scrambles left end to … 0 1 #> 6 ATL MIN (12:59) 5-M.Bosher punt is BLOCKED by 50-E.Wilson… 0 0 data %>% select(posteam, defteam, desc, rush, pass) %>% head() data %>% filter(rush == 1 | pass == 1) %>% select(posteam, desc, rush, pass, name, passer, rusher, receiver) %>% head() #> ── nflverse play by play data ────────────────────────────────────────────────── #> ℹ Data updated: 2024-03-07 14:39:28 UTC #> # A tibble: 6 × 8 #> posteam desc rush pass name passer rusher receiver #> #> 1 ATL (15:00) 2-M.Ryan sacked at A… 0 1 M.Ry… M.Ryan NA NA #> 2 ATL (14:20) 24-D.Freeman right t… 1 0 D.Fr… NA D.Fre… NA #> 3 ATL (13:41) (Shotgun) 2-M.Ryan s… 0 1 M.Ry… M.Ryan NA NA #> 4 MIN (12:53) 33-D.Cook right end … 1 0 D.Co… NA D.Cook NA #> 5 MIN (12:32) 8-K.Cousins pass sho… 0 1 K.Co… K.Cou… NA D.Cook #> 6 MIN (11:57) 8-K.Cousins pass sho… 0 1 K.Co… K.Cou… NA A.Thiel… data %>% filter(special == 1) %>% select(down, ydstogo, desc) %>% head() #> ── nflverse play by play data ────────────────────────────────────────────────── #> ℹ Data updated: 2024-03-07 14:39:28 UTC #> # A tibble: 6 × 3 #> down ydstogo desc #> #> 1 NA 0 5-D.Bailey kicks 65 yards from MIN 35 to end zone, Touchback. #> 2 4 2 (12:59) 5-M.Bosher punt is BLOCKED by 50-E.Wilson, Center-47-J.… #> 3 NA 0 (Kick formation) 5-D.Bailey extra point is GOOD, Center-58-A.Cu… #> 4 NA 0 5-D.Bailey kicks 67 yards from MIN 35 to ATL -2. 38-K.Barner to… #> 5 NA 0 (Kick formation) 5-D.Bailey extra point is GOOD, Center-58-A.Cu… #> 6 NA 0 5-D.Bailey kicks 65 yards from MIN 35 to end zone, Touchback. data %>% filter(down == 4) %>% select(down, ydstogo, desc) %>% head() #> ── nflverse play by play data ────────────────────────────────────────────────── #> ℹ Data updated: 2024-03-07 14:39:28 UTC #> # A tibble: 6 × 3 #> down ydstogo desc #> #> 1 4 2 (12:59) 5-M.Bosher punt is BLOCKED by 50-E.Wilson, Center-47-J.… #> 2 4 19 (2:38) 5-M.Bosher punts 33 yards to MIN 8, Center-47-J.Harris, … #> 3 4 20 (12:33) 2-B.Colquitt punts 51 yards to ATL 17, Center-58-A.Cutt… #> 4 4 27 (1:49) 5-M.Bosher punts 45 yards to MIN 10, Center-47-J.Harris,… #> 5 4 10 (:49) 2-B.Colquitt punts 57 yards to ATL 33, Center-58-A.Cuttin… #> 6 4 1 (10:56) 2-B.Colquitt punts 42 yards to ATL 10, Center-58-A.Cutt… data %>% filter(down == 4 & special == 0) %>% select(down, ydstogo, desc) %>% head() #> ── nflverse play by play data ────────────────────────────────────────────────── #> ℹ Data updated: 2024-03-07 14:39:28 UTC #> # A tibble: 6 × 3 #> down ydstogo desc #> #> 1 4 5 (9:25) (Shotgun) 2-M.Ryan pass deep left to 18-C.Ridley for 20 … #> 2 4 2 (4:39) (Punt formation) PENALTY on MIN, Delay of Game, 5 yards,… #> 3 4 2 (1:27) (No Huddle, Shotgun) 2-M.Ryan pass short left to 11-J.Jo… #> 4 4 1 (2:59) (Punt formation) Direct snap to 41-A.Levine. 41-A.Levin… #> 5 4 3 (9:30) (Shotgun) 3-R.Griffin pass short left to 89-M.Andrews fo… #> 6 4 1 (3:55) 17-J.Allen FUMBLES (Aborted) at NYJ 37, RECOVERED by NYJ… pbp_rp <- data %>% filter(rush == 1 | pass == 1, !is.na(epa))"},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"some-basic-stuff-part-1","dir":"Articles","previous_headings":"","what":"Some basic stuff: Part 1","title":"A beginner's guide to nflfastR","text":"Okay, big dataset call dropbacks pass plays non-dropbacks rush plays. Now actually want , like, stuff.","code":""},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"group-by-and-summarize","dir":"Articles","previous_headings":"Some basic stuff: Part 1","what":"Group by and Summarize","title":"A beginner's guide to nflfastR","text":"Let’s take look various Cowboys’ running backs fared run plays 2019: ’s lot going . ’ve covered filter already. group_by function extremely useful function , well, groups tell – case rusher. Summarize useful collapsing data summary ’re looking , , grouping player, ’re summarizing mean EPA, success, yardage (bad rushing stat, since ’re ), getting number plays using n(), returns number group. Unsurprisingly, Prescott much effective rusher 2019 running backs, meaningful difference Pollard Elliott efficiency. check PFR team stats page, ’ll notice doesn’t match official stats. nflfastR computes EPA provides player names plays penalties two-point conversions. wanting match official stats, need restrict <= 4 (excluded two-point conversions, listed NA) play_type = run (exclude penalties, play_type = no_play): Now exactly match PFR: Zeke 301 carries 4.5 yards/carry, Pollard 86 carries 5.3 yards/carry. Note still aren’t matching Dak’s stats PFR NFL classifies scrambles rush attempts nflfastR .","code":"pbp_rp %>% filter(posteam == \"DAL\", rush == 1) %>% group_by(rusher) %>% summarize( mean_epa = mean(epa), success_rate = mean(success), ypc = mean(yards_gained), plays = n() ) %>% arrange(-mean_epa) %>% filter(plays > 20) #> # A tibble: 3 × 5 #> rusher mean_epa success_rate ypc plays #> #> 1 D.Prescott 0.288 0.591 6.41 22 #> 2 T.Pollard -0.0265 0.456 5.08 90 #> 3 E.Elliott -0.0412 0.411 4.39 309 pbp_rp %>% filter(posteam == \"DAL\", down <= 4, play_type == 'run') %>% group_by(rusher) %>% summarize( mean_epa = mean(epa), success_rate = mean(success), ypc=mean(yards_gained), plays=n() ) %>% filter(plays > 20) #> # A tibble: 3 × 5 #> rusher mean_epa success_rate ypc plays #> #> 1 D.Prescott 0.288 0.591 6.41 22 #> 2 E.Elliott -0.0185 0.422 4.51 301 #> 3 T.Pollard -0.0210 0.453 5.29 86"},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"manipulating-columns-mutate-if_else-and-case_when","dir":"Articles","previous_headings":"Some basic stuff: Part 1","what":"Manipulating columns: mutate, if_else, and case_when","title":"A beginner's guide to nflfastR","text":"Let’s say want make new column, named home, equal 1 team ball home team. Let’s introduce another extremely useful function, if_else: mutate R’s word creating new column (overwriting existing one); case, ’ve created new column called home. uses if_else, uses following pattern: condition (case, posteam == home_team), value condition true (case, posteam == home_team, 1), value condition false (0). use , example, look average EPA/play home road teams: Note EPA/play similar home teams away teams home already built nflfastR EPA model, result expected. Actually, away EPA/play actually somewhat higher, presumably away teams -performed usual 2019 homefield advantage continues decline generally. if_else nice ’re creating new column based simple condition. need something complicated? case_when good option. ’s works: Note new syntax case_when: condition (first one, air yards less 0), followed ~, followed assignment (first one, “Negative”). , created 4 bins based air yards got average completion probability (cp) based nflfastR model. Unsurprisingly, cp lower longer downfield throw goes.","code":"pbp_rp %>% mutate( home = if_else(posteam == home_team, 1, 0) ) %>% select(posteam, home_team, home) %>% head(10) #> ── nflverse play by play data ────────────────────────────────────────────────── #> ℹ Data updated: 2024-03-07 14:39:28 UTC #> # A tibble: 10 × 3 #> posteam home_team home #> #> 1 ATL MIN 0 #> 2 ATL MIN 0 #> 3 ATL MIN 0 #> 4 MIN MIN 1 #> 5 MIN MIN 1 #> 6 MIN MIN 1 #> 7 ATL MIN 0 #> 8 ATL MIN 0 #> 9 ATL MIN 0 #> 10 MIN MIN 1 pbp_rp %>% mutate( home = if_else(posteam == home_team, 1, 0) ) %>% group_by(home) %>% summarize(epa = mean(epa)) #> # A tibble: 2 × 2 #> home epa #> #> 1 0 0.0215 #> 2 1 -0.0158 pbp_rp %>% filter(!is.na(cp)) %>% mutate( depth = case_when( air_yards < 0 ~ \"Negative\", air_yards >= 0 & air_yards < 10 ~ \"Short\", air_yards >= 10 & air_yards < 20 ~ \"Medium\", air_yards >= 20 ~ \"Deep\" ) ) %>% group_by(depth) %>% summarize(cp = mean(cp)) #> # A tibble: 4 × 2 #> depth cp #> #> 1 Deep 0.367 #> 2 Medium 0.573 #> 3 Negative 0.847 #> 4 Short 0.718"},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"a-basic-figure","dir":"Articles","previous_headings":"Some basic stuff: Part 1","what":"A basic figure","title":"A beginner's guide to nflfastR","text":"Now ’ve gained skills manipulating data, let’s put use making things. teams pass-heavy first half early downs win probability 20 80, excluding final 2 minutes half everyone pass-happy? , ’ve already used filter, group_by, summarize. new function using arrange, sorts data variable(s) given. minus sign front mean_pass means sort descending order. Let’s make first figure: image kind mess – still need title, axis labels, etc – gets point across. ’ll get stuff later. importantly, made something interesting using nflfastR data! “reorder” sorts teams according pass rate, “-” saying descending order. “aes” short “aesthetic”, R’s weird way asking variables go x y axes. Looking figure, Chiefs never playoff success establish run.","code":"schotty <- pbp_rp %>% filter(wp > .20 & wp < .80 & down <= 2 & qtr <= 2 & half_seconds_remaining > 120) %>% group_by(posteam) %>% summarize(mean_pass = mean(pass), plays = n()) %>% arrange(-mean_pass) schotty #> # A tibble: 32 × 3 #> posteam mean_pass plays #> #> 1 KC 0.691 388 #> 2 MIA 0.594 288 #> 3 NO 0.585 325 #> 4 LA 0.584 329 #> 5 CHI 0.561 310 #> 6 CLE 0.555 272 #> 7 CAR 0.554 271 #> 8 TB 0.551 321 #> 9 GB 0.550 291 #> 10 ARI 0.548 325 #> # ℹ 22 more rows ggplot(schotty, aes(x=reorder(posteam,-mean_pass), y=mean_pass)) + geom_text(aes(label=posteam))"},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"loading-multiple-seasons","dir":"Articles","previous_headings":"","what":"Loading multiple seasons","title":"A beginner's guide to nflfastR","text":"data stored data repository, fast load data multiple seasons. loads play--play data 2015 2019 seasons. Let’s make sure got . now, understand : season 48,000 plays. Just fun, let’s look various play types:","code":"pbp <- load_pbp(2015:2019) pbp %>% group_by(season) %>% summarize(n = n()) #> # A tibble: 5 × 2 #> season n #> #> 1 2015 48122 #> 2 2016 47651 #> 3 2017 47242 #> 4 2018 47109 #> 5 2019 47260 pbp %>% group_by(play_type) %>% summarize(n = n()) #> # A tibble: 10 × 2 #> play_type n #> #> 1 extra_point 6240 #> 2 field_goal 5155 #> 3 kickoff 13614 #> 4 no_play 22745 #> 5 pass 99984 #> 6 punt 12083 #> 7 qb_kneel 2090 #> 8 qb_spike 340 #> 9 run 68128 #> 10 NA 7005"},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"figures-with-qb-stats","dir":"Articles","previous_headings":"","what":"Figures with QB stats","title":"A beginner's guide to nflfastR","text":"Let’s stuff quarterbacks: Lots new stuff . First, ’re grouping id name make sure ’re getting unique players; .e., two players name (like Javorius Allen Josh Allen J.Allen), also using id differentiate . qb_epa nflfastR creation equal EPA instances except pass completed fumble lost, case QB gets “credit” play spot fumble lost (making EPA function like passing yards). last part summarize comment gets last team player observed playing . way getting dataset quarterbacks without joining external roster data make sure hit number dropbacks. case, filtering n_dropbacks > 100 makes sure ’re including quarterbacks. ungroup() near end good practice grouping make sure don’t get weird behavior data created line. Let’s make figures. load_teams() function provided nflreadr package, since already loaded package, ’s ready use. Let’s join qbs dataframe created: left_join means keep rows left dataframe (first one provided, qbs), join rows available rows dataframe. also need provide joining variables, team qbs team_abbr load_teams(). type = c('team' = 'team_abbr')? knows, ’s left_join requires instructions match.","code":"qbs <- pbp %>% filter(season_type == \"REG\", !is.na(epa)) %>% group_by(id, name) %>% summarize( epa = mean(qb_epa), cpoe = mean(cpoe, na.rm = T), n_dropbacks = sum(pass), n_plays = n(), team = last(posteam) ) %>% ungroup() %>% filter(n_dropbacks > 100 & n_plays > 1000) #> `summarise()` has grouped output by 'id'. You can override using the `.groups` #> argument. load_teams() #> ── nflverse team graphics ────────────────────────────────────────────────────── #> ℹ Data updated: 2024-08-02 17:34:42 UTC #> # A tibble: 32 × 16 #> team_abbr team_name team_id team_nick team_conf team_division team_color #> #> 1 ARI Arizona Cardi… 3800 Cardinals NFC NFC West #97233F #> 2 ATL Atlanta Falco… 0200 Falcons NFC NFC South #A71930 #> 3 BAL Baltimore Rav… 0325 Ravens AFC AFC North #241773 #> 4 BUF Buffalo Bills 0610 Bills AFC AFC East #00338D #> 5 CAR Carolina Pant… 0750 Panthers NFC NFC South #0085CA #> 6 CHI Chicago Bears 0810 Bears NFC NFC North #0B162A #> 7 CIN Cincinnati Be… 0920 Bengals AFC AFC North #FB4F14 #> 8 CLE Cleveland Bro… 1050 Browns AFC AFC North #FF3C00 #> 9 DAL Dallas Cowboys 1200 Cowboys NFC NFC East #002244 #> 10 DEN Denver Broncos 1400 Broncos AFC AFC West #002244 #> # ℹ 22 more rows #> # ℹ 9 more variables: team_color2 , team_color3 , team_color4 , #> # team_logo_wikipedia , team_logo_espn , team_wordmark , #> # team_conference_logo , team_league_logo , team_logo_squared qbs <- qbs %>% left_join(load_teams(), by = c('team' = 'team_abbr'))"},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"with-team-color-dots","dir":"Articles","previous_headings":"Figures with QB stats","what":"With team color dots","title":"A beginner's guide to nflfastR","text":"Now can make figure! looks complicated, just way getting bunch different stuff plot: lines averages, dots, names, etc. added comments explain going , practice making figures usually just copy paste stuff /google need.","code":"qbs %>% ggplot(aes(x = cpoe, y = epa)) + #horizontal line with mean EPA geom_hline(yintercept = mean(qbs$epa), color = \"red\", linetype = \"dashed\", alpha=0.5) + #vertical line with mean CPOE geom_vline(xintercept = mean(qbs$cpoe), color = \"red\", linetype = \"dashed\", alpha=0.5) + #add points for the QBs with the right colors #cex controls point size and alpha the transparency (alpha = 1 is normal) geom_point(color = qbs$team_color, cex=qbs$n_plays / 350, alpha = .6) + #add names using ggrepel, which tries to make them not overlap geom_text_repel(aes(label=name)) + #add a smooth line fitting cpoe + epa stat_smooth(geom='line', alpha=0.5, se=FALSE, method='lm')+ #titles and caption labs(x = \"Completion % above expected (CPOE)\", y = \"EPA per play (passes, rushes, and penalties)\", title = \"Quarterback Efficiency, 2015 - 2019\", caption = \"Data: @nflfastR\") + #uses the black and white ggplot theme theme_bw() + #center title with hjust = 0.5 theme( plot.title = element_text(size = 14, hjust = 0.5, face = \"bold\") ) + #make ticks look nice #if this doesn't work, `install.packages('scales')` scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) + scale_x_continuous(breaks = scales::pretty_breaks(n = 10))"},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"with-team-logos","dir":"Articles","previous_headings":"Figures with QB stats","what":"With team logos","title":"A beginner's guide to nflfastR","text":"also make plot team logos: changes ’ve made use geom_nfl_logos instead geom_point (figure right size images width part? Trial error). figure look better fewer players shown, point explaining stuff, let’s call good enough.","code":"qbs %>% ggplot(aes(x = cpoe, y = epa)) + #horizontal line with mean EPA geom_hline(yintercept = mean(qbs$epa), color = \"red\", linetype = \"dashed\", alpha=0.5) + #vertical line with mean CPOE geom_vline(xintercept = mean(qbs$cpoe), color = \"red\", linetype = \"dashed\", alpha=0.5) + #add points for the QBs with the logos (this uses nflplotR package) geom_nfl_logos(aes(team_abbr = team), width = qbs$n_plays / 45000, alpha = 0.75) + #add names using ggrepel, which tries to make them not overlap geom_text_repel(aes(label=name)) + #add a smooth line fitting cpoe + epa stat_smooth(geom='line', alpha=0.5, se=FALSE, method='lm')+ #titles and caption labs(x = \"Completion % above expected (CPOE)\", y = \"EPA per play (passes, rushes, and penalties)\", title = \"Quarterback Efficiency, 2015 - 2019\", caption = \"Data: @nflfastR\") + theme_bw() + #center title theme( plot.title = element_text(size = 14, hjust = 0.5, face = \"bold\") ) + #make ticks look nice scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) + scale_x_continuous(breaks = scales::pretty_breaks(n = 10))"},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"team-tiers-plot","dir":"Articles","previous_headings":"Figures with QB stats","what":"Team tiers plot","title":"A beginner's guide to nflfastR","text":"’s helpful, notes chart originally shown , like uses nflplotR team logos. geom_mean_lines() function adds mean lines offensive defensive EPA per play slope lines created using geom_abline() scale_y_reverse() reverses vertical axis = better defense Everything else comprehensible now!","code":"library(nflplotR) # get pbp and filter to regular season rush and pass plays pbp <- nflreadr::load_pbp(2005) %>% dplyr::filter(season_type == \"REG\") %>% dplyr::filter(!is.na(posteam) & (rush == 1 | pass == 1)) # offense epa offense <- pbp %>% dplyr::group_by(team = posteam) %>% dplyr::summarise(off_epa = mean(epa, na.rm = TRUE)) # defense epa defense <- pbp %>% dplyr::group_by(team = defteam) %>% dplyr::summarise(def_epa = mean(epa, na.rm = TRUE)) # make figure offense %>% dplyr::inner_join(defense, by = \"team\") %>% ggplot2::ggplot(aes(x = off_epa, y = def_epa)) + # tier lines ggplot2::geom_abline(slope = -1.5, intercept = (4:-3)/10, alpha = .2) + # nflplotR magic nflplotR::geom_mean_lines(aes(y0 = off_epa, x0 = def_epa)) + nflplotR::geom_nfl_logos(aes(team_abbr = team), width = 0.07, alpha = 0.7) + ggplot2::labs( x = \"Offense EPA/play\", y = \"Defense EPA/play\", caption = \"Data: @nflfastR\", title = \"2005 NFL Offensive and Defensive EPA per Play\" ) + ggplot2::theme_bw() + ggplot2::theme( plot.title = ggplot2::element_text(size = 12, hjust = 0.5, face = \"bold\") ) + ggplot2::scale_y_reverse()"},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"a-few-more-things-on-plotting","dir":"Articles","previous_headings":"Figures with QB stats","what":"A few more things on plotting","title":"A beginner's guide to nflfastR","text":"two ways view plots. One RStudio Viewer, shows RStudio plot something. plots RStudio viewer look ugly pixelated, probably need install Cairo package set default viewer Tools –> Global Options –> General –> Graphics –> Backend: Set Cairo. save .png preferred dimensions resolution. example, ggsave(\"test.png\", width = 16, height = 9, units = \"cm\") save current plot “test.png” units specified (can view ggsave options ). One note: RStudio Viewer can take long time preview ggplots, especially ’re things like adding images. ’re getting frustrated plot taking long time display, can take advantage ggpreview nflplotR. , first save plot object run ggpreview (doesn’t make sense, see examples ).","code":""},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"real-life-example-lets-make-a-win-total-model","dir":"Articles","previous_headings":"","what":"Real life example: let’s make a win total model","title":"A beginner's guide to nflfastR","text":"’m going try go process cleaning joining multiple data sets try get sense approach something like , step--step.","code":""},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"get-team-wins-each-season","dir":"Articles","previous_headings":"Real life example: let’s make a win total model","what":"Get team wins each season","title":"A beginner's guide to nflfastR","text":"’re going cheat little take advantage Lee Sharpe’s famous games file. stuff added nflfastR, ’s easier working file game one row. ’re curious, triple colon way access referred non-exported functions package. Think like secret menu (secret? Sometimes package developers want limit number exported functions overwhelming). start, want create dataframe row team-season observation, listing many games won. multiple ways , ’m going just take home away results bind together. example, ’s home results look like: Note used rename change home_team team. away teams, need flip result since result given perspective home team. Now let’s make columns called win based result. results %>% filter(season == 2019 & team == 'SEA') part end isn’t actually saving data new form, just making sure previous step wanted. good habit get : frequently inspect data make sure looks like think . Now dataframe wanted, can get team wins season easily: , ’re making sure data looks like “” checking 5 seasons wins, making sure looks right. Now team-season win point differential data ready, need go back nflfastR data get EPA/play.","code":"games <- nflreadr::load_schedules() str(games) #> nflvrs_d [6,978 × 46] (S3: nflverse_data/tbl_df/tbl/data.table/data.frame) #> $ game_id : chr [1:6978] \"1999_01_MIN_ATL\" \"1999_01_KC_CHI\" \"1999_01_PIT_CLE\" \"1999_01_OAK_GB\" ... #> $ season : int [1:6978] 1999 1999 1999 1999 1999 1999 1999 1999 1999 1999 ... #> $ game_type : chr [1:6978] \"REG\" \"REG\" \"REG\" \"REG\" ... #> $ week : int [1:6978] 1 1 1 1 1 1 1 1 1 1 ... #> $ gameday : chr [1:6978] \"1999-09-12\" \"1999-09-12\" \"1999-09-12\" \"1999-09-12\" ... #> $ weekday : chr [1:6978] \"Sunday\" \"Sunday\" \"Sunday\" \"Sunday\" ... #> $ gametime : chr [1:6978] NA NA NA NA ... #> $ away_team : chr [1:6978] \"MIN\" \"KC\" \"PIT\" \"OAK\" ... #> $ away_score : int [1:6978] 17 17 43 24 14 3 10 30 25 28 ... #> $ home_team : chr [1:6978] \"ATL\" \"CHI\" \"CLE\" \"GB\" ... #> $ home_score : int [1:6978] 14 20 0 28 31 41 19 28 24 20 ... #> $ location : chr [1:6978] \"Home\" \"Home\" \"Home\" \"Home\" ... #> $ result : int [1:6978] -3 3 -43 4 17 38 9 -2 -1 -8 ... #> $ total : int [1:6978] 31 37 43 52 45 44 29 58 49 48 ... #> $ overtime : int [1:6978] 0 0 0 0 0 0 0 0 0 0 ... #> $ old_game_id : chr [1:6978] \"1999091210\" \"1999091206\" \"1999091213\" \"1999091208\" ... #> $ gsis : int [1:6978] 598 597 604 602 591 603 592 600 588 596 ... #> $ nfl_detail_id : chr [1:6978] NA NA NA NA ... #> $ pfr : chr [1:6978] \"199909120atl\" \"199909120chi\" \"199909120cle\" \"199909120gnb\" ... #> $ pff : int [1:6978] NA NA NA NA NA NA NA NA NA NA ... #> $ espn : chr [1:6978] \"190912001\" \"190912003\" \"190912005\" \"190912009\" ... #> $ ftn : int [1:6978] NA NA NA NA NA NA NA NA NA NA ... #> $ away_rest : int [1:6978] 7 7 7 7 7 7 7 7 7 7 ... #> $ home_rest : int [1:6978] 7 7 7 7 7 7 7 7 7 7 ... #> $ away_moneyline : int [1:6978] NA NA NA NA NA NA NA NA NA NA ... #> $ home_moneyline : int [1:6978] NA NA NA NA NA NA NA NA NA NA ... #> $ spread_line : num [1:6978] -4 -3 -6 9 -3 5.5 3.5 7 -3 9.5 ... #> $ away_spread_odds: int [1:6978] NA NA NA NA NA NA NA NA NA NA ... #> $ home_spread_odds: int [1:6978] NA NA NA NA NA NA NA NA NA NA ... #> $ total_line : num [1:6978] 49 38 37 43 45.5 49 38 44.5 37 42 ... #> $ under_odds : int [1:6978] NA NA NA NA NA NA NA NA NA NA ... #> $ over_odds : int [1:6978] NA NA NA NA NA NA NA NA NA NA ... #> $ div_game : int [1:6978] 0 0 1 0 1 0 1 1 1 0 ... #> $ roof : chr [1:6978] \"dome\" \"outdoors\" \"outdoors\" \"outdoors\" ... #> $ surface : chr [1:6978] \"astroturf\" \"grass\" \"grass\" \"grass\" ... #> $ temp : int [1:6978] NA 80 78 67 NA 76 NA 73 75 NA ... #> $ wind : int [1:6978] NA 12 12 10 NA 8 NA 5 3 NA ... #> $ away_qb_id : chr [1:6978] \"00-0003761\" \"00-0006300\" \"00-0015700\" \"00-0005741\" ... #> $ home_qb_id : chr [1:6978] \"00-0002876\" \"00-0010560\" \"00-0004230\" \"00-0005106\" ... #> $ away_qb_name : chr [1:6978] \"Randall Cunningham\" \"Elvis Grbac\" \"Kordell Stewart\" \"Rich Gannon\" ... #> $ home_qb_name : chr [1:6978] \"Chris Chandler\" \"Shane Matthews\" \"Ty Detmer\" \"Brett Favre\" ... #> $ away_coach : chr [1:6978] \"Dennis Green\" \"Gunther Cunningham\" \"Bill Cowher\" \"Jon Gruden\" ... #> $ home_coach : chr [1:6978] \"Dan Reeves\" \"Dick Jauron\" \"Chris Palmer\" \"Ray Rhodes\" ... #> $ referee : chr [1:6978] \"Gerry Austin\" \"Phil Luckett\" \"Bob McElwee\" \"Tony Corrente\" ... #> $ stadium_id : chr [1:6978] \"ATL00\" \"CHI98\" \"CLE00\" \"GNB00\" ... #> $ stadium : chr [1:6978] \"Georgia Dome\" \"Soldier Field\" \"Cleveland Browns Stadium\" \"Lambeau Field\" ... #> - attr(*, \"nflverse_type\")= chr \"games and schedules\" #> - attr(*, \"nflverse_timestamp\")= POSIXct[1:1], format: \"2024-08-02 17:35:13\" home <- games %>% filter(game_type == 'REG') %>% select(season, week, home_team, result) %>% rename(team = home_team) home %>% head(5) #> ── nflverse games and schedules ──────────────────────────────────────────────── #> ℹ Data updated: 2024-08-02 17:35:13 UTC #> # A tibble: 5 × 4 #> season week team result #> #> 1 1999 1 ATL -3 #> 2 1999 1 CHI 3 #> 3 1999 1 CLE -43 #> 4 1999 1 GB 4 #> 5 1999 1 IND 17 away <- games %>% filter(game_type == 'REG') %>% select(season, week, away_team, result) %>% rename(team = away_team) %>% mutate(result = -result) away %>% head(5) #> ── nflverse games and schedules ──────────────────────────────────────────────── #> ℹ Data updated: 2024-08-02 17:35:13 UTC #> # A tibble: 5 × 4 #> season week team result #> #> 1 1999 1 MIN 3 #> 2 1999 1 KC -3 #> 3 1999 1 PIT 43 #> 4 1999 1 OAK -4 #> 5 1999 1 BUF -17 results <- bind_rows(home, away) %>% arrange(week) %>% mutate( win = case_when( result > 0 ~ 1, result < 0 ~ 0, result == 0 ~ 0.5 ) ) results %>% filter(season == 2019 & team == 'SEA') #> ── nflverse games and schedules ──────────────────────────────────────────────── #> ℹ Data updated: 2024-08-02 17:35:13 UTC #> # A tibble: 16 × 5 #> season week team result win #> #> 1 2019 1 SEA 1 1 #> 2 2019 2 SEA 2 1 #> 3 2019 3 SEA -6 0 #> 4 2019 4 SEA 17 1 #> 5 2019 5 SEA 1 1 #> 6 2019 6 SEA 4 1 #> 7 2019 7 SEA -14 0 #> 8 2019 8 SEA 7 1 #> 9 2019 9 SEA 6 1 #> 10 2019 10 SEA 3 1 #> 11 2019 12 SEA 8 1 #> 12 2019 13 SEA 7 1 #> 13 2019 14 SEA -16 0 #> 14 2019 15 SEA 6 1 #> 15 2019 16 SEA -14 0 #> 16 2019 17 SEA -5 0 team_wins <- results %>% group_by(team, season) %>% summarize( wins = sum(win), point_diff = sum(result)) %>% ungroup() #> `summarise()` has grouped output by 'team'. You can override using the #> `.groups` argument. team_wins %>% arrange(-wins) %>% head(5) #> # A tibble: 5 × 4 #> team season wins point_diff #> #> 1 NE 2007 16 315 #> 2 CAR 2015 15 192 #> 3 GB 2011 15 201 #> 4 PIT 2004 15 121 #> 5 BAL 2019 14 249"},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"get-team-epa-by-season","dir":"Articles","previous_headings":"Real life example: let’s make a win total model","what":"Get team EPA by season","title":"A beginner's guide to nflfastR","text":"Let’s start getting data every season nflfastR data repository: ’m pretty aggressive dropping rows columns (filter select) otherwise loading memory can painful computer. need ’re . Note ’m keeping regular season games (season_type == \"REG\") since analysis usually done. Now can get EPA/play offense defense. Let’s break pass rush . don’t remember let’s steps. know need group team, season, pass, ’s beginning: makes two rows per team-season. get team-season row? pivot_wider need: one hard wrap head around usually open reference page, read example, pray try works. case . Hooray! turned two-lines-per-team dataframe one, 0 column pass == 0 (run plays) 1 column pass == 1. Now let’s rename something sensible save: Note variable names numbers need surrounded tick marks work. Now can repeat process defense: Let’s another sanity check looking top 5 pass offenses defenses: top pass defenses (2002 TB, 2017 JAX, 2019 NE) offenses (2007 Pats, 2004 Colts, 2011 Packers) definitely check !","code":"pbp <- load_pbp(1999:2019) %>% filter(rush == 1 | pass == 1, season_type == \"REG\", !is.na(epa), !is.na(posteam), posteam != \"\") %>% select(season, posteam, pass, defteam, epa) pbp %>% group_by(posteam, season, pass) %>% summarize(epa = mean(epa)) %>% head(4) #> `summarise()` has grouped output by 'posteam', 'season'. You can override using #> the `.groups` argument. #> # A tibble: 4 × 4 #> # Groups: posteam, season [2] #> posteam season pass epa #> #> 1 ARI 1999 0 -0.226 #> 2 ARI 1999 1 -0.150 #> 3 ARI 2000 0 -0.248 #> 4 ARI 2000 1 -0.0690 pbp %>% group_by(posteam, season, pass) %>% summarize(epa = mean(epa)) %>% pivot_wider(names_from = pass, values_from = epa) %>% head(4) #> `summarise()` has grouped output by 'posteam', 'season'. You can override using #> the `.groups` argument. #> # A tibble: 4 × 4 #> # Groups: posteam, season [4] #> posteam season `0` `1` #> #> 1 ARI 1999 -0.226 -0.150 #> 2 ARI 2000 -0.248 -0.0690 #> 3 ARI 2001 -0.179 0.0727 #> 4 ARI 2002 -0.160 -0.0517 offense <- pbp %>% group_by(posteam, season, pass) %>% summarize(epa = mean(epa)) %>% pivot_wider(names_from = pass, values_from = epa) %>% rename(off_pass_epa = `1`, off_rush_epa = `0`) #> `summarise()` has grouped output by 'posteam', 'season'. You can override using #> the `.groups` argument. defense <- pbp %>% group_by(defteam, season, pass) %>% summarize(epa = mean(epa)) %>% pivot_wider(names_from = pass, values_from = epa) %>% rename(def_pass_epa = `1`, def_rush_epa = `0`) #> `summarise()` has grouped output by 'defteam', 'season'. You can override using #> the `.groups` argument. #top 5 offenses offense %>% arrange(-off_pass_epa) %>% head(5) #> # A tibble: 5 × 4 #> # Groups: posteam, season [5] #> posteam season off_rush_epa off_pass_epa #> #> 1 NE 2007 0.00380 0.422 #> 2 IND 2004 -0.00281 0.420 #> 3 GB 2011 -0.114 0.412 #> 4 KC 2018 0.0209 0.348 #> 5 DEN 2013 -0.0296 0.343 #top 5 defenses defense %>% arrange(def_pass_epa) %>% head(5) #> # A tibble: 5 × 4 #> # Groups: defteam, season [5] #> defteam season def_rush_epa def_pass_epa #> #> 1 TB 2002 -0.0754 -0.290 #> 2 NE 2019 -0.168 -0.241 #> 3 BAL 2003 -0.232 -0.238 #> 4 JAX 2017 -0.141 -0.223 #> 5 NYJ 2009 -0.104 -0.220"},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"fix-team-names-and-join","dir":"Articles","previous_headings":"Real life example: let’s make a win total model","what":"Fix team names and join","title":"A beginner's guide to nflfastR","text":"Now ’re ready bind together. Actually, let’s make sure team names ready . Nope, yet, need fix Raiders, Rams, Chargers, LV, LA, LAC nflfastR. TRUE statement bottom says none cases found, keep team . Let’s make sure worked: HOU 3 fewer seasons didn’t exist 1999 2001, fine, team names number seasons . Okay NOW can join: Now ’re getting really close want! Next need create new columns prior year EPA, let’s point differential . Finally! Now data place can start things .","code":"team_wins %>% group_by(team) %>% summarize(n=n()) %>% arrange(n) #> # A tibble: 35 × 2 #> team n #> #> 1 LV 5 #> 2 LAC 8 #> 3 LA 9 #> 4 STL 17 #> 5 SD 18 #> 6 OAK 21 #> 7 HOU 23 #> 8 ARI 26 #> 9 ATL 26 #> 10 BAL 26 #> # ℹ 25 more rows team_wins <- team_wins %>% mutate( team = case_when( team == 'OAK' ~ 'LV', team == 'SD' ~ 'LAC', team == 'STL' ~ 'LA', TRUE ~ team ) ) team_wins %>% group_by(team) %>% summarize(n=n()) %>% arrange(n) #> # A tibble: 32 × 2 #> team n #> #> 1 HOU 23 #> 2 ARI 26 #> 3 ATL 26 #> 4 BAL 26 #> 5 BUF 26 #> 6 CAR 26 #> 7 CHI 26 #> 8 CIN 26 #> 9 CLE 26 #> 10 DAL 26 #> # ℹ 22 more rows data <- team_wins %>% left_join(offense, by = c('team' = 'posteam', 'season')) %>% left_join(defense, by = c('team' = 'defteam', 'season')) data %>% filter(team == 'SEA' & season >= 2012) #> # A tibble: 13 × 8 #> team season wins point_diff off_rush_epa off_pass_epa def_rush_epa #> #> 1 SEA 2012 11 167 -0.00476 0.213 -0.0738 #> 2 SEA 2013 13 186 -0.101 0.188 -0.128 #> 3 SEA 2014 12 140 0.0295 0.139 -0.231 #> 4 SEA 2015 10 146 -0.104 0.249 -0.148 #> 5 SEA 2016 10.5 62 -0.126 0.102 -0.207 #> 6 SEA 2017 9 34 -0.192 0.0584 -0.122 #> 7 SEA 2018 10 81 -0.0273 0.210 -0.130 #> 8 SEA 2019 11 7 -0.136 0.119 -0.0930 #> 9 SEA 2020 12 88 NA NA NA #> 10 SEA 2021 7 29 NA NA NA #> 11 SEA 2022 9 6 NA NA NA #> 12 SEA 2023 9 -38 NA NA NA #> 13 SEA 2024 NA NA NA NA NA #> # ℹ 1 more variable: def_pass_epa data <- data %>% arrange(team, season) %>% group_by(team) %>% mutate( prior_off_rush_epa = lag(off_rush_epa), prior_off_pass_epa = lag(off_pass_epa), prior_def_rush_epa = lag(def_rush_epa), prior_def_pass_epa = lag(def_pass_epa), prior_point_diff = lag(point_diff) ) %>% ungroup() data %>% head(5) #> # A tibble: 5 × 13 #> team season wins point_diff off_rush_epa off_pass_epa def_rush_epa #> #> 1 ARI 1999 6 -137 -0.226 -0.150 -0.0329 #> 2 ARI 2000 3 -233 -0.248 -0.0690 0.0153 #> 3 ARI 2001 7 -48 -0.179 0.0727 -0.0783 #> 4 ARI 2002 5 -155 -0.160 -0.0517 -0.0175 #> 5 ARI 2003 4 -227 -0.232 -0.115 -0.0880 #> # ℹ 6 more variables: def_pass_epa , prior_off_rush_epa , #> # prior_off_pass_epa , prior_def_rush_epa , #> # prior_def_pass_epa , prior_point_diff "},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"correlations-and-regressions","dir":"Articles","previous_headings":"Real life example: let’s make a win total model","what":"Correlations and regressions","title":"A beginner's guide to nflfastR","text":"’ve covered select, see new use minus sign de-selects variables (need de-select team name correlation work doesn’t work character strings, correlation season number meaningless). ’ve run correlation dataframe, removing missing values, rounding 2 digits. surprisingly, see wins current season strongly related passing offense EPA rushing EPA defense EPA, prior offense carries predictive power prior defense. Pass offense stable year year (0.51) rush offense (0.46), pass defense (0.29), rush defense (0.32). ’m actually surprised values passing offense aren’t higher relative others. Maybe prior results come nflscrapR era (2009 - 2019)? Let’s check looks like since 2009 relative earlier seasons: Yep, seems case. recent period, passing offense become slightly stable predictive following-year success, time rushing offense become substantially less stable less predictive future team success. Now let’s basic regression wins prior offense defense EPA/play. Maybe look recent period fit model since ’s relevant 2020. real world, rigorous making decisions like , let’s proceed anyway. ’m actually pretty surprised passing offense isn’t higher . compare simply using point differential? R2 somewhat higher just point differential. isn’t surprising ’ve thrown away special teams plays haven’t attempted make adjustments things like fumble luck know can improve EPA’s predictive power.","code":"data %>% select(-team, -season) %>% cor(use=\"complete.obs\") %>% round(2) #> wins point_diff off_rush_epa off_pass_epa def_rush_epa #> wins 1.00 0.92 0.38 0.64 -0.27 #> point_diff 0.92 1.00 0.41 0.68 -0.32 #> off_rush_epa 0.38 0.41 1.00 0.55 0.17 #> off_pass_epa 0.64 0.68 0.55 1.00 0.09 #> def_rush_epa -0.27 -0.32 0.17 0.09 1.00 #> def_pass_epa -0.56 -0.61 0.04 -0.03 0.34 #> prior_off_rush_epa 0.20 0.22 0.46 0.36 0.13 #> prior_off_pass_epa 0.26 0.29 0.32 0.51 0.08 #> prior_def_rush_epa -0.11 -0.15 0.12 0.05 0.32 #> prior_def_pass_epa -0.18 -0.20 -0.01 -0.01 0.09 #> prior_point_diff 0.36 0.41 0.18 0.32 -0.08 #> def_pass_epa prior_off_rush_epa prior_off_pass_epa #> wins -0.56 0.20 0.26 #> point_diff -0.61 0.22 0.29 #> off_rush_epa 0.04 0.46 0.32 #> off_pass_epa -0.03 0.36 0.51 #> def_rush_epa 0.34 0.13 0.08 #> def_pass_epa 1.00 -0.01 0.05 #> prior_off_rush_epa -0.01 1.00 0.55 #> prior_off_pass_epa 0.05 0.55 1.00 #> prior_def_rush_epa 0.19 0.18 0.09 #> prior_def_pass_epa 0.29 0.06 -0.02 #> prior_point_diff -0.19 0.40 0.68 #> prior_def_rush_epa prior_def_pass_epa prior_point_diff #> wins -0.11 -0.18 0.36 #> point_diff -0.15 -0.20 0.41 #> off_rush_epa 0.12 -0.01 0.18 #> off_pass_epa 0.05 -0.01 0.32 #> def_rush_epa 0.32 0.09 -0.08 #> def_pass_epa 0.19 0.29 -0.19 #> prior_off_rush_epa 0.18 0.06 0.40 #> prior_off_pass_epa 0.09 -0.02 0.68 #> prior_def_rush_epa 1.00 0.34 -0.33 #> prior_def_pass_epa 0.34 1.00 -0.59 #> prior_point_diff -0.33 -0.59 1.00 message(\"2009 through 2019\") #> 2009 through 2019 data %>% filter(season >= 2009) %>% select(wins, point_diff, off_pass_epa, off_rush_epa, prior_point_diff, prior_off_pass_epa, prior_off_rush_epa) %>% cor(use=\"complete.obs\") %>% round(2) #> wins point_diff off_pass_epa off_rush_epa prior_point_diff #> wins 1.00 0.92 0.73 0.40 0.43 #> point_diff 0.92 1.00 0.79 0.46 0.44 #> off_pass_epa 0.73 0.79 1.00 0.37 0.38 #> off_rush_epa 0.40 0.46 0.37 1.00 0.19 #> prior_point_diff 0.43 0.44 0.38 0.19 1.00 #> prior_off_pass_epa 0.34 0.36 0.45 0.10 0.78 #> prior_off_rush_epa 0.24 0.25 0.17 0.24 0.45 #> prior_off_pass_epa prior_off_rush_epa #> wins 0.34 0.24 #> point_diff 0.36 0.25 #> off_pass_epa 0.45 0.17 #> off_rush_epa 0.10 0.24 #> prior_point_diff 0.78 0.45 #> prior_off_pass_epa 1.00 0.35 #> prior_off_rush_epa 0.35 1.00 message(\"1999 through 2008\") #> 1999 through 2008 data %>% filter(season < 2009) %>% select(wins, point_diff, off_pass_epa, off_rush_epa, prior_point_diff, prior_off_pass_epa, prior_off_rush_epa) %>% cor(use=\"complete.obs\") %>% round(2) #> wins point_diff off_pass_epa off_rush_epa prior_point_diff #> wins 1.00 0.92 0.58 0.39 0.28 #> point_diff 0.92 1.00 0.60 0.38 0.36 #> off_pass_epa 0.58 0.60 1.00 0.68 0.27 #> off_rush_epa 0.39 0.38 0.68 1.00 0.18 #> prior_point_diff 0.28 0.36 0.27 0.18 1.00 #> prior_off_pass_epa 0.19 0.23 0.54 0.47 0.60 #> prior_off_rush_epa 0.18 0.20 0.47 0.57 0.38 #> prior_off_pass_epa prior_off_rush_epa #> wins 0.19 0.18 #> point_diff 0.23 0.20 #> off_pass_epa 0.54 0.47 #> off_rush_epa 0.47 0.57 #> prior_point_diff 0.60 0.38 #> prior_off_pass_epa 1.00 0.69 #> prior_off_rush_epa 0.69 1.00 data <- data %>% filter(season >= 2009) fit <- lm(wins ~ prior_off_pass_epa + prior_off_rush_epa + prior_def_pass_epa + prior_def_rush_epa, data = data) summary(fit) #> #> Call: #> lm(formula = wins ~ prior_off_pass_epa + prior_off_rush_epa + #> prior_def_pass_epa + prior_def_rush_epa, data = data) #> #> Residuals: #> Min 1Q Median 3Q Max #> -7.7060 -1.8889 0.0678 2.2456 7.0869 #> #> Coefficients: #> Estimate Std. Error t value Pr(>|t|) #> (Intercept) 7.9690 0.3883 20.524 < 0.0000000000000002 *** #> prior_off_pass_epa 6.5727 1.2784 5.141 0.000000438 *** #> prior_off_rush_epa 6.0275 2.2705 2.655 0.00827 ** #> prior_def_pass_epa -4.0837 1.6455 -2.482 0.01351 * #> prior_def_rush_epa -5.1531 2.3288 -2.213 0.02751 * #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #> #> Residual standard error: 2.859 on 379 degrees of freedom #> (128 observations deleted due to missingness) #> Multiple R-squared: 0.164, Adjusted R-squared: 0.1552 #> F-statistic: 18.58 on 4 and 379 DF, p-value: 0.0000000000000584 fit2 <- lm(wins ~ prior_point_diff, data = data) summary(fit2) #> #> Call: #> lm(formula = wins ~ prior_point_diff, data = data) #> #> Residuals: #> Min 1Q Median 3Q Max #> -7.2007 -1.9624 0.1361 2.1952 7.3562 #> #> Coefficients: #> Estimate Std. Error t value Pr(>|t|) #> (Intercept) 8.097917 0.126658 63.94 <0.0000000000000002 *** #> prior_point_diff 0.012974 0.001254 10.35 <0.0000000000000002 *** #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #> #> Residual standard error: 2.775 on 478 degrees of freedom #> (32 observations deleted due to missingness) #> Multiple R-squared: 0.183, Adjusted R-squared: 0.1813 #> F-statistic: 107.1 on 1 and 478 DF, p-value: < 0.00000000000000022"},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"predictions","dir":"Articles","previous_headings":"Real life example: let’s make a win total model","what":"Predictions","title":"A beginner's guide to nflfastR","text":"Now let’s get predictions EPA model: mostly checks . just used simple point differential predict? surprisingly, looks pretty similar. basic models don’t incorporate schedule, roster changes, etc. example, better model take account Tom Brady longer playing Patriots. hopefully useful!","code":"preds <- predict(fit, data %>% filter(season == 2020)) %>% #was just a vector, need a tibble to bind as_tibble() %>% #make the column name make sense rename(prediction = value) %>% round(1) %>% #get names bind_cols( data %>% filter(season == 2020) %>% select(team) ) preds %>% arrange(-prediction) %>% head(5) #> # A tibble: 5 × 2 #> prediction team #> #> 1 11.5 BAL #> 2 10.2 SF #> 3 9.8 NE #> 4 9.6 DAL #> 5 9.6 NO preds2 <- predict(fit2, data %>% filter(season == 2020)) %>% #was just a vector, need a tibble to bind as_tibble() %>% #make the column name make sense rename(prediction = value) %>% round(1) %>% #get names bind_cols( data %>% filter(season == 2020) %>% select(team) ) preds2 %>% arrange(-prediction) %>% head(5) #> # A tibble: 5 × 2 #> prediction team #> #> 1 11.3 BAL #> 2 10.6 NE #> 3 10.3 SF #> 4 10 KC #> 5 9.6 DAL"},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"next-steps","dir":"Articles","previous_headings":"","what":"Next Steps","title":"A beginner's guide to nflfastR","text":"now know enough able tackle great deal questions using nflfastR data. good way build skills take interesting things see try replicate (making figures, also involve heavy dose googling stuff). Looking others’ code also good way learn. One option look nflfastR code base, much now understand ’s . example, function cleans data prepares later stages: ’s heavy dose mutate, group_by, arrange, lag, if_else, case_when.","code":""},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"resources-the-gold-standards","dir":"Articles","previous_headings":"Next Steps","what":"Resources: The gold standards","title":"A beginner's guide to nflfastR","text":"R package section pretty R heavy. Introduction R (recommended) Open Source Football: Mix R Python Mockup Blog (Thomas Mock): Invaluable resource making cool stuff R","code":""},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"code-examples-r","dir":"Articles","previous_headings":"Next Steps","what":"Code examples: R","title":"A beginner's guide to nflfastR","text":"Lee Sharpe: basic intro R RStudio Lee Sharpe: lots useful NFL / nflscrapR code Lee Sharpe: update current season games Josh Hermsmeyer: Getting Started R NFL Analysis Slavin: visualizing positional tiers SFB9 Ron Yurko: assorted examples CowboysStats: defensive playmaking EPA Michael Lopez: function sample plays Michael Lopez: R NFL analysis (presentation club staffers) Mitchell Wesson: QB hits investigation Mitchell Wesson: Investigation nflscrapR EP model WHoffman: graphs receivers (aDoT, success rate, ) ChiBearsStats: investigation 3rd downs vs offensive efficiency ChiBearsStats: insignificance field goal kicking","code":""},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"more-data-sources","dir":"Articles","previous_headings":"Next Steps","what":"More data sources","title":"A beginner's guide to nflfastR","text":"Lee Sharpe: Draft Picks, Draft Values, Games, Logos, Rosters, Standings greerre: get .csv file weather & stadium data PFR python Parker Fleming: Introduction College Football Data R cfbscrapR","code":""},{"path":"https://www.nflfastr.com/articles/beginners_guide.html","id":"other-code-examples-python","dir":"Articles","previous_headings":"Next Steps","what":"Other code examples: Python","title":"A beginner's guide to nflfastR","text":"Deryck97: nflfastR Python Guide Nick Wan: nflfastR Python Colab Guide Cory Jez: animated plot 903124S: Sampling EP 903124S: estimating EPA using nfldb 903124S: estimate EPA college football Blake Atkinson: explosiveness blog post python code Blake Atkinson: player type visualizations blog post python code","code":""},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"the-main-functions","dir":"Articles","previous_headings":"","what":"The Main Functions","title":"Get started with nflfastR","text":"nflfastR comes set functions access NFL play--play data team rosters. section provides brief introduction essential functions. nflfastR processes cleans play--play data adds variables ’s models. Since tasks performed separate functions, easiest way compute complete nflfastR dataset build_nflfastR_pbp(). main input function set game ids can accessed fast_scraper_schedules(). following code demonstrates build nflfastR dataset Super Bowls 2017 - 2019 seasons. cases, however, necessary use function individual games, nflfastR provides data repository two main play--play functions: load_pbp() update_db(). cover load_pbp() , please see Example 8: Using built-database function work database function update_db(). easiest way access data data repository new function load_pbp(). can load multiple seasons directly memory supports multiple data formats. Loading play--play data 2018-2020 seasons easy Joining roster data play--play data set possible well. data can accessed function fast_scraper_roster() application demonstrated Example 10: Working roster position data.","code":"library(nflfastR) library(dplyr, warn.conflicts = FALSE) ids <- nflfastR::fast_scraper_schedules(2017:2019) %>% dplyr::filter(game_type == \"SB\") %>% dplyr::pull(game_id) pbp <- nflfastR::build_nflfastR_pbp(ids) #> ── Build nflfastR Play-by-Play Data ───────────── nflfastR version 4.6.1.9011 ── #> • 17:36:21 | Start download of 3 games... #> ✔ 17:36:26 | Download finished. Adding variables... #> ✔ 17:36:26 | added game variables #> ✔ 17:36:27 | added nflscrapR variables #> ✔ 17:36:27 | added ep variables #> ✔ 17:36:27 | added air_yac_ep variables #> ✔ 17:36:28 | added wp variables #> ✔ 17:36:28 | added air_yac_wp variables #> ✔ 17:36:28 | added cp and cpoe #> ✔ 17:36:28 | added fixed drive variables #> ✔ 17:36:28 | added series variables #> • 17:36:28 | Cleaning up play-by-play... #> ✔ 17:36:28 | Cleaning completed #> ✔ 17:36:28 | added qb_epa #> • 17:36:28 | Computing xyac... #> ✔ 17:36:30 | added xyac variables #> • 17:36:30 | Computing xpass... #> ✔ 17:36:31 | added xpass and pass_oe #> • 17:36:31 | Decode player ids... #> ✔ 17:36:32 | Decoding of player ids completed #> ── DONE ──────────────────────────────────────────────────────────────────────── pbp <- nflfastR::load_pbp(2018:2020)"},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"application-examples","dir":"Articles","previous_headings":"","what":"Application Examples","title":"Get started with nflfastR","text":"examples listed assume following two libraries installed loaded.","code":"library(nflfastR) library(tidyverse)"},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"example-1-replicate-nflscrapr-with-fast_scraper","dir":"Articles","previous_headings":"Application Examples","what":"Example 1: replicate nflscrapR with fast_scraper","title":"Get started with nflfastR","text":"functionality nflscrapR can duplicated using fast_scraper(). obtains information contained nflscrapR (plus extra) much quickly. compare nflscrapR, use data repository program longer functions now NFL taken old Gamecenter feed. Note EP differs nflscrapR use newer era-adjusted model (post Open Source Football). example also uses built-function clean_pbp() create ‘name’ column primary player involved (QB pass play ball-carrier run play).","code":"readr::read_csv(\"https://github.com/ryurko/nflscrapR-data/blob/master/play_by_play_data/regular_season/reg_pbp_2019.csv?raw=true\") %>% dplyr::filter(home_team == \"SF\" & away_team == \"SEA\") %>% dplyr::select(desc, play_type, ep, epa, home_wp) %>% utils::head(6) %>% knitr::kable(digits = 3) nflfastR::fast_scraper(\"2019_10_SEA_SF\") %>% nflfastR::clean_pbp() %>% dplyr::select(desc, play_type, ep, epa, home_wp, name) %>% utils::head(6) %>% knitr::kable(digits = 3)"},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"example-2-scrape-a-batch-of-games-very-quickly-with-fast_scraper","dir":"Articles","previous_headings":"Application Examples","what":"Example 2: scrape a batch of games very quickly with fast_scraper","title":"Get started with nflfastR","text":"demonstration nflfastR’s capabilities. nflfastR can scrape batch games quickly, please respectful Github’s servers use data repository hosts scraped cleaned data whenever possible. reason ever actually use scraper ’s middle season haven’t updated repository recent games (automatically updated overnight every day).","code":"# get list of some games from 2019 games_2019 <- nflfastR::fast_scraper_schedules(2019) %>% utils::head(10) %>% dplyr::pull(game_id) tictoc::tic(glue::glue(\"{length(games_2019)} games with nflfastR:\")) f <- nflfastR::fast_scraper(games_2019) tictoc::toc() #> 10 games with nflfastR:: 8.106 sec elapsed"},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"example-3-completion-percentage-over-expected-cpoe","dir":"Articles","previous_headings":"Application Examples","what":"Example 3: Completion Percentage Over Expected (CPOE)","title":"Get started with nflfastR","text":"Let’s look CPOE leaders 2009 regular season. discussed , nflfastR data repository old seasons, ’s need actually scrape . Let’s use convenience function load_pbp() fetches data repository (non-R users, .csv .parquet also available data repository).","code":"tictoc::tic(\"loading all games from 2009\") games_2009 <- nflfastR::load_pbp(2009) %>% dplyr::filter(season_type == \"REG\") tictoc::toc() #> loading all games from 2009: 2.198 sec elapsed games_2009 %>% dplyr::filter(!is.na(cpoe)) %>% dplyr::group_by(passer_player_name) %>% dplyr::summarize(cpoe = mean(cpoe), Atts = n()) %>% dplyr::filter(Atts > 200) %>% dplyr::arrange(-cpoe) %>% utils::head(5) %>% knitr::kable(digits = 1)"},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"example-4-using-drive-information","dir":"Articles","previous_headings":"Application Examples","what":"Example 4: Using Drive Information","title":"Get started with nflfastR","text":"working nflfastR, drive results automatically included. use fixed_drive fixed_drive_result since NFL-provided information bit wonky. Let’s look much likely teams score starting 1st & 10 20 yard line 2015 (last year touchbacks kickoffs changed 25) 2000. 20.5% 1st & 10 plays teams’ 20 see drive end score 2003, compared 30.5% 2015. implications Expected Points models (see article).","code":"pbp <- nflfastR::load_pbp(c(2003, 2015)) out <- pbp %>% dplyr::filter(season_type == \"REG\" & down == 1 & ydstogo == 10 & yardline_100 == 80) %>% dplyr::mutate(drive_score = dplyr::if_else(fixed_drive_result %in% c(\"Touchdown\", \"Field goal\"), 1, 0)) %>% dplyr::group_by(season) %>% dplyr::summarize(drive_score = mean(drive_score)) out %>% knitr::kable(digits = 3)"},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"example-5-plot-offensive-and-defensive-epa-per-play-for-a-given-season","dir":"Articles","previous_headings":"Application Examples","what":"Example 5: Plot offensive and defensive EPA per play for a given season","title":"Get started with nflfastR","text":"Let’s build NFL team tiers using offensive defensive expected points added per play 2005 regular season. Creating data viz including NFL team logos (wordmarks, headshots), recommend nflverse R package nflplotR. using load_pbp(), helper function clean_pbp() already run, creates “rush” “pass” columns () properly count sacks scrambles pass plays (b) properly include plays penalties. Using , can keep rush pass plays.","code":"library(nflplotR) pbp <- nflfastR::load_pbp(2005) %>% dplyr::filter(season_type == \"REG\") %>% dplyr::filter(!is.na(posteam) & (rush == 1 | pass == 1)) offense <- pbp %>% dplyr::group_by(team = posteam) %>% dplyr::summarise(off_epa = mean(epa, na.rm = TRUE)) defense <- pbp %>% dplyr::group_by(team = defteam) %>% dplyr::summarise(def_epa = mean(epa, na.rm = TRUE)) offense %>% dplyr::inner_join(defense, by = \"team\") %>% ggplot2::ggplot(aes(x = off_epa, y = def_epa)) + ggplot2::geom_abline(slope = -1.5, intercept = c(.4, .3, .2, .1, 0, -.1, -.2, -.3), alpha = .2) + nflplotR::geom_mean_lines(aes(y0 = off_epa, x0 = def_epa)) + nflplotR::geom_nfl_logos(aes(team_abbr = team), width = 0.07, alpha = 0.7) + ggplot2::labs( x = \"Offense EPA/play\", y = \"Defense EPA/play\", caption = \"Data: @nflfastR\", title = \"2005 NFL Offensive and Defensive EPA per Play\" ) + ggplot2::theme_bw() + ggplot2::theme( plot.title = ggplot2::element_text(size = 12, hjust = 0.5, face = \"bold\") ) + ggplot2::scale_y_reverse()"},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"example-6-expected-points-calculator","dir":"Articles","previous_headings":"Application Examples","what":"Example 6: Expected Points calculator","title":"Get started with nflfastR","text":"provided calculator working Expected Points model. example use , looking Expected Points drive beginning following touchback changed time. put 'SEA' home_team posteam, matters figuring whether team ball home team (’s actual effect given team; matter team supplied). surprisingly, offenses become much successful time, kickoff touchback moving 20 25 2016 providing additional boost. Note td_prob example probability next score within half touchdown scored team ball, probability current drive end touchdown (numbers different Example 4 ). compare recent four years expectation playing dome inputting things changing roof input: 2018 2019, 1st & 10 home team’s 25 yard line higher EP domes home, expected.","code":"data <- tibble::tibble( \"season\" = 1999:2019, \"home_team\" = \"SEA\", \"posteam\" = \"SEA\", \"roof\" = \"outdoors\", \"half_seconds_remaining\" = 1800, \"yardline_100\" = c(rep(80, 17), rep(75, 4)), \"down\" = 1, \"ydstogo\" = 10, \"posteam_timeouts_remaining\" = 3, \"defteam_timeouts_remaining\" = 3 ) nflfastR::calculate_expected_points(data) %>% dplyr::select(season, yardline_100, td_prob, ep) %>% knitr::kable(digits = 2) data <- tibble::tibble( \"season\" = 2016:2019, \"week\" = 5, \"home_team\" = \"SEA\", \"posteam\" = \"SEA\", \"roof\" = \"dome\", \"half_seconds_remaining\" = 1800, \"yardline_100\" = c(rep(75, 4)), \"down\" = 1, \"ydstogo\" = 10, \"posteam_timeouts_remaining\" = 3, \"defteam_timeouts_remaining\" = 3 ) nflfastR::calculate_expected_points(data) %>% dplyr::select(season, yardline_100, td_prob, ep) %>% knitr::kable(digits = 2)"},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"example-7-win-probability-calculator","dir":"Articles","previous_headings":"Application Examples","what":"Example 7: Win probability calculator","title":"Get started with nflfastR","text":"also provided calculator working win probability models. example use , looking win probability begin game depends pre-game spread. put 'SEA' home_team posteam, matters figuring whether team ball home team (’s actual effect given team; matter team supplied). surprisingly, vegas_wp increases amount team coming game favored .","code":"data <- tibble::tibble( \"receive_2h_ko\" = 0, \"home_team\" = \"SEA\", \"posteam\" = \"SEA\", \"score_differential\" = 0, \"half_seconds_remaining\" = 1800, \"game_seconds_remaining\" = 3600, \"spread_line\" = c(1, 3, 4, 7, 14), \"down\" = 1, \"ydstogo\" = 10, \"yardline_100\" = 75, \"posteam_timeouts_remaining\" = 3, \"defteam_timeouts_remaining\" = 3 ) nflfastR::calculate_win_probability(data) %>% dplyr::select(spread_line, wp, vegas_wp) %>% knitr::kable(digits = 2)"},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"example-8-using-the-built-in-database-function","dir":"Articles","previous_headings":"Application Examples","what":"Example 8: Using the built-in database function","title":"Get started with nflfastR","text":"’re comfortable using dplyr functions manipulate tidy data, ’re ready use database. use database? provided function nflfastR makes extremely easy build database keep updated Play--play data 20+ seasons takes lot memory: working database allows bring memory actually need R makes extremely easy work databases.","code":""},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"start-install-and-load-packages","dir":"Articles","previous_headings":"Application Examples > Example 8: Using the built-in database function","what":"Start: install and load packages","title":"Get started with nflfastR","text":"start, need install two packages required aren’t installed automatically nflfastR installs: DBI RSQLite (advanced users can use types databases, example use SQLite). statements make sure packages won’t updated already installed: always, need install . don’t need loaded build database nflfastR knows use , need later working database.","code":"if (!require(\"DBI\")) install.packages(\"DBI\") if (!require(\"RSQLite\")) install.packages(\"RSQLite\") library(DBI) library(RSQLite)"},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"build-database","dir":"Articles","previous_headings":"Application Examples > Example 8: Using the built-in database function","what":"Build database","title":"Get started with nflfastR","text":"’s exactly one function nflfastR works databases: update_db(). notes: use update_db() arguments, build SQLite database called pbp_db current working directory, play--play data table called nflfastR_pbp. can specify different directory dbdir. can specify different filename dbname. can specify different table name tblname. want rebuild database scratch whatever reason, supply force_rebuild = TRUE. primarily intended case update play--play data data repo due fixing bug want force database wiped updated. want rebuild specified seasons, can also supplied force_rebuild (e.g. force_rebuild = c(2019, 2020)). parameter db_connection intended advanced users want use DBI drivers, MariaDB, Postgres odbc. Please note dbdir dbname dropped db_connection provided argument tblname still used write data table database. Let’s say just want dump database current working directory. go! created database current directory called pbp_db. Wait, ’s ? ’s ! ’s partway season want make sure new games added database? run? update_db()! (just make sure ’re directory database saved supply right file path) ’s partway season want re-build season allow data corrections NFL propagate database, can specify one season rebuilt:","code":"nflfastR::update_db() #> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9011 ── #> ℹ 17:37:20 | Can't find the data table \"nflfastR_pbp\" #> in your database. Will load the play by play data from #> scratch. #> • 17:37:20 | Starting download of 25 seasons between 1999 and 2023... #> • 17:38:45 | Checking for missing completed games... #> ℹ 17:38:47 | You have 6703 games and are missing 0. #> ✔ 17:38:47 | Database update completed #> ℹ 17:38:47 | Path to your db: ./pbp_db #> ── DONE ──────────────────────────────────────────────────────────────────────── nflfastR::update_db() #> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9011 ── #> • 17:38:47 | Checking for missing completed games... #> ℹ 17:38:48 | You have 6703 games and are missing 0. #> ✔ 17:38:49 | Database update completed #> ℹ 17:38:49 | Path to your db: /home/runner/work/nflfastR/nflfastR/vignettes/pbp_db #> ── DONE ──────────────────────────────────────────────────────────────────────── nflfastR::update_db(force_rebuild = 2020) #> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9011 ── #> • 17:38:49 | Purging season 2020 from the data table \"nflfastR_pbp\" in your #> connected database... #> • 17:38:50 | Starting download of the 1 season 2020 #> • 17:38:53 | Checking for missing completed games... #> ℹ 17:38:54 | You have 6703 games and are missing 0. #> ✔ 17:38:54 | Database update completed #> ℹ 17:38:54 | Path to your db: /home/runner/work/nflfastR/nflfastR/vignettes/pbp_db #> ── DONE ────────────────────────────────────────────────────────────────────────"},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"connect-to-database","dir":"Articles","previous_headings":"Application Examples > Example 8: Using the built-in database function","what":"Connect to database","title":"Get started with nflfastR","text":"Now can make connection database. part look little bit foreign, need know database located. ’s current working directory, work: looks like nothing happened, now connection database. Now ’re ready stuff. aren’t familiar databases, ’re organized around tables. ’s see tables present database: Since went defaults, ’s table called nflfastR_pbp. Another useful function see fields (.e., columns) table: list list columns nflfastR play--play. Notice supply name table (\"nflfastR_pbp\"). way, ’s couple things learn. main driver tbl, helps get output specific table database: now, everything magically just “work”: can forget ’re even working database! far, everything stayed database. want bring query memory, just use collect() end: ’ve searched 1 million rows data across 300+ columns brought 6950 rows two columns memory. Pretty neat! supply data shiny apps rbsdm.com without running memory server. Now ’s one thing remember. ’re finished need database: details using database nflfastR, see Thomas Mock’s life-changing post . detailed information dbplyr (dplyr database back-end) given second edition Hadley Wickham’s R Data Science (2e).","code":"connection <- DBI::dbConnect(RSQLite::SQLite(), \"./pbp_db\") connection #> #> Path: /home/runner/work/nflfastR/nflfastR/vignettes/pbp_db #> Extensions: TRUE DBI::dbListTables(connection) #> [1] \"nflfastR_pbp\" DBI::dbListFields(connection, \"nflfastR_pbp\") %>% utils::head(10) #> [1] \"play_id\" \"game_id\" \"old_game_id\" \"home_team\" \"away_team\" #> [6] \"season_type\" \"week\" \"posteam\" \"posteam_type\" \"defteam\" pbp_db <- dplyr::tbl(connection, \"nflfastR_pbp\") pbp_db %>% dplyr::group_by(season) %>% dplyr::summarize(n = dplyr::n()) #> # Source: SQL [?? x 2] #> # Database: sqlite 3.46.0 [/home/runner/work/nflfastR/nflfastR/vignettes/pbp_db] #> season n #> #> 1 1999 46136 #> 2 2000 45491 #> 3 2001 44969 #> 4 2002 47355 #> 5 2003 46810 #> 6 2004 46706 #> 7 2005 46823 #> 8 2006 46299 #> 9 2007 46266 #> 10 2008 45917 #> # ℹ more rows pbp_db %>% dplyr::filter(rush == 1 | pass == 1, down <= 2, !is.na(epa), !is.na(posteam)) %>% dplyr::group_by(pass) %>% dplyr::summarize(mean_epa = mean(epa, na.rm = TRUE)) #> # Source: SQL [2 x 2] #> # Database: sqlite 3.46.0 [/home/runner/work/nflfastR/nflfastR/vignettes/pbp_db] #> pass mean_epa #> #> 1 0 -0.102 #> 2 1 0.0708 russ <- pbp_db %>% dplyr::filter(name == \"R.Wilson\" & posteam == \"SEA\") %>% dplyr::select(desc, epa) %>% dplyr::collect() russ #> # A tibble: 6,946 × 2 #> desc epa #> #> 1 (14:12) 3-R.Wilson pass short right to 18-S.Rice to SEA 34 for 9 yar… 1.13 #> 2 (12:53) 3-R.Wilson pass incomplete deep left to 18-S.Rice. PENALTY o… 2.68 #> 3 (11:25) (Shotgun) 3-R.Wilson pass incomplete short right to 18-S.Ric… -1.31 #> 4 (10:24) (Shotgun) 3-R.Wilson pass short left to 18-S.Rice to ARI 31 … 0.928 #> 5 (9:47) 3-R.Wilson scrambles right end ran ob at ARI 27 for 4 yards (… -0.0194 #> 6 (8:35) 3-R.Wilson pass incomplete short right to 18-S.Rice. -0.426 #> 7 (7:54) (Shotgun) 3-R.Wilson left end pushed ob at ARI 9 for 4 yards … -1.17 #> 8 (:27) 3-R.Wilson sacked at SEA 17 for -5 yards (51-P.Lenon). Penalty… -1.13 #> 9 (14:28) (Shotgun) 3-R.Wilson pass short right to 17-B.Edwards to SEA… 1.94 #> 10 (13:59) 3-R.Wilson pass incomplete deep left to 87-B.Obomanu. -0.453 #> # ℹ 6,936 more rows DBI::dbDisconnect(connection)"},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"example-9-working-with-the-expected-yards-after-catch-model","dir":"Articles","previous_headings":"Application Examples","what":"Example 9: working with the expected yards after catch model","title":"Get started with nflfastR","text":"variables xyac follows: xyac_epa: expected value EPA gained catch, starting catch made. xyac_success: probability play earns positive EPA (relative play started) based ball caught. xyac_fd: Probability play earns first based ball caught. xyac_mean_yardage xyac_median_yardage: Average median expected yards catch based ball caught. notes: epa = air_epa + yac_epa, air_epa EPA associated catch target location. receiver loses fumble, removed yac_epa Expected value EPA catch point = air_epa + xyac_epa want get YAC EPA expected, need compare yac_epa xyac_epa, example get first downs expected, compare first_down xyac_fd fields populated pass attempts, whether caught , restrict completed passes measuring, example, YAC EPA expected expected YAC EPA model doesn’t take receiver fumbles account, actual minus expected YAC slightly negative due fumbles happening Let’s create measures EPA first downs expected 2015: presence many running backs list suggests even though takes account target depth pass direction, model doesn’t great job capturing space. Alternatively, running backs might better generating yards catch since running football primary role.","code":"nflfastR::load_pbp(2015) %>% dplyr::group_by(receiver, receiver_id, posteam) %>% dplyr::mutate(tgt = sum(complete_pass + incomplete_pass)) %>% dplyr::filter(tgt >= 50) %>% dplyr::filter(complete_pass == 1, air_yards < yardline_100, !is.na(xyac_epa)) %>% dplyr::summarize( epa_oe = mean(yac_epa - xyac_epa), actual_fd = mean(first_down), expected_fd = mean(xyac_fd), fd_oe = mean(first_down - xyac_fd), rec = dplyr::n() ) %>% dplyr::ungroup() %>% dplyr::select(receiver, posteam, actual_fd, expected_fd, fd_oe, epa_oe, rec) %>% dplyr::arrange(-epa_oe) %>% utils::head(10) %>% knitr::kable(digits = 3)"},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"example-10-working-with-roster-and-position-data","dir":"Articles","previous_headings":"Application Examples","what":"Example 10: Working with roster and position data","title":"Get started with nflfastR","text":"long last, ’s way merge new play--play data roster information. Use function get rosters: Now let’s load play--play data 2019: player IDs look like nflfastR now automatically decodes IDs look like old format GSIS IDs: Now ’re ready join roster data using IDs: surprisingly, 5 top 5 WRs terms EPA added come ahead top RB. Note number targets won’t match official stats ’re including plays penalties.","code":"roster <- nflfastR::fast_scraper_roster(2019) games_2019 <- nflfastR::load_pbp(2019) games_2019 %>% dplyr::filter(rush == 1 | pass == 1, posteam == \"SEA\") %>% dplyr::select(name, id) #> ── nflverse play by play data ────────────────────────────────────────────────── #> ℹ Data updated: 2024-03-07 14:39:28 UTC #> # A tibble: 1,207 × 2 #> name id #> #> 1 C.Carson 00-0033594 #> 2 R.Wilson 00-0029263 #> 3 R.Wilson 00-0029263 #> 4 C.Carson 00-0033594 #> 5 R.Wilson 00-0029263 #> 6 C.Carson 00-0033594 #> 7 R.Wilson 00-0029263 #> 8 C.Carson 00-0033594 #> 9 R.Wilson 00-0029263 #> 10 R.Wilson 00-0029263 #> # ℹ 1,197 more rows joined <- games_2019 %>% dplyr::filter(!is.na(receiver_id)) %>% dplyr::select(posteam, season, desc, receiver, receiver_id, epa) %>% dplyr::left_join(roster, by = c(\"receiver_id\" = \"gsis_id\")) # the real work is done, this just makes a table and has it look nice joined %>% dplyr::filter(position %in% c(\"WR\", \"TE\", \"RB\")) %>% dplyr::group_by(receiver_id, receiver, position) %>% dplyr::summarize(tot_epa = sum(epa), n = n()) %>% dplyr::arrange(-tot_epa) %>% dplyr::ungroup() %>% dplyr::group_by(position) %>% dplyr::mutate(position_rank = 1:n()) %>% dplyr::filter(position_rank <= 5) %>% dplyr::rename(Pos_Rank = position_rank, Player = receiver, Pos = position, Tgt = n, EPA = tot_epa) %>% dplyr::select(Player, Pos, Pos_Rank, Tgt, EPA) %>% knitr::kable(digits = 0)"},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"example-11-replicating-official-stats","dir":"Articles","previous_headings":"Application Examples","what":"Example 11: Replicating official stats","title":"Get started with nflfastR","text":"columns like name, passer, fantasy etc nflfastR-created columns mimic “real” football: .e., excluding plays spikes, counting scrambles sacks pass plays, etc. ’re trying replicate official statistics – perhaps fantasy purposes – use *_player_name *_player_id columns.","code":""},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"leaderboards","dir":"Articles","previous_headings":"Application Examples > Example 11: Replicating official stats","what":"Leaderboards","title":"Get started with nflfastR","text":"Let’s try replicate page passing leaders. match official stats NFL.com (note filter season_type == \"REG\" since official stats count regular season games). Note ’re using passing_yards yards_gained equal passing yards plays laterals. works, ’ve also provided function : calculate_player_stats(). function takes nflfastR play--play dataframe input along one argument, weekly, defaults FALSE. weekly true, week--week dataframe returned (rather aggregate whole provided dataframe). Let’s replicate top 10 players passing yards: can rush attempts replicate NFL leaderboard: , matches exactly.","code":"nflfastR::load_pbp(2020) %>% dplyr::filter(season_type == \"REG\", complete_pass == 1 | incomplete_pass == 1 | interception == 1, !is.na(down)) %>% dplyr::group_by(passer_player_name, posteam) %>% dplyr::summarize( yards = sum(passing_yards, na.rm = T), tds = sum(touchdown == 1 & td_team == posteam), ints = sum(interception), att = dplyr::n() ) %>% dplyr::arrange(-yards) %>% utils::head(10) %>% knitr::kable(digits = 0) nflfastR::load_pbp(2020) %>% dplyr::filter(season_type == \"REG\") %>% nflfastR::calculate_player_stats() %>% dplyr::arrange(-passing_yards) %>% dplyr::select(player_name, recent_team, completions, attempts, passing_yards, passing_tds, interceptions) %>% utils::head(10) %>% knitr::kable(digits = 0) nflfastR::load_pbp(2020) %>% dplyr::filter(season_type == \"REG\") %>% nflfastR::calculate_player_stats() %>% dplyr::arrange(-rushing_yards) %>% dplyr::select(player_name, recent_team, carries, rushing_yards, rushing_tds, rushing_fumbles_lost) %>% utils::head(10) %>% knitr::kable(digits = 0)"},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"yards-from-scrimmage","dir":"Articles","previous_headings":"Application Examples > Example 11: Replicating official stats","what":"Yards from scrimmage","title":"Get started with nflfastR","text":"want total yards scrimmage? ’ll demonstrate three methods . hardest way use fantasy_player_name column, rusher rush plays receiver receiving plays: Looking PFR scrimmage stats, columns exact match. also just use calculate_player_stats() : get thing. third way use load_player_stats() function, can load data frame player-level stats every week since 1999. output identical.","code":"nflfastR::load_pbp(2020) %>% dplyr::filter(season_type == \"REG\", !is.na(down)) %>% dplyr::group_by(fantasy_player_name, posteam) %>% dplyr::summarize( carries = sum(rush_attempt), receptions = sum(complete_pass), touches = sum(rush_attempt + complete_pass), yards = sum(yards_gained), tds = sum(touchdown == 1 & td_team == posteam) ) %>% dplyr::arrange(-yards) %>% utils::head(10) %>% knitr::kable(digits = 0) nflfastR::load_pbp(2020) %>% dplyr::filter(season_type == \"REG\") %>% nflfastR::calculate_player_stats() %>% dplyr::mutate( yards = rushing_yards + receiving_yards, touches = carries + receptions, tds = rushing_tds + receiving_tds ) %>% dplyr::arrange(-yards) %>% dplyr::select(player_name, recent_team, carries, receptions, touches, yards, tds) %>% utils::head(10) %>% knitr::kable(digits = 0) nflfastR::load_player_stats(seasons = 2020) %>% dplyr::filter(season_type == \"REG\") %>% dplyr::group_by(player_id) %>% dplyr::summarize( player_name = dplyr::first(player_name), recent_team = dplyr::first(recent_team), yards = sum(rushing_yards + receiving_yards), touches = sum(carries + receptions), carries = sum(carries), receptions = sum(receptions), tds = sum(rushing_tds + receiving_tds) ) %>% dplyr::ungroup() %>% dplyr::arrange(-yards) %>% dplyr::select(player_name, recent_team, carries, receptions, touches, yards, tds) %>% utils::head(10) %>% knitr::kable(digits = 0)"},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"fantasy-points","dir":"Articles","previous_headings":"Application Examples > Example 11: Replicating official stats","what":"Fantasy points","title":"Get started with nflfastR","text":"Let’s calculate PPR fantasy points per game first 16 weeks season among wide receivers appeared 5 games. Comparing FantasyPros website, exact match.","code":"nflfastR::load_pbp(2020) %>% dplyr::filter(week <= 16) %>% nflfastR::calculate_player_stats() %>% dplyr::mutate( ppg = fantasy_points_ppr / games ) %>% dplyr::filter(games > 5) %>% # only keep the WRs dplyr::inner_join( nflfastR::fast_scraper_roster(2020) %>% dplyr::filter(position == \"WR\") %>% dplyr::select(player_id = gsis_id), by = \"player_id\" ) %>% dplyr::arrange(-ppg) %>% dplyr::select(player_name, recent_team, games, fantasy_points_ppr, ppg) %>% utils::head(10) %>% knitr::kable(digits = 1)"},{"path":[]},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"the-drive-column-looks-wacky","dir":"Articles","previous_headings":"Frequent issues","what":"The drive column looks wacky","title":"Get started with nflfastR","text":"Use fixed_drive fixed_drive_result instead. See Example 4: Using Drive Information.","code":""},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"why-are-there-so-many-win-probability-columns","dir":"Articles","previous_headings":"Frequent issues","what":"Why are there so many win probability columns?","title":"Get started with nflfastR","text":"vegas_wp vegas_home_wp incorporate pregame spread much better models.","code":""},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"im-trying-to-do-x--help","dir":"Articles","previous_headings":"Frequent issues","what":"I’m trying to do X. Help!","title":"Get started with nflfastR","text":"Please ask Discord channel.","code":""},{"path":"https://www.nflfastr.com/articles/nflfastR.html","id":"links","dir":"Articles","previous_headings":"","what":"Links","title":"Get started with nflfastR","text":"section helper holds hyperlinks chapters. ’s workaround missing sections anchor bug pkgdown hopefully fixed pull request point future. Main Functions Example 1: replicate nflscrapR fast_scraper Example 2: scrape batch games quickly fast_scraper Example 3: Completion Percentage Expected (CPOE) Example 4: Using Drive Information Example 5: Plot offensive defensive EPA per play given season Example 6: Expected Points calculator Example 7: Win probability calculator Example 8: Using built-database function Example 9: working expected yards catch model Example 10: Working roster position data Example 11: Replicating official stats Frequent issues Links","code":""},{"path":"https://www.nflfastr.com/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Sebastian Carl. Author. Ben Baldwin. Maintainer, author. Lee Sharpe. Contributor. Maksim Horowitz. Contributor. Ron Yurko. Contributor. Samuel Ventura. Contributor. Tan Ho. Contributor. John Edwards. Contributor.","code":""},{"path":"https://www.nflfastr.com/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Carl S, Baldwin B (2024). nflfastR: Functions Efficiently Access NFL Play Play Data. R package version 4.6.1.9011, https://github.com/nflverse/nflfastR, https://www.nflfastr.com/.","code":"@Manual{, title = {nflfastR: Functions to Efficiently Access NFL Play by Play Data}, author = {Sebastian Carl and Ben Baldwin}, year = {2024}, note = {R package version 4.6.1.9011, https://github.com/nflverse/nflfastR}, url = {https://www.nflfastr.com/}, }"},{"path":"https://www.nflfastr.com/index.html","id":"nflfastr-","dir":"","previous_headings":"","what":"An R package to quickly obtain clean and tidy NFL play by play data","title":"An R package to quickly obtain clean and tidy NFL play by play data","text":"nflfastR set functions efficiently scrape NFL play--play data. nflfastR expands upon features nflscrapR: package contains NFL play--play data back 1999 suggested package name, obtains games much faster Includes completion probability (cp), completion percentage expected (cpoe), expected yards catch (xyac_epa xyac_mean_yardage) play--play going back 2006 Includes drive information, including drive starting position drive result Includes series information, including series number series success Hosts repository play--play data going back 1999 quick access Features models Expected Points, Win Probability, Completion Probability, Yards Catch (see section ) Includes function update_db() creates updates database owe debt gratitude original nflscrapR team, Maksim Horowitz, Ronald Yurko, Samuel Ventura, without whose contributions inspiration package exist.","code":""},{"path":"https://www.nflfastr.com/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"An R package to quickly obtain clean and tidy NFL play by play data","text":"easiest way get nflfastR install CRAN : get bug fix use feature development version, can install development version nflfastR either GitHub : prebuilt development repo :","code":"install.packages(\"nflfastR\") if (!require(\"pak\")) install.packages(\"pak\") pak::pak(\"nflverse/nflfastR\") install.packages(\"nflfastR\", repos = c(\"https://nflverse.r-universe.dev\", getOption(\"repos\")))"},{"path":"https://www.nflfastr.com/index.html","id":"usage","dir":"","previous_headings":"","what":"Usage","title":"An R package to quickly obtain clean and tidy NFL play by play data","text":"provided application examples Getting Started article. However, require basic knowledge R. reason nflfastR beginner’s guide, recommend looking introduction nflfastR R. can find column names descriptions Field Descriptions article, accessing field_descriptions dataframe package.","code":""},{"path":"https://www.nflfastr.com/index.html","id":"data-access","dir":"","previous_headings":"","what":"Data access","title":"An R package to quickly obtain clean and tidy NFL play by play data","text":"Even though nflfastR fast, recommend downloading data using nflreadr package. data sets include play--play data complete seasons going back 1999 updated nightly season. files contain regular season postseason data, one can use game_type week figure games occurred postseason.","code":""},{"path":"https://www.nflfastr.com/index.html","id":"nflfastr-models","dir":"","previous_headings":"","what":"nflfastR models","title":"An R package to quickly obtain clean and tidy NFL play by play data","text":"nflfastR uses models Expected Points, Win Probability, Completion Probability, Expected Yards Catch. read models, please see post Open Source Football. detailed description motivation Expected Points models, highly recommend paper nflscrapR team located . visualization Expected Points model yardline. visualization Completion Probability model air yards pass direction. nflfastR includes two win probability models: one one without incorporating pre-game spread.","code":""},{"path":"https://www.nflfastr.com/index.html","id":"special-thanks","dir":"","previous_headings":"","what":"Special thanks","title":"An R package to quickly obtain clean and tidy NFL play by play data","text":"Nick Shoemaker finding making available JSON-formatted NFL play--play back 1999 (nflfastR uses source 1999 2000 previously also used 2001-2010) Lau Sze Yui developing scraping function access JSON-formatted NFL play--play beginning 2001 Aaron Schatz FTN Fantasy providing charting data correctly mark scrambles 1999-2005 seasons Lee Sharpe curating resource game information Timo Riske, Lau Sze Yui, Sean Clement, Daniel Houston many helpful discussions regarding development new nflfastR models Zach Feldman Josh Hermsmeyer many helpful discussions CPOE models well Peter Owen many helpful suggestions CP model Florian Schmitt logo design many users found reported bugs nflfastR 1.0 course, original nflscrapR team, Maksim Horowitz, Ronald Yurko, Samuel Ventura, whose work represented dramatic step forward state public NFL research","code":""},{"path":"https://www.nflfastr.com/reference/add_qb_epa.html","id":null,"dir":"Reference","previous_headings":"","what":"Compute QB epa — add_qb_epa","title":"Compute QB epa — add_qb_epa","text":"Compute QB epa","code":""},{"path":"https://www.nflfastr.com/reference/add_qb_epa.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Compute QB epa — add_qb_epa","text":"","code":"add_qb_epa(pbp, ...)"},{"path":"https://www.nflfastr.com/reference/add_qb_epa.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Compute QB epa — add_qb_epa","text":"pbp Data frame play--play data scraped using fast_scraper(). ... Additional arguments passed message function (internal use).","code":""},{"path":"https://www.nflfastr.com/reference/add_qb_epa.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Compute QB epa — add_qb_epa","text":"Add variable 'qb_epa', gives QB credit EPA point receiver lost fumble completed catch makes EPA work like passing yards plays fumbles","code":""},{"path":"https://www.nflfastr.com/reference/add_xpass.html","id":null,"dir":"Reference","previous_headings":"","what":"Add expected pass columns — add_xpass","title":"Add expected pass columns — add_xpass","text":"Build columns expected dropback model. return NA data prior 2006 since NFL started marking scrambles. Must run dataframe already clean_pbp() run . Note functions build_nflfastR_pbp() database function update_db() already include function.","code":""},{"path":"https://www.nflfastr.com/reference/add_xpass.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Add expected pass columns — add_xpass","text":"","code":"add_xpass(pbp, ...)"},{"path":"https://www.nflfastr.com/reference/add_xpass.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Add expected pass columns — add_xpass","text":"pbp Data frame play--play data scraped using fast_scraper(). ... Additional arguments passed message function (internal use).","code":""},{"path":"https://www.nflfastr.com/reference/add_xpass.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Add expected pass columns — add_xpass","text":"input Data Frame parameter pbp following columns added: xpass Probability dropback scaled 0 1. pass_oe Dropback percent expected given play scaled 0 100.","code":""},{"path":"https://www.nflfastr.com/reference/add_xyac.html","id":null,"dir":"Reference","previous_headings":"","what":"Add expected yards after completion (xyac) variables — add_xyac","title":"Add expected yards after completion (xyac) variables — add_xyac","text":"Add expected yards completion (xyac) variables","code":""},{"path":"https://www.nflfastr.com/reference/add_xyac.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Add expected yards after completion (xyac) variables — add_xyac","text":"","code":"add_xyac(pbp, ...)"},{"path":"https://www.nflfastr.com/reference/add_xyac.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Add expected yards after completion (xyac) variables — add_xyac","text":"pbp Data frame play--play data scraped using fast_scraper(). ... Additional arguments passed message function (internal use).","code":""},{"path":"https://www.nflfastr.com/reference/add_xyac.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Add expected yards after completion (xyac) variables — add_xyac","text":"input Data Frame parameter 'pbp' following columns added: xyac_epa Expected value EPA gained catch, starting catch made. Zero yards catch listed zero EPA. xyac_success Probability play earns positive EPA (relative play started) based ball caught. xyac_fd Probability play earns first based ball caught. xyac_mean_yardage Average expected yards catch based ball caught. xyac_median_yardage Median expected yards catch based ball caught.","code":""},{"path":"https://www.nflfastr.com/reference/add_xyac.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Add expected yards after completion (xyac) variables — add_xyac","text":"Build columns capture expect catch.","code":""},{"path":"https://www.nflfastr.com/reference/build_nflfastR_pbp.html","id":null,"dir":"Reference","previous_headings":"","what":"Build a Complete nflfastR Data Set — build_nflfastR_pbp","title":"Build a Complete nflfastR Data Set — build_nflfastR_pbp","text":"build_nflfastR_pbp convenient wrapper around 6 nflfastR functions: fast_scraper() clean_pbp() add_qb_epa() add_xyac() add_xpass() decode_player_ids() Please see either documentation function nflfastR Field Descriptions website learn output.","code":""},{"path":"https://www.nflfastr.com/reference/build_nflfastR_pbp.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Build a Complete nflfastR Data Set — build_nflfastR_pbp","text":"","code":"build_nflfastR_pbp( game_ids, dir = getOption(\"nflfastR.raw_directory\", default = NULL), ..., decode = TRUE, rules = TRUE )"},{"path":"https://www.nflfastr.com/reference/build_nflfastR_pbp.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Build a Complete nflfastR Data Set — build_nflfastR_pbp","text":"game_ids Vector character ids data frame including variable game_id (see details information). dir Path local directory (defaults option \"nflfastR.raw_directory\") nflfastR searches raw game play--play data. See save_raw_pbp() additional information. ... Additional arguments passed scraping functions (internal use) decode TRUE, function decode_player_ids() executed. rules FALSE, printing header footer console output suppressed.","code":""},{"path":"https://www.nflfastr.com/reference/build_nflfastR_pbp.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Build a Complete nflfastR Data Set — build_nflfastR_pbp","text":"nflfastR play--play data frame like can loaded https://github.com/nflverse/nflverse-data.","code":""},{"path":"https://www.nflfastr.com/reference/build_nflfastR_pbp.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Build a Complete nflfastR Data Set — build_nflfastR_pbp","text":"load valid game_ids please use package function fast_scraper_schedules().","code":""},{"path":[]},{"path":"https://www.nflfastr.com/reference/build_nflfastR_pbp.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Build a Complete nflfastR Data Set — build_nflfastR_pbp","text":"","code":"# \\donttest{ # Build nflfastR pbp for the 2018 and 2019 Super Bowls try({# to avoid CRAN test problems build_nflfastR_pbp(c(\"2018_21_NE_LA\", \"2019_21_SF_KC\")) }) #> ── Build nflfastR Play-by-Play Data ───────────── nflfastR version 4.6.1.9011 ── #> • 17:32:50 | Start download of 2 games... #> ℹ It is recommended to use parallel processing when trying to load multiple games.Please consider running `future::plan(\"multisession\")`! Will go on sequentially... #> ✔ 17:32:52 | Download finished. Adding variables... #> ✔ 17:32:53 | added game variables #> ✔ 17:32:53 | added nflscrapR variables #> ✔ 17:32:54 | added ep variables #> ✔ 17:32:54 | added air_yac_ep variables #> ✔ 17:32:54 | added wp variables #> ✔ 17:32:54 | added air_yac_wp variables #> ✔ 17:32:54 | added cp and cpoe #> ✔ 17:32:54 | added fixed drive variables #> ✔ 17:32:54 | added series variables #> • 17:32:54 | Cleaning up play-by-play... #> ✔ 17:32:54 | Cleaning completed #> ✔ 17:32:55 | added qb_epa #> • 17:32:55 | Computing xyac... #> ✔ 17:32:57 | added xyac variables #> • 17:32:57 | Computing xpass... #> ✔ 17:32:57 | added xpass and pass_oe #> • 17:32:57 | Decode player ids... #> ✔ 17:32:58 | Decoding of player ids completed #> ── DONE ──────────────────────────────────────────────────────────────────────── #> ── nflverse play by play ─────────────────────────────────────────────────────── #> ℹ Data updated: 2024-08-02 17:32:58 UTC #> # A tibble: 349 × 372 #> play_id game_id old_game_id home_team away_team season_type week posteam #> #> 1 1 2018_21_NE… 2019020300 LA NE POST 21 NA #> 2 38 2018_21_NE… 2019020300 LA NE POST 21 NE #> 3 67 2018_21_NE… 2019020300 LA NE POST 21 NE #> 4 89 2018_21_NE… 2019020300 LA NE POST 21 NE #> 5 111 2018_21_NE… 2019020300 LA NE POST 21 NE #> 6 133 2018_21_NE… 2019020300 LA NE POST 21 NE #> 7 155 2018_21_NE… 2019020300 LA NE POST 21 NE #> 8 182 2018_21_NE… 2019020300 LA NE POST 21 LA #> 9 204 2018_21_NE… 2019020300 LA NE POST 21 LA #> 10 226 2018_21_NE… 2019020300 LA NE POST 21 LA #> # ℹ 339 more rows #> # ℹ 364 more variables: posteam_type , defteam , side_of_field , #> # yardline_100 , game_date , quarter_seconds_remaining , #> # half_seconds_remaining , game_seconds_remaining , #> # game_half , quarter_end , drive , sp , qtr , #> # down , goal_to_go , time , yrdln , ydstogo , #> # ydsnet , desc , play_type , yards_gained , … # It is also possible to directly use the # output of `fast_scraper_schedules` as input try({# to avoid CRAN test problems library(dplyr, warn.conflicts = FALSE) fast_scraper_schedules(2020) %>% slice_tail(n = 3) %>% build_nflfastR_pbp() }) #> ── Build nflfastR Play-by-Play Data ───────────── nflfastR version 4.6.1.9011 ── #> • 17:32:58 | Start download of 3 games... #> ℹ It is recommended to use parallel processing when trying to load multiple games.Please consider running `future::plan(\"multisession\")`! Will go on sequentially... #> ✔ 17:33:01 | Download finished. Adding variables... #> ✔ 17:33:01 | added game variables #> ✔ 17:33:01 | added nflscrapR variables #> ✔ 17:33:01 | added ep variables #> ✔ 17:33:01 | added air_yac_ep variables #> ✔ 17:33:02 | added wp variables #> ✔ 17:33:02 | added air_yac_wp variables #> ✔ 17:33:02 | added cp and cpoe #> ✔ 17:33:02 | added fixed drive variables #> ✔ 17:33:02 | added series variables #> • 17:33:02 | Cleaning up play-by-play... #> ✔ 17:33:02 | Cleaning completed #> ✔ 17:33:02 | added qb_epa #> • 17:33:02 | Computing xyac... #> ✔ 17:33:03 | added xyac variables #> • 17:33:03 | Computing xpass... #> ✔ 17:33:03 | added xpass and pass_oe #> • 17:33:03 | Decode player ids... #> ✔ 17:33:03 | Decoding of player ids completed #> ── DONE ──────────────────────────────────────────────────────────────────────── #> ── nflverse play by play ─────────────────────────────────────────────────────── #> ℹ Data updated: 2024-08-02 17:33:03 UTC #> # A tibble: 539 × 372 #> play_id game_id old_game_id home_team away_team season_type week posteam #> #> 1 1 2020_20_BU… 2021012401 KC BUF POST 20 NA #> 2 42 2020_20_BU… 2021012401 KC BUF POST 20 BUF #> 3 57 2020_20_BU… 2021012401 KC BUF POST 20 BUF #> 4 78 2020_20_BU… 2021012401 KC BUF POST 20 BUF #> 5 102 2020_20_BU… 2021012401 KC BUF POST 20 BUF #> 6 123 2020_20_BU… 2021012401 KC BUF POST 20 BUF #> 7 145 2020_20_BU… 2021012401 KC BUF POST 20 BUF #> 8 174 2020_20_BU… 2021012401 KC BUF POST 20 BUF #> 9 207 2020_20_BU… 2021012401 KC BUF POST 20 BUF #> 10 236 2020_20_BU… 2021012401 KC BUF POST 20 BUF #> # ℹ 529 more rows #> # ℹ 364 more variables: posteam_type , defteam , side_of_field , #> # yardline_100 , game_date , quarter_seconds_remaining , #> # half_seconds_remaining , game_seconds_remaining , #> # game_half , quarter_end , drive , sp , qtr , #> # down , goal_to_go , time , yrdln , ydstogo , #> # ydsnet , desc
load_teams() #> ── nflverse team graphics ────────────────────────────────────────────────────── -#> ℹ Data updated: 2024-08-02 17:34:42 UTC +#> ℹ Data updated: 2024-08-12 18:41:37 UTC #> # A tibble: 32 × 16 #> team_abbr team_name team_id team_nick team_conf team_division team_color #> <chr> <chr> <chr> <chr> <chr> <chr> <chr> @@ -918,7 +918,7 @@ Get team wins each season#> $ stadium_id : chr [1:6978] "ATL00" "CHI98" "CLE00" "GNB00" ... #> $ stadium : chr [1:6978] "Georgia Dome" "Soldier Field" "Cleveland Browns Stadium" "Lambeau Field" ... #> - attr(*, "nflverse_type")= chr "games and schedules" -#> - attr(*, "nflverse_timestamp")= POSIXct[1:1], format: "2024-08-02 17:35:13"
To start, we want to create a dataframe where each row is a team-season observation, listing how many games they won. There are multiple ways to do this, but I’m going to just take the home and away @@ -931,7 +931,7 @@
In most cases, however, it is not necessary to use this function for individual games, because nflfastR provides both a data repository and @@ -359,7 +359,7 @@
tictoc::tic("loading all games from 2009") games_2009 <- nflfastR::load_pbp(2009) %>% dplyr::filter(season_type == "REG") tictoc::toc() -#> loading all games from 2009: 2.198 sec elapsed +#> loading all games from 2009: 2.088 sec elapsed games_2009 %>% dplyr::filter(!is.na(cpoe)) %>% dplyr::group_by(passer_player_name) %>% @@ -864,15 +864,15 @@ Build database nflfastR::update_db() -#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9011 ── -#> ℹ 17:37:20 | Can't find the data table "nflfastR_pbp" +#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9012 ── +#> ℹ 18:44:02 | Can't find the data table "nflfastR_pbp" #> in your database. Will load the play by play data from #> scratch. -#> • 17:37:20 | Starting download of 25 seasons between 1999 and 2023... -#> • 17:38:45 | Checking for missing completed games... -#> ℹ 17:38:47 | You have 6703 games and are missing 0. -#> ✔ 17:38:47 | Database update completed -#> ℹ 17:38:47 | Path to your db: ./pbp_db +#> • 18:44:02 | Starting download of 25 seasons between 1999 and 2023... +#> • 18:45:21 | Checking for missing completed games... +#> ℹ 18:45:22 | You have 6703 games and are missing 0. +#> ✔ 18:45:23 | Database update completed +#> ℹ 18:45:23 | Path to your db: ./pbp_db #> ── DONE ──────────────────────────────────────────────────────────────────────── This created a database in the current directory called pbp_db. @@ -883,25 +883,25 @@ Build database nflfastR::update_db() -#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9011 ── -#> • 17:38:47 | Checking for missing completed games... -#> ℹ 17:38:48 | You have 6703 games and are missing 0. -#> ✔ 17:38:49 | Database update completed -#> ℹ 17:38:49 | Path to your db: /home/runner/work/nflfastR/nflfastR/vignettes/pbp_db +#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9012 ── +#> • 18:45:23 | Checking for missing completed games... +#> ℹ 18:45:23 | You have 6703 games and are missing 0. +#> ✔ 18:45:24 | Database update completed +#> ℹ 18:45:24 | Path to your db: /home/runner/work/nflfastR/nflfastR/vignettes/pbp_db #> ── DONE ──────────────────────────────────────────────────────────────────────── If it’s partway through a season and you want to re-build a season to allow for data corrections from the NFL to propagate into your database, you can specify one season to be rebuilt: nflfastR::update_db(force_rebuild = 2020) -#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9011 ── -#> • 17:38:49 | Purging season 2020 from the data table "nflfastR_pbp" in your +#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9012 ── +#> • 18:45:24 | Purging season 2020 from the data table "nflfastR_pbp" in your #> connected database... -#> • 17:38:50 | Starting download of the 1 season 2020 -#> • 17:38:53 | Checking for missing completed games... -#> ℹ 17:38:54 | You have 6703 games and are missing 0. -#> ✔ 17:38:54 | Database update completed -#> ℹ 17:38:54 | Path to your db: /home/runner/work/nflfastR/nflfastR/vignettes/pbp_db +#> • 18:45:25 | Starting download of the 1 season 2020 +#> • 18:45:28 | Checking for missing completed games... +#> ℹ 18:45:28 | You have 6703 games and are missing 0. +#> ✔ 18:45:29 | Database update completed +#> ℹ 18:45:29 | Path to your db: /home/runner/work/nflfastR/nflfastR/vignettes/pbp_db #> ── DONE ────────────────────────────────────────────────────────────────────────
nflfastR::update_db() -#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9011 ── -#> ℹ 17:37:20 | Can't find the data table "nflfastR_pbp" +#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9012 ── +#> ℹ 18:44:02 | Can't find the data table "nflfastR_pbp" #> in your database. Will load the play by play data from #> scratch. -#> • 17:37:20 | Starting download of 25 seasons between 1999 and 2023... -#> • 17:38:45 | Checking for missing completed games... -#> ℹ 17:38:47 | You have 6703 games and are missing 0. -#> ✔ 17:38:47 | Database update completed -#> ℹ 17:38:47 | Path to your db: ./pbp_db +#> • 18:44:02 | Starting download of 25 seasons between 1999 and 2023... +#> • 18:45:21 | Checking for missing completed games... +#> ℹ 18:45:22 | You have 6703 games and are missing 0. +#> ✔ 18:45:23 | Database update completed +#> ℹ 18:45:23 | Path to your db: ./pbp_db #> ── DONE ────────────────────────────────────────────────────────────────────────
This created a database in the current directory called pbp_db.
pbp_db
nflfastR::update_db() -#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9011 ── -#> • 17:38:47 | Checking for missing completed games... -#> ℹ 17:38:48 | You have 6703 games and are missing 0. -#> ✔ 17:38:49 | Database update completed -#> ℹ 17:38:49 | Path to your db: /home/runner/work/nflfastR/nflfastR/vignettes/pbp_db +#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9012 ── +#> • 18:45:23 | Checking for missing completed games... +#> ℹ 18:45:23 | You have 6703 games and are missing 0. +#> ✔ 18:45:24 | Database update completed +#> ℹ 18:45:24 | Path to your db: /home/runner/work/nflfastR/nflfastR/vignettes/pbp_db #> ── DONE ────────────────────────────────────────────────────────────────────────
If it’s partway through a season and you want to re-build a season to allow for data corrections from the NFL to propagate into your database, you can specify one season to be rebuilt:
nflfastR::update_db(force_rebuild = 2020) -#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9011 ── -#> • 17:38:49 | Purging season 2020 from the data table "nflfastR_pbp" in your +#> ── Update nflfastR Play-by-Play Database ──────── nflfastR version 4.6.1.9012 ── +#> • 18:45:24 | Purging season 2020 from the data table "nflfastR_pbp" in your #> connected database... -#> • 17:38:50 | Starting download of the 1 season 2020 -#> • 17:38:53 | Checking for missing completed games... -#> ℹ 17:38:54 | You have 6703 games and are missing 0. -#> ✔ 17:38:54 | Database update completed -#> ℹ 17:38:54 | Path to your db: /home/runner/work/nflfastR/nflfastR/vignettes/pbp_db +#> • 18:45:25 | Starting download of the 1 season 2020 +#> • 18:45:28 | Checking for missing completed games... +#> ℹ 18:45:28 | You have 6703 games and are missing 0. +#> ✔ 18:45:29 | Database update completed +#> ℹ 18:45:29 | Path to your db: /home/runner/work/nflfastR/nflfastR/vignettes/pbp_db #> ── DONE ────────────────────────────────────────────────────────────────────────
Carl S, Baldwin B (2024). nflfastR: Functions to Efficiently Access NFL Play by Play Data. -R package version 4.6.1.9011, +R package version 4.6.1.9012, https://github.com/nflverse/nflfastR, https://www.nflfastr.com/.
@Manual{, title = {nflfastR: Functions to Efficiently Access NFL Play by Play Data}, author = {Sebastian Carl and Ben Baldwin}, year = {2024}, - note = {R package version 4.6.1.9011, + note = {R package version 4.6.1.9012, https://github.com/nflverse/nflfastR}, url = {https://www.nflfastr.com/}, }
clean_pbp()
pass = 1
CRAN release: 2024-01-09