diff --git a/DESCRIPTION b/DESCRIPTION index 3339a9c9..1b683ec8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: worldfootballR Title: Extract and Clean World Football (Soccer) Data -Version: 0.6.2.9100 +Version: 0.6.2.9200 Authors@R: c( person("Jason", "Zivkovic", , "jaseziv83@gmail.com", role = c("aut", "cre", "cph")), person("Tony", "ElHabr", , "anthonyelhabr@gmail.com", role = "ctb"), diff --git a/NAMESPACE b/NAMESPACE index ea078f9d..b5775aa7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -97,7 +97,9 @@ importFrom(glue,glue) importFrom(glue,glue_collapse) importFrom(httr,GET) importFrom(httr,POST) +importFrom(httr,build_url) importFrom(httr,content) +importFrom(httr,parse_url) importFrom(httr,set_cookies) importFrom(janitor,clean_names) importFrom(janitor,make_clean_names) diff --git a/NEWS.md b/NEWS.md index ad96512e..a178cabf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -23,6 +23,7 @@ * `fb_league_stats()` uses `rvest::htm_table()` if `team_or_player = "table"` (faster and more reliable), and only uses chromote if `team_or_player = "player"`. (0.6.2.7100) * Use `quiet = FALSE` in all `purrr::possibly()` calls internally. Improve messaging for unexpected outcomes in `fotmob_get_matches_by_date()` and `fotmob_get_match_info()`. (0.6.2.8000) [#244](https://github.com/JaseZiv/worldfootballR/pull/244) * `load_fb_match_shooting()` added. (0.6.2.9000) [#249](https://github.com/JaseZiv/worldfootballR/pull/249) +* `fotmob_get_league_matches()` and `fotmob_get_league_tables` gain a `season` parameter. (0.6.2.9200) [#256](https://github.com/JaseZiv/worldfootballR/pull/256) # worldfootballR 0.6.2 diff --git a/R/fotmob_leagues.R b/R/fotmob_leagues.R index a3df6ac3..fd519090 100644 --- a/R/fotmob_leagues.R +++ b/R/fotmob_leagues.R @@ -153,8 +153,16 @@ fotmob_get_league_ids <- function(cached = TRUE) { } #' @importFrom purrr safely -.fotmob_get_league_resp <- function(league_id, page_url, fallback = TRUE) { - url <- sprintf("https://www.fotmob.com/api/leagues?id=%s", league_id) +#' @importFrom httr parse_url build_url +#' @importFrom rlang inform +#' @importFrom glue glue +.fotmob_get_league_resp <- function(league_id, page_url, season = NULL, fallback = TRUE) { + url <- httr::parse_url("https://www.fotmob.com/api/leagues") + url$query <- list( + "id" = league_id, + "season" = season + ) + url <- httr::build_url(url) resp <- safely_from_json(url) if(!is.null(resp$result)) { return(resp$result) @@ -162,6 +170,11 @@ fotmob_get_league_ids <- function(cached = TRUE) { first_url <- url if(fallback) { + if (!is.null(season)) { + rlang::inform( + glue::glue('`season` ignored in call to "{page_url}".') + ) + } resp <- .fotmob_get_league_resp_from_build_id(page_url) if(!is.null(resp$result)) { return(resp$result) @@ -175,18 +188,20 @@ fotmob_get_league_ids <- function(cached = TRUE) { #' Get fotmob match results by league #' -#' Returns match results for all matches played on the selected date from fotmob.com. +#' Returns match status given a league and season #' #' @param country Three character country code. Can be one or multiple. If provided, `league_name` must also be provided (of the same length) #' @param league_name League names. If provided, `country` must also be provided (of the same length). #' @param league_id Fotmob ID for the league. Only used if `country` and `league_name` are not specified. +#' @param season Season, e.g. `"2021/2022"`. Can be one or multiple. If left as `NULL` (default), data for the latest season available will be pulled. #' @inheritParams fotmob_get_league_ids #' #' @return returns a dataframe of league matches #' -#' @importFrom purrr possibly map2_dfr +#' @importFrom purrr possibly pmap_dfr #' @importFrom tibble tibble #' @importFrom rlang maybe_missing +#' @importFrom tidyr crossing #' #' @export #' @@ -207,6 +222,13 @@ fotmob_get_league_ids <- function(cached = TRUE) { #' league_id = 47 #' ) #' +#' # can specify past seasons +#' fotmob_get_league_matches( +#' country = "GER", +#' league_name = "1. Bundesliga", +#' season = "2020/2021" +#' ) +#' #' # multiple leagues (could also use ids) #' league_matches <- fotmob_get_league_matches( #' country = c("ENG", "ESP" ), @@ -219,7 +241,7 @@ fotmob_get_league_ids <- function(cached = TRUE) { #' tidyr::unnest_wider(c(home, away), names_sep = "_") #' }) #' } -fotmob_get_league_matches <- function(country, league_name, league_id, cached = TRUE) { +fotmob_get_league_matches <- function(country, league_name, league_id, season = NULL, cached = TRUE) { urls <- .fotmob_get_league_ids( cached = cached, @@ -228,28 +250,64 @@ fotmob_get_league_matches <- function(country, league_name, league_id, cached = league_id = rlang::maybe_missing(league_id, NULL) ) - fp <- purrr::possibly( - .fotmob_get_league_matches, - quiet = FALSE, - otherwise = tibble::tibble() + # Need to coerce to `NA_character` since crossing doesn't like `NULL` + season <- ifelse(is.null(season), NA_character_, season) + urls <- tidyr::crossing( + urls, + "season" = season ) - purrr::map2_dfr( - urls$id, urls$page_url, + + purrr::pmap_dfr( + list( + urls$id, + urls$page_url, + urls$season + ), .fotmob_get_league_matches ) } +#' @importFrom glue glue glue_collapse +#' @importFrom rlang inform +.fotmob_message_for_season <- function(resp, season = NULL) { + + if (is.null(season)) { + rlang::inform( + glue::glue('Defaulting `season` to latest ("{resp$allAvailableSeasons[1]}").'), + .frequency = "once", + .frequency_id = ".fotmob_get_league_(matches|tables)" + ) + } else { + if (isFALSE(season %in% resp$allAvailableSeasons)) { + stop( + glue::glue( + "`season` should be one of the following:\n{glue::glue_collapse(resp$allAvailableSeasons, '\n')}" + ) + ) + } + } + +} + + #' @importFrom janitor clean_names #' @importFrom tibble as_tibble #' @importFrom purrr map_dfr #' @importFrom dplyr bind_rows -.fotmob_get_league_matches <- function(league_id, page_url) { - resp <- .fotmob_get_league_resp(league_id, page_url) +.fotmob_get_league_matches <- function(league_id, page_url, season = NULL) { + # And now coerce NA back to NULL + season <- switch(!is.na(season), season, NULL) + resp <- .fotmob_get_league_resp( + league_id = league_id, + page_url = page_url, + season = season + ) + .fotmob_message_for_season(resp, season) rounds <- resp$matches$data$matchesCombinedByRound purrr::map_dfr( rounds, ~dplyr::mutate(.x, dplyr::across(.data[["roundName"]], as.character)), - ) %>% + ) %>% janitor::clean_names() %>% tibble::as_tibble() } @@ -265,6 +323,7 @@ fotmob_get_league_matches <- function(country, league_name, league_id, cached = #' @importFrom purrr possibly map2_dfr #' @importFrom tibble tibble #' @importFrom rlang maybe_missing +#' @importFrom tidyr crossing #' #' @export #' @@ -285,6 +344,13 @@ fotmob_get_league_matches <- function(country, league_name, league_id, cached = #' league_id = 47 #' ) #' +#' # one league, past season +#' fotmob_get_league_tables( +#' country = "GER", +#' league_name = "1. Bundesliga", +#' season = "2020/2021" +#' ) +#' #' # multiple leagues (could also use ids) #' league_tables <- fotmob_get_league_tables( #' country = c("ENG", "ESP" ), @@ -296,7 +362,7 @@ fotmob_get_league_matches <- function(country, league_name, league_id, cached = #' dplyr::filter(table_type == "away") #' }) #' } -fotmob_get_league_tables <- function(country, league_name, league_id, cached = TRUE) { +fotmob_get_league_tables <- function(country, league_name, league_id, season = NULL, cached = TRUE) { urls <- .fotmob_get_league_ids( cached = cached, @@ -305,14 +371,19 @@ fotmob_get_league_tables <- function(country, league_name, league_id, cached = T league_id = rlang::maybe_missing(league_id, NULL) ) - fp <- purrr::possibly( - .fotmob_get_league_tables, - quiet = FALSE, - otherwise = tibble::tibble() + season <- ifelse(is.null(season), NA_character_, season) + urls <- tidyr::crossing( + urls, + "season" = season ) - purrr::map2_dfr( - urls$id, urls$page_url, - fp + + purrr::pmap_dfr( + list( + urls$id, + urls$page_url, + urls$season + ), + .fotmob_get_league_tables ) } @@ -321,9 +392,20 @@ fotmob_get_league_tables <- function(country, league_name, league_id, cached = T #' @importFrom rlang .data #' @importFrom dplyr select all_of bind_rows rename mutate #' @importFrom tidyr pivot_longer unnest_longer unnest -.fotmob_get_league_tables <- function(league_id, page_url) { - resp <- .fotmob_get_league_resp(league_id, page_url) +.fotmob_get_league_tables <- function(league_id, page_url, season = NULL) { + + season <- switch(!is.na(season), season, NULL) + resp <- .fotmob_get_league_resp( + league_id = league_id, + page_url = page_url, + season = season + ) + .fotmob_message_for_season(resp, season) + table_init <- resp$table$data + # TODO: + # - Use purrr::flatten_chr(resp$table$data$tableFilterTypes) instead of hard-coding `cols`? + # - Extract "form" as well? cols <- c("all", "home", "away") table <- if("table" %in% names(table_init)) { table_init$table %>% dplyr::select(dplyr::all_of(cols)) diff --git a/man/fotmob_get_league_matches.Rd b/man/fotmob_get_league_matches.Rd index 8d581f59..539ba676 100644 --- a/man/fotmob_get_league_matches.Rd +++ b/man/fotmob_get_league_matches.Rd @@ -4,7 +4,13 @@ \alias{fotmob_get_league_matches} \title{Get fotmob match results by league} \usage{ -fotmob_get_league_matches(country, league_name, league_id, cached = TRUE) +fotmob_get_league_matches( + country, + league_name, + league_id, + season = NULL, + cached = TRUE +) } \arguments{ \item{country}{Three character country code. Can be one or multiple. If provided, `league_name` must also be provided (of the same length)} @@ -13,13 +19,15 @@ fotmob_get_league_matches(country, league_name, league_id, cached = TRUE) \item{league_id}{Fotmob ID for the league. Only used if `country` and `league_name` are not specified.} +\item{season}{Season, e.g. `"2021/2022"`. Can be one or multiple. If left as `NULL` (default), data for the latest season available will be pulled.} + \item{cached}{Whether to load the dataframe from the \href{https://github.com/JaseZiv/worldfootballR_data/blob/master/raw-data/fotmob-leagues/all_leagues.csv}{data CSV}. This is faster and most likely what you want to do, unless you identify a league that's being tracked by fotmob that's not in this pre-saved CSV.} } \value{ returns a dataframe of league matches } \description{ -Returns match results for all matches played on the selected date from fotmob.com. +Returns match status given a league and season } \examples{ \donttest{ @@ -38,6 +46,13 @@ fotmob_get_league_matches( league_id = 47 ) +# can specify past seasons +fotmob_get_league_matches( + country = "GER", + league_name = "1. Bundesliga", + season = "2020/2021" +) + # multiple leagues (could also use ids) league_matches <- fotmob_get_league_matches( country = c("ENG", "ESP" ), diff --git a/man/fotmob_get_league_tables.Rd b/man/fotmob_get_league_tables.Rd index d5cafb74..013889a8 100644 --- a/man/fotmob_get_league_tables.Rd +++ b/man/fotmob_get_league_tables.Rd @@ -4,7 +4,13 @@ \alias{fotmob_get_league_tables} \title{Get standings from fotmob} \usage{ -fotmob_get_league_tables(country, league_name, league_id, cached = TRUE) +fotmob_get_league_tables( + country, + league_name, + league_id, + season = NULL, + cached = TRUE +) } \arguments{ \item{country}{Three character country code. Can be one or multiple. If provided, `league_name` must also be provided (of the same length)} @@ -13,6 +19,8 @@ fotmob_get_league_tables(country, league_name, league_id, cached = TRUE) \item{league_id}{Fotmob ID for the league. Only used if `country` and `league_name` are not specified.} +\item{season}{Season, e.g. `"2021/2022"`. Can be one or multiple. If left as `NULL` (default), data for the latest season available will be pulled.} + \item{cached}{Whether to load the dataframe from the \href{https://github.com/JaseZiv/worldfootballR_data/blob/master/raw-data/fotmob-leagues/all_leagues.csv}{data CSV}. This is faster and most likely what you want to do, unless you identify a league that's being tracked by fotmob that's not in this pre-saved CSV.} } \value{ @@ -38,6 +46,13 @@ fotmob_get_league_tables( league_id = 47 ) +# one league, past season +fotmob_get_league_tables( + country = "GER", + league_name = "1. Bundesliga", + season = "2020/2021" +) + # multiple leagues (could also use ids) league_tables <- fotmob_get_league_tables( country = c("ENG", "ESP" ), diff --git a/tests/testthat/test-fotmob.R b/tests/testthat/test-fotmob.R index 11cac107..ad62fd76 100644 --- a/tests/testthat/test-fotmob.R +++ b/tests/testthat/test-fotmob.R @@ -45,6 +45,25 @@ test_that("fotmob_get_league_matches() works", { expect_gt(nrow(epl_league_matches), 0) expect_setequal(colnames(epl_league_matches), expected_league_matches_cols) + epl_league_matches_2021 <- fotmob_get_league_matches( + league_id = 47, + season = "2021/2022" + ) + + expect_gt(nrow(epl_league_matches_2021), 0) + expect_setequal(colnames(epl_league_matches_2021), expected_league_matches_cols) + expect_false( + epl_league_matches$page_url[1] == epl_league_matches_2021$page_url[1] + ) + + expect_error( + fotmob_get_league_matches( + league_id = 47, + season = "2021" + ), + regexp = "`season` should be one of the following" + ) + ## MLS is usually in-season when European leagues are out-of-season, so it's useful ## for checking that stats work in the off-season mls_league_matches <- fotmob_get_league_matches( @@ -139,6 +158,26 @@ test_that("fotmob_get_league_tables() works", { expect_gt(nrow(epl_league_table), 0) expect_setequal(colnames(epl_league_table), expected_domestic_league_table_cols) + ## past season + epl_league_table_2021 <- fotmob_get_league_tables( + league_id = 47, + season = "2021/2022" + ) + + expect_gt(nrow(epl_league_table_2021), 0) + expect_setequal(colnames(epl_league_table_2021), expected_domestic_league_table_cols) + expect_false( + all(epl_league_table_2021$table_scores_str[1:20] == epl_league_table$table_scores_str[1:20]) + ) + + expect_error( + fotmob_get_league_tables( + league_id = 47, + season = "2021" + ), + regexp = "`season` should be one of the following" + ) + ## see not about MLS from before mls_league_table <- fotmob_get_league_tables( league_id = 130 diff --git a/vignettes/extract-fotmob-data.Rmd b/vignettes/extract-fotmob-data.Rmd index 51b1e396..510a1aac 100644 --- a/vignettes/extract-fotmob-data.Rmd +++ b/vignettes/extract-fotmob-data.Rmd @@ -191,7 +191,7 @@ epl_player_xg_2021 %>% ### Match Results -Matches for one or multiple leagues can be retrieved with `fotmob_get_league_matches`. Only the current season's matches can be retrieved with this function. +Matches for one or multiple leagues can be retrieved with `fotmob_get_league_matches`. ```{r fotmob_get_league_matches} league_matches <- fotmob_get_league_matches( @@ -205,6 +205,16 @@ league_matches_unnested <- league_matches %>% dplyr::glimpse(league_matches_unnested) ``` +Data can even be retrieved for past seasons. + +```{r fotmob_get_league_matches-past} +fotmob_get_league_matches( + country = "GER", + league_name = "1. Bundesliga", + season = "2020/2021" +) +``` + fotmob also lists data for all leagues by date. Use `fotmob_get_matches_by_date` to select matches occurring on specific day(s) and filter down to the league(s) that you care about. Since `fotmob_get_league_matches` does not allow for selecting seasons, the only way to get matches from prior seasons is to use `fotmob_get_matches_by_date` ```{r fotmob_get_matches_by_date}