-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget_superliga_player_stats.py
93 lines (76 loc) · 4.56 KB
/
get_superliga_player_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#%%
import pickle
import time
import pandas as pd
from selenium import webdriver
from functools import reduce
import scrapers.scrape_player_stats as scraper
#%%
# Weekly scrape: fetch the player and keeper stats in a single scraper call
# and append them to the running CSV files.
url = 'https://superliga.dk/stats/stats-21-22/'

# chromedriver is pointed at the Brave executable via `binary_location`.
option = webdriver.ChromeOptions()
option.binary_location = 'C:/Program Files/BraveSoftware/Brave-Browser/Application/brave.exe'
driver = webdriver.Chrome(
    executable_path='C:/webdrivers/chromedriver.exe',
    options=option,
)

# The scraper returns a pair: (player stats, keeper stats).
player_stats, keeper_stats = scraper.get_all_player_stats(driver, url)

# Ask for the gameweek, tag every row of both frames with it, then append each
# frame to its CSV file. No header row is written (header=False).
gameweek = input('Enter gameweek: ')
frames_and_paths = (
    (player_stats, 'data/player_stats.csv'),
    (keeper_stats, 'data/keeper_stats.csv'),
)
for stats_frame, _ in frames_and_paths:
    stats_frame['gameweek'] = gameweek
for stats_frame, csv_path in frames_and_paths:
    stats_frame.to_csv(csv_path, mode='a', header=False)
#%%
# ------- Secure/stupid way to get all player stats ---------- #
# Every stat category is scraped by its own scraper function, and each call is
# handed a freshly started browser session.
# NOTE(review): presumably the scraper functions leave the driver unusable
# after they return, hence one driver per call -- confirm in
# scrapers/scrape_player_stats.py.
url = 'https://superliga.dk/stats/stats-21-22/'


def _new_driver():
    """Start a new chromedriver session that drives the Brave browser.

    Returns:
        The newly created ``webdriver.Chrome`` instance.
    """
    # NOTE(review): `executable_path` is deprecated in Selenium 4 -- confirm
    # the installed Selenium version still accepts it.
    option = webdriver.ChromeOptions()
    option.binary_location = 'C:/Program Files/BraveSoftware/Brave-Browser/Application/brave.exe'
    return webdriver.Chrome(executable_path='C:/webdrivers/chromedriver.exe', options=option)


driver = _new_driver()
player_overall_stats = scraper.get_player_overall_stats(driver, url)

driver = _new_driver()
player_offensive_stats = scraper.get_player_offensive_stats(driver, url)

driver = _new_driver()
player_goals_stats = scraper.get_player_goals_stats(driver, url)

driver = _new_driver()
player_pass_stats = scraper.get_player_pass_stats(driver, url)

driver = _new_driver()
player_defensive_stats = scraper.get_player_defensive_stats(driver, url)

driver = _new_driver()
player_fitness_stats = scraper.get_player_fitness_stats(driver, url)

driver = _new_driver()
player_goalkeeper_stats = scraper.get_player_goalkeeping_stats(driver, url)
#%%
# Data is split into outfield players and goalkeepers, as goalkeepers have
# different stats. The goalkeeper list additionally contains the goalkeeping
# frame; both lists are otherwise built from the same per-category frames.
dfs_goalkeeper = [player_overall_stats, player_offensive_stats, player_defensive_stats, player_fitness_stats,
                  player_goals_stats, player_goalkeeper_stats, player_pass_stats]
dfs_players = [player_overall_stats, player_offensive_stats, player_defensive_stats, player_fitness_stats,
               player_goals_stats, player_pass_stats]


def _merge_stat_frames(frames):
    """Join *frames* left to right on player/club/matches and drop repeats.

    Args:
        frames: Non-empty list of DataFrames that all carry the columns
            'player', 'club' and 'matches'.

    Returns:
        One DataFrame holding the merged columns. A non-key column that occurs
        in more than one frame is kept from its first occurrence only; later
        occurrences receive the '_y' suffix during the merge and are dropped.
    """
    merged = reduce(
        lambda left, right: pd.merge(left, right,
                                     on=['player', 'club', 'matches'],
                                     suffixes=('', '_y')),
        frames,
    )
    repeated_columns = merged.filter(regex='_y$').columns.tolist()
    return merged.drop(columns=repeated_columns)


# Merge outfield and goalkeeper dataframes
df_goalkeeper_final = _merge_stat_frames(dfs_goalkeeper)
df_player_final = _merge_stat_frames(dfs_players)

# Remove goalkeepers from the player dataframe: anyone who survived the merge
# with the goalkeeping frame is treated as a goalkeeper. The explicit copy
# makes the filtered frame independent before a column is added to it below.
is_goalkeeper = df_player_final['player'].isin(df_goalkeeper_final['player'])
df_player_final = df_player_final[~is_goalkeeper].copy()

# Add the playing round and write to csv. Both files are appended to without a
# header row, so gameweeks already stored in them are kept.
gameweek = input('Enter gameweek: ')
df_player_final['gameweek'] = gameweek
df_goalkeeper_final['gameweek'] = gameweek
df_player_final.to_csv('data/player_stats.csv', mode='a', header=False)
df_goalkeeper_final.to_csv('data/keeper_stats.csv', mode='a', header=False)