-
Notifications
You must be signed in to change notification settings - Fork 108
/
Copy path11SimulateMatches.py
93 lines (71 loc) · 3.53 KB
/
11SimulateMatches.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#This code is adapted from
#https://dashee87.github.io/football/python/predicting-football-results-with-statistical-modelling/
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn
from scipy.stats import poisson,skellam
epl = pd.read_csv("http://www.football-data.co.uk/mmz4281/1920/E0.csv")
ep = epl[['HomeTeam','AwayTeam','FTHG','FTAG']]
epl = epl.rename(columns={'FTHG': 'HomeGoals', 'FTAG': 'AwayGoals'})
epl.head()
epl = epl[:-10]
epl.mean()
# importing the tools required for the Poisson regression model
import statsmodels.api as sm
import statsmodels.formula.api as smf
goal_model_data = pd.concat([epl[['HomeTeam','AwayTeam','HomeGoals']].assign(home=1).rename(
columns={'HomeTeam':'team', 'AwayTeam':'opponent','HomeGoals':'goals'}),
epl[['AwayTeam','HomeTeam','AwayGoals']].assign(home=0).rename(
columns={'AwayTeam':'team', 'HomeTeam':'opponent','AwayGoals':'goals'})])
#Fit the model to the data
#Home advantage included
#Team and opponent as fixed effects.
poisson_model = smf.glm(formula="goals ~ home + team + opponent", data=goal_model_data,
family=sm.families.Poisson()).fit()
poisson_model.summary()
home_team='Man City'
away_team='Arsenal'
#Predict for Arsenal vs. Manchester City
home_score_rate=poisson_model.predict(pd.DataFrame(data={'team': home_team, 'opponent': away_team,
'home':1},index=[1]))
away_score_rate=poisson_model.predict(pd.DataFrame(data={'team': away_team, 'opponent': home_team,
'home':1},index=[1]))
print(home_team + ' against ' + away_team + ' expect to score: ' + str(home_score_rate))
print(away_team + ' against ' + home_team + ' expect to score: ' + str(away_score_rate))
#Lets just get a result
home_goals=np.random.poisson(home_score_rate)
away_goals=np.random.poisson(away_score_rate)
print(home_team + ': ' + str(home_goals[0]))
print(away_team + ': ' + str(away_goals[0]))
#Code to caluclate the goals for the match.
def simulate_match(foot_model, homeTeam, awayTeam, max_goals=10):
home_goals_avg = foot_model.predict(pd.DataFrame(data={'team': homeTeam,
'opponent': awayTeam,'home':1},
index=[1])).values[0]
away_goals_avg = foot_model.predict(pd.DataFrame(data={'team': awayTeam,
'opponent': homeTeam,'home':0},
index=[1])).values[0]
team_pred = [[poisson.pmf(i, team_avg) for i in range(0, max_goals+1)] for team_avg in [home_goals_avg, away_goals_avg]]
return(np.outer(np.array(team_pred[0]), np.array(team_pred[1])))
max_goals=5
score_matrix=simulate_match(poisson_model, home_team, away_team,max_goals)
fig=plt.figure()
#Make 2d histogram of results
from pylab import rcParams
rcParams['figure.figsize'] = 12/2.54, 8/2.54
ax=fig.add_subplot(1,1,1)
pos=ax.imshow(score_matrix, extent=[-0.5,max_goals+0.5,-0.5,max_goals+0.5], aspect='auto',cmap=plt.cm.Reds)
fig.colorbar(pos, ax=ax)
ax.set_title('Probability of outcome')
plt.xlim((-0.5,5.5))
plt.ylim((-0.5,5.5))
plt.tight_layout()
ax.set_xlabel('Goals scored by ' + away_team)
ax.set_ylabel('Goals scored by ' + home_team)
plt.show()
fig.savefig('output/2DOutcomes.pdf' , dpi=None, bbox_inches="tight")
#Home, draw, away probabilities
homewin=np.sum(np.tril(score_matrix, -1))
draw=np.sum(np.diag(score_matrix))
awaywin=np.sum(np.triu(score_matrix, 1))