{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import random\n",
    "import os\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.metrics import classification_report\n",
    "import lightgbm\n",
    "from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.metrics import log_loss\n",
    "from sklearn.calibration import calibration_curve\n",
    "from sklearn.calibration import CalibratedClassifierCV\n",
    "from sklearn.preprocessing import KBinsDiscretizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Root prefix prepended (by plain string concatenation) to every data path read or written in this notebook.\n",
    "# NOTE(review): the name shadows the builtin dir(); renaming it means updating every cell that uses it.\n",
    "dir = \"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load stacked_df.csv and keep only the match/team keys plus the B365_bookspread column.\n",
    "# NOTE(review): odds_df is reassigned from match_df in a later cell, so on a top-to-bottom run\n",
    "# the frame built here is overwritten before anything reads it.\n",
    "odds_df = pd.read_csv(dir + \"Code/EDA/stacked_df.csv\")\n",
    "\n",
    "# NOTE(review): the condition selects only rows whose spread is exactly 0, so the negation is a no-op.\n",
    "# Presumably the sign was meant to be flipped for away-team rows -- confirm the intended condition.\n",
    "odds_df[\"B365_bookspread\"] = np.where(odds_df[\"B365_bookspread\"]==0,\n",
    "                                      -1*odds_df[\"B365_bookspread\"],\n",
    "                                      odds_df[\"B365_bookspread\"])\n",
    "\n",
    "odds_df =  odds_df[[\"match_id\", \"team_id\", \"B365_bookspread\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the cleaned match table and discard the stray 'Unnamed: 0' column in one step.\n",
    "match_df = pd.read_csv(dir + \"Data/match_clean.csv\").drop(columns=['Unnamed: 0'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.5780346820809249, 0.7462686567164178, 0.29411764705882354)"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Scratch check: reciprocals of three decimal odds values (the same 1/odds transform used for inv_odds below).\n",
    "1/1.73, 1/1.34, 1/3.4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>BSH</th>\n",
       "      <th>BSD</th>\n",
       "      <th>BSA</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>1.73</td>\n",
       "      <td>3.40</td>\n",
       "      <td>4.20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>2.50</td>\n",
       "      <td>3.20</td>\n",
       "      <td>2.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>2.00</td>\n",
       "      <td>3.25</td>\n",
       "      <td>3.25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>2.40</td>\n",
       "      <td>2.88</td>\n",
       "      <td>2.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>6.50</td>\n",
       "      <td>3.75</td>\n",
       "      <td>1.44</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16301</th>\n",
       "      <td>3.20</td>\n",
       "      <td>3.40</td>\n",
       "      <td>2.15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16306</th>\n",
       "      <td>2.10</td>\n",
       "      <td>3.40</td>\n",
       "      <td>3.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16307</th>\n",
       "      <td>1.45</td>\n",
       "      <td>4.40</td>\n",
       "      <td>6.50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16308</th>\n",
       "      <td>2.20</td>\n",
       "      <td>3.40</td>\n",
       "      <td>3.15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16310</th>\n",
       "      <td>1.45</td>\n",
       "      <td>4.20</td>\n",
       "      <td>7.00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>14185 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        BSH   BSD   BSA\n",
       "22     1.73  3.40  4.20\n",
       "23     2.50  3.20  2.50\n",
       "24     2.00  3.25  3.25\n",
       "25     2.40  2.88  2.88\n",
       "26     6.50  3.75  1.44\n",
       "...     ...   ...   ...\n",
       "16301  3.20  3.40  2.15\n",
       "16306  2.10  3.40  3.30\n",
       "16307  1.45  4.40  6.50\n",
       "16308  2.20  3.40  3.15\n",
       "16310  1.45  4.20  7.00\n",
       "\n",
       "[14185 rows x 3 columns]"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows of match_df in which all three BS* odds columns are populated.\n",
    "match_df[['BSH', 'BSD', 'BSA']].dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['B365H', 'B365D', 'B365A']\n",
      "['BWH', 'BWD', 'BWA']\n",
      "['IWH', 'IWD', 'IWA']\n",
      "['LBH', 'LBD', 'LBA']\n",
      "['PSH', 'PSD', 'PSA']\n",
      "['WHH', 'WHD', 'WHA']\n",
      "['SJH', 'SJD', 'SJA']\n",
      "['VCH', 'VCD', 'VCA']\n",
      "['GBH', 'GBD', 'GBA']\n",
      "['BSH', 'BSD', 'BSA']\n"
     ]
    }
   ],
   "source": [
    "# The odds columns sit at positions 32..62 of match_df as consecutive triplets per bookmaker\n",
    "# (presumably home / draw / away -- the printed names end in H, D, A).\n",
    "odds_col = match_df.columns[32:63].tolist()\n",
    "key_cols = [\"date\", \"match_id\", \"home_team_id\"]\n",
    "\n",
    "odds_df = match_df[key_cols + [\"away_team_id\"]]\n",
    "\n",
    "for start in range(0, len(odds_col), 3):\n",
    "    triplet = odds_col[start:start+3]\n",
    "    print(triplet)\n",
    "\n",
    "    # Shared prefix of the triplet, e.g. 'B365' for 'B365H'.\n",
    "    book = triplet[0][:-1]\n",
    "\n",
    "    # Only matches for which this bookmaker quoted the first odds column.\n",
    "    quoted = match_df[triplet[0]].notnull()\n",
    "    cur_odds_df = match_df.loc[quoted, key_cols + triplet]\n",
    "\n",
    "    # Inverse odds sum to more than 1; the excess is stored as the margin.\n",
    "    inv_odds = 1/cur_odds_df[triplet]\n",
    "    booksum = inv_odds.sum(axis=1)\n",
    "    margin = booksum - 1\n",
    "\n",
    "    # Dividing by the row sum rescales the three inverse odds so they add up to 1.\n",
    "    bookprob = inv_odds.div(booksum, axis=0)\n",
    "\n",
    "    # Spread = first column's probability minus third column's probability.\n",
    "    bookprob[\"{}_bookspread\".format(book)] = bookprob[triplet[0]] - bookprob[triplet[2]]\n",
    "    bookprob[\"{}_margin\".format(book)] = margin\n",
    "\n",
    "    # Re-attach the merge keys (aligned on the row index) and add this bookmaker's columns.\n",
    "    bookprob[key_cols] = cur_odds_df[key_cols]\n",
    "    odds_df = odds_df.merge(bookprob, on=key_cols, how=\"left\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Modelling table; the stray 'Unnamed: 0' column is dropped on load.\n",
    "wl_odds_player_df = pd.read_csv(dir + \"Data/wl_odds_player_df.csv\").drop(columns='Unnamed: 0')\n",
    "\n",
    "# Keep bookmaker odds only for matches that also appear in the modelling table.\n",
    "modelled_matches = set(wl_odds_player_df[\"match_id\"])\n",
    "odds_df = odds_df.loc[odds_df[\"match_id\"].isin(modelled_matches)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "<ipython-input-7-10b0c71b543d>:6: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  away_df[\"B365_bookspread\"] = -1*away_df[\"B365_bookspread\"]\n"
     ]
    }
   ],
   "source": [
    "# Reshape odds_df from one row per match to one row per (match, team).\n",
    "spread_cols = [\"match_id\", \"team_id\", \"B365_bookspread\"]\n",
    "\n",
    "# .copy() gives each frame its own data, so the renames and the sign flip below never write\n",
    "# into a slice of odds_df (the cause of the SettingWithCopyWarning).\n",
    "home_df = odds_df[[\"match_id\", \"home_team_id\", \"B365_bookspread\"]].copy()\n",
    "home_df.columns = spread_cols\n",
    "\n",
    "away_df = odds_df[[\"match_id\", \"away_team_id\", \"B365_bookspread\"]].copy()\n",
    "away_df.columns = spread_cols\n",
    "# The spread was computed from the home side's point of view, so the away team sees its negation.\n",
    "away_df[\"B365_bookspread\"] = -1*away_df[\"B365_bookspread\"]\n",
    "\n",
    "# NOTE(review): this overwrites odds_df with the stacked layout, so the cell cannot be re-run\n",
    "# without first re-running the cells that build the per-match odds_df.\n",
    "odds_df = pd.concat([home_df, away_df])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(['2010/2011', '2011/2012', '2012/2013'], '2013/2014')"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sorted season labels present in the modelling table.\n",
    "seasons = sorted(wl_odds_player_df[\"season\"].unique())\n",
    "\n",
    "# Preview one four-season window: everything but its last season, and its last season.\n",
    "i=4\n",
    "window = seasons[i:i+4]\n",
    "window[:-1], window[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "i = 2\n",
    "\n",
    "# One seed drives the calibration-date split, the CV shuffling and the hyper-parameter sampling,\n",
    "# so a fresh Restart & Run All reproduces the same models.\n",
    "SEED = 42\n",
    "\n",
    "# Targets, identifiers and bookkeeping columns that are removed before fitting or predicting.\n",
    "NON_FEATURE_COLS = [\"result\", \"win\", \"lose\", \"draw\", \"team_id\", \"m_rating\",\n",
    "                    \"cumulative_lose\", \"cumulative_win\",\n",
    "                    \"season\", \"date\", \"stage\", \"match_id\", 'match_num']\n",
    "\n",
    "season_window = seasons[i:i+4]\n",
    "\n",
    "# All but the last season of the window are used for fitting; the last season is the test set.\n",
    "train_df = wl_odds_player_df.loc[wl_odds_player_df[\"season\"].isin(season_window[:-1])].dropna()\n",
    "test_df = wl_odds_player_df.loc[wl_odds_player_df[\"season\"].isin([season_window[-1]])].dropna()\n",
    "\n",
    "train_dates = sorted(train_df[\"date\"].unique())\n",
    "\n",
    "# Hold out 25% of the match dates (whole dates, not individual rows) for probability calibration.\n",
    "calib_dates = random.Random(SEED).sample(train_dates, int(len(train_dates)*0.25))\n",
    "\n",
    "calib_df = train_df.loc[train_df[\"date\"].isin(calib_dates)]\n",
    "\n",
    "train_df = train_df.loc[~train_df[\"date\"].isin(calib_dates)]\n",
    "\n",
    "feat_col = [c for c in train_df.columns if \"avg\" in c]\n",
    "feat_col = feat_col + [\"rating_diff\"]\n",
    "\n",
    "skf = StratifiedKFold(n_splits=6, shuffle=True, random_state=SEED)\n",
    "\n",
    "# NOTE(review): LightGBM only applies `subsample` when `subsample_freq` > 0; with the default of 0\n",
    "# the subsample values below presumably have no effect -- confirm against the LightGBM docs.\n",
    "lgb_param_grid = {'lgb__n_estimators': [100, 200, 300,],\n",
    "                 'lgb__num_leaves':  [2, 4, 6, 10, 15, 20],\n",
    "                 'lgb__subsample': [0.4, 0.6, 0.8, 1],\n",
    "                 'lgb__colsample_bytree': [0.4, 0.6, 0.8, 1],\n",
    "                 'lgb__is_unbalance': [False]}\n",
    "\n",
    "lgb_pipe = Pipeline([(\"lgb\", lightgbm.LGBMClassifier(random_state=SEED))])\n",
    "\n",
    "# Feature matrices shared by the win and the lose model.\n",
    "X_train = train_df.drop(NON_FEATURE_COLS, axis=1)\n",
    "X_calib = calib_df.drop(NON_FEATURE_COLS, axis=1)\n",
    "X_test = test_df.drop(NON_FEATURE_COLS, axis=1)\n",
    "\n",
    "\n",
    "def tune_lgb(X, y):\n",
    "    \"\"\"Run the randomised LightGBM hyper-parameter search on (X, y) and return the fitted search.\"\"\"\n",
    "    search = RandomizedSearchCV(lgb_pipe,\n",
    "                                param_distributions=lgb_param_grid,\n",
    "                                n_jobs=-1,\n",
    "                                cv=skf,\n",
    "                                n_iter=50,\n",
    "                                refit=True,\n",
    "                                random_state=SEED)\n",
    "    return search.fit(X, y)\n",
    "\n",
    "\n",
    "def calibrate_prefit(fitted_model, X, y):\n",
    "    \"\"\"Fit a sigmoid calibrator on held-out data for a classifier that is already fitted.\n",
    "\n",
    "    The classifier is not refitted. Passing the search object with cv=5 would instead clone it\n",
    "    and re-run the whole search on folds of the calibration data, ignoring the fit on train_df.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        # scikit-learn >= 1.6: freezing the model prevents CalibratedClassifierCV from refitting it.\n",
    "        from sklearn.frozen import FrozenEstimator\n",
    "        calibrator = CalibratedClassifierCV(FrozenEstimator(fitted_model), method='sigmoid')\n",
    "    except ImportError:\n",
    "        # Older scikit-learn: cv='prefit' expresses the same thing.\n",
    "        calibrator = CalibratedClassifierCV(fitted_model, method='sigmoid', cv='prefit')\n",
    "    return calibrator.fit(X, y)\n",
    "\n",
    "\n",
    "#lose\n",
    "lgb_cv_lose = tune_lgb(X_train, train_df[\"lose\"])\n",
    "lgb_calib_lose = calibrate_prefit(lgb_cv_lose, X_calib, calib_df[\"lose\"])\n",
    "lgb_calib_pred_lose = lgb_calib_lose.predict_proba(X_test)\n",
    "\n",
    "#win\n",
    "lgb_cv_win = tune_lgb(X_train, train_df[\"win\"])\n",
    "lgb_calib_win = calibrate_prefit(lgb_cv_win, X_calib, calib_df[\"win\"])\n",
    "lgb_calib_pred_win = lgb_calib_win.predict_proba(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['league_id', 'rating_diff', 'home', 'match_bin', 'avg_overall_rating',\n",
       "       'avg_volleys_imp', 'avg_long_passing', 'avg_finishing',\n",
       "       'avg_ball_control', 'avg_dribbling', 'avg_short_passing',\n",
       "       'avg_reactions', 'avg_vision_imp', 'avg_penalties', 'avg_agility_imp',\n",
       "       'cumulative_result'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Columns that remain once the non-feature columns are dropped, i.e. what the models are fitted on.\n",
    "non_feature_cols = [\"result\", \"win\", \"lose\", \"draw\", \"team_id\", \"m_rating\",\n",
    "                    \"cumulative_lose\", \"cumulative_win\",\n",
    "                    \"season\", \"date\", \"stage\", \"match_id\", 'match_num']\n",
    "train_df.drop(columns=non_feature_cols).columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Lose-model probabilities for the test season.\n",
    "# Uses the calibrated *lose* model (the original called lgb_calib_win here, which returned win\n",
    "# probabilities under a lose name).\n",
    "lgb_cv_pred_lose = lgb_calib_lose.predict_proba(test_df.drop([\"result\", \"win\", \"lose\", \"draw\", \"team_id\", \"m_rating\",\n",
    "                                                                 \"cumulative_lose\", \"cumulative_win\",\n",
    "                                                                 \"season\", \"date\", \"stage\", \"match_id\", 'match_num'], axis=1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Win-model probabilities for the test season (the same call that produced lgb_calib_pred_win).\n",
    "test_features = test_df.drop(columns=[\"result\", \"win\", \"lose\", \"draw\", \"team_id\", \"m_rating\",\n",
    "                                      \"cumulative_lose\", \"cumulative_win\",\n",
    "                                      \"season\", \"date\", \"stage\", \"match_id\", 'match_num'])\n",
    "lgb_cv_pred_win = lgb_calib_win.predict_proba(test_features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Positive-class probabilities from the two calibrated binary models.\n",
    "test_pred_df = pd.DataFrame({\"lose_pred\":lgb_calib_pred_lose[:,1], \"win_pred\":lgb_calib_pred_win[:,1]})\n",
    "\n",
    "# Rescale the pair so that lose_pred + win_pred == 1 on every row.\n",
    "test_pred_df[\"sum_pred\"] = test_pred_df[[\"lose_pred\", \"win_pred\"]].sum(axis=1)\n",
    "for pred_col in (\"lose_pred\", \"win_pred\"):\n",
    "    test_pred_df[pred_col] = test_pred_df[pred_col] / test_pred_df[\"sum_pred\"]\n",
    "\n",
    "# Put the predictions next to the test rows (matched by position), attach the bookmaker spread\n",
    "# per (team, match) and discard rows with any missing value.\n",
    "test_pred_df = (pd.concat([test_pred_df, test_df.reset_index(drop=True)], axis=1)\n",
    "                  .merge(odds_df, on=[\"team_id\", \"match_id\"], how=\"left\")\n",
    "                  .dropna())\n",
    "\n",
    "test_pred_df[\"pred_spread\"] = test_pred_df[\"win_pred\"] - test_pred_df[\"lose_pred\"]\n",
    "\n",
    "# Keep only the rows flagged home == 1.\n",
    "test_pred_df = test_pred_df.loc[test_pred_df[\"home\"]==1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the predicted spread into 10 ordinal bins using the discretizer's k-means strategy.\n",
    "kbin = KBinsDiscretizer(n_bins=10, encode='ordinal', strategy=\"kmeans\")\n",
    "\n",
    "# The discretizer expects a 2-D array, hence the single-column reshape.\n",
    "spread_column = test_pred_df[\"pred_spread\"].values.reshape(-1, 1)\n",
    "bins = kbin.fit_transform(spread_column)\n",
    "\n",
    "test_pred_df[\"pred_spread_bins\"] = bins"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"3\" halign=\"left\">pred_spread</th>\n",
       "      <th colspan=\"3\" halign=\"left\">win</th>\n",
       "      <th colspan=\"3\" halign=\"left\">lose</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>min</th>\n",
       "      <th>max</th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "      <th>size</th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "      <th>size</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pred_spread_bins</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0.0</th>\n",
       "      <td>-0.667663</td>\n",
       "      <td>-0.747637</td>\n",
       "      <td>-0.595472</td>\n",
       "      <td>0.144578</td>\n",
       "      <td>12</td>\n",
       "      <td>83</td>\n",
       "      <td>0.710843</td>\n",
       "      <td>59</td>\n",
       "      <td>83</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1.0</th>\n",
       "      <td>-0.520336</td>\n",
       "      <td>-0.592088</td>\n",
       "      <td>-0.446229</td>\n",
       "      <td>0.154639</td>\n",
       "      <td>15</td>\n",
       "      <td>97</td>\n",
       "      <td>0.649485</td>\n",
       "      <td>63</td>\n",
       "      <td>97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2.0</th>\n",
       "      <td>-0.372072</td>\n",
       "      <td>-0.444808</td>\n",
       "      <td>-0.295559</td>\n",
       "      <td>0.235669</td>\n",
       "      <td>37</td>\n",
       "      <td>157</td>\n",
       "      <td>0.490446</td>\n",
       "      <td>77</td>\n",
       "      <td>157</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3.0</th>\n",
       "      <td>-0.218130</td>\n",
       "      <td>-0.292947</td>\n",
       "      <td>-0.145375</td>\n",
       "      <td>0.275641</td>\n",
       "      <td>43</td>\n",
       "      <td>156</td>\n",
       "      <td>0.410256</td>\n",
       "      <td>64</td>\n",
       "      <td>156</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4.0</th>\n",
       "      <td>-0.068091</td>\n",
       "      <td>-0.142719</td>\n",
       "      <td>0.013661</td>\n",
       "      <td>0.321429</td>\n",
       "      <td>72</td>\n",
       "      <td>224</td>\n",
       "      <td>0.450893</td>\n",
       "      <td>101</td>\n",
       "      <td>224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5.0</th>\n",
       "      <td>0.099476</td>\n",
       "      <td>0.015003</td>\n",
       "      <td>0.171442</td>\n",
       "      <td>0.415789</td>\n",
       "      <td>158</td>\n",
       "      <td>380</td>\n",
       "      <td>0.326316</td>\n",
       "      <td>124</td>\n",
       "      <td>380</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6.0</th>\n",
       "      <td>0.245948</td>\n",
       "      <td>0.172990</td>\n",
       "      <td>0.315648</td>\n",
       "      <td>0.461224</td>\n",
       "      <td>226</td>\n",
       "      <td>490</td>\n",
       "      <td>0.267347</td>\n",
       "      <td>131</td>\n",
       "      <td>490</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7.0</th>\n",
       "      <td>0.388882</td>\n",
       "      <td>0.318984</td>\n",
       "      <td>0.464139</td>\n",
       "      <td>0.559783</td>\n",
       "      <td>206</td>\n",
       "      <td>368</td>\n",
       "      <td>0.190217</td>\n",
       "      <td>70</td>\n",
       "      <td>368</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8.0</th>\n",
       "      <td>0.538671</td>\n",
       "      <td>0.465553</td>\n",
       "      <td>0.609792</td>\n",
       "      <td>0.652330</td>\n",
       "      <td>182</td>\n",
       "      <td>279</td>\n",
       "      <td>0.139785</td>\n",
       "      <td>39</td>\n",
       "      <td>279</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9.0</th>\n",
       "      <td>0.679140</td>\n",
       "      <td>0.610537</td>\n",
       "      <td>0.772113</td>\n",
       "      <td>0.829060</td>\n",
       "      <td>194</td>\n",
       "      <td>234</td>\n",
       "      <td>0.029915</td>\n",
       "      <td>7</td>\n",
       "      <td>234</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 pred_spread                           win            \\\n",
       "                        mean       min       max      mean  sum size   \n",
       "pred_spread_bins                                                       \n",
       "0.0                -0.667663 -0.747637 -0.595472  0.144578   12   83   \n",
       "1.0                -0.520336 -0.592088 -0.446229  0.154639   15   97   \n",
       "2.0                -0.372072 -0.444808 -0.295559  0.235669   37  157   \n",
       "3.0                -0.218130 -0.292947 -0.145375  0.275641   43  156   \n",
       "4.0                -0.068091 -0.142719  0.013661  0.321429   72  224   \n",
       "5.0                 0.099476  0.015003  0.171442  0.415789  158  380   \n",
       "6.0                 0.245948  0.172990  0.315648  0.461224  226  490   \n",
       "7.0                 0.388882  0.318984  0.464139  0.559783  206  368   \n",
       "8.0                 0.538671  0.465553  0.609792  0.652330  182  279   \n",
       "9.0                 0.679140  0.610537  0.772113  0.829060  194  234   \n",
       "\n",
       "                      lose            \n",
       "                      mean  sum size  \n",
       "pred_spread_bins                      \n",
       "0.0               0.710843   59   83  \n",
       "1.0               0.649485   63   97  \n",
       "2.0               0.490446   77  157  \n",
       "3.0               0.410256   64  156  \n",
       "4.0               0.450893  101  224  \n",
       "5.0               0.326316  124  380  \n",
       "6.0               0.267347  131  490  \n",
       "7.0               0.190217   70  368  \n",
       "8.0               0.139785   39  279  \n",
       "9.0               0.029915    7  234  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-bin summary: range of the predicted spread next to the observed win and lose rates.\n",
    "outcome_stats = [\"mean\", \"sum\", \"size\"]\n",
    "test_pred_df.groupby(\"pred_spread_bins\").agg({\"pred_spread\": [\"mean\", \"min\", \"max\"],\n",
    "                                              \"win\": outcome_stats,\n",
    "                                              \"lose\": outcome_stats})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace the k-means bins with three hand-picked ranges of the predicted spread:\n",
    "#   2 -> below the draw range, 4 -> inside the draw range, 5 -> above the draw range.\n",
    "# NOTE(review): the thresholds are hand-copied numbers; confirm they still match the current model.\n",
    "DRAW_SPREAD_MIN = -0.079694\n",
    "DRAW_SPREAD_MAX = 0.059606\n",
    "\n",
    "spread = test_pred_df[\"pred_spread\"]\n",
    "\n",
    "# The ranges are contiguous: anything above DRAW_SPREAD_MAX is bin 5. Previously bin 5 started at\n",
    "# 0.060011, so a spread between the two numbers silently fell back to bin 2.\n",
    "test_pred_df[\"pred_spread_bins\"] = np.select([spread > DRAW_SPREAD_MAX, spread >= DRAW_SPREAD_MIN],\n",
    "                                             [5, 4],\n",
    "                                             default=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bin labels that trigger each kind of bet.\n",
    "# NOTE(review): the preceding cell overwrites pred_spread_bins with only the values 2, 4 and 5,\n",
    "# so of these lists only 5 (win) and 4 (draw) can match; bin 0 (lose) never occurs.\n",
    "bet_win_bins = [9,8,7,6,5]\n",
    "bet_lose_bins = [0]\n",
    "bet_draw_bins = [4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export a copy of the predictions with a numeric target: 1 for win-bet bins, -1 for draw-bet bins, else 0.\n",
    "out_test_pred_df = test_pred_df.copy()\n",
    "\n",
    "export_bins = out_test_pred_df[\"pred_spread_bins\"]\n",
    "is_draw_bet = export_bins.isin(bet_draw_bins)\n",
    "is_win_bet = export_bins.isin(bet_win_bins)\n",
    "\n",
    "# Draw is listed first so it takes precedence if a bin appears in both lists.\n",
    "# Lose-bet bins are not encoded in the exported target.\n",
    "out_test_pred_df[\"target\"] = np.select([is_draw_bet, is_win_bet], [-1, 1], default=0)\n",
    "\n",
    "out_test_pred_df.to_csv(dir + \"Data/bet_spread_df.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label every row with the bet its bin triggers; later entries override earlier ones.\n",
    "test_pred_df[\"bet\"] = \"no bet\"\n",
    "\n",
    "bet_rules = [(bet_win_bins, \"bet win\"),\n",
    "             (bet_lose_bins, \"bet lose\"),\n",
    "             (bet_draw_bins, \"bet draw\")]\n",
    "\n",
    "for rule_bins, rule_label in bet_rules:\n",
    "    test_pred_df.loc[test_pred_df[\"pred_spread_bins\"].isin(rule_bins), \"bet\"] = rule_label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 864x720 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Scatter of predicted spread against the bookmaker's spread, one colour per bet type.\n",
    "colors1 = {'bet lose':'pink', 'bet win':'green', 'bet draw': 'blue', \"no bet\":'orange'}\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(12,10))\n",
    "\n",
    "pred_values = test_pred_df[\"pred_spread\"].values\n",
    "book_values = test_pred_df[\"B365_bookspread\"].values\n",
    "\n",
    "for bet_label, colour in colors1.items():\n",
    "    # Boolean mask of the rows carrying this bet label.\n",
    "    chosen = (test_pred_df['bet'] == bet_label).values\n",
    "    ax.scatter(pred_values[chosen], book_values[chosen],\n",
    "               c = colour, label = bet_label, s = 30)\n",
    "\n",
    "ax.legend(prop={'size': 19})\n",
    "ax.set_xlabel('predicted spread', fontsize=19)\n",
    "ax.set_ylabel(\"bookmaker's spread\", fontsize=19)\n",
    "ax.tick_params(axis='both', labelsize=15)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Grade every bet: a win bet is correct when result == 1, a draw bet when result == 0.\n",
    "# NOTE(review): there is no rule for \"bet lose\", so such rows can only ever be graded \"incorrect\".\n",
    "won_win_bet = (test_pred_df[\"result\"]== 1) & (test_pred_df[\"bet\"]==\"bet win\")\n",
    "won_draw_bet = (test_pred_df[\"result\"]== 0) & (test_pred_df[\"bet\"]==\"bet draw\")\n",
    "no_bet_placed = test_pred_df[\"bet\"]==\"no bet\"\n",
    "\n",
    "test_pred_df[\"bet_accuracy\"] = \"incorrect\"\n",
    "test_pred_df.loc[won_win_bet | won_draw_bet, \"bet_accuracy\"] = \"correct\"\n",
    "test_pred_df.loc[no_bet_placed, \"bet_accuracy\"] = \"no bet\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 864x720 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Scatter of predicted spread against the bookmaker's spread for placed bets, coloured by outcome.\n",
    "colors2 = {'incorrect':'red', 'correct':'green', 'no bet': 'white'}\n",
    "\n",
    "# Rows without a bet are filtered out up front, so the 'no bet' series is drawn empty\n",
    "# and only contributes a legend entry.\n",
    "placed_df = test_pred_df.loc[test_pred_df[\"bet_accuracy\"]!=\"no bet\"]\n",
    "placed_pred = placed_df[\"pred_spread\"].values\n",
    "placed_book = placed_df[\"B365_bookspread\"].values\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(12,10))\n",
    "for outcome, colour in colors2.items():\n",
    "    chosen = (placed_df['bet_accuracy'] == outcome).values\n",
    "    ax.scatter(placed_pred[chosen], placed_book[chosen],\n",
    "               c = colour, label = outcome, s = 30)\n",
    "\n",
    "ax.legend(prop={'size': 19})\n",
    "ax.set_xlabel('predicted spread', fontsize=19)\n",
    "ax.set_ylabel(\"bookmaker's spread\", fontsize=19)\n",
    "ax.tick_params(axis='both', labelsize=15)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload the match table and keep only the matches that appear in the test predictions.\n",
    "match_df = pd.read_csv(dir + \"Data/match_clean.csv\").drop(columns='Unnamed: 0')\n",
    "\n",
    "predicted_matches = set(test_pred_df[\"match_id\"])\n",
    "match_df = match_df.loc[match_df[\"match_id\"].isin(predicted_matches)]\n",
    "\n",
    "home_won = match_df[\"home_team_goal\"]> match_df[\"away_team_goal\"]\n",
    "away_won = match_df[\"home_team_goal\"]< match_df[\"away_team_goal\"]\n",
    "\n",
    "# The outcome is stored under the name of the matching BW* odds column, so it can be compared\n",
    "# directly with a column name picked from ['BWH', 'BWD', 'BWA']. Anything that is neither a\n",
    "# home nor an away win keeps the default \"BWD\".\n",
    "match_df[\"result\"] = \"BWD\"\n",
    "match_df.loc[home_won, \"result\"] = \"BWH\"\n",
    "match_df.loc[away_won, \"result\"] = \"BWA\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Series([], Name: result, dtype: int64)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rows whose bin triggers a lose bet, and how their actual results are distributed.\n",
    "in_lose_bins = test_pred_df[\"pred_spread_bins\"].isin(bet_lose_bins)\n",
    "bet_lose = test_pred_df.loc[in_lose_bins, [\"team_id\", \"match_id\", \"home\", \"result\"]]\n",
    "\n",
    "bet_lose[\"result\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Series([], Name: result, dtype: float64)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same counts expressed as a share of all lose bets.\n",
    "bet_lose[\"result\"].value_counts().div(len(bet_lose))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "nan"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "book_l_df = match_df.loc[match_df[\"match_id\"].isin(set(bet_lose[\"match_id\"])), \n",
    "                         [\"result\", \"BWH\", \"BWD\", \"BWA\", \"match_id\"]]\n",
    "\n",
    "book_l_df[\"pred\"] = book_l_df[['BWH','BWD', 'BWA']].idxmin(axis=1)\n",
    "\n",
    "np.mean(book_l_df[\"pred\"]==book_l_df[\"result\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "bet win     1655\n",
       "no bet       591\n",
       "bet draw     222\n",
       "Name: bet, dtype: int64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_pred_df[\"bet\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "correct      990\n",
       "incorrect    887\n",
       "no bet       591\n",
       "Name: bet_accuracy, dtype: int64"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_pred_df[\"bet_accuracy\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.6531034482758621"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "947/(947+503)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "compare_matches = set(test_pred_df.loc[test_pred_df[\"bet\"].isin([\"bet lose\",\n",
    "                                                                 \"bet win\",\n",
    "                                                                 \"bet draw\"]), \"match_id\"])\n",
    "\n",
    "follow_book_bet = match_df.loc[match_df[\"match_id\"].isin(compare_matches)]\n",
    "\n",
    "follow_book_bet = follow_book_bet[[\"match_id\", \"result\", 'BWH','BWD', 'BWA']]\n",
    "\n",
    "follow_book_bet[\"pred\"] = follow_book_bet[['BWH','BWD', 'BWA']].idxmin(axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5466169419286094"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.mean(follow_book_bet[\"pred\"]==follow_book_bet[\"result\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "943.48"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "follow_book_bet[\"dollar\"] = follow_book_bet[\"BWD\"]\n",
    "\n",
    "follow_book_bet[\"dollar\"] = np.where(follow_book_bet[\"result\"]==\"BWH\", \n",
    "                                     follow_book_bet[\"BWH\"],\n",
    "                                     follow_book_bet[\"dollar\"])\n",
    "\n",
    "\n",
    "follow_book_bet[\"dollar\"] = np.where(follow_book_bet[\"result\"]==\"BWA\", \n",
    "                                     follow_book_bet[\"BWA\"],\n",
    "                                     follow_book_bet[\"dollar\"])\n",
    "\n",
    "follow_book_bet[\"dollar\"] = np.where(follow_book_bet[\"pred\"]!=follow_book_bet[\"result\"],\n",
    "                                    -1, follow_book_bet[\"dollar\"])\n",
    "\n",
    "\n",
    "\n",
    "follow_book_bet[\"dollar\"].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1877"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(follow_book_bet)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1075.4300000000005"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "894.9600000000006+180.46999999999997"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5466169419286094"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.mean(follow_book_bet[\"result\"]==follow_book_bet[\"pred\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "correct      990\n",
       "incorrect    887\n",
       "no bet       591\n",
       "Name: bet_accuracy, dtype: int64"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_pred_df[\"bet_accuracy\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['lose_pred', 'win_pred', 'sum_pred', 'league_id', 'season', 'date',\n",
       "       'stage', 'match_id', 'team_id', 'm_rating', 'win', 'lose',\n",
       "       'rating_diff', 'home', 'cumulative_win', 'cumulative_lose', 'match_num',\n",
       "       'match_bin', 'draw', 'avg_overall_rating', 'avg_volleys_imp',\n",
       "       'avg_long_passing', 'avg_finishing', 'avg_ball_control',\n",
       "       'avg_dribbling', 'avg_short_passing', 'avg_reactions', 'avg_vision_imp',\n",
       "       'avg_penalties', 'avg_agility_imp', 'result', 'cumulative_result',\n",
       "       'B365_bookspread', 'pred_spread', 'pred_spread_bins', 'bet',\n",
       "       'bet_accuracy'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_pred_df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "odds_col = match_df.columns[32:63].tolist()\n",
    "\n",
    "dollar = 0\n",
    "for t, m, h, r in zip(bet_lose[\"team_id\"], bet_lose[\"match_id\"], bet_lose[\"home\"], bet_lose[\"result\"]):\n",
    "\n",
    "    if h == 0:\n",
    "        bwh = match_df.loc[(match_df[\"away_team_id\"]==t) & \n",
    "                           (match_df[\"match_id\"]==m), \"BWH\"].values[0]\n",
    "        if np.isnan(bwh):\n",
    "            continue\n",
    "            \n",
    "        if r == -1:\n",
    "            dollar += bwh\n",
    "        elif r == 0:\n",
    "            dollar -= 1\n",
    "        elif r == 1:\n",
    "            dollar -= 1\n",
    "            \n",
    "    elif h == 1:\n",
    "        bwa = match_df.loc[(match_df[\"home_team_id\"]==t) & \n",
    "                               (match_df[\"match_id\"]==m),  \"BWA\"].values[0]  \n",
    "        \n",
    "        if np.isnan(bwa):\n",
    "            continue\n",
    "        \n",
    "        if r == -1:\n",
    "            dollar -= 1\n",
    "        elif r == 0:\n",
    "            dollar -= 1\n",
    "        elif r == 1:\n",
    "            dollar += bwa        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dollar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       " 1    936\n",
       " 0    386\n",
       "-1    333\n",
       "Name: result, dtype: int64"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bet_win = test_pred_df.loc[(test_pred_df[\"pred_spread_bins\"].isin(bet_win_bins)), \n",
    "                           [\"team_id\", \"match_id\", \"home\", \"result\"]]\n",
    "\n",
    "bet_win[\"result\"].value_counts()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5673716012084592"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "book_w_df = match_df.loc[match_df[\"match_id\"].isin(set(bet_win[\"match_id\"])), \n",
    "                         [\"result\", \"BWH\", \"BWD\", \"BWA\", \"match_id\"]]\n",
    "\n",
    "book_w_df[\"pred\"] = book_w_df[['BWH','BWD', 'BWA']].idxmin(axis=1)\n",
    "\n",
    "np.mean(book_w_df[\"pred\"]==book_w_df[\"result\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "odds_col = match_df.columns[32:63].tolist()\n",
    "\n",
    "dollar = 0\n",
    "\n",
    "for t, m, h, r in zip(bet_win[\"team_id\"], bet_win[\"match_id\"], bet_win[\"home\"], bet_win[\"result\"]):\n",
    "    \n",
    "    if h == 0:\n",
    "        bwa = match_df.loc[(match_df[\"away_team_id\"]==t) & \n",
    "                           (match_df[\"match_id\"]==m), \"BWA\"].values[0]\n",
    "        \n",
    "        if r == -1:\n",
    "            dollar -= 1\n",
    "        elif r == 0:  \n",
    "            dollar -= 1\n",
    "        elif r == 1:\n",
    "            dollar += bwa\n",
    "        \n",
    "    elif h == 1:\n",
    "        bwh = match_df.loc[(match_df[\"home_team_id\"]==t) & \n",
    "                           (match_df[\"match_id\"]==m),  \"BWH\"].values[0]\n",
    "        \n",
    "        if r == -1:\n",
    "            dollar -= 1\n",
    "        elif r == 0:\n",
    "            dollar -= 1\n",
    "        elif r == 1:\n",
    "            dollar += bwh        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "894.9600000000006"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dollar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-1    96\n",
       " 1    72\n",
       " 0    54\n",
       "Name: result, dtype: int64"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bet_draw = test_pred_df.loc[(test_pred_df[\"pred_spread_bins\"].isin(bet_draw_bins)), \n",
    "                           [\"team_id\", \"match_id\", \"home\", \"result\"]]\n",
    "\n",
    "bet_draw[\"result\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.3918918918918919"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "book_d_df = match_df.loc[match_df[\"match_id\"].isin(set(bet_draw[\"match_id\"])), \n",
    "                         [\"result\", \"BWH\", \"BWD\", \"BWA\", \"match_id\"]]\n",
    "\n",
    "book_d_df[\"pred\"] = book_d_df[['BWH','BWD', 'BWA']].idxmin(axis=1)\n",
    "\n",
    "np.mean(book_d_df[\"pred\"]==book_d_df[\"result\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "odds_col = match_df.columns[32:63].tolist()\n",
    "\n",
    "dollar = 0\n",
    "\n",
    "for t, m, h, r in zip(bet_win[\"team_id\"], bet_win[\"match_id\"], bet_win[\"home\"], bet_win[\"result\"]):\n",
    "    \n",
    "    if h == 0:\n",
    "        bwd = match_df.loc[(match_df[\"away_team_id\"]==t) & \n",
    "                           (match_df[\"match_id\"]==m), \"BWD\"].values[0]\n",
    "        \n",
    "        if r == -1:\n",
    "            dollar -= 1\n",
    "        elif r == 0:  \n",
    "            dollar += bwd\n",
    "        elif r == 1:\n",
    "            dollar -= 1\n",
    "        \n",
    "    elif h == 1:\n",
    "        bwd = match_df.loc[(match_df[\"home_team_id\"]==t) & \n",
    "                           (match_df[\"match_id\"]==m),  \"BWD\"].values[0]\n",
    "        \n",
    "        if r == -1:\n",
    "            dollar -= 1\n",
    "        elif r == 0:\n",
    "            dollar += bwd\n",
    "        elif r == 1:\n",
    "            dollar -= 1        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "180.46999999999997"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dollar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.09048767967145789"
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(132+185+211.81)/ (1877+1956+2011)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.07032498668087374"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "132/1877"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.09458077709611452"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "185/1956"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.10532570860268524"
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "211.81/2011"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}