From 42569cb2955a30a98b9e68515598da5f51f0dd05 Mon Sep 17 00:00:00 2001 From: Anastasia Rizzo <55031920+arizzogithub@users.noreply.github.com> Date: Sat, 4 Apr 2020 01:56:53 +0200 Subject: [PATCH] #3 Traversal of the space of train test splits, eeg.csv --- ... space of train test splits, eeg.csv.ipynb | 1224 +++++++++++++++++ 1 file changed, 1224 insertions(+) create mode 100644 dev/AnastasiaRizzo/#3 Traversal of the space of train test splits, eeg.csv.ipynb diff --git a/dev/AnastasiaRizzo/#3 Traversal of the space of train test splits, eeg.csv.ipynb b/dev/AnastasiaRizzo/#3 Traversal of the space of train test splits, eeg.csv.ipynb new file mode 100644 index 000000000..5329bce54 --- /dev/null +++ b/dev/AnastasiaRizzo/#3 Traversal of the space of train test splits, eeg.csv.ipynb @@ -0,0 +1,1224 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Traversal of the space of train/test splits #3 / eeg.csv dataset\n", + "### Please note:\n", + " The K-Nearest Neighbours model (in its default state and with the best hyperparameter {'n_neighbors': 1}) will be applied for this issue #3. This decision was made because this model was the leader in issue #2." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### * Import, Read data from 'eeg.csv' file\n", + "#### * Info, Head, Missing Data \n", + "#### * Observation of target variable 'Class' (visualisation: countplot)\n", + "#### * Create datasets for ML \n", + "#### * Scaler\n", + "#### * Applying 'test_size' attribute splitting from 10-90 % for Train and Test ('random_state' = 0)\n", + "#### * Finding the best 'random_state' attribute number from 10-90 for Train and Test ('test_size'=0.3)\n", + "#### * 'Train/Test' splitting method with new attributes\n", + "#### * Scalers\n", + "#### * Conclusion\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import " + ] + }, + { + "cell_type": "code", + "execution_count": 458, + "metadata": {}, + "outputs": [], + "source": [ + "# import standard libraries\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "%matplotlib inline\n", + "\n", + "import sklearn.metrics as metrics\n", + "from sklearn.metrics import (accuracy_score, \n", + " f1_score,\n", + " precision_score, \n", + " average_precision_score, \n", + " recall_score\n", + " )\n", + "from sklearn.model_selection import (train_test_split, \n", + " GridSearchCV\n", + " )\n", + "\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\")\n", + "\n", + "%run train_test_split.py\n", + "%run scalers.py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read data from 'eeg.csv' file + Info" + ] + }, + { + "cell_type": "code", + "execution_count": 459, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 14976 entries, 0 to 14975\n", + "Data columns (total 15 columns):\n", + "V1 14976 non-null float64\n", + "V2 14976 non-null float64\n", + "V3 14976 non-null float64\n", + "V4 14976 non-null float64\n", + "V5 14976 non-null float64\n", + "V6 14976 non-null float64\n", + "V7 14976 non-null float64\n", + "V8 14976 non-null float64\n", + "V9 14976 non-null float64\n", + "V10 14976 non-null float64\n", + "V11 14976 non-null float64\n", + "V12 14976 non-null float64\n", + "V13 14976 non-null float64\n", + "V14 
14976 non-null float64\n", + "Class 14976 non-null int64\n", + "dtypes: float64(14), int64(1)\n", + "memory usage: 1.7 MB\n" + ] + } + ], + "source": [ + "# read data from 'eeg.csv' file\n", + "dataset = pd.read_csv('eeg.csv') \n", + "dataset.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The dataset consists of 14976 rows and 15 columns.\n", + "\n", + "It has 2 datatypes: float64 (14) and int64 (1)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Head" + ] + }, + { + "cell_type": "code", + "execution_count": 460, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
V1V2V3V4V5V6V7V8V9V10V11V12V13V14Class
04329.234009.234289.234148.214350.264586.154096.924641.034222.054238.464211.284280.514635.904393.851
14324.624004.624293.854148.724342.054586.674097.444638.974210.774226.674207.694279.494632.824384.101
24327.694006.674295.384156.414336.924583.594096.924630.264207.694222.054206.674282.054628.724389.231
34328.724011.794296.414155.904343.594582.564097.444630.774217.444235.384210.774287.694632.314396.411
44326.154011.794292.314151.284347.694586.674095.904627.694210.774244.104212.824288.214632.824398.461
\n", + "
" + ], + "text/plain": [ + " V1 V2 V3 V4 V5 V6 V7 V8 \\\n", + "0 4329.23 4009.23 4289.23 4148.21 4350.26 4586.15 4096.92 4641.03 \n", + "1 4324.62 4004.62 4293.85 4148.72 4342.05 4586.67 4097.44 4638.97 \n", + "2 4327.69 4006.67 4295.38 4156.41 4336.92 4583.59 4096.92 4630.26 \n", + "3 4328.72 4011.79 4296.41 4155.90 4343.59 4582.56 4097.44 4630.77 \n", + "4 4326.15 4011.79 4292.31 4151.28 4347.69 4586.67 4095.90 4627.69 \n", + "\n", + " V9 V10 V11 V12 V13 V14 Class \n", + "0 4222.05 4238.46 4211.28 4280.51 4635.90 4393.85 1 \n", + "1 4210.77 4226.67 4207.69 4279.49 4632.82 4384.10 1 \n", + "2 4207.69 4222.05 4206.67 4282.05 4628.72 4389.23 1 \n", + "3 4217.44 4235.38 4210.77 4287.69 4632.31 4396.41 1 \n", + "4 4210.77 4244.10 4212.82 4288.21 4632.82 4398.46 1 " + ] + }, + "execution_count": 460, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# preview of the first 5 lines of the loaded data \n", + "dataset.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Missing Data" + ] + }, + { + "cell_type": "code", + "execution_count": 461, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "V1 0\n", + "V2 0\n", + "V3 0\n", + "V4 0\n", + "V5 0\n", + "V6 0\n", + "V7 0\n", + "V8 0\n", + "V9 0\n", + "V10 0\n", + "V11 0\n", + "V12 0\n", + "V13 0\n", + "V14 0\n", + "Class 0\n", + "dtype: int64" + ] + }, + "execution_count": 461, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check for missing values\n", + "dataset.isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "Dataset has no missing values." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Observation of target variable 'Class' (visualisation: countplot)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "__Note:__ In 'Class' column, __'1'__ indicates the __eye-open__, __'2'__ the __eye-closed state__ (based on OpenML dataset description at https://www.openml.org/d/1471)." + ] + }, + { + "cell_type": "code", + "execution_count": 462, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1 8254\n", + "2 6722\n", + "Name: Class, dtype: int64" + ] + }, + "execution_count": 462, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check for number of '1' and '2' in 'Class' column\n", + "dataset['Class'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "From the __output__, we can observe that there are __8254 eye-open state__ and __6722 eye-closed state__ cases in the dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 463, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1(eye-open state): 55.11 % of the dataset\n", + "2(eye-closed state): 44.89 % of the dataset\n" + ] + } + ], + "source": [ + "# present '1' and '2' states in %\n", + "print('1(eye-open state):', round(dataset['Class'].value_counts()[1] / len(dataset) * 100, 2), '% of the dataset')\n", + "print('2(eye-closed state):', round(dataset['Class'].value_counts()[2] / len(dataset) * 100, 2), '% of the dataset')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 464, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Class Distributions (1 || 2)')" + ] + }, + "execution_count": 464, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "[... base64-encoded PNG of the 'Class Distributions (1 || 2)' countplot omitted ...]\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# visualisation of '1' and '2' states.\n", + "colors = [\"mediumseagreen\", \"royalblue\"]\n", + "sns.countplot('Class', data = dataset, palette = colors)\n", + "plt.title('Class Distributions (1 || 2)', fontsize = 14)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create datasets for ML " + ] + }, + { + "cell_type": "code", + "execution_count": 465, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
V1V2V3V4V5V6V7V8V9V10V11V12V13V14
04329.234009.234289.234148.214350.264586.154096.924641.034222.054238.464211.284280.514635.904393.85
14324.624004.624293.854148.724342.054586.674097.444638.974210.774226.674207.694279.494632.824384.10
24327.694006.674295.384156.414336.924583.594096.924630.264207.694222.054206.674282.054628.724389.23
34328.724011.794296.414155.904343.594582.564097.444630.774217.444235.384210.774287.694632.314396.41
44326.154011.794292.314151.284347.694586.674095.904627.694210.774244.104212.824288.214632.824398.46
\n", + "
" + ], + "text/plain": [ + " V1 V2 V3 V4 V5 V6 V7 V8 \\\n", + "0 4329.23 4009.23 4289.23 4148.21 4350.26 4586.15 4096.92 4641.03 \n", + "1 4324.62 4004.62 4293.85 4148.72 4342.05 4586.67 4097.44 4638.97 \n", + "2 4327.69 4006.67 4295.38 4156.41 4336.92 4583.59 4096.92 4630.26 \n", + "3 4328.72 4011.79 4296.41 4155.90 4343.59 4582.56 4097.44 4630.77 \n", + "4 4326.15 4011.79 4292.31 4151.28 4347.69 4586.67 4095.90 4627.69 \n", + "\n", + " V9 V10 V11 V12 V13 V14 \n", + "0 4222.05 4238.46 4211.28 4280.51 4635.90 4393.85 \n", + "1 4210.77 4226.67 4207.69 4279.49 4632.82 4384.10 \n", + "2 4207.69 4222.05 4206.67 4282.05 4628.72 4389.23 \n", + "3 4217.44 4235.38 4210.77 4287.69 4632.31 4396.41 \n", + "4 4210.77 4244.10 4212.82 4288.21 4632.82 4398.46 " + ] + }, + "execution_count": 465, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# set 'X' for features and 'y' for the target ('Class').\n", + "X = dataset.drop('Class', axis=1)\n", + "y = dataset['Class']\n", + "X.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scaler" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "__Note:__ Usually, at this stage I am applying __scaler__ for data. This important process needs to be done before building and training ML models since many ML algorithms don't perform well if the features are not on relatively similar scales. \n", + "However, I always asked myself 'At this stage, how am I supposed to know which scaler fits data in its best way?'. Therefore, I will apply scaler later.\n", + "\n", + "For experiment reason, __K-Nearest Neighbours model (in its default state and with hyper parameter {'n_neighbors': 1})__ will be applied. The following steps will be done:\n", + "\n", + " Step 1. The best 'test_size' attribute split percentage in 'Train\\Test splitting method' will be found. \n", + " Step 2. The best 'random_state' attribute number in 'Train\\Test splitting method' will be found. \n", + " Step 3. The best Scaler for KNN accuracy (with the best hyper parameters) will be found and applyed.\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Applying 'test_size' attribute splitting from 10-90 % for Train and Test ('random_state' = 0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This method will show two kinds of measurment: Accuracy and F1 score. F1 score was choosen instead of Precision and Recall metrics since it is the harmonic mean of them and gives a better measure of the incorrectly classified cases than the accuracy metric." + ] + }, + { + "cell_type": "code", + "execution_count": 466, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
X_trainX_testAccuracyAccuracy with hyper parametersF1_scoreF1_score with hyper parameters
190.010.00.9739650.9833110.9759410.984596
280.020.00.9709610.9799730.9731900.981504
370.030.00.9637210.9728470.9666870.974887
460.040.00.9562680.9681190.9601340.970800
550.050.00.9479170.9592680.9526580.962882
640.060.00.9405740.9537060.9461690.957963
730.070.00.9282720.9404810.9353950.946262
820.080.00.9108590.9265500.9201080.933795
910.090.00.8678690.8901250.8819980.900437
\n", + "
" + ], + "text/plain": [ + " X_train X_test Accuracy Accuracy with hyper parameters F1_score \\\n", + "1 90.0 10.0 0.973965 0.983311 0.975941 \n", + "2 80.0 20.0 0.970961 0.979973 0.973190 \n", + "3 70.0 30.0 0.963721 0.972847 0.966687 \n", + "4 60.0 40.0 0.956268 0.968119 0.960134 \n", + "5 50.0 50.0 0.947917 0.959268 0.952658 \n", + "6 40.0 60.0 0.940574 0.953706 0.946169 \n", + "7 30.0 70.0 0.928272 0.940481 0.935395 \n", + "8 20.0 80.0 0.910859 0.926550 0.920108 \n", + "9 10.0 90.0 0.867869 0.890125 0.881998 \n", + "\n", + " F1_score with hyper parameters \n", + "1 0.984596 \n", + "2 0.981504 \n", + "3 0.974887 \n", + "4 0.970800 \n", + "5 0.962882 \n", + "6 0.957963 \n", + "7 0.946262 \n", + "8 0.933795 \n", + "9 0.900437 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "split_train_test (X, y, split_size, columns1, table, index)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The results above show, that the best splitting percentage is '90/10'. However, I always remember one rule:\n", + "'The more training data you have, the better your model will be. The more testing data you have, the less variance you can expect in your results (ie. accuracy, false positive rate, etc.)'.\n", + "\n", + "Therefore, __'70/30'__ splitting percentage with:\n", + "\n", + " KNN Accuracy: 0.963721\n", + " KNN Accuracy with hyper parameters: 0.972847\n", + " KNN F1_score: 0.966687\n", + " KNN F1_score with hyper parameters: 0.974887\n", + "\n", + "will be choosen for future testing.\n", + "\n", + "Next step will be applying 'random_state' attribute number from 10-90 for Train and Test with 'test_size'= 0.3." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Finding the best number of 'random_state' attribute from 10-90 for Train and Test ('test_size'=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 467, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Random StateAccuracyAccuracy with hyper parametersF1_scoreF1_score with hyper parameters
10.010.00.9574890.9710660.9609490.973306
20.020.00.9672820.9730690.9702970.975441
30.030.00.9612730.9735140.9646200.975788
40.042.00.9655020.9737370.9682310.975780
50.050.00.9601600.9706210.9642500.973547
60.060.00.9683950.9795240.9714170.981429
70.070.00.9646120.9768530.9682440.979183
80.080.00.9601600.9719560.9632970.974159
90.090.00.9632760.9739590.9664970.976108
\n", + "
" + ], + "text/plain": [ + " Random State Accuracy Accuracy with hyper parameters F1_score \\\n", + "1 0.0 10.0 0.957489 0.971066 0.960949 \n", + "2 0.0 20.0 0.967282 0.973069 0.970297 \n", + "3 0.0 30.0 0.961273 0.973514 0.964620 \n", + "4 0.0 42.0 0.965502 0.973737 0.968231 \n", + "5 0.0 50.0 0.960160 0.970621 0.964250 \n", + "6 0.0 60.0 0.968395 0.979524 0.971417 \n", + "7 0.0 70.0 0.964612 0.976853 0.968244 \n", + "8 0.0 80.0 0.960160 0.971956 0.963297 \n", + "9 0.0 90.0 0.963276 0.973959 0.966497 \n", + "\n", + " F1_score with hyper parameters \n", + "1 0.973306 \n", + "2 0.975441 \n", + "3 0.975788 \n", + "4 0.975780 \n", + "5 0.973547 \n", + "6 0.981429 \n", + "7 0.979183 \n", + "8 0.974159 \n", + "9 0.976108 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "random_state (X, y, random1, columns2, table1, index)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The results above depict, that the best __'random_state' number = 60__ with:\n", + "\n", + " KNN Accuracy: 0.968395\n", + " KNN Accuracy with hyper parameters: 0.979524\n", + " KNN F1_score: 0.971417\n", + " KNN F1_score with hyper parameters: 0.981429" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "At this stage, our Train\\Test splitting parameters already found:\n", + " __X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=60)__\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 'Train\\Test' splitting method with new attributes" + ] + }, + { + "cell_type": "code", + "execution_count": 468, + "metadata": {}, + "outputs": [], + "source": [ + "# apply 'Train\\Test' splitting method\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=60)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scalers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Tables above show, that the best __Accuracy__ and __F1 score__ always belong to __KNN with hyper parameter {'n_neighbors': 1}__. Therefore, in order to find the best scaler to improve measurements, Scalers will be applied to it." + ] + }, + { + "cell_type": "code", + "execution_count": 469, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ModelScalerAccuracy
0K-Nearest Neighbour:StandardScaler(copy=True, with_mean=True, with...0.968395
1K-Nearest Neighbour:MinMaxScaler(copy=True, feature_range=(0, 1))0.969953
2K-Nearest Neighbour:MaxAbsScaler(copy=True)0.980191
3K-Nearest Neighbour:RobustScaler(copy=True, quantile_range=(25.0, ...0.971956
4K-Nearest Neighbour:QuantileTransformer(copy=True, ignore_implicit...0.971289
5K-Nearest Neighbour:QuantileTransformer(copy=True, ignore_implicit...0.963944
6K-Nearest Neighbour:PowerTransformer(copy=True, method='yeo-johnso...0.445805
7K-Nearest Neighbour:Normalizer(copy=True, norm='l2')0.980414
\n", + "
" + ], + "text/plain": [ + " Model Scaler \\\n", + "0 K-Nearest Neighbour: StandardScaler(copy=True, with_mean=True, with... \n", + "1 K-Nearest Neighbour: MinMaxScaler(copy=True, feature_range=(0, 1)) \n", + "2 K-Nearest Neighbour: MaxAbsScaler(copy=True) \n", + "3 K-Nearest Neighbour: RobustScaler(copy=True, quantile_range=(25.0, ... \n", + "4 K-Nearest Neighbour: QuantileTransformer(copy=True, ignore_implicit... \n", + "5 K-Nearest Neighbour: QuantileTransformer(copy=True, ignore_implicit... \n", + "6 K-Nearest Neighbour: PowerTransformer(copy=True, method='yeo-johnso... \n", + "7 K-Nearest Neighbour: Normalizer(copy=True, norm='l2') \n", + "\n", + " Accuracy \n", + "0 0.968395 \n", + "1 0.969953 \n", + "2 0.980191 \n", + "3 0.971956 \n", + "4 0.971289 \n", + "5 0.963944 \n", + "6 0.445805 \n", + "7 0.980414 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "scalers (X_train, y_train, X_test, y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The results above depict, that the best scaler is __Normalizer__ with __Accuracy = 0.980414__." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Conclusion" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " __No scaling, after 'test_size' 70/30:__\n", + " \n", + " KNN Accuracy: 0.963721\n", + " \n", + " KNN Accuracy with hyper parameter {'n_neighbors': 1}: __0.972847__\n", + " \n", + " __No scaling, after 'random_state' 60:__\n", + " \n", + " KNN Accuracy: 0.968395\n", + " \n", + " KNN Accuracy with hyper parameter {'n_neighbors': 1}: __0.979524__\n", + " \n", + " __Scaler 'Normalizer' applied:__\n", + " \n", + " KNN Accuracy with hyper parameter {'n_neighbors': 1}: __0.980414__\n", + " \n", + "Dataset was tuned and improved for future calculations __from 0.963721 to 0.980414.__" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Anastasia Rizzo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}