{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Logistic Regression" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Загрузка данных" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "current directory is : /home/tim/02_ML\n", "Directory name is : 02_ML\n" ] } ], "source": [ "# Проверим нашу рабочую директорию\n", "import os\n", "\n", "dirpath = os.getcwd()\n", "print(\"current directory is : \" + dirpath)\n", "foldername = os.path.basename(dirpath)\n", "print(\"Directory name is : \" + foldername)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# При необходимости, поменяем на новую рабочую директорию.\n", "os.chdir('/home/tim/02_ML')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Exam 1</th>\n", " <th>Exam 2</th>\n", " <th>Admitted</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>34.623660</td>\n", " <td>78.024693</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>30.286711</td>\n", " <td>43.894998</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>35.847409</td>\n", " <td>72.902198</td>\n", " <td>0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>60.182599</td>\n", " <td>86.308552</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>79.032736</td>\n", " <td>75.344376</td>\n", " <td>1</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Exam 1 Exam 2 Admitted\n", "0 34.623660 78.024693 0\n", "1 30.286711 43.894998 0\n", "2 35.847409 72.902198 0\n", "3 60.182599 86.308552 1\n", "4 79.032736 75.344376 1" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = pd.read_csv(\"ex2data1.txt\", header=None, names=['Exam 1', 'Exam 2', 'Admitted'])\n", "data.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Plotting of Data" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0, 0.5, 'Exam 2 Score')" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "<Figure size 864x576 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "positive = data[data['Admitted'].isin([1])]\n", "negative = data[data['Admitted'].isin([0])]\n", "\n", "fig, ax = plt.subplots(figsize=(12,8))\n", "ax.scatter(positive['Exam 1'], positive['Exam 2'], s=50, c='b', marker='o', label='Admitted')\n", "ax.scatter(negative['Exam 1'], negative['Exam 2'], s=50, c='r', marker='x', label='Not Admitted')\n", "ax.legend()\n", "ax.set_xlabel('Exam 1 Score')\n", "ax.set_ylabel('Exam 2 Score')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Sigmoid function Сигмоид-функция.\n", "\n", "$ g(z) = \\frac{1}{(1+e^{-z})}$" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def sigmoid(z):\n", " return 1/ (1 + np.exp(-z))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[<matplotlib.lines.Line2D at 0x7f68e67874e0>]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "<Figure size 864x576 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# проверим, что функция работает\n", "nums = np.arange(-10, 10, step=1)\n", "\n", "fig, ax = plt.subplots(figsize=(12,8))\n", "ax.plot(nums, sigmoid(nums), 'r')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Compute the Cost Function and Gradient\n", "\n", "$J(\\Theta) = \\frac{1}{m} \\sum_{i=1}^{m} [ -y^{(i)}log(h_{\\Theta}(x^{(i)})) - (1 - y^{(i)})log(1 - (h_{\\Theta}(x^{(i)}))]$\n", "\n", "$ \\frac{\\partial J(\\Theta)}{\\partial \\Theta_j} = \\frac{1}{m} \\sum_{i=1}^{m} (h_{\\Theta}(x^{(i)}) - y^{(i)})x_j^{(i)}$" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def costFunction(theta, X, y):\n", " \"\"\"\n", " Функция потерь логистической регрессии\n", " \"\"\"\n", " m=len(y)\n", " \n", " predictions = sigmoid(np.dot(X,theta))\n", " error = (-y * np.log(predictions)) - ((1-y)*np.log(1-predictions))\n", "\n", " cost = 1/m * sum(error)\n", " \n", " grad = 1/m * np.dot(X.transpose(),(predictions - y))\n", " \n", " return cost[0] , grad" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Feature scaling" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "def featureNormalization(X):\n", " \"\"\"\n", " Нормализация данных. Если features (признаки) отличаются на порядки,\n", " то их масштабирование существенно ускоряет работу метода градиентного спуска. \n", " \"\"\"\n", " # среднее и стандартное отклонение:\n", " mean=np.mean(X,axis=0)\n", " std=np.std(X,axis=0)\n", " \n", " X_norm = (X - mean)/std\n", " \n", " return X_norm , mean , std" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(100, 2) (100,)\n", "m = 100 ; n = 2\n" ] } ], "source": [ "X=data.iloc[:,:-1].values\n", "y=data.iloc[:,-1].values\n", "m , n = X.shape[0], X.shape[1]\n", "print(X.shape, y.shape)\n", "print('m =',m,'; n =', n)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Функция потерь для первоначальной theta: 0.693147180559946\n", "Градиент для первоначальной theta (с нулевыми значениями): [[-0.1 ]\n", " [-0.28122914]\n", " [-0.25098615]]\n" ] } ], "source": [ "X, X_mean, X_std = featureNormalization(X)\n", "# для простоты записи вводится фиктивный признак X_{0}= 1 \n", "X= np.append(np.ones((m,1)),X,axis=1)\n", "y=y.reshape(m,1)\n", "# Начальная theta = нули\n", "initial_theta = np.zeros((n+1,1))\n", "# Рсчет функции потерь и градиента для первоначальной theta \n", "cost, grad= costFunction(initial_theta,X,y)\n", "print(\"Функция потерь для первоначальной theta:\",cost)\n", "print(\"Градиент для первоначальной theta (с нулевыми значениями):\",grad)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Gradient Descent" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "def gradientDescent(X,y,theta,alpha,num_iters):\n", " \"\"\"\n", " Take in numpy array X, y and theta and update theta by taking num_iters gradient steps\n", " with learning rate of alpha\n", " \n", " return theta and the list of the cost of theta during each iteration\n", " \"\"\"\n", " \n", " m=len(y)\n", " J_history =[]\n", " \n", " for i in range(num_iters):\n", " cost, grad = costFunction(theta,X,y)\n", " theta = theta - (alpha * grad)\n", " J_history.append(cost)\n", " \n", " return theta , J_history" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "theta , J_history = gradientDescent(X,y,initial_theta,1,400)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Theta optimized by gradient descent: [[1.65947664]\n", " [3.8670477 ]\n", " [3.60347302]]\n", "The cost of the optimized theta: 0.20360044248226664\n" ] } ], "source": [ "print(\"Theta optimized by gradient descent:\",theta)\n", "print(\"The cost of the optimized theta:\",J_history[-1])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Plotting of Cost Function" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Cost function using Gradient Descent')" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "theta , J_history = gradientDescent(X,y,initial_theta,0.01,400)\n", "plt.plot(J_history)\n", "plt.xlabel(\"Iteration\")\n", "plt.ylabel(\"$J(\\Theta)$\")\n", "plt.title(\"Cost function using Gradient Descent\")" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Cost function using Gradient Descent')" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "theta , J_history = gradientDescent(X,y,initial_theta,0.1,400)\n", "plt.plot(J_history)\n", "plt.xlabel(\"Iteration\")\n", "plt.ylabel(\"$J(\\Theta)$\")\n", "plt.title(\"Cost function using Gradient Descent\")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0.5, 1.0, 'Cost function using Gradient Descent')" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "theta , J_history = gradientDescent(X,y,initial_theta,1,400)\n", "plt.plot(J_history)\n", "plt.xlabel(\"Iteration\")\n", "plt.ylabel(\"$J(\\Theta)$\")\n", "plt.title(\"Cost function using Gradient Descent\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Plotting the decision boundary\n", "\n", "From Machine Learning Resources:\n", " \n", "$h_\\Theta(x) = g(z)$, where g is the sigmoid function and $z = \\Theta^Tx$\n", "\n", "Since $h_\\Theta(x) \\geq 0.5$ is interpreted as predicting class \"1\", $g(\\Theta^Tx) \\geq 0.5$ or $\\Theta^Tx \\geq 0$ predict class \"1\" \n", "\n", "$\\Theta_1 + \\Theta_2x_2 + \\Theta_3x_3 = 0$ is the decision boundary \n", "\n", "Since, we plot $x_2$ against $x_3$, the boundary line will be the equation $ x_3 = \\frac{-(\\Theta_1+\\Theta_2x_2)}{\\Theta_3}$" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<matplotlib.legend.Legend at 0x7f68e629a2e8>" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "<Figure size 864x576 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig, ax = plt.subplots(figsize=(12,8))\n", "pos , neg = (y==1).reshape(100,1) , (y==0).reshape(100,1)\n", "plt.scatter(X[pos[:,0],1],X[pos[:,0],2],c=\"b\",marker=\"o\",label=\"Admitted\")\n", "plt.scatter(X[neg[:,0],1],X[neg[:,0],2],c=\"r\",marker=\"x\",label=\"Not admitted\")\n", "x_value= np.array([np.min(X[:,1]),np.max(X[:,1])])\n", "y_value=-(theta[0] +theta[1]*x_value)/theta[2]\n", "plt.plot(x_value,y_value, \"g\")\n", "plt.xlabel(\"Exam 1 score\")\n", "plt.ylabel(\"Exam 2 score\")\n", "plt.legend(loc=0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Prediction" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "def classifierPredict(theta,X):\n", " \"\"\"\n", " take in numpy array of theta and X and predict the class \n", " \"\"\"\n", " predictions = X.dot(theta)\n", " \n", " return predictions>0" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "For a student with scores 45 and 85, we predict an admission probability of 0.7677628875792492\n" ] } ], "source": [ "x_test = np.array([45,85])\n", "x_test = (x_test - X_mean)/X_std\n", "x_test = np.append(np.ones(1),x_test)\n", "prob = sigmoid(x_test.dot(theta))\n", "print(\"For a student with scores 45 and 85, we predict an admission probability of\",prob[0])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Accuracy on training set " ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train Accuracy: 89 %\n" ] } ], "source": [ "p=classifierPredict(theta,X)\n", "print(\"Train Accuracy:\", sum(p==y)[0],\"%\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }