{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Problem Statement \n", "\n", "To predict the fire forest burn area" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "#Importing the necessary libraries\n", "\n", "import matplotlib.pyplot as plt\n", "import math\n", "import numpy as np\n", "import pandas as pd\n", "import random" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Loading the dataset\n", "db = pd.read_csv('forest_fires.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>X</th>\n", " <th>Y</th>\n", " <th>month</th>\n", " <th>day</th>\n", " <th>FFMC</th>\n", " <th>DMC</th>\n", " <th>DC</th>\n", " <th>ISI</th>\n", " <th>temp</th>\n", " <th>RH</th>\n", " <th>wind</th>\n", " <th>rain</th>\n", " <th>area</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>7</td>\n", " <td>5</td>\n", " <td>mar</td>\n", " <td>fri</td>\n", " <td>86.2</td>\n", " <td>26.2</td>\n", " <td>94.3</td>\n", " <td>5.1</td>\n", " <td>8.2</td>\n", " <td>51</td>\n", " <td>6.7</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>7</td>\n", " <td>4</td>\n", " <td>oct</td>\n", " <td>tue</td>\n", " <td>90.6</td>\n", " <td>35.4</td>\n", " <td>669.1</td>\n", " <td>6.7</td>\n", " <td>18.0</td>\n", " <td>33</td>\n", " <td>0.9</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>7</td>\n", " <td>4</td>\n", " <td>oct</td>\n", " 
<td>sat</td>\n", " <td>90.6</td>\n", " <td>43.7</td>\n", " <td>686.9</td>\n", " <td>6.7</td>\n", " <td>14.6</td>\n", " <td>33</td>\n", " <td>1.3</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>8</td>\n", " <td>6</td>\n", " <td>mar</td>\n", " <td>fri</td>\n", " <td>91.7</td>\n", " <td>33.3</td>\n", " <td>77.5</td>\n", " <td>9.0</td>\n", " <td>8.3</td>\n", " <td>97</td>\n", " <td>4.0</td>\n", " <td>0.2</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>8</td>\n", " <td>6</td>\n", " <td>mar</td>\n", " <td>sun</td>\n", " <td>89.3</td>\n", " <td>51.3</td>\n", " <td>102.2</td>\n", " <td>9.6</td>\n", " <td>11.4</td>\n", " <td>99</td>\n", " <td>1.8</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " X Y month day FFMC DMC DC ISI temp RH wind rain area\n", "0 7 5 mar fri 86.2 26.2 94.3 5.1 8.2 51 6.7 0.0 0.0\n", "1 7 4 oct tue 90.6 35.4 669.1 6.7 18.0 33 0.9 0.0 0.0\n", "2 7 4 oct sat 90.6 43.7 686.9 6.7 14.6 33 1.3 0.0 0.0\n", "3 8 6 mar fri 91.7 33.3 77.5 9.0 8.3 97 4.0 0.2 0.0\n", "4 8 6 mar sun 89.3 51.3 102.2 9.6 11.4 99 1.8 0.0 0.0" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Printing the first 5 rows of the loaded Dataset\n", "db.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 517 entries, 0 to 516\n", "Data columns (total 13 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 X 517 non-null int64 \n", " 1 Y 517 non-null int64 \n", " 2 month 517 non-null object \n", " 3 day 517 non-null object \n", " 4 FFMC 517 non-null float64\n", " 5 DMC 517 non-null float64\n", " 6 DC 517 non-null float64\n", " 7 ISI 517 non-null float64\n", " 8 temp 517 non-null float64\n", " 9 RH 517 non-null int64 \n", " 10 
# %%
## Plotting

# Matplotlib 3.6 deprecated the bare 'seaborn' style name in favour of
# 'seaborn-v0_8' (the old name was removed in later releases); use whichever
# name this installation actually provides.
hist_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(hist_style)

# One histogram per numeric column. The trailing semicolon hides the
# array-of-Axes repr so that only the figure is rendered.
db.hist(bins=30, figsize=(20, 15));

# %%
# Converting days and months into integers

# Calendar label -> ordinal position (jan=1 ... dec=12, mon=1 ... sun=7).
MONTH_NUMBERS = {
    label: number
    for number, label in enumerate(
        ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
         'jul', 'aug', 'sep', 'oct', 'nov', 'dec'),
        start=1,
    )
}
DAY_NUMBERS = {
    label: number
    for number, label in enumerate(
        ('mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'),
        start=1,
    )
}

# Assign the converted columns back to the frame rather than calling
# replace(..., inplace=True) on db.month / db.day: an in-place call on a column
# selected from the frame is chained assignment, which pandas warns about and
# which does not update `db` when Copy-on-Write is active.
# Values with no mapping entry (e.g. already-converted integers when the cell
# is re-run) pass through unchanged; astype then fails loudly on any label
# that is neither a known name nor an integer.
db['month'] = db['month'].map(lambda label: MONTH_NUMBERS.get(label, label)).astype('int64')
db['day'] = db['day'].map(lambda label: DAY_NUMBERS.get(label, label)).astype('int64')
right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>X</th>\n", " <th>Y</th>\n", " <th>month</th>\n", " <th>day</th>\n", " <th>FFMC</th>\n", " <th>DMC</th>\n", " <th>DC</th>\n", " <th>ISI</th>\n", " <th>temp</th>\n", " <th>RH</th>\n", " <th>wind</th>\n", " <th>rain</th>\n", " <th>area</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>7</td>\n", " <td>5</td>\n", " <td>3</td>\n", " <td>5</td>\n", " <td>86.2</td>\n", " <td>26.2</td>\n", " <td>94.3</td>\n", " <td>5.1</td>\n", " <td>8.2</td>\n", " <td>51</td>\n", " <td>6.7</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>7</td>\n", " <td>4</td>\n", " <td>10</td>\n", " <td>2</td>\n", " <td>90.6</td>\n", " <td>35.4</td>\n", " <td>669.1</td>\n", " <td>6.7</td>\n", " <td>18.0</td>\n", " <td>33</td>\n", " <td>0.9</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>7</td>\n", " <td>4</td>\n", " <td>10</td>\n", " <td>6</td>\n", " <td>90.6</td>\n", " <td>43.7</td>\n", " <td>686.9</td>\n", " <td>6.7</td>\n", " <td>14.6</td>\n", " <td>33</td>\n", " <td>1.3</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>8</td>\n", " <td>6</td>\n", " <td>3</td>\n", " <td>5</td>\n", " <td>91.7</td>\n", " <td>33.3</td>\n", " <td>77.5</td>\n", " <td>9.0</td>\n", " <td>8.3</td>\n", " <td>97</td>\n", " <td>4.0</td>\n", " <td>0.2</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>8</td>\n", " <td>6</td>\n", " <td>3</td>\n", " <td>7</td>\n", " <td>89.3</td>\n", " <td>51.3</td>\n", " <td>102.2</td>\n", " <td>9.6</td>\n", " <td>11.4</td>\n", " <td>99</td>\n", " <td>1.8</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>8</td>\n", " <td>6</td>\n", " <td>8</td>\n", " <td>7</td>\n", " <td>92.3</td>\n", " <td>85.3</td>\n", " 
<td>488.0</td>\n", " <td>14.7</td>\n", " <td>22.2</td>\n", " <td>29</td>\n", " <td>5.4</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>8</td>\n", " <td>6</td>\n", " <td>8</td>\n", " <td>1</td>\n", " <td>92.3</td>\n", " <td>88.9</td>\n", " <td>495.6</td>\n", " <td>8.5</td>\n", " <td>24.1</td>\n", " <td>27</td>\n", " <td>3.1</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>8</td>\n", " <td>6</td>\n", " <td>8</td>\n", " <td>1</td>\n", " <td>91.5</td>\n", " <td>145.4</td>\n", " <td>608.2</td>\n", " <td>10.7</td>\n", " <td>8.0</td>\n", " <td>86</td>\n", " <td>2.2</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>8</td>\n", " <td>6</td>\n", " <td>9</td>\n", " <td>2</td>\n", " <td>91.0</td>\n", " <td>129.5</td>\n", " <td>692.6</td>\n", " <td>7.0</td>\n", " <td>13.1</td>\n", " <td>63</td>\n", " <td>5.4</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>7</td>\n", " <td>5</td>\n", " <td>9</td>\n", " <td>6</td>\n", " <td>92.5</td>\n", " <td>88.0</td>\n", " <td>698.6</td>\n", " <td>7.1</td>\n", " <td>22.8</td>\n", " <td>40</td>\n", " <td>4.0</td>\n", " <td>0.0</td>\n", " <td>0.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " X Y month day FFMC DMC DC ISI temp RH wind rain area\n", "0 7 5 3 5 86.2 26.2 94.3 5.1 8.2 51 6.7 0.0 0.0\n", "1 7 4 10 2 90.6 35.4 669.1 6.7 18.0 33 0.9 0.0 0.0\n", "2 7 4 10 6 90.6 43.7 686.9 6.7 14.6 33 1.3 0.0 0.0\n", "3 8 6 3 5 91.7 33.3 77.5 9.0 8.3 97 4.0 0.2 0.0\n", "4 8 6 3 7 89.3 51.3 102.2 9.6 11.4 99 1.8 0.0 0.0\n", "5 8 6 8 7 92.3 85.3 488.0 14.7 22.2 29 5.4 0.0 0.0\n", "6 8 6 8 1 92.3 88.9 495.6 8.5 24.1 27 3.1 0.0 0.0\n", "7 8 6 8 1 91.5 145.4 608.2 10.7 8.0 86 2.2 0.0 0.0\n", "8 8 6 9 2 91.0 129.5 692.6 7.0 13.1 63 5.4 0.0 0.0\n", "9 7 5 9 6 92.5 88.0 698.6 7.1 22.8 40 4.0 0.0 0.0" ] }, "execution_count": 8, "metadata": {}, 
"output_type": "execute_result" } ], "source": [ "#Printing after replacement\n", "db.head(10)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>X</th>\n", " <th>Y</th>\n", " <th>month</th>\n", " <th>day</th>\n", " <th>FFMC</th>\n", " <th>DMC</th>\n", " <th>DC</th>\n", " <th>ISI</th>\n", " <th>temp</th>\n", " <th>RH</th>\n", " <th>wind</th>\n", " <th>rain</th>\n", " <th>area</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>X</th>\n", " <td>1.000000</td>\n", " <td>0.539548</td>\n", " <td>-0.065003</td>\n", " <td>-0.024922</td>\n", " <td>-0.021039</td>\n", " <td>-0.048384</td>\n", " <td>-0.085916</td>\n", " <td>0.006210</td>\n", " <td>-0.051258</td>\n", " <td>0.085223</td>\n", " <td>0.018798</td>\n", " <td>0.065387</td>\n", " <td>0.063385</td>\n", " </tr>\n", " <tr>\n", " <th>Y</th>\n", " <td>0.539548</td>\n", " <td>1.000000</td>\n", " <td>-0.066292</td>\n", " <td>-0.005453</td>\n", " <td>-0.046308</td>\n", " <td>0.007782</td>\n", " <td>-0.101178</td>\n", " <td>-0.024488</td>\n", " <td>-0.024103</td>\n", " <td>0.062221</td>\n", " <td>-0.020341</td>\n", " <td>0.033234</td>\n", " <td>0.044873</td>\n", " </tr>\n", " <tr>\n", " <th>month</th>\n", " <td>-0.065003</td>\n", " <td>-0.066292</td>\n", " <td>1.000000</td>\n", " <td>-0.050837</td>\n", " <td>0.291477</td>\n", " <td>0.466645</td>\n", " <td>0.868698</td>\n", " <td>0.186597</td>\n", " <td>0.368842</td>\n", " <td>-0.095280</td>\n", " <td>-0.086368</td>\n", " <td>0.013438</td>\n", " <td>0.056496</td>\n", " </tr>\n", " <tr>\n", " <th>day</th>\n", " 
<td>-0.024922</td>\n", " <td>-0.005453</td>\n", " <td>-0.050837</td>\n", " <td>1.000000</td>\n", " <td>-0.041068</td>\n", " <td>0.062870</td>\n", " <td>0.000105</td>\n", " <td>0.032909</td>\n", " <td>0.052190</td>\n", " <td>0.092151</td>\n", " <td>0.032478</td>\n", " <td>-0.048340</td>\n", " <td>0.023226</td>\n", " </tr>\n", " <tr>\n", " <th>FFMC</th>\n", " <td>-0.021039</td>\n", " <td>-0.046308</td>\n", " <td>0.291477</td>\n", " <td>-0.041068</td>\n", " <td>1.000000</td>\n", " <td>0.382619</td>\n", " <td>0.330512</td>\n", " <td>0.531805</td>\n", " <td>0.431532</td>\n", " <td>-0.300995</td>\n", " <td>-0.028485</td>\n", " <td>0.056702</td>\n", " <td>0.040122</td>\n", " </tr>\n", " <tr>\n", " <th>DMC</th>\n", " <td>-0.048384</td>\n", " <td>0.007782</td>\n", " <td>0.466645</td>\n", " <td>0.062870</td>\n", " <td>0.382619</td>\n", " <td>1.000000</td>\n", " <td>0.682192</td>\n", " <td>0.305128</td>\n", " <td>0.469594</td>\n", " <td>0.073795</td>\n", " <td>-0.105342</td>\n", " <td>0.074790</td>\n", " <td>0.072994</td>\n", " </tr>\n", " <tr>\n", " <th>DC</th>\n", " <td>-0.085916</td>\n", " <td>-0.101178</td>\n", " <td>0.868698</td>\n", " <td>0.000105</td>\n", " <td>0.330512</td>\n", " <td>0.682192</td>\n", " <td>1.000000</td>\n", " <td>0.229154</td>\n", " <td>0.496208</td>\n", " <td>-0.039192</td>\n", " <td>-0.203466</td>\n", " <td>0.035861</td>\n", " <td>0.049383</td>\n", " </tr>\n", " <tr>\n", " <th>ISI</th>\n", " <td>0.006210</td>\n", " <td>-0.024488</td>\n", " <td>0.186597</td>\n", " <td>0.032909</td>\n", " <td>0.531805</td>\n", " <td>0.305128</td>\n", " <td>0.229154</td>\n", " <td>1.000000</td>\n", " <td>0.394287</td>\n", " <td>-0.132517</td>\n", " <td>0.106826</td>\n", " <td>0.067668</td>\n", " <td>0.008258</td>\n", " </tr>\n", " <tr>\n", " <th>temp</th>\n", " <td>-0.051258</td>\n", " <td>-0.024103</td>\n", " <td>0.368842</td>\n", " <td>0.052190</td>\n", " <td>0.431532</td>\n", " <td>0.469594</td>\n", " <td>0.496208</td>\n", " <td>0.394287</td>\n", " 
<td>1.000000</td>\n", " <td>-0.527390</td>\n", " <td>-0.227116</td>\n", " <td>0.069491</td>\n", " <td>0.097844</td>\n", " </tr>\n", " <tr>\n", " <th>RH</th>\n", " <td>0.085223</td>\n", " <td>0.062221</td>\n", " <td>-0.095280</td>\n", " <td>0.092151</td>\n", " <td>-0.300995</td>\n", " <td>0.073795</td>\n", " <td>-0.039192</td>\n", " <td>-0.132517</td>\n", " <td>-0.527390</td>\n", " <td>1.000000</td>\n", " <td>0.069410</td>\n", " <td>0.099751</td>\n", " <td>-0.075519</td>\n", " </tr>\n", " <tr>\n", " <th>wind</th>\n", " <td>0.018798</td>\n", " <td>-0.020341</td>\n", " <td>-0.086368</td>\n", " <td>0.032478</td>\n", " <td>-0.028485</td>\n", " <td>-0.105342</td>\n", " <td>-0.203466</td>\n", " <td>0.106826</td>\n", " <td>-0.227116</td>\n", " <td>0.069410</td>\n", " <td>1.000000</td>\n", " <td>0.061119</td>\n", " <td>0.012317</td>\n", " </tr>\n", " <tr>\n", " <th>rain</th>\n", " <td>0.065387</td>\n", " <td>0.033234</td>\n", " <td>0.013438</td>\n", " <td>-0.048340</td>\n", " <td>0.056702</td>\n", " <td>0.074790</td>\n", " <td>0.035861</td>\n", " <td>0.067668</td>\n", " <td>0.069491</td>\n", " <td>0.099751</td>\n", " <td>0.061119</td>\n", " <td>1.000000</td>\n", " <td>-0.007366</td>\n", " </tr>\n", " <tr>\n", " <th>area</th>\n", " <td>0.063385</td>\n", " <td>0.044873</td>\n", " <td>0.056496</td>\n", " <td>0.023226</td>\n", " <td>0.040122</td>\n", " <td>0.072994</td>\n", " <td>0.049383</td>\n", " <td>0.008258</td>\n", " <td>0.097844</td>\n", " <td>-0.075519</td>\n", " <td>0.012317</td>\n", " <td>-0.007366</td>\n", " <td>1.000000</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " X Y month day FFMC DMC DC \\\n", "X 1.000000 0.539548 -0.065003 -0.024922 -0.021039 -0.048384 -0.085916 \n", "Y 0.539548 1.000000 -0.066292 -0.005453 -0.046308 0.007782 -0.101178 \n", "month -0.065003 -0.066292 1.000000 -0.050837 0.291477 0.466645 0.868698 \n", "day -0.024922 -0.005453 -0.050837 1.000000 -0.041068 0.062870 0.000105 \n", "FFMC -0.021039 -0.046308 0.291477 
-0.041068 1.000000 0.382619 0.330512 \n", "DMC -0.048384 0.007782 0.466645 0.062870 0.382619 1.000000 0.682192 \n", "DC -0.085916 -0.101178 0.868698 0.000105 0.330512 0.682192 1.000000 \n", "ISI 0.006210 -0.024488 0.186597 0.032909 0.531805 0.305128 0.229154 \n", "temp -0.051258 -0.024103 0.368842 0.052190 0.431532 0.469594 0.496208 \n", "RH 0.085223 0.062221 -0.095280 0.092151 -0.300995 0.073795 -0.039192 \n", "wind 0.018798 -0.020341 -0.086368 0.032478 -0.028485 -0.105342 -0.203466 \n", "rain 0.065387 0.033234 0.013438 -0.048340 0.056702 0.074790 0.035861 \n", "area 0.063385 0.044873 0.056496 0.023226 0.040122 0.072994 0.049383 \n", "\n", " ISI temp RH wind rain area \n", "X 0.006210 -0.051258 0.085223 0.018798 0.065387 0.063385 \n", "Y -0.024488 -0.024103 0.062221 -0.020341 0.033234 0.044873 \n", "month 0.186597 0.368842 -0.095280 -0.086368 0.013438 0.056496 \n", "day 0.032909 0.052190 0.092151 0.032478 -0.048340 0.023226 \n", "FFMC 0.531805 0.431532 -0.300995 -0.028485 0.056702 0.040122 \n", "DMC 0.305128 0.469594 0.073795 -0.105342 0.074790 0.072994 \n", "DC 0.229154 0.496208 -0.039192 -0.203466 0.035861 0.049383 \n", "ISI 1.000000 0.394287 -0.132517 0.106826 0.067668 0.008258 \n", "temp 0.394287 1.000000 -0.527390 -0.227116 0.069491 0.097844 \n", "RH -0.132517 -0.527390 1.000000 0.069410 0.099751 -0.075519 \n", "wind 0.106826 -0.227116 0.069410 1.000000 0.061119 0.012317 \n", "rain 0.067668 0.069491 0.099751 0.061119 1.000000 -0.007366 \n", "area 0.008258 0.097844 -0.075519 0.012317 -0.007366 1.000000 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Correlation analysis of database\n", "\n", "db.corr()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe 
thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>X</th>\n", " <th>Y</th>\n", " <th>month</th>\n", " <th>day</th>\n", " <th>FFMC</th>\n", " <th>DMC</th>\n", " <th>DC</th>\n", " <th>ISI</th>\n", " <th>temp</th>\n", " <th>RH</th>\n", " <th>wind</th>\n", " <th>rain</th>\n", " <th>area</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>count</th>\n", " <td>517.000000</td>\n", " <td>517.000000</td>\n", " <td>517.000000</td>\n", " <td>517.000000</td>\n", " <td>517.000000</td>\n", " <td>517.000000</td>\n", " <td>517.000000</td>\n", " <td>517.000000</td>\n", " <td>517.000000</td>\n", " <td>517.000000</td>\n", " <td>517.000000</td>\n", " <td>517.000000</td>\n", " <td>517.000000</td>\n", " </tr>\n", " <tr>\n", " <th>mean</th>\n", " <td>4.669246</td>\n", " <td>4.299807</td>\n", " <td>7.475822</td>\n", " <td>4.259188</td>\n", " <td>90.644681</td>\n", " <td>110.872340</td>\n", " <td>547.940039</td>\n", " <td>9.021663</td>\n", " <td>18.889168</td>\n", " <td>44.288201</td>\n", " <td>4.017602</td>\n", " <td>0.021663</td>\n", " <td>12.847292</td>\n", " </tr>\n", " <tr>\n", " <th>std</th>\n", " <td>2.313778</td>\n", " <td>1.229900</td>\n", " <td>2.275990</td>\n", " <td>2.072929</td>\n", " <td>5.520111</td>\n", " <td>64.046482</td>\n", " <td>248.066192</td>\n", " <td>4.559477</td>\n", " <td>5.806625</td>\n", " <td>16.317469</td>\n", " <td>1.791653</td>\n", " <td>0.295959</td>\n", " <td>63.655818</td>\n", " </tr>\n", " <tr>\n", " <th>min</th>\n", " <td>1.000000</td>\n", " <td>2.000000</td>\n", " <td>1.000000</td>\n", " <td>1.000000</td>\n", " <td>18.700000</td>\n", " <td>1.100000</td>\n", " <td>7.900000</td>\n", " <td>0.000000</td>\n", " <td>2.200000</td>\n", " <td>15.000000</td>\n", " <td>0.400000</td>\n", " <td>0.000000</td>\n", " <td>0.000000</td>\n", " </tr>\n", " <tr>\n", " <th>25%</th>\n", " <td>3.000000</td>\n", " 
<td>4.000000</td>\n", " <td>7.000000</td>\n", " <td>2.000000</td>\n", " <td>90.200000</td>\n", " <td>68.600000</td>\n", " <td>437.700000</td>\n", " <td>6.500000</td>\n", " <td>15.500000</td>\n", " <td>33.000000</td>\n", " <td>2.700000</td>\n", " <td>0.000000</td>\n", " <td>0.000000</td>\n", " </tr>\n", " <tr>\n", " <th>50%</th>\n", " <td>4.000000</td>\n", " <td>4.000000</td>\n", " <td>8.000000</td>\n", " <td>5.000000</td>\n", " <td>91.600000</td>\n", " <td>108.300000</td>\n", " <td>664.200000</td>\n", " <td>8.400000</td>\n", " <td>19.300000</td>\n", " <td>42.000000</td>\n", " <td>4.000000</td>\n", " <td>0.000000</td>\n", " <td>0.520000</td>\n", " </tr>\n", " <tr>\n", " <th>75%</th>\n", " <td>7.000000</td>\n", " <td>5.000000</td>\n", " <td>9.000000</td>\n", " <td>6.000000</td>\n", " <td>92.900000</td>\n", " <td>142.400000</td>\n", " <td>713.900000</td>\n", " <td>10.800000</td>\n", " <td>22.800000</td>\n", " <td>53.000000</td>\n", " <td>4.900000</td>\n", " <td>0.000000</td>\n", " <td>6.570000</td>\n", " </tr>\n", " <tr>\n", " <th>max</th>\n", " <td>9.000000</td>\n", " <td>9.000000</td>\n", " <td>12.000000</td>\n", " <td>7.000000</td>\n", " <td>96.200000</td>\n", " <td>291.300000</td>\n", " <td>860.600000</td>\n", " <td>56.100000</td>\n", " <td>33.300000</td>\n", " <td>100.000000</td>\n", " <td>9.400000</td>\n", " <td>6.400000</td>\n", " <td>1090.840000</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " X Y month day FFMC DMC \\\n", "count 517.000000 517.000000 517.000000 517.000000 517.000000 517.000000 \n", "mean 4.669246 4.299807 7.475822 4.259188 90.644681 110.872340 \n", "std 2.313778 1.229900 2.275990 2.072929 5.520111 64.046482 \n", "min 1.000000 2.000000 1.000000 1.000000 18.700000 1.100000 \n", "25% 3.000000 4.000000 7.000000 2.000000 90.200000 68.600000 \n", "50% 4.000000 4.000000 8.000000 5.000000 91.600000 108.300000 \n", "75% 7.000000 5.000000 9.000000 6.000000 92.900000 142.400000 \n", "max 9.000000 9.000000 12.000000 
# %%
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable from run to run.
train_set, test_set = train_test_split(db, random_state=42, test_size=0.2)

# The exploratory cells below operate on a separate copy of the training rows.
work_set = train_set.copy(deep=True)
<td>4</td>\n", " <td>3</td>\n", " <td>9</td>\n", " <td>6</td>\n", " <td>92.2</td>\n", " <td>102.3</td>\n", " <td>751.5</td>\n", " <td>8.4</td>\n", " <td>23.5</td>\n", " <td>27</td>\n", " <td>4.0</td>\n", " <td>0.0</td>\n", " <td>3.33</td>\n", " </tr>\n", " <tr>\n", " <th>173</th>\n", " <td>4</td>\n", " <td>4</td>\n", " <td>9</td>\n", " <td>1</td>\n", " <td>90.9</td>\n", " <td>126.5</td>\n", " <td>686.5</td>\n", " <td>7.0</td>\n", " <td>17.7</td>\n", " <td>39</td>\n", " <td>2.2</td>\n", " <td>0.0</td>\n", " <td>3.07</td>\n", " </tr>\n", " <tr>\n", " <th>272</th>\n", " <td>2</td>\n", " <td>5</td>\n", " <td>8</td>\n", " <td>2</td>\n", " <td>92.1</td>\n", " <td>152.6</td>\n", " <td>658.2</td>\n", " <td>14.3</td>\n", " <td>20.2</td>\n", " <td>47</td>\n", " <td>4.0</td>\n", " <td>0.0</td>\n", " <td>3.09</td>\n", " </tr>\n", " <tr>\n", " <th>497</th>\n", " <td>3</td>\n", " <td>4</td>\n", " <td>8</td>\n", " <td>2</td>\n", " <td>96.1</td>\n", " <td>181.1</td>\n", " <td>671.2</td>\n", " <td>14.3</td>\n", " <td>32.3</td>\n", " <td>27</td>\n", " <td>2.2</td>\n", " <td>0.0</td>\n", " <td>14.68</td>\n", " </tr>\n", " <tr>\n", " <th>182</th>\n", " <td>5</td>\n", " <td>4</td>\n", " <td>2</td>\n", " <td>7</td>\n", " <td>86.8</td>\n", " <td>15.6</td>\n", " <td>48.3</td>\n", " <td>3.9</td>\n", " <td>12.4</td>\n", " <td>53</td>\n", " <td>2.2</td>\n", " <td>0.0</td>\n", " <td>6.38</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " X Y month day FFMC DMC DC ISI temp RH wind rain area\n", "329 4 3 9 6 92.2 102.3 751.5 8.4 23.5 27 4.0 0.0 3.33\n", "173 4 4 9 1 90.9 126.5 686.5 7.0 17.7 39 2.2 0.0 3.07\n", "272 2 5 8 2 92.1 152.6 658.2 14.3 20.2 47 4.0 0.0 3.09\n", "497 3 4 8 2 96.1 181.1 671.2 14.3 32.3 27 2.2 0.0 14.68\n", "182 5 4 2 7 86.8 15.6 48.3 3.9 12.4 53 2.2 0.0 6.38" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_set.head()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, 
"outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>X</th>\n", " <th>Y</th>\n", " <th>month</th>\n", " <th>day</th>\n", " <th>FFMC</th>\n", " <th>DMC</th>\n", " <th>DC</th>\n", " <th>ISI</th>\n", " <th>temp</th>\n", " <th>RH</th>\n", " <th>wind</th>\n", " <th>rain</th>\n", " <th>area</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>304</th>\n", " <td>6</td>\n", " <td>5</td>\n", " <td>5</td>\n", " <td>6</td>\n", " <td>85.1</td>\n", " <td>28.0</td>\n", " <td>113.8</td>\n", " <td>3.5</td>\n", " <td>11.3</td>\n", " <td>94</td>\n", " <td>4.9</td>\n", " <td>0.0</td>\n", " <td>0.00</td>\n", " </tr>\n", " <tr>\n", " <th>501</th>\n", " <td>7</td>\n", " <td>5</td>\n", " <td>8</td>\n", " <td>2</td>\n", " <td>96.1</td>\n", " <td>181.1</td>\n", " <td>671.2</td>\n", " <td>14.3</td>\n", " <td>21.6</td>\n", " <td>65</td>\n", " <td>4.9</td>\n", " <td>0.8</td>\n", " <td>0.00</td>\n", " </tr>\n", " <tr>\n", " <th>441</th>\n", " <td>8</td>\n", " <td>6</td>\n", " <td>8</td>\n", " <td>1</td>\n", " <td>92.1</td>\n", " <td>207.0</td>\n", " <td>672.6</td>\n", " <td>8.2</td>\n", " <td>25.5</td>\n", " <td>29</td>\n", " <td>1.8</td>\n", " <td>0.0</td>\n", " <td>1.23</td>\n", " </tr>\n", " <tr>\n", " <th>153</th>\n", " <td>5</td>\n", " <td>4</td>\n", " <td>9</td>\n", " <td>5</td>\n", " <td>94.3</td>\n", " <td>85.1</td>\n", " <td>692.3</td>\n", " <td>15.9</td>\n", " <td>20.1</td>\n", " <td>47</td>\n", " <td>4.9</td>\n", " <td>0.0</td>\n", " <td>1.46</td>\n", " </tr>\n", " <tr>\n", " <th>503</th>\n", " <td>2</td>\n", " <td>4</td>\n", " <td>8</td>\n", " <td>3</td>\n", " <td>94.5</td>\n", " <td>139.4</td>\n", " 
# %%
# Scatter of the X/Y grid coordinates of the training rows, drawn with large,
# mostly transparent markers.
work_set.plot.scatter(x='X', y='Y', s=300, alpha=0.1)

# %%
# Same X/Y scatter, with each marker's size set to 20 times the row's `area`.
work_set.plot.scatter(x='X', y='Y', s=work_set['area'] * 20, alpha=0.2)
<tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>X</th>\n", " <th>Y</th>\n", " <th>month</th>\n", " <th>day</th>\n", " <th>FFMC</th>\n", " <th>DMC</th>\n", " <th>DC</th>\n", " <th>ISI</th>\n", " <th>temp</th>\n", " <th>RH</th>\n", " <th>wind</th>\n", " <th>rain</th>\n", " <th>area</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>329</th>\n", " <td>4</td>\n", " <td>3</td>\n", " <td>9</td>\n", " <td>6</td>\n", " <td>92.2</td>\n", " <td>102.3</td>\n", " <td>751.5</td>\n", " <td>8.4</td>\n", " <td>23.5</td>\n", " <td>27</td>\n", " <td>4.0</td>\n", " <td>0.0</td>\n", " <td>3.33</td>\n", " </tr>\n", " <tr>\n", " <th>173</th>\n", " <td>4</td>\n", " <td>4</td>\n", " <td>9</td>\n", " <td>1</td>\n", " <td>90.9</td>\n", " <td>126.5</td>\n", " <td>686.5</td>\n", " <td>7.0</td>\n", " <td>17.7</td>\n", " <td>39</td>\n", " <td>2.2</td>\n", " <td>0.0</td>\n", " <td>3.07</td>\n", " </tr>\n", " <tr>\n", " <th>272</th>\n", " <td>2</td>\n", " <td>5</td>\n", " <td>8</td>\n", " <td>2</td>\n", " <td>92.1</td>\n", " <td>152.6</td>\n", " <td>658.2</td>\n", " <td>14.3</td>\n", " <td>20.2</td>\n", " <td>47</td>\n", " <td>4.0</td>\n", " <td>0.0</td>\n", " <td>3.09</td>\n", " </tr>\n", " <tr>\n", " <th>497</th>\n", " <td>3</td>\n", " <td>4</td>\n", " <td>8</td>\n", " <td>2</td>\n", " <td>96.1</td>\n", " <td>181.1</td>\n", " <td>671.2</td>\n", " <td>14.3</td>\n", " <td>32.3</td>\n", " <td>27</td>\n", " <td>2.2</td>\n", " <td>0.0</td>\n", " <td>14.68</td>\n", " </tr>\n", " <tr>\n", " <th>182</th>\n", " <td>5</td>\n", " <td>4</td>\n", " <td>2</td>\n", " <td>7</td>\n", " <td>86.8</td>\n", " <td>15.6</td>\n", " <td>48.3</td>\n", " <td>3.9</td>\n", " <td>12.4</td>\n", " <td>53</td>\n", " <td>2.2</td>\n", " <td>0.0</td>\n", " <td>6.38</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", 
" <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>71</th>\n", " <td>4</td>\n", " <td>5</td>\n", " <td>9</td>\n", " <td>5</td>\n", " <td>94.3</td>\n", " <td>85.1</td>\n", " <td>692.3</td>\n", " <td>15.9</td>\n", " <td>17.7</td>\n", " <td>37</td>\n", " <td>3.6</td>\n", " <td>0.0</td>\n", " <td>0.00</td>\n", " </tr>\n", " <tr>\n", " <th>106</th>\n", " <td>4</td>\n", " <td>5</td>\n", " <td>3</td>\n", " <td>4</td>\n", " <td>91.4</td>\n", " <td>30.7</td>\n", " <td>74.3</td>\n", " <td>7.5</td>\n", " <td>18.2</td>\n", " <td>29</td>\n", " <td>3.1</td>\n", " <td>0.0</td>\n", " <td>0.00</td>\n", " </tr>\n", " <tr>\n", " <th>270</th>\n", " <td>2</td>\n", " <td>2</td>\n", " <td>8</td>\n", " <td>2</td>\n", " <td>92.1</td>\n", " <td>152.6</td>\n", " <td>658.2</td>\n", " <td>14.3</td>\n", " <td>21.8</td>\n", " <td>56</td>\n", " <td>3.1</td>\n", " <td>0.0</td>\n", " <td>0.52</td>\n", " </tr>\n", " <tr>\n", " <th>435</th>\n", " <td>2</td>\n", " <td>5</td>\n", " <td>7</td>\n", " <td>6</td>\n", " <td>90.8</td>\n", " <td>84.7</td>\n", " <td>376.6</td>\n", " <td>5.6</td>\n", " <td>23.8</td>\n", " <td>51</td>\n", " <td>1.8</td>\n", " <td>0.0</td>\n", " <td>0.00</td>\n", " </tr>\n", " <tr>\n", " <th>102</th>\n", " <td>2</td>\n", " <td>4</td>\n", " <td>8</td>\n", " <td>2</td>\n", " <td>94.8</td>\n", " <td>108.3</td>\n", " <td>647.1</td>\n", " <td>17.0</td>\n", " <td>20.1</td>\n", " <td>40</td>\n", " <td>4.0</td>\n", " <td>0.0</td>\n", " <td>0.00</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>413 rows × 13 columns</p>\n", "</div>" ], "text/plain": [ " X Y month day FFMC DMC DC ISI temp RH wind rain area\n", "329 4 3 9 6 92.2 102.3 751.5 8.4 23.5 27 4.0 0.0 3.33\n", "173 4 4 9 1 90.9 126.5 686.5 7.0 17.7 39 2.2 0.0 3.07\n", "272 2 5 8 2 92.1 152.6 658.2 14.3 20.2 47 4.0 0.0 3.09\n", "497 3 4 8 2 96.1 181.1 671.2 14.3 32.3 27 2.2 0.0 14.68\n", "182 5 4 2 7 86.8 15.6 48.3 3.9 12.4 53 2.2 0.0 6.38\n", ".. .. .. ... ... ... ... ... ... ... .. ... ... 
...\n", "71 4 5 9 5 94.3 85.1 692.3 15.9 17.7 37 3.6 0.0 0.00\n", "106 4 5 3 4 91.4 30.7 74.3 7.5 18.2 29 3.1 0.0 0.00\n", "270 2 2 8 2 92.1 152.6 658.2 14.3 21.8 56 3.1 0.0 0.52\n", "435 2 5 7 6 90.8 84.7 376.6 5.6 23.8 51 1.8 0.0 0.00\n", "102 2 4 8 2 94.8 108.3 647.1 17.0 20.1 40 4.0 0.0 0.00\n", "\n", "[413 rows x 13 columns]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "work_set" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# Extracting the location features (X, Y) from the dataset\n", "\n", "# converting to list\n", "x_values = list(work_set['X'])\n", "y_values = list(work_set['Y'])\n", "\n", "# One [x, y] pair per row of work_set, in row order\n", "loc_values = [[x, y] for x, y in zip(x_values, y_values)]" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# Counting how often each location occurs in the dataset\n", "\n", "def count_points(x_points, y_points, scaling_factor):\n", "    \"\"\"Return one scaled occurrence count per (x, y) point.\n", "\n", "    x_points and y_points are parallel sequences of coordinates. Entry i of the\n", "    result is the number of times the pair (x_points[i], y_points[i]) occurs among\n", "    all the pairs passed in, multiplied by scaling_factor.\n", "    \"\"\"\n", "    points = list(zip(x_points, y_points))\n", "\n", "    # Tally every distinct location once instead of rescanning all points per point\n", "    occurrences = {}\n", "    for point in points:\n", "        occurrences[point] = occurrences.get(point, 0) + 1\n", "\n", "    return [occurrences[point] * scaling_factor for point in points]" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>X</th>\n", " <th>Y</th>\n", " <th>month</th>\n", " <th>day</th>\n", " 
<th>FFMC</th>\n", " <th>DMC</th>\n", " <th>DC</th>\n", " <th>ISI</th>\n", " <th>temp</th>\n", " <th>RH</th>\n", " <th>wind</th>\n", " <th>rain</th>\n", " <th>area</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>329</th>\n", " <td>4</td>\n", " <td>3</td>\n", " <td>9</td>\n", " <td>6</td>\n", " <td>92.2</td>\n", " <td>102.3</td>\n", " <td>751.5</td>\n", " <td>8.4</td>\n", " <td>23.5</td>\n", " <td>27</td>\n", " <td>4.0</td>\n", " <td>0.0</td>\n", " <td>3.33</td>\n", " </tr>\n", " <tr>\n", " <th>173</th>\n", " <td>4</td>\n", " <td>4</td>\n", " <td>9</td>\n", " <td>1</td>\n", " <td>90.9</td>\n", " <td>126.5</td>\n", " <td>686.5</td>\n", " <td>7.0</td>\n", " <td>17.7</td>\n", " <td>39</td>\n", " <td>2.2</td>\n", " <td>0.0</td>\n", " <td>3.07</td>\n", " </tr>\n", " <tr>\n", " <th>272</th>\n", " <td>2</td>\n", " <td>5</td>\n", " <td>8</td>\n", " <td>2</td>\n", " <td>92.1</td>\n", " <td>152.6</td>\n", " <td>658.2</td>\n", " <td>14.3</td>\n", " <td>20.2</td>\n", " <td>47</td>\n", " <td>4.0</td>\n", " <td>0.0</td>\n", " <td>3.09</td>\n", " </tr>\n", " <tr>\n", " <th>497</th>\n", " <td>3</td>\n", " <td>4</td>\n", " <td>8</td>\n", " <td>2</td>\n", " <td>96.1</td>\n", " <td>181.1</td>\n", " <td>671.2</td>\n", " <td>14.3</td>\n", " <td>32.3</td>\n", " <td>27</td>\n", " <td>2.2</td>\n", " <td>0.0</td>\n", " <td>14.68</td>\n", " </tr>\n", " <tr>\n", " <th>182</th>\n", " <td>5</td>\n", " <td>4</td>\n", " <td>2</td>\n", " <td>7</td>\n", " <td>86.8</td>\n", " <td>15.6</td>\n", " <td>48.3</td>\n", " <td>3.9</td>\n", " <td>12.4</td>\n", " <td>53</td>\n", " <td>2.2</td>\n", " <td>0.0</td>\n", " <td>6.38</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " X Y month day FFMC DMC DC ISI temp RH wind rain area\n", "329 4 3 9 6 92.2 102.3 751.5 8.4 23.5 27 4.0 0.0 3.33\n", "173 4 4 9 1 90.9 126.5 686.5 7.0 17.7 39 2.2 0.0 3.07\n", "272 2 5 8 2 92.1 152.6 658.2 14.3 20.2 47 4.0 0.0 3.09\n", "497 3 4 8 2 96.1 181.1 671.2 14.3 32.3 27 2.2 0.0 
14.68\n", "182 5 4 2 7 86.8 15.6 48.3 3.9 12.4 53 2.2 0.0 6.38" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "work_set.head()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[<AxesSubplot:xlabel='RH', ylabel='RH'>]], dtype=object)" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "<Figure size 1080x720 with 1 Axes>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Plotting the histogram for the RH attribute\n", "# (scatter_matrix on a single column draws only its diagonal panel, which is a histogram by default)\n", "from pandas.plotting import scatter_matrix\n", "\n", "attributes = ['RH']\n", "scatter_matrix(work_set[attributes], figsize=(15,10))" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[<AxesSubplot:xlabel='temp', ylabel='temp'>]], dtype=object)" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "<Figure size 1080x720 with 1 Axes>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Plotting the histogram for the temp attribute\n", "from pandas.plotting import scatter_matrix\n", "\n", "attributes = ['temp']\n", "scatter_matrix(work_set[attributes], figsize=(15,10))" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[<AxesSubplot:xlabel='DMC', ylabel='DMC'>]], dtype=object)" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "<Figure size 1080x720 with 1 Axes>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Plotting the histogram for the DMC attribute\n", "from pandas.plotting import scatter_matrix\n", "\n", "attributes = ['DMC']\n", "scatter_matrix(work_set[attributes], figsize=(15,10))" ] }, { "cell_type": "code", 
"execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[<AxesSubplot:xlabel='area', ylabel='area'>]], dtype=object)" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "<Figure size 1080x720 with 1 Axes>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Plotting the histogram for the area attribute\n", "from pandas.plotting import scatter_matrix\n", "\n", "attributes = ['area']\n", "scatter_matrix(work_set[attributes], figsize=(15,10))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Finding the unique values in month, day and area (a value can occur in many rows)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 3, 10, 8, 9, 4, 6, 7, 2, 1, 12, 5, 11])" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "db['month'].unique()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([5, 2, 6, 7, 1, 3, 4])" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "db['day'].unique()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.00000e+00, 3.60000e-01, 4.30000e-01, 4.70000e-01, 5.50000e-01,\n", " 6.10000e-01, 7.10000e-01, 7.70000e-01, 9.00000e-01, 9.50000e-01,\n", " 9.60000e-01, 1.07000e+00, 1.12000e+00, 1.19000e+00, 1.36000e+00,\n", " 1.43000e+00, 1.46000e+00, 1.56000e+00, 1.61000e+00, 1.63000e+00,\n", " 1.64000e+00, 1.69000e+00, 1.75000e+00, 1.90000e+00, 1.94000e+00,\n", " 1.95000e+00, 2.01000e+00, 2.14000e+00, 2.29000e+00, 2.51000e+00,\n", " 2.53000e+00, 2.55000e+00, 2.57000e+00, 2.69000e+00, 2.74000e+00,\n", " 3.07000e+00, 3.50000e+00, 4.53000e+00, 4.61000e+00, 4.69000e+00,\n", " 4.88000e+00, 5.23000e+00, 5.33000e+00, 5.44000e+00, 
6.38000e+00,\n", " 6.83000e+00, 6.96000e+00, 7.04000e+00, 7.19000e+00, 7.30000e+00,\n", " 7.40000e+00, 8.24000e+00, 8.31000e+00, 8.68000e+00, 8.71000e+00,\n", " 9.41000e+00, 1.00100e+01, 1.00200e+01, 1.09300e+01, 1.10600e+01,\n", " 1.12400e+01, 1.13200e+01, 1.15300e+01, 1.21000e+01, 1.30500e+01,\n", " 1.37000e+01, 1.39900e+01, 1.45700e+01, 1.54500e+01, 1.72000e+01,\n", " 1.92300e+01, 2.34100e+01, 2.42300e+01, 2.60000e+01, 2.61300e+01,\n", " 2.73500e+01, 2.86600e+01, 2.94800e+01, 3.03200e+01, 3.17200e+01,\n", " 3.18600e+01, 3.20700e+01, 3.58800e+01, 3.68500e+01, 3.70200e+01,\n", " 3.77100e+01, 4.85500e+01, 4.93700e+01, 5.83000e+01, 6.41000e+01,\n", " 7.13000e+01, 8.84900e+01, 9.51800e+01, 1.03390e+02, 1.05660e+02,\n", " 1.54880e+02, 1.96480e+02, 2.00940e+02, 2.12880e+02, 1.09084e+03,\n", " 1.01300e+01, 2.87000e+00, 7.60000e-01, 9.00000e-02, 7.50000e-01,\n", " 2.47000e+00, 6.80000e-01, 2.40000e-01, 2.10000e-01, 1.52000e+00,\n", " 1.03400e+01, 8.02000e+00, 1.38000e+00, 8.85000e+00, 3.30000e+00,\n", " 4.25000e+00, 6.54000e+00, 7.90000e-01, 1.70000e-01, 4.40000e+00,\n", " 5.20000e-01, 9.27000e+00, 3.09000e+00, 8.98000e+00, 1.11900e+01,\n", " 5.38000e+00, 1.78500e+01, 1.07300e+01, 2.20300e+01, 9.77000e+00,\n", " 2.47700e+01, 1.10000e+00, 2.42400e+01, 8.00000e+00, 2.64000e+00,\n", " 8.64500e+01, 6.57000e+00, 3.52000e+00, 4.10000e-01, 5.18000e+00,\n", " 1.42900e+01, 1.58000e+00, 3.78000e+00, 4.41000e+00, 3.43600e+01,\n", " 7.21000e+00, 1.01000e+00, 2.18000e+00, 4.42000e+00, 3.33000e+00,\n", " 6.58000e+00, 1.56400e+01, 1.12200e+01, 2.13000e+00, 5.60400e+01,\n", " 7.48000e+00, 1.47000e+00, 3.93000e+00, 6.10000e+00, 5.83000e+00,\n", " 2.81900e+01, 3.71000e+00, 7.31000e+00, 2.03000e+00, 1.72000e+00,\n", " 5.97000e+00, 1.30600e+01, 1.26000e+00, 8.12000e+00, 1.09000e+00,\n", " 3.94000e+00, 2.93000e+00, 5.65000e+00, 2.00300e+01, 1.26400e+01,\n", " 1.83000e+01, 3.93500e+01, 1.74630e+02, 7.73000e+00, 1.63300e+01,\n", " 5.86000e+00, 4.28700e+01, 1.21800e+01, 1.60000e+01, 
2.45900e+01,\n", " 2.87400e+01, 9.96000e+00, 3.01800e+01, 7.07600e+01, 5.17800e+01,\n", " 3.64000e+00, 3.63000e+00, 8.16000e+00, 4.95000e+00, 6.04000e+00,\n", " 3.95000e+00, 7.80000e+00, 4.62000e+00, 7.46280e+02, 7.02000e+00,\n", " 2.44000e+00, 3.05000e+00, 1.85760e+02, 6.30000e+00, 7.20000e-01,\n", " 4.96000e+00, 2.35000e+00, 3.20000e+00, 6.36000e+00, 1.53400e+01,\n", " 5.40000e-01, 6.43000e+00, 3.30000e-01, 1.23000e+00, 3.35000e+00,\n", " 9.71000e+00, 8.27500e+01, 3.32000e+00, 5.39000e+00, 6.84000e+00,\n", " 3.18000e+00, 5.55000e+00, 6.61000e+00, 6.11300e+01, 3.84800e+01,\n", " 7.03200e+01, 1.00800e+01, 3.19000e+00, 1.76000e+00, 7.36000e+00,\n", " 2.21000e+00, 2.78530e+02, 2.75000e+00, 1.29000e+00, 2.64300e+01,\n", " 2.07000e+00, 2.00000e+00, 1.64000e+01, 4.67000e+01, 4.33200e+01,\n", " 8.59000e+00, 2.77000e+00, 1.46800e+01, 4.05400e+01, 1.08200e+01,\n", " 4.95900e+01, 5.80000e+00, 2.17000e+00, 6.44000e+00, 5.42900e+01,\n", " 1.11600e+01])" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "db['area'].unique()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "# Helper used for every histogram in this notebook\n", "def histogram_plot(db, title):\n", "    \"\"\"Draw and show a histogram of `db` with `title` as the heading.\n", "\n", "    `db` is handed straight to plt.hist. Note that inside this function the\n", "    parameter shadows the notebook-level `db` DataFrame.\n", "    \"\"\"\n", "    plt.figure(figsize=(8, 6))\n", "    ax = plt.subplot()\n", "\n", "    # Hide the frame around the plot\n", "    for side in (\"top\", \"bottom\", \"right\", \"left\"):\n", "        ax.spines[side].set_visible(False)\n", "\n", "    # Ticks on the bottom and left axes only\n", "    ax.get_xaxis().tick_bottom()\n", "    ax.get_yaxis().tick_left()\n", "\n", "    plt.title(title, fontsize=22)\n", "    plt.hist(db, edgecolor='black', linewidth=1.2)\n", "    plt.show()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "<Figure size 576x432 with 1 Axes>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Scattering the plot with the help of the location\n", "\n", 
"plt.figure(figsize=(8, 6)) \n", " \n", "ax = plt.subplot() \n", "ax.spines[\"top\"].set_visible(False) \n", "ax.spines[\"bottom\"].set_visible(False) \n", "ax.spines[\"right\"].set_visible(False) \n", "ax.spines[\"left\"].set_visible(False)\n", " \n", "ax.get_xaxis().tick_bottom()\n", "ax.get_yaxis().tick_left() \n", " \n", "plt.title(\"Fire location plot\", fontsize = 22)\n", "plt.scatter(x_values, y_values, s = count_points(x_values, y_values, 25), alpha = 0.3)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "# Encoding the data using LabelEncoder\n", "\n", "from sklearn.preprocessing import LabelEncoder\n", "\n", "month_encoder = LabelEncoder()\n", "day_encoder = LabelEncoder()\n", "\n", "months = db['month']\n", "days = db['day']\n", "\n", "month_1hot = month_encoder.fit_transform(months) # label encoding month\n", "day_1hot = day_encoder.fit_transform(days) # label encoding day" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 2, 9, 9, 2, 2, 7, 7, 7, 8, 8, 8, 8, 7, 8, 8, 8, 2,\n", " 9, 2, 3, 8, 8, 5, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8,\n", " 8, 8, 9, 9, 9, 2, 6, 7, 7, 8, 8, 8, 8, 6, 2, 2, 8,\n", " 7, 7, 7, 7, 8, 8, 9, 1, 1, 2, 2, 7, 7, 7, 7, 8, 8,\n", " 8, 2, 2, 8, 2, 7, 8, 1, 1, 2, 7, 7, 7, 7, 7, 7, 7,\n", " 8, 8, 8, 8, 2, 7, 2, 7, 7, 7, 8, 1, 2, 7, 7, 7, 7,\n", " 7, 8, 0, 2, 2, 7, 8, 8, 2, 2, 8, 8, 2, 2, 2, 2, 2,\n", " 7, 7, 7, 8, 8, 8, 9, 2, 8, 9, 9, 1, 2, 2, 8, 2, 7,\n", " 8, 8, 6, 8, 8, 7, 7, 6, 7, 7, 2, 8, 7, 8, 5, 6, 6,\n", " 8, 8, 7, 8, 7, 7, 8, 2, 7, 2, 8, 8, 2, 7, 7, 2, 7,\n", " 8, 7, 7, 8, 7, 7, 3, 7, 8, 7, 8, 9, 1, 9, 7, 8, 2,\n", " 8, 2, 2, 2, 7, 7, 8, 7, 7, 3, 8, 8, 8, 8, 2, 1, 9,\n", " 2, 8, 7, 8, 8, 8, 9, 7, 8, 2, 2, 2, 8, 8, 8, 2, 7,\n", " 8, 2, 6, 8, 8, 9, 7, 8, 7, 8, 8, 8, 8, 8, 7, 8, 8,\n", " 8, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,\n", " 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,\n", " 7, 11, 
11, 11, 11, 11, 11, 11, 11, 11, 1, 1, 1, 6, 6, 6, 6,\n", " 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 8,\n", " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n", " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n", " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,\n", " 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 7, 7,\n", " 8, 8, 7, 7, 2, 0, 6, 7, 7, 7, 7, 7, 8, 2, 7, 7, 1,\n", " 8, 8, 2, 1, 1, 8, 7, 7, 5, 5, 8, 7, 7, 8, 7, 8, 1,\n", " 8, 6, 1, 1, 6, 7, 7, 7, 6, 2, 7, 7, 7, 7, 6, 8, 7,\n", " 7, 7, 7, 7, 7, 8, 7, 7, 7, 7, 6, 7, 7, 7, 8, 8, 7,\n", " 3, 6, 8, 7, 7, 2, 8, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7,\n", " 7, 7, 7, 8, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5, 5, 5, 5,\n", " 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,\n", " 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,\n", " 7, 7, 7, 7, 7, 7, 10])" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "month_1hot" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([4, 1, 5, 4, 6, 6, 0, 0, 1, 5, 5, 5, 4, 0, 2, 4, 5, 0, 2, 5, 1, 0,\n", " 6, 5, 5, 6, 4, 0, 5, 6, 4, 0, 4, 6, 0, 1, 1, 4, 5, 1, 1, 5, 1, 5,\n", " 2, 2, 0, 0, 0, 0, 3, 6, 2, 2, 3, 3, 1, 6, 0, 4, 6, 6, 3, 6, 0, 3,\n", " 4, 4, 4, 4, 4, 4, 4, 1, 4, 3, 4, 0, 4, 1, 6, 6, 1, 2, 3, 3, 3, 3,\n", " 6, 5, 5, 4, 6, 6, 0, 6, 5, 5, 6, 6, 6, 1, 1, 5, 5, 4, 3, 6, 5, 0,\n", " 4, 4, 6, 0, 1, 1, 5, 5, 0, 3, 0, 0, 6, 1, 4, 6, 0, 4, 2, 6, 5, 0,\n", " 6, 3, 1, 5, 6, 0, 1, 1, 0, 2, 4, 5, 2, 3, 0, 1, 1, 3, 4, 6, 5, 4,\n", " 5, 6, 5, 2, 2, 4, 0, 3, 5, 5, 6, 3, 2, 2, 4, 3, 2, 2, 6, 0, 5, 5,\n", " 3, 6, 2, 1, 6, 0, 6, 0, 4, 1, 6, 0, 5, 6, 4, 3, 1, 2, 1, 4, 3, 3,\n", " 1, 0, 1, 6, 6, 2, 5, 3, 5, 1, 4, 3, 5, 5, 4, 0, 5, 5, 6, 0, 2, 0,\n", " 6, 4, 0, 4, 2, 6, 0, 5, 6, 5, 2, 6, 1, 1, 5, 6, 5, 1, 5, 6, 2, 4,\n", " 6, 6, 6, 6, 6, 2, 2, 2, 2, 2, 2, 3, 3, 3, 5, 5, 5, 5, 0, 4, 4, 4,\n", " 4, 1, 1, 1, 1, 1, 1, 1, 1, 6, 2, 3, 0, 0, 0, 0, 4, 1, 6, 2, 4, 6,\n", " 2, 5, 5, 5, 5, 5, 
4, 1, 1, 6, 6, 6, 2, 5, 0, 0, 4, 4, 5, 6, 6, 6,\n", " 6, 6, 6, 6, 6, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5,\n", " 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4,\n", " 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 5, 6, 6, 2, 6, 3,\n", " 4, 5, 0, 5, 3, 6, 2, 3, 2, 3, 5, 6, 6, 3, 4, 4, 0, 4, 6, 1, 0, 6,\n", " 6, 6, 5, 2, 2, 3, 4, 3, 2, 1, 5, 5, 4, 1, 4, 4, 0, 5, 6, 3, 1, 2,\n", " 6, 6, 2, 2, 6, 5, 5, 3, 3, 0, 3, 6, 3, 5, 3, 6, 4, 5, 0, 5, 5, 4,\n", " 4, 0, 0, 4, 4, 6, 6, 2, 2, 6, 2, 4, 0, 3, 3, 0, 3, 2, 5, 5, 5, 5,\n", " 6, 1, 1, 5, 0, 2, 3, 6, 6, 4, 0, 5, 3, 3, 3, 6, 6, 0, 3, 3, 6, 6,\n", " 6, 0, 1, 1, 1, 2, 2, 3, 4, 4, 5, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 3,\n", " 4, 4, 4, 4, 4, 6, 6, 6, 6, 5, 1])" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "day_1hot" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/vishwasmore/opt/anaconda3/lib/python3.7/site-packages/sklearn/preprocessing/data.py:617: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by StandardScaler.\n", " return self.partial_fit(X, y)\n", "/Users/vishwasmore/opt/anaconda3/lib/python3.7/site-packages/sklearn/base.py:462: DataConversionWarning: Data with input dtype int64, float64 were all converted to float64 by StandardScaler.\n", " return self.fit(X, **fit_params).transform(X)\n" ] } ], "source": [ "# Standardizing the data (Feature Scaling) so that all the features are of the same scale\n", "from sklearn.preprocessing import StandardScaler\n", "\n", "scaler = StandardScaler()\n", "\n", "numerical_features = db.drop(['month', 'day'], axis=1)\n", "scaled_features = scaler.fit_transform(numerical_features)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 1.00831277, 0.56986043, -0.80595947, ..., 1.49861442,\n", " -0.07326831, 
-0.20201979],\n", " [ 1.00831277, -0.24400101, -0.00810203, ..., -1.74175564,\n", " -0.07326831, -0.20201979],\n", " [ 1.00831277, -0.24400101, -0.00810203, ..., -1.51828184,\n", " -0.07326831, -0.20201979],\n", " ...,\n", " [ 1.00831277, -0.24400101, -1.64008316, ..., 1.49861442,\n", " -0.07326831, -0.02653216],\n", " [-1.58736044, -0.24400101, 0.68095666, ..., -0.00983371,\n", " -0.07326831, -0.20201979],\n", " [ 0.57570057, -1.05786246, -2.02087875, ..., 0.26950853,\n", " -0.07326831, -0.20201979]])" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scaled_features" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "from sklearn.base import BaseEstimator, TransformerMixin\n", "\n", "\n", "class AttributeSelector(BaseEstimator, TransformerMixin):\n", "    \"\"\"Pipeline step that picks the named attributes out of X.\n", "\n", "    transform() indexes X with attribute_names and returns the `.values` of the\n", "    result, so X must support that (presumably a pandas DataFrame).\n", "    \"\"\"\n", "\n", "    def __init__(self, attribute_names):\n", "        self.attribute_names = attribute_names\n", "\n", "    def fit(self, X, y=None):\n", "        # Nothing is learned from the data\n", "        return self\n", "\n", "    def transform(self, X):\n", "        selected = X[self.attribute_names]\n", "        return selected.values" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import MultiLabelBinarizer\n", "\n", "\n", "class CustomBinarizer(BaseEstimator, TransformerMixin):\n", "    \"\"\"Pipeline step that binarizes rows of labels against a fixed list of classes.\"\"\"\n", "\n", "    def __init__(self, class_labels):\n", "        self.class_labels = class_labels\n", "\n", "    def fit(self, X, y=None, **fit_params):\n", "        # Nothing is stored: a fresh MultiLabelBinarizer is built on every transform call\n", "        return self\n", "\n", "    def transform(self, X):\n", "        binarizer = MultiLabelBinarizer(classes=self.class_labels)\n", "        return binarizer.fit_transform(X)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Reference: [scikit-learn `Pipeline` documentation](https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html)" ] }, 
{ "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "from sklearn.pipeline import Pipeline\n", "\n", "# Columns routed to each pipeline\n", "numerical_attributes = ['X', 'Y', 'FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH', 'wind', 'rain']\n", "categorical_attributes = ['month', 'day']\n", "\n", "# Label vocabulary handed to the binarizer: every month value followed by every day value.\n", "# NOTE(review): the unique() outputs above show month and day as overlapping integer codes,\n", "# so this array repeats values (1-7); confirm MultiLabelBinarizer accepts that and that\n", "# a month code and an equal day code are meant to share one indicator column.\n", "categorical_classes = np.concatenate((db['month'].unique(), db['day'].unique()), axis=0)\n", "\n", "# Numerical columns: select, then standardize\n", "numerical_pipeline = Pipeline([\n", "    ('selector', AttributeSelector(numerical_attributes)),\n", "    ('standardize', StandardScaler()),\n", "])\n", "\n", "# Categorical columns: select, then binarize\n", "categorical_pipeline = Pipeline([\n", "    ('selector', AttributeSelector(categorical_attributes)),\n", "    ('encode', CustomBinarizer(categorical_classes)),\n", "])" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "<Figure size 576x432 with 1 Axes>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# FFMC distribution\n", "# Histogram of the FFMC attribute\n", "histogram_plot(db['FFMC'], title=\"FFMC distribution\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAeQAAAF6CAYAAADIwivKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAkqUlEQVR4nO3de3hU9Z3H8c8kkwRzgySNtsrShyBQMQYkGEEhoGtI2dZul1qU6LSU1a0sQsOCgEAANaxcKurGIpV62RIEs16qtmqLaBsuEjAFWRCsoPiUyCWEIcnM2GSSOfsHmykxgSSTy/wyeb+ex0dzZnLmmx9j3nPmcrBZlmUJAAAEVViwBwAAAAQZAAAjEGQAAAxAkAEAMABBBgDAAAQZ6CG60wcqutOsQEchyAhZJSUlGjx4cJN/hgwZouHDh+s73/mOHn74YR05cqTFfblcLm3atEkOh0OZmZlKTU3VqFGjdM899+jtt9/usJnXrFmjwYMHq6CgwL/tlVde0eDBg7Vw4cKA97t9+3bdfffdbfqem2++WYMHD9aJEycuuq2jNTdrw5/llClTOu12gWCzB3sAoLNFR0frH//xH/1f19fXq7q6WocOHVJhYaGKior00EMP6V/+5V+a/f5du3Zp9uzZOnXqlHr37q1BgwZp6NChKisr09atW1VcXKysrCw99thjioiI6Kofq9VOnjypqVOn6oorrgj2KC3qTrMCHY0gI+QlJCTo5z//eZPtlmVp06ZNys/P16JFi/T1r39do0aNanSdDz74wH9UNmfOHN1111265JJL/Jd/9NFHmjlzpjZv3qwFCxZo1apVHT5/VlaWhg4dqvj4+IC+P9Cnf59//nl5vV597WtfC+j7A3GhWdPS0vTmm28qOjq6y2YBuhpPWaPHstlsmjx5snJzc1VXV6cVK1Y0CoLH49HcuXNVX1+vhx9+WPfcc0+jGEvSkCFD9MwzzygiIkKvv/66Dh482OFzxsXFacCAAUpOTu7wfV9Mv379NGDAANntwX/cfskll2jAgAH6xje+EexRgE5DkNHj/eQnP9Fll12mgwcPau/evf7tf/jDH1RWVqbU1FT94Ac/uOD3f/Ob39TkyZM1btw4VVRUtOo2KysrtWLFCt18881KS0vTP//zP+vNN99s9roXeg15z549mjZtmsaNG6fU1FSNHTtWc+fO1eHDh/3XKSgo0NixYyVJZWVlGjx4sBwOR6P9rl+/XitXrlR6errS09O1ZMkSSRd/vdjtdis/P1833nijhg4dqh/+8Id67bXXmlzP4XBo8ODB+uCDD5pc9tXXyy8268VeQ/7zn/+s6dOna+TIkUpNTdXNN9+spUuXNjv34MGDNXHiRFVWVurBBx/UmDFjdM0112jChAlat26d6urqmv4BAF0k+A99gSCz2+0aM2aMXnrpJb3//vu69tprJUlvvfWWJOk73/lOi/toyxuunE6nHA6HPvnkE11++eUaN26cjh49qlmzZunKK69s1T727t2rKVOmqLa2Vunp6UpLS9Onn36q1157TZs3b9aLL76oQYMGafDgwcrKytLmzZv9r6UPGDCg0b4KCwt17NgxjR49WqdPn1b//v1bvP377rtPf/3rX3X99dcrLCxMO3fu1Ny5c/Xxxx9r7ty5rV6L87Vm1q/asGGD8vPz5fP5NGzYMF122WU6dOiQNm7cqLffflvPPPOMrr766kbf43a7NXnyZB0/flzDhw+XZVnauXOnfv7zn+vkyZNatGhRQPMD7UWQAUkpKSmSpM8++8y/7dNPP5UkXXPNNR16WwUFBfrkk080YcIErVy5UpGRkZKkp59+Wo8++mir9vHYY4/pb3/7m5577jndcMMN/u2rVq3Sr371Kz377LNavny5xo8fr7S0NG3evPmCr6UfPXpUa9as8b/xzefztXj75eXlevHFF/2x++STT+RwOPTMM88oKyvL/6CmLVoz6/k++ugj5efnKyoqSmvWrPGvg8/n05o
1a1RQUKAZM2bo7bff9q9xw8979dVX6+2339Zll10mSdqxY4d+8pOfaNOmTcrNzVVsbGyb5wfai6esAcn/himn0+nfVl5eLklKSkrqsNupra3Vq6++ql69eumhhx5qFIp/+7d/09ChQ1u1n4bZvv71rzfafs8992jRokUXfYr9q6644opG70IPC2v518Ldd9/d6Mhz4MCBmjFjhiRp48aNrb7t9li/fr18Pp+mTZvW6EFJWFiY7rvvPmVkZKisrKzZlwLmzJnjj7Ek3XDDDerfv7+8Xq+OHj3aFeMDTRBkQOdCKZ17o1eD8PBwSerQ1xX/93//Vx6PR8OGDWv2XdPnh/FiRowYIUn60Y9+pEcffVQffPCB6urq1KdPHzkcDl133XWtnulb3/pWq6/boLmn8ceNGydJzb5e3Bl2794tSZowYUKzlzfM2HC986WlpTXZ1vCmuS+//LKjRgTahCADOvcmK0nq3bu3f1vDL+gzZ8502O2cOnVKkhodnZ2vtZ+/vf/++zVy5EiVl5fr6aef1p133qmRI0dq9uzZev/999s00/k/c2uEh4c3+27nhqP1hp+xszXczoXWrG/fvpKk06dPN9oeFhbW7FPSDQ/AWvOUPdAZCDIg6dChQ5LOvbGoQcNTsvv27Wvx+z/66COtXbu2xeuefwTenIYotCQuLk7//d//raKiIt1zzz26+uqr5Xa79dvf/lZTpkzRypUrW7UfqXVPUX91xuY+CtXwkbHWnhylveFr6fPV9fX1ktToZQGp5T8DIFgIMnq8v/3tb9q5c6ckNToxSMPTx5s3b25xHxs3btRjjz2mNWvWXPR6DUfGX3zxRbOXN7w23FpDhw7VnDlz9Morr2j79u2aN2+ewsPD9dxzz+nkyZNt2ldr1dbW6uzZs022N/xM5x/9N8SvIY7nq6qqatccl156qSTp2LFjzV7esL0j3wMAdCaCjB6vsLBQlZWVGjJkiFJTU/3bb7nlFvXr10/79u3Tb37zmwt+/6FDh/TGG29IknJyci56W6mpqYqPj9fevXubPJUqSX/6059anNflcukHP/iBbr311kbbExMTNXXqVF111VXy+Xz+IHfGEeGOHTuabPv9738vScrIyPBvazizVnM/64cffthkW1tmbXid/ELnEm/42Nr58wAmI8josXw+n1566SU9/vjjstvtTT5LHBkZqcWLFyssLEwLFy7Uc889p5qamkbX2bNnj+699159+eWXys7OVmZm5kVvMyIiQjk5OfJ6vZo3b548Ho//sqKiIm3btq3FuWNjY2VZlv7yl7/o17/+daPLDh06pMOHDys6Otr/Ua6Gp2zdbneH/S1Ky5cv11//+lf/13v37tVTTz0lu92uu+66y7+94SWAjRs3yuv1+rdv2LCh0UlYGrRl1rvuukvh4eF66qmnGr1ublmWnnzySe3evVtXXHGFbrrppoB+RqCr8TlkhDyn06k5c+b4v/b5fKqsrNShQ4d0+vRpRUZGavny5f53Lp9vzJgxeuKJJzRnzhwtX75ca9eu1ZAhQxQfH6/PPvtMH3/8saRzR9Otfd122rRp+uCDD7Rt2zZlZWUpPT1dZWVl2r9/v4YNG9ZsqL5q6dKluuuuu7Rs2TIVFRUpJSVFZ8+eVWlpqerq6rRkyRL/G5f69OmjPn366OzZs5o8ebKuueaadv3NUREREfra176m7373uxo1apRqa2tVUlKi+vp65eXladCgQf7r/vCHP1RhYaF2796t7Oxspaam6tNPP9Xhw4d16623+p9ZaNCWWVNTU/XAAw9o2bJlmjJliq699lr/iUGOHj2qxMREPf74401OdwqYiiNkhDyPx6M33njD/89bb72lvXv3Kjk5WVOnTtVbb72l7373uxf8/vHjx+t3v/udpkyZoksvvVR79+7VO++8o4qKCt100036xS9+oV/84hfq1atXq+bp1auXnn32Wf3sZz9TbGys3nvvPblcLi1durTFp7wbpKWlqbC
wUOPHj5fT6dSWLVv08ccf64YbbtBzzz3XaD82m00rV65U//79tX//fr333nutuo0LsdlsevbZZ/VP//RP2rNnj0pLSzVs2DCtW7dOd955Z6Pr9u3bV5s2bVJWVpaqq6tVXFys+Ph4rVu3Tt/73vea3XdbZnU4HCosLNRNN92kzz77TO+++64sy9KUKVP0m9/8ptmPNwGmsln8TeAAAAQdR8gAABiAIAMAYACCDACAAQgyAAAGIMgAABggqJ9DLi+vbtf3JyREy+n0tHxFdArWP3hY++Bh7YMnFNY+OTnugpd16yNku711J+JH52D9g4e1Dx7WPnhCfe27dZABAAgVBBkAAAMQZAAADECQAQAwAEEGAMAABBkAAAMQZAAADECQAQAwAEEGAMAABBkAAAMQZAAADECQAQAwQFD/ticAQOepqqqUx2Pu344UHR2t+PjewR7DGAQZAEJQVVWl0tPTVFnpDPYoF9S7d4JKS/cR5f9HkAEgBHk8HlVWOjXmzkcVFZMQ7HGaqHE7tXXDbHk8HoL8/wgyAISwqJgE9YpNDPYYaAXe1AUAgAEIMgAABiDIAAAYgCADAGAAggwAgAEIMgAABiDIAAAYgCADAGAAggwAgAEIMgAABiDIAAAYgCADAGAAggwAgAEIMgAABiDIAAAYgCADAGAAggwAgAFaFeQPP/xQDoej0bY33nhDt99+u//roqIiTZw4UZMmTdJ7773XsVMCABDi7C1dYd26dXr99dd1ySWX+Ld99NFHeumll2RZliSpvLxc69ev18svv6yamhrl5OToxhtvVGRkZOdNDgBACGnxCLlfv34qKCjwf+10OrV69WotWLDAv23fvn269tprFRkZqbi4OPXr10+HDh3qnIkBAAhBLR4hZ2dn69ixY5Kk+vp6LVy4UA888ICioqL813G5XIqLi/N/HRMTI5fL1eKNJyREy24PD2Ruv+TkuJavhE7D+gcPax883WHtvd7qYI/QKklJsW1az+6w9oFqMcjnO3DggD7//HMtXbpUNTU1Onz4sJYtW6aRI0fK7Xb7r+d2uxsF+kKcTk/bJz5PcnKcysu7x50uFLH+wcPaB093WfuKipYPikxQUeFSRETr1rO7rP3FXOwBRZuCnJaWpt/97neSpGPHjuk//uM/tHDhQpWXl+vxxx9XTU2NamtrdeTIEQ0aNKh9UwMA0IO0KcgXkpycLIfDoZycHFmWpVmzZjV6ShsAAFxcq4Lct29fFRUVXXTbpEmTNGnSpI6dDgCAHoITgwAAYACCDACAATrkNWQA6Gmqqirl8bTvkyKd6dSpk8EeAW1EkAGgjaqqKpWenqbKSmewR0EIIcgA0EYej0eVlU6NufNRRcUkBHucZlWVH9WuVx8K9hhoA4IMAAGKiklQr9jEYI/RrBo3R+/dDW/qAgDAAAQZAAADEGQAAAxAkAEAMABBBgDAAAQZAAADEGQAAAxAkAEAMABBBgDAAAQZAAADEGQAAAxAkAEAMABBBgDAAAQZAAADEGQAAAxAkAEAMABBBgDAAAQZAAADEGQAAAxAkAEAMABBBgDAAAQZAAADEGQAAAxAkAEAMABBBgDAAAQZAAADEGQAAAzQqiB/+OGHcjgckqSDBw8qJydHDodD//qv/6rTp09LkoqKijRx4kRNmjRJ7733XudNDABACLK3dIV169bp9ddf1yWXXCJJWrZsmfLy8nTVVVdp06ZNWrdune6++26tX79eL7/8smpqapSTk6Mbb7xRkZGRnf4DAAAQClo8Qu7Xr58KCgr8X69evVpXXXWVJKm+vl5RUVHat2+frr32WkVGRiouLk79+vXToUOHOm9qAABCTItHyNnZ2Tp27Jj/60svvVSS9Oc//1mFhYXasGGDtm7dqri4OP91YmJi5HK5WrzxhIRo2e3hgcztl5wc1/KV0GlY/+Bh7YMnKSk22COEjKSk2Dbdl0P5ft9ikJvz5ptv6qmnntLTTz+txMRExcbGyu12+y9
3u92NAn0hTqcnkJv3S06OU3l5dbv2gcCx/sHD2gdPcnKcKipaPuBA61RUuBQR0br7cijc7y/2gKLN77J+7bXXVFhYqPXr1+sf/uEfJElpaWkqLS1VTU2NqqurdeTIEQ0aNCjwiQEA6GHadIRcX1+vZcuW6Rvf+IZmzJghSbruuus0c+ZMORwO5eTkyLIszZo1S1FRUZ0yMAAAoahVQe7bt6+KiookSbt27Wr2OpMmTdKkSZM6bjIAAHoQTgwCAIABCDIAAAYgyAAAGIAgAwBgAIIMAIABCDIAAAYgyAAAGIAgAwBgAIIMAIABCDIAAAYgyAAAGIAgAwBgAIIMAIABCDIAAAYgyAAAGIAgAwBgAIIMAIABCDIAAAYgyAAAGIAgAwBgAIIMAIABCDIAAAYgyAAAGIAgAwBgAIIMAIABCDIAAAYgyAAAGIAgAwBgAIIMAIABCDIAAAYgyAAAGIAgAwBgAIIMAIABWhXkDz/8UA6HQ5L0+eefa/LkycrJydGSJUvk8/kkSU8++aRuu+023XHHHdq3b1/nTQwAQAhqMcjr1q3TokWLVFNTI0l65JFHlJubqxdeeEGWZWnLli06cOCAdu3apf/5n//R6tWr9eCDD3b64AAAhJIWg9yvXz8VFBT4vz5w4IAyMjIkSZmZmdqxY4dKS0s1evRo2Ww2XX755aqvr9eZM2c6b2oAAEKMvaUrZGdn69ixY/6vLcuSzWaTJMXExKi6uloul0t9+vTxX6dhe2Ji4kX3nZAQLbs9PMDRz0lOjmvX96N9WP/gYe2DJykpNtgjhIykpNg23ZdD+X7fYpC/Kizs7wfVbrdb8fHxio2NldvtbrQ9Lq7lRXM6PW29+UaSk+NUXl7drn0gcKx/8LD2wZOcHKeKClewxwgZFRUuRUS07r4cCvf7iz2gaPO7rIcMGaKSkhJJUnFxsUaMGKHhw4dr27Zt8vl8+uKLL+Tz+Vo8OgYAAH/X5iPkefPmKS8vT6tXr1ZKSoqys7MVHh6uESNG6Pbbb5fP59PixYs7Y1YAAEJWq4Lct29fFRUVSZL69++vwsLCJteZMWOGZsyY0bHTAQDQQ3BiEAAADECQAQAwAEEGAMAABBkAAAMQZAAADECQAQAwAEEGAMAABBkAAAMQZAAADECQAQAwAEEGAMAABBkAAAMQZAAADECQAQAwAEEGAMAABBkAAAMQZAAADECQAQAwAEEGAMAABBkAAAMQZAAADECQAQAwAEEGAMAABBkAAAMQZAAADECQAQAwAEEGAMAABBkAAAMQZAAADECQAQAwAEEGAMAABBkAAAMQZAAADGAP5Ju8Xq/mz5+vsrIyhYWF6eGHH5bdbtf8+fNls9k0cOBALVmyRGFh9B4AgNYIKMh/+tOfVFdXp02bNmn79u16/PHH5fV6lZubq+uvv16LFy/Wli1blJWV1dHzAgAQkgI6hO3fv7/q6+vl8/nkcrlkt9t14MABZWRkSJIyMzO1Y8eODh0UAIBQFtARcnR0tMrKyjRhwgQ5nU6tXbtWu3fvls1mkyTFxMSourq6xf0kJETLbg8PZAS/5OS4dn0/2of1Dx7WPniSkmKDPULISEqKbdN9OZTv9wEF+fnnn9fo0aM1e/ZsHT9+XD/+8Y/l9Xr9l7vdbsXHx7e4H6fTE8jN+yUnx6m8vOXwo3Ow/sHD2gdPcnKcKipcwR4jZFRUuBQR0br7cijc7y/2gCKgp6zj4+MVF3dup71791ZdXZ2GDBmikpISSVJxcbFGjBgRyK4BAOiRAjpCnjJlihYsWKCcnBx5vV7NmjVLqampysvL0+rVq5WSkqLs7OyOnhUAgJAVUJBjYmL0xBNPNNleWFjY7oEAAOiJ+KAwAAAGIMgAABiAIAMAYACCDACAAQgyAAAGIMgAABiAIAMAYACCDACAAQgyAAAGIMgAABiAIAMAYACCDACAAQgyAAAGIMgAABiAIAMAYACCDACAAQgyAAAGIMgAABiAIAMAYACCDAC
AAQgyAAAGsAd7gI5UVVUpj8cT7DEuKjo6WvHxvYM9BgDAMCET5KqqSqWnp6my0hnsUS6qd+8ElZbuI8oAgEZCJsgej0eVlU6NufNRRcUkBHucZtW4ndq6YbY8Hg9BBgA0EjJBbhAVk6BesYnBHgMAgDbhTV0AABiAIAMAYACCDACAAQgyAAAGIMgAABiAIAMAYACCDACAAQgyAAAGCPjEIL/85S/17rvvyuv1avLkycrIyND8+fNls9k0cOBALVmyRGFh9B4AgNYIqJglJSXas2ePNm7cqPXr1+vEiRN65JFHlJubqxdeeEGWZWnLli0dPSsAACEroCBv27ZNgwYN0vTp03Xvvfdq3LhxOnDggDIyMiRJmZmZ2rFjR4cOCgBAKAvoKWun06kvvvhCa9eu1bFjxzRt2jRZliWbzSZJiomJUXV1dYv7SUiIlt0eHsgIfsnJcZIkr7fl2zNFUlKsf+7uLlR+ju6ItQ+epKTYYI8QMtr6+zCU7/cBBblPnz5KSUlRZGSkUlJSFBUVpRMnTvgvd7vdio+Pb3E/Tmf7/u7i5OQ4lZefC3FFhatd++pKFRUuRUR0nwcQF3L++qNrsfbBk5wc161+35iuLb8PQ+F+f7EHFAE9ZZ2enq6tW7fKsiydPHlSX375pUaNGqWSkhJJUnFxsUaMGBHYtAAA9EABHSHfdNNN2r17t2677TZZlqXFixerb9++ysvL0+rVq5WSkqLs7OyOnhUAgJAV8Mee5s6d22RbYWFhu4YBAKCn4oPCAAAYgCADAGAAggwAgAEIMgAABiDIAAAYgCADAGAAggwAgAEIMgAABiDIAAAYgCADAGAAggwAgAEIMgAABiDIAAAYgCADAGAAggwAgAEIMgAABiDIAAAYgCADAGAAggwAgAEIMgAABiDIAAAYgCADAGAAggwAgAEIMgAABiDIAAAYgCADAGAAggwAgAEIMgAABiDIAAAYgCADAGAAggwAgAEIMgAABiDIAAAYgCADAGCAdgW5oqJCY8eO1ZEjR/T5559r8uTJysnJ0ZIlS+Tz+TpqRgAAQl7AQfZ6vVq8eLF69eolSXrkkUeUm5urF154QZZlacuWLR02JAAAoc4e6DeuWLFCd9xxh55++mlJ0oEDB5SRkSFJyszM1Pbt25WVldUxUwLocaqqKuXxeII9RhNeb7VOnToZ7DEQggIK8iuvvKLExESNGTPGH2TLsmSz2SRJMTExqq6ubnE/CQnRstvDAxnBLzk5TtK5/0m6i6SkWP/c3V2o/BzdUSivfWVlpUZcN1RnnWeCPQo6WVt/H4by/T6gIL/88suy2Wx6//33dfDgQc2bN09nzvz9fxy32634+PgW9+N0tu/Rb3JynMrLz4W4osLVrn11pYoKlyIius8DiAs5f/3RtUJ97U+cOKmzzjMac+ejiopJCPY4TVSVH9WuVx8K9hgh4eDBI63+/Z2UFNvlv+ujo6MVH9+7w/Z3sQcUAQV5w4YN/v92OBxaunSpVq1apZKSEl1//fUqLi7WyJEjA9k1APhFxSSoV2xisMdoosbtDPYI3V5drUe2sHDdcktmsEe5qN69E1Rauq9Do3whAb+G/FXz5s1TXl6eVq9erZSUFGVnZ3fUrgEAIabeWyPLV2/ssyDSuQdeWzfMlsfj6R5BXr9+vf+/CwsL27s7AEAPYuqzIMHAiUEAADAAQQYAwAAEGQAAAxBkAAAMQJABADAAQQYAwAAEGQAAAxBkAAAMQJABADAAQQYAwAAEGQAAAxBkAAAMQJABADAAQQYAwAAEGQAAAxBkAAAMQJABADAAQQYAwAAEGQAAAxBkAAAMQJABADAAQQYAwAAEGQAAAxBkAAAMYA/2AEAoqqqqlMfj6bT9e73VqqhwBfz90dHRio/v3YETAWgvggx0sKqqSqWnp6my0hnsUS6od+8ElZbuI8qAQQgy0ME8Ho8qK50ac+ejiopJCPY4TdS4ndq6YbY8Hg9BBgxCkIFOEhWToF6
xicEeA0A3wZu6AAAwAEEGAMAABBkAAAPwGjLQQ506dTLYI1yQybMBnYUgAz1MXa1HtrBw3XJLZrBHAXCegILs9Xq1YMEClZWVqba2VtOmTdOVV16p+fPny2azaeDAgVqyZInCwnhGvDkmP/rnhBGhr95bI8tXb+zHsiSpqvyodr36ULDHALpUQEF+/fXX1adPH61atUpnz57V97//fX3rW99Sbm6urr/+ei1evFhbtmxRVlZWR8/brXWHIxNOGNFzmPyxrBq3uSdVATpLQEH+9re/rezsbEmSZVkKDw/XgQMHlJGRIUnKzMzU9u3bCfJXmH5kwgkjACB4AgpyTEyMJMnlcmnmzJnKzc3VihUrZLPZ/JdXV1e3uJ+EhGjZ7eGBjOCXnBwn6dy5fbsLk49MJCkpKda/ri1p7fV6ku50XwTQsrb8TmyPgN/Udfz4cU2fPl05OTm69dZbtWrVKv9lbrdb8fHxLe7D6WzfyfeTk+NUXn7ul197TrSPxioqXIqIaDkq568//o77IhBaWvs7sTUuFvaA3nV1+vRpTZ06Vffff79uu+02SdKQIUNUUlIiSSouLtaIESMC2TUAAD1SQEFeu3atqqqqtGbNGjkcDjkcDuXm5qqgoEC33367vF6v/zVmAADQsoCesl60aJEWLVrUZHthYWG7BwIAoCfig8IAABiAIAMAYACCDACAAQgyAAAGIMgAABiAIAMAYACCDACAAQgyAAAGIMgAABiAIAMAYACCDACAAQgyAAAGIMgAABiAIAMAYICA/vpFIJiqqirl8XiCPcYFnTp1MtgjAOiGCDK6laqqSqWnp6my0hnsUQCgQxFkdCsej0eVlU6NufNRRcUkBHucZlWVH9WuVx8K9hgAuhmCjG4pKiZBvWITgz1Gs2rcHL0DaDve1AUAgAEIMgAABiDIAAAYgCADAGAA3tSFJlr7OVqvt1oVFa5OnqYxPuMLIFQRZPjV1XpkCwvXLbdkBnsUAOhxCDL86r01snz1fMYXAIKAIKMJPuMLAF2PN3UBAGAAggwAgAEIMgAABiDIAAAYgCADAGAAggwAgAEIMgAABujQzyH7fD4tXbpUH3/8sSIjI5Wfn69vfvObHXkTAACEpA49Qn7nnXdUW1urF198UbNnz9by5cs7cvcAAISsDj1CLi0t1ZgxYyRJw4YN0/79+zty961i8pmcajyV5/5t6IymzycxY0cwfT7J/BlNn08yf0bT55O6fjabZVlWR+1s4cKFGj9+vMaOHStJGjdunN555x3Z7ZyhEwCAi+nQp6xjY2Pldrv9X/t8PmIMAEArdGiQhw8fruLiYknS3r17NWjQoI7cPQAAIatDn7JueJf1X/7yF1mWpf/8z//UgAEDOmr3AACErA4NMgAACAwnBgEAwAAEGQAAA3TLt0BzRrCu4fV6tWDBApWVlam2tlbTpk3TlVdeqfnz58tms2ngwIFasmSJwsLC9OSTT+qPf/yj7Ha7FixYoLS0tGCPHxIqKio0ceJEPfvss7Lb7ax9F/nlL3+pd999V16vV5MnT1ZGRgZr3wW8Xq/mz5+vsrIyhYWF6eGHH+5Z93urG/r9739vzZs3z7Isy9qzZ4917733Bnmi0PTSSy9Z+fn5lmVZltPptMaOHWv99Kc/tXbu3GlZlmXl5eVZf/jDH6z9+/dbDofD8vl8VllZmTVx4sRgjh0yamtrrX//93+3xo8fbx0+fJi17yI7d+60fvrTn1r19fWWy+Wy/uu//ou17yKbN2+2Zs6caVmWZW3bts267777etTad8unrE04I1hP8O1vf1s/+9nPJEmWZSk8PFwHDhxQRkaGJCkzM1M7duxQaWmpRo8eLZvNpssvv1z19fU6c+ZMMEcPCStWrNAdd9yhSy+9VJJY+y6ybds2DRo0SNOnT9e9996rcePGsfZdpH///qqvr5fP55PL5ZLdbu9Ra98tg+xyuRQbG+v/Ojw8XHV1dUGcKDTFxMQoNjZWLpdLM2f
OVG5urizLks1m819eXV3d5M+jYTsC98orrygxMdH/wFMSa99FnE6n9u/fryeeeEIPPvig5syZw9p3kejoaJWVlWnChAnKy8uTw+HoUWvfLV9D5oxgXef48eOaPn26cnJydOutt2rVqlX+y9xut+Lj45v8ebjdbsXFxQVj3JDx8ssvy2az6f3339fBgwc1b968RkcArH3n6dOnj1JSUhQZGamUlBRFRUXpxIkT/stZ+87z/PPPa/To0Zo9e7aOHz+uH//4x/J6vf7LQ33tu+URMmcE6xqnT5/W1KlTdf/99+u2226TJA0ZMkQlJSWSpOLiYo0YMULDhw/Xtm3b5PP59MUXX8jn8ykxMTGYo3d7GzZsUGFhodavX6+rrrpKK1asUGZmJmvfBdLT07V161ZZlqWTJ0/qyy+/1KhRo1j7LhAfH+8Pa+/evVVXV9ejfud0yxODcEawrpGfn6+33npLKSkp/m0LFy5Ufn6+vF6vUlJSlJ+fr/DwcBUUFKi4uFg+n08PPPCARowYEcTJQ4vD4dDSpUsVFhamvLw81r4LrFy5UiUlJbIsS7NmzVLfvn1Z+y7gdru1YMEClZeXy+v16kc/+pFSU1N7zNp3yyADABBquuVT1gAAhBqCDACAAQgyAAAGIMgAABiAIAMAYACCDACAAQgyAAAGIMgAABjg/wB2Dfe2Qw2ZGgAAAABJRU5ErkJggg==\n", "text/plain": [ "<Figure size 576x432 with 1 Axes>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# DC distribution\n", "# Creating Histogram based on DC attribute \n", "histogram_plot(db['DC'], title = \"DC distribution\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [ "# Separating the features and labels into X and Y\n", "# The label is the burned area; it must not also appear among the features.\n", "# (Previously column 11 was used both as a feature and as the label.)\n", "# NOTE(review): assumes the target column is still named 'area' after the cleaning cells -- confirm.\n", "X = db.drop(columns=['area']).values\n", "Y = db['area'].values" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "# Separating the test and training set\n", "train_x, test_x, train_y, test_y = train_test_split(X,Y, test_size=0.3, random_state = 9)\n", "mse_values = []\n", "variance_score = []" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "After all the data cleaning and modifications, we have our training and test sets ready. 
They can be easily consumed by the algorithm of our choice." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }