diff --git a/notebooks/3.1 Exploratory data analysis I.ipynb b/notebooks/3.1 Exploratory data analysis I.ipynb
deleted file mode 100644
index a2d9f86..0000000
--- a/notebooks/3.1 Exploratory data analysis I.ipynb
+++ /dev/null
@@ -1,1653 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 3.1 Basics of exploratory data analysis with pandas\n",
- "\n",
- "In this notebook, we will focus on another essential skill in data analysis, namely the ability to get insights about a dataset by means of plotting and summary statistics."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### `describe()`"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 383,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " week_day \n",
- " day_hour \n",
- " n_mentions \n",
- " year \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " count \n",
- " 2819.000000 \n",
- " 2819.000000 \n",
- " 2819.000000 \n",
- " 2819.000000 \n",
- " \n",
- " \n",
- " mean \n",
- " 3.196169 \n",
- " 12.782547 \n",
- " 0.807733 \n",
- " 2014.777226 \n",
- " \n",
- " \n",
- " std \n",
- " 1.946637 \n",
- " 7.611198 \n",
- " 0.859091 \n",
- " 1.687017 \n",
- " \n",
- " \n",
- " min \n",
- " 0.000000 \n",
- " 0.000000 \n",
- " 0.000000 \n",
- " 2010.000000 \n",
- " \n",
- " \n",
- " 25% \n",
- " 2.000000 \n",
- " 5.000000 \n",
- " 0.000000 \n",
- " 2013.000000 \n",
- " \n",
- " \n",
- " 50% \n",
- " 3.000000 \n",
- " 15.000000 \n",
- " 1.000000 \n",
- " 2015.000000 \n",
- " \n",
- " \n",
- " 75% \n",
- " 5.000000 \n",
- " 19.000000 \n",
- " 1.000000 \n",
- " 2016.000000 \n",
- " \n",
- " \n",
- " max \n",
- " 6.000000 \n",
- " 23.000000 \n",
- " 6.000000 \n",
- " 2017.000000 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " week_day day_hour n_mentions year\n",
- "count 2819.000000 2819.000000 2819.000000 2819.000000\n",
- "mean 3.196169 12.782547 0.807733 2014.777226\n",
- "std 1.946637 7.611198 0.859091 1.687017\n",
- "min 0.000000 0.000000 0.000000 2010.000000\n",
- "25% 2.000000 5.000000 0.000000 2013.000000\n",
- "50% 3.000000 15.000000 1.000000 2015.000000\n",
- "75% 5.000000 19.000000 1.000000 2016.000000\n",
- "max 6.000000 23.000000 6.000000 2017.000000"
- ]
- },
- "execution_count": 383,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# the default behavior is to include only\n",
- "# columns with numerical values\n",
- "\n",
- "\n",
- "df.describe()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# in this case it fails, as pandas does not know\n",
- "# how to handle a column with values of type list (fair enough)\n",
- "\n",
- "df.describe(include='all')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 386,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " created_at \n",
- " week_day \n",
- " day_hour \n",
- " n_mentions \n",
- " year \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " count \n",
- " 2819 \n",
- " 2819.000000 \n",
- " 2819.000000 \n",
- " 2819.000000 \n",
- " 2819.000000 \n",
- " \n",
- " \n",
- " unique \n",
- " 2819 \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " \n",
- " \n",
- " top \n",
- " 2013-02-21 06:48:55 \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " \n",
- " \n",
- " freq \n",
- " 1 \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " \n",
- " \n",
- " first \n",
- " 2010-06-04 18:31:57 \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " \n",
- " \n",
- " last \n",
- " 2017-04-05 14:56:29 \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " \n",
- " \n",
- " mean \n",
- " NaN \n",
- " 3.196169 \n",
- " 12.782547 \n",
- " 0.807733 \n",
- " 2014.777226 \n",
- " \n",
- " \n",
- " std \n",
- " NaN \n",
- " 1.946637 \n",
- " 7.611198 \n",
- " 0.859091 \n",
- " 1.687017 \n",
- " \n",
- " \n",
- " min \n",
- " NaN \n",
- " 0.000000 \n",
- " 0.000000 \n",
- " 0.000000 \n",
- " 2010.000000 \n",
- " \n",
- " \n",
- " 25% \n",
- " NaN \n",
- " 2.000000 \n",
- " 5.000000 \n",
- " 0.000000 \n",
- " 2013.000000 \n",
- " \n",
- " \n",
- " 50% \n",
- " NaN \n",
- " 3.000000 \n",
- " 15.000000 \n",
- " 1.000000 \n",
- " 2015.000000 \n",
- " \n",
- " \n",
- " 75% \n",
- " NaN \n",
- " 5.000000 \n",
- " 19.000000 \n",
- " 1.000000 \n",
- " 2016.000000 \n",
- " \n",
- " \n",
- " max \n",
- " NaN \n",
- " 6.000000 \n",
- " 23.000000 \n",
- " 6.000000 \n",
- " 2017.000000 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " created_at week_day day_hour n_mentions \\\n",
- "count 2819 2819.000000 2819.000000 2819.000000 \n",
- "unique 2819 NaN NaN NaN \n",
- "top 2013-02-21 06:48:55 NaN NaN NaN \n",
- "freq 1 NaN NaN NaN \n",
- "first 2010-06-04 18:31:57 NaN NaN NaN \n",
- "last 2017-04-05 14:56:29 NaN NaN NaN \n",
- "mean NaN 3.196169 12.782547 0.807733 \n",
- "std NaN 1.946637 7.611198 0.859091 \n",
- "min NaN 0.000000 0.000000 0.000000 \n",
- "25% NaN 2.000000 5.000000 0.000000 \n",
- "50% NaN 3.000000 15.000000 1.000000 \n",
- "75% NaN 5.000000 19.000000 1.000000 \n",
- "max NaN 6.000000 23.000000 6.000000 \n",
- "\n",
- " year \n",
- "count 2819.000000 \n",
- "unique NaN \n",
- "top NaN \n",
- "freq NaN \n",
- "first NaN \n",
- "last NaN \n",
- "mean 2014.777226 \n",
- "std 1.687017 \n",
- "min 2010.000000 \n",
- "25% 2013.000000 \n",
- "50% 2015.000000 \n",
- "75% 2016.000000 \n",
- "max 2017.000000 "
- ]
- },
- "execution_count": 386,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# that's a workaround to include all other columns\n",
- "\n",
- "df.describe(exclude=[list])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 320,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "count 2819\n",
- "unique 2819\n",
- "top 2013-02-21 06:48:55\n",
- "freq 1\n",
- "first 2010-06-04 18:31:57\n",
- "last 2017-04-05 14:56:29\n",
- "Name: created_at, dtype: object"
- ]
- },
- "execution_count": 320,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.created_at.describe()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 387,
- "metadata": {},
- "outputs": [],
- "source": [
- "df['week_day_name'] = df['week_day_name'].astype('category')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 389,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " created_at \n",
- " week_day \n",
- " day_hour \n",
- " n_mentions \n",
- " year \n",
- " week_day_name \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " count \n",
- " 2819 \n",
- " 2819.000000 \n",
- " 2819.000000 \n",
- " 2819.000000 \n",
- " 2819.000000 \n",
- " 2819 \n",
- " \n",
- " \n",
- " unique \n",
- " 2819 \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " 7 \n",
- " \n",
- " \n",
- " top \n",
- " 2013-02-21 06:48:55 \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " Friday \n",
- " \n",
- " \n",
- " freq \n",
- " 1 \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " 530 \n",
- " \n",
- " \n",
- " first \n",
- " 2010-06-04 18:31:57 \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " \n",
- " \n",
- " last \n",
- " 2017-04-05 14:56:29 \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " NaN \n",
- " \n",
- " \n",
- " mean \n",
- " NaN \n",
- " 3.196169 \n",
- " 12.782547 \n",
- " 0.807733 \n",
- " 2014.777226 \n",
- " NaN \n",
- " \n",
- " \n",
- " std \n",
- " NaN \n",
- " 1.946637 \n",
- " 7.611198 \n",
- " 0.859091 \n",
- " 1.687017 \n",
- " NaN \n",
- " \n",
- " \n",
- " min \n",
- " NaN \n",
- " 0.000000 \n",
- " 0.000000 \n",
- " 0.000000 \n",
- " 2010.000000 \n",
- " NaN \n",
- " \n",
- " \n",
- " 25% \n",
- " NaN \n",
- " 2.000000 \n",
- " 5.000000 \n",
- " 0.000000 \n",
- " 2013.000000 \n",
- " NaN \n",
- " \n",
- " \n",
- " 50% \n",
- " NaN \n",
- " 3.000000 \n",
- " 15.000000 \n",
- " 1.000000 \n",
- " 2015.000000 \n",
- " NaN \n",
- " \n",
- " \n",
- " 75% \n",
- " NaN \n",
- " 5.000000 \n",
- " 19.000000 \n",
- " 1.000000 \n",
- " 2016.000000 \n",
- " NaN \n",
- " \n",
- " \n",
- " max \n",
- " NaN \n",
- " 6.000000 \n",
- " 23.000000 \n",
- " 6.000000 \n",
- " 2017.000000 \n",
- " NaN \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " created_at week_day day_hour n_mentions \\\n",
- "count 2819 2819.000000 2819.000000 2819.000000 \n",
- "unique 2819 NaN NaN NaN \n",
- "top 2013-02-21 06:48:55 NaN NaN NaN \n",
- "freq 1 NaN NaN NaN \n",
- "first 2010-06-04 18:31:57 NaN NaN NaN \n",
- "last 2017-04-05 14:56:29 NaN NaN NaN \n",
- "mean NaN 3.196169 12.782547 0.807733 \n",
- "std NaN 1.946637 7.611198 0.859091 \n",
- "min NaN 0.000000 0.000000 0.000000 \n",
- "25% NaN 2.000000 5.000000 0.000000 \n",
- "50% NaN 3.000000 15.000000 1.000000 \n",
- "75% NaN 5.000000 19.000000 1.000000 \n",
- "max NaN 6.000000 23.000000 6.000000 \n",
- "\n",
- " year week_day_name \n",
- "count 2819.000000 2819 \n",
- "unique NaN 7 \n",
- "top NaN Friday \n",
- "freq NaN 530 \n",
- "first NaN NaN \n",
- "last NaN NaN \n",
- "mean 2014.777226 NaN \n",
- "std 1.687017 NaN \n",
- "min 2010.000000 NaN \n",
- "25% 2013.000000 NaN \n",
- "50% 2015.000000 NaN \n",
- "75% 2016.000000 NaN \n",
- "max 2017.000000 NaN "
- ]
- },
- "execution_count": 389,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.describe(exclude=['object'])"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Plotting"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 334,
- "metadata": {},
- "outputs": [],
- "source": [
- "%matplotlib inline\n",
- "\n",
- "import matplotlib.pyplot as plt"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Histograms\n",
- "\n",
- "They are useful to see the distribution of a certain variable in your dataset."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 184,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " text \n",
- " \n",
- " \n",
- " n_mentions \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " 1145 \n",
- " \n",
- " \n",
- " 1 \n",
- " 1231 \n",
- " \n",
- " \n",
- " 2 \n",
- " 329 \n",
- " \n",
- " \n",
- " 3 \n",
- " 78 \n",
- " \n",
- " \n",
- " 4 \n",
- " 28 \n",
- " \n",
- " \n",
- " 5 \n",
- " 6 \n",
- " \n",
- " \n",
- " 6 \n",
- " 2 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " text\n",
- "n_mentions \n",
- "0 1145\n",
- "1 1231\n",
- "2 329\n",
- "3 78\n",
- "4 28\n",
- "5 6\n",
- "6 2"
- ]
- },
- "execution_count": 184,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.groupby(['n_mentions'])[['text']].count()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 185,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmoAAAGDCAYAAACbcTyoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3deZxlZX3n8c/XbhBwAZWWYW+iqEHi2qIGNUSMAXGCM0HFMQaVhJhxi2gUoxl3JWPGJTMuIYKgISBBHVGJSlwAF5aGIKtLD6J0g3Qji6Ag22/+OE/Ltajuqu6uqvt09ef9et1XnXuW5/zOqXvrfuuc59yTqkKSJEn9ude4C5AkSdLkDGqSJEmdMqhJkiR1yqAmSZLUKYOaJElSpwxqkiRJnTKoSUCSjyb52xlqa5ckNydZ0J5/I8mfzUTbrb1/S3LITLW3Dut9Z5Jrk/x0mvO/Nck/z3ZdMy3JsUneOaZ1J8nHk1yf5Jxx1LA2SS5Jss+465A2JQY1zXtJrkhyS5KbktyQ5NtJXpbk16//qnpZVb1jmm09Y23zVNVPquq+VXXnDNR+j7BTVftX1XEb2vY61rEL8Fpgj6r6T5NM3yfJ8rmsaZ56CvAHwE5Vtdc4C5kssFbVI6vqG2MqacaN45+JJIuTVJKFc7lebbwMatpU/Oequh+wK3Ak8Abg6JleyTz+47sL8LOqWjnuQjYmq4+qroNdgSuq6hezUc+mbB6/NzXfVZUPH/P6AVwBPGPCuL2Au4A92/NjgXe24W2BLwA3ANcBZzL8U/PJtswtwM3A64HFQAGHAj8BzhgZt7C19w3gPcA5wM+BzwEPbNP2AZZPVi+wH3AbcHtb33dH2vuzNnwv4M3Aj4GVwCeArdu01XUc0mq7FnjTWvbT1m35Va29N7f2n9G2+a5Wx7ETlrvPhOk3AzsAbwVOam3eBFwCLBlZbgfg0219PwJetZbajgU+BHyxtXU28JAJ27lwZP7RffRi4FvA+9vv9HLgd9v4K9t+O2TCuj4KnNbWdTqw68j0R7Rp1wHfB543YdmPAKcCv2DC625ku09pyy8D/ryNPxS4Fbiz7cO3TbLsum7LvYG/b7//a9p2bTn62mM4UroSuBp4SZt2GMPr7rZWy+cnvpda2x8ArmqPDwD3nqrtNv1ZwKVt/64AXreG3/vq7f0/wI3A94B9J7xmj27trwDeCSyYZF/9jPb+Hln2Hu8v4PeBi0bmOQ04d+T5mcBzpnr9MrxvjgD+X1v3Sdz9nv8Jw+t19XvlyeP+G+mj78fYC/DhY7YfTBLU2vifAH/Zho/l7qD2nvaBtll7PBXIZG1xd0j4BENg2ZLJg9oKYM82z6eBf27T9mENQa0Nv3X1vCPTv8HdIeSlDB/2vwXcF/gM8MkJtf1Tq+vRwK+A317DfvoEQ4i8X1v2B8Cha6pzwrKTbcdbGYLHs4AFbb+e1abdCzgP+B/A5q3+y4E/XEP7x7YPvL2AhcDxwIkTtnNtQe0O4CWtjne23/2HGMLGMxkCw31H1nUT8LQ2/YPAN9u0+zAEope0Oh7LEID3GFn2RmDvto1bTLItZwAfBrYAHsPwQf/0kVq/uZb9vK7b8n6GUPjA9nv9PPCekd/ZHcDbGV7nzwJ+CTxg4ntiDa/NtwNnAQ8GFgHfBt4xzbavBp7ahh8APG6K7X1Na+f5bf+uDj2fBf6x/V4ezPDP0F9MWPaV7Xe15STtv5WR9xfD++RWhn/WNmMItyvavtuS4R+SBzHF6xd4dds3O7Xfyz8CJ6zp9erDx9oenvrUpuwqhg+wiW4Htmc4inJ7VZ1ZVVPdFPetVfWLqrplDdM/WVUX13BK62+B563HabHJvBB4X1VdXlU3A28EDp5wmudtVXVLVX2X4ajBoyc20mo5GHhjVd1UVVcA/wt40QbW982qOrWG/nqfHFn3E4BFVfX2qrqtqi5nCJQHr6Wtz1bVOVV1
B0NQe8w61PGjqvp4q+NTwM7A26vqV1X1FYYjKw8dmf+LVXVGVf0KeBPw5CQ7A89mODX58aq6o6r+gyF4P3dk2c9V1beq6q6qunW0iNbG3sAbqurWqroA+BjwpzO9LUnCcGTsNVV1XVXdBLyb39zHt7dlb6+qUxmO8Dx8mnW8sC27sqpWAW/jN18va2v7dmCPJPevquur6vy1rGcl8IHWzqcYjmIekGQ7hgD4V+29t5IhmI5u31VV9b/b72pN781fa/OcyxDSH8/wfvkWw+/sScAPq+pnTP36fRnD0evl7TX0VuAgT79qffii0aZsR4bTTxO9l+EP61eGzzqOqqojp2jrynWY/mOG/9a3nV6Za7VDa2+07YXAdiPjRq/S/CXDkbeJVh9BmNjWjhtY38R1b9E+rHYFdkhyw8j0BQynlqbb1mTbsSbXjAzfAlBVE8eNtvfr31dV3ZzkOoZ9vSvwxAl1L2QIofdYdhI7AKtD02o/BpZMZyOa6W7LImAr4Lz2OgYIw35e7Wct+K62Lvt1stfeDtNs+48ZTq0fmeRC4Iiq+s4a1rNiwj9Kq9ezK8Nr9uqR7bsXv7n/p3pfTuZ07j51ezpwPfB7DEejT2/zTPX63RX4bJK7RqbfyW++L6VpMahpk5TkCQwh5JsTp7UP0dcCr02yJ/C1JOdW1VcZTllMZqojbjuPDO/CcEThWoZ+TFuN1LWA4QN2uu1exfChMNr2HQwf5jtNseyoa1tNuzL0HVrd1oppLj9VnRNdyXBkaPd1XG4yqzveb8XQBxDgHlemrqNf/76S3JfhyOtVDHWfXlV/sJZl17YvrgIemOR+I2FtXfbzuriWIbQ9sqrWp/3pvvYuac93aeOmbrjqXODAJJsBr2Dow7XzGmbfMUlGwtouDKdzr2QIT9tOCITrsg2TTT+d4WjyTxguPLqe4WjZrxhOMcPUr98rgZdW1bcmTkiy6yTzS2vkqU9tUpLcP8mzgRMZ+qZcNMk8z06y+tTRjQz/Ca/+z/gahv4o6+pPkuyRZCuGfjsnt1NXP2A4ynRA+9B6M0OfltWuARaPfpXIBCcAr0myWwsU7wY+tZYPrkm1Wk4C3pXkfu3D5HBgul9dcA3woCRbT3P+c4CbkrwhyZZJFiTZswXoddJOu61g2McLkrwUeMi6tjPBs5I8JcnmwDsY+tZdyXCRycOSvCjJZu3xhCS/Pc1ar2Toy/WeJFskeRTDRQQz/hURVXUXQ8B4f5IHAyTZMckfTrOJqV7rJwBvTrIoybYM/bWm3I4kmyd5YZKtq+p2hnB911oWeTDwqravnwv8NnBqVV0NfAX4X+19fa8kD0nye9PcPpj8/fVthlO0ewHnVNUltCOpDP0LYerX70cZ3ku7tm1elOTANm1V2971+TuiTZBBTZuKzye5ieE/3TcB72PokD2Z3YF/Z+hT8x3gw1X19TbtPQwfTjcked06rP+TDJ2zf8rQifxVAFV1I/DfGfoprWA4OjT6fWT/2n7+LMlk/XiOaW2fwXDl2a0MnafXxyvb+i9nONL4L639KVXV9xg+uC9v+2aHKea/k6G/12Na3dcy7IPpBr2J/hz4a4YLDh7J8GG7If4FeAvDqfHHA38Cvz7a+kyGvkhXMfw+/47fDNdTeQFDh/KrGDrDv6Wq/n0D612TNzBcbHJWkp8zvK6n2wftaIZ+ZDck+b+TTH8nsBS4ELgIOL+Nm44XAVe0ml7G0N9tTc5meE9eC7wLOKj1E4Ohb9/mDEeBrwdOZuhfOl33eH+1fqTnA5dU1W1t+neAH7d+cNN5/X6Q4ajfV9rfnbMYgh5V9cu2Hd9q+/ZJ61CvNkGpKftIS5I095K8mOHq3aeMuxZpXDyiJkmS1CmDmiRJUqc89SlJktQpj6hJkiR1yqAmSZLUqXn5hbfbbrttLV68eNxlSJIkTem88867tqoWTTZtXga1xYsXs3Tp0nGXIUmSNKUkP17TNE99SpIkdcqgJkmS1CmDmiRJUqcM
apIkSZ0yqEmSJHXKoCZJktQpg5okSVKnDGqSJEmdMqhJkiR1yqAmSZLUKYOaJElSpwxqkiRJnTKoSZIkdWrhuAuQFh/xxXWa/4ojD5ilSiRJ6otH1CRJkjplUJMkSeqUQU2SJKlTBjVJkqROzVpQS3JMkpVJLh4Z994k30tyYZLPJtlmZNobkyxL8v0kfzgyfr82blmSI2arXkmSpN7M5hG1Y4H9Jow7Ddizqh4F/AB4I0CSPYCDgUe2ZT6cZEGSBcCHgP2BPYAXtHklSZLmvVkLalV1BnDdhHFfqao72tOzgJ3a8IHAiVX1q6r6EbAM2Ks9llXV5VV1G3Bim1eSJGneG2cftZcC/9aGdwSuHJm2vI1b0/h7SHJYkqVJlq5atWoWypUkSZpbYwlqSd4E3AEcP1NtVtVRVbWkqpYsWrRoppqVJEkamzm/M0GSFwPPBvatqmqjVwA7j8y2UxvHWsZLkiTNa3N6RC3JfsDrgT+qql+OTDoFODjJvZPsBuwOnAOcC+yeZLckmzNccHDKXNYsSZI0LrN2RC3JCcA+wLZJlgNvYbjK897AaUkAzqqql1XVJUlOAi5lOCX68qq6s7XzCuDLwALgmKq6ZLZqliRJ6smsBbWqesEko49ey/zvAt41yfhTgVNnsDRJkqSNgncmkCRJ6pRBTZIkqVMGNUmSpE4Z1CRJkjplUJMkSeqUQU2SJKlTBjVJkqROGdQkSZI6Nef3+pxPFh/xxXWa/4ojD5ilSiRJ0nzkETVJkqROGdQkSZI6ZVCTJEnqlEFNkiSpUwY1SZKkThnUJEmSOmVQkyRJ6pRBTZIkqVMGNUmSpE4Z1CRJkjplUJMkSeqUQU2SJKlTBjVJkqROGdQkSZI6ZVCTJEnqlEFNkiSpUwY1SZKkThnUJEmSOmVQkyRJ6pRBTZIkqVMGNUmSpE4Z1CRJkjplUJMkSeqUQU2SJKlTBjVJkqROGdQkSZI6ZVCTJEnqlEFNkiSpUwY1SZKkThnUJEmSOmVQkyRJ6pRBTZIkqVOzFtSSHJNkZZKLR8Y9MMlpSX7Yfj6gjU+Sf0iyLMmFSR43sswhbf4fJjlktuqVJEnqzWweUTsW2G/CuCOAr1bV7sBX23OA/YHd2+Mw4CMwBDvgLcATgb2At6wOd5IkSfPdrAW1qjoDuG7C6AOB49rwccBzRsZ/ogZnAdsk2R74Q+C0qrquqq4HTuOe4U+SJGlemus+attV1dVt+KfAdm14R+DKkfmWt3FrGn8PSQ5LsjTJ0lWrVs1s1ZIkSWMwtosJqqqAmsH2jqqqJVW1ZNGiRTPVrCRJ0tjMdVC7pp3SpP1c2cavAHYemW+nNm5N4yVJkua9uQ5qpwCrr9w8BPjcyPg/bVd/Pgm4sZ0i/TLwzCQPaBcRPLONkyRJmvcWzlbDSU4A9gG2TbKc4erNI4GTkhwK/Bh4Xpv9VOBZwDLgl8BLAKrquiTvAM5t8729qiZeoCBJkjQvzVpQq6oXrGHSvpPMW8DL19DOMcAxM1iaJEnSRsE7E0iSJHXKoCZJktQpg5okSVKnDGqSJEmdMqhJkiR1yqAmSZLUKYOaJElSpwxqkiRJnTKoSZIkdcqgJkmS1CmDmiRJUqcMapIkSZ0yqEmSJHXKoCZJktQpg5okSVKnDGqSJEmdMqhJkiR1yqAmSZLUKYOaJElSpwxqkiRJnTKoSZIkdcqgJkmS1CmDmiRJUqcMapIkSZ0yqEmSJHXKoCZJktQpg5okSVKnDGqSJEmdMqhJkiR1yqAmSZLUKYOaJElSpwxqkiRJnTKoSZIkdcqgJkmS1CmDmiRJUqcMapIkSZ0yqEmSJHXKoCZJktQpg5okSVKnDGqSJEmdMqhJkiR1aixBLclrklyS5OIkJyTZIsluSc5OsizJp5Js3ua9d3u+rE1fPI6aJUmS5tqcB7UkOwKvApZU1Z7AAuBg4O+A91fVQ4HrgUPbIocC17fx72/zSZIkzXvjOvW5ENgyyUJgK+Bq4OnA
yW36ccBz2vCB7Tlt+r5JMoe1SpIkjcWcB7WqWgH8PfAThoB2I3AecENV3dFmWw7s2IZ3BK5sy97R5n/QXNYsSZI0DuM49fkAhqNkuwE7APcB9puBdg9LsjTJ0lWrVm1oc5IkSWM3jlOfzwB+VFWrqup24DPA3sA27VQowE7Aija8AtgZoE3fGvjZxEar6qiqWlJVSxYtWjTb2yBJkjTrxhHUfgI8KclWra/ZvsClwNeBg9o8hwCfa8OntOe06V+rqprDeiVJksZiHH3Uzma4KOB84KJWw1HAG4DDkyxj6IN2dFvkaOBBbfzhwBFzXbMkSdI4LJx6lplXVW8B3jJh9OXAXpPMeyvw3LmoS5IkqSfemUCSJKlTBjVJkqROGdQkSZI6ZVCTJEnqlEFNkiSpUwY1SZKkThnUJEmSOmVQkyRJ6pRBTZIkqVMGNUmSpE4Z1CRJkjplUJMkSeqUQU2SJKlTBjVJkqROGdQkSZI6ZVCTJEnqlEFNkiSpUwY1SZKkTk0Z1JK8J8n9kyxM8uUk1yT5b3NRnCRJ0qZsOkfU9q+qnwPPBq4CHgG8YVarkiRJ0rSC2sL281nAv1bV9UDNXkmSJEmCu0PY2vxbkouBO4GXJ9kW+NXsliVJkqTpHFF7I/B04PFVdTtwK/DHs1qVJEmSphXUzqmqlVV1B0BV3QycMrtlSZIkaY2nPpM8GNge2DLJ7wBpk+4PbDUHtUmSJG3S1tZH7QDgpcBOwIdHxv8c+NvZLEqSJElrCWpV9XHg40meV1UnzWFNkiRJYnp91L6R5B+TfAEgyR5JXjy7ZUmSJGk6Qe3jwOnAzu35D4HXzlpFkiRJAqYX1B5cVf8C3AXQvqLjrlmtSpIkSdMKar9I8kDa3QiSPIHhggJJkiTNouncmeB1wOeB30pyOrAjcNCsViVJkqSpg1pVLU3y+8BvM3yX2qVVddusVyZJkrSJm/LUZ5ItgcOBv6yqC4Bdkuw/65VJkiRt4qbTR+2YNt9T2vOrgHfPWkWSJEkCphfUdq+qdwO3A1TVL7n7dlKSJEmaJdMJarcl2YK7r/rcDbCPmiRJ0iybzlWfbwe+BOyU5Djg94BDZ7UqSZIkTeuqzy8lOQ/4XYZTnn9dVStnvTJJkqRN3JRBLcmxDLeQOrOqls16RZIkSQKm10fteGA34J+S/L8kn0ry8lmuS5IkaZM3nVOfpyX5d+DxwL7Ay9vwh2a5NkmSpE3adL7w9svAd4BDgB8BT6qqh27ISpNsk+TkJN9LclmSJyd5YJLTkvyw/XxAmzdJ/iHJsiQXJnnchqxbkiRpYzGdU58/AO4AdgceBjw0yeYbuN4PAl+qqkcAjwYuA44AvlpVuwNfbc8B9m/r3h04DPjIBq5bkiRpozBlUKuqV1bVU4DnAzcCn2w/10uSrYGnAUe39m+rqhuAA4Hj2mzHAc9pwwcCn6jBWcA2SbZf3/VLkiRtLNbYRy3Jwqq6I8nLgKcCT2C4fdQngDM3YJ27AauAjyd5NHAe8Gpgu6q6us3zU2C7NrwjcOXI8svbuKtHxpHkMIYjbuyyyy4bUJ4kSVIf1nZE7Zz2cxvgw8CeVbVPVf1tVX1lA9a5EHgc8JGqeizwC+4+zQlAVRXtTgjTVVVHVdWSqlqyaNGiDShPkiSpD2sLagGoqiOr6ltVNVO3jVoOLK+qs9vzkxmC2zWrT2m2n6u/VHcFsPPI8ju1cZIkSfPa2r6eY1GSw9c0saretz4rrKqfJrkyycOr6vsMX/lxaXscAhzZfn6uLXIK8IokJwJPBG4cOUUqSZI0b60tqC0A7ks7sjbDXgkc364evRx4CcPRvZOSHAr8GHhem/dU4FnAMuCXbV5JkqR5b21B7eqqevtsrLSqLgCWTDJp30nmLYYv2ZUkSdqkTNlHTZIkSeOxtqB2j6NbkiRJmjtrDGpVdd1cFiJJkqTfNJ1bSEmSJGkMDGqSJEmdMqhJkiR1yqAmSZLUKYOaJElSpwxqkiRJnTKo
SZIkdcqgJkmS1CmDmiRJUqcMapIkSZ0yqEmSJHXKoCZJktQpg5okSVKnDGqSJEmdMqhJkiR1yqAmSZLUKYOaJElSpwxqkiRJnTKoSZIkdcqgJkmS1CmDmiRJUqcMapIkSZ0yqEmSJHXKoCZJktQpg5okSVKnDGqSJEmdMqhJkiR1yqAmSZLUKYOaJElSpwxqkiRJnTKoSZIkdcqgJkmS1CmDmiRJUqcMapIkSZ0yqEmSJHXKoCZJktQpg5okSVKnDGqSJEmdGltQS7IgyX8k+UJ7vluSs5MsS/KpJJu38fduz5e16YvHVbMkSdJcGucRtVcDl408/zvg/VX1UOB64NA2/lDg+jb+/W0+SZKkeW8sQS3JTsABwMfa8wBPB05usxwHPKcNH9ie06bv2+aXJEma18Z1RO0DwOuBu9rzBwE3VNUd7flyYMc2vCNwJUCbfmObX5IkaV6b86CW5NnAyqo6b4bbPSzJ0iRLV61aNZNNS5IkjcU4jqjtDfxRkiuAExlOeX4Q2CbJwjbPTsCKNrwC2BmgTd8a+NnERqvqqKpaUlVLFi1aNLtbIEmSNAfmPKhV1RuraqeqWgwcDHytql4IfB04qM12CPC5NnxKe06b/rWqqjksWZIkaSx6+h61NwCHJ1nG0Aft6Db+aOBBbfzhwBFjqk+SJGlOLZx6ltlTVd8AvtGGLwf2mmSeW4HnzmlhkiRJHejpiJokSZJGGNQkSZI6ZVCTJEnqlEFNkiSpUwY1SZKkThnUJEmSOmVQkyRJ6pRBTZIkqVMGNUmSpE4Z1CRJkjplUJMkSeqUQU2SJKlTBjVJkqROLRx3AZJmzuIjvrhO819x5AGzVIkkaSZ4RE2SJKlTBjVJkqROGdQkSZI6ZVCTJEnqlEFNkiSpUwY1SZKkThnUJEmSOmVQkyRJ6pRBTZIkqVMGNUmSpE4Z1CRJkjplUJMkSeqUQU2SJKlTBjVJkqROGdQkSZI6ZVCTJEnqlEFNkiSpUwY1SZKkThnUJEmSOmVQkyRJ6pRBTZIkqVMGNUmSpE4Z1CRJkjplUJMkSeqUQU2SJKlTBjVJkqROGdQkSZI6ZVCTJEnq1JwHtSQ7J/l6kkuTXJLk1W38A5OcluSH7ecD2vgk+Ycky5JcmORxc12zJEnSOIzjiNodwGurag/gScDLk+wBHAF8tap2B77angPsD+zeHocBH5n7kiVJkubenAe1qrq6qs5vwzcBlwE7AgcCx7XZjgOe04YPBD5Rg7OAbZJsP8dlS5Ikzbmx9lFLshh4LHA2sF1VXd0m/RTYrg3vCFw5stjyNm5iW4clWZpk6apVq2atZkmSpLkytqCW5L7Ap4G/qqqfj06rqgJqXdqrqqOqaklVLVm0aNEMVipJkjQeYwlqSTZjCGnHV9Vn2uhrVp/SbD9XtvErgJ1HFt+pjZMkSZrXxnHVZ4Cjgcuq6n0jk04BDmnDhwCfGxn/p+3qzycBN46cIpUkSZq3Fo5hnXsDLwIuSnJBG/c3wJHASUkOBX4MPK9NOxV4FrAM+CXwkrktV5IkaTzmPKhV1TeBrGHyvpPMX8DLZ7UoSZKkDnlnAkmSpE4Z1CRJkjplUJMkSeqUQU2SJKlTBjVJkqROGdQkSZI6ZVCTJEnqlEFNkiSpUwY1SZKkThnUJEmSOmVQkyRJ6pRBTZIkqVMGNUmSpE4Z1CRJkjplUJMkSeqUQU2SJKlTBjVJkqROGdQkSZI6ZVCTJEnqlEFNkiSpUwY1SZKkThnUJEmSOmVQkyRJ6pRBTZIkqVMGNUmSpE4tHHcBktSjxUd8cdrzXnHkAbNYiaRNmUfUJEmSOmVQkyRJ6pRBTZIkqVMGNUmSpE4Z1CRJkjplUJMkSeqUQU2SJKlTBjVJkqROGdQkSZI6ZVCTJEnqlEFNkiSpUwY1SZKkTnlTdknSrFmXm9uDN7iXJvKImiRJUqcMapIkSZ3aaIJakv2SfD/JsiRHjLseSZKk2bZRBLUkC4APAfsDewAvSLLH
eKuSJEmaXRvLxQR7Acuq6nKAJCcCBwKXjrUqSZLmiBdmbJo2lqC2I3DlyPPlwBPHVIskSdpIbWyBN1U11gKmI8lBwH5V9Wft+YuAJ1bVK0bmOQw4rD19OPD9OShtW+DaOVjPpsL9OfPcpzPL/Tnz3Kczz306s+Zif+5aVYsmm7CxHFFbAew88nynNu7Xquoo4Ki5LCrJ0qpaMpfrnM/cnzPPfTqz3J8zz30689ynM2vc+3OjuJgAOBfYPcluSTYHDgZOGXNNkiRJs2qjOKJWVXckeQXwZWABcExVXTLmsiRJkmbVRhHUAKrqVODUcdcxwZyeat0EuD9nnvt0Zrk/Z577dOa5T2fWWPfnRnExgSRJ0qZoY+mjJkmStMkxqK0Hb2c1s5Ick2RlkovHXct8kGTnJF9PcmmSS5K8etw1beySbJHknCTfbfv0beOuaT5IsiDJfyT5wrhrmQ+SXJHkoiQXJFk67nrmgyTbJDk5yfeSXJbkyXNeg6c+1027ndUPgD9g+OLdc4EXVJV3SVhPSZ4G3Ax8oqr2HHc9G7sk2wPbV9X5Se4HnAc8x9fo+ksS4D5VdXOSzYBvAq+uqrPGXNpGLcnhwBLg/lX17HHXs7FLcgWwpKr8DrUZkuQ44Myq+lj71omtquqGuazBI2rr7te3s6qq24DVt7PSeqqqM4Drxl3HfFFVV1fV+W34JuAyhrt7aD3V4Ob2dLP28L/cDZBkJ+AA4GPjrkWaTJKtgacBRwNU1W1zHdLAoLY+JrudlR+C6lKSxcBjgbPHW8nGr52muwBYCZxWVe7TDfMB4PXAXeMuZB4p4CtJzmt369GG2Q1YBXy8naL/WJL7zHURBjVpnkpyX+DTwF9V1c/HXc/GrqrurKrHMNwZZa8knqZfT0meDaysqvPGXcs885SqehywP/Dy1q1E628h8DjgI1X1WOAXwJz3Szeorbspb2cljVvrR/Vp4Piq+sy465lP2qmPrwP7jbuWjdjewB+1PlUnAk9P8s/jLWnjV1Ur2s+VwGcZuupo/S0Hlo8cPT+ZIbjNKYPauvN2Vupa6/h+NHBZVb1v3PXMB0kWJdmmDW/JcDHR98Zb1carqt5YVTtV1WKGv6Ffq6o/GXNZG7Uk92kXD5esqYwAAAVESURBVNFOzz0T8Er6DVBVPwWuTPLwNmpfYM4vytpo7kzQC29nNfOSnADsA2ybZDnwlqo6erxVbdT2Bl4EXNT6VAH8Tbu7h9bP9sBx7arvewEnVZVfKaGebAd8dvg/jYXAv1TVl8Zb0rzwSuD4dmDmcuAlc12AX88hSZLUKU99SpIkdcqgJkmS1CmDmiRJUqcMapIkSZ0yqEmSJHXKoCZpViWp0S8zTbIwyaok6/31Fkn+ZsLzb29IjdNY33OS/I9Zavtvpp5rg9p/cZIdRp6fmGT32VynpJljUJM0234B7Nm+KBaGL4vd0Lt5/Ea4qarf3cD2pvJ64MMb2kiSyb67claDGvBiYIeR5x9h2B5JGwGDmqS5cCpwQBt+AXDC6gntG9WPSXJOu/HxgW38i5N8JsmXkvwwyf9s448EtkxyQZLj27ib288keW+Si5NclOT5bfw+Sb6R5OQk30tyfLuDA0mOTHJpkguT/P3EwpM8DPhVVV3bnh+b5KNJlib5Qbtv5eqbtr83ybmtrb8YWfeZSU5hwreaT9yWJH+d5FVt2vuTfK0NP31kW5+Z5DtJzk/yr+2eriR5fJLT2w25v5xk+yQHAUsYvrDzghaWzwSesYbQKKkzBjVJc+FE4OAkWwCPAs4emfYmhlsI7QX8PvDedgscgMcAzwd+B3h+kp2r6gjglqp6TFW9cMJ6/mtb5tHAM1pb27dpjwX+CtgD+C1g7yQPAv4L8MiqehTwzklq3xs4f8K4xQz3UTwA+GjbrkOBG6vqCcATgD9Pslub/3HAq6vqYaONTLItZwJPbZOXAPdt9219KnBGkm2BNwPPaDffXgoc3ub538BBVfV44BjgXVV1
cpvnhW0dt1TVXcCyto8kdc7/qCTNuqq6MMlihqNpE29l9UyGG3S/rj3fAtilDX+1qm4ESHIpsCtw5VpW9RTghKq6E7gmyekMoennwDlVtby1dQFD2DoLuBU4uvWZm6zf3PbAqgnjTmqB54dJLgce0bbjUe0oFsDWwO7AbW3dP1pL3audBzw+yf2BXzEExCUMQe1VwJMYgua32gHBzYHvAA8H9gROa+MXAFevZT0rGU6HnjeNmiSNkUFN0lw5Bfh7hvu6PmhkfIA/rqrvj86c5IkMYWW1O9mwv1n3aKvdu3cvhpstHwS8Anj6hOVuYQhdoybee68YtuOVVfXl0QlJ9mHopzelqro9yY8Y+pV9G7iQ4SjjQ4HLgIcAp1XVCyas43eAS6rqydNZD0MYvmWa80oaI099SporxwBvq6qLJoz/MvDKkT5jj51GW7e3030TnclwinRBkkXA04Bz1tRI69+1dbth/WuY/HTgZQxBadRzk9wryUMYTqN+v23HX66uK8nDRk7hrsu2nAm8DjijDb8M+I8absx8FsMp24e2ddyn9aH7PrAoyZPb+M2SPLK1dxNwvwnrfBhw8TRqkzRmBjVJc6KqllfVP0wy6R3AZsCFSS5pz6dyVJv/+AnjP8twFOq7wNeA11fVT9fSzv2ALyS5EPgmcPgk85wBPHZ1kGx+whAA/w14WVXdCnyM4WKB85NcDPwj0zsCOHFbzmQ43fqdqrqG4dTsmQBVtYrhaNsJrebvAI+oqtsYjgj+XZLvAhcAq6+EPZahH90FSbZMsh1Dv7i17RdJncjwT5okaU2SfBD4fFX9e5JjgS+0jvobnSSvAX5eVUePuxZJU/OImiRN7d3AVuMuYobcABw37iIkTY9H1CRJkjrlETVJkqROGdQkSZI6ZVCTJEnqlEFNkiSpUwY1SZKkThnUJEmSOvX/Ab31rhvd0pIcAAAAAElFTkSuQmCC",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "%matplotlib inline\n",
- "plt.figure(figsize=(10, 6))\n",
- "plt.hist(df.n_mentions, bins='auto', rwidth=1.0)\n",
- "plt.title('Distribution of the number of mentions per tweet')\n",
- "plt.ylabel(\"Tweets\")\n",
- "plt.xlabel(\"Mentions (per tweet)\")\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 187,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmQAAAGDCAYAAACFuAwbAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3deZwlZX3v8c/XARWXgMjIZR+jGEUTRx0JLklwV/AGcuOCMYpIguaicYtxTExE44JJFGOiJnhRwLgRl4BCjIgI7jgQZNU4wUEYRmaQRVBBlt/9o56BY9Mz3Q1z+unp/rxfr/PqOk9VPedXZ5nznarnVKWqkCRJUj936V2AJEnSQmcgkyRJ6sxAJkmS1JmBTJIkqTMDmSRJUmcGMkmSpM4MZFowkvxzkr/aRH3tmuS6JIva/S8n+aNN0Xfr7z+SHLip+pvB474lyRVJfjTN5Q9L8q/jrmtTS3J0krd0euwk+VCSq5Kc0aOGjUlyfpK9e9chLTQGMs0LSVYl+XmSa5NcneTrSV6a5Nb3eFW9tKr+Zpp9PXljy1TVD6vqXlV18yao/XahpqqeUVXH3Nm+Z1jHrsBrgD2q6n9NMn/vJJfOZk3z1OOBpwA7V9WePQuZLJhW1UOr6sudStrkevynIcmSJJVki9l8XG3eDGSaT/53Vd0b2A04HHgdcNSmfpB5/I/srsCPq2pt70I2J+v3ks7AbsCqqvrpOOpZyObxZ1MLQVV587bZ34BVwJMntO0J3AI8rN0/GnhLm94O+BxwNXAl8BWG/6B8uK3zc+A64M+BJUABBwM/BE4fadui9fdl4O3AGcBPgOOBbdu8vYFLJ6sXeDrwC+DG9njfGenvj9r0XYA3ABcDa4Fjga3bvPV1HNhquwL4y408T1u39de1/t7Q+n9y2+ZbWh1HT1jvnhPmXwfsCBwGHNf6vBY4H1g2st6OwKfa4/0A+NON1HY08F7gxNbXt4AHTNjOLUaWH32OXgR8DTiivaYXAY9t7Ze05+3ACY/1z8DJ7bFOA3Ybmf/gNu9K4HvAcyas+37gJOCnTHjfjWz3CW39lcAft/aDgeuBm9tz+KZJ1p3pttwN+Pv2+l/etmur0fcew57PtcAa4KA27xCG990vWi2fnfhZan2/G7is3d4N3G2qvtv8fYAL2vO7GvizDbzu67f3n4BrgO8CT5rwnj2q9b8aeAuwaJLn6se0z/fIurf7fAFPAM4dWeZk4Nsj978C7D/V+5fhc7Mc+J/22Mdx22f+hwzv1/Wflcf0/jfS29y/dS/Am7dNcWOSQNbafwj8SZs+mtsC2dvbF9eW7fZbQCbri9vCwLEMwWQrJg9kq4GHtWU+Bfxrm7c3Gwhkbfqw9cuOzP8yt4WNFzN8qf8qcC/g08CHJ9T2gVbXw4EbgIds4Hk6liEs3rut+9/AwRuqc8K6k23HYQwBYx9gUXtev9nm3QU4E/hr4K6t/ouAp22g/6PbF9uewBbAR4CPT9jOjQWym4CDWh1vaa/9exlCxVMZgsG9Rh7rWuC32/x/AL7a5t2TIfgc1Op4BEPQ3WNk3WuAx7VtvPsk23I68D7g7sBShi/0J47U+tWNPM8z3ZYjGMLftu11/Szw9pHX7CbgzQzv832AnwH3mfiZ2MB7883AN4H7AYuBrwN/M82+1wC/1abvAzxyiu19Vevnue35XR9uPgP8S3td7sfwn56XTFj35e212mqS/g9j5PPF8Dm5nuE/ZVsyhNjV7bnbiuE/Hvdlivcv8Ir23OzcXpd/AT62oferN29T3TxkqfnuMoYvqoluBHZg2CtyY1V9paqmurDrYVX106r6+Qbmf7iqzqvhUNRfAc+5A4ezJvN84F1VdVFVXQe8HjhgwuGZN1XVz6vqOwx7AR4+sZNWywHA66vq2qpaBbwTeMGdrO+rVXVSDePpPjzy2I8GFlfVm6vqF1V1EUNwPGAjfX2mqs6oqpsY
AtnSGdTxg6r6UKvjE8AuwJur6oaq+gLDnpIHjix/YlWdXlU3AH8JPCbJLsAzGQ4pfqiqbqqq/2II2M8eWff4qvpaVd1SVdePFtH6eBzwuqq6vqrOBv4f8MJNvS1JwrCn61VVdWVVXQu8jV9+jm9s695YVScx7LH5tWnW8fy27tqqWge8iV9+v2ys7xuBPZL8SlVdVVVnbeRx1gLvbv18gmGv5L5JtmcIeq9sn721DAF0dPsuq6p/bK/Vhj6bt2rLfJshjD+K4fPyNYbXbC/g+1X1Y6Z+/76UYW/0pe09dBjwLA+b6o7yjaP5bieGw0YT/R3DP6BfGL7TOLKqDp+ir0tmMP9ihv99bze9Mjdqx9bfaN9bANuPtI3+KvJnDHvSJlq/R2BiXzvdyfomPvbd25fSbsCOSa4emb+I4ZDQdPuabDs25PKR6Z8DVNXEttH+bn29quq6JFcyPNe7Ab85oe4tGMLm7dadxI7A+nC03sXAsulsRDPdbVkM3AM4s72PAcLwPK/34xZw15vJ8zrZe2/Hafb9+wyHxA9Pcg6wvKq+sYHHWT3hP0TrH2c3hvfsmpHtuwu//PxP9bmczGncdsj1NOAq4HcY9i6f1paZ6v27G/CZJLeMzL+ZX/5cStNmINO8leTRDGHjqxPntS/L1wCvSfIw4EtJvl1VpzAcapjMVHvQdhmZ3pVhD8EVDOOM7jFS1yKGL9Lp9nsZwz/+o33fxPClvfMU6466otW0G8PYnvV9rZ7m+lPVOdElDHt6dp/hepNZPwD+Hgxj9ABu90vQGbr19UpyL4Y9qZcx1H1aVT1lI+tu7Lm4DNg2yb1HQtlMnueZuIIhnD20qu5I/9N9753f7u/a2qbuuOrbwH5JtgRexjDGapcNLL5TkoyEsl0ZDsNewhCStpsQ/GayDZPNP41h7/APGX4AdBXD3q8bGA4Nw9Tv30uAF1fV1ybOSLLbJMtLG+UhS807SX4lyTOBjzOMHTl3kmWemWT9IZ9rGP5nu/5/upczjBeZqT9MskeSezCMq/lkO+T03wx7jfZtX05vYBhzst7lwJLRU3RM8DHgVUnu34LD24BPbOQLalKtluOAtya5d/vSeDUw3VMCXA7cN8nW01z+DODaJK9LslWSRUke1oLyjLTDZasZnuNFSV4MPGCm/UywT5LHJ7kr8DcMY98uYfixx4OSvCDJlu326CQPmWatlzCMtXp7krsn+Q2Gwfyb/NQLVXULQ5A4Isn9AJLslORp0+xiqvf6x4A3JFmcZDuG8VRTbkeSuyZ5fpKtq+pGhhB9y0ZWuR/wp+25fjbwEOCkqloDfAF4Z/tc3yXJA5L8zjS3Dyb/fH2d4dDqnsAZVXU+bc8ow/g/mPr9+88Mn6Xd2jYvTrJfm7eube8d+XdEC5SBTPPJZ5Ncy/A/178E3sUwMHoyuwNfZBjz8g3gfVV1apv3doYvoauT/NkMHv/DDIOkf8QwmPtPAarqGuD/MowjWs2wt2f0fF7/1v7+OMlk42w+2Po+neGXXtczDGK+I17eHv8ihj2HH239T6mqvsvwBX1Re252nGL5mxnGYy1tdV/B8BxMN9BN9MfAaxkG/j+U4Uv1zvgo8EaGQ9qPAv4Qbt17+lSGsUKXMbye7+CXQ/RUnscwsPsyhkHpb6yqL97JejfkdQw/+vhmkp8wvK+nO0bsKIZxXlcn+fdJ5r8FWAGcA5wLnNXapuMFwKpW00sZxqNtyLcYPpNXAG8FntXGccEw9u6uDHt1rwI+yTD+c7pu9/lq4zzPAs6vql+0+d8ALm7j1Kbz/v0Hhr14X2j/7nyTIdBRVT9r2/G19tzuNYN6tUClphzHLEnSeCR5EcOvZR/fuxapJ/eQSZIkdWYgkyRJ6sxDlpIkSZ25h0ySJKkzA5kkSVJnm/WJYbfbbrtasmRJ7zIkSZKmdOaZZ15RVYsnm7dZB7IlS5awYsWK3mVIkiRNKcnFG5o39kOW7ezG/5Xkc+3+/ZN8K8nKJJ9o
Z8kmyd3a/ZVt/pJx1yZJkjQXzMYYslcAF47cfwdwRFU9kOGsywe39oOBq1r7EW05SZKkeW+sgSzJzsC+DJeboF038IkMl74AOAbYv03v1+7T5j+pLS9JkjSvjXsP2buBP+e2i8reF7h65KLIlwI7temdGK5BSJt/TVtekiRpXhtbIEvyTGBtVZ25ifs9JMmKJCvWrVu3KbuWJEnqYpx7yB4H/G6SVcDHGQ5V/gOwTZL1v+7cGVjdplcDuwC0+VsDP57YaVUdWVXLqmrZ4sWT/nJUkiRpszK2QFZVr6+qnatqCXAA8KWqej5wKvCsttiBwPFt+oR2nzb/S+V1nSRJ0gLQ40z9rwNenWQlwxixo1r7UcB9W/urgeUdapMkSZp1s3Ji2Kr6MvDlNn0RsOcky1wPPHs26pEkSZpLvJalJElSZwYySZKkzgxkkiRJnRnIJEmSOpuVQf2SJI3TkuUnjrX/VYfvO9b+JfeQSZIkdWYgkyRJ6sxAJkmS1JmBTJIkqTMDmSRJUmcGMkmSpM4MZJIkSZ0ZyCRJkjozkEmSJHVmIJMkSerMQCZJktSZgUySJKkzA5kkSVJnBjJJkqTODGSSJEmdbdG7AEmS1NeS5SeO/TFWHb7v2B9jc+YeMkmSpM4MZJIkSZ0ZyCRJkjozkEmSJHVmIJMkSerMQCZJktSZgUySJKkzA5kkSVJnBjJJkqTODGSSJEmdjS2QJbl7kjOSfCfJ+Une1NqPTvKDJGe329LWniTvSbIyyTlJHjmu2iRJkuaScV7L8gbgiVV1XZItga8m+Y8277VV9ckJyz8D2L3dfhN4f/srSZI0r41tD1kNrmt3t2y32sgq+wHHtvW+CWyTZIdx1SdJkjRXjHUMWZJFSc4G1gInV9W32qy3tsOSRyS5W2vbCbhkZPVLW5skSdK8NtZAVlU3V9VSYGdgzyQPA14PPBh4NLAt8LqZ9JnkkCQrkqxYt27dJq9ZkiRpts3Kryyr6mrgVODpVbWmHZa8AfgQsGdbbDWwy8hqO7e2iX0dWVXLqmrZ4sWLx126JEnS2I3zV5aLk2zTprcCngJ8d/24sCQB9gfOa6ucALyw/dpyL+CaqlozrvokSZLminH+ynIH4JgkixiC33FV9bkkX0qyGAhwNvDStvxJwD7ASuBnwEFjrE2SJGnOGFsgq6pzgEdM0v7EDSxfwKHjqkeSJGmu8kz9kiRJnRnIJEmSOjOQSZIkdWYgkyRJ6sxAJkmS1JmBTJIkqTMDmSRJUmcGMkmSpM4MZJIkSZ0ZyCRJkjozkEmSJHVmIJMkSerMQCZJktTZFr0L2BwsWX7iWPtfdfi+Y+1fkiTNbe4hkyRJ6sxAJkmS1JmBTJIkqTMDmSRJUmcGMkmSpM4MZJIkSZ0ZyCRJkjozkEmSJHXmiWElSRozTzCuqbiHTJIkqTMDmSRJUmcGMkmSpM4MZJIkSZ05qF+SJM158/2HEe4hkyRJ6sxAJkmS1JmBTJIkqbOxBbIkd09yRpLvJDk/yZta+/2TfCvJyiSfSHLX1n63dn9lm79kXLVJkiTNJePcQ3YD8MSqejiwFHh6kr2AdwBHVNUDgauAg9vyBwNXtfYj2nKSJEnz3tgCWQ2ua3e3bLcCngh8srUfA+zfpvdr92nzn5Qk46pPkiRprhjrGLIki5KcDawFTgb+B7i6qm5qi1wK7NSmdwIuAWjzrwHuO876JEmS5oKxBrKqurmqlgI7A3sCD76zfSY5JMmKJCvWrVt3p2uUJEnqbVZ+ZVlVVwOnAo8Btkmy/oS0OwOr2/RqYBeANn9r4MeT9HVkVS2rqmWLFy8ee+2SJEnjNs5fWS5Osk2b3gp4CnAhQzB7VlvsQOD4Nn1Cu0+b/6WqqnHVJ0mSNFeM89JJOwDHJFnEEPyOq6rPJbkA+HiStwD/BRzVlj8K+HCSlcCVwAFjrE2SJGnOGFsgq6pzgEdM0n4Rw3iyie3XA88eVz2SJElzlWfqlyRJ6sxAJkmS1JmB
TJIkqTMDmSRJUmcGMkmSpM4MZJIkSZ0ZyCRJkjozkEmSJHVmIJMkSerMQCZJktSZgUySJKkzA5kkSVJnBjJJkqTODGSSJEmdGcgkSZI6M5BJkiR1ZiCTJEnqzEAmSZLUmYFMkiSpMwOZJElSZwYySZKkzgxkkiRJnRnIJEmSOjOQSZIkdWYgkyRJ6sxAJkmS1JmBTJIkqTMDmSRJUmcGMkmSpM4MZJIkSZ0ZyCRJkjobWyBLskuSU5NckOT8JK9o7YclWZ3k7HbbZ2Sd1ydZmeR7SZ42rtokSZLmki3G2PdNwGuq6qwk9wbOTHJym3dEVf396MJJ9gAOAB4K7Ah8McmDqurmMdYoSZLU3dj2kFXVmqo6q01fC1wI7LSRVfYDPl5VN1TVD4CVwJ7jqk+SJGmumJUxZEmWAI8AvtWaXpbknCQfTHKf1rYTcMnIapey8QAnSZI0L4w9kCW5F/Ap4JVV9RPg/cADgKXAGuCdM+zvkCQrkqxYt27dJq9XkiRpto01kCXZkiGMfaSqPg1QVZdX1c1VdQvwAW47LLka2GVk9Z1b2y+pqiOrallVLVu8ePE4y5ckSZoV4/yVZYCjgAur6l0j7TuMLPZ7wHlt+gTggCR3S3J/YHfgjHHVJ0mSNFeM81eWjwNeAJyb5OzW9hfA85IsBQpYBbwEoKrOT3IccAHDLzQP9ReWkiRpIRhbIKuqrwKZZNZJG1nnrcBbx1WTJEnSXOSZ+iVJkjozkEmSJHVmIJMkSerMQCZJktSZgUySJKkzA5kkSVJnBjJJkqTODGSSJEmdGcgkSZI6M5BJkiR1ZiCTJEnqzEAmSZLUmYFMkiSpMwOZJElSZwYySZKkzgxkkiRJnW0x1QJJ3g68HfgZcCKwFHhVVX10zLVpmpYsP3Gs/a86fN+x9i9J0kI3nT1kz6iqnwDPBC4DHgy8bqxVSZIkLSDTCWTr96LtA/xbVV0F1PhKkiRJWlimPGQJ/EeS84CbgUOTbAfcMN6yJEmSFo7p7CF7PfBE4FFVdSNwPfD7Y61KkiRpAZlOIDujqtZW1U0AVXUdcMJ4y5IkSVo4NnjIMsn9gB2ArZL8OpA261eAe8xCbZIkSQvCxsaQ7Qu8GNgZeN9I+0+AvxpnUZIkSQvJBgNZVX0I+FCS51TVcbNYkyRJ0oIynTFkX07yL0k+B5BkjyQvGm9ZkiRJC8d0AtmHgNOAXdr97wOvGVtFkiRJC8x0Atn92mWSbgFop764ZaxVSZIkLSDTCWQ/TbIt7ez8SR7NMLBfkiRJm8B0ztT/Z8BngV9NchqwE/CssVYlSZK0gEwZyKpqRZInAA9hOBfZBVX1i7FXJkmStEBMecgyyVbAq4E/qaqzgV2TPGMa6+2S5NQkFyQ5P8krWvu2SU5O8v329z6tPUnek2RlknOSPPJObpskSdJmYTpjyD7Ylnt8u38Z8LZprHcT8Jqq2gPYi+HC5HsAy4FTqmp34JR2H+AZwO7tdgjw/uluhCRJ0uZsOoFs96p6G3AjQFX9jNsuo7RBVbWmqs5q09cCFzKMP9sPOKYtdgywf5veDzi2Bt8Etkmyw0w2RpIkaXM0nUD2iyR357ZfWd4fmNEYsiRLgEcA3wK2r6o1bdaPgO3b9E7AJSOrXdraJEmS5rXpBLI3A58Hdk5yDHAq8PrpPkCSewGfAl5ZVb90uoyqKlrQm0F/hyRZkWTFunXrZrKqJEnSnDSdX1l+PsmZwGMZDlW+tqrWTqfzJFsyhLGPVNWnW/PlSXaoqjXtkOT6vlZz29UAYLio+epJ6jkSOBJg2bJlMwpzkiRJc9F0fmV5NPBM4Pyq+vcZhLEARwEXVtW7RmadABzYpg8Ejh9pf2H7teVewDUjhzYlSZLmremcGPYjwG8xhKVdgRXA6VX13inWexzwAuDcJGe3tr8ADgeOS3IwcDHwnDbvJGAfYCXwM+CgmWyIJEnS5mo6hyxPTvJF4FHAk4BD2/RGA1lVfZUN/xrzSZMsX61v
SZKkBWXKQJbkP4GtgW8DXwH2qqrLxl2YJEnSQjGdX1n+N8NJXncHHgQ8MMldx1qVJEnSAjKdQ5YvB0iyNfBC4MPA/YCtxluaJEnSwrDBQJZki6q6KclLGQb1P5rhsknHMhy6lCQtEEuWnzjW/lcdvu9Y+5fmuo3tITsDeCSwDfA+4NtVNaMz9EuSJGlqGwtkAaiqw2epFkmSpAVpY4FscZJXb2jmhJO9SpIk6Q7aWCBbBNyLDZ9LTJIkSZvAxgLZmqp686xVIkmStEBt7Dxk7hmTJEmaBRsLZLe7vJEkSZI2vQ0esqyqK2ezEM1fc/38RXO9PknS/DedSydJkiRpjAxkkiRJnRnIJEmSOjOQSZIkdWYgkyRJ6sxAJkmS1JmBTJIkqTMDmSRJUmcGMkmSpM4MZJIkSZ0ZyCRJkjozkEmSJHVmIJMkSerMQCZJktSZgUySJKkzA5kkSVJnBjJJkqTODGSSJEmdGcgkSZI6G1sgS/LBJGuTnDfSdliS1UnObrd9Rua9PsnKJN9L8rRx1SVJkjTXjHMP2dHA0ydpP6KqlrbbSQBJ9gAOAB7a1nlfkkVjrE2SJGnOGFsgq6rTgSunufh+wMer6oaq+gGwEthzXLVJkiTNJT3GkL0syTntkOZ9WttOwCUjy1za2iRJkua92Q5k7wceACwF1gDvnGkHSQ5JsiLJinXr1m3q+iRJkmbdrAayqrq8qm6uqluAD3DbYcnVwC4ji+7c2ibr48iqWlZVyxYvXjzegiVJkmbBrAayJDuM3P09YP0vME8ADkhytyT3B3YHzpjN2iRJknrZYlwdJ/kYsDewXZJLgTcCeydZChSwCngJQFWdn+Q44ALgJuDQqrp5XLVJkiTNJWMLZFX1vEmaj9rI8m8F3jqueiRJkuYqz9QvSZLUmYFMkiSpMwOZJElSZwYySZKkzgxkkiRJnY3tV5aSpOlZsvzEsT/GqsP3HftjSLrj3EMmSZLUmYFMkiSpMwOZJElSZwYySZKkzgxkkiRJnRnIJEmSOjOQSZIkdWYgkyRJ6sxAJkmS1JmBTJIkqTMDmSRJUmcGMkmSpM4MZJIkSZ0ZyCRJkjozkEmSJHVmIJMkSerMQCZJktSZgUySJKkzA5kkSVJnBjJJkqTODGSSJEmdGcgkSZI6M5BJkiR1ZiCTJEnqzEAmSZLU2dgCWZIPJlmb5LyRtm2TnJzk++3vfVp7krwnycok5yR55LjqkiRJmmvGuYfsaODpE9qWA6dU1e7AKe0+wDOA3dvtEOD9Y6xLkiRpThlbIKuq04ErJzTvBxzTpo8B9h9pP7YG3wS2SbLDuGqTJEmaS2Z7DNn2VbWmTf8I2L5N7wRcMrLcpa1NkiRp3us2qL+qCqiZrpfkkCQrkqxYt27dGCqTJEmaXbMdyC5ffyiy/V3b2lcDu4wst3Nru52qOrKqllXVssWLF4+1WEmSpNkw24HsBODANn0gcPxI+wvbry33Aq4ZObQpSZI0r20xro6TfAzYG9guyaXAG4HDgeOSHAxcDDynLX4SsA+wEvgZcNC46pIkSZprxhbIqup5G5j1pEmWLeDQcdUiSZI0l3mmfkmSpM4MZJIkSZ0ZyCRJkjozkEmSJHU2tkH9khaOJctPHGv/qw7fd6z9S1Jv7iGTJEnqzEAmSZLUmYFMkiSpMwOZJElSZw7qlzYDDpqXpPnNPWSSJEmdGcgkSZI6M5BJkiR1ZiCTJEnqzEAmSZLUmYFMkiSpMwOZJElSZwYySZKkzgxkkiRJnRnIJEmSOjOQSZIkdWYgkyRJ6sxAJkmS1JmBTJIkqTMDmSRJUmcGMkmSpM4MZJIkSZ0ZyCRJkjozkEmSJHVmIJMkSerMQCZJktTZFj0eNMkq4FrgZuCmqlqWZFvgE8ASYBXwnKq6qkd9kiRJs6nnHrInVNXSqlrW7i8HTqmq3YFT2n1JkqR5by4dstwPOKZNHwPs37EWSZKkWdMrkBXwhSRnJjmktW1fVWva9I+A7fuU
JkmSNLu6jCEDHl9Vq5PcDzg5yXdHZ1ZVJanJVmwB7hCAXXfddfyVSpIkjVmXPWRVtbr9XQt8BtgTuDzJDgDt79oNrHtkVS2rqmWLFy+erZIlSZLGZtYDWZJ7Jrn3+mngqcB5wAnAgW2xA4HjZ7s2SZKkHnocstwe+EyS9Y//0ar6fJJvA8clORi4GHhOh9okSZJm3awHsqq6CHj4JO0/Bp402/VIkiT1NpdOeyFJkrQgGcgkSZI6M5BJkiR1ZiCTJEnqzEAmSZLUmYFMkiSpMwOZJElSZwYySZKkzgxkkiRJnfW4dJIkzaoly08ca/+rDt93rP1Lmv/cQyZJktSZgUySJKkzA5kkSVJnBjJJkqTODGSSJEmdGcgkSZI6M5BJkiR1ZiCTJEnqzEAmSZLUmYFMkiSpMwOZJElSZwYySZKkzgxkkiRJnRnIJEmSOjOQSZIkdWYgkyRJ6sxAJkmS1JmBTJIkqTMDmSRJUmcGMkmSpM4MZJIkSZ3NuUCW5OlJvpdkZZLlveuRJEkatzkVyJIsAt4LPAPYA3hekj36ViVJkjRecyqQAXsCK6vqoqr6BfBxYL/ONUmSJI3VXAtkOwGXjNy/tLVJkiTNW6mq3jXcKsmzgKdX1R+1+y8AfrOqXjayzCHAIe3urwHf2wQPvR1wxSboR/34Gs4Pvo7zg6/j/ODruOntVlWLJ5uxxWxXMoXVwC4j93dubbeqqiOBIzflgyZZUVXLNmWfml2+hvODr+P84Os4P/g6zq65dsjy28DuSe6f5K7AAcAJnWuSJEkaqzm1h6yqbkryMuA/gUXAB6vq/M5lSZIkjdWcCmQAVXUScNIsP+wmPQSqLnwN5wdfx/nB13F+8HWcRXNqUL8kSdJCNNfGkEmSJC04CzqQeZmm+SHJqiTnJjk7yYre9Wh6knwwydok5420bZvk5CTfb3/v07NGTW0Dr+NhSVa3z+TZSfbpWaM2LskuSU5NckGS85O8orX7eZxFCzaQeZmmeecJVbXUn2hvVo4Gnj6hbTlwSlXtDpzS7mtuO5rbv44AR7TP5NI2Nlhz103Aa6pqD2Av4ND2fejncRYt2ECGl2mSuqqq04ErJzTvBxzTpo8B9p/VojRjG3gdtRmpqjVVdYTgyXQAAASWSURBVFabvha4kOEqOX4eZ9FCDmRepmn+KOALSc5sV3LQ5mv7qlrTpn8EbN+zGN0pL0tyTjuk6aGuzUSSJcAjgG/h53FWLeRApvnj8VX1SIbDz4cm+e3eBenOq+En4P4MfPP0fuABwFJgDfDOvuVoOpLcC/gU8Mqq+snoPD+P47eQA9mUl2nS5qGqVre/a4HPMByO1ubp8iQ7ALS/azvXozugqi6vqpur6hbgA/iZnPOSbMkQxj5SVZ9uzX4eZ9FCDmRepmkeSHLPJPdePw08FThv42tpDjsBOLBNHwgc37EW3UHrv8Sb38PP5JyWJMBRwIVV9a6RWX4eZ9GCPjFs+yn2u7ntMk1v7VySZijJrzLsFYPhyhMf9XXcPCT5GLA3sB1wOfBG4N+B44BdgYuB51SVA8bnsA28jnszHK4sYBXwkpGxSJpjkjwe+ApwLnBLa/4LhnFkfh5nyYIOZJIkSXPBQj5kKUmSNCcYyCRJkjozkEmSJHVmIJMkSerMQCZJktSZgUxSN0mum3D/RUn+aZZreHaSC5OcOqF9SZI/GEdtrW/PzSXpVgYySfNOki1msPjBwB9X1RMmtC8B/uD2i0vSpmcgkzQntb1IX2oXqD4lya6t/egkzxpZ7rr2d+8kX0lyAnDBJP09L8m5Sc5L8o7W9tfA44GjkvzdhFUOB34rydlJXtXadkzy+STfT/K3I30/Nck3kpyV5N/aNQEnPv6jknwnyXeAQyds51faumcleWxrPzbJ/iPLfSTJfjN8GiVtJgxkknraqgWes5OcDbx5ZN4/AsdU1W8AHwHeM43+Hgm8oqoeNNqYZEfgHcATGc4g/+gk+1fVm4EVwPOr6rUT
+loOfKWqllbVEa1tKfBc4NeB5ybZJcl2wBuAJ7eL3K8AXj1JbR8CXl5VD5/QvhZ4Slv3uSPbeRTwolb/1sBjgROn8RxI2gzNZLe+JG1qP6+qpevvJHkRsKzdfQzwf9r0h4G/ZWpnVNUPJml/NPDlqlrXHucjwG8zXKppJk6pqmtaHxcAuwHbAHsAXxsuCchdgW+MrpRkG2Cbqjp9ZHue0aa3BP4pyVLgZuBBAFV1WpL3JVkM/D7wqaq6aYb1StpMGMgkbW5uou3dT3IXhgC03k/H/Ng3jEzfzPBvaICTq+p5d7DPVzFcA/LhDNt1/ci8Y4E/BA4ADrqD/UvaDHjIUtJc9XWGIALwfIaLH8NwsepHtenfZdjDNJUzgN9Jsl2SRcDzgNOmWOda4N7T6PubwOOSPBAgyT2T/NIh06q6Gri6XcQZhu1Zb2tgTVXdArwAWDQy72jgla2P242LkzR/GMgkzVUvBw5Kcg5DUHlFa/8AQ7j6DsNhzSn3ilXVGoYxYacC3wHOrKrjp1jtHODmNhD/VRtaqB0GfRHwsVbrN4AHT7LoQcB721i5jLS/Dziwbc+DR7enqi4HLmQYfyZpHktV9a5BkjSJJPcAzgUeuX7smqT5yT1kkjQHJXkyw96xfzSMSfOfe8gkSZI6cw+ZJElSZwYySZKkzgxkkiRJnRnIJEmSOjOQSZIkdWYgkyRJ6uz/Ax8RhNHWt6jvAAAAAElFTkSuQmCC",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "%matplotlib inline\n",
- "plt.figure(figsize=(10, 6))\n",
- "plt.hist(df.day_hour, bins='auto', rwidth=0.6)\n",
-    "plt.title('Distribution of tweets by hour of the day')\n",
- "plt.ylabel(\"Tweets\")\n",
- "plt.xlabel(\"Hour of the day\")\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 188,
- "metadata": {},
- "outputs": [],
- "source": [
- "df_2017 = df[df.created_at.dt.year == 2017]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 189,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAl4AAAGDCAYAAAD6aR7qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAYAUlEQVR4nO3de9RldX3f8c9XRqOgAspoETSjFWNojaij9X4BzVLJElqt15jRaGlatHhp4mhrTVxLg2mrSYymRRHQoNZbAgkJ1uAltDHqQFEUakXFCHIZLyhovCDf/nH2xCfjDPMMcH7nmcPrtdaznnP25ezfM2cdeK+999m7ujsAAMzfLRY9AACAmwvhBQAwiPACABhEeAEADCK8AAAGEV4AAIMILwCAQYQXsFBV9UdVddJ20x5VVd+oqgMHbP8NVXVRVV1dVRdW1bO2m3//qjq3qr5XVZ+qql9YMe+IqvpoVX2nqi7abr17VNU12/10VR03778JWLuEF7BoxyV5QlU9Lkmq6tZJ3pLkpd192U25oaraaweTr0lyZJJ9k/xqkjdV1YOm5X8myWlJTkqyf5J3JfmTqrrltO53k7w1ycu2f9Hu/lJ333bbT5L7Jbkuyftvyr8J2LMIL2ChuvsbSV6Y5ISq2ifJq5J8sbtPTpKqukVVvaKqvlhVX6+qd1fV/ivmva+qLq+qq6a9Tz+/7bWnvWlvqqozq+q7SR6xg+2/srs/393XdffHk/x1kodMs49Icl13v7G7f5DkDUl+JsmjpnX/prv/KMmXV/Gn/kqSD3f3JTfk3wlYDsILWLjufm+SczPbo3TM9LPNizPbI/XIJAdntofq91fM/7MkhyT5R0k+m+Qd2738M5P8VpLbJfn49Y2jqvZOsjHJ56ZJ/yTJZ1aMs5OcP01ftaqqJM9OcsrurAcsn3WLHgDA5N8m+WKS/9DdX10x/deSPL+7L02SqvqtJF+oqk3dfV2Sk7ctWFW/mWRrVe3T3d+dJv/xtCcrSX6ws41PcXRCkk92919Ok2+b5NvbLfrtzCJudzw6yR2SfGA31wOWjPAC1oTuvqKqvp6f7G3a5m5J/rSqrttu+p2qamuS307ylCQHZHYOVabH28Lrq1md1ye5V2aHF7e5Jsntt1vu9kmuXuVrbrMpyXu7+3u7uR6wZBxqBNa6S5I8rrv3W/Fz6+6+PLPzpp6Y5PDMTo6/57ROrVi/d7WBqnpNZsH1+O5eGVWfS3LfFctVkvvkp+Pw+l57nyRPjsOMQIQXsPb9tySvraq7JUlV3amqnjTNu11mhw+/kWTvJK/Z3Revqldmtsfscd39ze1mfzjJXlV17PQNx+OS/CjJx6Z1bzF9C/OWs6d16xXfeNzmyUmuTHL27o4NWD7CC1jrXp/kzCRnVdXVmX3r8IHTvJOSfG36+dw0b9Wmy0u8OsmGJF9ccb2t30iS7v5+kqOSPD/JVUl+OclR3f2j6SUOT/J3SU5Pco/p8V9st5lNSd4+nZgP3MyV/xYAAIxhjxcAwCDCCwBgEOEFADCI8AIAGER4AQAMskdcuf6AAw7oDRs2LHoYAAC7dM4553y9u9fvaN4eEV4bNmzIli1bFj0MAIBdqqqv7GyeQ40AAIMILwCAQYQXAMAgwgsAYBDhBQAwiPACABhEeAEADCK8AAAGEV4AAIMILwCAQYQXAMAgwgsAYBDhBQAwyLpFDwCA8TZsPmOh27/4+CMXun1YFHu8AAAGEV4AAIMILwCAQYQXAMAgwgsAYBDhBQAwiPACABhEeAEADCK8AAAGEV4AAIMILwCAQYQXAMAgwgsAYBDhBQAwiPACABhEeAEADCK8AAAGEV4AAIMILwCAQYQXAMAgwgsAYBDhBQAwiPACABhEeAEADCK8AAAGEV4AAIMILwCAQYQXAMAgwgsAYBDhBQAwiPACABhEeAEADCK8AAAGWTfPF6+qFyd5fpJOcn6S5yY5
MMm7k9wxyTlJnt3dP5znOABYfhs2n7HQ7V98/JEL3T57hrnt8aqqg5L8uyQbu/ufJtkrydOTvC7JG7r7nkm+leR58xoDAMBaMu9DjeuS3Kaq1iXZO8llSQ5P8r5p/ilJjp7zGAAA1oS5hVd3X5rkvyT528yC69uZHVq8qruvnRa7JMlB8xoDAMBaMs9DjfsnOSrJ3ZPcJck+SR6/G+sfU1VbqmrL1q1b5zRKAIBx5nmo8bFJvtzdW7v7R0k+kORhSfabDj0mycFJLt3Ryt19Qndv7O6N69evn+MwAQDGmGd4/W2SB1fV3lVVSY5IckGSjyR5yrTMpiSnzXEMAABrxjzP8fpEZifRn5vZpSRukeSEJC9L8pKquiizS0qcOK8xAACsJXO9jld3vyrJq7ab/KUkD5rndgEA1iJXrgcAGER4AQAMIrwAAAYRXgAAgwgvAIBBhBcAwCDCCwBgEOEFADCI8AIAGER4AQAMIrwAAAYRXgAAgwgvAIBBhBcAwCDCCwBgEOEFADCI8AIAGER4AQAMIrwAAAYRXgAAgwgvAIBBhBcAwCDCCwBgEOEFADCI8AIAGER4AQAMIrwAAAYRXgAAgwgvAIBBhBcAwCDCCwBgEOEFADCI8AIAGER4AQAMIrwAAAYRXgAAgwgvAIBBhBcAwCDCCwBgEOEFADCI8AIAGER4AQAMIrwAAAYRXgAAgwgvAIBBhBcAwCDCCwBgEOEFADCI8AIAGER4AQAMIrwAAAYRXgAAgwgvAIBBhBcAwCDCCwBgEOEFADCI8AIAGER4AQAMIrwAAAYRXgAAg8w1vKpqv6p6X1X936q6sKoeUlV3qKoPVdUXpt/7z3MMAABrxbz3eP1ekjO7+95J7pvkwiSbk5zV3YckOWt6DgCw9OYWXlW1b5JHJjkxSbr7h919VZKjkpwyLXZKkqPnNQYAgLVknnu87p5ka5KTqur/VNVbq2qfJHfu7sumZS5PcucdrVxVx1TVlqrasnXr1jkOEwBgjHmG17ok90/yh919vyTfzXaHFbu7k/SOVu7uE7p7Y3dvXL9+/RyHCQAwxjzD65Ikl3T3J6bn78ssxK6oqgOTZPp95RzHAACwZswtvLr78iRfraqfmyYdkeSCJKcn2TRN25TktHmNAQBgLVk359d/YZJTq+pWSb6U5LmZxd57qup5Sb6S5KlzHgMAwJow1/Dq7vOSbNzBrCPmuV0AgLXIlesBAAYRXgAAgwgvAIBBhBcAwCDCCwBgEOEFADCI8AIAGER4AQAMIrwAAAYRXgAAgwgvAIBBhBcAwCDCCwBgEOEFADCI8AIAGER4AQAMIrwAAAYRXgAAgwgvAIBBhBcAwCC7DK+q+u2qun1VrauqD1bVFVX1zBGDAwBYJutWscwTuvvlVXV0kq8leXqSjyZ55zwHBgAsnw2bz1jYti8+/siFbXub1Rxq3BZnT0zy3u7+VpKe35AAAJbTavZ4/UVVfTbJj5McW1UHJPnBfIcFALB8VrPH6+VJDk/ygO7+UZLvJ3nyXEcFALCEVhNen+zuK7v72iTp7muSnD7fYQEALJ+dHmqsqjslOTDJbarqPklqmnX7JHsPGBsAwFK5vnO8jkzyq0kOTvLmFdO/k+SV8xwUAMAy2ml4dfdJSU6qqqd293sGjgkAYCmt5hyvj1bVf6+qP0uSqjq0qp4z32EBACyf1YTXSUk+luSu0/MvJHnp3EYEALCkVhNed+rudya5LkmmS0pcN9dRAQAsodWE13er6g6ZrlZfVQ/M7AR7AAB2w2quXP/vk/xpkntU1ceSHJTkKXMdFQDAEtpleHX3lqp6TJKfz+xaXhd09w/nPjIAgCWzy0ONVXWbJC9J8m+6+7wkd6uqJ8x9ZAAAS2Y153i9bVru4dPzryV57dxGBACwpFYTXod092uT/ChJuvt7+cntgwAAWKXVhNcPq+rW+cm3Gu+exDleAAC7aTXfanx1kjOTHFxVpyR5VJLnzXVUAABLaDXfajyz
qs5J8tDMDjH+endfOfeRAQAsmV2GV1WdnNktg87u7ovmPiIAgCW1mnO8Tk1y9yRvqaovVtX/qKpj5zwuAICls5pDjR+qqr9M8oAkRyQ5dnr8pjmPDQBgqazmUOMHk+yb5FNJzk7y4O7+2rwHBgCwbFZzqPH/Jbk2ySFJ7pXknlV1q7mOCgBgCa3mUOMLk6Sq9k3yK0nekeROSW4z36EBACyXnYZXVa3r7mur6teSPCLJAzO7XdDbMzvkCADAbri+PV6fTHL/JPsleXOST3W3K9YDANxA1xdelSTdffygsQAALLXrC6/1VfWSnc3s7tfPYTwAAEvr+sJrryS3zbTnCwCAG+f6wuuy7n71sJEAACy567uOlz1dAAA3oesLryOGjQIA4GZgp+HV3d8cORAAgGW3mlsGAQBwExBeAACDCC8AgEF2eZPsG6uq9kqyJcml3f1LVXX3JO9Ocsck5yR59lq4FdGGzWcsdPsXH3/kQrcPAMzfiD1exyW5cMXz1yV5Q3ffM8m3kjxvwBgAABZuruFVVQcnOTLJW6fnleTwJO+bFjklydHzHAMAwFox7z1ev5vkN5JcNz2/Y5Kruvva6fklSQ7a0YpVdUxVbamqLVu3bp3zMAEA5m9u4VVVv5Tkyu4+54as390ndPfG7t64fv36m3h0AADjzfPk+ocleVJVPTHJrZPcPsnvJdmvqtZNe70OTnLpHMcAALBmzG2PV3e/vLsP7u4NSZ6e5MPd/awkH0nylGmxTUlOm9cYAADWkkVcx+tlSV5SVRdlds7XiQsYAwDAcHO/jleSdPdHk3x0evylJA8asV0AgLXElesBAAYRXgAAgwgvAIBBhBcAwCDCCwBgEOEFADCI8AIAGER4AQAMIrwAAAYRXgAAgwgvAIBBhBcAwCDCCwBgEOEFADCI8AIAGER4AQAMIrwAAAYRXgAAgwgvAIBBhBcAwCDCCwBgEOEFADCI8AIAGER4AQAMIrwAAAYRXgAAgwgvAIBBhBcAwCDCCwBgEOEFADCI8AIAGER4AQAMsm7RA2B+Nmw+Y6Hbv/j4Ixe6fQBYa+zxAgAYRHgBAAwivAAABhFeAACDCC8AgEGEFwDAIMILAGAQ4QUAMIjwAgAYxJXrAWCNcgeS5WOPFwDAIMILAGAQ4QUAMIjwAgAYRHgBAAwivAAABhFeAACDCC8AgEGEFwDAIMILAGAQ4QUAMIjwAgAYRHgBAAyybtEDgJu7DZvPWNi2Lz7+yIVtG+DmyB4vAIBBhBcAwCBzC6+qumtVfaSqLqiqz1XVcdP0O1TVh6rqC9Pv/ec1BgCAtWSee7yuTfLS7j40yYOTHFtVhybZnOSs7j4kyVnTcwCApTe38Oruy7r73Onx1UkuTHJQkqOSnDItdkqSo+c1BgCAtWTIOV5VtSHJ/ZJ8Ismdu/uyadblSe48YgwAAIs29/CqqtsmeX+SF3X3d1bO6+5O0jtZ75iq2lJVW7Zu3TrvYQIAzN1cw6uqbplZdJ3a3R+YJl9RVQdO8w9McuWO1u3uE7p7Y3dvXL9+/TyHCQAwxDy/1VhJTkxyYXe/fsWs05Nsmh5vSnLavMYAALCWzPPK9Q9L8uwk51fVedO0VyQ5Psl7qup5Sb6S5KlzHAMAwJoxt/Dq7v+VpHYy+4h5bRcAYK1y5XoAgEHcJJs9yiJvKJ24qTQAN449XgAAgwgvAIBBhBcAwCDCCwBgEOEFADCI8AIAGER4AQAMIrwAAAYRXgAAgwgvAIBBhBcAwCDCCwBgEOEFADCI8AIAGER4AQAMIrwAAAYRXgAAgwgvAIBBhBcAwCDCCwBgkHWLHgCwfDZsPmNh2774+CMXtm2AXbHHCwBgEOEFADCI8AIAGER4AQAMIrwAAAYRXgAAgwgvAIBBhBcAwCDCCwBgEOEFADCI8AIAGER4AQAMIrwAAAYRXgAAgwgvAIBBhBcAwCDCCwBgEOEFADCI8AIAGER4AQAMIrwAAAYRXgAAgwgvAIBB1i16
AABr3YbNZyx0+xcff+RCtw/cdOzxAgAYRHgBAAwivAAABhFeAACDCC8AgEGEFwDAIMILAGAQ4QUAMIjwAgAYRHgBAAwivAAABhFeAACDLCS8qurxVfX5qrqoqjYvYgwAAKMND6+q2ivJm5I8IcmhSZ5RVYeOHgcAwGiL2OP1oCQXdfeXuvuHSd6d5KgFjAMAYKhFhNdBSb664vkl0zQAgKVW3T12g1VPSfL47n7+9PzZSf5Zd79gu+WOSXLM9PTnknz+Bm7ygCRfv4HrsnZ4H5eH93J5eC+Xh/fypvWz3b1+RzPWjR5JkkuT3HXF84Onaf9Ad5+Q5IQbu7Gq2tLdG2/s67BY3sfl4b1cHt7L5eG9HGcRhxo/leSQqrp7Vd0qydOTnL6AcQAADDV8j1d3X1tVL0jywSR7JXlbd39u9DgAAEZbxKHGdPefJ/nzQZu70YcrWRO8j8vDe7k8vJfLw3s5yPCT6wEAbq7cMggAYJClDS+3JVoeVXVxVZ1fVedV1ZZFj4fVq6q3VdWVVfXZFdPuUFUfqqovTL/3X+QYWZ2dvJe/WVWXTp/N86rqiYscI6tTVXetqo9U1QVV9bmqOm6a7rM5wFKGl9sSLaXHdPdhvu68xzk5yeO3m7Y5yVndfUiSs6bnrH0n56ffyyR5w/TZPGw6f5e179okL+3uQ5M8OMmx0/8jfTYHWMrwitsSwZrQ3X+V5JvbTT4qySnT41OSHD10UNwgO3kv2QN192Xdfe70+OokF2Z2BxmfzQGWNbzclmi5dJL/WVXnTHc0YM925+6+bHp8eZI7L3Iw3GgvqKrPTIciHZraw1TVhiT3S/KJ+GwOsazhxXJ5eHffP7NDx8dW1SMXPSBuGj37WrWvVu+5/jDJP05yWJLLkvzXxQ6H3VFVt03y/iQv6u7vrJznszk/yxpeq7otEXuG7r50+n1lkj/O7FAye64rqurAJJl+X7ng8XADdfcV3f3j7r4uyVvis7nHqKpbZhZdp3b3B6bJPpsDLGt4uS3RkqiqfarqdtseJ/nFJJ+9/rVY405Psml6vCnJaQscCzfCtv9JT/55fDb3CFVVSU5McmF3v37FLJ/NAZb2AqrT15p/Nz+5LdFrFjwkboCqukdme7mS2Z0W3um93HNU1buSPDrJAUmuSPKqJH+S5D1J7pbkK0me2t1O2l7jdvJePjqzw4yd5OIk/3rFOUKsUVX18CRnJzk/yXXT5Fdkdp6Xz+acLW14AQCsNct6qBEAYM0RXgAAgwgvAIBBhBcAwCDCCwBgEOEFzF1VXbPd8+dU1R8MHsO/rKoLq+oj203fUFXPnMfYptd2bSvg7wkvYI9VVet2Y/HnJflX3f2Y7aZvSPLMn14c4KYnvICFmvYKfXi60fJZVXW3afrJVfWUFctdM/1+dFWdXVWnJ7lgB6/3jKo6v6o+W1Wvm6b9pyQPT3JiVf3n7VY5Pskjquq8qnrxNO0uVXVmVX2hqn5nxWv/YlV9vKrOrar3Tve62377D6iqT1fVp5Mcu93fefa07rlV9dBp+tur6ugVy51aVUft5j8jsIcQXsAIt5nC5ryqOi/Jq1fMe2OSU7r7F5KcmuT3V/F6909yXHffa+XEqrpLktclOTyzK6o/sKqO7u5XJ9mS5Fnd/evbvdbmJGd392Hd/YZp2mFJnpbkPkmeVlV3raoDkvzHJI+dbtq+JclLdjC2k5K8sLvvu930K5M8blr3aSv+zhOTPGca/75JHprkjFX8GwB7oN3ZTQ9wQ/1ddx+27UlVPSfJxunpQ5L8i+nxO5L8Tnbtk9395R1Mf2CSj3b31mk7pyZ5ZGa3KdodZ3X3t6fXuCDJzybZL8mhSf737FZ3uVWSj69cqar2S7Jfd//Vir/nCdPjWyb5g6o6LMmPk9wrSbr7Y1X15qpan+TJSd7f3dfu5niBPYTwAtaqazPtla+qW2QWOtt8d87b/sGKxz/O7L+VleRD3f2MG/iaL87sHof3zezv+v6KeW9P
8stJnp7kuTfw9YE9gEONwKL9dWbBkSTPyuzmvcnspssPmB4/KbM9RrvyySSPqqoDqmqvJM9I8rFdrHN1ktut4rX/JsnDquqeSVJV+1TVPzjU2d1XJblquglxMvt7ttk3yWXdfV2SZyfZa8W8k5O8aHqNnzpvDVgewgtYtBcmeW5VfSazIDlumv6WzCLq05kdjtzlXq7uviyzc7Y+kuTTSc7p7tN2sdpnkvx4OiH+xTtbaDp8+Zwk75rG+vEk997Bos9N8qbpXLZaMf3NSTZNf8+9V/493X1FkgszOz8MWGLV3YseA8DNWlXtneT8JPffdm4ZsJzs8QJYoKp6bGZ7u94oumD52eMFADCIPV4AAIMILwCAQYQXAMAgwgsAYBDhBQAwiPACABjk/wPPeDY/gKwSZwAAAABJRU5ErkJggg==",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "%matplotlib inline\n",
- "plt.figure(figsize=(10, 6))\n",
- "plt.hist(df_2017.day_hour, bins='auto', rwidth=0.6)\n",
- "plt.title('Year 2017')\n",
- "plt.ylabel(\"Tweets\")\n",
- "plt.xlabel(\"Hour of the day\")\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
-    "So far we have used `matplotlib` directly to generate our plots.\n",
- "\n",
- "`pandas`'s dataframes provide some methods that directly call `matplotlib`'s API behind the scenes:\n",
- "- `hist()` for histograms\n",
- "- `boxplot()` for boxplots\n",
-    "- `plot()` for other types of plots (specified with e.g. `kind='scatter'`)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "By passing the `by` parameter to e.g. `hist()` it is possible to produce one histogram plot of a given variable for each value in another column."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Let's see how we can plot the number of mentions by year:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 335,
- "metadata": {},
- "outputs": [],
- "source": [
- "df['year'] = df.created_at.dt.year"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 356,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmgAAAJOCAYAAAADGvtiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOzde5hlZXnn/e+Pk0ZRjp0eQtMWb0QNGgGnhmhIIhFMUBwho2FwPIBD0pk3oibmndjmoDEzydXMJFHmGiXpiNoah0OIDj3AiA7COBkj2o2IctIWG2nCoeUUMRpF7/ePvQo3ZfWhah/Wqr2/n+vaV+/1rLX2vld13bXv/axnPStVhSRJkrpjj7YDkCRJ0mNZoEmSJHWMBZokSVLHWKBJkiR1jAWaJElSx1igSZIkdYwFmiRJUsdYoHVIksclOT/J7Um+keT6JC/qW39CkluS/GOSq5M8pW/daUk+1ay7ZoHXPjrJ5mb95iRHj+mwpLEYcf6sT3Jrku8nOXM8RySNz6jyJ8nTklyaZHuS+5NcmeTpYzy0ZcsCrVv2Au4Ang/sB/wecHGSmSQHAx8Gfh84ENgEXNS37/3AO4F18180yT7ApcBfAQcAG4BLm3ZpUowkfxqfB34duG40oUutG1X+7A9sBJ4OrAQ+Q+/zSLsQ7yTQbUluAN4OHAScWVU/3bQ/Efg6cExV3dK3/a8Ar6qq4/vafgF4H7Cqmv/wJF8D1lTVR8d1LNK4DSN/5r3e3wLvqar3jzh0qXXDzp9mmwOB+4CDq+q+EYa/7NmD1mFJVgJPA24EnknvWzwAVfVN4CtN+648E7ihHluN37Cb+0rL0hDzR5o6I8yfnwPutjjbNQu0jkqyN/AhYEPzDWVf4KF5mz0EPGk3Xm6QfaVlZ8j5I02VUeVPklXAu4A3DSPOSWeB1kFJ9gA+CHwHOLtpfhh48rxNnwx8YzdecpB9pWVlBPkjTY1R5U+SFcDHgHdX1QVDCHXiWaB1TJIA59MbTPmyqvpus+pG4Ki+7Z4I/HjTvis3As9uXnvOs3dzX2nZGFH+SFNhVPmT5AB6xdnGqvqjoQY9wSzQuuc84CeAf1lV3+pr/wjwrCQvS/J44K30xpXdApBkz6Z9L2CPJI9vuqkBrgG+B7yhuZR67lvRJ8ZwPNI4jSJ/SLJPsz7A3s16/35q0gw9f5I8GbgS+L9VtXacB7PsVZWPjjyApwAFfJtel/Lc45XN+hOBW4Bv0Su6Zvr2PbPZt//x/r71xwCbm32vo3f1TevH7MPHsB4jzp9rFlh/fNvH7MPHsB6jyh/gjGb5m/Ned3Xbx9z1h9NsSJIkdYxd9JIkSR1jgSZJktQxFmiSJEkdY4EmSZLUMRZokiRJHbNX2wEAHHzwwTUzM9N2GJpSmzdv/npVrWg7jqUyf9Qm80daup3lTycKtJmZGTZt2tR2GJpSSW5vO4ZBmD9qk/kjLd3O8sdTnJIkSR2zqAItyXuT3JvkiztYnyT/JcmWJDckec5wwpSmS5LfTHJjki8muaC5jYokaUostgft/cBJO1n/IuCI5rGG3n29JC1CkkOBNwCzVfUsYE/g9HajkiSN06IKtKr6JHD/TjY5BfhA9Xwa2D/JIYMEKE2pvYAfSbIX8ATg71uOR5I0RsMeg3YocEff8ramTdJuqqo7gT8BvgbcBTxUVR9rNypJ0ji1dhVnkjX0ToOyevXqtsKYCDNrL1/yvlvXnTzESDQMSQ6g1xt9OPAg8NdJXlVVf9W3jfkzz1LzYFpyYNL+TiTZH3gP8CyggH8L3ApcBMwAW4HTquqBpb7HpP3MtLwMuwftTuCwvuVVTdsPqar1VTVbVbMrVizbKXSkUTgR+GpVba+q7wIfBn66fwPzR+Jc4KNV9QzgKOBmYC1wVVUdAVzVLEvL0rALtI3Aa5qrOZ9L79TMXUN+D2nSfQ14bpInJAlwAr0PH0lAkv2AnwPO
B6iq71TVg/R6njc0m20ATm0nQmlwizrFmeQC4Hjg4CTbgLcBewNU1Z8DVwAvBrYA/wi8dpjBStOgqq5NcglwHfAI8DlgfbtRSZ1yOLAdeF+So4DNwBuBlX2dAncDK1uKTxrYogq0qnrFLtYX8LqBIpJEVb2N3hcgST9sL+A5wOubLzTnMu90ZlVVklpoZ8dwajnwTgKSpOVmG7Ctqq5tli+hV7DdMze1U/PvvQvt7BhOLQcWaJKkZaWq7gbuSPL0pukE4CZ646DPaNrOAC5tITxpKDpxs3RJkhbp9cCHkuwD3EZvzPMewMVJzgJuB05rMT5pIBZokqRlp6quB2YXWHXCuGORRsFTnJIkSR1jgSZJktQxFmiSJEkdY4EmSZLUMRZokiRJHWOBJkmS1DEWaJIkSR3jPGjSBJtZe/mS99267uQhRiJJWgx70CRJkjrGAk2SJKljLNAkSZI6xgJNkiSpYxZdoCU5KcmtSbYkWbvA+tVJrk7yuSQ3JHnxcEKVJEmaDosq0JLsCbwLeBFwJPCKJEfO2+z3gIur6hjgdODdwwhUkiRpWiy2B+1YYEtV3VZV3wEuBE6Zt00BT26e7wf8/WAhSpIkTZfFzoN2KHBH3/I24KfmbfMHwMeSvB54InDikqOTJEmaQqO4SOAVwPurahXwYuCDSX7ofZKsSbIpyabt27ePIAxJkqTlabEF2p3AYX3Lq5q2fmcBFwNU1d8BjwcOnv9CVbW+qmaranbFihWLDEOSJGlyLbZA+yxwRJLDk+xD7yKAjfO2+RpwAkCSn6BXoNlFJkmStJsWVaBV1SPA2cCVwM30rta8MckfJnlps9lvAb+a5PPABcCZVVXDDFqadEn2T3JJkluS3JzkeW3HJEkan0XfLL2qrgCumNf21r7nNwHHDR6aNNXOBT5aVS9vequf0HZAkqTxWXSBJmm0kuwH/BxwJkAzpc132oxJkjReFmhS9xxOb9zm+5IcBWwG3lhV32w3LEla3mbWXr6k/bauO3nIkeyaBZrUPXsBzwFeX1XXJjkXWAv8/twGSdYAawBWr17dSpCjsNQ/nm1ZTn/sJS0v3ixd6p5twLaqurZZvoRewfYop6mRpMlmgSZ1TFXdDdyR5OlN0wnATS2GJEkaM09xSt30euBDzRWctwGvbTkeSZpagwy/WOqQBgs0qYOq6npgtu04JEnt8BSnJElSx1igSZIkdYynOCVJ0tg5Tc3O2YMmSZLUMRZokqRlKcmeST6X5LJm+fAk1ybZkuSi5ipoaVmyQJMkLVdvBG7uWz4HeEdVPRV4ADirlaikIbBAkyQtO0lWAScD72mWA7yA3p03ADYAp7YTnTQ4CzRJ0nL0TuC3ge83ywcBD1bVI83yNuDQNgKThsECTZK0rCR5CXBvVW1e4v5rkmxKsmn79u1Djk4aDgs0SdJycxzw0iRbgQvpndo8F9g/ydz0UauAOxfauarWV9VsVc2uWLFiHPFKi7boAi3JSUluba6SWbuDbU5LclOSG5P8t8HDlCSpp6reUlWrqmoGOB34RFW9ErgaeHmz2RnApS2FKA1sUQVakj2BdwEvAo4EXpHkyHnbHAG8BTiuqp4J/MaQYpUkaWfeDLwpyRZ6Y9LObzkeackWeyeBY4EtVXUbQJILgVOAm/q2+VXgXVX1AEBV3TuMQCVJmq+qrgGuaZ7fRu9zShNsqXcgWG4We4rzUOCOvuWFrpJ5GvC0JP83yaeTnDRIgJIkSdNmFPfi3As4Ajie3iDNTyb5yap6sH+jJGuANQCrV68eQRiSJEnL02J70O4EDutbXugqmW3Axqr6blV9FfgSvYLtMbyKRpIkaWGLLdA+CxzR3O9sH3pXz2yct81/p9d7RpKD6Z3yvG3AOCVJkqbGogq0Zobms4Er6d3/7OKqujHJHyZ5abPZlcB9SW6id8nzv6+q+4YZtCRJ0iRb9Bi0qroCuGJe21v7nhfwpuYhSZKkRfJOApIkSR1jgSZJktQxFmiS
JEkdY4EmSZLUMRZoUgcl2TPJ55Jc1nYskqTxs0CTuumN9KaykSRNIQs0qWOSrAJOBt7TdiySpHZYoEnd807gt4Hvtx2IJKkdo7hZuqQlSvIS4N6q2pzk+J1stwZYA7B69eqRxDKz9vIl77t13clDjGS0BjlOSRoVe9CkbjkOeGmSrcCFwAuS/NX8japqfVXNVtXsihUrxh2jJGnELNCkDqmqt1TVqqqaAU4HPlFVr2o5LEnSmFmgSZIkdYxj0KSOqqprgGtaDkOS1AJ70CRJkjrGAk2SJKljLNAkSZI6xgJNkiSpYxZdoCU5KcmtSbYkWbuT7V6WpJLMDhaiJEnSdFlUgZZkT+BdwIuAI4FXJDlyge2eRO9mz9cOI0hJkqRpstgetGOBLVV1W1V9h95M56cssN1/AM4Bvj1gfJIkSVNnsfOgHQrc0be8Dfip/g2SPAc4rKouT/LvB4xPkiTthmm5f+60GOpFAkn2AP4M+K3d2HZNkk1JNm3fvn2YYUiSJC1ri+1BuxM4rG95VdM250nAs4BrkgD8M2BjkpdW1ab+F6qq9cB6gNnZ2VpkHJIkdZa9WRrUYnvQPgsckeTwJPvQu5nzxrmVVfVQVR1cVTPNzZ4/DfxQcSZJkqQdW1SBVlWPAGcDVwI3AxdX1Y1J/jDJS0cRoCRJ0rRZ9M3Sq+oK4Ip5bW/dwbbHLy0sSZKk6eWdBCRJkjpm0T1okiS1KclhwAeAlUAB66vq3CQHAhcBM8BW4LSqeqCtOJdqkAsMNDnsQZMkLTePAL9VVUcCzwVe19zVZi1wVVUdAVzVLEvLkgWaJGlZqaq7quq65vk36F20dii9O9tsaDbbAJzaToTS4CzQJEnLVpIZ4Bh6935eWVV3NavupncKVFqWLNAkSctSkn2BvwF+o6r+oX9dVRW98WkL7eedbNR5FmiSpGUnyd70irMPVdWHm+Z7khzSrD8EuHehfatqfVXNVtXsihUrxhOwtEgWaJKkZSW9ewmeD9xcVX/Wt2ojcEbz/Azg0nHHJg2L02xIkpab44BXA19Icn3T9jvAOuDiJGcBtwOntRSfNDALNEnSslJVfwtkB6tPGGcs0qh4ilPqmCSHJbk6yU1JbkzyxrZjkiSNlz1oUvfMTcJ5XZInAZuTfLyqbmo7MEnSeNiDJnXMTibhlCRNCQs0qcPmTcIpSZoSFmhSR+1sEk4n2pSkyWaBJnXQDibhfJQTbUrSZFt0gZbkpCS3JtmSZO0C69/UXH12Q5KrkjxlOKFK02Enk3BKkqbEogq0JHsC7wJeBBwJvCLJkfM2+xwwW1XPBi4B/tMwApWmyNwknC9Icn3zeHHbQUmSxmex02wcC2ypqtsAklwInAI8evl/VV3dt/2ngVcNGqQ0TXYxCackaQos9hTnocAdfcvb2Pnl/2cB/3OxQUmSJE2zkU1Um+RVwCzw/B2sXwOsAVi9evWowpAkSVp2FtuDdidwWN/yqqbtMZKcCPwu8NKq+qeFXsir0CRJkha22ALts8ARSQ5Psg9wOrCxf4MkxwB/Qa84u3c4YUqSJE2PRRVoVfUIcDZwJb3bz1xcVTcm+cMkL202+8/AvsBfN1efbdzBy0mSJGkBix6DVlVXAFfMa3tr3/MThxCXJEnS1PJOApIkSR1jgSZJktQxFmiSJEkdY4EmSZLUMRZokiRJHTOyOwlIkoZvZu3lbYcgaQzsQZMkSeoYCzRJkqSOsUCTJEnqGMegSZI05ZY6tnHrupOHHInm2IMmSZLUMRZokiRJHWOBJkmS1DGOQZMkSUvivHyjYw+aJElSx1igSZIkdYwFmiRJUscsukBLclKSW5NsSbJ2gfWPS3JRs/7aJDPDCFSaJrvKM0k7Zv5oEiyqQEuyJ/Au4EXAkcArkhw5b7OzgAeq6qnAO4BzhhGoNC12M88kLcD80aRYbA/ascCWqrqtqr4DXAicMm+bU4ANzfNLgBOSZLAwpamyO3kmaWHmjybCYgu0Q4E7+pa3
NW0LblNVjwAPAQctNUBpCu1OnklamPmjidDaPGhJ1gBrmsWHk9y6g00PBr4+nqh2W9diWnI8Gd0J6OX0M3rKOAMZhgXy5z7a/3k/+jMe4e/VbsfQol3GMIafz1B/DruIdxLyZzl9/ozTNB//0I59qfmz2ALtTuCwvuVVTdtC22xLshewH3Df/BeqqvXA+l29YZJNVTW7yDhHqmsxdS0e6F5MXYtnF3aZZ/PzpwvHZwzG0BGLzp8dmfKf41QffxeOfbGnOD8LHJHk8CT7AKcDG+dtsxE4o3n+cuATVVWDhSlNld3JM0kLM380ERbVg1ZVjyQ5G7gS2BN4b1XdmOQPgU1VtRE4H/hgki3A/fSSQ9Ju2lGetRyWtCyYP5oUix6DVlVXAFfMa3tr3/NvA788eGiP2mU3dAu6FlPX4oHuxdS1eHZqoTzbhS4cnzH0GEPLlpA/OzLVP0em+/hbP/Z49lGSJKlbvNWTJElSx1igSZIkdUxr86BJWpokz6A3M/rc5Jt3Ahur6ub2opKWP3NLXWIPmrSMJHkzvVvXBPhM8whwQZs3hU7yM0nelOQX2opBGkRXc0vTy4sEpGUkyZeAZ1bVd+e17wPcWFVHjCmOz1TVsc3zXwVeB3wE+AXgf1TVunHEIUiyH/AW4FTgR4EC7gUuBdZV1YMthrdsdCW3NH5dzaHO9aAl2S/JuiS3JLk/yX1Jbm7a9m8hnpPmxXZ+khuS/LckK8cdTxPHLyY5L8nG5nFef5wtxPPsvud7J/m9Jq4/TvKEFuLZK8mvJflo8391Q5L/meTfJdl73PEM2feBH1ug/ZBm3bj0/xzXAC+sqrfTK9BeOY4AuvC3oiN/Hy4GHgCOr6oDq+og4OebtovHFMMk6EputaIL+dSiTuZQ53rQklwJfALYUFV3N23/jN7dCU6oqrGeQklyXVU9p3n+HuBu4C+BfwU8v6pOHXM87wSeBnyA3k2AoXcrk9cAX66qN44zniam/p/RnwIHAe+j923koKp6zZjjuQB4ENjAY39GZwAHVtW/Hmc8w9QUBP8V+DI/uCH0auCpwNlV9dExxfF54Hh6X/Ku7L8lSpLPVdUxY4ih9b8VXfj7kOTWqnr6YtfpsbqSW23pQj61pas51MUCrVM/qHl/gK+vqqP71j1meUzxfKmqnrZAe4AvtdEN3/+BnOR64F9U1XebmD5fVc/e+SsMPZ4Ff0a7WrdcJNkDOJbHDmT+bFV9b4wxbKXXqxB6pwOOq6q7kuwL/O048qILfyu68PchyceA/0Xvg/Wepm0lcCa9ns0TRx3DpOhCbrWlC/nUlq7mUBev4rw9yW+z8A/qjp3tOCI/muRN9D6InpwkffcWbeMU8beT/Iuq+uy89n8BfLuFeAD2S/JL9H4ej5sbw1FVlaSNbwD3J/ll4G+q6vvw6B/eX6bXZb2sNcf06ZZjmNnBqu8DvzSmMLrwt6ILfx/+NbAW+N/N8RdwD737T542phgmQhdyq0VdyKe2dDKHulig9f+gfrRpa/MH9ZfAk5rnG4CDge1N1+/1LcRzJnBekifxg9N3hwEPNeva8L+BlzbPP51kZVXd0/yMvt5CPKcD5wDvTvIAvQ/P/YCr8d6wI1VV/wh8dUxvt7O/FcO83dzOtP73oaoeSPI+4OPAp6vq4bl1zWm7iT41p6HpQj61oqs51LlTnNo9zQfAo93wc2MG9FhJDgKoqvvajkXjk+S1VfW+aYghyRvoXUV7M3A08MaqurRZ9+gpWGmpupBPo9TVHOp8gZbkZ+iNCfhiVX2shfd/A/Dhqtq2y43HJL1Lgk/iseMkrmzzcvokTwZWVNVX5rU/u6puaCmsuRgOB44BbqqqW9qMReOR5GtVtXoaYkjyBeB5VfVwkhngEuCDVXXuuC7Y0GTrQj6NUldzqHMFWjo2v1KSh4BvAl8BLgD+uqq2jzOGefG8Bngb8DF6hRn0rlB8IfD2qvpACzGdBryT3rwxewNnzo2Ra+PbR5L/Pnf1
XJJTmtiuAY4D/riq3j/OeDQaSXZU+Ad4WlU9bkpiuLGqntm3vC+9D5ibgBeM+0ImLU9d+F1uS1dzqItj0BaaX2l7kj+hN3hz3BNg3gb8c+BEeufo355kM71i7cNV9Y0xx/O7wD+f31uW5ADgWnrTb4zb7zQx3ZXkWOCDSd5SVR+hl9zj9pS+52+ml2BfTXIwcBXw/hZi0vCtBH6RH77wI8CnpiiGe5IcXVXXAzS9AC8B3gv85Jhi0PLXhd/ltnQyh7pYoO3RFBt70Ovh2w5QVd9M8kgL8VRzZc/HgI+lN9Hpi4BXAH8CrBhzPHPTGsw3N+VBG/asqrsAquozSX4euCzJYSwc66j1v+deVfXVJravJ5n4CSenyGXAvnN/VPsluWaKYngN8Ji/jVX1CPCaJH8xphi0/HXhd7ktncyhLp7i3ErL8yvNi2eH55+TPKG5am2c8ZwBvJVewdg/meILgf/Qxum7JJ8CXt0//qy5yvS/Az8z7q7xJN+jd1o6wOOApzS/Q/sAm8Y9L5skSYvVuQJtR9K7ZdDKud6QMb7v06rqS+N8z11pehh/kR++SKCVOb6SHAV8s6q2zGvfGzitqj7URlzzpXe7kp+oqr9rOxZJknZm2RRokiRJ06JzN0vfmSSXtR1Dvw7Gs77tGObrWkxdi0eSpIUsqx60JIfMDUbvgg7G88+ranPbcfTrWkxdi0eSpIUsqwJNkiRpGnTuFGeS65L8XpIfbzsW6GQ8s0muTvJXSQ5L8vEkDyX5bJJ2ZjvuWExdi0eSpMXqXIEGHADsD1yd5DNJfjPJjxnPo94N/CfgcnqTB/5FVe1H7ya37zamTsYjSdKidO4UZ/+tgZL8LL0JYf8VvZuYXlBVYx3k3cF4Hp2Xbf790dq6Z1jXYupaPJIkLVYXe9AeVVX/p6p+nd58X+cAzzMevp3kF5L8MlBJ5u45+Xzgey3E08WYuhaPJEmL0sVbPf3QpLBV9T3go81j3LoWz7+jd/ru+/Qmq/1/k7yf3mS1v9pCPF2MqWvxSJK0KJ07xbkzSV5bVe9rO445xrNrXYupa/FIkrSQ5VagPWY8UduMZ9e6FlPX4pEkaSGdO8WZ5IYdrQJWjjMWMJ7d0bWYuhaPJEmL1bkCjd4H6C8C82/8HXpTJoyb8exa12LqWjySJC1KFwu0y4B9q+r6+SuSXDP+cIxnN3Qtpq7FI0nSoiyrMWiSJEnToNPzoEmSJE0jCzRJkqSOsUCTJEnqGAs0SZKkjrFAkyRJ6hgLNEmSpI6xQJMkSeoYCzRJkqSOsUCTJEnqGAs0SZKkjrFAkyRJ6hgLNEmSpI6xQJMkSeoYCzRJkqSOsUCTJEnqGAu0DknyuCTnJ7k9yTeSXJ/kRX3rT0hyS5J/THJ1kqf0rTstyaeaddfMe92Dk/zfJPcleTDJ3yU5boyHJo3cqPJn3nu8Jkkl+ZURH440VqPMnyZnvpnk4ebxnjEd1rJmgdYtewF3AM8H9gN+D7g4yUySg4EPA78PHAhsAi7q2/d+4J3AugVe92Hg3wIrgAOAc4D/kWSvER2H1IZR5Q8ASQ4Afge4cSTRS+0aaf4AR1XVvs3DLzi7IVXVdgzaiSQ3AG8HDgLOrKqfbtqfCHwdOKaqbunb/leAV1XV8Tt4vT2Ak4GNwMqqune0RyC1Z5j5k+TPgRuA04C/qip7ATTRhpU/SQo4oqq2jCv2SWAPWoclWQk8jd439mcCn59bV1XfBL7StO/u690AfJtecfYeizNNsmHmT5JjgVngz4cfqdQ9w/78AT6Z5O4kH04yM8RQJ5YFWkcl2Rv4ELCh+YayL/DQvM0eAp60u69ZVc8Gngz8G+BvhxSq1DnDzJ8kewLvBs6uqu8PO1apa0bw+fN8YAZ4BvD3wGUOsdk1f0Ad1JyG/CDwHeDspvlhesVVvycD31jMa1fVt4ELktyc5Pqq+vwud5KWkRHkz68DN1TVp4cWpNRRo/j8qapPNk+/k+SNwD8APwF8YeCA
J5g9aB2TJMD5wErgZVX13WbVjcBRfds9Efhxlj5geW/g/xkgVKlzRpQ/JwC/1JyeuRv4aeBPk/zXoQYvtWyMnz8FZIBQp4IFWvecR++bxb+sqm/1tX8EeFaSlyV5PPBWet/qb4HeaZimfS9gjySPb7qpSfLcJD+TZJ8kP5LkzfQS8NpxHpg0BkPPH+DM5jWPbh6b6A2c/t1xHJA0RqP4/HlmkqObbfYF/hS4E7h5jMe1LFmgdUgzr8yv0fsQuLtvzphXVtV24GXAHwEPAD8FnN63+6uBb9FLsJ9tnv9ls+5xwLuA++glxouBk6vq70d/VNJ4jCp/qurBqrp77kHv1M8/VNX8MTnSsjXCz5+V9Kbk+AfgNnpj0V7S1zunHXCaDUmSpI6xB02SJKljLNAkSZI6xgJNkiSpY3ZZoCV5b5J7k3yxr+3AJB9P8uXm3wOa9iT5L0m2JLkhyXNGGbwkSdIk2p0etPcDJ81rWwtcVVVHAFc1ywAvAo5oHmvoXdEhSZKkRditqzib+2ZdVlXPapZvBY6vqruSHAJcU1VPT/IXzfML5m+3s9c/+OCDa2ZmZqADkZZq8+bNX6+qFW3HsVTmj9pk/khLt7P8Weqtnlb2FV1305vnBOBQ4I6+7bY1bTst0GZmZti0adMSQ5EGk+T2tmMYhPmjNo0yf5K8F3gJcG9fB8GB9ObVmgG2AqdV1QPNLPjn0pvn8R+BM6vqul29h/mjNu0sfwa+SKB6XXCLnkwtyZokm5Js2r59+6BhSJImz/txiI2m1FILtHuaU5s0/97btN8JHNa33aqm7YdU1fqqmq2q2RUrlm3vuCRpRJqbbN8/r/kUYEPzfANwal/7B6rn08D+c59T0nK01AJtI3BG8/wM4NK+9tc0V3M+F3hoV+PPJElahMUOsZGWpV2OQUtyAXA8cHCSbcDbgHXAxUnOAm4HTms2v4Le+f8t9MYAvHYEMUuSRFVVkiUNsaF3GpTVq1cPPS5pGHZZoFXVK3aw6oQFti3gdYMG1W9m7eVL3nfrupOHGIkkPdZS/z75t2kg9yQ5pG8WgSUNsQHWA8zOznpDau1SG7WIdxKQWpJka5IvJLk+yaambcFJoCU9yiE2mgoWaFK7fr6qjq6q2dyjzDcAACAASURBVGZ5R1eoSVOnGWLzd8DTk2xrhtWsA16Y5MvAic0y9IbY3EZviM1fAr/eQsjS0Cx1HjRJo3EKvTGf0LtC7RrgzW0FI7Wp7SE2UpvsQZPaU8DHkmxuBi3Djq9QkyRNEXvQpPb8TFXdmeRHgY8nuaV/5c6uUPMqNEmabPagSS2pqjubf+8FPgIcy44ngZ6/rxM9S9IEs0CTWpDkiUmeNPcc+AXgi+z4CjVJ0hTxFKfUjpXAR3r3d2Yv4L9V1UeTfJaFJ4GWJE0RCzSpBVV1G3DUAu33scAVapKk6eIpTkmSpI6xQJMkSeoYCzRJkqSOsUCTJEnqGAs0SZKkjrFAkyRJ6hin2ZhyM2svX9J+W9edPORIJEnSHHvQJEmSOsYCTZIkqWMGKtCS/GaSG5N8MckFSR6f5PAk1ybZkuSiJPsMK1hJkqRpsOQCLcmhwBuA2ap6FrAncDpwDvCOqnoq8ABw1jAClSRJmhaDnuLcC/iRJHsBTwDuAl4AXNKs3wCcOuB7SJL0GJ7B0aRb8lWcVXVnkj8BvgZ8C/gYsBl4sKoeaTbbBhw6cJSSlmSpV+mCV+qqu/rO4BxZVd9KcjG9MzgvpncG58Ikf07vDM55LYYqLdkgpzgPAE4BDgd+DHgicNIi9l+TZFOSTdu3b19qGJKk6eQZHE20QU5xngh8taq2V9V3gQ8DxwH7NwkDsAq4c6Gdq2p9Vc1W1eyKFSsGCEOSNE2q6k5g7gzOXcBDeAZHE2aQAu1rwHOTPCFJgBOAm4CrgZc325wBXDpYiJIk/YBncDQNllygVdW19LqSrwO+0LzWeuDNwJuSbAEOAs4fQpzSREqyZ5LPJbmsWXaQs7Rr
nsHRxBvoVk9V9TbgbfOabwOOHeR1pYVM6ID3NwI3A09uluemqXGQs4ZqwvLn0TM49C5SOwHYxA/O4FyIZ3C0zHknAaklSVYBJwPvaZaDg5ylXfIMjqaBN0uX2vNO4LeBJzXLB+EgZ2m3eAZHk84eNKkFSV4C3FtVm5e4v4OcJWmCWaBJ7TgOeGmSrfTGy7wAOBcHOUuSsECTWlFVb6mqVVU1Q28G9E9U1StxmhpJEhZoUtc4yFmS5EUCUtuq6hrgmua5g5wlSfagSZIkdY0FmiRJUsdYoEmSJHWMBZokSVLHWKBJkiR1jFdxauwGuWmzJI3LhN1gXsuMPWiSJEkdY4EmSZLUMRZokiRJHWOBJkmS1DEDFWhJ9k9ySZJbktyc5HlJDkzy8SRfbv49YFjBSpIkTYNBe9DOBT5aVc8AjgJuBtYCV1XVEcBVzbIkSZJ205ILtCT7AT8HnA9QVd+pqgeBU4ANzWYbgFMHDVKSpH6ewdGkG6QH7XBgO/C+JJ9L8p4kTwRWVtVdzTZ3AysHDVKSpHk8g6OJNkiBthfwHOC8qjoG+CbzkqGqCqiFdk6yJsmmJJu2b98+QBiSpGniGRxNg0EKtG3Atqq6tlm+hF7Bdk+SQwCaf+9daOeqWl9Vs1U1u2LFigHCkCRNGc/gaOItuUCrqruBO5I8vWk6AbgJ2Aic0bSdAVw6UITSBEry+CSfSfL5JDcmeXvTfniSa5NsSXJRkn3ajlXqIM/gaOINehXn64EPJbkBOBr4Y2Ad8MIkXwZObJYlPdY/AS+oqqPo5c5JSZ4LnAO8o6qeCjwAnNVijFJXeQZHE2+gm6VX1fXA7AKrThjkdaVJ13y7f7hZ3Lt5FPAC4N807RuAPwDOG3d8UpdV1d1J7kjy9Kq6lR+cwbmJ3pmbdXgGR8vcQAWapKVLsiewGXgq8C7gK8CDVfVIs8k24NAd7LsGWAOwevXq0Qcrdc/cGZx9gNuA19I7K3RxkrOA24HTWoxPGogFmtSSqvoecHSS/YGPAM9YxL7rgfUAs7OzC46zkSaZZ3A06bwXp9SyZnqAq4HnAfsnmfvitAq4s7XAJEmtsUCTWpBkRdNzRpIfAV5Ib6LNq4GXN5s5hkaSppSnOKV2HAJsaMah7QFcXFWXJbkJuDDJfwQ+RzMRpyRpuligSS2oqhuAYxZovw04dvwRSdJ4zay9fEn7bV138pAj6SZPcUqSJHWMPWhakqV+85EkqS3L6bPLHjRJkqSOsUCTJEnqGAs0SZKkjrFAkyRJ6hgLNEmSpI6xQJMkSeoYCzRJkqSOcR40SVpGltM8TpKWzh40SZKkjrFAkyRJ6piBC7Qkeyb5XJLLmuXDk1ybZEuSi5LsM3iYkiRJ02MYPWhvBG7uWz4HeEdVPRV4ADhrCO8hSdJj2EGgSTZQgZZkFXAy8J5mOcALgEuaTTYApw7yHpIk7YAdBJpYg17F+U7gt4EnNcsHAQ9W1SPN8jbg0AHfQ5o4SQ4DPgCsBApYX1XnJjkQuAiYAbYCp1XVA23FKXVVXwfBHwFv6usg+DfNJhuAPwDOayXAFgxyhe/WdScPMRINw5J70JK8BLi3qjYvcf81STYl2bR9+/alhiEtV48Av1VVRwLPBV6X5EhgLXBVVR0BXNUsS/phcx0E32+W7SDQRBmkB+044KVJXgw8HngycC6wf5K9miRZBdy50M5VtR5YDzA7O1sDxCEtO1V1F3BX8/wbSW6m92FyCnB8s9kG4BrgzS2EqBFyLrPB9HcQJDl+CfuvAdYArF69esjRadSmJX+WXKBV1VuAtwA0CfL/VdUrk/w18HLgQuAM4NIhxLls2MWsxUoyAxwDXAusbIo3gLvpnQKV9Fid7yDws0CDGsU8aG+mNx5gC70u5/NH8B7SREiyL/A3wG9U1T/0r6uqojc+baH9HCKgqVVVb6mqVVU1A5wOfKKqXglcTa+DAKawg0CTZSgFWlVdU1UvaZ7fVlXHVtVTq+qXq+qfhvEe
0qRJsje94uxDVfXhpvmeJIc06w8B7l1o36paX1WzVTW7YsWK8QQsdZ8dBJoY3otTakFzxdn5wM1V9Wd9qzbS++a/DnsApF2qqmvojdWkqm4Djm0zHmlYLNCkdhwHvBr4QpLrm7bfoVeYXZzkLOB24LSW4hvIUsffOPZGknos0KQWVNXfAtnB6hPGGYskqXu8WbokSVLHTHQPmpc5S5Kk5cgeNEmSpI6xQJMkSeoYCzRJkqSOmegxaJKWF8eNSlKPPWiSJEkdY4EmSZLUMRZokiRJHWOBJkmS1DEWaJIkSR1jgSZJktQxFmiSJEkdY4EmSZLUMUueqDbJYcAHgJVAAeur6twkBwIXATPAVuC0qnpg8FAlSZp8g0zYrMkxyJ0EHgF+q6quS/IkYHOSjwNnAldV1boka4G1wJsHD1XSOPkhoa6yg0DTYMkFWlXdBdzVPP9GkpuBQ4FTgOObzTYA17AMCzQ/nDRqSd4LvAS4t6qe1bT5ASPtmh0EmnhDGYOWZAY4BrgWWNkUbwB30/uGI+mHvR84aV7bWnofMEcAVzXLkvpU1V1VdV3z/BtAfwfBhmazDcCp7UQoDW7gAi3JvsDfAL9RVf/Qv66qil7380L7rUmyKcmm7du3DxqGtOxU1SeB++c1+wEjLYIdBJpUAxVoSfamV5x9qKo+3DTfk+SQZv0hwL0L7VtV66tqtqpmV6xYMUgY0iTxA0baTXYQaJItuUBLEuB84Oaq+rO+VRuBM5rnZwCXLj08aXr5ASPtmB0EmnSD9KAdB7waeEGS65vHi4F1wAuTfBk4sVmWtHv8gJF2wQ4CTYNBruL8WyA7WH3CUl9XmnJzHzDr8ANG2pG5DoIvJLm+afsdenlzcZKzgNuB01qKTxrYIPOgSRpAkgvoTUlzcJJtwNvwA0baJTsINA0s0KSWVNUrdrDKDxhJmnLei1OSJKljLNAkSZI6xgJNkiSpYyzQJEmSOsYCTZIkqWMs0CRJkjrGAk2SJKljnAdNkqQpN7P28iXtt3XdyUOORHMs0CRJ0pIstbDTrnmKU5IkqWPsQZsAfoORJGmy2IMmSZLUMfagSZoIDnKWNEnsQZMkSeoYCzRJkqSOsUCTJEnqmJEVaElOSnJrki1J1o7qfaRJZP5IS2f+aBKM5CKBJHsC7wJeCGwDPptkY1XdNIr3kyaJ+TNeTlMzWcwfTYpRXcV5LLClqm4DSHIhcApgguyEHxRqmD/S0pk/mgijOsV5KHBH3/K2pk3Srpk/0tKZP5oIrc2DlmQNsKZZfDjJrTvY9GDg6+OJqpOm+fiHduw5Z6ernzKM9xgn82e3TfPxmz87YP7stmk+/tbzZ1QF2p3AYX3Lq5q2R1XVemD9rl4oyaaqmh1ueMvHNB//FB+7+TMk03z8U3zs5s+QTPPxd+HYR3WK87PAEUkOT7IPcDqwcUTvJU0a80daOvNHE2EkPWhV9UiSs4ErgT2B91bVjaN4L2nSmD/S0pk/mhQjG4NWVVcAVwzhpXbZDT3hpvn4p/bYzZ+hmebjn9pjN3+GZpqPv/VjT1W1HYMkSZL6eKsnSZKkjrFAkyRJ6pjW5kHbkSTPoDfr89zEgncCG6vq5vaikpYH80daOvNHXdKpHrQkbwYuBAJ8pnkEuMAb3k62JPslWZfkliT3J7kvyc1N2/5tx7ccmD/Ty/wZnPkzvbqaP526SCDJl4BnVtV357XvA9xYVUe0E9l4JNkPeAtwKvCjQAH3ApcC66rqwRbDG6kkVwKfADZU1d1N2z8DzgBOqKpfaDO+5cD8MX8wf5bM/DF/6Fj+dKoHDfg+8GMLtB/SrJt0FwMPAMdX1YFVdRDw803bxa1GNnozVXXOXHIAVNXdVXUOy/BWMi0xf8wf82fpzB/zp1P507UetJOA/wp8mR/c7HY18FTg7Kr6aFuxjUOSW6vq6YtdNwmSfAz4X/S+wdzTtK0EzgReWFUnthjesmD+mD+YP0tm/pg/dCx/OlWgASTZAziW
xw7S/GxVfa+9qMajq78k45DkAGAtvQG6K+l1r99D7xYt51TV/S2Gt2yYP+YP5s+SmT/mDx3Kn84VaNNs3i/JjzbNc78k66rqgbZiG4fmCqpVwKer6uG+9pMm/durBmf+mD9aOvOne/ljgbZMJHltVb2v7ThGJckbgNcBNwNHA2+sqkubdddV1XPajE/Lm/lj/mjpzJ928scCbZlI8rWqWt12HKOS5AvA86rq4SQzwCXAB6vq3CSfq6pjWg1Qy5r5Y/5o6cyfdvKncxPVTrMkN+xoFb3z4pNsj7lu5aramuR44JIkT6F3/NJOmT/mj5bO/Ole/ligdctK4BfpXdbcL8Cnxh/OWN2T5Oiquh6g+SbzEuC9wE+2G5qWCfPH/NHSmT8dyx8LtG65DNh37pekX5Jrxh/OWL0GeKS/oaoeAV6T5C/aCUnLjPnTx/zRIpk/fbqQP45BkyRJ6piu3UlAkiRp6lmgSZIkdYwFmiRJUsdYoEmSJHWMBZokSVLHWKBJkiR1jAWaJElSx1igSZIkdYwFmiRJUsdYoEmSJHWMBZokSVLHWKBJkiR1jAWaJElSx1igSZIkdYwFmiRJUsdYoEmSJHWMBZokSVLHWKBJkiR1jAWaJElSx1igSZIkdYwFmiRJUsdYoEmSJHWMBZokSVLHWKBJkiR1jAWaJElSx1igSZIkdYwFmiRJUsdYoEmSJHWMBZokSVLHWKBJkiR1jAWaJElSx1igdUiSxyU5P8ntSb6R5PokL+pbf0KSW5L8Y5Krkzylb91pST7VrLtmgdfeM8l/TPL3zWt/Lsn+Yzo0aeRGlT9JfjbJw/MeleRlYzw8SVPGAq1b9gLuAJ4P7Af8HnBxkpkkBwMfBn4fOBDYBFzUt+/9wDuBdTt47bcDPw08D3gy8Grg2yM4BqktI8mfqvo/VbXv3AN4CfAw8NFRHoyk6ZaqajsG7USSG+gVVwcBZ1bVTzftTwS+DhxTVbf0bf8rwKuq6vi+tgPofXAdVVVfGWP4UquGkT8LvOb7AKrqtSMMXdKUswetw5KsBJ4G3Ag8E/j83Lqq+ibwlaZ9V34SeAR4eZK7k3wpyetGELLUGUPMn/7XfCLwcmDD8CKVpB+2V9sBaGFJ9gY+BGyoqluS7Atsn7fZQ8CTduPlVtE75fM04HDgCOCqJF+qqo8PMWypE4acP/3+Fb2et/89eJSStGP2oHVQkj2ADwLfAc5umh+mN3as35OBb+zGS36r+fcPq+pbVXUDcCHw4iGEK3XKCPKn3xnAB8qxIZJGzAKtY5IEOB9YCbysqr7brLoROKpvuycCP96078oNzb/9Hyp+wGjijCh/5vY5DDge+MCw4pWkHbFA657zgJ8A/mVVfauv/SPAs5K8LMnjgbcCN8wNcG6m0Xg8vdPWeyR5fHOah+bCgP8D/G4zFcFPAKcDl43vsKSxGHr+9Hk18CkvtJE0DhZoHdLMy/RrwNHA3X1zLr2yqrYDLwP+CHgA+Cl6RdacV9M7lXke8LPN87/sW/8K4CnAfcDlwO9X1VUjPiRpbEacPwCvwYsDJI2J02xIkiR1jD1okiRJHWOBJkmS1DEWaJIkSR1jgSZJktQxFmiSJEkd04lbPR188ME1MzPTdhiaUps3b/56Va1oO46lMn/UpuWeP1JXdaJAm5mZYdOmTW2HoSmV5Pa2YxiE+aM2Lff8kbrKU5ySJEkdY4EmSZLUMRZokiRJHWOBJkmS1DEWaJIkSR3Tias4pd0xs/byJe+7dd3JQ4xE6lnq76S/j5J2xR40SZKkjrFAkyRJ6hgLNEmSpI6xQJMkSeoYCzRJkqSOsUCTJEnqGAs0SZKkjnEetCnnPE6SJHWPPWiSJEkdY4EmSZLUMRZoUkuS/GaSG5N8MckFSR6f5PAk1ybZkuSiJPu0Hackafws0KQWJDkUeAMwW1XPAvYETgfOAd5RVU8FHgDOai9KSVJbLNCk9uwF/EiSvYAnAHcBLwAuadZvAE5tKTZJUou8
ilNqQVXdmeRPgK8B3wI+BmwGHqyqR5rNtgGHthSiRmipV0+DV1BL08IeNKkFSQ4ATgEOB34MeCJw0iL2X5NkU5JN27dvH1GUkqS27LJAS/LeJPcm+WJf2x8kuTPJ9c3jxX3r3tIMcL41yS+OKnBpmTsR+GpVba+q7wIfBo4D9m9OeQKsAu5caOeqWl9Vs1U1u2LFivFELEkam93pQXs/C3+zf0dVHd08rgBIciS9gc7PbPZ5d5I9hxWsNEG+Bjw3yROSBDgBuAm4Gnh5s80ZwKUtxSdJatEuC7Sq+iRw/26+3inAhVX1T1X1VWALcOwA8UkTqaqupXcxwHXAF+jl4nrgzcCbkmwBDgLOby1ISVJrBrlI4OwkrwE2Ab9VVQ/QG9D86b5tHOQs7UBVvQ1427zm2/BLjSRNvaVeJHAe8OPA0fSmBvjTxb6Ag5wlSZIWtqQCraruqarvVdX3gb/kB9/47wQO69vUQc6SJEmLtKQCLckhfYu/BMxd4bkROD3J45IcDhwBfGawECVJkqbLLsegJbkAOB44OMk2emNmjk9yNFDAVuDXAKrqxiQX07sa7RHgdVX1vdGELkmSNJl2WaBV1SsWaN7hlWVV9UfAHw0SlCRJ0jTzTgKSJEkdY4EmSZLUMRZokiRJHWOBJkmS1DEWaJIkSR1jgSZJktQxFmiSJEkdY4EmSZLUMbucqLZtM2svX/K+W9edPMRIJEmSxsMeNEmSpI6xQJMkSeoYCzRJkqSOsUCTJEnqGAs0SZKkjrFAk1qSZP8klyS5JcnNSZ6X5MAkH0/y5ebfA9qOU5I0fhZoUnvOBT5aVc8AjgJuBtYCV1XVEcBVzbIkacpYoEktSLIf8HPA+QBV9Z2qehA4BdjQbLYBOLWdCCVJber8RLXqJicQHtjhwHbgfUmOAjYDbwRWVtVdzTZ3Aytbik+S1CJ70KR27AU8Bzivqo4Bvsm805lVVUAttHOSNUk2Jdm0ffv2kQcrSRovCzSpHduAbVV1bbN8Cb2C7Z4khwA0/9670M5Vtb6qZqtqdsWKFWMJWJI0PhZoUguq6m7gjiRPb5pOAG4CNgJnNG1nAJe2EJ4kqWWOQZPa83rgQ0n2AW4DXkvvS9PFSc4CbgdOazE+SVJLLNA0doNcYDBJqup6YHaBVSeMOxZJUrd4ilOSJKljLNAkSZI6xgJNkiSpYyzQJEmSOmaXBVqS9ya5N8kX+9oWvKFzev5Lki1JbkjynFEGL0mSNIl2pwft/cBJ89p2dEPnFwFHNI81wHnDCVOSJGl67LJAq6pPAvfPa97RDZ1PAT5QPZ8G9p+bFV2SJEm7Z6nzoO3ohs6HAnf0bbetabsLSVJrBpl/cOu6k4cYiaT/v727ibWjLuM4/v1BZCMKAqUipZREosEYwTSNBhcgimiJsDAIGimGpBuIGE2guNGNyWWjkmiUJmCqUV6CkjZAeBHExBiQ8hKRFLQhF6GBtiIoxIUWHhdnKle5ty/33HNmzpzvJ2l6ZuacO88snnN/d17+/wMx9EMC+5rQeV+c7FmSJGl+iw1oC03ovAM4Yc77VjTr3sLJniVJkua32IC20ITOW4CLm6c5PwL8fc6lUEmSJB2A/d6DluRG4AzgmCTPA98EZph/Quc7gc8A24F/Mpj8WZIkSQdhvwGtqi5aYNNbJnRu7ke7bNiiJEmSppkzCUiSJHWMAU2SJKljDGiSJEkdY0CTJEnqGAOaJElSxxjQJEmSOsaAJkmS1DEGNKlFSQ5N8liS25vlk5I8lGR7kpuTHNZ2jZKk8dvvQLU6OKs23LHoz87OrF3CSjQhrgC2Ae9slq8BvltVNyX5EXAp8MO2ipMktcOAJrUkyQpgLfBt4GtJAnwc+ELzlk3AtxgioPkHgyRNJi9xSu35HnAl8EazfDTwSlXtaZafB45vozBJUrsMaFILkpwL7KqqRxb5+fVJtibZunv37iWuTpLUNgOa1I7Tgc8mmQVuYnBp81rgyCR7bz1YAeyY
78NVtbGqVlfV6mXLlo2jXknSGBnQpBZU1dVVtaKqVgEXAvdX1ReBXwOfa962DtjcUomSpBYZ0KRuuYrBAwPbGdyTdn3L9UiSWuBTnFLLquoB4IHm9TPAmjbrkSS1zzNokiRJHWNAkyRJ6hgDmiRJUscY0CRJkjrGgCZJktQxBjRJkqSOMaBJkiR1jOOgLWDVhjvaLkGSJE0pz6BJkiR1jAFNkiSpY3p9idPLlNLiDdM/szNrl7ASSZo+QwW0JLPAq8DrwJ6qWp3kKOBmYBUwC1xQVS8PV6YkSdL0WIpLnGdW1alVtbpZ3gDcV1UnA/c1y5IkSTpAo7gH7TxgU/N6E3D+CPYhSZLUW8Peg1bAPUkKuK6qNgLLq+qFZvuLwPIh9yFJanhvrTQdhg1oH6uqHUmOBe5N8tTcjVVVTXh7iyTrgfUAK1euHLIMSZKk/hjqEmdV7Wj+3wXcBqwBdiY5DqD5f9cCn91YVauravWyZcuGKUOSJKlXFh3Qkrw9yTv2vgbOBv4IbAHWNW9bB2wetkhJkqRpMswlzuXAbUn2/pyfV9VdSR4GbklyKfAscMHwZUqSJE2PRQe0qnoG+NA8618CzhqmKKnvkpwA/ITBHzoFbKyqax1HUJIEPZ9JQOqwPcDXq+rR5laBR5LcC1zCYBzBmSQbGIwjeFWLdY6VsxdI0oBzcUotqKoXqurR5vWrwDbgeBxHUJKEAU1qXZJVwGnAQziOoCQJA5rUqiSHA78AvlpV/5i7raqKwf1p831ufZKtSbbu3r17DJVKksbJgCa1JMnbGISzn1XVL5vVjiMoSTKgSW3IYHya64FtVfWdOZscR1CS5FOcUktOB74EPJHk8WbdN4AZejCOoPNFStJwDGhSC6rqt0AW2Ow4gpI05bzEKUmS1DEGNEmSpI4xoEmSJHWMAU2SJKljDGiSJEkdY0CTJEnqGIfZkNQLix17bXZm7RJXIknD8wyaJElSxxjQJEmSOsaAJkmS1DEGNEmSpI4xoEmSJHWMAU2SJKljDGiSJEkd4zhoPbDY8Z8kSVI3eQZNkiSpYzyDJmmqeQZaUhcZ0DrEXxSSJAm8xClJktQ5IwtoSc5J8nSS7Uk2jGo/Uh/ZP5I03UYS0JIcCvwA+DRwCnBRklNGsS+pb+wfSdKozqCtAbZX1TNV9S/gJuC8Ee1L6hv7R5Km3KgC2vHAc3OWn2/WSdo/+0eSplxrT3EmWQ+sbxZfS/L0Am89BvjreKrqpGk+/iU79lyzz80nLsU+xsn+OWDTfPz2jzTBRhXQdgAnzFle0az7r6raCGzc3w9KsrWqVi9teZNjmo9/io/d/lki03z803zsUh+M6hLnw8DJSU5KchhwIbBlRPuS+sb+kaQpN5IzaFW1J8nlwN3AocANVfXkKPYl9Y39I0ka2T1oVXUncOcS/Kj9XsbpuWk+/qk9dvtnyUzz8U/zsUsTL1XVdg2SJEmaw6meJEmSOsaAJkmS1DGtjYO2kCTvZzBq+t6BOXcAW6pqW3tVSZPB/pGkfujUGbQkVzGY1ibA75t/AW50wmhp3+wfSeqPTj0kkORPwAeq6t//t/4w4MmqOrmdyjRqSY4ArgbOB44FCtgFbAZmquqVFsubCPbP9LJ/pP7p1Bk04A3gPfOsP67Z1mtJjkgyk+SpJH9L8lKSbc26I9uub8RuAV4Gzqiqo6rqaODMZt0trVY2Oewf+8f+kXqia2fQzgG+D/yZNyeLXgm8F7i8qu5qq7ZxSHI3cD+wqapebNa9G1gHnFVVZ7dZ3yglebqq3new2/Qm+8f+OdhtkrqrUwENIMkhwBr+9ybnh6vq9faqGo9p/pJNcg/wKwa/XHc265YDlwCfrKpPtFjexLB/7B/7R+qHzj3FWVVvAA+2XUdLnk1yJfN/yT63rw/2wOeBDcBvmmMuYCeDOSgvaLOwSWL/2D/2j9QPnTuDNs2SvIvBl+x5DG70hTe/ZGeq6uW2ahuHZoiIFcCDVfXanPXn9P3y
nIZn/9g/Up8Y0CZEki9X1Y/brmNUknwFuAzYBpwKXFFVm5ttj1bVh9usT5PN/rF/pEljQJsQEWXaxQAAANVJREFUSf5SVSvbrmNUkjwBfLSqXkuyCrgV+GlVXZvksao6rdUCNdHsH/tHmjSduwdtmiX5w0KbgOXjrKUFh+y9LFNVs0nOAG5NciKD45f2yf6xf6Q+MaB1y3LgUwzGLporwO/GX85Y7UxyalU9DtCcCTgXuAH4YLulaULYP/aP1BsGtG65HTh875fsXEkeGH85Y3UxsGfuiqraA1yc5Lp2StKEsX/msH+kyeY9aJIkSR3TtameJEmSpp4BTZIkqWMMaJIkSR1jQJMkSeoYA5okSVLH/Adq2J2Y2TdD4QAAAABJRU5ErkJggg==",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "axes = df.hist(column='day_hour', by='year', figsize=(10,10))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Scatter plots\n",
- "\n",
-    "Scatter plots are useful for showing the relationship between two variables in your dataset."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 176,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " created_at \n",
- " text \n",
- " tweet_link \n",
- " week_day \n",
- " day_hour \n",
- " tweet_mentions \n",
- " n_mentions \n",
- " \n",
- " \n",
- " id \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 849636868052275200 \n",
- " 2017-04-05 14:56:29 \n",
- " b'And so the robots spared humanity ... https://t.co/v7JUJQWfCv' \n",
- " https://twitter.com/i/web/status/849636868052275200 \n",
- " Wednesday \n",
- " 14 \n",
- " [] \n",
- " 0 \n",
- " \n",
- " \n",
- " 848988730585096192 \n",
- " 2017-04-03 20:01:01 \n",
- " b\"@ForIn2020 @waltmossberg @mims @defcon_5 Exactly. Tesla is absurdly overvalued if based on the past, but that's irr\\xe2\\x80\\xa6 https://t.co/qQcTqkzgMl\" \n",
- " https://twitter.com/i/web/status/848988730585096192 \n",
- " Monday \n",
- " 20 \n",
- " [@ForIn2020, @waltmossberg, @mims, @defcon_5] \n",
- " 4 \n",
- " \n",
- " \n",
- " 848943072423497728 \n",
- " 2017-04-03 16:59:35 \n",
- " b'@waltmossberg @mims @defcon_5 Et tu, Walt?' \n",
- " https://twitter.com/i/web/status/848943072423497728 \n",
- " Monday \n",
- " 16 \n",
- " [@waltmossberg, @mims, @defcon_5] \n",
- " 3 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " created_at \\\n",
- "id \n",
- "849636868052275200 2017-04-05 14:56:29 \n",
- "848988730585096192 2017-04-03 20:01:01 \n",
- "848943072423497728 2017-04-03 16:59:35 \n",
- "\n",
- " text \\\n",
- "id \n",
- "849636868052275200 b'And so the robots spared humanity ... https://t.co/v7JUJQWfCv' \n",
- "848988730585096192 b\"@ForIn2020 @waltmossberg @mims @defcon_5 Exactly. Tesla is absurdly overvalued if based on the past, but that's irr\\xe2\\x80\\xa6 https://t.co/qQcTqkzgMl\" \n",
- "848943072423497728 b'@waltmossberg @mims @defcon_5 Et tu, Walt?' \n",
- "\n",
- " tweet_link \\\n",
- "id \n",
- "849636868052275200 https://twitter.com/i/web/status/849636868052275200 \n",
- "848988730585096192 https://twitter.com/i/web/status/848988730585096192 \n",
- "848943072423497728 https://twitter.com/i/web/status/848943072423497728 \n",
- "\n",
- " week_day day_hour \\\n",
- "id \n",
- "849636868052275200 Wednesday 14 \n",
- "848988730585096192 Monday 20 \n",
- "848943072423497728 Monday 16 \n",
- "\n",
- " tweet_mentions n_mentions \n",
- "id \n",
- "849636868052275200 [] 0 \n",
- "848988730585096192 [@ForIn2020, @waltmossberg, @mims, @defcon_5] 4 \n",
- "848943072423497728 [@waltmossberg, @mims, @defcon_5] 3 "
- ]
- },
- "execution_count": 176,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.head(3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 357,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAl4AAAGDCAYAAAD6aR7qAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3dfZzVdZ338ffbEXUylSXRhECMDNcV00KBC6/WtgzTtlh3t5YLKtvC2u3+hlaTR+peuLZR1j62m2sly1qI8iqb3GwlM+mGC0iMckwlxRtwKKGIUJu8GT/XH78feDjMnJlz+M7vd86c1/PxmIdzvufucz4zOm9/dx9HhAAAADD8Dii7AAAAgHZB8AIAACgIwQsAAKAgBC8AAICCELwAAAAKQvACAAAoCMELaEK2r7G9uKT3tu0v2v6d7Z+UUUMttn9h+8yy66iX7bD9grLr6I/txbZ/Y/vXQ3z8pbaXDVMt59v+8XC8NtAMCF7AENh+wPY224dWrL3V9qoSyxouZ0g6S9LzIuL0MgvpL4BGxJ9FxKqSShpxbE+U9AFJJ0bEc/u5/0zbDxVfGTAyEbyAoeuQ9J6yi6iX7Y46n3KspAci4rHhqAfDy/aBdT5loqTfRsS24agHwN4IXsDQLZH0Qdujq++wPSnflXRgxdoq22/Nvz/f9mrbn7S90/Z9tv9Hvr4l35r2pqqXPdL2TbYfsf0D28dWvPYJ+X07bG+0/bqK+66x/Tnb37H9mKSX9VPvONvX58+/1/aCfP0tkj4vaabtR21f1s9z6/ostg+2/XHbm20/bPv/2O7M7zvT9kO2P5A/71e235zfd4GkeZI+lNfyX/n6A7ZfUfHan7K9Nf/6lO2DB3vt/P5zbN+Z97fH9gf7+awH55/xpIq1sbZ7bR9l+0jb384fs8P2j2zX+u/qK2zfkz/+M7adv+YBthfZfjCv9cu2j6j8HFV1VfbgUttft73M9i5J5/fzOY7IX3N7/h6L8vd8haSbJI3Le3xN1fMOlfTfFfc/antcfvdB+Ws+4mz377SK542z/Y38/e63/e6BGmL7Ofnv4i5nu7YnV93/b/nv1S7bt9n+n/n6c23/wfZzKh774vw9R9X4GQClIngBQ7de0ipJ+/yBHqLpkm6X9BxJX5H0VUmnSXqBpPmSPm372RWPnyfpf0s6UtLPJC2X9vwxvCl/jaMk/Z2kz9o+seK5/0vS5ZIOk9Tf8TJflfSQpHGS/kbSv9j+i4i4WtLbJa2JiGdHxCUJPstHJb1Q0in5/eMlfaTitZ4r6Yh8/S2SPmP7TyLiqvwzfyyv5S/7qeNiSTPy136RpNMlLRrstfP7rpb0tog4TNJJkr5f/eIR8bik6yTNrVh+naQf5FuIPqCsj2MlHS3pw5JqzWF7tbI+nZy/zux8/fz862WSni/p2ZI+XeN1qr1W0tcljVb+e1Ll35X14fmS/lzSGyW9OSK+J+lVkrbmPT6/8kn5Vs/K+58dEVvzu1+j7Oc+WtL1u+vNg+d/Sfq5sr6/XNJ7bc9W/z4j6Y+SjpH09/lXpVuV/XzHKPtd+7+2D4mIXyv79/F1FY99g6SvRsSTA7wXUL6I4Isvvgb5kvSApFco+wP9e2V/aN8qaVV+/yRlf3APrHjOKklvzb8/X9I9FfdNzR9/dMXabyWdkn9/jbI/ILvve7akPkkTJL1e0o+q6vsPSZdUPPfLNT7LhPy1DqtYu0LSNRW1/rjG84f8WSRZ0mOSJlfcN1PS/fn3Z0rqrerbNkkzKj7L4v5+Fvn3mySdU3HfbGW7SYfy2pslvU3S4YP87F8haVPF7dWS3ph//8+SviXpBUP4HQpJZ1TcvlbShfn3N0v6x4r7pkh6UtKB+ed4qEYPLpX0wxrv2yHpCWXHcO1ee5ue+d3d5/Wrnt/f+18q6XsVt0+U1Jt/P13S5qrHXyTpiwPU9qSkEyrW/mWQ37/fSXpR/v3r
Ja2ueK1fSzp9qP9e88VXGV9s8QLqEBF3SPq2pAsbePrDFd/35q9XvVa5xWtLxfs+KmmHsi1Ux0qanu+u2ml7p7KtY8/t77n9GCdpR0Q8UrH2oLKtE6k/y1hJz5J0W0WtN+bru/02Ip6quP0H7d2HWsblte/2YL42lNf+a0nnSHrQ2a7cmQO8xy2SnmV7uu1JygLlN/P7lki6V9J3ne1yHez3ovKswcpa+vscByrbijYUtX7eR0oa1c/r1/Pz7k/1ZznE2a72Y5Xtmqz8/fyw+v8sY5V9zsr6K+uU7Q/avsv27/PXOiL/TFIWek+0fZyyE0J+HxFNdyYuUKnegzABSJdI+qmkT1Ss7T4Q/VmSduXf73OGWJ0m7P4m3203RtJWZX+kfhARZ9V4bq3dXVsljbF9WEX4miipZz/r7c9vlIWwP4uIRl6/1ueQss9yrKRf5Lcn5muDv3DErZJemx8P9E5lW6Am9PO4PtvXKtvd+LCkb+/uW/7PD0j6QH4c2Pdt3xoRNw+lhn4+x24TJT2Vv984Zb9XkvacLDFWe6vVp98o26p0rKQ7K15/qD+PwX4G1bYo26J5/BAeu13Z55wg6e6K2iRJ+fFcH1K2u/IXEfG07d8p25KqiPhj/rOZL+kESf9ZZ61A4djiBdQpIu6V9DVJ765Y267sD9l82x22/15VBwk34BzbZ9g+SNmxXmsjYouyLW4vtP0G26Pyr9Ns/+kQ698i6f9JusL2IbZPVnb8U/LrMkXE05KWSvqk7aMkyfb4Gsf7VHtY2XFJA1khaVF+wPuRyo4dG/Rz2D7I9jzbR0R2PNAuSU/XeMpXlO3Wmpd/v/t1Xm37BflB8r9Xtgu31uvU+hzvs31cHrL/RdLX8q11v1S2NencPCQuknTwUF84IvqUhcrLbR/m7CSN92voP++HJT1n98H+Q/ATSY/Y/ifbnfm/DyfZPm2A2q6TdKntZ+XHKVaeZHKYsmC2XdKBtj8i6fCql/myst3frxHBCy2A4AU05p8lHVq1tkDSQmXHN/2ZsnCzP76ibOvaDkkvUfZ/9bu3srxS2UH1W5Xt8vlX1fHHWNnWm0n587+p7Piw7+1nvQP5J2W749bmZ919T9kxTENxtbJdSTttd/Vz/2JlJz3cLqlb2ZbIoV549g2SHshreruyUNWviFinbKvmOGVn+e12vLLP86ikNZI+GxG3DPH9K31BWWj4oaT7lR1s/q78vX8v6R+VnW3ak9dR73W13pU/7z5lJ1t8JX/PQUXE3cqC4X35z2HcII/vU3YSwSn5Z/lNXvtAwe2dyna5/lrZMX1frLhvpbJd079Utgvyj6rarRoRq5WF3Z9GxF67KYFm5Ih6tyIDANA8bH9f0lci4vNl1wIMhuAFAGhZ+S7MmyRNqDphBGhK7GoEALQk219Stqv3vYQutAq2eAEAABSELV4AAAAFIXgBAAAUpCUuoHrkkUfGpEmTyi4DAABgULfddttvIqL6QseSWiR4TZo0SevXry+7DAAAgEHZHvCacuxqBAAAKAjBCwAAoCAELwAAgIIQvAAAAApC8AIAACgIwQsAAKAgBC8AAICCELwAAAAKQvACAAAoCMELAACgIC0xMmi4Tbrwhn3WHvjouSVUMjLQz/ToaVrTL79JDz/yxJ7bRx92kNZdfFaJFQFoF22/xau/P2i11lEb/UyPnqZVHbok6eFHntD0y28qqSIA7aTtgxeA9lIdugZbB4CUCF4AAAAFIXgBAAAUhOAFoK0cfdhBda0DQEptH7wGOjOMM8YaQz/To6dprbv4rH1CFmc1AiiKI6LsGgY1bdq0WL9+fdllAAAADMr2bRExrb/72n6LFwAAQFEIXgAAAAUheAEAABSE4AUAAFAQZjWKOXip0c/06GlaZ125Svdse2zP7eOPOlQ3vf/M8gpqcV0berRk5UZt3dmrcaM7tXD2FM05dXzZZQFNqe23eDEHLy36mR49Tas6dEnSPdse01lXriqnoBbXtaFH
F13XrZ6dvQpJPTt7ddF13era0FN2aUBTavvgBaC9VIeuwdZR25KVG9X7ZN9ea71P9mnJyo0lVQQ0N4IXAKBhW3f21rUOtDuCFwCgYeNGd9a1DrQ7gheAtnL8UYfWtY7aFs6eos5RHXutdY7q0MLZU0qqCGhubR+8mIOXFv1Mj56mddP7z9wnZHFWY+PmnDpeV5w3VeNHd8qSxo/u1BXnTeWsRmAAzGoEAABIiFmNAAAATYDgBQAAUBCCFwAAQEEIXgAAAAUheAEAABSEIdliAHFq9DM9eprWyZfcqF2PPzPm5vCDO3T7ZWeXWFFrm7d0jVZv2rHn9qzJY7R8wcwSK2p9i7q6tWLdFvVFqMPW3OkTtHjO1LLLQgJtv8WLAcRp0c/06Gla1aFLknY93qeTL7mxpIpaW3XokqTVm3Zo3tI1JVXU+hZ1dWvZ2s3qyy/31BehZWs3a1FXd8mVIYW2D14A2kt16BpsHbVVh67B1jG4Feu21LWO1kLwAgCgifQNcGHzgdbRWgheAAA0kQ67rnW0FoIXgLZy+MEdda2jtlmTx9S1jsHNnT6hrnW0lrYPXgwgTot+pkdP07r9srP3CVmc1di45Qtm7hOyOKtx/yyeM1XzZ0zcs4Wrw9b8GRM5q3GEYEg2AABAQgzJBgAAaAIELwAAgIIQvAAAAApC8AIAACgIwQsAAKAgwzYk2/YESV+WdLSkkHRVRPyb7TGSviZpkqQHJL0uIn43XHUMBQOI06Kf6dHTtF5w0Q16quKE7gMt3XsF/WwUA53R7Lo29GjJyo3aurNX40Z3auHsKZpz6vhSahnOLV5PSfpARJwoaYakd9g+UdKFkm6OiOMl3ZzfLg0DiNOin+nR07SqQ5ckPRXZOurHQGc0u64NPbroum717OxVSOrZ2auLrutW14aeUuoZtuAVEb+KiJ/m3z8i6S5J4yW9VtKX8od9SdKc4aoBAKpVh67B1lEbA53R7Jas3KjeJ/v2Wut9sk9LVm4spZ5CjvGyPUnSqZLWSTo6In6V3/VrZbsi+3vOBbbX216/ffv2IsoEANSJgc5odlt39ta1PtyGPXjZfrakb0h6b0Tsqrwvssvm9/tvZ0RcFRHTImLa2LFjh7tMAEADGOiMZjdudGdd68NtWIOX7VHKQtfyiLguX37Y9jH5/cdI2jacNQBApQMHyAMDraM2Bjqj2S2cPUWdo/aez9o5qkMLZ08ppZ5hC162LelqSXdFxJUVd10v6U3592+S9K3hqmEoGECcFv1Mj56mde8V5+4TsjirsXEMdEazm3PqeF1x3lSNH90pSxo/ulNXnDe1tLMah21Itu0zJP1IUrekp/PlDys7zutaSRMlPajschI7ar0WQ7IBAECrqDUke9iu4xURP5Y00Mb7lw/X+wIAADQrrlwPAABQEIIXAABAQQheAAAABSF4AQAAFGTYDq5vJQwgTot+pkdP05q3dI1Wb3rmZOpZk8do+YKZJVbU2pppADHQ7Np+ixcDiNOin+nR07SqQ5ckrd60Q/OWrimpotbWbAOIgWbX9sELQHupDl2DraO2ZhtADDQ7ghcAoGHNNoAYaHYELwBAw5ptADHQ7AheANrKrMlj6lpHbc02gBhodm0fvBhAnBb9TI+eprV8wcx9QhZnNTau2QYQA81u2IZkp8SQbAAA0CpqDclu+y1eAAAARSF4AQAAFITgBQAAUBCCFwAAQEEIXgAAAAVhSLYYQJwa/UyPnqZ18iU3atfjz4y5OfzgDt1+2dklVtTaGJINDF3bb/FiAHFa9DM9eppWdeiSpF2P9+nkS24sqaLWxpBsoD5tH7wAtJfq0DXYOmpjSDZQH4IXAKBhDMkG6kPwAgA0jCHZQH0IXgDayuEHd9S1jtoYkg3Up+2DFwOI06Kf6dHTtG6/7Ox9QhZnNTaOIdlAfRiSDQAAkBBDsgEAAJoAwQsAAKAgBC8AAICCELwAAAAKQvACAAAoCEOyxQDi1OhnevQ0rRMu/o7+
2PfMGd2HdFh3X35OiRW1tkVd3Vqxbov6ItRha+70CVo8Z2rZZQFNqe23eDGAOC36mR49Tas6dEnSH/tCJ1z8nZIqam2Lurq1bO1m9eWXJuqL0LK1m7Woq7vkyoDm1PbBC0B7qQ5dg62jthXrttS1DrQ7ghcAoGF9A1yEe6B1oN0RvAAADeuw61oH2h3BC0BbOaSj/0Aw0Dpqmzt9Ql3rQLtr++DFAOK06Gd69DStuy8/Z5+QxVmNjVs8Z6rmz5i4ZwtXh635MyZyViMwAIZkAwAAJMSQbAAAgCZA8AIAACgIwQsAAKAgBC8AAICCELwAAAAKwpBsMYA4NfqZHj1N66wrV+mebY/tuX38UYfqpvefWV5BLW7e0jVavWnHntuzJo/R8gUzS6wIaF5tv8WLAcRp0c/06Gla1aFLku7Z9pjOunJVOQW1uOrQJUmrN+3QvKVrSqoIaG5tH7wAtJfq0DXYOmqrDl2DrQPtjuAFAABQEIIXAABAQQheANrK8UcdWtc6aps1eUxd60C7a/vgxQDitOhnevQ0rZvef+Y+IYuzGhu3fMHMfUIWZzUCA2NINgAAQEIMyQYAAGgCBC8AAICCELwAAAAKQvACAAAoyLAFL9tfsL3N9h0Va5fa7rH9s/zrnOF6fwAAgGYznEOyr5H0aUlfrlr/ZER8fBjft24MIE6LfqZHT9M6+ZIbtevxvj23Dz+4Q7dfdnaJFQF7W9TVrRXrtqgvQh225k6foMVzppZdFhIYti1eEfFDSU0/rIsBxGnRz/ToaVrVoUuSdj3ep5MvubGkioC9Lerq1rK1m9WXX+6pL0LL1m7Woq7ukitDCmUc4/VO27fnuyL/pIT3B9DGqkPXYOtA0Vas21LXOlpL0cHrc5ImSzpF0q8kfWKgB9q+wPZ62+u3b99eVH0AAJSqb4ALmw+0jtZSaPCKiIcjoi8inpa0VNLpNR57VURMi4hpY8eOLa5IAABK1GHXtY7WUmjwsn1Mxc2/knTHQI8FgOFw+MEdda0DRZs7fUJd62gtw3k5iRWS1kiaYvsh22+R9DHb3bZvl/QySe8brvcfKgYQp0U/06Onad1+2dn7hCzOakQzWTxnqubPmLhnC1eHrfkzJnJW4wjBkGwAAICEGJINAADQBAheAAAABSF4AQAAFITgBQAAUBCCFwAAQEGGc0h2y2AAcVr0M70XXHSDnqo4AflAS/deQU8bxQDitOYtXaPVm54ZzTtr8hgtXzCzxIqA5tX2W7wYQJwW/UyvOnRJ0lORraN+DCBOqzp0SdLqTTs0b+makioCmlvbBy+g2VWHrsHWURsDiNOqDl2DrQPtjuAFoK0wgBhAmQheANoKA4gBlIngBTS5AwfIAwOtozYGEKc1a/KYutaBdtf2wYsBxGnRz/TuveLcfUIWZzU2jgHEaS1fMHOfkMVZjcDAGJINAACQEEOyAQAAmgDBCwAAoCAELwAAgIIQvAAAAArCrEYxWzA15gqm17WhR0tWbtTWnb0aN7pTC2dP0ZxTx5ddFiCJ30+gHjW3eNnusL28qGLKwGzBtJgrmF7Xhh5ddF23enb2KiT17OzVRdd1q2tDT9mlAfx+AnWqGbwiok/SsbYPKqgetDjmCqa3ZOVG9T7Zt9da75N9WrJyY0kVAc/g9xOoz1B2Nd4nabXt6yU9tnsxIq4ctqoA7LF1Z29d60CR+P0E6jOUg+s3Sfp2/tjDKr4AFGDc6M661oEi8fsJ1GfQLV4RcVkRhWBkOND971ZkrmDjFs6eoouu695rd07nqA4tnD2lxKqADL+fQH0G3eJl+xbb36/+KqK4IjBbMC3mCqY359TxuuK8qRo/ulOWNH50p644bypnjaEp8PsJ1GfQWY22X1Jx8xBJfy3pqYj40HAWVolZjQAAoFXUmtU4lF2Nt1Utrbb9kySVAQAAtJFBg5ftMRU3D5D0EklHDFtFAAAAI9RQLidxm6SQZElPSbpf0luGsygAAICRaCi7Go8rohAA
AICRbii7GkdJ+gdJL82XVkn6j4h4chjrAgAAGHGGsqvxc5JGSfpsfvsN+dpbh6uoojEkO62TL7lRux5/5po+hx/codsvO7vEilofQ4gBYGQYypXrT4uIN0XE9/OvN0s6bbgLKwpDstOqDl2StOvxPp18yY0lVdT6GEIMACPHUIJXn+3Ju2/Yfr6kvhqPRxurDl2DrWNwDCEGgJFjKLsaF0q6xfZ9ys5sPFbSm4e1KgB7MIQYAEaOoZzVeLPt4yXtHry1MSIeH96yAOw2bnSnevoJWQwhBoDWM5RdjVJ20dSTJJ0i6fW23zh8JaGVHX5wR13rGNzC2VPUOWrv/jGEGABa01CGZP+npI9LOkPZQfWnSep3/lArYkh2WrdfdvY+IYuzGvcPQ4gBYOQYypDsuySdGIM9cBgxJBsAALSKWkOyh7Kr8Q5Jz01bEgAAQPsZ8OB62/+lbEbjYZLutP0TSXsOqo+I1wx/eQAAACNHrbMaP15YFQAAAG1gwOAVET8oshAAAICRbqiXkwAAAMB+GsqV60c8hjqnRT/Tm7d0jVZv2rHn9qzJY7R8wcwSKwIANGIo1/H6S9sjdssYQ53Top/pVYcuSVq9aYfmLV1TUkUAgEYNJVC9XtI9tj9m+4ThLqhoDHVOi36mVx26BlsHADSvQYNXRMyXdKqkTZKusb3G9gW2Dxv26gAAAEaQIe1CjIhdkr4u6auSjpH0V5J+avtdw1gbAADAiDKUY7xeY/ubklZJGiXp9Ih4laQXSfrA8JY3/BjqnBb9TG/W5DF1rQMAmtdQtnj9taRPRsTUiFgSEdskKSL+IOktw1pdARjqnBb9TG/5gpn7hCzOagSA1jTokOxmwJBsAADQKvZrSLbtGbZvtf2o7Sds99nelb5MAACAkW0ouxo/LWmupHskdUp6q6TPDGdRAAAAI9FQz2q8V1JHRPRFxBclccAOAABAnYYyMugPtg+S9DPbH5P0KzHjEQAAoG5DCVBvyB/3TkmPSZqg7ExHAAAA1GHQLV4R8aDtsfn3lw31hW1/QdKrJW2LiJPytTGSviZpkqQHJL0uIn5Xf9lpTbrwhn3WHvjouSVUMjLQz/QWdXVrxbot6otQh6250ydo8ZypZZcFAKjTgFu8nLnU9m8kbZT0S9vbbX9kiK99jfY9FuxCSTdHxPGSbs5vl6q/kFBrHbXRz/QWdXVr2drN6ssv/dIXoWVrN2tRV3fJlQEA6lVrV+P7JM2SdFpEjImIP5E0XdIs2+8b7IUj4oeSqqf4vlbSl/LvvyRpTv0lA+1lxbotda0DAJpXreD1BklzI+L+3QsRcZ+k+ZLe2OD7HR0Rv8q//7Wkowd6YD6Ie73t9du3b2/w7YDW1zfARY4HWgcANK9awWtURPymejEitiub2bhfIrtk/oB/OSLiqoiYFhHTxo4du79vB7SsDruudQBA86oVvJ5o8L5aHrZ9jCTl/9zW4OsAbWPu9Al1rQMAmlet4PUi27v6+XpEUqOnU10v6U3592+S9K0GXyeZgc624yy8xtDP9BbPmar5Mybu2cLVYWv+jImc1QgALWjYhmTbXiHpTElHSnpY0iWSuiRdK2mipAeVXU6i+gD8fTAkGwAAtIpaQ7KHcuX6hkTE3AHuevlwvScAAEAzY/QPAABAQQheAAAABSF4AQAAFITgBQAAUJBhO7i+lTDUOS36md5xF96w19WGLel+egoALaftt3gx1Dkt+pledeiSspEPx9FTAGg5bR+8gGY30JX2mNQIAK2H4AUAAFAQghcAAEBBCF5Ak3Od6wCA5tX2wYuhzmnRz/Tu/+i5+4QszmoEgNY0bEOyU2JINgAAaBW1hmS3/RYvAACAohC8AAAACkLwAgAAKAjBCwAAoCAELwAAgIIwJFsMdU6NfqZ3wsXf0R/7njkD+ZAO6+7LzymxotY2b+kard60Y8/tWZPHaPmCmSVW1NoWdXVrxbot6otQh6250ydo8ZypZZcFNKW23+LFUOe0
6Gd61aFLkv7YFzrh4u+UVFFrqw5dkrR60w7NW7qmpIpa26Kubi1bu1l9+aWJ+iK0bO1mLerqLrkyoDm1ffACml116BpsHbVVh67B1lHbinVb6loH2h3BCwDQsL4BLsI90DrQ7gheAICGdbj/qaEDrQPtjuAFNLlDOvr/AzbQOmqbNXlMXeuobe70CXWtA+2u7YMXQ53Top/p3X35OfuELM5qbNzyBTP3CVmc1di4xXOmav6MiXu2cHXYmj9jImc1AgNgSDYAAEBCDMkGAABoAgQvAACAghC8AAAACkLwAgAAKAjBCwAAoCAMyRZDnVOjn+nR07S6NvRoycqN2rqzV+NGd2rh7Cmac+r4sstqWfQTGLq23+LFUOe06Gd69DStrg09uui6bvXs7FVI6tnZq4uu61bXhp6yS2tJ9BOoT9sHLwDtZcnKjep9sm+vtd4n+7Rk5caSKmpt9BOoD8ELQFvZurO3rnXURj+B+hC8ALSVcaM761pHbfQTqA/BC0BbWTh7ijpHdey11jmqQwtnTympotZGP4H6tH3wYqhzWvQzPXqa1pxTx+uK86Zq/OhOWdL40Z264rypnIXXIPoJ1Ich2QAAAAkxJBsAAKAJELwAAAAKQvACAAAoCMELAACgIAQvAACAgjAkWwwgTo1+pnfchTeo8vxjS7qfnjZsUVe3Vqzbor4IddiaO32CFs+ZWnZZLYt+AkPX9lu8GECcFv1Mrzp0SVLk66jfoq5uLVu7WX35pXT6IrRs7WYt6uouubLWRD+B+rR98AKa3UBX2mv+K/A1pxXrttS1jtroJ1AfgheAttI3wEWjB1pHbfQTqA/BC0Bb6bDrWkdt9BOoD8ELaHID/fniz1pj5k6fUNc6aqOfQH3aPngxgDgt+pne/R89d5+QxVmNjVs8Z6rmz5i4Z4tMh635MyZyFl6D6CdQH4ZkAwAAJMSQbAAAgCZA8AIAACgIwQsAAKAgBC8AAICClDKr0fYDkh6R1CfpqYEOQCsKswXTop/pnXzJjdr1eN+e24cf3KHbLzu7xIpaG7MFAZSlzC1eL4uIU5oxdNVaR230M73q0CVJux7v08mX3FhSRa2N2YIAysSuRqDJVYeuwdZRG7MFAZSprOAVkr5r+1jI5tQAAA0+SURBVDbbF/T3ANsX2F5ve/327dsLLg/ASMVsQQBlKit4nRERL5b0KknvsP3S6gdExFURMS0ipo0dO7b4CgGMSMwWBFCmUoJXRPTk/9wm6ZuSTi+jDqAVHH5wR13rqI3ZggDKVHjwsn2o7cN2fy/plZLuKLqO3ZgtmBb9TO/2y87eJ2RxVmPjmC0IoEyFz2q0/XxlW7mk7HIWX4mIy2s9h1mNAACgVdSa1Vj4dbwi4j5JLyr6fQEAAMrG5SQAAAAKQvACAAAoCMELAACgIAQvAACAgpQyJLvZMNQ5LfqZHkOy02JIdlr0M72uDT1asnKjtu7s1bjRnVo4e4rmnDq+7LJaVjP1s+23eDHUOS36mR5DstNiSHZa9DO9rg09uui6bvXs7FVI6tnZq4uu61bXhp6yS2tJzdbPtg9eQLNjSHZaDMlOi36mt2TlRvU+ufe/371P9mnJyo0lVdTamq2fBC8AbYUh2WnRz/S27uytax21NVs/CV4A2gpDstOin+mNG91Z1zpqa7Z+EryAJseQ7LQYkp0W/Uxv4ewp6hy197/fnaM6tHD2lJIqam3N1s+2D14MdU6LfqbHkOy0GJKdFv1Mb86p43XFeVM1fnSnLGn86E5dcd5UzmpsULP1s/Ah2Y1gSDYAAGgVtYZkt/0WLwAAgKIQvAAAAApC8AIAACgIwQsAAKAgBC8AAICCMCRbDHVOjX6mR0/Tmrd0jVZv2rHn9qzJY7R8wcwSK2ptzTSAeKSgpyNX22/xYqhzWvQzPXqaVnXokqTVm3Zo3tI1JVXU2pptAPFIQE9HtrYPXgDaS3XoGmwdtTXbAOKRgJ6ObAQvAEDDmm0A8UhAT0c2ghcA
oGHNNoB4JKCnIxvBC0BbmTV5TF3rqK3ZBhCPBPR0ZGv74MVQ57ToZ3r0NK3lC2buE7I4q7FxzTaAeCSgpyMbQ7IBAAASYkg2AABAEyB4AQAAFITgBQAAUBCCFwAAQEEIXgAAAAVhSLYYQJwa/UyPnqY1/fKb9PAjT+y5ffRhB2ndxWeVWFFrY6AzMHRtv8WLAcRp0c/06Gla1aFLkh5+5AlNv/ymkipqbQx0BurT9sELQHupDl2DraM2BjoD9SF4AQAaxkBnoD4ELwBAwxjoDNSH4AWgrRx92EF1raM2BjoD9Wn74MUA4rToZ3r0NK11F5+1T8jirMbGMdAZqA9DsgEAABJiSDYAAEATIHgBAAAUhOAFAABQEIIXAABAQQheAAAABWFIthhAnBr9TI+epsWQ7LQWdXVrxbot6otQh6250ydo8ZypZZcFNKW23+LFAOK06Gd69DQthmSntairW8vWblZffmmivggtW7tZi7q6S64MaE5tH7wAtBeGZKe1Yt2WutaBdkfwAgA0rG+Ai3APtA60O4IXAKBhHXZd60C7I3gBaCsMyU5r7vQJda0D7a7tgxcDiNOin+nR07QYkp3W4jlTNX/GxD1buDpszZ8xkbMagQEwJBsAACAhhmQDAAA0AYIXAABAQQheAAAABSF4AQAAFKSU4GX7bNsbbd9r+8IyagAAACha4UOybXdI+oyksyQ9JOlW29dHxJ1F17IbA4jTop/p0dO0zrpyle7Z9tie28cfdahuev+Z5RUEoG2UscXrdEn3RsR9EfGEpK9Kem0JdUhiAHFq9DM9eppWdeiSpHu2PaazrlxVTkEA2koZwWu8pMrpqQ/lawAw7KpD12DrAJBS0x5cb/sC2+ttr9++fXvZ5QAAAOy3MoJXj6TKIV7Py9f2EhFXRcS0iJg2duzYwooDAAAYLmUEr1slHW/7ONsHSfo7SdeXUAeANnT8UYfWtQ4AKRUevCLiKUnvlLRS0l2Sro2IXxRdx24MIE6LfqZHT9O66f1n7hOyOKsRQFEYkg0AAJAQQ7IBAACaAMELAACgIAQvAACAghC8AAAACkLwAgAAKAjBCwAAoCAELwAAgIIQvAAAAApC8AIAACgIwQsAAKAgLTEyyPZ2SQ8W8FZHSvpNAe/TLuhnevQ0LfqZFv1Mj56mVVQ/j42Isf3d0RLBqyi21w80Wwn1o5/p0dO06Gda9DM9eppWM/STXY0AAAAFIXgBAAAUhOC1t6vKLmCEoZ/p0dO06Gda9DM9eppW6f3kGC8AAICCsMULAACgIAQvSbbPtr3R9r22Lyy7nlZn+wu2t9m+o+xaRgLbE2zfYvtO27+w/Z6ya2p1tg+x/RPbP897elnZNY0Etjtsb7D97bJraXW2H7DdbftntteXXc9IYHu07a/bvtv2XbZnllJHu+9qtN0h6ZeSzpL0kKRbJc2NiDtLLayF2X6ppEclfTkiTiq7nlZn+xhJx0TET20fJuk2SXP4HW2cbUs6NCIetT1K0o8lvSci1pZcWkuz/X5J0yQdHhGvLrueVmb7AUnTIoJreCVi+0uSfhQRn7d9kKRnRcTOoutgi5d0uqR7I+K+iHhC0lclvbbkmlpaRPxQ0o6y6xgpIuJXEfHT/PtHJN0laXy5VbW2yDya3xyVf7X3/4XuJ9vPk3SupM+XXQtQzfYRkl4q6WpJiognyghdEsFLyv6Abam4/ZD4o4YmZXuSpFMlrSu3ktaX7xb7maRtkm6KCHq6fz4l6UOSni67kBEiJH3X9m22Lyi7mBHgOEnbJX0x3x3+eduHllEIwQtoEbafLekbkt4bEbvKrqfVRURfRJwi6XmSTrfNbvEG2X61pG0RcVvZtYwgZ0TEiyW9StI78kM40LgDJb1Y0uci4lRJj0kq5ZhugpfUI2lCxe3n5WtA08iPQ/qGpOURcV3Z9Ywk+e6GWySdXXYtLWyWpNfkxyV9VdJf2F5WbkmtLSJ68n9uk/RNZYfFoHEPSXqoYsv2
15UFscIRvLKD6Y+3fVx+sN3fSbq+5JqAPfIDwa+WdFdEXFl2PSOB7bG2R+ffdyo7uebucqtqXRFxUUQ8LyImKftv6PcjYn7JZbUs24fmJ9Io3x32SkmcJb4fIuLXkrbYnpIvvVxSKScoHVjGmzaTiHjK9jslrZTUIekLEfGLkstqabZXSDpT0pG2H5J0SURcXW5VLW2WpDdI6s6PSZKkD0fEd0qsqdUdI+lL+VnNB0i6NiK4BAKaxdGSvpn9P5cOlPSViLix3JJGhHdJWp5vZLlP0pvLKKLtLycBAABQFHY1AgAAFITgBQAAUBCCFwAAQEEIXgAAAAUheAEAABSE4AWgbrbD9icqbn/Q9qWJXvsa23+T4rUGeZ+/tX2X7VuG+73y9xtt+x8rbo+z/fUi3htA8yB4AWjE45LOs31k2YVUsl3PtQnfImlBRLxsuOqpMlrSnuAVEVsjYtgDJoDmQvAC0IinJF0l6X3Vd1RvsbL9aP7PM23/wPa3bN9n+6O259n+ie1u25MrXuYVttfb/mU+B3D3UOsltm+1fbvtt1W87o9sX69+rkRte27++nfY/td87SOSzpB0te0lVY8fUp351e+/kddzq+1Z+fqltr9ge1X+/HfnL/1RSZNt/yz/HJNs35E/5xDbX8xff4Ptl+Xr59u+zvaNtu+x/bGKXlyTf6Zu2/v8HAA0p7a/cj2Ahn1G0u27w8AQvUjSn0raoezK0Z+PiNNtv0fZVaXfmz9ukrLZdJMl3WL7BZLeKOn3EXGa7YMlrbb93fzxL5Z0UkTcX/lmtsdJ+ldJL5H0O0nftT0nIv7Z9l9I+mBErG+wzn+T9MmI+LHticqmX/xp/vwTJL1M0mGSNtr+nLKBvCflg7lle1LF+71DUkTEVNsn5HW+ML/vFEmnKtvKuNH2v0s6StL4iDgpf63RNXoOoIkQvAA0JCJ22f6ypHdL6h3i026NiF9Jku1NknYHp25lQWW3ayPiaUn32L5PWZB5paSTK7amHSHpeElPSPpJdejKnSZpVURsz99zuaSXSupKUOcrJJ2Yj3WRpMNtPzv//oaIeFzS47a3KRsBU8sZkv5dkiLibtsPStodvG6OiN/ntdwp6VhJv5D0/DyE3VBRH4AmR/ACsD8+Jemnkr5YsfaU8sMYbB8g6aCK+x6v+P7pittPa+//HlXPMgtJlvSuiFhZeYftMyU91lj5AxpKnQdImhERf6yqp/r5fdq//9bu81oR8TvbL5I0W9LbJb1O0t/vx3sAKAjHeAFoWETskHStsgPVd3tA2a49SXqNpFENvPTf2j4gP57q+ZI2KtuV9w+2R0mS7RfaPnSQ1/mJpD+3fWQ+EHuupB80UE9/vqtst6Pyek4Z5PGPKNv12J8fSZqXv84LJU1U9pn7lZ/UcEBEfEPSImW7WgG0AIIXgP31CUmVZzcuVRZ2fi5pphrbGrVZWWj6b0lvz7cqfV7ZwfM/zQ9K/w8NsiUp3114oaRbJP1c0m0R8a0G6unPuyVNyw/0v1PZlqdatfxW2XFpd1Qf0C/ps5IOsN0t6WuSzs93VQ5kvKRVtn8maZmkixr+FAAK5YjqLfoAAAAYDmzxAgAAKAjBCwAAoCAELwAAgIIQvAAAAApC8AIAACgIwQsAAKAgBC8AAICCELwAAAAK8v8BGAnb7ptrn1kAAAAASUVORK5CYII=",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "%matplotlib inline\n",
- "plt.figure(figsize=(10, 6))\n",
- "\n",
- "# specify the type of plot and the two\n",
- "# variables to be plotted against one another\n",
- "plt.scatter(df.n_mentions, df.day_hour)\n",
- "\n",
- "# give a title to the plot\n",
- "plt.title('Number of mentions vs hour of the day')\n",
- "\n",
- "# give a label to the axes\n",
- "plt.ylabel(\"Day hour\")\n",
- "plt.xlabel(\"Number of mentions\")\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Bar charts\n",
- "\n",
-    "Bar charts are useful for plotting categorical data, e.g. the number of observations in each category."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 170,
- "metadata": {},
- "outputs": [],
- "source": [
- "plt.bar?"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 343,
- "metadata": {},
- "outputs": [],
- "source": [
- "tweets_by_weekday = df.groupby(df.created_at.dt.weekday)[['text']].count()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 344,
- "metadata": {},
- "outputs": [],
- "source": [
- "week_days = [\n",
- " \"Mon\",\n",
- " \"Tue\",\n",
- " \"Wed\",\n",
- " \"Thur\",\n",
- " \"Fri\",\n",
- " \"Sat\",\n",
- " \"Sun\"\n",
- "]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 360,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfQAAAGDCAYAAADd8eLzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3deZglZX328e8toKCMso2E1SGRuEYJjoq4oBIXRIVEcFcwRDTRhLwaFRW3l6jkjUpCTAwoJmiMSFAQhUQQBVQUGRRZXOIEIawyKMsAig783j+qWs+03T2nmzm9PP39XFdfXfVUnapf9enr3KeeU6eeVBWSJGlhu8dcFyBJku4+A12SpAYY6JIkNcBAlySpAQa6JEkNMNAlSWqAga5FL8mBSb4613XMRJJK8sC5rmM6klye5A/muo7pSnKvJLcm2Xaua5EmYqBrUehD5Gf9C/LYzwdHvM8n94F70rj2R/btZ414/95kAkjykoHn/GdJ7hr8Pxh2O1V1R1VtWlXX9Ns9Pslh4/Z1XZInrO9jkIZhoGsxeU7/gjz289pZ2Ocq4HFJthxoOwD471nYt4Cq+sTYcw7sBVwz+H8w1/WNSXKPJL4ma8b855HGSbJ7kvOT3Nz/3n1g2VlJDk/ytSSrk5yeZKspNvcL4GTghf3jNwBeAHxiYJvL+jP2Dcft50/66QcmObuv54Ykn5qk7ickuTLJkydYdmCSy/qaf5TkJROss3F/BrtVP//WJGuS3LefPzzJ3/XT90ryviT/m+THSf45ySYD23p2kguT3JTk3CSPmKTmh/T1vGiS5evzuZhQkj9N8h8D81cm+fjA/PVJHtz/fSrJ9kn+Ange8Lb+TP8/+m3cHzi9b/uL/vFPTHJe/7f4VpLHD2z7G0n+b5LzgNsBu/M1Ywa6NCDJFsCpwFHAlsAHgFPHnWG/GHgF3Yv3PYG/WsdmPwa8vJ9+BnAJcM00yjocOB3YHNge+IcJ6n4m8EngeVV1FkBVpV92n/549qqqJcDuwIXjt1FVPwfOB/bom/YArgAePzB/dj99BPC7wC7AA4HtgLf3+/t94KPAq+j+hkcDpyS517iadwW+APx5VX1ygmMaxXMxkbPHjjnJbwNrgCf08w8F7qqq7w8+oKqOAj4NHN6f6e9fVfsD1wNP79uOSrKM7g3dW4EtgMOAk5NsPrC5l9L9fywBrptB/RJgoGtxObk/Sxr7eeUE6+wN/LCqPl5Va/qg+T7wnIF1/qWq/ruqfgacQBdqk6qqc4EtkjyI7oX7Y9Os+5fAA4Btq+rnVTX+Ar796UJzr6r65iTbuAt4eJJNquraqrp0kvXOBvboewseQRemeyTZGHg0cE6SAAcD/6eqflpVq4H30PdC9MuOrqrzqurOqjoOuAPYbWA/TwROAV5eVZ+fpJb1/lxMpKq+C78K7ycBnwNW92G8B3DOdLc54ADgM1X1xaq6q6pOA74LPH1gnY9U1Q+q6pdVteZu7EuLnIGuxWTfqtps4OfDE6yzLd1Z6aAr6M5AxwyeRd0ODPM57MeB1wJPAU5ax7rjvREI8M0klyb543HL/xI4oaoumejBVXUbXTf/q4Frk5ya5MGT7Ots4MnArsDFwBl0obYbsLKqfgIsBe4NXDD25gj4r74dujcfrx988wTswNrdya8Gzh3rTZjEqJ6LiZxDd9xPovsbnEV33IO9EjPxAOCl4/4Wy1n7b3Hl3di+9CsGurS2a+hehAftCFx9N7f7ceDPgNOq6vZxy27rf997oO23xiaq6rqqemVVbUvXjf1PWfuravsD+yY5ZLKdV9UXquppwDZ0Z7kTvZkBOBd4EPCHwNn92euOwLP4dbDdAPwMeNjAm6P7DVxgdiXw7nFvnu49rlv91cCOSY6crGZG91xMZOyNzBPpwn2sG36qQJ/oWwTj266kOwMf/Fvcp6qOnOIx0owY6NLaTgN+N8mLk2yY5AXA
Q4HJuoWHUlU/oguHt06wbBVdSL00yQb9GfjvjC1Psn+S7fvZG+kC4K6BTVwD7AkckuRPx28/ydZJ9uk/S78DuHXc4wdruR24AHgNvw6yc+kC+Ox+nbvo3hAcmeT+/T62S/KMfv0PA69O8th07pNk7yRLBna1Gngm8KQkR0xUCyN6LiZxdl/PL/rn42y6i942Aib7eOLHwG+vo+04YP8ke/bP7Sb99G8hrWcGuhaTz2Xt76H/Rtd336X8bOD1wE/ourufXVU33N2dV9VXx77DPIFXAm/o9/kwuhAd82jgvHTfmT4FOKSqLhu37f+lC/VD018dP+AewOvogv+ndG8sfiP4B5xNF2TfHJhfwtqfJb8JWAl8I8ktwBfpzuypqhX98XyQ7g3ISuDA8TupqpuApwF7JTl8guUjey4mcDHdtQrn9Pu+ge7s+itVNdkZ9DHAo/uu9OP7tncD7+7bXts/T88D3kXXs3EFcAi+9moEMvn/qiRJWih8lyhJUgMMdEmSGmCgS5LUAANdkqQGGOiSJDVgw3WvMn9ttdVWtWzZsrkuQ5KkWXHBBRfcUFVLJ1q2oAN92bJlrFixYq7LkCRpViQZfzvkX7HLXZKkBhjokiQ1wECXJKkBBrokSQ0w0CVJaoCBLklSAwx0SZIaYKBLktQAA12SpAYY6JIkNcBAlySpAQa6JEkNMNAlSWrAgh5tTZKma9mhp87Zvi8/Yu8527fa5xm6JEkNMNAlSWqAgS5JUgMMdEmSGmCgS5LUAANdkqQGGOiSJDXAQJckqQEGuiRJDTDQJUlqgIEuSVIDDHRJkhpgoEuS1ICRBnqSy5NcnOTCJCv6ti2SnJHkh/3vzfv2JDkqycokFyXZdZS1SZLUktk4Q39KVe1SVcv7+UOBM6tqZ+DMfh5gL2Dn/udg4EOzUJskSU2Yiy73fYDj+unjgH0H2j9WnW8AmyXZZg7qkyRpwRl1oBdwepILkhzct21dVdf209cBW/fT2wFXDjz2qr5NkiStw4Yj3v4TqurqJPcHzkjy/cGFVVVJajob7N8YHAyw4447rr9KJUlawEZ6hl5VV/e/rwdOAh4D/HisK73/fX2/+tXADgMP375vG7/NY6pqeVUtX7p06SjLlyRpwRhZoCe5T5IlY9PA04FLgFOAA/rVDgA+20+fAry8v9p9N+Dmga55SZI0hVF2uW8NnJRkbD//XlX/leR84IQkBwFXAM/v1z8NeBawErgdeMUIa5MkqSkjC/Squgx45ATtPwH2nKC9gNeMqh5JklrmneIkSWqAgS5JUgMMdEmSGmCgS5LUAANdkqQGGOiSJDXAQJckqQEGuiRJDTDQJUlqgIEuSVIDDHRJkhpgoEuS1AADXZKkBhjokiQ1wECXJKkBBrokSQ0w0CVJaoCBLklSAwx0SZIaYKBLktQAA12SpAYY6JIkNcBAlySpAQa6JEkNMNAlSWqAgS5JUgMMdEmSGmCgS5LUAANdkqQGGOiSJDXAQJckqQEGuiRJDTDQJUlqgIEuSVIDDHRJkhpgoEuS1AADXZKkBhjokiQ1YMO5LkCSNDuWHXrqnO378iP2nrN9LxaeoUuS1ADP0CVJTVssPROeoUuS1AADXZKkBhjokiQ1wECXJKkBBrokSQ0w0CVJaoCBLklSAwx0SZIaYKBLktSAkQd6kg2SfDvJ5/v5nZKcl2Rlkk8luWfffq9+fmW/fNmoa5MkqRWzcYZ+CPC9gfm/AY6sqgcCNwIH9e0HATf27Uf260mSpCGMNNCTbA/sDXyknw/wVODEfpXjgH376X36efrle/brS5KkdRj1GfrfAW8E7urntwRuqqo1/fxVwHb99HbAlQD98pv79deS5OAkK5KsWLVq1ShrlyRpwRhZoCd5NnB9VV2wPrdbVcdU1fKqWr506dL1uWlJkhasUQ6f+njguUmeBWwM3Bf4e2CzJBv2Z+HbA1f3618N7ABclWRD4H7AT0ZYnyRJzRjZGXpVvbmqtq+qZcAL
gS9V1UuALwP79asdAHy2nz6ln6df/qWqqlHVJ0lSS+bie+hvAl6XZCXdZ+TH9u3HAlv27a8DDp2D2iRJWpBG2eX+K1V1FnBWP30Z8JgJ1vk5sP9s1CNJUmu8U5wkSQ0w0CVJaoCBLklSA2blM3TNf8sOPXXO9n35EXvP2b4lqRWeoUuS1AADXZKkBtjlrkXNjxoktcIzdEmSGmCgS5LUAANdkqQGGOiSJDXAQJckqQEGuiRJDTDQJUlqgIEuSVIDDHRJkhpgoEuS1ABv/SotUt72VmqLZ+iSJDXAQJckqQEGuiRJDTDQJUlqgIEuSVID1hnoSfZPsqSfPizJZ5LsOvrSJEnSsIY5Q39bVa1O8gTgD4BjgQ+NtixJkjQdwwT6nf3vvYFjqupU4J6jK0mSJE3XMIF+dZKjgRcApyW515CPkyRJs2SYYH4+8AXgGVV1E7AF8IaRViVJkqZlmEA/uqo+U1U/BKiqa4GXjbYsSZI0HcME+sMGZ5JsADxqNOVIkqSZmDTQk7w5yWrgEUluSbK6n78e+OysVShJktZp0kCvqvdW1RLgb6vqvlW1pP/ZsqrePIs1SpKkdRimy/2tSV6a5G0ASXZI8pgR1yVJkqZhmED/R+BxwIv7+Vv7NkmSNE9sOMQ6j62qXZN8G6CqbkzijWUkSZpHhjlD/2V/ZXsBJFkK3DXSqiRJ0rQME+hHAScBWyd5N/BV4D0jrUqSJE3LOrvcq+oTSS4A9gQC7FtV3xt5ZZIkaWjD3pN9K+D2qvogcEOSnUZYkyRJmqZhxkN/B/AmYOy75xsB/zbKoiRJ0vQMc4b+h8BzgdsAquoaYMkoi5IkSdMzTKD/oqqKX1/lfp/RliRJkqZrmEA/oR8PfbMkrwS+CHx4tGVJkqTpGOYq9/cleRpwC/Ag4O1VdcbIK5MkSUNbZ6AnOQg4p6reMAv1zKllh546p/u//Ii953T/kqSFa5hbv+4IHJ1kGXABcA7wlaq6cIR1SZKkaVjnZ+hV9Y6qeirwMOArwBvogl2SJM0Tw3S5HwY8HtgU+DbwV3TBLkmS5olhutz/CFgDnAqcDXy9qu4YaVWSJGlahuly3xX4A+CbwNOAi5N8ddSFSZKk4Q1z69eHAy8BDgBeAFwNfGmIx22c5JtJvpPk0iTv6tt3SnJekpVJPjU2tnqSe/XzK/vly+7GcUmStKgMc2OZI+hu9XoU8JCqekpVvX2Ix90BPLWqHgnsAjwzyW7A3wBHVtUDgRuBg/r1DwJu7NuP7NeTJElDGCbQv1hV/6+qzq2qXwIkOWRdD6rOrf3sRv1PAU8FTuzbjwP27af36efpl++ZJMMdhiRJi9swgf7yCdoOHGbjSTZIciFwPXAG8D/ATVW1pl/lKmC7fno74EqAfvnNwJYTbPPgJCuSrFi1atUwZUiS1LxJr3JP8iLgxcBOSU4ZWLQE+OkwG6+qO4FdkmwGnAQ8+G7UOrbNY4BjAJYvX153d3uSJLVgqq+tnQtcC2wFvH+gfTVw0XR2UlU3Jfky8Di6QV427M/Ct6e7yI7+9w7AVUk2BO4H/GQ6+5EkabGaNNCr6grgCroQnrYkS4Ff9mG+Cd1X3v4G+DKwH3A83ZXzn+0fcko///V++Zf6YVslSdI6DHNjmZnaBjguyQZ0n9WfUFWfT/Jd4Pgkf01357lj+/WPBT6eZCVdl/4LR1ibJElNGVmgV9VFwO9P0H4Z8JgJ2n8O7D+qeiRJatmkV7knObP/7ffBJUma56Y6Q98mye7Ac5McD6z1nfCq+tZIK5MkSUObKtDfDryN7kr0D4xbNnaDGEmSNA9MdZX7icCJSd5WVYfPYk2SJGma1nlRXFUdnuS5wJP6prOq6vOjLUuSJE3HMKOtvRc4BPhu/3NIkveMujBJkjS8Yb62tjewS1XdBZDkOLrvj79llIVJkqThDTM4C8BmA9P3G0UhkiRp5oY5Q38v8O3+Xuyh+yz90JFWJUmSpmWYi+I+meQs4NF905uq
6rqRViVJkqZlqFu/VtW1dIOnSJKkeWjYz9AlSdI8ZqBLktSAKQM9yQZJvj9bxUiSpJmZMtCr6k7gB0l2nKV6JEnSDAxzUdzmwKVJvgncNtZYVc8dWVWSJGlahgn0t428CkmSdLcM8z30s5M8ANi5qr6Y5N7ABqMvTZIkDWuYwVleCZwIHN03bQecPMqiJEnS9AzztbXXAI8HbgGoqh8C9x9lUZIkaXqGCfQ7quoXYzNJNgRqdCVJkqTpGibQz07yFmCTJE8D/gP43GjLkiRJ0zFMoB8KrAIuBl4FnAYcNsqiJEnS9AxzlftdSY4DzqPrav9BVdnlLknSPLLOQE+yN/DPwP/QjYe+U5JXVdV/jro4SZI0nGFuLPN+4ClVtRIgye8ApwIGuiRJ88Qwn6GvHgvz3mXA6hHVI0mSZmDSM/Qkf9RPrkhyGnAC3Wfo+wPnz0JtkiRpSFN1uT9nYPrHwB799Cpgk5FVJEmSpm3SQK+qV8xmIZIkaeaGucp9J+DPgWWD6zt8qiRJ88cwV7mfDBxLd3e4u0ZbjiRJmolhAv3nVXXUyCuRJEkzNkyg/32SdwCnA3eMNVbVt0ZWlSRJmpZhAv33gJcBT+XXXe7Vz0uSpHlgmEDfH/jtwSFUJUnS/DLMneIuATYbdSGSJGnmhjlD3wz4fpLzWfszdL+2JknSPDFMoL9j5FVIkqS7ZZjx0M+ejUIkSdLMDXOnuNV0V7UD3BPYCLitqu47ysIkSdLwhjlDXzI2nSTAPsBuoyxKkiRNzzBXuf9KdU4GnjGieiRJ0gwM0+X+RwOz9wCWAz8fWUWSJGnahrnKfXBc9DXA5XTd7pIkaZ4Y5jN0x0WXJGmemzTQk7x9isdVVR0+gnokSdIMTHWGftsEbfcBDgK2BAx0SZLmiUkDvarePzadZAlwCPAK4Hjg/ZM9TpIkzb4pv7aWZIskfw1cRBf+u1bVm6rq+nVtOMkOSb6c5LtJLk1yyMA2z0jyw/735n17khyVZGWSi5Lsuh6OT5KkRWHSQE/yt8D5wGrg96rqnVV14zS2vQZ4fVU9lO5GNK9J8lDgUODMqtoZOLOfB9gL2Ln/ORj40HQPRpKkxWqqM/TXA9sChwHXJLml/1md5JZ1bbiqrq2qb/XTq4HvAdvRfeXtuH6144B9++l9gI/1N6/5BrBZkm1mdFSSJC0yU32GPq27yE0lyTLg94HzgK2r6tp+0XXA1v30dsCVAw+7qm+7FkmSNKX1FtqTSbIp8GngL6tqrTP7qip+PfDLsNs7OMmKJCtWrVq1HiuVJGnhGmmgJ9mILsw/UVWf6Zt/PNaV3v8eu8DuamCHgYdv37etpaqOqarlVbV86dKloytekqQFZGSB3o/Mdizwvar6wMCiU4AD+ukDgM8OtL+8v9p9N+Dmga55SZI0hWHu5T5TjwdeBlyc5MK+7S3AEcAJSQ4CrgCe3y87DXgWsBK4ne4775IkaQgjC/Sq+iqQSRbvOcH6BbxmVPVIktSykV8UJ0mSRs9AlySpAQa6JEkNMNAlSWqAgS5JUgMMdEmSGmCgS5LUAANdkqQGGOiSJDXAQJckqQEGuiRJDTDQJUlqgIEuSVIDDHRJkhpgoEuS1AADXZKkBhjokiQ1wECXJKkBBrokSQ0w0CVJaoCBLklSAwx0SZIaYKBLktQAA12SpAYY6JIkNcBAlySpAQa6JEkNMNAlSWqAgS5JUgMMdEmSGmCgS5LUAANdkqQGGOiSJDXAQJckqQEGuiRJDTDQJUlqgIEuSVIDDHRJkhpgoEuS1AADXZKkBhjokiQ1wECXJKkBBrokSQ0w0CVJaoCBLklSAwx0SZIaYKBLktQAA12SpAYY6JIkNcBAlySpASML9CQfTXJ9kksG2rZIckaSH/a/N+/bk+SoJCuTXJRk11HVJUlSi0Z5hv6vwDPHtR0KnFlVOwNn9vMAewE79z8HAx8aYV2SJDVnZIFeVecAPx3XvA9wXD99HLDvQPvHqvMNYLMk
24yqNkmSWjPbn6FvXVXX9tPXAVv309sBVw6sd1Xf9huSHJxkRZIVq1atGl2lkiQtIHN2UVxVFVAzeNwxVbW8qpYvXbp0BJVJkrTwzHag/3isK73/fX3ffjWww8B62/dtkiRpCLMd6KcAB/TTBwCfHWh/eX+1+27AzQNd85IkaR02HNWGk3wSeDKwVZKrgHcARwAnJDkIuAJ4fr/6acCzgJXA7cArRlWXJEktGlmgV9WLJlm05wTrFvCaUdUiSVLrvFOcJEkNMNAlSWqAgS5JUgMMdEmSGmCgS5LUAANdkqQGGOiSJDXAQJckqQEGuiRJDTDQJUlqgIEuSVIDDHRJkhpgoEuS1AADXZKkBhjokiQ1wECXJKkBBrokSQ0w0CVJaoCBLklSAwx0SZIaYKBLktQAA12SpAYY6JIkNcBAlySpAQa6JEkNMNAlSWqAgS5JUgMMdEmSGmCgS5LUAANdkqQGGOiSJDXAQJckqQEGuiRJDTDQJUlqgIEuSVIDDHRJkhpgoEuS1AADXZKkBhjokiQ1wECXJKkBBrokSQ0w0CVJaoCBLklSAwx0SZIaYKBLktQAA12SpAYY6JIkNcBAlySpAQa6JEkNmFeBnuSZSX6QZGWSQ+e6HkmSFop5E+hJNgD+EdgLeCjwoiQPnduqJElaGOZNoAOPAVZW1WVV9QvgeGCfOa5JkqQFYT4F+nbAlQPzV/VtkiRpHVJVc10DAEn2A55ZVX/Sz78MeGxVvXbcegcDB/ezDwJ+MKuFTm0r4Ia5LmIOLNbjhsV77Iv1uGHxHvtiPW6YX8f+gKpaOtGCDWe7kilcDewwML9937aWqjoGOGa2ipqOJCuqavlc1zHbFutxw+I99sV63LB4j32xHjcsnGOfT13u5wM7J9kpyT2BFwKnzHFNkiQtCPPmDL2q1iR5LfAFYAPgo1V16RyXJUnSgjBvAh2gqk4DTpvrOu6GeflRwCxYrMcNi/fYF+txw+I99sV63LBAjn3eXBQnSZJmbj59hi5JkmbIQB9CkkrybwPzGyZZleTzc1nXbEiyZZIL+5/rklw9MH/Pua5vFJIcmeQvB+a/kOQjA/PvT/K6Ibf1ziR/NYo617cpnuubknx3ruubbUnuHPh7XJhk2QTrbJvkxNmvbnSSvDXJpUku6o/7sVOse2CSbWezvlGYzjHPZ/PqM/R57Dbg4Uk2qaqfAU9jgq/UtaiqfgLsAl04AbdW1fvmtKjR+xrwfODvktyD7juo9x1Yvjvwf+aisFGa7Lnug2zGb16TbFhVa9ZHjbPsZ1W1y2QL++O6BthvFmsaqSSPA54N7FpVdyTZCpjqjfuBwCXANbNQ3kjM4JjnLc/Qh3casHc//SLgk2MLkmyR5OT+3d03kjyib39nko8mOSvJZUn+Yg7qHokkD0xy4cD8oUkO66d37s9qL0hyTpLfnbtKZ+Rc4HH99MPoXrBWJ9k8yb2AhwDfSvKGJOf3z/u7xh7cv9v/7yRfpbv5UQs2SPLh/izm9CSbAPT/28v76a2SXN5PH5jklCRfAs6cu7LXr/HHlWRZkkvmuq71aBvghqq6A6Cqbqiqa5K8vf9fvyTJMensBywHPtGf1W4yp5XP3GTHfHkf7iRZnuSsfnrevq4b6MM7Hnhhko2BRwDnDSx7F/DtqnoE8BbgYwPLHgw8g+5e9e9IstEs1TuXjgH+rKoeBbwZ+OAc1zMt/VnXmiQ70p2Nf53u+X4c3QvYxcCTgZ3pntddgEcleVKSR9HdQ2EX4FnAo2f9AEZjZ+Afq+phwE3A84Z4zK7AflW1x0grG51NBrrbTxpoX+jHNZXTgR36N6T/lGTsGD9YVY+uqocDmwDPrqoTgRXAS6pql773ciGa7JinMi9f1+1yH1JVXdR3Pb6I3/xq3RPoX+Cq6kv9Z5FjXbSn9u/87khyPbA13X3qm5RkM2A34NNJxpoX4v/ZuXRhvjvwAbpxBXYHbqbrkn96//Ptfv1N6UJvCXBSVd0OkKSV
myP9qKrGemQuAJYN8Zgzquqnoytp5Cbrcl/oxzWpqrq1f1P6ROApwKfSDWW9OskbgXsDWwCXAp+bu0rXnymOeSrz8nV9Ib7QzqVTgPfRnZ1tOeRj7hiYvpN2/uZrWLuHZ+O+LXTdV5N+9rhAfI0uwH+Prsv9SuD1wC3AvwB7AO+tqqMHH5SBi+kaM/7/eKx7dfD/YONxj7lt1EXNkVaPC4CquhM4CzgrycXAq+h6JZdX1ZX99RXjn+sFbYJjPoCp/7fn5eu6Xe7T81HgXVV18bj2rwAvAUjyZLpAu2WWa5tt1wHb9p8rb0x/fUFV3Qhcm+QPAZLcI8kj57DOmTqX7kKZn1bVnf0Z2WZ03e7n0t3R8I+TbAqQZLsk9wfOAfZNskmSJcBz5qb8WXM58Kh+upmLwxarJA9KsvNA0y78egCsG/r/98HneTVdr9SCNckxX8Ha/9vDfMQ05+bFu4qFoqquAo6aYNE7gY8muQi4ne7dXdOq6udJ3kP3GdrVwODXml4IfKh/J39P4N+A78x6kXfPxXRXt//7uLZNq+oG4PQkDwG+3n+0cCvw0qr6VpJP0R3v9XRjFLTsfcAJ6UZBPHWui9HdtinwD/1HZ2uAlXSjW95E11N1HWv/T/8r8M9JfgY8boF+jj7ZMT8EODbJ4XRn7/Oed4qTJKkBdrlLktQAA12SpAYY6JIkNcBAlySpAQa6JEkNMNClBmU9jhg3wbZvneb6C2bEOWkhM9ClNo3d6Y78esS4hw0s353uBjmSGmGgS20aasQ4gClGjXtpkm/2g5McnWSDwR30o6t9PcnejDPZiHNJXtnv6ztJPp3k3kmWJPnR2AAXSe47OC9pOAa61KBhRoyrql8keToTjxr3EOAFwOP7+/LfSX97Y4AkW9PdGe7tVbXWHeLWMeLcZ/pRux4JfA84qKpW092Ja+yNwQv79X653v4g0iLgrV+ldq1rxDiYfNS4R9Ddx/r8/ta2m9DdyhZgI7oxzl9TVWdPsN8nMvmIcw9P8td098XflO6e+AAfAd4InAy8AnjlTA9aWqwMdKld6xoxDrrR8SYaNe7PgeOq6s0TbHcN3RCqzwAmCvSp/Cuwb1V9J8mBdCMXUlVfS7KsH9xog6q6ZJrblUU6Ig4AAADiSURBVBY9u9yldq1rxDiYfNS4M4H9+mmSbJHkAf1jCvhj4MFJ3jTBfqcacW4J3Wh8GzHQhd/7GN1gOP+CpGkz0KV2jY0Y941xbTf3I8ZRVafThejX+3GgTwSWVNV3gcPoRpW7CDgD2GZsI/340S8CnprkzwZ3WlXfAsZGnPtP1h6d6210n+V/Dfj+uHo/AWwOfPJuHLO0aDnamqR5Icl+wD5V9bK5rkVaiPwMXdKcS/IPwF50V8VLmgHP0CVJaoCfoUuS1AADXZKkBhjokiQ1wECXJKkBBrokSQ0w0CVJasD/B1kBS7YOiQJ8AAAAAElFTkSuQmCC",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "%matplotlib inline\n",
- "plt.figure(figsize=(8, 6))\n",
- "\n",
- "# specify the type of plot and the tick labels\n",
- "# for the x axis (one label per bar)\n",
- "plt.bar(\n",
- " tweets_by_weekday.index,\n",
- " tweets_by_weekday.text,\n",
- " tick_label=week_days,\n",
- " width=0.5\n",
- ")\n",
- "\n",
- "# give a title to the plot\n",
- "plt.title('Elon Musk\\'s week on Twitter')\n",
- "\n",
- "# give a label to the axes\n",
- "plt.ylabel(\"Number of tweets\")\n",
- "plt.xlabel(\"Week day\")\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Box plots\n",
- "\n",
- "![box plot explained](./figures/eda-boxplot.png)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 346,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " text \n",
- " \n",
- " \n",
- " created_at \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " 315 \n",
- " \n",
- " \n",
- " 1 \n",
- " 385 \n",
- " \n",
- " \n",
- " 2 \n",
- " 380 \n",
- " \n",
- " \n",
- " 3 \n",
- " 361 \n",
- " \n",
- " \n",
- " 4 \n",
- " 530 \n",
- " \n",
- " \n",
- " 5 \n",
- " 426 \n",
- " \n",
- " \n",
- " 6 \n",
- " 422 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " text\n",
- "created_at \n",
- "0 315\n",
- "1 385\n",
- "2 380\n",
- "3 361\n",
- "4 530\n",
- "5 426\n",
- "6 422"
- ]
- },
- "execution_count": 346,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "tweets_by_weekday"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 347,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " text \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " count \n",
- " 7.000000 \n",
- " \n",
- " \n",
- " mean \n",
- " 402.714286 \n",
- " \n",
- " \n",
- " std \n",
- " 67.551744 \n",
- " \n",
- " \n",
- " min \n",
- " 315.000000 \n",
- " \n",
- " \n",
- " 25% \n",
- " 370.500000 \n",
- " \n",
- " \n",
- " 50% \n",
- " 385.000000 \n",
- " \n",
- " \n",
- " 75% \n",
- " 424.000000 \n",
- " \n",
- " \n",
- " max \n",
- " 530.000000 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " text\n",
- "count 7.000000\n",
- "mean 402.714286\n",
- "std 67.551744\n",
- "min 315.000000\n",
- "25% 370.500000\n",
- "50% 385.000000\n",
- "75% 424.000000\n",
- "max 530.000000"
- ]
- },
- "execution_count": 347,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "tweets_by_weekday.describe()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 349,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 349,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAMw0lEQVR4nO3db4hl9XnA8e+TXa1iNm6rZpBdyZTGFwNbYmVqhGxhdqUS/xB9kZbYBsUObNOKDQ01rixNUTpE2xemQhGXDHGlcdsQkCxqbILOJd22prq4WsMEuiSKu6y1SrJkNBGVpy/mtzCuszv37sydO/PM9wOXued3zpnzu3D3u4cz909kJpKkWj406AlIkpaecZekgoy7JBVk3CWpIOMuSQWtH/QEAM4///wcHh4e9DSkD3jzzTc555xzBj0NaV4HDhx4PTMvmG/dioj78PAwzz777KCnIX1Ap9NhbGxs0NOQ5hURL59snZdlJKkg4y5JBRl3SSrIuEtSQcZdkgoy7tI89u7dy5YtW7jiiivYsmULe/fuHfSUpJ6siJdCSivJ3r172bVrF5OTk7z33nusW7eO8fFxAG644YYBz07qjmfu0gkmJiaYnJxk27ZtrF+/nm3btjE5OcnExMSgpyZ1zbhLJ5ienmbr1q3vG9u6dSvT09MDmpHUO+MunWBkZIT9+/e/b2z//v2MjIwMaEZS74y7dIJdu3YxPj7O1NQU7777LlNTU4yPj7Nr165BT03qmn9QlU5w/I+mt956K9PT04yMjDAxMeEfU7WqxEr4DtXR0dH0g8O0EvnBYVrJIuJAZo7Ot87LMpJUkHGXpIKMuyQVZNwlqSDjLkkFGXdJKsi4S1JBxl2SCjLuklSQcZekgoy7JBVk3CWpIOMuSQUZd0kqyLhLUkHGXZIKMu6SVJBxl6SCjLskFWTcJamgruIeES9FxH9HxMGIeLaN/UZEfD8i/qf9/PU2HhFxX0QciogXIuLSfj4ASdIH9XLmvi0zL5nzTds7gScz82LgybYMcBVwcbvtAO5fqslKkrqzmMsy1wF72v09wPVzxh/KWU8DGyPiwkUcR5LUo/VdbpfA9yIigQcyczcwlJlH2/pXgaF2fxPwypx9D7exo3PGiIgdzJ7ZMzQ0RKfTOa0HIPXTzMyMz02tSt3GfWtmHomIjwLfj4gfz12ZmdnC37X2H8RugNHR0RwbG+tld2lZdDodfG5qNerqskxmHmk/XwMeAS4D/vf45Zb287W2+RHgojm7b25jkqRlsmDcI+KciNhw/D5wJfAisA+4qW12E/Cddn8fcGN71czlwLE5l28kScugm8syQ8AjEXF8+4cz84mIeAb4VkSMAy8Df9i2fxy4GjgEvAXcvOSzliSd0oJxz8yfAJ+YZ/wN4Ip5xhO4ZUlmJ0k6Lb5DVZIKMu6SVJBxl6SCjLskFWTcJakg4y5JBRl3SSrIuEtSQcZdkgoy7pJUkHGXpIKMuyQVZNwlqSDjLkkFGXdJKsi4S1JBxl2SCjLuklSQcZekgoy7JBVk3CWpIOMuSQUZd0kqyLhLUkHGXZIKMu6SVJBxl6SCjLskFWTcJakg4y5JBRl3SSrIuEtSQcZdkgoy7pJUkHGXpIKMuyQVZNwlqaCu4x4R6yLiuYh4tC0/GBE/jYiD7XZJG4+IuC8iDkXECxFxab8mL0ma3/oetv0iMA18ZM7YbZn57RO2uwq4uN0+CdzffkqSlklXZ+4RsRm4Bvh6F5tfBzyUs54GNkbEhYuYoySpR92euX8N+DKw4YTxiYj4CvAksDMz3wY2Aa/M2eZwGzs6d8eI2AHsABgaGqLT6fQ8eanfZmZmfG5qVVow7hFxLfBaZh6IiLE5q+4AXgXOBHYDtwN3dXvgzNzd9mN0dDTHxsZOvYM0AJ1OB5+bWo26uSzzKeAzEfES8M/A9oj4p8w82i69vA18A7isbX8EuGjO/pvbmCRp
mSwY98y8IzM3Z+Yw8Dngqcz8/PHr6BERwPXAi22XfcCN7VUzlwPHMvPofL9bktQfvbxa5kTfjIgLgAAOAl9o448DVwOHgLeAmxc1Q0lSz3qKe2Z2gE67v/0k2yRwy2InJkk6fb5DVZIKMu6SVJBxl6SCjLskFWTcJakg4y5JBRl3SSrIuEtSQcZdkgoy7pJUkHGXpIKMuyQVZNwlqSDjLkkFGXdJKsi4S1JBi/kmJmnVmf1WyP6b/c4aaXA8c9eakpk93T52+6M972PYtRJ45q5V6xN3fo9jv3yn78cZ3vlY349x7tln8PzfXNn342jtMO5atY798h1euvuavh6j0+kwNjbW12PA8vwHorXFyzKSVJBxl6SCjLskFWTcJakg4y5JBRl3SSrIuEtSQcZdkgoy7pJUkHGXpIKMuyQVZNwlqSDjLkkFGXdJKsi4S1JBxl2SCjLuklRQ13GPiHUR8VxEPNqWfzMifhgRhyLiXyLizDb+a235UFs/3J+pS5JOppcz9y8C03OW7wHuzcyPAz8Dxtv4OPCzNn5v206StIy6intEbAauAb7elgPYDny7bbIHuL7dv64t09Zf0baXJC2Tbr8g+2vAl4ENbfk84OeZ+W5bPgxsavc3Aa8AZOa7EXGsbf/63F8YETuAHQBDQ0N0Op3TfAhay/r9vJmZmVm256b/BrSUFox7RFwLvJaZByJibKkOnJm7gd0Ao6OjuRzfMK9inniMfj9vOp1O348BLMtj0drSzZn7p4DPRMTVwFnAR4B/ADZGxPp29r4ZONK2PwJcBByOiPXAucAbSz5zSdJJLXjNPTPvyMzNmTkMfA54KjP/GJgCPts2uwn4Tru/ry3T1j+Vmbmks5YkndJiXud+O/CliDjE7DX1yTY+CZzXxr8E7FzcFCVJver2D6oAZGYH6LT7PwEum2ebXwF/sARzkySdJt+hKkkFGXdJKsi4S1JBxl2SCjLuklSQcZekgoy7JBVk3CWpoJ7exCStJBtGdvLbe5bhDdB7Ft5ksTaMwOynaktLw7hr1frF9N28dHd/g7hcnwo5vPOxvh9Da4uXZSSpIOMuSQUZd0kqyLhLUkHGXZIKMu6SVJBxl6SCjLskFWTcJakg4y5JBRl3SSrIz5bRqrYsn8nyRP+Pce7ZZ/T9GFpbjLtWrX5/aBjM/uexHMeRlpqXZSSpIOMuSQUZd0kqyLhLUkHGXZIKMu6SVJBxl6SCjLskFWTcJakg4y5JBRl3SSrIuEtSQcZdkgoy7pJU0IJxj4izIuK/IuL5iPhRRNzZxh+MiJ9GxMF2u6SNR0TcFxGHIuKFiLi03w9CkvR+3Xye+9vA9syciYgzgP0R8d227rbM/PYJ218FXNxunwTubz8lSctkwTP3nDXTFs9otzzFLtcBD7X9ngY2RsSFi5+qJKlbXX0TU0SsAw4AHwf+MTN/GBF/BkxExFeAJ4Gdmfk2sAl4Zc7uh9vY0RN+5w5gB8DQ0BCdTmeRD0XqD5+bWo26intmvgdcEhEbgUciYgtwB/AqcCawG7gduKvbA2fm7rYfo6OjOTY21tvMpeXwxGP43NRq1NOrZTLz58AU8OnMPNouvbwNfAO4rG12BLhozm6b25gkaZl082qZC9oZOxFxNvD7wI+PX0ePiACuB15su+wDbmyvmrkcOJaZR+f51ZKkPunmssyFwJ523f1DwLcy89GIeCoiLgACOAh8oW3/OHA1cAh4C7h56actSTqVBeOemS8AvzPP+PaTbJ/ALYufmiTpdPkOVUkqyLhLUkHGXZIKMu6SVJBxl6SCjLskFWTcJakg4y5JBRl3SSrIuEtSQcZdkgoy7pJUkHGXpIKMuyQVZNwlqSDjLkkFGXdJKsi4S1JBxl2SCjLuklSQcZekgoy7JBVk3CWpIOMuSQUZd0kqyLhLUkHrBz0BaTlFRO/73NP7cTKz952kJeSZu9aUzOzpNjU11fM+hl0rgXGXpIKMuyQVZNwlqSDjLkkFGXdJ
Ksi4S1JBxl2SCjLuklRQrIQ3XETE/wEvD3oe0jzOB14f9CSkk/hYZl4w34oVEXdppYqIZzNzdNDzkHrlZRlJKsi4S1JBxl06td2DnoB0OrzmLkkFeeYuSQUZd0kqyLhrzYqIjRHx56e573BE/NFSz0laKsZda9lG4LTiDgwDxl0rlnHXWnY38FsRcTAi/j4ibouIZyLihYi4EyAifrctnxUR50TEjyJiS9v399q+fznQRyHNw1fLaM2KiGHg0czcEhFXAp8F/hQIYB/wd5n5g4j4W+As4GzgcGZ+NSLGgL/KzGsHMnlpAesHPQFphbiy3Z5ryx8GLgZ+ANwFPAP8CviLgcxO6pFxl2YF8NXMfGCedecxG/szmD2Df3M5JyadDq+5ay37BbCh3f9X4E8i4sMAEbEpIj7a1j0A/DXwTeCeefaVVhzP3LVmZeYbEfHvEfEi8F3gYeA/IwJgBvh8RHwaeCczH46IdcB/RMR24N+A9yLieeDBzLx3QA9Dmpd/UJWkgrwsI0kFGXdJKsi4S1JBxl2SCjLuklSQcZekgoy7JBX0/wx1Y9pk3PIIAAAAAElFTkSuQmCC",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "tweets_by_weekday.boxplot()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 175,
- "metadata": {},
- "outputs": [],
- "source": [
- "plt.bar?"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 222,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " created_at \n",
- " text \n",
- " tweet_link \n",
- " tweet_mentions \n",
- " n_mentions \n",
- " week_day \n",
- " day_hour \n",
- " \n",
- " \n",
- " id \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 849636868052275200 \n",
- " 2017-04-05 14:56:29 \n",
- " b'And so the robots spared humanity ... https://t.co/v7JUJQWfCv' \n",
- " https://twitter.com/i/web/status/849636868052275200 \n",
- " [] \n",
- " 0 \n",
- " 2 \n",
- " 14 \n",
- " \n",
- " \n",
- " 848988730585096192 \n",
- " 2017-04-03 20:01:01 \n",
- " b\"@ForIn2020 @waltmossberg @mims @defcon_5 Exactly. Tesla is absurdly overvalued if based on the past, but that's irr\\xe2\\x80\\xa6 https://t.co/qQcTqkzgMl\" \n",
- " https://twitter.com/i/web/status/848988730585096192 \n",
- " [@ForIn2020, @waltmossberg, @mims, @defcon_5] \n",
- " 4 \n",
- " 0 \n",
- " 20 \n",
- " \n",
- " \n",
- " 848943072423497728 \n",
- " 2017-04-03 16:59:35 \n",
- " b'@waltmossberg @mims @defcon_5 Et tu, Walt?' \n",
- " https://twitter.com/i/web/status/848943072423497728 \n",
- " [@waltmossberg, @mims, @defcon_5] \n",
- " 3 \n",
- " 0 \n",
- " 16 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " created_at \\\n",
- "id \n",
- "849636868052275200 2017-04-05 14:56:29 \n",
- "848988730585096192 2017-04-03 20:01:01 \n",
- "848943072423497728 2017-04-03 16:59:35 \n",
- "\n",
- " text \\\n",
- "id \n",
- "849636868052275200 b'And so the robots spared humanity ... https://t.co/v7JUJQWfCv' \n",
- "848988730585096192 b\"@ForIn2020 @waltmossberg @mims @defcon_5 Exactly. Tesla is absurdly overvalued if based on the past, but that's irr\\xe2\\x80\\xa6 https://t.co/qQcTqkzgMl\" \n",
- "848943072423497728 b'@waltmossberg @mims @defcon_5 Et tu, Walt?' \n",
- "\n",
- " tweet_link \\\n",
- "id \n",
- "849636868052275200 https://twitter.com/i/web/status/849636868052275200 \n",
- "848988730585096192 https://twitter.com/i/web/status/848988730585096192 \n",
- "848943072423497728 https://twitter.com/i/web/status/848943072423497728 \n",
- "\n",
- " tweet_mentions n_mentions \\\n",
- "id \n",
- "849636868052275200 [] 0 \n",
- "848988730585096192 [@ForIn2020, @waltmossberg, @mims, @defcon_5] 4 \n",
- "848943072423497728 [@waltmossberg, @mims, @defcon_5] 3 \n",
- "\n",
- " week_day day_hour \n",
- "id \n",
- "849636868052275200 2 14 \n",
- "848988730585096192 0 20 \n",
- "848943072423497728 0 16 "
- ]
- },
- "execution_count": 222,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.head(3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 351,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " day_hour \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " count \n",
- " 2819.000000 \n",
- " \n",
- " \n",
- " mean \n",
- " 12.782547 \n",
- " \n",
- " \n",
- " std \n",
- " 7.611198 \n",
- " \n",
- " \n",
- " min \n",
- " 0.000000 \n",
- " \n",
- " \n",
- " 25% \n",
- " 5.000000 \n",
- " \n",
- " \n",
- " 50% \n",
- " 15.000000 \n",
- " \n",
- " \n",
- " 75% \n",
- " 19.000000 \n",
- " \n",
- " \n",
- " max \n",
- " 23.000000 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " day_hour\n",
- "count 2819.000000\n",
- "mean 12.782547\n",
- "std 7.611198\n",
- "min 0.000000\n",
- "25% 5.000000\n",
- "50% 15.000000\n",
- "75% 19.000000\n",
- "max 23.000000"
- ]
- },
- "execution_count": 351,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df[['day_hour']].describe()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 354,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "day_hour 5.0\n",
- "Name: 0.25, dtype: float64"
- ]
- },
- "execution_count": 354,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df[['day_hour']].quantile(.25)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 247,
- "metadata": {},
- "outputs": [],
- "source": [
- "df.boxplot?"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 251,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAe4AAAGTCAYAAADuq/FJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nO3deZglVWH38e8PBhFlRDYJoMNERQU1oowQjSIqEgUNGPdEBFxQH427Zt64YdQEY9S8L66oyCISQURRiOISBAGVfRO3wBBkR0BBiQo57x/nXKZoehmmu6f7zHw/z3OfrlvrqXPr3l/VqeqqlFKQJEl9WGuuCyBJklacwS1JUkcMbkmSOmJwS5LUEYNbkqSOGNySJHXE4Fb3khyS5H1zXY65Nlk9JNknyfdXdZlWRpLFSUqSBdOcz7Iku8xUuaT5wuDWjGk/lLcmuSXJjUmOT/KAuS7XUAuEB891OSRpZRncmmnPKqWsD2wOXAMcOMflmTWp/A5JWqX80dGsKKX8D/AlYNtRvyQbJDksyXVJLkvyjlHwJflEkmMG434gyXdaOO6c5JdJ/iHJ9e3I/m8nWnaSVyT5RZIbkhyXZIvW/+Q2ynmtVeAF40y7dpIPteVcmuS1w2bbJCcleX+SU4HfAQ9MskVbzg1tua8YzO9OzdejdRm8X5bk/yT5cWul+FySew6GPzPJuUluSnJakj8bDHt0krOT3Jzki8Ad001cNflokl8n+UmSp7aez0ty1pgR35Tkq+PM4MlJLhi8/1aSMwbvT0myZ+veIskx7fO+NMnrBuOtlWRpkv9K8qskRyXZaIJCP6fV0yOmWLm92nb1qyRvHzNshySnt3q8qtXDPdqwjyX50Jjxj0vyximWtyzJW5Kc3+r0i6PPLsmGSb7e1v3G1n3/wbQnJXlf+0xvSfK1JBsnOSLJb5KckWTxYPyHtbq+IclPkzx/srJpNVdK8eVrRl7AMmCX1n0v4FDgsMHww4CvAguBxcDPgJcNxv8ZsA/wROB64P5t2M7AbcCHgXWBJwG/BR7ahh8CvK91P6VN+5g27oHAyYMyFODBk6zDq4AfA/cHNgS+3aZZ0IafBPw38HBgAbAOcDLwcWpwbgdcBzxlbNkG6/LLMXV2IfAAYCPg1MG6PBq4FtgRWBvYu42/LnAP4DLgja0MzwX+OFzWmPXap9XhaPwXAL9uy1wXuAHYZjD+OcBzxpnPesD/AJu0+VwDXNE+0/WAW4GNqQcFZwHvamV9IHAJ8JdtPq8HftDqeV3gU8CRbdjiUZ0D+wK/mOwza9NsC9wC7NTm9+G2vqPtcXvgz9s8FwMXA29ow3YArgTWau83oe6UbbYC2/uPgC1aPV4MvKoN2xh4DnW7XggcDXxlMO1Jbb0eBGxA3eZ+BuzSyngY8Lk27r2By1tdLGjbxfXAtnP9nfc1N685L4Cv1efVfshuAW5qIXIl8Mg2bG3gD8MfG+CVwEmD9zu2ALkMeNGg/87tR/jeg35HAe9s3YewPOw+C/zLYLz1W1kWt/dTBfd3gVcO3u/CXYP7HwfDHwDcDiwc9Ptn4JCxZRusy9jgftXg/W7Af7XuTwDvHVO+n1J3XHZq9ZvBsNOYPLjHjv8jYK/Bst7fuh8O3AisO8G8TgH+mhqEJ7bP4unAk4HzB5/lf4+Z7v8Mwuhi4KmDYZu3z2kUrAV4C20nagW2vXcB/z54f++2ve0ywfhvAI4dvL8YeFrrfi1wwgpu7y8evP8X4JMTjLsdcOPg/UnA2wfvPwT8x+D9s4BzW/cLgFPGzO9TwLtn67vsa36/pnXVpjSOPUsp306yNrAH8L0k21J/iNehhvLIZcCWozellB8muQS4HzUMhm4spfx2zLRbjLP8LYCzB/O8Jcmv2nKWrUD5t6Ae3YxcPs44w35bADeUUm4eU7YlK7Cs8eY3XK+tgL2T/N1g+D3a8AJcUUopY6adzHjjj5Z1
KHBkkncAewFHlVJ+P8F8vkfbAWndN1J3Jn7f3o/KvkWSmwbTrU0N/dHwY5P872D47cBmg/dvpe4k/ZKp3elzK6X8tn3uACR5CPUofAn1KHgBtUVg5FDgxcC32t//uwLLBLh60P27Vg6S3Av4CHWHZsM2fGGStUspt7f31wymvXWc9+u37q2AHcfU5QLg8BUso1YznuPWrCil3F5K+TL1x/gJ1Ka9P1J/hEYWUZtZAUjyGmoz55XA28bMcsMk9x4z7ZXjLPrK4TLaNBsPlzOFq6jNtyPjXRU/DL8rgY2SLBxTttHyfksNipE/GWd+w2UM1+ty6lHwfQeve5VSjmzl3DJJxkw7mfHGvxKglPID6hHqE4G/YfJQGAX3Tq37e9TgfhLLg/ty4NIxZV9YStltMPwZY4bfs5Qy/Jx2Bd6R5DlTrBfU+rijHltwbjwY/gngJ8DWpZT7AP8ADOvi88AeSR4FbAN8ZQWWOZk3Aw8FdmzL22lUtJWY1+XA98bU1fqllFdPs4zqlMGtWZFqD+rRxsXtKOMo4P1JFibZCngT9QdzdET0PurRzl7A25JsN2a270lyjyRPBJ5JPW841pHAvkm2S7Iu8E/AD0spy9rwa6jnWydyFPD6JFsmuS/w95OtZynlcmoT9T8nuWe7eOxlo/UCzgV2S7JRkj+hNtGO9Zok928XZ70d+GLr/2ngVUl2bPV57yS7t52E06mnD16XZJ0kf009VzuZ+w3Gfx41oE4YDD8M+Cjwx1LKZP/zfRo1lHYAflRKuYh2VEg93w+1Gf7mJH+fZL3Ui/4ekeSxbfgnqdvCVgBJNm3by9BF1CPWjyX5qynW7UvAM5M8oV109o/c+fdtIfAb4JYkDwPuFHrtqP4M6g7LMaWUW6dY3lQWUo+ab2qf67unMa+vAw9pF9+t016PTbLNNMuoThncmmlfS3IL9Ufy/cDe7Ycd4O+oR6CXAN8HvgAcnHrF9ueBD5RSziul/Jx6RHR4C1+oTZI3Uo8Qj6CeF/7J2IWXUr4NvBM4hnoU9iDghYNR9gcObVcXj3dl7qep523Pp16gdQI1IG8fZ9yRF1HPy14JHEs99/jtNuxw4DxqM/2JLA/loS+0YZcA/0XdgaGUcibwCmqY3ki9mGmfNuwP1PPM+1CvC3gB8OVJygjwQ2BrauvH+4HnllJ+NRh+OPAIlu90jKudsjgbuKiVA+qOxGWllGvbOLdTd662Ay5ty/wM9UIsqE3RxwEnJrmZeqHajuMs67w2n08necYkZboIeA21Lq+i1tewif0t1JaEm6mf8Xifw6HAI5mZJuh/o16sdz113b6xsjNqp2F2pW7HV1K/Cx+gtk5pDZQ7n/KS5p8kOwOfL6Xcf6pxZ2HZz6BecLTVlCOv3PyXAS8fBP2cSbIe9Sr2x7SdpzVKkp2oOy1bFX8YNY95xC0NtGbd3ZIsSLIltYnz2Lku1yryauCMNTS016H+i9pnDG3Ndwa3dGcB3kNtaj2H+m9C75rTEq0C7cj/9dSLqualJH/bblYy9nXR1FNPOt9tqP/CuDm1iXvUf9EEy7slyVQXAkqzxqZySZI64hG3JEkdMbglSeqIwS1JUkcMbkmSOmJwS5LUEYNbkqSOGNySJHXE4JYkqSMGtyRJHTG4JUnqiMEtSVJHDG5JkjpicEuS1BGDW5KkjhjckiR1xOCWJKkjBrckSR0xuCVJ6ojBLUlSRwxuSZI6smBVLmyTTTYpixcvXpWLlCSpS2edddb1pZRNx/ZfpcG9ePFizjzzzFW5SEmSupTksvH621QuSVJHDG5JkjpicEuS1BGDW5KkjhjckiR1xOCWJKkjBrckSR0xuCVJ6ojBLUlSRwxuSZI6YnBLktQRg1uSpI4Y3JIkdWSVPh1sVUgy7XmUUmagJHNnJuoArIcR66GyHvqvA7AeRnquh9XuiLuUMulrq7//+pTj9G6q9bMerIe7WwfWw+pRB2A9jPRcD6tdcEuStDozuCVJ6ojBLUlS
RwxuSZI6YnBLktQRg1uSpI4Y3JIkdcTgliSpIwa3JEkdMbglSeqIwS1JUkcMbkmSOmJwS5LUEYNbkqSOGNySJHXE4JYkqSMGtyRJHTG4JUnqiMEtSVJHDG5JkjpicEuS1BGDW5KkjhjckiR1xOCWJKkjBrckSR0xuCVJ6ojBLUlSRwxuSZI6YnBLktQRg1uSpI4Y3JIkdcTgliSpIwa3JEkdmTK4kzwgyX8m+XGSi5K8vvXfKMm3kvy8/d1w9osrSdKabUWOuG8D3lxK2Rb4c+A1SbYFlgLfKaVsDXynvZckSbNoyuAupVxVSjm7dd8MXAxsCewBHNpGOxTYc7YKKUmSqgV3Z+Qki4FHAz8ENiulXNUGXQ1sNsE0+wH7ASxatGhlyylpAo96z4n8+tY/Tmsei5ceP63pN1hvHc57967Tmsd0WQ8amYltAaa3PczmtrDCwZ1kfeAY4A2llN8kuWNYKaUkKeNNV0o5CDgIYMmSJeOOI2nl/frWP7LsgN3ntAzTDbyZYD1oZHXfFlboqvIk61BD+4hSypdb72uSbN6Gbw5cOztFlCRJIytyVXmAzwIXl1I+PBh0HLB3694b+OrMF0+SJA2tSFP5XwB7ARckObf1+wfgAOCoJC8DLgOePztFlCRJI1MGdynl+0AmGPzUmS2OJEmajHdOkySpIwa3JEkdMbglSeqIwS1JUkcMbkmSOmJwS5LUEYNbkqSO3K2HjMy11f3G8SvKeqisB+mu5vphK34nZl9Xwb263zh+RVkPlfUg3dVcfy/8Tsw+m8olSeqIwS1JUkcMbkmSOmJwS5LUEYNbkqSOGNySJHXE4JYkqSMGtyRJHTG4JUnqiMEtSVJHDG5JkjpicEuS1BGDW5KkjhjckiR1xOCWJKkjBrckSR0xuCVJ6ojBLUlSRwxuSZI6YnBLktQRg1uSpI4Y3JIkdcTgliSpIwvmugCSpmfhNkt55KFL57gMALvPaRmkNYXBLXXu5osPYNkBcxuai5ceP6fLl9YkNpVLktQRg1uSpI4Y3JIkdcTgliSpIwa3JEkdMbglSeqIwS1JUkcMbkmSOmJwS5LUEYNbkqSOGNySJHXE4JYkqSMGtyRJHTG4JUnqiMEtSVJHDG5JkjpicEuS1BGDW5KkjhjckiR1xOCWJKkjBrckSR2ZMriTHJzk2iQXDvrtn+SKJOe2126zW0xJkgQrdsR9CPD0cfp/pJSyXXudMLPFkiRJ45kyuEspJwM3rIKySJKkKSyYxrSvTfIS4EzgzaWUG8cbKcl+wH4AixYtmsbiYOE2S3nkoUunNY/pWrgNwO5zXAbrQRrL78WoDHNbD9bBqAwwW/WwssH9CeC9QGl/PwS8dLwRSykHAQcBLFmypKzk8gC4+eIDWHbA3G4Qi5ceP6fLB+tBGo/fi2qu68E6qGazHlbqqvJSyjWllNtLKf8LfBrYYWaLJUmSxrNSwZ1k88HbZwMXTjSuJEmaOVM2lSc5EtgZ2CTJL4F3Azsn2Y7aVL4MeOUsllGSJDVTBncp5UXj9P7sLJRFkiRNwTunSZLUEYNbkqSOGNySJHXE4JYkqSMGtyRJHTG4JUnqiMEtSVJHDG5JkjpicEuS1BGDW5KkjhjckiR1xOCWJKkjBrckSR0xuCVJ6ojBLUlSRwxuSZI6YnBLktQRg1uSpI4Y3JIkdcTgliSpIwa3JEkdMbglSeqIwS1JUkcMbkmSOmJwS5LUEYNbkqSOGNySJHXE4JYkqSMGtyRJHTG4JUnqiMEtSVJHDG5JkjpicEuS1BGDW5KkjhjckiR1xOCWJKkjBrckSR0xuCVJ6ojBLUlSRwxuSZI6YnBLktQRg1uSpI4Y3JIkdcTgliSpIwa3JEkdMbglSeqIwS1JUkcMbkmSOmJwS5LUEYNbkqSOGNySJHXE4JYkqSMGtyRJHTG4JUnqiMEtSVJHpgzuJAcnuTbJhYN+GyX5VpKft78bzm4xJUkSrNgR9yHA08f0Wwp8p5SyNfCd
9l6SJM2yKYO7lHIycMOY3nsAh7buQ4E9Z7hckiRpHAtWcrrNSilXte6rgc0mGjHJfsB+AIsWLVrJxUl3tXCbpTzy0Llt7Fm4DcDuc1oGSWuWlQ3uO5RSSpIyyfCDgIMAlixZMuF40t1188UHsOyAuQ3NxUuPn9PlS1rzrOxV5dck2Ryg/b125ookSZImsrLBfRywd+veG/jqzBRHkiRNZkX+HexI4HTgoUl+meRlwAHA05L8HNilvZckSbNsynPcpZQXTTDoqTNcFkmSNAXvnCZJUkcMbkmSOmJwS5LUEYNbkqSOGNySJHXE4JYkqSMGtyRJHTG4JUnqiMEtSVJHDG5JkjpicEuS1BGDW5KkjhjckiR1xOCWJKkjBrckSR0xuCVJ6ojBLUlSRwxuSZI6YnBLktQRg1uSpI4Y3JIkdcTgliSpIwa3JEkdMbglSeqIwS1JUkcMbkmSOmJwS5LUEYNbkqSOGNySJHXE4JYkqSMGtyRJHTG4JUnqyIK5LoCk6Vu89Pg5Xf4G660zp8sfsR60JjC4pc4tO2D3aU2/eOnx057HfGA9aE1hU7kkSR0xuCVJ6ojBLUlSRwxuSZI6YnBLktQRg1uSpI4Y3JIkdcTgliSpIwa3JEkdMbglSeqIwS1JUke6u1e5DxGQpMnN5e/kfPmNXJ2zoqvgnokHAPggAUmrMx+2svpnhU3lkiR1xOCWJKkjBrckSR0xuCVJ6ojBLUlSRwxuSZI6YnBLktQRg1uSpI5M6wYsSZYBNwO3A7eVUpbMRKEkSdL4ZuLOaU8upVw/A/ORJElTsKlckqSOTPeIuwAnJinAp0opB40dIcl+wH4AixYtmubiNLI630BfkjSx6Qb3E0opVyS5H/CtJD8ppZw8HKGF+UEAS5YsKdNcnlj9b6AvSZrYtJrKSylXtL/XAscCO8xEoSRJ0vhWOriT3DvJwlE3sCtw4UwVTJIk3dV0mso3A45NMprPF0op35iRUkmSpHGtdHCXUi4BHjWDZZEkSVPw38EkSeqIwS1JUkcMbkmSOmJwS5LUEYNbkqSOGNySJHXE4JYkqSMGtyRJHTG4JUnqiMEtSVJHDG5JkjpicEuS1BGDW5KkjhjckiR1xOCWJKkjBrckSR0xuCVJ6ojBLUlSRwxuSZI6YnBLktQRg1uSpI4Y3JIkdcTgliSpIwa3JEkdMbglSeqIwS1JUkcMbkmSOmJwS5LUEYNbkqSOGNySJHXE4JYkqSMGtyRJHVkw1wWQpmPx0uPndPkbrLfOnC5f0prH4Fa3lh2w+7TnsXjp8TMyH0laVWwqlySpIwa3JEkdMbglSeqIwS1JUkcMbkmSOmJwS5LUEYNbkqSOGNySJHXE4JYkqSMGtyRJHTG4JUnqiMEtSVJHDG5JkjpicEuS1BGDW5KkjhjckiR1xOCWJKkjBrckSR0xuCVJ6ojBLUlSRwxuSZI6Mq3gTvL0JD9N8oskS2eqUJIkaXwrHdxJ1gY+BjwD2BZ4UZJtZ6pgkiTprqZzxL0D8ItSyiWllD8A/w7sMTPFkiRJ40kpZeUmTJ4LPL2U8vL2fi9gx1LKa8eMtx+wH8CiRYu2v+yyy6ZX4qnLNe15rGydzBczUQdgPYxYD5X10H8dgPUw0kM9JDmrlLJkbP8Fs7pUoJRyEHAQwJIlS2b9014dNqjpsg4q66GyHirrobIeqp7rYTpN5VcADxi8v3/rJ0mSZsl0gvsMYOskf5rkHsALgeNmpliSJGk8K91UXkq5LclrgW8CawMHl1IumrGSSZKku5jWOe5SygnACTNUFkmSNAXvnCZJUkcMbkmSOmJwS5LUEYNbkqSOGNySJHXE4JYkqSMGtyRJHTG4JUnqiMEtSVJHVvqxniu1sOQ6YHaf6zm1TYDr57gM84H1UFkP1sGI9VBZD9V8qIetSimbju25SoN7Pkhy5njPN13TWA+V9WAdjFgPlfVQzed6sKlckqSOGNySJHVkTQzug+a6APOE9VBZD9bBiPVQ
WQ/VvK2HNe4ctyRJPVsTj7glSeqWwS1JUke6DO4ktyc5d/BaPM44WyT50gTTn5RkXl7mP5kkJcnnB+8XJLkuyddnaP77J3nLTMxrNiR5e5KLkpzfPvcdJxl3nyRbzMAy5/W2cnfq5G7Mc95uB0k2Hnzvr05yReu+KcmPV8Hy90ny0dlezoqapD7OTXKPWVje95NsN9PzXYHlfiTJGwbvv5nkM4P3H0ryphWc16xu36tiG1kwmzOfRbeWUibceJIsKKVcCTx3FZZpVfgt8Igk65VSbgWeBlwxx2VaJZI8Dngm8JhSyu+TbAJM9sO0D3AhcOXdWMaCUspt0yroKrQSddK9UsqvgO2g/gADt5RS/rXtvK/0Dmxvn/3IRPUxp4WaHacCzwf+Lcla1Juj3Gcw/PHAG+eiYHOhyyPu8bS9nOOSfBf4TpLFSS5sw9ZL8u9JLk5yLLDeYLpPJDmzHbW8p/V7SpKvDMZ5WptuPjgB2L11vwg4cjQgyUZJvtKOvn6Q5M9a//2THNyOHi9J8rrBNG9P8rMk3wceOuj/iiRnJDkvyTFJ7pVkYZJLk6zTxrnP8P0s2xy4vpTye4BSyvWllCuTvKuV88IkB6V6LrAEOKIdeayXZFkLNpIsSXLSoG4OT3IqcHhn28pEdTLZuva+HUxm7SSfbp/PiUnWa+W7o9UkySZJlrXusb8Zmyc5uW0zFyZ5Yhtv31Y3PwL+YrSwJM9K8sMk5yT5dpLNkqyV5OdJNm3jrJXkF6P3q0qSByc5d/B+aZJ3tO6tU49Yz2rr+5DW/4Vtvc9L8p+t372SHN2+D8cA9xzM86DB9+Fdrd+uGbR0JnlGkqNnYJVOAx7Xuh9O3Sm/OcmGSdYFtgHOTvLWtr2eP/qOtnJMtH2flOQDSX7Uho8+87WTfHAwr1e2/vNjGymldPcCbgfOba9jW799gF8CG7X3i4ELW/ebgINb958BtwFL2vvR+GsDJ7XhAX4CbNqGfQF41jxY71ta+b5E/QKdC+wMfL0NPxB4d+t+CnBu696fuuGvS91T/RWwDrA9cAFwL+re6y+At7RpNh4s933A37XuzwF7tu79gA+tonVfv63vz4CPA08afn6t+/DR59Q+yyWDYcuATVr3EuCkQd2cBazX27YySZ1Mtq5dbwdj1n//QTkXt89qu/b+KODFY7eFtt7LWvc+3Pk3483A2wef8ULqztF/A5tSWzNOBT7axtmQ5f+Z8/JRHQDvBt7QuncFjpmD+ngw7fvf3i8F3tG6/xN4UOv+C+DE1n0xsFnrvm/7+zbgoNb9aOpv76iOR/W2ADgF2JZ6MPiz0XbTPodnzND6XQosAl4JvAp4L7BbW4dTWl0fRP1OrkVtgdlpiu37pMHnthvw7cE2PaqvdYEzgT+dL9tIr0fct5ZStmuvZw/6f6uUcsM44+8EfB6glHI+cP5g2POTnA2cQ92T27bU2jwceHGS+1L39P5jNlbk7mrlX0w92j5hzOAnUMtNKeW7wMZJRs1Jx5dSfl9KuR64FtgMeCJ1x+d3pZTfAMcN5vWIJKckuQD4W2rdAHwG2Ld170v9AZ91pZRbqF/A/YDrgC8m2Qd4ctujvYC6s/LwiecyoeNKPfUAHW0rk9TJZLreDqZwaSlldJR5FvV7MpXhb8YZwL6pTc6PLKXcDOxI3fG5rpTyB+CLg2nvD3yz1c1bWV43BwMvad0vZX7UDQBtG/1z4Jh2RP4xYHQtyKnAYUlezvLW2OH34RzgosHsXtS+D2dTj3i3LaX8L3AE8DdJNqJunyfOUPFPozaJPx44vb1G70+lBuCu1O/n2cDDgK2ZfPsG+HL7O9xmdgVe0uroh8DGbV7zYhvp9Rz3RH57d0ZO8qfAW4DHllJuTHIIy5uCPgd8Dfgf4Ogyv85/HQf8K/Voe+MVnOb3g+7bmfqzP4R6RHVeC4OdAUopp6aehtgZWLuUcuEKl3qaSim3U/eQT2pfhFdSj3qXlFIub1+me04w+W0s/zEa
O86U28183VbGqZO9mXxdu98OJjF23UanOVbosy+lnJxkJ+qpqEOSfBj4zSTLOxD4cCnluFYP+7f5XJ7kmiRPAXag7vCsasN1hrret1GPRq8v418j9ApqCD2T2uz86IlmnmRr4PXADqWUm1Ivmh3V7cHAMa37i20bnQmnUkP6kdSm8supR8C/oX4HnwT8cynlU2PK+gYmN9puht+HUFuXvjl25PmwjfR6xH13nQz8DUCSR1B/7KE2m/wW+HWSzYBnjCYo9eK2K4F3MI/2mJuDgfeUUi4Y0/8U2gbQNpLr2x7mRE4G9kw9r7sQeNZg2ELgqtTzlmM3qsOoTcKrrF6SPLT9WIxsB/y0dV+fZH3ufDHizdR1GFlG3fsHeM4ki+pmW5mgTi5jxdd1pJvtYCUtY3l9THjBapKtgGtKKZ+mtig8hnq09aTUq7fXAZ43mGQDll8cuveY2X2GeqR69AwG191xNbBFOwd8T9p1MaWUG6mf57PhjvOrj2rTPLCU8gPgncCNwJbc+fvwKJYfMd6H+h37TZLNgb8cLbiUcjn1qVpLqTt+M+U06k7FDaWU21tLyaiV6zTgm8BL228BSbZMcj8m374n8k3g1Vl+HcdDktx7vmwjq9sR90Q+AXwuycXU8zhnAbSjiHOo5ygvp+7RDR1BPXd58aos7FRKKb8E/t84g/YHDk5yPvA77rqhjJ3P2Um+CJxHbTY9YzD4ndQN8rr2dxiCR1DPdx7JqrM+cGBr6ruNep5qP+Am6t731dy5/IcAn0xyK/WL/R7gs0neSz1CnUhP28pEdbINK7auQHfbwcr4V+CoJPsBx08y3s7AW5P8kXo9yUtKKVe1lpzTqdvauYPx9weOTnIj8F3qOdCR46g7NHOyU1NK+Z8k/0Q9N3sFMPxXuRcCn2jrdQ9qeJwHfKS1LIV63vvCJJcAh7bvw0XUZmioTdE/pn4fLuOu34cvAPcppfxsBlfrAuo1Cl8Y02/9durnxCTbAKcngfoZvniK7Xsin6E2m5+dOrPrgD2ZJ9uItzydROr/4p1TSvnsXJdlPkm9anuPUspec12W+WJN3FbcDiaWehX7R0opT5zrssyFJJ8ETi+lHDrXZZmvprONrClH3HdbkrOoTaNvnhWNu9UAAAPMSURBVOuyzCdJDqQ2E+8212WZL9bEbcXtYGJJlgKvZm7Obc+5dkHXjcDrphp3TTXdbcQjbkmSOrKmXJwmSdJqweCWJKkjBrckSR0xuKVVIMufaHdR6r2g35z6sITZXOYH2/I+OKb/zkkeP3h/SLtCfF6Yb+WR5huvKpdWjTueaNduCvEF6k0s3j2Ly9yPej/psTd32Jn6P6inzeKyJc0Sj7ilVayUci01VF+banHq/cDPbq/HAyQ5LMmeo+mSHJFkj+G82vQfTH1S0QVJXtD6H0e9QctZo36t/2LqAxre2FoARv9DulOS01KfGvbcwfjjPm1pMPx5qbd9JMnr2w07SPLA1CeukWT7JN9LfRrVN9udtkjyoCTfaP1PSfKwceb/3nYEvvbdrmhpNWVwS3OglHIJ9elC96PezelppZTHAC9g+V3xPkt9ghVJNqDep3nsnb/+mnqr00cBuwAfTLJ5KeWvWP4wnjseelBKWQZ8knrjh+1KKae0QZtTH1LzTOCAtsxdqQ9W2KEtY/vU+zQPnUJ9iAPt76+SbNm6T263gDwQeG4pZXvq7Xrf38Y/iHo/6O2p94H/+HDGrYl/U2DfObptqDQv2VQuzb11gI8m2Y76oIOHAJRSvpfk46nP6X0O9dF/Yx9g8gTgyBZs1yT5HvBY7voEpKl8pT3Z6cep92KHOz9tCeoR/NbUez/Tynh1kvVT7wH9AOopgJ2owf1l6rOPHwF8q92Gcm3qvbLXp+6IHN36Q3184sg7gR+WUva7m+shrfYMbmkOJHkgNaSvpZ7nvoZ61LwW9SljI4cBL6beX3pfZs/wyVoZ/L3L05bGcRq1bD+lHoG/lHp/+DdTn598
USnlccMJUh83e9MET6mCej/p7ZNsNMGjeqU1lk3l0irWjqA/CXy0Pc97A+CqdsS7F/WodOQQ4A0ApZQfc1enAC9Isnab707Aj6Yowtgnp01koqctjVeGt1CPxM8Bngz8vpTya2qYb5rkcW0e6yR5eHtq3aVJntf6J8ufUgXwDWqT/fHtaF5SY3BLq8Z6o38HA74NnEh9YhnUc7t7JzkPeBh3fkb0NdSnlE30BKFjgfOpTz76LvC2UsrVU5Tla8Czx1ycdhellBOpTd+npz7r+0uMH/inUJvJT25N9pcD32/z+AP1UZofaOt3LrWJHOp9ml/W+l8E3OnCu1LK0cCngeOSrIckwHuVS/NakntRH134mHYEK2kN5xG3NE8l2YV6tH2goS1pxCNuSZI64hG3JEkdMbglSeqIwS1JUkcMbkmSOmJwS5LUkf8PcpANSdCEqd0AAAAASUVORK5CYII=",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "%matplotlib inline\n",
- "\n",
- "df[['day_hour', 'week_day_name']].boxplot(\n",
- " by='week_day_name',\n",
- " grid=False,\n",
- " figsize=(8,6),\n",
- " fontsize=10\n",
- ")\n",
- "\n",
- "# set an empty axes title (blanks the per-column title pandas adds when grouping with `by`)\n",
- "plt.title('')\n",
- "\n",
- "# give a label to the x axis\n",
- "plt.xlabel(\"Day of the week\")\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 252,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAe4AAAGTCAYAAADuq/FJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAdxUlEQVR4nO3deZSkdX3v8ffHmRFxQJSAHbbQMSFG1Cs6E4wbmYnoxeUG4xL1Rg4Yk4k5atBodDQmxiQk5HpikusaFWSICi5IJMJB0EwLLlEWQUE0Kg5hE1TAMFxX/N4/nqdN2dMzPXRVT/Wv+/06p07Xs3/r2131qeepp+tJVSFJktpwt3EXIEmSdp7BLUlSQwxuSZIaYnBLktQQg1uSpIYY3JIkNcTglmZIckqSvxp3HeO2oz4kOS7JJ3Z1TfORZDJJJVk55Hq2JDlyVHVJ82Vwa9HqXyi/m2RrkluTnJ3koHHXNagPhF8cdx2Slg+DW4vd/6qqPYD9gJuAN4y5ngWTjs9JSTvki4SaUFXfAz4AHDo9LsleSU5N8s0k1yR59XTwJXlLkjMG5v3bJB/rw3FdkuuSvCrJt/o9+9/e3raT/F6Srya5JclZSfbvx1/Qz3J5f1TgmbMsuyLJ3/Xb+XqSFw4etk0yleSEJJ8E/h9wvyT799u5pd/u7w2s76cOX08/loHhLUlemeSL/VGKdya5x8D0Jye5LMltST6V5H8MTHtokkuT3J7kvcBPltt+a/LGJN9J8qUkj+1HPiPJJTNm/KMkH5plBeuTfGFg+PwkFw0MX5jkKf39/ZOc0f++v57kDwfmu1uSjUm+luTbSd6XZO/tFP20vk8PmuPBHdP/XX07yZ/MmHZ4kk/3fbyx78Pd+2lvSvJ3M+Y/K8lLdrQ9aadVlTdvi/IGbAGO7O/fE9gEnDow/VTgQ8CewCTwH8DzBub/D+A44DHAt4AD+2nrgB8Brwd2A34NuAO4fz/9FOCv+vu/3i/7sH7eNwAXDNRQwC/u4DE8H/gicCBwH+Cj/TIr++lTwH8CDwRWAquAC4A30wXnYcA3gV+fWdvAY7luRs+uAA4C9gY+OfBYHgrcDDwcWAEc28+/G3B34BrgJX0NTwd+OLitGY/ruL6H0/M/E/hOv83dgFuABwzM/zngabOsZ3fge8A+/XpuAq7vf6e7A98FfoZuJ+MS4M/6Wu8HXA38z349xwP/3vd5N+CfgNP6aZPTPQeeC3x1R7+zfplDga3AEf36Xt8/3um/xzXAr/brnASuAl7cTzscuAG4Wz+8D92bsolxP6e8LY3b2Avw5m17tz5UtgK39SFyA/DgftoK4AfAoQPz/z4wNTD88D5ArgGePTB+Xf8ivHpg3PuAP+3vnzIQdicB/2dgvj36Wib74bmC+9+A3x8YPpJtg/svBqYfBNwJ7Dkw7m+AU2bWNvBYZgb38weGnwh8rb//FuAvZ9T3Zbo3Lkf0/c3AtE+x4+CeOf9ngWMGtnVCf/+BwK3AbttZ14XAU/sgPK//XRwFrAc+P/C7/M8Zy70SeGd//yrgsQPT9ut/T9PBWsDL6N9E7cTf3p8Bpw8Mr+7/3o7czvwvBs4cGL4KeFx//4XAOeN+PnlbOrehzrKUdoGnVNVHk6wAjgY+nuRQuhfiVXShPO0a4IDpgar6TJKrgfvShcGgW6vqjhnL7j/L9vcHLh1Y59Yk3+63s2Un6t8fuHZg+NpZ5hkctz9wS1XdPqO2tTuxrdnWN/i4DgaOTfKigel376cXcH1V1Yxld2S2+ae3tQk4LcmrgWOA91XV97ezno/TvwHp799K92bi+/3wdO37J7ltYLkVdKE/Pf3MJD8emH4nMDEw/Md0b5KuY24/9Xurqjv63zsASX6Jbi98Ld3RnZV0RwSmbQKeA5zf//zHndimtFP8jFtNqKo7q+qDdC/Gj6Y7fP1DuhfsaT9Hd5gVgCQvoDvMeQPw8hmr
vE+S1TOWvWGWTd8wuI1+mZ8Z3M4cbqQ7fDtttrPiB8PvBmDvJHvOqG16e3fQBcW0n51lfYPbGHxc19LtBd974HbPqjqtr/OAJJmx7I7MNv8NAFX173R7qI8B/jfwzztYz3RwH9Hf/zhdcP8a/x3c1wJfn1H7nlX1xIHpT5gx/R5VNfh7ejzw6iRPm+NxQdePn/QxyT3pfu/T3gJ8CTikqu4FvAoY7MW7gKOTPAR4APAvO7FNaacY3GpCf1LZ0XSfE19VVXfS7UWfkGTPJAcDf0T3gjm9R/RXdHs7xwAvT3LYjNW+NsndkzwGeDLw/lk2fRrw3CSHJdkN+GvgM1W1pZ9+E93nrdvzPuD4JAckuTfwih09zqq6lu4Q9d8kuUd/8tjzph8XcBnwxCR7J/lZukO0M70gyYH9yVl/Ary3H/924PlJHt73c3WSJ/VvEj5N9/HBHyZZleSpdJ/V7sh9B+Z/Bl1AnTMw/VTgjcAPq2pH//P9KeD+/fY+W1VX0r1Zejjd5/3QHYa/Pckrkuye7qS/ByX5lX76W+n+Fg4GSLJv//cy6Eq6Q/BvSvIbczy2DwBPTvLo/qSzv+CnXy/3BP4L2Jrkl4E/GFy436u/iO4NyxlV9d05tiftNINbi92/JtlK9yJ5AnBs/8IO8CK6PdCrgU8A7wFOTnfG9ruAv62qy6vqK3R7RP/chy/AN+gOyd4AvJvuc+Evzdx4VX0U+FPgDLq9sF8AnjUwy58Dm/qzi39rlvrfTve57efpTtA6hy4g79zBY3423eeyNwBnAq/p64AuCC6nO0x/Hv8dyoPe00+7Gvga3RsYqupi4PfowvRWupO0juun/YDuc+bj6M4LeCbwwR3UCPAZ4BC6ox8nAE+vqm8PTP9n4EH895uOWfUfWVwKXNnXAd0biWuq6uZ+njvp3lwdBny93+Y7gL36+f8ROAs4L8ntdCeqPXyWbV3er+ftSZ6wg5quBF5A18sb6fo1eIj9ZXRHEm6n+x3P9nvYBDyYHR9tkO6y/PRHVNLSl2Qd8K6qOnCueRdg208A3lpVB8858/zWvwX43YGgH5sku9Odxf6w/s3TspLkCLo3LQeXL7QaIfe4pQXUH9Z9YpKVSQ4AXkO3F70c/AFw0TIN7VV0/6L2DkNbo2ZwSwsrwGvpDrV+ju7fhP5srBXtAv2e//HAS8dcynYl+e10X5wz83bl3EvvcL0PoPsXxv2AfxhJsdIAD5VLktQQ97glSWqIwS1JUkMMbkmSGmJwS5LUEINbkqSGGNySJDXE4JYkqSEGtyRJDTG4JUlqiMEtSVJDDG5JkhpicEuS1BCDW5KkhhjckiQ1xOCWJKkhBrckSQ0xuCVJaojBLUlSQwxuSZIaYnBLktSQlbtyY/vss09NTk7uyk1u44477mD16tVjrWExsA8d+2APptmHjn3oLIY+XHLJJd+qqn1njt+lwT05OcnFF1+8Kze5jampKdatWzfWGhYD+9CxD/Zgmn3o2IfOYuhDkmtmG++hckmSGmJwS5LUEINbkqSGGNySJDXE4JYkqSEGtyRJDTG4JUlqiMEtSVJDDG5JkhpicEuS1BCDW5KkhhjckiQ1xOCWJKkhu/TqYLtCkqHXUVUjqGR8RtEDsA/T7EPHPrTfA7AP01ruw5Lb466qHd4OfsWH55yndXM9PvtgH+5qD+zD0ugB2IdpLfdhyQW3JElLmcEtSVJDDG5JkhpicEuS1BCDW5KkhhjckiQ1xOCWJKkhBrckSQ0xuCVJaojBLUlSQwxuSZIaYnBLktQQg1uSpIYY3JIkNcTgliSpIQa3JEkNMbglSWqIwS1JUkMMbkmSGmJwS5LUEINbkqSGGNySJDXE4JYkqSEGtyRJDTG4JUlqiMEtSVJDDG5JkhpicEuS1BCDW5KkhhjckiQ1xOCWJKkhBrckSQ0xuCVJasicwZ3koCSbk3wxyZVJju/H753k/CRf6X/eZ+HLlSRpeduZPe4fAS+tqkOBXwVekORQ
YCPwsao6BPhYPyxJkhbQnMFdVTdW1aX9/duBq4ADgKOBTf1sm4CnLFSRkiSps/KuzJxkEngo8Blgoqpu7Cd9A5jYzjIbgA0AExMTTE1NzbPU0VkMNSwGrffhBR+7gzt+OPx6JjeePe9lV6+CNz129fBFDGEUfRimB7A4+jAKrT8nRqX1Piz514aq2qkbsAdwCfDUfvi2GdNvnWsda9asqXE7+BUfHncJi8JS6MMoHsPmzZvHXsOwhq1h2B6MoobFYCk8hlFYCn1YKq8NwMU1S5bu1FnlSVYBZwDvrqoP9qNvSrJfP30/4OaRvqOQJEnb2JmzygOcBFxVVa8fmHQWcGx//1jgQ6MvT5IkDdqZz7gfBRwDfCHJZf24VwEnAu9L8jzgGuC3FqZESZI0bc7grqpPANnO5MeOthxJkrQjfnOaJEkNMbglSWqIwS1JUkMMbkmSGmJwS5LUEINbkqSGGNySJDXkLl1kZNwe8trz+M53h//m+GG+OH6v3Vdx+WseP3QNw7AP0rZG8bwY9mIri+F5Me4+LIYeLHVNBfd3vvtDtpz4pKHWMTU1xbp16+a9/LBP7FGwD9K2hn1eDPucgMXxvBh3HxZDD5Y6D5VLktQQg1uSpIYY3JIkNcTgliSpIQa3JEkNMbglSWqIwS1JUkMMbkmSGmJwS5LUEINbkqSGGNySJDXE4JYkqSEGtyRJDTG4JUlqiMEtSVJDDG5JkhpicEuS1BCDW5KkhhjckiQ1xOCWJKkhBrckSQ0xuCVJaojBLUlSQ1aOuwBJkkZpzwds5MGbNg6/ok3D1ADwpOFrmIXBLUlaUm6/6kS2nDhcaE5NTbFu3bp5Lz+58eyhtr8jHiqXJKkhBrckSQ0xuCVJaojBLUlSQwxuSZIaYnBLktQQg1uSpIYY3JIkNcTgliSpIQa3JEkNMbglSWqIwS1JUkMMbkmSGmJwS5LUEINbkqSGGNySJDXE4JYkqSEGtyRJDTG4JUlqiMEtSVJDDG5JkhoyZ3AnOTnJzUmuGBj350muT3JZf3viwpYpSZJg5/a4TwGOmmX831fVYf3tnNGWJUmSZjNncFfVBcAtu6AWSZI0h1TV3DMlk8CHq+pB/fCfA8cB/wVcDLy0qm7dzrIbgA0AExMTa04//fR5F/uia14072VH6Q0Hv2Gs27cPHfvQsQ8d+9A57tw7OOWo1fNefuvWreyxxx5j2/4ojKKGxdCH9evXX1JVa7eZUFVz3oBJ4IqB4QlgBd0e+wnAyTuznjVr1tQwDn7Fh4davqpq8+bNY69hWPZhdDXYh+F7MIoaRsE+jKYGnxOdxdAH4OKaJUvndVZ5Vd1UVXdW1Y+BtwOHz2c9kiTprplXcCfZb2DwN4ErtjevJEkanZVzzZDkNGAdsE+S64DXAOuSHAYUsAX4/QWsUZIk9eYM7qp69iyjT1qAWiRJ0hz85jRJkhpicEuS1BCDW5KkhhjckiQ1xOCWJKkhBrckSQ0xuCVJaojBLUlSQwxuSZIaYnBLktQQg1uSpIYY3JIkNcTgliSpIQa3JEkNMbglSWqIwS1JUkMMbkmSGmJwS5LUEINbkqSGGNySJDXE4JYkqSEGtyRJDTG4JUlqiMEtSVJDDG5JkhpicEuS1BCDW5KkhhjckiQ1xOCWJKkhBrckSQ0xuCVJaojBLUlSQwxuSZIaYnBLktQQg1uSpIYY3JIkNcTgliSpIQa3JEkNMbglSWqIwS1JUkMMbkmSGmJwS5LUEINbkqSGGNySJDXE4JYkqSEGtyRJDTG4JUlqiMEtSVJDDG5JkhpicEuS1BCDW5KkhhjckiQ1xOCWJKkhBrckSQ0xuCVJasicwZ3k5CQ3J7liYNzeSc5P8pX+530WtkxJkgQ7t8d9CnDUjHEbgY9V1SHAx/phSZK0wOYM7qq6ALhlxuijgU39/U3AU0ZclyRJmsXKeS43UVU39ve/AUxsb8YkG4ANABMTE0xNTc1zk51hl9+6devYaxgF+zCaGuzD
aHowbA2jYh9gzwds5MGbhjwIumnuWba/fZiaWj3c9kdgSb82VNWcN2ASuGJg+LYZ02/dmfWsWbOmhnHwKz481PJVVZs3bx57DcOyD6OrwT4M34NR1DAK9mE0Nfic6CyGPgAX1yxZOt+zym9Ksh9A//Pmod9BSJKkOc03uM8Cju3vHwt8aDTlSJKkHdmZfwc7Dfg0cP8k1yV5HnAi8LgkXwGO7IclSdICm/PktKp69nYmPXbEtUiSpDn4zWmSJDXE4JYkqSEGtyRJDTG4JUlqiMEtSVJDDG5JkhpicEuS1BCDW5KkhhjckiQ1xOCWJKkhBrckSQ0xuCVJaojBLUlSQwxuSZIaYnBLktQQg1uSpIYY3JIkNcTgliSpIQa3JEkNMbglSWqIwS1JUkMMbkmSGmJwS5LUEINbkqSGGNySJDXE4JYkqSEGtyRJDTG4JUlqiMEtSVJDDG5JkhpicEuS1BCDW5KkhqwcdwGShje58ezhVnDucMvvtfuq4bYvjdjQzwkY6nmxkM8Jg1tq3JYTnzTU8pMbzx56HdJiMoq/58X8vPBQuSRJDTG4JUlqiMEtSVJDDG5JkhpicEuS1BCDW5KkhhjckiQ1xOCWJKkhBrckSQ0xuCVJaojBLUlSQ/yu8kYt5S/QvyvsgwZ5sRUtBwZ3g5b6F+jvLPugQV5sRcuFh8olSWqIwS1JUkMMbkmSGmJwS5LUEINbkqSGGNySJDXE4JYkqSEGtyRJDRnqC1iSbAFuB+4EflRVa0dRlCRJmt0ovjltfVV9awTrkSRJc/BQuSRJDRl2j7uA85IU8E9V9baZMyTZAGwAmJiYYGpqaqgNjvuiEqtXMfRjWCyWyuMYln2wB9OWSh+GeRxbt24dug/2cWENG9yPrqrrk9wXOD/Jl6rqgsEZ+jB/G8DatWtr3bp1897Ylvkv+hNeSKB37tkM87tYMuyDPZi2VPow5OOYmpoarg/2ccENdai8qq7vf94MnAkcPoqiJEnS7OYd3ElWJ9lz+j7weOCKURUmSZK2Ncyh8gngzCTT63lPVZ07kqokSdKs5h3cVXU18JAR1iJJkubgv4NJktQQg1uSpIYY3JIkNcTgliSpIQa3JEkNMbglSWqIwS1JUkMMbkmSGmJwS5LUEINbkqSGGNySJDXE4JYkqSEGtyRJDTG4JUlqiMEtSVJDDG5JkhpicEuS1BCDW5KkhhjckiQ1xOCWJKkhBrckSQ0xuCVJaojBLUlSQwxuSZIaYnBLktQQg1uSpIYY3JIkNcTgliSpIQa3JEkNMbglSWqIwS1JUkMMbkmSGrJy3AVIkkZrcuPZw63g3Pkvv9fuq4bbtuZkcEvSErLlxCcNtfzkxrOHXocWlofKJUlqiMEtSVJDDG5JkhpicEuS1BCDW5KkhhjckiQ1xOCWJKkhBrckSQ0xuCVJaojBLUlSQwxuSZIaYnBLktQQg1uSpIYY3JIkNcTgliSpIQa3JEkNMbglSWqIwS1JUkMMbkmSGmJwS5LUEINbkqSGDBXcSY5K8uUkX02ycVRFSZKk2c07uJOsAN4EPAE4FHh2kkNHVZgkSdrWMHvchwNfraqrq+oHwOnA0aMpS5IkzSZVNb8Fk6cDR1XV7/bDxwAPr6oXzphvA7ABYGJiYs3pp58+XMVzWL9+/dDr2Lx58wgqGZ9R9ADswzT70LEP7fcA7MO0Fvqwfv36S6pq7czxKxd0q0BVvQ14G8DatWtr3bp1C729HU6fmppioWsYt515M2YfOvZhefQA7MM0+9BpuQ/DHCq/HjhoYPjAfpwkSVogwwT3RcAhSX4+yd2BZwFnjaYsSZI0m3kfKq+qHyV5IfARYAVwclVdObLKJEnSNob6jLuqzgHOGVEtkiRpDn5zmiRJDTG4JUlqiMEtSVJDDG5JkhpicEuS1BCDW5KkhhjckiQ1xOCWJKkhBrckSQ2Z92U957Wx5JvANbtsg7PbB/jWmGtYDOxDxz7Yg2n2oWMfOouhDwdX1b4z
R+7S4F4Mklw82/VNlxv70LEP9mCafejYh85i7oOHyiVJaojBLUlSQ5ZjcL9t3AUsEvahYx/swTT70LEPnUXbh2X3GbckSS1bjnvckiQ1y+CWJKkhyya4kxyV5MtJvppk47jrGZckJye5OckV465lXJIclGRzki8muTLJ8eOuaRyS3CPJZ5Nc3vfhteOuaVySrEjyuSQfHnct45RkS5IvJLksycXjrmccktw7yQeSfCnJVUkeMe6aZloWn3EnWQH8B/A44DrgIuDZVfXFsRY2BkmOALYCp1bVg8Zdzzgk2Q/Yr6ouTbIncAnwlOX295AkwOqq2ppkFfAJ4Piq+vcxl7bLJfkjYC1wr6p68rjrGZckW4C1VTXuLx4ZmySbgAur6h1J7g7cs6puG3ddg5bLHvfhwFer6uqq+gFwOnD0mGsai6q6ALhl3HWMU1XdWFWX9vdvB64CDhhvVbtedbb2g6v629J/Jz9DkgOBJwHvGHctGq8kewFHACcBVNUPFltow/IJ7gOAaweGr2MZvlBrW0kmgYcCnxlvJePRHyK+DLgZOL+qlmMf/gF4OfDjcReyCBRwXpJLkmwYdzFj8PPAN4F39h+dvCPJ6nEXNdNyCW5pG0n2AM4AXlxV/zXuesahqu6sqsOAA4HDkyyrj0+SPBm4uaouGXcti8Sjq+phwBOAF/QfrS0nK4GHAW+pqocCdwCL7pyo5RLc1wMHDQwf2I/TMtV/pnsG8O6q+uC46xm3/nDgZuCocdeyiz0K+I3+s93TgV9P8q7xljQ+VXV9//Nm4Ey6jxmXk+uA6waOPH2ALsgXleUS3BcBhyT5+f5kg2cBZ425Jo1Jf1LWScBVVfX6cdczLkn2TXLv/v7udCdvfmm8Ve1aVfXKqjqwqibpXhf+raqeM+ayxiLJ6v5kTfrDw48HltV/n1TVN4Brk9y/H/VYYNGdtLpy3AXsClX1oyQvBD4CrABOrqorx1zWWCQ5DVgH7JPkOuA1VXXSeKva5R4FHAN8of98F+BVVXXOGGsah/2ATf1/XdwNeF9VLet/h1rmJoAzu/e1rATeU1XnjreksXgR8O5+J+9q4Lljrmcby+LfwSRJWiqWy6FySZKWBINbkqSGGNySJDXE4JYkqSEGtyRJDTG4pV0gyZ39FZeu7K/G9dIkC/r8S/K6fnuvmzF+XZJHDgyfkuTpC1nLXbHY6pEWm2Xxf9zSIvDd/qtFSXJf4D3AvYDXLOA2NwB7V9WdM8avo7tC3KcWcNuSFoh73NIu1n+d5AbghelMJrkwyaX97ZEASU5N8pTp5ZK8O8lPXdWuX/51Sa7or6P8zH78WcAewCXT4/rxk8DzgZf0RwAe0086Ismnklw9uLeb5I+TXJTk87NdrzvJM5K8vr9/fJKr+/v3S/LJ/v6aJB/vL1zxkf6yqiT5hSTn9uMvTPLLs6z/L/s98BV3udHSEmVwS2NQVVfTfYvffemuzPW4/uIOzwT+bz/bScBx8JPLDT4SOHvGqp4KHAY8BDgSeF2S/arqN+j38qvqvQPb3QK8Ffj7ftqF/aT9gEcDTwZO7Lf5eOAQuu+rPgxYM8tFJy4EpsP/McC3kxzQ37+g/074NwBPr6o1wMnACf38bwNe1I9/GfDmwRX3h/j3BZ47y1EDadnyULk0fquANyY5DLgT+CWAqvp4kjcn2Rd4GnBGVf1oxrKPBk7rg+2mJB8HfoW7/l38/1JVPwa+mGSiH/f4/va5fngPuiC/YHqhqvpGkj3677g+iO4jgCPogvuDwP2BBwHn91+luQK4sb8y2yOB9/fjAXYbqOdPgc9U1XK8tKS0Qwa3NAZJ7kcX0jfTfc59E91e892A7w3MeirwHLoLYCzkdyZ/f7C8gZ9/U1X/NMeyn6Kr7ct0e+C/AzwCeCnwc8CVVfWIwQWS3Au4bfpz/1lcRLeHv3dV3XKXHom0xHmoXNrF+j3otwJvrO5iAXsBN/Z7vMfQ7ZVOOwV4MUBVzXaVoguBZyZZ
0a/3COCzc5RwO7DnTpT6EeB3+r1jkhzQn1g3Ww0vo9sT/xywHvh+VX2HLsz3TfKIfh2rkjywv/7515M8ox+fJA8ZWOe5dIfsz56+YpWkjsEt7Rq7T/87GPBR4Dxg+mSvNwPHJrkc+GXgjumFquom4CrgndtZ75nA54HLgX8DXt5fmnBH/hX4zRknp22jqs6jO/T96SRfoLs28WwheiHdYfIL+kP21wKf6NfxA+DpwN/2j+8yukPkAL8NPK8ffyXwUyfeVdX7gbcDZ/WXHZWEVweTFrUk9wS+ADys34OVtMy5xy0tUkmOpNvbfoOhLWmae9ySJDXEPW5JkhpicEuS1BCDW5KkhhjckiQ1xOCWJKkh/x9ZqOA6Fj/ykAAAAABJRU5ErkJggg==",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "%matplotlib inline\n",
- "\n",
- "df[['day_hour', 'week_day']].boxplot(\n",
- " by='week_day',\n",
- " grid=True, # just to show the difference with/without\n",
- " figsize=(8,6),\n",
- " fontsize=10\n",
- ")\n",
- "\n",
- "# give a title to the plot\n",
- "plt.title('')\n",
- "\n",
- "# give a label to the axes\n",
- "plt.xlabel(\"Day of the week\")\n",
- "plt.show()"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.4"
- },
- "toc": {
- "base_numbering": 1,
- "nav_menu": {},
- "number_sections": false,
- "sideBar": true,
- "skip_h1_title": false,
- "title_cell": "Table of Contents",
- "title_sidebar": "Contents",
- "toc_cell": false,
- "toc_position": {
- "height": "calc(100% - 180px)",
- "left": "10px",
- "top": "150px",
- "width": "384px"
- },
- "toc_section_display": true,
- "toc_window_display": true
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/notebooks/3.1 Exploratory data analysis basics.ipynb b/notebooks/3.1 Exploratory data analysis basics.ipynb
index 8089ec7..f44c8e7 100644
--- a/notebooks/3.1 Exploratory data analysis basics.ipynb
+++ b/notebooks/3.1 Exploratory data analysis basics.ipynb
@@ -11,7 +11,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -27,13 +27,202 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_pickle(\"./musk_tweets_enhanced.pkl\")"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Int64Index: 2819 entries, 849636868052275200 to 15434727182\n",
+ "Data columns (total 8 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 created_at 2819 non-null datetime64[ns]\n",
+ " 1 text 2819 non-null object \n",
+ " 2 tweet_link 2819 non-null object \n",
+ " 3 tweet_mentions 2819 non-null object \n",
+ " 4 n_mentions 2819 non-null int64 \n",
+ " 5 week_day_name 2819 non-null object \n",
+ " 6 week_day 2819 non-null int64 \n",
+ " 7 day_hour 2819 non-null int64 \n",
+ "dtypes: datetime64[ns](1), int64(3), object(4)\n",
+ "memory usage: 198.2+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " created_at \n",
+ " text \n",
+ " tweet_link \n",
+ " tweet_mentions \n",
+ " n_mentions \n",
+ " week_day_name \n",
+ " week_day \n",
+ " day_hour \n",
+ " \n",
+ " \n",
+ " id \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 849636868052275200 \n",
+ " 2017-04-05 14:56:29 \n",
+ " b'And so the robots spared humanity ... https:... \n",
+ " https://twitter.com/i/web/status/8496368680522... \n",
+ " [] \n",
+ " 0 \n",
+ " Wednesday \n",
+ " 2 \n",
+ " 14 \n",
+ " \n",
+ " \n",
+ " 848988730585096192 \n",
+ " 2017-04-03 20:01:01 \n",
+ " b\"@ForIn2020 @waltmossberg @mims @defcon_5 Exa... \n",
+ " https://twitter.com/i/web/status/8489887305850... \n",
+ " [@ForIn2020, @waltmossberg, @mims, @defcon_5] \n",
+ " 4 \n",
+ " Monday \n",
+ " 0 \n",
+ " 20 \n",
+ " \n",
+ " \n",
+ " 848943072423497728 \n",
+ " 2017-04-03 16:59:35 \n",
+ " b'@waltmossberg @mims @defcon_5 Et tu, Walt?' \n",
+ " https://twitter.com/i/web/status/8489430724234... \n",
+ " [@waltmossberg, @mims, @defcon_5] \n",
+ " 3 \n",
+ " Monday \n",
+ " 0 \n",
+ " 16 \n",
+ " \n",
+ " \n",
+ " 848935705057280001 \n",
+ " 2017-04-03 16:30:19 \n",
+ " b'Stormy weather in Shortville ...' \n",
+ " https://twitter.com/i/web/status/8489357050572... \n",
+ " [] \n",
+ " 0 \n",
+ " Monday \n",
+ " 0 \n",
+ " 16 \n",
+ " \n",
+ " \n",
+ " 848416049573658624 \n",
+ " 2017-04-02 06:05:23 \n",
+ " b\"@DaveLeeBBC @verge Coal is dying due to nat ... \n",
+ " https://twitter.com/i/web/status/8484160495736... \n",
+ " [@DaveLeeBBC, @verge] \n",
+ " 2 \n",
+ " Sunday \n",
+ " 6 \n",
+ " 6 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " created_at \\\n",
+ "id \n",
+ "849636868052275200 2017-04-05 14:56:29 \n",
+ "848988730585096192 2017-04-03 20:01:01 \n",
+ "848943072423497728 2017-04-03 16:59:35 \n",
+ "848935705057280001 2017-04-03 16:30:19 \n",
+ "848416049573658624 2017-04-02 06:05:23 \n",
+ "\n",
+ " text \\\n",
+ "id \n",
+ "849636868052275200 b'And so the robots spared humanity ... https:... \n",
+ "848988730585096192 b\"@ForIn2020 @waltmossberg @mims @defcon_5 Exa... \n",
+ "848943072423497728 b'@waltmossberg @mims @defcon_5 Et tu, Walt?' \n",
+ "848935705057280001 b'Stormy weather in Shortville ...' \n",
+ "848416049573658624 b\"@DaveLeeBBC @verge Coal is dying due to nat ... \n",
+ "\n",
+ " tweet_link \\\n",
+ "id \n",
+ "849636868052275200 https://twitter.com/i/web/status/8496368680522... \n",
+ "848988730585096192 https://twitter.com/i/web/status/8489887305850... \n",
+ "848943072423497728 https://twitter.com/i/web/status/8489430724234... \n",
+ "848935705057280001 https://twitter.com/i/web/status/8489357050572... \n",
+ "848416049573658624 https://twitter.com/i/web/status/8484160495736... \n",
+ "\n",
+ " tweet_mentions n_mentions \\\n",
+ "id \n",
+ "849636868052275200 [] 0 \n",
+ "848988730585096192 [@ForIn2020, @waltmossberg, @mims, @defcon_5] 4 \n",
+ "848943072423497728 [@waltmossberg, @mims, @defcon_5] 3 \n",
+ "848935705057280001 [] 0 \n",
+ "848416049573658624 [@DaveLeeBBC, @verge] 2 \n",
+ "\n",
+ " week_day_name week_day day_hour \n",
+ "id \n",
+ "849636868052275200 Wednesday 2 14 \n",
+ "848988730585096192 Monday 0 20 \n",
+ "848943072423497728 Monday 0 16 \n",
+ "848935705057280001 Monday 0 16 \n",
+ "848416049573658624 Sunday 6 6 "
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -43,7 +232,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
@@ -137,7 +326,7 @@
"max 6.000000 6.000000 23.000000"
]
},
- "execution_count": 3,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -152,7 +341,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -366,7 +555,7 @@
"std 0.859091 NaN 1.946637 7.611198 "
]
},
- "execution_count": 6,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -380,7 +569,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
@@ -483,7 +672,7 @@
"std NaN 0.859091 1.946637 7.611198"
]
},
- "execution_count": 8,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -496,7 +685,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
@@ -512,7 +701,7 @@
"Name: created_at, dtype: object"
]
},
- "execution_count": 10,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -523,7 +712,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@@ -702,7 +891,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@@ -722,7 +911,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -798,7 +987,7 @@
"6 2"
]
},
- "execution_count": 15,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -807,6 +996,33 @@
"df.groupby(['n_mentions'])[['text']].count()"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1 1231\n",
+ "0 1145\n",
+ "2 329\n",
+ "3 78\n",
+ "4 28\n",
+ "5 6\n",
+ "6 2\n",
+ "Name: n_mentions, dtype: int64"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df['n_mentions'].value_counts()"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 16,
@@ -837,7 +1053,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
@@ -865,7 +1081,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
@@ -874,7 +1090,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 28,
"metadata": {},
"outputs": [
{
@@ -928,7 +1144,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
@@ -937,7 +1153,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 30,
"metadata": {},
"outputs": [
{
@@ -968,7 +1184,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 31,
"metadata": {},
"outputs": [
{
@@ -1088,7 +1304,7 @@
"848943072423497728 Monday 0 16 2017 "
]
},
- "execution_count": 22,
+ "execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
@@ -1099,7 +1315,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
@@ -1143,7 +1359,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
@@ -1152,7 +1368,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
@@ -1161,7 +1377,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
@@ -1178,7 +1394,7 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
@@ -1227,7 +1443,7 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -1303,7 +1519,7 @@
"6 422"
]
},
- "execution_count": 28,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -1314,7 +1530,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 25,
"metadata": {},
"outputs": [
{
@@ -1390,7 +1606,7 @@
"max 530.000000"
]
},
- "execution_count": 29,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@@ -1401,7 +1617,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
@@ -1410,7 +1626,7 @@
""
]
},
- "execution_count": 30,
+ "execution_count": 26,
"metadata": {},
"output_type": "execute_result"
},
@@ -1433,147 +1649,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
- "metadata": {},
- "outputs": [],
- "source": [
- "plt.bar?"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 32,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " created_at \n",
- " text \n",
- " tweet_link \n",
- " tweet_mentions \n",
- " n_mentions \n",
- " week_day_name \n",
- " week_day \n",
- " day_hour \n",
- " year \n",
- " \n",
- " \n",
- " id \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 849636868052275200 \n",
- " 2017-04-05 14:56:29 \n",
- " b'And so the robots spared humanity ... https:... \n",
- " https://twitter.com/i/web/status/8496368680522... \n",
- " [] \n",
- " 0 \n",
- " Wednesday \n",
- " 2 \n",
- " 14 \n",
- " 2017 \n",
- " \n",
- " \n",
- " 848988730585096192 \n",
- " 2017-04-03 20:01:01 \n",
- " b\"@ForIn2020 @waltmossberg @mims @defcon_5 Exa... \n",
- " https://twitter.com/i/web/status/8489887305850... \n",
- " [@ForIn2020, @waltmossberg, @mims, @defcon_5] \n",
- " 4 \n",
- " Monday \n",
- " 0 \n",
- " 20 \n",
- " 2017 \n",
- " \n",
- " \n",
- " 848943072423497728 \n",
- " 2017-04-03 16:59:35 \n",
- " b'@waltmossberg @mims @defcon_5 Et tu, Walt?' \n",
- " https://twitter.com/i/web/status/8489430724234... \n",
- " [@waltmossberg, @mims, @defcon_5] \n",
- " 3 \n",
- " Monday \n",
- " 0 \n",
- " 16 \n",
- " 2017 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " created_at \\\n",
- "id \n",
- "849636868052275200 2017-04-05 14:56:29 \n",
- "848988730585096192 2017-04-03 20:01:01 \n",
- "848943072423497728 2017-04-03 16:59:35 \n",
- "\n",
- " text \\\n",
- "id \n",
- "849636868052275200 b'And so the robots spared humanity ... https:... \n",
- "848988730585096192 b\"@ForIn2020 @waltmossberg @mims @defcon_5 Exa... \n",
- "848943072423497728 b'@waltmossberg @mims @defcon_5 Et tu, Walt?' \n",
- "\n",
- " tweet_link \\\n",
- "id \n",
- "849636868052275200 https://twitter.com/i/web/status/8496368680522... \n",
- "848988730585096192 https://twitter.com/i/web/status/8489887305850... \n",
- "848943072423497728 https://twitter.com/i/web/status/8489430724234... \n",
- "\n",
- " tweet_mentions n_mentions \\\n",
- "id \n",
- "849636868052275200 [] 0 \n",
- "848988730585096192 [@ForIn2020, @waltmossberg, @mims, @defcon_5] 4 \n",
- "848943072423497728 [@waltmossberg, @mims, @defcon_5] 3 \n",
- "\n",
- " week_day_name week_day day_hour year \n",
- "id \n",
- "849636868052275200 Wednesday 2 14 2017 \n",
- "848988730585096192 Monday 0 20 2017 \n",
- "848943072423497728 Monday 0 16 2017 "
- ]
- },
- "execution_count": 32,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.head(3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 33,
+ "execution_count": 40,
"metadata": {},
"outputs": [
{
@@ -1649,7 +1725,7 @@
"max 23.000000"
]
},
- "execution_count": 33,
+ "execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
@@ -1660,7 +1736,7 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": 41,
"metadata": {},
"outputs": [
{
@@ -1670,7 +1746,7 @@
"Name: 0.25, dtype: float64"
]
},
- "execution_count": 34,
+ "execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
@@ -1681,7 +1757,7 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
@@ -1690,7 +1766,7 @@
},
{
"cell_type": "code",
- "execution_count": 36,
+ "execution_count": 43,
"metadata": {},
"outputs": [
{
@@ -1726,7 +1802,7 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 44,
"metadata": {},
"outputs": [
{
diff --git a/notebooks/3.2 Exploratory data analysis II and working with texts.ipynb b/notebooks/3.2 Exploratory data analysis II and working with texts.ipynb
deleted file mode 100644
index ca8a132..0000000
--- a/notebooks/3.2 Exploratory data analysis II and working with texts.ipynb
+++ /dev/null
@@ -1,2423 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# 3.2 Exploratory data analysis and working with texts\n",
- "\n",
- "In this notebook, we learn about:\n",
- "1. descriptive statistics to explore data;\n",
- "2. working with texts (hints)."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Part 1: descriptive statistics\n",
- "\n",
- "*The goal of exploratory data analysis is to develop an understanding of your data. EDA is fundamentally a creative process. And like most creative processes, the key to asking quality questions is to generate a large quantity of questions.* \n",
- "\n",
- "Key questions:\n",
- "* Which kind of variation occurs within variables?\n",
- "* Which kind of co-variation occurs between variables?\n",
- "\n",
- "https://r4ds.had.co.nz/exploratory-data-analysis.html"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "# imports\n",
- "\n",
- "import os, codecs\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "import seaborn as sns\n",
- "import matplotlib.pyplot as plt"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Import the dataset\n",
- "Let us import the Venetian apprenticeship contracts dataset in memory."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "root_folder = \"../data/apprenticeship_venice/\"\n",
- "df_contracts = pd.read_csv(codecs.open(os.path.join(root_folder,\"professions_data.csv\"), encoding=\"utf8\"), sep=\";\")\n",
- "df_professions = pd.read_csv(codecs.open(os.path.join(root_folder,\"professions_classification.csv\"), encoding=\"utf8\"), sep=\",\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Let's take another look at the dataset."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "RangeIndex: 9653 entries, 0 to 9652\n",
- "Data columns (total 47 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 page_title 9653 non-null object \n",
- " 1 register 9653 non-null object \n",
- " 2 annual_salary 7870 non-null float64\n",
- " 3 a_profession 9653 non-null object \n",
- " 4 profession_code_strict 9618 non-null object \n",
- " 5 profession_code_gen 9614 non-null object \n",
- " 6 profession_cat 9597 non-null object \n",
- " 7 corporation 9350 non-null object \n",
- " 8 keep_profession_a 9653 non-null int64 \n",
- " 9 complete_profession_a 9653 non-null int64 \n",
- " 10 enrolmentY 9628 non-null float64\n",
- " 11 enrolmentM 9631 non-null float64\n",
- " 12 startY 9533 non-null float64\n",
- " 13 startM 9539 non-null float64\n",
- " 14 length 9645 non-null float64\n",
- " 15 has_fled 9653 non-null int64 \n",
- " 16 m_profession 9535 non-null object \n",
- " 17 m_profession_code_strict 9508 non-null object \n",
- " 18 m_profession_code_gen 9506 non-null object \n",
- " 19 m_profession_cat 9489 non-null object \n",
- " 20 m_corporation 9276 non-null object \n",
- " 21 keep_profession_m 9653 non-null int64 \n",
- " 22 complete_profession_m 9653 non-null int64 \n",
- " 23 m_gender 9554 non-null float64\n",
- " 24 m_name 9623 non-null object \n",
- " 25 m_surname 6960 non-null object \n",
- " 26 m_patronimic 2620 non-null object \n",
- " 27 m_atelier 1434 non-null object \n",
- " 28 m_coords 9639 non-null object \n",
- " 29 a_name 9653 non-null object \n",
- " 30 a_age 9303 non-null float64\n",
- " 31 a_gender 9522 non-null float64\n",
- " 32 a_geo_origins 7149 non-null object \n",
- " 33 a_geo_origins_std 4636 non-null object \n",
- " 34 a_coords 9610 non-null object \n",
- " 35 a_quondam 7848 non-null float64\n",
- " 36 accommodation_master 9653 non-null int64 \n",
- " 37 personal_care_master 9653 non-null int64 \n",
- " 38 clothes_master 9653 non-null int64 \n",
- " 39 generic_expenses_master 9653 non-null int64 \n",
- " 40 salary_in_kind_master 9653 non-null int64 \n",
- " 41 pledge_goods_master 9653 non-null int64 \n",
- " 42 pledge_money_master 9653 non-null int64 \n",
- " 43 salary_master 9653 non-null int64 \n",
- " 44 female_guarantor 9653 non-null int64 \n",
- " 45 period_cat 7891 non-null float64\n",
- " 46 incremental_salary 9653 non-null int64 \n",
- "dtypes: float64(11), int64(15), object(21)\n",
- "memory usage: 3.5+ MB\n"
- ]
- }
- ],
- "source": [
- "df_contracts.info()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " page_title \n",
- " register \n",
- " annual_salary \n",
- " a_profession \n",
- " profession_code_strict \n",
- " profession_code_gen \n",
- " profession_cat \n",
- " corporation \n",
- " keep_profession_a \n",
- " complete_profession_a \n",
- " ... \n",
- " personal_care_master \n",
- " clothes_master \n",
- " generic_expenses_master \n",
- " salary_in_kind_master \n",
- " pledge_goods_master \n",
- " pledge_money_master \n",
- " salary_master \n",
- " female_guarantor \n",
- " period_cat \n",
- " incremental_salary \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " Carlo Della sosta (Orese) 1592-08-03 \n",
- " asv, giustizia vecchia, accordi dei garzoni, 1... \n",
- " NaN \n",
- " orese \n",
- " orese \n",
- " orefice \n",
- " orefice \n",
- " Oresi \n",
- " 1 \n",
- " 1 \n",
- " ... \n",
- " 1 \n",
- " 1 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " NaN \n",
- " 0 \n",
- " \n",
- " \n",
- " 1 \n",
- " Antonio quondam Andrea (squerariol) 1583-01-09 \n",
- " asv, giustizia vecchia, accordi dei garzoni, 1... \n",
- " 12.5 \n",
- " squerariol \n",
- " squerariol \n",
- " lavori allo squero \n",
- " lavori allo squero \n",
- " Squerarioli \n",
- " 1 \n",
- " 1 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 1 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 1 \n",
- " 0 \n",
- " 1.0 \n",
- " 0 \n",
- " \n",
- " \n",
- " 2 \n",
- " Cristofollo di Zuane (batioro in carta) 1591-0... \n",
- " asv, giustizia vecchia, accordi dei garzoni, 1... \n",
- " NaN \n",
- " batioro \n",
- " batioro \n",
- " battioro \n",
- " fabbricatore di foglie/fili/cordelle d'oro o a... \n",
- " Battioro \n",
- " 1 \n",
- " 1 \n",
- " ... \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " 0 \n",
- " NaN \n",
- " 0 \n",
- " \n",
- " \n",
- "
\n",
- "
3 rows × 47 columns
\n",
- "
"
- ],
- "text/plain": [
- " page_title \\\n",
- "0 Carlo Della sosta (Orese) 1592-08-03 \n",
- "1 Antonio quondam Andrea (squerariol) 1583-01-09 \n",
- "2 Cristofollo di Zuane (batioro in carta) 1591-0... \n",
- "\n",
- " register annual_salary \\\n",
- "0 asv, giustizia vecchia, accordi dei garzoni, 1... NaN \n",
- "1 asv, giustizia vecchia, accordi dei garzoni, 1... 12.5 \n",
- "2 asv, giustizia vecchia, accordi dei garzoni, 1... NaN \n",
- "\n",
- " a_profession profession_code_strict profession_code_gen \\\n",
- "0 orese orese orefice \n",
- "1 squerariol squerariol lavori allo squero \n",
- "2 batioro batioro battioro \n",
- "\n",
- " profession_cat corporation \\\n",
- "0 orefice Oresi \n",
- "1 lavori allo squero Squerarioli \n",
- "2 fabbricatore di foglie/fili/cordelle d'oro o a... Battioro \n",
- "\n",
- " keep_profession_a complete_profession_a ... personal_care_master \\\n",
- "0 1 1 ... 1 \n",
- "1 1 1 ... 0 \n",
- "2 1 1 ... 0 \n",
- "\n",
- " clothes_master generic_expenses_master salary_in_kind_master \\\n",
- "0 1 1 0 \n",
- "1 0 1 0 \n",
- "2 0 0 0 \n",
- "\n",
- " pledge_goods_master pledge_money_master salary_master female_guarantor \\\n",
- "0 0 0 0 0 \n",
- "1 0 0 1 0 \n",
- "2 0 0 0 0 \n",
- "\n",
- " period_cat incremental_salary \n",
- "0 NaN 0 \n",
- "1 1.0 0 \n",
- "2 NaN 0 \n",
- "\n",
- "[3 rows x 47 columns]"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df_contracts.head(3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Index(['page_title', 'register', 'annual_salary', 'a_profession',\n",
- " 'profession_code_strict', 'profession_code_gen', 'profession_cat',\n",
- " 'corporation', 'keep_profession_a', 'complete_profession_a',\n",
- " 'enrolmentY', 'enrolmentM', 'startY', 'startM', 'length', 'has_fled',\n",
- " 'm_profession', 'm_profession_code_strict', 'm_profession_code_gen',\n",
- " 'm_profession_cat', 'm_corporation', 'keep_profession_m',\n",
- " 'complete_profession_m', 'm_gender', 'm_name', 'm_surname',\n",
- " 'm_patronimic', 'm_atelier', 'm_coords', 'a_name', 'a_age', 'a_gender',\n",
- " 'a_geo_origins', 'a_geo_origins_std', 'a_coords', 'a_quondam',\n",
- " 'accommodation_master', 'personal_care_master', 'clothes_master',\n",
- " 'generic_expenses_master', 'salary_in_kind_master',\n",
- " 'pledge_goods_master', 'pledge_money_master', 'salary_master',\n",
- " 'female_guarantor', 'period_cat', 'incremental_salary'],\n",
- " dtype='object')"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df_contracts.columns"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Every row represents an apprenticeship contract. Contracts were registered both at the guild's and at a public office. This is a sample of contracts from a much larger set of records.\n",
- "\n",
- "Some of the variables we will work with are:\n",
- "* `annual_salary`: the annual salary paid to the apprentice, if any (in Venetian ducats).\n",
- "* `a_profession` to `corporation`: increasingly generic classifications for the apprentice's stated profession.\n",
- "* `startY` and `enrolmentY`: contract start and registration year respectively.\n",
- "* `length`: of the contract, in years.\n",
- "* `m_gender` and `a_gender`: of master and apprentice respectively.\n",
- "* `a_age`: age of the apprentice at entry, in years.\n",
- "* `female_guarantor`: if at least one of the contract's guarantors was female, boolean."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " Trascrizione \n",
- " Standard \n",
- " Gruppo 0 \n",
- " Gruppo 1 \n",
- " Gruppo 2 \n",
- " Gruppo 3 \n",
- " Gruppo 4 \n",
- " Corporazione \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " al negotio del libraro \n",
- " librer \n",
- " libraio \n",
- " librai - diverse specializzazioni \n",
- " stampa \n",
- " altre lavorazioni manifatturiere \n",
- " beni \n",
- " libreri, stampatori e ligadori \n",
- " \n",
- " \n",
- " 1 \n",
- " arte de far arpicordi \n",
- " arte de far arpicordi \n",
- " fabbricatore di arpicordi \n",
- " fabbricatore di strumenti musicali \n",
- " musica \n",
- " altri servizi \n",
- " servizi \n",
- " NaN \n",
- " \n",
- " \n",
- " 2 \n",
- " arte de' colori \n",
- " arte dei colori \n",
- " fabbricazione/vendita di colori \n",
- " colori \n",
- " colori \n",
- " decorazioni e mestieri dell'arte \n",
- " beni \n",
- " spezieri \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Trascrizione Standard \\\n",
- "0 al negotio del libraro librer \n",
- "1 arte de far arpicordi arte de far arpicordi \n",
- "2 arte de' colori arte dei colori \n",
- "\n",
- " Gruppo 0 Gruppo 1 \\\n",
- "0 libraio librai - diverse specializzazioni \n",
- "1 fabbricatore di arpicordi fabbricatore di strumenti musicali \n",
- "2 fabbricazione/vendita di colori colori \n",
- "\n",
- " Gruppo 2 Gruppo 3 Gruppo 4 \\\n",
- "0 stampa altre lavorazioni manifatturiere beni \n",
- "1 musica altri servizi servizi \n",
- "2 colori decorazioni e mestieri dell'arte beni \n",
- "\n",
- " Corporazione \n",
- "0 libreri, stampatori e ligadori \n",
- "1 NaN \n",
- "2 spezieri "
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df_professions.head(3)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The professions data frame contains a classification system for each profession as found in the records (transcription, first column). The last column is the guild (or corporation) that governed the given profession. This work was performed manually by historians. We don't use it here as the classifications we need are already part of the main data frame."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Questions\n",
- "\n",
- "* Plot the distribution (histogram) of the apprentices' age, contract length, annual salary and start year.\n",
- "* Calculate the proportion of female apprentices and masters, and of contracts with a female guarantor.\n",
- "* How likely is it for a female apprentice to have a female master? And for a male apprentice?"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [],
- "source": [
- "salaries_male_guarantor = df_contracts[\n",
- " df_contracts.female_guarantor == 0\n",
- "].annual_salary"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "salaries_female_guarantor = df_contracts[\n",
- " df_contracts.female_guarantor == 1\n",
- "].annual_salary"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "salaries_male_guarantor.hist()\n",
- "salaries_female_guarantor.hist()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 11,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAATKUlEQVR4nO3df6zd9X3f8edrkKQSzjCMzGKG1WRyK9GgUbgCpKbVtbKCIVudbFMEQsTOD7mTQErUTIvTqAM1jeRuSapFzeicYoWsaW6ZkigWkFHXwkP5gwbMHIwhFJc4G1eurQZq4iTKRvbeH+dr63Bzr+/18bnnnOvP8yEdne/5fH+c1/ne69c553u+9zhVhSSpDX9v3AEkSaNj6UtSQyx9SWqIpS9JDbH0Jakh5487wOlccskltW7duoHW/eEPf8gFF1ww3EDLZKVkNefwrZSs5hy+5cy6b9++v62qt8w7s6om9nLttdfWoB599NGB1x21lZLVnMO3UrKac/iWMyvwZC3Qqx7ekaSGWPqS1BBLX5IaYulLUkMsfUlqiKUvSQ2x9CWpIZa+JDXE0pekhkz01zAM07ptD52aPrz9nWNMIknj4yt9SWqIpS9JDbH0Jakhlr4kNcTSl6SGWPqS1BBLX5IaYulLUkMsfUlqiKUvSQ2x9CWpIZa+JDXE0pekhlj6ktQQS1+SGmLpS1JDLH1JaoilL0kNsfQlqSGLln6Sy5M8muTZJAeTfKgbvyfJbJL93eWWvnU+luRQkueT3NQ3vrEbO5Rk2/I8JEnSQpbyH6O/Bnykqp5K8mZgX5Ld3bw/qKpP9S+c5ErgVuCXgH8E/EWSX+hmfw74deAl4Ikku6rq2WE8EEnS4hYt/ao6Ahzppn+Q5Dlg7WlW2QTMVNVPgO8mOQRc1807VFUvAiSZ6Za19CVpRFJVS184WQc8BrwN+C1gC/Aq8CS9dwOvJPlD4PGq+pNunfuAb3Sb2FhVH+zG7wCur6q75tzHVmArwJo1a66dmZkZ6IGdOHGCVatWnbp9YPb4qemr1l440DaXy9ysk8qcw7dSsppz+JYz64YNG/ZV1dR885ZyeAeAJKuArwAfrqpXk9wLfAKo7vrTwPvPNmxV7QB2AExNTdX09PRA29m7dy/9627Z9tCp6cO3D7bN5TI366Qy5/CtlKzmHL5xZV1S6Sd5A73C/1JVfRWgqo72zf888GB3cxa4vG/1y7oxTjMuSRqBpZy9E+A+4Lmq+kzf+KV9i70beKab3gXcmuRNSa4A1gPfAp4A1ie5Iskb6X3Yu2s4D0OStBRLeaX/K8AdwIEk+7ux3wZuS3I1vcM7h4HfBKiqg0keoPcB7WvAnVX1U4AkdwGPAOcBO6vq4NAeiSRpUUs5e+ebQOaZ9fBp1vkk8Ml5xh8+3XqSpOXlX+RKUkMsfUlqiKUvSQ2x9CWpIZa+JDXE0pekhlj6ktQQS1+SGmLpS1JDLH1JaoilL0kNsfQlqSGWviQ1xNKXpIZY+pLUEEtfkhpi6UtSQyx9SWqIpS9JDbH0Jakhi/7H6OeiddseOjV9ePs7x5hEkkbLV/qS1BBLX5IaYulLUkMsfUlqiKUvSQ2x9CWpIZa+JDVk0dJPcnmSR5M8m+Rgkg914xcn2Z3khe76om48ST6b5FCSp5Nc07etzd3yLyTZvHwPS5I0n6W80n8N+EhVXQncANyZ5EpgG7CnqtYDe7rbADcD67vLVuBe6D1JAHcD1wPXAXeffKKQJI3GoqVfVUeq6qlu+gfAc8BaYBNwf7fY/cC7uulNwBer53FgdZJLgZuA3VX1clW9AuwGNg7zwUiSTi9VtfSFk3XAY8DbgP9VVau78QCvVNXqJA8C26vqm928PcBHgWng56rq97rx3wF+XFWfmnMfW+m9Q2DNmjXXzszMDPTATpw4wapVq07dPjB7fN7lrlp74UDbH6a5WSeVOYdvpWQ15/AtZ9YNGzbsq6qp+eYt+bt3kqwC
vgJ8uKpe7fV8T1VVkqU/e5xGVe0AdgBMTU3V9PT0QNvZu3cv/etu6fu+nX6Hbx9s+8M0N+ukMufwrZSs5hy+cWVd0tk7Sd5Ar/C/VFVf7YaPdodt6K6PdeOzwOV9q1/WjS00LkkakaWcvRPgPuC5qvpM36xdwMkzcDYDX+8bf293Fs8NwPGqOgI8AtyY5KLuA9wbuzFJ0ogs5fDOrwB3AAeS7O/GfhvYDjyQ5APA94D3dPMeBm4BDgE/At4HUFUvJ/kE8ES33O9W1cvDeBCSpKVZtPS7D2SzwOx3zLN8AXcusK2dwM4zCShJGh7/IleSGmLpS1JDLH1JaoilL0kNsfQlqSGWviQ1xNKXpIZY+pLUEEtfkhpi6UtSQyx9SWqIpS9JDbH0Jakhlr4kNcTSl6SGWPqS1BBLX5IaYulLUkMsfUlqiKUvSQ2x9CWpIZa+JDXE0pekhlj6ktQQS1+SGmLpS1JDLH1JaoilL0kNWbT0k+xMcizJM31j9ySZTbK/u9zSN+9jSQ4leT7JTX3jG7uxQ0m2Df+hSJIWs5RX+l8ANs4z/gdVdXV3eRggyZXArcAvdev85yTnJTkP+BxwM3AlcFu3rCRphM5fbIGqeizJuiVubxMwU1U/Ab6b5BBwXTfvUFW9CJBkplv22TOPLEkaVKpq8YV6pf9gVb2tu30PsAV4FXgS+EhVvZLkD4HHq+pPuuXuA77RbWZjVX2wG78DuL6q7prnvrYCWwHWrFlz7czMzEAP7MSJE6xaterU7QOzx+dd7qq1Fw60/WGam3VSmXP4VkpWcw7fcmbdsGHDvqqamm/eoq/0F3Av8AmguutPA+8fcFuvU1U7gB0AU1NTNT09PdB29u7dS/+6W7Y9NO9yh28fbPvDNDfrpDLn8K2UrOYcvnFlHaj0q+royekknwce7G7OApf3LXpZN8ZpxiVJIzLQKZtJLu27+W7g5Jk9u4Bbk7wpyRXAeuBbwBPA+iRXJHkjvQ97dw0eW5I0iEVf6Sf5MjANXJLkJeBuYDrJ1fQO7xwGfhOgqg4meYDeB7SvAXdW1U+77dwFPAKcB+ysqoPDfjCSpNNbytk7t80zfN9plv8k8Ml5xh8GHj6jdJKkofIvciWpIZa+JDXE0pekhlj6ktQQS1+SGmLpS1JDLH1JaoilL0kNsfQlqSGWviQ1xNKXpIZY+pLUEEtfkhpi6UtSQyx9SWqIpS9JDbH0JakhA/3H6OeSddseOjV9ePs7x5hEkpafr/QlqSGWviQ1xNKXpIZY+pLUEEtfkhpi6UtSQyx9SWqIpS9JDbH0Jakhi5Z+kp1JjiV5pm/s4iS7k7zQXV/UjSfJZ5McSvJ0kmv61tncLf9Cks3L83AkSaezlFf6XwA2zhnbBuypqvXAnu42wM3A+u6yFbgXek8SwN3A9cB1wN0nnygkSaOzaOlX1WPAy3OGNwH3d9P3A+/qG/9i9TwOrE5yKXATsLuqXq6qV4Dd/OwTiSRpmaWqFl8oWQc8WFVv627/XVWt7qYDvFJVq5M8CGyvqm928/YAHwWmgZ+rqt/rxn8H+HFVfWqe+9pK710Ca9asuXZmZmagB3bixAlWrVp16vaB2eOLrnPV2gsHuq+zNTfrpDLn8K2UrOYcvuXMumHDhn1VNTXfvLP+ls2qqiSLP3MsfXs7gB0AU1NTNT09PdB29u7dS/+6W/q+TXMhh28f7L7O1tysk8qcw7dSsppz+MaVddCzd452h23oro9147PA5X3LXdaNLTQuSRqhQUt/F3DyDJzNwNf7xt/bncVzA3C8qo4AjwA3Jrmo+wD3xm5MkjRCix7eSfJlesfkL0nyEr2zcLYDDyT5APA94D3d4g8DtwCHgB8B7wOoqpeTfAJ4olvud6tq7ofDkqRltmjpV9VtC8x6xzzLFnDnAtvZCew8o3SSpKHyL3IlqSGWviQ1xNKXpIZY+pLUEEtfkhpi6UtSQyx9SWqIpS9JDbH0Jakhlr4kNcTSl6SGWPqS
1BBLX5IaYulLUkMsfUlqiKUvSQ2x9CWpIZa+JDXE0pekhlj6ktQQS1+SGmLpS1JDLH1JaoilL0kNsfQlqSGWviQ1xNKXpIZY+pLUkLMq/SSHkxxIsj/Jk93YxUl2J3mhu76oG0+SzyY5lOTpJNcM4wFIkpZuGK/0N1TV1VU11d3eBuypqvXAnu42wM3A+u6yFbh3CPctSToDy3F4ZxNwfzd9P/CuvvEvVs/jwOokly7D/UuSFpCqGnzl5LvAK0AB/6WqdiT5u6pa3c0P8EpVrU7yILC9qr7ZzdsDfLSqnpyzza303gmwZs2aa2dmZgbKduLECVatWnXq9oHZ44uuc9XaCwe6r7M1N+ukMufwrZSs5hy+5cy6YcOGfX1HX17n/LPc9turajbJPwR2J/lO/8yqqiRn9KxSVTuAHQBTU1M1PT09ULC9e/fSv+6WbQ8tus7h2we7r7M1N+ukMufwrZSs5hy+cWU9q8M7VTXbXR8DvgZcBxw9edimuz7WLT4LXN63+mXdmCRpRAYu/SQXJHnzyWngRuAZYBewuVtsM/D1bnoX8N7uLJ4bgONVdWTg5JKkM3Y2h3fWAF/rHbbnfOBPq+q/J3kCeCDJB4DvAe/pln8YuAU4BPwIeN9Z3LckaQADl35VvQj803nGvw+8Y57xAu4c9P4kSWfPv8iVpIac7dk755R1fWf4HN7+zjEmkaTl4St9SWrIOf1Kf90Szs2XpJb4Sl+SGmLpS1JDLH1JaoilL0kNsfQlqSGWviQ1xNKXpIZY+pLUkHP6j7POhl/JIOlc5Ct9SWqIpS9JDbH0Jakhlr4kNcTSl6SGWPqS1BBP2VwCT9+UdK7wlb4kNcTSl6SGWPqS1BBLX5Ia4ge5Z8EPeCWtNJb+MvOJQdIksfTPUH+JS9JKY+mPSf+Txxc2XjDGJJJaMvLST7IR+E/AecAfV9X2UWdYDstxGMdDQ5KGbaSln+Q84HPArwMvAU8k2VVVz44yR4t8ApEEo3+lfx1wqKpeBEgyA2wCzqnSX+i4/0LjB2aPs6Wbd6aFPHeby1HoJ+/jI1e9xvQQtgM+8Ujjkqoa3Z0l/xrYWFUf7G7fAVxfVXf1LbMV2Nrd/EXg+QHv7hLgb88i7iitlKzmHL6VktWcw7ecWX++qt4y34yJ+yC3qnYAO852O0merKqpIURadislqzmHb6VkNefwjSvrqP8idxa4vO/2Zd2YJGkERl36TwDrk1yR5I3ArcCuEWeQpGaN9PBOVb2W5C7gEXqnbO6sqoPLdHdnfYhohFZKVnMO30rJas7hG0vWkX6QK0kaL79lU5IaYulLUkPOydJPsjHJ80kOJdk27jwnJbk8yaNJnk1yMMmHuvF7kswm2d9dbhl3VoAkh5Mc6DI92Y1dnGR3khe664vGnPEX+/bb/iSvJvnwJOzTJDuTHEvyTN/YvPsvPZ/tfmefTnLNBGT9j0m+0+X5WpLV3fi6JD/u27d/NOacC/6sk3ys26fPJ7lpzDn/rC/j4ST7u/HR7s+qOqcu9D4g/mvgrcAbgW8DV447V5ftUuCabvrNwF8BVwL3AP923PnmyXsYuGTO2H8AtnXT24DfH3fOOT/7vwF+fhL2KfBrwDXAM4vtP+AW4BtAgBuAv5yArDcC53fTv9+XdV3/chOQc96fdfdv69vAm4Arul44b1w558z/NPDvx7E/z8VX+qe+6qGq/g9w8qsexq6qjlTVU930D4DngLXjTXXGNgH3d9P3A+8aX5Sf8Q7gr6vqe+MOAlBVjwEvzxleaP9tAr5YPY8Dq5NcOpKgzJ+1qv68ql7rbj5O7+9qxmqBfbqQTcBMVf2kqr4LHKLXD8vudDmTBHgP8OVRZJnrXCz9tcD/7rv9EhNYrEnWAb8M/GU3dFf3NnrnuA+Z9Cngz5Ps674eA2BNVR3ppv8GWDOeaPO6ldf/Q5rEfbrQ/pv039v303snctIVSf5nkv+R5FfHFarPfD/rSd2nvwocraoX+sZGtj/P
xdKfeElWAV8BPlxVrwL3Av8EuBo4Qu+t3yR4e1VdA9wM3Jnk1/pnVu+96USc89v9sd9vAP+tG5rUfXrKJO2/00nyceA14Evd0BHgH1fVLwO/Bfxpkr8/rnysgJ/1HLfx+hcnI92f52LpT/RXPSR5A73C/1JVfRWgqo5W1U+r6v8Bn2dEb0EXU1Wz3fUx4Gv0ch09edihuz42voSvczPwVFUdhcndpyy8/yby9zbJFuCfA7d3T1J0h0u+303vo3es/BfGlfE0P+uJ26dJzgf+JfBnJ8dGvT/PxdKf2K966I7l3Qc8V1Wf6RvvP3b7buCZueuOWpILkrz55DS9D/WeobcvN3eLbQa+Pp6EP+N1r54mcZ92Ftp/u4D3dmfx3AAc7zsMNBbp/YdH/w74jar6Ud/4W9L7vzFI8lZgPfDieFKe9me9C7g1yZuSXEEv57dGnW+OfwZ8p6peOjkw8v05qk+MR3mhdybEX9F7xvz4uPP05Xo7vbfzTwP7u8stwH8FDnTju4BLJyDrW+md+fBt4ODJ/Qj8A2AP8ALwF8DFE5D1AuD7wIV9Y2Pfp/SehI4A/5fe8eQPLLT/6J2187nud/YAMDUBWQ/ROyZ+8nf1j7pl/1X3O7EfeAr4F2POueDPGvh4t0+fB24eZ85u/AvAv5mz7Ej3p1/DIEkNORcP70iSFmDpS1JDLH1JaoilL0kNsfQlqSGWviQ1xNKXpIb8f1JfOTuBGZthAAAAAElFTkSuQmCC\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "df_contracts.annual_salary.hist(bins=100)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAD4CAYAAADo30HgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVq0lEQVR4nO3df5Bd5X3f8fe3yGDCupKAdEslTYUbjTMENQnsAKlTzypKsQCPRTsOg4exJYeMxlNISa1MkOtpyKT1VG5KPHaauqNGjOWG8eIQuyj8iK3IbD3+Q8SIYsQP2yxErrUji9pgOWtIHaXf/nGfde6z7K97z713d+H9mtnZc57znHO+99yz96Pz4x5FZiJJ0rS/s9QFSJKWF4NBklQxGCRJFYNBklQxGCRJlVVLXcB8Lrzwwty4cWNH8/zgBz/gvPPO609BPWB9zVhfM9bXzEqp7+jRo9/JzB/vekGZuWx/Lr/88uzUww8/3PE8g2R9zVhfM9bXzEqpD3g0G3z2eipJklQxGCRJFYNBklQxGCRJFYNBklQxGCRJFYNBklQxGCRJFYNBklRZ1o/EeK3ZuOcBdm8+w849Dyyq//G91/W5Ikl6NY8YJEkVg0GSVDEYJEkVg0GSVDEYJEkVg0GSVDEYJEkVg0GSVDEYJEkVg0GSVFkwGCLiroh4ISKebGv7nYj4WkQ8ERGfi4g1bdM+GBETEfH1iHh7W/u20jYREXt6/kokST2xmCOGTwLbZrQdAi7NzH8MfAP4IEBEXALcCPxUmee/RMRZEXEW8PvANcAlwLtLX0nSMrNgMGTml4AXZ7R9ITPPlNEjwPoyvB0Yy8z/m5l/AUwAV5Sficx8PjN/CIyVvpKkZSYyc+FOERuB+zPz0lmm/QlwT2b+YUT8Z+BIZv5hmbYfeKh03ZaZv1La3wNcmZm3zrK8XcAugOHh4cvHxsY6ekFTU1MMDQ11NM+gHJs8zfC5cOqVxfXfvG51fwuaxXLefmB9TVlfMyulvi1bthzNzJFul9PosdsR8SHgDHB3k+W0y8x9wD6AkZGRHB0d7Wj+8fFxOp1nUHaWx27feWxxm/34TaP9LWgWy3n7gfU1ZX3NvF7q6zoYImIn8A5ga/7tYccksKGt2/rSxjztkqRlpKvbVSNiG/AbwDsz8+W2SQeBGyPinIi4GNgE/DnwFWBTRFwcEWfTukB9sFnpkqR+WPCIISI+DYwCF0bECeAOWnchnQMcighoXVd4f2Y+FRGfAZ6mdYrplsz8m7KcW4HPA2cBd2XmU314PZKkhhYMhsx89yzN++fp/2Hgw7O0Pwg82FF1kqSB85vPkqRKo7uStPxs3PNAR/2P772uT5VIWqk8YpAkVQwGSVLFYJAkVQwGSVLFYJAkVQwGSVLFYJAkVQwGSVLFYJAkVQwGSVLFYJAkVQwGSVLFYJAkVQwGSVLFYJAkVQwGSVLFYJAkVQwGSVLFYJAkVQwGSVLFYJAkVRYMhoi4KyJeiIgn29rOj4hDEfFs+b22tEdEfDwiJiLiiYi4rG2eHaX/sxGxoz8vR5LU1GKOGD4JbJvRtgc4nJmbgMNlHOAaYFP52QV8AlpBAtwBXAlcAdwxHSaSpOVlwWDIzC8BL85o3g4cKMMHgOvb2j+VLUeANRFxEfB24FBmvpiZLwGHeHXYSJKWgcjMhTtFbATuz8xLy/j3MnNNGQ7gpcxcExH3A3sz88tl2mHgdmAUeGNm/vvS/m+BVzLzP82yrl20jjYYHh6+fGxsrKMXNDU1xdDQUEfzDMqxydMMnwunXllc/83rVne1jk7MXMdy3n5gfU1ZXzMrpb4tW7YczcyRbpezqmkhmZkRsXC6LH55+4B9ACMjIzk6OtrR/OPj43Q6z6Ds3PMAuzef4c5ji9vsx28a7WodnZi5juW8/cD6mrK+Zl4v9XV7V9KpcoqI8vuF0j4JbGjrt760zdUuSVpmug2Gg8D0nUU7gPva2t9b7k66Cjid
mSeBzwNXR8TactH56tImSVpmFjynERGfpnWN4MKIOEHr7qK9wGci4mbgm8ANpfuDwLXABPAy8D6AzHwxIv4d8JXS77czc+YFbUnSMrBgMGTmu+eYtHWWvgncMsdy7gLu6qg6SdLA+c1nSVKl8V1JWtk2zriLaffmM/Pe2XR873X9LknSEvOIQZJUMRgkSRWDQZJU8RpDAzPPz0vSa4FHDJKkisEgSaoYDJKkisEgSaoYDJKkisEgSaoYDJKkisEgSaoYDJKkisEgSaoYDJKkisEgSaoYDJKkisEgSaoYDJKkisEgSaoYDJKkSqNgiIh/HRFPRcSTEfHpiHhjRFwcEY9ExERE3BMRZ5e+55TxiTJ9Y09egSSpp7oOhohYB/wrYCQzLwXOAm4EPgJ8NDN/AngJuLnMcjPwUmn/aOknSVpmmp5KWgWcGxGrgB8DTgK/ANxbph8Ari/D28s4ZfrWiIiG65ck9VhkZvczR9wGfBh4BfgCcBtwpBwVEBEbgIcy89KIeBLYlpknyrTngCsz8zszlrkL2AUwPDx8+djYWEc1TU1NMTQ01PVr6sSxydMdzzN8Lpx6ZXF9N69b3fHyu6mp3UL1dVNTLw3y/e2G9TVjfc1M17dly5ajmTnS7XJWdTtjRKyldRRwMfA94I+Abd0ub1pm7gP2AYyMjOTo6GhH84+Pj9PpPN3aueeBjufZvfkMdx5b3GY/ftNox8vvpqZ2C9XXTU29NMj3txvW14z1NdOr+pqcSvpF4C8y8/9k5l8DnwXeCqwpp5YA1gOTZXgS2ABQpq8Gvttg/ZKkPmgSDP8buCoifqxcK9gKPA08DLyr9NkB3FeGD5ZxyvQvZpPzWJKkvug6GDLzEVoXkR8DjpVl7QNuBz4QERPABcD+Mst+4ILS/gFgT4O6JUl90vU1BoDMvAO4Y0bz88AVs/T9K+CXmqxPS29jh9cwju+9rk+VSOoXv/ksSaoYDJKkisEgSaoYDJKkisEgSaoYDJKkisEgSaoYDJKkisEgSaoYDJKkisEgSaoYDJKkSqOH6Km/On1gnST1gkcMkqSKwSBJqhgMkqSKwSBJqhgMkqSKwSBJqhgMkqSKwSBJqhgMkqSKwSBJqjQKhohYExH3RsTXIuKZiPi5iDg/Ig5FxLPl99rSNyLi4xExERFPRMRlvXkJkqReanrE8DHgTzPzJ4GfBp4B9gCHM3MTcLiMA1wDbCo/u4BPNFy3JKkPug6GiFgNvA3YD5CZP8zM7wHbgQOl2wHg+jK8HfhUthwB1kTERd2uX5LUH5GZ3c0Y8TPAPuBpWkcLR4HbgMnMXFP6BPBSZq6JiPuBvZn55TLtMHB7Zj46Y7m7aB1RMDw8fPnY2FhHdU1NTTE0NNTVa+rUscnTHc8zfC6ceqUPxfRIr+vbvG517xbGYN/fblhfM9bXzHR9W7ZsOZqZI90up8ljt1cBlwG/mpmPRMTH+NvTRgBkZkZER8mTmftoBQ4jIyM5OjraUVHj4+N0Ok+3dnbxWOzdm89w57Hl+7TzXtd3/KbRni0LBvv+dsP6mrG+ZnpVX5NrDCeAE5n5SBm/l1ZQnJo+RVR+v1CmTwIb2uZfX9okSctI18GQmd8GvhURbylNW2mdVjoI7ChtO4D7yvBB4L3l7qSrgNOZebLb9UuS+qPpOYNfBe6OiLOB54H30Qqbz0TEzcA3gRtK3weBa4EJ4OXSV5K0zDQKhsx8HJjtAsfWWfomcEuT9UmS+s9vPkuSKgaDJKliMEiSKgaDJKliMEiSKgaDJKliMEiSKgaDJKliMEiSKgaDJKliMEiSKgaDJKliMEiSKgaDJKliMEiSKgaDJKliMEiSKgaDJKliMEiSKgaDJKliMEiSKgaDJKliMEiSKo2DISLOioj/FRH3l/GLI+KRiJiIiHsi4uzSfk4ZnyjTNzZdtySp93pxxHAb8Ezb+EeAj2bmTwAvATeX9puBl0r7R0s/SdIy0ygYImI9cB3wB2U8gF8A7i1d
DgDXl+HtZZwyfWvpL0laRiIzu5854l7gPwBvAn4d2AkcKUcFRMQG4KHMvDQingS2ZeaJMu054MrM/M6MZe4CdgEMDw9fPjY21lFNU1NTDA0Ndf2aOnFs8nTH8wyfC6de6UMxPdLr+javW927hTHY97cb1teM9TUzXd+WLVuOZuZIt8tZ1e2MEfEO4IXMPBoRo90uZ6bM3AfsAxgZGcnR0c4WPT4+TqfzdGvnngc6nmf35jPceazrzd53va7v+E2jPVsWDPb97Yb1NWN9zfSqviafAG8F3hkR1wJvBP4u8DFgTUSsyswzwHpgsvSfBDYAJyJiFbAa+G6D9UuS+qDrawyZ+cHMXJ+ZG4EbgS9m5k3Aw8C7SrcdwH1l+GAZp0z/YjY5jyVJ6ot+fI/hduADETEBXADsL+37gQtK+weAPX1YtySpoZ6cTM7McWC8DD8PXDFLn78CfqkX65Mk9Y/ffJYkVQwGSVJl+d43qdeEjR3e0nt873V9qkTSYnnEIEmqGAySpIrBIEmqGAySpIrBIEmqGAySpIrBIEmqGAySpIrBIEmqGAySpIrBIEmq+KykNp0+10eSXos8YpAkVQwGSVLFYJAkVQwGSVLFYJAkVbwrSVpAJ3er7d58htH+lSINhMGgFa2bW4z970Ol+XkqSZJU6ToYImJDRDwcEU9HxFMRcVtpPz8iDkXEs+X32tIeEfHxiJiIiCci4rJevQhJUu80OWI4A+zOzEuAq4BbIuISYA9wODM3AYfLOMA1wKbyswv4RIN1S5L6pOtgyMyTmflYGf5L4BlgHbAdOFC6HQCuL8PbgU9lyxFgTURc1O36JUn90ZNrDBGxEfhZ4BFgODNPlknfBobL8DrgW22znShtkqRlJDKz2QIihoD/CXw4Mz8bEd/LzDVt01/KzLURcT+wNzO/XNoPA7dn5qMzlreL1qkmhoeHLx8bG+uonqmpKYaGhrp6LccmT3c1XyeGz4VTr/R9NV1b6vo2r1s97/SZ728379lC65ipk3UMnwt/7/zOlj9ITf4+BsH6mpmub8uWLUczc6Tb5TS6XTUi3gD8MXB3Zn62NJ+KiIsy82Q5VfRCaZ8ENrTNvr60VTJzH7APYGRkJEdHRzuqaXx8nE7nmbZzAE9X3b35DHceW753CS91fcdvGp13+sz3t5v3bKF1zNTJOnZvPsMNXe5/g9Dk72MQrK+ZXtXX5K6kAPYDz2Tm77ZNOgjsKMM7gPva2t9b7k66CjjddspJkrRMNPmn4VuB9wDHIuLx0vZvgL3AZyLiZuCbwA1l2oPAtcAE8DLwvgbrliT1SdfBUK4VxByTt87SP4Fbul2fJGkwlu/Jbr0uLfSIi92bzwzkWpD0euYjMSRJFYNBklQxGCRJFYNBklQxGCRJFYNBklQxGCRJFb/HIK0wnf53pv5XpuqURwySpIrBIEmqGAySpIrBIEmqePFZrzudXryVXm88YpAkVQwGSVLFYJAkVbzGIGng5rrOM9d/xOSX9AbLIwZJUsVgkCRVDAZJUsVrDNJrXPv5/LnO4c/U6Tn9fn83xAcHDtZrOhj8IpMkde41HQzSSuA/YLTcDDwYImIb8DHgLOAPMnPvoGuQND/DamGv5dNbAw2GiDgL+H3gnwEngK9ExMHMfHqQdUj95Ifq0uvmPVhJH9z9NugjhiuAicx8HiAixoDtgMEgaUktJkwWe/G+2+W3W8qgiswc3Moi3gVsy8xfKePvAa7MzFvb+uwCdpXRtwBf73A1FwLf6UG5/WJ9zVhfM9bXzEqp7x9m5o93u5Bld/E5M/cB+7qdPyIezcyRHpbUU9bXjPU1Y33NvF7qG/QX3CaBDW3j60ubJGmZGHQwfAXYFBEXR8TZwI3AwQHXIEmax0BPJWXmmYi4Ffg8rdtV78rMp3q8mq5PQw2I9TVjfc1YXzOvi/oGevFZkrT8+RA9SVLFYJAkVVZsMETEtoj4ekRMRMSeWaafExH3lOmPRMTGAda2ISIejoin
I+KpiLhtlj6jEXE6Ih4vP785qPrK+o9HxLGy7kdnmR4R8fGy/Z6IiMsGWNtb2rbL4xHx/Yj4tRl9Brr9IuKuiHghIp5sazs/Ig5FxLPl99o55t1R+jwbETsGWN/vRMTXyvv3uYhYM8e88+4LfazvtyJisu09vHaOeef9W+9jffe01XY8Ih6fY95BbL9ZP1P6tg9m5or7oXXh+jngzcDZwFeBS2b0+ZfAfy3DNwL3DLC+i4DLyvCbgG/MUt8ocP8SbsPjwIXzTL8WeAgI4CrgkSV8r79N6ws7S7b9gLcBlwFPtrX9R2BPGd4DfGSW+c4Hni+/15bhtQOq72pgVRn+yGz1LWZf6GN9vwX8+iLe/3n/1vtV34zpdwK/uYTbb9bPlH7tgyv1iOFHj9bIzB8C04/WaLcdOFCG7wW2RkQMorjMPJmZj5XhvwSeAdYNYt09tB34VLYcAdZExEVLUMdW4LnM/OYSrPtHMvNLwIszmtv3sQPA9bPM+nbgUGa+mJkvAYeAbYOoLzO/kJlnyugRWt8bWhJzbL/FWMzfemPz1Vc+N24APt3r9S7WPJ8pfdkHV2owrAO+1TZ+gld/8P6oT/njOA1cMJDq2pRTWD8LPDLL5J+LiK9GxEMR8VODrYwEvhARR6P1GJKZFrONB+FG5v6DXMrtBzCcmSfL8LeB4Vn6LJft+Mu0jgBns9C+0E+3llNdd81xGmQ5bL9/CpzKzGfnmD7Q7TfjM6Uv++BKDYYVISKGgD8Gfi0zvz9j8mO0To/8NPB7wP8YcHk/n5mXAdcAt0TE2wa8/gVF60uQ7wT+aJbJS739Ktk6Zl+W935HxIeAM8Ddc3RZqn3hE8A/An4GOEnrdM1y9G7mP1oY2Pab7zOll/vgSg2GxTxa40d9ImIVsBr47kCqa63zDbTewLsz87Mzp2fm9zNzqgw/CLwhIi4cVH2ZOVl+vwB8jtYhe7vl8PiSa4DHMvPUzAlLvf2KU9On18rvF2bps6TbMSJ2Au8AbiofHK+yiH2hLzLzVGb+TWb+P+C/zbHepd5+q4B/AdwzV59Bbb85PlP6sg+u1GBYzKM1DgLTV9/fBXxxrj+MXivnJPcDz2Tm787R5+9PX/OIiCtovRcDCa6IOC8i3jQ9TOsi5ZMzuh0E3hstVwGn2w5ZB2XOf6kt5fZr076P7QDum6XP54GrI2JtOVVydWnru2j9p1i/AbwzM1+eo89i9oV+1dd+zeqfz7HepX6Mzi8CX8vME7NNHNT2m+czpT/7YD+vpPfzh9ZdM9+gdcfCh0rbb9P6IwB4I61TEBPAnwNvHmBtP0/rkO4J4PHycy3wfuD9pc+twFO07rI4AvyTAdb35rLer5Yaprdfe31B6z9Veg44BowM+P09j9YH/eq2tiXbfrQC6iTw17TO0d5M65rVYeBZ4M+A80vfEVr/O+H0vL9c9sMJ4H0DrG+C1rnl6X1w+i69fwA8ON++MKD6/nvZt56g9QF30cz6yvir/tYHUV9p/+T0PtfWdym231yfKX3ZB30khiSpslJPJUmS+sRgkCRVDAZJUsVgkCRVDAZJUsVgkCRVDAZJUuX/A7zU+O5qfyxrAAAAAElFTkSuQmCC\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "df_contracts[df_contracts.annual_salary < 20].annual_salary.hist(bins=25)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAATlElEQVR4nO3df6zd9X3f8edrEEjKzTCM1iKGzVRyKhG8ZuEWkNpu14kKhvwBUasIygDnh1xNMLUa3eK0q8JKM3lVaKb8KJojrJKF5Yq1SbDAKXKseizVWMCMYgxhOMHJuEP2UlNTJyyt0/f+OF9bp+xe3+vre+45Pp/nQzo63/P5fs73+32f7zmv8z3f7/d+b6oKSVIb/s6wF0CStHwMfUlqiKEvSQ0x9CWpIYa+JDXkzGEvwIlccMEFtXr16oFN//vf/z7nnHPOwKY/bONeH4x/jeNeH4x/jcOob/fu3d+rqh+fbdxIh/7q1at58sknBzb9Xbt2MTU1NbDpD9u41wfjX+O41wfjX+Mw6kvynbnGuXtHkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaMtJ/kXu6Wr3pkaHNe//m9w5t3pJG37xb+kkuTvInSZ5LsjfJr3btdyWZSfJ0d7uu7zkfTbIvyQtJrulrX9+17UuyaTAlSZLmspAt/aPAnVX1VJK3AruT7OjGfbKqPtHfOcmlwI3AO4C3AV9L8vZu9GeBXwBeBp5Isq2qnluKQiRJ85s39KvqFeCVbvgvkzwPrDrBU64Hpqvqh8BLSfYBV3Tj9lXVtwGSTHd9DX1JWiY5mX+MnmQ18BhwGfAvgA3Aa8CT9H4NvJrkM8DjVfWF7jn3AV/tJrG+qj7ctd8CXFlVd7xhHhuBjQArV668fHp6etHFzefIkSNMTEws+XT3zBxe8mku1NpV5x4fHlR9o2Tcaxz3+mD8axxGfevWrdtdVZOzjVvwgdwkE8AfAb9WVa8luRe4G6ju/h7gg6e6sFW1BdgCMDk5WYO8JOmgLnm6YZgHcm+eOj487peshfGvcdzrg/GvcdTqW1DoJ3kTvcB/oKq+BFBVB/rGfw54uHs4A1zc9/SLujZO0C5JWgYLOXsnwH3A81X1e33tF/Z1ex/wbDe8DbgxydlJLgHWAN8AngDWJLkkyVn0DvZuW5oyJEkLsZAt/Z8FbgH2JHm6a/sN4KYk76S3e2c/8CsAVbU3yYP0DtAeBW6vqh8BJLkDeBQ4A9haVXuXrBJJ0rwWcvbO14HMMmr7CZ7zceDjs7RvP9HzJEmD5WUYJKkhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ2ZN/STXJzkT5I8l2Rvkl/t2s9PsiPJi939eV17knwqyb4kzyR5V9+0buv6v5jktsGVJUmazZkL6HMUuLOqnkryVmB3kh3ABmBnVW1OsgnYBHwEuBZY092uBO4FrkxyPvAxYBKobjrbqurVpS5K7dgzc5gNmx4Zyrz3b37vUOYrnYp5t/Sr6pWqeqob/kvgeWAVcD1wf9ftfuCGbvh64PPV8ziwIsmFwDXAjqo61AX9DmD9UhYjSTqxVNXCOyergceAy4DvVtWKrj3Aq1W1IsnDwOaq+no3bie9XwBTwJur6ne69t8CXq+qT7xhHhuBjQArV668fHp6+lTqO6EjR44wMTGx5NPdM3N4yae5UGtXnXt8eFD1jZKDhw5z4PXhzLv/tR6UFtbhuNc4jPrWrVu3u6omZxu3kN07ACSZAP4I+LWqeq2X8z1VVUkW/u1xAlW1BdgCMDk5WVNTU0sx2Vnt2rWLQUx/WLsbAPbf
PHV8eFD1jZJPP/AQ9+xZ8Nt4SfW/1oPSwjoc9xpHrb4Fnb2T5E30Av+BqvpS13yg221Dd3+wa58BLu57+kVd21ztkqRlspCzdwLcBzxfVb/XN2obcOwMnNuAh/rab+3O4rkKOFxVrwCPAlcnOa870+fqrk2StEwW8rv4Z4FbgD1Jnu7afgPYDDyY5EPAd4D3d+O2A9cB+4AfAB8AqKpDSe4Gnuj6/XZVHVqKIiRJCzNv6HcHZDPH6PfM0r+A2+eY1lZg68ksoCRp6fgXuZLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqyHD++4SkRVs9zH/S4/8FPu25pS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSHzhn6SrUkOJnm2r+2uJDNJnu5u1/WN+2iSfUleSHJNX/v6rm1fkk1LX4okaT4L2dL/A2D9LO2frKp3drftAEkuBW4E3tE95/eTnJHkDOCzwLXApcBNXV9J0jKa9x+jV9VjSVYvcHrXA9NV9UPgpST7gCu6cfuq6tsASaa7vs+d/CJLkhYrVTV/p17oP1xVl3WP7wI2AK8BTwJ3VtWrST4DPF5VX+j63Qd8tZvM+qr6cNd+C3BlVd0xy7w2AhsBVq5cefn09PSp1HdCR44cYWJiYsmnu2fm8JJPc6HWrjr3+PCg6hslBw8d5sDrw5l3/2s9KLOtw1F5fy2VcX+fDqO+devW7a6qydnGzbulP4d7gbuB6u7vAT64yGn9LVW1BdgCMDk5WVNTU0sx2Vnt2rWLQUx/w6ZHlnyaC7X/5qnjw4Oqb5R8+oGHuGfPYt/Gp6b/tR6U2dbhqLy/lsq4v09Hrb5FfVqq6sCx4SSfAx7uHs4AF/d1vahr4wTtkqRlsqhTNpNc2PfwfcCxM3u2ATcmOTvJJcAa4BvAE8CaJJckOYvewd5ti19sSdJizLuln+SLwBRwQZKXgY8BU0neSW/3zn7gVwCqam+SB+kdoD0K3F5VP+qmcwfwKHAGsLWq9i51MZKkE1vI2Ts3zdJ83wn6fxz4+Czt24HtJ7V0kqQl5V/kSlJDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIfP+Y3RJOmb1pkeWfJp3rj3Khnmmu3/ze5d8vq1yS1+SGjLWW/rzbZUsZAtDksaJW/qS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktSQeUM/ydYkB5M829d2fpIdSV7s7s/r2pPkU0n2JXkmybv6nnNb1//FJLcNphxJ0oksZEv/D4D1b2jbBOysqjXAzu4xwLXAmu62EbgXel8SwMeAK4ErgI8d+6KQJC2feUO/qh4DDr2h+Xrg/m74fuCGvvbPV8/jwIokFwLXADuq6lBVvQrs4P//IpEkDViqav5OyWrg4aq6rHv8F1W1ohsO8GpVrUjyMLC5qr7ejdsJfASYAt5cVb/Ttf8W8HpVfWKWeW2k9yuBlStXXj49Pb3o4vbMHD7h+JVvgQOvL3ryI2ntqnOPDx85coSJiYkhLs3gHTx0eGjrsP+1HpTZ1uF87+vTzUI+h8vxWg/KMD6H69at211Vk7ONO+WrbFZVJZn/m2Ph09sCbAGYnJysqampRU9rvito3rn2KPfsGa8Lje6/eer48K5duziV1+908OkHHhraOux/rQdltnU4bleGXcjncDle60EZtc/hYs/eOdDttqG7P9i1zwAX9/W7qGubq12StIwWG/rbgGNn4NwGPNTXfmt3Fs9VwOGqegV4FLg6yXndAdyruzZJ0jKa93dxki/S2yd/QZKX6Z2Fsxl4
MMmHgO8A7++6bweuA/YBPwA+AFBVh5LcDTzR9fvtqnrjwWFJ0oDNG/pVddMco94zS98Cbp9jOluBrSe1dJKkJeVf5EpSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIacOewFkKT5rN70yNDmvX/ze4c270FwS1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUkFMK/ST7k+xJ8nSSJ7u285PsSPJid39e154kn0qyL8kzSd61FAVIkhZuKbb011XVO6tqsnu8CdhZVWuAnd1jgGuBNd1tI3DvEsxbknQSBrF753rg/m74fuCGvvbPV8/jwIokFw5g/pKkOaSqFv/k5CXgVaCA/1BVW5L8RVWt6MYHeLWqViR5GNhcVV/vxu0EPlJVT75hmhvp/RJg5cqVl09PTy96+fbMHD7h+JVvgQOvL3ryI2ntqnOPDx85coSJiYkhLs3gHTx0eGjrsP+1HpTZ1uF87+vTzah/Dk91PQ/jc7hu3brdfXtf/pZTveDaz1XVTJKfAHYk+Wb/yKqqJCf1rVJVW4AtAJOTkzU1NbXohdswz0Wa7lx7lHv2jNc15/bfPHV8eNeuXZzK63c6+PQDDw1tHfa/1oMy2zqc7319uhn1z+GprudR+xye0u6dqprp7g8CXwauAA4c223T3R/sus8AF/c9/aKuTZK0TBYd+knOSfLWY8PA1cCzwDbgtq7bbcBD3fA24NbuLJ6rgMNV9cqil1ySdNJO5TfVSuDLvd32nAn8p6r64yRPAA8m+RDwHeD9Xf/twHXAPuAHwAdOYd6SpEVYdOhX1beBn56l/c+B98zSXsDti52fJOnU+Re5ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqyKL/MbrUutWbHhn4PO5ce5QNyzAftcMtfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSHLHvpJ1id5Icm+JJuWe/6S1LJlDf0kZwCfBa4FLgVuSnLpci6DJLVsubf0rwD2VdW3q+qvgGng+mVeBklqVqpq+WaW/BKwvqo+3D2+Bbiyqu7o67MR2Ng9/CnghQEu0gXA9wY4/WEb9/pg/Gsc9/pg/GscRn3/oKp+fLYRI3c9/araAmxZjnklebKqJpdjXsMw7vXB+Nc47vXB+Nc4avUt9+6dGeDivscXdW2SpGWw3KH/BLAmySVJzgJuBLYt8zJIUrOWdfdOVR1NcgfwKHAGsLWq9i7nMrzBsuxGGqJxrw/Gv8Zxrw/Gv8aRqm9ZD+RKkobLv8iVpIYY+pLUkLEL/SRbkxxM8mxf211JZpI83d2u69rflOT+JHuSPJ/ko33PGcnLRcxWX9f+z5N8M8neJL/b1/7RroYXklzT137a15fkF5Ls7tbf7iTv7ut/ede+L8mnkmS5a5nLya7DbtzfT3Ikya/3tZ3267Br/4dJ/lvXvifJm7v2kVyHJ/keHb2MqaqxugH/GHgX8Gxf213Ar8/S95eB6W74x4D9wGp6B5m/BfwkcBbwZ8Clw67tBPWtA74GnN09/onu/tJu2c8GLulqOmOM6vtHwNu64cuAmb7nfAO4CgjwVeDaYde2mBr7xv8h8J+PvY/HaB2eCTwD/HT3+O8BZ4zyOjzJ+kYuY8ZuS7+qHgMOLbQ7cE6SM4G3AH8FvMYIXy5ijvr+GbC5qn7Y9TnYtV9P7w33w6p6CdhHr7axqK+q/kdV/e+uz17gLUnOTnIh8Her6vHqfdo+D9ywLAUswEmuQ5LcALxEr8ZjxmIdAlcDz1TVn3Xtf15VPxrl
dXiS9Y1cxoxd6J/AHUme6X6ande1/SHwfeAV4LvAJ6rqELAK+F99z325axtVbwd+Psl/T/JfkvxM1z5XHeNSX79fBJ7qPnSr6NV0zKjXB3PUmGQC+Ajwb97Qf1zW4duBSvJokqeS/Kuu/XRbh3PVN3IZM3KXYRiQe4G76X3r3g3cA3yQ3rftj4C3AecB/zXJ14a1kKfgTOB8ej+FfwZ4MMlPDneRltSs9XVbgCR5B/Dv6G01nq7mWod3AZ+sqiMjskt7seaq70zg57q2HwA7k+wGDg9rQRdprvpGLmOaCP2qOnBsOMnngIe7h78M/HFV/TVwMMmfApP0voFPp8tFvAx8qQvBbyT5G3oXeTrRZS/Gob7/k+Qi4MvArVX1ra7/DL2ajhn1+mDuGq8Efqk7MLgC+Jsk/xfYzXisw5eBx6rqewBJttPbX/4FTq91OFd9I5cxTeze6fYPHvM+4NhR9+8C7+76nEPvW/qbnH6Xi/gKvQNJJHk7vQND36O3zDd2+7kvAdbQOzg2FvUlWQE8Amyqqj891rmqXgFeS3JVd8bHrcBDy73QJ+krzFJjVf18Va2uqtXAvwf+bVV9hjFZh/T+On9tkh/r9nv/E+C503AdfoXZ6xu9jBnmUfBB3IAv0tt/9tf0vn0/BPxHYA+9swS2ARd2fSfonRGxF3gO+Jd907kO+J/0jrD/5rDrmqe+s+htGT0LPAW8u6//b3Y1vEDf2Q/jUB/wr+ntL32673bsrInJrv+3gM/Q/fX5KNxOdh32Pe8u+s5CG4d12PX/p91n8Fngd/vaR3IdnuR7dOQyxsswSFJDmti9I0nqMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQ/4fkkQcB3xhV/gAAAAASUVORK5CYII=\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "df_contracts.startY.hist(bins=10)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.026105873821609893"
- ]
- },
- "execution_count": 14,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# proportion of female apprentices\n",
- "1-(df_contracts.a_gender.sum()/df_contracts.shape[0])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.023723194861701047"
- ]
- },
- "execution_count": 15,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# proportion of female masters\n",
- "1-(df_contracts.m_gender.sum()/df_contracts.shape[0])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.7310924369747899"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# proportion of female apprentices (a_gender == 0) with a male master, among contracts starting before 1800\n",
- "df_contracts[(df_contracts.a_gender == 0) & (df_contracts.startY < 1800)].m_gender.sum()\\\n",
- " /df_contracts[(df_contracts.a_gender == 0) & (df_contracts.startY < 1800)].shape[0]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.9810528582193992"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# proportion of male apprentices (a_gender == 1) with a male master, among contracts starting before 1800\n",
- "df_contracts[(df_contracts.a_gender == 1) & (df_contracts.startY < 1800)].m_gender.sum()\\\n",
- " /df_contracts[(df_contracts.a_gender == 1) & (df_contracts.startY < 1800)].shape[0]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Looking at empirical distributions"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAT5ElEQVR4nO3df4xd5Z3f8fenJslGsFnIQkdem9REdVJB2HWXEaHa7WrYdBMD0UKqVYpFA07SdaIFKVFdrcx2JdJESLTdbLZRtqycYEG0KQ4Km2AF0qyXZkQqlQScUMyPUAwxwq5jKyELOySiNfn2j3umc2PGM3Pnju+Yed4v6WrOec45z3nuF/yZM+ece26qCklSG/7ecg9AkjQ6hr4kNcTQl6SGGPqS1BBDX5IacspyD2A+Z555Zq1bt27g7V588UVOPfXUpR/Qq4x16LEOM6xFz0quw549e35YVWfNtuykD/1169bx4IMPDrzd5OQkExMTSz+gVxnr0GMdZliLnpVchyTPHG+Zp3ckqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0JakhJ/0ncpfLum13z7l8/02XjWgkkrR0PNKXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDZk39JPsSHIkySN9bV9M8lD32p/koa59XZKf9i37i75tLkiyN8m+JJ9OkhPyjiRJx7WQ+/RvBT4DfH66oar+xfR0kk8Cz/et/1RVbZiln5uB3we+BdwDbAS+NvCIJUmLNu+RflXdBzw327LuaP29wO1z9ZFkNfCGqrq/qoreL5ArBh6tJGkow34i958Ch6vqyb62c5J8F3gB+OOq+iawBjjQt86Brm1WSbYAWwDGxsaYnJwceGBTU1OL2m7a1vOPzrl8mL5Hadg6rBTWYYa16Gm1DsOG/iZ+/ij/EPCmqvpRkguAryQ5b9BOq2o7sB1gfHy8FvPlxcN+6fHm+R7DcNXi+x6llfzlz4OwDjOsRU+rdVh06Cc5BfjnwAXTbVX1EvBSN70nyVPAW4CDwNq+zdd2bZKkERrmls1/Bnyvqv7/aZskZyVZ1U2/GVgPPF1Vh4AXklzUXQe4GrhriH1LkhZhIbds3g78D+CtSQ4k+WC36EpeeQH3t4CHu1s4vwR8uKqmLwL/AfA5YB/wFN65I0kjN+/pnaradJz2zbO03QnceZz1HwTeNuD4JElLyE/kSlJDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ1ZyBej70hyJMkjfW0fS3IwyUPd69K+Zdcn2ZfkiSTv6mvf2LXtS7Jt6d+KJGk+CznSvxXYOEv7p6pqQ/e6ByDJucCVwHndNv85yaokq4A/By4BzgU2detKkkbolPlWqKr7kqxbYH+XAzur6iXg+0n2ARd2y/ZV1dMASXZ26z42+JAlSYs1b+jP4bokVwMPAlur6sfAGuD+vnUOdG0Azx7T/vbjdZxkC7AFYGxsjMnJyYEHNzU1tajtpm09/+icy4fpe5SGrcNKYR1mWIueVuuw2NC/GfgEUN3PTwIfWKpBVdV2YDvA+Ph4TUxMDNzH5OQki9lu2uZtd8+5fP9Vi+97lIatw0phHWZYi55W67Co0K+qw9PTST4LfLWbPQic3bfq2q6NOdolSSOyqFs2k6zum30PMH1nzy7gyiSvS3IOsB74NvAAsD7JOUleS+9i767FD1uStBjzHuknuR2YAM5McgC4AZhIsoHe6Z39wIcAqurRJHfQu0B7FLi2ql7u+rkO+DqwCthRVY8u9ZuRJM1tIXfvbJql+ZY51r8RuHGW9nuAewYanSRpSfmJXElqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLU
EENfkhpi6EtSQwx9SWqIoS9JDZn36xI1u3Xb7j7usv03XTbCkUjSws17pJ9kR5IjSR7pa/uPSb6X5OEkX05yete+LslPkzzUvf6ib5sLkuxNsi/Jp5PkhLwjSdJxLeT0zq3AxmPadgNvq6pfBf4XcH3fsqeqakP3+nBf+83A7wPru9exfUqSTrB5Q7+q7gOeO6btr6vqaDd7P7B2rj6SrAbeUFX3V1UBnweuWNSIJUmLthTn9D8AfLFv/pwk3wVeAP64qr4JrAEO9K1zoGubVZItwBaAsbExJicnBx7U1NTUorabtvX8o/OvdBzD7HepDVuHlcI6zLAWPa3WYajQT/JvgaPAF7qmQ8CbqupHSS4AvpLkvEH7rartwHaA8fHxmpiYGHhsk5OTLGa7aZvnuFA7n/1XLX6/S23YOqwU1mGGtehptQ6LDv0km4F3A+/oTtlQVS8BL3XTe5I8BbwFOMjPnwJa27VJkkZoUffpJ9kI/CHwu1X1k772s5Ks6qbfTO+C7dNVdQh4IclF3V07VwN3DT16SdJA5j3ST3I7MAGcmeQAcAO9u3VeB+zu7ry8v7tT57eAjyf5v8DPgA9X1fRF4D+gdyfQ64GvdS9J0gjNG/pVtWmW5luOs+6dwJ3HWfYg8LaBRidJWlI+hkGSGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQ5r9jty5vuNWklYqj/QlqSGGviQ1pNnTOyfSfKeO9t902YhGIkk/zyN9SWqIoS9JDTH0Jakhhr4kNcTQl6SGLCj0k+xIciTJI31tb0yyO8mT3c8zuvYk+XSSfUkeTvLrfdtc063/ZJJrlv7tSJLmstAj/VuBjce0bQPurar1wL3dPMAlwPrutQW4GXq/JIAbgLcDFwI3TP+ikCSNxoJCv6ruA547pvly4LZu+jbgir72z1fP/cDpSVYD7wJ2V9VzVfVjYDev/EUiSTqBhvlw1lhVHeqmfwCMddNrgGf71jvQtR2v/RWSbKH3VwJjY2NMTk4OPLipqak5t9t6/tGB+1wqi3k/izVfHVphHWZYi55W67Akn8itqkpSS9FX1992YDvA+Ph4TUxMDNzH5OQkc223eTkfuLb3xeMuWupP685Xh1ZYhxnWoqfVOgxz987h7rQN3c8jXftB4Oy+9dZ2bcdrlySNyDChvwuYvgPnGuCuvvaru7t4LgKe704DfR14Z5Izugu47+zaJEkjsqDTO0luByaAM5McoHcXzk3AHUk+CDwDvLdb/R7gUmAf8BPg/QBV9VySTwAPdOt9vKqOvTgsSTqBFhT6VbXpOIveMcu6BVx7nH52ADsWPDpJ0pLyE7mS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQxYd+knemuShvtcLST6a5GNJDva1X9q3zfVJ9iV5Ism7luYtSJIWakFfjD6bqnoC2ACQZBVwEPgy8H7gU1X1J/3rJzkXuBI4D/gV4G+SvKWqXl7sGCRJg1mq0zvvAJ6qqmfmWOdyYGdVvVRV3wf2ARcu0f4lSQuwVKF/JXB73/x1SR5OsiPJGV3bGuDZvnUOdG2SpBFJVQ3XQfJa4H8D51XV4SRjwA+BAj4BrK6qDyT5DHB/Vf1lt90twNeq6kuz9LkF2AIwNjZ2wc6dOwce19TUFKeddtpxl+89+PzAfY7C+Wt+aUn7m68OrbAOM6xFz0quw8UXX7ynqsZnW7boc/p9LgG+U1WHAaZ/AiT5LPDVbvYgcHbfdmu7tleoqu3AdoDx8fGamJgYeFCTk5PMtd3mbXcP3Oco7L9qYkn7m68OrbAOM6xFT6t1WIrTO5voO7WTZHXfsvcAj3TTu4Ark7wuyTnAeuDbS7B/SdICDXWkn+RU4HeAD/U1/4ckG+id3tk/vayqHk1yB/AY
cBS41jt3JGm0hgr9qnoR+OVj2t43x/o3AjcOs09J0uL5iVxJaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIUOHfpL9SfYmeSjJg13bG5PsTvJk9/OMrj1JPp1kX5KHk/z6sPuXJC3cUh3pX1xVG6pqvJvfBtxbVeuBe7t5gEuA9d1rC3DzEu1fkrQAJ+r0zuXAbd30bcAVfe2fr577gdOTrD5BY5AkHWMpQr+Av06yJ8mWrm2sqg510z8AxrrpNcCzfdse6NokSSNwyhL08ZtVdTDJ3wd2J/le/8KqqiQ1SIfdL48tAGNjY0xOTg48qKmpqTm323r+0YH7HIXFvNe5zFeHVliHGdaip9U6DB36VXWw+3kkyZeBC4HDSVZX1aHu9M2RbvWDwNl9m6/t2o7tczuwHWB8fLwmJiYGHtfk5CRzbbd5290D9zkK+6+aWNL+5qtDK6zDDGvR02odhjq9k+TUJL84PQ28E3gE2AVc0612DXBXN70LuLq7i+ci4Pm+00CSpBNs2CP9MeDLSab7+i9V9V+TPADckeSDwDPAe7v17wEuBfYBPwHeP+T+JUkDGCr0q+pp4Ndmaf8R8I5Z2gu4dph9SpIWz0/kSlJDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWrIUjxaWUto3TxP/9x/02UjGomklcgjfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JasiiQz/J2Um+keSxJI8m+UjX/rEkB5M81L0u7dvm+iT7kjyR5F1L8QYkSQs3zGMYjgJbq+o7SX4R2JNkd7fsU1X1J/0rJzkXuBI4D/gV4G+SvKWqXh5iDJKkASz6SL+qDlXVd7rpvwMeB9bMscnlwM6qeqmqvg/sAy5c7P4lSYNLVQ3fSbIOuA94G/Cvgc3AC8CD9P4a+HGSzwD3V9VfdtvcAnytqr40S39bgC0AY2NjF+zcuXPgMU1NTXHaaacdd/neg88P3OfJ4Pw1vzTn8mPf19jr4fBPF7btSjbf/w8tsRY9K7kOF1988Z6qGp9t2dBP2UxyGnAn8NGqeiHJzcAngOp+fhL4wCB9VtV2YDvA+Ph4TUxMDDyuyclJ5tpu8zxPszxZ7b9qYs7lx76vrecf5ZN7T1nQtivZfP8/tMRa9LRah6Hu3knyGnqB/4Wq+iuAqjpcVS9X1c+AzzJzCucgcHbf5mu7NknSiAxz906AW4DHq+pP+9pX9632HuCRbnoXcGWS1yU5B1gPfHux+5ckDW6Y0zu/AbwP2Jvkoa7tj4BNSTbQO72zH/gQQFU9muQO4DF6d/5c6507kjRaiw79qvrvQGZZdM8c29wI3LjYfUqShuMnciWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDhn4Mg1aOdXM8mmL/TZeNcCSSTpQVHfpzhdir1Up8T5JGx9M7ktQQQ1+SGmLoS1JDVvQ5fY3OfNcavBAsnRw80pekhnikrxVt+i+QrecfnfXb0vwLRK3xSF+SGmLoS1JDDH1Jaojn9KXG7D34/KzXN8BrHC0w9DUSLT7Xp8X3fLKa7b/F9MX91v5bjDz0k2wE/hOwCvhcVd006jG0apjn9iznM39O1s8A+BwkvRqNNPSTrAL+HPgd4ADwQJJdVfXYKMehk8vJGp4n67ikYYz6SP9CYF9VPQ2QZCdwOWDoa9FejX/BnMj9DvOXzzDjmm+/J2vfy2k5TgGmqk5Ix7PuLPk9YGNV/atu/n3A26vqumPW2wJs6WbfCjyxiN2dCfxwiOGuFNahxzrMsBY9K7kO/6CqzpptwUl5IbeqtgPbh+kjyYNVNb5EQ3rVsg491mGGtehptQ6jvk//IHB23/zark2SNAKjDv0HgPVJzknyWuBKYNeIxyBJzRrp
6Z2qOprkOuDr9G7Z3FFVj56g3Q11emgFsQ491mGGtehpsg4jvZArSVpePntHkhpi6EtSQ1Zc6CfZmOSJJPuSbFvu8YxSkh1JjiR5pK/tjUl2J3my+3nGco5xFJKcneQbSR5L8miSj3TtTdUiyS8k+XaS/9nV4d917eck+Vb3b+SL3U0VTUiyKsl3k3y1m2+uFisq9Pse83AJcC6wKcm5yzuqkboV2HhM2zbg3qpaD9zbza90R4GtVXUucBFwbff/QWu1eAn47ar6NWADsDHJRcC/Bz5VVf8Q+DHwweUb4sh9BHi8b765Wqyo0KfvMQ9V9X+A6cc8NKGq7gOeO6b5cuC2bvo24IpRjmk5VNWhqvpON/139P6Rr6GxWlTPVDf7mu5VwG8DX+raV3wdpiVZC1wGfK6bDw3WYqWF/hrg2b75A11by8aq6lA3/QNgbDkHM2pJ1gH/GPgWDdaiO53xEHAE2A08BfxtVR3tVmnp38ifAX8I/Kyb/2UarMVKC33NoXr35zZzj26S04A7gY9W1Qv9y1qpRVW9XFUb6H36/ULgHy3viJZHkncDR6pqz3KPZbmdlM/eGYKPeXilw0lWV9WhJKvpHfGteEleQy/wv1BVf9U1N1kLgKr62yTfAP4JcHqSU7oj3Fb+jfwG8LtJLgV+AXgDve/1aK4WK+1I38c8vNIu4Jpu+hrgrmUcy0h052pvAR6vqj/tW9RULZKcleT0bvr19L7H4nHgG8Dvdaut+DoAVNX1VbW2qtbRy4X/VlVX0WAtVtwncrvf5H/GzGMeblzeEY1OktuBCXqPjD0M3AB8BbgDeBPwDPDeqjr2Yu+KkuQ3gW8Ce5k5f/tH9M7rN1OLJL9K7+LkKnoHeHdU1ceTvJneTQ5vBL4L/Muqemn5RjpaSSaAf1NV726xFisu9CVJx7fSTu9IkuZg6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SG/D/CpIrwm9HJbAAAAABJRU5ErkJggg==\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "df_contracts[df_contracts.annual_salary < 50].annual_salary.hist(bins=40)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 19,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD4CAYAAAAEhuazAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAQoUlEQVR4nO3df4xlZX3H8fenoNWwhoVgJwRohzabNpRtKUyApqaZrSku8AeYGCKhuljN+gckmvIHWxMD9UeyadS2tpZ2LRsxVaekatkALd1snFD+QNm1yPKjlq0uLZPtbuzC6qixWf32j3u2nZ3O751f9z7vVzK55z7nuec+35ydzz3znHPPpqqQJLXlp9Z6AJKk1Wf4S1KDDH9JapDhL0kNMvwlqUFnr/UA5nLBBRfU8PDwaW3f//73Oeecc9ZmQCvIuvrPoNY2qHXB4NY2va4DBw58p6reONdr1nX4Dw8Ps3///tPaxsfHGR0dXZsBrSDr6j+DWtug1gWDW9v0upK8NN9rnPaRpAYZ/pLUIMNfkhpk+EtSgwx/SWqQ4S9JDTL8JalBhr8kNcjwl6QGretv+EorYXjHI4vqf3jnjSs0EmnteOQvSQ0y/CWpQYa/JDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JapDhL0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIMNfkhpk+EtSg+YN/ySXJPlKkueTPJfkfV37+Un2Jnmxezyva0+STyY5lOSZJFdO2da2rv+LSbatXFmSpLks5Mj/JHBXVV0GXAvckeQyYAewr6o2Afu65wDXA5u6n+3AfdD7sADuAa4BrgbuOfWBIUlaXfOGf1Udqaqvd8vfA14ALgJuAh7ouj0A3Nwt3wR8tnqeBDYmuRB4C7C3qo5X1SvAXmDrchYjSVqYVNXCOyfDwOPA5cC/V9XGrj3AK1W1McnDwM6qeqJbtw+4GxgFXldVH+naPwj8sKo+Nu09ttP7i4GhoaGrxsbGThvD5OQkGzZsWHSh6511rZ6DEycW1X/zRefO2L4ea1sOg1oXDG5t0+vasmXLgaoames1C/4/fJNsAL4IvL+qvtvL+56qqiQL/xSZQ1XtAnYBjIyM1Ojo6Gnrx8fHmd42CKxr9dy+2P/D97bRGdvXY23LYVDrgsGtbSl1LehqnySvoRf8n6uqL3XNR7vpHLrHY137BHDJlJdf3LXN1i5JWmULudonwP3AC1X1iSmr9gCnrtjZBjw0pf2d3VU/1wInquoI8BhwXZLzuhO913VtkqRVtpBpn98A3gEcTPJ01/YBYCfwYJJ3Ay8Bt3TrHgVuAA4BPwDeBVBVx5N8GHiq6/ehqjq+HEVIkhZn3vDvTtxmltVvnqF/AXfMsq3dwO7FDFCStPz8hq8kNcjwl6QGGf6S1CDDX5IaZPhLUoMMf0lqkOEvSQ0y/CWpQYa/JDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JapDhL0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIMNfkhpk+EtSgwx/SWqQ4S9JDTL8JalBhr8kNcjwl6QGGf6S1CDDX5IaZPhLUoMMf0lqkOEvSQ0y/CWpQYa/JDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JatC84Z9kd5JjSZ6d0nZvkokkT3c/N0xZ9/tJDiX5ZpK3TGnf2rUdSrJj+UuRJC3UQo78PwNsnaH9j6rqiu7nUYAklwFvB365e82fJzkryVnAp4DrgcuAW7u+kqQ1cPZ8Harq8STDC9zeTcBYVf0I+HaSQ8DV3bpDVfUtgCRjXd/nFz9kSdKZSlXN36kX/g9X1eXd83uB24HvAvuBu6rqlSR/BjxZVX/d9bsf+PtuM1ur6j1d+zuAa6rqzhneazuwHWBoaOiqsbGx09ZPTk6yYcOGRRe63lnX6jk4cWJR/TdfdO6M7euxtuUwqHXB4NY2va4tW7YcqKqRuV4z75H/LO4DPgxU9/hx4HeXuK3TVNUuYBfA
yMhIjY6OnrZ+fHyc6W2DwLpWz+07HllU/8O3jc7Yvh5rWw6DWhcMbm1LqWtJ4V9VR08tJ/k08HD3dAK4ZErXi7s25miXJK2yJV3qmeTCKU/fCpy6EmgP8PYkP53kUmAT8DXgKWBTkkuTvJbeSeE9Sx+2JOlMzHvkn+QLwChwQZKXgXuA0SRX0Jv2OQy8F6CqnkvyIL0TuSeBO6rqx9127gQeA84CdlfVc8tdjCRpYRZytc+tMzTfP0f/jwIfnaH9UeDRRY1OkrQi/IavJDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JapDhL0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIMNfkhpk+EtSgwx/SWqQ4S9JDTL8JalBhr8kNWhJ/4G7tFKGdzyyqP6Hd964QiORBptH/pLUIMNfkhrktI/62mKniST1eOQvSQ0y/CWpQYa/JDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JapDhL0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIMNfkhpk+EtSgwx/SWqQ4S9JDZo3/JPsTnIsybNT2s5PsjfJi93jeV17knwyyaEkzyS5csprtnX9X0yybWXKkSQtxEKO/D8DbJ3WtgPYV1WbgH3dc4DrgU3dz3bgPuh9WAD3ANcAVwP3nPrAkCStvnnDv6oeB45Pa74JeKBbfgC4eUr7Z6vnSWBjkguBtwB7q+p4Vb0C7OX/f6BIklZJqmr+Tskw8HBVXd49f7WqNnbLAV6pqo1JHgZ2VtUT3bp9wN3AKPC6qvpI1/5B4IdV9bEZ3ms7vb8aGBoaumpsbOy09ZOTk2zYsGFJxa5n1tVzcOLECo5maTZfdO6M7e6z/jOotU2va8uWLQeqamSu15x9pm9aVZVk/k+QhW9vF7ALYGRkpEZHR09bPz4+zvS2QWBdPbfveGTlBrNEh28bnbHdfdZ/BrW2pdS11PA/muTCqjrSTesc69ongEum9Lu4a5ugd/Q/tX18ie8trarhWT6Q7tp8csYPq8M7b1zpIUlnbKmXeu4BTl2xsw14aEr7O7urfq4FTlTVEeAx4Lok53Uneq/r2iRJa2DeI/8kX6B31H5BkpfpXbWzE3gwybuBl4Bbuu6PAjcAh4AfAO8CqKrjST4MPNX1+1BVTT+JLElaJfOGf1XdOsuqN8/Qt4A7ZtnObmD3okYnSVoRZ3zCV9LpZjtHMBvPEWgteHsHSWqQ4S9JDTL8JalBzvlrRR2cOLEuv7gltc4jf0lqkOEvSQ0y/CWpQYa/JDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JapDhL0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIMNfkhpk+EtSgwx/SWqQ4S9JDTL8JalBhr8kNcjwl6QGGf6S1CDDX5IadPZaD0DS4gzveGRR/Q/vvHGFRqJ+5pG/JDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JapDhL0kNMvwlqUGGvyQ16IzCP8nhJAeTPJ1kf9d2fpK9SV7sHs/r2pPkk0kOJXkmyZXLUYAkafGW48h/S1VdUVUj3fMdwL6q2gTs654DXA9s6n62A/ctw3tLkpZgJaZ9bgIe6JYfAG6e0v7Z6nkS2JjkwhV4f0nSPFJVS39x8m3gFaCAv6yqXUleraqN3foAr1TVxiQPAzur6olu3T7g7qraP22b2+n9ZcDQ0NBVY2Njp73n5OQkGzZsWPKY16tBrevY8RMc/eFaj2JlDL2eZalt80XnLqr/wYkTK7r9Qf23CINb2/S6tmzZcmDKbMyMzvSunm+qqokkPwPsTfIvU1dWVSVZ1KdLVe0CdgGMjIzU6OjoaevHx8eZ3jYIBrWuP/3cQ3z84GDePPauzSeXpbbDt40uqv/ti72r5yK3P6j/FmFwa1tKXWc07VNVE93jMeDLwNXA0VPTOd3jsa77BHDJlJdf3LVJklbZksM/yTlJ3nBqGbgOeBbYA2zrum0DHuqW9wDv7K76uRY4UVVHljxySdKSncnfrEPAl3vT
+pwNfL6q/iHJU8CDSd4NvATc0vV/FLgBOAT8AHjXGby3JOkMLDn8q+pbwK/O0P5fwJtnaC/gjqW+nyRp+fgNX0lqkOEvSQ0y/CWpQYa/JDXI8JekBhn+ktSgwfzevdRHhhd5u4aV3v5dm08yujJD0Trikb8kNcjwl6QGGf6S1CDDX5IaZPhLUoMMf0lqkOEvSQ0y/CWpQYa/JDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JapDhL0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIMNfkhpk+EtSgwx/SWrQ2Ws9APWX4R2PLKr/XZtXaCCSzohH/pLUIMNfkhpk+EtSgwx/SWqQ4S9JDfJqn8Yt9uodSYPB8Jd0xhZ7EHF4540rNBItlOEvad3zw2X5rXr4J9kK/AlwFvBXVbVztccgaW5OBw6+VQ3/JGcBnwJ+G3gZeCrJnqp6fjXH0S/8BZS0Ulb7yP9q4FBVfQsgyRhwE2D4Sw1Z6QOb2bZ/1+aT3L5M773YqaX1NnWVqlrRNzjtzZK3AVur6j3d83cA11TVnVP6bAe2d09/EfjmtM1cAHxnFYa72qyr/wxqbYNaFwxubdPr+rmqeuNcL1h3J3yrahewa7b1SfZX1cgqDmlVWFf/GdTaBrUuGNzallLXan/JawK4ZMrzi7s2SdIqWu3wfwrYlOTSJK8F3g7sWeUxSFLzVnXap6pOJrkTeIzepZ67q+q5RW5m1imhPmdd/WdQaxvUumBwa1t0Xat6wleStD54YzdJapDhL0kN6pvwT7I1yTeTHEqyY63Hs5ySHE5yMMnTSfav9XiWKsnuJMeSPDul7fwke5O82D2et5ZjXKpZars3yUS3355OcsNajnEpklyS5CtJnk/yXJL3de19vd/mqKuv91mS1yX5WpJvdHX9Qdd+aZKvdvn4N90FNXNvqx/m/LvbQvwrU24LAdw6KLeFSHIYGKmqvv7ySZLfBCaBz1bV5V3bHwLHq2pn96F9XlXdvZbjXIpZarsXmKyqj63l2M5EkguBC6vq60neABwAbgZup4/32xx13UIf77MkAc6pqskkrwGeAN4H/B7wpaoaS/IXwDeq6r65ttUvR/7/e1uIqvpv4NRtIbSOVNXjwPFpzTcBD3TLD9D7Bew7s9TW96rqSFV9vVv+HvACcBF9vt/mqKuvVc9k9/Q13U8BvwX8bde+oP3VL+F/EfAfU56/zADsyCkK+MckB7rbWwySoao60i3/JzC0loNZAXcmeaabFuqrqZHpkgwDvwZ8lQHab9Pqgj7fZ0nOSvI0cAzYC/wb8GpVney6LCgf+yX8B92bqupK4Hrgjm6KYeBUb45x/c8zLtx9wC8AVwBHgI+v6WjOQJINwBeB91fVd6eu6+f9NkNdfb/PqurHVXUFvTskXA380lK20y/hP9C3haiqie7xGPBlejt0UBzt5l9PzcMeW+PxLJuqOtr9Iv4E+DR9ut+6ueMvAp+rqi91zX2/32aqa1D2GUBVvQp8Bfh1YGOSU1/aXVA+9kv4D+xtIZKc052QIsk5wHXAs3O/qq/sAbZ1y9uAh9ZwLMvqVDh23kof7rfuBOL9wAtV9Ykpq/p6v81WV7/vsyRvTLKxW349vYtgXqD3IfC2rtuC9ldfXO0D0F2S9cf8320hPrq2I1oeSX6e3tE+9G638fl+rS3JF4BRereXPQrcA/wd8CDws8BLwC1V1XcnTmepbZTe9EEBh4H3Tpkn7wtJ3gT8E3AQ+EnX/AF68+N9u9/mqOtW+nifJfkVeid0z6J38P5gVX2oy5Ex4Hzgn4Hfqaofzbmtfgl/SdLy6ZdpH0nSMjL8JalBhr8kNcjwl6QGGf6S1CDDX5IaZPhLUoP+BweMU0oHLqrWAAAAAElFTkSuQmCC\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "df_contracts[df_contracts.a_age < 30].a_age.hist(bins=25)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Two very important distributions"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Normal\n",
- "\n",
- "Also known as the Gaussian distribution, the normal distribution is bell-shaped: most of its mass lies around the mean, and its density decays exponentially on both sides. It is fully characterized by its mean (center of mass) and standard deviation (spread).\n",
- "\n",
- "https://en.wikipedia.org/wiki/Normal_distribution"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAFgCAYAAACFYaNMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAYbUlEQVR4nO3df5Bd5X3f8fcXCbQYy15BVM1GKwpUMmknLj+6i3+uJ0F1BohraMfBpKlNPKT6A+yxSusEp3+knelk7Kknlp1GZDRALBoCIRiD4lDHmB+2aY2zQmYBAx024A2rSkhgix82wgi+/eMera6urqQrac8+d+99v2bu3HOee+7dL/7x4ZznPOd5IjORJM2940oXIEn9ygCWpEIMYEkqxACWpEIMYEkqxACWpEJqDeCIGIyI2yLiyYh4IiLeExEnR8TdEfFU9b6kOjYi4ssRMRkRj0TEuXXWJkml1X0G/CXgG5n5S8BZwBPANcA9mbkKuKfaB7gQWFW91gDXHu7HL7jgggR8+fLlq9tfbdUWwBHxduADwPUAmfnzzNwFXAxsrA7bCFxSbV8M3JgNDwKDETF0qL/x/PPP11C5JM2NOs+ATwd2An8WET+IiOsi4iRgWWZuq47ZDiyrtpcDzzZ9f7pq209ErImIzRGxeefOnTWWL0n1qjOAFwLnAtdm5jnAT9nX3QBANp6DPujpeTuZuSEzRzJzZOnSpbNWrCTNtToDeBqYzszvV/u30Qjk5/Z2LVTvO6rPtwIrmr4/XLVJUk+qLYAzczvwbEScWTWtBh4HNgGXV22XA3dW25uAj1ejId4NvNjUVSFJPWdhzb//KeCmiDgBeBr4BI3QvzUirgCmgEurY+8CLgImgZ9Vx0pSz6o1gDPzYWCkzUer2xybwFV11iNJ3cQn4SSpEANYkgoxgCWpEANYkgoxgCWpEANYkgoxgCWpkLofxJB6wu7duxkfHz+gfXR0lIGBgQIVqRcYwFIHxsfHWbv+DgaHV8607ZqeZN2VMDY2VrAyzWcGsNShweGVLF11duky1EPsA5akQgxgSSrEAJakQgxgSSrEAJakQgxgSSrEAJakQgxgSSrEAJakQgxgSSrEAJakQgxgSSrEAJakQgxgSSrEAJakQgxgSSrEAJakQgxgSSrEAJakQgxgSSrEAJakQgxgSSrEZemlNnbv3s34+PjM/sTEBG++6fmKZpcBLLUxPj7O2vV3MDi8EoDpLd9l8B2jhatSrzGApYMYHF7J0lVnA7BrerJsMepJXlNJUiEGsCQVYgBLUiEGsCQVYgBLUiEGsCQVYgBLUiEGsCQVYgBLUiG1BnBE/CgiHo2IhyNic9V2ckTcHRFPVe9LqvaIiC9HxGREPBIR59ZZmySVNhdnwL+amWdn5ki1fw1wT2auAu6p9gEuBFZVrzXAtXNQmyQVU6IL4mJgY7W9Ebikqf3GbHgQGIyIoQL1SdKcqDuAE/hmRDwUEWuqtmWZua3a3g4sq7aXA882fXe6attPRKyJiM0RsXnnzp111S1Jtat7NrT3Z+bWiPhHwN0R8WTzh5mZEZFH8oOZuQHYADAyMnJE35WkblLrGXBmbq3edwBfA84DntvbtVC976gO3wqsaPr6cNUmST2ptgCOiJMiYvHebeDXgMeATcDl1WGXA3dW25uAj1ejId4NvNjUVSFJPafOLohlwNciYu/f+YvM/EZEjAO3RsQVwBRwaXX8XcBFwCTwM+ATNdYmScXVFsCZ+TRwVpv2F4DVbdoTuKqueiQ4cK23vUZHRxkYGChQkfqZSxKpr7Su9QaN5YbWXQljY2MFK1M/MoDVd5rXepNKci4ISSrEAJakQgxgSSrEAJakQgxgSSrEAJakQgxgSSrEAJakQgxgSSrEAJakQgxgSSrEuSDU997Y8zoTExP7tU1MTPDmm56fqF4GsPrey9unWP/MqwxNLZhpm97yXQbfMVqwKvUDA1gCFg+dsd8MabumJ8sV
o75hAEtHqV3XhRO760gYwNJRau26cGJ3HSkDWDoGrV0X0pHwNq8kFWIAS1IhdkFIs6TdTbnXXnsNgEWLFu3X7s06gQEszZr244nvZ8HiUxha9c6ZNm/WaS8DWJpF7cYTLxwc8kad2rIPWJIKMYAlqRADWJIKMYAlqRBvwqln7N69m/Hx8QPaHfKlbmUAq2eMj4+zdv0dDA6vnGlzyJe6mQGsnjI4vNIhX5o37AOWpEI8A5YKs++6fxnAUmH2XfcvA1jqAvZd9yf7gCWpEM+A1dNap4jshuXmu7EmlWEAq6e1ThHZDcvNd2NNKsMAVs9rniKyW5ab78aaNPe87pGkQgxgSSrEAJakQgxgSSrEAJakQmoP4IhYEBE/iIivV/unR8T3I2IyIv4yIk6o2hdV+5PV56fVXZsklTQXZ8CfBp5o2v888MXMXAn8BLiiar8C+EnV/sXqOEnqWbUGcEQMA78OXFftB3A+cFt1yEbgkmr74mqf6vPV1fGS1JPqPgNeB/wu8Ga1fwqwKzP3VPvTwPJqeznwLED1+YvV8fuJiDURsTkiNu/cubPG0iWpXrUFcER8CNiRmQ/N5u9m5obMHMnMkaVLl87mT0vSnKrzUeT3AR+OiIuAAeBtwJeAwYhYWJ3lDgNbq+O3AiuA6YhYCLwdeKHG+iSpqNrOgDPzs5k5nJmnAZcB92bmbwH3AR+pDrscuLPa3lTtU31+b2ZmXfVJUmklxgH/HnB1REzS6OO9vmq/Hjilar8auKZAbZI0Z+ZkNrTMvB+4v9p+GjivzTG7gd+Yi3okqRv4JJwkFWIAS1IhBrAkFWIAS1IhBrAkFeKacJq3du/ezfj4+My+qwtrvjGANW+Nj4+zdv0dDA6vBFxdWPOPAax5bXB4pasLa97yek2SCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQH0WWutAbe15nYmJiv7bR0VEGBgYKVaQ6GMBSF3p5+xTrn3mVoakFQGOei3VXwtjYWOHKNJsMYKlLLR46Y2aiIfUmA1jzQuvcv+D8v5r/DGDNC61z/4Lz/2r+M4A1bzTP/QvO/6v5z+s3SSrEAJakQgxgSSrEAJakQgxgSSrEAJakQgxgSSrEAJakQgxgSSrEAJakQjoK4Ih4XydtkqTOdXoG/McdtkmSOnTIyXgi4j3Ae4GlEXF100dvAxbUWZgk9brDzYZ2AvDW6rjFTe0vAR+pqyj1N+f+Vb84ZABn5reBb0fEVzJzao5qUp9z7l/1i07nA14UERuA05q/k5nn11GU5Ny/6gedBvBfAX8KXAe8UV85ktQ/Og3gPZl5ba2VSFKf6fSuxl9HxJURMRQRJ+991VqZJPW4Ts+AL6/eP9PUlsAZs1uOJPWPjgI4M08/0h+OiAHgO8Ci6u/clpl/EBGnA7cApwAPAR/LzJ9HxCLgRuBfAC8AH83MHx3p35V60Rt7XmdiYuKA9tHRUQYGBgpUpNnQUQBHxMfbtWfmjYf42mvA+Zn5SkQcDzwQEf8LuBr4YmbeEhF/ClwBXFu9/yQzV0bEZcDngY8ewT+L1LNe3j7F+mdeZWhq3/NPu6YnWXcljI2NFaxMx6LTLojmAZgDwGpgC40z1rYyM4FXqt3jq1cC5wP/tmrfCPwXGgF8cbUNcBvwPyIiqt+R+t7ioTP2G5qn+a/TLohPNe9HxCCNboRDiogFNLoZVgJ/Avw9sCsz91SHTAPLq+3lwLPV39sTES/S6KZ4vuU31wBrAE499dROypekrnS0z3b+FDhsv3BmvpGZZwPDwHnALx3l32v+zQ2ZOZKZI0uXLj3Wn5OkYjrtA/5rGt0H0JiE558Ct3b6RzJzV0TcB7wHGIyIhdVZ8DCwtTpsK7ACmI6IhcDbadyMk6Se1Gkf8BeatvcAU5k5fagvRMRS4PUqfE8EPkjjxtp9NCbyuYXG8LY7q69sqva/V31+r/2/knpZp33A346IZey7GfdUB18b
AjZW/cDHAbdm5tcj4nHgloj4b8APgOur468H/mdETAI/Bi47gn8Oqe84NG3+67QL4lLgvwP3AwH8cUR8JjNvO9h3MvMR4Jw27U/T6A9ubd8N/EZnZUtyaNr812kXxH8GRjNzB8x0L3yLxnAxSYU4NG1+63QUxHF7w7fywhF8V5LURqdnwN+IiL8Fbq72PwrcVU9JktQfDrcm3EpgWWZ+JiL+DfD+6qPvATfVXZwk9bLDnQGvAz4LkJm3A7cDRMQ7q8/+VY21SVJPO1w/7rLMfLS1sWo7rZaKJKlPHC6ABw/x2YmzWIck9Z3DBfDmiPj3rY0R8Ts0JtmRJB2lw/UBrwW+FhG/xb7AHQFOAP51jXVJUs87ZABn5nPAeyPiV4Ffrpr/JjPvrb0ySepxnc4FcR+NSXQkSbPEp9kkqRADWJIKMYAlqRADWJIKMYAlqRADWJIKMYAlqZBO5wOWarN7927Gx8dn9icmJnjzTc8N1PsMYBU3Pj7O2vV3MDi8EoDpLd9l8B2jh/mWNP8ZwOoKg8MrZ9Y22zU9WbYYaY54nSdJhRjAklSIASxJhdgHrDnVOuIBHPWg/mUAa061jngARz2ofxnAmnPNIx7AUQ/qX173SVIhngFLPeSNPa8zMTGxX9vo6CgDAwOFKtKhGMBSD3l5+xTrn3mVoakFQKN7Z92VMDY2VrgytWMASz1m8dAZ+/Wxq3vZByxJhRjAklSIASxJhRjAklSIN+FUKydblw7OAFatnGxdOjgDWLVzsnWpPa8FJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCqktgCNiRUTcFxGPR8QPI+LTVfvJEXF3RDxVvS+p2iMivhwRkxHxSEScW1dtktQN6hwHvAf4j5m5JSIWAw9FxN3AbwP3ZObnIuIa4Brg94ALgVXV613AtdW7pKPUboJ2cJL2blFbAGfmNmBbtf1yRDwBLAcuBn6lOmwjcD+NAL4YuDEzE3gwIgYjYqj6Hc0DrnjcfVonaAcnae8mc/IkXEScBpwDfB9Y1hSq24Fl1fZy4Nmmr01XbfsFcESsAdYAnHrqqfUVrSPmisfdyQnau1ftARwRbwW+CqzNzJciYuazzMyIyCP5vczcAGwAGBkZOaLvqn6ueCx1rtZrw4g4nkb43pSZt1fNz0XEUPX5ELCjat8KrGj6+nDVJkk9qc5REAFcDzyRmX/U9NEm4PJq+3Lgzqb2j1ejId4NvGj/r6ReVmcXxPuAjwGPRsTDVdvvA58Dbo2IK4Ap4NLqs7uAi4BJ4GfAJ2qsTZKKq3MUxANAHOTj1W2OT+CquuqRpG7j+CBJKsQAlqRCDGBJKsQAlqRCXBNO6jPt5odwbogyDGCpz7TOD+HcEOUYwFIfcn6I7mAfsCQVYgBLUiEGsCQVYgBLUiEGsCQVYgBLUiEOQ9NRcf233uHCneUYwDoqrv/WO1y4sxwDWEfN9d96hw9mlOH1oiQVYgBLUiEGsCQVYgBLUiEGsCQVYgBLUiEGsCQVYgBLUiEGsCQVYgBLUiEGsCQVYgBLUiEGsCQVYgBLUiEGsCQVYgBLUiEGsCQV4ooY6kjrGnCu/yYdOwNYHWldA87136RjZwCrY81rwLn+m3TsDGAdwCXnpblhAOsALjkvzQ0DWG255Hx/e2PP60xMTBzQPjo6ysDAQIGKepMBLOkAL2+fYv0zrzI0tWCmbdf0JOuuhLGxsYKV9RYDWFJbi4fO2O8qSLPPuyqSVIgBLEmFGMCSVEhtARwRN0TEjoh4rKnt5Ii4OyKeqt6XVO0REV+OiMmIeCQizq2rLknqFnWeAX8FuKCl7RrgnsxcBdxT7QNcCKyqXmuAa2usS5K6Qm0BnJnfAX7c0nwxsLHa3ghc0tR+YzY8CAxGxFBdtUlSN5jrPuBlmbmt2t4OLKu2lwPPNh03XbUdICLWRMTmiNi8c+fO+iqVpJoVuwmXmQnkUXxvQ2aOZObI0qVLa6hMkubGXAfwc3u7Fqr3HVX7VmBF
03HDVZsk9ay5fhJuE3A58Lnq/c6m9k9GxC3Au4AXm7oqJHWBdvNDODfEsaktgCPiZuBXgF+IiGngD2gE760RcQUwBVxaHX4XcBEwCfwM+ERddUk6Oq3zQzg3xLGrLYAz8zcP8tHqNscmcFVdtUiaHc4PMbt8Ek6SCjGAJakQA1iSCnE+YLnkvFSIASyXnJcKMYAFuOS8VILXmZJUiGfAko6KKycfOwNY0lFx5eRjZwBLOmo+GXds7AOWpEIMYEkqxACWpEIMYEkqxACWpEIMYEkqxGFokmaNyxYdGQNY0qxx2aIjYwBLmlU+nNE5A7jPtM79C87/K5ViAPewg4XtDQ88zZIVq2banP9XKsMA7mGtE63DvrBtvkR0/l+pDAO4xzVPtA6Grcprd2UG/TlawgCWNKfaXZn162gJA1hSbdqNC56YmOBtv/hPHCmBASypRu0mbfem7z4GsKRatY4L9j7EPgZwD2m9ueH4Xqm7GcA9pPXmhpd6UnczgOeJdkN32g3baR525qWe1N0M4Hmi9ey2X4ftSL3EAJ5HWh+qkDS/eYdGkgrxDHieOtgAd0c9SPOHATxPOcBdmv8M4HnMAe7qFf26lJEBLKm41iu6H089yRUfmOCss87a77heC2UDWFJXaL6i2zU9yfpvPb5fF1svDr00gCV1pX5YW84A7kKu2yb1BwO4Cx1qKSFJvcMAnmOtZ7evvfYaAIsWLZppazdhtSMcpN5jAM+xA2csu58Fi09haNU7Z47xbFc6UOtQtXYnLzC/RkoYwDU6WF9u89ntrulJFg4OebYrHUbrULV2Jy/tRkp0OpNgCV0VwBFxAfAlYAFwXWZ+rnBJR6TdhOg3PPA0S1asmmnz7FY6eq1D1VpPXtppverspjHGXRPAEbEA+BPgg8A0MB4RmzLz8dn8O530wba2dXIMHBi4e8PWs1tpbnSyCGgnY4zbnTXD7Id01wQwcB4wmZlPA0TELcDFwKwG8Pj4OL/9+1/gpFOGAHj+6cdYcOJilgz945ljWts6OWZv2+AZ/3y/v/fytqfZ+daTZvZf2THNgldfnWlr3e+0bbaO8bf97V767W2P/G8+/39eYsm3Hptp2/v/y+OOi33fW3wKrZqDe2Jigi/+5d0zOQHw0xe28ZU//E+z+iBIZOas/dixiIiPABdk5u9U+x8D3pWZn2w5bg2wpto9E/i/h/npXwCen+Vyj0W31QPW1Clr6ow1Hej5zLygtbGbzoA7kpkbgA2dHh8RmzNzpMaSjki31QPW1Clr6ow1da6bHq3aCqxo2h+u2iSpJ3VTAI8DqyLi9Ig4AbgM2FS4JkmqTdd0QWTmnoj4JPC3NIah3ZCZP5yFn+64u2KOdFs9YE2dsqbOWFOHuuYmnCT1m27qgpCkvmIAS1IhPRnAEXFDROyIiMcOf/TciIgVEXFfRDweET+MiE93QU0DEfF3ETFR1fRfS9e0V0QsiIgfRMTXS9cCEBE/iohHI+LhiNhcuh6AiBiMiNsi4smIeCIi3lO4njOr/3z2vl6KiLUla6rq+g/V/74fi4ibI6L8JBCVnuwDjogPAK8AN2bmL5euByAihoChzNwSEYuBh4BLZvtR6yOsKYCTMvOViDgeeAD4dGY+WKqmvSLiamAEeFtmfqgL6vkRMJKZXfOAQURsBL6bmddVI4fekpm7CpcFzEwtsJXGw1RTBetYTuN/1/8sM1+NiFuBuzLzK6VqataTZ8CZ+R3gx6XraJaZ2zJzS7X9MvAEsLxwTZmZr1S7x1ev4v9Gjohh4NeB60rX0q0i4u3AB4DrATLz590SvpXVwN+XDN8mC4ETI2Ih8Bbg/xWuZ0ZPBnC3i4jTgHOA7xcuZe+l/sPADuDuzCxeE7AO+F3gzcJ1NEvgmxHxUPU4fGmnAzuBP6u6aq6LiJMO96U5dBlwc+kiMnMr8AXgH4BtwIuZ+c2yVe1jAM+xiHgr8FVgbWa+VLqezHwjM8+m8eTheRFRtMsmIj4E7MjM
h0rW0cb7M/Nc4ELgqqqbq6SFwLnAtZl5DvBT4JqyJTVU3SEfBv6qC2pZQmNSr9OBXwROioh/V7aqfQzgOVT1s34VuCkzby9dT7Pq8vU+4IAJQ+bY+4APV32utwDnR8Sfly1p5kyKzNwBfI3G7H0lTQPTTVcst9EI5G5wIbAlM58rXQjwL4FnMnNnZr4O3A68t3BNMwzgOVLd8LoeeCIz/6h0PQARsTQiBqvtE2nMxfxkyZoy87OZOZyZp9G4jL03M4uesUTESdWNU6rL/F8Dio6wycztwLMRcWbVtJpZnrr1GPwmXdD9UPkH4N0R8Zbq/4Oradx/6Qo9GcARcTPwPeDMiJiOiCtK10TjzO5jNM7o9g7TuahwTUPAfRHxCI25OO7OzK4Y9tVllgEPRMQE8HfA32TmNwrXBPAp4Kbqv7+zgT8sW87Mv6A+SONMs7jqCuE2YAvwKI3M65rHkntyGJokzQc9eQYsSfOBASxJhRjAklSIASxJhRjAklSIASxJhRjAklTI/wfCU6p7uQ4EvAAAAABJRU5ErkJggg==\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "s1 = np.random.normal(5, 1, 10000)\n",
- "sns.displot(s1)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAANqElEQVR4nO3df2jc933H8dfbkubI8tzOihGZFe8qRNKYGdpGdD86irVEiyUXbxACGyQyaGCIh+xmf4wNBLmA2F9jLOiPQUi3WKxkbGn3zyabKl5gLbTrTlk6Z7YxV1V25LWOKmd1ZSmqZL/3h0433elOkrfv3ftOej7A4O/X37vPW7G/T33veydi7i4AQPXtih4AAHYqAgwAQQgwAAQhwAAQhAADQJDGBzn44Ycf9lQqVaFRAGB7mpiY+LG7Hyje/0ABTqVSymQyyU0FADuAmV0vtZ9bEAAQhAADQBACDABBCDAABCHAABCEAANAEAIMAEEIMAAEIcAAEIQAA0AQAgwAQQgwAAQhwAAQhAADQBACDABBCDAABCHAABCEAANAEAIMAEEe6P8JByRhZGRE2Wy2auvdvHlTknTw4MGqrNfZ2anBwcGqrIX6RoBRddlsVu+9f0X39uyvynoN8z+RJP1osfL/3Bvmb1d8DWwfBBgh7u3Zr4VP91VlrearY5JUlfVW1wK2gnvAABCEAANAEAIMAEEIMAAEIcAAEIQAA0AQAgwAQQgwAAQhwAAQhAADQBACDABBCDAABCHAABCEAANAEAIMAEEIMAAEIcAAEIQAA0AQAgwAQQgwAAQhwAAQhAADQBACDABBCDAABCHAABCEAANAEAIMAEEIMAAEIcAAEIQAA0AQAgwAQQgwAAQhwAAQhAADQBACDABBCDAABCHAABCEAANAEAIMAEEIMAAEIcAAEIQAb8HIyIhGRkaixwB2nO1+7jVGD1APstls9AjAjrTdzz2ugAEgCAEGgCAEGACCEGAACEKAASAIAQaAIAQYAIIQYAAIQoABIAgBBoAgBBgAghBgAAhCgAEgCAEGgCAEGACCEGAACEKAASAIAQaAIAQYAIIQYAAIQoABIAgBBoAgBBgAghBgAAhCgAEgCAEGgCAEGACCEGAACEKAASAIAQaAIAQYAIIQYAAIQoABIAgBBoAgBBgAghBgAAhCgAEgCAEGgCBVCfDs7KzOnDmj2dnZktvl9m302Gw2u27/iy++qNOnTxc8RyaTUXd3t1544QUNDAyot7dXmUxGp0+f1qlTp/T888+ru7tbPT096u7u1okTJ3T06FE999xz+TXm5+eVzWbXzQag8paWlnTmzBllMhn19vbqmWeeUX9/v/r6+pTNZiUVdmFgYEBHjx7VwMBA/hxe24tsNqvjx4/nH7uZcm1KQlUCfO7cOV26dEmjo6Mlt8vt2+ixw8PD6/ZfuXJFly9fLniOdDotd9cHH3ygyclJLSwsKJ1O6/Lly7p27Zqmp6fl7lpaWpK7686dO5KkmZmZ/Bo3btzQ3bt3180GoPJu3bqlS5cuKZ1Oa2FhQYuLi7px44bm5+c1PDwsqbALk5OTkqTJycn8Oby2F8PDw7p7927+sZsp16YkVDzAs7OzunDhgtxdFy5cUDabLdienZ1dd8zaq9pyj52amirYf/78+fya58+f1+zsrDKZjObm5tbNVGpfKatrLC4uSlLBbAAqb2lpSbdv35a7lzxvp6amNDExUdCF4j9f24uxsbH8MVNTU5teBZdrU1IaE322Es6dO6f79+9Lku7du6fh4eGC7dHRUbn7un0vvfTSho9dtbp/eXk5v29paUmjo6O6ePFiol/L4uKiTp06pfb29kSfd6fJZrPa9TOPHqMidn18R9nsT3X27NnoUbaFa9euyX3jfysvv/zyui6Us7S0VLA9PDysN954o+zxxQ1abVNSNr0CNrNTZpYxs8zMzMwDL/D222/n47i8vKypqamC7fHx8XXHjI+Pb/rY
Vav71/4lubvGx8e3fKX7ID766KPEnxNAacXneylzc3NbOq6U4ivmYuXalJRNr4Dd/TVJr0lSV1fXA1+2PP300xobG9Py8rIaGxvV3t6u6enp/HZPT0/+pcHafZs9Nv8F5PZfv349H2EzU09Pjy5evJhohBsbG3X8+PFEvwPuRGfPntXE5K3oMSri/kP71NnRpldffTV6lG3h2Wef3fRl/969e/Xxxx//nyKcSqU2/PPiBq22KSkVvwd88uRJ7dq1skxDQ4OGhoYKtvv7+9cd09/fv+ljV63ub2z83+8lTU1N6u/vVzqdTvRrWTsbgMpra2uTmW14zCuvvLKuC+U0NTUVbA8NDW14fLk2JaXiAW5tbdWxY8dkZjp27Jg6OzsLtltbW9cd09rauuljU6lUwf7e3t78mr29vWptbVVXV5f27t27bqZS+0pZXWP37t2SVDAbgMpramrS/v37ZWYlz9tUKqUnn3yyoAvFf762F319ffljUqmUOjs7N1y/XJuSUpWPoZ08eVJHjhwpuLJdu11u30aPHRoaWrf/iSee0OHDhwueI51Oy8z06KOPqqOjQ83NzUqn0zp8+LAee+wxtbe3y8zU1NQkM9O+ffskSQcOHMivcejQIbW0tHD1CwRoa2vTkSNHlE6n1dzcrN27d+vQoUPas2dP/gp2bRc6OjokSR0dHflzeG0vhoaG1NLSsunV76pybUqCbfYO41pdXV2eyWQSH6LWrb6jzX29ZKzeA174dF9V1mu+OiZJVVmv+eqYnuQecGK2y7lnZhPu3lW8nx9FBoAgBBgAghBgAAhCgAEgCAEGgCAEGACCEGAACEKAASAIAQaAIAQYAIIQYAAIQoABIAgBBoAgBBgAghBgAAhCgAEgCAEGgCAEGACCEGAACEKAASAIAQaAIAQYAIIQYAAIQoABIAgBBoAgBBgAghBgAAhCgAEgCAEGgCAEGACCEGAACEKAASAIAQaAIAQYAIIQYAAIQoABIAgBBoAgBBgAgjRGD1APOjs7o0cAdqTtfu4R4C0YHByMHgHYkbb7ucctCAAIQoABIAgBBoAgBBgAghBgAAhCgAEgCAEGgCAEGACCEGAACEKAASAIAQaAIAQYAIIQYAAIQoABIAgBBoAgBBgAghBgAAhCgAEgCAEGgCAEGACCEGAACEKAASAIAQaAIAQYAIIQYAAIQoABIAgBBoAgBBgAghBgAAhCgAEgCAEGgCAEGACCEGAACEKAASAIAQaAIAQYAIIQYAAIQoABIAgBBoAgBBgAgjRGD4CdqWH+tpqvjlVprVlJqsp6DfO3JbVVfB1sDwQYVdfZ2VnV9W7eXJYkHTxYjTC2Vf3rQ/0iwKi6wcHB6BGAmsA9YAAIQoABIAgBBoAgBBgAghBgAAhCgAEgCAEGgCAEGACCEGAACEKAASAIAQaAIAQYAIIQYAAIQoABIAgBBoAgBBgAghBgAAhCgAEgCAEGgCAEGACCmLtv/WCzGUnXNznsYUk//v8MVSX1MGc9zCjVx5z1MKNUH3My44P7JXc/ULzzgQK8FWaWcfeuRJ+0AuphznqYUaqPOethRqk+5mTG5HALAgCCEGAACFKJAL9WgeeshHqYsx5mlOpjznqYUaqPOZkxIYnfAwYAbA23IAAgCAEGgCCJBdjM/srMPjSz95N6zqSZ2aNm9o6ZXTaz/zSzs9EzlWJmD5nZd83se7k5X4meqRwzazCzfzezf4yepRwzmzKzS2b2nplloucpxcw+aWZvmdlVM7tiZr8WPVMxM3s8999w9dcdM/ty9FzFzOyl3Hnzvpm9aWYPRc9UTmL3gM3si5LmJI26+y8n8qQJM7NHJD3i7u+a2c9LmpD0O+5+OXi0AmZmklrcfc7MmiR9S9JZd/9O8GjrmNkfSuqStM/dvxQ9TylmNiWpy91r6YP5BczsnKRvuvvrZvZzkva4+38Hj1WWmTVIuinpV9x9sx/OqhozO6iV8+Wwuy+Y2d9JGnP3N2InKy2x
K2B3/xdJt5N6vkpw9x+6+7u53/9U0hVJB2OnWs9XzOU2m3K/au7dUjNrl3Rc0uvRs9QzM/uEpC9K+ookufvPajm+OU9J+n4txXeNRknNZtYoaY+k/wqep6wdew/YzFKSPivpX4NHKSn30v49SR9KGnf3WpzzLyT9kaT7wXNsxiV9w8wmzOxU9DAlfErSjKS/zt3Oed3MWqKH2sTvSnozeohi7n5T0p9JuiHph5J+4u7fiJ2qvB0ZYDPbK+lrkr7s7nei5ynF3e+5+2cktUv6vJnV1G0dM/uSpA/dfSJ6li34DXf/nKReSX+Qu11WSxolfU7SX7r7ZyXdlfTHsSOVl7tFckLS30fPUszMfkHSb2vlm9ovSmoxs+djpypvxwU4d0/1a5K+6u5fj55nM7mXou9IOhY8SrEvSDqRu7/6t5J+08z+Jnak0nJXRXL3DyX9g6TPx060zrSk6TWvct7SSpBrVa+kd939VvQgJTwt6QfuPuPuS5K+LunXg2cqa0cFOPfm1lckXXH3P4+epxwzO2Bmn8z9vllSj6SroUMVcfc/cfd2d09p5eXoP7t7zV1pmFlL7g1X5V7W/5akmvqkjrv/SNIHZvZ4btdTkmrqjeEiv6cavP2Qc0PSr5rZntz5/pRW3uupSUl+DO1NSd+W9LiZTZvZ7yf13An6gqQXtHK1tvpRmr7ooUp4RNI7ZvYfkv5NK/eAa/ZjXjWuTdK3zOx7kr4r6Z/c/ULwTKUMSvpq7u/8M5L+NHac0nLfxHq0cmVZc3KvIt6S9K6kS1ppXM3+WDI/igwAQXbULQgAqCUEGACCEGAACEKAASAIAQaAIAQYAIIQYAAI8j84qcWbzlK/eAAAAABJRU5ErkJggg==\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "# for boxplots see https://en.wikipedia.org/wiki/Interquartile_range (or ask!)\n",
- "sns.boxplot(x=s1)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Heavy-tailed\n",
- "Distributions in which a small but non-negligible number of observations take very high values. Several probability distributions follow this pattern: https://en.wikipedia.org/wiki/Heavy-tailed_distribution#Common_heavy-tailed_distributions.\n",
- "\n",
- "We use the log-normal distribution as our example here: https://en.wikipedia.org/wiki/Log-normal_distribution"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 22,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAFgCAYAAACFYaNMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAWCElEQVR4nO3df5Bd5X3f8fd3tVoJ4wQJqmGIxAzymHHDZNqakR1sUk+L0hiTTEQzBJh4LBkkkxLHtU0nCa7/SPtX644bbDwpRkZWUEr9i5BCHBdXBmzq2gjLP+ofYIcNro00YITLLhln791d3W//uM+ur+SVdrXas8/q3vdrZuee85znnP2ePauPzj73nHMjM5EkLb+h2gVI0qAygCWpEgNYkioxgCWpEgNYkioZrl3A6bjyyivzwQcfrF2GJM0n5mo8o8+AX3jhhdolSNKindEBLElnMgNYkioxgCWpEgNYkioxgCWpEgNYkioxgCWpEgNYkioxgCWpEgNYkioxgCWpEgNYkioxgCWpEgNYkioZyAButVq0Wq3aZUgacAMZwJK0EhjAklSJASxJlRjAklSJASxJlQxkAGcmrVaLzKxdiqQBNpAB3G632X7no7Tb7dqlSBpgAxnAAEOrR2qXIGnADWwAS1JtBrAkVWIAS1IlBrAkVWIAS1IlBrAkVWIAS1IlBrAkVWIAS1IlBrAkVWIAS1IlBrAkVWIAS1IlBrAkVdJoAEfEuyPiOxHx7Yj4WESsjYjNEXEgIkYj4hMRMVL6rinzo2X5RU3WJkm1NRbAEbER+NfAlsz8JWAVcD3wPuC2zHwl8CKws6yyE3ixtN9W+klS32p6CGIYOCsihoGXAc8CVwD3luV3A1eX6W1lnrJ8a0REw/VJUjWNBXBmHgbeD/yQbvCOA18FxjJzunQ7BGws0xuBZ8q606X/ecdvNyJuioiDEXHwyJEjTZUvSY1rcghiPd2z2s3ALwBnA1ee7nYzc3dmbsnMLRs2bDjdzUlSNU0OQfwq8P3MPJKZU8B9wOXAujIkAbAJOFymDwMXApTl5wA/bqo4PxlZUm1NBvAPgcsi4mVlLHcr8ATwCHBN6bMDuL9MP1DmKcsfzgbTsTM95ScjS6qqyTHgA3TfTPsa8K3yvXYDfwTcEhGjdMd495RV9gDnlfZbgFubqm2Gn4wsqabh+bssXmb+MfDHxzU/Dbx2jr4t4LebrEeSVhLvhJOkSgxgSarEAJakSgxgSapk4AJ45vpfSapt4AK43W5zw52fp9Pp1C5F0oAbuAAGWOX1v5JWgIEMYElaCQxgSarEAJakSgxgSarEAJakSgxgSarEAJakSgxgSarEAJakSgxgSarEAJakSgxgSarEAJakSgxgSarEAJakSgxgSarEAJakSgxgSarEAJakSgxgSarEAJakSgxgSarEAJakSgY6gI9OTdJqtWqXIWlADXQAS1JNBrAkVWIAS1IlBrAkVWIAS1IlBrAkVWIAS1IlBrAkVWIAS1IlBrAkVWIAS1IlBrAkVWIAS1IlBrAkVWIAS1IlBrAkVWIAS1IlBrAkVWIAS1IlBrAkVWIAS1IlBrAkVWIAS1IlBrAkVWIAS1IlBrAkVWIAS1IlBrAkVWIAS1IlBrAkVdJoAEfEuoi4NyK+GxFPRsTrIuLciNgfEU+V1/Wlb0TE7RExGhHfjIhLm6xNkmpr+gz4g8CDmfkPgX8MPAncCjyUmRcDD5V5gDcBF5evm4A7Gq5NkqpqLIAj4hzgDcAegMyczMwxYBtwd+l2N3B1md4G7Muux4B1EXFBU/VJUm1NngFvBo4AeyPi6xFxV0ScDZyfmc+WPs8B55fpjcAzPesfKm3HiIibIuJgRBw8cuRIg+VLUrOaDOBh4FLgjsx8NfATfjrcAEBmJpCnstHM3J2ZWzJzy4YNG5asWElabk0G8CHgUGYeKPP30g3kH80MLZTX58vyw8CFPetvKm2NyUxarRbd/wckaXk1
FsCZ+RzwTES8qjRtBZ4AHgB2lLYdwP1l+gFge7ka4jJgvGeoohGd6Sl27X2Mdrvd5LeRpDkNN7z9dwD3RMQI8DRwA93Q/2RE7AR+AFxb+n4GuAoYBf6+9G3c0OqR5fg2kvQzGg3gzPwGsGWORVvn6JvA25usR5JWEu+Ek6RKDGBJqsQAlqRKDGBJqsQAlqRKDGBJqsQAlqRKDGBJqsQAlqRKDGBJqsQAlqRKDGBJqsQAlqRKDGBJqsQAlqRKDGBJqsQAlqRKDGBJqsQAlqRKDGBJqsQAlqRKBj6AM5NWq0X3Q5klafkMfAB3pqfYfuejtNvt2qVIGjADH8AAQ6tHapcgaQAZwJJUiQEsSZUYwJJUiQEsSZUYwJJUiQEsSZUYwJJUyYICOCIuX0ibJGnhFnoG/KEFtkmSFmj4ZAsj4nXA64ENEXFLz6KfB1Y1WZgk9buTBjAwAry89Pu5nvaXgGuaKkqSBsFJAzgzvwB8ISL+LDN/sEw1SdJAmO8MeMaaiNgNXNS7TmZe0URRkjQIFhrAnwI+DNwFHG2uHEkaHAsN4OnMvKPRSiRpwCz0MrS/iojfi4gLIuLcma9GK5OkPrfQM+Ad5fUPetoSeMXSliNJg2NBAZyZm5suRJIGzYICOCK2z9WemfuWthxJGhwLHYJ4Tc/0WmAr8DXAAJakRVroEMQ7eucjYh3w8SYKkqRBsdjHUf4EcFxYkk7DQseA/4ruVQ/QfQjPLwKfbKooSRoECx0Dfn/P9DTwg8w81EA9kjQwFjQEUR7K8126T0RbD0w2WZQkDYKFfiLGtcDjwG8D1wIHIsLHUUrSaVjoEMR7gddk5vMAEbEB+Bxwb1OFSVK/W+hVEEMz4Vv8+BTWlSTNYaFnwA9GxGeBj5X564DPNFOSJA2G+T4T7pXA+Zn5BxHxW8CvlEVfBu5pujhJ6mfzDSN8gO7nv5GZ92XmLZl5C/CXZVlfODo1SavVql2GpAEzXwCfn5nfOr6xtF3USEWSNCDmC+B1J1l21hLWIUkDZ74APhgRbzu+MSJ2AV9tpiRJGgzzXQXxLuAvI+LN/DRwtwAjwL9ssC5J6nsnDeDM/BHw+oj458Avlea/zsyHG69MkvrcQp8H/AjwSMO1SNJAafxutohYFRFfj4hPl/nNEXEgIkYj4hMRMVLa15T50bL8oqZrk6SaluN24ncCT/bMvw+4LTNfCbwI7CztO4EXS/ttpZ8k9a1GAzgiNgG/DtxV5gO4gp8+xOdu4Ooyva3MU5ZvLf0lqS81fQb8AeAPgU6ZPw8Yy8zpMn8I2FimNwLPAJTl46X/MSLipog4GBEHjxw50mDpktSsxgI4In4DeD4zl/R64czcnZlbMnPLhg0blnLTkrSsFvo0tMW4HPjNiLiK7kfZ/zzwQWBdRAyXs9xNwOHS/zBwIXAoIoaBc+g+9lKS+lJjZ8CZ+Z7M3JSZFwHXAw9n5pvpXs4282kaO4D7y/QDZZ6y/OHMTCSpT9V4qPofAbdExCjdMd49pX0PcF5pvwW4tUJtkrRsmhyCmJWZnwc+X6afBl47R58W3c+cW3aZSavVIjPxwgtJy8WPFQI601Ps2vsY7Xa7dimSBogBXAytHqldgqQBYwBLUiUGsCRVYgBLUiUGsCRVYgBLUiUGsCRVYgBLUiUGsCRVYgBLUiUDF8CtVotOpzN/R0lq2MAFsCStFAawJFViAEtSJQZwcXRqklarVbsMSQPEAJakSgxgSarEAJakSgxgSarEAJakSgxgSarEAJakSgxgSarEAJakSgxgSarEAJakSgzgIjNptVpkZu1SJA0IA7joTE+xa+9jtNvt2qVIGhAGcI+h1SO1S5A0QAxgSarEAO7hOLCk5WQA9+hMT7H9zkcdB5a0LAzg4zgOLGm5GMCSVIkBLEmVGMCSVIkBLEmVGMCSVIkBLEmVGMCSVIkBLEmVGMCSVIkBLEmVGMCSVIkBLEmVGMCSVIkBLEmVGMCSVIkBLEmVGMCSVIkB
LEmVGMCSVIkBLEmVGMCSVIkBLEmVGMCSVIkBLEmVGMCSVIkBfJyjU5O0Wq3aZUgaAAbwHFqtliEsqXGNBXBEXBgRj0TEExHxnYh4Z2k/NyL2R8RT5XV9aY+IuD0iRiPimxFxaVO1SdJK0OQZ8DTwbzLzEuAy4O0RcQlwK/BQZl4MPFTmAd4EXFy+bgLuaLA2SaqusQDOzGcz82tl+u+AJ4GNwDbg7tLtbuDqMr0N2JddjwHrIuKCpuqTpNqWZQw4Ii4CXg0cAM7PzGfLoueA88v0RuCZntUOlbbjt3VTRByMiINHjhxprmhJaljjARwRLwf+AnhXZr7UuywzE8hT2V5m7s7MLZm5ZcOGDUtY6ez2abVadEuTpOY0GsARsZpu+N6TmfeV5h/NDC2U1+dL+2Hgwp7VN5W2ZdWZnuLmfY/TbreX+1tLGjBNXgURwB7gycz8k55FDwA7yvQO4P6e9u3laojLgPGeoYplNbR6pMa3lTRghhvc9uXAW4BvRcQ3Stu/Bf4j8MmI2An8ALi2LPsMcBUwCvw9cEODtUlSdY0FcGZ+EYgTLN46R/8E3t5UPZK00ngnnCRVYgBLUiUGsCRVYgBLUiUGsCRVYgDPwbvhJC0HA3gOnekpdu19zLvhJDXKAD4B74aT1DQD+AQchpDUNAP4BDrTU2y/81GHISQ1xgA+CYchJDXJAJakSgzgk/Aj6iU1yQCWpEoMYEmqxACWpEoMYEmqxACWpEoMYEmqxACeR6vV8lI0SY0wgCWpEgNYkioxgCWpEgNYkioxgE/CZwJLapIBfBKd6Slu3ve4zwSW1AgDeB4+E1hSUwxgSarEAJ5HZjIxMcHExIRjwZKWlAE8j870FDd+5H9x3e37HQuWtKQM4AUYWj3iWLCkJWcAS1IlBrAkVWIAS1IlBrAkVWIAS1IlBvACHZ2a9MHskpaUAbxAPphH0lIzgBeoMz3Frr2PeTOGpCVjAJ+CBIchJC0ZA1iSKjGAF8kxYUmnywBepHa7zXUf+pxjwpIWzQA+Dat8QI+k0zBcu4AzycyzgdesWVO7FEl9wAA+BTPPBo4I/stbXjM7DrxmzRoionZ5ks4wDkGcoqHVIxDBzfseZ3qyzfY7H3UcWNKiGMCLNPOAdh/ULmmxDGBJqsQAlqRKDODTND3ZZmxsjImJCW9TlnRKDODT1JmeYudHv8z4+Lh3xUk6JQbwIh2dmqTT6XRnIth51xdnr4bwNmVJC2EAL5EYXj07FOFtypIWwgBeIp3pKW7e9/hs6HqbsqT5GMBLyGuCJZ0KA3gJOfYr6VQYwEuo94qIXjMP8ZmYmKDT6RjSkgADeOmVKyImWxO8+OKLTExMMDY2xjX/+a+57vb9vPTSS75BJwnwaWiNGFo9wtGpydknp9127T9iaPUIMbyaVqt1zBt0mUm73faJatIA8gy4ATPXCM88Oe0d//UrdDodOtNTvPWOh5menqbVajE2Nsb4+PicZ8THjyc7viz1nxUVwBFxZUR8LyJGI+LW2vUsld6rI2ame8eFM5OxsbFjwnZ8fJxrb99/zG3O133oc7RarZ8J4plw7nQ6x2xT0sq2YoYgImIV8KfAvwAOAV+JiAcy84m6lS2dmTPjmYD9V3u/RKwaJoaG2PnRL3PXDZeRmUQEN+7+AhlD/M6H9jM8soaPvPWXGRpezfj4ODv3/G/uuvH1s5/MERHs+MgX2b39Nez66JeIoVXs+903zA5rrFmzhna7fcqhPLPu5OQkIyMjs9uICNauXUtEzIY/cExb77DKXH1O5mTDMr3LgAUN38y3zpk8DHQm134maernHCvlTCkiXgf8u8x8Y5l/D0Bm/ocTrbNly5Y8ePDgKX2fsbExrr/tM8SqYfLo9Oxrp9Nh1eqRRU0vZhvTkxNErDpmG9OT3askVg2PMBRxzHpTU21G1p49ux4RZGf62P6Zs+vNLI+hYXbfeDm/u+dRMlYxNDTE0elJYmj4pNPQvZlkz87LedveL/GR
G17Pjj/9n0wfPcrIWS9n79v+6ezP9MbdnydWreaet28FoNVq8ba9X+LPb76CtWvX0mq1+J3bH5zts3bt2pMeo1arxVvueHh2/d723m0Dc/Y72fbmWudE328l633wU+3ae/9z7VczvyOfevdVi93POVN7JQXwNcCVmbmrzL8F+OXM/P3j+t0E3FRmXwV8bxHf7h8AL5xGuWcS97V/DdL+nun7+kJmXnl844oZgliozNwN7D6dbUTEwczcskQlrWjua/8apP3t131dSW/CHQYu7JnfVNokqS+tpAD+CnBxRGyOiBHgeuCByjVJUmNWzBBEZk5HxO8DnwVWAR/NzO809O1OawjjDOO+9q9B2t++3NcV8yacJA2alTQEIUkDxQCWpEoGKoD74VbniLgwIh6JiCci4jsR8c7Sfm5E7I+Ip8rr+tIeEXF72edvRsSlPdvaUfo/FRE7au3TfCJiVUR8PSI+XeY3R8SBsk+fKG/aEhFryvxoWX5RzzbeU9q/FxFvrLQr84qIdRFxb0R8NyKejIjX9euxjYh3l9/hb0fExyJibT8f2zll5kB80X1j72+BVwAjwP8BLqld1yL24wLg0jL9c8DfAJcA/wm4tbTfCryvTF8F/A+6d+JcBhwo7ecCT5fX9WV6fe39O8E+3wL8N+DTZf6TwPVl+sPAzWX694APl+nrgU+U6UvK8V4DbC6/B6tq79cJ9vVuYFeZHgHW9eOxBTYC3wfO6jmmb+3nYzvX1yCdAb8WGM3MpzNzEvg4sK1yTacsM5/NzK+V6b8DnqT7y7yN7j9eyuvVZXobsC+7HgPWRcQFwBuB/Zn5/zLzRWA/8DN36tQWEZuAXwfuKvMBXAHcW7ocv68zP4N7ga2l/zbg45nZzszvA6N0fx9WlIg4B3gDsAcgMyczc4w+PbZ0r8I6KyKGgZcBz9Knx/ZEBimANwLP9MwfKm1nrPJn2KuBA8D5mflsWfQccH6ZPtF+nyk/jw8Afwh0yvx5wFhmTpf53rpn96ksHy/9z5R93QwcAfaWIZe7IuJs+vDYZuZh4P3AD+kG7zjwVfr32M5pkAK4r0TEy4G/AN6VmS/1Lsvu32Zn/PWFEfEbwPOZ+dXatSyTYeBS4I7MfDXwE7pDDrP66Niup3v2uhn4BeBsVuZZeqMGKYD75lbniFhNN3zvycz7SvOPyp+flNfnS/uJ9vtM+HlcDvxmRPxfukNGVwAfpPun9sxNRL11z+5TWX4O8GPOjH2F7tnbocw8UObvpRvI/XhsfxX4fmYeycwp4D66x7tfj+2cBimA++JW5zLutQd4MjP/pGfRA8DMu907gPt72reXd8wvA8bLn7OfBX4tItaXs5FfK20rRma+JzM3ZeZFdI/Xw5n5ZuAR4JrS7fh9nfkZXFP6Z2m/vryTvhm4GHh8mXZjwTLzOeCZiHhVadoKPEEfHlu6Qw+XRcTLyu/0zL725bE9odrvAi7nF913jf+G7jul761dzyL34Vfo/gn6TeAb5esquuNhDwFPAZ8Dzi39g+6D7v8W+BawpWdbN9J902IUuKH2vs2z3/+Mn14F8Qq6/8hGgU8Ba0r72jI/Wpa/omf995afwfeAN9Xen5Ps5z8BDpbj+9/pXsXQl8cW+PfAd4FvA39O90qGvj22c315K7IkVTJIQxCStKIYwJJUiQEsSZUYwJJUiQEsSZUYwJJUiQEsSZX8f+QOJFZPad5aAAAAAElFTkSuQmCC\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "s2 = np.random.lognormal(5, 1, 10000)\n",
- "sns.displot(s2)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 23,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAN4UlEQVR4nO3df2xVZx3H8c+X3rLBphuUpZlloZAukv3lZmNmNIZsbKPViH/sjyUmVI0x0aQgW2K2jIQQ+o/GMIEYl4XFtEbddC7KTIsB3f4ULTrGpAyu7CLcbB3cuV+2dLQ8/nGf3t57uaWl5d7vvb3vV3LTc57znOc5z9PTD+eec5ZZCEEAgMpb5H0AAFCvCGAAcEIAA4ATAhgAnBDAAOAkcS2VV6xYEVpbW8t0KACwMB05cuRCCOG24vJrCuDW1lYNDg5ev6MCgDpgZmdKlXMLAgCcEMAA4IQABgAnBDAAOCGAAcAJAQwATghgAHBCAAOAEwIYAJwQwADghAAGACcEMAA4IYABwAkBDABOCGAAcEIAA4ATAhgAnBDAAOCEAAYAJ9f0/4Sbr7179yqZTEqS0um0JKmlpaWgTltbm7q7uyt5WADgoqIBnEwm9errQ5pYulwNI+9Lkt4emzqEhpF3K3k4AOCqogEsSRNLl2t0baeWnOiXJI2u7cxtmywDgHrAPWAAcEIAA4ATAhgAnBDAAOCEAAYAJwQwADghgAHACQEMAE4IYABwQgADgBMCGACcEMAA4IQABgAnBDAAOCGAAcAJAQwATghgAHBCAAOAEwIYAJwQwADghAAGACcEMAA4IYABwAkBDABOCGAAcEIAA4ATAhgAnBDAAOCEAAYAJwQwADghgAHACQEMAE4IYABwQgADgBMCGACcEMAA4IQABgAnBDAAOCGAAcAJAQwATghgAHCSqEQne/fuLVub3d3d171tAKiEigRwMpmsiTYBoJK4BQEATghgAHBCAAOAEwIYAJwQwADghAAGACcEMAA4IYABwAkBDABOCGAAcEIAA4ATAhgAnBDAAOCEAAYAJwQwADghgAHACQEMAE4IYABwQgADgBMCGACcEMAA4IQABgAnBDAAOCGAAcAJAQwATghgAHBCAAOAEwIYAJwQwADghAAGACcEMAA4IYABwAkBDABOCGAAcEIAA4ATAhgAnBDAAOCEAAYAJwnvA5iro0ePSpLWrVtX8b4bGho0MTExYz0zUwih5LaWlhZduHBBZqbm5mYNDw/n6ra0tGhsbEzpdFqPPvqo9u/fr3Q6rebmZi1evDh3DI899pj27NmjzZs3a9euXRoZGdHw8LBWrFihTCajnTt3at++fZqYmFBDQ4N6enokSTt27NDmzZu1Z88ebd++XU1NTcpkMiXLZzK5X3H9TCajbdu2ycy0c+fOgj5m03Zx3VL7Xkt71Sx/HJKqakwLZY7no5xzwBXwHMwmfCVNG76SlE6nNTY2posXL+rMmTO6ePGixsbGNDY2ptOnTyudTkuSnnrqKSWTSY2OjiqVSunkyZM6efKkhoaG1NPTo2PHjqmnp0fHjx9XKpXS6Oiozp49q5GREW3fvl1DQ0O5+n19fert7c3tc+zYMfX19UnStOUzmdyvuH5vb6+GhoZ0/PjxK/qYTdvFdUvtey3tVbP8cVTbmKrteDyUcw5qMoA9rnq9XC3EU6mUQghKpVIlt3/00UcF6/39/Tpw4EBunxCCDhw4oGQyWbI8k8lc9dgymUxuv/z6mUxGAwMDuXoDAwMFfczUdnG7pfadru9akz+OgYEBDQwMVM2YFsocz0e556AityDS6bRGR0clSYs+nj5QFl38QMnkh9qyZUslDqvuXLp0SWZWUDYxMaGenh5dvnz5ivK+vj5t3bp12vZ6e3tz++XX7+3t1fj4eEG/+X3M1HZxu6X2DSHMur1qlj/WS5cu5cqrYUzT/X7rSbnnYMYrYDP7jpkNmtng+fPnr1vH8FF8RT0+Pq5U
KlUQmJPlBw8evGpbhw4dyu2XX//QoUMF/UxeWZeqO5t2S+07Xd+1Jn8cIYTcvFXDmBbKHM9HuedgxivgEMIzkp6RpPb29ukvX6+ipaUlt3zk9PC09S7f+Em1rWnW7t27r9pePd2CuN6KHwwmEgmtXLlS586dKwjhRCKhBx544KptrV+/Xv39/RofHy+ov379er300ku5fsxMq1atyvUxU9vF7eYf3+S+IYSSfdea/LFOfjsJIVTFmKb7/daTcs9BTd4Dxtw0NjaqsbGxoKyhoUHbtm3TokWLrijftGnTVdvr6urK7Zdfv6urS4nE1L/tjY2NBX3M1HZxu6X2na7vWpM/jsbGxty8VcOYFsocz0e556AmA/iVV17xPoSKKb5nm6+1tVVmptbW1pLbb7755oL1zs5ObdiwIbePmWnDhg1qa2srWT7TKzdNTU25/fLrNzU1qaOjI1evo6OjoI+Z2i5ut9S+0/Vda/LH0dHRoY6OjqoZ00KZ4/ko9xzU7HvAnir5HvDWrVvn/B7wjh07Ct4DnvzXO5VK5d73zb9qLVU+k8n9iut3dXXp1KlTMrMr+phN28V1S+17Le1Vs+JxVNOYFsocz0c558Cu9ppTsfb29jA4OHjNneS/1XDk9LBG13ZqyYl+SdLo2s7ctiUn+vXZWdwDzm9zNnUBwJOZHQkhtBeX1+QtCABYCAhgAHBCAAOAEwIYAJwQwADghAAGACcEMAA4IYABwAkBDABOCGAAcEIAA4ATAhgAnBDAAOCEAAYAJwQwADghgAHACQEMAE4IYABwQgADgBMCGACcEMAA4IQABgAnBDAAOCGAAcAJAQwATghgAHBCAAOAEwIYAJwQwADghAAGACcEMAA4IYABwAkBDABOCGAAcEIAA4ATAhgAnBDAAOCEAAYAJ4lKdNLW1iZJSiaT171NAKhVFQng7u5uSdKWLVuue5sAUKu4BQEATghgAHBCAAOAEwIYAJwQwADghAAGACcEMAA4IYABwAkBDABOCGAAcEIAA4ATAhgAnBDAAOCEAAYAJwQwADghgAHACQEMAE4IYABwQgADgBMCGACcEMAA4IQABgAnBDAAOCGAAcAJAQwATghgAHBCAAOAEwIYAJwQwADghAAGACcEMAA4IYABwAkBDABOCGAAcEIAA4ATAhgAnBDAAOCEAAYAJwQwADghgAHASaLSHTaMvKslJ/rVMJKRJC050V+wTWqu9CEBgIuKBnBbW1tuOZ0elyS1tOQHbnNBHQBYyCoawN3d3ZXsDgCqGveAAcAJAQwATghgAHBCAAOAEwIYAJwQwADghAAGACcEMAA4IYABwAkBDABOCGAAcEIAA4ATAhgAnBDAAOCEAAYAJwQwADghgAHACQEMAE4IYABwQgADgBMLIcy+stl5SWfm2NcKSRfmuO9CwjxMYS6mMBdZC3UeVoUQbisuvKYAng8zGwwhtFeksyrGPExhLqYwF1n1Ng/cggAAJwQwADipZAA/U8G+qhnzMIW5mMJcZNXVPFTsHjAAoBC3IADACQEMAE7KHsBmtsHM3jCzpJk9Xu7+PJjZHWb2spkdN7N/mdmWWL7czA6a2an4c1ksNzPbE+fkNTO7J6+trlj/lJl1eY1pPsyswcz+aWZ/jOurzexwHO/zZrY4lt8Q15Nxe2teG0/E8jfM7CGnocyLmd1qZi+Y2QkzGzKzz9fjOWFmW+Pfxetm9mszu7Fez4krhBDK9pHUIOnfktZIWizpqKS7ytmnx0fS7ZLuicufkHRS0l2SfiTp8Vj+uKQfxuVOSQOSTNK9kg7H8uWSTsefy+LyMu/xzWE+HpX0K0l/jOu/kfRIXH5a0nfj8vckPR2XH5H0fFy+K54rN0haHc+hBu9xzWEeeiV9Oy4vlnRrvZ0TklokvSlpSd658I16PSeKP+W+Av6cpGQI4XQI4WNJz0naWOY+Ky6E8FYI4R9x+UNJQ8qeeBuV/SNU/Pm1uLxRUl/I+qukW83sdkkPSToY
Qng3hPBfSQclbajcSObPzFZK+rKkfXHdJN0n6YVYpXgeJufnBUn3x/obJT0XQhgLIbwpKansuVQzzOwWSV+S9KwkhRA+DiG8pzo8JyQlJC0xs4SkpZLeUh2eE6WUO4BbJJ3NWz8Xyxas+JXpbkmHJTWHEN6Km96W1ByXp5uXhTBfP5H0A0mX43qTpPdCCONxPX9MufHG7e/H+gthHlZLOi/p5/F2zD4zu0l1dk6EENKSfizpP8oG7/uSjqg+z4kr8BDuOjKzmyX9TtL3Qwgf5G8L2e9RC/qdPzP7iqR3QghHvI+lCiQk3SPpZyGEuyX9T9lbDjl1ck4sU/bqdbWkT0m6SbV3BV825Q7gtKQ78tZXxrIFx8walQ3fX4YQXozFw/FrpOLPd2L5dPNS6/P1BUlfNbOUsreb7pO0W9mv04lYJ39MufHG7bdIyqj250HKXqGdCyEcjusvKBvI9XZOrJf0ZgjhfAjhkqQXlT1P6vGcuEK5A/jvku6MTzwXK3tTfX+Z+6y4eI/qWUlDIYRdeZv2S5p8at0l6Q955Zvik+97Jb0fv5b+SdKDZrYsXjk8GMtqQgjhiRDCyhBCq7K/67+EEL4u6WVJD8dqxfMwOT8Px/ohlj8Sn4ivlnSnpL9VaBjXRQjhbUlnzezTseh+ScdVZ+eEsrce7jWzpfHvZHIe6u6cKKncT/mUfbp7Utmnlk96P3Us0xi/qOxXydckvRo/ncreu/qzpFOSDklaHuubpJ/GOTkmqT2vrW8p+4AhKemb3mObx5ys09RbEGuU/WNJSvqtpBti+Y1xPRm3r8nb/8k4P29I6vAezxzn4DOSBuN58Xtl32Kou3NC0g5JJyS9LukXyr7JUJfnRPGH/xQZAJzwEA4AnBDAAOCEAAYAJwQwADghgAHACQEMAE4IYABw8n/Y4kuV5/nC1AAAAABJRU5ErkJggg==\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "sns.boxplot(x=s2)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAFgCAYAAACFYaNMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAX1ElEQVR4nO3dfaxc9X3n8fc3xjc8xNjEcW3nmsh2YmVLXDUGF5OQjVLYVECzga3SbKJugiK63s06iDSrpKT9o6qUlZLdqrhBiK4F2ZjdBEoJyDSLICwPKZUKjWHcMITscuMF44uxzUMcCGUvxt/9Yw6X8XBtD/Y985uH90u6uuf85sw9X1m+n5n7m99DZCaSpN57S+kCJGlUGcCSVIgBLEmFGMCSVIgBLEmFHFe6gGNx3nnn5e233166DEk6kpipcaDfAT/zzDOlS5CkozbQASxJg8wAlqRCDGBJKsQAlqRCDGBJKsQAlqRCDGBJKsQAlqRCDGBJKsQAlqRCDGBJKsQAlqRCag3giHg8Ih6OiG0RsbVqe3tE3BkRj1XfT6naIyK+GRETEfHjiDi9ztokqbReLEf5m5nZvmzZ5cBdmfn1iLi8Ov9D4HxgVfW1Dri6+i7NqqmpKRqNxkFta9asYWxsrFBFGlUl1gO+EPhIdbwZuJdWAF8IXJetbZrvj4gFEbE0M3cVqFFDrNFocOlVW5g/vhKAfZPbuXIDrFvn6716q+4ATuAHEZHAf83MTcDitlB9GlhcHY8DT7Y9d2fVdlAAR8R6YD3Au971rhpL1zCbP76ShSveV7oMjbi6A/hDmTkZEb8C3BkRP21/MDOzCueuVSG+CWDt2rVv6rmS1E9q/RAuMyer73uAW4Azgd0RsRSg+r6nunwSOLXt6cuqNkkaSrUFcEScFBHzXjsGfgtoArcCF1eXXQxsqY5vBT5bjYY4C9hn/6+kYVZnF8Ri4JaIeO0+383M2yPiR8CNEXEJ8ATwyer624ALgAngJeBzNdamIeYoBw2K2gI4M7cDvz5D+7PAuTO0J7Chrno0OhzloEEx0NvSS4fiKAcNAqciS1IhvgPWyDvw6n6azeZBbfYZqxcMYI28F3bvYOPjL7NkojWs3D5j9YoBLAHzliy3z1g9Zx+wJBXiO2ANvc4+3mazSR5wFrvKM4A19Dr7eCe33ceCVWcUrkoygDUi2vt49z21vXA1UosBLHXB6c2qgwEsdcHpzaqDASx1yenNmm0OQ5OkQgxgSSrEAJakQgxgSSrEAJakQhwFoYHXOUbXqcYaFAawBl7nGF2nGmtQGMAaCu1jdJ1qrEFhAEsdZtohw24N1cEAljp0rp4GdmuoHgawNIPOHTLs1lAdHIYmSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUiMPQpKMw02QN94jTm2UAS0ehc7LG8zse4/PnNFm9evX0NQayjsQAlo5S51b3G+94ZDqQ3bRT3TCApVnSOXtOOhI/hJOkQgxgSSrEAJakQgxgSSrEAJakQgxgSSrEAJakQgxgSSrEAJakQpwJp4EyNTVFo9E4qM0dizWoDGANlEajwaVXbWH++MrpNncs1qAygDVw5o+vdMdiDQX7gCWpEANYkgoxgCWpEANYkgoxgCWpEANYkgoxgCWpkNoDOCLmREQjIr5fna+IiAciYiIi/ioixqr2t1bnE9Xjy+uuTZJK6sU74MuAR9vOvwFckZnvAZ4HLqnaLwGer9qvqK6TpKFVawBHxDLgt4FrqvMAzgFuqi7ZDFxUHV9YnVM9fm51vSQNpbrfAW8EvgIcqM4XAj/PzP3V+U5gvDoeB54EqB7fV11/kIhYHxFbI2Lr3r17ayxdkupVWwBHxMeAPZn54Gz+3MzclJlrM3PtokWLZvNHS1JP1bkYz9nAxyPiAuB44GTgL4AFEXFc9S53GTBZXT8JnArsjIjjgPnAszXWJ0lF1fYOODO/mpnLMnM58Cng7sz8PeAe4BPVZRcD
W6rjW6tzqsfvzkwXeZU0tEqMA/5D4EsRMUGrj/faqv1aYGHV/iXg8gK1SVLP9GQ94My8F7i3Ot4OnDnDNS8Dv9uLejQ4OnfAcPcLDRMXZFdf69wBw90vNEwMYPW99h0w3P1Cw8S1ICSpEANYkgoxgCWpEANYkgoxgCWpEANYkgoxgCWpEANYkgoxgCWpEGfCSTU48Op+ms3mQW1r1qxhbGysUEXqRwawVIMXdu9g4+Mvs2SitXDQ8zse4/PnNFm9evX0NQayDGCpJvOWLD9oDYuNdzwyHcj7Jrdz5QZYt25dyRJVmAEs9Uh7IEvgh3CSVIwBLEmFGMCSVIgBLEmFGMCSVIijINRX3IRTo8QAVl9xE06NEgNYfcdNODUq7AOWpEIMYEkqxACWpEIMYEkqxACWpEIMYEkqxACWpEIcBywVMNOWReAuGaPGAJYK6NyyCNwlYxQZwFIh7pAh+4AlqRADWJIKMYAlqRADWJIKMYAlqRADWJIKMYAlqRADWJIKMYAlqRADWJIKMYAlqRADWJIKMYAlqRADWJIKcTlKFTU1NUWj0Zg+bzab5IE8zDOk4WEAq6hGo8GlV21h/vhKACa33ceCVWcUrkrqDQNYxc0fXzm9MPm+p7YXrkbqHfuAJakQA1iSCqktgCPi+Ij4h4j4x4h4JCL+tGpfEREPRMRERPxVRIxV7W+tzieqx5fXVZsk9YM63wH/P+CczPx14P3AeRFxFvAN4IrMfA/wPHBJdf0lwPNV+xXVdZI0tGoL4Gx5sTqdW30lcA5wU9W+GbioOr6wOqd6/NyIiLrqk6TSau0Djog5EbEN2APcCfwM+Hlm7q8u2QmMV8fjwJMA1eP7gIUz/Mz1EbE1Irbu3bu3zvIlqVa1BnBmvpqZ7weWAWcC/2wWfuamzFybmWsXLVp0rD9OkorpySiIzPw5cA/wAWBBRLw2/ngZMFkdTwKnAlSPzwee7UV9klRCnaMgFkXEgur4BOCjwKO0gvgT1WUXA1uq41urc6rH785M56RKGlp1zoRbCmyOiDm0gv7GzPx+RPwEuCEivgY0gGur668F/ntETADPAZ+qsTZJKq62AM7MHwNrZmjfTqs/uLP9ZeB366pH6ncHXt1Ps9k8qG3NmjWMjY0Vqkh1cy0IqU+8sHsHGx9/mSUTrZ63fZPbuXIDrFu3rnBlqosBLPWReUuWTy9MpOHnWhCSVIgBLEmFdBXAEXF2N22SpO51+w74yi7bJEldOuyHcBHxAeCDwKKI+FLbQycDc+osTJKG3ZFGQYwBb6uum9fW/gten80mSToKhw3gzPwh8MOI+HZmPtGjmiRpJHQ7DvitEbEJWN7+nMw8p46iJGkUdBvAfw38JXAN8Gp95UjS6Og2gPdn5tW1ViJJI6bbAP6biPgPwC209noDIDOfq6UqDYWpqSkajcYb2l1gRmrpNoBfW6f3y21tCayc3XI0TBqNBpdetYX546//N3GBGel1XQVwZq6ouxANp/njK11cRjqErgI4Ij47U3tmXje75UjS6Oi2C+I32o6PB84FHgIMYEk6St12QVzafl7t9XZDHQVJ0qg42uUofwnYLyxJx6DbPuC/oTXqAVqL8PwqcGNdRUnSKOi2D/jP2o73A09k5s4a6pGkkdFVF0S1KM9Paa2IdgowVWdRkjQKuu2C+CTwX4B7gQCujIgvZ+ZNNdamIdS59Xqz2SQP5GGeIQ2vbrsg/hj4jczcAxARi4D/BRjAelM6t16f3HYfC1adUbiq/tT5YgVO4x423QbwW14L38qzuKGnjlL71uv7ntpeuJr+1fli5TTu4dNtAN8eEXcA11fn/xq4rZ6SJL2m/cVKw+dIe8K9B1icmV+OiN8BPlQ99PfAd+ouTpKG2ZHeAW8EvgqQmTcDNwNExK9Vj/3LGmuTpKF2pH7cxZn5cGdj1ba8lookaUQcKYAXHOaxE2axDkkaOUcK4K0R8W87GyPi94EH6ylJkkbDkfqAvwjcEhG/x+uB
uxYYA/5VjXVJ0tA7bABn5m7ggxHxm8Dqqvl/ZubdtVcmSUOu2/WA7wHuqbkWSRopzmaTpEIMYEkqxACWpEIMYEkqxACWpEIMYEkqxACWpEIMYEkqpNsF2aUjmpqaotFoTJ+735t0eAawZk2j0eDSq7Ywf3wl4H5v0pEYwJpV88dXut+b1CX7gCWpEANYkgoxgCWpEANYkgoxgCWpEANYkgoxgCWpkNoCOCJOjYh7IuInEfFIRFxWtb89Iu6MiMeq76dU7RER34yIiYj4cUScXldtktQP6nwHvB/4j5l5GnAWsCEiTgMuB+7KzFXAXdU5wPnAquprPXB1jbVJUnG1BXBm7srMh6rjF4BHgXHgQmBzddlm4KLq+ELgumy5H1gQEUvrqk+SSutJH3BELAfWAA8AizNzV/XQ08Di6ngceLLtaTurts6ftT4itkbE1r1799ZXtCTVrPa1ICLibcD3gC9m5i8iYvqxzMyIeFPLZWXmJmATwNq1a11qSyPjwKv7aTabB7WtWbOGsbGxQhXpWNUawBExl1b4ficzb66ad0fE0szcVXUx7KnaJ4FT256+rGqTBLywewcbH3+ZJROt9x37Jrdz5QZYt25d4cp0tOocBRHAtcCjmfnnbQ/dClxcHV8MbGlr/2w1GuIsYF9bV4UkYN6S5Sxc8T4Wrnjf9LKfGlx1vgM+G/gM8HBEbKva/gj4OnBjRFwCPAF8snrsNuACYAJ4CfhcjbVJUnG1BXBm/h0Qh3j43BmuT2BDXfVIUr9xJpwkFWIAS1IhBrAkFWIAS1IhBrAkFeKuyNKAcmbc4DOApQHlzLjBZwBLA+y1mXEaTPYBS1IhBrAkFWIAS1Ih9gFLQ2KmURHgyIh+ZgBLQ6JzVAQ4MqLfGcDSEHFUxGCxD1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCnE9YB21qakpGo3G9Hmz2SQP5GGeIamdAayj1mg0uPSqLcwfXwnA5Lb7WLDqjMJVSYPDANYxmT++cnoHhn1PbS9cjTRY7AOWpEIMYEkqxACWpEIMYEkqxACWpEIcBaFD6hzn+8orrwAwd+5cwHG/0rEygHVIM43znTNvIUvefdr0ueN++9uBV/fTbDYPaluzZg1jY2OFKlI7A1iH1TnO97j5Sxz3O0Be2L2DjY+/zJKJ1l8q+ya3c+UGWLduXeHKBAawNPTmLVk+/aKp/uKHcJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUSG0BHBHfiog9EdFsa3t7RNwZEY9V30+p2iMivhkRExHx44g4va66JKlf1PkO+NvAeR1tlwN3ZeYq4K7qHOB8YFX1tR64usa6JKkv1BbAmfm3wHMdzRcCm6vjzcBFbe3XZcv9wIKIWFpXbZLUD3rdB7w4M3dVx08Di6vjceDJtut2Vm1vEBHrI2JrRGzdu3dvfZVKUs2KrQecmRkRb3o/m8zcBGwCWLt2rfvhzKLOLYjcckiqV68DeHdELM3MXVUXw56qfRI4te26ZVWbemimLYjcckiqT68D+FbgYuDr1fctbe1fiIgbgHXAvrauCvVQ5xZEGi7uEddfagvgiLge+AjwjojYCfwJreC9MSIuAZ4APlldfhtwATABvAR8rq66pFHmHnH9pbYAzsxPH+Khc2e4NoENddUi6XXuEdc/nAknSYUYwJJUiAEsSYUUGwcsqTxHRZRlAI+ozkkX4MSLUeSoiLIM4BHVOekCnHgxqhwVUY4BPMLaJ12AEy+kXvNDOEkqxACWpEIMYEkqxACWpEL8EG5EuNav1H8M4BHhWr9S/zGAR4hr/Ur9xT5gSSrEAJakQgxgSSrEAJakQvwQTtI0l6fsLQNY0jSXp+wtA1jSQVyesncM4CHlzDep/xnAQ8qZb1L/M4CHmDPfpP7mMDRJKsQAlqRC
DGBJKsQAlqRC/BBO0iE5M65eBvCQcNyv6tA5M+75HY/x+XOarF69+qDrDOWjYwAPCcf9qi7tM+P2PbWdjXc8Mh3I4HTlY2EADxHH/aoXOqcq201x9AxgScfEBXyOngEs6Zi5gM/RcRiaJBViAEtSIQawJBViH7CkWeWoiO4ZwJJmlaMiumcADyhnvqmfOSqiOwbwgJgpcP/y3gkWLHs34Mw3aRAZwAPiUFONnfkmDS4DeIA41VgaLgZwH+jsXgA/NZZGgQHcBzq7F/zUWBoNBnCfaO9ekIaJ44IPzQDuQzP9h3WYmQZVN4u6j2ogG8B9qPM/LDjMTIPtcIu6j3KXmwHcpzoHsjvqQcOk/f/3KHdRGMCSihrlqcsGcA84zEw6vFGdumwA94DDzKTuzdQlAcP5pqWvAjgizgP+ApgDXJOZXy9cUlc63+G+8sorAMydOxdojWA4eemKkXyFl96smT6EHtaRE30TwBExB7gK+CiwE/hRRNyamT+Zzfsca3fATM+faWGcOfMWsuTdp02ft49g6HyFd4iZdLCZPoRuHznRGcidb3rg4N/rmX5vj3TNkX7mbOibAAbOBCYycztARNwAXAjMagA3Gg0++8dXcNI7lgLwy2d28ZVPf/SgV9bDaTab/Ofr75x+PsAzP2syf8WvHfZ5Lzz9OM+edCIAux7+e752/4uc8s7mQc+PtwQAL+6ZZM4/vTx9/Uxts30+qPcY1Lr9tzmKe8xbOH3+0nNP87XNEwf9Ds054WROeee7gDf+Xs/0e3uka2b6mdf9pz+Y1a7DyOyPd14R8QngvMz8/er8M8C6zPxCx3XrgfXV6XuB/30Ut3sH8MwxlDsb+qEG6I86+qEG6I86rOF1/VDHbNXwTGae19nYT++Au5KZm4BNx/IzImJrZq6dpZIGtoZ+qaMfauiXOqyhv+qou4Z+2pRzEji17XxZ1SZJQ6mfAvhHwKqIWBERY8CngFsL1yRJtembLojM3B8RXwDuoDUM7VuZ+UhNtzumLoxZ0g81QH/U0Q81QH/UYQ2v64c6aq2hbz6Ek6RR009dEJI0UgxgSSpkpAI4Ir4VEXsi4o0TzXtXw6kRcU9E/CQiHomIywrUcHxE/ENE/GNVw5/2uoa2WuZERCMivl+whscj4uGI2BYRWwvVsCAiboqIn0bEoxHxgQI1vLf6N3jt6xcR8cUCdfxB9f+yGRHXR8TxBWq4rLr/I3X+G4xUH3BEfBh4EbguM7ub+jb7NSwFlmbmQxExD3gQuGi2p1wfoYYATsrMFyNiLvB3wGWZeX+vamir5UvAWuDkzPxYr+9f1fA4sDYziw36j4jNwH2ZeU01CujEzPx5wXrm0BoGui4zn+jhfcdp/X88LTP/KSJuBG7LzG/3sIbVwA20ZudOAbcD/z4zJ2b7XiP1Djgz/xZ4rnANuzLzoer4BeBRYLzHNWRmvlidzq2+ev5KHBHLgN8Grun1vftJRMwHPgxcC5CZUyXDt3Iu8LNehm+b44ATIuI44ETgqR7f/1eBBzLzpczcD/wQ+J06bjRSAdxvImI5sAZ4oMC950TENmAPcGdm9rwGYCPwFeBAgXu3S+AHEfFgNdW911YAe4H/VnXHXBMRJxWoo92ngOt7fdPMnAT+DNgB7AL2ZeYPelxGE/jnEbEwIk4ELuDgSWKzxgAuJCLeBnwP+GJm/qLX98/MVzPz/bRmHJ5Z/dnVMxHxMWBPZj7Yy/sewocy83TgfGBD1VXVS8cBpwNXZ+Ya4JfA5T2uYVrVBfJx4K8L3PsUWotwrQDeCZwUEf+mlzVk5qPAN4Af0Op+2Aa8Wse9DOACqn7X7wHfycybS9ZS/al7D/CGhUJqdjbw8ar/9QbgnIj4Hz2uAZh+10Vm7gFuodX310s7gZ1tf4XcRCuQSzkfeCgzdxe4978A/m9m7s3MV4CbgQ/2uojMvDYzz8jMDwPPA/+njvsYwD1WfQB2LfBoZv55oRoWRcSC6vgEWmsw/7SXNWTm
VzNzWWYup/Xn7t2Z2dN3OgARcVL1YSjVn/2/RetP0J7JzKeBJyPivVXTuczyMqxv0qcp0P1Q2QGcFREnVr8r59L6nKSnIuJXqu/votX/+9067tM3U5F7ISKuBz4CvCMidgJ/kpnX9riMs4HPAA9XfbAAf5SZt/WwhqXA5uqT7rcAN2ZmsWFghS0Gbmn9rnMc8N3MvL1AHZcC36n+/N8OfK5ADa+9CH0U+Hcl7p+ZD0TETcBDwH6gQZkpyd+LiIXAK8CGuj4UHalhaJLUT+yCkKRCDGBJKsQAlqRCDGBJKsQAlqRCDGBJKsQAlqRC/j+pJYsbFLnGdwAAAABJRU5ErkJggg==\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "# Why \"lognormal\"?\n",
- "\n",
- "sns.displot(np.log(s2))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Box plots\n",
- "\n",
- " "
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Outliers, missing values\n",
- "\n",
- "An *outlier* is an observation far from the center of mass of the distribution. It might be an error or a genuine observation: this distinction requires domain knowledge. Outliers influence the outcomes of several statistics and machine learning methods: it is important to decide how to deal with them.\n",
- "\n",
- "A *missing value* is an observation without a value. There can be many reasons for a missing value: the value might not exist (hence its absence is informative and it should be left empty) or might not be known (hence the value exists but is missing from the dataset, and it should be marked as NA).\n",
- "\n",
- "*One way to think about the difference is with this Zen-like koan: An explicit missing value is the presence of an absence; an implicit missing value is the absence of a presence.*"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Summary statistics\n",
- "A statistic is a function of a collection of observations or, otherwise stated, a measure over a distribution. \n",
- "\n",
- "A statistic is said to be *robust* if it is not sensitive to outliers.\n",
- "\n",
- "* Not robust: min, max, mean, standard deviation.\n",
- "* Robust: mode, median, other quartiles.\n",
- "\n",
- "A closer look at the mean:\n",
- "\n",
- "$\\bar{x} = \\frac{1}{n} \\sum_{i}x_i$\n",
- "\n",
- "And variance (the standard deviation is the square root of the variance):\n",
- "\n",
- "$Var(x) = \\frac{1}{n} \\sum_{i}(x_i - \\bar{x})^2$\n",
- "\n",
- "The mean, the median, etc. are measures of location (e.g., the typical value); the variance is a measure of dispersion."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- " "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "4.9780085768562925\n",
- "251.20393308182292\n"
- ]
- }
- ],
- "source": [
- "# Not robust: min, max, mean, standard deviation\n",
- "\n",
- "print(np.mean(s1)) # should be 5\n",
- "print(np.mean(s2))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "4.990913153725976\n",
- "148.01429155699833\n"
- ]
- }
- ],
- "source": [
- "# Robust: median, other quartiles\n",
- "\n",
- "print(np.quantile(s1, 0.5)) # should coincide with mean and mode\n",
- "print(np.quantile(s2, 0.5))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Questions\n",
- "\n",
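- "* Calculate the min, max, mode and sd. *Hint: explore the numpy documentation! Note that numpy has no built-in mode function; see `scipy.stats.mode` or the pandas `Series.mode` method.*\n",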
- "* Calculate the 90% quantile values.\n",
- "* Consider our normally distributed data in s1. Add an outlier (e.g., value 100). What happens to the mean and mode? Write down your answer and then check."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " annual_salary \n",
- " a_age \n",
- " length \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " count \n",
- " 7870.000000 \n",
- " 9303.000000 \n",
- " 9645.000000 \n",
- " \n",
- " \n",
- " mean \n",
- " 5.916921 \n",
- " 14.266688 \n",
- " 5.005694 \n",
- " \n",
- " \n",
- " std \n",
- " 6.985214 \n",
- " 2.902770 \n",
- " 1.462343 \n",
- " \n",
- " \n",
- " min \n",
- " 0.166667 \n",
- " 1.000000 \n",
- " 0.083333 \n",
- " \n",
- " \n",
- " 25% \n",
- " 3.000000 \n",
- " 12.000000 \n",
- " 4.000000 \n",
- " \n",
- " \n",
- " 50% \n",
- " 4.000000 \n",
- " 14.000000 \n",
- " 5.000000 \n",
- " \n",
- " \n",
- " 75% \n",
- " 6.000000 \n",
- " 16.000000 \n",
- " 6.000000 \n",
- " \n",
- " \n",
- " max \n",
- " 180.000000 \n",
- " 50.000000 \n",
- " 15.000000 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " annual_salary a_age length\n",
- "count 7870.000000 9303.000000 9645.000000\n",
- "mean 5.916921 14.266688 5.005694\n",
- "std 6.985214 2.902770 1.462343\n",
- "min 0.166667 1.000000 0.083333\n",
- "25% 3.000000 12.000000 4.000000\n",
- "50% 4.000000 14.000000 5.000000\n",
- "75% 6.000000 16.000000 6.000000\n",
- "max 180.000000 50.000000 15.000000"
- ]
- },
- "execution_count": 27,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Let's explore our dataset\n",
- "df_contracts[[\"annual_salary\",\"a_age\",\"length\"]].describe()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Relating two variables\n",
- "\n",
- "### Covariance\n",
- "\n",
- "Measure of association, specifically of the joint linear variability of two variables:\n",
- "\n",
- "$Cov(x, y) = \\frac{1}{n} \\sum_{i}(x_i - \\bar{x})(y_i - \\bar{y})$\n",
- "\n",
- "Its normalized version is called the (Pearson's) correlation coefficient:\n",
- "\n",
- "$\\rho_{x,y} = \\frac{Cov(x, y)}{\\sqrt{Var(x)} \\sqrt{Var(y)}}$\n",
- "\n",
- "Correlation is helpful for spotting possible relations, but it is tricky to interpret and is not exhaustive:\n",
- "\n",
- " \n",
- "\n",
- "See: https://en.wikipedia.org/wiki/Covariance and https://en.wikipedia.org/wiki/Pearson_correlation_coefficient.\n",
- "\n",
- "*Note: correlation is not causation!*"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 28,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " annual_salary \n",
- " a_age \n",
- " length \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " annual_salary \n",
- " 1.000000 \n",
- " 0.205404 \n",
- " -0.361611 \n",
- " \n",
- " \n",
- " a_age \n",
- " 0.205404 \n",
- " 1.000000 \n",
- " -0.430062 \n",
- " \n",
- " \n",
- " length \n",
- " -0.361611 \n",
- " -0.430062 \n",
- " 1.000000 \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " annual_salary a_age length\n",
- "annual_salary 1.000000 0.205404 -0.361611\n",
- "a_age 0.205404 1.000000 -0.430062\n",
- "length -0.361611 -0.430062 1.000000"
- ]
- },
- "execution_count": 28,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df_contracts[[\"annual_salary\",\"a_age\",\"length\"]].corr()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 29,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 29,
- "metadata": {},
- "output_type": "execute_result"
- },
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAEGCAYAAABhMDI9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAABAeUlEQVR4nO3deXxcdbn48c8zk0wme9M0TUJLm6ZNF9INCIWrbLaiBUvLJogKsl3kd0G44oKibEW9CghXhCsiOyiComwXuSJwAb0gpNCVAi3daJukadpmn5nMzPf3xyydkzlpM5OZzqR53q9XXklOZr55Ztqc55zv8nzFGINSSikV4ch0AEoppbKLJgallFIWmhiUUkpZaGJQSilloYlBKaWURU6mAxiqMWPGmJqamkyHoZRSw8qyZct2GmMq7H427BNDTU0NjY2NmQ5DKaWGFRHZPNDPtCtJKaWUhSYGpZRSFpoYlFJKWWhiUEopZaGJQSmllMWwn5U0FMGgYVNbNy0dHipL3NSUF+JwSKbDUkqpjBqxiSEYNLy4ppmrn1yOpy+IO9fB7WfPZWF9lSYHpdSINmK7kja1dUeTAoCnL8jVTy5nU1t3hiNTSqnMGrGJoaXDE00KEZ6+IDs6PRmKSCmlssOITQyVJW7cudaX7851MLbYnaGIlFIqO4zYxFBTXsjtZ8+NJofIGENNeWGGI1NKqcxK6+CziDwALAJ2GGNmho89AUwLP2QUsMcYM1dEaoC1wIfhn71ljLksXbE5HMLC+iqmX3kcOzo9jC3WWUlKKQXpn5X0EHAX8EjkgDHmnMjXIvJzoD3m8R8bY+amOaYoh0OorSiitqLoQP1KpZTKemlNDMaY18N3AnFERICzgfnpjEEppVRiMjnGcBzQYoxZF3Nskoi8JyKvichxAz1RRC4VkUYRaWxtbU1/pEopNYJkMjGcCzwe830TMMEYczhwNfA7ESmxe6Ix5l5jTIMxpqGiwnafCaWUUknKSGIQkRzgDOCJyDFjjNcY0xb+ehnwMTA1E/EppdRIlqk7hs8CHxhjtkYOiEiFiDjDX9cCdcCGDMWnlFIjVloTg4g8DrwJTBORrSJycfhHX8LajQRwPLBSRJYDfwQuM8bsSmd8Siml4qV7VtK5Axy/wObYU8BT6YxHKaXU/o3Ylc9KKaXsaWJQSilloYlBKaWUhSYGpZRSFpoYlFJKWWhiUEopZaGJQSmllIUmBqWUUhaaGJRSSlloYlBKKWWhiUEppZSFJgallFIWmhiUUkpZaGJQSilloYlBKaWUhSYGpZRSFpoYlFJKWWhiUEopZZHuPZ8fEJEdIrI65tiNIrJNRJaHP06J+dn3RWS9iHwoIp9PZ2xKKaXspfuO4SFgoc3xO4wxc8MfLwCIyGHAl4D68HP+S0ScaY5PKaVUP2lNDMaY14Fdg3z4EuD3xhivMWYjsB6Yl7bglFJK2crUGMMVIrIy3NVUFj42Dvgk5jFbw8fiiMilItIoIo2tra3pjlUppUaUTCSGXwGTgblAE/DzRBswxtxrjGkwxjRUVFSkODyllBrZDnhiMMa0GGMCxpgg8Bv2dhdtAw6Neej48DGllFIH0AFPDCJSHfPt6UBkxtKzwJdEJE9EJgF1wNsHOj6llBrpctLZuIg8DpwIjBGRrcANwIkiMhcwwCbg6wDGmDUi8iTwPuAHLjfGBNIZn1JKqXhijMl0DEPS0NBgGhsbMx2GUkoNKyKyzBjTYPczXfmslFLKQhODUkopC00MSimlLDQxKKWUstDEoJRSykITg1JKKQtNDEoppSw0MSillLLQxKCUUspCE4NSSikLTQxKKaUsNDEopZSy0MSglFLKQhODUkopC00MSimlLDQxKKWUstDEoJRSykITg1JKKQtNDEoppSzSmhhE5AER2SEiq2OO3SoiH4jIShH5s4iMCh+vEZFeEVke/rgnnbEppZSyl+47hoeAhf2OvQTMNMbMBj4Cvh/z
s4+NMXPDH5elOTallFI20poYjDGvA7v6HfurMcYf/vYtYHw6Y1BKKZWYTI8xXAT8Jeb7SSLynoi8JiLHDfQkEblURBpFpLG1tTX9USql1AiSscQgIj8A/MBvw4eagAnGmMOBq4HfiUiJ3XONMfcaYxqMMQ0VFRUHJmCllBohMpIYROQCYBHwFWOMATDGeI0xbeGvlwEfA1MzEZ9SSo1kBzwxiMhC4LvAYmNMT8zxChFxhr+uBeqADQc6PqWUGuly0tm4iDwOnAiMEZGtwA2EZiHlAS+JCMBb4RlIxwNLRaQPCAKXGWN22TaslFIqbdKaGIwx59ocvn+Axz4FPJXOeJRSSu3foLuSRGRWOgNRSimVHRIZY/gvEXlbRP5NRErTFpFSSqmMGnRiMMYcB3wFOBRYJiK/E5GT0haZUkqpjEhoVpIxZh3wQ+Aa4ATgznDdozPSEZxSSqkDL5ExhtkicgewFpgPnGqMmRH++o40xaeUUuoAS2RW0i+B+4BrjTG9kYPGmO0i8sOUR6aUUiojBpUYwgvPthljHrX7+UDHlVJKDT+D6koyxgSAQ0XEleZ4lFJKZVgiXUkbgX+IyLNAd+SgMeb2lEellFIqYxJJDB+HPxxAcXrCUUoplWmDTgzGmJvSGYhSSqnsMOjEICIVhKqi1gPuyHFjzPw0xKWUUipDElng9lvgA2AScBOwCXgnDTEppZTKoEQSQ7kx5n6gzxjzmjHmIkKL25RSSh1EEhl87gt/bhKRLwDbgdGpD0kppVQmJZIYfhSuqvotQqugS4BvpiUqpZRSGZPIrKTnw1+2A59JTzhKKaUybb+JQUR+CZiBfm6MuTKlESmllMqowdwxNCbbuIg8ACwCdhhjZoaPjQaeAGoIzWw62xizW0IbQP8COAXoAS4wxryb7O9Ww0swaNjU1k1Lh4fKEjc15YU4HJLpsJQakfabGIwxDw+h/YeAu4BHYo59D3jZGPNTEfle+PtrgJOBuvDH0cCvwp/VQS4YNLy4ppmrn1yOpy+IO9fB7WfPZWF9lSYHpTIgkf0YKkTkNhF5QUReiXzs6znGmNeBXf0OLwEiyeZh4LSY44+YkLeAUSJSPdj41PC1qa07mhQAPH1Brn5yOZvauvfzTKVUOiS6wG0tQ1/gVmmMaQp/3QxUhr8eB3wS87it4WNxRORSEWkUkcbW1tYkQlDZpKXDE00KEZ6+IDs6PRmKSKmRLaML3Iwxhn0MbO/jefcaYxqMMQ0VFRVDCUFlgcoSN+5c639Fd66DscXuAZ6hlEqnRBKDZYGbiBxOcgvcWiJdROHPO8LHtwGHxjxufPiYOsjVlBdy+9lzo8khMsZQU16Y4ciUGpkyscDtWeBrwE/Dn5+JOX6FiPye0KBze0yXkzqIORzCwvoqpl95HDs6PYwt1llJSmVSWhe4icjjwInAGBHZCtxAKCE8KSIXA5uBs8MPf4HQVNX1hKarXjjY2NTw53AItRVF1FYUZToUpUa8RMpu3wL8COgFXgRmA980xjw20HOMMecO8KMFNo81wOWDjUcppVR6JDLG8DljTAehBWubgCnAd9IRlFJKqcxJJDFE7i6+APzBGNOehniUUkplWCKDz8+LyAeEupL+X3hHtxE/0TwdpRxS3aaWm1BKJSKRwefvhccZ2o0xARHpIbRaGQAROckY81I6gsxW6SjlkOo2tdyEUipRiXQlYYzZZYwJhL/uNsY0x/z4ZymNbBhIRymHVLep5SaUUolKKDHsx4i7/ExHKYdUt6nlJpRSiUplYki4tMVwl45SDqluU8tNKKUSlcrEMOKko5RDqtvUchNKqURJaF1ZChoS+ZMx5oyUNJaAhoYG09iY9F5CQxaZ8ZPKUg6pbjMdMSqlhjcRWWaMabD92f4Sg4js82RvjPnTEGIbskwnBqWUGo72lRgGM1311H38zAAZTQxKKaVSazBbe2oxO6WUGkESWfmMiHwBqAeiU1qMMUtTHZRSSqnMSaS66j1AAaGS
2/cBZwFvpymuEU1LYiilMimRO4ZPGWNmi8hKY8xNIvJz4C/pCmyk0pIYSqlMS2QdQ2/4c4+IHEJoq8/q1Ic0smlJDKVUpiWSGJ4XkVHArcC7hPZkeDwNMY1oWhJDKZVpiVRXvTn85VMi8jzg1j0ZUi9SwiL2ZJ6Kkhipak8pdfAb9B2DiJwf+QDOAZaEv06YiEwTkeUxHx0i8u8icqOIbIs5fkoy7Q9nWhJDKZVpgy6JISK/jPnWTWjf5neNMWcNKQARJ7ANOBq4EOgyxtw22OcfjCuf/f4ga5raaWr3UF3qpr66lJyc5MtaaUkMpVR/Q135DIAx5hv9Gh0F/H5ooQGhBPOxMWaziJ6sgkHDX9e2pHQWkcMh1FYUUVtRlOJolVIHo6FUV+0GJqUghi9hHcS+QkRWisgDIlKWgvaHFZ1FpJTKtETGGJ4TkWfDH88DHwJ/HsovFxEXsBj4Q/jQr4DJwFygCfj5AM+7VEQaRaSxtbV1KCFkHZ1FlBrBoGFDaxdvfryTDa1dBIMjbrsQpZKWyAK32H5/P7DZGLN1iL//ZELjFC0Akc8AIvIb4Hm7Jxlj7gXuhdAYwxBjyCo6i2jodFGfUkOTyBjDa2n4/ecS040kItXGmKbwt6cDq9PwOzNisGUpasoLuevLh7NyaztBA06BWeNLs2oWUTpKbKSyzYG646ZfeZyOsyg1CInUSjoD+BkwltD+zgIYY0xJMr9YRAqBk4Cvxxy+RUTmEirnvanfz4atRK9gfX7Dva9vsDw2W6TjajzVbe6rO04Tg1L7l8jg8y3AYmNMqTGmxBhTnGxSADDGdBtjymMXyRljzjPGzDLGzDbGLI65exjWEhlQzvbB53TEl+o2dZ9rpYYmkcTQYoxZm7ZIhqnBDHImMqCc7YPP6Ygv1W3qoj6lhiaRwedGEXkCeBrwRg5memvPTBpsF0giA8rZPvicjvhS3abDISysr2L6lcfpoj6lkpDIyucHbQ4bY8xFqQ0pMZlc+byhtYtT7nwj7oT2Qr9BzkT60LN9Rk0waHjlw5a4wfH50yqHNMbwxvoddPYG6Pb6KXTnUOx2ctyUsVnxmpU6GKVq5bNu8dnPYAc5E7mCHQ5Xu6keHA8GDa2dPn749Opomz86bSbBoMmq163USJHIrKQK4F+BmtjnZfqOIZMS6QJJpCxFNpewSMdU0DVN7dGkEGnzh0+vpm5sEXMOHXGL35XKuEQGn58BSoG/Af8d8zFijcRBznQMPje127fZ3J4dA+5KjTSJDD4XGGOuSVskw9Bw6PZJtXQMPleX5tu2WVWaHQPuSo00ie7gNuL2R9ifSLfPMbVjqK0oOqiTAqTnLqm+uoQfnTbT0uaPTptJfXVpSmJWSiUmkVlJnUAhoamqfQxx5XOqHIz7Mfh8AVZub6e5w0N1iZtZh5TicjkzHVaUx+NnVVM7zR1eqkrymFVditudyM1nvN7ePlY1d9DS4aWyJI9ZVSXk5+cm3V46ynYodTBJ1aykYhEZDdQR2qhHpYHPF+Dpldu5/pm9M3SWLpnJabMPyYrk4PMFeHZ1U0rj8/kCPLemOWVtZvuUX6WyXSJlty8BXgNeBG4Mf74+PWGNXCu3t0dPkBAahL3+mdWs3J4d22unI75Ut5ntZUWUynaJjDFcBRxFqNz2Z4DDgew4Wx1EmgeY9dPSkR0zdNIRX6rbzPayIkplu0QSg8cY4wEQkTxjzAfAtPSENXJVD1AArrIkO3rv0hFfdal9m1VJtqlF9JQamkQSw9bwPs9PAy+JyDPA5nQENZLNOqSUpUusM3SWLpnJ7EOyY4ZOOuIrL3Rxw6n1ljZvOLWe8iJXUu2NxPUlSqXSoGclWZ4kcgKhxW4vGmN8KY8qAZmelZSO2S+RWUmRNmdn2aykVMf35sc7+Y8X1nLJ8ZPp9fnJd+Vw3+sfc+0XZnBM7Zik2oz8u4yU9SVKJSols5JipWk3t4xL9CSfrtkvLpeThprRST8/3XJyHIwudNEX
CDK60EVOTiI3nvEqS9x8tKOLKx9/L3psqF0/2VxWRKlsN7S/6INI5CR/yp1vcO5v/skpd77Bi2ua97mJ/Mad9rNfNu48eGe/JPM+7Y92/SiVXYa2KukgkkxxuM27uikrcHHGEeOR8A3CU8u2smVXN5PHHpxXqukooudwCJ+bUckTlx5DU7uH6tJ86qtLtOtHqQzRxBCWzD7Bpe5czv+Xifzi5XXRrqSrFtRR4k5+xW62S8d+ysGg4a9rW3RBmlJZImNdSSKySURWichyEWkMHxstIi+JyLrw5wNWczmZKY65TokmBQidIH/x8jpynUM7mfn9QVZ8spsXVzex4pM9+P3B/T9pHzweP+9sbOO5Fdt5Z2MbHo8/6bbSMRU0HQvSBrPlqlLKXqbHGD5jjJkbMzL+PeBlY0wd8HL4+wMimX7uLm/A9uq52xdIOg6/P8jTK7Zxzr1vcdlj73LOvW/y9IptSScHj8fPs6uaOO+Bt/nG4+9x3gNv8+yqpqSTw4SyAtuCdxPKCpJqD6B5gLLbyS5wS8c4iFIjSbZ1JS0BTgx//TDwv8ABKfWdTAntwrwc23LRBUOYupnqTWtWNbVz/bP9yk08u5pJYwo4alJ5wu1t2d3DL19Zx8XH1iICxsAvX1nHERPKku5Kystx2L6Puc7krlvSMQ6i1EiSyTsGA/xVRJaJyKXhY5XGmKbw181Apd0TReRSEWkUkcbW1taUBZRoCW1fIMCV8+ssV89Xzq+jL5B810+qN61p7vAOcDXuTaq9lg4Pm9t6ufvV9dz1ynrufnU9m9t6h1Ruot3js30fOz3JLZHRkhhKDU0m7xiONcZsE5GxhFZSfxD7Q2OMERHbe39jzL3AvRBa4Jb+UO2VF+bxROMWy9XzE41bWDizKuk2U71pTVVJnm17lSV5SbWXjo16ygvdPNH4ftz7eOeXDs+aGJUaSTJ2x2CM2Rb+vAP4MzAPaBGRaoDw5x2Zim8wasoLuWbhDO7/+wbuemU99/99A9csnJFVm9bMqi5l6eJ+JSwWz2RWku2la6Oeb8yvs7yP35hfl/Rr1nURSg1NUiUxhvxLRQoBhzGmM/z1S8BSYAHQZoz5qYh8DxhtjPnuvtpKd0mMfa2GDgYNr3zYwsqt7QQNOARmjy9l/rTKIU2z9PuDrGlqp7ndQ1Wpm/rq0iGtLk7XJjipLDeR6s2JtCSGUvu2r5IYmUoMtYTuEiDUnfU7Y8yPRaQceBKYQKhA39nGmF37aiudiWF/JS82tHZxyp1vxHVZvJBFg5zDYdOa4RCjUgebfSWGjHQlGWM2GGPmhD/qjTE/Dh9vM8YsMMbUGWM+u7+kkG77m18/0CBntuydAMNj05rhEKNSI0mm1zFktf3Nbilw5dgu9hrKdNVUGw4zdIZDjEqNJJoY9mF/q3zTMV011YbDpjXDIUalRpJsW+CWNYJBg0PgJ6fP4to/r7L0fUdmt5QX5rH8kzZ+fd6R7O7uY3RhLo+9tdF2umpkQDm2SNxAA8qpHCyuKS/kl+cezqptoQFyp8DMcaVDmqHT3etlTXNXNL76qiIK85Ob/hqJ8ZGLGggEhdZOLxXFeTgdZkgxpmOfDKVGCk0MNmIHQ8sKXFx6fC1TK4uZUVXCpDF7TzDjS/P57GGH8PVHl0UTx9IlMxlfmm9pL1LmIrKiOTIF9bQ54+KSQ29vH8+tbo6uVo5MLz11ZlVSycHnC7Cnp497X98Qbe/mJTPx+QK43Yn/83f3evnv1Tvi4vvCzLFJJ4dur5dNOz1xbc6o8lKcn/hdgw5mKzU02pVkI3YwtKndw50vr+fbf1iBCJYTy9qWDu5+NVQe4or5U7jkuFrufnUda1s6LO0NVOZiTVN73O9e1dxhW8JiVXNH3GMHY1VTO9c9Y23vumdWs8rmdw/GmuYu2/jWNHcl1R7A2uZu2zbXNic3+KyD2UoNjd4x2Bhsaem2bi/nNEzgzlf2lt2+cn4du7qt5Sb2VeZizqH9f3dqS1ik
viRGattLR5vpKA2u1Eiidww2BjsYWuJ2RZMChE4+d76yjmK3dRP7SJmL/u3ZlbmoDJew6P/YZEtYVKW4vVTHl442dTBbqaEZkYlhf7X6ByqpMKGswPI8X1+QsgIXl39mClfMD32UFbjiZiUlUuYiL8fBTYvrLY+N/T5RqS6JUV9VZNtefVXyV+JlBU6W9nvNSxfXM7owuWm/WhJDqaEZcV1JgxmYtCvBPaGsIG6XsYcuOMp2B7fqfncCOTkOTpszjrqxRfstc1HszmXZpp08cMFRtHV5KS/K4+l3t3BUzeikXq/L5aSmws2DFxxFa5eXiqI8nE6TdLmJwvw8Tp5ZQc2YedFZSTOqCoc0K8khOXT09PLIhfNo6fRQWexm5Sc7EZJ7zQCuHOHS42ujpUpcOTrorNRgjbjEMNha/ZES3JFjG1q74p733id7bHdwO2lGfLXwnBwHcw4tixtT6G98aT4Nkyq46KF39jnTabC27Opm2ab2uOQ1tiifmjGJX+X7/UH+5/3WQc2wGiyHQJ9xcv6Db1tiTHYC0aa2bq743XtZXapEqWw24rqSkl1la/e8bp/9Dm6f7O5JOr61LR1c328W0fXPrI6b6TRYzR1e2+SV7MBuIjOsBmtjWzePvLk5Orvr4mNreeTNzUnPIhoOpUqUymYjLjEkOzBp9zynMEBJjORvxFK9UU9Hb59te+29fVkRH0ChK8fS1SPhrp9k38fhUKpEqWw24hJDsgOTds+bc2gpN5xqHTS94dR6qkqT729PZAbTYBS5nbbtFeYld5JMdXwAVaV5XHbClOh+DPe9sYHLTpiS9Ps4HEqVKJXNRtwYQzJ7Ow/0vGDQ8Pu3N/Pr845kT3cfowpz+e1bG5mX5EAxhGYwPXzRkWCc0d+DBJLetKasIJc7zp5DIAjdXj+F7hycAqMLkyuxUV9dwm1fnMNHLZ3REht1lcVJxwfgD8CrHzTFlRZJ9n0sL8xjV1cPD104j9bwe/jSmm2MLkx+Zz1IrKyJUsPZiEsMED+wnOzz3tuyi89Mq7aUxLhhUT0dSe5VDODr87N5pzeuPMSsKj85Oa79N9CPO8dJe6+fG59bE23vxlPrceckd8cQDBp6fYG4Ehv9p/wmotPjZcEM6/t40+J6Oj1eIPHB4uqiPKZWlXFBzGD20sUzqS5K/k4ukbImSg13+j96CPwBw1PvbuGWs+bwszNmcetZc3jq3S34A8mfJFc3d9qWh1jd3JlUezs6fdGkEGnvxufWsKMzueS1crt9iY2V25MffPYF4IZnrTHe8OwafIHk2ls9QFmR1UmWFYH0DLorla1G5B1DqvT4/JzdMIHv/nFFzJVpPT0+f9Jtpro8RGuXfXs7u5ItsZH6GT8DveYdWVIGBBIra6LUcKd3DENQlJfL9f2udK9/dg1FefH9935/kBWf7ObF1U2s+GQPfr/9QGiqy0OMKXLZtldelHi3FED1ALO6KkuSH3we6DWPzZIyIJCeQXelslVGEoOIHCoir4rI+yKyRkSuCh+/UUS2icjy8McpmYhvsAa+Grd200T6p8+59y0ue+xdzrn3TZ5esc02OYwtdtmWxBhbnNyJvLIoz3bmVGWS/e2zDill6ZJ+JTGWzGT2IckPPjvEcMOifjEuqschyXXJpboMCCRW1kSp4S5TXUl+4FvGmHdFpBhYJiIvhX92hzHmtgzFlZBCV2gqaP8Vtvkua74dqH+6bmwRcw4tszy2qd3LB9v3WGbUvLhqGxPLC6gZU5xwjO0eP04x3HbWHLp9fgpdOfT4+mj3JNfd5XI5OaW+gpryvSUxDqsqTLrEBkBZgZsd7Tt4+MJ57OgMbazz5roW5k1KblaS251jW7Yjmf0nIhwOoaLYZXkfi/Odur+DOihlJDEYY5qApvDXnSKyFhiXiVjAutvX2GI3TkeoT3mgnb8ij/f6g9z95SNY+vwaNrf1Rq/GRxe62NDaFd09LJH+6ariPKYfMsoyo+amxclf4W/b08tj
b23hkuMngwEDPPbWFr5+wmTmTijb7/P76+3t44XVrSnbSAggxxmguqyYr/WbRZTjTG70ube3j7+kOMZNbd18/dF3tcyGGhEyPvgsIjXA4cA/gU8DV4jI+UAjobuK3en8/XZF9a5aUMcjb25md48vrsCe3eOvW3QYnZ4+Oj0B7nltPbedNYez7nmTsgIXX2wYz2HVJVy1YApPNm6lKbxC2J3riCu2B9DW7eMPjaGZTr1ePwV5OTz8fxuYPKaQSUm8vupSNyfPqrYMkNsV+husgTYSqhlTwLxJ5Um12dLht23zkYvmMTGJJtMTo+7xoEaOjA4+i0gR8BTw78aYDuBXwGRgLqE7ip8P8LxLRaRRRBpbW1uHFINdUb1fvLyOM44Yb7vzV//HlxW4aOnwMG5UAdOriil159Lc4aGswMV5x0zk3tc3cNlj7/Lr1zdw/r9MpLrUHT05F7vjr167+/yceURoptM1f1rFd/64gjOPmEBPX3JdP15/wLZWkneAwe/9SXTW1P5KnAPsGGhWUmf2bCakezyokSRjiUFEcgklhd8aY/4EYIxpMcYEjDFB4DfAPLvnGmPuNcY0GGMaKioqhhTHQFeCInu/ji2wF/v46lJ39OT/jcff4zt/XMG5R09kfFk+ZxwxPm4Tn1+8vI5rT5kRLRLXYlO4z52Tw03PW2c63fT8GvJykru5a+/1p7RW0oCzporju7oid1en3PkG5/7mn5xy5xu8uKY5LjmUFw8wc6owyQH3BGIcLN3jQY0kGelKEhEB7gfWGmNujzleHR5/ADgdWJ3uWCJXgv37jo3Z+3XsVWHs4884YjxPNG7h4mNro4nkntfWc8uZsxHB9oT8YUsnd7+6Hneug4qi+KvNnV1eygpcnHHE+GibTy3bmvS6g4qiPNvXNybJMYtDyvL41VcPxykOdoXLVwRMkENGx7e3qa2bn7241vL+/OzFtUyvKrZ0v+TnCDecWs9NMauzbzi1nnybzYlix4MGGgNKJMbBcjiE+XUVPHrRPJo7vFSV5DGrulQHn9VBKVNjDJ8GzgNWicjy8LFrgXNFZC6hMdJNwNfTHUjkStBujMHuqnBCWQFLl8zk+mdWU+x22u753OHxRyuv2iWcyO9w2tyvjRvltt38Z9yo5LoscpyGpYvro+stIovwcp3JTQUtysuhtaMvbmD38AnW/0rBoGFnl5crPlPH1t09PNm4ld09vuie2LGJocidQ1lBjmXGT44zVACwf5v722QpkRgT4fMFeHZ1U7QkemSa7mmzDxnSjCylspEYk3z5hmzQ0NBgGhsbh9RG5Cp0R6eHiqLQrKTmDvsCextau7jwobdZNHscx04p58LwhjoR7lwHj1w4j2VbdlOSn8vNz79vqSnkcgoOh4M/LfuEy06cQtAYy5Vv46ZdfPX+f8a1+djFR9OQRFG5tze28Z0/rmDR7HGIgDHw/Mpt3HrWnKQGYt/e2Mb5D7wd/5ovmhdtz+4EfuX8Oh59KzSg/8Slx1im6b6zsY1v28R421lzOComxg2tXZxy5xv7nRk0mBgTlep/F6UyTUSWGWMa7H6W8VlJ2cCuqN5Au5u1dHjY3NbL3a+up/6QYtvuorZuLz978UOqS91cfGwtk8YUUOjK4acvrrVMa/3uUyui30eufHd0JreR0EBaOrzRePsf728w3TQtHV6mji3ikuMnR2dN/eb1jy3t2Q3o3/nKOi4+tpa7X11PT78iSDs67WPsP/g82JlB6Rh8TkcpEKWylSaGBMWOMYwZoP++vDDUl93U7uHuV9dzzcJp/PBl6/TJm55bEz1RRmY/Tb/yuAHbHFOYXP94ZUkeE8vzo1fjAM+t2BZXHmKw3TQTRudz7tETLdNfbzi1ngmj9249uq8BfbvyGRXF9q+5ot84yEDjQf1nBkUGn/s/bkglMQb43UMpBaJUttJaSQPwePy8s7GN51Zs552NbXg8fvz+IJ29fdx61hzu+vLhdHr7bDeE6fT1WY5NLC/Y
58ynyPc7Oj3s6bVvs93TFzftczD1lwpcwuUn1lk2wbn8xDoKXNY7gYH2wu6/vWbQGO55bX10G85LjqvlntfWE4zpkoyd2lld6ubyz0zhygVTmF5VzF1fPjxuJk+HZ4D30WudOTXYmUFTqwq5uV/ZjpuXzGRqVfIziNJRCkSpbKV3DDY8Hj/PrmqyDF7evGQm+blOvh1zpXzPV4+0zEoyBp5o3MLtX5zLC1cex65uL7lOBz0+/z5nPkW+H1vsxiFe2zZvPXNOtH/dnevg51+cS44T1mzvIGhgbVMHW3Z1c3J9tWV/gF6fGXDxWKzBdtPs7vFxybG1tHZ5oxv1XHJsLXt69taHipzAf/bi2rjB+dvPnhv3fpcVuHjlg+bQoj6fnwJXaFHfvEmHWR432E2WNrf2ImK49PhaggYcAiKGza29jJqQ3BW+y+VkUX0lNeUF0TIbM6uKdeBZHZQ0MdhY1dQedzK97pnVXHp8reXYXa98xOWfqYubqVKS76CmvJANO7tYubWdCaMLuOOcufz0L9YxhnteC/Wpx175Oh1+rj15Bn0BE91xbfa4GZQVOi2/+1t/WM7dXz7cEvfubh8ftHQwc9yovcd6fLbTX2NP5DD4bprygjw+bO6ybNRz1YI6Rhfs7aaJnMDHjXJzzr1vxd2FTO83WOxyCF85pob1O/buCveVY2rIc8ZPBR3MJktdPj8/fHpN3Gt54Gu242yD4vMFeH5Ni85KUiOCJgYbA9XzHzcqnyvmT+GpZaHSFkfXVvC397fHbUlZO6aOLbu6WddiPYEuXVxPaUGojtLj/9zMj0+bRY5TLFe+e3oMu3r64ub0V/msJ+iyAhfNHd64E3SX17pCekxRnu301/7rGOym7dp103R6/bYrqWeNs3apOBxCjy8wqLuQLp+f5nZP3GupSnJMoNtrv6ivy5vkzj+ENii6vt8GRdc/s5raMYU6K0kddDQx9OP3BwccvNyyq5f7/74hOvWyusTFxNHWLSmXLq4nYALs6PTS2xfgkuNqeWrZVgC27unFEDoJu3KEorwcevusJ6seXyCaFGDvQPWDFxxledwXG8ZHp8JGHveLl9dx3/kN0QJ+Y4vddPsC/P4d6yK837+zhZnjSi2F/mrKCwfVTdM1wEm32xtfsqOyxE3DxFLO/1Stpe5T/7sQfxBeXtscVx+qf7IZrPzcnAGq3iZ/ZT/QrKTmIcxKGswssEy2p0YuTQwxIvsm/HXNdm5aXB/dbjJ2Hn5k6uWlx9cyraqE82Lmy3v6gtz9v+u5asFUrv3zquhzv79wOn1Bw21//TB67Menz+JH/72Gxs3tlhlAbd0+2xPQrm5f9GTnznUwrdJ+qqzXH+CUO9+IFvCbO6HUdhGePxC0jFn86LRZ1FUURsc1BjK60GV70i0riC9fMb40n7OPss5gWrpkJuNL8y2PEwlG60NF75IW1SOSXD0nV45w1YK6uLskV07yJ8nIhkfxK8iTK9sx2FlgmWpPjWyaGGLE7pvQ54dfn3ckHb19fNDcxaNvbY5WRvX0BZlWVWx7El80e1w0KUQe29bji3aTRI794M+r+MHJM6JX00Fj2LqnmzFFLtur7PIiFxcfW8uEsnx29fioKLafhlqcl8tVC+qii+sevWheXM2mO19Zx6MXzbMc++UrH3HZCVMsXVi3nz2Xz82oZMvunuhVaI8vNIOof6KxK/K3tqXDtvtlWqV1HwqXM4d7Xl9jLS3y+npuPWtOXJt+f5A1Te00tXuoLs2nvrrEMtgOkOt0xO2d4PH7yXUkPwmvwOXkxlPro/tnu3Md3HhqPQW5yd2FDDQLrP/4S6bai9C7kJFJE0OM2H0TXv1oJ69+tJMr5k/h/r9viLtS/LC5k4aJZXFXkU5HfI2koLGvm1RakNtvv+iZzDm0mPM/NckyEHv+pyZRXhj6p3I6hZryQnwBP5cdPyVacC9yld3b56e3b29F1T299ncgsUX0qkvdXPP56XzQ0hnt+mpq93D1
k8v5zfkN/OsjjdHf8chF8+xnTYVP4rEnkt099r+7/z4UHZ4+27uaTo91umrkji6SvEN3OjM5bc64fskhiDFimUF285KZQHJ3IADd3j5K3NayHQ6Bbl9yxQgHmgXW0pFcGe90lAXXu5CRSxNDjMi+vrF/YM+t2MZ1iw6zlLZYurieO/62juPrxsR1WcyoLom2UV3q5owjxjOhLN92P4b1rV1x00gfv+Rotu3ujRuIrS51RxOUO9fBr756pG0V1kcunGdJRKML7MdLRoW7fiIVYq/+w96T6PcXTqfT68fjD9LnD1JW4IomzR5fH1+eN5E7/vZR9PHf/OxUenx+3t7YRo/PT16Okx2dXqoGmOlUFt7IKHL1WeLOjStGGJtsIt4fYCe8qWOLmB1zB+IPCk+8szluzOK7C2ck/X/D6XBy9R+Wxb2W/tN+B6vAZT8OUpDkOMhgZ5UlIl13ISr7aWKIEdnXN/aK9JyGCTzxtvUKeUJ5Abt7fHyyq5txo9yW+fIODD85fRa/ePmjuKvg2A2Ali6eyR1/+8jy+yMzZ+xm/dx73pGWY+9t2W17hbin18fRNaO5L3ySyAtfLV/3jHVNhjvXgTvXEVcevKzARU9fgLvCK7Jjx1ea2j3k5Tj3FrwLT6ft8faR73LS3tNHIGjo8gTY3NZD055ebjlzNt99aqUlqXZ6+vD5g3zc2kXtmCLae+3vGDrCi/qiO+ENcFXc1O5ldswdSG+fnzOPnBC3Ottj093V0+tjdXOnZW1CQX78uMGOztTuGeELBPjmZ6fGJdi+QHJ3NYOdVZaIVN/VqOFDE0MMh0MYVZAbPdHXV5dE6xut3NYBwMTyfKaMLeSqBXUU5rnY0+Nl6tjiaPdCe28f06pKuObz06NX4bD3BH/rWXP4oLmTPT1edvdbS+DOdexj1o919tLYAUpnjC7M41t/WB4dB+jtC9Dr81uSV68v1N101YI6KorzLG2cccT4uMR05yuhuH/ywlpK3Dm0dfdx/bPvWxLe1EoHP3rh/bgT/NUnTeXRi+exobWbUncu+S4H63d0c8iofIImtIlPaUGu7TjIIxfNswyQ//q8I23HVfpXYc3PzeGm5961tHfTc2viru67e7180NIBxkGooC+sbWlnRmVpXHIYaPDZbs+IwfTLVxTlkZ/rsPy75OcmXw59sIv/EpHquxo1fGhiiLGprZsrfvde9A/hmoXTuPT4yZZupGsWzuDqJ5dTVuDilrNm8+MXPo47Uf3otFl0euxP8JH9GKpL3XFdVDecWk/5ACegskLrbm+TxxZy9UlTuf2ljywnYQFOnTOOF1c3cfGxtfT6Atz/j43RGIMG7v/HRm5eMpPyglyK8nKiJ9u8HAczDynhT+Guo9i4e31+/t8Jtezq8dtOf501rpRFs8fFneB/+8/NzB0/ikJXDqWFubR3e/AHDd+MubK94+w5tu9VW7fX0lZrRy//duIUy2yxmxbXM7rA+t4M5uo+GDRs7+jl41ZPXHnukvxc6volhhJ3DrecNZv1O7qiYz+TxxZR4o4vN/7Khy2s3Noefdys8aXMn1ZpOUkHgvD39Tv4yjGTLGtgPj2lItpOooO+g1n8lwhfIGA70SDZuxo1fGhiiNH/1rnbF0CE6FWdMbB+x95xgb5AwLYLZHe3l0/29O6zDMbuHh/dnj7u+vIR7OnxUZyXQ8AYCnKdcVNlb1pcT36O0zJdtcPjJ89pveLMczrY2e3jvjf2rrU4qmYUF31qEm09vuiJ6qJPTcIYQ5+BP727hf93whTLbJurT5rKg//YZBkP2bqnl6K8HAJB+9fsDwbjNieqLnVzwacmcf6Db1sGi196v8lywh83ym17J3BIv32pi90ufvIX68Y///W/67nlzL1jEcGgoaLYPrnGFuXb1NbN7p7AoMqFAOTlgDvXGbo7DHehuXIc9N+ddXNb/MLGqxbUMXlMEZNiTtjdvj6+NG8isaf6L82bSLevL+lB38HM2EpEeWGe7USD
hTOrkm5TDQ+aGGLYDeA9/vaW6ImwrMDFD06ZwbUnT6PQnYs/gO2g6XWnHMZTy7bGXW3FbgC0dMlMJo8p5Oo/LGdzWy9XLpjC/61v5bpF9eTnCveGV1OXFeayp8cLAhcfW4vTAUdNHE1eroOrfr887uT38IXzol0xFx9bS4k7l56+QNyJqtidy83Pv8sdZ8+NXr1D6OR4+0sfcdWCOn724oe4cx1ct+gwOj19/Pafm7ntrDkDTn+NxBD52b+dMJlnVmyNGwT+txPruOjhRkvcl59YF3fl3n8Ht6DBNslFCgxOKCvgr2tbOKw6n1vOnM361pir+4oiKkv2doG0dHjY2WU/a8quPHevL7SieuPO7mibNWMK6bX2BrK9vdd2jGj2+FJLYnCI0Nrpi7sAGFvsTmrQd/AztgavpryQ6xYdFr37yXHAdYsO0+1MRwBNDDH6D+A9t2Ibl3+mjrtfXRddG/DtP67g4mNruf1v67j1rNm2V89GQlNfH31rMxcfW8v0qmJaOno5dHQh//7ZOvJdOdz3+sd8/YTJfPtz0/mwpZPJYwpx5zhp6/biDxo6ev14+gJ0eAR/0NDW7Y3uV/CLc+ZS5HbantTaur1Ul7ppavfgdIRWKu9rMDsQNLbtjC8r4Ir5Uzisqpib/3ttdPe1tm6f7X4Mbd0+nluxzZIMayrybReu5eVar3o7eu2v3B+7aJ7lLqmqJC+UJGO4coSKQhfPLN/GlLHFNO3p4ZDSPPbErB1x5zq49uTp7OnJjw5mF7hyqCwR2zuLypI8gkFjuTrv9QdwiqFhYll0u9A9PV56/QHLAPlAXYhdHusYUbc3wMtrm+LKqdSUFw7Yxr6mnq7Zbj9jq66iiDkTymyfMxgFuQ7Lax7CDYgaRjQxxLAbwJtQVkDDxDJaO718LdwlEukyKcnP5c5XVsZdPd8fLtbW1O7h/r9v4MELjuI7f1wRdwIqcDnp8ga4740N3Bq+En/iX4+mpcPHtX+2zqgpD08vdec6GFWYS36u0/akFgyGBpDv//sGpo4txusP2p5kvP5Q26X5uXHTa50OKM3P4bkV25g17rBoAT6vP0BFkct2P4aKIheLZo/D4YBbzprDpp3d5DlzuOn5foPA4Sm1sSf8nQOs9m7r9nHLWXNYv6OTQDBU8rsvYF2W3RcwBI3h169vCG2XKuD1B7n/Hxstd3L3/2Mjt5w5OzqYPbE8n4cvOIJbzpzF+tbumDuLQiqKnLywqokZ1SVMGhPq289xgNcPl8aUP7l5yUxyHER39HM64LgpY2z/XapH9RtUlgALZljLqdy0uB4kQGVxQcJTT7e3e2yLJTZ1eIhfJjg4n+zu5pPdnri7mnG7u5lYrrOSMindCw81MfRjN4BXW1EUN/7gznXQOsAg556evuhjrpxfx8c7Om1LNKxr6eTB/9vMVQvq2NzWjacvSE94Bk3/GTUPXnBU9Ir7ly9/xNdPmGI7MLh1Tw9OByxdXI8rx3rij4291J3LlfProuUjfv/Olri7n/84fRZ7enyW9ROfnjLGNr5HLppn2YHNnetgcoX9oHJrl5f7v9ZAR28fTnFQXGA/+6UkP5d/fNzGXa+E2j2urpxuX3y3mD981xOZ9eXpsx8H8fQFLMliV3cwrhDh1SdNZfyoAq54/D1L374/QHTKb+R1XPfMah69aJ7ldz2zfFvcHtvf+fw0ilzWPzXBGT3hRtq74dnQ+5jM1NPqUvtiiVXFyW9O1NzutY3x4QvnJZ0YdCX10B2IhYdZmRhEZCHwC8AJ3GeM+WmGQ7KMP0TGD3Z2ege4ssvjivlTMAYefWszAD/8wgzLQHFdZRFTKoqYfegoqkrctPf6uevV9bQN0O/d1u3jsYuPJscBt5w1l51d9vs2LJk7juOmjEEEygry+Hhnl20Caff08UTjFk6YOobaMYV853PToyuFI79zY1t3XCmPgbbNbI15LyK/Y8Cd2Yrz2NHhZeueHnp8AY6YUGobo9cfwCl7xy36
/Ma2W+w35zVEv+/x+qkszrOf/nrhPEuSO6qmLDqrK/K421/6iF+fd2T0+0jf/r7uamJ/l89v8PQFLP/WOSJsb++ldmxx9LmtXfbv484ub1JTT/sC9u9NQ5IL8IABL3xau5Jbu6ErqVPjQCw8zLrEICJO4G7gJGAr8I6IPGuMeT+TccVexTW1e3iicQs/Pn1W3Ayib39uGqu3tUevciF0Yqs/pIQZ1SVxf+iRvaWDQcPtZ88dcLrq2KI8S3nnrbu742YTXbWgjvFl+Rx+aFl0wHF3j30Cue2sOXxjfh3TKkuYUV3K/6xpjjsJ2JXyKM23v7ofXejiP8+eiy8QZH1rqLbUjEOKuGFRfVzZjm6fn7XNndF4cnOctjH+5PRZnDZ3HNOqSrj6yeX09tn3vUcq1LpzHbR2eSly5wx4Qov9Y/L02ZcF9/qClu93dHooyrN/3YV51t91xhHj+clfPoh73CMXWk/QFfvZwjXRqactKV6ABzB2gCrDY5O8C9GV1KmRjvIn/WVdYgDmAeuNMRsAROT3wBIgo4nB7irOIfCDP6+ynNAefWsTNy+ZZbl6vv3suUwYXRj9Y99X++83tcd1RSxdXE+ByzrqV5qfx50vr+OOs+fiDxoKXU627u5h0phCyyyUXCe2mwm5crDMWLFbwBV7tR6xpa2bG06tj9svotDlZHdPH6fMrGbL7h4+NbkchwhPvbs+ujNbviuHR8KlKWK7nT4341/4ytET49ZkFLqcTKooYmJ5IdOvPI6OXt8AdyCh2L/52an87u3QzKmB7lRiVQ50R1Pisnw/tthNa6fHvmKr0zqA3X/KLoQXKPqsq67be+2LEbZ7kqu9VD5A1dvRNgvwBqu3z2+b2HttVpAPxoE4oY0E6Sh/0l82JoZxwCcx328Fjo59gIhcClwKMGHChAMWWP+ruGDQRBe8xSaBT9WW80ISK1AjjylxO0PTVXv6KCvIpcfbh8H6/PrqEr40b6JlodiPTpvJ9MoSy+P6AtC0q5OHL5wXjeet9S3UjimwJJBZ1aUsXTzTMmV0ckURP//iHL4VU0epotgNgqWYXG6OUFogzBxXZXmP1rXs4eyGCf0KBdZTmm9dOVtZ4qCq1FpapKrUTVWJw/K+7+n18OPTZvGDp/eWNP/xabMYU+zgiUuPoS8Q5KTDKunxeeNey9LFM+lfGfx3/9zET06fZSmR/pPTZ/H4PzcB1p31ur19FLqclhgLXU7yXU5+duZsrgmX/bBLpu5cBxNGW8cHygpyB7yTS0ZRnnPAhJ2sIlcuT727Li6xf+/kw/b/ZBsH4oQ2EqSj/El/YvZVfD8DROQsYKEx5pLw9+cBRxtjrrB7fENDg2lsbLT70QERGUxLVRkCvz/IX9Y0sS68wtYhUDe2KG4v58hj1zS109zuoarUTX11adxjenp9tltSLqqvjCv74PH4WdXUHq0bNKu6FJfLaXl9uU4/7b1BeryGlk4PlcVuCvKEYreDieWjLO1tbO1i255unA4nO7u8jCnKIxAM4HQ4ufChd6LxPHv5v+B2QfOeQLTNqlFOPD6YWmVtc0+vh4+au6MxTq0qZFS+9cSyZtsu/vzuVj5bP47WTg8VxW7+tmYbC2eN46v3v235Yxo3yoWnj+jrc+dCUV4erV3Wf889vR7+/tEuPgpXvXUITB1bzLFTR1OSlxd9j6pK3KzZ3sm3/rDvfvSNrXt4Z1NnXPJqqCmmtsL6mgfD4/Hz2set9PmNJWGfMLkCtzu56z+fL8DTK7enbDtTHWNInVScd0RkmTHGdr/bbEwM/wLcaIz5fPj77wMYY/7D7vGZTgzpMJgTfiIGWyhusLF90LInLjFMrxwVF2P/8hAOgdnjSzmxbixbdvdE/1MfUuzmzU076fYGoye1wjwHn66tSOoE5PMF+Mv7zdGreHeug5+dOZvPT69ke6fH8scEDPoPbDBJKfK699emzxdgVdMuAgFH9H10OoPMqh6d9B7Sdok92aQQG+fK7e3R
WUSzDykd0h7Xqb6QUskbbokhB/gIWABsA94BvmyMWWP3+IMxMWS7RBLXYE8EqT4Bpbq9dBgOMaqD17BKDAAicgrwn4Smqz5gjPnxQI/VxKCUUonbV2LIxsFnjDEvAC9kOg6llBqJtPKJUkopC00MSimlLDQxKKWUstDEoJRSyiIrZyUlQkRagc1JPHUMsDPF4aRatseY7fGBxpgqGuPQZVt8E40xFXY/GPaJIVki0jjQVK1ske0xZnt8oDGmisY4dNkeXyztSlJKKWWhiUEppZTFSE4M92Y6gEHI9hizPT7QGFNFYxy6bI8vasSOMSillLI3ku8YlFJK2dDEoJRSymLEJQYRWSgiH4rIehH5Xqbj6U9EDhWRV0XkfRFZIyJXZTqmgYiIU0TeE5HnMx2LHREZJSJ/FJEPRGRteK+PrCEi3wz/G68WkcdFJCu2MhORB0Rkh4isjjk2WkReEpF14c9lWRbfreF/55Ui8mcRGZWp+MLxxMUY87NviYgRkTGZiG0wRlRiEBEncDdwMnAYcK6IJLdPYfr4gW8ZYw4DjgEuz8IYI64C1mY6iH34BfCiMWY6MIcsilVExgFXAg3GmJmESsx/KbNRRT0ELOx37HvAy8aYOuDl8PeZ8hDx8b0EzDTGzCa0n8v3D3RQ/TxEfIyIyKHA54AtBzqgRIyoxADMA9YbYzYYY3zA74ElGY7JwhjTZIx5N/x1J6GT2bjMRhVPRMYDXwDuy3QsdkSkFDgeuB/AGOMzxuzJaFDxcoD88OZUBcD2DMcDgDHmdWBXv8NLgIfDXz8MnHYgY4plF58x5q/GGH/427eA8Qc8MGs8du8hwB3Ad4GsnvUz0hLDOOCTmO+3koUn3QgRqQEOB/6Z4VDs/Ceh/+DB/TwuUyYBrcCD4e6u+0QkdbulD5ExZhtwG6Erxyag3Rjz18xGtU+Vxpim8NfNQGUmg9mPi4C/ZDqI/kRkCbDNGLMi07Hsz0hLDMOGiBQBTwH/bozpyHQ8sURkEbDDGLMs07HsQw5wBPArY8zhQDeZ7f6wCPfRLyGUwA4BCkXkq5mNanBMaI57Vl7xisgPCHXH/jbTscQSkQLgWuD6TMcyGCMtMWwDDo35fnz4WFYRkVxCSeG3xpg/ZToeG58GFovIJkLdcfNF5LHMhhRnK7DVGBO52/ojoUSRLT4LbDTGtBpj+oA/AZ/KcEz70iIi1QDhzzsyHE8cEbkAWAR8xWTfAq3JhC4CVoT/bsYD74pIVUajGsBISwzvAHUiMklEXIQG+57NcEwWIiKE+sXXGmNuz3Q8dowx3zfGjDfG1BB6D18xxmTV1a4xphn4RESmhQ8tAN7PYEj9bQGOEZGC8L/5ArJocNzGs8DXwl9/DXgmg7HEEZGFhLo2FxtjejIdT3/GmFXGmLHGmJrw381W4Ijw/9OsM6ISQ3hw6grgfwj9ET5pjFmT2ajifBo4j9BV+PLwxymZDmqY+gbwWxFZCcwFfpLZcPYK38n8EXgXWEXobzErSiaIyOPAm8A0EdkqIhcDPwVOEpF1hO52fppl8d0FFAMvhf9m7slUfPuIcdjQkhhKKaUsRtQdg1JKqf3TxKCUUspCE4NSSikLTQxKKaUsNDEopZSy0MSg1H6ISFca2pwbOw1ZRG4UkW+n+vcolQxNDEplxlxA16eorKSJQakEiMh3ROSdcN3/m8LHasL7PfwmvL/CX0UkP/yzo8KPXR7eM2B1eNX9UuCc8PFzws0fJiL/KyIbROTKDL1EpTQxKDVYIvI5oI5Q+fa5wJEicnz4x3XA3caYemAPcGb4+IPA140xc4EAhEqAEyqm9oQxZq4x5onwY6cDnw+3f0O4ZpZSB5wmBqUG73Phj/cIlbKYTighQKgg3vLw18uAmvAuYsXGmDfDx3+3n/b/2xjjNcbsJFSkLptLW6uDWE6mA1BqGBHgP4wxv7YcDO2b4Y05FADyk2i/fxv696kyQu8YlBq8/wEuCu+VgYiME5GxAz04vGNc
p4gcHT4Uu3VnJ6Gib0plHU0MSg1SeIe13wFvisgqQtVR93dyvxj4jYgsBwqB9vDxVwkNNscOPiuVFbS6qlJpJCJFxpiu8NffA6qNMVdlOCyl9kn7MJVKry+IyPcJ/a1tBi7IbDhK7Z/eMSillLLQMQallFIWmhiUUkpZaGJQSilloYlBKaWUhSYGpZRSFv8f6MpO5ujW5sgAAAAASUVORK5CYII=\n",
- "text/plain": [
- ""
- ]
- },
- "metadata": {
- "needs_background": "light"
- },
- "output_type": "display_data"
- }
- ],
- "source": [
- "sns.scatterplot(x=df_contracts.length,y=df_contracts.annual_salary)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Other ways to measure correlation exist. For example, if you are interested in how one variable will increase (or decrease) as another variable increases (or decreases), the *Spearman's or Kendall's rank correlation coefficients* might work well."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Questions\n",
- "\n",
- "* Try to explore the correlation of other variables in the dataset.\n",
- "* Can you think of a possible motivation for the trend we see: older apprentices with a shorter contract getting on average a higher annual salary?"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Sampling and uncertainty\n",
- "\n",
- "Often, we work with samples and we want the sample to be representative of the population it is taken from, in order to draw conclusions that generalise from the sample to the full population.\n",
- "\n",
- "Sampling is *tricky*. Samples have *variance* (variation between samples from the same population) and *bias* (systematic variation from the population)."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Further reading\n",
- "\n",
- "* For a complementary introduction to statistics and data analysis, see https://www.humanitiesdataanalysis.org/statistics-essentials/notebook.html.\n",
- "* Related to statistics and data analysis is the realm of probability theory, which allows us to formally model and calculate the likelihood of events. For an introduction, see https://www.humanitiesdataanalysis.org/intro-probability/notebook.html."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "---"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Part 2: working with texts\n",
- "\n",
- "Let's get some basics (or a refresher) of working with texts in Python. Texts are sequences of discrete symbols (words or, more generically, tokens).\n",
- "\n",
- "Key challenge: representing text for further processing. Two mainstream approaches:\n",
- "* *Bag of words*: a text is a collection of tokens, each occurring with a certain frequency and assumed to be independent of the others within the text. The mapping from texts to features is deterministic and straightforward: each text is represented as a vector of the size of the vocabulary.\n",
- "* *Embeddings*: a method is used (typically, neural networks), to learn a mapping from each token to a (usually small) vector representing it. A text can be represented in turn as an aggregation of these embeddings."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Import the dataset\n",
- "Let us load the dataset of Elon Musk's tweets into memory.\n",
- "\n",
- " "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 30,
- "metadata": {},
- "outputs": [],
- "source": [
- "root_folder = \"../data/musk_tweets\"\n",
- "df_elon = pd.read_csv(codecs.open(os.path.join(root_folder,\"elonmusk_tweets.csv\"), encoding=\"utf8\"), sep=\",\")\n",
- "df_elon['text'] = df_elon['text'].str[1:]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 31,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " id \n",
- " created_at \n",
- " text \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " 849636868052275200 \n",
- " 2017-04-05 14:56:29 \n",
- " 'And so the robots spared humanity ... https:/... \n",
- " \n",
- " \n",
- " 1 \n",
- " 848988730585096192 \n",
- " 2017-04-03 20:01:01 \n",
- " \"@ForIn2020 @waltmossberg @mims @defcon_5 Exac... \n",
- " \n",
- " \n",
- " 2 \n",
- " 848943072423497728 \n",
- " 2017-04-03 16:59:35 \n",
- " '@waltmossberg @mims @defcon_5 Et tu, Walt?' \n",
- " \n",
- " \n",
- " 3 \n",
- " 848935705057280001 \n",
- " 2017-04-03 16:30:19 \n",
- " 'Stormy weather in Shortville ...' \n",
- " \n",
- " \n",
- " 4 \n",
- " 848416049573658624 \n",
- " 2017-04-02 06:05:23 \n",
- " \"@DaveLeeBBC @verge Coal is dying due to nat g... \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " id created_at \\\n",
- "0 849636868052275200 2017-04-05 14:56:29 \n",
- "1 848988730585096192 2017-04-03 20:01:01 \n",
- "2 848943072423497728 2017-04-03 16:59:35 \n",
- "3 848935705057280001 2017-04-03 16:30:19 \n",
- "4 848416049573658624 2017-04-02 06:05:23 \n",
- "\n",
- " text \n",
- "0 'And so the robots spared humanity ... https:/... \n",
- "1 \"@ForIn2020 @waltmossberg @mims @defcon_5 Exac... \n",
- "2 '@waltmossberg @mims @defcon_5 Et tu, Walt?' \n",
- "3 'Stormy weather in Shortville ...' \n",
- "4 \"@DaveLeeBBC @verge Coal is dying due to nat g... "
- ]
- },
- "execution_count": 31,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df_elon.head(5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 32,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(2819, 3)"
- ]
- },
- "execution_count": 32,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df_elon.shape"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Natural Language Processing in Python"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 33,
- "metadata": {},
- "outputs": [],
- "source": [
- "# import some of the most popular libraries for NLP in Python\n",
- "import spacy\n",
- "import nltk\n",
- "import string\n",
- "import sklearn"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 34,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[nltk_data] Downloading package punkt to /Users/matteo/nltk_data...\n",
- "[nltk_data] Unzipping tokenizers/punkt.zip.\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "True"
- ]
- },
- "execution_count": 34,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "nltk.download('punkt')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "A typical NLP pipeline might look like the following:\n",
- " \n",
- " \n",
- "\n",
- "### Tokenization: splitting a text into constituent tokens"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 35,
- "metadata": {},
- "outputs": [],
- "source": [
- "from nltk.tokenize import TweetTokenizer, word_tokenize\n",
- "tknzr = TweetTokenizer(preserve_case=True, reduce_len=False, strip_handles=False)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 36,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\"@ForIn2020 @waltmossberg @mims @defcon_5 Exactly. Tesla is absurdly overvalued if based on the past, but that's irr\\xe2\\x80\\xa6 https://t.co/qQcTqkzgMl\"\n"
- ]
- }
- ],
- "source": [
- "example_tweet = df_elon.text[1]\n",
- "print(example_tweet)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 37,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "['\"', '@ForIn2020', '@waltmossberg', '@mims', '@defcon_5', 'Exactly', '.', 'Tesla', 'is', 'absurdly', 'overvalued', 'if', 'based', 'on', 'the', 'past', ',', 'but', \"that's\", 'irr', '\\\\', 'xe2', '\\\\', 'x80', '\\\\', 'xa6', 'https://t.co/qQcTqkzgMl', '\"']\n",
- "['``', '@', 'ForIn2020', '@', 'waltmossberg', '@', 'mims', '@', 'defcon_5', 'Exactly', '.', 'Tesla', 'is', 'absurdly', 'overvalued', 'if', 'based', 'on', 'the', 'past', ',', 'but', 'that', \"'s\", 'irr\\\\xe2\\\\x80\\\\xa6', 'https', ':', '//t.co/qQcTqkzgMl', \"''\"]\n"
- ]
- }
- ],
- "source": [
- "tkz1 = tknzr.tokenize(example_tweet)\n",
- "print(tkz1)\n",
- "tkz2 = word_tokenize(example_tweet)\n",
- "print(tkz2)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Question: can you spot what the Twitter tokenizer does differently from a standard one?"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 38,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'!\"#$%&\\'()*+,-./:;<=>?@[\\\\]^_`{|}~'"
- ]
- },
- "execution_count": 38,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "string.punctuation"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 39,
- "metadata": {},
- "outputs": [],
- "source": [
- "# some more pre-processing\n",
- "\n",
- "def filter(tweet):\n",
- " \n",
- " # remove punctuation and short words and urls\n",
- " tweet = [t for t in tweet if t not in string.punctuation and len(t) > 3 and not t.startswith(\"http\")]\n",
- " return tweet\n",
- "\n",
- "def tokenize_and_string(tweet):\n",
- " \n",
- " tkz = tknzr.tokenize(tweet)\n",
- " \n",
- " tkz = filter(tkz)\n",
- " \n",
- " return \" \".join(tkz)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 40,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "['\"', '@ForIn2020', '@waltmossberg', '@mims', '@defcon_5', 'Exactly', '.', 'Tesla', 'is', 'absurdly', 'overvalued', 'if', 'based', 'on', 'the', 'past', ',', 'but', \"that's\", 'irr', '\\\\', 'xe2', '\\\\', 'x80', '\\\\', 'xa6', 'https://t.co/qQcTqkzgMl', '\"']\n",
- "['@ForIn2020', '@waltmossberg', '@mims', '@defcon_5', 'Exactly', 'Tesla', 'absurdly', 'overvalued', 'based', 'past', \"that's\"]\n"
- ]
- }
- ],
- "source": [
- "print(tkz1)\n",
- "print(filter(tkz1))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 41,
- "metadata": {},
- "outputs": [],
- "source": [
- "df_elon[\"clean_text\"] = df_elon[\"text\"].apply(tokenize_and_string)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 42,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " id \n",
- " created_at \n",
- " text \n",
- " clean_text \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " 849636868052275200 \n",
- " 2017-04-05 14:56:29 \n",
- " 'And so the robots spared humanity ... https:/... \n",
- " robots spared humanity \n",
- " \n",
- " \n",
- " 1 \n",
- " 848988730585096192 \n",
- " 2017-04-03 20:01:01 \n",
- " \"@ForIn2020 @waltmossberg @mims @defcon_5 Exac... \n",
- " @ForIn2020 @waltmossberg @mims @defcon_5 Exact... \n",
- " \n",
- " \n",
- " 2 \n",
- " 848943072423497728 \n",
- " 2017-04-03 16:59:35 \n",
- " '@waltmossberg @mims @defcon_5 Et tu, Walt?' \n",
- " @waltmossberg @mims @defcon_5 Walt \n",
- " \n",
- " \n",
- " 3 \n",
- " 848935705057280001 \n",
- " 2017-04-03 16:30:19 \n",
- " 'Stormy weather in Shortville ...' \n",
- " Stormy weather Shortville \n",
- " \n",
- " \n",
- " 4 \n",
- " 848416049573658624 \n",
- " 2017-04-02 06:05:23 \n",
- " \"@DaveLeeBBC @verge Coal is dying due to nat g... \n",
- " @DaveLeeBBC @verge Coal dying fracking It's ba... \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " id created_at \\\n",
- "0 849636868052275200 2017-04-05 14:56:29 \n",
- "1 848988730585096192 2017-04-03 20:01:01 \n",
- "2 848943072423497728 2017-04-03 16:59:35 \n",
- "3 848935705057280001 2017-04-03 16:30:19 \n",
- "4 848416049573658624 2017-04-02 06:05:23 \n",
- "\n",
- " text \\\n",
- "0 'And so the robots spared humanity ... https:/... \n",
- "1 \"@ForIn2020 @waltmossberg @mims @defcon_5 Exac... \n",
- "2 '@waltmossberg @mims @defcon_5 Et tu, Walt?' \n",
- "3 'Stormy weather in Shortville ...' \n",
- "4 \"@DaveLeeBBC @verge Coal is dying due to nat g... \n",
- "\n",
- " clean_text \n",
- "0 robots spared humanity \n",
- "1 @ForIn2020 @waltmossberg @mims @defcon_5 Exact... \n",
- "2 @waltmossberg @mims @defcon_5 Walt \n",
- "3 Stormy weather Shortville \n",
- "4 @DaveLeeBBC @verge Coal dying fracking It's ba... "
- ]
- },
- "execution_count": 42,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df_elon.head(5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 43,
- "metadata": {},
- "outputs": [],
- "source": [
- "# save cleaned up version\n",
- "\n",
- "df_elon.to_csv(os.path.join(root_folder,\"df_elon.csv\"), index=False)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Building a dictionary"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 44,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(2819, 7864)"
- ]
- },
- "execution_count": 44,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "from sklearn.feature_extraction.text import CountVectorizer\n",
- "count_vect = CountVectorizer(lowercase=False, tokenizer=tknzr.tokenize)\n",
- "X_count = count_vect.fit_transform(df_elon.clean_text)\n",
- "X_count.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 45,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "6617"
- ]
- },
- "execution_count": 45,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "word_list = count_vect.get_feature_names_out() \n",
- "count_list = X_count.toarray().sum(axis=0)\n",
- "dictionary = dict(zip(word_list,count_list))\n",
- "count_vect.vocabulary_.get(\"robots\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 46,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "3"
- ]
- },
- "execution_count": 46,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "X_count[:,count_vect.vocabulary_.get(\"robots\")].toarray().sum()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 47,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "3"
- ]
- },
- "execution_count": 47,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "dictionary[\"robots\"]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Questions\n",
- "\n",
- "* Find the tokens most used by Elon.\n",
- "* Find the Twitter users most often mentioned by Elon (hint: use the @ prefix of user handles to spot them)."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 48,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[('Tesla', 322),\n",
- " ('Model', 236),\n",
- " ('that', 223),\n",
- " ('will', 218),\n",
- " ('with', 177),\n",
- " ('@SpaceX', 169),\n",
- " ('from', 163),\n",
- " ('this', 159),\n",
- " ('@TeslaMotors', 149),\n",
- " ('launch', 124)]"
- ]
- },
- "execution_count": 48,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "dictionary_list = sorted(dictionary.items(), key=lambda x:x[1], reverse=True)\n",
- "[d for d in dictionary_list][:10]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 49,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[('@SpaceX', 169),\n",
- " ('@TeslaMotors', 149),\n",
- " ('@elonmusk', 85),\n",
- " ('@NASA', 48),\n",
- " ('@Space_Station', 19),\n",
- " ('@FredericLambert', 17),\n",
- " ('@ID_AA_Carmack', 15),\n",
- " ('@WIRED', 14),\n",
- " ('@vicentes', 14),\n",
- " ('@BadAstronomer', 11)]"
- ]
- },
- "execution_count": 49,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "dictionary_list_users = sorted(dictionary.items(), key=lambda x:x[1], reverse=True)\n",
- "[d for d in dictionary_list if d[0].startswith('@')][:10]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Representing tweets as vectors\n",
- "\n",
- "Texts are of variable length and need to be represented numerically in some way. Most typically, we represent them as *equally-sized vectors*.\n",
- "\n",
- "Actually, this is what we have already done! Let's take a closer look at `X_count` above."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 50,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "id 849636868052275200\n",
- "created_at 2017-04-05 14:56:29\n",
- "text 'And so the robots spared humanity ... https:/...\n",
- "clean_text robots spared humanity\n",
- "Name: 0, dtype: object"
- ]
- },
- "execution_count": 50,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# This is the first Tweet of the data frame\n",
- "\n",
- "df_elon.loc[0]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 51,
- "metadata": {},
- "outputs": [],
- "source": [
- "# let's get the vector representation for this Tweet\n",
- "\n",
- "vector_representation = X_count[0,:]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 52,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "3"
- ]
- },
- "execution_count": 52,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# there are 3 positions not set to zero, as we would expect: the vector contains 1 in the columns related to the 3 words that make up the Tweet. \n",
- "# It would contain a number higher than 1 if a given word were occurring multiple times.\n",
- "\n",
- "np.sum(vector_representation)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 53,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "1\n",
- "1\n",
- "1\n"
- ]
- }
- ],
- "source": [
- "# Let's check that indeed the vector contains 1s for the right words\n",
- "# Remember, the vector has shape (1 x size of the vocabulary)\n",
- "\n",
- "print(vector_representation[0,count_vect.vocabulary_.get(\"robots\")])\n",
- "print(vector_representation[0,count_vect.vocabulary_.get(\"spared\")])\n",
- "print(vector_representation[0,count_vect.vocabulary_.get(\"humanity\")])"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Term Frequency - Inverse Document Frequency\n",
- "We can use boolean counts (1/0) and raw counts (as we did before) to represent a Tweet over the space of the vocabulary, but there exist improvements on this basic idea. For example, the TF-IDF weighting scheme:\n",
- "\n",
- "$tfidf(t, d, D) = tf(t, d) \\cdot idf(t, D)$\n",
- "\n",
- "$tf(t, d) = f_{t,d}$\n",
- "\n",
- "$idf(t, D) = \\log \\Big( \\frac{|D|}{|\\{d \\in D: t \\in d\\}|} \\Big)$"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 54,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(2819, 7864)"
- ]
- },
- "execution_count": 54,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "from sklearn.feature_extraction.text import TfidfVectorizer\n",
- "count_vect = TfidfVectorizer(lowercase=False, tokenizer=tknzr.tokenize)\n",
- "X_count_tfidf = count_vect.fit_transform(df_elon.clean_text)\n",
- "X_count_tfidf.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 55,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "1.7226760995112569"
- ]
- },
- "execution_count": 55,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "X_count_tfidf[0,:].sum()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 56,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "3"
- ]
- },
- "execution_count": 56,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "X_count[0,:].sum()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Sparse vectors (mention)\n",
- "How is Python representing these vectors in memory? Most of their cells are set to zero. \n",
- "\n",
- "We call any vector or matrix whose cells are mostly set to zero *sparse*.\n",
- "There are efficient ways to store them in memory."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 57,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "<1x7864 sparse matrix of type ''\n",
- "\twith 3 stored elements in Compressed Sparse Row format>"
- ]
- },
- "execution_count": 57,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "X_count_tfidf[0,:]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Spacy pipelines\n",
- "\n",
- "Useful to construct sequences of pre-processing steps: https://spacy.io/usage/processing-pipelines."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 58,
- "metadata": {},
- "outputs": [
- {
- "ename": "OSError",
- "evalue": "[E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a Python package or a valid path to a data directory.",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
- "Input \u001b[0;32mIn [58]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Load a pre-trained pipeline (Web Small): https://spacy.io/usage/models\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m#!python -m spacy download en_core_web_sm\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m nlp \u001b[38;5;241m=\u001b[39m \u001b[43mspacy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43men_core_web_sm\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m~/.pyenv/versions/3.10.0/envs/ada-dhoxss-2022/lib/python3.10/site-packages/spacy/__init__.py:51\u001b[0m, in \u001b[0;36mload\u001b[0;34m(name, vocab, disable, exclude, config)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload\u001b[39m(\n\u001b[1;32m 31\u001b[0m name: Union[\u001b[38;5;28mstr\u001b[39m, Path],\n\u001b[1;32m 32\u001b[0m \u001b[38;5;241m*\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 36\u001b[0m config: Union[Dict[\u001b[38;5;28mstr\u001b[39m, Any], Config] \u001b[38;5;241m=\u001b[39m util\u001b[38;5;241m.\u001b[39mSimpleFrozenDict(),\n\u001b[1;32m 37\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Language:\n\u001b[1;32m 38\u001b[0m \u001b[38;5;124;03m\"\"\"Load a spaCy model from an installed package or a local path.\u001b[39;00m\n\u001b[1;32m 39\u001b[0m \n\u001b[1;32m 40\u001b[0m \u001b[38;5;124;03m name (str): Package name or model path.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;124;03m RETURNS (Language): The loaded nlp object.\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 51\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mutil\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 52\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvocab\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvocab\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdisable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdisable\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexclude\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\n\u001b[1;32m 53\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[0;32m~/.pyenv/versions/3.10.0/envs/ada-dhoxss-2022/lib/python3.10/site-packages/spacy/util.py:427\u001b[0m, in \u001b[0;36mload_model\u001b[0;34m(name, vocab, disable, exclude, config)\u001b[0m\n\u001b[1;32m 425\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m OLD_MODEL_SHORTCUTS:\n\u001b[1;32m 426\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIOError\u001b[39;00m(Errors\u001b[38;5;241m.\u001b[39mE941\u001b[38;5;241m.\u001b[39mformat(name\u001b[38;5;241m=\u001b[39mname, full\u001b[38;5;241m=\u001b[39mOLD_MODEL_SHORTCUTS[name])) \u001b[38;5;66;03m# type: ignore[index]\u001b[39;00m\n\u001b[0;32m--> 427\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIOError\u001b[39;00m(Errors\u001b[38;5;241m.\u001b[39mE050\u001b[38;5;241m.\u001b[39mformat(name\u001b[38;5;241m=\u001b[39mname))\n",
- "\u001b[0;31mOSError\u001b[0m: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a Python package or a valid path to a data directory."
- ]
- }
- ],
- "source": [
- "# Load a pre-trained pipeline (Web Small): https://spacy.io/usage/models\n",
- "\n",
- "#!python -m spacy download en_core_web_sm\n",
- "nlp = spacy.load('en_core_web_sm')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "*.. the modelās meta.json tells spaCy to use the language \"en\" and the pipeline [\"tagger\", \"parser\", \"ner\"]. spaCy will then initialize spacy.lang.en.English, and create each pipeline component and add it to the processing pipeline. Itāll then load in the modelās data from its data directory and return the modified Language class for you to use as the nlp object.*\n",
- "\n",
- "Let's create a simple pipeline that does **lemmatization**, **part of speech tagging** and **named entity recognition** using spaCy models.\n",
- "\n",
- "*If you don't know what these NLP tasks are, please ask!*"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "tweet_pos = list()\n",
- "tweet_ner = list()\n",
- "tweet_lemmas = list()\n",
- "\n",
- "for tweet in df_elon.text.values:\n",
- " spacy_tweet = nlp(tweet)\n",
- " \n",
- " local_tweet_pos = list()\n",
- " local_tweet_ner = list()\n",
- " local_tweet_lemmas = list()\n",
- " \n",
- " for sentence in list(spacy_tweet.sents):\n",
- " # --- lemmatization, remove punctuation and stop wors\n",
- " local_tweet_lemmas.extend([token.lemma_ for token in sentence if not token.is_punct | token.is_stop])\n",
- " local_tweet_pos.extend([token.pos_ for token in sentence if not token.is_punct | token.is_stop])\n",
- " for ent in spacy_tweet.ents:\n",
- " local_tweet_ner.append(ent)\n",
- "\n",
- " tweet_pos.append(local_tweet_pos)\n",
- " tweet_ner.append(local_tweet_ner)\n",
- " tweet_lemmas.append(local_tweet_lemmas)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "tweet_lemmas[0]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "tweet_pos[0]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "tweet_ner[0]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# but it actually works!\n",
- "\n",
- "tweet_ner[3]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "*Note: we are really just scratching the surface of spaCy, but it is worth knowing it's there.*"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Searching tweets\n",
- "\n",
- "Once we have represented Tweets as vectors, we can easily find similar ones using basic operations such as filtering."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "target = 0\n",
- "print(df_elon.clean_text[target])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "condition = X_count_tfidf[target,:] > 0"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "print(condition)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "X_filtered = X_count_tfidf[:,np.ravel(condition.toarray())]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "X_filtered"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "print(X_filtered)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from scipy import sparse\n",
- "\n",
- "sparse.find(X_filtered)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "tweet_indices = list(sparse.find(X_filtered)[0])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "print(\"TARGET: \" + df_elon.clean_text[target])\n",
- "\n",
- "for n, tweet_index in enumerate(list(set(tweet_indices))):\n",
- " if tweet_index != target:\n",
- " print(str(n) +\")\"+ df_elon.clean_text[tweet_index])"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Questions\n",
- "\n",
- "* Can you rank the matched tweets using their tf-idf weights, so to put higher weighted tweets first?\n",
- "* Which limitations do you think a bag of words representation has?\n",
- "* Can you spot any limitations of this approach based on similarity measures over bag of words representations?"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "---"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.0"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/notebooks/3.2 Exploratory data analysis and working with texts.ipynb b/notebooks/3.2 Exploratory data analysis and working with texts.ipynb
new file mode 100644
index 0000000..34d44c9
--- /dev/null
+++ b/notebooks/3.2 Exploratory data analysis and working with texts.ipynb
@@ -0,0 +1,2788 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 3.2 Exploratory data analysis and working with texts\n",
+ "\n",
+ "In this notebook, we learn about:\n",
+ "1. descriptive statistics to explore data;\n",
+ "2. working with texts (hints)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Part 1: descriptive statistics\n",
+ "\n",
+ "*The goal of exploratory data analysis is to develop an understanding of your data. EDA is fundamentally a creative process. And like most creative processes, the key to asking quality questions is to generate a large quantity of questions.* \n",
+ "\n",
+ "Key questions:\n",
+ "* Which kind of variation occurs within variables?\n",
+ "* Which kind of co-variation occurs between variables?\n",
+ "\n",
+ "https://r4ds.had.co.nz/exploratory-data-analysis.html"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# imports\n",
+ "\n",
+ "import os, codecs\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import seaborn as sns\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import the dataset\n",
+ "Let us import the Venetian apprenticeship contracts dataset into memory."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "root_folder = \"../data/apprenticeship_venice/\"\n",
+ "df_contracts = pd.read_csv(codecs.open(os.path.join(root_folder,\"professions_data.csv\"), encoding=\"utf8\"), sep=\";\")\n",
+ "df_professions = pd.read_csv(codecs.open(os.path.join(root_folder,\"professions_classification.csv\"), encoding=\"utf8\"), sep=\",\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's take another look at the dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 9653 entries, 0 to 9652\n",
+ "Data columns (total 47 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 page_title 9653 non-null object \n",
+ " 1 register 9653 non-null object \n",
+ " 2 annual_salary 7870 non-null float64\n",
+ " 3 a_profession 9653 non-null object \n",
+ " 4 profession_code_strict 9618 non-null object \n",
+ " 5 profession_code_gen 9614 non-null object \n",
+ " 6 profession_cat 9597 non-null object \n",
+ " 7 corporation 9350 non-null object \n",
+ " 8 keep_profession_a 9653 non-null int64 \n",
+ " 9 complete_profession_a 9653 non-null int64 \n",
+ " 10 enrolmentY 9628 non-null float64\n",
+ " 11 enrolmentM 9631 non-null float64\n",
+ " 12 startY 9533 non-null float64\n",
+ " 13 startM 9539 non-null float64\n",
+ " 14 length 9645 non-null float64\n",
+ " 15 has_fled 9653 non-null int64 \n",
+ " 16 m_profession 9535 non-null object \n",
+ " 17 m_profession_code_strict 9508 non-null object \n",
+ " 18 m_profession_code_gen 9506 non-null object \n",
+ " 19 m_profession_cat 9489 non-null object \n",
+ " 20 m_corporation 9276 non-null object \n",
+ " 21 keep_profession_m 9653 non-null int64 \n",
+ " 22 complete_profession_m 9653 non-null int64 \n",
+ " 23 m_gender 9554 non-null float64\n",
+ " 24 m_name 9623 non-null object \n",
+ " 25 m_surname 6960 non-null object \n",
+ " 26 m_patronimic 2620 non-null object \n",
+ " 27 m_atelier 1434 non-null object \n",
+ " 28 m_coords 9639 non-null object \n",
+ " 29 a_name 9653 non-null object \n",
+ " 30 a_age 9303 non-null float64\n",
+ " 31 a_gender 9522 non-null float64\n",
+ " 32 a_geo_origins 7149 non-null object \n",
+ " 33 a_geo_origins_std 4636 non-null object \n",
+ " 34 a_coords 9610 non-null object \n",
+ " 35 a_quondam 7848 non-null float64\n",
+ " 36 accommodation_master 9653 non-null int64 \n",
+ " 37 personal_care_master 9653 non-null int64 \n",
+ " 38 clothes_master 9653 non-null int64 \n",
+ " 39 generic_expenses_master 9653 non-null int64 \n",
+ " 40 salary_in_kind_master 9653 non-null int64 \n",
+ " 41 pledge_goods_master 9653 non-null int64 \n",
+ " 42 pledge_money_master 9653 non-null int64 \n",
+ " 43 salary_master 9653 non-null int64 \n",
+ " 44 female_guarantor 9653 non-null int64 \n",
+ " 45 period_cat 7891 non-null float64\n",
+ " 46 incremental_salary 9653 non-null int64 \n",
+ "dtypes: float64(11), int64(15), object(21)\n",
+ "memory usage: 3.5+ MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df_contracts.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " page_title \n",
+ " register \n",
+ " annual_salary \n",
+ " a_profession \n",
+ " profession_code_strict \n",
+ " profession_code_gen \n",
+ " profession_cat \n",
+ " corporation \n",
+ " keep_profession_a \n",
+ " complete_profession_a \n",
+ " ... \n",
+ " personal_care_master \n",
+ " clothes_master \n",
+ " generic_expenses_master \n",
+ " salary_in_kind_master \n",
+ " pledge_goods_master \n",
+ " pledge_money_master \n",
+ " salary_master \n",
+ " female_guarantor \n",
+ " period_cat \n",
+ " incremental_salary \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " Carlo Della sosta (Orese) 1592-08-03 \n",
+ " asv, giustizia vecchia, accordi dei garzoni, 1... \n",
+ " NaN \n",
+ " orese \n",
+ " orese \n",
+ " orefice \n",
+ " orefice \n",
+ " Oresi \n",
+ " 1 \n",
+ " 1 \n",
+ " ... \n",
+ " 1 \n",
+ " 1 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " NaN \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " Antonio quondam Andrea (squerariol) 1583-01-09 \n",
+ " asv, giustizia vecchia, accordi dei garzoni, 1... \n",
+ " 12.5 \n",
+ " squerariol \n",
+ " squerariol \n",
+ " lavori allo squero \n",
+ " lavori allo squero \n",
+ " Squerarioli \n",
+ " 1 \n",
+ " 1 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 1 \n",
+ " 0 \n",
+ " 1.0 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " Cristofollo di Zuane (batioro in carta) 1591-0... \n",
+ " asv, giustizia vecchia, accordi dei garzoni, 1... \n",
+ " NaN \n",
+ " batioro \n",
+ " batioro \n",
+ " battioro \n",
+ " fabbricatore di foglie/fili/cordelle d'oro o a... \n",
+ " Battioro \n",
+ " 1 \n",
+ " 1 \n",
+ " ... \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " 0 \n",
+ " NaN \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " \n",
+ " 3 rows × 47 columns \n",
+ " "
+ ],
+ "text/plain": [
+ " page_title \\\n",
+ "0 Carlo Della sosta (Orese) 1592-08-03 \n",
+ "1 Antonio quondam Andrea (squerariol) 1583-01-09 \n",
+ "2 Cristofollo di Zuane (batioro in carta) 1591-0... \n",
+ "\n",
+ " register annual_salary \\\n",
+ "0 asv, giustizia vecchia, accordi dei garzoni, 1... NaN \n",
+ "1 asv, giustizia vecchia, accordi dei garzoni, 1... 12.5 \n",
+ "2 asv, giustizia vecchia, accordi dei garzoni, 1... NaN \n",
+ "\n",
+ " a_profession profession_code_strict profession_code_gen \\\n",
+ "0 orese orese orefice \n",
+ "1 squerariol squerariol lavori allo squero \n",
+ "2 batioro batioro battioro \n",
+ "\n",
+ " profession_cat corporation \\\n",
+ "0 orefice Oresi \n",
+ "1 lavori allo squero Squerarioli \n",
+ "2 fabbricatore di foglie/fili/cordelle d'oro o a... Battioro \n",
+ "\n",
+ " keep_profession_a complete_profession_a ... personal_care_master \\\n",
+ "0 1 1 ... 1 \n",
+ "1 1 1 ... 0 \n",
+ "2 1 1 ... 0 \n",
+ "\n",
+ " clothes_master generic_expenses_master salary_in_kind_master \\\n",
+ "0 1 1 0 \n",
+ "1 0 1 0 \n",
+ "2 0 0 0 \n",
+ "\n",
+ " pledge_goods_master pledge_money_master salary_master female_guarantor \\\n",
+ "0 0 0 0 0 \n",
+ "1 0 0 1 0 \n",
+ "2 0 0 0 0 \n",
+ "\n",
+ " period_cat incremental_salary \n",
+ "0 NaN 0 \n",
+ "1 1.0 0 \n",
+ "2 NaN 0 \n",
+ "\n",
+ "[3 rows x 47 columns]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_contracts.head(3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['page_title', 'register', 'annual_salary', 'a_profession',\n",
+ " 'profession_code_strict', 'profession_code_gen', 'profession_cat',\n",
+ " 'corporation', 'keep_profession_a', 'complete_profession_a',\n",
+ " 'enrolmentY', 'enrolmentM', 'startY', 'startM', 'length', 'has_fled',\n",
+ " 'm_profession', 'm_profession_code_strict', 'm_profession_code_gen',\n",
+ " 'm_profession_cat', 'm_corporation', 'keep_profession_m',\n",
+ " 'complete_profession_m', 'm_gender', 'm_name', 'm_surname',\n",
+ " 'm_patronimic', 'm_atelier', 'm_coords', 'a_name', 'a_age', 'a_gender',\n",
+ " 'a_geo_origins', 'a_geo_origins_std', 'a_coords', 'a_quondam',\n",
+ " 'accommodation_master', 'personal_care_master', 'clothes_master',\n",
+ " 'generic_expenses_master', 'salary_in_kind_master',\n",
+ " 'pledge_goods_master', 'pledge_money_master', 'salary_master',\n",
+ " 'female_guarantor', 'period_cat', 'incremental_salary'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_contracts.columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Every row represents an apprenticeship contract. Contracts were registered both at the guild's and at a public office. This is a sample of contracts from a much larger set of records.\n",
+ "\n",
+ "Some of the variables we will work with are:\n",
+ "* `annual_salary`: the annual salary paid to the apprentice, if any (in Venetian ducats).\n",
+ "* `a_profession` to `corporation`: increasingly generic classifications for the apprentice's stated profession.\n",
+ "* `startY` and `enrolmentY`: contract start and registration year respectively.\n",
+ "* `length`: of the contract, in years.\n",
+ "* `m_gender` and `a_gender`: of master and apprentice respectively.\n",
+ "* `a_age`: age of the apprentice at entry, in years.\n",
+ "* `female_guarantor`: if at least one of the contract's guarantors was female, boolean."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1.0 9424\n",
+ "0.0 130\n",
+ "Name: m_gender, dtype: int64"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_contracts.m_gender.value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Trascrizione \n",
+ " Standard \n",
+ " Gruppo 0 \n",
+ " Gruppo 1 \n",
+ " Gruppo 2 \n",
+ " Gruppo 3 \n",
+ " Gruppo 4 \n",
+ " Corporazione \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " al negotio del libraro \n",
+ " librer \n",
+ " libraio \n",
+ " librai - diverse specializzazioni \n",
+ " stampa \n",
+ " altre lavorazioni manifatturiere \n",
+ " beni \n",
+ " libreri, stampatori e ligadori \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " arte de far arpicordi \n",
+ " arte de far arpicordi \n",
+ " fabbricatore di arpicordi \n",
+ " fabbricatore di strumenti musicali \n",
+ " musica \n",
+ " altri servizi \n",
+ " servizi \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " arte de' colori \n",
+ " arte dei colori \n",
+ " fabbricazione/vendita di colori \n",
+ " colori \n",
+ " colori \n",
+ " decorazioni e mestieri dell'arte \n",
+ " beni \n",
+ " spezieri \n",
+ " \n",
+ " \n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ " Trascrizione Standard \\\n",
+ "0 al negotio del libraro librer \n",
+ "1 arte de far arpicordi arte de far arpicordi \n",
+ "2 arte de' colori arte dei colori \n",
+ "\n",
+ " Gruppo 0 Gruppo 1 \\\n",
+ "0 libraio librai - diverse specializzazioni \n",
+ "1 fabbricatore di arpicordi fabbricatore di strumenti musicali \n",
+ "2 fabbricazione/vendita di colori colori \n",
+ "\n",
+ " Gruppo 2 Gruppo 3 Gruppo 4 \\\n",
+ "0 stampa altre lavorazioni manifatturiere beni \n",
+ "1 musica altri servizi servizi \n",
+ "2 colori decorazioni e mestieri dell'arte beni \n",
+ "\n",
+ " Corporazione \n",
+ "0 libreri, stampatori e ligadori \n",
+ "1 NaN \n",
+ "2 spezieri "
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_professions.head(3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The professions data frame contains a classification system for each profession as found in the records (transcription, first column). The last column is the guild (or corporation) which governed the given profession. This work was performed manually by historians. We don't use it here as the classifications we need are already part of the main dataframe."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Questions\n",
+ "\n",
+ "* Plot the distribution (histogram) of the apprentices' age, contract length, annual salary and start year.\n",
+ "* Calculate the proportion of female apprentices and masters, and of contracts with a female guarantor.\n",
+ "* How likely is it for a female apprentice to have a female master? And for a male apprentice?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAUuElEQVR4nO3df4zcd53f8ecLk4YovpCkSVbGdutU56suP4pRVq6l9KQ1pBcfoDpIF8koJYmgMoqCBKqrnsM/wCFL+eOANoJENRcUp/ywrIM0FpBecy6rFCnB2DSc4/xQrIubc2zFuiOAlz9cbN79Y76+jsxkd7w7u47383xIo5l5z+f7nc/bk7z2u5/5zmyqCklSG952vicgSVo4hr4kNcTQl6SGGPqS1BBDX5Ia8vbzPYGZXHXVVbVq1appx/zqV7/i0ksvXZgJvYXYd1vsuy1z7Xv//v1/V1VXn11/y4f+qlWr2Ldv37RjJicnmZiYWJgJvYXYd1vsuy1z7TvJ/xlUd3lHkhpi6EtSQ2YM/STvSLI3yU+THEzyua7+2SSvJXm2u7y/b5v7khxK8lKSW/vqNyU50D32QJLMT1uSpEGGWdM/Cby3qqaSXAT8MMkT3WNfqqo/6x+c5DpgE3A98C7gr5L8XlWdBh4CNgPPAN8HNgBPIElaEDMe6VfPVHf3ou4y3Rf2bAR2VtXJqnoFOASsTbIMuKyqnq7eF/48Ctw2p9lLks7JUGfvJFkC7Ad+F/hKVf0oyR8Bn0hyJ7AP2FJVbwDL6R3Jn3Gkq/26u312fdDzbab3GwFjY2NMTk5OO7+pqakZxyxG9t0W+27LfPU9VOh3SzNrklwOPJbkBnpLNZ+nd9T/eeALwEeBQev0NU190PNtB7YDjI+P10ynLXlKV1vsuy32PVrndPZOVf0cmAQ2VNXrVXW6qn4DfBVY2w07Aqzs22wFcLSrrxhQlyQtkGHO3rm6O8InySXALcCL3Rr9GR8Cnutu7wY2Jbk4ybXAamBvVR0DTiRZ1521cyfw+OhakSTNZJjlnWXAjm5d/23Arqr6bpL/mmQNvSWaw8DHAarqYJJdwPPAKeDebnkI4B7gEeASemfteObOebJq6/eGGnf4/g/M80wkLaQZQ7+q/hp4z4D6R6bZZhuwbUB9H3DDOc5RkjQifiJXkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSEzhn6SdyTZm+SnSQ4m+VxXvzLJk0le7q6v6NvmviSHkryU5Na++k1JDnSPPZAk89OWJGmQYY70TwLvrap3A2uADUnWAVuBPVW1GtjT3SfJdcAm4HpgA/BgkiXdvh4CNgOru8uG0bUiSZrJjKFfPVPd3Yu6SwEbgR1dfQdwW3d7I7Czqk5W1SvAIWBtkmXAZVX1dFUV8GjfNpKkBfD2YQZ1R+r7gd8FvlJVP0oyVlXHAKrqWJJruuHLgWf6Nj/S1X7d3T67Puj5NtP7jYCxsTEmJyennd/U1NSMYxajufS95cZTQ417K/67+nq3xb5Ha6jQr6rTwJoklwOPJblhmuGD1ulrmvqg59sObAcYHx+viYmJaec3OTnJTGMWo7n0fffW7w017vAds9v/fPL1bot9j9Y5nb1TVT8HJumtxb/eLdnQXR/vhh0BVvZttgI42tVXDKhLkhbIMGfvXN0d4ZPkEuAW4EVgN3BXN+wu4PHu9m5gU5KLk1xL7w3bvd1S0Ikk67qzdu7s20aStACGWd5ZBuzo1vXfBuyqqu8meRrYleRjwKvA7QBVdTDJLuB54BRwb7c8BHAP8AhwCfBEd5EkLZAZQ7+q/hp4z4D63wPve5NttgHbBtT3AdO9HyBJmkd+IleSGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLU
EENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpITOGfpKVSX6Q5IUkB5N8sqt/NslrSZ7tLu/v2+a+JIeSvJTk1r76TUkOdI89kCTz05YkaZC3DzHmFLClqn6S5HeA/Ume7B77UlX9Wf/gJNcBm4DrgXcBf5Xk96rqNPAQsBl4Bvg+sAF4YjStSJJmMuORflUdq6qfdLdPAC8Ay6fZZCOws6pOVtUrwCFgbZJlwGVV9XRVFfAocNtcG5AkDS+9/B1ycLIKeAq4Afj3wN3AL4F99H4beCPJl4Fnqurr3TYP0zuaPwzcX1W3dPU/AP6kqj444Hk20/uNgLGxsZt27tw57bympqZYunTp0H0sFnPp+8Brvxhq3I3L3zmr/c8nX++22PfsrF+/fn9VjZ9dH2Z5B4AkS4FvA5+qql8meQj4PFDd9ReAjwKD1ulrmvpvF6u2A9sBxsfHa2JiYtq5TU5OMtOYxWgufd+99XtDjTt8x+z2P598vdti36M11Nk7SS6iF/jfqKrvAFTV61V1uqp+A3wVWNsNPwKs7Nt8BXC0q68YUJckLZBhzt4J8DDwQlV9sa++rG/Yh4Dnutu7gU1JLk5yLbAa2FtVx4ATSdZ1+7wTeHxEfUiShjDM8s7NwEeAA0me7WqfBj6cZA29JZrDwMcBqupgkl3A8/TO/Lm3O3MH4B7gEeASeuv8nrkjSQtoxtCvqh8yeD3++9Nssw3YNqC+j96bwJKk88BP5EpSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1ZMbQT7IyyQ+SvJDkYJJPdvUrkzyZ5OXu+oq+be5LcijJS0lu7avflORA99gDSTI/bUmSBhnmSP8UsKWqfh9YB9yb5DpgK7CnqlYDe7r7dI9tAq4HNgAPJlnS7eshYDOwurtsGGEvkqQZzBj6VXWsqn7S3T4BvAAsBzYCO7phO4DbutsbgZ1VdbKqXgEOAWuTLAMuq6qnq6qAR/u2kSQtgPTyd8jBySrgKeAG4NWqurzvsTeq6ookXwaeqaqvd/WHgSeAw8D9VXVLV/8D4E+q6oMDnmczvd8IGBsbu2nnzp3TzmtqaoqlS5cO3cdiMZe+D7z2i6HG3bj8nbPa/3zy9W6Lfc/O+vXr91fV+Nn1tw+7gyRLgW8Dn6qqX06zHD/ogZqm/tvFqu3AdoDx8fGamJiYdm6Tk5PMNGYxmkvfd2/93lDjDt8xu/3PJ1/vttj3aA119k6Si+gF/jeq6jtd+fVuyYbu+nhXPwKs7Nt8BXC0q68YUJckLZBhzt4J8DDwQlV9se+h3cBd3e27gMf76puSXJzkWnpv2O6tqmPAiSTrun3e2beNJGkBDLO8czPwEeBAkme72qeB+4FdST4GvArcDlBVB5PsAp6nd+bPvVV1utvuHuAR4BJ66/xPjKYNSdIwZgz9qvohg9fjAd73JttsA7YNqO+j9yawJOk88BO5ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWrI0H9ERReGVUP+cRRJbfJIX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktSQGUM/ydeSHE/yXF/ts0leS/Jsd3l/32P3JTmU5KUkt/bVb0pyoHvsgSQZfTuSpOkMc6T/CLBhQP1LVbWmu3wfIMl1wCbg+m6bB5Ms6cY/BGwGVneXQfuUJM2jGUO/qp4Cfjbk/jYCO6vqZFW9AhwC1iZZBlxWVU9XVQGPArfNcs6SpFmay3fvfCLJncA+YEtVvQEsB57pG3Okq/26u312faAkm+n9VsDY2BiTk5PTTmRqamrGMYvRoL633HhqpM/xVvx39fVu
i32P1mxD/yHg80B1118APgoMWqevaeoDVdV2YDvA+Ph4TUxMTDuZyclJZhqzGA3q++4Rf+Ha4TsmZhyz0Hy922LfozWrs3eq6vWqOl1VvwG+CqztHjoCrOwbugI42tVXDKhLkhbQrEK/W6M/40PAmTN7dgObklyc5Fp6b9jurapjwIkk67qzdu4EHp/DvCVJszDj8k6SbwETwFVJjgCfASaSrKG3RHMY+DhAVR1Msgt4HjgF3FtVp7td3UPvTKBLgCe6iyRpAc0Y+lX14QHlh6cZvw3YNqC+D7jhnGYnSRopP5ErSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGjKX796R/sGqIb/+4fD9H5jnmUiajkf6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0JakhM4Z+kq8lOZ7kub7alUmeTPJyd31F32P3JTmU5KUkt/bVb0pyoHvsgSQZfTuSpOkMc6T/CLDhrNpWYE9VrQb2dPdJch2wCbi+2+bBJEu6bR4CNgOru8vZ+5QkzbMZQ7+qngJ+dlZ5I7Cju70DuK2vvrOqTlbVK8AhYG2SZcBlVfV0VRXwaN82kqQFMts1/bGqOgbQXV/T1ZcDf9s37khXW97dPrsuSVpAo/5ziYPW6Wua+uCdJJvpLQUxNjbG5OTktE86NTU145jFaFDfW248NdLnGPbfddjnHcXr5OvdFvserdmG/utJllXVsW7p5nhXPwKs7Bu3Ajja1VcMqA9UVduB7QDj4+M1MTEx7WQmJyeZacxiNKjvu4f8W7VDO/CrIQcO95/S4TsmZj2VM3y922LfozXb5Z3dwF3d7buAx/vqm5JcnORaem/Y7u2WgE4kWdedtXNn3zaSpAUy4+FZkm8BE8BVSY4AnwHuB3Yl+RjwKnA7QFUdTLILeB44BdxbVae7Xd1D70ygS4AnuoskaQHNGPpV9eE3eeh9bzJ+G7BtQH0fcMM5zU6SNFJ+IleSGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1JBR/2F0aVqrzuFv+B6+/wPzOBOpTR7pS1JDDH1JaoihL0kNMfQlqSGGviQ1ZE6hn+RwkgNJnk2yr6tdmeTJJC9311f0jb8vyaEkLyW5da6TlySdm1Ec6a+vqjVVNd7d3wrsqarVwJ7uPkmuAzYB1wMbgAeTLBnB80uShjQfyzsbgR3d7R3AbX31nVV1sqpeAQ4Ba+fh+SVJbyJVNfuNk1eAN4AC/ktVbU/y86q6vG/MG1V1RZIvA89U1de7+sPAE1X1FwP2uxnYDDA2NnbTzp07p53H1NQUS5cunXUfF6pBfR947RfnaTajd+Pydw6s+3q3xb5nZ/369fv7VmD+wVw/kXtzVR1Ncg3wZJIXpxmbAbWBP3GqajuwHWB8fLwmJiamncTk5CQzjVmMBvV99zl84vWt7vAdEwPrvt5tse/RmtPyTlUd7a6PA4/RW655PckygO76eDf8CLCyb/MVwNG5PL8k6dzM+kg/yaXA26rqRHf7D4E/BXYDdwH3d9ePd5vsBr6Z5IvAu4DVwN45zL0pg76zZsuNpxbVkb2k+TeX5Z0x4LEkZ/bzzar670l+DOxK8jHgVeB2gKo6mGQX8DxwCri3qk7PafaSpHMy69Cvqr8B3j2g/vfA+95km23Attk+pyRpbvxEriQ1xO/T11vWm333/tnvZfi9+9LwPNKXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BC/huE8e7OvGpCk+eCRviQ1xNCXpIYY+pLUEENfkhpi6EtSQzx7Rxe8Yc+A8o+tSB7pS1JTDH1JasiCL+8k2QD8Z2AJ8OdVdf9Cz0GajstFWswWNPSTLAG+Avxr4Ajw4yS7q+r5hZzH
fPNTtpLeqhb6SH8tcKiq/gYgyU5gI7CoQl9vTaP+YbzQP9y33HiKu7vn9LcMzVaqauGeLPljYENV/bvu/keAf1lVnzhr3GZgc3f3nwMvzbDrq4C/G/F0LwT23Rb7bstc+/6nVXX12cWFPtLPgNpv/dSpqu3A9qF3muyrqvG5TOxCZN9tse+2zFffC332zhFgZd/9FcDRBZ6DJDVroUP/x8DqJNcm+UfAJmD3As9Bkpq1oMs7VXUqySeAv6R3yubXqurgCHY99FLQImPfbbHvtsxL3wv6Rq4k6fzyE7mS1BBDX5IackGHfpINSV5KcijJ1vM9n/mU5GtJjid5rq92ZZInk7zcXV9xPuc4aklWJvlBkheSHEzyya6+2Pt+R5K9SX7a9f25rr6o+z4jyZIk/zvJd7v7rfR9OMmBJM8m2dfVRt77BRv6fV/p8EfAdcCHk1x3fmc1rx4BNpxV2wrsqarVwJ7u/mJyCthSVb8PrAPu7V7jxd73SeC9VfVuYA2wIck6Fn/fZ3wSeKHvfit9A6yvqjV95+ePvPcLNvTp+0qHqvq/wJmvdFiUquop4GdnlTcCO7rbO4DbFnJO862qjlXVT7rbJ+gFwXIWf99VVVPd3Yu6S7HI+wZIsgL4APDnfeVF3/c0Rt77hRz6y4G/7bt/pKu1ZKyqjkEvIIFrzvN85k2SVcB7gB/RQN/dEsezwHHgyapqom/gPwH/EfhNX62FvqH3g/1/JNnffRUNzEPvF/JfzhrqKx104UuyFPg28Kmq+mUy6KVfXKrqNLAmyeXAY0luOM9TmndJPggcr6r9SSbO83TOh5ur6miSa4Ank7w4H09yIR/p+5UO8HqSZQDd9fHzPJ+RS3IRvcD/RlV9pysv+r7PqKqfA5P03s9Z7H3fDPybJIfpLde+N8nXWfx9A1BVR7vr48Bj9JawR977hRz6fqVDr9+7utt3AY+fx7mMXHqH9A8DL1TVF/seWux9X90d4ZPkEuAW4EUWed9VdV9VraiqVfT+f/6fVfVvWeR9AyS5NMnvnLkN/CHwHPPQ+wX9idwk76e3BnjmKx22nd8ZzZ8k3wIm6H3d6uvAZ4D/BuwC/gnwKnB7VZ39Zu8FK8m/Av4XcID/v8b7aXrr+ou5739B7027JfQOzHZV1Z8m+ccs4r77dcs7/6GqPthC30n+Gb2je+gtu3+zqrbNR+8XdOhLks7Nhby8I0k6R4a+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1Jasj/A72+cYT5usntAAAAAElFTkSuQmCC\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "df_contracts.a_age.hist(bins=30)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 9303.000000\n",
+ "mean 14.266688\n",
+ "std 2.902770\n",
+ "min 1.000000\n",
+ "25% 12.000000\n",
+ "50% 14.000000\n",
+ "75% 16.000000\n",
+ "max 50.000000\n",
+ "Name: a_age, dtype: float64"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_contracts.a_age.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAVUElEQVR4nO3dfaxc9Z3f8fdnIWEpDgFKcmXZdE1aNy0PyoOvKFVKZBe0OAmN6QOVI7o4LZVVRFaJmpUwjdRu/7DqbcVKgSzsuiHFNOx63exGtjZiu8jNbVSJhMUJiTGE4gSXePHa3ZAHnEakpt/+MYdosO/1HV/mztzh935Joznznd858z3njj/3zG/mjlNVSJLa8AvjbkCSNDqGviQ1xNCXpIYY+pLUEENfkhpy9rgbmM/FF19cq1atWtC6P/nJTzjvvPOG29AisM/hm5Re7XP4JqXXxe5z3759f1FVbzvljqpa0pc1a9bUQn35y19e8LqjZJ/DNym92ufwTUqvi90n8HjNkqlO70hSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1JCBQj/JBUm+kOTbSZ5O8reTXJTkkSTPdtcX9o2/M8nBJM8kub6vvibJ/u6+u5NkMXZKkjS7Qc/0Pw38cVX9DeBdwNPAFmBvVa0G9na3SXIZsBG4HFgP3JvkrG479wGbgdXdZf2Q9kOSNIB5v4YhyfnA+4GPAlTVz4CfJdkArO2G7QBmgDuADcDOqnoZeC7JQeCqJIeA86vq0W67DwI3Ag8PbW9Osv/PfsRHt3xp3nGHtn1osVqQpCVlkDP9dwD/G/hPSb6R5LNJzgOmquoIQHf99m78CuB7fesf7moruuWT65KkERnkC9fOBt4L/GpVfS3Jp+mmcuYw2zx9naZ+6gaSzfSmgZiammJmZmaANk81dS588soT845b6PaH5fjx42PvYRCT0idMTq/2OXyT0uu4+hwk9A8Dh6vqa93tL9AL/aNJllfVkSTLgWN94y/pW38l8EJXXzlL/RRVtR3YDjA9PV1r164dbG9Ocs9Du7lr//y7eOjmhW1/WGZmZljoPo7SpPQJk9OrfQ7fpPQ6rj7nnd6pqj8HvpfknV3pWuApYA+wqattAnZ3y3uAjUnOSXIpvTdsH+umgF5KcnX3qZ1b+taRJI3AoN+n/6vAQ0neDHwX+Kf0fmHsSnIr8DxwE0BVHUiyi94vhhPA7VX1Sred24AHgHPpvYG7aG/iSpJONVDoV9UTwPQsd107x/itwNZZ6o8DV5xBf5KkIfIvciWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDVkoNBPcijJ/iRPJHm8q12U5JEkz3bXF/aNvzPJwSTPJLm+r76m287BJHcnyfB3SZI0lzM5019XVe+uqunu9hZgb1WtBvZ2t0lyGbARuBxYD9yb5KxunfuAzcDq7rL+9e+CJGlQr2d6ZwOwo1veAdzYV99ZVS9X1XPAQeCqJMuB86vq0aoq4MG+dSRJI5Be/s4zKHkO+AFQwO9U1fYkP6yqC/rG/KCqLkzyGeCrVfX5rn4/8DBwCNhWVdd19WuAO6rqhlkebzO9VwRMTU2t2blz54J27tiLP+LoT+cfd+WKty5o+8Ny/Phxli1bNtYeBjEpfcLk9GqfwzcpvS52n+vWrdvXNzPzc2cPuP77quqFJG8HHkny7dOMnW2evk5TP7VYtR3YDjA9PV1r164dsM3Xuueh3dy1f/5dPHTzwrY/LDMzMyx0H0dpUvqEyenVPodvUnodV58DTe9U1Qvd9THgi8BVwNFuyobu+lg3/DBwSd/qK4EXuvrKWeqSpBGZN/STnJfkLa8uA78MPAnsATZ1wzYBu7vl
PcDGJOckuZTeG7aPVdUR4KUkV3ef2rmlbx1J0ggMMr0zBXyx+3Tl2cDvVtUfJ/lTYFeSW4HngZsAqupAkl3AU8AJ4PaqeqXb1m3AA8C59Ob5Hx7ivkiS5jFv6FfVd4F3zVL/PnDtHOtsBbbOUn8cuOLM25QkDYN/kStJDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDRk49JOcleQbSf6ou31RkkeSPNtdX9g39s4kB5M8k+T6vvqaJPu7++5OkuHujiTpdM7kTP/jwNN9t7cAe6tqNbC3u02Sy4CNwOXAeuDeJGd169wHbAZWd5f1r6t7SdIZGSj0k6wEPgR8tq+8AdjRLe8Abuyr76yql6vqOeAgcFWS5cD5VfVoVRXwYN86kqQRSC9/5xmUfAH4d8BbgF+rqhuS/LCqLugb84OqujDJZ4CvVtXnu/r9wMPAIWBbVV3X1a8B7qiqG2Z5vM30XhEwNTW1ZufOnQvauWMv/oijP51/3JUr3rqg7Q/L8ePHWbZs2Vh7GMSk9AmT06t9Dt+k9LrYfa5bt25fVU2fXD97vhWT3AAcq6p9SdYO8FizzdPXaeqnFqu2A9sBpqena+3aQR72VPc8tJu79s+7ixy6eWHbH5aZmRkWuo+jNCl9wuT0ap/DNym9jqvP+RMR3gd8OMkHgV8Ezk/yeeBokuVVdaSbujnWjT8MXNK3/krgha6+cpa6JGlE5p3Tr6o7q2plVa2i9wbtf6uqfwLsATZ1wzYBu7vlPcDGJOckuZTeG7aPVdUR4KUkV3ef2rmlbx1J0ggMcqY/l23AriS3As8DNwFU1YEku4CngBPA7VX1SrfObcADwLn05vkffh2PL0k6Q2cU+lU1A8x0y98Hrp1j3FZg6yz1x4ErzrRJSdJw+Be5ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktSQeUM/yS8meSzJN5McSPJvu/pFSR5J8mx3fWHfOncmOZjkmSTX99XXJNnf3Xd3kizObkmSZjPImf7LwN+tqncB7wbWJ7ka2ALsrarVwN7uNkkuAzYClwPrgXuTnNVt6z5gM7C6u6wf3q5IkuYzb+hXz/Hu5pu6SwEbgB1dfQdwY7e8AdhZVS9X1XPAQeCqJMuB86vq0aoq4MG+dSRJI5Be/s4zqHemvg/4a8BvVdUdSX5YVRf0jflBVV2Y5DPAV6vq8139fuBh4BCwraqu6+rXAHdU1Q2zPN5meq8ImJqaWrNz584F7dyxF3/E0Z/OP+7KFW9d0PaH5fjx4yxbtmysPQxiUvqEyenVPodvUnpd7D7XrVu3r6qmT66fPcjKVfUK8O4kFwBfTHLFaYbPNk9fp6nP9njbge0A09PTtXbt2kHaPMU9D+3mrv3z7+Khmxe2/WGZmZlhofs4SpPSJ0xOr/Y5fJPS67j6PKNP71TVD4EZenPxR7spG7rrY92ww8AlfautBF7o6itnqUuSRmSQT++8rTvDJ8m5wHXAt4E9wKZu2CZgd7e8B9iY5Jwkl9J7w/axqjoCvJTk6u5TO7f0rSNJGoFBpneWAzu6ef1fAHZV1R8leRTYleRW4HngJoCqOpBkF/AUcAK4vZseArgNeAA4l948/8PD3BlJ0unNG/pV9S3gPbPUvw9cO8c6W4Gts9QfB073foAkaRH5F7mS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi
6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1JB5Qz/JJUm+nOTpJAeSfLyrX5TkkSTPdtcX9q1zZ5KDSZ5Jcn1ffU2S/d19dyfJ4uyWJGk2g5zpnwA+WVV/E7gauD3JZcAWYG9VrQb2drfp7tsIXA6sB+5Ncla3rfuAzcDq7rJ+iPsiSZrHvKFfVUeq6uvd8kvA08AKYAOwoxu2A7ixW94A7Kyql6vqOeAgcFWS5cD5VfVoVRXwYN86kqQRSC9/BxycrAK+AlwBPF9VF/Td94OqujDJZ4CvVtXnu/r9wMPAIWBbVV3X1a8B7qiqG2Z5nM30XhEwNTW1ZufOnQvauWMv/oijP51/3JUr3rqg7Q/L8ePHWbZs2Vh7GMSk9AmT06t9Dt+k9LrYfa5bt25fVU2fXD970A0kWQb8AfCJqvrxaabjZ7ujTlM/tVi1HdgOMD09XWvXrh20zde456Hd3LV//l08dPPCtj8sMzMzLHQfR2lS+oTJ6dU+h29Seh1XnwN9eifJm+gF/kNV9Ydd+Wg3ZUN3fayrHwYu6Vt9JfBCV185S12SNCKDfHonwP3A01X1m3137QE2dcubgN199Y1JzklyKb03bB+rqiPAS0mu7rZ5S986kqQRGGR6533ArwD7kzzR1f4VsA3YleRW4HngJoCqOpBkF/AUvU/+3F5Vr3Tr3QY8AJxLb57/4eHshiRpEPOGflX9D2afjwe4do51tgJbZ6k/Tu9NYEnSGPgXuZLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDBvmP0d/wVm350kDjDm370CJ3IkmLyzN9SWqIoS9JDTH0Jakhhr4kNcTQl6SGzBv6ST6X5FiSJ/tqFyV5JMmz3fWFfffdmeRgkmeSXN9XX5Nkf3ff3Uky/N2RJJ3OIGf6DwDrT6ptAfZW1Wpgb3ebJJcBG4HLu3XuTXJWt859wGZgdXc5eZuSpEU2b+hX1VeAF08qbwB2dMs7gBv76jur6uWqeg44CFyVZDlwflU9WlUFPNi3jiRpRBY6pz9VVUcAuuu3d/UVwPf6xh3uaiu65ZPrkqQRGvZf5M42T1+nqc++kWQzvakgpqammJmZWVAzU+fCJ688saB1Z7PQPuZz/PjxRdv2ME1KnzA5vdrn8E1Kr+Pqc6GhfzTJ8qo60k3dHOvqh4FL+satBF7o6itnqc+qqrYD2wGmp6dr7dq1C2rynod2c9f+4f1eO3TzwvqYz8zMDAvdx1GalD5hcnq1z+GblF7H1edCp3f2AJu65U3A7r76xiTnJLmU3hu2j3VTQC8lubr71M4tfetIkkZk3tPgJL8HrAUuTnIY+DfANmBXkluB54GbAKrqQJJdwFPACeD2qnql29Rt9D4JdC7wcHeRJI3QvKFfVR+Z465r5xi/Fdg6S/1x4Ioz6k6SNFT+Ra4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQ84edwOTZNWWLw007tC2Dy1yJ5K0MJ7pS1JDDH1JaoihL0kNcU5/EQw69w/O/0sarZGHfpL1wKeBs4DPVtW2UfewlKza8iU+eeUJPjrPLwp/OUgahpFO7yQ5C/gt4APAZcBHklw2yh4kqWWjPtO/CjhYVd8FSLIT2AA8NeI+Js6ZTBkNYtBXDv2PO8grkmE9rqTFkaoa3YMl/whYX1X/vLv9K8DfqqqPnTRuM7C5u/lO4JkFPuTFwF8scN1Rss/hm5Re7XP4JqXXxe7zl6rqbScXR32mn1lqp/zWqartwPbX/WDJ41U1/Xq3s9jsc/gmpVf7HL5J6XVcfY76I5uHgUv6bq8EXhhxD5LUrFGH/p8C
q5NcmuTNwEZgz4h7kKRmjXR6p6pOJPkY8F/pfWTzc1V1YBEf8nVPEY2IfQ7fpPRqn8M3Kb2Opc+RvpErSRovv4ZBkhpi6EtSQ96QoZ9kfZJnkhxMsmXc/bwqySVJvpzk6SQHkny8q/96kj9L8kR3+eC4ewVIcijJ/q6nx7vaRUkeSfJsd33hmHt8Z99xeyLJj5N8Yikc0ySfS3IsyZN9tTmPX5I7u+fsM0muXwK9/ock307yrSRfTHJBV1+V5Kd9x/a3x9znnD/rcR3TOfr8/b4eDyV5oquP9nhW1RvqQu8N4u8A7wDeDHwTuGzcfXW9LQfe2y2/Bfif9L6O4teBXxt3f7P0ewi4+KTavwe2dMtbgN8Yd58n/ez/HPilpXBMgfcD7wWenO/4dc+DbwLnAJd2z+GzxtzrLwNnd8u/0dfrqv5xS+CYzvqzHucxna3Pk+6/C/jX4zieb8Qz/Z9/1UNV/Qx49asexq6qjlTV17vll4CngRXj7eqMbQB2dMs7gBvH18oprgW+U1X/a9yNAFTVV4AXTyrPdfw2ADur6uWqeg44SO+5PBKz9VpVf1JVJ7qbX6X3dzVjNccxncvYjunp+kwS4B8DvzeKXk72Rgz9FcD3+m4fZgkGa5JVwHuAr3Wlj3Uvoz837imTPgX8SZJ93VdjAExV1RHo/RID3j627k61kdf+Q1qKx3Su47fUn7f/DHi47/alSb6R5L8nuWZcTfWZ7We9VI/pNcDRqnq2rzay4/lGDP2BvuphnJIsA/4A+ERV/Ri4D/irwLuBI/Re+i0F76uq99L7VtTbk7x/3A3Npftjvw8D/6UrLdVjOpcl+7xN8ingBPBQVzoC/JWqeg/wL4HfTXL+uPpj7p/1Uj2mH+G1JycjPZ5vxNBf0l/1kORN9AL/oar6Q4CqOlpVr1TV/wP+IyN8WX86VfVCd30M+CK9vo4mWQ7QXR8bX4ev8QHg61V1FJbuMWXu47ckn7dJNgE3ADdXNwHdTZd8v1veR2+u/K+Pq8fT/KyX3DFNcjbwD4Dff7U26uP5Rgz9JftVD91c3v3A01X1m3315X3D/j7w5MnrjlqS85K85dVlem/qPUnvWG7qhm0Cdo+nw1O85uxpKR7TzlzHbw+wMck5SS4FVgOPjaG/n0vvPzy6A/hwVf2fvvrb0vu/MUjyDnq9fnc8XZ72Z73kjilwHfDtqjr8amHkx3NU7xiP8gJ8kN4nY74DfGrc/fT19Xfovbz8FvBEd/kg8J+B/V19D7B8CfT6DnqffPgmcODV4wj8ZWAv8Gx3fdES6PUvAd8H3tpXG/sxpfdL6Ajwf+mddd56uuMHfKp7zj4DfGAJ9HqQ3pz4q8/V3+7G/sPuOfFN4OvA3xtzn3P+rMd1TGfrs6s/APyLk8aO9Hj6NQyS1JA34vSOJGkOhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqyP8HoM4Qkzxoov4AAAAASUVORK5CYII=\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "df_contracts.annual_salary.hist(bins=30)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 7870.000000\n",
+ "mean 5.916921\n",
+ "std 6.985214\n",
+ "min 0.166667\n",
+ "25% 3.000000\n",
+ "50% 4.000000\n",
+ "75% 6.000000\n",
+ "max 180.000000\n",
+ "Name: annual_salary, dtype: float64"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_contracts.annual_salary.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAD4CAYAAADo30HgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAVkklEQVR4nO3df4wc91nH8feD3Qa318YOoYexLRyQVUhifsSnECit7pRCTBPVARHkKlAHgqyitATkSnWoRPnHwoCC1BJSZHBUl0S9mjQlJqmhkclRIdUJcUh7cdw0LjGpE9eGNkl7JQpcePhjx7Df697d7s7u3l38fkmnnf3Od2aenZ3bz82PnYvMRJKks75roQuQJC0uBoMkqWAwSJIKBoMkqWAwSJIKyxe6gPlceOGFuX79+rb7f/vb3+b1r399/wqqyfrqsb56rK+epVTfkSNH/iMzv7erGWXmov7ZtGlTduLBBx/sqP+gWV891leP9dWzlOoDHskuP3c9lCRJKhgMkqSCwSBJKhgMkqSCwSBJKhgMkqSCwSBJKhgMkqSCwSBJKiz6W2K8mqzfeT87Nk5zw877257mxO6r+1iRJH0n9xgkSQWDQZJUMBgkSQWDQZJUMBgkSQWDQZJUMBgkSQWDQZJUMBgkSQWDQZJUmDcYIuKOiDgTEY83tf1xRHwpIr4YEZ+OiJVN426JiOMR8WREXNXUvikiJqtxH4mI6PmrkSTV1s4ew8eAzTPaHgAuzcwfBb4M3AIQERcDW4FLqmluj4hl1TQfBbYDG6qfmfOUJC0C8wZDZn4O+MaMts9m5nT19DCwthreAoxn5suZ+TRwHLg8IlYDb8zMz2dmAh8Hru3Ra5Ak9VA0Pqfn6RSxHrgvMy9tMe5vgU9m5p0RcRtwODPvrMbtBQ4CJ4Ddmfn2qv2twAcy85pZlredxt4Fw8PDm8bHx9t+QVNTUwwNDbXdf5Amn32R4RVw+qX2p9m45vz+FdTCYl5/YH11WV89S6m+sbGxI5k50s18at12OyI+CEwDd51tatEt52hvKTP3AHsARkZGcnR0tO2aJiYm6KT/IN1Q3Xb71sn2V/uJ60f7V1ALi3n9gfXVZX31nCv1dR0MEbENuAa4Mv9/t+MksK6p21rguap9bYt2SdIi09XlqhGxGfgA8M7M/M+mUQeArRFxXkRcROMk88OZeQr4VkRcUV2N9G7g3pq1S5L6YN49hoj4BDAKXBgRJ4EP0bgK6Tzggeqq08OZ+Z7MPBoR+4EnaBxiuikzX6lm9Zs0rnBaQeO8w8HevhRJUi/MGwyZ+a4WzXvn6L8L2NWi/RHgO05eS5IWF7/5LEkq1LoqSYvP+p33d9T/xO6r+1SJpKXKPQZJUsFgkCQVDAZJUsFgkCQVDAZJUsFgkCQVDAZJUsFgkCQVDAZJUsFgkCQVDAZJUsFgkCQVDAZJUsFgkCQVDAZJUsFgkCQVDAZJUsFgkCQVDAZJUsFgkCQVDAZJUmHeYIiIOyLiTEQ83tR2QUQ8EBFPVY+rmsbdEhHHI+LJiLiqqX1TRExW4z4SEdH7lyNJqqudPYaPAZtntO0EDmXmBuBQ9ZyIuBjYClxSTXN7RCyrpvkosB3YUP3MnKckaRGYNxgy83PAN2Y0bwH2VcP7gGub2scz8+XMfBo4DlweEauBN2bm5zMzgY83TSNJWkSi8Tk9T6eI9cB9mXlp9fyFzFzZNP75zFwVEbcBhzPzzqp9L3AQOAHszsy3V+1vBT6QmdfMsrztNPYuGB4e3jQ+Pt72C5qammJoaKjt/oM0+eyLDK+A0y+1P83GNed3vIxOzJz/Yl5/YH11WV89S6m+sbGxI5k50s18lve0Kmh13iDnaG8pM/cAewBGRkZydHS07QImJibopP8g3bDzfnZsnObWyfZX+4nrRzteRidmzn8xrz+wvrqsr55zpb5ur0o6XR0eono8U7WfBNY19VsLPFe1r23RLklaZLoNhgPAtmp4
G3BvU/vWiDgvIi6icZL54cw8BXwrIq6orkZ6d9M0kqRFZN5jGhHxCWAUuDAiTgIfAnYD+yPiRuAZ4DqAzDwaEfuBJ4Bp4KbMfKWa1W/SuMJpBY3zDgd7+kokST0xbzBk5rtmGXXlLP13AbtatD8CXNpRdZKkgfObz5KkQq+vStISs37GVUw7Nk7PeWXTid1X97skSQvMPQZJUsFgkCQVDAZJUsFzDDXMPD4vSa8G7jFIkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgq1giEificijkbE4xHxiYj47oi4ICIeiIinqsdVTf1viYjjEfFkRFxVv3xJUq91HQwRsQb4LWAkMy8FlgFbgZ3AoczcAByqnhMRF1fjLwE2A7dHxLJ65UuSeq3uoaTlwIqIWA68DngO2ALsq8bvA66thrcA45n5cmY+DRwHLq+5fElSj0Vmdj9xxM3ALuAl4LOZeX1EvJCZK5v6PJ+ZqyLiNuBwZt5Zte8FDmbm3S3mux3YDjA8PLxpfHy87ZqmpqYYGhrq+jV1YvLZFzueZngFnH6p/f4b15zf0fy7qanZfPV1Wk+vDfL97Yb11WN99TTXNzY2diQzR7qZz/JuC6jOHWwBLgJeAP46In5lrklatLVMpczcA+wBGBkZydHR0bbrmpiYoJP+ddyw8/6Op9mxcZpbJ9tf7SeuH+1o/t3U1Gy++jqtp9cG+f52w/rqsb56elVfnUNJbweezsx/z8z/Bu4Bfho4HRGrAarHM1X/k8C6punX0jj0JElaROoEwzPAFRHxuogI4ErgGHAA2Fb12QbcWw0fALZGxHkRcRGwAXi4xvIlSX3Q9aGkzHwoIu4GHgWmgX+hcfhnCNgfETfSCI/rqv5HI2I/8ETV/6bMfKVm/ZKkHus6GAAy80PAh2Y0v0xj76FV/100TlZriVrf4TmME7uv7lMlkvrFbz5LkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgoGgySpUOsmeuq/Tm9aJ0l1uccgSSoYDJKkgsEgSSoYDJKkgsEgSSoYDJKkgsEgSSoYDJKkgsEgSSoYDJKkQq1giIiVEXF3RHwpIo5FxE9FxAUR8UBEPFU9rmrqf0tEHI+IJyPiqvrlS5J6re4ew4eBv8vMHwZ+DDgG7AQOZeYG4FD1nIi4GNgKXAJsBm6PiGU1ly9J6rGugyEi3gi8DdgLkJn/lZkvAFuAfVW3fcC11fAWYDwzX87Mp4HjwOXdLl+S1B+Rmd1NGPHjwB7gCRp7C0eAm4FnM3NlU7/nM3NVRNwGHM7MO6v2vcDBzLy7xby3A9sBhoeHN42Pj7dd19TUFENDQ129pk5NPvtix9MMr4DTL/WhmB7pdX0b15zfu5kx2Pe3G9ZXj/XV01zf2NjYkcwc6WY+dW67vRy4DHhfZj4UER+mOmw0i2jR1jKVMnMPjdBhZGQkR0dH2y5qYmKCTvrXcUMXt8TesXGaWycX793Oe13fietHezYvGOz72w3rq8f66ulVfXXOMZwETmbmQ9Xzu2kExemIWA1QPZ5p6r+uafq1wHM1li9J6oOugyEzvwZ8NSLeXDVdSeOw0gFgW9W2Dbi3Gj4AbI2I8yLiImAD8HC3y5ck9UfdYwbvA+6KiNcC/wr8Go2w2R8RNwLPANcBZObRiNhPIzymgZsy85Way5ck9VitYMjMx4BWJzeunKX/LmBXnWVKkvrLbz5LkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgoGgySpYDBIkgq1gyEilkXEv0TEfdXzCyLigYh4qnpc1dT3log4HhFPRsRVdZctSeq9Xuwx3Awc
a3q+EziUmRuAQ9VzIuJiYCtwCbAZuD0ilvVg+ZKkHqoVDBGxFrga+Mum5i3Avmp4H3BtU/t4Zr6cmU8Dx4HL6yxfktR7kZndTxxxN/AHwBuA92fmNRHxQmaubOrzfGauiojbgMOZeWfVvhc4mJl3t5jvdmA7wPDw8Kbx8fG2a5qammJoaKjr19SJyWdf7Hia4RVw+qU+FNMjva5v45rzezczBvv+dsP66rG+eprrGxsbO5KZI93MZ3m3BUTENcCZzDwSEaPtTNKirWUqZeYeYA/AyMhIjo62M/uGiYkJOulfxw077+94mh0bp7l1suvV3ne9ru/E9aM9mxcM9v3thvXVY3319Kq+Op8AbwHeGRHvAL4beGNE3AmcjojVmXkqIlYDZ6r+J4F1TdOvBZ6rsXxJUh90fY4hM2/JzLWZuZ7GSeV/yMxfAQ4A26pu24B7q+EDwNaIOC8iLgI2AA93XbkkqS/6cUxjN7A/Im4EngGuA8jMoxGxH3gCmAZuysxX+rB8SVINPQmGzJwAJqrhrwNXztJvF7CrF8uUJPWH33yWJBUMBklSYfFeN6lXhfUdXtJ7YvfVfapEUrvcY5AkFQwGSVLBYJAkFQwGSVLBYJAkFQwGSVLBYJAkFQwGSVLBYJAkFQwGSVLBYJAkFbxXUpNO7+sjSa9G7jFIkgoGgySpYDBIkgoGgySpYDBIkgpelSTNo5Or1XZsnGa0f6VIA2EwaMnz34dKveWhJElSoetgiIh1EfFgRByLiKMRcXPVfkFEPBART1WPq5qmuSUijkfEkxFxVS9egCSpt+rsMUwDOzLzR4ArgJsi4mJgJ3AoMzcAh6rnVOO2ApcAm4HbI2JZneIlSb3XdTBk5qnMfLQa/hZwDFgDbAH2Vd32AddWw1uA8cx8OTOfBo4Dl3e7fElSf/TkHENErAd+AngIGM7MU9AID+BNVbc1wFebJjtZtUmSFpHIzHoziBgC/hHYlZn3RMQLmbmyafzzmbkqIv4M+Hxm3lm17wU+k5mfajHP7cB2gOHh4U3j4+Nt1zM1NcXQ0FBXr2Xy2Re7mq4Twyvg9Et9X0zXFrq+jWvOn3N8q/e30/dtvmXM1Mn8h1fAmy7obP6DVOf3YxCsr57m+sbGxo5k5kg386l1uWpEvAb4FHBXZt5TNZ+OiNWZeSoiVgNnqvaTwLqmydcCz7Wab2buAfYAjIyM5OjoaNs1TUxM0En/ZjcM4O6qOzZOc+vk4r1KeKHrO3H96JzjW72/nb5v8y1jpk7mv2PjNL/c5fY3CHV+PwbB+urpVX11rkoKYC9wLDP/pGnUAWBbNbwNuLepfWtEnBcRFwEbgIe7Xb4kqT/q/Gn4FuBXgcmIeKxq+11gN7A/Im4EngGuA8jMoxGxH3iCxhVNN2XmKzWWL0nqg66DITP/CYhZRl85yzS7gF3dLlOS1H+L92C3zknz3d5ix8bpgZwLks5l3hJDklQwGCRJBYNBklQwGCRJBYNBklQwGCRJBYNBklTwewzSEuS/M1U/uccgSSoYDJKkgsEgSSoYDJKkgiefdc7p9MStdK5xj0GSVDAYJEkFg0GSVPAcg6SBm+08z2z/iMkv6A2WewySpILBIEkqGAySpILnGKRzwNlj+rMdw5+p02P6/f5uSDfz97xE917VweAXmSSpc6/qYJCWAv+A0WIz8GCIiM3Ah4FlwF9m5u5B1yBpbobV/F7N/xNjoMEQEcuAPwN+FjgJ/HNEHMjMJwZZh9RPfqguDq/mD+5+G/Qew+XA8cz8V4CIGAe2AAaDpAXVTpC0e/K+2/k3W8igiswc3MIifgnYnJm/UT3/VeAnM/O9M/ptB7ZXT98MPNnBYi4E/qMH5faL9dVjffVYXz1Lqb4fyMzv7WYmg95jiBZt35FMmbkH2NPVAiIeycyRbqYdBOurx/rqsb56zpX6Bv0Ft5PAuqbna4HnBlyDJGkOgw6GfwY2RMRFEfFaYCtwYMA1SJLmMNBDSZk5HRHvBf6e
xuWqd2Tm0R4vpqtDUANkffVYXz3WV885Ud9ATz5LkhY/b6InSSoYDJKkwpINhojYHBFPRsTxiNjZYnxExEeq8V+MiMsGWNu6iHgwIo5FxNGIuLlFn9GIeDEiHqt+fm9Q9VXLPxERk9WyH2kxfiHX35ub1stjEfHNiPjtGX0Guv4i4o6IOBMRjze1XRARD0TEU9XjqlmmnXNb7WN9fxwRX6rev09HxMpZpp1zW+hjfb8fEc82vYfvmGXahVp/n2yq7UREPDbLtINYfy0/U/q2DWbmkvuhceL6K8APAq8FvgBcPKPPO4CDNL47cQXw0ADrWw1cVg2/Afhyi/pGgfsWcB2eAC6cY/yCrb8W7/XXaHxZZ8HWH/A24DLg8aa2PwJ2VsM7gT+cpf45t9U+1vdzwPJq+A9b1dfOttDH+n4feH8b7/+CrL8Z428Ffm8B11/Lz5R+bYNLdY/h/26tkZn/BZy9tUazLcDHs+EwsDIiVg+iuMw8lZmPVsPfAo4Bawax7B5asPU3w5XAVzLz3xZg2f8nMz8HfGNG8xZgXzW8D7i2xaTtbKt9qS8zP5uZ09XTwzS+N7QgZll/7Viw9XdWRATwy8Aner3cds3xmdKXbXCpBsMa4KtNz0/ynR+87fTpu4hYD/wE8FCL0T8VEV+IiIMRcclgKyOBz0bEkWjcgmSmRbH+aHzXZbZfyIVcfwDDmXkKGr+4wJta9Fks6/HXaewBtjLfttBP760Odd0xy2GQxbD+3gqczsynZhk/0PU34zOlL9vgUg2Gdm6t0dbtN/opIoaATwG/nZnfnDH6URqHR34M+FPgbwZZG/CWzLwM+Hngpoh424zxi2H9vRZ4J/DXLUYv9Ppr12JYjx8EpoG7Zuky37bQLx8Ffgj4ceAUjcM1My34+gPexdx7CwNbf/N8psw6WYu2OdfhUg2Gdm6tsaC334iI19B4A+/KzHtmjs/Mb2bmVDX8GeA1EXHhoOrLzOeqxzPAp2nsbjZbDLcv+Xng0cw8PXPEQq+/yumzh9eqxzMt+iz0drgNuAa4PqsDzjO1sS30RWaezsxXMvN/gL+YZbkLvf6WA78IfHK2PoNaf7N8pvRlG1yqwdDOrTUOAO+urq65Anjx7C5Xv1XHJPcCxzLzT2bp831VPyLichrvxdcHVN/rI+INZ4dpnKR8fEa3BVt/TWb9S20h11+TA8C2angbcG+LPgt2G5ho/FOsDwDvzMz/nKVPO9tCv+prPmf1C7Msd6Fvo/N24EuZebLVyEGtvzk+U/qzDfbzTHo/f2hcNfNlGmfbP1i1vQd4TzUcNP4p0FeASWBkgLX9DI1dtS8Cj1U/75hR33uBozSuEDgM/PQA6/vBarlfqGpYVOuvWv7raHzQn9/UtmDrj0ZAnQL+m8ZfYDcC3wMcAp6qHi+o+n4/8Jm5ttUB1XecxrHls9vgn8+sb7ZtYUD1/VW1bX2RxgfV6sW0/qr2j53d5pr6LsT6m+0zpS/boLfEkCQVluqhJElSnxgMkqSCwSBJKhgMkqSCwSBJKhgMkqSCwSBJKvwvlUId3avGll4AAAAASUVORK5CYII=\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "df_contracts[df_contracts.annual_salary < 20].annual_salary.hist(bins=25)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAATvklEQVR4nO3dfYxl9X3f8fenYBPCmgd3lRVmqZZKUImHxO1ONlQu7aztBmJbAquxtA41ILvaCOEqUUnLklYKkbXVNsrGFaJGXQvLUFKPaGwHZCAIo0wIEZTsEuJlIchr74ouILaOycLQlGTX3/5xz1Z3x/O0s3Pn3pnf+yWN5p7vPU/f+/CZM7975kyqCklSG/7OsHdAkrR8DH1JaoihL0kNMfQlqSGGviQ15PRh78B81q5dWxs2bBjIut955x3OOuusgax7VKz2Hu1v5VvtPQ6jv7Vr1/LYY489VlXXTL9v5EN/w4YN7N69eyDrnpycZHx8fCDrHhWrvUf7W/lWe4/D6i/J2pnqDu9IUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDRv4vcleiDdseHtq2D+74+NC2LWn0zXukn+TCJH+Y5KUk+5L8Sle/I8mrSZ7vvj7Wt8ztSfYneTnJ1X31jUn2dvfdmSSDaUuSNJOFHOkfBW6tqueSvA/Yk+Tx7r4vVtVv98+c5FJgC3AZ8AHg20kuqapjwN3AVuAZ4BHgGuDRpWlFkjSfeY/0q+r1qnquu/028BJwwRyLXAtMVNW7VXUA2A9sSnI+cHZVPV29f8x7H3DdqTYgSVq4nMw/Rk+yAXgSuBz4N8BNwFvAbnq/DbyZ5C7gmaq6v1vmHnpH8weBHVX10a5+FXBbVX1ihu1spfcbAevWrds4MTGxyPbmNjU1xZo1a5Z8vXtfPbLk61yoKy4454TpQfU4Kuxv5VvtPQ6rv82bN++pqrHp9QV/kJtkDfB14Fer6q0kdwNfAKr7vhP4LDDTOH3NUf/xYtUuYBfA2NhYDeqypIO65OlNw/wg9/rxE6a9bO3Kttr7g9Xf46j1t6BTNpO8h17g/25VfQOgqt6oqmNV9SPgy8CmbvZDwIV9i68HXuvq62eoS5KWyULO3glwD/BSVf1OX/38vtk+CbzQ3X4I2JLkjCQXARcDz1bV68DbSa7s1nkD8OAS9SFJWoCFDO98CPgMsDfJ813t14FPJ/kgvSGag8AvA1TVviQPAC/SO/Pnlu7MHYCbga8CZ9Ib5/fMHUlaRvOGflU9xczj8Y/Mscx2YPsM9d30PgSWJA2Bl2GQpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNWTe0E9yYZI/TPJSkn1JfqWrvz/J40m+230/r2+Z25PsT/Jykqv76huT7O3uuzNJBtOWJGkmpy9gnqPArVX1XJL3AXuSPA7cBDxRVTuSbAO2AbcluRTYAlwGfAD4dpJLquoYcDewFXgGeAS4Bnh0qZtSWzZse3go2z244+ND2a50KuY90q+q16vque7228BLwAXAtcC93Wz3Atd1t68FJqrq3ao6AOwHNiU5Hzi7qp6uqgLu61tGkrQM0svfBc6cbACeBC4HXqmqc/vue7OqzktyF/BMVd3f1e+hdzR/ENhRVR/t6lcBt1XVJ2bYzlZ6vxGwbt26jRMTE4tqbj5TU1OsWbNmyde799UjS77OhbrignNOmB5Uj6NiamqKA0eODWXb0x/rQVjtzx+s/h6H1d/mzZv3VNXY9PpChncASLIG+Drwq1X11hzD8TPdUXPUf7xYtQvYBTA2Nlbj4+ML3c2TMjk5ySDW
fdOQhhsADl4/fsL0oHocFZOTk+x86p2hbHv6Yz0Iq/35g9Xf46j1t6Czd5K8h17g/25VfaMrv9EN2dB9P9zVDwEX9i2+Hnitq6+foS5JWiYLOXsnwD3AS1X1O313PQTc2N2+EXiwr74lyRlJLgIuBp6tqteBt5Nc2a3zhr5lJEnLYCHDOx8CPgPsTfJ8V/t1YAfwQJLPAa8AnwKoqn1JHgBepHfmzy3dmTsANwNfBc6kN87vmTuStIzmDf2qeoqZx+MBPjLLMtuB7TPUd9P7EFiSNAT+Ra4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGrLgf6IiaXT4f4G1WB7pS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqyLyhn+QrSQ4neaGvdkeSV5M83319rO++25PsT/Jykqv76huT7O3uuzNJlr4dSdJcFnKk/1XgmhnqX6yqD3ZfjwAkuRTYAlzWLfOlJKd1898NbAUu7r5mWqckaYDmDf2qehL44QLXdy0wUVXvVtUBYD+wKcn5wNlV9XRVFXAfcN0i91mStEjpZfA8MyUbgG9V1eXd9B3ATcBbwG7g1qp6M8ldwDNVdX833z3Ao8BBYEdVfbSrXwXcVlWfmGV7W+n9VsC6des2TkxMLL7DOUxNTbFmzZolX+/eV48s+ToX6ooLzjlhelA9joqpqSkOHDk2lG1Pf6wHYbbnb1ivsUH03MJrdBj9bd68eU9VjU2vn77I9d0NfAGo7vtO4LPATOP0NUd9RlW1C9gFMDY2VuPj44vczblNTk4yiHXftO3hJV/nQh28fvyE6UH1OComJyfZ+dQ7Q9n29Md6EGZ7/ob1GhtEzy28Rkepv0WdvVNVb1TVsar6EfBlYFN31yHgwr5Z1wOvdfX1M9QlSctoUaHfjdEf90ng+Jk9DwFbkpyR5CJ6H9g+W1WvA28nubI7a+cG4MFT2G9J0iLMO7yT5GvAOLA2ySHgN4DxJB+kN0RzEPhlgKral+QB4EXgKHBLVR0fcL2Z3plAZ9Ib5390CfuQJC3AvKFfVZ+eoXzPHPNvB7bPUN8NXH5SeydJWlL+Ra4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhoy7z9Gl6TjNmx7eMnXeesVR7lpnvUe3PHxJd9uqzzSl6SGrOoj/fmOShZyhCFJq4lH+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1JB5Qz/JV5IcTvJCX+39SR5P8t3u+3l9992eZH+Sl5Nc3VffmGRvd9+dSbL07UiS5rKQI/2vAtdMq20Dnqiqi4EnummSXApsAS7rlvlSktO6Ze4GtgIXd1/T1ylJGrB5Q7+qngR+OK18LXBvd/te4Lq++kRVvVtVB4D9wKYk5wNnV9XTVVXAfX3LSJKWSXoZPM9MyQbgW1V1eTf9V1V1bt/9b1bVeUnuAp6pqvu7+j3Ao8BBYEdVfbSrXwXcVlWfmGV7W+n9VsC6des2TkxMLKq5va8emfP+dWfCG3+9qFWPrCsuOOeE6ampKdasWTOkvRm8qakpDhw5NpRtT3+sB2G252++1/ZKspD34XI81oMyrPfg5s2b91TV2PT6Ul9lc6Zx+pqjPqOq2gXsAhgbG6vx8fFF7cx8V9C89Yqj7Ny7ui40evD68ROmJycnWezjtxJMTk6y86l3hrLt6Y/1IMz2/K2mq8Mu5H24HI/1oIzae3CxZ++80Q3Z0H0/3NUPARf2zbceeK2rr5+hLklaRosN/YeAG7vbNwIP9tW3JDkjyUX0PrB9tqpeB95OcmV3
1s4NfctIkpbJvGMbSb4GjANrkxwCfgPYATyQ5HPAK8CnAKpqX5IHgBeBo8AtVXV8wPVmemcCnUlvnP/RJe1EkjSveUO/qj49y10fmWX+7cD2Geq7gctPau8kSUvKv8iVpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNOX3YOyBJ89mw7eGhbfvgjo8PbduD4JG+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGnFPpJDibZm+T5JLu72vuTPJ7ku9338/rmvz3J/iQvJ7n6VHdeknRyluJIf3NVfbCqxrrpbcATVXUx8EQ3TZJLgS3AZcA1wJeSnLYE25ckLdAghneuBe7tbt8LXNdXn6iqd6vqALAf2DSA7UuSZpGqWvzCyQHgTaCA/1pVu5L8VVWd2zfPm1V1XpK7gGeq6v6ufg/waFX93gzr3QpsBVi3bt3GiYmJRe3f3lePzHn/ujPhjb9e1KpH1hUXnHPC9NTUFGvWrBnS3gze1NQUB44cG8q2pz/WgzDb8zffa3slGfX34ak+z8N6D27evHlP3wjM/3eqF1z7UFW9luSngMeT/MUc82aG2ow/capqF7ALYGxsrMbHxxe1czfNc5GmW684ys69q+uacwevHz9henJyksU+fivB5OQkO596Zyjbnv5YD8Jsz998r+2VZNTfh6f6PI/ae/CUhneq6rXu+2Hgm/SGa95Icj5A9/1wN/sh4MK+xdcDr53K9iVJJ2fRoZ/krCTvO34b+HngBeAh4MZuthuBB7vbDwFbkpyR5CLgYuDZxW5fknTyTuV3qnXAN5McX89/r6o/SPKnwANJPge8AnwKoKr2JXkAeBE4CtxSVcMZjJWkRi069Kvq+8DPzFD/S+AjsyyzHdi+2G1Kkk6Nf5ErSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGLPofo0ut27Dt4YFv49YrjnLTMmxH7fBIX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1Jasiyh36Sa5K8nGR/km3LvX1Jatmyhn6S04D/AvwCcCnw6SSXLuc+SFLLlvtIfxOwv6q+X1V/A0wA1y7zPkhSs1JVy7ex5BeBa6rqX3XTnwF+rqo+P22+rcDWbvIfAC8PaJfWAj8Y0LpHxWrv0f5WvtXe4zD6+wFAVV0z/Y7lvp5+Zqj92E+dqtoF7Br4ziS7q2ps0NsZptXeo/2tfKu9x1Hrb7mHdw4BF/ZNrwdeW+Z9kKRmLXfo/ylwcZKLkrwX2AI8tMz7IEnNWtbhnao6muTzwGPAacBXqmrfcu7DNAMfQhoBq71H+1v5VnuPI9Xfsn6QK0kaLv8iV5IaYuhLUkNWXegn+UqSw0le6KvdkeTVJM93Xx/r6u9Jcm+SvUleSnJ73zIbu/r+JHcmmel002U3U39d/V93l7fYl+S3+uq3dz28nOTqvvqK7y/JP0+yp+tjT5IP980/kv3ByT+H3X1/L8lUkl/rq41kj4t4jf50kqe7+t4kP9HVR7I/OOnX6WjlTFWtqi/gnwL/CHihr3YH8GszzPtLwER3+yeBg8CGbvpZ4B/T+9uCR4FfGHZvc/S3Gfg2cEY3/VPd90uBPwfOAC4Cvgector6+4fAB7rblwOv9i0zkv2dbI99938d+B/9r+NR7fEkn8PTge8AP9NN/91Rf40uoseRyplVd6RfVU8CP1zo7MBZSU4HzgT+BngryfnA2VX1dPWemfuA6waxvydrlv5uBnZU1bvdPIe7+rX0XmzvVtUBYD+wabX0V1V/VlXH/85j
H/ATSc4Y5f7gpJ9DklwHfJ9ej8drI9vjSfb388B3qurPu/pfVtWxUe4PTrrHkcqZVRf6c/h8ku90v5ad19V+D3gHeB14BfjtqvohcAG9PyQ77lBXG1WXAFcl+Z9J/ijJz3b1C4D/1Tff8T5WS3/9/gXwZ90bbqX1B7P0mOQs4DbgN6fNv9J6nO05vASoJI8leS7Jv+vqK60/mL3HkcqZ5b4Mw7DcDXyB3k/cLwA7gc/SuwDcMeADwHnAHyf5Ngu8XMQIOZ3e/l8J/CzwQJK/z+x9rIr+uqMjklwG/Cd6R42w8vqD2Z/D3wS+WFVT04Z7V1qPs/V3OvBPutr/AZ5Isgd4a4Z1jHJ/MHuPI5UzTYR+Vb1x/HaSLwPf6iZ/CfiDqvpb4HCSPwHGgD+md4mI40b9chGHgG90Ifhskh/Ru8jTbJe9OMTq6O9/J1kPfBO4oaq+1zf/SuoPZu/x54Bf7D4UPBf4UZL/S2+MfyX1ONdr9I+q6gcASR6hN1Z+PyurP5i9x5HKmSaGd7qxs+M+CRz/xP0V4MPpOYveT+i/qKrXgbeTXNl9mn4D8OCy7vTJ+X3gwwBJLgHeS+8qew8BW7px7ouAi4FnV0t/Sc4FHgZur6o/OT7zCuwPZumxqq6qqg1VtQH4z8B/rKq7VmCPv8/Mr9HHgJ9O8pPdmPc/A15cgf3B7D2OVs4M+pPi5f4CvkZv7Oxv6f3k/Rzw34C99M4SeAg4v5t3Db0zIvYBLwL/tm89Y/R+OHwPuIvur5eH/TVLf++ld2T0AvAc8OG++f9918PL9J0ZsBr6A/4DvbHS5/u+jp8xMZL9LeY57FvuDk48e2cke1zEa/Rfdu/BF4DfGvX+FvE6Hamc8TIMktSQJoZ3JEk9hr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqyP8DDE9W+GtusxEAAAAASUVORK5CYII=\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "df_contracts.startY.hist(bins=10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1.0 0.987293\n",
+ "0.0 0.012707\n",
+ "Name: a_gender, dtype: float64"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_contracts.a_gender.value_counts(1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9401.0"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_contracts.a_gender.sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9401.0"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_contracts.a_gender.sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3"
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "sum([True,False,False,True,True])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.026105873821609893"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "1-(df_contracts.a_gender.sum()/df_contracts.shape[0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2.6105873821609893"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
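+    "# percentage of female apprentices, taken as the share of contracts whose a_gender is not 1 (rows with a missing a_gender are included in this share)\n",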
+ "(1-(df_contracts.a_gender.sum()/df_contracts.shape[0]))*100"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2.3723194861701047"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "# percentage of female masters, taken as the share of contracts whose m_gender is not 1\n",
+ "(1-(df_contracts.m_gender.sum()/df_contracts.shape[0]))*100"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "73.10924369747899"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "# percentage of female apprentices (a_gender == 0) in contracts starting before 1800 whose master is male\n",
+ "(df_contracts[(df_contracts.a_gender == 0) & (df_contracts.startY < 1800)].m_gender.sum()\\\n",
+ " /df_contracts[(df_contracts.a_gender == 0) & (df_contracts.startY < 1800)].shape[0])*100"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "98.10528582193993 %\n"
+ ]
+ }
+ ],
+ "source": [
+    "# percentage of male apprentices (a_gender == 1) in contracts starting before 1800 whose master is male\n",
+ "print((df_contracts[(df_contracts.a_gender == 1) & (df_contracts.startY < 1800)].m_gender.sum()\\\n",
+ " /df_contracts[(df_contracts.a_gender == 1) & (df_contracts.startY < 1800)].shape[0])*100,\"%\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Looking at empirical distributions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAT2ElEQVR4nO3df4zk9X3f8eerh0OpiRM74NXlDveIdLbCj+QqVhTJbbTESbkYK+AqTg5RA7Wrsy0s2epVLaSR7MY6CbVx3FqJic4GgWWXCwqxQcEkITQrXAmK72zi44epD3OxlzvdySY1rGNdc/jdP+a7ZbLs7e7M7M3CfJ4PaTQzn+/38/1+5s3x2u985jvfSVUhSWrDP1jvAUiSxsfQl6SGGPqS1BBDX5IaYuhLUkNOW+8BrOSss86qLVu2DNzvBz/4Aa997WvXfkCvMtbhJdaixzr0THod9u/f/92qOntx+ys+9Lds2cK+ffsG7jc7O8vMzMzaD+hVxjq8xFr0WIeeSa9Dkr9eqt3pHUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1Jasgr/hu562XLDfcuu/zQTZePaSSStHY80pekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNWTH0k9ya5FiSx/ra/jDJo93tUJJHu/YtSX7Yt+wP+vpclORAkoNJPpkkp+QVSZJOajXn6d8G/B7w2YWGqvqNhcdJPg58v2/9p6tq2xLbuRnYCTwMfAnYDtw38IglSUNb8Ui/qh4EnltqWXe0/uvAHcttI8lG4HVV9VBVFb0/IFcOPFpJ0khG/UbuPweOVtU3+9rOTfI14Hngt6rqy8AmYK5vnbmubUlJdtJ7V8DU1BSzs7MDD2x+fn6ofgt2XXhi2eWjbHucRq3DJLEWPdahp9U6jBr6V/H3j/KPAG+qqu8luQj4YpLzgaXm7+tkG62qPcAegOnp6Rrmx4tH/dHj61a6DMPVw297nCb9x58HYS16rENPq3UYOvSTnAb8S+CihbaqOg4c7x7vT/I08GZ6R/ab+7pvBg4Pu29J0nBGOWXzl4BvVNX/n7ZJcnaSDd3jnwG2At+qqiPAC0ku6T4HuAa4e4R9S5KGsJpTNu8AHgLekmQuyXu7RTt4+Qe4vwB8PclfAX8EvL+qFj4E/gDwGeAg8DSeuSNJY7fi9E5VXXWS9uuWaLsLuOsk6+8DLhhwfJKkNeQ3ciWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGrOaH0W9NcizJY31tH03ybJJHu9vb+5bdmORgkqeSXNbXflGSA92yTybJ2r8cSdJyVnOkfxuwfYn2T1TVtu72JYAk5wE7gPO7Pp9KsqFb/2ZgJ7C1uy21TUnSKbRi6FfVg8Bzq9zeFcDeqjpeVc8AB4GLk2wEXldVD1VVAZ8FrhxyzJKkIZ02Qt8PJrkG2Afsqqq/ATYBD/etM9e1/V33eHH7kpLspPeugKmpKWZnZwce3Pz8/FD9Fuy68MSyy0fZ9jiNWodJYi16rENPq3UYNvRvBj4GVHf/ceA9wFLz9LVM+5Kqag+wB2B6erpmZmYGHuDs7CzD9Ftw3Q33Lrv80NXDb3ucRq3DJLEWPdahp9U6DHX2TlUdraoXq+pHwKeBi7tFc8A5fatuBg537ZuXaJckjdFQod/N0S94J7BwZs89wI4kpyc5l94Hto9U1RHghSSXdGftXAPcPcK4JUlDWHF6J8kdwAxwVpI54CPATJJt9KZoDgHvA6iqx5PcCTwBnACur6oXu019gN6ZQGcA93U3SdIYrRj6VXXVEs23LLP+bmD3Eu37gAsGGp0kaU35jVxJaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0
Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ1Z8ecStbQtN9x70mWHbrp8jCORpNVb8Ug/ya1JjiV5rK/tvyT5RpKvJ/lCkp/s2rck+WGSR7vbH/T1uSjJgSQHk3wySU7JK5IkndRqpnduA7YvarsfuKCqfg7438CNfcuerqpt3e39fe03AzuBrd1t8TYlSafYiqFfVQ8Czy1q+/OqOtE9fRjYvNw2kmwEXldVD1VVAZ8FrhxqxJKkoa3FnP57gD/se35ukq8BzwO/VVVfBjYBc33rzHVtS0qyk967AqamppidnR14UPPz80P1W7DrwhMrr3QSo+x3rY1ah0liLXqsQ0+rdRgp9JP8R+AE8Pmu6Qjwpqr6XpKLgC8mOR9Yav6+TrbdqtoD7AGYnp6umZmZgcc2OzvLMP0WXLfMB7UrOXT18Ptda6PWYZJYix7r0NNqHYYO/STXAu8A3tZN2VBVx4Hj3eP9SZ4G3kzvyL5/CmgzcHjYfUuShjPUefpJtgP/AfjVqvrbvvazk2zoHv8MvQ9sv1VVR4AXklzSnbVzDXD3yKOXJA1kxSP9JHcAM8BZSeaAj9A7W+d04P7uzMuHuzN1fgH47SQngBeB91fVwofAH6B3JtAZwH3dTZI0RiuGflVdtUTzLSdZ9y7grpMs2wdcMNDoJElrysswSFJDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWpI07+Ru9zv3ErSJPJIX5IaYuhLUkOant45VVaaNjp00+VjGokk/X0e6UtSQwx9SWqIoS9JDTH0Jakhhr4kNWTF0E9ya5JjSR7ra3tDkvuTfLO7f33fshuTHEzyVJLL+tovSnKgW/bJdL+oLkkan9Uc6d8GbF/UdgPwQFVtBR7onpPkPGAHcH7X51NJNnR9bgZ2Alu72+JtSpJOsRVDv6oeBJ5b1HwFcHv3+Hbgyr72vVV1vKqeAQ4CFyfZCLyuqh6qqgI+29dHkjQmw345a6qqjgBU1ZEkb+zaNwEP960317X9Xfd4cfuSkuyk966AqakpZmdnBx7g/Pz8iv12XXhi4O2uhWFez7BWU4dWWIse69DTah3W+hu5S83T1zLtS6qqPcAegOnp6ZqZmRl4ILOzs6zU77r1uuDagR8su3gtv7G7mjq0wlr0WIeeVusw7Nk7R7spG7r7Y137HHBO33qbgcNd++Yl2iVJYzRs6N8DXNs9vha4u699R5LTk5xL7wPbR7qpoBeSXNKdtXNNXx9J0pisOL2T5A5gBjgryRzwEeAm4M4k7wW+DbwLoKoeT3In8ARwAri+ql7sNvUBemcCnQHc190kSWO0YuhX1VUnWfS2k6y/G9i9RPs+4IKBRidJWlN+I1eSGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqyNChn+QtSR7tuz2f5MNJPprk2b72t/f1uTHJwSRPJblsbV6CJGm1Vvxh9JOpqqeAbQBJNgDPAl8A/jXwiar6nf71k5wH7ADOB34a+Iskb66qF4cdgyRpMGs1vfM24Omq+utl1rkC2FtVx6vqGeAgcPEa7V+StAprFfo7gDv6nn8wydeT3Jrk9V3bJuA7fevMdW2SpDFJVY22geTHgMPA+VV1NMkU8F2ggI8BG6vqPUl+H3ioqj7X9bsF+FJV3bXENncCOwGmpqYu2rt378Djmp+f58wzz1x2nQPPfn/g7Y7DhZt+Ys22tZo6tMJa9FiHnkmvw6WXXrq/qqYXtw89p9/nV4CvVtVRgIV7gCSfBv6kezoHnNPXbzO9PxYvU1V7gD0A09PTNTMzM/CgZmdnWanfdTfcO/B2x+HQ1TNrtq3V1KEV1qLHOvS0Woe1mN65ir6pnSQb+5a9E3ise3wPsCPJ6UnOBbYCj6zB/iVJqzTS
kX6SfwT8MvC+vub/nGQbvemdQwvLqurxJHcCTwAngOs9c0eSxmuk0K+qvwV+alHbu5dZfzewe5R9SpKG5zdyJakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDXE0Jekhhj6ktQQQ1+SGmLoS1JDDH1JaoihL0kNMfQlqSGGviQ1xNCXpIaMFPpJDiU5kOTRJPu6tjckuT/JN7v71/etf2OSg0meSnLZqIOXJA1mLY70L62qbVU13T2/AXigqrYCD3TPSXIesAM4H9gOfCrJhjXYvyRplU7F9M4VwO3d49uBK/va91bV8ap6BjgIXHwK9i9JOolRQ7+AP0+yP8nOrm2qqo4AdPdv7No3Ad/p6zvXtUmSxuS0Efu/taoOJ3kjcH+SbyyzbpZoqyVX7P0B2QkwNTXF7OzswAObn59fsd+uC08MvN1xGOb1nsxq6tAKa9FjHXparcNIoV9Vh7v7Y0m+QG+65miSjVV1JMlG4Fi3+hxwTl/3zcDhk2x3D7AHYHp6umZmZgYe2+zsLCv1u+6Gewfe7jgcunpmzba1mjq0wlr0WIeeVusw9PROktcm+fGFx8C/AB4D7gGu7Va7Fri7e3wPsCPJ6UnOBbYCjwy7f0nS4EY50p8CvpBkYTv/var+NMlXgDuTvBf4NvAugKp6PMmdwBPACeD6qnpxpNFLkgYydOhX1beAn1+i/XvA207SZzewe9h9SpJG4zdyJakhhr4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqiKEvSQ0x9CWpIYa+JDVk1Esr6xTYsszVPw/ddPkYRyJp0nikL0kNMfQlqSGGviQ1xNCXpIYY+pLUEENfkhpi6EtSQwx9SWqIoS9JDRk69JOck+QvkzyZ5PEkH+raP5rk2SSPdre39/W5McnBJE8luWwtXoAkafVGuQzDCWBXVX01yY8D+5Pc3y37RFX9Tv/KSc4DdgDnAz8N/EWSN1fViyOMQZI0gKGP9KvqSFV9tXv8AvAksGmZLlcAe6vqeFU9AxwELh52/5KkwaWqRt9IsgV4ELgA+LfAdcDzwD567wb+JsnvAQ9X1ee6PrcA91XVHy2xvZ3AToCpqamL9u7dO/CY5ufnOfPMM5dd58Cz3x94u+vtwk0/sezyxa9p6gw4+sPV9Z10q/k30QLr0DPpdbj00kv3V9X04vaRr7KZ5EzgLuDDVfV8kpuBjwHV3X8ceA+QJbov+RenqvYAewCmp6drZmZm4HHNzs6yUr/rlrma5SvVoatnll2++DXtuvAEHz9w2qr6TrrV/JtogXXoabUOI529k+Q19AL/81X1xwBVdbSqXqyqHwGf5qUpnDngnL7um4HDo+xfkjSYUc7eCXAL8GRV/W5f+8a+1d4JPNY9vgfYkeT0JOcCW4FHht2/JGlwo0zvvBV4N3AgyaNd228CVyXZRm/q5hDwPoCqejzJncAT9M78ud4zdyRpvIYO/ar6nyw9T/+lZfrsBnYPu09J0mj8Rq4kNcTQl6SGGPqS1BBDX5IaYuhLUkMMfUlqyMiXYdDk2LLCZSkO3XT5mEYi6VSZ6NBfKcRejSbxNUkaH6d3JKkhhr4kNcTQl6SGTPScvsbHD4GlVweP9CWpIR7pa6L1vwPZdeGJl/2ymO9A1BqP9CWpIYa+JDXE0JekhjinLzXmwLPff9lnGwv8jGPyGfoai+VO6ZzUoGnxNb9SLfXfYuGD/db+W4w99JNsB/4bsAH4TFXdNO4xtGrU6/as13V/XqnfAfA6SHo1GmvoJ9kA/D7wy8Ac8JUk91TVE+Mch15ZXqnh+UodlzSKcR/pXwwcrKpvASTZC1wBGPoa2ijh/Ep99zKKUd75jDqu5fY9yrZXek2v1j/Q6zEFmKo6JRtecmfJrwHbq+rfdM/fDfzTqvrgovV2Aju7p28Bnhpid2cB3x1huJPCOrzEWvRYh55Jr8M/rqqzFzeO+0g/S7S97K9OVe0B9oy0o2Rf
VU2Pso1JYB1eYi16rENPq3UY93n6c8A5fc83A4fHPAZJata4Q/8rwNYk5yb5MWAHcM+YxyBJzRrr9E5VnUjyQeDP6J2yeWtVPX6KdjfS9NAEsQ4vsRY91qGnyTqM9YNcSdL68to7ktQQQ1+SGjJxoZ9ke5KnkhxMcsN6j2ecktya5FiSx/ra3pDk/iTf7O5fv55jHIck5yT5yyRPJnk8yYe69qZqkeQfJnkkyV91dfhPXXtTdeiXZEOSryX5k+55c7WYqNDvu8zDrwDnAVclOW99RzVWtwHbF7XdADxQVVuBB7rnk+4EsKuqfha4BLi++3fQWi2OA79YVT8PbAO2J7mE9urQ70PAk33Pm6vFRIU+fZd5qKr/Cyxc5qEJVfUg8Nyi5iuA27vHtwNXjnNM66GqjlTVV7vHL9D7n3wTjdWieua7p6/pbkVjdViQZDNwOfCZvubmajFpob8J+E7f87murWVTVXUEemEIvHGdxzNWSbYA/wT4XzRYi24641HgGHB/VTVZh85/Bf498KO+tuZqMWmhv6rLPKgNSc4E7gI+XFXPr/d41kNVvVhV2+h9+/3iJBes85DWRZJ3AMeqav96j2W9TVroe5mHlzuaZCNAd39sncczFkleQy/wP19Vf9w1N1kLgKr6P8Asvc98WqzDW4FfTXKI3rTvLyb5HA3WYtJC38s8vNw9wLXd42uBu9dxLGORJMAtwJNV9bt9i5qqRZKzk/xk9/gM4JeAb9BYHQCq6saq2lxVW+jlwv+oqn9Fg7WYuG/kJnk7vbm7hcs87F7fEY1PkjuAGXqXjD0KfAT4InAn8Cbg28C7qmrxh70TJck/A74MHOCl+dvfpDev30wtkvwcvQ8nN9A7wLuzqn47yU/RUB0WSzID/LuqekeLtZi40JckndykTe9IkpZh6EtSQwx9SWqIoS9JDTH0Jakhhr4kNcTQl6SG/D8WXZv35dC0cAAAAABJRU5ErkJggg==\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "df_contracts[df_contracts.annual_salary < 50].annual_salary.hist(bins=40)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 47,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD4CAYAAAAEhuazAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAQaUlEQVR4nO3df6xf9V3H8efLMpHQbRTZGkLRommMQBXlBknmzG02Rx1LYEZMCRklznRZINkS/ljZP5uaJo1xU5cJsROyks01jduksaISshtcMmTtgpYfQ5pRsbRpM2GMLgYte/vHPa3fdd/b9nt7f33P5/lIvvme8znnfM/nndP7uud+zvmepqqQJLXlJxa7A5KkhWf4S1KDDH9JapDhL0kNMvwlqUHnLXYHzuSSSy6p1atXn5z/wQ9+wIUXXrh4HZonfa0L+lubdY2fvtY2rK69e/d+t6reNtM2Sz78V69ezZ49e07OT01NMTk5uXgdmid9rQv6W5t1jZ++1jasriT/cbptHPaRpAYZ/pLUIMNfkhpk+EtSgwx/SWqQ4S9JDTL8JalBhr8kNcjwl6QGLflv+EpzbfXm3SOtf2DrjfPUE2nxeOYvSQ0y/CWpQYa/JDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JapDhL0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIMNfkhpk+EtSg84Y/kkuT/K1JM8meTrJR7r2i5M8kuT57n3FwDb3JNmf5LkkNwy0X5tkX7fsM0kyP2VJkk7nbM78jwN3V9UvAtcDdya5EtgMPFpVa4BHu3m6ZRuAq4D1wL1JlnWfdR+wCVjTvdbPYS2SpLN0xvCvqsNV9a1u+jXgWeAy4CZge7faduDmbvomYEdVvV5VLwD7geuSXAq8paq+UVUFPDiwjSRpAWU6h89y5WQ18BhwNfBiVV00sOyVqlqR5LPA41X1ha79fuBh4ACwtare3bW/E/hYVb1vyH42Mf0XAitXrrx2x44dJ5cdO3aM5cuXj1blGOhrXbD0atv30qsjrb/2srcObV9qdc2VvtYF/a1tWF3r1q3bW1UTM21z1v+Hb5LlwJeBj1bV908zXD9sQZ2m/ccbq7YB2wAmJiZqcnLy5LKpqSkG5/uir3XB0qvtjlH/D9/bJoe2L7W65kpf64L+1jabus7qbp8kb2I6+L9YVV/pmo90Qzl070e79oPA5QObrwIOde2rhrRLkhbY2dztE+B+4Nmq+vTAol3Axm56I/DQQPuGJOcnuYLpC7tPVNVh4LUk13efefvANpKkBXQ2wz7vAD4A7EvyZNf2cWArsDPJB4EXgVsAqurpJDuBZ5i+U+jOqnqj2+7DwOeBC5i+DvDw3JQhSRrFGcO/qr7O8PF6gHfNsM0WYMuQ9j1MXyyWJC0iv+ErSQ0y/CWpQYa/JDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JapDhL0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIMNfkhpk+EtSgwx/SWqQ4S9JDTL8JalBhr8kNcjwl6QGGf6S1CDDX5IaZPhLUoMMf0lqkOEvSQ0y/CWpQYa/JDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JapDhL0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIMNfkhp0xvBP8kCSo0meGmj7ZJKXkjzZvd47sOyeJPuTPJfkhoH2a5Ps65Z9JknmvhxJ0tk4mzP/zwPrh7T/aVVd073+HiDJlcAG4Kpum3uTLOvWvw/YBKzpXsM+U5K0AM4Y/lX1GPDyWX7eTcCOqnq9ql4A9gPXJbkUeEtVfaOqCngQuHmWfZYknaPzzmHbu5LcDuwB7q6qV4DLgMcH1jnYtf1vN31q+1BJNjH9VwIrV65kamrq5LJjx479yHxf9LUuWHq13b32+Ejrz9T3pVbXXOlrXdDf2mZT12zD/z7gj4Dq3j8F/B4wbBy/TtM+VFVtA7YBTExM1OTk5MllU1NTDM73RV/rgqVX
2x2bd4+0/oHbJoe2L7W65kpf64L+1jabumZ1t09VHamqN6rqh8DngOu6RQeBywdWXQUc6tpXDWmXJC2CWYV/N4Z/wvuBE3cC7QI2JDk/yRVMX9h9oqoOA68lub67y+d24KFz6Lck6RyccdgnyZeASeCSJAeBTwCTSa5heujmAPAhgKp6OslO4BngOHBnVb3RfdSHmb5z6ALg4e4lSVoEZwz/qrp1SPP9p1l/C7BlSPse4OqReidJmhd+w1eSGmT4S1KDDH9JapDhL0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIMNfkhpk+EtSgwx/SWqQ4S9JDTL8JalBhr8kNcjwl6QGGf6S1CDDX5IadMb/yUtaSKs37x55mwNbb5yHnkj95pm/JDXI8JekBjnso7E3m6EiqXWe+UtSgwx/SWqQ4S9JDTL8JalBhr8kNcjwl6QGGf6S1CDDX5IaZPhLUoMMf0lqkOEvSQ0y/CWpQYa/JDXI8JekBhn+ktQgw1+SGmT4S1KDzhj+SR5IcjTJUwNtFyd5JMnz3fuKgWX3JNmf5LkkNwy0X5tkX7fsM0ky9+VIks7G2Zz5fx5Yf0rbZuDRqloDPNrNk+RKYANwVbfNvUmWddvcB2wC1nSvUz9TkrRAzhj+VfUY8PIpzTcB27vp7cDNA+07qur1qnoB2A9cl+RS4C1V9Y2qKuDBgW0kSQss01l8hpWS1cDfVdXV3fz3quqigeWvVNWKJJ8FHq+qL3Tt9wMPAweArVX17q79ncDHqup9M+xvE9N/JbBy5cprd+zYcXLZsWPHWL58+eiVLnF9rQtGq23fS6/Oc29Gt/aytw5t7+sx62td0N/ahtW1bt26vVU1MdM2581xH4aN49dp2oeqqm3ANoCJiYmanJw8uWxqaorB+b7oa10wWm13bN49v52ZhQO3TQ5t7+sx62td0N/aZlPXbMP/SJJLq+pwN6RztGs/CFw+sN4q4FDXvmpIu7TkrZ7hF9Lda48P/WV1YOuN890l6ZzN9lbPXcDGbnoj8NBA+4Yk5ye5gukLu09U1WHgtSTXd3f53D6wjSRpgZ3xzD/Jl4BJ4JIkB4FPAFuBnUk+CLwI3AJQVU8n2Qk8AxwH7qyqN7qP+jDTdw5dwPR1gIfntBJJ0lk7Y/hX1a0zLHrXDOtvAbYMad8DXD1S7yRJ82KuL/hKzZvpGsFMvEagxeDjHSSpQYa/JDXI8JekBjnmr3m1evPuGe+Hl7R4PPOXpAYZ/pLUIMNfkhpk+EtSgwx/SWqQ4S9JDTL8JalBhr8kNcjwl6QGGf6S1CDDX5IaZPhLUoMMf0lqkOEvSQ0y/CWpQYa/JDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JapDhL0kNMvwlqUHnLXYHJI1u9ebdI61/YOuN89QTjSvP/CWpQYa/JDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JapDhL0kNMvwlqUHnFP5JDiTZl+TJJHu6touTPJLk+e59xcD69yTZn+S5JDeca+clSbMzF2f+66rqmqqa6OY3A49W1Rrg0W6eJFcCG4CrgPXAvUmWzcH+JUkjmo9hn5uA7d30duDmgfYdVfV6Vb0A7Aeum4f9S5LOIFU1+42TF4BXgAL+sqq2JfleVV00sM4rVbUiyWeBx6vqC137/cDDVfU3Qz53E7AJYOXKldfu2LHj5LJjx46xfPnyWfd5qeprXfteepWVF8CR/17snsy9uapr7WVvHXmbfS+9Om/76Ou/RehvbcPqWrdu3d6BEZkfc65P9XxHVR1K8nbgkSTfPs26GdI29DdPVW0DtgFMTEzU5OTkyWVTU1MMzvdFX+u6Y/Nu7l57nE/t698DZOeqrgO3TY68zR2jPtVzhH309d8i9Le22dR1TsM+VXWoez8KfJXpYZwjSS4F6N6PdqsfBC4f2HwVcOhc9i9Jmp1Zh3+SC5O8+cQ08B7gKWAXsLFbbSPwUDe9C9iQ5PwkVwBrgCdmu39J0uydy9+sK4Gv
JjnxOX9dVf+Q5JvAziQfBF4EbgGoqqeT7ASeAY4Dd1bVG+fUe0nSrMw6/KvqO8AvD2n/L+BdM2yzBdgy231KkuaG3/CVpAYZ/pLUIMNfkhpk+EtSgwx/SWqQ4S9JDerfd+6lMbN6xEc1zPc+7l57nMn564qWCM/8JalBhr8kNcjwl6QGGf6S1CDDX5IaZPhLUoMMf0lqkOEvSQ0y/CWpQYa/JDXI8JekBhn+ktQgw1+SGmT4S1KDDH9JapDhL0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIMNfkhpk+EtSg85b7A5ovKzevHuxuyBpDnjmL0kNMvwlqUGGvyQ1yPCXpAYZ/pLUIO/2aZx370htMvwlnbNRTyIObL1xnnqis2X4S1ryZvMXqr9gTm/Bwz/JeuDPgWXAX1XV1oXug6TTcziw/xY0/JMsA/4C+E3gIPDNJLuq6pmF7Mc48YdQ0nxY6DP/64D9VfUdgCQ7gJsAw19qyEKc1Azbx91rj3PHHO171GGlpXZdJFU1rzv4kZ0lvwOsr6rf7+Y/APxaVd11ynqbgE3d7C8Azw0svgT47gJ0d6H1tS7ob23WNX76Wtuwun62qt420wYLfeafIW0/9tunqrYB24Z+QLKnqibmumOLra91QX9rs67x09faZlPXQn/J6yBw+cD8KuDQAvdBkpq30OH/TWBNkiuS/CSwAdi1wH2QpOYt6LBPVR1Pchfwj0zf6vlAVT094scMHQ7qgb7WBf2tzbrGT19rG7muBb3gK0laGnywmyQ1yPCXpAaNTfgnWZ/kuST7k2xe7P7MpSQHkuxL8mSSPYvdn9lK8kCSo0meGmi7OMkjSZ7v3lcsZh9na4baPpnkpe64PZnkvYvZx9lIcnmSryV5NsnTST7StY/1cTtNXX04Zj+V5Ikk/9rV9gdd+0jHbCzG/LvHQvw7A4+FAG7ty2MhkhwAJqpqrL98kuQ3gGPAg1V1ddf2x8DLVbW1+6W9oqo+tpj9nI0ZavskcKyq/mQx+3YuklwKXFpV30ryZmAvcDNwB2N83E5T1+8y/scswIVVdSzJm4CvAx8BfpsRjtm4nPmffCxEVf0PcOKxEFpCquox4OVTmm8CtnfT25n+ARw7M9Q29qrqcFV9q5t+DXgWuIwxP26nqWvs1bRj3eybulcx4jEbl/C/DPjPgfmD9ORAdgr4pyR7u0db9MnKqjoM0z+QwNsXuT9z7a4k/9YNC43V0MipkqwGfgX4F3p03E6pC3pwzJIsS/IkcBR4pKpGPmbjEv5n9ViIMfaOqvpV4LeAO7shBi199wE/D1wDHAY+tai9OQdJlgNfBj5aVd9f7P7MlSF19eKYVdUbVXUN009JuC7J1aN+xriEf68fC1FVh7r3o8BXmR7m6osj3fjriXHYo4vcnzlTVUe6H8IfAp9jTI9bN278ZeCLVfWVrnnsj9uwuvpyzE6oqu8BU8B6Rjxm4xL+vX0sRJILuwtSJLkQeA/w1Om3Giu7gI3d9EbgoUXsy5w68YPWeT9jeNy6i4f3A89W1acHFo31cZuprp4cs7cluaibvgB4N/BtRjxmY3G3D0B3S9af8f+PhdiyuD2aG0l+jumzfZh+3MZfj2ttSb4ETDL9eNkjwCeAvwV2Aj8DvAjcUlVjd+F0htommR4+KOAA8KETY67jIsmvA/8M7AN+2DV/nOnx8bE9bqep61bG/5j9EtMXdJcxfQK/s6r+MMlPM8IxG5vwlyTNnXEZ9pEkzSHDX5IaZPhLUoMMf0lqkOEvSQ0y/CWpQYa/JDXo/wCyR1tTDJOpXgAAAABJRU5ErkJggg==\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "df_contracts[df_contracts.a_age < 30].a_age.hist(bins=25)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Two very important distributions"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Normal\n",
+ "\n",
+ "Also known as the Gaussian distribution, it is a bell-shaped distribution whose mass is concentrated around the mean and decays exponentially towards the sides. It is fully characterized by its mean (center of mass) and standard deviation (spread).\n",
+ "\n",
+ "https://en.wikipedia.org/wiki/Normal_distribution"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAFgCAYAAACFYaNMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAWpElEQVR4nO3df5Dc9X3f8efb0hkMsoIVJHxIXE5MMRH4hvjmQh3T6TiGJmriBCcTXKWNq+mo1R8oNm4zDsKdTtPpqMNMGY8zLqTRkNRKYhurDh6UlIEYbNzpDDZgx81anF0roOAzFw6wXZmkkJN494/9nvzV6iQt0n73s3v7fMwwt/vZ7/fuvaO9F9/7fD8/IjORJPXf60oXIEmjygCWpEIMYEkqxACWpEIMYEkqZHXpAs7F1q1b84EHHihdhiSdSSzXONRXwC+88ELpEiTprA11AEvSMDOAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSChnq5SilQbK4uEir1Tr+fGpqirGxsYIVadAZwFKPtFotbr7zAGvHJzkyf5i7dsH09HTpsjTADGCph9aOT7Ju4srSZWhIGMBSA149dpTZ2dkT2uySUKdGAzgiDgM/AI4BRzNzJiLWAZ8GJoHDwHsz83vV8bcBO6rjP5CZDzZZn9SUlxbmuOP+l1k/uwhgl4SW1Y8r4J/OzPreQbuBhzPz9ojYXT2/NSKuArYBVwOXAg9FxFsy81gfapR6bs2GCbsjdFolhqHdCOyrHu8D3lNrvyczX8nMp4FDwLX9L0+S+qPpK+AE/iwiEvjdzNwLXJKZ8wCZOR8RG6pjNwJfqp07V7WdICJ2AjsBJiYmmqxdakznkDWwj3gUNR3A12Xms1XIfi4ivnGaY5fbtjlPamiH+F6AmZmZk16XhkF9yBrYRzyqGg3gzHy2+roQEZ+l3aXwXESMV1e/48BCdfgccFnt9E3As03WJ5XkkDU11gccERdGxBuXHgM/A3wdOABsrw7bDtxXPT4AbIuI8yJiM3AF8FhT9UlSaU1eAV8CfDYiln7OJzPzgYh4HNgfETuAZ4CbADLzYETsB54EjgK7HAEhaSVrLIAz8yngmmXaXwSuP8U5e4A9TdUkSYPEmXDSAOicOeeIiNFgAEsDoD5zzhERo8MAlgaEM+dGjwEsnaXOyRSzs7OQDk1X9wxg6Sx1TqaYbz3KRZefdN9ZOiUDWDoH9ckUR+YPly1GQ8c94SSpEANYkgoxgCWpEANYkgoxgCWpEEdBSH3QOdXYMcMCA1jqi85NOh0zLDCApb6pTzV2zLDAPmBJKsYAlqRC7IKQuuTiO+o1A1jqkovvqNcMYOk1cPEd9ZJ9wJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUiOOApQHXOQMPYGpqirGxsUIVqVcMYGnAdc7AOzJ/mLt2wfT0dNnCdM4MYGkI1GfgaeWwD1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQF2SXBsyrx44yOzt7/Pns7CxkFqxITTGApQHz0sIcd9z/MutnFwGYbz3KRZdfU7gqNcEAlgbQmg0Tx7cgOjJ/uGwxaox9wJJUiAEsSYUYwJJUSON9wBGxCngC+E5mvjsi1gGfBiaBw8B7M/N71bG3ATuAY8AHMvPBpuuTliwuLtJqtU5om5qaYmxsrFBFWun6cRPuFmAWWFs93w08nJm3R8Tu6vmtEXEVsA24GrgUeCgi3pKZx/pQo0Sr1eLmOw+wdnwSaN/8umsXTE9Ply1MK1ajXRARsQn4eeDuWvONwL7q8T7gPbX2ezLzlcx8GjgEXNtkfVKnteOTrJu4knUTVx4PYqkpTV8BfxT4TeCNtbZLMnMeIDPnI2JD1b4R+FLtuLmq7QQRsRPY
CTAxMdFAyVKbEyLUtMYCOCLeDSxk5lci4p3dnLJM20mf9szcC+wFmJmZ8bdBjXFChJrW5BXwdcAvRsTPAecDayPij4DnImK8uvodBxaq4+eAy2rnbwKebbA+6YycEKEmNdYHnJm3ZeamzJykfXPt85n5a8ABYHt12HbgvurxAWBbRJwXEZuBK4DHmqpPkkorMRX5dmB/ROwAngFuAsjMgxGxH3gSOArscgSEpJWsLwGcmY8Aj1SPXwSuP8Vxe4A9/ahJkkpzJpwkFWIAS1IhBrAkFWIAS1IhBrAkFWIAS1IhBrAkFWIAS1IhBrAkFWIAS1IhBrAkFVJiMR5J56BzoXhw77phZQBLQ6ZzoXj3rhteBrA0hOoLxWt42QcsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUyOrSBUj9tLi4SKvVOqFtamqKsbGxQhWdu1ePHWV2dvb482F/P6PEANZIabVa3HznAdaOTwLw/e/8JbfcMMuWLVvaIZZZtsCz8NLCHHfc/zLrZxc5Mn+Yu3bB9PR06bLUBQNYI2ft+CTrJq4E4Mj8Ye64v8X62UXmW49y0eXXFK7u7KzZMHH8PWl42AeskbcUXhdePF66FI0YA1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCmksgCPi/Ih4LCL+d0QcjIj/ULWvi4jPRcS3qq9vqp1zW0QciohvRsTPNlWbJA2CJq+AXwHelZnXAD8BbI2ItwO7gYcz8wrg4eo5EXEVsA24GtgK3BURqxqsT5KKaiyAs+2l6ulY9V8CNwL7qvZ9wHuqxzcC92TmK5n5NHAIuLap+iSptEb7gCNiVUR8DVgAPpeZXwYuycx5gOrrhurwjcC3a6fPVW2d33NnRDwREU88//zzTZYvSY1qdDnKzDwG/EREXAR8NiLeeprDY7lvscz33AvsBZiZmRm+xVvVV50LsA/rmr9amfqyHnBmfj8iHqHdt/tcRIxn5nxEjNO+Oob2Fe9ltdM2Ac/2oz6tXJ0LsA/zmr9aeZocBbG+uvIlIt4A3AB8AzgAbK8O2w7cVz0+AGyLiPMiYjNwBfBYU/VpdCwtwO6avxo0TV4BjwP7qpEMrwP2Z+afRsSjwP6I2AE8A9wEkJkHI2I/8CRwFNhVdWFI0orUWABn5l8Ab1um/UXg+lOcswfY01RNkjRInAknSYV0FcARcV03bZKk7nV7BfyxLtskSV06bR9wRPwU8A5gfUT8m9pLawGnCUsD5tVjR9tjnWumpqYYGxsrVJFO50w34V4PrKmOe2Ot/QjwK00VJensvLQwxx33v8z62UUAjswf5q5dMD09XbgyLee0AZyZXwS+GBEfz8y/6lNNks7Bmg0TrJu4snQZ6kK3w9DOi4i9wGT9nMx8VxNFSdIo6DaA/zvwX4G7ASdHSFIPdBvARzPzdxqtRJJGTLfD0P4kIm6OiPFqR4t1EbGu0cokaYXr9gp4afGcD9XaEri8t+VI6iWHpQ22rgI4Mzc3XYik3nNY2mDrKoAj4p8v156Zf9DbciT1msPSBle3XRA/WXt8Pu3VzL4KGMCSdJa67YJ4f/15RPwI8IeNVCRJI+Jsl6P8W9o7VkiSzlK3fcB/wg83yFwFbAH2N1WUJI2CbvuA76g9Pgr8VWbONVCPJI2MbvuAvxgRl/DDm3Hfaq4k6ey5Db2GSbddEO8F/jPwCBDAxyLiQ5n5mQZrk14zt6HXMOm2C+LfAj+ZmQvQ3nIeeAgwgDVwlrahh/bEA2lQdTsK4nVL4Vt58TWcK0laRrdXwA9ExIPAp6rn/wS4v5mSJGk0nGlPuL8HXJKZH4qIXwb+Ae0+4EeBT/ShPkk95OI8g+VMV8AfBT4MkJn3AvcCRMRM9dov
NFibpB5zcZ7BcqYAnszMv+hszMwnImKymZIkNcnFeQbHmW6knX+a197Qy0IkadScKYAfj4h/1dkYETuArzRTkiSNhjN1QXwQ+GxE/DN+GLgzwOuBX2qwLkla8U4bwJn5HPCOiPhp4K1V8//IzM83XpkkrXDdrgXxBeALDdciSSPF2WySVIgBLEmFGMCSVIgBLEmFGMCSVIgBLEmFGMCSVIgBLEmFGMCSVIgBLEmFGMCSVEi3e8JJWoE6tyhye6L+MoClEVbfosjtifrPAJZGnFsUlWMfsCQVYgBLUiEGsCQVYgBLUiEGsCQVYgBLUiGNBXBEXBYRX4iI2Yg4GBG3VO3rIuJzEfGt6uubaufcFhGHIuKbEfGzTdUmSYOgySvgo8BvZOYW4O3Aroi4CtgNPJyZVwAPV8+pXtsGXA1sBe6KiFUN1idJRTU2ESMz54H56vEPImIW2AjcCLyzOmwf8Ahwa9V+T2a+AjwdEYeAa4FHm6pRK8Pi4iKtVgugPa02s3BFUnf6MhMuIiaBtwFfBi6pwpnMnI+IDdVhG4Ev1U6bq9o6v9dOYCfAxMREg1VrWLRaLW6+8wBrxyeZbz3KRZdfU7okqSuN34SLiDXAHwMfzMwjpzt0mbaTLmUyc29mzmTmzPr163tVpobc2vFJ1k1cyYUXj5cuRepao1fAETFGO3w/kZn3Vs3PRcR4dfU7DixU7XPAZbXTNwHPNlmfpB/qXBkNXB2taY0FcEQE8HvAbGZ+pPbSAWA7cHv19b5a+ycj4iPApcAVwGNN1SfpRPWV0QBXR+uDJq+ArwPeB7Qi4mtV24dpB+/+iNgBPAPcBJCZByNiP/Ak7REUuzLzWIP1Sergymj91eQoiP/F8v26ANef4pw9wJ6mapKkQeJMOEkqxACWpEIMYEkqxACWpEIMYEkqxACWpEIMYEkqxACWpEIMYEkqxACWpEL6sh6w1Ev1BdjBRdg1vAxgDZ36AuyAi7BraBnAGkpLC7BDe9lEaRjZByxJhRjAklSIXRAaeN5000plAGvgedNNK5UBrKHgTTetRPYBS1IhBrAkFWIAS1IhBrAkFWIAS1IhBrAkFWIAS1IhBrAkFWIAS1IhzoSTtKxXjx1tr7tRMzU1xdjYWKGKVh4DWNKyXlqY4477X2b97CLQngJ+1y6Ynp4uXNnKYQBLOqU1GyaOr8Gh3rMPWJIK8QpYUlfsE+49A1hSV+wT7j0DWFLX7BPuLfuAJakQA1iSCrELQgPHTTg1KgxgDRw34dSoMIA1kNyEU6PAPmBJKsQAlqRCDGBJKsQAlqRCDGBJKsQAlqRCDGBJKsQAlqRCDGBJKsSZcBoI9fUfXPtBo8IA1kCor//g2g8aFXZBaGAsrf9w4cXjpUuR+qKxK+CI+H3g3cBCZr61alsHfBqYBA4D783M71Wv3QbsAI4BH8jMB5uqTdK5c4+4c9dkF8THgf8C/EGtbTfwcGbeHhG7q+e3RsRVwDbgauBS4KGIeEtmHmuwPknnwD3izl1jAZyZ/zMiJjuabwTeWT3eBzwC3Fq135OZrwBPR8Qh4Frg0abqk3Tu3CPu3PS7D/iSzJwHqL5uqNo3At+uHTdXtZ0kInZGxBMR8cTzzz/faLGS1KRBuQkXy7QtOw4pM/dm5kxmzqxfv77hsiSpOf0O4OciYhyg+rpQtc8Bl9WO2wQ82+faJKmv+h3AB4Dt1ePtwH219m0RcV5EbAauAB7rc22S1FdNDkP7FO0bbhdHxBzw74Hbgf0RsQN4BrgJIDMPRsR+4EngKLDLERCSVromR0H86ileuv4Ux+8B9jRVjwaLW89LTkVWIW49LxnAKsit5zXqBmUYmiSNHK+A1Rf2+UonM4DVF/b5SiczgNU39vlKJ7IPWJIKMYAlqRADWJIKMYAlqRADWJIKMYAlqRADWJIKcRywpJ7o3CXZHZLPzACW1BP1XZLdIbk7BrCknnGX5NfGPmBJKsQAlqRCDGBJKsQAlqRCvAknqec6
h6SBw9KWYwBL6rn6kDTAYWmnYABLaoRD0s7MAFYj3ANOOjMDWI1wDzjpzAxgNcY94HQqnX8hwWjepDOAJfVd519Io3qTzgCWVET9L6RR5UQMSSrEK2D1TL1fz1EPquucmOHno80AVs/U+/Uc9aC6zokZfj7aDGD11FK/nqMe1Kk+McPPR5t9wJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYU4DE1nzSUnpXNjAOusueSkeqVzplx9ZbSVvHKaAaxz4pKT6oX6TLnOldFW8sppBrC6ZpeDmnS6LYxW6sppBrC6ZpeD1FsGsI7rpq/NLgepdwxgHbeS+9o0PEZp6UoDWCdYqX1tGh6jtHSlASxp4Jxu6crOK2QY3mFpBrCkodJ5hTzMXWUGsE5plPriNFxON2RtmBjAI6ZzpMPi4iIRwerVq08K2FHqi9Pwql8o1D/PSwa5e8IAHjHLjeVdveZNrN/848sGrNvIaNDVLxTqn2cY/O6JgQvgiNgK/DawCrg7M28vXNJQW2722to3/9gJoTq2doP7uGmoLV0o1D/Pyxm0dSUGKoAjYhVwJ/CPgDng8Yg4kJlP9upnDNo/wJJ6XWf6M+q1vAdnr2mULXcf42MP/x/WXroZgO9/5y+55YZZtmzZcvyYpd+lfmTFQAUwcC1wKDOfAoiIe4AbgZ4FcKvV4n3/7mNc8KNvBuBvX/xrPvxPbzjhH6CE2dlZ/tMnH+KCH30zLz51kFVveCMXjU8sW2P92OVe7/y+nV5aeIbvXngBAH/zwjyrX36Z7154wQmPO197rc9H7dxBqcP3f+Jrzz35OL/1xEtcNP51AF586iA/8mNXHf9d+H/fXeC3Pv7U8dfrv0vL/Z794X98f0+7MyIH6K52RPwKsDUz/2X1/H3A38/MX68dsxPYWT29EvhmD370xcALPfg+w2YU37fveXQM0vt+ITO3djYO2hVwLNN2wv8hMnMvsLenPzTiicyc6eX3HAaj+L59z6NjGN73oG1JNAdcVnu+CXi2UC2S1KhBC+DHgSsiYnNEvB7YBhwoXJMkNWKguiAy82hE/DrwIO1haL+fmQf78KN72qUxREbxffueR8fAv++BugknSaNk0LogJGlkGMCSVMjIB3BEbI2Ib0bEoYjYXbqepkXEZRHxhYiYjYiDEXFL6Zr6JSJWRcSfR8Sflq6lXyLiooj4TER8o/o3/6nSNTUtIv519dn+ekR8KiLOL13TqYx0ANemPv9j4CrgVyPiqtOfNfSOAr+RmVuAtwO7RuA9L7kFOHlq4Mr228ADmfnjwDWs8PcfERuBDwAzmflW2jfzt5Wt6tRGOoCpTX3OzL8DlqY+r1iZOZ+ZX60e/4D2L+TGslU1LyI2AT8P3F26ln6JiLXAPwR+DyAz/y4zv1+0qP5YDbwhIlYDFzDAcwlGPYA3At+uPZ9jBMJoSURMAm8Dvly4lH74KPCbwKuF6+iny4Hngf9Wdb3cHREXli6qSZn5HeAO4BlgHvi/mflnZas6tVEP4DNOfV6pImIN8MfABzPzSOl6mhQR7wYWMvMrpWvps9XANPA7mfk24G+AFX2fIyLeRPuv2M3ApcCFEfFrZas6tVEP4JGc+hwRY7TD9xOZeW/pevrgOuAXI+Iw7W6md0XEH5UtqS/mgLnMXPoL5zO0A3kluwF4OjOfz8xF4F7gHYVrOqVRD+CRm/ocEUG7T3A2Mz9Sup5+yMzbMnNTZk7S/jf+fGYO7FVRr2TmXwPfjoil1cmvp4dLuw6oZ4C3R8QF1Wf9egb4xuNATUXut4JTn0u6Dngf0IqIr1VtH87M+8uVpAa9H/hEdYHxFPAvCtfTqMz8ckR8Bvgq7RE/f84AT0l2KrIkFTLqXRCSVIwBLEmFGMCSVIgBLEmFGMCSVIgBLEmFGMCSVMj/B/qjFlrdIbQIAAAAAElFTkSuQmCC\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "s1 = np.random.normal(5, 1, 10000)\n",
+ "sns.displot(s1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAD4CAYAAADSIzzWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAMCElEQVR4nO3db2hd9R3H8c+3SWcTi2xWURaNV7lzbZkMtzDchFHbirnZ2B4JDreEgRSqi10djK1cEOHioyGTIANxGymTDXQ+mCPpprjHskQt1aaTg4u1md1qZP5ZUm3a3x7kz24al9y09+ZzkrxfUG567+k533N6z7snJwmNlJIAACtvg3sAAFivCDAAmBBgADAhwABgQoABwKR5OQtfccUVqVAoNGgUAFibhoeH30kpXXn+88sKcKFQ0NDQUP2mAoB1ICLe/KTnuQUBACYEGABMCDAAmBBgADAhwABgQoABwIQAA4AJAQYAEwIMACYEGABMCDAAmBBgADAhwABgQoABwIQAA4AJAQYAEwIMACYEGABMCDAAmCzr/4QD8qyvr09ZljVk3WNjY5Kktra2hqy/WCyqt7e3IetGfhFgrBlZlumVV0d0tvXyuq+7aeI9SdLJj+p/yjRNvFv3dWJ1IMBYU862Xq7JrV11X2/LsQFJaui6sf5wDxgATAgwAJgQYAAwIcAAYEKAAcCEAAOACQEGABMCDAAmBBgATAgwAJgQYAAwIcAAYEKAAcCEAAOACQEGABMCDAAmBBgATAgwAJgQYAAwIcAAYEKAAcCEAAOACQEGABMCDAAmBBgATAgwAJgQYAAwIcAAYEKAAcCEAAOACQEGABMCDAAmBBgATAgwAJgQYAAwIcAAYEKAAcCEAAOACQEGABMCDAAmBBgATAjwKtXX16e+vj73GEDdraf3drN7AFyYLMvcIwANsZ7e21wBA4AJAQYAEwIMACYEGABMCDAAmBBgADAhwABgQoABwIQAA4AJAQYAEwIMACYEGABMCDAAmBBgADAhwABgQoABwIQAA4AJAQYAEwIMACYEGABMCDAAmBBgADAhwABgQoABwIQAA4AJAQYAEwIMACYEGABMCDAAmBBgADAhwABgQoABwIQAA4AJAQYAEwIMACYEGABMCDAAmBBgADBZkQCPj4/r/vvv1/j4+Eps7qLNzptl2bzHWuavZV+rl8myTKVSSXv27NHw8LBKpZLuvvtu3XbbbXr22WdVKpV01113aceOHXO/du3apcOHD+vw4cPKsqyeuw7kwsTEhHbv3q0dO3boscceU2dn57xzoLu7W1mW6d5779XevXv1wgsvaOfOneru7tbevXvnzr/x8XHdc8896urqUpZl8869WrvUyH6tSID7+/t15MgRHTx4cCU2d9Fm561UKvMea5m/ln2tXqZSqWhyclKvv/66HnzwQU1OTmpsbEwpJT3yyCOanJzUyZMn5/35s2fPzn1cqVQufEeBnDp+/LimpqYkSU899ZROnz694PVKpaKjR49qZGREDz/8sM6dO6fjx49rZGRk7vzr7+9XlmWamJhQpVKZd+7V2qVG9qvhAR4fH9ehQ4eUUtKhQ4dyfxVcPe/o6Oi8x6Xmr2Vfq5cZGBjQ6Ojo3GsffvjhvGVTSkvOOzo6ylUw1pSJiQl99NFHSy5Xfe7MxnrW4OCgsizTwMDAvOUHBweVUtLg4ODcx4ud143uV3Nd1/YJ+vv7de7cOUnTV24HDx7U/v37G73ZC1Y97/mWmr+Wfa1e5syZM3WZ+b777tPWrVvrsq7VLMsybfh46X+08mbD6feVZR9o37597lFyoR4XFGfOnFGlUlkQ5tlzrvrcW+y8bnS/lrwCjog9ETEUEUOnTp1a9gaef/75uYMwNTWl5557bvlTrqDqec+31Py17Oti679QtVwtAKtFLZ/51bKO6ivkT3p9djuLndeN7teSV8AppcclPS5JHR0dyz4yu3fv1sDAgKamptTc3Kzb
b7/9AsZcOdXznm+p+WvZ18XWf6EKhYIeffTRuq1vtdq3b5+G3/ine4xlO7fpMhVvuIq/wxl33HHHRV9URISuu+66/xvhiJA0HeLFzutG96vh94B7enq0YcP0ZpqamtTd3d3oTV6U6nnPt9T8texr9TIbN26sw8RSuVyuy3qAPGhvb7/odWzcuFHlclnNzc0Lnp99nH1tsfO60f1qeIC3bNmizs5ORYQ6Ozu1ZcuWRm/yolTPWygU5j0uNX8t+1q9TFdXlwqFwtxrmzdvnrfs7L/SiykUCioWi7XvIJBzra2tuuSSS5ZcrvrcOT+0pVJJxWJRXV1d85YvlUqKCJVKpbmPFzuvG92vFfk2tJ6eHt100025v/qdNTtvuVye91jL/LXsa/Uy5XJZLS0tuvHGG/XQQw+ppaVFbW1tigg98MADamlp0dVXXz3vzzc1Nc19zNUv1qL29va5qN55553atGnTgtfL5bK2b9+ubdu26cCBA9qwYYPa29u1bdu2ufOvp6dHxWJRra2tKpfL8869WrvUyH7Fcm54d3R0pKGhoboPgeWb/Yo59w3/Z/Ye8OTWrqUXXqaWY9PfztSodX+Ze8Bz1uJ7OyKGU0od5z/PjyIDgAkBBgATAgwAJgQYAEwIMACYEGAAMCHAAGBCgAHAhAADgAkBBgATAgwAJgQYAEwIMACYEGAAMCHAAGBCgAHAhAADgAkBBgATAgwAJgQYAEwIMACYEGAAMCHAAGBCgAHAhAADgAkBBgATAgwAJgQYAEwIMACYEGAAMCHAAGBCgAHAhAADgAkBBgATAgwAJgQYAEwIMACYEGAAMGl2D4ALUywW3SMADbGe3tsEeJXq7e11jwA0xHp6b3MLAgBMCDAAmBBgADAhwABgQoABwIQAA4AJAQYAEwIMACYEGABMCDAAmBBgADAhwABgQoABwIQAA4AJAQYAEwIMACYEGABMCDAAmBBgADAhwABgQoABwIQAA4AJAQYAEwIMACYEGABMCDAAmBBgADAhwABgQoABwIQAA4AJAQYAEwIMACYEGABMCDAAmBBgADAhwABgQoABwIQAA4AJAQYAEwIMACbN7gGAemqaeFctxwYasN5xSWrQut+VdFXd14v8I8BYM4rFYsPWPTY2JUlqa2tEKK9q6OzILwKMNaO3t9c9ArAs3AMGABMCDAAmBBgATAgwAJgQYAAwIcAAYEKAAcCEAAOACQEGABMCDAAmBBgATAgwAJgQYAAwIcAAYEKAAcCEAAOACQEGABMCDAAmBBgATAgwAJhESqn2hSNOSXqzjtu/QtI7dVzfWsAxWYhjshDHZKE8H5PrUkpXnv/ksgJcbxExlFLqsA2QQxyThTgmC3FMFlqNx4RbEABgQoABwMQd4MfN288jjslCHJOFOCYLrbpjYr0HDADrmfsKGADWLQIMACaWAEdEZ0T8LSKyiPiJY4Y8iYhrI+IvETESEa9FxD73THkREU0R8XJE/NE9Sx5ExKcj4umIODbzfvmqeya3iNg/c968GhG/jYhN7plqteIBjogmSY9JKknaLuk7EbF9pefImSlJP0opbZN0i6T7OCZz9kkacQ+RI49KOpRS2irpi1rnxyYi2iTdL6kjpfQFSU2S7vJOVTvHFfBXJGUppTdSSh9L+p2kbxvmyI2U0tsppZdmPv5A0ydVm3cqv4i4RtI3JD3hniUPIuIySV+X9EtJSil9nFL6t3WofGiW1BIRzZJaJf3DPE/NHAFuk/RW1e9PiNjMiYiCpJslvWgeJQ9+LunHks6Z58iLGySdkvTrmdsyT0TEpe6hnFJKY5J+Jum4pLclvZdS+rN3qto5Ahyf8BzfCycpIjZL+r2kH6aU3nfP4xQR35T0r5TSsHuWHGmW9CVJv0gp3SzpP5LW9ddQIuIzmv4M+npJn5V0aUR81ztV7RwBPiHp2qrfX6NV9ClDo0TERk3H98mU0jPueXLgVknfiohRTd+m2hkRv/GOZHdC0omU0uxnR09r
Osjr2W5Jf08pnUopnZH0jKSvmWeqmSPAf5X0uYi4PiI+pekb5n8wzJEbERGavq83klJ6xD1PHqSUfppSuialVND0e+SFlNKqubJphJTSSUlvRcTnZ57aJemocaQ8OC7plohonTmPdmkVfWGyeaU3mFKaiogfSPqTpr9i+auU0msrPUfO3Crpe5KORMQrM88dSCkN+EZCTvVKenLm4uUNSd83z2OVUnoxIp6W9JKmv5voZa2iH0nmR5EBwISfhAMAEwIMACYEGABMCDAAmBBgADAhwABgQoABwOS/2yHVGtk42qoAAAAASUVORK5CYII=\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# for boxplots see https://en.wikipedia.org/wiki/Interquartile_range (or ask!)\n",
+ "sns.boxplot(x=s1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Heavy-tailed\n",
+ "Distributions with a small but non-negligible number of observations that take very high values. Several probability distributions follow this pattern: https://en.wikipedia.org/wiki/Heavy-tailed_distribution#Common_heavy-tailed_distributions.\n",
+ "\n",
+ "We pick the lognormal here: https://en.wikipedia.org/wiki/Log-normal_distribution"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 50,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWEAAAFgCAYAAABqo8hyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAbWklEQVR4nO3df4xd513n8fennnicH/Y2KU7k2pEatBZLUtEUhmzbINQ2QAxFONKSYhDFDWZdiZTSgkDJIi3LHxbdFfQH24attyl1+dHIlHZjWjap1zRAVVTXbVOIk3rjkpI49sZuEZkpjaZM/N0/5ox7Y4/tcewzz52Z90sanXOe+5wz38dKPvfMc885N1WFJKmNF7QuQJKWMkNYkhoyhCWpIUNYkhoyhCWpoZHWBZyPDRs21H333de6DEmaTebSaUGfCX/ta19rXYIknZcFHcKStNAZwpLUkCEsSQ0ZwpLUkCEsSQ0ZwpLUkCEsSQ0ZwpLUkCEsSQ0ZwpLUkCEsSQ0ZwpLUkCEsSQ0t2RCuKsbHx/GLTiW1tGRDeGJigk3v/AQTExOtS5G0hC3ZEAYYWXFJ6xIkLXFLOoQlqTVDWJIaMoQlqaFeQzjJ25LsT/JQkg8nWZHkiiS7kzzaLS8f6H9nkoNJDiS5uc/aJGkY9BbCSdYCbwHGquqlwDJgE3AHsKeq1gN7um2SXNu9fh2wAbgrybK+6pOkYdD3dMQIcHGSEeAS4DCwEdjRvb4DuKVb3wjcU1WTVfUYcBC4oc/ivFZYUmu9hXBVPQn8DvA4cAR4uqo+CVxVVUe6PkeAK7td1gJPDBziUNfWm6nJZ9iy/QGvFZbUTJ/TEZczfXZ7DfBi4NIkP3umXWZpO+UUNcnWJPuS7Dt27Nh51zky6rXCktrpczrih4DHqupYVf0r8FHgVcBTSdYAdMujXf9DwNUD+69jevriOapqe1WNVdXY6tWreyxfkvrXZwg/DrwiySVJAtwEPALsAjZ3fTYD93bru4BNSUaTXAOsB/b2WJ8kNTfS14Gr6rNJPgJ8AZgCvghsBy4DdibZwnRQ39r1359kJ/Bw1//2qnq2r/okaRj0FsIAVfWbwG+e1DzJ9FnxbP23Adv6rEmShol3zElSQ4awJDVkCEtSQ4awJDVkCEtSQ0s+hH1+hKSWlnwI+/wISS0t+RAGnx8hqR1DWJIaMoQlqSFDWJIaMoQlqSFDWJIaMoQlqaElGcIzN2hIUmtLMoQnJia47b338+yUz4yX1NaSDGGAkRXeoCGpvSUbwpI0DAxhSWrIEJakhgxhSWrIEJakhgxhSWrIEJakhgxhSWrIEJakhgxhSWrIEJakhgxhSWqotxBO8l1JHhz4GU/y1iRXJNmd5NFuefnAPncmOZjkQJKb+6pNkoZFbyFcVQeq6vqquh74PuCbwMeAO4A9VbUe2NNtk+RaYBNwHbABuCvJsr7qO6lWxsfHqar5+HWSdMJ8TUfcBHylqv4R2Ajs6Np3ALd06xuBe6pqsqoeAw4CN8xHcVOTz7Bl+wNMTEzMx6+TpBPmK4Q3AR/u1q+qqiMA3fLKrn0t8MTAPoe6tudIsjXJviT7jh07dsEKHBn1+cKS5l/vIZxkOfATwJ+eressbafMD1TV9qoaq6qx1atXX4gSJamZ+TgT/lHgC1X1VLf9VJI1AN3yaNd+CLh6YL91wOF5qE+SmpmPEP5pvj0VAbAL2NytbwbuHWjflGQ0yTXAemDvPNQnSc2M9HnwJJcAPwy8aaD57cDOJFuAx4FbAapqf5KdwMPAFHB7VflNnJIWtV5DuKq+CbzopLavM321xGz9twHb+qxJkoaJd8xJUkOGsCQ1ZAhLUkOGsCQ1ZAhLUkOGsCQ1ZAhLUkOGsCQ1ZAhLUkOGsCQ1ZAhLUkOGsCQ1ZAhLUkOGsCQ1ZAhLUkOGsCQ1ZAhLUkOGsCQ1ZAhLUkOGsCQ1ZAhLUkOGsCQ1ZAhLUkOGsCQ1ZAh3qorx8XGq
qnUpkpYQQ7gzNfkMW7Y/wMTEROtSJC0hhvCAkdFLWpcgaYkxhCWpoV5DOMkLk3wkyZeTPJLklUmuSLI7yaPd8vKB/ncmOZjkQJKb+6xNkoZB32fC7wbuq6p/B7wMeAS4A9hTVeuBPd02Sa4FNgHXARuAu5Is67k+SWqqtxBOsgr4QeBugKr6VlX9M7AR2NF12wHc0q1vBO6pqsmqegw4CNzQV32SNAz6PBP+TuAY8AdJvpjk/UkuBa6qqiMA3fLKrv9a4ImB/Q91bc+RZGuSfUn2HTt2rMfyJal/fYbwCPC9wO9X1cuBf6GbejiNzNJ2ykW7VbW9qsaqamz16tUXplJJaqTPED4EHKqqz3bbH2E6lJ9KsgagWx4d6H/1wP7rgMM91idJzfUWwlX1/4AnknxX13QT8DCwC9jctW0G7u3WdwGbkowmuQZYD+ztqz5JGgYjPR//l4A/TrIc+AfgNqaDf2eSLcDjwK0AVbU/yU6mg3oKuL2qnu25PklqqtcQrqoHgbFZXrrpNP23Adv6rEmShol3zElSQ4awJDVkCEtSQ4awJDVkCEtSQ4awJDVkCEtSQ4awJDVkCEtSQ4awJDVkCEtSQ4awJDVkCEtSQ4awJDVkCEtSQ4awJDVkCEtSQ4awJDVkCEtSQ4awJDVkCEtSQ4bwgKpifHycqmpdiqQlwhAeMDX5DFu2P8DExETrUiQtEYbwSUZGL2ldgqQlxBCWpIYMYUlqyBCWpIYMYUlqqNcQTvLVJH+f5MEk+7q2K5LsTvJot7x8oP+dSQ4mOZDk5j5rk6RhMB9nwq+pquuraqzbvgPYU1XrgT3dNkmuBTYB1wEbgLuSLJuH+iSpmRbTERuBHd36DuCWgfZ7qmqyqh4DDgI3zH95kjR/+g7hAj6Z5PNJtnZtV1XVEYBueWXXvhZ4YmDfQ13bcyTZmmRfkn3Hjh3rsXRJ6t9Iz8e/saoOJ7kS2J3ky2fom1naTrl/uKq2A9sBxsbGvL9Y0oLW65lwVR3ulkeBjzE9vfBUkjUA3fJo1/0QcPXA7uuAw33WJ0mt9RbCSS5NsnJmHfgR4CFgF7C567YZuLdb3wVsSjKa5BpgPbC3r/okaRj0OR1xFfCxJDO/50+q6r4knwN2JtkCPA7cClBV+5PsBB4GpoDbq+rZHuuTpOZ6C+Gq+gfgZbO0fx246TT7bAO29VWTJA0b75iTpIYMYUlqyBCWpIYMYUlqyBCWpIYMYUlqyBCWpIbmFMJJbpxLmyTp3Mz1TPi/z7FNknQOznjHXJJXAq8CVif5lYGXVgE+cF2SztPZblteDlzW9Vs50D4O/GRfRUnSUnHGEK6qvwL+KskHq+of56kmSVoy5voAn9Ek24GXDO5TVa/toyhJWirmGsJ/CvwP4P2Aj5eUpAtkriE8VVW/32slQ6KqGB8fZ+XKlXTPQpak3sz1ErU/T/KLSdYkuWLmp9fKGpmafIYt2x9gYmKidSmSloC5ngnPfB3Rrw20FfCdF7ac4TAyeknrEiQtEXMK4aq6pu9CJGkpmlMIJ/m52dqr6kMXthxJWlrmOh3x/QPrK5j+jrgvAIawJJ2HuU5H/NLgdpJ/A/xhLxVJ0hLyfB9l+U1g/YUsRJKWornOCf8501dDwPSDe74b2NlXUZK0VMx1Tvh3BtangH+sqkM91CNJS8qcpiO6B/l8meknqV0OfKvPoiRpqZjrN2u8HtgL3Aq8HvhsEh9lKUnnaa7TEb8BfH9VHQVIshr4P8BH+iqsJZ8fIWm+zPXqiBfMBHDn6+ew74Lj8yMkzZe5Bul9Se5P8sYkbwQ+AfzFXHZMsizJF5N8vNu+IsnuJI92y8sH+t6Z5GCSA0luPtfBXEg+P0LSfDhjCCf5t0lurKpfA94HfA/wMuBvge1z/B2/DDwysH0HsKeq1gN7um2SXAtsAq4DNgB3JfF77CQtamc7E34XMAFQVR+tql+pqrcxfRb8rrMdPMk64HVMPwx+xkZgR7e+A7hloP2e
qpqsqseAg8ANcxqFJC1QZwvhl1TV353cWFX7mP6qo7N5F/DrwPGBtquq6kh3nCPAlV37WuCJgX6HurbnSLI1yb4k+44dOzaHEiRpeJ0thFec4bWLz7Rjkh8HjlbV5+dYy2yXIdQpDVXbq2qsqsZWr149x0NL0nA6Wwh/Lsl/PLkxyRbgbOF6I/ATSb4K3AO8NskfAU8lWdMdZw0wc9XFIeDqgf3XAYfPOgJJWsDOFsJvBW5L8kCS3+1+/gr4BaY/cDutqrqzqtZV1UuY/sDtL6vqZ4FdfPubOjYD93bru4BNSUaTXMP0A4L2Pp9BSdJCccabNarqKeBVSV4DvLRr/kRV/eV5/M63Azu7s+nHmb4Lj6ran2Qn8DDTz6e4var8ZmdJi9pcnyf8KeBTz/eXVNUDwAPd+teZfij8bP22Adue7++RpIVm0d71JkkLgSEsSQ0ZwpLUkCEsSQ0ZwpLUkCEsSQ0ZwpLUkCEsSQ0ZwpLUkCEsSQ0ZwpLUkCF8GjPfuFx1yiONJemCMYRPw29cljQfDOEz8BuXJfXNED4DpyQk9c0QPgOnJCT1zRA+C6ckJPXJEJakhgxhSWrIEJakhgxhSWrIEJakhgxhSWrIEJakhgxhSWrIEJakhgxhSWrIEJakhnoL4SQrkuxN8qUk+5P8Vtd+RZLdSR7tlpcP7HNnkoNJDiS5ua/aJGlY9HkmPAm8tqpeBlwPbEjyCuAOYE9VrQf2dNskuRbYBFwHbADuSrLsQhc183hKSRoGvYVwTftGt3lR91PARmBH174DuKVb3wjcU1WTVfUYcBC44ULXNTExwW3vvZ9np5690IeWpHPW65xwkmVJHgSOArur6rPAVVV1BKBbXtl1Xws8MbD7oa7t5GNuTbIvyb5jx449r7pGVvh4SknDodcQrqpnq+p6YB1wQ5KXnqF7ZjvELMfcXlVjVTW2evXqC1SpJLUxL1dHVNU/Aw8wPdf7VJI1AN3yaNftEHD1wG7rgMPzUZ8ktdLn1RGrk7ywW78Y+CHgy8AuYHPXbTNwb7e+C9iUZDTJNcB6YG9f9UnSMBjp8dhrgB3dFQ4vAHZW1ceT/C2wM8kW4HHgVoCq2p9kJ/AwMAXcXlV+eiZpUesthKvq74CXz9L+deCm0+yzDdjWV02SNGy8Y06SGjKEJakhQ1iSGjKEJakhQ1iSGjKEz2LmgT9Vp9y8J0nnzRA+i6nJZ9iy/QEmJiZalyJpETKE52Bk1Af+SOqHISxJDRnCc+C8sKS+GMJz4LywpL4YwnPkvLCkPhjCktSQISxJDRnCktSQISxJDRnCktSQISxJDRnCktSQISxJDRnCktSQITxHPj9CUh8M4Tny+RGS+mAInwOfHyHpQjOEJakhQ1iSGjKEJakhQ1iSGuothJNcneRTSR5Jsj/JL3ftVyTZneTRbnn5wD53JjmY5ECSm/uqTZKGRZ9nwlPAr1bVdwOvAG5Pci1wB7CnqtYDe7ptutc2AdcBG4C7kizrsT5Jaq63EK6qI1X1hW59AngEWAtsBHZ03XYAt3TrG4F7qmqyqh4DDgI39FWfJA2DeZkTTvIS4OXAZ4GrquoITAc1cGXXbS3wxMBuh7q2oePdc5IulN5DOMllwJ8Bb62q8TN1naXtlJRLsjXJviT7jh07dqHKPCcTExNseucnvHtO0nnrNYSTXMR0AP9xVX20a34qyZru9TXA0a79EHD1wO7rgMMnH7OqtlfVWFWNrV69ur/iz2JkhXfPSTp/fV4dEeBu4JGqesfAS7uAzd36ZuDegfZNSUaTXAOsB/b2Vd/zMTMNMT5+phN6SZq7kR6PfSPwBuDvkzzYtf0n4O3AziRbgMeBWwGqan+SncDDTF9ZcXtVPdtjfedsavIZ3rRjL8enJnnByGjrciQtAr2FcFV9mtnneQFuOs0+24BtfdV0IVy04lKOTy3j2amhen+QtEB5x5wkNWQIS1JDhrAkNWQIS1JDhrAkNWQIS1JDhrAkNWQIS1JD
hrAkNWQIS1JDhrAkNWQIS1JDhrAkNWQIS1JDhrAkNWQIP09+2aekC8EQfp6mJp9hy/YH/LJPSefFED4PI6N+2aek82MIS1JDhrAkNWQIS1JDhvB58AoJSefLED4PM1dIjI+PG8aSnhdD+DyNjF7CxMQEP/WOj/Pkk08axJLOiSF8oSReNyzpnBnCF5DXDUs6V4bweaoqz34lPW+G8HmamnyGN3/w0zw79WzrUiQtQL2FcJIPJDma5KGBtiuS7E7yaLe8fOC1O5McTHIgyc191dWHZcsvbl2CpAWqzzPhDwIbTmq7A9hTVeuBPd02Sa4FNgHXdfvclWRZj7X1wuuGJZ2r3kK4qv4a+KeTmjcCO7r1HcAtA+33VNVkVT0GHARu6Ku2vvhkNUnnar7nhK+qqiMA3fLKrn0t8MRAv0Nd24LjFRKSzsWwfDCXWdpm/Zs+ydYk+5LsO3bsWM9lSVK/5juEn0qyBqBbHu3aDwFXD/RbBxye7QBVtb2qxqpqbPXq1b0WK0l9m+8Q3gVs7tY3A/cOtG9KMprkGmA9sHeea5OkeTfS14GTfBh4NfAdSQ4Bvwm8HdiZZAvwOHArQFXtT7ITeBiYAm6vKi+8lbTo9RbCVfXTp3npptP03wZs66ue+TRzqdrKlStJZpvulqRpw/LB3KIyMTHBpnd+wkvVJJ2VIXyBzTxLYmSFl6pJOjtD+ALzWRKSzoUh3AOfJSFprgxhSWrIEJakhgzhHvlUNUlnYwj3yEvVJJ2NIdwzL1WTdCaGcE/87jlJc2EI92TwemHnhiWdjiHco2XLL6aqOHz4sHPDkmZlCPds5oyYZcs9G5Z0CkN4HixbfrHfPydpVobwPFq2/GLGx8c5fvy4Z8WSAEN4Xs2cDTtHLGmGITzPli2/mImJCZaNXuzZsCRDeL7NfFA3+S/fcI5YkiHcwsyjLkdGvZtOWuoM4Yaqiqeffpqnn3561mkJb/KQFj9DuKGpyWd443vu49bf2cWTTz5JVT0nmMfHx/0AT1rkevu2Zc3NzNTEz7/vU3zgTa8B4Lb33k9GlvPunxnzAUDSImcID43wph17OT41SUZGAXjzBz/NilUvalyXpD45HTFELlpxKRcNnPn6XXXS4mcID7mZD+cG77Kbre1M+/rBnjS8DOEhd/JdduPj4zz55JNseucnntM221UWfrOHNPycE14ABu+yO3z4MG/50GdYftnl068NtGVkOR9402tYu3YtSU68Png2vGrVqhOvSWrPM+EFYPAuuzd/8NNkZPTEN3fMvDb9YV74+fd96sTlbjP7vmnHXja96y94/e/++SlnxU5ZSG0ZwgvEzId0M8vBb+547gd400F86NAhnn76aeDbH/idfIdeVZ2Y2jjTlIVBLfVn6KYjkmwA3g0sA95fVW9vXNLQOv3VE+GN77nvlD4zN4IcP34cmJ4z/vm7PslFl77wRHsSVq5cycTEBFV1Yurip9/1F9zzttedeO2yyy7jG9/4BitXrgQ4EdJJTjvlMXP2vnLlSqdEpM5QhXCSZcB7gR8GDgGfS7Krqh5uW9nCM1tAz9yhd3K/wfZly1fw7p8Z4y0f+gzHjx8/sb1s9OITH/5tvftveMdPvZxf3fkg97ztdQD85Ns/cqL/3VtffSKcB0N9fHycrXf/Df/zF36QF7/4xSfOvleuXHki0JM850tSZ8L+sssue07/mfVVq1YBp74JAKd9sxh08hvObMcefOMYfCMZPN7gG8/gXw6Db0oz7afrP1v7yW9apzv2fJupbfDfdzG+sc7HiUOG6U/MJK8E/ktV3dxt3wlQVb89W/+xsbHat2/fOf2O8fFx/sNv/+mJs8FztWLVizg+Ncm3vvmN57V/i2Of63GnJp9hZPTiU7anJp8BOLG+YtUV3L311cD0XX4z/6Yz/Wb6vueNP8Avvn8Px589fuK4M20vGBnlPW/8Ad7yh5/hD37xZlatWnUirAF+9/XX85Y//Ay/94ZXPaf/zPqOX/pR
AN7wzv914vgzbbfddT+/94ZXnTj2TL/ll76Q4/86yfHjx0+pbbZj33bX/c+pbWZ75ngzfQffEE6uZ2bfM/WfrX3wd5/p2PNtprbBf98WdfRtZpx/dsetz2d8c0rtYQvhnwQ2VNUvdNtvAP59Vb15oM9WYGu3+V3Agefxq74D+Np5ljuMFuu4YPGObbGOCxbv2OY6rq9V1YazdRqq6Qhmf+d4zrtEVW0Htp/XL0n2VdXY+RxjGC3WccHiHdtiHRcs3rFd6HEN29URh4CrB7bXAYcb1SJJvRu2EP4csD7JNUmWA5uAXY1rkqTeDNV0RFVNJXkzcD/Tl6h9oKr29/Crzms6Y4gt1nHB4h3bYh0XLN6xXdBxDdUHc5K01AzbdIQkLSmGsCQ1tKRCOMmGJAeSHExyR+t6zibJB5IcTfLQQNsVSXYnebRbXj7w2p3d2A4kuXmg/fuS/H332u9lCG5tSnJ1kk8leSTJ/iS/3LUv6PElWZFkb5IvdeP6ra59QY9roKZlSb6Y5OPd9mIZ11e7mh5Msq9rm5+xzTwkfLH/MP1B31eA7wSWA18Crm1d11lq/kHge4GHBtr+G3BHt34H8F+79Wu7MY0C13RjXda9thd4JdPXYf9v4EeHYGxrgO/t1lcC/7cbw4IeX1fDZd36RcBngVcs9HENjO9XgD8BPr7I/nv8KvAdJ7XNy9iW0pnwDcDBqvqHqvoWcA+wsXFNZ1RVfw3800nNG4Ed3foO4JaB9nuqarKqHgMOAjckWQOsqqq/ren/Sj40sE8zVXWkqr7QrU8AjwBrWeDjq2kz94df1P0UC3xcAEnWAa8D3j/QvODHdQbzMralFMJrgScGtg91bQvNVVV1BKaDDLiyaz/d+NZ26ye3D40kLwFezvRZ44IfX/cn+4PAUWB3VS2KcQHvAn4dGHzwymIYF0y/UX4yyee7RyPAPI1tqK4T7tlZb4le4E43vqEed5LLgD8D3lpV42eYQlsw46uqZ4Hrk7wQ+FiSl56h+4IYV5IfB45W1eeTvHouu8zSNnTjGnBjVR1OciWwO8mXz9D3go5tKZ0JL5Zbop/q/uyhWx7t2k83vkPd+sntzSW5iOkA/uOq+mjXvGjGV1X/DDwAbGDhj+tG4CeSfJXpqbzXJvkjFv64AKiqw93yKPAxpqcv52VsSymEF8st0buAzd36ZuDegfZNSUaTXAOsB/Z2f0ZNJHlF90ntzw3s00xXy93AI1X1joGXFvT4kqzuzoBJcjHwQ8CXWeDjqqo7q2pdVb2E6f93/rKqfpYFPi6AJJcmWTmzDvwI8BDzNbbWn0rO5w/wY0x/Cv8V4Dda1zOHej8MHAH+lel32S3Ai4A9wKPd8oqB/r/Rje0AA5/KAmPdf1RfAd5Dd6dk47H9ANN/qv0d8GD382MLfXzA9wBf7Mb1EPCfu/YFPa6Txvhqvn11xIIfF9NXTH2p+9k/kw3zNTZvW5akhpbSdIQkDR1DWJIaMoQlqSFDWJIaMoQlqSFDWJIaMoQlqaH/D/51zou5Vl5DAAAAAElFTkSuQmCC\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "s2 = np.random.lognormal(5, 1, 10000)\n",
+ "sns.displot(s2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 51,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWIAAAD4CAYAAADW1uzrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAPHklEQVR4nO3dX2xUZ3rH8d/D2Oyy9a4KJhsRgzJZeaXVqqratbXask2FELSYoGyl3OxFY19UWikXDg1RmqzW1LY0UtRGIiHuVdRWsdM/e5OsSoiNCm1Xe1M1tbtJYW1iJnhQIXTJGrXExGb95+2Fzzl7PB47NnjmMTPfj2R5zjvnPO/7zMDPZ44PwkIIAgD42eK9AACodQQxADgjiAHAGUEMAM4IYgBwVreenXfu3Bmy2WyZlgIA1WlkZOQXIYQHVnp+XUGczWY1PDx876sCgBpiZldWe55LEwDgjCAGAGcEMQA4I4gBwBlBDADOCGIAcEYQA4AzghgAnBHEAOCMIAYAZwQxADgjiAHAGUEMAM4IYgBwRhADgDOCGACcEcQA4IwgBgBnBDEAOFvX/1m3Ufr6+pTP55eMXbt2TZLU1NS04nHNzc3q7Ows69oAoNJcgjifz+u9C2Oa/8KOZCzz6f9Jkv7nTuklZT69WZG1AUCluQSxJM1/YYemv3Y42d52cVCSloylxc8DQLXhGjEAOCOIAcAZQQwAzghiAHBGEAOAM4IYAJwRxADgjCAGAGcEMQA4I4gBwBlBDADOCGIAcEYQA4AzghgAnBHEAOCMIAYAZwQxADgjiAHAGUEMAM4IYgBwRhADgDOCGACcEcQA4IwgBgBnBDEAOCOIAcAZQQwAzghiAHBGEAOAM4IYAJwRxADgjCAGAGcEMQA4I4gBwBlBDADOCGIAcEYQA4AzghgAnBHEAOCMIAYAZwQxADirSBD39fWpr6+vElOtyWZbD4DaVleJSfL5fCWmWbPNth4AtY1LEwDgjCAGAGcEMQA4I4gBwBlBDADOCGIAcEYQA4AzghgAnBHEAOCMIAYAZwQxADgjiAHAGUEMAM4IYgBwRhADgDOCGACcEcQA4IwgBgBnBDEAOCOIAcAZQQwAzghiAHBGEAOAM4IYAJwRxADgjCAGAGcEMQA4I4gBwBlBDADOCGIAcEYQA4AzghgAnBHEAOCMIAYAZwQxADgjiAHAGUEMAM4IYgBwVue9AA+3bt3SxMSE9u3b572Ue5LJZDQ/P598l6T6+notLCwsGX/iiSf01ltvJdshBGUyGS0sLGjHjh2anJzUs88+q7179+r555/X1atXNTs7q/n5eT355JMaGRnRzMyMrl+/rqeeekqvvPKKXnrpJbW0tEiSJicn1dvbq/b2dh0/fly7du1SXV2d5ubmdP36dfX19UmSOjs7tWfPHr344ouamJjQc889p4ceekg3b95UX1+fmpubl9Tq7u7WyZMnk/Guri7duXMnqbl9+3Z1dXVpfn5eCwsL2rp1q44dO6YTJ04ohKBcLidJ6u3tVXd3txobG5P66e24RiaTUS6XU2NjY9LX8ePHk96L1/j000/r1VdfTWrF4uPiNaTrpecufu3S/W4G6fVKWrb2WlHqfdtoNXlGfOXKFe8lbIg4fOPvkpIATY+/+eabCiFobm5OIYTkuRCCJicnJUknTpxQf3+/8vm8ZmZmkmPfeOMNjY6O6vLly5qentbLL7+shYWF5C+nJPX39+v8+fPq6enR9PS0Ll++rPHx8eSYXC6nXC6n6elpjY+Pa2BgQD09PQoh6Nq1a8k+xbVu3769ZHxsbGxJzXhsfHxc+Xxeo6OjyuVyGh0d1djYmAYGBpJ6AwMDS+qnt+Ma8THpvtK9F68xl8stqVV8XKl6xfuv1O9mkF5vqbXXikr0XnNBPDw8rIWFBe9lbDohBJ06dWpN+0nS1NSURkZGNDk5qTNnziiEoKmpqZLHFAoFFQqFZPv06dPL9i0UChoZGVlW
Kx4fGhpatv8777xTcq7Y4OBgUu/MmTPK5/PLtovrDg0NaXJyMulrtTUWCoWkVvwDrfi44nrp/Uu9doVCQfl8vuTrWEnptQ0NDS1be60o9b6VQ0UuTcRnPUePHpUk5fN5bfllWFeNLTO3lM9/ktS4W+fPn7+n4/Er3d3d2r9//7p/sKXP4IvrlarV3d2tubm5NdeJzc7OysySfXO5XFI/3i6uOzs7q4GBAYUQNDs7u6Y1zs/Pa2BgQM8884z6+/uXHJeul567eCwtl8vp9ddfX7W3cuvv70/Wlu4n3WstSL8O5ez9M8+Izex7ZjZsZsMff/zxhi+g0jgb3jhTU1M6d+5cyZC823qlak1NTSVn4usVHzc3N6dCoZDUj7eL64YQdPbsWZ07d67knKXWODc3p7Nnz0rSsuPS9dJzF4+lpc/qvaTXFkJY8jrGvdaCUu9bOXzmGXEI4TVJr0lSa2vrXf1taGpqkiSdPHlSknT06FGNXP75umosfP5Lav7Kg0mNu3XkyJEVP0JjfRoaGrR//34NDg5uSBg3NDRoZmZmWa2Ghgbdvn37rsLYzBRCUF1dnXbv3q2rV69qbm4u2b5y5cqSumamgwcPKoSgt99+e9mcpdZYV1engwcPSpIOHDiw5Lh0vfh1ivdPj6Vls9l197nRDhw4kKwt/lQRv45xr7Ug/TqUs/eau0bc09PjvYSq0dvbq46ODm3Zsr4/RplMZsV6pWr19vaqrm75OcNKdWL19fWqr69P9u3q6krqx9vFdevr69Xe3q6Ojo7k2M9aYyaTUXt7uyQtOy5dLz138VhaV1fXqn1VQnptxa9j3GstKPW+lUPNBXFra+u6g6MWmJkef/zxNe0nLZ4ZtrS0qLGxUYcOHZKZqaGhoeQx2Wx2yVnekSNHlu2bzWbV0tKyrFY83tbWtmz/xx57rORcscOHDyf1Dh06pObm5mXbxXXb2trU2NiY9LXaGrPZbFIrvq2p+Ljieun9S7122Wx2U9y+ll5bW1vbsrXXilLvWznU5H3EDz/8sCYmJryXcc828j7iY8eOae/evRodHV3TfcS9vb3JOjo6OlQoFFa8jzg+w4vvI25vb9ejjz665D7ieJ90re7u7iXjly5dSu4j7urq0vbt23Xp0qUV7yOOz17iesX103Xj1yt9xtPR0ZHczpfuI64R30dcfJYUH5deQ6m5V+t3Myheb/Haa0Wp922j2Xquu7W2tobh4eF1TxLf6VB8jXj6a4eTfbZdHJSkJWNp2y4OqmUDrhGXWg8AlJOZjYQQWld6ns/oAOCMIAYAZwQxADgjiAHAGUEMAM4IYgBwRhADgDOCGACcEcQA4IwgBgBnBDEAOCOIAcAZQQwAzghiAHBGEAOAM4IYAJwRxADgjCAGAGcEMQA4I4gBwBlBDADOCGIAcEYQA4AzghgAnBHEAOCMIAYAZwQxADgjiAHAGUEMAM4IYgBwRhADgDOCGACcEcQA4IwgBgBnBDEAOCOIAcAZQQwAzghiAHBWV4lJmpubKzHNmm229QCobRUJ4s7OzkpMs2abbT0AahuXJgDAGUEMAM4IYgBwRhADgDOCGACcEcQA4IwgBgBnBDEAOCOIAcAZQQwAzghiAHBGEAOAM4IYAJwRxADgjCAGAGcEMQA4I4gBwBlBDADOCGIAcEYQA4AzghgAnBHEAOCMIAYAZwQxADgjiAHAGUEMAM4IYgBwRhADgDOCGACcEcQA4IwgBgBnBDEAOCOIAcAZQQwAzghiAHBGEAOAM4IYAJwRxADgjCAGAGcEMQA4q/OaOPPpTW27OJjanpSkJWPF+0sPVmJpAFBRLkHc3Ny8bOzatTlJUlPTSmH7YMnjAOB+5xLEnZ2dHtMCwKbENWIAcEYQA4AzghgAnBHEAOCMIAYAZwQxADgjiAHAGUEMAM4IYgBwRhADgDOCGACcEcQA4IwgBgBnBDEAOCOIAcAZQQwAzghiAHBGEAOAM4IYAJwRxADgzEIIa9/Z7GNJ
V+5yrp2SfnGXx96varFnib5rTS32vd6eHw4hPLDSk+sK4nthZsMhhNaKTLZJ1GLPEn17r6PSarHvje6ZSxMA4IwgBgBnlQzi1yo412ZRiz1L9F1rarHvDe25YteIAQClcWkCAJwRxADgrOxBbGaHzOwDM8ub2Qvlnq/czOxvzOyGmV1Ije0ws7Nmdin6vj313Pej3j8wsz9IjbeY2fnouVfNzCrdy1qZ2R4z+1czGzOzn5nZ0Wi82vv+vJm9a2bvR333RuNV3bckmVnGzH5qZqej7VrouRCt9z0zG47GKtN3CKFsX5Iykj6U9BVJWyW9L+nr5Zyz3F+Sfk/SNyRdSI39haQXoscvSPrz6PHXo54/J+mR6LXIRM+9K+l3JJmkIUlt3r2t0vMuSd+IHn9R0njUW7X3bZIaosf1kv5d0reqve9ovcck/b2k07XwZzxab0HSzqKxivRd7jPib0rKhxAuhxB+KemHkr5T5jnLKoTwE0k3i4a/I6k/etwv6Q9T4z8MIdwJIUxIykv6ppntkvSlEMK/hcV3biB1zKYTQrgeQvjP6PEnksYkNan6+w4hhKlosz76Cqryvs1st6THJP1Variqe15FRfoudxA3Sfrv1PbVaKzaPBhCuC4thpakL0fjK/XfFD0uHt/0zCwr6be1eHZY9X1HH9Hfk3RD0tkQQi30/YqkP5W0kBqr9p6lxR+y/2RmI2b2vWisIn3X3ePCP0upayO1dL/cSv3fl6+LmTVIelPSn4QQbq1y6atq+g4hzEv6LTP7dUk/MrPfWGX3+75vMzsi6UYIYcTM9q3lkBJj91XPKd8OIXxkZl+WdNbMLq6y74b2Xe4z4quS9qS2d0v6qMxzevh59JFE0fcb0fhK/V+NHhePb1pmVq/FEP67EMJb0XDV9x0LIfyvpB9LOqTq7vvbkh43s4IWLyXuN7O/VXX3LEkKIXwUfb8h6UdavLRakb7LHcT/IemrZvaImW2V9F1Jp8o8p4dTkjqixx2S/jE1/l0z+5yZPSLpq5LejT7ifGJm34p+o9qeOmbTidb415LGQggnUk9Ve98PRGfCMrNtkg5Iuqgq7juE8P0Qwu4QQlaLf1//JYTwR6riniXJzH7NzL4YP5b0+5IuqFJ9V+A3kYe1+Fv2DyX9oBK//SxzP/8g6bqkWS3+9PtjSY2S/lnSpej7jtT+P4h6/0Cp355Kao3e6A8l/aWif+W4Gb8k/a4WP179l6T3oq/DNdD3b0r6adT3BUl/Fo1Xdd+pNe/Tr+6aqOqetXhn1/vR18/irKpU3/wTZwBwxr+sAwBnBDEAOCOIAcAZQQwAzghiAHBGEAOAM4IYAJz9P7PtgOygI3TYAAAAAElFTkSuQmCC\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "sns.boxplot(x=s2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 52,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAAFgCAYAAACFYaNMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAX7klEQVR4nO3df5Bd5X3f8fcXYWsxiEo4gq5ZuUC19gScAXt2qWMqT2LSWElcQzrBVaZxNR1adQaSmqZNBtw/OumMOmQm4yHTqdLRYGqlsU0Umx+K48HGskkgP2CFzLX5Od4iCzYIJEhV5FRSkPTtH3sk3b17JS2rPfc5d+/7NaO59z73nKvvaHY/Ovc5z4/ITCRJvXdO6QIkaVAZwJJUiAEsSYUYwJJUiAEsSYWcW7qAs7F27dp86KGHSpchSWcS3Rr7+gr49ddfL12CJM1bXwewJPUzA1iSCjGAJakQA1iSCjGAJakQA1iSCjGAJakQA1iSCqk1gCPihxHx/Yh4KiJ2VG0XRcTDEfGD6nFF2/F3RMRkRLwQER+vszZJKq0XV8A/nZnXZOZY9fp2YHtmjgLbq9dExJXAOuAqYC2wKSKW9KA+SSqiRBfEDcCW6vkW4Ma29nsz83Bm7gImgWt7X54k9UbdAZzANyPiyYjYULVdkpl7AKrHi6v2S4GX286dqtpmiIgNEbEjInbs27evxtIlqV51r4Z2XWa+EhEXAw9HxPOnObbbakGzNqzLzM3AZoCxsTE3tJPUt2oN4Mx8pXrcGxH3M92l8FpEDGfmnogYBvZWh08Bq9pOHwFeqbM+DaZDhw4xMTExo218fJyhoaFCFWlQ1RbAEXE+cE5mHqie/yzwX4BtwHrgzurxweqUbcCXIuJzwHuAUeCJuurT4JqYmOC2TQ+wfGQ1APunJrnrFlizZk3hyjRo6rwCvgS4PyKO/z1fysyHImIC2BoRNwMvATcBZOYzEbEVeBY4AtyamUdrrE8DbPnIalaOXlO6DA242gI4M18Eru7S/gZw/SnO2QhsrKsmSWoSZ8JJUiEGsCQVYgBLUiEGsCQV0tfb0ksL4eiRt2i1WrPaHRusuhnAGngHXt3Npl0HGd59cu0nxwarFwxgLWrdZr21Wi2OHZvZ+7Zs+IrTjgvu9jngVbLOjgGsRa1z1hvA1M5HWf6+8bP+HK+SdbYMYC16nbPe9k9NLsjnSGfLURCSVIgBLEmF2AWhRWOuN9ykpjCAtWgs1A03qVcMYC0qC3XDTeoFv5tJUiEGsCQVYgBLUiH2AUtddC7Q42gK1cEAlrroXKDH0RSqgwEsnUL7Aj2OplAd/E4lSYUYwJJUiAEsSYUYwJJUiAEsSYU4CkJaQN1WZHPbIp2KASwtoM4V2dy2SKdjAEvz1G07+1arxYXv+YduXaQ5MYCleeq2nb0z5vR2GMDSWejczt4Zc3o7HAUhSYUYwJJUiAEsSYUYwJJUiDfh1Lc6Jz24aLr6jQGsvtU56cEhYOo3BrD6Wvs29A4BU7/x+5okFWIAS1IhBrAkFWIAS1IhBrAkFWIAS1IhBrAkFWIAS1IhBrAkFeJMOPWFbptduvaD+p0BrL7Que4DuPaD+p8BrL7Rvu4DuPaD+p/f3ySpEANYkgoxgCWpEANYkgqpPYAjYklEfDcivla9vigiHo6IH1SPK9qOvSMiJiPihYj4eN21SVJJvbgC/gzwXNvr24HtmTkKbK9eExFXAuuAq4C1wKaIWNKD+iSpiFoDOCJGgF8A7m5rvgHYUj3fAtzY1n5vZh7OzF3AJHBtnfVJUkl1jwO+C/hNYFlb2yWZuQcgM/dExMVV+6XAX7UdN1W1zRARG4ANAO9973trKFlaOEePvEWr1ZrVPj4+ztDQUIGK1CS1BXBEfALYm5lPRsRPzeWULm05qyFzM7AZYGxsbNb7UpMceHU3m3YdZHj3yd60/VOT3HULrFmzpmBlaoI6r4CvAz4ZET8PDAEXRsQfAK9F
xHB19TsM7K2OnwJWtZ0/ArxSY31STywbvmLGDD7puNr6gDPzjswcyczLmL659u3M/BVgG7C+Omw98GD1fBuwLiKWRsTlwCjwRF31SVJpJdaCuBPYGhE3Ay8BNwFk5jMRsRV4FjgC3JqZRwvUJ0k90ZMAzsxHgEeq528A15/iuI3Axl7UJEmlORNOkgoxgCWpEANYkgoxgCWpEANYkgoxgCWpEANYkgoxgCWpEANYkgoxgCWpkBJrQUgDrdsawa4PPJgMYKnHOtcIdn3gwWUASwW4RrDAPmBJKsYAlqRCDGBJKsQAlqRCDGBJKsQAlqRCDGBJKsQAlqRCDGBJKsQAlqRCDGBJKsQAlqRCDGBJKsTV0NRIhw4dYmJi4sTrVqvFsWNeL2hxMYDVSBMTE9y26QGWj6wGYGrnoyx/33jhqqSFZQCrsZaPrD6xZu7+qcmyxUg18DudJBViAEtSIQawJBViAEtSIQawJBViAEtSIQawJBViAEtSIU7EkAo7euQtWq3WrPbx8XGGhoYKVKReMYClwg68uptNuw4yvHvJibb9U5PcdQusWbOmYGWqmwEsNcCy4StOTLvW4LAPWJIKMYAlqRADWJIKMYAlqRADWJIKMYAlqRADWJIKMYAlqRAnYqi4zh2QwV2QNRgMYBXXuQMyuAuyBoMBrEZo3wEZ3AVZg8HveJJUSG0BHBFDEfFERLQi4pmI+K2q/aKIeDgiflA9rmg7546ImIyIFyLi43XVJklNUOcV8GHgY5l5NXANsDYiPgzcDmzPzFFge/WaiLgSWAdcBawFNkXEkm4fLEmLQW0BnNN+VL18R/UngRuALVX7FuDG6vkNwL2ZeTgzdwGTwLV11SdJpdXaBxwRSyLiKWAv8HBmPg5ckpl7AKrHi6vDLwVebjt9qmqTpEWp1gDOzKOZeQ0wAlwbER84zeHR7SNmHRSxISJ2RMSOffv2LVClktR7PRmGlpn7I+IRpvt2X4uI4czcExHDTF8dw/QV76q200aAV7p81mZgM8DY2NisgFazOelCOqm2AI6IlcBbVfieB/wM8NvANmA9cGf1+GB1yjbgSxHxOeA9wCjwRF31qQwnXUgn1XkFPAxsqUYynANszcyvRcRfAlsj4mbgJeAmgMx8JiK2As8CR4BbM/NojfWpECddSNNqC+DM/B7wwS7tbwDXn+KcjcDGumqSpCax402SCjGAJakQA1iSCnE1NKlPdBvCNz4+ztDQUKGKdLYMYKlPdA7h2z81yV23wJo1awpXpvkygKU+0jmET/3NAJYa6OiRt2i1WjPanDG4+MwpgCPiusz88zO1SVoYB17dzaZdBxnefXJFVmcMLj5zvQL+b8CH5tAmaYEsG77CGYOL3GkDOCJ+EvgIsDIifr3trQsBF0uXpLNwpivgdwIXVMcta2t/E/iluoqSpEFw2gDOzD8F/jQivpCZu3tUkyQNhLn2AS+NiM3AZe3nZObH6ihKkgbBXAP4j4D/AdwNuESkJC2AuQbwkcz8vVorkaQBM9dR3X8cEbdExHBEXHT8T62VSdIiN9cr4PXV42+0tSVwxcKWI0mDY04BnJmX112IJA2auU5F/pfd2jPz9xe2HEkaHHPtgmifgD7E9J5uOwEDWJLmaa5dEL/W/joi/h7wv2qpSJIGxHzXtvt/wOhCFiJJg2aufcB/zPSoB5hehOfHga11FSVJg2CufcC/0/b8CLA7M6dqqEeSBsacuiCqRXmeZ3pFtBXA39VZlCQNgjkFcER8CngCuAn4FPB4RLgcpSSdhbl2QfwnYDwz9wJExErgW8BX6ipMkha7uQbwOcfDt/IG8x9BoQFy6NAhJiYmTrx2Y0nppLkG8EMR8Q3gy9Xrfw58vZ6S1K86wxamA/eex15kxarpUYtuLCmddKY94VYDl2Tmb0TEPwP+MRDAXwJf7EF96iMTExPctukBlo+sPtF2PHCPby7pxpLSSWe6Ar4L+CxAZt4H3AcQEWPVe/+0xtrUh5aP
rHYnX2mOztQZd1lmfq+zMTN3ML09kSRpns4UwEOnee+8hSxEkgbNmQJ4IiL+TWdjRNwMPFlPSZI0GM7UB3wbcH9E/AtOBu4Y8E7gF2usS5IWvdMGcGa+BnwkIn4a+EDV/CeZ+e3aK5OkRW6u6wF/B/hOzbVI0kBxSpIkFWIAS1IhBrAkFWIAS1IhBrAkFWIAS1IhBrAkFWIAS1IhBrAkFWIAS1Ihc92SSFLDHD3yFq1Wa1b7+Pg4Q0OnW0lWTWEAS33qwKu72bTrIMO7l5xo2z81yV23wJo1awpWprkygKU+tmz4ihlbQKm/2AcsSYUYwJJUiF0QmrdDhw4xMTFx4nWr1eLYMf9Pl+aqtgCOiFXA7wN/HzgGbM7M342Ii4A/ZHpX5R8Cn8rM/1OdcwdwM3AU+HeZ+Y266tPZm5iY4LZND7B8ZDUAUzsfZfn7xgtXJfWPOi9XjgD/ITN/HPgwcGtEXAncDmzPzFFge/Wa6r11wFXAWmBTRCzp+slqjOUjq1k5eg0rR6/hgotHSpcj9ZXaAjgz92Tmzur5AeA54FLgBmBLddgW4Mbq+Q3AvZl5ODN3AZPAtXXVJ0ml9aTDLiIuAz4IPA5ckpl7YDqkgYurwy4FXm47bapq6/ysDRGxIyJ27Nu3r9a6JalOtQdwRFwAfBW4LTPfPN2hXdpyVkPm5swcy8yxlStXLlSZktRztQZwRLyD6fD9YmbeVzW/FhHD1fvDwN6qfQpY1Xb6CPBKnfVJUkm1BXBEBPB54LnM/FzbW9uA9dXz9cCDbe3rImJpRFwOjAJP1FWfJJVW5zjg64BPA9+PiKeqts8CdwJbI+Jm4CXgJoDMfCYitgLPMj2C4tbMPFpjfZJUVG0BnJmP0b1fF+D6U5yzEdhYV02S1CROW5KkQpyKLC0i3dYIdn3g5jKApUWkc41g1wduNgNYWmRcI7h/2AcsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUiFORpUWs2+I84AI9TWEAS4tY5+I84AI9TWIAS4uci/M0l33AklSIASxJhRjAklSIASxJhRjAklSIoyA0y6FDh5iYmJjRdvjwYQCWLl16oq3VanHsmP+HS/NlAGuWiYkJbtv0AMtHVp9om9r5CEuWvZvh0Z9oa3uU5e8bL1GitCgYwOpq+cjqGWNH909Ncu7y4VltkubP74+SVIgBLEmFGMCSVIgBLEmFGMCSVIgBLEmFGMCSVIgBLEmFGMCSVIgBLEmFGMCSVIgBLEmFuBiPpK5LkLp1ff0MYGnAHD3yFq1Wa0Zbq9XinsdeZMWqUcCt63vFAJYGzIFXd7Np10GGdy850XZ8bWe3r+8tA1gaQMuGr3Bt5wbwJpwkFWIAS1IhBrAkFWIAS1Ih3oTTrDGgbjcv9YYBrFnb0LvdvNQbBrCAmdvQOyRJ6g2/Z0pSIQawJBViF8SA6bboijfdpDIM4AHTecMNvOkmlWIAD6D2G27gTTeplNq+d0bEPRGxNyKebmu7KCIejogfVI8r2t67IyImI+KFiPh4XXVJUlPU2fH3BWBtR9vtwPbMHAW2V6+JiCuBdcBV1TmbImIJkrSI1RbAmflnwN90NN8AbKmebwFubGu/NzMPZ+YuYBK4tq7aJKkJen3r+5LM3ANQPV5ctV8KvNx23FTVNktEbIiIHRGxY9++fbUWK0l1asrYo+jSlt0OzMzNmTmWmWMrV66suSxJqk+vA/i1iBgGqB73Vu1TwKq240aAV3pcmyT1VK8DeBuwvnq+HniwrX1dRCyNiMuBUeCJHtcmST1V2zjgiPgy8FPAj0XEFPCfgTuBrRFxM/AScBNAZj4TEVuBZ4EjwK2ZebSu2iSpCWoL4Mz85VO8df0pjt8IbKyrHklnp9s09vHxcYaGhgpV1P+cCSdpTjqnse+fmuSuW2DNmjWFK+tfBrCkOeucxq6z05RhaJI0cAxgSSrELghJsxw98hatVmtGm+tGLzwD
WNIsB17dzaZdBxnefXJNLNeNXngGsKSulg1f4brRNfP7hCQVYgBLUiEGsCQVYgBLUiEGsCQVYgBLUiEOQ5M0L90ma4ArpL0dBrCkeek2WcMV0t4eA1jSvHVO1tDbYx+wJBXiFXCf6rY7Aczuf+s8zgVVpOYwgPtU5+4E0L3/rfM4F1SRmsMA7mNz3Z2g/TgXVJGaw++iklSIASxJhRjAklSIASxJhRjAklSIoyAWETdSlPqLAbyIuJGiSut2EeDiPKdmAC8ybqSokjovAlyc5/QMYEkLygV65s7OQUkqxACWpEIMYEkqxACWpEK8CdcnXNdXWnwM4D7hur7qR27ceXoGcB9xXV/1GzfuPD0DWFKtOscFe1V8kgEsqae8Kj7JAJbUc86Wm+ZtdEkqxCvgBuq25bzDzqTFxwBuoG5bzjvsTFp8DOCG6txy3mFn0uJjADeAs9ykwWQAN4Cz3DToBnUnDQO4IZzlpkE2qDtpGMCSGmEQxwYbwDXqNpwMBuOrlbTQuv0+9fvvkgFco27DyQblq5V0Nrr1CbdaLe557EVWrBoFFsfvkgFcs87hZKf6wXLUg3RSt/Uijt+cPv771O136fDhwwAsXbp0RntTr5QN4B473Q+WpJM6+4Q7b053/116hCXL3s3w6E/MOK+pV8oG8AKa63jeM/1gSZqbbr9L5y4f7pubeQMXwAvVkX+q9Rra+6i8spXKa/IY48YFcESsBX4XWALcnZl3LuTnd94Y+5vdz3PzR1tcffXVJ47p1o/U2dYZtjC7j8orW6m8Jo8xblQAR8QS4L8D/wSYAiYiYltmPruQf0/npIdN33r2jP1InW2dYXv8syQ1T3tXxVxu3vXqZl6jAhi4FpjMzBcBIuJe4AZgQQO4PSh/tHeKJcvePa/PObDnRfZdcP7Mzzp48ERb5+u5ti3UMX62n136s0v//d3a9nzvz/ntv3iTFd96+sQxr7/4NEvOW8aK4X/Q9TXA376xhy/81/+4oFfOkZkL9mFnKyJ+CVibmf+6ev1p4B9l5q+2HbMB2FC9fD/wQs8Lne3HgNdLF9FFU+sCa5uPptYF1nYmr2fm2s7Gpl0BR5e2Gf9DZOZmYHNvypmbiNiRmWOl6+jU1LrA2uajqXWBtc1X00b/TwGr2l6PAK8UqkWSatW0AJ4ARiPi8oh4J7AO2Fa4JkmqRaO6IDLzSET8KvANpoeh3ZOZzxQuay4a1SXSpql1gbXNR1PrAmubl0bdhJOkQdK0LghJGhgGsCQVYgDPU0SsiojvRMRzEfFMRHymdE3HRcRQRDwREa2qtt8qXVO7iFgSEd+NiK+VrqVdRPwwIr4fEU9FxI7S9bSLiOUR8ZWIeL76mfvJ0jUBRMT7q3+v43/ejIjbStcFEBH/vvr5fzoivhwR5Rd/6GAf8DxFxDAwnJk7I2IZ8CRw40JPm56PiAjg/Mz8UUS8A3gM+Exm/lXh0gCIiF8HxoALM/MTpes5LiJ+CIxlZulB+7NExBbg0cy8uxoh9K7M3F+4rBmqpQT+munJU7sL13Ip0z/3V2bmwYjYCnw9M79Qsq5OXgHPU2buycyd1fMDwHPApWWrmpbTflS9fEf1pxH/00bECPALwN2la+kXEXEh8FHg8wCZ+XdNC9/K9cD/Lh2+bc4FzouIc4F30cA5BQbwAoiIy4APAo8XLuWE6mv+U8Be4OHMbEptdwG/CRwrXEc3CXwzIp6sprw3xRXAPuB/Vl03d0fE+Wc6qYB1wJdLFwGQmX8N/A7wErAH+L+Z+c2yVc1mAJ+liLgA+CpwW2a+Wbqe4zLzaGZew/Rswmsj4gOFSyIiPgHszcwnS9dyCtdl5oeAnwNujYiPli6oci7wIeD3MvODwN8Ct5ctaaaqW+STwB+VrgUgIlYwvZDX5cB7gPMj4lfKVjWbAXwWqv7VrwJfzMz7StfTTfVV9RFg1kIgBVwHfLLqa70X+FhE/EHZkk7KzFeqx73A/UyvztcEU8BU27eYrzAd
yE3yc8DOzHytdCGVnwF2Zea+zHwLuA/4SOGaZjGA56m60fV54LnM/FzpetpFxMqIWF49P4/pH8bnixYFZOYdmTmSmZcx/XX125nZiKuSiDi/uplK9fX+Z4GnT39Wb2Tmq8DLEfH+qul6FniJ1gXwyzSk+6HyEvDhiHhX9bt6PdP3aRqlUVOR+8x1wKeB71d9rQCfzcyvlyvphGFgS3VX+hxga2Y2ashXA10C3D/9u8q5wJcy86GyJc3wa8AXq6/6LwL/qnA9J0TEu5jeROHflq7luMx8PCK+AuwEjgDfpYFTkh2GJkmF2AUhSYUYwJJUiAEsSYUYwJJUiAEsSYUYwJJUiAEsSYX8f+fVXVuUDryBAAAAAElFTkSuQmCC\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Why \"lognormal\"?\n",
+ "\n",
+ "sns.displot(np.log(s2))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Box plots\n",
+ "\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Outliers, missing values\n",
+ "\n",
+ "An *outlier* is an observation far from the center of mass of the distribution. It might be an error or a genuine observation: this distinction requires domain knowledge. Outliers influence the outcomes of several statistics and machine learning methods: it is important to decide how to deal with them.\n",
+ "\n",
+ "A *missing value* is an observation without a value. There can be many reasons for a missing value: the value might not exist (hence its absence is informative and it should be left empty) or might not be known (hence the value exists but is missing from the dataset and it should be marked as NA).\n",
+ "\n",
+ "*One way to think about the difference is with this Zen-like koan: An explicit missing value is the presence of an absence; an implicit missing value is the absence of a presence.*"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Summary statistics\n",
+ "A statistic is a function of a collection of observations or, otherwise stated, a measure over a distribution.\n",
+ "\n",
+ "A statistic is said to be *robust* if not sensitive to outliers.\n",
+ "\n",
+ "* Not robust: min, max, mean, standard deviation.\n",
+ "* Robust: mode, median, other quartiles.\n",
+ "\n",
+ "A closer look at the mean:\n",
+ "\n",
+ "$\\bar{x} = \\frac{1}{n} \\sum_{i}x_i$\n",
+ "\n",
+ "And variance (the standard deviation is the square root of the variance):\n",
+ "\n",
+ "$Var(x) = \\frac{1}{n} \\sum_{i}(x_i - \\bar{x})^2$\n",
+ "\n",
+ "The mean, the median, etc. are measures of location (e.g., the typical value); the variance is a measure of dispersion."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "4.993993145847454\n",
+ "240.65275836549188\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Not robust: min, max, mean, standard deviation\n",
+ "\n",
+ "print(np.mean(s1)) # should be 5\n",
+ "print(np.mean(s2))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4.993993145847454"
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "np.mean(s1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "4.990826061241218\n",
+ "147.72833254812608\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Robust: median, other quartiles\n",
+ "\n",
+ "print(np.quantile(s1, 0.5)) # should coincide with mean and mode\n",
+ "print(np.quantile(s2, 0.5))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Questions\n",
+ "\n",
+ "* Calculate the min, max, mode and sd. *hint: explore the numpy documentation!*\n",
+ "* Calculate the 90% quantile values.\n",
+ "* Consider our normally distributed data in s1. Add an outlier (e.g., value 100). What happens to the mean and mode? Write down your answer and then check."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " annual_salary \n",
+ " a_age \n",
+ " length \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " count \n",
+ " 7870.000000 \n",
+ " 9303.000000 \n",
+ " 9645.000000 \n",
+ " \n",
+ " \n",
+ " mean \n",
+ " 5.916921 \n",
+ " 14.266688 \n",
+ " 5.005694 \n",
+ " \n",
+ " \n",
+ " std \n",
+ " 6.985214 \n",
+ " 2.902770 \n",
+ " 1.462343 \n",
+ " \n",
+ " \n",
+ " min \n",
+ " 0.166667 \n",
+ " 1.000000 \n",
+ " 0.083333 \n",
+ " \n",
+ " \n",
+ " 25% \n",
+ " 3.000000 \n",
+ " 12.000000 \n",
+ " 4.000000 \n",
+ " \n",
+ " \n",
+ " 50% \n",
+ " 4.000000 \n",
+ " 14.000000 \n",
+ " 5.000000 \n",
+ " \n",
+ " \n",
+ " 75% \n",
+ " 6.000000 \n",
+ " 16.000000 \n",
+ " 6.000000 \n",
+ " \n",
+ " \n",
+ " max \n",
+ " 180.000000 \n",
+ " 50.000000 \n",
+ " 15.000000 \n",
+ " \n",
+ " \n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ " annual_salary a_age length\n",
+ "count 7870.000000 9303.000000 9645.000000\n",
+ "mean 5.916921 14.266688 5.005694\n",
+ "std 6.985214 2.902770 1.462343\n",
+ "min 0.166667 1.000000 0.083333\n",
+ "25% 3.000000 12.000000 4.000000\n",
+ "50% 4.000000 14.000000 5.000000\n",
+ "75% 6.000000 16.000000 6.000000\n",
+ "max 180.000000 50.000000 15.000000"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Let's explore our dataset\n",
+ "df_contracts[[\"annual_salary\",\"a_age\",\"length\"]].describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Relating two variables\n",
+ "\n",
+ "### Covariance\n",
+ "\n",
+ "Measure of association, specifically of the joint linear variability of two variables:\n",
+ "\n",
+ "$Cov(x, y) = \\frac{1}{n} \\sum_{i}(x_i - \\bar{x})(y_i - \\bar{y})$\n",
+ "\n",
+ "Its normalized version is called the (Pearson's) correlation coefficient:\n",
+ "\n",
+ "$r_{xy} = \\frac{Cov(x, y)}{\\sqrt{Var(x)\\,Var(y)}}$\n",
+ "\n",
+ "Correlation is helpful to spot possible relations, but it is tricky to interpret and does not capture every kind of relationship:\n",
+ "\n",
+ " \n",
+ "\n",
+ "See: https://en.wikipedia.org/wiki/Covariance and https://en.wikipedia.org/wiki/Pearson_correlation_coefficient.\n",
+ "\n",
+ "*Note: correlation is not causation!*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " annual_salary \n",
+ " a_age \n",
+ " length \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " annual_salary \n",
+ " 1.000000 \n",
+ " 0.205404 \n",
+ " -0.361611 \n",
+ " \n",
+ " \n",
+ " a_age \n",
+ " 0.205404 \n",
+ " 1.000000 \n",
+ " -0.430062 \n",
+ " \n",
+ " \n",
+ " length \n",
+ " -0.361611 \n",
+ " -0.430062 \n",
+ " 1.000000 \n",
+ " \n",
+ " \n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ " annual_salary a_age length\n",
+ "annual_salary 1.000000 0.205404 -0.361611\n",
+ "a_age 0.205404 1.000000 -0.430062\n",
+ "length -0.361611 -0.430062 1.000000"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_contracts[[\"annual_salary\",\"a_age\",\"length\"]].corr()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAEGCAYAAABhMDI9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAABAjElEQVR4nO3deXycZbnw8d81k0wme9M0TdKWNk2bLqQbECoqm61oxdKyCaKCAr7Ie0A44oIbIEU9ighHhFdEdhAERWU5yBGBI+gBIQW6UaClG22TNE3b7DOTmbnfP2bpPJln2sxkpjNpru/nk0+SJzP3XJk2z/U893LdYoxBKaWUinBkOwCllFK5RRODUkopC00MSimlLDQxKKWUstDEoJRSyiIv2wEM17hx40xdXV22w1BKqRFl5cqVu40xVXY/G/GJoa6ujubm5myHoZRSI4qIbE30M+1KUkopZaGJQSmllIUmBqWUUhaaGJRSSlloYlBKKWUx4mclDUcwaNjS0Utbl4fqMjd1lcU4HJLtsJRSKqtGbWIIBg3PrmvlqsfewjMQxJ3v4OZzFrCksUaTg1JqVBu1XUlbOnqjSQHAMxDkqsfeYktHb5YjU0qp7Bq1iaGtyxNNChGegSC7uj1ZikgppXLDqE0M1WVu3PnWX9+d72B8qTtLESmlVG4YtYmhrrKYm89ZEE0OkTGGusriLEemlFLZldHBZxG5B1gK7DLGzAkfexSYGX7IGGCfMWaBiNQB64F3wz971RhzaaZicziEJY01zLriBHZ1exhfqrOSlFIKMj8r6T7gNuCByAFjzLmRr0Xk50BnzOPfN8YsyHBMUQ6HUF9VQn1VyaF6SaWUynkZTQzGmJfCdwJxRESAc4BFmYxBKaVUcrI5xnAC0GaM2RBzbKqIvCkifxeRExI9UUQuEZFmEWlub2/PfKRKKTWKZDMxnAc8EvN9CzDZGHMUcBXwsIiU2T3RGHOnMabJGNNUVWW7z4RSSqkUZSUxiEgecCbwaOSYMcZrjOkIf70SeB+YkY34lFJqNMvWHcPHgXeMMdsjB0SkSkSc4a/rgQZgU5biU0qpUSujiUFEHgFeAWaKyHYRuTj8o89i7UYCOBFYLSKrgD8Alxpj9mQyPqWUUvEyPSvpvATHv2Rz7HHg8UzGo5RS6uBG7cpnpZRS9jQxKKWUstDEoJRSykITg1JKKQtNDEoppSw0MSillLLQxKCUUspCE4NSSikLTQxKKaUsNDEopZSy0MSglFLKQhODUkopC00MSimlLDQxKKWUstDEoJRSykITg1JKKQtNDEoppSw0MSillLLI9J7P94jILhFZG3PsByKyQ0TeCn+cGvOz74jIRhF5V0Q+mcnYlFJK2cv0HcN9wBKb47cYYxaEP54BEJEjgc8CjeHn/D8RcWY4PqWUUoNkNDEYY14C9gzx4cuB3xljvMaYzcBGYGHGglNKKWUrW2MMl4vI6nBXU0X42ETgg5jHbA8fiyMil4hIs4g0t7e3ZzpWpZQaVbKRGH4FTAMWAC3Az8PHxeaxxq4BY8ydxpgmY0xTVVVVRoJUSqnR6pAnBmNMmzEmYIwJAr9hf3fRduCImIdOAnYe6viUUmq0O+SJQURqY749A4jMWHoS+KyIFIjIVKABeO1Qx6eUUqNdXiYbF5FHgJOBcSKyHbgOOFlEFhDqJtoCfAXAGLNORB4D3gb8wGXGmEAm41NKKRVPjLHtxh8xmpqaTHNzc7bDUEqpEUVEVhpjmux+piuflVJKWWhiUEopZaGJQSmllIUmBqWUUhaaGJRSSlloYlBKKWWhiUEppZSFJgallFIWmhiUUkpZaGJQSilloYlBKaWUhSYGpZRSFpoYlFJKWWhiUEopZaGJQSmllIUmBqWUUhaaGJRSSlloYlBKKWWhiUEppZRFRhODiNwjIrtEZG3MsZ+JyDsislpE/iQiY8LH60SkX0TeCn/ckcnYlFJK2cv0HcN9wJJBx54D
5hhj5gHvAd+J+dn7xpgF4Y9LMxybUkopGxlNDMaYl4A9g4791RjjD3/7KjApkzEopZRKTrbHGC4C/hLz/VQReVNE/i4iJyR6kohcIiLNItLc3t6e+SiVUmoUyVpiEJHvAX7gt+FDLcBkY8xRwFXAwyJSZvdcY8ydxpgmY0xTVVXVoQlYKaVGiawkBhH5IrAU+LwxxgAYY7zGmI7w1yuB94EZ2YhPKaVGs0OeGERkCXA1sMwY0xdzvEpEnOGv64EGYNOhjk8ppUa7vEw2LiKPACcD40RkO3AdoVlIBcBzIgLwangG0onAChHxAwHgUmPMHtuGlVJKZUxGE4Mx5jybw3cneOzjwOOZjEcppdTBDbkrSUTmZDIQpZRSuSGZMYY7ROQ1Efm3yGplpZRSh58hJwZjzPHA54EjgGYReVhETslYZEoppbIiqVlJxpgNwPcJzSo6Cbg1XPfozEwEp5RS6tBLZoxhnojcAqwHFgGnGWNmh7++JUPxKaWUOsSSmZV0G/Ab4LvGmP7IQWPMThH5ftojU0oplRVDSgzhhWcfGGMetPt5ouNKKaVGniF1JRljAkCliLgyHI9SSqksS6YraSvwTxF5EuiNHDTG3Jz2qJRSSmVNMolhZ/jDAZRmJhyllFLZNuTEYIy5PpOBKKWUyg1DTgwiUgV8C2gE3JHjxphFGYhLKaVUliSzwO23wDvAVOB6YAvwegZiUkoplUXJJIZKY8zdwIAx5u/GmIuA4zIUl1JKqSxJZvB5IPy5RUQ+TWggelL6Q1JKKZVNySSGH4pIOfB14JdAGfC1jESllFIqa5KZlfR0+MtO4GOZCUcppVS2HTQxiMgvAZPo58aYK9IakVJKqawayh1Dc6qNi8g9wFJglzFmTvjYWOBRoI7QzKZzjDF7wz/7DnAxoT2frzDG/Heqr61GlmDQsKWjl7YuD9Vlbuoqi3E4JNthKTUqHTQxGGPuH0b79xGqyvpAzLFvA88bY34iIt8Of3+1iBwJfJbQOokJwN9EZEa4TpM6jAWDhmfXtXLVY2/hGQjizndw8zkLWNJYo8lBqSxIZj+GKhG5SUSeEZEXIh8Heo4x5iVgz6DDy4FIsrkfOD3m+O+MMV5jzGZgI7BwqPGpkWtLR280KQB4BoJc9dhbbOnoPcgzlVKZkOwCt/UMf4FbtTGmBSD8eXz4+ETgg5jHbQ8fiyMil4hIs4g0t7e3pxCCyiVtXZ5oUojwDATZ1e3JUkRKjW65tMDNrs/AdtDbGHOnMabJGNNUVVWVxhBUNlSXuXHnW/8ruvMdjC91J3iGUiqTkkkMlgVuInIUqS1waxORWoDw513h49uBI2IeN4nQIjp1mKurLObmcxZEk0NkjKGusjjLkSk1OmVjgduTwBeBn4Q/PxFz/GERuZnQ4HMD8FoK7asRxuEQljTWMOuKE9jV7WF8qc5KUiqbMrrATUQeAU4GxonIduA6QgnhMRG5GNgGfCbc/joReQx4G/ADl+mMpNHD4RDqq0qoryrJdihKjXrJlN2+Efgh0A88C8wH/t0Y81Ci5xhjzkvwo8UJHv8j4EdDjUkppVT6JTPG8AljTBehBWvbgRnANzMSlVJKqaxJJjHkhz+fCjxijBm8PkEppdRhIJnB56dE5B1CXUn/Ft7RbdRPNM9EKYd0t6nlJpRSyUhm8PnbIvJToMsYExCRPkKrlQEQkVOMMc9lIshclYlSDuluU8tNKKWSlUxXEsaYvZGZQsaYXmNMa8yPf5rWyEaATJRySHebWm5CKZWspBLDQYy6y89MlHJId5tabkIplax0JoaEezYcrjJRyiHdbWq5CaVUstKZGEadTJRySHebWm5CKZUsMSY9F/oi8kdjzJlpaSwJTU1Nprk55b2Ehi0y4yedpRzS3WYmYlRKjWwistIY02T7s4MlBhE54MneGPPHYcQ2bNlODEopNRIdKDEMZbrqaQf4mQGymhiUUkql11C29rzwUASilFIqNySz8hkR+TShPZmj
U1qMMSvSHZRSSqnsSaa66h1AEaGS23cBZ6P7JWSElsRQSmVTMncMHzHGzBOR1caY60Xk5+j4QtppSQylVLYls46hP/y5T0QmENrqc2r6QxrdtCSGUirbkkkMT4vIGOBnwBvAFuB3GYhpVNOSGEqpbEumuuoN4S8fF5GnAbcxpjMzYY1ekRIWsSfzdJTESFd7SqnD35DvGETkgsgHcC6wPPx10kRkpoi8FfPRJSL/LiI/EJEdMcdPTaX9kUxLYiilsm3IJTFE5Jcx37oJ7dv8hjHm7GEFIOIEdgAfAi4EeowxNw31+Yfjyme/P8i6lk5aOj3UlrtprC0nLy/1slZaEkMpNdhwVz4DYIz56qBGy4EHhxkbhBLM+8aYrSJ6sgoGDX9d35bWWUQOh1BfVUJ9VUmao1VKHY6GU121D2hIQwyfBR6J+f5yEVktIveISEUa2h9RdBaRUirbkhljeEpEngx/PA28CzwxnBcXERewDPh9+NCvgGnAAqAF+HmC510iIs0i0tze3j6cEHKOziJKj2DQsKm9h1fe382m9h6CwVG3XYhSKUtmgVtsv78f2GqM2T7M1/8UoXGKNoDIZwAR+Q3wtN2TjDF3AndCaIxhmDHkFJ1FNHy6qE+p4UlmjOHvGXj984jpRhKRWmNMS/jbM4C1GXjNrBhqWYq6ymJu+9xRrN7eSdCAU2DupPKcmkWUiRIb6WwzUXfcrCtO0HEWpYYgmVpJZwI/BcYT2t9ZAGOMKUvlhUWkCDgF+ErM4RtFZAGhct5bBv1sxEr2CtbnN9z50ibLY3NFJq7G093mgbrjNDEodXDJDD7fCCwzxpQbY8qMMaWpJgUAY0yfMaYydpGcMeZ8Y8xcY8w8Y8yymLuHES2ZAeVcH3zORHzpblP3uVZqeJJJDG3GmPUZi2SEGsogZzIDyrk++JyJ+NLdpi7qU2p4khl8bhaRR4E/A97IwWxv7ZlNQ+0CSWZAOdcHnzMRX7rbdDiEJY01zLriBF3Up1QKkln5fK/NYWOMuSi9ISUnmyufN7X3cOqtL8ed0J4ZNMiZTB96rs+oCQYNL7zbFjc4vmhm9bDGGF7euIvu/gC9Xj/F7jxK3U5OmD4+J35npQ5H6Vr5rFt8DjLUQc5krmBHwtVuugfHg0FDe7eP7/95bbTNH54+h2DQ5NTvrdRokcyspCrg/wB1sc/L9h1DNiXTBZJMWYpcLmGRiamg61o6o0kh0ub3/7yWhvElzD9i1C1+Vyrrkhl8fgIoB/4G/FfMx6g1Ggc5MzH43NJp32ZrZ24MuCs12iQz+FxkjLk6Y5GMQCOh2yfdMjH4XFteaNtmTXluDLgrNdoku4PbqNsf4WAi3T7H1Y+jvqrksE4KkJm7pMbaMn54+hxLmz88fQ6NteVpiVkplZxkZiV1A8WEpqoOMMyVz+lyOO7H4PMFWL2zk9YuD7VlbuZOKMflcmY7rCiPx8+alk5au7zUlBUwt7YctzuZm894/f0DrGntoq3LS3VZAXNryigszE+5vUyU7VDqcJKuWUmlIjKWUKltvcfPEJ8vwJ9X7+TaJ/bP0FmxfA6nz5uQE8nB5wvw5NqWtMbn8wV4al1r2trM9Sm/SuW6ZMpufxn4O/As8IPw52szE9botXpnZ/QECaFB2GufWMvqnbmxvXYm4kt3m7leVkSpXJfMGMOVwLGEym1/DDgK2J2RqEax1gSzftq6cmOGTibiS3ebuV5WRKlcl0xi8BhjPAAiUmCMeQeYmZmwRq/aBAXgqstyo/cuE/HVltu3WZNim1pET6nhSSYxbBeRMYRqJT0nIk8AOzMR1Gg2d0I5K5ZbZ+isWD6HeRNyY4ZOJuKrLHZx3WmNljavO62RyhJXSu2NxvUlSqXTkGclWZ4kchKhxW7PGmN8aY8qCdmelZSJ2S+RWUmRNufl2KykdMf3yvu7+Y9n1vPlE6fR7/NT6Mrjrpfe57ufns1x9eNSajPy7zJa1pcolay0zEqKlaHd3LIu2ZN8
pma/uFxOmurGpvz8TMvLczC22MVAIMjYYhd5ecnceMarLnPz3q4ernjkzeix4Xb95HJZEaVy3fD+og8jkZP8qbe+zHm/+Ren3voyz65rPeAm8pt3289+2bz78J39ksr7dDDa9aNUbhneqqTDSCrF4bbu6aWiyMWZR09CwjcIj6/czrY9vUwbf3heqWaiiJ7DIXxidjWPXnIcLZ0eassLaawt064fpbJEE0NYKvsEl7vzueDDU/jF8xuiXUlXLm6gzJ36it1cl4n9lINBw1/Xt+mCNKVyRNa6kkRki4isEZG3RKQ5fGysiDwnIhvCnw9ZzeVUpjjmOyWaFCB0gvzF8xvIdw7vZOb3B1n1wV6eXdvCqg/24fcHD/6kA/B4/Ly+uYOnVu3k9c0deDz+lNvKxFTQTCxIG8qWq0ope9keY/iYMWZBzMj4t4HnjTENwPPh7w+JVPq5e7wB26vnXl8g5Tj8/iB/XrWDc+98lUsfeoNz73yFP6/akXJy8Hj8PLmmhfPveY2vPvIm59/zGk+uaUk5OUyuKLIteDe5oiil9gBaE5TdTnWBWybGQZQaTXKtK2k5cHL46/uB/wEOSanvVEpoFxfk2ZaLLhrG1M10b1qzpqWTa58cVG7iybVMHVfEsVMrk25v294+fvnCBi4+vh4RMAZ++cIGjp5ckXJXUkGew/Z9zHemdt2SiXEQpUaTbN4xGOCvIrJSRC4JH6s2xrQAhD+Pt3uiiFwiIs0i0tze3p62gJItoe0LBLhiUYPl6vmKRQ0MBFLv+kn3pjWtXd4EV+PelNpr6/KwtaOf21/cyG0vbOT2FzeytaN/WOUmOj0+2/ex25PaEhktiaHU8GTzjuGjxpidIjKe0Erqd4b6RGPMncCdEFrglqkAD6ayuIBHm7dZrp4fbd7Gkjk1KbeZ7k1rasoKbNurLitIqb1MbNRTWezm0ea3497HWz97VM7EqNRokrU7BmPMzvDnXcCfgIVAm4jUAoQ/78pWfENRV1nM1Utmc/c/NnHbCxu5+x+buHrJ7JzatGZubTkrlg0qYbFsDnNTbC9TG/V8dVGD5X386qKGlH9nXReh1PCkVBJj2C8qUgw4jDHd4a+fA1YAi4EOY8xPROTbwFhjzLcO1FamS2IcaDV0MGh44d02Vm/vJGjAITBvUjmLZlYPa5ql3x9kXUsnrZ0easrdNNaWD2t1caY2wUlnuYl0b06kJTGUOrADlcTIVmKoJ3SXAKHurIeNMT8SkUrgMWAysA34jDFmz4HaymRiOFjJi03tPZx668txXRbP5NAg50jYtGYkxKjU4eZAiSErXUnGmE3GmPnhj0ZjzI/CxzuMMYuNMQ3hzwdMCpl2sPn1iQY5c2XvBBgZm9aMhBiVGk2yvY4hpx1sdkuRK892sddwpqum20iYoTMSYlRqNNHEcAAHW+Wbiemq6TYSNq0ZCTEqNZrk2gK3nBEMGhwCPz5jLt/90xpL33dkdktlcQFvfdDBr88/hr29A4wtzuehVzfbTleNDCjHFolLNKCczsHiuspifnneUazZERogdwrMmVg+rBk6vf1e1rX2RONrrCmhuDC16a+RGB+4qIlAUGjv9lJVWoDTYYYVYyb2yVBqtNDEYCN2MLSiyMUlJ9Yzo7qU2TVlTB23/wQzqbyQjx85ga88uDKaOFYsn8Ok8kJLe5EyF5EVzZEpqKfPnxiXHPr7B3hqbWt0tXJkeulpc2pSSg4+X4B9fQPc+dKmaHs3LJ+DzxfA7U7+n7+338t/rd0VF9+n54xPOTn0er1s2e2Ja3N2jZfSwuTvGnQwW6nh0a4kG7GDoS2dHm59fiPf+P0qRLCcWNa3dXH7i6HyEJcvms6XT6jn9hc3sL6ty9JeojIX61o64157TWuXbQmLNa1dcY8dijUtnVzzhLW9a55Yyxqb1x6Kda09tvGta+1JqT2A9a29tm2ub01t8FkHs5UaHr1jsDHU0tIdvV7ObZrMrS/sL7t9xaIG9vRay00c
qMzF/CMGv3Z6S1ikvyRGetvLRJuZKA2u1Giidww2hjoYWuZ2RZMChE4+t76wgVK3dRP7SJmLwe3ZlbmoDpewGPzYVEtY1KS5vXTHl4k2dTBbqeEZlYnhYLX6E5VUmFxRZHmebyBIRZGLyz42ncsXhT4qilxxs5KSKXNRkOfg+mWNlsfGfp+sdJfEaKwpsW2vsSb1K/GKIicrBv3OK5Y1MrY4tWm/WhJDqeEZdV1JQxmYtCvBPbmiKG6Xsfu+dKztDm61g+4E8vIcnD5/Ig3jSw5a5qLUnc/KLbu550vH0tHjpbKkgD+/sY1j68am9Pu6XE7qqtzc+6Vjae/xUlVSgNNpUi43UVxYwKfmVFE3bmF0VtLsmuJhzUpySB5dff08cOFC2ro9VJe6Wf3BboTUfmcAV55wyYn10VIlrjwddFZqqEZdYhhqrf5ICe7IsU3tPXHPe/ODfbY7uJ0yuzrudfPyHMw/oiJuTGGwSeWFNE2t4qL7Xj/gTKeh2ranl5VbOuOS1/iSQurGJX+V7/cH+e+324c0w2qoHAIDxskF975miTHVCURbOnq5/OE3c7pUiVK5bNR1JaW6ytbueb0++x3cPtjbl3J869u6uHbQLKJrn1gbN9NpqFq7vLbJK9WB3WRmWA3V5o5eHnhla3R218XH1/PAK1tTnkU0EkqVKJXLRl1iSHVg0u55TiFBSYzUb8TSvVFPV/+AbXud/QM5ER9AsSvP0tUj4a6fVN/HkVCqRKlcNuoSQ6oDk3bPm39EOdedZh00ve60RmrKU+9vT2YG01CUuJ227RUXpHaSTHd8ADXlBVx60vTofgx3vbyJS0+anvL7OBJKlSiVy0bdGEMqezsnel4waPjda1v59fnHsK93gDHF+fz21c0sTHGgGEIzmO6/6BgwzujrIIGUN62pKMrnlnPmEwhCr9dPsTsPp8DY4tRKbDTWlnHTZ+bzXlt3tMRGQ3VpyvEB+APw4jstcaVFUn0fK4sL2NPTx30XLqQ9/B4+t24HY4tT31kPkitrotRINuoSA8QPLKf6vDe37eFjM2stJTGuW9pIV4p7FQP4Bvxs3e2NKw8xt8ZPXp7r4A0M4s5z0tnv5wdPrYu294PTGnHnpXbHEAwa+n2BuBIbg6f8JqPb42XxbOv7eP2yRro9XiD5weLakgJm1FTwpZjB7BXL5lBbkvqdXDJlTZQa6fR/9DD4A4bH39jGjWfP56dnzuVnZ8/n8Te24Q+kfpJc29ptWx5ibWt3Su3t6vZFk0KkvR88tY5d3aklr9U77UtsrN6Z+uCzLwDXPWmN8bon1+ELpNbe2gRlRdamWFYEMjPorlSuGpV3DOnS5/NzTtNkvvWHVTFXpo30+fwpt5nu8hDtPfbt7e5JtcRG+mf8JPqdd+VIGRBIrqyJUiOd3jEMQ0lBPtcOutK99sl1lBTE99/7/UFWfbCXZ9e2sOqDffj99gOh6S4PMa7EZdteZUny3VIAtQlmdVWXpT74nOh3Hp8jZUAgM4PuSuWqrCQGETlCRF4UkfUisk5Ergwf/4GI7BCRt8Ifp2YjvqFKfDVu7aaJ9E+fe+erXPrQG5x75yv8edUO2+QwvtRlWxJjfGlqJ/LqkgLbmVPVKfa3z51Qzorlg0piLJ/DvAmpDz47xHDd0kExLm3EIal1yaW7DAgkV9ZEqZEuW11JfuDrxpg3RKQUWCkiz4V/dosx5qYsxZWUYldoKujgFbaFLmu+TdQ/3TC+hPlHVFge29Lp5Z2d+ywzap5ds4MplUXUjStNOsZOjx+nGG46ez69Pj/Frjz6fAN0elLr7nK5nJzaWEVd5f6SGEfWFKdcYgOgosjNrs5d3H/hQnZ1hzbWeWVDGwunpjYrye3Osy3bkcr+ExEOh1BV6rK8j6WFTt3fQR2WspIYjDEtQEv4624RWQ9MzEYsYN3ta3ypG6cj1KecaOevyOO9/iC3f+5oVjy9jq0d/dGr8bHFLja190R3D0umf7qmtIBZE8ZY
ZtRcvyz1K/wd+/p56NVtfPnEaWDAAA+9uo2vnDSNBZMrDvr8wfr7B3hmbXvaNhICyHMGqK0o5YuDZhHlOVMbfe7vH+AvaY5xS0cvX3nwDS2zoUaFrA8+i0gdcBTwL+CjwOUicgHQTOiuYm8mX9+uqN6Vixt44JWt7O3zxRXYs3v8NUuPpNszQLcnwB1/38hNZ8/n7DteoaLIxWeaJnFkbRlXLp7OY83baQmvEHbnO+KK7QF09Pr4fXNoplO/109RQR73/+8mpo0rZmoKv19tuZtPza21DJDbFfobqkQbCdWNK2Lh1MqU2mzr8tu2+cBFC5mSQpOZiVH3eFCjR1YHn0WkBHgc+HdjTBfwK2AasIDQHcXPEzzvEhFpFpHm9vb2YcVgV1TvF89v4MyjJ9nu/DX48RVFLtq6PEwcU8SsmlLK3fm0dnmoKHJx/nFTuPOlTVz60Bv8+qVNXPDhKdSWu6Mn51J3/NVr74Cfs44OzXS6+o9r+OYfVnHW0ZPpG0it68frD9jWSvImGPw+mGRnTR2sxDnArkSzkrpzZzMh3eNBjSZZSwwikk8oKfzWGPNHAGNMmzEmYIwJAr8BFto91xhzpzGmyRjTVFVVNaw4El0Jiuz/OrbAXuzja8vd0ZP/Vx95k2/+YRXnfWgKkyoKOfPoSXGb+Pzi+Q1899TZ0SJxbTaF+9x5eVz/tHWm0/VPr6MgL7Wbu85+f1prJSWcNVUa39UVubs69daXOe83/+LUW1/m2XWtccmhsjTBzKniFAfck4hxqHSPBzWaZKUrSUQEuBtYb4y5OeZ4bXj8AeAMYG2mY4lcCQ7uOzZm/9exV4Wxjz/z6Ek82ryNi4+vjyaSO/6+kRvPmocItifkd9u6uf3FjbjzHVSVxF9t7u7xUlHk4syjJ0XbfHzl9pTXHVSVFNj+fuNSHLOYUFHAr75wFE5xsCdcviJggkwYG9/elo5efvrsesv789Nn1zOrptTS/VKYJ1x3WiPXx6zOvu60RgptNieKHQ9KNAaUTIxD5XAIixqqePCihbR2eakpK2BubbkOPqvDUrbGGD4KnA+sEZG3wse+C5wnIgsIjZFuAb6S6UAiV4J2Ywx2V4WTK4pYsXwO1z6xllK303bP5y6PP1p51S7hRF7DaXO/NnGM23bzn4ljUuuyyHMaVixrjK63iCzCy3emNhW0pCCP9q6BuIHdoyZb/ysFg4bdPV4u/1gD2/f28Vjzdvb2+aJ7YscmhhJ3HhVFeZYZP3nOUAHAwW0ebJOlZGJMhs8X4Mm1LdGS6JFpuqfPmzCsGVlK5SIxJvXyDbmgqanJNDc3D6uNyFXorm4PVSWhWUmtXfYF9ja193Dhfa+xdN5Ejp9eyYXhDXUi3PkOHrhwISu37aWsMJ8bnn7bUlPI5RQcDgd/XPkBl548naAxlivf5i17+MLd/4pr86GLP0RTCkXlXtvcwTf/sIql8yYiAsbA06t38LOz56c0EPva5g4uuOe1+N/5ooXR9uxO4FcsauDBV0MD+o9ecpxlmu7rmzv4hk2MN509n2NjYtzU3sOpt7580JlBQ4kxWen+d1Eq20RkpTGmye5nWZ+VlAvsiuol2t2srcvD1o5+bn9xI40TSm27izp6vfz02XepLXdz8fH1TB1XRLErj588u94yrfVbj6+Kfh+58t3VndpGQom0dXmj8Q4+PthQumnaurzMGF/Cl0+cFp019ZuX3re0Zzegf+sLG7j4+Hpuf3EjfYOKIO3qto9x8ODzUGcGZWLwOROlQJTKVZoYkhQ7xjAuQf99ZXGoL7ul08PtL27k6iUz+f7z1umT1z+1LnqijMx+mnXFCQnbHFecWv94dVkBUyoLo1fjAE+t2hFXHmKo3TSTxxZy3oemWKa/XndaI5PH7t969EAD+nblM6pK7X/nqkHjIInGgwbPDIoMPg9+3LBKYiR47eGUAlEqV2mtpAQ8Hj+vb+7gqVU7eX1zBx6PH78/SHf/AD87ez63fe4our0D
thvCdPsGLMemVBYdcOZT5Ptd3R729du32ekZiJv2OZT6S0Uu4bKTGyyb4Fx2cgNFLuudQKK9sAdvrxk0hjv+vjG6DeeXT6jnjr9vJBjTJRk7tbO23M1lH5vOFYunM6umlNs+d1TcTJ4uT4L30WudOTXUmUEzaoq5YVDZjhuWz2FGTeoziDJRCkSpXKV3DDY8Hj9PrmmxDF7esHwOhflOvhFzpXzHF46xzEoyBh5t3sbNn1nAM1ecwJ5eL/lOB30+/wFnPkW+H1/qxiFe2zZ/dtb8aP+6O9/Bzz+zgDwnrNvZRdDA+pYutu3p5VONtZb9Afp9JuHisVhD7abZ2+fjy8fX097jjW7U8+Xj69nXt78+VOQE/tNn18cNzt98zoK497uiyMUL77SGFvX5/BS5Qov6Fk490vK4oW6ytLW9HxHDJSfWEzTgEBAxbG3vZ8zk1K7wXS4nSxurqassipbZmFNTqgPP6rCkicHGmpbOuJPpNU+s5ZIT6y3HbnvhPS77WEPcTJWyQgd1lcVs2t3D6u2dTB5bxC3nLuAnf7GOMdzx91CfeuyVr9Ph57ufms1AwER3XJs3cTYVxU7La3/9929x++eOssS9t9fHO21dzJk4Zv+xPp/t9NfYEzkMvZumsqiAd1t7LBv1XLm4gbFF+7tpIifwiWPcnHvnq3F3IbMGDRa7HMLnj6tj4679u8J9/rg6CpzxU0GHsslSj8/P9/+8Lu53ueeLtuNsQ+LzBXh6XZvOSlKjgiYGG4nq+U8cU8jli6bz+MpQaYsP1Vfxt7d3xm1JWT+ugW17etnQZj2BrljWSHlRqI7SI//ayo9On0ueUyxXvvv6DHv6BuLm9Nf4rCfoiiIXrV3euBN0j9e6QnpcSYHt9NfB6xjspu3addN0e/22K6nnTrR2qTgcQp8vMKS7kB6fn9ZOT9zvUpPimECv135RX483xZ1/CG1QdO2gDYqufWIt9eOKdVaSOuxoYhjE7w8mHLzctqefu/+xKTr1srbMxZSx1i0pVyxrJGAC7Or20j8Q4Msn1PP4yu0AbN/XjyF0EnblCSUFefQPWE9Wfb5ANCnA/oHqe790rOVxn2maFJ0KG3ncL57fwF0XNEUL+I0vddPrC/C7162L8H73+jbmTCy3FPqrqyweUjdNT4KTbq83vmRHdZmbpinlXPCRekvdp8F3If4gPL++Na4+1OBkM1SF+XkJqt6mfmWfaFZS6zBmJQ1lFlg221OjlyaGGJF9E/66bifXL2uMbjcZOw8/MvXykhPrmVlTxvkx8+U9A0Fu/5+NXLl4Bt/905roc7+zZBYDQcNNf303euxHZ8zlh/+1juatnZYZQB29PtsT0J5eX/Rk5853MLPafqqs1x/g1FtfjhbwWzC53HYRnj8QtIxZ/PD0uTRUFUfHNRIZW+yyPelWFMWXr5hUXsg5x1pnMK1YPodJ5YWWx4kEo/WhondJSxsRSa2ekytPuHJxQ9xdkisv9ZNkZMOj+BXkqZXtGOossGy1p0Y3TQwxYvdNGPDDr88/hq7+Ad5p7eHBV7dGK6N6BoLMrCm1PYkvnTcxmhQij+3o80W7SSLHvvenNXzvU7OjV9NBY9i+r5dxJS7bq+zKEhcXH1/P5IpC9vT5qCq1n4ZaWpDPlYsboovrHrxoYVzNpltf2MCDFy20HPvlC+9x6UnTLV1YN5+zgE/Mrmbb3r7oVWifLzSDaHCisSvyt76ty7b7ZWa1dR8KlzOPO15aZy0t8tJGfnb2/Lg2/f4g61o6aen0UFteSGNtmWWwHSDf6YjbO8Hj95PvSH0SXpHLyQ9Oa4zun+3Od/CD0xopyk/tLiTRLLDB4y/Zai9C70JGJ00MMWL3TXjxvd28+N5uLl80nbv/sSnuSvHd1m6aplTEXUU6HfE1koLGvm5SeVH+oP2i5zD/iFIu+MhUy0DsBR+ZSmVx6J/K6RTqKovxBfxceuL0aMG9yFV2/4Cf/oH9FVX39dvfgcQW0astd3P1
J2fxTlt3tOurpdPDVY+9xW8uaOL/PNAcfY0HLlpoP2sqfBKPPZHs7bN/7cH7UHR5Bmzvaro91umqkTu6SPIO3enM4fT5EwclhyDGiGUG2Q3L5wCp3YEA9HoHKHNby3Y4BHp9qRUjTDQLrK0rtTLemSgLrncho5cmhhiRfX1j/8CeWrWDa5YeaSltsWJZI7f8bQMnNoyL67KYXVsWbaO23M2ZR09ickWh7X4MG9t74qaRPvLlD7Fjb3/cQGxtuTuaoNz5Dn71hWNsq7A+cOFCSyIaW2Q/XjIm3PUTqRB71e/3n0S/s2QW3V4/Hn+QAX+QiiJXNGn2+Qb43MIp3PK396KP/9rHZ9Dn8/Pa5g76fH4K8pzs6vZSk2CmU0V4I6PI1WeZOz+uGGFssol4O8FOeDPGlzAv5g7EHxQefX1r3JjFt5bMTvn/htPh5Krfr4z7XQZP+x2qIpf9OEhRiuMgQ51VloxM3YWo3KeJIUZkX9/YK9Jzmybz6GvWK+TJlUXs7fPxwZ5eJo5xW+bLOzD8+Iy5/OL59+KugmM3AFqxbA63/O09y+tHZs7Yzfq58/xjLMfe3LbX9gpxX7+PD9WN5a7wSaIgfLV8zRPWNRnufAfufEdcefCKIhd9AwFuC6/Ijh1faen0UJDn3F/wLjydts87QKHLSWffAIGgoccTYGtHHy37+rnxrHl86/HVlqTa7RnA5w/yfnsP9eNK6Oy3v2PoCi/qi+6El+CquKXTy7yYO5D+AT9nHTM5bnW2x6a7q6/fx9rWbsvahKLC+HGDXd3p3TPCFwjwtY/PiEuwA4HU7mqGOqssGem+q1EjhyaGGA6HMKYoP3qib6wti9Y3Wr2jC4AplYVMH1/MlYsbKC5wsa/Py4zxpdHuhc7+AWbWlHH1J2dFr8Jh/wn+Z2fP553Wbvb1edk7aC2BO99xgFk/1tlL4xOUzhhbXMDXf/9WdBygfyBAv89vSV79vlB305WLG6gqLbC0cebRk+IS060vhOL+8TPrKXPn0dE7wLVPvm1JeDOqHfzwmbfjTvBXnTKDBy9eyKb2Xsrd+RS6HGzc1cuEMYUETWgTn/KifNtxkAcuWmgZIP/1+cfYjqsMrsJamJ/H9U+9YWnv+qfWxV3d9/Z7eaetC4yDUEFfWN/Wyezq8rjkkGjw2W7PiKH0y1eVFFCY77D8uxTmp14OfaiL/5KR7rsaNXJoYoixpaOXyx9+M/qHcPWSmVxy4jRLN9LVS2Zz1WNvUVHk4saz5/GjZ96PO1H98PS5dHvsT/CR/Rhqy91xXVTXndZIZYITUEWxdbe3aeOLueqUGdz83HuWk7AAp82fyLNrW7j4+Hr6fQHu/ufmaIxBA3f/czM3LJ9DZVE+JQV50ZNtQZ6DORPK+GO46yg27n6fn/97Uj17+vy201/nTixn6byJcSf43/5rKwsmjaHYlUd5cT6dvR78QcPXYq5sbzlnvu171dHrtbTV3tXPv5083TJb7PpljYwtsr43Q7m6DwYNO7v6eb/dE1eeu6wwn4ZBiaHMnceNZ89j466e6NjPtPEllLnjy42/8G4bq7d3Rh83d1I5i2ZWW07SgSD8Y+MuPn/cVMsamI9Or4q2k+yg71AW/yXDFwjYTjRI9a5GjRyaGGIMvnXu9QUQIXpVZwxs3LV/XGAgELDtAtnb6+WDff0HLIOxt89Hr2eA2z53NPv6fJQW5BEwhqJ8Z9xU2euXNVKY57RMV+3y+ClwWq84C5wOdvf6uOvl/Wstjq0bw0UfmUpHny96orroI1MxxjBg4I9vbOP/njTdMtvmqlNmcO8/t1jGQ7bv66ekII9A0P539geDcZsT1Za7+dJHpnLBva9ZBoufe7vFcsKfOMZteycwYdC+1KVuFz/+i3Xjn//3Pxu58az9YxHBoKGq1D65xhbl29LRy96+wJDKhQAU5IE73xm6Owx3obnyHAzenXVrR/zCxisXNzBtXAlTY07Yvb4BPrtwCrGn+s8unEKvbyDl
Qd+hzNhKRmVxge1EgyVzalJuU40Mmhhi2A3gPfLatuiJsKLIxfdOnc13PzWTYnc+/gC2g6bXnHokj6/cHne1FbsB0Irlc5g2rpirfv8WWzv6uWLxdP53YzvXLG2kMF+4M7yauqI4n319XhC4+Ph6nA44dspYCvIdXPm7t+JOfvdfuDDaFXPx8fWUufPpGwjEnahK3fnc8PQb3HLOgujVO4ROjjc/9x5XLm7gp8++izvfwTVLj6TbM8Bv/7WVm86en3D6aySGyM/+7aRpPLFqe9wg8L+d3MBF9zdb4r7s5Ia4K/fBO7gFDbZJLlJgcHJFEX9d38aRtYXceNY8NrbHXN1XlVBdtr8LpK3Lw+4e+1lTduW5+32hFdWbd/dG26wbV0y/tTeQnZ39tmNE8yaVWxKDQ4T2bl/cBcD4UndKg75Dn7E1dHWVxVyz9Mjo3U+eA65ZeqRuZzoKaGKIMXgA76lVO7jsYw3c/uKG6NqAb/xhFRcfX8/Nf9vAz86eZ3v1bCQ09fXBV7dy8fH1zKoppa2rnyPGFvPvH2+g0JXHXS+9z1dOmsY3PjGLd9u6mTauGHeek45eL/6goavfj2cgQJdH8AcNHb3e6H4Fvzh3ASVup+1JraPXS225m5ZOD05HaKXygQazA0Fj286kiiIuXzSdI2tKueG/1kd3X+vo9dnux9DR6+OpVTssybCuqtB24VpBvvWqt6vf/sr9oYsWWu6SasoKQkkyhitPqCp28cRbO5g+vpSWfX1MKC9gX8zaEXe+g+9+ahb7+gqjg9lFrjyqy8T2zqK6rIBg0Fiuzvv9AZxiaJpSEd0udF+fl35/wDJAnqgLscdjHSPq9QZ4fn1LXDmVusrihG0caOrpup32M7YaqkqYP7nC9jlDUZTvsPzOw7gBUSOIJoYYdgN4kyuKaJpSQXu3ly+Gu0QiXSZlhfnc+sLquKvnu8PF2lo6Pdz9j03c+6Vj+eYfVsWdgIpcTnq8Ae56eRM/C1+JP/p/PkRbl4/v/sk6o6YyPL3Une9gTHE+hflO25NaMBgaQL77H5uYMb4Urz9oe5Lx+kNtlxfmx02vdTqgvDCPp1btYO7EI6MF+Lz+AFUlLtv9GKpKXCydNxGHA248ez5bdvdS4Mzj+qcHDQKHp9TGnvB3J1jt3dHr48az57NxVzeBYKjk90DAuix7IGAIGsOvX9oU2i5VwOsPcvc/N1vu5O7+52ZuPGtedDB7SmUh93/paG48ay4b23tj7iyKqSpx8syaFmbXljF1XKhvP88BXj9cElP+5Iblc8hzEN3Rz+mAE6aPs/13qR0zaFBZAiyebS2ncv2yRpAA1aVFSU893dnpsS2W2NLlIX6Z4NB8sLeXD/Z64u5qJu7tZUqlzkrKpkwvPNTEMIjdAF59VUnc+IM730F7gkHOfX0D0cdcsaiB93d125Zo2NDWzb3/u5UrFzewtaMXz0CQvvAMmsEzau790rHRK+5fPv8eXzlpuu3A4PZ9fTgdsGJZI64864k/NvZydz5XLGqIlo/43evb4u5+/uOMuezr81nWT3x0+jjb+B64aKFlBzZ3voNpVfaDyu09Xu7+YhNd/QM4xUFpkf3sl7LCfP75fge3vRBq94SGSnp98d1i/vBdT2TWl2fAfhzEMxCwJIs9vcG4QoRXnTKDSWOKuPyRNy19+/4A0Sm/kd/jmifW8uBFCy2v9cRbO+L22P7mJ2dS4rL+qQnO6Ak30t51T4bex1SmntaW2xdLrClNfXOi1k6vbYz3X7gw5cSgK6mH71AsPMy5xCAiS4BfAE7gLmPMT7IcEmAdf4iMH+zu9ia4sivg8kXTMQYefHUrAN//9GzLQHFDdQnTq0qYd8QYasrcdPb7ue3FjXQk6Pfu6PXx0MUfIs8BN569gN099vs2LF8wkROmj0MEKooKeH93j20C6fQM8GjzNk6aMY76ccV88xOzoiuFI6+5uaM3rpRHom0z22Pei8hrJNyZrbSAXV1etu/ro88X
4OjJ5bYxev0BnLJ/3GLAb2y7xX5zflP0+z6vn+rSAvvprxcutCS5Y+sqorO6Io+7+bn3+PX5x0S/j/TtH+iuJva1fH6DZyBg+bfOE2FnZz/140ujz23vsX8fd/d4U5p6OhCwf2+aUlyAByS88GnvSW3thq6kTo9DsfAwpxKDiDiB24FTgO3A6yLypDHm7exGZh1/aOn08GjzNn50xty4GUTf+MRM1u7ojF7lQujE1jihjNm1ZXF/6JG9pYNBw83nLEg4XXV8SYGlvPP2vb1xs4muXNzApIpCjjqiIjrguLfPPoHcdPZ8vrqogZnVZcyuLee/17XGnQTsSnmUF9pf3Y8tdvGf5yzAFwiysT1UW2r2hBKuW9oYV7aj1+dnfWt3NJ78PKdtjD8+Yy6nL5jIzJoyrnrsLfoH7PveIxVq3fkO2nu8lLjzEp7QYv+YPAP2ZcG9vqDl+13dHkoK7H/v4gLra5159CR+/Jd34h73wIXWE3TVQbZwTXbqaVuaF+ABjE9QZXh8inchupI6PTJR/mSwnEoMwEJgozFmE4CI/A5YDmQ9MdhdxTkEvvenNZYT2oOvbuGG5XMtV883n7OAyWOLo3/sB2r/7ZbOuK6IFcsaKXJZR/3KCwu49fkN3HLOAvxBQ7HLyfa9fUwdV2yZhZLvxHYzIVcelhkrdgu4Yq/WI7Z19HLdaY1x+0UUu5zs7Rvg1Dm1bNvbx0emVeIQ4fE3NkZ3Zit05fFAuDRFbLfTJ2Z/mM9/aErcmoxil5OpVSVMqSxm1hUn0NXvS3AHEor9ax+fwcOvhWZOJbpTiVWd6I6mzGX5fnypm/Zuj33FVqd1AHvwlF0IL1D0WVddd/bbFyPs9KRWe6kyQdXbsTYL8Iaqf8Bvm9j7bVaQD8WhOKGNBpkofzJYriWGicAHMd9vBz40+EEicglwCcDkyZMPTWTEX8UFgya64C02CXykvpJnUliBGnlMmdsZmq7aN0BFUT593gEM1uc31pbx2YVTLAvFfnj6HGZVl1keNxCAlj3d3H/hwmg8r25so35ckSWBzK0tZ8WyOZYpo9OqSvj5Z+bz9Zg6SlWlbhAsxeTy84TyImHOxBrLe7ShbR/nNE0eVCiwkfJC68rZ6jIHNeXW0iI15W5qyhyW931fv4cfnT6X7/15f0nzH50+l3GlDh695DgGAkFOObKaPp837ndZsWwOgyuDP/yvLfz4jLmWEuk/PmMuj/xrC2DdWa/XO0Cxy2mJsdjlpNDl5KdnzePqcNkPu2Tqzncweax1fKCiKD/hnVwqSgqcCRN2qkpc+Tz+xoa4xP7tTx158CfbOBQntNEgE+VPBhNzoOL7h5iIfAb4pDHmy+HvzwcWGmO+mug5TU1Nprm5OdGPMy4ymJauMgR+f5C/rGthQ3iFrUOgYXxJ3F7Okceua+mktdNDTbmbxtryuMf09ftst6Rc2lgdV/bB4/GzpqUzWjdobm05LpfT8vvlO/109gfp8xrauj1Ul7opKhBK3Q6mVI6xtLe5vYcd+3pxOpzs7vEyrqSAQDCA0+Hkwvtej8bz5GUfxu2C1n2BaJs1Y5x4fDCjxtrmvn4P77X2RmOcUVPMmELriWXdjj386Y3tfLxxIu3dHqpK3fxt3Q6WzJ3IF+5+zfLHNHGMC88A0d/PnQ8lBQW091j/Pff1e/jHe3t4L1z11iEwY3wpx88YS1lBQfQ9qilzs25nN1///YH70Te37+P1Ld1xyauprpT6KuvvPBQej5+/v9/OgN9YEvZJ06pwu1O7/vP5Avx59c60bWeqYwzpk47zjoisNMbY7neba4nhw8APjDGfDH//HQBjzH8kek62E0MmDOWEn4yhFoobamzvtO2LSwyzqsfExTi4PIRDYN6kck5uGM+2vX3R/9QTSt28smU3vd5g9KRWXODgo/VVKZ2AfL4Af3m7NXoV78538NOz5vHJWdXs7PZY/piAIf+BDSUpRX7vg7Xp8wVY07KHQMARfR+dziBza8em
vIe0XWJPNSnExrl6Z2d0FtG8CeXD2uM63RdSKnUjKTHkAe8Bi4EdwOvA54wx6xI953BMDLkumcQ11BNBuk9A6W4vE0ZCjOrwNWISA4CInAr8J6HpqvcYY350oMdrYlBKqeQdKDHk2uAzxphngGeyHYdSSo1WWvlEKaWUhSYGpZRSFpoYlFJKWWhiUEopZZFzs5KSJSLtwNYUnjoO2J3mcNIt12PM9fhAY0wXjXH4ci2+KcaYKrsfjPjEkCoRaU40VStX5HqMuR4faIzpojEOX67HF0u7kpRSSlloYlBKKWUxmhPDndkOYAhyPcZcjw80xnTRGIcv1+OLGrVjDEoppeyN5jsGpZRSNjQxKKWUshh1iUFElojIuyKyUUS+ne14BhORI0TkRRFZLyLrROTKbMeUiIg4ReRNEXk627HYEZExIvIHEXkn/H5+ONsxxRKRr4X/jdeKyCMikhNbmYnIPSKyS0TWxhwbKyLPiciG8OeKHIvvZ+F/59Ui8icRGZOt+MLxxMUY87NviIgRkXHZiG0oRlViEBEncDvwKeBI4DwRSW2fwszxA183xswGjgMuy8EYI64E1mc7iAP4BfCsMWYWMJ8cilVEJgJXAE3GmDmEysx/NrtRRd0HLBl07NvA88aYBuD58PfZch/x8T0HzDHGzCO0p8t3DnVQg9xHfIyIyBHAKcC2Qx1QMkZVYgAWAhuNMZuMMT7gd8DyLMdkYYxpMca8Ef66m9DJbGJ2o4onIpOATwN3ZTsWOyJSBpwI3A1gjPEZY/ZlNah4eUBheIOqImBnluMBwBjzErBn0OHlwP3hr+8HTj+UMcWyi88Y81djjD/87avApEMemDUeu/cQ4BbgW0BOz/oZbYlhIvBBzPfbycGTboSI1AFHAf/Kcih2/pPQf/DgQR6XLfVAO3BvuLvrLhFJ327pw2SM2QHcROjKsQXoNMb8NbtRHVC1MaYFQhcvwPgsx3MgFwF/yXYQg4nIMmCHMWZVtmM5mNGWGOw2l83JzC0iJcDjwL8bY7qyHU8sEVkK7DLGrMx2LAeQBxwN/MoYcxTQS3a7PyzCffTLganABKBYRL6Q3ahGPhH5HqHu2N9mO5ZYIlIEfA+4NtuxDMVoSwzbgSNivp9Ejty+xxKRfEJJ4bfGmD9mOx4bHwWWicgWQt1xi0TkoeyGFGc7sN0YE7nb+gOhRJErPg5sNsa0G2MGgD8CH8lyTAfSJiK1AOHPu7IcTxwR+SKwFPi8yb0FWtMIXQSsCv/dTALeEJGarEaVwGhLDK8DDSIyVURchAb7nsxyTBYiIoT6xdcbY27Odjx2jDHfMcZMMsbUEXoPXzDG5NTVrjGmFfhARGaGDy0G3s5iSINtA44TkaLwv/licmhw3MaTwBfDX38ReCKLscQRkSXA1cAyY0xftuMZzBizxhgz3hhTF/672Q4cHf5/mnNGVWIID05dDvw3oT/Cx4wx67IbVZyPAucTugp/K/xxaraDGqG+CvxWRFYDC4AfZzec/cJ3Mn8A3gDWEPpbzImSCSLyCPAKMFNEtovIxcBPgFNEZAOhWTU/ybH4bgNKgefCfzN3ZCu+A8Q4YmhJDKWUUhaj6o5BKaXUwWliUEopZaGJQSmllIUmBqWUUhaaGJRSSlloYlDqIESkJwNtLoidhiwiPxCRb6T7dZRKhSYGpbJjAaDrU1RO0sSgVBJE5Jsi8nq47v/14WN14f0efhPeX+GvIlIY/tmx4ce+Et4zYG141f0K4NzwYqxzw80fKSL/IyKbROSKLP2KSmliUGqoROQTQAOh8u0LgGNE5MTwjxuA240xjcA+4Kzw8XuBS40xHwYCECoBTqiY2qPGmAXGmEfDj50FfDLc/nXhmllKHXKaGJQauk+EP94kVMpiFqGEAKGCeG+Fv14J1IV3ESs1xvxv+PjDB2n/v4wxXmPMbkJF6qrTGLtSQ5aX7QCUGkEE+A9jzK8tB0P7ZnhjDgWAQuzLvB/I4Db071Nlhd4xKDV0
/w1cFN4rAxGZKCIJN6wxxuwFukXkuPCh2K07uwkVfVMq52hiUGqIwjusPQy8IiJrCFVHPdjJ/WLgThF5hdAdRGf4+IuEBptjB5+VyglaXVWpDBKREmNMT/jrbwO1xpgrsxyWUgekfZhKZdanReQ7hP7WtgJfym44Sh2c3jEopZSy0DEGpZRSFpoYlFJKWWhiUEopZaGJQSmllIUmBqWUUhb/H8hjVCoTroWfAAAAAElFTkSuQmCC\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "sns.scatterplot(x=df_contracts.length,y=df_contracts.annual_salary)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Other ways to measure correlation exist. For example, if you are interested in how one variable will increase (or decrease) as another variable increases (or decreases), the *Spearman's or Kendall's rank correlation coefficients* might work well."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Questions\n",
+ "\n",
+ "* Try to explore the correlation of other variables in the dataset.\n",
+ "* Can you think of a possible motivation for the trend we see: older apprentices with a shorter contract getting on average a higher annual salary?"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Sampling and uncertainty\n",
+ "\n",
+ "Often, we work with samples and we want the sample to be representative of the population it is taken from, in order to draw conclusions that generalise from the sample to the full population.\n",
+ "\n",
+ "Sampling is *tricky*. Samples have *variance* (variation between samples from the same population) and *bias* (systematic variation from the population)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Further reading\n",
+ "\n",
+ "* For a complementary introduction to statistics and data analysis, see https://www.humanitiesdataanalysis.org/statistics-essentials/notebook.html.\n",
+ "* Related to statistics and data analysis is the realm of probability theory, which allows us to formally model and calculate the likelihood of events. For an introduction, see https://www.humanitiesdataanalysis.org/intro-probability/notebook.html."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Part 2: working with texts\n",
+ "\n",
+ "Let's get some basics (or a refresher) of working with texts in Python. Texts are sequences of discrete symbols (words or, more generically, tokens).\n",
+ "\n",
+ "Key challenge: representing text for further processing. Two mainstream approaches:\n",
+ "* *Bag of words*: a text is a collection of tokens occurring with a certain frequency and assumed to be independent of each other within the text. The mapping from texts to features is deterministic and straightforward: each text is represented as a vector of the size of the vocabulary.\n",
+ "* *Embeddings*: a method (typically a neural network) is used to learn a mapping from each token to a (usually small) vector representing it. A text can in turn be represented as an aggregation of these embeddings."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import the dataset\n",
+ "Let us load Elon Musk's tweets dataset into memory.\n",
+ "\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "root_folder = \"../data/musk_tweets\"\n",
+ "df_elon = pd.read_csv(codecs.open(os.path.join(root_folder,\"elonmusk_tweets.csv\"), encoding=\"utf8\"), sep=\",\")\n",
+ "df_elon['text'] = df_elon['text'].str[1:]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " id \n",
+ " created_at \n",
+ " text \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 849636868052275200 \n",
+ " 2017-04-05 14:56:29 \n",
+ " 'And so the robots spared humanity ... https:/... \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 848988730585096192 \n",
+ " 2017-04-03 20:01:01 \n",
+ " \"@ForIn2020 @waltmossberg @mims @defcon_5 Exac... \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 848943072423497728 \n",
+ " 2017-04-03 16:59:35 \n",
+ " '@waltmossberg @mims @defcon_5 Et tu, Walt?' \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 848935705057280001 \n",
+ " 2017-04-03 16:30:19 \n",
+ " 'Stormy weather in Shortville ...' \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 848416049573658624 \n",
+ " 2017-04-02 06:05:23 \n",
+ " \"@DaveLeeBBC @verge Coal is dying due to nat g... \n",
+ " \n",
+ " \n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ " id created_at \\\n",
+ "0 849636868052275200 2017-04-05 14:56:29 \n",
+ "1 848988730585096192 2017-04-03 20:01:01 \n",
+ "2 848943072423497728 2017-04-03 16:59:35 \n",
+ "3 848935705057280001 2017-04-03 16:30:19 \n",
+ "4 848416049573658624 2017-04-02 06:05:23 \n",
+ "\n",
+ " text \n",
+ "0 'And so the robots spared humanity ... https:/... \n",
+ "1 \"@ForIn2020 @waltmossberg @mims @defcon_5 Exac... \n",
+ "2 '@waltmossberg @mims @defcon_5 Et tu, Walt?' \n",
+ "3 'Stormy weather in Shortville ...' \n",
+ "4 \"@DaveLeeBBC @verge Coal is dying due to nat g... "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_elon.head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(2819, 3)"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_elon.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Natural Language Processing in Python"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# import some of the most popular libraries for NLP in Python\n",
+ "import spacy\n",
+ "import nltk\n",
+ "import string\n",
+ "import sklearn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package punkt to\n",
+ "[nltk_data] /Users/giovannicolavizza/nltk_data...\n",
+ "[nltk_data] Unzipping tokenizers/punkt.zip.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "nltk.download('punkt')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A typical NLP pipeline might look like the following:\n",
+ " \n",
+ " \n",
+ "\n",
+ "### Tokenization: splitting a text into constituent tokens"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from nltk.tokenize import TweetTokenizer, word_tokenize\n",
+ "tknzr = TweetTokenizer(preserve_case=True, reduce_len=False, strip_handles=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\"@ForIn2020 @waltmossberg @mims @defcon_5 Exactly. Tesla is absurdly overvalued if based on the past, but that's irr\\xe2\\x80\\xa6 https://t.co/qQcTqkzgMl\"\n"
+ ]
+ }
+ ],
+ "source": [
+ "example_tweet = df_elon.text[1]\n",
+ "print(example_tweet)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['\"', '@ForIn2020', '@waltmossberg', '@mims', '@defcon_5', 'Exactly', '.', 'Tesla', 'is', 'absurdly', 'overvalued', 'if', 'based', 'on', 'the', 'past', ',', 'but', \"that's\", 'irr', '\\\\', 'xe2', '\\\\', 'x80', '\\\\', 'xa6', 'https://t.co/qQcTqkzgMl', '\"']\n",
+ "['``', '@', 'ForIn2020', '@', 'waltmossberg', '@', 'mims', '@', 'defcon_5', 'Exactly', '.', 'Tesla', 'is', 'absurdly', 'overvalued', 'if', 'based', 'on', 'the', 'past', ',', 'but', 'that', \"'s\", 'irr\\\\xe2\\\\x80\\\\xa6', 'https', ':', '//t.co/qQcTqkzgMl', \"''\"]\n"
+ ]
+ }
+ ],
+ "source": [
+ "tkz1 = tknzr.tokenize(example_tweet)\n",
+ "print(tkz1)\n",
+ "tkz2 = word_tokenize(example_tweet)\n",
+ "print(tkz2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Question: can you spot what the Twitter tokenizer does differently from a standard one?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'!\"#$%&\\'()*+,-./:;<=>?@[\\\\]^_`{|}~'"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "string.punctuation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# some more pre-processing\n",
+ "\n",
+ "def filter(tweet):\n",
+ " \n",
+ " # remove punctuation and short words and urls\n",
+ " tweet = [t for t in tweet if t not in string.punctuation and len(t) > 3 and not t.startswith(\"http\")]\n",
+ " return tweet\n",
+ "\n",
+ "def tokenize_and_string(tweet):\n",
+ " \n",
+ " tkz = tknzr.tokenize(tweet)\n",
+ " \n",
+ " tkz = filter(tkz)\n",
+ " \n",
+ " return \" \".join(tkz)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['\"', '@ForIn2020', '@waltmossberg', '@mims', '@defcon_5', 'Exactly', '.', 'Tesla', 'is', 'absurdly', 'overvalued', 'if', 'based', 'on', 'the', 'past', ',', 'but', \"that's\", 'irr', '\\\\', 'xe2', '\\\\', 'x80', '\\\\', 'xa6', 'https://t.co/qQcTqkzgMl', '\"']\n",
+ "['@ForIn2020', '@waltmossberg', '@mims', '@defcon_5', 'Exactly', 'Tesla', 'absurdly', 'overvalued', 'based', 'past', \"that's\"]\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(tkz1)\n",
+ "print(filter(tkz1))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_elon[\"clean_text\"] = df_elon[\"text\"].apply(tokenize_and_string)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " id \n",
+ " created_at \n",
+ " text \n",
+ " clean_text \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 849636868052275200 \n",
+ " 2017-04-05 14:56:29 \n",
+ " 'And so the robots spared humanity ... https:/... \n",
+ " robots spared humanity \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 848988730585096192 \n",
+ " 2017-04-03 20:01:01 \n",
+ " \"@ForIn2020 @waltmossberg @mims @defcon_5 Exac... \n",
+ " @ForIn2020 @waltmossberg @mims @defcon_5 Exact... \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 848943072423497728 \n",
+ " 2017-04-03 16:59:35 \n",
+ " '@waltmossberg @mims @defcon_5 Et tu, Walt?' \n",
+ " @waltmossberg @mims @defcon_5 Walt \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 848935705057280001 \n",
+ " 2017-04-03 16:30:19 \n",
+ " 'Stormy weather in Shortville ...' \n",
+ " Stormy weather Shortville \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 848416049573658624 \n",
+ " 2017-04-02 06:05:23 \n",
+ " \"@DaveLeeBBC @verge Coal is dying due to nat g... \n",
+ " @DaveLeeBBC @verge Coal dying fracking It's ba... \n",
+ " \n",
+ " \n",
+ " \n",
+ " "
+ ],
+ "text/plain": [
+ " id created_at \\\n",
+ "0 849636868052275200 2017-04-05 14:56:29 \n",
+ "1 848988730585096192 2017-04-03 20:01:01 \n",
+ "2 848943072423497728 2017-04-03 16:59:35 \n",
+ "3 848935705057280001 2017-04-03 16:30:19 \n",
+ "4 848416049573658624 2017-04-02 06:05:23 \n",
+ "\n",
+ " text \\\n",
+ "0 'And so the robots spared humanity ... https:/... \n",
+ "1 \"@ForIn2020 @waltmossberg @mims @defcon_5 Exac... \n",
+ "2 '@waltmossberg @mims @defcon_5 Et tu, Walt?' \n",
+ "3 'Stormy weather in Shortville ...' \n",
+ "4 \"@DaveLeeBBC @verge Coal is dying due to nat g... \n",
+ "\n",
+ " clean_text \n",
+ "0 robots spared humanity \n",
+ "1 @ForIn2020 @waltmossberg @mims @defcon_5 Exact... \n",
+ "2 @waltmossberg @mims @defcon_5 Walt \n",
+ "3 Stormy weather Shortville \n",
+ "4 @DaveLeeBBC @verge Coal dying fracking It's ba... "
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_elon.head(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# save cleaned up version\n",
+ "\n",
+ "df_elon.to_csv(os.path.join(root_folder,\"df_elon.csv\"), index=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Building a dictionary"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(2819, 7864)"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.feature_extraction.text import CountVectorizer\n",
+ "count_vect = CountVectorizer(lowercase=False, tokenizer=tknzr.tokenize)\n",
+ "X_count = count_vect.fit_transform(df_elon.clean_text)\n",
+ "X_count.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "6617"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "word_list = count_vect.get_feature_names_out() \n",
+ "count_list = X_count.toarray().sum(axis=0)\n",
+ "dictionary = dict(zip(word_list,count_list))\n",
+ "count_vect.vocabulary_.get(\"robots\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X_count[:,count_vect.vocabulary_.get(\"robots\")].toarray().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dictionary[\"robots\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Questions\n",
+ "\n",
+ "* Find the tokens most used by Elon.\n",
+ "* Find the Twitter users most referred to by Elon (hint: use the @ prefix of handles to spot them)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[('Tesla', 322),\n",
+ " ('Model', 236),\n",
+ " ('that', 223),\n",
+ " ('will', 218),\n",
+ " ('with', 177),\n",
+ " ('@SpaceX', 169),\n",
+ " ('from', 163),\n",
+ " ('this', 159),\n",
+ " ('@TeslaMotors', 149),\n",
+ " ('launch', 124)]"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dictionary_list = sorted(dictionary.items(), key=lambda x:x[1], reverse=True)\n",
+ "[d for d in dictionary_list][:10]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[('@SpaceX', 169),\n",
+ " ('@TeslaMotors', 149),\n",
+ " ('@elonmusk', 85),\n",
+ " ('@NASA', 48),\n",
+ " ('@Space_Station', 19),\n",
+ " ('@FredericLambert', 17),\n",
+ " ('@ID_AA_Carmack', 15),\n",
+ " ('@WIRED', 14),\n",
+ " ('@vicentes', 14),\n",
+ " ('@BadAstronomer', 11)]"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "[d for d in dictionary_list if d[0].startswith('@')][:10]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Representing tweets as vectors\n",
+ "\n",
+ "Texts are of variable length and need to be represented numerically in some way. Most typically, we represent them as *equally-sized vectors*.\n",
+ "\n",
+ "Actually, this is what we have already done! Let's take a closer look at `X_count` above."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id 849636868052275200\n",
+ "created_at 2017-04-05 14:56:29\n",
+ "text 'And so the robots spared humanity ... https:/...\n",
+ "clean_text robots spared humanity\n",
+ "Name: 0, dtype: object"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# This is the first Tweet of the data frame\n",
+ "\n",
+ "df_elon.loc[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# let's get the vector representation for this Tweet\n",
+ "\n",
+ "vector_representation = X_count[0,:]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# the counts sum to 3, as we would expect: the vector contains 1 in each of the columns for the 3 words that make up the Tweet. \n",
+ "# It would contain a number higher than 1 if a given word were occurring multiple times.\n",
+ "\n",
+ "np.sum(vector_representation)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1\n",
+ "1\n",
+ "1\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Let's check that indeed the vector contains 1s for the right words\n",
+ "# Remember, the vector has shape (1 x size of the vocabulary)\n",
+ "\n",
+ "print(vector_representation[0,count_vect.vocabulary_.get(\"robots\")])\n",
+ "print(vector_representation[0,count_vect.vocabulary_.get(\"spared\")])\n",
+ "print(vector_representation[0,count_vect.vocabulary_.get(\"humanity\")])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Term Frequency - Inverse Document Frequency\n",
+ "We can use boolean counts (1/0) and raw counts (as we did before) to represent a Tweet over the space of the vocabulary, but there exist improvements on this basic idea. For example, the TF-IDF weighting scheme:\n",
+ "\n",
+ "$tfidf(t, d, D) = tf(t, d) \\cdot idf(t, D)$\n",
+ "\n",
+ "$tf(t, d) = f_{t,d}$\n",
+ "\n",
+ "$idf(t, D) = \\log \\Big( \\frac{|D|}{|\\lbrace d \\in D : t \\in d \\rbrace|} \\Big)$"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(2819, 7864)"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+ "count_vect = TfidfVectorizer(lowercase=False, tokenizer=tknzr.tokenize)\n",
+ "X_count_tfidf = count_vect.fit_transform(df_elon.clean_text)\n",
+ "X_count_tfidf.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1.7226760995112569"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X_count_tfidf[0,:].sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X_count[0,:].sum()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Sparse vectors (mention)\n",
+ "How is Python representing these vectors in memory? Most of their cells are set to zero. \n",
+ "\n",
+ "We call any vector or matrix whose cells are mostly zero *sparse*.\n",
+ "There are efficient ways to store them in memory."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "<1x7864 sparse matrix of type ''\n",
+ "\twith 3 stored elements in Compressed Sparse Row format>"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X_count_tfidf[0,:]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Spacy pipelines\n",
+ "\n",
+ "Useful to construct sequences of pre-processing steps: https://spacy.io/usage/processing-pipelines."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load a pre-trained pipeline (Web Small): https://spacy.io/usage/models\n",
+ "\n",
+ "#!python -m spacy download en_core_web_sm\n",
+ "nlp = spacy.load('en_core_web_sm')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "*.. the model's meta.json tells spaCy to use the language \"en\" and the pipeline [\"tagger\", \"parser\", \"ner\"]. spaCy will then initialize spacy.lang.en.English, and create each pipeline component and add it to the processing pipeline. It'll then load in the model's data from its data directory and return the modified Language class for you to use as the nlp object.*\n",
+ "\n",
+ "Let's create a simple pipeline that does **lemmatization**, **part of speech tagging** and **named entity recognition** using spaCy models.\n",
+ "\n",
+ "*If you don't know what these NLP tasks are, please ask!*"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tweet_pos = list()\n",
+ "tweet_ner = list()\n",
+ "tweet_lemmas = list()\n",
+ "\n",
+ "for tweet in df_elon.text.values:\n",
+ " spacy_tweet = nlp(tweet)\n",
+ " \n",
+ " local_tweet_pos = list()\n",
+ " local_tweet_ner = list()\n",
+ " local_tweet_lemmas = list()\n",
+ " \n",
+ " for sentence in list(spacy_tweet.sents):\n",
+ " # --- lemmatization, remove punctuation and stop wors\n",
+ " local_tweet_lemmas.extend([token.lemma_ for token in sentence if not token.is_punct | token.is_stop])\n",
+ " local_tweet_pos.extend([token.pos_ for token in sentence if not token.is_punct | token.is_stop])\n",
+ " for ent in spacy_tweet.ents:\n",
+ " local_tweet_ner.append(ent)\n",
+ "\n",
+ " tweet_pos.append(local_tweet_pos)\n",
+ " tweet_ner.append(local_tweet_ner)\n",
+ " tweet_lemmas.append(local_tweet_lemmas)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['robot', 'spare', 'humanity', 'https://t.co/v7JUJQWfCv']"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tweet_lemmas[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['NOUN', 'VERB', 'NOUN', 'NOUN']"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tweet_pos[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[https://t.co/v7JUJQWfCv]"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tweet_ner[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[Shortville]"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# but it actually works!\n",
+ "\n",
+ "tweet_ner[3]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "*Note: we are really just scratching the surface of spaCy, but it is worth knowing it's there.*"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Searching tweets\n",
+ "\n",
+ "Once we have represented Tweets as vectors, we can easily find similar ones using basic operations such as filtering."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "robots spared humanity\n"
+ ]
+ }
+ ],
+ "source": [
+ "target = 0\n",
+ "print(df_elon.clean_text[target])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "condition = X_count_tfidf[target,:] > 0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " (0, 5198)\tTrue\n",
+ " (0, 6617)\tTrue\n",
+ " (0, 6949)\tTrue\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(condition)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X_filtered = X_count_tfidf[:,np.ravel(condition.toarray())]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "<2819x3 sparse matrix of type ''\n",
+ "\twith 16 stored elements in Compressed Sparse Row format>"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X_filtered"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " (0, 0)\t0.495283407359234\n",
+ " (0, 2)\t0.6406029997190412\n",
+ " (0, 1)\t0.5867896924329815\n",
+ " (217, 0)\t0.2972381925908634\n",
+ " (271, 0)\t0.3284547085372313\n",
+ " (464, 0)\t0.2273880239746895\n",
+ " (473, 0)\t0.5667220639589731\n",
+ " (734, 1)\t0.3846355279044392\n",
+ " (940, 0)\t0.27312597149485407\n",
+ " (1004, 0)\t0.28161575586607157\n",
+ " (1550, 1)\t0.33303254164524276\n",
+ " (1862, 0)\t0.3196675199194523\n",
+ " (2493, 0)\t0.2685018991334563\n",
+ " (2559, 0)\t0.31145247014227906\n",
+ " (2565, 0)\t0.2645117238497897\n",
+ " (2661, 0)\t0.2729016388865858\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(X_filtered)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(array([ 0, 217, 271, 464, 473, 940, 1004, 1862, 2493, 2559, 2565,\n",
+ " 2661, 0, 734, 1550, 0], dtype=int32),\n",
+ " array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2], dtype=int32),\n",
+ " array([0.49528341, 0.29723819, 0.32845471, 0.22738802, 0.56672206,\n",
+ " 0.27312597, 0.28161576, 0.31966752, 0.2685019 , 0.31145247,\n",
+ " 0.26451172, 0.27290164, 0.58678969, 0.38463553, 0.33303254,\n",
+ " 0.640603 ]))"
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from scipy import sparse\n",
+ "\n",
+ "sparse.find(X_filtered)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tweet_indices = list(sparse.find(X_filtered)[0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TARGET: robots spared humanity\n",
+ "1)@JustBe74 important make humanity proud this case particular duty owed American taxpayer\n",
+ "2)@pud Faith restored humanity French toast money\n",
+ "3)humanity have exciting inspiring future cannot confined Earth forever @love_to_dream #APSpaceChat\n",
+ "4)@ShireeshAgrawal like humanity\n",
+ "5)Creating neural lace thing that really matters humanity achieve symbiosis with machines\n",
+ "6)@tzepr Certainly agree that first foremost triumph humanity cheering good spirit\n",
+ "7)@ReesAndersen @FLIxrisk believe that critical ensure good future humanity\n",
+ "8)@NASA #Mars hard x99s worth risks extend humanity x99s frontier beyond Earth Learn about neighbor planet\n",
+ "9)Astronomer Royal Martin Rees soon will robots take over world @Telegraph\n",
+ "10)@thelogicbox @IanrossWins Mars critical long-term survival humanity life Earth know\n",
+ "11)humanity wishes become multi-planet species then must figure move millions people Mars\n",
+ "12)Sure feels weird find myself defending robots\n",
+ "13)Neil Armstrong hero humanity spirit will carry stars\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"TARGET: \" + df_elon.clean_text[target])\n",
+ "\n",
+ "# Drop the target itself *before* numbering, so the matches are always\n",
+ "# numbered 1..k with no gap, wherever the target falls in the set order.\n",
+ "matched_indices = [i for i in set(tweet_indices) if i != target]\n",
+ "for n, tweet_index in enumerate(matched_indices, start=1):\n",
+ "    print(str(n) + \")\" + df_elon.clean_text[tweet_index])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Questions\n",
+ "\n",
+ "* Can you rank the matched tweets using their TF-IDF weights, so as to put higher-weighted tweets first?\n",
+ "* Which limitations do you think a bag-of-words representation has?\n",
+ "* Can you spot any limitations of this approach, which relies on similarity measures over bag-of-words representations?\n",
+ "\n",
+ "#### Exercises\n",
+ "\n",
+ "* Find the highest IDF tokens in the corpus and discuss your results.\n",
+ "* Find the most frequent named entities and discuss your results.\n",
+ "* Are there trends in the topics of tweets during the day? And over time?\n",
+ "* Perform a **sentiment analysis** of the corpus; see, for example, the [spaCyTextBlob](https://spacy.io/universe/project/spacy-textblob) pipeline component."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#!pip install spacytextblob\n",
+ "#!python -m spacy download en_core_web_sm\n",
+ "#!python -m textblob.download_corpora"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "-0.125"
+ ]
+ },
+ "execution_count": 56,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import spacy\n",
+ "# Imported for its side effect: it registers the \"spacytextblob\" pipeline factory.\n",
+ "from spacytextblob.spacytextblob import SpacyTextBlob\n",
+ "\n",
+ "nlp = spacy.load(\"en_core_web_sm\")\n",
+ "nlp.add_pipe(\"spacytextblob\")\n",
+ "\n",
+ "text = (\n",
+ "    \"I had a really horrible day. It was the worst day ever! \"\n",
+ "    \"But every now and then I have a really good day that makes me happy.\"\n",
+ ")\n",
+ "doc = nlp(text)\n",
+ "doc._.blob.polarity"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
|