diff --git a/100-pandas-puzzles-with-solutions.ipynb b/100-pandas-puzzles-with-solutions.ipynb index 0b5927d3d..71cf5750a 100644 --- a/100-pandas-puzzles-with-solutions.ipynb +++ b/100-pandas-puzzles-with-solutions.ipynb @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -57,9 +57,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'1.2.4'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pd.__version__" ] @@ -73,9 +84,73 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "INSTALLED VERSIONS\n", + "------------------\n", + "commit : 2cb96529396d93b46abab7bbc73a208e708c642e\n", + "python : 3.8.8.final.0\n", + "python-bits : 64\n", + "OS : Windows\n", + "OS-release : 10\n", + "Version : 10.0.22000\n", + "machine : AMD64\n", + "processor : AMD64 Family 25 Model 80 Stepping 0, AuthenticAMD\n", + "byteorder : little\n", + "LC_ALL : None\n", + "LANG : None\n", + "LOCALE : English_United States.1252\n", + "\n", + "pandas : 1.2.4\n", + "numpy : 1.20.1\n", + "pytz : 2021.1\n", + "dateutil : 2.8.1\n", + "pip : 21.0.1\n", + "setuptools : 52.0.0.post20210125\n", + "Cython : 0.29.23\n", + "pytest : 6.2.3\n", + "hypothesis : None\n", + "sphinx : 4.0.1\n", + "blosc : None\n", + "feather : None\n", + "xlsxwriter : 1.3.8\n", + "lxml.etree : 4.6.3\n", + "html5lib : 1.1\n", + "pymysql : None\n", + "psycopg2 : None\n", + "jinja2 : 2.11.3\n", + "IPython : 7.22.0\n", + "pandas_datareader: 0.10.0\n", + "bs4 : 4.9.3\n", + "bottleneck : 1.3.2\n", + "fsspec : 0.9.0\n", + "fastparquet : None\n", + "gcsfs : None\n", + "matplotlib : 3.3.4\n", + "numexpr : 2.7.3\n", + 
"odfpy : None\n", + "openpyxl : 3.0.7\n", + "pandas_gbq : None\n", + "pyarrow : None\n", + "pyxlsb : None\n", + "s3fs : None\n", + "scipy : 1.6.2\n", + "sqlalchemy : 1.4.7\n", + "tables : 3.6.1\n", + "tabulate : None\n", + "xarray : None\n", + "xlrd : 2.0.1\n", + "xlwt : 1.3.0\n", + "numba : 0.53.1\n" + ] + } + ], "source": [ "pd.show_versions()" ] @@ -112,7 +187,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -137,9 +212,113 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 10 entries, a to j\n", + "Data columns (total 4 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 animal 10 non-null object \n", + " 1 age 8 non-null float64\n", + " 2 visits 10 non-null int64 \n", + " 3 priority 10 non-null object \n", + "dtypes: float64(1), int64(1), object(2)\n", + "memory usage: 400.0+ bytes\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agevisits
count8.00000010.000000
mean3.4375001.900000
std2.0077970.875595
min0.5000001.000000
25%2.3750001.000000
50%3.0000002.000000
75%4.6250002.750000
max7.0000003.000000
\n", + "
" + ], + "text/plain": [ + " age visits\n", + "count 8.000000 10.000000\n", + "mean 3.437500 1.900000\n", + "std 2.007797 0.875595\n", + "min 0.500000 1.000000\n", + "25% 2.375000 1.000000\n", + "50% 3.000000 2.000000\n", + "75% 4.625000 2.750000\n", + "max 7.000000 3.000000" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.info()\n", "\n", @@ -157,9 +336,74 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
acat2.51yes
bcat3.03yes
csnake0.52no
\n", + "
" + ], + "text/plain": [ + " animal age visits priority\n", + "a cat 2.5 1 yes\n", + "b cat 3.0 3 yes\n", + "c snake 0.5 2 no" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.iloc[:3]\n", "\n", @@ -177,9 +421,108 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalage
acat2.5
bcat3.0
csnake0.5
ddogNaN
edog5.0
fcat2.0
gsnake4.5
hcatNaN
idog7.0
jdog3.0
\n", + "
" + ], + "text/plain": [ + " animal age\n", + "a cat 2.5\n", + "b cat 3.0\n", + "c snake 0.5\n", + "d dog NaN\n", + "e dog 5.0\n", + "f cat 2.0\n", + "g snake 4.5\n", + "h cat NaN\n", + "i dog 7.0\n", + "j dog 3.0" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.loc[:, ['animal', 'age']]\n", "\n", @@ -197,9 +540,66 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalage
ddogNaN
edog5.0
idog7.0
\n", + "
" + ], + "text/plain": [ + " animal age\n", + "d dog NaN\n", + "e dog 5.0\n", + "i dog 7.0" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.loc[df.index[[3, 4, 8]], ['animal', 'age']]" ] @@ -213,9 +613,52 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [animal, age, visits, priority]\n", + "Index: []" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[df['visits'] > 3]" ] @@ -229,9 +672,66 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
ddogNaN3yes
hcatNaN1yes
\n", + "
" + ], + "text/plain": [ + " animal age visits priority\n", + "d dog NaN 3 yes\n", + "h cat NaN 1 yes" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[df['age'].isnull()]" ] @@ -245,9 +745,66 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
acat2.51yes
fcat2.03no
\n", + "
" + ], + "text/plain": [ + " animal age visits priority\n", + "a cat 2.5 1 yes\n", + "f cat 2.0 3 no" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[(df['animal'] == 'cat') & (df['age'] < 3)]" ] @@ -261,9 +818,82 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
acat2.51yes
bcat3.03yes
fcat2.03no
jdog3.01no
\n", + "
" + ], + "text/plain": [ + " animal age visits priority\n", + "a cat 2.5 1 yes\n", + "b cat 3.0 3 yes\n", + "f cat 2.0 3 no\n", + "j dog 3.0 1 no" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[df['age'].between(2, 4)]" ] @@ -277,7 +907,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -293,9 +923,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "19" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df['visits'].sum()" ] @@ -309,9 +950,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "animal\n", + "cat 2.333333\n", + "dog 5.000000\n", + "snake 2.500000\n", + "Name: age, dtype: float64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.groupby('animal')['age'].mean()" ] @@ -325,7 +981,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -345,9 +1001,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "dog 4\n", + "cat 4\n", + "snake 2\n", + "Name: animal, dtype: int64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df['animal'].value_counts()" ] @@ -361,9 +1031,130 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalagevisitspriority
idog7.02no
edog5.02no
gsnake4.51no
jdog3.01no
bcat3.03yes
acat2.51yes
fcat1.53no
csnake0.52no
hcatNaN1yes
ddogNaN3yes
\n", + "
" + ], + "text/plain": [ + " animal age visits priority\n", + "i dog 7.0 2 no\n", + "e dog 5.0 2 no\n", + "g snake 4.5 1 no\n", + "j dog 3.0 1 no\n", + "b cat 3.0 3 yes\n", + "a cat 2.5 1 yes\n", + "f cat 1.5 3 no\n", + "c snake 0.5 2 no\n", + "h cat NaN 1 yes\n", + "d dog NaN 3 yes" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.sort_values(by=['age', 'visits'], ascending=[False, True])" ] @@ -377,7 +1168,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -393,7 +1184,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -404,14 +1195,99 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "**21.** For each animal type and each number of visits, find the mean age. In other words, each row is an animal, each column is a number of visits and the values are the mean ages (*hint: use a pivot table*)." + "**20.1** Change the data type of the `age` and `visits` columns from object to float and int, respectively." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, "outputs": [], + "source": [ + "df['age'] = df['age'].astype(float)\n", + "df['visits'] = df['visits'].astype(int)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**21.** For each animal type and each number of visits, find the mean age. In other words, each row is an animal, each column is a number of visits and the values are the mean ages (*hint: use a pivot table*)." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
visits123
animal
cat2.5NaN2.25
dog3.06.0NaN
python4.50.5NaN
\n", + "
" + ], + "text/plain": [ + "visits 1 2 3\n", + "animal \n", + "cat 2.5 NaN 2.25\n", + "dog 3.0 6.0 NaN\n", + "python 4.5 0.5 NaN" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.pivot_table(index='animal', columns='visits', values='age', aggfunc='mean')" ] @@ -1680,7 +2556,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.8.8" } }, "nbformat": 4,