Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Initial distributions of permanent income and mNrm #922

Merged
merged 7 commits into from
Jan 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions HARK/datasets/SCF/WealthIncomeDist/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Summary statistics of wealth and permanent income in the United States

# TODO

The summary statistics in `WealthIncomeStats.csv` are computed from the
Survey of Consumer Finances (SCF). The file can be replicated directly from the
unprocessed SCF summary files using the repository [SCF-IncWealthDist](https://github.com/Mv77/SCF-IncWealthDist),
created by [Mateo Velasquez-Giraldo](https://mv77.github.io/).
680 changes: 680 additions & 0 deletions HARK/datasets/SCF/WealthIncomeDist/WealthIncomeStats.csv

Large diffs are not rendered by default.

Empty file.
97 changes: 97 additions & 0 deletions HARK/datasets/SCF/WealthIncomeDist/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 8 15:36:14 2021
@author: Mateo
"""

import numpy as np
import pandas as pd
from warnings import warn
import os

scf_sumstats_dir = os.path.dirname(os.path.abspath(__file__))


def get_scf_distr_stats():
    """
    Load the table of wealth and income summary statistics.

    Returns
    -------
    pandas.DataFrame
        The contents of "WealthIncomeStats.csv", read from the directory
        stored in `scf_sumstats_dir` with pandas' default column handling.
    """
    # The CSV sits in the same directory that `scf_sumstats_dir` points to.
    csv_path = os.path.join(scf_sumstats_dir, "WealthIncomeStats.csv")
    return pd.read_csv(csv_path, sep=",")


def parse_scf_distr_stats(age=None, education=None, year=None):
    """
    Look up the summary statistics for one age/education/year group.

    Parameters
    ----------
    age : int or float, optional
        Age of the requested group. It is mapped to the five-year bracket
        "(lower,upper]" that contains it, e.g. 25 -> "(20,25]" and
        26 -> "(25,30]". If None, the 'All' age group is requested.
    education : str, optional
        One of 'NoHS', 'HS', or 'College'. If None, the 'All' education
        group is requested.
    year : int, optional
        Year of the requested group; it is converted with ``str(int(year))``
        before the lookup. If None, the 'All' year group is requested.

    Returns
    -------
    dict
        The row of "WealthIncomeStats.csv" indexed by the requested
        (education, year, age bracket) combination, converted with
        ``to_dict()``.

    Raises
    ------
    AssertionError
        If `education` is given but is not one of the allowed categories.
    Exception
        If the table has no row for the requested combination.

    Warns
    -----
    UserWarning
        If any statistic in the selected row is missing (NA).
    """
    # Pre-process age to make it a five-year bracket as in the table
    if age is not None:
        u_bound = int(np.ceil(age / 5) * 5)
        l_bound = u_bound - 5
        age_bracket = "({},{}]".format(l_bound, u_bound)
    else:
        # If no age is given, use all ages.
        age_bracket = "All"

    # Check whether education is in one of the allowed categories
    if education is None:
        education = "All"
    elif education not in ("NoHS", "HS", "College"):
        # Raised explicitly instead of through an `assert` statement so the
        # check is not stripped under `python -O`. The exception type stays
        # AssertionError so existing callers see the same error.
        raise AssertionError(
            "If an education level is provided, it must be one of "
            "'NoHS', 'HS', or 'College'."
        )

    # Parse the year
    year_str = "All" if year is None else str(int(year))

    # Read the table, indexing it by the three grouping columns. They are
    # read as strings so they compare equal to the keys built above.
    filename = os.path.join(scf_sumstats_dir, "WealthIncomeStats.csv")
    table = pd.read_csv(
        filename,
        sep=",",
        index_col=["Educ", "YEAR", "Age_grp"],
        dtype={"Educ": str, "YEAR": str, "Age_grp": str},
    )

    # Try to access the requested combination
    try:
        row = table.loc[(education, year_str, age_bracket)]
    except KeyError as e:
        message = (
            "The summary statistics do not contain the "
            "Age/Year/Education combination that was requested."
        )
        # Chain the original KeyError so the failing key stays visible.
        raise Exception(message) from e

    # Check for NAs
    if row.isna().values.any():
        warn(
            "There were not enough observations in the requested "
            "Age/Year/Education combination to compute all summary "
            "statistics."
        )

    return row.to_dict()

def income_wealth_dists_from_scf(age=None, education=None, year=None):
    """
    Build initial-distribution parameters from the summary statistics.

    The arguments are forwarded unchanged to `parse_scf_distr_stats`, and
    four of the returned statistics are relabelled with the parameter names
    below (these matter only for simulation).

    Parameters
    ----------
    age, education, year : optional
        Passed positionally, in that order, to `parse_scf_distr_stats`.

    Returns
    -------
    dict
        'aNrmInitMean' : mean of log initial assets.
        'aNrmInitStd'  : standard deviation of log initial assets.
        'pLvlInitMean' : mean of log initial permanent income.
        'pLvlInitStd'  : standard deviation of log initial permanent income.
    """
    scf_stats = parse_scf_distr_stats(age, education, year)

    # (parameter name, column of the summary table) pairs, in output order.
    param_sources = (
        ("aNrmInitMean", "lnNrmWealth.mean"),
        ("aNrmInitStd", "lnNrmWealth.sd"),
        ("pLvlInitMean", "lnPermIncome.mean"),
        ("pLvlInitStd", "lnPermIncome.sd"),
    )

    return {param: scf_stats[column] for param, column in param_sources}
Empty file added HARK/datasets/SCF/__init__.py
Empty file.
39 changes: 39 additions & 0 deletions examples/Calibration/SCF_distributions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 18 13:57:50 2021
@author: Mateo
"""

from HARK.datasets.SCF.WealthIncomeDist.parser import income_wealth_dists_from_scf
import seaborn as sns
from itertools import product, starmap
import pandas as pd

# List the education levels and years
educ_lvls = ['NoHS', 'HS', 'College']
years = list(range(1995, 2022, 3))

# Age at which the distributions are evaluated
age = 25

# %% Get the distribution of aNrm and pLvl at each year x education
# Every (age, education, year) combination, in the argument order expected
# by income_wealth_dists_from_scf.
params = list(product([age], educ_lvls, years))

# Build the identifier columns straight from the tuples. Unpacking them back
# into `age, education, year` would overwrite the scalar `age` with a tuple
# and break this cell when it is run a second time.
frame = pd.DataFrame(params, columns=['age', 'education', 'year'])

results = list(starmap(income_wealth_dists_from_scf, params))
frame = pd.concat([frame, pd.DataFrame(results)], axis=1)

# %% Plot time trends at different education levels.

# Formatting
frame = frame.melt(id_vars=['age', 'education', 'year'])
# Split names such as 'aNrmInitMean' into the variable ('aNrmInit') and the
# statistic ('Mean'); the capture group keeps the matched suffix as column 1.
aux = frame["variable"].str.split("(Mean|Std)", n=1, expand=True)
frame["variable"] = aux[0]
frame["stat"] = aux[1]

# Plot
g = sns.FacetGrid(frame, col="stat", row="variable", hue="education", sharey=True)
g.map(sns.scatterplot, "year", "value", alpha=.7)
g.add_legend()