Skip to content

Commit

Permalink
Merge pull request #922 from Mv77/Calibration/InitDist
Browse files Browse the repository at this point in the history
[WIP] Initial distributions of permanent income and mNrm
  • Loading branch information
llorracc authored Jan 20, 2021
2 parents 2d7b8ab + a40acc0 commit ce3785a
Show file tree
Hide file tree
Showing 6 changed files with 824 additions and 0 deletions.
8 changes: 8 additions & 0 deletions HARK/datasets/SCF/WealthIncomeDist/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Summary statistics of wealth and permanent income in the United States

# TODO

The summary statistics in `WealthIncomeStats.csv` are computed using the
Survey of Consumer Finances. The file can be replicated directly from the
unprocessed SCF summary files using the repository [SCF-IncWealthDist](https://github.com/Mv77/SCF-IncWealthDist),
created by [Mateo Velasquez-Giraldo](https://mv77.github.io/).
680 changes: 680 additions & 0 deletions HARK/datasets/SCF/WealthIncomeDist/WealthIncomeStats.csv

Large diffs are not rendered by default.

Empty file.
97 changes: 97 additions & 0 deletions HARK/datasets/SCF/WealthIncomeDist/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 8 15:36:14 2021
@author: Mateo
"""

import numpy as np
import pandas as pd
from warnings import warn
import os

scf_sumstats_dir = os.path.dirname(os.path.abspath(__file__))


def get_scf_distr_stats():
    """
    Load the complete table of SCF wealth and income summary statistics.

    The table is read from ``WealthIncomeStats.csv``, located in the
    directory given by the module-level ``scf_sumstats_dir``.

    Returns
    -------
    pandas.DataFrame
        The contents of the csv file, exactly as parsed by
        ``pandas.read_csv`` with default column types and index.
    """
    csv_path = os.path.join(scf_sumstats_dir, "WealthIncomeStats.csv")
    return pd.read_csv(csv_path, sep=",")


def parse_scf_distr_stats(age=None, education=None, year=None):
    """
    Look up SCF summary statistics for an age/education/year group.

    Reads ``WealthIncomeStats.csv`` from ``scf_sumstats_dir`` and returns the
    row that matches the requested combination. Any argument left as
    ``None`` selects the row labelled ``'All'`` along that dimension.

    Parameters
    ----------
    age : int or float, optional
        Age of the group. It is mapped to the five-year bracket ``(l,u]``
        that contains it, e.g. 23 -> ``'(20,25]'`` and 25 -> ``'(20,25]'``.
    education : str, optional
        One of ``'NoHS'``, ``'HS'`` or ``'College'``.
    year : int, optional
        Survey year; converted with ``str(int(year))`` before the lookup.

    Returns
    -------
    dict
        Mapping from column name to value for the selected row.

    Raises
    ------
    AssertionError
        If ``education`` is given but is not one of the allowed categories.
    Exception
        If the table has no row for the requested combination.

    Warns
    -----
    UserWarning
        If at least one statistic of the selected row is missing (NaN).
    """

    # Pre-process age to make it a five-year bracket as in the table
    if age is not None:
        u_bound = int(np.ceil(age / 5) * 5)
        l_bound = u_bound - 5
        age_bracket = "({},{}]".format(l_bound, u_bound)
    else:
        # If no age is given, use the row that pools all ages.
        age_bracket = "All"

    # Check whether education is in one of the allowed categories
    if education is not None:
        if education not in ("NoHS", "HS", "College"):
            # Raised explicitly instead of through `assert` so the check is
            # not stripped when Python runs with -O. The exception type is
            # kept as AssertionError for backward compatibility.
            raise AssertionError(
                "If an education level is provided, it must be one of "
                + "'NoHS', 'HS', or 'College'."
            )
    else:
        education = "All"

    # Parse the year
    year_str = "All" if year is None else str(int(year))

    # Read table
    filename = os.path.join(scf_sumstats_dir, "WealthIncomeStats.csv")

    # The three key columns are read as strings so that the lookup below can
    # use string labels such as 'All' or '1995' uniformly.
    table = pd.read_csv(
        filename,
        sep=",",
        index_col=["Educ", "YEAR", "Age_grp"],
        dtype={"Educ": str, "YEAR": str, "Age_grp": str},
    )

    # Try to access the requested combination
    try:
        row = table.loc[(education, year_str, age_bracket)]
    except KeyError as e:
        message = (
            "The summary statistics do not contain the "
            + "Age/Year/Education combination that was requested."
        )
        # Chain the original KeyError so its traceback stays visible.
        raise Exception(message) from e

    # Check for NAs
    if any(row.isna()):
        warn(
            "There were not enough observations in the requested "
            + "Age/Year/Education combination to compute all summary "
            + "statistics."
        )

    return row.to_dict()

def income_wealth_dists_from_scf(age=None, education=None, year=None):
    """
    Build initial-distribution parameters from the SCF summary statistics.

    The arguments are forwarded unchanged to ``parse_scf_distr_stats``; the
    statistics it returns are then renamed to simulation parameter names.

    Parameters
    ----------
    age, education, year : optional
        Passed positionally to ``parse_scf_distr_stats``.

    Returns
    -------
    dict
        A dictionary with the following keys (all of them only matter for
        simulation):

        - ``'aNrmInitMean'``: mean of log initial assets.
        - ``'aNrmInitStd'``: standard deviation of log initial assets.
        - ``'pLvlInitMean'``: mean of log initial permanent income.
        - ``'pLvlInitStd'``: standard deviation of log initial permanent
          income.
    """
    scf_row = parse_scf_distr_stats(age, education, year)

    # (parameter name, column of the SCF table that provides its value)
    renaming = (
        ('aNrmInitMean', 'lnNrmWealth.mean'),
        ('aNrmInitStd', 'lnNrmWealth.sd'),
        ('pLvlInitMean', 'lnPermIncome.mean'),
        ('pLvlInitStd', 'lnPermIncome.sd'),
    )

    return {param: scf_row[column] for param, column in renaming}
Empty file added HARK/datasets/SCF/__init__.py
Empty file.
39 changes: 39 additions & 0 deletions examples/Calibration/SCF_distributions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 18 13:57:50 2021
@author: Mateo
"""

from HARK.datasets.SCF.WealthIncomeDist.parser import income_wealth_dists_from_scf
import seaborn as sns
from itertools import product, starmap
import pandas as pd

# Education categories and survey years compared in this example
educ_lvls = ['NoHS', 'HS', 'College']
years = list(range(1995, 2022, 3))

age = 25

# %% Get the distribution of aNrm and pLvl at each year x education

# One (age, education, year) tuple per combination to evaluate
params = list(product([age], educ_lvls, years))

# Transpose the tuples into one sequence per argument. Note that `age` is
# rebound here from the scalar above to a tuple with one entry per row.
age, education, year = zip(*params)

# Evaluate every combination, then put inputs and outputs side by side
results = [income_wealth_dists_from_scf(*combo) for combo in params]
frame = pd.DataFrame({'age': age, 'education': education, 'year': year})
frame = pd.concat([frame, pd.DataFrame(results)], axis=1)

# %% Plot time trends at different education levels.

# Reshape to long format: one row per (combination, parameter) pair
frame = frame.melt(id_vars=['age', 'education', 'year'])

# Split names such as 'aNrmInitMean' into the variable ('aNrmInit') and the
# statistic ('Mean' or 'Std'); the capture group keeps the matched suffix.
aux = frame["variable"].str.split("(Mean|Std)", n=1, expand=True)
frame["variable"], frame["stat"] = aux[0], aux[1]

# One panel per variable (rows) and statistic (columns), coloured by education
g = sns.FacetGrid(
    frame,
    col="stat",
    row="variable",
    hue="education",
    sharey=True,
)
g.map(sns.scatterplot, "year", "value", alpha=.7)
g.add_legend()

0 comments on commit ce3785a

Please sign in to comment.