-
-
Notifications
You must be signed in to change notification settings - Fork 199
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #922 from Mv77/Calibration/InitDist
[WIP] Initial distributions of permanent income and mNrm
- Loading branch information
Showing
6 changed files
with
824 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Summary statistics of wealth and permanent income in the United States | ||
|
||
# TODO | ||
|
||
The summary statistics in `WealthIncomeStats.csv` are computed from the | ||
Survey of Consumer Finances (SCF). The file can be replicated directly from the | ||
unprocessed SCF summary files using the repository [SCF-IncWealthDist](https://github.com/Mv77/SCF-IncWealthDist), | ||
created by [Mateo Velasquez-Giraldo](https://mv77.github.io/). |
680 changes: 680 additions & 0 deletions
680
HARK/datasets/SCF/WealthIncomeDist/WealthIncomeStats.csv
Large diffs are not rendered by default.
Oops, something went wrong.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Created on Fri Jan 8 15:36:14 2021 | ||
@author: Mateo | ||
""" | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from warnings import warn | ||
import os | ||
|
||
scf_sumstats_dir = os.path.dirname(os.path.abspath(__file__)) | ||
|
||
|
||
def get_scf_distr_stats():
    """
    Load the wealth and income summary statistics as an unindexed table.

    The data are read from "WealthIncomeStats.csv", located in the directory
    stored in the module-level variable ``scf_sumstats_dir``.

    Returns
    -------
    pandas.DataFrame
        The contents of the csv file, as parsed by ``pandas.read_csv`` with
        a comma separator and default settings otherwise.
    """
    csv_path = os.path.join(scf_sumstats_dir, "WealthIncomeStats.csv")
    return pd.read_csv(csv_path, sep=",")
|
||
|
||
def parse_scf_distr_stats(age=None, education=None, year=None):
    """
    Look up the summary statistics of one Age/Year/Education cell.

    The statistics are read from "WealthIncomeStats.csv", located in the
    directory stored in the module-level variable ``scf_sumstats_dir``. The
    table is indexed by its 'Educ', 'YEAR' and 'Age_grp' columns, all read
    as strings.

    Parameters
    ----------
    age : int or float, optional
        Age of the requested group. It is mapped to the five-year bracket
        "(l,u]" that contains it (e.g. 23 and 25 both map to "(20,25]").
        If None, the 'All' age group is used.
    education : str, optional
        One of 'NoHS', 'HS' or 'College'. If None, the 'All' education
        group is used.
    year : int, optional
        Requested year; it is looked up as ``str(int(year))``. If None, the
        'All' year group is used.

    Returns
    -------
    dict
        Maps every non-index column of the table to its value in the
        requested row.

    Raises
    ------
    AssertionError
        If ``education`` is given but is not one of the allowed categories.
    Exception
        If the table has no row for the requested combination. The
        underlying ``KeyError`` is attached as the cause.

    Warns
    -----
    UserWarning
        If any statistic in the requested row is missing (NaN).
    """
    # Pre-process age to make it a five-year bracket as in the table
    if age is not None:
        u_bound = int(np.ceil(age / 5) * 5)
        l_bound = u_bound - 5
        age_bracket = "({},{}]".format(l_bound, u_bound)
    else:
        # If no age is given, use all ages.
        age_bracket = "All"

    # Check whether education is in one of the allowed categories
    if education is None:
        education = "All"
    elif education not in ("NoHS", "HS", "College"):
        # Raised explicitly (instead of using `assert`) so that the check
        # is not stripped when Python runs with -O; the exception type is
        # kept for callers that already catch AssertionError.
        raise AssertionError(
            "If an education level is provided, it must be one of "
            "'NoHS', 'HS', or 'College'."
        )

    # Parse the year
    year_str = "All" if year is None else str(int(year))

    # Read the table, indexed by the three grouping variables as strings
    filename = os.path.join(scf_sumstats_dir, "WealthIncomeStats.csv")
    table = pd.read_csv(
        filename,
        sep=",",
        index_col=["Educ", "YEAR", "Age_grp"],
        dtype={"Educ": str, "YEAR": str, "Age_grp": str},
    )

    # Try to access the requested combination
    try:
        row = table.loc[(education, year_str, age_bracket)]
    except KeyError as e:
        message = (
            "The summary statistics do not contain the "
            "Age/Year/Education combination that was requested."
        )
        raise Exception(message) from e

    # Check for NAs
    if row.isna().any():
        warn(
            "There were not enough observations in the requested "
            "Age/Year/Education combination to compute all summary "
            "statistics."
        )

    return row.to_dict()
|
||
def income_wealth_dists_from_scf(age=None, education=None, year=None):
    """
    Build initial-distribution parameters from the SCF summary statistics.

    The arguments are forwarded unchanged to ``parse_scf_distr_stats``, and
    four entries of the result are returned under new parameter names.

    Parameters
    ----------
    age, education, year : optional
        Passed positionally, in this order, to ``parse_scf_distr_stats``.

    Returns
    -------
    dict
        Contains the keys 'aNrmInitMean', 'aNrmInitStd', 'pLvlInitMean' and
        'pLvlInitStd'.
    """
    scf_stats = parse_scf_distr_stats(age, education, year)

    # (parameter name, column of the summary statistics that supplies it)
    column_of_param = (
        # Mean of log initial assets (only matters for simulation)
        ("aNrmInitMean", "lnNrmWealth.mean"),
        # Standard deviation of log initial assets (only for simulation)
        ("aNrmInitStd", "lnNrmWealth.sd"),
        # Mean of log initial permanent income (only matters for simulation)
        ("pLvlInitMean", "lnPermIncome.mean"),
        # Standard deviation of log initial permanent income (only matters for simulation)
        ("pLvlInitStd", "lnPermIncome.sd"),
    )

    return {param: scf_stats[column] for param, column in column_of_param}
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Created on Mon Jan 18 13:57:50 2021 | ||
@author: Mateo | ||
""" | ||
|
||
from HARK.datasets.SCF.WealthIncomeDist.parser import income_wealth_dists_from_scf | ||
import seaborn as sns | ||
from itertools import product, starmap | ||
import pandas as pd | ||
|
||
# List the education levels and years
educ_lvls = ["NoHS", "HS", "College"]
years = list(range(1995, 2022, 3))

age = 25

# %% Get the distribution of aNrm and pLvl at each year x education
# One (age, education, year) tuple per requested cell; the year varies fastest.
params = list(product([age], educ_lvls, years))
frame = pd.DataFrame(params, columns=["age", "education", "year"])

# Look up the parameters of every cell and attach them as extra columns.
results = list(starmap(income_wealth_dists_from_scf, params))
frame = pd.concat([frame, pd.DataFrame(results)], axis=1)

# %% Plot time trends at different education levels.

# Formatting: reshape to long format, then split each parameter name at its
# "Mean"/"Std" suffix into the variable name and the statistic.
frame = frame.melt(id_vars=["age", "education", "year"])
name_parts = frame["variable"].str.split("(Mean|Std)", n=1, expand=True)
frame["variable"] = name_parts[0]
frame["stat"] = name_parts[1]

# Plot: one panel per variable (rows) and statistic (columns), colored by
# education level.
g = sns.FacetGrid(
    frame,
    col="stat",
    row="variable",
    hue="education",
    sharey=True,
)
g.map(sns.scatterplot, "year", "value", alpha=0.7)
g.add_legend()