-
-
Notifications
You must be signed in to change notification settings - Fork 199
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ConditionalDistribution #1018
ConditionalDistribution #1018
Changes from all commits
e8fbd69
8a8d67f
1a9b874
81e44a8
2d03dec
68fdcbb
a64b64e
2f11c4d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ | |
|
||
from HARK.distribution import ( | ||
combine_indep_dstns, | ||
IndexDistribution, | ||
Lognormal, | ||
Bernoulli, | ||
) | ||
|
@@ -90,13 +91,17 @@ def update_RiskyDstn(self): | |
# Generate a discrete approximation to the risky return distribution if the | ||
# agent has age-varying beliefs about the risky asset | ||
if "RiskyAvg" in self.time_vary: | ||
self.RiskyDstn = [] | ||
for t in range(self.T_cycle): | ||
self.RiskyDstn.append( | ||
Lognormal.from_mean_std(self.RiskyAvg[t], self.RiskyStd[t]).approx( | ||
self.RiskyCount | ||
) | ||
) | ||
self.RiskyDstn = IndexDistribution( | ||
Lognormal.from_mean_std, | ||
{ | ||
'mean' : self.RiskyAvg, | ||
'std' : self.RiskyStd | ||
}, | ||
seed=self.RNG.randint(0, 2 ** 31 - 1) | ||
).approx( | ||
self.RiskyCount | ||
) | ||
|
||
self.add_to_time_vary("RiskyDstn") | ||
|
||
# Generate a discrete approximation to the risky return distribution if the | ||
|
@@ -202,25 +207,11 @@ def get_Adjust(self): | |
------- | ||
None | ||
""" | ||
if not ("AdjustPrb" in self.time_vary): | ||
|
||
self.shocks["Adjust"] = Bernoulli( | ||
self.AdjustPrb, seed=self.RNG.randint(0, 2 ** 31 - 1) | ||
).draw(self.AgentCount) | ||
|
||
else: | ||
|
||
Adjust = np.zeros(self.AgentCount) # Initialize shock array | ||
for t in range(self.T_cycle): | ||
these = t == self.t_cycle | ||
N = np.sum(these) | ||
if N > 0: | ||
AdjustPrb = self.AdjustPrb[t - 1] | ||
Adjust[these] = Bernoulli( | ||
AdjustPrb, seed=self.RNG.randint(0, 2 ** 31 - 1) | ||
).draw(N) | ||
|
||
self.shocks["Adjust"] = Adjust | ||
self.shocks['Adjust'] = IndexDistribution( | ||
Bernoulli, | ||
{'p' : self.AdjustPrb}, | ||
seed=self.RNG.randint(0, 2 ** 31 - 1) | ||
).draw(self.t_cycle) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @sbenthall thinking about indices more carefully now, I think that according to the previous code and time indexing of solutions vs simulations this should be There is still the question of why would newborns use the final parameters ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, that's right. In python, an index of -1 gets you the last element of a list/collection. so correcting this means working in the extra 'newborn' logic. I think it would be better to figure out how to fix #1022 across the board. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well yes, newborns are a separate issue. But the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm. Prior to this PR:
For agents of age t, use AdjustPr[t -1]. If t = 0, this would use AdjustPrb[-1], the last AdjustPrb. After #1018 merge:
Now the draws are "off by one". Either way, there is a bug! But this was a case that nobody bothered writing a test for. Whoever wrote the original AdjustPrb simulation code forgot to resolve the newborn case. Now the question is: should we put the special newborn case in everywhere that it's been left out? My preference is to fix #1022 instead. But your point is well-taken that this is a problem with the implementation at the moment. I'll make a separate issue for it. |
||
|
||
def initialize_sim(self): | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,8 @@ | |
|
||
class Distribution: | ||
""" | ||
Base class for all probability distributions. | ||
|
||
Parameters | ||
---------- | ||
seed : int | ||
|
@@ -27,6 +29,147 @@ def reset(self): | |
""" | ||
self.RNG = np.random.RandomState(self.seed) | ||
|
||
class IndexDistribution(Distribution): | ||
""" | ||
This class provides a way to define a distribution that | ||
is conditional on an index. | ||
|
||
The current implementation combines a defined distribution | ||
class (such as Bernoulli, LogNormal, etc.) with information | ||
about the conditions on the parameters of the distribution. | ||
|
||
For example, an IndexDistribution can be defined as | ||
a Bernoulli distribution whose parameter p is a function of | ||
a different input parameter. | ||
|
||
Parameters | ||
---------- | ||
|
||
engine : Distribution class | ||
A Distribution subclass. | ||
|
||
conditional: dict | ||
Information about the conditional variation | ||
on the input parameters of the engine distribution. | ||
Keys should match the arguments to the engine class | ||
constructor. | ||
|
||
seed : int | ||
Seed for random number generator. | ||
""" | ||
conditional = None | ||
engine = None | ||
|
||
def __init__(self, engine, conditional, seed = 0): | ||
# Set up the RNG | ||
super().__init__(seed) | ||
|
||
self.conditional = conditional | ||
self.engine = engine | ||
|
||
def __getitem__(self, y): | ||
# test one item to determine case handling | ||
item0 = list(self.conditional.values())[0] | ||
|
||
if type(item0) is list: | ||
cond = {key : val[y] for (key, val) in self.conditional.items()} | ||
return self.engine( | ||
seed=self.RNG.randint(0, 2 ** 31 - 1), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Re: the previous comment I see that might not be the case. The seed you pass gets used to draw the (index-varying) seeds of every item. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I'm glad you are sensitive to this. I'm not 100% sure I have done this correctly. I was trying to adapt to the seed-handling techniques that @mnwhite has coded into HARK. I'm somewhat surprised that I got the tests to pass without modifying them, to be honest, but it appears I guessed well as to how to adapt it over. Maybe a different, more extensive test could verify that there is not a problem here. Maybe: test the conditional distribution with two cases, but where the numerical values of the inputs in each case (say, mu and sigma), are equal. Then test to make sure samples drawn from the two cases are different? I'll code that up now and add it. |
||
**cond | ||
) | ||
else: | ||
raise(Exception( | ||
f"IndexDistribution: Unhandled case for __getitem__ access. y: {y}; conditional: {conditional}" | ||
)) | ||
|
||
def approx(self, N, **kwds): | ||
""" | ||
Approximation of the distribution. | ||
|
||
Parameters | ||
---------- | ||
N : int | ||
Number of discrete points to approximate | ||
continuous distribution into. | ||
|
||
kwds: dict | ||
Other keyword arguments passed to engine | ||
distribution approx() method. | ||
|
||
Returns: | ||
------------ | ||
dists : [DiscreteDistribution] | ||
A list of DiscreteDistributions that are the | ||
approximation of engine distribution under each condition. | ||
|
||
TODO: It would be better if there were a conditional discrete | ||
distribution representation. But that integrates with the | ||
solution code. This implementation will return the list of | ||
distributions representations expected by the solution code. | ||
""" | ||
|
||
# test one item to determine case handling | ||
item0 = list(self.conditional.values())[0] | ||
|
||
if type(item0) is float: | ||
# degenerate case. Treat the parameterization as constant. | ||
return self.engine( | ||
seed = self.RNG.randint(0, 2 ** 31 - 1), | ||
**self.conditional | ||
).approx(N,**kwds) | ||
|
||
if type(item0) is list: | ||
return [self[i].approx(N,**kwds) for i, _ in enumerate(item0)] | ||
|
||
|
||
def draw(self, condition): | ||
""" | ||
Generate arrays of draws. | ||
The input is an array containing the conditions. | ||
The output is an array of the same length (axis 1 dimension) | ||
as the conditions containing random draws of the conditional | ||
distribution. | ||
|
||
Parameters | ||
---------- | ||
condition : np.array | ||
The input conditions to the distribution. | ||
|
||
Returns: | ||
------------ | ||
draws : np.array | ||
""" | ||
# for now, assume that all the conditionals | ||
# are of the same type. | ||
# this matches the HARK 'time-varying' model architecture. | ||
|
||
# test one item to determine case handling | ||
item0 = list(self.conditional.values())[0] | ||
|
||
if type(item0) is float: | ||
# degenerate case. Treat the parameterization as constant. | ||
N = condition.size | ||
|
||
return self.engine( | ||
seed = self.RNG.randint(0, 2 ** 31 - 1), | ||
**self.conditional | ||
).draw(N) | ||
|
||
if type(item0) is list: | ||
# conditions are indices into list | ||
# somewhat convoluted sampling strategy retained | ||
# for test backwards compatibility | ||
|
||
draws = np.zeros(condition.size) | ||
|
||
for c in np.unique(condition): | ||
these = c == condition | ||
N = np.sum(these) | ||
|
||
cond = {key : val[c] for (key, val) in self.conditional.items()} | ||
draws[these] = self[c].draw(N) | ||
|
||
return draws | ||
|
||
### CONTINUOUS DISTRIBUTIONS | ||
|
||
|
@@ -64,7 +207,7 @@ def __init__(self, mu=0.0, sigma=1.0, seed=0): | |
|
||
def draw(self, N): | ||
""" | ||
Generate arrays of lognormal draws. The sigma input can be a number | ||
Generate arrays of lognormal draws. The sigma parameter can be a number | ||
or list-like. If a number, output is a length N array of draws from the | ||
lognormal distribution with standard deviation sigma. If a list, output is | ||
a length T list whose t-th entry is a length N array of draws from the | ||
|
@@ -74,8 +217,6 @@ def draw(self, N): | |
---------- | ||
N : int | ||
Number of draws in each row. | ||
seed : int | ||
Seed for random number generator. | ||
|
||
Returns: | ||
------------ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If I understand the code right, you are using a single seed to draw all the shocks. The previous implementation seems to draw a different seed for every "t".
Does this have any perverse RNG implication?