a not-too-general value backwards induction algorithm built on DBlocks #1438

Open
wants to merge 20 commits into base: master
1 change: 1 addition & 0 deletions Documentation/CHANGELOG.md
@@ -19,6 +19,7 @@ Release Date: TBD
- Introduces `HARK.parser` module for parsing configuration files into models [#1427](https://github.com/econ-ark/HARK/pull/1427)
- Allows construction of shocks with arguments based on mathematical expressions [#1464](https://github.com/econ-ark/HARK/pull/1464)
- YAML configuration file for the normalized consumption and portfolio choice [#1465](https://github.com/econ-ark/HARK/pull/1465)
- Introduces a value backup algorithm on DBlocks [#1438](https://github.com/econ-ark/HARK/pull/1438)

#### Minor Changes

1 change: 1 addition & 0 deletions Documentation/reference/index.rst
@@ -5,6 +5,7 @@ API Reference
:caption: Tools
:maxdepth: 1

tools/algos
tools/core
tools/dcegm
tools/distribution
7 changes: 7 additions & 0 deletions Documentation/reference/tools/algos.rst
@@ -0,0 +1,7 @@
algos
--------

.. toctree::
:maxdepth: 3

algos/vbi
7 changes: 7 additions & 0 deletions Documentation/reference/tools/algos/vbi.rst
@@ -0,0 +1,7 @@
algos.vbi
----------

.. automodule:: HARK.algos.vbi
:members:
:undoc-members:
:show-inheritance:
27 changes: 27 additions & 0 deletions Documentation/reference/tools/index.rst.orig
@@ -0,0 +1,27 @@
:orphan:

Tools
=====

<<<<<<< HEAD
.. toctree::
:maxdepth: 3

algos
core
dcegm
distribution
econforgeinterp
estimation
frame
helpers
interpolation
numba
parallel
rewards
simulation
utilities
validators
=======
See :doc:`../index`.
>>>>>>> master
Empty file added HARK/algos/__init__.py
Empty file.
70 changes: 70 additions & 0 deletions HARK/algos/tests/test_vbi.py
@@ -0,0 +1,70 @@
import HARK.algos.vbi as vbi
from HARK.distribution import Bernoulli
from HARK.model import Control, DBlock
import HARK.models.consumer as cons
import numpy as np
import unittest


block_1 = DBlock(
**{
"name": "vbi_test_1",
"shocks": {
"coin": Bernoulli(p=0.5),
},
"dynamics": {
"m": lambda y, coin: y + coin,
"c": Control(["m"], lower_bound=lambda m: 0, upper_bound=lambda m: m),
"a": lambda m, c: m - c,
},
"reward": {"u": lambda c: 1 - (c - 1) ** 2},
}
)

block_2 = DBlock( # has no control variable
**{
"name": "vbi_test_1",
"shocks": {
"coin": Bernoulli(p=0.5),
},
"dynamics": {
"m": lambda y, coin: y + coin,
"a": lambda m: m - 1,
},
"reward": {"u": lambda m: 0},
}
)


class test_vbi(unittest.TestCase):
# def setUp(self):
# pass

def test_solve_block_1(self):
state_grid = {"m": np.linspace(0, 2, 10)}

dr, dec_vf, arr_vf = vbi.solve(block_1, lambda a: a, state_grid)

self.assertAlmostEqual(dr["c"](**{"m": 1}), 0.5)

def test_solve_block_2(self):
# no control variable case.
state_grid = {"m": np.linspace(0, 2, 10)}

dr, dec_vf, arr_vf = vbi.solve(block_2, lambda a: a, state_grid)

        # the arrival value function gives the correct expected value of continuation
self.assertAlmostEqual(arr_vf({"y": 10}), 9.5)

def test_solve_consumption_problem(self):
state_grid = {"m": np.linspace(0, 5, 10)}

dr, dec_vf, arr_vf = vbi.solve(
cons.consumption_block_normalized,
lambda a: 0,
state_grid,
disc_params={"theta": {"N": 7}},
calibration=cons.calibration,
)

self.assertAlmostEqual(dr["c"](**{"m": 1.5}), 1.5)
182 changes: 182 additions & 0 deletions HARK/algos/vbi.py
@@ -0,0 +1,182 @@
"""
Use backwards induction to derive the arrival value function
from a continuation value function and stage dynamics.
"""

from HARK.model import DBlock
from inspect import signature
import itertools
import numpy as np
from scipy.optimize import minimize
from typing import Mapping, Sequence
import xarray as xr


def get_action_rule(action):
"""
Produce a function from any inputs to a given value.
This is useful for constructing decision rules with fixed actions.
"""

def ar():
return action

return ar
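
# Example (editor's illustration, not part of this diff): get_action_rule(0.5)
# returns a zero-argument callable that always yields 0.5; solve() uses this
# to turn each candidate scalar action into a decision rule during optimization.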


def ar_from_data(da):
"""
    Produce a decision rule function by interpolating a DataArray of
    policy values at the given state values.
"""

def ar(**args):
return da.interp(**args).values.tolist()

return ar
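
# Example (editor's illustration, not part of this diff): given a DataArray da
# with an "m" coordinate, ar_from_data(da)(m=1.3) interpolates the stored
# policy values at m = 1.3 via xarray's DataArray.interp.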


Grid = Mapping[str, Sequence]


def grid_to_data_array(
grid: Grid = {}, ## TODO: Better data structure here.
):
"""
    Construct an uninitialized DataArray with coordinates
    based on the Grid passed in.

Parameters
----------
grid: Grid
A mapping from variable labels to a sequence of numerical values.

Returns
--------
    da : xarray.DataArray
        An xarray.DataArray with coordinates given by the grid.
"""

coords = {**grid}

da = xr.DataArray(
np.empty([len(v) for v in coords.values()]), dims=coords.keys(), coords=coords
)

return da
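
# Example (editor's illustration, not part of this diff):
# grid_to_data_array({"m": np.linspace(0, 2, 10)}) returns a one-dimensional
# DataArray with a single "m" dimension of length 10; solve() uses such arrays
# to hold the policy and value at each grid point.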


def solve(
block: DBlock, continuation, state_grid: Grid, disc_params={}, calibration={}
):
"""
Solve a DBlock using backwards induction on the value function.

Parameters
-----------
    block : DBlock
        The model block to solve.
    continuation : callable
        The continuation value function, defined over the states at the
        end of the block.

    state_grid : Grid
        A grid over the state variables that the optimization ranges over.
        This should correspond to the information set of the decision variables.

    disc_params : dict, optional
        Discretization parameters for the block's shocks.
    calibration : dict, optional
        Values for the block's calibrated parameters.
"""

# state-rule value function
srv_function = block.get_state_rule_value_function_from_continuation(
continuation, screen=True
)

controls = block.get_controls()

    # arrays on the state grid to hold the computed policy and value
policy_data = grid_to_data_array(state_grid)
value_data = grid_to_data_array(state_grid)

# loop through every point in the state grid
for state_point in itertools.product(*state_grid.values()):
# build a dictionary from these states, as scope for the optimization
state_vals = {k: v for k, v in zip(state_grid.keys(), state_point)}

# The value of the action is computed given
# the problem calibration and the states for the current point on the
# state-grid.
pre_states = calibration.copy()
pre_states.update(state_vals)

# prepare function to optimize
        def negated_value(a):
dr = {c: get_action_rule(a[i]) for i, c in enumerate(controls)}

# negative, for minimization later
return -srv_function(pre_states, dr)

if len(controls) == 0:
# if no controls, no optimization is necessary
pass
elif len(controls) == 1:
## get lower bound.
## assumes only one control currently
            lower_bound = -1e-6  ## default lower bound if the control does not specify one
feq = block.dynamics[controls[0]].lower_bound
if feq is not None:
lower_bound = feq(
*[pre_states[var] for var in signature(feq).parameters]
)

## get upper bound
## assumes only one control currently
            upper_bound = 1e-12  # default upper bound if the control does not specify one
feq = block.dynamics[controls[0]].upper_bound
if feq is not None:
upper_bound = feq(
*[pre_states[var] for var in signature(feq).parameters]
)

bounds = ((lower_bound, upper_bound),)

            res = minimize(  # minimize the negated value over the control
negated_value,
1, # x0 is starting guess, here arbitrary.
bounds=bounds,
)

dr_best = {c: get_action_rule(res.x[i]) for i, c in enumerate(controls)}

if res.success:
policy_data.sel(**state_vals).variable.data.put(
0, res.x[0]
) # will only work for scalar actions
value_data.sel(**state_vals).variable.data.put(
0, srv_function(pre_states, dr_best)
)
else:
print(f"Optimization failure at {state_vals}.")
print(res)

dr_best = {c: get_action_rule(res.x[i]) for i, c in enumerate(controls)}

                policy_data.sel(**state_vals).variable.data.put(0, res.x[0])  # store the candidate anyway
value_data.sel(**state_vals).variable.data.put(
0, srv_function(pre_states, dr_best)
)
elif len(controls) > 1:
raise Exception(
f"Value backup iteration is not yet implemented for stages with {len(controls)} > 1 control variables."
)

# use the xarray interpolator to create a decision rule.
dr_from_data = {
c: ar_from_data(
policy_data
) # maybe needs to be more sensitive to the information set
for i, c in enumerate(controls)
}

dec_vf = block.get_decision_value_function(dr_from_data, continuation)
arr_vf = block.get_arrival_value_function(disc_params, dr_from_data, continuation)

return dr_from_data, dec_vf, arr_vf
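
Below is a minimal usage sketch of the new solver. It is an editor's illustration assembled from test_solve_block_1 in HARK/algos/tests/test_vbi.py above; the block name "vbi_example" is just a label, and the printed value is the one the test asserts.

import numpy as np

import HARK.algos.vbi as vbi
from HARK.distribution import Bernoulli
from HARK.model import Control, DBlock

block = DBlock(
    name="vbi_example",
    shocks={"coin": Bernoulli(p=0.5)},
    dynamics={
        "m": lambda y, coin: y + coin,
        "c": Control(["m"], lower_bound=lambda m: 0, upper_bound=lambda m: m),
        "a": lambda m, c: m - c,
    },
    reward={"u": lambda c: 1 - (c - 1) ** 2},
)

# Solve backwards from a continuation value of the end-of-block state a,
# optimizing c at each point of a grid over m.
dr, dec_vf, arr_vf = vbi.solve(block, lambda a: a, {"m": np.linspace(0, 2, 10)})

print(dr["c"](m=1))  # approximately 0.5, as asserted in test_solve_block_1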