a not-too-general value backwards induction algorithm built on DBlocks #1438

Open
wants to merge 20 commits into base: master
1 change: 1 addition & 0 deletions Documentation/CHANGELOG.md
@@ -19,6 +19,7 @@ Release Date: TBD
- Introduces `HARK.parser` module for parsing configuration files into models [#1427](https://github.com/econ-ark/HARK/pull/1427)
- Allows construction of shocks with arguments based on mathematical expressions [#1464](https://github.com/econ-ark/HARK/pull/1464)
- YAML configuration file for the normalized consumption and portfolio choice [#1465](https://github.com/econ-ark/HARK/pull/1465)
- Introduces a value backup algorithm on DBlocks [#1438](https://github.com/econ-ark/HARK/pull/1438)

#### Minor Changes

1 change: 1 addition & 0 deletions Documentation/reference/index.rst
@@ -5,6 +5,7 @@ API Reference
:caption: Tools
:maxdepth: 1

tools/algos
tools/core
tools/dcegm
tools/distribution
7 changes: 7 additions & 0 deletions Documentation/reference/tools/algos.rst
@@ -0,0 +1,7 @@
algos
--------

.. toctree::
:maxdepth: 3

algos/vbi
7 changes: 7 additions & 0 deletions Documentation/reference/tools/algos/vbi.rst
@@ -0,0 +1,7 @@
algos.vbi
----------

.. automodule:: HARK.algos.vbi
:members:
:undoc-members:
:show-inheritance:
27 changes: 27 additions & 0 deletions Documentation/reference/tools/index.rst.orig
@@ -0,0 +1,27 @@
:orphan:

Tools
=====

<<<<<<< HEAD
.. toctree::
:maxdepth: 3

algos
core
dcegm
distribution
econforgeinterp
estimation
frame
helpers
interpolation
numba
parallel
rewards
simulation
utilities
validators
=======
See :doc:`../index`.
>>>>>>> master
Empty file added HARK/algos/__init__.py
Empty file.
70 changes: 70 additions & 0 deletions HARK/algos/tests/test_vbi.py
@@ -0,0 +1,70 @@
import HARK.algos.vbi as vbi
from HARK.distribution import Bernoulli
from HARK.model import Control, DBlock
import HARK.models.consumer as cons
import numpy as np
import unittest


block_1 = DBlock(
**{
"name": "vbi_test_1",
"shocks": {
"coin": Bernoulli(p=0.5),
},
"dynamics": {
"m": lambda y, coin: y + coin,
"c": Control(["m"], lower_bound=lambda m: 0, upper_bound=lambda m: m),
"a": lambda m, c: m - c,
},
"reward": {"u": lambda c: 1 - (c - 1) ** 2},
}
)

block_2 = DBlock( # has no control variable
**{
"name": "vbi_test_1",
"shocks": {
"coin": Bernoulli(p=0.5),
},
"dynamics": {
"m": lambda y, coin: y + coin,
"a": lambda m: m - 1,
},
"reward": {"u": lambda m: 0},
}
)


class test_vbi(unittest.TestCase):
# def setUp(self):
# pass

def test_solve_block_1(self):
state_grid = {"m": np.linspace(0, 2, 10)}

dr, dec_vf, arr_vf = vbi.solve(block_1, lambda a: a, state_grid)

self.assertAlmostEqual(dr["c"](**{"m": 1}), 0.5)

def test_solve_block_2(self):
# no control variable case.
state_grid = {"m": np.linspace(0, 2, 10)}

dr, dec_vf, arr_vf = vbi.solve(block_2, lambda a: a, state_grid)

        # the arrival value function gives the correct expected value of continuation
self.assertAlmostEqual(arr_vf({"y": 10}), 9.5)

def test_solve_consumption_problem(self):
state_grid = {"m": np.linspace(0, 5, 10)}

dr, dec_vf, arr_vf = vbi.solve(
cons.consumption_block_normalized,
lambda a: 0,
state_grid,
disc_params={"theta": {"N": 7}},
calibration=cons.calibration,
)

self.assertAlmostEqual(dr["c"](**{"m": 1.5}), 1.5)
182 changes: 182 additions & 0 deletions HARK/algos/vbi.py
@@ -0,0 +1,182 @@
"""
Use backwards induction to derive the arrival value function
from a continuation value function and stage dynamics.
"""

from HARK.model import DBlock
from inspect import signature
import itertools
import numpy as np
from scipy.optimize import minimize
from typing import Mapping, Sequence
import xarray as xr


def get_action_rule(action):
"""
Produce a function from any inputs to a given value.
This is useful for constructing decision rules with fixed actions.
"""

def ar():
return action

return ar
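
# Example (editor's illustration, not part of this diff): get_action_rule(0.5)
# returns a zero-argument callable that always yields 0.5; solve() uses this
# to turn each candidate scalar action into a decision rule during optimization.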


def ar_from_data(da):
"""
    Produce a decision rule function by interpolating a DataArray of
    policy values at the given state values.
"""

def ar(**args):
return da.interp(**args).values.tolist()

return ar
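
# Example (editor's illustration, not part of this diff): given a DataArray da
# with an "m" coordinate, ar_from_data(da)(m=1.3) interpolates the stored
# policy values at m = 1.3 via xarray's DataArray.interp.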


Grid = Mapping[str, Sequence]


def grid_to_data_array(
grid: Grid = {}, ## TODO: Better data structure here.
):
"""
    Construct an uninitialized DataArray with coordinates
    based on the Grid passed in.

Parameters
----------
grid: Grid
A mapping from variable labels to a sequence of numerical values.

Returns
--------
    da : xarray.DataArray
        An xarray.DataArray with coordinates given by the grid.
"""

coords = {**grid}

da = xr.DataArray(
np.empty([len(v) for v in coords.values()]), dims=coords.keys(), coords=coords
)

return da
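
# Example (editor's illustration, not part of this diff):
# grid_to_data_array({"m": np.linspace(0, 2, 10)}) returns a one-dimensional
# DataArray with a single "m" dimension of length 10; solve() uses such arrays
# to hold the policy and value at each grid point.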


def solve(
block: DBlock, continuation, state_grid: Grid, disc_params={}, calibration={}
):
"""
Solve a DBlock using backwards induction on the value function.

Parameters
-----------
    block : DBlock
        The model block to solve.
    continuation : callable
        The continuation value function, defined over the states at the
        end of the block.

    state_grid : Grid
        A grid over the state variables that the optimization ranges over.
        This should correspond to the information set of the decision variables.

    disc_params : dict, optional
        Discretization parameters for the block's shocks.
    calibration : dict, optional
        Values for the block's calibrated parameters.
"""

# state-rule value function
srv_function = block.get_state_rule_value_function_from_continuation(
continuation, screen=True
)

controls = block.get_controls()

    # arrays on the state grid to hold the computed policy and value
policy_data = grid_to_data_array(state_grid)
value_data = grid_to_data_array(state_grid)

# loop through every point in the state grid
for state_point in itertools.product(*state_grid.values()):
# build a dictionary from these states, as scope for the optimization
state_vals = {k: v for k, v in zip(state_grid.keys(), state_point)}

# The value of the action is computed given
# the problem calibration and the states for the current point on the
# state-grid.
pre_states = calibration.copy()
pre_states.update(state_vals)

# prepare function to optimize
        def negated_value(a):
dr = {c: get_action_rule(a[i]) for i, c in enumerate(controls)}

# negative, for minimization later
return -srv_function(pre_states, dr)

if len(controls) == 0:
# if no controls, no optimization is necessary
pass
elif len(controls) == 1:
## get lower bound.
## assumes only one control currently
            lower_bound = -1e-6  ## default lower bound if the control does not specify one
feq = block.dynamics[controls[0]].lower_bound
if feq is not None:
lower_bound = feq(
*[pre_states[var] for var in signature(feq).parameters]
)

## get upper bound
## assumes only one control currently
            upper_bound = 1e-12  # default upper bound if the control does not specify one
feq = block.dynamics[controls[0]].upper_bound
if feq is not None:
upper_bound = feq(
*[pre_states[var] for var in signature(feq).parameters]
)

bounds = ((lower_bound, upper_bound),)

            res = minimize(  # minimize the negated value over the control
negated_value,
1, # x0 is starting guess, here arbitrary.
bounds=bounds,
)

dr_best = {c: get_action_rule(res.x[i]) for i, c in enumerate(controls)}

if res.success:
policy_data.sel(**state_vals).variable.data.put(
0, res.x[0]
) # will only work for scalar actions
value_data.sel(**state_vals).variable.data.put(
0, srv_function(pre_states, dr_best)
)
else:
print(f"Optimization failure at {state_vals}.")
print(res)

dr_best = {c: get_action_rule(res.x[i]) for i, c in enumerate(controls)}

                policy_data.sel(**state_vals).variable.data.put(0, res.x[0])  # store the candidate anyway
value_data.sel(**state_vals).variable.data.put(
0, srv_function(pre_states, dr_best)
)
elif len(controls) > 1:
raise Exception(
f"Value backup iteration is not yet implemented for stages with {len(controls)} > 1 control variables."
)

# use the xarray interpolator to create a decision rule.
dr_from_data = {
c: ar_from_data(
policy_data
) # maybe needs to be more sensitive to the information set
for i, c in enumerate(controls)
}

dec_vf = block.get_decision_value_function(dr_from_data, continuation)
arr_vf = block.get_arrival_value_function(disc_params, dr_from_data, continuation)

return dr_from_data, dec_vf, arr_vf
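
Below is a minimal usage sketch of the new solver. It is an editor's illustration assembled from test_solve_block_1 in HARK/algos/tests/test_vbi.py above; the block name "vbi_example" is just a label, and the printed value is the one the test asserts.

import numpy as np

import HARK.algos.vbi as vbi
from HARK.distribution import Bernoulli
from HARK.model import Control, DBlock

block = DBlock(
    name="vbi_example",
    shocks={"coin": Bernoulli(p=0.5)},
    dynamics={
        "m": lambda y, coin: y + coin,
        "c": Control(["m"], lower_bound=lambda m: 0, upper_bound=lambda m: m),
        "a": lambda m, c: m - c,
    },
    reward={"u": lambda c: 1 - (c - 1) ** 2},
)

# Solve backwards from a continuation value of the end-of-block state a,
# optimizing c at each point of a grid over m.
dr, dec_vf, arr_vf = vbi.solve(block, lambda a: a, {"m": np.linspace(0, 2, 10)})

print(dr["c"](m=1))  # approximately 0.5, as asserted in test_solve_block_1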