diff --git a/quantecon/markov/mdp.py b/quantecon/markov/mdp.py
index ee9c3a1eb..443db1b6e 100644
--- a/quantecon/markov/mdp.py
+++ b/quantecon/markov/mdp.py
@@ -598,8 +598,11 @@ def solve(self, method='policy_iteration',
             Solution method.
 
         v_init : array_like(float, ndim=1), optional(default=None)
-            Initial value function, of length n. If None, set v_init(s)
-            = max_a r(s, a).
+            Initial value function, of length n. If None, `v_init` is
+            set such that v_init(s) = max_a r(s, a) for value iteration
+            and policy iteration; for modified policy iteration,
+            v_init(s) = min_(s', a) r(s', a)/(1 - beta) to guarantee
+            convergence.
 
         epsilon : scalar(float), optional(default=None)
             Value for epsilon-optimality. If None, the value stored in
@@ -733,7 +736,7 @@ def midrange(z):
 
         v = np.empty(self.num_states)
         if v_init is None:
-            self.s_wise_max(self.R, out=v)
+            v[:] = self.R[self.R > -np.inf].min() / (1 - self.beta)
        else:
             v[:] = v_init
 
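Why the new default guarantees convergence: modified policy iteration converges when started from a value function v0 satisfying T v0 >= v0 (T the Bellman operator), and the constant vector r_min / (1 - beta), with r_min the smallest reward over feasible state-action pairs, satisfies this, whereas the s-wise maximum used for the other two methods need not. The sketch below is a hypothetical standalone helper, not library code; it assumes a reward array R of shape (n, m) with -np.inf marking infeasible (s, a) pairs and a discount factor beta in [0, 1), mirroring the expression in the second hunk.

import numpy as np

def mpi_lower_bound(R, beta):
    """Constant initial value function v0(s) = r_min / (1 - beta).

    For the constant vector v0 and any stochastic transition kernel,
        (T v0)(s) = max_a r(s, a) + beta * r_min / (1 - beta)
                  >= r_min + beta * r_min / (1 - beta)
                  =  r_min / (1 - beta) = v0(s),
    so T v0 >= v0, the monotonicity condition under which modified
    policy iteration's iterates increase toward the true value function.
    """
    r_min = R[R > -np.inf].min()  # smallest reward over feasible pairs
    return np.full(R.shape[0], r_min / (1 - beta))

This is the same quantity the patched code assigns with v[:] = self.R[self.R > -np.inf].min() / (1 - self.beta); filtering on R > -np.inf matters because -inf entries encode infeasible actions and would otherwise make the bound degenerate.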