diff --git a/src/seals/base_envs.py b/src/seals/base_envs.py index f5f9681..50c74bf 100644 --- a/src/seals/base_envs.py +++ b/src/seals/base_envs.py @@ -115,13 +115,6 @@ def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, dict]: infos = {"old_state": old_state, "new_state": self._cur_state} return obs, reward, terminated, truncated, infos - @property - def rand_state(self) -> np.random.Generator: - """Random state.""" - rand_state = self._np_random - if rand_state is None: - raise RuntimeError("Need to call reset() before accessing rand_state") - return rand_state class ExposePOMDPStateWrapper( @@ -275,7 +268,7 @@ def initial_state(self) -> DiscreteSpaceInt: return DiscreteSpaceInt( util.sample_distribution( self.initial_state_dist, - random=self.rand_state, + random=self._np_random, ), ) @@ -288,7 +281,7 @@ def transition( return DiscreteSpaceInt( util.sample_distribution( self.transition_matrix[state, action], - random=self.rand_state, + random=self._np_random, ), ) diff --git a/src/seals/diagnostics/largest_sum.py b/src/seals/diagnostics/largest_sum.py index 35a03f7..b1864d1 100644 --- a/src/seals/diagnostics/largest_sum.py +++ b/src/seals/diagnostics/largest_sum.py @@ -34,7 +34,7 @@ def terminal(self, state: np.ndarray, n_actions_taken: int) -> bool: def initial_state(self) -> np.ndarray: """Returns vector sampled uniformly in [0, 1]**L.""" - init_state = self.rand_state.random((self._length,)) + init_state = self._np_random.random((self._length,)) return init_state.astype(self.observation_space.dtype) def reward(self, state: np.ndarray, act: int, next_state: np.ndarray) -> float: diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py index 5fd8730..d0d8793 100644 --- a/src/seals/diagnostics/noisy_obs.py +++ b/src/seals/diagnostics/noisy_obs.py @@ -52,7 +52,7 @@ def initial_state(self) -> np.ndarray: """Returns one of the grid's corners.""" n = self._size corners = np.array([[0, 0], [n - 1, 0], [0, n - 1], [n - 1, n - 1]]) - return corners[self.rand_state.integers(4)] + return corners[self._np_random.integers(4)] def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float: """Returns +1.0 reward if state is the goal and 0.0 otherwise.""" @@ -69,5 +69,5 @@ def transition(self, state: np.ndarray, action: int) -> np.ndarray: def obs_from_state(self, state: np.ndarray) -> np.ndarray: """Returns (x, y) concatenated with Gaussian noise.""" - noise_vector = self.rand_state.normal(size=self._noise_length) + noise_vector = self._np_random.normal(size=self._noise_length) return np.concatenate([state, noise_vector]).astype(np.float32) diff --git a/src/seals/diagnostics/parabola.py b/src/seals/diagnostics/parabola.py index 47483ed..6c79aaf 100644 --- a/src/seals/diagnostics/parabola.py +++ b/src/seals/diagnostics/parabola.py @@ -40,7 +40,7 @@ def terminal(self, state: int, n_actions_taken: int) -> bool: def initial_state(self) -> np.ndarray: """Get state by sampling a random parabola.""" - a, b, c = -1 + 2 * self.rand_state.random((3,)) + a, b, c = -1 + 2 * self._np_random.random((3,)) x, y = 0, c return np.array([x, y, a, b, c], dtype=self.state_space.dtype) diff --git a/src/seals/diagnostics/proc_goal.py b/src/seals/diagnostics/proc_goal.py index d212f44..0845344 100644 --- a/src/seals/diagnostics/proc_goal.py +++ b/src/seals/diagnostics/proc_goal.py @@ -40,11 +40,11 @@ def terminal(self, state: np.ndarray, n_actions_taken: int) -> bool: def initial_state(self) -> np.ndarray: """Samples random agent position and random goal.""" - pos = self.rand_state.integers(low=-self._bounds, high=self._bounds, size=(2,)) + pos = self._np_random.integers(low=-self._bounds, high=self._bounds, size=(2,)) - x_dist = self.rand_state.integers(self._distance) + x_dist = self._np_random.integers(self._distance) y_dist = self._distance - x_dist - random_signs = 2 * self.rand_state.integers(2, size=2) - 1 + random_signs = 2 * self._np_random.integers(2, size=2) - 1 goal = pos + random_signs * (x_dist, y_dist) return np.concatenate([pos, goal]).astype(self.observation_space.dtype) diff --git a/src/seals/diagnostics/sort.py b/src/seals/diagnostics/sort.py index 0ae8621..c86fec7 100644 --- a/src/seals/diagnostics/sort.py +++ b/src/seals/diagnostics/sort.py @@ -31,7 +31,7 @@ def terminal(self, state: np.ndarray, n_actions_taken: int) -> bool: def initial_state(self): """Sample random vector uniformly in [0, 1]**L.""" - sample = self.rand_state.random(size=self._length) + sample = self._np_random.random(size=self._length) return sample.astype(self.state_space.dtype) def reward(