Remove rand_state property of ResettablePOMDP and use the canonical _np_random of the superclass instead.
HumanCompatibleAI · Aug 15, 2023 · 6d7e71d · 6d7e71d
1 parent d7cbaa3
commit 6d7e71d
Show file tree

Hide file tree

Showing 6 changed files with 10 additions and 17 deletions.
diff --git a/src/seals/base_envs.py b/src/seals/base_envs.py
@@ -115,13 +115,6 @@ def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, dict]:
         infos = {"old_state": old_state, "new_state": self._cur_state}
         return obs, reward, terminated, truncated, infos
 
-    @property
-    def rand_state(self) -> np.random.Generator:
-        """Random state."""
-        rand_state = self._np_random
-        if rand_state is None:
-            raise RuntimeError("Need to call reset() before accessing rand_state")
-        return rand_state
 
 
 class ExposePOMDPStateWrapper(
@@ -275,7 +268,7 @@ def initial_state(self) -> DiscreteSpaceInt:
         return DiscreteSpaceInt(
             util.sample_distribution(
                 self.initial_state_dist,
-                random=self.rand_state,
+                random=self._np_random,
             ),
         )
 
@@ -288,7 +281,7 @@ def transition(
         return DiscreteSpaceInt(
             util.sample_distribution(
                 self.transition_matrix[state, action],
-                random=self.rand_state,
+                random=self._np_random,
             ),
         )
 

diff --git a/src/seals/diagnostics/largest_sum.py b/src/seals/diagnostics/largest_sum.py
@@ -34,7 +34,7 @@ def terminal(self, state: np.ndarray, n_actions_taken: int) -> bool:
 
     def initial_state(self) -> np.ndarray:
         """Returns vector sampled uniformly in [0, 1]**L."""
-        init_state = self.rand_state.random((self._length,))
+        init_state = self._np_random.random((self._length,))
         return init_state.astype(self.observation_space.dtype)
 
     def reward(self, state: np.ndarray, act: int, next_state: np.ndarray) -> float:

diff --git a/src/seals/diagnostics/noisy_obs.py b/src/seals/diagnostics/noisy_obs.py
@@ -52,7 +52,7 @@ def initial_state(self) -> np.ndarray:
         """Returns one of the grid's corners."""
         n = self._size
         corners = np.array([[0, 0], [n - 1, 0], [0, n - 1], [n - 1, n - 1]])
-        return corners[self.rand_state.integers(4)]
+        return corners[self._np_random.integers(4)]
 
     def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float:
         """Returns  +1.0 reward if state is the goal and 0.0 otherwise."""
@@ -69,5 +69,5 @@ def transition(self, state: np.ndarray, action: int) -> np.ndarray:
 
     def obs_from_state(self, state: np.ndarray) -> np.ndarray:
         """Returns (x, y) concatenated with Gaussian noise."""
-        noise_vector = self.rand_state.normal(size=self._noise_length)
+        noise_vector = self._np_random.normal(size=self._noise_length)
         return np.concatenate([state, noise_vector]).astype(np.float32)
diff --git a/src/seals/diagnostics/parabola.py b/src/seals/diagnostics/parabola.py
@@ -40,7 +40,7 @@ def terminal(self, state: int, n_actions_taken: int) -> bool:
 
     def initial_state(self) -> np.ndarray:
         """Get state by sampling a random parabola."""
-        a, b, c = -1 + 2 * self.rand_state.random((3,))
+        a, b, c = -1 + 2 * self._np_random.random((3,))
         x, y = 0, c
         return np.array([x, y, a, b, c], dtype=self.state_space.dtype)
 

diff --git a/src/seals/diagnostics/proc_goal.py b/src/seals/diagnostics/proc_goal.py
@@ -40,11 +40,11 @@ def terminal(self, state: np.ndarray, n_actions_taken: int) -> bool:
 
     def initial_state(self) -> np.ndarray:
         """Samples random agent position and random goal."""
-        pos = self.rand_state.integers(low=-self._bounds, high=self._bounds, size=(2,))
+        pos = self._np_random.integers(low=-self._bounds, high=self._bounds, size=(2,))
 
-        x_dist = self.rand_state.integers(self._distance)
+        x_dist = self._np_random.integers(self._distance)
         y_dist = self._distance - x_dist
-        random_signs = 2 * self.rand_state.integers(2, size=2) - 1
+        random_signs = 2 * self._np_random.integers(2, size=2) - 1
         goal = pos + random_signs * (x_dist, y_dist)
 
         return np.concatenate([pos, goal]).astype(self.observation_space.dtype)

diff --git a/src/seals/diagnostics/sort.py b/src/seals/diagnostics/sort.py
@@ -31,7 +31,7 @@ def terminal(self, state: np.ndarray, n_actions_taken: int) -> bool:
 
     def initial_state(self):
         """Sample random vector uniformly in [0, 1]**L."""
-        sample = self.rand_state.random(size=self._length)
+        sample = self._np_random.random(size=self._length)
         return sample.astype(self.state_space.dtype)
 
     def reward(