Skip to content

Commit

Permalink
Remove rand_state property of ResettablePOMDP and use the canonical _np_random of the superclass instead.
Browse files Browse the repository at this point in the history
  • Loading branch information
ernestum committed Aug 15, 2023
1 parent d7cbaa3 commit 6d7e71d
Show file tree
Hide file tree
Showing 6 changed files with 10 additions and 17 deletions.
11 changes: 2 additions & 9 deletions src/seals/base_envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,13 +115,6 @@ def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, dict]:
infos = {"old_state": old_state, "new_state": self._cur_state}
return obs, reward, terminated, truncated, infos

- @property
- def rand_state(self) -> np.random.Generator:
- """Random state."""
- rand_state = self._np_random
- if rand_state is None:
- raise RuntimeError("Need to call reset() before accessing rand_state")
- return rand_state


class ExposePOMDPStateWrapper(
Expand Down Expand Up @@ -275,7 +268,7 @@ def initial_state(self) -> DiscreteSpaceInt:
return DiscreteSpaceInt(
util.sample_distribution(
self.initial_state_dist,
- random=self.rand_state,
+ random=self._np_random,
),
)

Expand All @@ -288,7 +281,7 @@ def transition(
return DiscreteSpaceInt(
util.sample_distribution(
self.transition_matrix[state, action],
- random=self.rand_state,
+ random=self._np_random,
),
)

Expand Down
2 changes: 1 addition & 1 deletion src/seals/diagnostics/largest_sum.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def terminal(self, state: np.ndarray, n_actions_taken: int) -> bool:

def initial_state(self) -> np.ndarray:
"""Returns vector sampled uniformly in [0, 1]**L."""
- init_state = self.rand_state.random((self._length,))
+ init_state = self._np_random.random((self._length,))
return init_state.astype(self.observation_space.dtype)

def reward(self, state: np.ndarray, act: int, next_state: np.ndarray) -> float:
Expand Down
4 changes: 2 additions & 2 deletions src/seals/diagnostics/noisy_obs.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def initial_state(self) -> np.ndarray:
"""Returns one of the grid's corners."""
n = self._size
corners = np.array([[0, 0], [n - 1, 0], [0, n - 1], [n - 1, n - 1]])
- return corners[self.rand_state.integers(4)]
+ return corners[self._np_random.integers(4)]

def reward(self, state: np.ndarray, action: int, new_state: np.ndarray) -> float:
"""Returns +1.0 reward if state is the goal and 0.0 otherwise."""
Expand All @@ -69,5 +69,5 @@ def transition(self, state: np.ndarray, action: int) -> np.ndarray:

def obs_from_state(self, state: np.ndarray) -> np.ndarray:
"""Returns (x, y) concatenated with Gaussian noise."""
- noise_vector = self.rand_state.normal(size=self._noise_length)
+ noise_vector = self._np_random.normal(size=self._noise_length)
return np.concatenate([state, noise_vector]).astype(np.float32)
2 changes: 1 addition & 1 deletion src/seals/diagnostics/parabola.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def terminal(self, state: int, n_actions_taken: int) -> bool:

def initial_state(self) -> np.ndarray:
"""Get state by sampling a random parabola."""
- a, b, c = -1 + 2 * self.rand_state.random((3,))
+ a, b, c = -1 + 2 * self._np_random.random((3,))
x, y = 0, c
return np.array([x, y, a, b, c], dtype=self.state_space.dtype)

Expand Down
6 changes: 3 additions & 3 deletions src/seals/diagnostics/proc_goal.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ def terminal(self, state: np.ndarray, n_actions_taken: int) -> bool:

def initial_state(self) -> np.ndarray:
"""Samples random agent position and random goal."""
- pos = self.rand_state.integers(low=-self._bounds, high=self._bounds, size=(2,))
+ pos = self._np_random.integers(low=-self._bounds, high=self._bounds, size=(2,))

- x_dist = self.rand_state.integers(self._distance)
+ x_dist = self._np_random.integers(self._distance)
y_dist = self._distance - x_dist
- random_signs = 2 * self.rand_state.integers(2, size=2) - 1
+ random_signs = 2 * self._np_random.integers(2, size=2) - 1
goal = pos + random_signs * (x_dist, y_dist)

return np.concatenate([pos, goal]).astype(self.observation_space.dtype)
Expand Down
2 changes: 1 addition & 1 deletion src/seals/diagnostics/sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def terminal(self, state: np.ndarray, n_actions_taken: int) -> bool:

def initial_state(self):
"""Sample random vector uniformly in [0, 1]**L."""
- sample = self.rand_state.random(size=self._length)
+ sample = self._np_random.random(size=self._length)
return sample.astype(self.state_space.dtype)

def reward(
Expand Down

0 comments on commit 6d7e71d

Please sign in to comment.