Replaced rllab.envs.Env with gym.Env #129

Open · wants to merge 13 commits into base: integration
12 changes: 7 additions & 5 deletions contrib/bichengcao/examples/trpo_gym_Acrobot-v1.py
@@ -1,27 +1,29 @@
import gym

from rllab.algos import TRPO
Owner commented: why remove this file?

from rllab.baselines import LinearFeatureBaseline
from rllab.envs import GymEnv
from rllab.envs import normalize
from rllab.envs.util import horizon, spec
from rllab.misc import run_experiment_lite
from rllab.policies import CategoricalMLPPolicy


def run_task(*_):
env = normalize(GymEnv("Acrobot-v1"))
env = normalize(gym.make("Acrobot-v1"))

policy = CategoricalMLPPolicy(
env_spec=env.spec,
env_spec=spec(env),
hidden_sizes=(32, 32)
)

baseline = LinearFeatureBaseline(env_spec=env.spec)
baseline = LinearFeatureBaseline(env_spec=spec(env))

algo = TRPO(
env=env,
policy=policy,
baseline=baseline,
batch_size=4000,
max_path_length=env.horizon,
max_path_length=horizon(env),
n_itr=50,
discount=0.99,
step_size=0.01,
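The examples in this PR swap rllab's `GymEnv` wrapper for plain `gym.make`, and replace `env.spec` / `env.horizon` with the `spec(env)` / `horizon(env)` helpers imported from `rllab.envs.util`. The snippet below is a hypothetical sketch of what those helpers could look like, not the implementation from this branch; the `EnvSpec` import path and the `max_episode_steps` attribute are assumptions.

```python
# Hypothetical sketch of the rllab.envs.util helpers used in these examples.
# NOT the actual code from this PR; import path and attributes are assumed.
from rllab.envs.env_spec import EnvSpec  # assumed location of EnvSpec


def spec(env):
    """Build an EnvSpec from a gym.Env's observation and action spaces."""
    return EnvSpec(
        observation_space=env.observation_space,
        action_space=env.action_space)


def horizon(env):
    """Return the episode horizon recorded in the gym registry, if any."""
    return env.spec.max_episode_steps  # assumes a registered, time-limited env
```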
12 changes: 7 additions & 5 deletions contrib/bichengcao/examples/trpo_gym_CartPole-v0.py
@@ -1,27 +1,29 @@
import gym

from rllab.algos import TRPO
Owner commented: why remove this file?

from rllab.baselines import LinearFeatureBaseline
from rllab.envs import GymEnv
from rllab.envs import normalize
from rllab.envs.util import horizon, spec
from rllab.misc import run_experiment_lite
from rllab.policies import CategoricalMLPPolicy


def run_task(*_):
env = normalize(GymEnv("CartPole-v0"))
env = gym.make("CartPole-v0")

policy = CategoricalMLPPolicy(
env_spec=env.spec,
env_spec=spec(env),
hidden_sizes=(32, 32)
)

baseline = LinearFeatureBaseline(env_spec=env.spec)
baseline = LinearFeatureBaseline(env_spec=spec(env))

algo = TRPO(
env=env,
policy=policy,
baseline=baseline,
batch_size=4000,
max_path_length=env.horizon,
max_path_length=horizon(env),
n_itr=50,
discount=0.99,
step_size=0.01,
12 changes: 7 additions & 5 deletions contrib/bichengcao/examples/trpo_gym_CartPole-v1.py
@@ -1,27 +1,29 @@
import gym

from rllab.algos import TRPO
Owner commented: why remove this file?

from rllab.baselines import LinearFeatureBaseline
from rllab.envs import GymEnv
from rllab.envs import normalize
from rllab.envs.util import horizon, spec
from rllab.misc import run_experiment_lite
from rllab.policies import CategoricalMLPPolicy


def run_task(*_):
env = normalize(GymEnv("CartPole-v1"))
env = gym.make("CartPole-v1")

policy = CategoricalMLPPolicy(
env_spec=env.spec,
env_spec=spec(env),
hidden_sizes=(32, 32)
)

baseline = LinearFeatureBaseline(env_spec=env.spec)
baseline = LinearFeatureBaseline(env_spec=spec(env))

algo = TRPO(
env=env,
policy=policy,
baseline=baseline,
batch_size=4000,
max_path_length=env.horizon,
max_path_length=horizon(env),
n_itr=50,
discount=0.99,
step_size=0.01,
11 changes: 6 additions & 5 deletions contrib/bichengcao/examples/trpo_gym_MountainCar-v0.py
@@ -1,29 +1,30 @@
# This doesn't work. After 150 iterations still didn't learn anything.
Owner commented: why remove this file?

import gym

from rllab.algos import TRPO
from rllab.baselines import LinearFeatureBaseline
from rllab.envs import GymEnv
from rllab.envs import normalize
from rllab.envs.util import horizon, spec
from rllab.misc import run_experiment_lite
from rllab.policies import CategoricalMLPPolicy


def run_task(*_):
env = normalize(GymEnv("MountainCar-v0"))
env = gym.make("MountainCar-v0")

policy = CategoricalMLPPolicy(
env_spec=env.spec,
env_spec=spec(env),
hidden_sizes=(32, 32)
)

baseline = LinearFeatureBaseline(env_spec=env.spec)
baseline = LinearFeatureBaseline(env_spec=spec(env))

algo = TRPO(
env=env,
policy=policy,
baseline=baseline,
batch_size=4000,
max_path_length=env.horizon,
max_path_length=horizon(env),
n_itr=150,
discount=0.99,
step_size=0.1,
12 changes: 7 additions & 5 deletions contrib/bichengcao/examples/trpo_gym_Pendulum-v0.py
@@ -1,27 +1,29 @@
import gym

from rllab.algos import TRPO
Owner commented: why remove this file?

from rllab.baselines import LinearFeatureBaseline
from rllab.envs import GymEnv
from rllab.envs import normalize
from rllab.envs.util import horizon, spec
from rllab.misc import run_experiment_lite
from rllab.policies import GaussianMLPPolicy


def run_task(*_):
env = normalize(GymEnv("Pendulum-v0"))
env = gym.make("Pendulum-v0")

policy = GaussianMLPPolicy(
env_spec=env.spec,
env_spec=spec(env),
hidden_sizes=(32, 32)
)

baseline = LinearFeatureBaseline(env_spec=env.spec)
baseline = LinearFeatureBaseline(env_spec=spec(env))

algo = TRPO(
env=env,
policy=policy,
baseline=baseline,
batch_size=4000,
max_path_length=env.horizon,
max_path_length=horizon(env),
n_itr=50,
discount=0.99,
step_size=0.01,
3 changes: 2 additions & 1 deletion contrib/ros/envs/ros_env.py
@@ -1,3 +1,4 @@
import gym
import numpy as np
import rospy

@@ -6,7 +7,7 @@
from rllab.misc.ext import get_seed


class RosEnv(Env, Serializable):
class RosEnv(gym.Env, Serializable):
"""
Superclass for all ros environment
"""
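With `RosEnv` now deriving from `gym.Env` rather than rllab's `Env`, it is expected to expose gym's interface (`reset`, `step`, `observation_space`, `action_space`). A minimal, hypothetical subclass is sketched below to show that contract; it is illustrative only and not `RosEnv`'s actual implementation.

```python
# Minimal gym.Env subclass illustrating the interface RosEnv now inherits
# (classic gym API: step returns a 4-tuple). Purely illustrative.
import gym
import numpy as np


class MinimalEnv(gym.Env):
    def __init__(self):
        self.observation_space = gym.spaces.Box(
            -np.inf, np.inf, shape=(3,), dtype=np.float32)
        self.action_space = gym.spaces.Discrete(2)

    def reset(self):
        # Return the initial observation.
        return np.zeros(3, dtype=np.float32)

    def step(self, action):
        # Return (observation, reward, done, info).
        return np.zeros(3, dtype=np.float32), 0.0, False, {}
```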
8 changes: 5 additions & 3 deletions contrib/ros/envs/sawyer_env.py
@@ -8,7 +8,6 @@

from rllab.core.serializable import Serializable
from rllab.envs.base import Step
from rllab.spaces import Box

from contrib.ros.envs.ros_env import RosEnv
from contrib.ros.robots.sawyer import Sawyer
@@ -165,8 +164,11 @@ def observation_space(self):
"""
Returns a Space object
"""
return Box(
-np.inf, np.inf, shape=self.get_observation()['observation'].shape)
return gym.spaces.Box(
-np.inf,
np.inf,
shape=self.get_observation()['observation'].shape,
dtype=np.float32)

# ================================================
# Functions that gazebo env asks to implement
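Replacing `rllab.spaces.Box` with `gym.spaces.Box` adds an explicit `dtype`, which gym expects in order to avoid precision warnings. A quick, self-contained check of that call pattern (with a made-up observation shape, not the real Sawyer observation) might look like:

```python
# Illustrative only: the observation shape is a placeholder.
import gym
import numpy as np

obs = np.zeros(7, dtype=np.float32)  # stand-in for get_observation()['observation']
space = gym.spaces.Box(-np.inf, np.inf, shape=obs.shape, dtype=np.float32)
print(space.contains(obs))  # True
```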
14 changes: 9 additions & 5 deletions contrib/ros/robots/sawyer.py
@@ -4,11 +4,10 @@

from intera_core_msgs.msg import JointLimits
import intera_interface
import gym
import numpy as np
import rospy

from rllab.spaces import Box

from contrib.ros.robots.robot import Robot

INITIAL_SIM_ROBOT_JOINT_POS = {
@@ -101,7 +100,11 @@ def get_observation(self):

@property
def observation_space(self):
return Box(-np.inf, np.inf, shape=self.get_observation().shape)
return gym.spaces.Box(
-np.inf,
np.inf,
shape=self.get_observation().shape,
dtype=np.float32)

def send_command(self, commands):
"""
@@ -163,6 +166,7 @@ def action_space(self):
else:
raise ValueError(
'Control mode %s is not known!' % self._control_mode)
return Box(
return gym.spaces.Box(
np.concatenate((lower_bounds, np.array([0]))),
np.concatenate((upper_bounds, np.array([100]))))
np.concatenate((upper_bounds, np.array([100]))),
dtype=np.float32)
8 changes: 4 additions & 4 deletions contrib/ros/util/task_object_manager.py
@@ -1,10 +1,9 @@
from gazebo_msgs.msg import ModelStates
from geometry_msgs.msg import Point
import gym
import numpy as np
import rospy

from rllab.spaces import Box


class TaskObject(object):
def __init__(self, name, initial_pos, random_delta_range, resource=None):
@@ -127,7 +126,8 @@ def get_manipulatables_observation(self):

@property
def manipulatables_observation_space(self):
return Box(
return gym.spaces.Box(
-np.inf,
np.inf,
shape=self.get_manipulatables_observation()['obs'].shape)
shape=self.get_manipulatables_observation()['obs'].shape,
dtype=np.float32)
93 changes: 0 additions & 93 deletions docs/user/gym_integration.rst

This file was deleted.

12 changes: 7 additions & 5 deletions examples/cluster_demo.py
@@ -1,7 +1,10 @@
import sys

from rllab.algos import TRPO
from rllab.baselines import LinearFeatureBaseline
from rllab.envs.box2d import CartpoleEnv
from rllab.envs import normalize
from rllab.envs.box2d import CartpoleEnv
from rllab.envs.util import spec
from rllab.misc import stub, run_experiment_lite
from rllab.policies import GaussianMLPPolicy
import sys
@@ -11,12 +14,11 @@ def run_task(v):
env = normalize(CartpoleEnv())

policy = GaussianMLPPolicy(
env_spec=env.spec,
env_spec=spec(env),
# The neural network policy should have two hidden layers, each with 32 hidden units.
hidden_sizes=(32, 32)
)
hidden_sizes=(32, 32))

baseline = LinearFeatureBaseline(env_spec=env.spec)
baseline = LinearFeatureBaseline(env_spec=spec(env))

algo = TRPO(
env=env,