Replaced rllab.envs.Env with gym.Env #129

Open · wants to merge 13 commits into base: integration
12 changes: 7 additions & 5 deletions contrib/bichengcao/examples/trpo_gym_Acrobot-v1.py
@@ -1,27 +1,29 @@
import gym

from rllab.algos import TRPO
Owner commented: why remove this file?

from rllab.baselines import LinearFeatureBaseline
from rllab.envs import GymEnv
from rllab.envs import normalize
from rllab.envs.util import horizon, spec
from rllab.misc import run_experiment_lite
from rllab.policies import CategoricalMLPPolicy


def run_task(*_):
env = normalize(GymEnv("Acrobot-v1"))
env = normalize(gym.make("Acrobot-v1"))

policy = CategoricalMLPPolicy(
env_spec=env.spec,
env_spec=spec(env),
hidden_sizes=(32, 32)
)

baseline = LinearFeatureBaseline(env_spec=env.spec)
baseline = LinearFeatureBaseline(env_spec=spec(env))

algo = TRPO(
env=env,
policy=policy,
baseline=baseline,
batch_size=4000,
max_path_length=env.horizon,
max_path_length=horizon(env),
n_itr=50,
discount=0.99,
step_size=0.01,
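The examples in this PR swap rllab's `GymEnv` wrapper for plain `gym.make`, and replace `env.spec` / `env.horizon` with the `spec(env)` / `horizon(env)` helpers imported from `rllab.envs.util`. The snippet below is a hypothetical sketch of what those helpers could look like, not the implementation from this branch; the `EnvSpec` import path and the `max_episode_steps` attribute are assumptions.

```python
# Hypothetical sketch of the rllab.envs.util helpers used in these examples.
# NOT the actual code from this PR; import path and attributes are assumed.
from rllab.envs.env_spec import EnvSpec  # assumed location of EnvSpec


def spec(env):
    """Build an EnvSpec from a gym.Env's observation and action spaces."""
    return EnvSpec(
        observation_space=env.observation_space,
        action_space=env.action_space)


def horizon(env):
    """Return the episode horizon recorded in the gym registry, if any."""
    return env.spec.max_episode_steps  # assumes a registered, time-limited env
```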
12 changes: 7 additions & 5 deletions contrib/bichengcao/examples/trpo_gym_CartPole-v0.py
@@ -1,27 +1,29 @@
import gym

from rllab.algos import TRPO
Owner commented: why remove this file?

from rllab.baselines import LinearFeatureBaseline
from rllab.envs import GymEnv
from rllab.envs import normalize
from rllab.envs.util import horizon, spec
from rllab.misc import run_experiment_lite
from rllab.policies import CategoricalMLPPolicy


def run_task(*_):
env = normalize(GymEnv("CartPole-v0"))
env = gym.make("CartPole-v0")

policy = CategoricalMLPPolicy(
env_spec=env.spec,
env_spec=spec(env),
hidden_sizes=(32, 32)
)

baseline = LinearFeatureBaseline(env_spec=env.spec)
baseline = LinearFeatureBaseline(env_spec=spec(env))

algo = TRPO(
env=env,
policy=policy,
baseline=baseline,
batch_size=4000,
max_path_length=env.horizon,
max_path_length=horizon(env),
n_itr=50,
discount=0.99,
step_size=0.01,
12 changes: 7 additions & 5 deletions contrib/bichengcao/examples/trpo_gym_CartPole-v1.py
@@ -1,27 +1,29 @@
import gym

from rllab.algos import TRPO
Owner commented: why remove this file?

from rllab.baselines import LinearFeatureBaseline
from rllab.envs import GymEnv
from rllab.envs import normalize
from rllab.envs.util import horizon, spec
from rllab.misc import run_experiment_lite
from rllab.policies import CategoricalMLPPolicy


def run_task(*_):
env = normalize(GymEnv("CartPole-v1"))
env = gym.make("CartPole-v1")

policy = CategoricalMLPPolicy(
env_spec=env.spec,
env_spec=spec(env),
hidden_sizes=(32, 32)
)

baseline = LinearFeatureBaseline(env_spec=env.spec)
baseline = LinearFeatureBaseline(env_spec=spec(env))

algo = TRPO(
env=env,
policy=policy,
baseline=baseline,
batch_size=4000,
max_path_length=env.horizon,
max_path_length=horizon(env),
n_itr=50,
discount=0.99,
step_size=0.01,
11 changes: 6 additions & 5 deletions contrib/bichengcao/examples/trpo_gym_MountainCar-v0.py
@@ -1,29 +1,30 @@
# This doesn't work. After 150 iterations still didn't learn anything.
Owner commented: why remove this file?

import gym

from rllab.algos import TRPO
from rllab.baselines import LinearFeatureBaseline
from rllab.envs import GymEnv
from rllab.envs import normalize
from rllab.envs.util import horizon, spec
from rllab.misc import run_experiment_lite
from rllab.policies import CategoricalMLPPolicy


def run_task(*_):
env = normalize(GymEnv("MountainCar-v0"))
env = gym.make("MountainCar-v0")

policy = CategoricalMLPPolicy(
env_spec=env.spec,
env_spec=spec(env),
hidden_sizes=(32, 32)
)

baseline = LinearFeatureBaseline(env_spec=env.spec)
baseline = LinearFeatureBaseline(env_spec=spec(env))

algo = TRPO(
env=env,
policy=policy,
baseline=baseline,
batch_size=4000,
max_path_length=env.horizon,
max_path_length=horizon(env),
n_itr=150,
discount=0.99,
step_size=0.1,
12 changes: 7 additions & 5 deletions contrib/bichengcao/examples/trpo_gym_Pendulum-v0.py
@@ -1,27 +1,29 @@
import gym

from rllab.algos import TRPO
Owner commented: why remove this file?

from rllab.baselines import LinearFeatureBaseline
from rllab.envs import GymEnv
from rllab.envs import normalize
from rllab.envs.util import horizon, spec
from rllab.misc import run_experiment_lite
from rllab.policies import GaussianMLPPolicy


def run_task(*_):
env = normalize(GymEnv("Pendulum-v0"))
env = gym.make("Pendulum-v0")

policy = GaussianMLPPolicy(
env_spec=env.spec,
env_spec=spec(env),
hidden_sizes=(32, 32)
)

baseline = LinearFeatureBaseline(env_spec=env.spec)
baseline = LinearFeatureBaseline(env_spec=spec(env))

algo = TRPO(
env=env,
policy=policy,
baseline=baseline,
batch_size=4000,
max_path_length=env.horizon,
max_path_length=horizon(env),
n_itr=50,
discount=0.99,
step_size=0.01,
3 changes: 2 additions & 1 deletion contrib/ros/envs/ros_env.py
@@ -1,3 +1,4 @@
import gym
import numpy as np
import rospy

@@ -6,7 +7,7 @@
from rllab.misc.ext import get_seed


class RosEnv(Env, Serializable):
class RosEnv(gym.Env, Serializable):
"""
Superclass for all ros environment
"""
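With `RosEnv` now deriving from `gym.Env` rather than rllab's `Env`, it is expected to expose gym's interface (`reset`, `step`, `observation_space`, `action_space`). A minimal, hypothetical subclass is sketched below to show that contract; it is illustrative only and not `RosEnv`'s actual implementation.

```python
# Minimal gym.Env subclass illustrating the interface RosEnv now inherits
# (classic gym API: step returns a 4-tuple). Purely illustrative.
import gym
import numpy as np


class MinimalEnv(gym.Env):
    def __init__(self):
        self.observation_space = gym.spaces.Box(
            -np.inf, np.inf, shape=(3,), dtype=np.float32)
        self.action_space = gym.spaces.Discrete(2)

    def reset(self):
        # Return the initial observation.
        return np.zeros(3, dtype=np.float32)

    def step(self, action):
        # Return (observation, reward, done, info).
        return np.zeros(3, dtype=np.float32), 0.0, False, {}
```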
8 changes: 5 additions & 3 deletions contrib/ros/envs/sawyer_env.py
@@ -8,7 +8,6 @@

from rllab.core.serializable import Serializable
from rllab.envs.base import Step
from rllab.spaces import Box

from contrib.ros.envs.ros_env import RosEnv
from contrib.ros.robots.sawyer import Sawyer
@@ -165,8 +164,11 @@ def observation_space(self):
"""
Returns a Space object
"""
return Box(
-np.inf, np.inf, shape=self.get_observation()['observation'].shape)
return gym.spaces.Box(
-np.inf,
np.inf,
shape=self.get_observation()['observation'].shape,
dtype=np.float32)

# ================================================
# Functions that gazebo env asks to implement
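Replacing `rllab.spaces.Box` with `gym.spaces.Box` adds an explicit `dtype`, which gym expects in order to avoid precision warnings. A quick, self-contained check of that call pattern (with a made-up observation shape, not the real Sawyer observation) might look like:

```python
# Illustrative only: the observation shape is a placeholder.
import gym
import numpy as np

obs = np.zeros(7, dtype=np.float32)  # stand-in for get_observation()['observation']
space = gym.spaces.Box(-np.inf, np.inf, shape=obs.shape, dtype=np.float32)
print(space.contains(obs))  # True
```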
14 changes: 9 additions & 5 deletions contrib/ros/robots/sawyer.py
@@ -4,11 +4,10 @@

from intera_core_msgs.msg import JointLimits
import intera_interface
import gym
import numpy as np
import rospy

from rllab.spaces import Box

from contrib.ros.robots.robot import Robot

INITIAL_SIM_ROBOT_JOINT_POS = {
@@ -101,7 +100,11 @@ def get_observation(self):

@property
def observation_space(self):
return Box(-np.inf, np.inf, shape=self.get_observation().shape)
return gym.spaces.Box(
-np.inf,
np.inf,
shape=self.get_observation().shape,
dtype=np.float32)

def send_command(self, commands):
"""
@@ -163,6 +166,7 @@ def action_space(self):
else:
raise ValueError(
'Control mode %s is not known!' % self._control_mode)
return Box(
return gym.spaces.Box(
np.concatenate((lower_bounds, np.array([0]))),
np.concatenate((upper_bounds, np.array([100]))))
np.concatenate((upper_bounds, np.array([100]))),
dtype=np.float32)
8 changes: 4 additions & 4 deletions contrib/ros/util/task_object_manager.py
@@ -1,10 +1,9 @@
from gazebo_msgs.msg import ModelStates
from geometry_msgs.msg import Point
import gym
import numpy as np
import rospy

from rllab.spaces import Box


class TaskObject(object):
def __init__(self, name, initial_pos, random_delta_range, resource=None):
@@ -127,7 +126,8 @@ def get_manipulatables_observation(self):

@property
def manipulatables_observation_space(self):
return Box(
return gym.spaces.Box(
-np.inf,
np.inf,
shape=self.get_manipulatables_observation()['obs'].shape)
shape=self.get_manipulatables_observation()['obs'].shape,
dtype=np.float32)
93 changes: 0 additions & 93 deletions docs/user/gym_integration.rst

This file was deleted.

12 changes: 7 additions & 5 deletions examples/cluster_demo.py
@@ -1,7 +1,10 @@
import sys

from rllab.algos import TRPO
from rllab.baselines import LinearFeatureBaseline
from rllab.envs.box2d import CartpoleEnv
from rllab.envs import normalize
from rllab.envs.box2d import CartpoleEnv
from rllab.envs.util import spec
from rllab.misc import stub, run_experiment_lite
from rllab.policies import GaussianMLPPolicy
import sys
@@ -11,12 +14,11 @@ def run_task(v):
env = normalize(CartpoleEnv())

policy = GaussianMLPPolicy(
env_spec=env.spec,
env_spec=spec(env),
# The neural network policy should have two hidden layers, each with 32 hidden units.
hidden_sizes=(32, 32)
)
hidden_sizes=(32, 32))

baseline = LinearFeatureBaseline(env_spec=env.spec)
baseline = LinearFeatureBaseline(env_spec=spec(env))

algo = TRPO(
env=env,