Migrate to gymnasium maintaining python 3.8 compatibility (#73)

* Initial commit * py38 compatible type hints * gymnasium compatible reset * gymnasium compatibility changes * gymnasium compatible reset and random * Make type annotations python 3.8 compatible. * Fix some grammar issues. * Raise RuntimeErrors and ValueErrors in the proper places. * Undoing unrelated formatting fixes in the readme. * Remove unused ruff configuration. * Add Adam's wording suggestions. * switch to alpha-version of the circle-ci image (revert this before merge) * Update Xdummy-entrypoint.py to python3 * Update Dockerfile to Ubuntu 20.04 and add ssh. * Don't mention gym in inline comment but gymnasium. * Absorb terminated AND truncated steps. * Treat done == (terminated or truncated) and stop mentioning done in the documentation. * Remove outdated make_env_no_wrappers * Use registry keys instead of extracting env_id from the spec. * Ensure to seed environments upon the first reset. * Add missing shimmy dependency for atari. * Detect atari envs by looking for shimmy entrypoint instead of gym entrypoint. * Add missing observation space to TabularModelMDP * Look for render modes in new location of the environment metadata. * When testing the rollout schema, check for both termination and truncation. * Remove outdated asserts on the result of env.reset(). * Adapt reset() of MaskScoreWrapper to new gymnasium API * Switch to v4 versions of the MuJoCo environments. * Simplify tests for render modes. * Forward args and kwargs when constructing environments, so we can pass in the render mode. * Add `Casino-Unmasked-v5` to the list of slow envs with randomness. * Add some missing commas. * Add pygame to setup.py * Update ale-py version. * Make test_sample_distribution deterministic by introducing a seed. * Fixing isort issues. * Add missing trailing commas. * Minor formatting fixes. * Fix trailing whitespace. * Black fixes. * Explicitly seed dummy environment. * Remove unnecessary cast to int. * Fix some typing issues. * Fix more typing issues. 
* Simplify ObsCastWrapper by inheriting from gym.ObservationWrapper instead of gym.Wrapper. * Small typos in docstrings. * Add reset info when generating rollouts. * Remove unneeded default params to rand_gen.normal() * Remove unneeded setter for the observation space property in a ResettableMDP. * Ignore coverage for edge cases where the observation space has no shape. * Add a test case that ensures that options in the reset to a ResettablePOMDP are rejected. * Remove rand_state property of ResettablePOMDP and use the canonical np_random of the superclass instead. * Remove newline in base_envs.py * Fix type annotations of FixedHorizonCartPole.reset() * Remove leftover usages of rand_state. * Fix quirks in dependencies that are no longer needed. * Store unused info in _ * Make test_sample_distribution deterministic by seeding the used rng instead of setting the global seed. * Add missing test dependency. * Ensure we have the newest pip version to make the dependency resolution work. * Make the dependencies cache also dependent on ci/build_venv.sh --------- Co-authored-by: Juan Rocamonde <[email protected]> Co-authored-by: Maximilian Ernestus <[email protected]>
HumanCompatibleAI · Aug 31, 2023 · a7954c2 · a7954c2
1 parent de29873
commit a7954c2
Show file tree

Hide file tree

Showing 28 changed files with 449 additions and 433 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -8,7 +8,7 @@ orbs:
 
 defaults: &defaults
   docker:
-    - image: humancompatibleai/seals:base
+    - image: humancompatibleai/seals:base-alpha
       auth:
         username: $DOCKERHUB_USERNAME
         password: $DOCKERHUB_PASSWORD
@@ -51,7 +51,7 @@ commands:
       # released that you want to upgrade to, without mandating the newer version in setup.py.
       - restore_cache:
           keys:
-            - v2-dependencies-{{ checksum "setup.py" }}
+            - v2-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_venv.sh" }}
 
       # Create virtual environment and install dependencies using `ci/build_venv.sh`.
       # `mujoco_py` needs a MuJoCo key, so download that first.
@@ -64,7 +64,7 @@ commands:
       - save_cache:
           paths:
             - /venv
-          key: v2-dependencies-{{ checksum "setup.py" }}
+          key: v2-dependencies-{{ checksum "setup.py" }}-{{ checksum "ci/build_venv.sh" }}
 
       # Install seals.
       # Note we install the source distribution, not in developer mode (`pip install -e`).

diff --git a/Dockerfile b/Dockerfile
@@ -1,6 +1,6 @@
 # base stage contains just binary dependencies.
 # This is used in the CI build.
-FROM nvidia/cuda:10.0-runtime-ubuntu18.04 AS base
+FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04 AS base
 ARG DEBIAN_FRONTEND=noninteractive
 
 RUN    apt-get update -q \
@@ -9,6 +9,7 @@ RUN    apt-get update -q \
     curl \
     ffmpeg \
     git \
+    ssh \
     libgl1-mesa-dev \
     libgl1-mesa-glx \
     libglew-dev \

diff --git a/README.md b/README.md
@@ -39,7 +39,7 @@ All *seals* environments are available in the Gym registry. Simply import it and
 would with your usual RL or specification learning algroithm:
 
 ```python
-import gym
+import gymnasium as gym
 import seals
 
 env = gym.make('seals/CartPole-v0')

diff --git a/ci/Xdummy-entrypoint.py b/ci/Xdummy-entrypoint.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
 
 # Adapted from https://github.com/openai/mujoco-py/blob/master/vendor/Xdummy-entrypoint
 # Copyright OpenAI; MIT License

diff --git a/ci/build_venv.sh b/ci/build_venv.sh
@@ -9,4 +9,5 @@ fi
 
 virtualenv -p python3.8 ${venv}
 source ${venv}/bin/activate
+pip install --upgrade pip  # Ensure we have the newest pip
 pip install .[cpu,docs,mujoco,test]
diff --git a/pyproject.toml b/pyproject.toml
@@ -6,8 +6,5 @@ build-backend = "setuptools.build_meta"
 target-version = ["py38"]
 
 [[tool.mypy.overrides]]
-module = [
-    "gym.*",
-    "setuptools_scm.*",
-]
+module = ["gym.*", "setuptools_scm.*"]
 ignore_missing_imports = true
diff --git a/setup.py b/setup.py
@@ -88,14 +88,14 @@ def get_readme() -> str:
 
 ATARI_REQUIRE = [
     "opencv-python",
-    "ale-py==0.7.4",
+    "ale-py~=0.8.1",
     "pillow",
     "autorom[accept-rom-license]~=0.4.2",
+    "shimmy[atari] >=0.1.0,<1.0",
 ]
 TESTS_REQUIRE = [
-    # remove pin once https://github.com/nedbat/coveragepy/issues/881 fixed
     "black",
-    "coverage==4.5.4",
+    "coverage~=4.5.4",
     "codecov",
     "codespell",
     "darglint>=1.5.6",
@@ -115,11 +115,8 @@ def get_readme() -> str:
     "pytest-xdist",
     "pytype",
     "stable-baselines3>=0.9.0",
-    # TODO(adam): remove pyglet pin once Gym upgraded to >0.21
-    # Workaround for https://github.com/openai/gym/issues/2986
-    # Discussed in https://github.com/HumanCompatibleAI/imitation/pull/603
-    "pyglet==1.5.27",
     "setuptools_scm~=7.0.5",
+    "gymnasium[classic-control,mujoco]",
     *ATARI_REQUIRE,
 ]
 DOCS_REQUIRE = [
@@ -140,16 +137,14 @@ def get_readme() -> str:
     packages=find_packages("src"),
     package_dir={"": "src"},
     package_data={"seals": ["py.typed"]},
-    install_requires=["gym", "numpy"],
+    install_requires=["gymnasium", "numpy"],
     tests_require=TESTS_REQUIRE,
     extras_require={
         # recommended packages for development
         "dev": ["ipdb", "jupyter", *TESTS_REQUIRE, *DOCS_REQUIRE],
         "docs": DOCS_REQUIRE,
         "test": TESTS_REQUIRE,
-        # We'd like to specify `gym[mujoco]`, but this is a no-op when Gym is already
-        # installed. See https://github.com/pypa/pip/issues/4957 for issue.
-        "mujoco": ["mujoco_py>=1.50, <2.0", "imageio"],
+        "mujoco": ["gymnasium[mujoco]"],
         "atari": ATARI_REQUIRE,
     },
     url="https://github.com/HumanCompatibleAI/benchmark-environments",

diff --git a/src/seals/__init__.py b/src/seals/__init__.py
@@ -2,7 +2,7 @@
 
 from importlib import metadata
 
-import gym
+import gymnasium as gym
 
 from seals import atari, util
 import seals.diagnostics  # noqa: F401
@@ -31,12 +31,12 @@
 
 for env_base in ["Ant", "HalfCheetah", "Hopper", "Humanoid", "Swimmer", "Walker2d"]:
     gym.register(
-        id=f"seals/{env_base}-v0",
+        id=f"seals/{env_base}-v1",
         entry_point=f"seals.mujoco:{env_base}Env",
-        max_episode_steps=util.get_gym_max_episode_steps(f"{env_base}-v3"),
+        max_episode_steps=util.get_gym_max_episode_steps(f"{env_base}-v4"),
     )
 
 # Atari
 
-GYM_ATARI_ENV_SPECS = list(filter(atari._supported_atari_env, gym.envs.registry.all()))
+GYM_ATARI_ENV_SPECS = list(filter(atari._supported_atari_env, gym.registry.values()))
 atari.register_atari_envs(GYM_ATARI_ENV_SPECS)
diff --git a/src/seals/atari.py b/src/seals/atari.py
@@ -2,7 +2,8 @@
 
 from typing import Dict, Iterable, Optional
 
-import gym
+import gymnasium as gym
+from gymnasium.envs.registration import EnvSpec
 
 from seals.util import (
     AutoResetWrapper,
@@ -35,9 +36,9 @@ def _get_score_region(atari_env_id: str) -> Optional[MaskedRegionSpecifier]:
     return SCORE_REGIONS.get(basename)
 
 
-def make_atari_env(atari_env_id: str, masked: bool) -> gym.Env:
+def make_atari_env(atari_env_id: str, masked: bool, *args, **kwargs) -> gym.Env:
     """Fixed-length, optionally masked-score variant of a given Atari environment."""
-    env = AutoResetWrapper(gym.make(atari_env_id))
+    env: gym.Env = AutoResetWrapper(gym.make(atari_env_id, *args, **kwargs))
 
     if masked:
         score_region = _get_score_region(atari_env_id)
@@ -59,15 +60,15 @@ def _not_ram_or_det(env_id: str) -> bool:
     after_slash = slash_separated[-1]
     hyphen_separated = after_slash.split("-")
     assert len(hyphen_separated) > 1
-    not_ram = not ("ram" in hyphen_separated[1])
-    not_deterministic = not ("Deterministic" in env_id)
+    not_ram = "ram" not in hyphen_separated[1]
+    not_deterministic = "Deterministic" not in env_id
     return not_ram and not_deterministic
 
 
-def _supported_atari_env(gym_spec: gym.envs.registration.EnvSpec) -> bool:
+def _supported_atari_env(gym_spec: EnvSpec) -> bool:
     """Checks if a gym Atari environment is one of the ones we will support."""
-    is_atari = gym_spec.entry_point == "gym.envs.atari:AtariEnv"
-    v5_and_plain = gym_spec.id.endswith("-v5") and not ("NoFrameskip" in gym_spec.id)
+    is_atari = gym_spec.entry_point == "shimmy.atari_env:AtariEnv"
+    v5_and_plain = gym_spec.id.endswith("-v5") and "NoFrameskip" not in gym_spec.id
     v4_and_no_frameskip = gym_spec.id.endswith("-v4") and "NoFrameskip" in gym_spec.id
     return (
         is_atari
@@ -76,7 +77,7 @@ def _supported_atari_env(gym_spec: gym.envs.registration.EnvSpec) -> bool:
     )
 
 
-def _seals_name(gym_spec: gym.envs.registration.EnvSpec, masked: bool) -> str:
+def _seals_name(gym_spec: EnvSpec, masked: bool) -> str:
     """Makes a Gym ID for an Atari environment in the seals namespace."""
     slash_separated = gym_spec.id.split("/")
     name = "seals/" + slash_separated[-1]
@@ -88,7 +89,7 @@ def _seals_name(gym_spec: gym.envs.registration.EnvSpec, masked: bool) -> str:
 
 
 def register_atari_envs(
-    gym_atari_env_specs: Iterable[gym.envs.registration.EnvSpec],
+    gym_atari_env_specs: Iterable[EnvSpec],
 ) -> None:
     """Register masked and unmasked wrapped gym Atari environments."""