[RLlib] Fix IMPALA/APPO learning behavior: Fix EnvRunner sync bug, GPU loader thread, enable local learner w/ GPU. #48314

Merged
Changes from 6 commits

Commits (46)
54579d5  wip (sven1977, Oct 28, 2024)
8443dcb  Revert "Revert "[RLlib] Upgrade to gymnasium 1.0.0 (ale_py 0.10.1, mu… (sven1977, Oct 29, 2024)
ab2b22c  wip (sven1977, Oct 29, 2024)
43bd52f  Merge branch 'master' of https://github.com/ray-project/ray into fix_… (sven1977, Oct 29, 2024)
716c241  Merge branch 'revert-48297-revert-45328-upgrade_gymnasium_to_1_0_0a1'… (sven1977, Oct 29, 2024)
a967fd4  wip (sven1977, Oct 29, 2024)
bc17c93  wip (sven1977, Oct 29, 2024)
17c6bad  wip (sven1977, Oct 30, 2024)
ee208a0  wip (sven1977, Oct 30, 2024)
bef9e1f  wip (sven1977, Oct 30, 2024)
43b9ba6  wip (sven1977, Oct 30, 2024)
317875a  Merge branch 'master' of https://github.com/ray-project/ray into fix_… (sven1977, Oct 31, 2024)
b2aebd1  wip (sven1977, Oct 31, 2024)
c403ffe  wip (sven1977, Oct 31, 2024)
bde9583  Merge branch 'master' of https://github.com/ray-project/ray into fix_… (sven1977, Oct 31, 2024)
e576ebe  wip (sven1977, Oct 31, 2024)
7396518  wip (sven1977, Oct 31, 2024)
3ff57ae  learns Pong-v5 on 1 (local) GPU and 46 env runners in ~6-7min. (sven1977, Oct 31, 2024)
8afddb4  wip (sven1977, Nov 1, 2024)
ced8703  fix (sven1977, Nov 1, 2024)
8148259  Merge branch 'master' of https://github.com/ray-project/ray into fix_… (sven1977, Nov 1, 2024)
a98568a  fix (sven1977, Nov 1, 2024)
5e29b1f  Merge branch 'master' of https://github.com/ray-project/ray into fix_… (sven1977, Nov 1, 2024)
dde1132  fix (sven1977, Nov 1, 2024)
db4641c  fix (sven1977, Nov 1, 2024)
aa9c578  Merge branch 'master' of https://github.com/ray-project/ray into fix_… (sven1977, Nov 1, 2024)
20efe00  Merge branch 'master' of https://github.com/ray-project/ray into fix_… (sven1977, Nov 2, 2024)
5b979f7  wip (sven1977, Nov 4, 2024)
157060f  wip (sven1977, Nov 4, 2024)
0c09e74  wip (sven1977, Nov 5, 2024)
3602517  fixes (sven1977, Nov 5, 2024)
97cb2a8  merge (sven1977, Nov 5, 2024)
8574688  fix (sven1977, Nov 5, 2024)
c674cd7  fix (sven1977, Nov 5, 2024)
f3c0352  Merge branch 'master' of https://github.com/ray-project/ray into fix_… (sven1977, Nov 5, 2024)
051c3bc  wip (sven1977, Nov 5, 2024)
cebbec1  fix (sven1977, Nov 5, 2024)
fa07017  fix (sven1977, Nov 6, 2024)
0e34fd9  merge (sven1977, Nov 6, 2024)
07faf22  fix (sven1977, Nov 6, 2024)
a1f68b1  Merge branch 'master' of https://github.com/ray-project/ray into fix_… (sven1977, Nov 6, 2024)
fa63e33  fix (sven1977, Nov 6, 2024)
277e057  wip (sven1977, Nov 6, 2024)
8fae002  fix (sven1977, Nov 6, 2024)
308e161  fix (sven1977, Nov 6, 2024)
3f31afa  wip (sven1977, Nov 7, 2024)
2 changes: 1 addition & 1 deletion doc/source/ray-core/examples/plot_pong_example.ipynb
@@ -292,7 +292,7 @@
"@ray.remote\n",
"class RolloutWorker(object):\n",
" def __init__(self):\n",
" self.env = gym.make(\"ALE/Pong-v5\")\n",
" self.env = gym.make(\"ale_py:ALE/Pong-v5\")\n",
"\n",
" def compute_gradient(self, model):\n",
" # Compute a simulation episode.\n",
2 changes: 1 addition & 1 deletion doc/source/rllib/doc_code/dreamerv3_inference.py
@@ -10,7 +10,7 @@

env_name = "CartPole-v1"
# Use the vector env API.
env = gym.vector.make(env_name, num_envs=1, asynchronous=False)
env = gym.make_vec(env_name, num_envs=1, vectorization_mode="sync")

terminated = truncated = False
# Reset the env.
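
`gym.vector.make()` was removed in gymnasium 1.0; `gym.make_vec()` with an explicit `vectorization_mode` is the replacement shown above. A minimal standalone sketch of the new call (illustrative, not taken from the PR):

```python
import gymnasium as gym

# gymnasium 1.0: build a vector env with one synchronous CartPole instance.
vec_env = gym.make_vec("CartPole-v1", num_envs=1, vectorization_mode="sync")

obs, infos = vec_env.reset(seed=0)
# Vector envs step with a batch of actions, one per sub-environment.
obs, rewards, terminateds, truncateds, infos = vec_env.step(vec_env.action_space.sample())
vec_env.close()
```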
2 changes: 1 addition & 1 deletion doc/source/rllib/doc_code/training.py
@@ -4,7 +4,7 @@
try:
import gymnasium as gym

env = gym.make("ALE/Pong-v5")
env = gym.make("ale_py:ALE/Pong-v5")
obs, infos = env.reset()
except Exception:
import gym
2 changes: 1 addition & 1 deletion doc/source/rllib/rllib-examples.rst
@@ -280,7 +280,7 @@ in roughly 5min. It can be run like this on a single g5.24xlarge (or g6.24xlarge
.. code-block:: bash

$ cd ray/rllib/tuned_examples/ppo
$ python atari_ppo.py --env ALE/Pong-v5 --num-gpus=4 --num-env-runners=95
$ python atari_ppo.py --env=ale_py:ALE/Pong-v5 --num-learners=4 --num-env-runners=95

Note that some of the files in this folder are used for RLlib's daily or weekly
release tests as well.
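
The flag change (`--num-gpus` to `--num-learners`) reflects how the new API stack scales training: you request N Learner workers (each typically pinned to one GPU) rather than N GPUs directly. A rough in-code equivalent of the command above, sketched against the current `AlgorithmConfig` API (method names can differ slightly between Ray versions):

```python
from ray.rllib.algorithms.ppo import PPOConfig

config = (
    PPOConfig()
    .environment("ale_py:ALE/Pong-v5")
    # 95 EnvRunner actors collect Atari samples in parallel.
    .env_runners(num_env_runners=95)
    # 4 Learner workers, one GPU each -- mirrors `--num-learners=4`.
    .learners(num_learners=4, num_gpus_per_learner=1)
)
algo = config.build()
print(algo.train())
```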
2 changes: 1 addition & 1 deletion python/requirements.txt
@@ -38,7 +38,7 @@ colorful
rich
opentelemetry-sdk
fastapi
gymnasium==0.28.1
gymnasium==1.0.0
virtualenv!=20.21.1,>=20.0.24
opentelemetry-api
opencensus
35 changes: 10 additions & 25 deletions python/requirements/ml/rllib-test-requirements.txt
@@ -3,43 +3,28 @@
# Environment adapters.
# ---------------------
# Atari
gymnasium==0.28.1; python_version < "3.12"
imageio; python_version < "3.12"
ale_py==0.8.1; python_version < "3.12"
imageio==2.34.2
ale_py==0.10.1
# For testing MuJoCo envs with gymnasium.
mujoco==2.3.6; python_version < "3.12"
mujoco==3.2.4
dm_control==1.0.12; python_version < "3.12"

# For tests on PettingZoo's multi-agent envs.
pettingzoo==1.23.1
# When installing pettingzoo, chess is missing, even though its a dependancy
# TODO: remove if a future pettingzoo and/or ray version fixes this dependancy issue.
chess==1.7.0
pettingzoo==1.24.3
pymunk==6.2.1
supersuit==3.8.0; python_version < "3.12"
tinyscaler==1.2.6; python_version < "3.12"
shimmy

# Kaggle envs.
kaggle_environments==1.7.11
# Unity3D testing
# TODO(sven): Add this back to rllib-requirements.txt once mlagents no longer pins torch<1.9.0 version.
#mlagents==0.28.0
mlagents_envs==0.28.0
tinyscaler==1.2.8
shimmy==2.0.0
supersuit==3.9.3

# For tests on minigrid.
minigrid
# For tests on RecSim and Kaggle envs.
# Explicitly depends on `tensorflow` and doesn't accept `tensorflow-macos`
recsim==0.2.4; (sys_platform != 'darwin' or platform_machine != 'arm64') and python_version < "3.12"
# recsim depends on dopamine-rl, but dopamine-rl pins gym <= 0.25.2, which break some envs
dopamine-rl==4.0.5; (sys_platform != 'darwin' or platform_machine != 'arm64') and python_version < "3.12"
minigrid==2.3.1
tensorflow_estimator
# DeepMind's OpenSpiel
open-spiel==1.4
# Unity3D testing
mlagents_envs==0.28.0

# Requires libtorrent which is unavailable for arm64
autorom[accept-rom-license]; platform_machine != "arm64"
h5py==3.10.0

# Requirements for rendering.
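
A quick way to sanity-check that the bumped pins above resolve together in a given environment is to print what actually got installed (generic sketch, not part of this PR):

```python
from importlib.metadata import PackageNotFoundError, version

# Distributions touched in this file; adjust to whatever you actually install.
for dist in ("gymnasium", "ale-py", "mujoco", "pettingzoo", "shimmy", "supersuit", "minigrid"):
    try:
        print(f"{dist}=={version(dist)}")
    except PackageNotFoundError:
        print(f"{dist}: not installed")
```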
31 changes: 8 additions & 23 deletions python/requirements_compiled.txt
@@ -75,10 +75,10 @@ aiosqlite==0.19.0
# via ypy-websocket
alabaster==0.7.13
# via sphinx
ale-py==0.8.1 ; python_version < "3.12"
ale-py==0.10.1
# via
# -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt
# gym
# gymnasium
alembic==1.12.1
# via
# aim
@@ -272,8 +272,6 @@ charset-normalizer==3.3.2
# via
# requests
# snowflake-connector-python
chess==1.7.0
# via -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt
chex==0.1.7
# via optax
clang-format==12.0.1
@@ -306,7 +304,6 @@ cloudpickle==2.2.0
# -r /ray/ci/../python/requirements/test-requirements.txt
# dask
# distributed
# gym
# gymnasium
# hyperopt
# mlagents-envs
@@ -704,13 +701,7 @@ gsutil==5.27
# via -r /ray/ci/../python/requirements/docker/ray-docker-requirements.txt
gunicorn==20.1.0
# via mlflow
gym==0.26.2
# via
# dopamine-rl
# recsim
gym-notices==0.0.8
# via gym
gymnasium==0.28.1 ; python_version < "3.12"
gymnasium==1.0.0
# via
# -r /ray/ci/../python/requirements.txt
# -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt
@@ -1126,7 +1117,7 @@ msrestazure==0.6.4
# via
# -r /ray/ci/../python/requirements/test-requirements.txt
# azure-cli-core
mujoco==2.3.6 ; python_version < "3.12"
mujoco==3.2.4
# via
# -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt
# dm-control
@@ -1246,7 +1237,6 @@ numpy==1.26.4
# flax
# gpy
# gradio
# gym
# gymnasium
# h5py
# hpbandster
@@ -1290,7 +1280,6 @@ numpy==1.26.4
# pyro-ppl
# pytorch-lightning
# raydp
# recsim
# scikit-image
# scikit-learn
# scipy
@@ -1489,7 +1478,7 @@ pbr==6.0.0
# sarif-om
peewee==3.17.0
# via semgrep
pettingzoo==1.23.1
pettingzoo==1.24.3
# via -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt
pexpect==4.8.0
# via
@@ -1862,8 +1851,6 @@ querystring-parser==1.2.4
# via raydp
raydp==1.7.0b20231020.dev0
# via -r /ray/ci/../python/requirements/ml/data-test-requirements.txt
recsim==0.2.4 ; (sys_platform != "darwin" or platform_machine != "arm64") and python_version < "3.12"
# via -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt
redis==4.4.2
# via -r /ray/ci/../python/requirements/test-requirements.txt
regex==2024.5.15
@@ -2049,7 +2036,7 @@ shellcheck-py==0.7.1.1
# via -r /ray/ci/../python/requirements/lint-requirements.txt
shellingham==1.5.4
# via typer
shimmy==1.3.0
shimmy==2.0.0
# via -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt
shortuuid==1.0.1
# via -r /ray/ci/../python/requirements/ml/tune-test-requirements.txt
@@ -2167,9 +2154,7 @@ statsmodels==0.14.0
# via
# hpbandster
# statsforecast
strictyaml==1.7.3
# via pyiceberg
supersuit==3.8.0 ; python_version < "3.12"
supersuit==3.9.3
# via -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt
sympy==1.13.1
# via
@@ -2256,7 +2241,7 @@ timm==0.9.2
# via -r /ray/ci/../python/requirements/ml/tune-test-requirements.txt
tinycss2==1.3.0
# via nbconvert
tinyscaler==1.2.6 ; python_version < "3.12"
tinyscaler==1.2.8
# via
# -r /ray/ci/../python/requirements/ml/rllib-test-requirements.txt
# supersuit
2 changes: 1 addition & 1 deletion python/setup.py
@@ -299,7 +299,7 @@ def get_packages(self):

setup_spec.extras["rllib"] = setup_spec.extras["tune"] + [
"dm_tree",
"gymnasium==0.28.1",
"gymnasium==1.0.0",
"lz4",
"scikit-image",
"pyyaml",
2 changes: 1 addition & 1 deletion release/long_running_tests/workloads/apex.py
@@ -39,7 +39,7 @@
{
"apex": {
"run": "APEX",
"env": "ALE/Pong-v5",
"env": "ale_py:ALE/Pong-v5",
"config": {
"num_workers": 3,
"num_gpus": 0,
2 changes: 1 addition & 1 deletion release/ml_user_tests/tune_rllib/run_connect_tests.py
@@ -26,7 +26,7 @@ def run(smoke_test=False, storage_path: str = None):

config = (
APPOConfig()
.environment("ALE/Pong-v5", clip_rewards=True)
.environment("ale_py:ALE/Pong-v5", clip_rewards=True)
.framework(tune.grid_search(["tf", "torch"]))
.rollouts(
rollout_fragment_length=50,
14 changes: 1 addition & 13 deletions release/ray_release/byod/requirements_byod_3.9.txt
@@ -116,7 +116,7 @@ aiosignal==1.3.1 \
# via
# -c release/ray_release/byod/requirements_compiled.txt
# aiohttp
ale-py==0.8.1 \
ale-py==0.9.0 \
--hash=sha256:0006d80dfe7745eb5a93444492337203c8bc7eb594a2c24c6a651c5c5b0eaf09 \
--hash=sha256:0856ca777473ec4ae8a59f3af9580259adb0fd4a47d586a125a440c62e82fc10 \
--hash=sha256:0ffecb5c956749596030e464827642945162170a132d093c3d4fa2d7e5725c18 \
@@ -1242,17 +1242,6 @@ gsutil==5.27 \
# via
# -c release/ray_release/byod/requirements_compiled.txt
# -r release/ray_release/byod/requirements_byod_3.9.in
gym[atari]==0.26.2 \
--hash=sha256:e0d882f4b54f0c65f203104c24ab8a38b039f1289986803c7d02cdbe214fbcc4
# via
# -c release/ray_release/byod/requirements_compiled.txt
# -r release/ray_release/byod/requirements_byod_3.9.in
gym-notices==0.0.8 \
--hash=sha256:ad25e200487cafa369728625fe064e88ada1346618526102659b4640f2b4b911 \
--hash=sha256:e5f82e00823a166747b4c2a07de63b6560b1acb880638547e0cabf825a01e463
# via
# -c release/ray_release/byod/requirements_compiled.txt
# gym
h5py==3.10.0 \
--hash=sha256:012ab448590e3c4f5a8dd0f3533255bc57f80629bf7c5054cf4c87b30085063c \
--hash=sha256:212bb997a91e6a895ce5e2f365ba764debeaef5d2dca5c6fb7098d66607adf99 \
@@ -1739,7 +1728,6 @@ numpy==1.26.4 \
# ale-py
# bokeh
# dask
# gym
# h5py
# lightgbm
# ml-dtypes
6 changes: 3 additions & 3 deletions release/release_tests.yaml
@@ -2716,7 +2716,7 @@

run:
timeout: 43200 # 12h
script: python learning_tests/tuned_examples/dreamerv3/atari_100k.py --framework=tf2 --env=ALE/Pong-v5 --num-gpus=1 --stop-reward=15.0 --as-release-test
script: python learning_tests/tuned_examples/dreamerv3/atari_100k.py --framework=tf2 --env=ale_py:ALE/Pong-v5 --num-learners=1 --stop-reward=15.0 --as-release-test

alert: default

@@ -2751,7 +2751,7 @@

run:
timeout: 1200
script: python learning_tests/tuned_examples/ppo/atari_ppo.py --enable-new-api-stack --env=ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 --stop-reward=20.0 --as-release-test
script: python learning_tests/tuned_examples/ppo/atari_ppo.py --enable-new-api-stack --env=ale_py:ALE/Pong-v5 --num-learners=4 --num-env-runners=95 --stop-reward=20.0 --as-release-test

alert: default

@@ -2786,7 +2786,7 @@

run:
timeout: 7200
script: python learning_tests/tuned_examples/sac/halfcheetah_sac.py --enable-new-api-stack --num-gpus=4 --num-env-runners=8 --stop-reward=1000.0 --as-release-test
script: python learning_tests/tuned_examples/sac/halfcheetah_sac.py --enable-new-api-stack --num-learners=4 --num-env-runners=8 --stop-reward=1000.0 --as-release-test

alert: default
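
The `--num-learners` flags in these release-test commands tie into the headline fix: IMPALA/APPO can now also train on a single local Learner that owns a GPU (no remote Learner actors at all), alongside the EnvRunner weight-sync and GPU-loader-thread fixes. A hedged sketch of such a setup, using the current new-API-stack config methods (not code from this PR; exact names may differ across Ray versions):

```python
from ray.rllib.algorithms.appo import APPOConfig

config = (
    APPOConfig()
    .api_stack(
        enable_rl_module_and_learner=True,
        enable_env_runner_and_connector_v2=True,
    )
    .environment("ale_py:ALE/Pong-v5", clip_rewards=True)
    # Many cheap CPU EnvRunners feed a single GPU-backed learner.
    .env_runners(num_env_runners=46)
    # num_learners=0: no remote Learner actors; train on the local Learner,
    # which (with this PR) may be placed on a GPU.
    .learners(num_learners=0, num_gpus_per_learner=1)
)
algo = config.build()
for _ in range(3):
    print(algo.train())
```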

@@ -1,5 +1,5 @@
a2c-breakoutnoframeskip-v5:
env: ALE/Breakout-v5
env: ale_py:ALE/Breakout-v5
run: A2C
# Minimum reward and total ts (in given time_total_s) to pass this test.
pass_criteria:
@@ -1,5 +1,5 @@
a3c-pongdeterministic-v5:
env: ALE/Pong-v5
env: ale_py:ALE/Pong-v5
run: A3C
# Minimum reward and total ts (in given time_total_s) to pass this test.
pass_criteria:
@@ -1,5 +1,5 @@
apex-breakoutnoframeskip-v5:
env: ALE/Breakout-v5
env: ale_py:ALE/Breakout-v5
run: APEX
# Minimum reward and total ts (in given time_total_s) to pass this test.
pass_criteria:
@@ -1,5 +1,5 @@
appo-pongnoframeskip-v5:
env: ALE/Pong-v5
env: ale_py:ALE/Pong-v5
run: APPO
# Minimum reward and total ts (in given time_total_s) to pass this test.
pass_criteria:
@@ -1,5 +1,5 @@
appo-pongnoframeskip-v5:
env: ALE/Pong-v5
env: ale_py:ALE/Pong-v5
run: APPO
# Minimum reward and total ts (in given time_total_s) to pass this test.
pass_criteria:
@@ -1,5 +1,5 @@
dqn-breakoutnoframeskip-v5:
env: ALE/Breakout-v5
env: ale_py:ALE/Breakout-v5
run: DQN
# Minimum reward and total ts (in given time_total_s) to pass this test.
pass_criteria:
@@ -1,5 +1,5 @@
impala-breakoutnoframeskip-v5:
env: ALE/Breakout-v5
env: ale_py:ALE/Breakout-v5
run: IMPALA
# Minimum reward and total ts (in given time_total_s) to pass this test.
pass_criteria:
@@ -20,7 +20,7 @@ def _make_learner_connector(input_observation_space, input_action_space):
# We would like our frame stacking connector to do this job.
def _env_creator(cfg):
return wrap_atari_for_new_api_stack(
gym.make("ALE/Breakout-v5", **cfg, **{"render_mode": "rgb_array"}),
gym.make("ale_py:ALE/Breakout-v5", **cfg, **{"render_mode": "rgb_array"}),
# Perform through ConnectorV2 API.
framestack=None,
)
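
For context, `_env_creator` above is the factory each EnvRunner calls to build its wrapped Atari env; it is typically hooked up through the env registry. A hedged usage sketch with a hypothetical registry key (`"wrapped_breakout"` is made up here, and the snippet assumes `_env_creator` from the hunk above is in scope):

```python
from ray.rllib.algorithms.ppo import PPOConfig
from ray.tune.registry import register_env

# Hypothetical key; any string works as long as the config references it.
register_env("wrapped_breakout", _env_creator)

config = (
    PPOConfig()
    .environment("wrapped_breakout")
    .env_runners(num_env_runners=4)
)
algo = config.build()
```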
@@ -20,7 +20,7 @@ def _make_learner_connector(input_observation_space, input_action_space):
# We would like our frame stacking connector to do this job.
def _env_creator(cfg):
return wrap_atari_for_new_api_stack(
gym.make("ALE/Pong-v5", **cfg, **{"render_mode": "rgb_array"}),
gym.make("ale_py:ALE/Pong-v5", **cfg, **{"render_mode": "rgb_array"}),
# Perform through ConnectorV2 API.
framestack=None,
)
@@ -1,5 +1,5 @@
ppo-breakoutnoframeskip-v5:
env: ALE/Breakout-v5
env: ale_py:ALE/Breakout-v5
run: PPO
# Minimum reward and total ts (in given time_total_s) to pass this test.
pass_criteria: