Mujoco Env #1511

Merged
merged 40 commits into from
Dec 3, 2024
Changes from all commits

Commits (40)
c74ef36
add first iteration nao model
schmidma Oct 16, 2024
9abac0c
add uv venv
schluis Oct 20, 2024
f639bc0
record videos
schluis Oct 20, 2024
2493343
fix dependencies
schluis Oct 21, 2024
8e571b9
change solver, add more options to naoxml
schluis Oct 23, 2024
6d1a76c
fix dependencies (really)
schluis Oct 25, 2024
8631789
add nao_standup env
schluis Oct 25, 2024
a75a86c
add test script
schluis Oct 25, 2024
aa0b0fe
fix dependencies (really2)
schluis Oct 25, 2024
394cd83
improve nao env
schluis Oct 25, 2024
7cccd5e
use nao in scripts
schluis Oct 25, 2024
869662a
use face-down keyframe for standup
schluis Oct 27, 2024
6ada11c
improve naming
schluis Oct 27, 2024
f065f23
tune motors
schluis Oct 27, 2024
65cbff3
add interactive script, fix joint ranges
schluis Oct 27, 2024
5b9cad4
limit arms
schluis Oct 27, 2024
6b6d192
add tensorboard, better video naming
schluis Oct 27, 2024
a40e704
improve feet, improve body
schluis Oct 31, 2024
22f0831
add wandb
schluis Oct 31, 2024
4cc4348
add site head_center and use in reward
schluis Oct 31, 2024
53f08d5
add power-unlimited nao
schluis Oct 31, 2024
7889612
use bhuman model, update env
schluis Nov 11, 2024
c86be99
add face-down keyframe
schluis Nov 11, 2024
37a8d2f
add site head_center
schluis Nov 11, 2024
bfa0883
fix reward xpos
schluis Nov 13, 2024
1b89d46
fix reward xpos
schluis Nov 17, 2024
06f781c
use named access for site head_center
schluis Nov 17, 2024
4818404
rename env
schluis Nov 22, 2024
9bd497b
remove wrong comments
schluis Nov 22, 2024
b364143
update readme
schluis Nov 22, 2024
f18d733
remove redundant model
schluis Nov 27, 2024
ee00602
Add interactive_viewer
schluis Nov 27, 2024
c4e050a
Use external model
schluis Nov 27, 2024
566fb0e
update readme, add gitignore for video and run files
schluis Nov 27, 2024
a3d5e67
remove jupyter notebook
schluis Nov 27, 2024
c4eb1b5
change neovim to dev-dependency
schluis Nov 27, 2024
1591f65
add entry_point of nao_env in test_script
schluis Nov 27, 2024
449042c
Add glfw dependency to readme
schluis Dec 2, 2024
9021b19
remove test_script
schluis Dec 2, 2024
58a73a7
remove ruff stuff and sort imports
schluis Dec 2, 2024
604 changes: 182 additions & 422 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions tools/machine-learning/mujoco/.gitignore
@@ -0,0 +1,2 @@
videos/
runs/
20 changes: 20 additions & 0 deletions tools/machine-learning/mujoco/README.md
@@ -0,0 +1,20 @@
# Setup

Make sure `glfw` is installed on your machine.

For Python, use [uv](https://docs.astral.sh/uv/).
After installing uv, run `uv sync` to install all Python dependencies, or directly execute one of the examples below.

## Example usage

To view the model:

- `uv run interactive_viewer.py`

To train the standup task:

- `uv run standup.py`

## To build a custom NAO environment

Add a new `MujocoEnv` subclass in the `nao_env` folder and export it from the `__init__.py` file, following the pattern of `NaoStandup`. A sketch of such an environment is shown below.
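The following sketch is illustrative only: the `NaoWalk` class, its forward-velocity reward, and the file name `nao_env/nao_walk.py` are hypothetical stand-ins, not part of this PR. It mirrors the structure of `NaoStandup` from this changeset:

```python
# nao_env/nao_walk.py: hypothetical example environment (not part of this PR)
from pathlib import Path

import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
from gymnasium.spaces import Box


class NaoWalk(MujocoEnv, utils.EzPickle):
    metadata = {"render_modes": ["human", "rgb_array", "depth_array"]}

    def __init__(self, **kwargs) -> None:
        # Same model and observation layout as NaoStandup.
        observation_space = Box(
            low=-np.inf, high=np.inf, shape=(661,), dtype=np.float64
        )
        MujocoEnv.__init__(
            self,
            str(Path.cwd().joinpath("model", "scene.xml")),
            5,  # frame_skip
            observation_space=observation_space,
            **kwargs,
        )
        utils.EzPickle.__init__(self, **kwargs)

    def _get_obs(self) -> np.ndarray:
        data = self.data
        return np.concatenate(
            [
                data.qpos.flat[2:],
                data.qvel.flat,
                data.cinert.flat,
                data.cvel.flat,
                data.qfrc_actuator.flat,
                data.cfrc_ext.flat,
            ],
        )

    def step(self, action):
        self.do_simulation(action, self.frame_skip)
        # Illustrative reward: root forward velocity minus a control penalty.
        forward_reward = self.data.qvel.flat[0]
        ctrl_cost = 0.1 * np.square(self.data.ctrl).sum()
        reward = forward_reward - ctrl_cost
        return self._get_obs(), reward, False, False, {}

    def reset_model(self):
        self.set_state(self.init_qpos, self.init_qvel)
        return self._get_obs()
```

Then export it from `nao_env/__init__.py` with `from nao_env.nao_walk import NaoWalk`, so that an entry point such as `nao_env:NaoWalk` resolves when registering the environment.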
13 changes: 13 additions & 0 deletions tools/machine-learning/mujoco/interactive_viewer.py
@@ -0,0 +1,13 @@
import os

import mujoco
from mujoco import viewer

# Select the EGL rendering backend before MuJoCo creates a rendering context.
os.environ["MUJOCO_GL"] = "egl"

model = mujoco.MjModel.from_xml_path("model/scene.xml")
data = mujoco.MjData(model)

# Optionally reset to one of the model's keyframes before launching:
# mujoco.mj_resetDataKeyframe(model, data, 2)

viewer.launch(model, data)
6 changes: 6 additions & 0 deletions tools/machine-learning/mujoco/nao_env/__init__.py
@@ -0,0 +1,6 @@
from gymnasium.envs.mujoco.mujoco_env import MujocoEnv, MuJocoPyEnv  # noqa: F401

# ^^^^^ imported first so that the user gets the correct error
# message if mujoco is not installed correctly

from nao_env.nao_standup import NaoStandup
132 changes: 132 additions & 0 deletions tools/machine-learning/mujoco/nao_env/nao_standup.py
@@ -0,0 +1,132 @@
from pathlib import Path

import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
from gymnasium.spaces import Box

DEFAULT_CAMERA_CONFIG = {
    "trackbodyid": 1,
    "distance": 4.0,
    "lookat": np.array((0.0, 0.0, 0.8925)),
    "elevation": -20.0,
}


class NaoStandup(MujocoEnv, utils.EzPickle):
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
    }

    def __init__(self, **kwargs) -> None:
        observation_space = Box(
            low=-np.inf,
            high=np.inf,
            shape=(661,),
            dtype=np.float64,
        )

        MujocoEnv.__init__(
            self,
            str(Path.cwd().joinpath("model", "scene.xml")),
            5,
            observation_space=observation_space,
            default_camera_config=DEFAULT_CAMERA_CONFIG,
            **kwargs,
        )
        utils.EzPickle.__init__(self, **kwargs)

    def _get_obs(self) -> np.ndarray:
        data = self.data
        return np.concatenate(
            [
                data.qpos.flat[2:],
                data.qvel.flat,
                data.cinert.flat,
                data.cvel.flat,
                data.qfrc_actuator.flat,
                data.cfrc_ext.flat,
            ],
        )

    def step(self, a):
        self.do_simulation(a, self.frame_skip)
        data = self.data

        # Reward the height of the head_center site, scaled by the timestep.
        head_center_id = self.model.site("head_center").id
        head_center_z = data.site_xpos[head_center_id][2]
        uph_cost = (head_center_z - 0) / self.model.opt.timestep

        # Penalize large control signals and (capped) external contact forces.
        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
        quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
        quad_impact_cost = min(quad_impact_cost, 10)
        reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1

        if self.render_mode == "human":
            self.render()
        return (
            self._get_obs(),
            reward,
            False,
            False,
            {
                "reward_linup": uph_cost,
                "reward_quadctrl": -quad_ctrl_cost,
                "reward_impact": -quad_impact_cost,
            },
        )

    def reset_model(self):
        # Start from the face-down keyframe, with small uniform noise on
        # positions and velocities.
        half_random_offset = 0.03
        face_down_keyframe_qpos = [
            0.452845,
            0.219837,
            0.0556939,
            0.710551,
            -0.0810676,
            0.693965,
            0.0834173,
            -0.000571484,
            0.0239414,
            0.000401842,
            -3.89047e-05,
            -0.00175077,
            0.357233,
            0.0114063,
            0.000212495,
            0.000422366,
            3.92127e-05,
            -0.00133669,
            0.356939,
            0.0112884,
            -0.000206283,
            1.46985,
            0.110264,
            0.000766453,
            -0.034298,
            3.65047e-05,
            1.47067,
            -0.110094,
            -0.00201064,
            0.0342998,
            -0.00126886,
        ]
        self.set_state(
            face_down_keyframe_qpos
            + self.np_random.uniform(
                low=-half_random_offset,
                high=half_random_offset,
                size=self.model.nq,
            ),
            self.init_qvel
            + self.np_random.uniform(
                low=-half_random_offset,
                high=half_random_offset,
                size=self.model.nv,
            ),
        )
        return self._get_obs()
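For orientation, here is a minimal smoke test of this environment. It is a sketch, not part of the PR: it assumes it runs from `tools/machine-learning/mujoco` so the relative `model/scene.xml` path resolves, and it reuses the registration parameters from `standup.py` below.

```python
import gymnasium as gym

gym.register(
    id="NaoStandup-v1",
    entry_point="nao_env:NaoStandup",
    max_episode_steps=2500,
)

env = gym.make("NaoStandup-v1")
obs, info = env.reset(seed=0)
for _ in range(100):
    action = env.action_space.sample()  # random actions, just to exercise the env
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        obs, info = env.reset()
env.close()
```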
16 changes: 10 additions & 6 deletions tools/machine-learning/mujoco/pyproject.toml
@@ -5,14 +5,16 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
-    "gymnasium[classic-control,mujoco]==0.28.1",
-    "mujoco>=3.2.4",
+    "gymnasium[classic-control]",
     "ipykernel>=6.29.5",
     "mediapy>=1.2.2",
+    "mujoco>=3.2.4",
     "numpy>=2.1.2",
     "scipy>=1.14.1",
     "moviepy>=1.0.3",
     "stable-baselines3>=2.3.2",
+    "wandb>=0.18.5",
+    "tensorboard>=2.18.0",
 ]

 [tool.ruff]
@@ -64,8 +66,10 @@ ignore = [
 [tool.ruff.lint.per-file-ignores]
 "tests/*" = ["S101", "S603"]

+[tool.ruff.lint.isort]
+required-imports = ["from __future__ import annotations"]

 [tool.uv]
-dev-dependencies = ["pytest>=8.3.3", "ruff>=0.7.3"]
+dev-dependencies = [
+    "neovim>=0.3.1",
+    "pytest>=8.3.3",
+    "ruff>=0.7.3",
+]
102 changes: 86 additions & 16 deletions tools/machine-learning/mujoco/standup.py
@@ -1,23 +1,93 @@
-import gymnasium as gym
+import os

+import gymnasium as gym
+import torch
+import wandb
 from stable_baselines3 import PPO
+from stable_baselines3.common.monitor import Monitor
+from stable_baselines3.common.utils import get_device
+from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder
+from wandb.integration.sb3 import WandbCallback

+if get_device() != torch.device("cpu"):
+    NVIDIA_ICD_CONFIG_PATH = "/usr/share/glvnd/egl_vendor.d/10_nvidia.json"
+    if not os.path.exists(NVIDIA_ICD_CONFIG_PATH):
+        with open(NVIDIA_ICD_CONFIG_PATH, "w") as f:
+            _ = f.write("""{
+    "file_format_version" : "1.0.0",
+    "ICD" : {
+        "library_path" : "libEGL_nvidia.so.0"
+    }
+}""")
+
+    # Configure MuJoCo to use the EGL rendering backend (requires GPU)
+    os.environ["MUJOCO_GL"] = "egl"
+
+
+# taken from https://gymnasium.farama.org/main/_modules/gymnasium/wrappers/record_video/
+def capped_cubic_video_schedule(episode_id: int) -> bool:
+    """The default episode trigger.
+
+    This function triggers recordings at the episode indices 0, 1, 8, 27, ..., :math:`k^3`, ..., 9261, and then every 10000 episodes (10000, 20000, 30000, ...).
+
+    Args:
+        episode_id: The episode number
+
+    Returns:
+        Whether to record a video of this episode
+    """
+    if episode_id < 10000:
+        return int(round(episode_id ** (1.0 / 3))) ** 3 == episode_id
+    else:
+        return episode_id % 10000 == 0
+
+
+gym.register(
+    id="NaoStandup-v1",
+    entry_point="nao_env:NaoStandup",
+    max_episode_steps=2500,
+)
+
+config = {
+    "policy_type": "MlpPolicy",
+    "total_timesteps": 1000000,
+    "env_name": "NaoStandup-v1",
+    "render_mode": "rgb_array",
+}

-env = gym.make("CartPole-v1", render_mode="human")
+run = wandb.init(
+    project="nao_standup",
+    config=config,
+    sync_tensorboard=True,
+    monitor_gym=True,
+    save_code=False,
+    mode="disabled",
+)

-model = PPO("MlpPolicy", env, verbose=1)
-model.learn(total_timesteps=10_000)

-vec_env = model.get_env()
-if vec_env is None:
-    raise ValueError("Model does not have a VecEnv")
+def make_env():
+    env = gym.make(config["env_name"], render_mode=config["render_mode"])
+    env = Monitor(env)  # record stats such as returns
+    return env

-obs = vec_env.reset()
-for i in range(1000):
-    action, _states = model.predict(obs, deterministic=True)
-    obs, reward, done, info = vec_env.step(action)
-    vec_env.render()
-    # VecEnv resets automatically
-    # if done:
-    #     obs = env.reset()

-env.close()
+env = DummyVecEnv([make_env])
+env = VecVideoRecorder(
+    env,
+    f"videos/{run.id}",
+    record_video_trigger=capped_cubic_video_schedule,
+    video_length=200,
+)
+model = PPO(
+    config["policy_type"], env, verbose=1, tensorboard_log=f"runs/{run.id}"
+)
+model.learn(
+    total_timesteps=config["total_timesteps"],
+    callback=WandbCallback(
+        gradient_save_freq=100,
+        model_save_path=f"models/{run.id}",
+        verbose=2,
+    ),
+)
+run.finish()
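Once training finishes, the policy saved by `WandbCallback` can be reloaded for evaluation. The following is a sketch, not part of the PR: the path `models/<run-id>/model.zip` is a placeholder for whatever `model_save_path` produced, and `evaluate_policy` is the standard stable-baselines3 helper.

```python
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy

gym.register(
    id="NaoStandup-v1",
    entry_point="nao_env:NaoStandup",
    max_episode_steps=2500,
)

env = gym.make("NaoStandup-v1")
model = PPO.load("models/<run-id>/model.zip", env=env)  # placeholder path

mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=5)
print(f"mean reward: {mean_reward:.1f} +/- {std_reward:.1f}")
```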