diff --git a/baselines/goseek-ppo.ipynb b/baselines/goseek-ppo.ipynb index 6d358e1..4196806 100644 --- a/baselines/goseek-ppo.ipynb +++ b/baselines/goseek-ppo.ipynb @@ -81,7 +81,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Set environment parameters\n" + "#### Set environment parameters\n", + "\n", + "\n", + "__Note__ To minimize training time during initial use, we've set `total_timestamps` and `n_environments` to 1e5 and 2 respectively. Setting `total_timestamps` to 3e6 and `n_environments` to 4 should produce an agent that approximates our baseline. " ] }, { @@ -90,14 +93,12 @@ "metadata": {}, "outputs": [], "source": [ - "n_environments = 2\n", - "total_timesteps = 5000000\n", + "n_environments = 2 # number of environments to train over\n", + "total_timesteps = 100000 # number of training timesteps\n", "scene_id = [1, 2, 3, 4, 5] # list all available scenes\n", - "success_dist = 2\n", - "n_targets = 30\n", + "n_targets = 30 # number of targets spawned in each scene\n", + "target_found_reward = 2 # reward per found target\n", "episode_length = 400\n", - "target_found_reward = 2\n", - "step_rate = 20\n", "\n", "\n", "def make_unity_env(filename, num_env):\n", @@ -109,9 +110,7 @@ " str(filename),\n", " network_config=get_network_config(worker_id=rank),\n", " n_targets=n_targets,\n", - " success_dist=success_dist,\n", " episode_length=episode_length,\n", - " step_rate=step_rate,\n", " scene_id=scene_id[rank],\n", " target_found_reward=target_found_reward,\n", " )\n", diff --git a/setup.py b/setup.py index 13d407f..3cc673b 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ setup( name="tesse_gym", - version="0.1.2", + version="0.1.3", description="TESSE OpenAI Gym python interface", packages=find_packages("src"), # tell setuptools that all packages will be under the 'src' directory diff --git a/src/tesse_gym/core/continuous_control.py b/src/tesse_gym/core/continuous_control.py index 8ed34e6..64027c1 100644 --- a/src/tesse_gym/core/continuous_control.py +++ b/src/tesse_gym/core/continuous_control.py @@ -19,23 +19,75 @@ # this work. ################################################################################################### +from collections import namedtuple +from typing import Callable, List, Optional, Tuple +from xml.etree.ElementTree import Element + import defusedxml.ElementTree as ET from scipy.spatial.transform import Rotation from tesse.msgs import * from tesse.utils import UdpListener + # gains 1: 150, 35, 1.6, 0.27 # gains 2: 200, 35, 1.6, 0.27 -def get_attributes(root, element, *attributes): +# Hold agent's state +Position = namedtuple("Position", ["x", "y", "z"]) +Quaternion = namedtuple("Quaternion", ["x", "y", "z", "w"]) +Rot = namedtuple("Rotation", ["roll", "pitch", "yaw"]) +Velocity = namedtuple("Velocity", ["x", "y", "z"]) +AngularVelocity = namedtuple("AngularVelocity", ["x", "y", "z"]) +Acceleration = namedtuple("Acceleration", ["x", "y", "z"]) +AngularAcceleration = namedtuple("AngularAcceleration", ["x", "y", "z"]) +AgentState = namedtuple( + "AgentState", + [ + "position", + "quaternion", + "rotation", + "velocity", + "angular_velocity", + "acceleration", + "angular_acceleration", + "time", + "collision", + ], +) + +# The agent is considered in a collision if +# (1) It has applied force in the forward direction above `force_limit` +# (2) The current position error is above `position_error_limit` +# (3) The net error change is below `position_error_change_limit` +# +# Essentially checking if force has been applied to correct an error, +# but the agent has not moved +CollisionThresholds = namedtuple( + "CollisionThresholds", + ["force_limit", "position_error_limit", "position_error_change_limit"], + defaults=(1e-2, 1e-2, 1e-5), +) + + +# Gains for a standard Proportional-Derivative controller +PDGains = namedtuple( + "PDGains", + ["pos_error_gain", "pos_error_rate_gain", "yaw_error_gain", "yaw_error_rate_gain"], + defaults=(150, 35, 1.6, 0.27), +) + + +def get_attributes( + root: Element, element: str, attributes: Tuple[str, ...] +) -> List[float]: """ Get XML element attributes. Args: root (Element): XML root. element (str): Attribute element. - *attributes (str): Attributes to fetch. + attributes (Tuple[str, ...]): Attributes to fetch. Returns: List[str]: Requested attributes. @@ -44,228 +96,277 @@ def get_attributes(root, element, *attributes): return [float(element.attrib[attrib]) for attrib in attributes] -def parse_metadata(metadata): +def parse_metadata(metadata: str) -> AgentState: """ Get position, orientation, velocity, and acceleration from metadata Args: metadata (str): TESSE metadata. Returns: - Dict[str]: Dictionary containing position, orientation, velocity, and + AgentState: Object containing position, orientation, velocity, and acceleration values. """ - data = {} root = ET.fromstring(metadata) - x, y, z = get_attributes(root, "position", "x", "y", "z") - data["position"] = {"x": x, "y": y, "z": z} + position = Position(*get_attributes(root, "position", ("x", "y", "z"))) - x, y, z, w = get_attributes(root, "quaternion", "x", "y", "z", "w") - data["quaternion"] = {"x": x, "y": y, "z": z, "w": w} - data["rotation"] = Rotation((x, y, z, w)).as_euler("zxy") + x, y, z, w = get_attributes(root, "quaternion", ("x", "y", "z", "w")) + quat = Quaternion(x, y, z, w) + rot = Rot(*Rotation((x, y, z, w)).as_euler("zxy")) - x_dot, y_dot, z_dot = get_attributes(root, "velocity", "x_dot", "y_dot", "z_dot") - data["velocity"] = {"x_dot": x_dot, "y_dot": y_dot, "z_dot": z_dot} + velocity = Velocity(*get_attributes(root, "velocity", ("x_dot", "y_dot", "z_dot"))) - x_ang_dot, y_ang_dot, z_ang_dot = get_attributes( - root, "angular_velocity", "x_ang_dot", "y_ang_dot", "z_ang_dot" + angular_velocity = AngularVelocity( + *get_attributes( + root, "angular_velocity", ("x_ang_dot", "y_ang_dot", "z_ang_dot") + ) ) - data["angular_velocity"] = { - "x_ang_dot": x_ang_dot, - "y_ang_dot": y_ang_dot, - "z_ang_dot": z_ang_dot, - } - - x_ddot, y_ddot, z_ddot = get_attributes( - root, "acceleration", "x_ddot", "y_ddot", "z_ddot" + + acceleration = Acceleration( + *get_attributes(root, "acceleration", ("x_ddot", "y_ddot", "z_ddot")) ) - data["acceleration"] = {"x_ddot": x_ddot, "y_ddot": y_ddot, "z_ddot": z_ddot} - x_ang_ddot, y_ang_ddot, z_ang_ddot = get_attributes( - root, "angular_acceleration", "x_ang_ddot", "y_ang_ddot", "z_ang_ddot" + angular_acceleration = AngularAcceleration( + *get_attributes( + root, "angular_acceleration", ("x_ang_ddot", "y_ang_ddot", "z_ang_ddot") + ) ) - data["angular_acceleration"] = { - "x_ang_ddot": x_ang_ddot, - "y_ang_ddot": y_ang_ddot, - "z_ang_ddot": z_ang_ddot, - } - data["time"] = float(root.find("time").text) - data["collision"] = root.find("collision").attrib["status"].lower() == "true" + state = AgentState( + position=position, + quaternion=quat, + rotation=rot, + velocity=velocity, + angular_velocity=angular_velocity, + acceleration=acceleration, + angular_acceleration=angular_acceleration, + time=float(root.find("time").text), + collision=root.find("collision").attrib["status"].lower() == "true", + ) - return data + return state class ContinuousController: + udp_listener_rate = 200 # frequency in hz at which to listen to UDP broadcasts + collision_limit = 5 # break current control loop after this many collisions + def __init__( self, + *, env, - threshold=np.array([0.05, 0.05, 0.01]), - rate_threshold=np.array([0.01, 0.01, 0.01]), - framerate=20, - max_steps=100, - pos_error_gain=150, - pos_error_rate_gain=35, - yaw_error_gain=1.6, - yaw_error_rate_gain=0.27, - udp_port=9004, + position_error_threshold: Optional[np.ndarray] = np.array([0.05, 0.05, 0.01]), + velocity_error_threshold: Optional[np.ndarray] = np.array([0.01, 0.01, 0.01]), + framerate: Optional[int] = 20, + max_steps: Optional[int] = 100, + pd_gains: Optional[PDGains] = PDGains(), + udp_port: Optional[int] = 9004, + collision_thresholds: Optional[CollisionThresholds] = CollisionThresholds(), ): """ Initialize PD controller. Args: env (Env): Tesse Env object. - threshold (np.ndarray): (x, z, rotation) error threshold to + position_error_threshold (Optional[np.ndarray]): (x, z, rotation) error threshold to be considered at the goal point. - rate_threshold (np.ndarray): (x velocity, z velocity, angular velocity) + velocity_error_threshold (Optional[np.ndarray]): (x velocity, z velocity, angular velocity) limit to be considered at goal. - framerate (int): TESSE step mode framerate. - max_steps (int): Maximum steps controller will take to reach goal. - pos_error_gain (float): Position Proportional gain. - pos_error_rate_gain (float): Position derivative gain. - yaw_error_gain (float): Yaw proportional gain. - yaw_error_rate_gain (float): Yaw derivative gain. + framerate (Optional[int]): TESSE step mode framerate. + max_steps (Optional[int]): Maximum steps controller will take to reach goal. + pd_gains (Optional[PDGains]): Proportional-Derivative controller gains. + udp_port (Optional[int]): Port on which to listen for metadata UDP broadcasts. + collision_thresholds (Optional[CollisionThresholds]): Thresholds defining when an agent is in a + collision. See `_in_collision` for details. """ self.env = env - self.threshold = threshold - self.rate_threshold = rate_threshold - self.env.send(SetFrameRate(framerate)) # Put into step mode + self.position_error_threshold = position_error_threshold + self.velocity_error_threshold = velocity_error_threshold + self.env.send(SetFrameRate(framerate)) # Put sim into step mode self.max_steps = max_steps - self.pos_error_gain = pos_error_gain - self.pos_error_rate_gain = pos_error_rate_gain - self.yaw_error_gain = yaw_error_gain - self.yaw_error_rate_gain = yaw_error_rate_gain + self.pd_gains = pd_gains + self.collision_thresholds = collision_thresholds self.goal = [] self.last_metadata = None - self.udp_listener = UdpListener(port=udp_port, rate=200) + self.udp_listener = UdpListener(port=udp_port, rate=self.udp_listener_rate) self.udp_listener.subscribe("catch_metadata", self.catch_udp_broadcast) self.udp_listener.start() - def catch_udp_broadcast(self, udp_metadata): + def catch_udp_broadcast(self, udp_metadata: Callable[[str], None]) -> None: + """ Catch UDP metadata broadcast from TESSE. """ self.last_metadata = udp_metadata - def transform(self, translate_x=0.0, translate_z=0.0, rotate_y=0.0): + def transform( + self, translate_x: float = 0.0, translate_z: float = 0.0, rotate_y: float = 0.0 + ) -> None: """ Apply desired transform via force commands. Args: translate_x (float): Desired x position relative to agent. translate_z (float): Desired z position relative to agent. rotate_y (float): Desired rotation (in radians) relative to agent. - """ + """ data = self.get_data() self.set_goal(data, translate_x, translate_z, rotate_y) + # track movement across steps to find collisions + last_z_err, last_z_rate_err = 0, 0 + collision_count = 0 + # Apply controls until at goal point, a collision occurs, or max steps reached i = 0 while not self.at_goal(data) and i < self.max_steps: - self.control(data) + force_z, z_error = self.control(data) data = self.get_data() - i += 1 - # check for collisions after at least two steps - if data["collision"] and i > 1: + + if self._in_collision(force_z, z_error, last_z_err): + collision_count += 1 + + if collision_count > self.collision_limit: break + last_z_err = z_error + self.set_goal(data) - def get_data(self): - """ Gets current data for agent """ + def _in_collision( + self, force_z: float, z_pos_error: float, last_z_pos_err: float + ) -> bool: + """ Check if agent is in collision with an object at a given step. + + Count collision if + (1) There is error in the forward direction + (2) Force has been applied in the forward direction + (3) The agent has not moved in the forward direction + + Args: + force_z (float): Force applied in the z direction at + current step. + z_pos_error (float): Z position error at current step. + last_z_pos_err (float): Z position error at previous + step. + + Returns: + bool: True if the agent is in a collision. + """ + return ( + np.sign(force_z) == np.sign(z_pos_error) + and np.abs(z_pos_error) > self.collision_thresholds.position_error_limit + and np.abs(force_z) > self.collision_thresholds.force_limit + and np.abs(z_pos_error - last_z_pos_err) + < self.collision_thresholds.position_error_change_limit + ) + + def get_data(self) -> AgentState: + """ Gets agent's most recent data. """ if self.last_metadata is None: response = self.env.request(MetadataRequest()).metadata else: response = self.last_metadata return parse_metadata(response) - def set_goal(self, data, translate_x=0.0, translate_z=0.0, rotate_y=0.0): + def set_goal( + self, + data: AgentState, + translate_x: float = 0.0, + translate_z: float = 0.0, + rotate_y: float = 0.0, + ) -> None: """ Sets the goal for the controller via creating a waypoint based on the desired transform. Args: - data (Dict[str]): Agent's position, orientation, velocity, + data (AgentState): Agent's position, orientation, velocity, and acceleration. translate_x (float): Desired x position relative to agent. translate_z (float): Desired z position relative to agent. rotate_y (float): Desired rotation (in radians) relative to agent. """ # Update goal point - yaw = data["rotation"][2] - x = ( - data["position"]["x"] - + translate_x * np.cos(-yaw) - - translate_z * np.sin(-yaw) - ) - z = ( - data["position"]["z"] - + translate_x * np.sin(-yaw) - + translate_z * np.cos(-yaw) - ) + yaw = data.rotation.yaw + x = data.position.x + translate_x * np.cos(-yaw) - translate_z * np.sin(-yaw) + z = data.position.z + translate_x * np.sin(-yaw) + translate_z * np.cos(-yaw) self.goal = np.array([x, z, yaw + rotate_y]) - def at_goal(self, data): - """ Returns True if at the goal location within the threshold. + def at_goal(self, data: AgentState) -> bool: + """ Returns true if within position and velocity thresholds. Args: - data (Dict[str, Dict[str, str]]): Agent's position, orientation, velocity, + data (AgentState): Object with agent's position, orientation, velocity, and acceleration. """ # check position - current = np.array( - [data["position"]["x"], data["position"]["z"], data["rotation"][2]] - ) + current = np.array([data.position.x, data.position.z, data.rotation.yaw]) error = current - self.goal error[2] = (error[2] + np.pi) % (2 * np.pi) - np.pi # wrap to pi current_rate = np.array( - [ - data["velocity"]["x_dot"], - data["velocity"]["z_dot"], - data["angular_velocity"]["y_ang_dot"], - ] + [data.velocity.x, data.velocity.z, data.angular_velocity.y,] ) - return np.all(np.abs(error) < self.threshold) and np.all( - np.abs(current_rate) < self.rate_threshold + return np.all(np.abs(error) < self.position_error_threshold) and np.all( + np.abs(current_rate) < self.velocity_error_threshold ) - def control(self, data): + @staticmethod + def _wrap_angle(ang: float) -> float: + """ Wrap angle between [-2*pi, 2*pi] + + Args: + ang (float): Angle in radians. + + Returns: + float: Angle wrapped between [-2*pi, 2*pi]. + """ + return (ang + np.pi) % (2 * np.pi) - np.pi + + def control(self, data: AgentState) -> Tuple[float, float]: """ Applies PD-control to move to the goal point. Args: - data (Dict[str, Dict[str,str]]): Agent's position, orientation, velocity, + data (AgentState): Agent's position, orientation, velocity, and acceleration. + + Returns: + Tuple[float, float]: Applied force and position error + in the z direction. """ # First, calculate position errors and a force in x- and z- to apply - x_error = self.goal[0] - data["position"]["x"] - z_error = self.goal[1] - data["position"]["z"] + x_error = self.goal[0] - data.position.x + z_error = self.goal[1] - data.position.z # Rotate errors into body coordinates - yaw = data["rotation"][2] + yaw = data.rotation.yaw z_error_body = z_error * np.cos(-yaw) - x_error * np.sin(-yaw) x_error_body = z_error * np.sin(-yaw) + x_error * np.cos(-yaw) - z_error_body_rate = -1 * data["velocity"]["z_dot"] - x_error_body_rate = -1 * data["velocity"]["x_dot"] + z_error_body_rate = -1 * data.velocity.z + x_error_body_rate = -1 * data.velocity.x force_z = ( - self.pos_error_gain * z_error_body - + self.pos_error_rate_gain * z_error_body_rate + self.pd_gains.pos_error_gain * z_error_body + + self.pd_gains.pos_error_rate_gain * z_error_body_rate ) force_x = ( - self.pos_error_gain * x_error_body - + self.pos_error_rate_gain * x_error_body_rate + self.pd_gains.pos_error_gain * x_error_body + + self.pd_gains.pos_error_rate_gain * x_error_body_rate ) # Second, calculate yaw error assuming we want to point to where we are going yaw_error = self.goal[2] - yaw yaw_error = (yaw_error + np.pi) % (2 * np.pi) - np.pi # wrap to pi - yaw_error_rate = -1 * data["angular_velocity"]["y_ang_dot"] + yaw_error_rate = -1 * data.angular_velocity.y torque_y = ( - self.yaw_error_gain * yaw_error + self.yaw_error_rate_gain * yaw_error_rate + self.pd_gains.yaw_error_gain * yaw_error + + self.pd_gains.yaw_error_rate_gain * yaw_error_rate ) self.env.send(StepWithForce(force_z, torque_y, force_x)) + return force_z, z_error_body def close(self): + """ Called upon destruction, join UDP listener. """ self.udp_listener.join() diff --git a/src/tesse_gym/core/tesse_gym.py b/src/tesse_gym/core/tesse_gym.py index b515c7e..3f4d198 100644 --- a/src/tesse_gym/core/tesse_gym.py +++ b/src/tesse_gym/core/tesse_gym.py @@ -130,7 +130,7 @@ def __init__( if not self.ground_truth_mode: self.continuous_controller = ContinuousController( - self.env, framerate=step_rate + env=self.env, framerate=step_rate ) self.episode_length = episode_length diff --git a/src/tesse_gym/tasks/goseek/goseek.py b/src/tesse_gym/tasks/goseek/goseek.py index 7c13d7f..761f97e 100644 --- a/src/tesse_gym/tasks/goseek/goseek.py +++ b/src/tesse_gym/tasks/goseek/goseek.py @@ -56,9 +56,9 @@ def __init__( build_path: str, network_config: Optional[NetworkConfig] = NetworkConfig(), scene_id: Optional[int] = None, - episode_length: Optional[int] = 300, + episode_length: Optional[int] = 400, step_rate: Optional[int] = -1, - n_targets: Optional[int] = 50, + n_targets: Optional[int] = 30, success_dist: Optional[float] = 2, restart_on_collision: Optional[bool] = False, init_hook: Optional[Callable[[TesseGym], None]] = None,