diff --git a/.gitignore b/.gitignore index 57aaafb1a..c726e50d7 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ viz/ datasets/ scene_datasets/ datadump/ +video_dir/ */datasets/ data/ diff --git a/ablation_runner_1.sh b/ablation_runner_1.sh new file mode 100644 index 000000000..e57ca8fc9 --- /dev/null +++ b/ablation_runner_1.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v6.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v6b.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v7.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] diff --git a/ablation_runner_2.sh b/ablation_runner_2.sh new file mode 100644 index 000000000..f12c7081b --- /dev/null +++ b/ablation_runner_2.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v1.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v1b.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v2.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v2b.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] 
+python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v3.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v3b.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] \ No newline at end of file diff --git a/ablation_runner_3.sh b/ablation_runner_3.sh new file mode 100644 index 000000000..24a5873dc --- /dev/null +++ b/ablation_runner_3.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v4.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v4b.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v5.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v5b.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] \ No newline at end of file diff --git a/ablation_runner_4.sh b/ablation_runner_4.sh new file mode 100644 index 000000000..ca609b658 --- /dev/null +++ b/ablation_runner_4.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v7b.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v6c.yaml 
habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v7c.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] \ No newline at end of file diff --git a/projects/habitat_ovmm/configs/agent/oracle_agent.yaml b/projects/habitat_ovmm/configs/agent/oracle_agent.yaml new file mode 100644 index 000000000..2301df74e --- /dev/null +++ b/projects/habitat_ovmm/configs/agent/oracle_agent.yaml @@ -0,0 +1,169 @@ +max_steps: 10000 # maximum number of steps before stopping an episode during navigation; a lower value set for habitat episode termination +panorama_start: 1 # 1: turn around 360 degrees when starting an episode, 0: don't +exploration_strategy: seen_frontier # exploration strategy ("seen_frontier", "been_close_to_frontier") +radius: 0.05 # robot radius (in meters) +fall_wait_steps: 100 # number of steps to wait after the object has been dropped +store_all_categories: False # whether to store all semantic categories in the map or just task-relevant ones +detection_module: detic # the detector to use for perception in case ground_truth_semantics are turned off +SEMANTIC_MAP: + semantic_categories: rearrange # map semantic channel categories ("coco_indoor", "longtail_indoor", "mukul_indoor", "rearrange") + num_sem_categories: 5 # Following 5 categories: ["misc", "object_category", "start_receptacle", "goal_receptacle", "others"] + map_size_cm: 4800 # global map size (in centimeters) + map_resolution: 5 # size of map bins (in centimeters) + vision_range: 100 # diameter of local map region visible by the agent (in cells) + global_downscaling: 2 # ratio of global over local map + du_scale: 4 # frame downscaling before projecting to point cloud + cat_pred_threshold: 1.0 # number of depth points to be in bin to classify it as a certain semantic category + exp_pred_threshold: 1.0 # number of depth 
points to be in bin to consider it as explored + map_pred_threshold: 1.0 # number of depth points to be in bin to consider it as obstacle + been_close_to_radius: 100 # radius (in centimeters) of been close to region + explored_radius: 150 # radius (in centimeters) of visually explored region + must_explore_close: False + min_obs_height_cm: 10 # minimum height (in centimeters) of obstacle + # erosion and filtering to reduce the number of spurious artifacts + dilate_obstacles: True + dilate_size: 3 + dilate_iter: 1 + record_instance_ids: False + max_instances: 0 + +SKILLS: + GAZE_OBJ: + type: rl #end_to_end #heuristic #hardcoded + checkpoint_path: data/checkpoints/ovmm_baseline_home_robot_challenge_2023/gaze_at_obj.pth + rl_config: projects/habitat_ovmm/configs/agent/skills/gaze_rl.yaml # with continuous actions + gym_obs_keys: + - robot_head_depth + - object_embedding + - object_segmentation + - joint + - is_holding + - relative_resting_position + allowed_actions: + - arm_action + - base_velocity + arm_joint_mask: [0, 0, 0, 0, 0, 0, 1] # the arm joints that the policy can control + max_displacement: 0.25 # used when training the policy + max_turn_degrees: 30.0 + min_turn_degrees: 5.0 + min_displacement: 0.1 + sensor_height: 160 + sensor_width: 120 + nav_goal_seg_channels: 1 + terminate_condition: grip + grip_threshold: 0.8 + max_joint_delta: 0.1 + min_joint_delta: 0.02 + + PICK: + type: heuristic + + NAV_TO_OBJ: + type: oracle # heuristic or rl or oracle + checkpoint_path: data/checkpoints/ovmm_baseline_home_robot_challenge_2023/nav_to_obj.pth + rl_config: projects/habitat_ovmm/configs/agent/skills/nav_to_obj_rl.yaml + gym_obs_keys: + - robot_head_depth + - object_embedding + - ovmm_nav_goal_segmentation + - receptacle_segmentation + - start_receptacle + - robot_start_gps + - robot_start_compass + - joint + allowed_actions: + # - base_velocity + # - rearrange_stop + - stop + - move_forward + - turn_left + - turn_right + arm_joint_mask: [0, 0, 0, 0, 0, 0, 0] # the 
arm joints that the policy can control + max_displacement: 0.25 # used when training the policy; could be different from the eval values + max_turn_degrees: 30.0 + min_turn_degrees: 5.0 + min_displacement: 0.1 + sensor_height: 160 + sensor_width: 120 + terminate_condition: discrete_stop + nav_goal_seg_channels: 2 + + NAV_TO_REC: + type: oracle # heuristic or rl or oracle + checkpoint_path: data/checkpoints/ovmm_baseline_home_robot_challenge_2023/nav_to_rec.pth + rl_config: projects/habitat_ovmm/configs/agent/skills/nav_to_obj_rl.yaml + gym_obs_keys: + - robot_head_depth + - ovmm_nav_goal_segmentation + - receptacle_segmentation + - goal_receptacle + - robot_start_gps + - robot_start_compass + - joint + allowed_actions: + # - base_velocity + # - rearrange_stop + - stop + - move_forward + - turn_left + - turn_right + arm_joint_mask: [0, 0, 0, 0, 0, 0, 0] # the arm joints that the policy can control + max_displacement: 0.25 # used when training the policy; could be different from the eval values + max_turn_degrees: 30.0 + min_turn_degrees: 5.0 + min_displacement: 0.1 + sensor_height: 160 + sensor_width: 120 + terminate_condition: discrete_stop + nav_goal_seg_channels: 1 + + + PLACE: + type: heuristic # "rl" or "heuristic" or "hardcoded" + checkpoint_path: data/checkpoints/ovmm_baseline_home_robot_challenge_2023/place.pth + rl_config: projects/habitat_ovmm/configs/agent/skills/place_rl.yaml # with continuous actions + gym_obs_keys: + - robot_head_depth + - goal_receptacle + - joint + - goal_recep_segmentation + - is_holding + - object_embedding + allowed_actions: + - arm_action + - base_velocity + - manipulation_mode + arm_joint_mask: [1, 1, 1, 1, 1, 0, 0] # the arm joints that the policy can control + max_displacement: 0.25 # used when training the policy + max_turn_degrees: 30.0 + min_turn_degrees: 5.0 + min_displacement: 0.1 + sensor_height: 160 + sensor_width: 120 + nav_goal_seg_channels: 1 + terminate_condition: ungrip + grip_threshold: -0.8 + 
manip_mode_threshold: 0.8 + constraint_base_in_manip_mode: True + max_joint_delta: 0.1 + min_joint_delta: 0.02 + +skip_skills: + nav_to_obj: False + nav_to_rec: False + gaze_at_obj: True + gaze_at_rec: True + pick: False + place: False + +PLANNER: + collision_threshold: 0.10 # forward move distance under which we consider there's a collision (in meters) + obs_dilation_selem_radius: 3 # radius (in cells) of obstacle dilation structuring element + goal_dilation_selem_radius: 10 # radius (in cells) of goal dilation structuring element + step_size: 5 # maximum distance of the short-term goal selected by the planner + use_dilation_for_stg: False + min_obs_dilation_selem_radius: 1 # radius (in cells) of obstacle dilation structuring element + map_downsample_factor: 1 # optional downsampling of traversible and goal map before fmm distance call (1 for no downsampling, 2 for halving resolution) + map_update_frequency: 1 # compute fmm distance map every n steps + discrete_actions: True # discrete motion planner output space or not + verbose: False # display debug information during planning diff --git a/projects/habitat_ovmm/configs/env/hssd_eval.yaml b/projects/habitat_ovmm/configs/env/hssd_eval.yaml index d5fd0ba1d..f7ad77c4d 100644 --- a/projects/habitat_ovmm/configs/env/hssd_eval.yaml +++ b/projects/habitat_ovmm/configs/env/hssd_eval.yaml @@ -3,14 +3,27 @@ NUM_ENVIRONMENTS: 1 # number of environments (per agent process) DUMP_LOCATION: datadump # path to dump models and log EXP_NAME: eval_hssd # experiment name VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization -PRINT_IMAGES: 0 # 1: save visualization as images, 0: no image saving -GROUND_TRUTH_SEMANTICS: 0 # 1: use ground-truth semantics (for debugging / ablations) +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) seed: 0 # seed SHOW_RL_OBS: False # whether to show the observations 
passed to RL policices, for debugging +EXPERIMENT: + type: v0_extra_navigation_instructions_here + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: none + goal: max_iou_viewpoint + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: max_iou_viewpoint + ENVIRONMENT: - forward: 0.25 # forward motion (in meters) - turn_angle: 30.0 # agent turn angle (in degrees) + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config frame_height: 640 # first-person frame height (in pixels) frame_width: 480 # first-person frame width (in pixels) camera_height: 1.31 # camera sensor height (in metres) @@ -26,6 +39,6 @@ EVAL_VECTORIZED: simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments split: val # eval split num_episodes_per_env: null # number of eval episodes per environment - record_videos: 0 # 1: record videos from printed images, 0: don't + record_videos: 1 # 1: record videos from printed images, 0: don't record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v1.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v1.yaml new file mode 100644 index 000000000..519d89e7d --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v1.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v1 # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving 
+GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v1__nav-oracle-nosegmentation__pick-heur-vp__place-heur-vp + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: none + goal: max_iou_viewpoint + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: max_iou_viewpoint + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this requires a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this requires a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v1b.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v1b.yaml new file mode 100644 index 000000000..c82f2914c --- /dev/null +++ 
b/projects/habitat_ovmm/configs/env/hssd_eval_v1b.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v1b # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policices, for debugging + +EXPERIMENT: + type: v1b__nav-oracle-nosegmentation__pick-heur-vp__place-heur-vp + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: none + goal: max_iou_viewpoint + goal_radius: 0.3 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: max_iou_viewpoint + goal_radius: 0.3 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + 
num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v2.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v2.yaml new file mode 100644 index 000000000..63277dd09 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v2.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v2 # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policices, for debugging + +EXPERIMENT: + type: v2__nav-oracle-nosegmentation-heurnearobject__pick-heur-vp__place-heur-vp + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: max_iou_viewpoint + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # 
minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v2b.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v2b.yaml new file mode 100644 index 000000000..86c9eec82 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v2b.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v2b # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policices, for debugging + +EXPERIMENT: + type: v2b__nav-oracle-nosegmentation-heurnearobject__pick-heur-vp__place-heur-vp + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.3 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: max_iou_viewpoint + goal_radius: 0.3 + +ENVIRONMENT: + forward: 0.10 # forward motion 
(in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v3.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v3.yaml new file mode 100644 index 000000000..9f4121329 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v3.yaml @@ -0,0 +1,47 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v3 # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 
# seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policices, for debugging +# experiment_type: v1__nav-oracle-nosegmentation__pick-heur-vp__place-heur-vp +# experiment_type: v2__nav-oracle-nosegmentation-heurnearobject__pick-heur-vp__place-heur-vp +EXPERIMENT: + type: v3__nav-oracle-withsegmentation-heurnearobject__pick-heur-vp__place-heur-vp + NAV_TO_OBJ: + type: oracle + segmentation: True + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: True + fallback: none + goal: max_iou_viewpoint + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v3b.yaml 
b/projects/habitat_ovmm/configs/env/hssd_eval_v3b.yaml new file mode 100644 index 000000000..de5fc3fd0 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v3b.yaml @@ -0,0 +1,47 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v3b # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policices, for debugging +# experiment_type: v1__nav-oracle-nosegmentation__pick-heur-vp__place-heur-vp +# experiment_type: v2__nav-oracle-nosegmentation-heurnearobject__pick-heur-vp__place-heur-vp +EXPERIMENT: + type: v3b__nav-oracle-withsegmentation-heurnearobject__pick-heur-vp__place-heur-vp + NAV_TO_OBJ: + type: oracle + segmentation: True + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: True + fallback: none + goal: max_iou_viewpoint + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: 
projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v4.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v4.yaml new file mode 100644 index 000000000..28e04cd31 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v4.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v4 # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policices, for debugging + +EXPERIMENT: + type: v4__nav-oracle-withsegmentation-heurnearobject__pick-heur-vp__place-heur-exact + NAV_TO_OBJ: + type: oracle + segmentation: True + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: True + fallback: none + goal: exact + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing 
this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v4b.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v4b.yaml new file mode 100644 index 000000000..b9f309fff --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v4b.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v4b # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policices, for debugging + +EXPERIMENT: + type: 
v4b__nav-oracle-withsegmentation-heurnearobject__pick-heur-vp__place-heur-exact + NAV_TO_OBJ: + type: oracle + segmentation: True + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: True + fallback: none + goal: exact + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v5.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v5.yaml new file mode 100644 index 000000000..577e50825 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v5.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) 
+DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v5 # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v5__nav-oracle-nosegmentation-heurnearobject__pick-heur-vp__place-heur-exact + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: exact + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos),
0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v5b.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v5b.yaml new file mode 100644 index 000000000..534a44181 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v5b.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v5b # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v5b__nav-oracle-nosegmentation-heurnearobject__pick-heur-vp__place-heur-exact + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.3 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: exact + goal_radius: 0.3 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + 
use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v6.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v6.yaml new file mode 100644 index 000000000..478f8fb4a --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v6.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v6 # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v6__nav-oracle-nosegmentation-heurnearobject__pick-heur-exact__place-heur-exact + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: exact + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: exact + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat
benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v6b.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v6b.yaml new file mode 100644 index 000000000..a2a8c5a88 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v6b.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v6b # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v6b__nav-oracle-nosegmentation-heurnearobject__pick-heur-exact__place-heur-exact + NAV_TO_OBJ: + 
type: oracle + segmentation: False + fallback: heuristic_nav + goal: exact + goal_radius: 0.3 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: exact + goal_radius: 0.3 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v6c.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v6c.yaml new file mode 100644 index 000000000..8eeb7d503 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v6c.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v6c # experiment name 
+VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v6c__nav-oracle-nosegmentation-heurnearobject__pick-heur-exact__place-heur-exact + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: exact + goal_radius: 0.5 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: exact + goal_radius: 0.5 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git 
a/projects/habitat_ovmm/configs/env/hssd_eval_v7.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v7.yaml new file mode 100644 index 000000000..0e497ccc7 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v7.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v7 # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v7__nav-oracle-nosegmentation-heurnearobject__pick-heur-vpthenexact__place-heur-vpthenexact + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: vp_then_exact + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: vp_then_exact + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to 
evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v7b.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v7b.yaml new file mode 100644 index 000000000..a1d82aca4 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v7b.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v7b # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v7b__nav-oracle-nosegmentation-heurnearobject__pick-heur-vpthenexact__place-heur-vpthenexact + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: vp_then_exact + goal_radius: 0.3 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: vp_then_exact + goal_radius: 0.3 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # 
first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v7c.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v7c.yaml new file mode 100644 index 000000000..83bd47935 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v7c.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v7c # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v7c__nav-oracle-nosegmentation-heurnearobject__pick-heur-vpthenexact__place-heur-vpthenexact + NAV_TO_OBJ: + type: oracle + 
segmentation: False + fallback: heuristic_nav + goal: vp_then_exact + goal_radius: 0.5 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: vp_then_exact + goal_radius: 0.5 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v8.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v8.yaml new file mode 100644 index 000000000..59e467380 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v8.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v8 # experiment name 
+VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v8__nav-oracle-nosegmentation-heurnearobject__pick-heur-vp__place-heur-vpthenexact + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.3 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: vp_then_exact + goal_radius: 0.5 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git 
a/projects/habitat_ovmm/eval_baselines_agent.py b/projects/habitat_ovmm/eval_baselines_agent.py index 3ff0ff139..87c96b57b 100644 --- a/projects/habitat_ovmm/eval_baselines_agent.py +++ b/projects/habitat_ovmm/eval_baselines_agent.py @@ -41,7 +41,7 @@ parser.add_argument( "--baseline_config_path", type=str, - default="projects/habitat_ovmm/configs/agent/heuristic_agent.yaml", + default="projects/habitat_ovmm/configs/agent/oracle_agent.yaml", help="Path to config yaml", ) parser.add_argument( @@ -57,6 +57,9 @@ choices=["baseline", "random"], help="Agent to evaluate", ) + parser.add_argument("--ep_start", type=int, default=None) + parser.add_argument("--ep_end", type=int, default=None) + parser.add_argument( "overrides", default=None, @@ -64,7 +67,10 @@ help="Modify config options from command line", ) args = parser.parse_args() - + if args.ep_start is not None and args.ep_end is not None: + args.overrides.append( + f'habitat.dataset.episode_ids=[{",".join([str(i) for i in range(args.ep_start, args.ep_end)])}]' + ) # get habitat config habitat_config, _ = get_habitat_config( args.habitat_config_path, overrides=args.overrides diff --git a/projects/habitat_ovmm/evaluator.py b/projects/habitat_ovmm/evaluator.py index 9306ac213..507136b4f 100644 --- a/projects/habitat_ovmm/evaluator.py +++ b/projects/habitat_ovmm/evaluator.py @@ -3,7 +3,6 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
- import json import os import time @@ -11,13 +10,16 @@ from enum import Enum from typing import TYPE_CHECKING, Any, Dict, Optional +import cv2 import numpy as np import pandas as pd +from habitat.utils.visualizations.utils import build_text_image, images_to_video from habitat_baselines.rl.ppo.ppo_trainer import PPOTrainer from omegaconf import DictConfig from tqdm import tqdm from utils.env_utils import create_ovmm_env_fn from utils.metrics_utils import get_stats_from_episode_metrics +from utils.video_utils import get_snapshots_from_disk, record_video if TYPE_CHECKING: from habitat.core.dataset import BaseEpisode @@ -41,8 +43,12 @@ def __init__(self, eval_config: DictConfig) -> None: self.results_dir = os.path.join( eval_config.DUMP_LOCATION, "results", eval_config.EXP_NAME ) + self.images_dir = os.path.join( + eval_config.DUMP_LOCATION, "images", eval_config.EXP_NAME + ) self.videos_dir = eval_config.habitat_baselines.video_dir os.makedirs(self.results_dir, exist_ok=True) + os.makedirs(self.images_dir, exist_ok=True) os.makedirs(self.videos_dir, exist_ok=True) super().__init__(eval_config) @@ -194,8 +200,8 @@ def _aggregate_metrics(self, episode_metrics: Dict[str, Any]) -> Dict[str, float [ k for metrics_per_episode in episode_metrics.values() - for k in metrics_per_episode - if k != "goal_name" + for k, v in metrics_per_episode.items() + if not isinstance(v, str) ] ) for v in episode_metrics.values(): @@ -212,6 +218,7 @@ def _aggregate_metrics(self, episode_metrics: Dict[str, Any]) -> Dict[str, float f"{k1}/mean": np.mean(v1), f"{k1}/min": np.min(v1), f"{k1}/max": np.max(v1), + f"{k1}/sum": np.sum(v1), }.items() }.items() ) @@ -220,13 +227,69 @@ def _aggregate_metrics(self, episode_metrics: Dict[str, Any]) -> Dict[str, float return aggregated_metrics def _write_results( - self, episode_metrics: Dict[str, Dict], aggregated_metrics: Dict[str, float] + self, + episode_metrics: Dict[str, Dict], + aggregated_metrics: Dict[str, float], + average_metrics: Dict[str, 
float], ) -> None: """Writes metrics tracked by environment to a file.""" with open(f"{self.results_dir}/aggregated_results.json", "w") as f: json.dump(aggregated_metrics, f, indent=4) with open(f"{self.results_dir}/episode_results.json", "w") as f: json.dump(episode_metrics, f, indent=4) + with open(f"{self.results_dir}/summary_results.json", "w") as f: + json.dump(average_metrics, f, indent=4) + + def _get_episode_completion_stage(self, metrics_at_episode_end): + # TODO: temporary + if metrics_at_episode_end["END.ovmm_place_success"] == 1: + return "0_overall_success" + elif metrics_at_episode_end["END.obj_anywhere_on_goal.0"] == 1: + return "1_place_anywhere_on_goal_success" + elif metrics_at_episode_end["END.ovmm_find_recep_phase_success"] == 1: + return "2_nav_to_goal_success_but_place_failure" + elif metrics_at_episode_end["END.ovmm_pick_object_phase_success"] == 1: + return "3_pick_success_but_nav_to_goal_failure" + elif metrics_at_episode_end["END.ovmm_find_object_phase_success"] == 1: + return "4_nav_to_object_success_but_pick_failure" + return "5_nav_to_object_failure" + + def get_episode_completion_stage(self, metrics_at_episode_end, coarse=False): + episode_completion_stage = self._get_episode_completion_stage( + metrics_at_episode_end + ) + if coarse: + coarse_stage = ( + "success" + if episode_completion_stage + in ["0_overall_success", "1_place_anywhere_on_goal_success"] + else "failure" + ) + return coarse_stage + return episode_completion_stage + + def get_all_episode_completion_stages(self): + # TODO: temporary: should be an Enum if productionized + return [ + "0_overall_success", + "1_place_anywhere_on_goal_success", + "2_nav_to_goal_success_but_place_failure", + "3_pick_success_but_nav_to_goal_failure", + "4_nav_to_object_success_but_pick_failure", + "5_nav_to_object_failure", + ] + + def initialize_episode_and_agent(self, agent): + observations, done = self._env.reset(), False + current_episode = self._env.get_current_episode() + agent.reset() 
+ self._check_set_planner_vis_dir(agent, current_episode) + print( + f"Starting evaluation for {self.config.EVAL_VECTORIZED.split} episode {current_episode.episode_id}" + ) + print(f"Using strategy: {self.config.EXPERIMENT.type}") + agent.set_oracle_info(self._env) + return observations, done, current_episode def local_evaluate( self, agent: "Agent", num_episodes: Optional[int] = None @@ -253,29 +316,116 @@ def local_evaluate( episode_metrics: Dict = {} count_episodes: int = 0 + computed_episodes = [] + skip_computed = False # TODO: temporary pbar = tqdm(total=num_episodes) while count_episodes < num_episodes: - observations, done = self._env.reset(), False - current_episode = self._env.get_current_episode() - agent.reset() - self._check_set_planner_vis_dir(agent, current_episode) - - current_episode_key = ( - f"{current_episode.scene_id.split('/')[-1].split('.')[0]}_" - f"{current_episode.episode_id}" + observations, done, current_episode = self.initialize_episode_and_agent( + agent ) + current_scene_name = current_episode.scene_id.split("/")[-1].split(".")[0] + current_episode_key = f"{current_scene_name}_{current_episode.episode_id}" current_episode_metrics = {} - while not done: + if skip_computed: + computed_episodes = [] + for completion_stage in self.get_all_episode_completion_stages(): + target_dir_experiment = os.path.join( + self.videos_dir, completion_stage, self.config.EXPERIMENT.type + ) + target_file_experiment = f"split_{self.config.EVAL_VECTORIZED.split}_scene_{current_scene_name}_episode_{current_episode.episode_id}" + if os.path.exists( + f"{target_dir_experiment}/{target_file_experiment}.json" + ): + computed_episodes.append(current_episode_key) + break + if current_episode_key in computed_episodes: + try: + with open( + f"{target_dir_experiment}/{target_file_experiment}.json", + "r", + ) as f: + episode_metrics[current_episode_key] = json.load(f) + print( + f"Skipping episode {current_episode.episode_id} because it has already been computed" + ) 
+ count_episodes += 1 + pbar.update(1) + + if self.config.EVAL_VECTORIZED.record_videos: + source_dir = os.path.join( + self.images_dir, current_episode_key + ) + target_dir_annotation = os.path.join( + "video_dir_annotation", + self.get_episode_completion_stage( + episode_metrics[current_episode_key], + coarse=True, + ), + f"scene_{current_scene_name}", + ) + target_file_annotation = f"split_{self.config.EVAL_VECTORIZED.split}_episode_{current_episode.episode_id}" + os.makedirs(target_dir_annotation, exist_ok=True) + with open( + f"{target_dir_annotation}/{target_file_annotation}.json", + "w", + ) as f: + json.dump( + episode_metrics[current_episode_key], f, indent=4 + ) + + import shutil + + shutil.copyfile( + f"{target_dir_experiment}/{target_file_experiment}.mp4", + f"{target_dir_annotation}/{target_file_annotation}.mp4", + ) + + continue + except Exception as e: + print( + f"Error {e} loading metrics for {current_episode_key}. Not skipping. Recomputing..." + ) + + steps, max_steps = -1, 2000 + max_nav_obj_steps = 800 + start_time = time.time() + + while not done and steps < max_steps: + steps += 1 action, info, _ = agent.act(observations) observations, done, hab_info = self._env.apply_action(action, info) + print( + f"Timestep:\t{steps}\t{info['curr_skill']}\t({hab_info['ovmm_dist_to_pick_goal']:.4f},\t{hab_info['ovmm_dist_to_place_goal']:.4f})", + end="\r", + ) + # print(f"Current skill: {info['curr_skill']}") + # print( + # f"info['ovmm_dist_to_pick_goal']:\t{hab_info['ovmm_dist_to_pick_goal']:.4f}" + # ) + # print( + # f"info['ovmm_dist_to_keep_goal']:\t{hab_info['ovmm_dist_to_place_goal']:.4f}" + # ) + + if info["curr_skill"] == "NAV_TO_OBJ" and steps > max_nav_obj_steps: + print("Nav to obj is taking too long, moving to next episode") + break if "skill_done" in info and info["skill_done"] != "": metrics = self._extract_scalars_from_info(hab_info) metrics_at_skill_end = { f"{info['skill_done']}." 
+ k: v for k, v in metrics.items() } + if ( + info["curr_skill"] == "NAV_TO_REC" + and info["skill_done"] == "PICK" + and metrics_at_skill_end["PICK.ovmm_pick_object_phase_success"] + == 0 + ): + print("Pick failure, the rest of the episode is moot") + break + current_episode_metrics = { **metrics_at_skill_end, **current_episode_metrics, @@ -283,29 +433,122 @@ def local_evaluate( if "goal_name" in info: current_episode_metrics["goal_name"] = info["goal_name"] + end_time = time.time() + print(f"Episode took {end_time - start_time} seconds") + metrics = self._extract_scalars_from_info(hab_info) + metrics["total_time_in_seconds"] = end_time - start_time + metrics["done"] = 1.0 if done else 0.0 metrics_at_episode_end = {"END." + k: v for k, v in metrics.items()} current_episode_metrics = { **metrics_at_episode_end, **current_episode_metrics, } + current_episode_metrics["data_split"] = self.config.EVAL_VECTORIZED.split + current_episode_metrics["scene_name"] = current_scene_name + current_episode_metrics["episode_id"] = current_episode.episode_id + current_episode_metrics[ + "episode_completion_stage" + ] = self.get_episode_completion_stage(current_episode_metrics) + current_episode_metrics["experiment_name"] = self.config.EXP_NAME + current_episode_metrics["experiment_type"] = self.config.EXPERIMENT.type if "goal_name" in info: current_episode_metrics["goal_name"] = info["goal_name"] - episode_metrics[current_episode_key] = current_episode_metrics - if len(episode_metrics) % self.metrics_save_freq == 0: - aggregated_metrics = self._aggregate_metrics(episode_metrics) - self._write_results(episode_metrics, aggregated_metrics) + source_dir = os.path.join(self.images_dir, current_episode_key) + target_dir_experiment = os.path.join( + self.videos_dir, + self.get_episode_completion_stage( + current_episode_metrics, coarse=False + ), + self.config.EXPERIMENT.type, + ) + target_file_experiment = 
f"split_{self.config.EVAL_VECTORIZED.split}_scene_{current_scene_name}_episode_{current_episode.episode_id}" + + target_dir_annotation = os.path.join( + "video_dir_annotation", + self.get_episode_completion_stage(current_episode_metrics, coarse=True), + f"scene_{current_scene_name}", + ) + target_file_annotation = f"split_{self.config.EVAL_VECTORIZED.split}_episode_{current_episode.episode_id}" + + save_down_videos = self.config.EVAL_VECTORIZED.record_videos + # try: + # # if the episode has already been computed, and the new episode run is not better than the older, don't save down videos + # # the definition of better is (1) new computation results in overall success, (2) new computation has less steps than older one. + # metrics_from_earlier_run_of_same_episode = None + # with open(f"""{os.path.join("video_dir_annotation", "success", f"scene_{current_scene_name}")}/{target_file_annotation}.json""", "r") as f: + # metrics_from_earlier_run_of_same_episode = json.load(f) + # if ( + # metrics_from_earlier_run_of_same_episode is not None and ( + # metrics_from_earlier_run_of_same_episode["END.num_steps"] <= current_episode_metrics["END.num_steps"] + # or self.get_episode_completion_stage(current_episode_metrics, coarse=True) != "success" + # ) + # ): + # save_down_videos = False + # except Exception: + # pass + + if ( + "v8" not in self.config.EXP_NAME + and self.get_episode_completion_stage( + current_episode_metrics, coarse=True + ) + == "failure" + ): + save_down_videos = False + + if save_down_videos: + os.makedirs(target_dir_experiment, exist_ok=True) + with open( + f"{target_dir_experiment}/{target_file_experiment}.json", "w" + ) as f: + json.dump(current_episode_metrics, f, indent=4) + # frames = get_snapshots_from_disk(source_dir, snapshot_file_prefix="tp_snapshot") + # if frames is not None and len(frames) > 0: + # images_to_video(frames, target_dir_experiment, target_file_experiment, fps=24, quality=5) + + os.makedirs(target_dir_annotation, exist_ok=True) + 
with open( + f"{target_dir_annotation}/{target_file_annotation}.json", "w" + ) as f: + json.dump(current_episode_metrics, f, indent=4) + + episode_frames = self._env.habitat_env.env._env._env._task._frames + if episode_frames is not None and len(episode_frames) > 0: + robot_goal_text = build_text_image( + episode_frames[0], + f"Robot's goal: {current_episode_metrics['goal_name'].replace('_', ' ')}", + color="black", + ) + human_goal_text = build_text_image( + episode_frames[0], + "Your goal: Say the actions the robot is performing in natural language.", + color="black", + ) + episode_frames = [ + np.concatenate( + (robot_goal_text, frame, human_goal_text), axis=0 + ) + for frame in episode_frames + ] + images_to_video( + episode_frames, + target_dir_annotation, + target_file_annotation, + fps=24, + quality=5, + ) + episode_metrics[current_episode_key] = current_episode_metrics count_episodes += 1 pbar.update(1) self._env.close() aggregated_metrics = self._aggregate_metrics(episode_metrics) - self._write_results(episode_metrics, aggregated_metrics) - average_metrics = self._summarize_metrics(episode_metrics) + self._write_results(episode_metrics, aggregated_metrics, average_metrics) self._print_summary(average_metrics) return average_metrics diff --git a/projects/habitat_ovmm/shortest_path_follower_example.py b/projects/habitat_ovmm/shortest_path_follower_example.py new file mode 100644 index 000000000..6698aff8c --- /dev/null +++ b/projects/habitat_ovmm/shortest_path_follower_example.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python3 + +# Copyright (c) Meta Platforms, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import argparse +import os + +import magnum as mn +import numpy as np +from habitat.core.utils import try_cv2_import +from habitat.sims.habitat_simulator.actions import HabitatSimActions +from habitat.tasks.nav.shortest_path_follower import ShortestPathFollower +from habitat.tasks.utils import cartesian_to_polar +from habitat.utils.geometry_utils import quaternion_from_coeff, quaternion_rotate_vector +from habitat.utils.visualizations import maps +from habitat.utils.visualizations.utils import images_to_video +from utils.config_utils import ( + create_agent_config, + create_env_config, + get_habitat_config, + get_omega_config, +) +from utils.env_utils import create_ovmm_env_fn + +from home_robot.core.interfaces import DiscreteNavigationAction + +cv2 = try_cv2_import() + +IMAGE_DIR = os.path.join("examples", "images") +if not os.path.exists(IMAGE_DIR): + os.makedirs(IMAGE_DIR) + +os.environ["OPENBLAS_NUM_THREADS"] = "1" +os.environ["NUMEXPR_NUM_THREADS"] = "1" +os.environ["MKL_NUM_THREADS"] = "1" + +# def _quat_to_xy_heading(quat): +# direction_vector = np.array([0, 0, -1]) + +# heading_vector = quaternion_rotate_vector(quat, direction_vector) + +# phi = cartesian_to_polar(-heading_vector[2], heading_vector[0])[1] +# return np.array([phi], dtype=np.float32) + +DISCRETE_ACTION_MAP = { + HabitatSimActions.stop: DiscreteNavigationAction.STOP, + HabitatSimActions.move_forward: DiscreteNavigationAction.MOVE_FORWARD, + HabitatSimActions.turn_left: DiscreteNavigationAction.TURN_LEFT, + HabitatSimActions.turn_right: DiscreteNavigationAction.TURN_RIGHT, +} + + +def draw_top_down_map(info, output_size): + return maps.colorize_draw_agent_and_fit_to_height(info["top_down_map"], output_size) + + +def shortest_path_example(config): + """ + Example script for performing oracle navigation to object in OVMM episodes. + Utilizes ShortestPathFollower to output discrete actions in HabitatOpenVocabManipEnv. 
+ Note: HabitatOpenVocabManipEnv internally takes care of converting discrete actions to continuous actions. + Note: The environment hierarchy above is as follows: + ovmm_env + HabitatOpenVocabManipEnv + ovmm_env.habitat_env + GymHabitatEnv + ovmm_env.habitat_env.env + HabGymWrapper instance + ovmm_env.habitat_env.env._env + RLTaskEnv instance + ovmm_env.habitat_env.env._env._env + habitat.core.env.Env + ovmm_env.habitat_env.env._env._env.sim + OVMMSim + """ + ovmm_env = create_ovmm_env_fn(config) + print(f"Total number of episodes in env: {ovmm_env.number_of_episodes}") + + # Keep a default minimum goal radius of 0.1, but increase it if robot step size is too large + goal_radius = max(0.1, getattr(config.habitat.simulator, "forward_step_size", 0.1)) + + follower = ShortestPathFollower( + ovmm_env.habitat_env.env._env.habitat_env.sim, goal_radius, False + ) + + for _ in range(ovmm_env.number_of_episodes): + ovmm_env.reset() + episode_id = ovmm_env.get_current_episode().episode_id + dirname = os.path.join( + IMAGE_DIR, + "shortest_path_example_ovmm", + f"{episode_id}", + ) + if not os.path.exists(dirname): + os.makedirs(dirname) + log_file_name = f"logs_ep_{episode_id}.txt" + + with open(os.path.join(os.getcwd(), dirname, log_file_name), "w") as f: + f.write("Environment creation successful\n") + f.write("Agent stepping around inside environment.\n") + images_third_person = [] + steps, max_steps = 0, 1000 + info = None + object_pos = ovmm_env.habitat_env.env._env.habitat_env.current_episode.candidate_objects[ + 0 + ].position + goal_pos = ovmm_env.habitat_env.env._env.habitat_env.current_episode.candidate_goal_receps[ + 0 + ].position + + goal_pos_viewpoints = ovmm_env.habitat_env.env._env.habitat_env.current_episode.candidate_goal_receps[ + 0 + ].view_points + ious = [viewpoint.iou for viewpoint in goal_pos_viewpoints] + max_iou_idx = ious.index(max(ious)) + goal_pos_max_iou_viewpoint = goal_pos_viewpoints[ + max_iou_idx + ].agent_state.position + + # for idx,
viewpoint in enumerate(goal_pos_viewpoints): + # print(f"\nViewpoint {idx}") + # print(f"Viewpoint position: {viewpoint.agent_state.position}") + # print(f"Viewpoint rotation: {viewpoint.agent_state.rotation}") + # print(f"Viewpoint IOU: {viewpoint.iou}") + + # print(f"\nMax IOU viewpoint {max_iou_idx}") + # print(f"Max IOU viewpoint position: {goal_pos_max_iou_viewpoint}") + goal = mn.Vector3(goal_pos_max_iou_viewpoint) + goal_orientation = quaternion_from_coeff( + goal_pos_viewpoints[max_iou_idx].agent_state.rotation + ) + while ( + not ovmm_env.habitat_env.env._env.habitat_env.episode_over + and steps < max_steps + ): + if steps != 0: + # curr_quat = follower._sim.robot.sim_obj.rotation + # curr_rotation = [ + # curr_quat.vector.x, + # curr_quat.vector.y, + # curr_quat.vector.z, + # curr_quat.scalar, + # ] + # curr_quat = quaternion_from_coeff( + # curr_rotation + # ) + # # get heading angle + # rot = _quat_to_xy_heading( + # curr_quat.inverse() + # ) + # rot = rot - np.pi / 2 + # # convert back to quaternion + # ang_pos = rot[0] + # curr_rot = mn.Quaternion( + # mn.Vector3(0, np.sin(ang_pos / 2), 0), np.cos(ang_pos / 2) + # ) + + f.write( + f"Current agent location:\t{follower._sim.robot.base_pos}\n" + # f"Current agent orientation:\t{curr_quat}\n" + # f"Current agent orientation:\t{curr_rot}\n" + f"Navigation goal location:\t{goal}\n" + f"Navigation goal orientation:\t{goal_orientation}\n" + # f"Difference between orientations:\t{curr_quat * goal_orientation.inverse()}\n" + f"info['ovmm_dist_to_pick_goal']:\t{info['ovmm_dist_to_pick_goal']}\n" + f"info['ovmm_dist_to_keep_goal']:\t{info['ovmm_dist_to_place_goal']}\n" + ) + + f.write(f"\nTimestep: {steps}\n") + print(f"Timestep: {steps}") + best_action = DISCRETE_ACTION_MAP[follower.get_next_action(goal)] + f.write(f"Agent action taken: {best_action}\n") + if best_action is None: + break + + observations, done, info = ovmm_env.apply_action(best_action, info) + steps += 1 + info["timestep"] = steps + if 
config.PRINT_IMAGES and config.GROUND_TRUTH_SEMANTICS: + images_third_person.append(observations.third_person_image) + + if len(images_third_person): + images_to_video(images_third_person, dirname, "trajectory_third_person") + if steps >= max_steps: + f.write("Max steps reached! Aborting episode...") + else: + f.write("Episode finished succesfully") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--evaluation_type", + type=str, + choices=["local", "local_vectorized", "remote"], + default="local", + ) + parser.add_argument("--num_episodes", type=int, default=None) + parser.add_argument( + "--habitat_config_path", + type=str, + default="ovmm/ovmm_eval.yaml", + help="Path to config yaml", + ) + parser.add_argument( + "--env_config_path", + type=str, + default="projects/habitat_ovmm/configs/env/hssd_eval.yaml", + help="Path to config yaml", + ) + parser.add_argument( + "overrides", + default=None, + nargs=argparse.REMAINDER, + help="Modify config options from command line", + ) + args = parser.parse_args() + + # get habitat config + habitat_config, _ = get_habitat_config( + args.habitat_config_path, + overrides=args.overrides + + [ + "+habitat/task/measurements@habitat.task.measurements.top_down_map=top_down_map" + ], + ) + + # get env config + env_config = get_omega_config(args.env_config_path) + + # merge habitat and env config to create env config + env_config = create_env_config( + habitat_config, env_config, evaluation_type=args.evaluation_type + ) + + shortest_path_example(env_config) diff --git a/projects/habitat_ovmm/utils/drive_utils.py b/projects/habitat_ovmm/utils/drive_utils.py new file mode 100644 index 000000000..c16078123 --- /dev/null +++ b/projects/habitat_ovmm/utils/drive_utils.py @@ -0,0 +1,72 @@ +import os + +from google.oauth2.credentials import Credentials +from google_auth_oauthlib.flow import InstalledAppFlow +from googleapiclient.discovery import build +from googleapiclient.http import 
MediaFileUpload + +# Google Drive API credentials +SCOPES = ["https://www.googleapis.com/auth/drive"] +CLIENT_SECRET_FILE = "client_secret.json" # Update with your file name +API_NAME = "drive" +API_VERSION = "v3" + +# Folder ID of the destination folder in Google Drive +DESTINATION_FOLDER_ID = ( + "1Hvy2AEZ9C1_a6AFwqEPoe3K7Q7v5di-K" # Update with your folder ID +) + + +def authenticate(): + creds = None + if os.path.exists("token.json"): + creds = Credentials.from_authorized_user_file("token.json", SCOPES) + if not creds or not creds.valid: + flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRET_FILE, SCOPES) + creds = flow.run_local_server(port=0) + with open("token.json", "w") as token: + token.write(creds.to_authorized_user_file()) + return creds + + +def upload_to_drive(service, local_path, parent_id): + file_name = os.path.basename(local_path) + media = MediaFileUpload(local_path) + + file_metadata = {"name": file_name, "parents": [parent_id]} + + uploaded_file = ( + service.files() + .create(body=file_metadata, media_body=media, fields="id") + .execute() + ) + + print(f'Uploaded {file_name} with ID: {uploaded_file["id"]}') + + +def upload_folder_contents(service, local_folder, parent_id): + for item in os.listdir(local_folder): + item_path = os.path.join(local_folder, item) + if os.path.isfile(item_path): + upload_to_drive(service, item_path, parent_id) + elif os.path.isdir(item_path): + folder_name = os.path.basename(item_path) + folder_metadata = { + "name": folder_name, + "mimeType": "application/vnd.google-apps.folder", + "parents": [parent_id], + } + created_folder = ( + service.files().create(body=folder_metadata, fields="id").execute() + ) + upload_folder_contents(service, item_path, created_folder["id"]) + + +def main(): + creds = authenticate() + service = build(API_NAME, API_VERSION, credentials=creds) + upload_folder_contents(service, "video_dir", DESTINATION_FOLDER_ID) + + +if __name__ == "__main__": + main() diff --git 
a/projects/habitat_ovmm/utils/metrics_utils.py b/projects/habitat_ovmm/utils/metrics_utils.py index f6d266ac9..e6baa9465 100644 --- a/projects/habitat_ovmm/utils/metrics_utils.py +++ b/projects/habitat_ovmm/utils/metrics_utils.py @@ -22,12 +22,22 @@ def aggregate_metrics(episode_metrics_df: pd.DataFrame) -> pd.DataFrame: """ # Drop the columns with string values episode_metrics_df = episode_metrics_df.drop( - columns=["episode_id", "goal_name", "END.ovmm_place_object_phase_success"], + columns=[ + "episode_id", + "goal_name", + "data_split", + "scene_name", + "episode_completion_stage", + "experiment_name", + "experiment_type", + ], errors="ignore", ) # Compute aggregated metrics for each column, excluding NaN values, to get mean, min, max, and count - aggregated_metrics = episode_metrics_df.agg(["mean", "min", "max", "count"], axis=0) + aggregated_metrics = episode_metrics_df.agg( + ["mean", "min", "max", "sum", "count"], axis=0 + ) return aggregated_metrics.T @@ -49,6 +59,13 @@ def compute_stats(aggregated_metrics: pd.DataFrame) -> dict: """ stats = {} stats["episode_count"] = aggregated_metrics.loc["END.ovmm_place_success"]["count"] + stats["episode_count_overall_success"] = aggregated_metrics.loc[ + "END.ovmm_place_success" + ]["sum"] + stats["episode_count_partial_success"] = aggregated_metrics.loc[ + "END.obj_anywhere_on_goal.0" + ]["sum"] + stats["does_want_terminate"] = aggregated_metrics.loc["END.does_want_terminate"][ "mean" ] @@ -56,9 +73,13 @@ def compute_stats(aggregated_metrics: pd.DataFrame) -> dict: # find indices in the DataFrame with stage success in their name and compute success rate for k in aggregated_metrics.index: - if ("phase_success" in k and "END" in k) or "overall_success" in k: + if "phase_success" in k and "END" in k: stats[k.replace("END.ovmm_", "")] = aggregated_metrics.loc[k]["mean"] + stats["place_object_anywhere_on_goal_phase_success"] = aggregated_metrics.loc[ + "END.obj_anywhere_on_goal.0" + ]["mean"] + stats["overall_success"] 
= aggregated_metrics.loc["overall_success"]["mean"] stats["partial_success"] = aggregated_metrics.loc["partial_success"]["mean"] return stats @@ -91,6 +112,7 @@ def get_stats_from_episode_metrics( episode_ids = episode_ids.astype(str) # The task is considered successful if the agent places the object without robot collisions + enough_success = episode_metrics["END.obj_anywhere_on_goal.0"] == 1 overall_success = ( episode_metrics["END.robot_collisions.robot_scene_colls"] == 0 ) * (episode_metrics["END.ovmm_place_success"] == 1) @@ -100,8 +122,9 @@ def get_stats_from_episode_metrics( episode_metrics["END.ovmm_find_object_phase_success"] + episode_metrics["END.ovmm_pick_object_phase_success"] + episode_metrics["END.ovmm_find_recep_phase_success"] + + enough_success + overall_success - ) / 4.0 + ) / 5.0 episode_metrics = episode_metrics.assign( episode_id=episode_ids, diff --git a/projects/habitat_ovmm/utils/video_utils.py b/projects/habitat_ovmm/utils/video_utils.py new file mode 100644 index 000000000..09449ce46 --- /dev/null +++ b/projects/habitat_ovmm/utils/video_utils.py @@ -0,0 +1,48 @@ +import glob +import os +import shutil + +import cv2 +from natsort import natsorted + + +def get_snapshots_from_disk(source_dir: str, snapshot_file_prefix: str = "snapshot"): + frames = [] + image_paths = natsorted(glob.glob(f"{source_dir}/{snapshot_file_prefix}*.png")) + if len(image_paths) == 0: + return frames + + for filename in image_paths: + frames.append(cv2.imread(filename)) + return frames + + +def record_video(source_dir: str, target_dir: str, target_file: str): + # shutil.rmtree(target_dir, ignore_errors=True) + raise NotImplementedError + os.makedirs(target_dir, exist_ok=True) + print(f"Recording video {target_dir}/{target_file}") + + frames = get_snapshots_from_disk(source_dir, snapshot_file_prefix="snapshot") + # Get the dimensions of the first image (assuming all images have the same dimensions) + first_image = frames[0] + height, width, _ = first_image.shape + size 
= (width, height) + + out = cv2.VideoWriter( + f"{target_dir}/{target_file}.mp4", + cv2.VideoWriter_fourcc(*"mp4v"), + 15, + size, + ) + for frame in frames: + out.write(frame) + out.release() + + +if __name__ == "__main__": + record_video( + source_dir="datadump/images/eval_hssd/107733960_175999701_3", + target_dir="video_dir", + target_file="test", + ) diff --git a/src/home_robot/home_robot/agent/objectnav_agent/oracle_nav_agent.py b/src/home_robot/home_robot/agent/objectnav_agent/oracle_nav_agent.py new file mode 100644 index 000000000..624225afb --- /dev/null +++ b/src/home_robot/home_robot/agent/objectnav_agent/oracle_nav_agent.py @@ -0,0 +1,101 @@ +import os +from typing import Union + +import numpy as np +from habitat.core.agent import Agent +from habitat.sims.habitat_simulator.actions import HabitatSimActions +from habitat.tasks.nav.shortest_path_follower import ShortestPathFollower + +from home_robot.core.interfaces import DiscreteNavigationAction + +# Quiet the Habitat simulator logging +os.environ["MAGNUM_LOG"] = "quiet" +os.environ["HABITAT_SIM_LOG"] = "quiet" + + +def euclidean(v1, v2): + return sum((p - q) ** 2 for p, q in zip(v1, v2)) ** 0.5 + + +class ShortestPathFollowerAgent(Agent): + r"""Implementation of the :ref:`habitat.core.agent.Agent` interface that + uses :ref`habitat.tasks.nav.shortest_path_follower.ShortestPathFollower` utility class + for extracting the action on the shortest path to the goal. 
+ """ + + def __init__(self): + self.env = None + self.shortest_path_follower = None + self.goal_coordinates = None # needs to be a list, and the agent is implemented to follow one after the other in order + self.discrete_action_map = { + HabitatSimActions.stop: DiscreteNavigationAction.STOP, + HabitatSimActions.move_forward: DiscreteNavigationAction.MOVE_FORWARD, + HabitatSimActions.turn_left: DiscreteNavigationAction.TURN_LEFT, + HabitatSimActions.turn_right: DiscreteNavigationAction.TURN_RIGHT, + } + self.current_goal = ( + 0 # index of the current goal in the list of goal coordinates + ) + self.coarse_navigation = False + + def set_oracle_info(self, env, goal_coordinates, goal_radius=0.5): + """Instantiate shortest path follower + + Args: + env: Habitat env + goal_coordinates: List of xyz goal coordinates. Agent implemented to follow one after the other in order + """ + self.env = env + self.shortest_path_follower = ShortestPathFollower( + sim=env.habitat_env.sim, + goal_radius=goal_radius, + return_one_hot=False, + ) + + filtered_goal_coordinates = [] + final_reference_goal = goal_coordinates[-1] + for goal in goal_coordinates: + euclidean_distance_to_final_goal = euclidean(goal, final_reference_goal) + if euclidean_distance_to_final_goal < 4: + # only add the goal if it is within 4m of the final goal + filtered_goal_coordinates.append(goal) + if not len(filtered_goal_coordinates) and len(goal_coordinates): + filtered_goal_coordinates = goal_coordinates + + self.goal_coordinates = filtered_goal_coordinates + self.current_goal = ( + 0 # index of the current goal in the list of goal coordinates + ) + + def act(self, observations, info) -> Union[int, np.ndarray]: + action = self.discrete_action_map[ + self.shortest_path_follower.get_next_action( + self.goal_coordinates[self.current_goal] + ) + ] + # print(f"Oracle action: {action}") + # print(f"Goal: {self.goal_coordinates[self.current_goal]}") + # print(f"Agent: 
{self.shortest_path_follower._sim.robot.base_pos}") + + terminate = False + if action == DiscreteNavigationAction.STOP: + if self.current_goal >= len(self.goal_coordinates) - 1: + terminate = True # completed all goals + else: + print() + print("Reached goal! Moving to next goal...") + print(f"Curr goal: {self.goal_coordinates[self.current_goal]}") + print(f"Next goal: {self.goal_coordinates[self.current_goal+1]}") + self.current_goal += 1 # move to next goal + return self.act(observations, info) + + return action, terminate + + def reset(self) -> None: + self.env = None + self.shortest_path_follower = None + self.goal_coordinates = None + self.goal_candidate = 0 + + def reset_vectorized(self) -> None: + self.reset() # or NotImplementedError, really. diff --git a/src/home_robot/home_robot/agent/ovmm_agent/ovmm_agent.py b/src/home_robot/home_robot/agent/ovmm_agent/ovmm_agent.py index fa95ddd42..9aac589da 100644 --- a/src/home_robot/home_robot/agent/ovmm_agent/ovmm_agent.py +++ b/src/home_robot/home_robot/agent/ovmm_agent/ovmm_agent.py @@ -8,10 +8,13 @@ from enum import IntEnum, auto from typing import Any, Dict, Optional, Tuple +import magnum as mn import numpy as np import torch +from habitat.tasks.nav.object_nav_task import ObjectGoal from home_robot.agent.objectnav_agent.objectnav_agent import ObjectNavAgent +from home_robot.agent.objectnav_agent.oracle_nav_agent import ShortestPathFollowerAgent from home_robot.agent.ovmm_agent.ovmm_perception import ( OvmmPerception, build_vocab_from_category_map, @@ -20,6 +23,9 @@ from home_robot.core.interfaces import DiscreteNavigationAction, Observations from home_robot.manipulation import HeuristicPickPolicy, HeuristicPlacePolicy from home_robot.perception.constants import RearrangeBasicCategories +from home_robot_sim.env.habitat_ovmm_env.habitat_ovmm_env import ( + HabitatOpenVocabManipEnv, +) class Skill(IntEnum): @@ -59,11 +65,14 @@ def __init__(self, config, device_id: int = 0): self.gaze_agent = None 
self.nav_to_obj_agent = None self.nav_to_rec_agent = None + self.nav_to_obj_closeness_check = False + self.nav_to_rec_closeness_check = False self.pick_agent = None self.place_agent = None self.pick_policy = None self.place_policy = None self.semantic_sensor = None + self._env = None if config.GROUND_TRUTH_SEMANTICS == 1 and self.store_all_categories_in_map: # currently we get ground truth semantics of only the target object category and all scene receptacles from the simulator @@ -81,9 +90,7 @@ def __init__(self, config, device_id: int = 0): config, self.device, verbose=self.verbose ) if config.AGENT.SKILLS.PLACE.type == "heuristic" and not self.skip_skills.place: - self.place_policy = HeuristicPlacePolicy( - config, self.device, verbose=self.verbose - ) + self.place_policy = HeuristicPlacePolicy(config, self.device, verbose=True) elif config.AGENT.SKILLS.PLACE.type == "rl" and not self.skip_skills.place: from home_robot.agent.ovmm_agent.ppo_agent import PPOAgent @@ -112,6 +119,12 @@ def __init__(self, config, device_id: int = 0): config.AGENT.SKILLS.NAV_TO_OBJ, device_id=device_id, ) + if ( + config.AGENT.SKILLS.NAV_TO_OBJ.type == "oracle" + and not self.skip_skills.nav_to_obj + ): + self.nav_to_obj_agent = ShortestPathFollowerAgent() + if ( config.AGENT.SKILLS.NAV_TO_REC.type == "rl" and not self.skip_skills.nav_to_rec @@ -123,9 +136,96 @@ def __init__(self, config, device_id: int = 0): config.AGENT.SKILLS.NAV_TO_REC, device_id=device_id, ) + if ( + config.AGENT.SKILLS.NAV_TO_REC.type == "oracle" + and not self.skip_skills.nav_to_rec + ): + self.nav_to_rec_agent = ShortestPathFollowerAgent() + self._fall_wait_steps = getattr(config.AGENT, "fall_wait_steps", 0) self.config = config + def get_position_for_max_iou_viewpoint(self, object_goal: ObjectGoal): + """ + Returns the position of the viewpoint with the maximum IOU with the object. 
+ """ + ious = [viewpoint.iou for viewpoint in object_goal.view_points] + max_iou_idx = ious.index(max(ious)) + max_iou_viewpoint_position = object_goal.view_points[ + max_iou_idx + ].agent_state.position + return max_iou_viewpoint_position + + def set_oracle_info(self, ovmm_env: HabitatOpenVocabManipEnv): + self._env = ovmm_env + + self.use_segmentation = False + if "withsegmentation" in self.config.EXPERIMENT.type: + self.use_segmentation = True + + if ( + self.config.AGENT.SKILLS.NAV_TO_OBJ.type == "oracle" + and not self.skip_skills.nav_to_obj + ): + # Extract the habitat_env from the ovmm env and provide it to the agent + episode_id = ovmm_env.get_current_episode().episode_id + print( + f"Providing oracle environment information to NAV_TO_OBJ agent for episode {episode_id}" + ) + object_goal = ovmm_env.habitat_env.env._env.habitat_env.current_episode.candidate_objects[ + 0 + ] + object_vp_nav_goal = self.get_position_for_max_iou_viewpoint(object_goal) + object_nav_goal = object_goal.position + + if self.config.EXPERIMENT.NAV_TO_OBJ.goal == "exact": + goal_coordinates = [object_nav_goal] + elif self.config.EXPERIMENT.NAV_TO_OBJ.goal == "max_iou_viewpoint": + goal_coordinates = [object_vp_nav_goal] + elif self.config.EXPERIMENT.NAV_TO_OBJ.goal == "vp_then_exact": + goal_coordinates = [object_vp_nav_goal, object_nav_goal] + + self.nav_to_obj_agent.set_oracle_info( + ovmm_env.habitat_env.env._env, + goal_coordinates=goal_coordinates, + goal_radius=self.config.EXPERIMENT.NAV_TO_OBJ.goal_radius, + ) + + if ( + self.config.AGENT.SKILLS.NAV_TO_REC.type == "oracle" + and not self.skip_skills.nav_to_rec + ): + # Extract the habitat_env from the ovmm env and provide it to the agent + print( + f"Providing oracle environment information to NAV_TO_REC agent for episode {episode_id}" + ) + # candidate_goal_receps = ovmm_env.habitat_env.env._env.habitat_env.current_episode.candidate_goal_receps + # candidate_goal_recep_coords = [] + # for goal_recep in candidate_goal_receps: 
+ # candidate_goal_recep_coords.append(mn.Vector3(self.get_position_for_max_iou_viewpoint(goal_recep))) + # print(f"Candidate goal position: {ovmm_env.habitat_env.env._env.habitat_env.current_episode.candidate_goal_receps[0].position}") + # for vp_idx, vp in enumerate(ovmm_env.habitat_env.env._env.habitat_env.current_episode.candidate_goal_receps[0].view_points): + # print(f"Viewpoint {vp_idx}: {vp.agent_state.position}") + # exit(1) + object_goal = ovmm_env.habitat_env.env._env.habitat_env.current_episode.candidate_goal_receps[ + 0 + ] + object_vp_nav_goal = self.get_position_for_max_iou_viewpoint(object_goal) + object_nav_goal = object_goal.position + + if self.config.EXPERIMENT.NAV_TO_REC.goal == "exact": + goal_coordinates = [object_nav_goal] + elif self.config.EXPERIMENT.NAV_TO_REC.goal == "max_iou_viewpoint": + goal_coordinates = [object_vp_nav_goal] + elif self.config.EXPERIMENT.NAV_TO_REC.goal == "vp_then_exact": + goal_coordinates = [object_vp_nav_goal, object_nav_goal] + + self.nav_to_rec_agent.set_oracle_info( + ovmm_env.habitat_env.env._env, + goal_coordinates=goal_coordinates, + goal_radius=self.config.EXPERIMENT.NAV_TO_REC.goal_radius, + ) + def _get_info(self, obs: Observations) -> Dict[str, torch.Tensor]: """Get inputs for visual skill.""" use_detic_viz = self.config.ENVIRONMENT.use_detic_viz @@ -331,6 +431,21 @@ def _heuristic_nav( terminate = False return action, info, terminate + def _oracle_nav( + self, + obs: Observations, + info: Dict[str, Any], + oracle_agent: ShortestPathFollowerAgent, + ) -> Tuple[DiscreteNavigationAction, Any]: + if self.use_segmentation: + _, planner_info = super().act(obs) + # info overwrites planner_info entries for keys with same name + info = {**planner_info, **info} + self.timesteps[0] -= 1 # objectnav agent increments timestep + info["timestep"] = self.timesteps[0] + action, terminate = oracle_agent.act(obs, info) + return action, info, terminate + def _heuristic_pick( self, obs: Observations, info: Dict[str, Any] ) 
-> Tuple[DiscreteNavigationAction, Any]: @@ -401,6 +516,24 @@ def _nav_to_obj( action, info, terminate = self._heuristic_nav(obs, info) elif nav_to_obj_type == "rl": action, info, terminate = self.nav_to_obj_agent.act(obs, info) + elif nav_to_obj_type == "oracle": + action, info, terminate = self._oracle_nav(obs, info, self.nav_to_obj_agent) + if ( + terminate + and self.config.EXPERIMENT.NAV_TO_OBJ.fallback == "heuristic_nav" + ): # Fallback to heuristic nav after oracle nav is done + print("[OVMM AGENT] Fallback to heuristic nav after oracle nav is done") + action, info, terminate = self._heuristic_nav(obs, info) + # if not self.nav_to_obj_closeness_check: + # action, info, terminate = self._oracle_nav(obs, info, self.nav_to_obj_agent) + # if terminate: + # terminate = False + # self.nav_to_obj_closeness_check = True + # if self.nav_to_obj_closeness_check: # Fallback to heuristic nav after oracle nav is done + # action, info, terminate = self._heuristic_nav(obs, info) + # self.timesteps[0] += 1 + # info["timestep"] = self.timesteps[0] + else: raise ValueError( f"Got unexpected value for NAV_TO_OBJ.type: {nav_to_obj_type}" @@ -486,6 +619,8 @@ def _nav_to_rec( action, info, terminate = self._heuristic_nav(obs, info) elif nav_to_rec_type == "rl": action, info, terminate = self.nav_to_rec_agent.act(obs, info) + elif nav_to_rec_type == "oracle": + action, info, terminate = self._oracle_nav(obs, info, self.nav_to_rec_agent) else: raise ValueError( f"Got unexpected value for NAV_TO_REC.type: {nav_to_rec_type}" diff --git a/src/home_robot/home_robot/navigation_planner/fmm_planner.py b/src/home_robot/home_robot/navigation_planner/fmm_planner.py index fbfb88bc6..f79ccdfd3 100644 --- a/src/home_robot/home_robot/navigation_planner/fmm_planner.py +++ b/src/home_robot/home_robot/navigation_planner/fmm_planner.py @@ -152,7 +152,7 @@ def set_multi_goal( if self.print_images and timestep is not None: cv2.imwrite( - os.path.join(self.vis_dir, f"planner_snapshot_{timestep}.png"), + 
os.path.join(self.vis_dir, f"planner_snapshot_{timestep:04d}.png"), (dist_vis * 255).astype(int), ) return dd diff --git a/src/home_robot_hw/home_robot_hw/env/visualizer.py b/src/home_robot_hw/home_robot_hw/env/visualizer.py index 2caf7b662..47e8cc8d7 100644 --- a/src/home_robot_hw/home_robot_hw/env/visualizer.py +++ b/src/home_robot_hw/home_robot_hw/env/visualizer.py @@ -268,7 +268,7 @@ def visualize( if self.print_images: cv2.imwrite( - os.path.join(self.vis_dir, "snapshot_{:03d}.png".format(timestep)), + os.path.join(self.vis_dir, "snapshot_{:04d}.png".format(timestep)), self.image_vis, ) @@ -486,11 +486,11 @@ def visualize( cv2.imshow("Visualization", self.image_vis) cv2.waitKey(1) - if self.print_images: - cv2.imwrite( - os.path.join(self.vis_dir, "snapshot_{:03d}.png".format(timestep)), - self.image_vis, - ) + # if self.print_images: + # cv2.imwrite( + # os.path.join(self.vis_dir, "snapshot_{:04d}.png".format(timestep)), + # self.image_vis, + # ) def _init_vis_image(self, goal_name: str): vis_image = np.ones((655, 1165, 3)).astype(np.uint8) * 255 diff --git a/src/home_robot_sim/home_robot_sim/env/habitat_objectnav_env/visualizer.py b/src/home_robot_sim/home_robot_sim/env/habitat_objectnav_env/visualizer.py index 57eb33bc3..778125364 100644 --- a/src/home_robot_sim/home_robot_sim/env/habitat_objectnav_env/visualizer.py +++ b/src/home_robot_sim/home_robot_sim/env/habitat_objectnav_env/visualizer.py @@ -449,11 +449,15 @@ def visualize( if self.show_images: cv2.imshow("Visualization", image_vis) cv2.waitKey(1) - if self.print_images: - cv2.imwrite( - os.path.join(self.vis_dir, "snapshot_{:03d}.png".format(timestep)), - image_vis, - ) + # if self.print_images: + # cv2.imwrite( + # os.path.join(self.vis_dir, "snapshot_{:04d}.png".format(timestep)), + # image_vis, + # ) + # cv2.imwrite( + # os.path.join(self.vis_dir, "tp_snapshot_{:04d}.png".format(timestep)), + # image_vis[V.Y1 : V.Y2, V.THIRD_PERSON_X1 : V.THIRD_PERSON_X2], + # ) def _visualize_semantic_frame( 
self, image_vis: np.ndarray, semantic_frame: np.ndarray, palette: List @@ -506,7 +510,7 @@ def _visualize_instance_counts( num_views_per_instance[instance.category_id.item()].append( len(instance.instance_views) ) - text = "Instance counts" + text = "Stretch Robot" offset = 48 y_pos = offset diff --git a/src/home_robot_sim/home_robot_sim/env/habitat_ovmm_env/habitat_ovmm_env.py b/src/home_robot_sim/home_robot_sim/env/habitat_ovmm_env/habitat_ovmm_env.py index ca635a776..ad9d49c6d 100644 --- a/src/home_robot_sim/home_robot_sim/env/habitat_ovmm_env/habitat_ovmm_env.py +++ b/src/home_robot_sim/home_robot_sim/env/habitat_ovmm_env/habitat_ovmm_env.py @@ -355,9 +355,10 @@ def apply_action( habitat_action = self._preprocess_action(action, self._last_habitat_obs) habitat_obs, _, dones, infos = self.habitat_env.step(habitat_action) # copy the keys in info starting with the prefix "is_curr_skill" into infos - for key in info: - if key.startswith("is_curr_skill"): - infos[key] = info[key] + if info is not None: + for key in info: + if key.startswith("is_curr_skill"): + infos[key] = info[key] self._last_habitat_obs = habitat_obs self._last_obs = self._preprocess_obs(habitat_obs) return self._last_obs, dones, infos