facebookresearch · prasoonvarshney · Aug 10, 2023 · Aug 19, 2023 · Aug 19, 2023 · Aug 21, 2023
diff --git a/.gitignore b/.gitignore
@@ -23,6 +23,7 @@ viz/
 datasets/
 scene_datasets/
 datadump/
+video_dir/
 */datasets/
 data/
 

diff --git a/ablation_runner_1.sh b/ablation_runner_1.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Evaluate the baseline agent once per env config listed in the loop below,
+# restricted to a fixed subset of episode ids. Runs are sequential and a
+# failing run does not stop the remaining ones (no `set -e`).
+
+# Single-quoted so the shell never treats the [...] list as a glob pattern.
+EPISODE_IDS='[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193]'
+
+for version in v6 v6b v7; do
+  python projects/habitat_ovmm/eval_baselines_agent.py \
+    --env_config_path "projects/habitat_ovmm/configs/env/hssd_eval_${version}.yaml" \
+    "habitat.dataset.episode_ids=${EPISODE_IDS}"
+done
diff --git a/ablation_runner_2.sh b/ablation_runner_2.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Evaluate the baseline agent once per env config listed in the loop below,
+# restricted to a fixed subset of episode ids. Runs are sequential and a
+# failing run does not stop the remaining ones (no `set -e`).
+
+# Single-quoted so the shell never treats the [...] list as a glob pattern.
+EPISODE_IDS='[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193]'
+
+for version in v1 v1b v2 v2b v3 v3b; do
+  python projects/habitat_ovmm/eval_baselines_agent.py \
+    --env_config_path "projects/habitat_ovmm/configs/env/hssd_eval_${version}.yaml" \
+    "habitat.dataset.episode_ids=${EPISODE_IDS}"
+done
diff --git a/ablation_runner_3.sh b/ablation_runner_3.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Evaluate the baseline agent once per env config listed in the loop below,
+# restricted to a fixed subset of episode ids. Runs are sequential and a
+# failing run does not stop the remaining ones (no `set -e`).
+
+# Single-quoted so the shell never treats the [...] list as a glob pattern.
+EPISODE_IDS='[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193]'
+
+for version in v4 v4b v5 v5b; do
+  python projects/habitat_ovmm/eval_baselines_agent.py \
+    --env_config_path "projects/habitat_ovmm/configs/env/hssd_eval_${version}.yaml" \
+    "habitat.dataset.episode_ids=${EPISODE_IDS}"
+done
diff --git a/ablation_runner_4.sh b/ablation_runner_4.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Evaluate the baseline agent once per env config listed in the loop below,
+# restricted to a fixed subset of episode ids. Runs are sequential and a
+# failing run does not stop the remaining ones (no `set -e`).
+
+# Single-quoted so the shell never treats the [...] list as a glob pattern.
+EPISODE_IDS='[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193]'
+
+for version in v7b v6c v7c; do
+  python projects/habitat_ovmm/eval_baselines_agent.py \
+    --env_config_path "projects/habitat_ovmm/configs/env/hssd_eval_${version}.yaml" \
+    "habitat.dataset.episode_ids=${EPISODE_IDS}"
+done
diff --git a/projects/habitat_ovmm/configs/agent/oracle_agent.yaml b/projects/habitat_ovmm/configs/agent/oracle_agent.yaml
@@ -0,0 +1,169 @@
+max_steps: 10000        # maximum number of steps before an episode is stopped during navigation (a lower value is set for habitat episode termination)
+panorama_start: 1       # 1: turn around 360 degrees when starting an episode, 0: don't
+exploration_strategy: seen_frontier  # exploration strategy, one of "seen_frontier" or "been_close_to_frontier"
+radius: 0.05            # robot radius (in meters)
+fall_wait_steps: 100    # number of steps to wait after the object has been dropped
+store_all_categories: False  # whether to store all semantic categories in the map or just task-relevant ones
+detection_module: detic # detector to use for perception when ground-truth semantics are turned off
+SEMANTIC_MAP:
+  semantic_categories: rearrange # map semantic channel categories, one of "coco_indoor", "longtail_indoor", "mukul_indoor", "rearrange"
+  num_sem_categories: 5    # the 5 categories are: ["misc", "object_category", "start_receptacle", "goal_receptacle", "others"]
+  map_size_cm: 4800        # global map size (in centimeters)
+  map_resolution: 5        # size of map bins (in centimeters)
+  vision_range: 100        # diameter of local map region visible by the agent (in cells)
+  global_downscaling: 2    # ratio of global over local map
+  du_scale: 4              # frame downscaling before projecting to point cloud
+  cat_pred_threshold: 1.0  # number of depth points that must fall in a bin to classify it as a certain semantic category
+  exp_pred_threshold: 1.0  # number of depth points that must fall in a bin to consider it explored
+  map_pred_threshold: 1.0  # number of depth points that must fall in a bin to consider it an obstacle
+  been_close_to_radius: 100  # radius (in centimeters) of the "been close to" region
+  explored_radius: 150       # radius (in centimeters) of visually explored region
+  must_explore_close: False
+  min_obs_height_cm: 10    # minimum height (in centimeters) of obstacle
+  # post-processing to reduce the number of spurious artifacts (NOTE(review): the original comment said "erosion and filtering" but only dilation keys follow - confirm)
+  dilate_obstacles: True
+  dilate_size: 3
+  dilate_iter: 1
+  record_instance_ids: False
+  max_instances: 0
+
+SKILLS:
+  GAZE_OBJ:
+    type: rl # alternatives noted by the author: end_to_end, heuristic, hardcoded
+    checkpoint_path: data/checkpoints/ovmm_baseline_home_robot_challenge_2023/gaze_at_obj.pth
+    rl_config: projects/habitat_ovmm/configs/agent/skills/gaze_rl.yaml # with continuous actions
+    gym_obs_keys:
+      - robot_head_depth
+      - object_embedding
+      - object_segmentation
+      - joint
+      - is_holding
+      - relative_resting_position
+    allowed_actions:
+      - arm_action
+      - base_velocity
+    arm_joint_mask: [0, 0, 0, 0, 0, 0, 1] # the arm joints that the policy can control
+    max_displacement: 0.25                     # used when training the policy
+    max_turn_degrees: 30.0
+    min_turn_degrees: 5.0
+    min_displacement: 0.1
+    sensor_height: 160
+    sensor_width: 120
+    nav_goal_seg_channels: 1
+    terminate_condition: grip
+    grip_threshold: 0.8
+    max_joint_delta: 0.1
+    min_joint_delta: 0.02
+
+  PICK:
+    type: heuristic
+
+  NAV_TO_OBJ:
+    type: oracle  # one of: heuristic, rl, oracle
+    checkpoint_path: data/checkpoints/ovmm_baseline_home_robot_challenge_2023/nav_to_obj.pth
+    rl_config: projects/habitat_ovmm/configs/agent/skills/nav_to_obj_rl.yaml
+    gym_obs_keys:
+      - robot_head_depth
+      - object_embedding
+      - ovmm_nav_goal_segmentation
+      - receptacle_segmentation
+      - start_receptacle
+      - robot_start_gps
+      - robot_start_compass
+      - joint
+    allowed_actions:
+      # - base_velocity
+      # - rearrange_stop
+      - stop
+      - move_forward
+      - turn_left
+      - turn_right
+    arm_joint_mask: [0, 0, 0, 0, 0, 0, 0] # the arm joints that the policy can control
+    max_displacement: 0.25                # used when training the policy; could be different from the eval values
+    max_turn_degrees: 30.0
+    min_turn_degrees: 5.0
+    min_displacement: 0.1
+    sensor_height: 160
+    sensor_width: 120
+    terminate_condition: discrete_stop
+    nav_goal_seg_channels: 2
+
+  NAV_TO_REC:
+    type: oracle  # one of: heuristic, rl, oracle
+    checkpoint_path: data/checkpoints/ovmm_baseline_home_robot_challenge_2023/nav_to_rec.pth
+    rl_config: projects/habitat_ovmm/configs/agent/skills/nav_to_obj_rl.yaml
+    gym_obs_keys:
+      - robot_head_depth
+      - ovmm_nav_goal_segmentation
+      - receptacle_segmentation
+      - goal_receptacle
+      - robot_start_gps
+      - robot_start_compass
+      - joint
+    allowed_actions:
+      # - base_velocity
+      # - rearrange_stop
+      - stop
+      - move_forward
+      - turn_left
+      - turn_right
+    arm_joint_mask: [0, 0, 0, 0, 0, 0, 0] # the arm joints that the policy can control
+    max_displacement: 0.25                # used when training the policy; could be different from the eval values
+    max_turn_degrees: 30.0
+    min_turn_degrees: 5.0
+    min_displacement: 0.1
+    sensor_height: 160
+    sensor_width: 120
+    terminate_condition: discrete_stop
+    nav_goal_seg_channels: 1
+
+
+  PLACE:
+    type: heuristic  # one of: "rl", "heuristic", "hardcoded"
+    checkpoint_path: data/checkpoints/ovmm_baseline_home_robot_challenge_2023/place.pth
+    rl_config: projects/habitat_ovmm/configs/agent/skills/place_rl.yaml # with continuous actions
+    gym_obs_keys:
+      - robot_head_depth
+      - goal_receptacle
+      - joint
+      - goal_recep_segmentation
+      - is_holding
+      - object_embedding
+    allowed_actions:
+      - arm_action
+      - base_velocity
+      - manipulation_mode
+    arm_joint_mask: [1, 1, 1, 1, 1, 0, 0] # the arm joints that the policy can control
+    max_displacement: 0.25                     # used when training the policy
+    max_turn_degrees: 30.0
+    min_turn_degrees: 5.0
+    min_displacement: 0.1
+    sensor_height: 160
+    sensor_width: 120
+    nav_goal_seg_channels: 1
+    terminate_condition: ungrip
+    grip_threshold: -0.8
+    manip_mode_threshold: 0.8
+    constraint_base_in_manip_mode: True
+    max_joint_delta: 0.1
+    min_joint_delta: 0.02
+
+skip_skills: # NOTE(review): presumably True means the skill is skipped - confirm against the agent code
+  nav_to_obj: False
+  nav_to_rec: False
+  gaze_at_obj: True
+  gaze_at_rec: True
+  pick: False
+  place: False
+
+PLANNER:
+  collision_threshold: 0.10       # forward move distance under which we consider there's a collision (in meters)
+  obs_dilation_selem_radius: 3    # radius (in cells) of obstacle dilation structuring element
+  goal_dilation_selem_radius: 10  # radius (in cells) of goal dilation structuring element
+  step_size: 5                    # maximum distance of the short-term goal selected by the planner
+  use_dilation_for_stg: False
+  min_obs_dilation_selem_radius: 1    # minimum radius (in cells) of obstacle dilation structuring element
+  map_downsample_factor: 1            # optional downsampling of traversable and goal map before fmm distance call (1 for no downsampling, 2 for halving resolution)
+  map_update_frequency: 1             # compute fmm distance map every n steps
+  discrete_actions: True         # discrete motion planner output space or not
+  verbose: False                 # display debug information during planning
diff --git a/projects/habitat_ovmm/configs/env/hssd_eval.yaml b/projects/habitat_ovmm/configs/env/hssd_eval.yaml
@@ -3,14 +3,27 @@ NUM_ENVIRONMENTS: 1       # number of environments (per agent process)
 DUMP_LOCATION: datadump   # path to dump models and log
 EXP_NAME: eval_hssd       # experiment name
 VISUALIZE: 0              # 1: render observation and predicted semantic map, 0: no visualization
-PRINT_IMAGES: 0           # 1: save visualization as images, 0: no image saving
-GROUND_TRUTH_SEMANTICS: 0 # 1: use ground-truth semantics (for debugging / ablations)
+PRINT_IMAGES: 1           # 1: save visualization as images, 0: no image saving
+GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations); NOTE(review): switched on by this change - confirm this is intended for this config
 seed: 0                   # seed
 SHOW_RL_OBS: False         # whether to show the observations passed to RL policices, for debugging
 
+EXPERIMENT:
+  type: v0_extra_navigation_instructions_here # NOTE(review): reads like a placeholder name - confirm
+  NAV_TO_OBJ:
+    type: oracle
+    segmentation: False
+    fallback: none # plain string "none", not YAML null
+    goal: max_iou_viewpoint # NOTE(review): the hssd_eval_v1/v1b/v2 configs also set goal_radius in this section - confirm it is optional
+  NAV_TO_REC:
+    type: oracle
+    segmentation: False
+    fallback: none # plain string "none", not YAML null
+    goal: max_iou_viewpoint # NOTE(review): the hssd_eval_v1/v1b/v2 configs also set goal_radius in this section - confirm it is optional
+
 ENVIRONMENT:
-  forward: 0.25           # forward motion (in meters)
-  turn_angle: 30.0        # agent turn angle (in degrees)
+  forward: 0.10           # forward motion (in meters); note: changing this requires a corresponding change in the habitat benchmark config
+  turn_angle: 5           # agent turn angle (in degrees); note: changing this requires a corresponding change in the habitat benchmark config
   frame_height: 640       # first-person frame height (in pixels)
   frame_width: 480        # first-person frame width (in pixels)
   camera_height: 1.31     # camera sensor height (in metres)
@@ -26,6 +39,6 @@ EVAL_VECTORIZED:
   simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments
   split: val                # eval split
   num_episodes_per_env: null # number of eval episodes per environment
-  record_videos: 0          # 1: record videos from printed images, 0: don't
+  record_videos: 1          # 1: record videos from printed images, 0: don't
   record_planner_videos: 0  # 1: record planner videos (if record videos), 0: don't
   metrics_save_freq: 5      # save metrics after every n episodes
diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v1.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v1.yaml
@@ -0,0 +1,46 @@
+NO_GPU: 0                 # 1: ignore GPU IDs and run on CPU, 0: run on GPUs (no IDs precede this line; presumably simulator_gpu_ids below - confirm)
+NUM_ENVIRONMENTS: 1       # number of environments (per agent process)
+DUMP_LOCATION: datadump   # path to dump models and log
+EXP_NAME: eval_hssd_v1       # experiment name
+VISUALIZE: 0              # 1: render observation and predicted semantic map, 0: no visualization
+PRINT_IMAGES: 1           # 1: save visualization as images, 0: no image saving
+GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations)
+seed: 0                   # seed
+SHOW_RL_OBS: False         # whether to show the observations passed to RL policies, for debugging
+
+EXPERIMENT:
+  type: v1__nav-oracle-nosegmentation__pick-heur-vp__place-heur-vp
+  NAV_TO_OBJ:
+    type: oracle
+    segmentation: False
+    fallback: none # plain string "none", not YAML null
+    goal: max_iou_viewpoint
+    goal_radius: 0.1
+  NAV_TO_REC:
+    type: oracle
+    segmentation: False
+    fallback: none # plain string "none", not YAML null
+    goal: max_iou_viewpoint
+    goal_radius: 0.1
+
+ENVIRONMENT:
+  forward: 0.10           # forward motion (in meters); note: changing this requires a corresponding change in the habitat benchmark config
+  turn_angle: 5           # agent turn angle (in degrees); note: changing this requires a corresponding change in the habitat benchmark config
+  frame_height: 640       # first-person frame height (in pixels)
+  frame_width: 480        # first-person frame width (in pixels)
+  camera_height: 1.31     # camera sensor height (in metres)
+  hfov: 42.0              # horizontal field of view (in degrees)
+  min_depth: 0.0          # minimum depth for depth sensor (in metres)
+  max_depth: 10.0         # maximum depth for depth sensor (in metres)
+  num_receptacles: 21
+  category_map_file: projects/real_world_ovmm/configs/example_cat_map.json
+  use_detic_viz: False
+  evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids
+
+EVAL_VECTORIZED:
+  simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments
+  split: val                # eval split
+  num_episodes_per_env: null # number of eval episodes per environment
+  record_videos: 1          # 1: record videos from printed images, 0: don't
+  record_planner_videos: 0  # 1: record planner videos (if record videos), 0: don't
+  metrics_save_freq: 5      # save metrics after every n episodes
diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v1b.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v1b.yaml
@@ -0,0 +1,46 @@
+NO_GPU: 0                 # 1: ignore GPU IDs and run on CPU, 0: run on GPUs (no IDs precede this line; presumably simulator_gpu_ids below - confirm)
+NUM_ENVIRONMENTS: 1       # number of environments (per agent process)
+DUMP_LOCATION: datadump   # path to dump models and log
+EXP_NAME: eval_hssd_v1b       # experiment name
+VISUALIZE: 0              # 1: render observation and predicted semantic map, 0: no visualization
+PRINT_IMAGES: 1           # 1: save visualization as images, 0: no image saving
+GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations)
+seed: 0                   # seed
+SHOW_RL_OBS: False         # whether to show the observations passed to RL policies, for debugging
+
+EXPERIMENT:
+  type: v1b__nav-oracle-nosegmentation__pick-heur-vp__place-heur-vp
+  NAV_TO_OBJ:
+    type: oracle
+    segmentation: False
+    fallback: none # plain string "none", not YAML null
+    goal: max_iou_viewpoint
+    goal_radius: 0.3 # hssd_eval_v1.yaml uses 0.1 here
+  NAV_TO_REC:
+    type: oracle
+    segmentation: False
+    fallback: none # plain string "none", not YAML null
+    goal: max_iou_viewpoint
+    goal_radius: 0.3 # hssd_eval_v1.yaml uses 0.1 here
+
+ENVIRONMENT:
+  forward: 0.10           # forward motion (in meters); note: changing this requires a corresponding change in the habitat benchmark config
+  turn_angle: 5           # agent turn angle (in degrees); note: changing this requires a corresponding change in the habitat benchmark config
+  frame_height: 640       # first-person frame height (in pixels)
+  frame_width: 480        # first-person frame width (in pixels)
+  camera_height: 1.31     # camera sensor height (in metres)
+  hfov: 42.0              # horizontal field of view (in degrees)
+  min_depth: 0.0          # minimum depth for depth sensor (in metres)
+  max_depth: 10.0         # maximum depth for depth sensor (in metres)
+  num_receptacles: 21
+  category_map_file: projects/real_world_ovmm/configs/example_cat_map.json
+  use_detic_viz: False
+  evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids
+
+EVAL_VECTORIZED:
+  simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments
+  split: val                # eval split
+  num_episodes_per_env: null # number of eval episodes per environment
+  record_videos: 1          # 1: record videos from printed images, 0: don't
+  record_planner_videos: 0  # 1: record planner videos (if record videos), 0: don't
+  metrics_save_freq: 5      # save metrics after every n episodes
diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v2.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v2.yaml
@@ -0,0 +1,46 @@
+NO_GPU: 0                 # 1: ignore GPU IDs and run on CPU, 0: run on GPUs (no IDs precede this line; presumably simulator_gpu_ids below - confirm)
+NUM_ENVIRONMENTS: 1       # number of environments (per agent process)
+DUMP_LOCATION: datadump   # path to dump models and log
+EXP_NAME: eval_hssd_v2       # experiment name
+VISUALIZE: 0              # 1: render observation and predicted semantic map, 0: no visualization
+PRINT_IMAGES: 1           # 1: save visualization as images, 0: no image saving
+GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations)
+seed: 0                   # seed
+SHOW_RL_OBS: False         # whether to show the observations passed to RL policies, for debugging
+
+EXPERIMENT:
+  type: v2__nav-oracle-nosegmentation-heurnearobject__pick-heur-vp__place-heur-vp
+  NAV_TO_OBJ:
+    type: oracle
+    segmentation: False
+    fallback: heuristic_nav # hssd_eval_v1.yaml uses none here
+    goal: max_iou_viewpoint
+    goal_radius: 0.1
+  NAV_TO_REC:
+    type: oracle
+    segmentation: False
+    fallback: none # plain string "none", not YAML null
+    goal: max_iou_viewpoint
+    goal_radius: 0.1
+
+ENVIRONMENT:
+  forward: 0.10           # forward motion (in meters); note: changing this requires a corresponding change in the habitat benchmark config
+  turn_angle: 5           # agent turn angle (in degrees); note: changing this requires a corresponding change in the habitat benchmark config
+  frame_height: 640       # first-person frame height (in pixels)
+  frame_width: 480        # first-person frame width (in pixels)
+  camera_height: 1.31     # camera sensor height (in metres)
+  hfov: 42.0              # horizontal field of view (in degrees)
+  min_depth: 0.0          # minimum depth for depth sensor (in metres)
+  max_depth: 10.0         # maximum depth for depth sensor (in metres)
+  num_receptacles: 21
+  category_map_file: projects/real_world_ovmm/configs/example_cat_map.json
+  use_detic_viz: False
+  evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids
+
+EVAL_VECTORIZED:
+  simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments
+  split: val                # eval split
+  num_episodes_per_env: null # number of eval episodes per environment
+  record_videos: 1          # 1: record videos from printed images, 0: don't
+  record_planner_videos: 0  # 1: record planner videos (if record videos), 0: don't
+  metrics_save_freq: 5      # save metrics after every n episodes