diff --git a/.gitignore b/.gitignore index 57aaafb1a..c726e50d7 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ viz/ datasets/ scene_datasets/ datadump/ +video_dir/ */datasets/ data/ diff --git a/ablation_runner_1.sh b/ablation_runner_1.sh new file mode 100644 index 000000000..e57ca8fc9 --- /dev/null +++ b/ablation_runner_1.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v6.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v6b.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v7.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] diff --git a/ablation_runner_2.sh b/ablation_runner_2.sh new file mode 100644 index 000000000..f12c7081b --- /dev/null +++ b/ablation_runner_2.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v1.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v1b.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v2.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v2b.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] 
+python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v3.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v3b.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] \ No newline at end of file diff --git a/ablation_runner_3.sh b/ablation_runner_3.sh new file mode 100644 index 000000000..24a5873dc --- /dev/null +++ b/ablation_runner_3.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v4.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v4b.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v5.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v5b.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] \ No newline at end of file diff --git a/ablation_runner_4.sh b/ablation_runner_4.sh new file mode 100644 index 000000000..ca609b658 --- /dev/null +++ b/ablation_runner_4.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v7b.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v6c.yaml 
habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] +python projects/habitat_ovmm/eval_baselines_agent.py --env_config_path projects/habitat_ovmm/configs/env/hssd_eval_v7c.yaml habitat.dataset.episode_ids=[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,151,182,193] \ No newline at end of file diff --git a/projects/habitat_ovmm/configs/agent/oracle_agent.yaml b/projects/habitat_ovmm/configs/agent/oracle_agent.yaml new file mode 100644 index 000000000..2301df74e --- /dev/null +++ b/projects/habitat_ovmm/configs/agent/oracle_agent.yaml @@ -0,0 +1,169 @@ +max_steps: 10000 # maximum number of steps before stopping an episode during navigation; a lower value set for habitat episode termination +panorama_start: 1 # 1: turn around 360 degrees when starting an episode, 0: don't +exploration_strategy: seen_frontier # exploration strategy ("seen_frontier", "been_close_to_frontier") +radius: 0.05 # robot radius (in meters) +fall_wait_steps: 100 # number of steps to wait after the object has been dropped +store_all_categories: False # whether to store all semantic categories in the map or just task-relevant ones +detection_module: detic # the detector to use for perception in case ground_truth_semantics are turned off +SEMANTIC_MAP: + semantic_categories: rearrange # map semantic channel categories ("coco_indoor", "longtail_indoor", "mukul_indoor", "rearrange") + num_sem_categories: 5 # Following 5 categories: ["misc", "object_category", "start_receptacle", "goal_receptacle", "others"] + map_size_cm: 4800 # global map size (in centimeters) + map_resolution: 5 # size of map bins (in centimeters) + vision_range: 100 # diameter of local map region visible by the agent (in cells) + global_downscaling: 2 # ratio of global over local map + du_scale: 4 # frame downscaling before projecting to point cloud + cat_pred_threshold: 1.0 # number of depth points to be in bin to classify it as a certain semantic category + exp_pred_threshold: 1.0 # number of depth 
points to be in bin to consider it as explored + map_pred_threshold: 1.0 # number of depth points to be in bin to consider it as obstacle + been_close_to_radius: 100 # radius (in centimeters) of been close to region + explored_radius: 150 # radius (in centimeters) of visually explored region + must_explore_close: False + min_obs_height_cm: 10 # minimum height (in centimeters) of obstacle + # erosion and filtering to reduce the number of spurious artifacts + dilate_obstacles: True + dilate_size: 3 + dilate_iter: 1 + record_instance_ids: False + max_instances: 0 + +SKILLS: + GAZE_OBJ: + type: rl #end_to_end #heuristic #hardcoded + checkpoint_path: data/checkpoints/ovmm_baseline_home_robot_challenge_2023/gaze_at_obj.pth + rl_config: projects/habitat_ovmm/configs/agent/skills/gaze_rl.yaml # with continuous actions + gym_obs_keys: + - robot_head_depth + - object_embedding + - object_segmentation + - joint + - is_holding + - relative_resting_position + allowed_actions: + - arm_action + - base_velocity + arm_joint_mask: [0, 0, 0, 0, 0, 0, 1] # the arm joints that the policy can control + max_displacement: 0.25 # used when training the policy + max_turn_degrees: 30.0 + min_turn_degrees: 5.0 + min_displacement: 0.1 + sensor_height: 160 + sensor_width: 120 + nav_goal_seg_channels: 1 + terminate_condition: grip + grip_threshold: 0.8 + max_joint_delta: 0.1 + min_joint_delta: 0.02 + + PICK: + type: heuristic + + NAV_TO_OBJ: + type: oracle # heuristic or rl or oracle + checkpoint_path: data/checkpoints/ovmm_baseline_home_robot_challenge_2023/nav_to_obj.pth + rl_config: projects/habitat_ovmm/configs/agent/skills/nav_to_obj_rl.yaml + gym_obs_keys: + - robot_head_depth + - object_embedding + - ovmm_nav_goal_segmentation + - receptacle_segmentation + - start_receptacle + - robot_start_gps + - robot_start_compass + - joint + allowed_actions: + # - base_velocity + # - rearrange_stop + - stop + - move_forward + - turn_left + - turn_right + arm_joint_mask: [0, 0, 0, 0, 0, 0, 0] # the 
arm joints that the policy can control + max_displacement: 0.25 # used when training the policy; could be different from the eval values + max_turn_degrees: 30.0 + min_turn_degrees: 5.0 + min_displacement: 0.1 + sensor_height: 160 + sensor_width: 120 + terminate_condition: discrete_stop + nav_goal_seg_channels: 2 + + NAV_TO_REC: + type: oracle # heuristic or rl or oracle + checkpoint_path: data/checkpoints/ovmm_baseline_home_robot_challenge_2023/nav_to_rec.pth + rl_config: projects/habitat_ovmm/configs/agent/skills/nav_to_obj_rl.yaml + gym_obs_keys: + - robot_head_depth + - ovmm_nav_goal_segmentation + - receptacle_segmentation + - goal_receptacle + - robot_start_gps + - robot_start_compass + - joint + allowed_actions: + # - base_velocity + # - rearrange_stop + - stop + - move_forward + - turn_left + - turn_right + arm_joint_mask: [0, 0, 0, 0, 0, 0, 0] # the arm joints that the policy can control + max_displacement: 0.25 # used when training the policy; could be different from the eval values + max_turn_degrees: 30.0 + min_turn_degrees: 5.0 + min_displacement: 0.1 + sensor_height: 160 + sensor_width: 120 + terminate_condition: discrete_stop + nav_goal_seg_channels: 1 + + + PLACE: + type: heuristic # "rl" or "heuristic" or "hardcoded" + checkpoint_path: data/checkpoints/ovmm_baseline_home_robot_challenge_2023/place.pth + rl_config: projects/habitat_ovmm/configs/agent/skills/place_rl.yaml # with continuous actions + gym_obs_keys: + - robot_head_depth + - goal_receptacle + - joint + - goal_recep_segmentation + - is_holding + - object_embedding + allowed_actions: + - arm_action + - base_velocity + - manipulation_mode + arm_joint_mask: [1, 1, 1, 1, 1, 0, 0] # the arm joints that the policy can control + max_displacement: 0.25 # used when training the policy + max_turn_degrees: 30.0 + min_turn_degrees: 5.0 + min_displacement: 0.1 + sensor_height: 160 + sensor_width: 120 + nav_goal_seg_channels: 1 + terminate_condition: ungrip + grip_threshold: -0.8 + 
manip_mode_threshold: 0.8 + constraint_base_in_manip_mode: True + max_joint_delta: 0.1 + min_joint_delta: 0.02 + +skip_skills: + nav_to_obj: False + nav_to_rec: False + gaze_at_obj: True + gaze_at_rec: True + pick: False + place: False + +PLANNER: + collision_threshold: 0.10 # forward move distance under which we consider there's a collision (in meters) + obs_dilation_selem_radius: 3 # radius (in cells) of obstacle dilation structuring element + goal_dilation_selem_radius: 10 # radius (in cells) of goal dilation structuring element + step_size: 5 # maximum distance of the short-term goal selected by the planner + use_dilation_for_stg: False + min_obs_dilation_selem_radius: 1 # radius (in cells) of obstacle dilation structuring element + map_downsample_factor: 1 # optional downsampling of traversible and goal map before fmm distance call (1 for no downsampling, 2 for halving resolution) + map_update_frequency: 1 # compute fmm distance map every n steps + discrete_actions: True # discrete motion planner output space or not + verbose: False # display debug information during planning diff --git a/projects/habitat_ovmm/configs/env/hssd_eval.yaml b/projects/habitat_ovmm/configs/env/hssd_eval.yaml index d5fd0ba1d..f7ad77c4d 100644 --- a/projects/habitat_ovmm/configs/env/hssd_eval.yaml +++ b/projects/habitat_ovmm/configs/env/hssd_eval.yaml @@ -3,14 +3,27 @@ NUM_ENVIRONMENTS: 1 # number of environments (per agent process) DUMP_LOCATION: datadump # path to dump models and log EXP_NAME: eval_hssd # experiment name VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization -PRINT_IMAGES: 0 # 1: save visualization as images, 0: no image saving -GROUND_TRUTH_SEMANTICS: 0 # 1: use ground-truth semantics (for debugging / ablations) +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) seed: 0 # seed SHOW_RL_OBS: False # whether to show the observations 
passed to RL policices, for debugging +EXPERIMENT: + type: v0_extra_navigation_instructions_here + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: none + goal: max_iou_viewpoint + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: max_iou_viewpoint + ENVIRONMENT: - forward: 0.25 # forward motion (in meters) - turn_angle: 30.0 # agent turn angle (in degrees) + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config frame_height: 640 # first-person frame height (in pixels) frame_width: 480 # first-person frame width (in pixels) camera_height: 1.31 # camera sensor height (in metres) @@ -26,6 +39,6 @@ EVAL_VECTORIZED: simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments split: val # eval split num_episodes_per_env: null # number of eval episodes per environment - record_videos: 0 # 1: record videos from printed images, 0: don't + record_videos: 1 # 1: record videos from printed images, 0: don't record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v1.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v1.yaml new file mode 100644 index 000000000..519d89e7d --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v1.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v1 # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving 
+GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v1__nav-oracle-nosegmentation__pick-heur-vp__place-heur-vp + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: none + goal: max_iou_viewpoint + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: max_iou_viewpoint + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this requires a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this requires a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v1b.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v1b.yaml new file mode 100644 index 000000000..c82f2914c --- /dev/null +++ 
b/projects/habitat_ovmm/configs/env/hssd_eval_v1b.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v1b # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policices, for debugging + +EXPERIMENT: + type: v1b__nav-oracle-nosegmentation__pick-heur-vp__place-heur-vp + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: none + goal: max_iou_viewpoint + goal_radius: 0.3 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: max_iou_viewpoint + goal_radius: 0.3 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + 
num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v2.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v2.yaml new file mode 100644 index 000000000..63277dd09 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v2.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v2 # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policices, for debugging + +EXPERIMENT: + type: v2__nav-oracle-nosegmentation-heurnearobject__pick-heur-vp__place-heur-vp + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: max_iou_viewpoint + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # 
minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v2b.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v2b.yaml new file mode 100644 index 000000000..86c9eec82 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v2b.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v2b # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policices, for debugging + +EXPERIMENT: + type: v2b__nav-oracle-nosegmentation-heurnearobject__pick-heur-vp__place-heur-vp + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.3 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: max_iou_viewpoint + goal_radius: 0.3 + +ENVIRONMENT: + forward: 0.10 # forward motion 
(in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v3.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v3.yaml new file mode 100644 index 000000000..9f4121329 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v3.yaml @@ -0,0 +1,47 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v3 # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 
# seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policices, for debugging +# experiment_type: v1__nav-oracle-nosegmentation__pick-heur-vp__place-heur-vp +# experiment_type: v2__nav-oracle-nosegmentation-heurnearobject__pick-heur-vp__place-heur-vp +EXPERIMENT: + type: v3__nav-oracle-withsegmentation-heurnearobject__pick-heur-vp__place-heur-vp + NAV_TO_OBJ: + type: oracle + segmentation: True + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: True + fallback: none + goal: max_iou_viewpoint + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v3b.yaml 
b/projects/habitat_ovmm/configs/env/hssd_eval_v3b.yaml new file mode 100644 index 000000000..de5fc3fd0 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v3b.yaml @@ -0,0 +1,47 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v3b # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policices, for debugging +# experiment_type: v1__nav-oracle-nosegmentation__pick-heur-vp__place-heur-vp +# experiment_type: v2__nav-oracle-nosegmentation-heurnearobject__pick-heur-vp__place-heur-vp +EXPERIMENT: + type: v3b__nav-oracle-withsegmentation-heurnearobject__pick-heur-vp__place-heur-vp + NAV_TO_OBJ: + type: oracle + segmentation: True + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: True + fallback: none + goal: max_iou_viewpoint + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: 
projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v4.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v4.yaml new file mode 100644 index 000000000..28e04cd31 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v4.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v4 # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policices, for debugging + +EXPERIMENT: + type: v4__nav-oracle-withsegmentation-heurnearobject__pick-heur-vp__place-heur-exact + NAV_TO_OBJ: + type: oracle + segmentation: True + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: True + fallback: none + goal: exact + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing 
this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v4b.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v4b.yaml new file mode 100644 index 000000000..b9f309fff --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v4b.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v4b # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policices, for debugging + +EXPERIMENT: + type: 
v4b__nav-oracle-withsegmentation-heurnearobject__pick-heur-vp__place-heur-exact + NAV_TO_OBJ: + type: oracle + segmentation: True + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: True + fallback: none + goal: exact + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v5.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v5.yaml new file mode 100644 index 000000000..577e50825 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v5.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) 
+DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v5 # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v5__nav-oracle-nosegmentation-heurnearobject__pick-heur-vp__place-heur-exact + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: exact + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos),
0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v5b.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v5b.yaml new file mode 100644 index 000000000..534a44181 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v5b.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v5b # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v5b__nav-oracle-nosegmentation-heurnearobject__pick-heur-vp__place-heur-exact + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.3 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: exact + goal_radius: 0.3 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + 
use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v6.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v6.yaml new file mode 100644 index 000000000..478f8fb4a --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v6.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v6 # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v6__nav-oracle-nosegmentation-heurnearobject__pick-heur-exact__place-heur-exact + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: exact + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: exact + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat
benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v6b.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v6b.yaml new file mode 100644 index 000000000..a2a8c5a88 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v6b.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v6b # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v6b__nav-oracle-nosegmentation-heurnearobject__pick-heur-exact__place-heur-exact + NAV_TO_OBJ: + 
type: oracle + segmentation: False + fallback: heuristic_nav + goal: exact + goal_radius: 0.3 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: exact + goal_radius: 0.3 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v6c.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v6c.yaml new file mode 100644 index 000000000..8eeb7d503 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v6c.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v6c # experiment name 
+VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v6c__nav-oracle-nosegmentation-heurnearobject__pick-heur-exact__place-heur-exact + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: exact + goal_radius: 0.5 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: exact + goal_radius: 0.5 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git 
a/projects/habitat_ovmm/configs/env/hssd_eval_v7.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v7.yaml new file mode 100644 index 000000000..0e497ccc7 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v7.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v7 # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v7__nav-oracle-nosegmentation-heurnearobject__pick-heur-vpthenexact__place-heur-vpthenexact + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: vp_then_exact + goal_radius: 0.1 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: vp_then_exact + goal_radius: 0.1 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to 
evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v7b.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v7b.yaml new file mode 100644 index 000000000..a1d82aca4 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v7b.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v7b # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v7b__nav-oracle-nosegmentation-heurnearobject__pick-heur-vpthenexact__place-heur-vpthenexact + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: vp_then_exact + goal_radius: 0.3 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: vp_then_exact + goal_radius: 0.3 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # 
first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v7c.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v7c.yaml new file mode 100644 index 000000000..83bd47935 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v7c.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v7c # experiment name +VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v7c__nav-oracle-nosegmentation-heurnearobject__pick-heur-vpthenexact__place-heur-vpthenexact + NAV_TO_OBJ: + type: oracle + 
segmentation: False + fallback: heuristic_nav + goal: vp_then_exact + goal_radius: 0.5 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: vp_then_exact + goal_radius: 0.5 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git a/projects/habitat_ovmm/configs/env/hssd_eval_v8.yaml b/projects/habitat_ovmm/configs/env/hssd_eval_v8.yaml new file mode 100644 index 000000000..59e467380 --- /dev/null +++ b/projects/habitat_ovmm/configs/env/hssd_eval_v8.yaml @@ -0,0 +1,46 @@ +NO_GPU: 0 # 1: ignore IDs above and run on CPU, 0: run on GPUs with IDs above +NUM_ENVIRONMENTS: 1 # number of environments (per agent process) +DUMP_LOCATION: datadump # path to dump models and log +EXP_NAME: eval_hssd_v8 # experiment name 
+VISUALIZE: 0 # 1: render observation and predicted semantic map, 0: no visualization +PRINT_IMAGES: 1 # 1: save visualization as images, 0: no image saving +GROUND_TRUTH_SEMANTICS: 1 # 1: use ground-truth semantics (for debugging / ablations) +seed: 0 # seed +SHOW_RL_OBS: False # whether to show the observations passed to RL policies, for debugging + +EXPERIMENT: + type: v8__nav-oracle-nosegmentation-heurnearobject__pick-heur-vp__place-heur-vpthenexact + NAV_TO_OBJ: + type: oracle + segmentation: False + fallback: heuristic_nav + goal: max_iou_viewpoint + goal_radius: 0.3 + NAV_TO_REC: + type: oracle + segmentation: False + fallback: none + goal: vp_then_exact + goal_radius: 0.5 + +ENVIRONMENT: + forward: 0.10 # forward motion (in meters) Note: changing this should require a corresponding change in the habitat benchmark config + turn_angle: 5 # agent turn angle (in degrees) Note: changing this should require a corresponding change in the habitat benchmark config + frame_height: 640 # first-person frame height (in pixels) + frame_width: 480 # first-person frame width (in pixels) + camera_height: 1.31 # camera sensor height (in metres) + hfov: 42.0 # horizontal field of view (in degrees) + min_depth: 0.0 # minimum depth for depth sensor (in metres) + max_depth: 10.0 # maximum depth for depth sensor (in metres) + num_receptacles: 21 + category_map_file: projects/real_world_ovmm/configs/example_cat_map.json + use_detic_viz: False + evaluate_instance_tracking: False # whether to evaluate the built instance map against groundtruth instance ids + +EVAL_VECTORIZED: + simulator_gpu_ids: [1, 2, 3, 4, 5, 6, 7] # IDs of GPUs to use for vectorized environments + split: val # eval split + num_episodes_per_env: null # number of eval episodes per environment + record_videos: 1 # 1: record videos from printed images, 0: don't + record_planner_videos: 0 # 1: record planner videos (if record videos), 0: don't + metrics_save_freq: 5 # save metrics after every n episodes diff --git 
a/projects/habitat_ovmm/eval_baselines_agent.py b/projects/habitat_ovmm/eval_baselines_agent.py index 3ff0ff139..87c96b57b 100644 --- a/projects/habitat_ovmm/eval_baselines_agent.py +++ b/projects/habitat_ovmm/eval_baselines_agent.py @@ -41,7 +41,7 @@ parser.add_argument( "--baseline_config_path", type=str, - default="projects/habitat_ovmm/configs/agent/heuristic_agent.yaml", + default="projects/habitat_ovmm/configs/agent/oracle_agent.yaml", help="Path to config yaml", ) parser.add_argument( @@ -57,6 +57,9 @@ choices=["baseline", "random"], help="Agent to evaluate", ) + parser.add_argument("--ep_start", type=int, default=None) + parser.add_argument("--ep_end", type=int, default=None) + parser.add_argument( "overrides", default=None, @@ -64,7 +67,10 @@ help="Modify config options from command line", ) args = parser.parse_args() - + if args.ep_start is not None and args.ep_end is not None: + args.overrides.append( + f'habitat.dataset.episode_ids=[{",".join([str(i) for i in range(args.ep_start, args.ep_end)])}]' + ) # get habitat config habitat_config, _ = get_habitat_config( args.habitat_config_path, overrides=args.overrides diff --git a/projects/habitat_ovmm/evaluator.py b/projects/habitat_ovmm/evaluator.py index 9306ac213..507136b4f 100644 --- a/projects/habitat_ovmm/evaluator.py +++ b/projects/habitat_ovmm/evaluator.py @@ -3,7 +3,6 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
- import json import os import time @@ -11,13 +10,16 @@ from enum import Enum from typing import TYPE_CHECKING, Any, Dict, Optional +import cv2 import numpy as np import pandas as pd +from habitat.utils.visualizations.utils import build_text_image, images_to_video from habitat_baselines.rl.ppo.ppo_trainer import PPOTrainer from omegaconf import DictConfig from tqdm import tqdm from utils.env_utils import create_ovmm_env_fn from utils.metrics_utils import get_stats_from_episode_metrics +from utils.video_utils import get_snapshots_from_disk, record_video if TYPE_CHECKING: from habitat.core.dataset import BaseEpisode @@ -41,8 +43,12 @@ def __init__(self, eval_config: DictConfig) -> None: self.results_dir = os.path.join( eval_config.DUMP_LOCATION, "results", eval_config.EXP_NAME ) + self.images_dir = os.path.join( + eval_config.DUMP_LOCATION, "images", eval_config.EXP_NAME + ) self.videos_dir = eval_config.habitat_baselines.video_dir os.makedirs(self.results_dir, exist_ok=True) + os.makedirs(self.images_dir, exist_ok=True) os.makedirs(self.videos_dir, exist_ok=True) super().__init__(eval_config) @@ -194,8 +200,8 @@ def _aggregate_metrics(self, episode_metrics: Dict[str, Any]) -> Dict[str, float [ k for metrics_per_episode in episode_metrics.values() - for k in metrics_per_episode - if k != "goal_name" + for k, v in metrics_per_episode.items() + if not isinstance(v, str) ] ) for v in episode_metrics.values(): @@ -212,6 +218,7 @@ def _aggregate_metrics(self, episode_metrics: Dict[str, Any]) -> Dict[str, float f"{k1}/mean": np.mean(v1), f"{k1}/min": np.min(v1), f"{k1}/max": np.max(v1), + f"{k1}/sum": np.sum(v1), }.items() }.items() ) @@ -220,13 +227,69 @@ def _aggregate_metrics(self, episode_metrics: Dict[str, Any]) -> Dict[str, float return aggregated_metrics def _write_results( - self, episode_metrics: Dict[str, Dict], aggregated_metrics: Dict[str, float] + self, + episode_metrics: Dict[str, Dict], + aggregated_metrics: Dict[str, float], + average_metrics: Dict[str, 
float], ) -> None: """Writes metrics tracked by environment to a file.""" with open(f"{self.results_dir}/aggregated_results.json", "w") as f: json.dump(aggregated_metrics, f, indent=4) with open(f"{self.results_dir}/episode_results.json", "w") as f: json.dump(episode_metrics, f, indent=4) + with open(f"{self.results_dir}/summary_results.json", "w") as f: + json.dump(average_metrics, f, indent=4) + + def _get_episode_completion_stage(self, metrics_at_episode_end): + # TODO: temporary + if metrics_at_episode_end["END.ovmm_place_success"] == 1: + return "0_overall_success" + elif metrics_at_episode_end["END.obj_anywhere_on_goal.0"] == 1: + return "1_place_anywhere_on_goal_success" + elif metrics_at_episode_end["END.ovmm_find_recep_phase_success"] == 1: + return "2_nav_to_goal_success_but_place_failure" + elif metrics_at_episode_end["END.ovmm_pick_object_phase_success"] == 1: + return "3_pick_success_but_nav_to_goal_failure" + elif metrics_at_episode_end["END.ovmm_find_object_phase_success"] == 1: + return "4_nav_to_object_success_but_pick_failure" + return "5_nav_to_object_failure" + + def get_episode_completion_stage(self, metrics_at_episode_end, coarse=False): + episode_completion_stage = self._get_episode_completion_stage( + metrics_at_episode_end + ) + if coarse: + coarse_stage = ( + "success" + if episode_completion_stage + in ["0_overall_success", "1_place_anywhere_on_goal_success"] + else "failure" + ) + return coarse_stage + return episode_completion_stage + + def get_all_episode_completion_stages(self): + # TODO: temporary: should be an Enum if productionized + return [ + "0_overall_success", + "1_place_anywhere_on_goal_success", + "2_nav_to_goal_success_but_place_failure", + "3_pick_success_but_nav_to_goal_failure", + "4_nav_to_object_success_but_pick_failure", + "5_nav_to_object_failure", + ] + + def initialize_episode_and_agent(self, agent): + observations, done = self._env.reset(), False + current_episode = self._env.get_current_episode() + agent.reset() 
+ self._check_set_planner_vis_dir(agent, current_episode) + print( + f"Starting evaluation for {self.config.EVAL_VECTORIZED.split} episode {current_episode.episode_id}" + ) + print(f"Using strategy: {self.config.EXPERIMENT.type}") + agent.set_oracle_info(self._env) + return observations, done, current_episode def local_evaluate( self, agent: "Agent", num_episodes: Optional[int] = None @@ -253,29 +316,116 @@ def local_evaluate( episode_metrics: Dict = {} count_episodes: int = 0 + computed_episodes = [] + skip_computed = False # TODO: temporary pbar = tqdm(total=num_episodes) while count_episodes < num_episodes: - observations, done = self._env.reset(), False - current_episode = self._env.get_current_episode() - agent.reset() - self._check_set_planner_vis_dir(agent, current_episode) - - current_episode_key = ( - f"{current_episode.scene_id.split('/')[-1].split('.')[0]}_" - f"{current_episode.episode_id}" + observations, done, current_episode = self.initialize_episode_and_agent( + agent ) + current_scene_name = current_episode.scene_id.split("/")[-1].split(".")[0] + current_episode_key = f"{current_scene_name}_{current_episode.episode_id}" current_episode_metrics = {} - while not done: + if skip_computed: + computed_episodes = [] + for completion_stage in self.get_all_episode_completion_stages(): + target_dir_experiment = os.path.join( + self.videos_dir, completion_stage, self.config.EXPERIMENT.type + ) + target_file_experiment = f"split_{self.config.EVAL_VECTORIZED.split}_scene_{current_scene_name}_episode_{current_episode.episode_id}" + if os.path.exists( + f"{target_dir_experiment}/{target_file_experiment}.json" + ): + computed_episodes.append(current_episode_key) + break + if current_episode_key in computed_episodes: + try: + with open( + f"{target_dir_experiment}/{target_file_experiment}.json", + "r", + ) as f: + episode_metrics[current_episode_key] = json.load(f) + print( + f"Skipping episode {current_episode.episode_id} because it has already been computed" + ) 
+ count_episodes += 1 + pbar.update(1) + + if self.config.EVAL_VECTORIZED.record_videos: + source_dir = os.path.join( + self.images_dir, current_episode_key + ) + target_dir_annotation = os.path.join( + "video_dir_annotation", + self.get_episode_completion_stage( + episode_metrics[current_episode_key], + coarse=True, + ), + f"scene_{current_scene_name}", + ) + target_file_annotation = f"split_{self.config.EVAL_VECTORIZED.split}_episode_{current_episode.episode_id}" + os.makedirs(target_dir_annotation, exist_ok=True) + with open( + f"{target_dir_annotation}/{target_file_annotation}.json", + "w", + ) as f: + json.dump( + episode_metrics[current_episode_key], f, indent=4 + ) + + import shutil + + shutil.copyfile( + f"{target_dir_experiment}/{target_file_experiment}.mp4", + f"{target_dir_annotation}/{target_file_annotation}.mp4", + ) + + continue + except Exception as e: + print( + f"Error {e} loading metrics for {current_episode_key}. Not skipping. Recomputing..." + ) + + steps, max_steps = -1, 2000 + max_nav_obj_steps = 800 + start_time = time.time() + + while not done and steps < max_steps: + steps += 1 action, info, _ = agent.act(observations) observations, done, hab_info = self._env.apply_action(action, info) + print( + f"Timestep:\t{steps}\t{info['curr_skill']}\t({hab_info['ovmm_dist_to_pick_goal']:.4f},\t{hab_info['ovmm_dist_to_place_goal']:.4f})", + end="\r", + ) + # print(f"Current skill: {info['curr_skill']}") + # print( + # f"info['ovmm_dist_to_pick_goal']:\t{hab_info['ovmm_dist_to_pick_goal']:.4f}" + # ) + # print( + # f"info['ovmm_dist_to_keep_goal']:\t{hab_info['ovmm_dist_to_place_goal']:.4f}" + # ) + + if info["curr_skill"] == "NAV_TO_OBJ" and steps > max_nav_obj_steps: + print("Nav to obj is taking too long, moving to next episode") + break if "skill_done" in info and info["skill_done"] != "": metrics = self._extract_scalars_from_info(hab_info) metrics_at_skill_end = { f"{info['skill_done']}." 
+ k: v for k, v in metrics.items() } + if ( + info["curr_skill"] == "NAV_TO_REC" + and info["skill_done"] == "PICK" + and metrics_at_skill_end["PICK.ovmm_pick_object_phase_success"] + == 0 + ): + print("Pick failure, the rest of the episode is moot") + break + current_episode_metrics = { **metrics_at_skill_end, **current_episode_metrics, @@ -283,29 +433,122 @@ def local_evaluate( if "goal_name" in info: current_episode_metrics["goal_name"] = info["goal_name"] + end_time = time.time() + print(f"Episode took {end_time - start_time} seconds") + metrics = self._extract_scalars_from_info(hab_info) + metrics["total_time_in_seconds"] = end_time - start_time + metrics["done"] = 1.0 if done else 0.0 metrics_at_episode_end = {"END." + k: v for k, v in metrics.items()} current_episode_metrics = { **metrics_at_episode_end, **current_episode_metrics, } + current_episode_metrics["data_split"] = self.config.EVAL_VECTORIZED.split + current_episode_metrics["scene_name"] = current_scene_name + current_episode_metrics["episode_id"] = current_episode.episode_id + current_episode_metrics[ + "episode_completion_stage" + ] = self.get_episode_completion_stage(current_episode_metrics) + current_episode_metrics["experiment_name"] = self.config.EXP_NAME + current_episode_metrics["experiment_type"] = self.config.EXPERIMENT.type if "goal_name" in info: current_episode_metrics["goal_name"] = info["goal_name"] - episode_metrics[current_episode_key] = current_episode_metrics - if len(episode_metrics) % self.metrics_save_freq == 0: - aggregated_metrics = self._aggregate_metrics(episode_metrics) - self._write_results(episode_metrics, aggregated_metrics) + source_dir = os.path.join(self.images_dir, current_episode_key) + target_dir_experiment = os.path.join( + self.videos_dir, + self.get_episode_completion_stage( + current_episode_metrics, coarse=False + ), + self.config.EXPERIMENT.type, + ) + target_file_experiment = 
f"split_{self.config.EVAL_VECTORIZED.split}_scene_{current_scene_name}_episode_{current_episode.episode_id}" + + target_dir_annotation = os.path.join( + "video_dir_annotation", + self.get_episode_completion_stage(current_episode_metrics, coarse=True), + f"scene_{current_scene_name}", + ) + target_file_annotation = f"split_{self.config.EVAL_VECTORIZED.split}_episode_{current_episode.episode_id}" + + save_down_videos = self.config.EVAL_VECTORIZED.record_videos + # try: + # # if the episode has already been computed, and the new episode run is not better than the older, don't save down videos + # # the definition of better is (1) new computation results in overall success, (2) new computation has less steps than older one. + # metrics_from_earlier_run_of_same_episode = None + # with open(f"""{os.path.join("video_dir_annotation", "success", f"scene_{current_scene_name}")}/{target_file_annotation}.json""", "r") as f: + # metrics_from_earlier_run_of_same_episode = json.load(f) + # if ( + # metrics_from_earlier_run_of_same_episode is not None and ( + # metrics_from_earlier_run_of_same_episode["END.num_steps"] <= current_episode_metrics["END.num_steps"] + # or self.get_episode_completion_stage(current_episode_metrics, coarse=True) != "success" + # ) + # ): + # save_down_videos = False + # except Exception: + # pass + + if ( + "v8" not in self.config.EXP_NAME + and self.get_episode_completion_stage( + current_episode_metrics, coarse=True + ) + == "failure" + ): + save_down_videos = False + + if save_down_videos: + os.makedirs(target_dir_experiment, exist_ok=True) + with open( + f"{target_dir_experiment}/{target_file_experiment}.json", "w" + ) as f: + json.dump(current_episode_metrics, f, indent=4) + # frames = get_snapshots_from_disk(source_dir, snapshot_file_prefix="tp_snapshot") + # if frames is not None and len(frames) > 0: + # images_to_video(frames, target_dir_experiment, target_file_experiment, fps=24, quality=5) + + os.makedirs(target_dir_annotation, exist_ok=True) + 
with open( + f"{target_dir_annotation}/{target_file_annotation}.json", "w" + ) as f: + json.dump(current_episode_metrics, f, indent=4) + + episode_frames = self._env.habitat_env.env._env._env._task._frames + if episode_frames is not None and len(episode_frames) > 0: + robot_goal_text = build_text_image( + episode_frames[0], + f"Robot's goal: {current_episode_metrics['goal_name'].replace('_', ' ')}", + color="black", + ) + human_goal_text = build_text_image( + episode_frames[0], + "Your goal: Say the actions the robot is performing in natural language.", + color="black", + ) + episode_frames = [ + np.concatenate( + (robot_goal_text, frame, human_goal_text), axis=0 + ) + for frame in episode_frames + ] + images_to_video( + episode_frames, + target_dir_annotation, + target_file_annotation, + fps=24, + quality=5, + ) + episode_metrics[current_episode_key] = current_episode_metrics count_episodes += 1 pbar.update(1) self._env.close() aggregated_metrics = self._aggregate_metrics(episode_metrics) - self._write_results(episode_metrics, aggregated_metrics) - average_metrics = self._summarize_metrics(episode_metrics) + self._write_results(episode_metrics, aggregated_metrics, average_metrics) self._print_summary(average_metrics) return average_metrics diff --git a/projects/habitat_ovmm/shortest_path_follower_example.py b/projects/habitat_ovmm/shortest_path_follower_example.py new file mode 100644 index 000000000..6698aff8c --- /dev/null +++ b/projects/habitat_ovmm/shortest_path_follower_example.py @@ -0,0 +1,238 @@ +#!/usr/bin/env python3 + +# Copyright (c) Meta Platforms, Inc. and its affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +import argparse +import os + +import magnum as mn +import numpy as np +from habitat.core.utils import try_cv2_import +from habitat.sims.habitat_simulator.actions import HabitatSimActions +from habitat.tasks.nav.shortest_path_follower import ShortestPathFollower +from habitat.tasks.utils import cartesian_to_polar +from habitat.utils.geometry_utils import quaternion_from_coeff, quaternion_rotate_vector +from habitat.utils.visualizations import maps +from habitat.utils.visualizations.utils import images_to_video +from utils.config_utils import ( + create_agent_config, + create_env_config, + get_habitat_config, + get_omega_config, +) +from utils.env_utils import create_ovmm_env_fn + +from home_robot.core.interfaces import DiscreteNavigationAction + +cv2 = try_cv2_import() + +IMAGE_DIR = os.path.join("examples", "images") +if not os.path.exists(IMAGE_DIR): + os.makedirs(IMAGE_DIR) + +os.environ["OPENBLAS_NUM_THREADS"] = "1" +os.environ["NUMEXPR_NUM_THREADS"] = "1" +os.environ["MKL_NUM_THREADS"] = "1" + +# def _quat_to_xy_heading(quat): +# direction_vector = np.array([0, 0, -1]) + +# heading_vector = quaternion_rotate_vector(quat, direction_vector) + +# phi = cartesian_to_polar(-heading_vector[2], heading_vector[0])[1] +# return np.array([phi], dtype=np.float32) + +DISCRETE_ACTION_MAP = { + HabitatSimActions.stop: DiscreteNavigationAction.STOP, + HabitatSimActions.move_forward: DiscreteNavigationAction.MOVE_FORWARD, + HabitatSimActions.turn_left: DiscreteNavigationAction.TURN_LEFT, + HabitatSimActions.turn_right: DiscreteNavigationAction.TURN_RIGHT, +} + + +def draw_top_down_map(info, output_size): + return maps.colorize_draw_agent_and_fit_to_height(info["top_down_map"], output_size) + + +def shortest_path_example(config): + """ + Example script for performing oracle navigation to object in OVMM episodes. + Utilizes ShortestPathFollower to output discrete actions in HabitatOpenVocabManipEnv. 
+ Note: HabitatOpenVocabManipEnv internally takes care of converting discrete actions to continuous actions. + Note: The environment hierarchy above is as follows: + ovmm_env + HabitatOpenVocabManipEnv + ovmm_env.habitat_env + GymHabitatEnv + ovmm_env.habitat_env.env + HabGymWrapper instance + ovmm_env.habitat_env.env._env + RLTaskEnv instance + ovmm_env.habitat_env.env._env._env + habitat.core.env.Env + ovmm_env.habitat_env.env._env._env.sim + OVMMSim + """ + ovmm_env = create_ovmm_env_fn(config) + print(f"Total number of episodes in env: {ovmm_env.number_of_episodes}") + + # Keep a default minimum goal radius of 0.1, but increase it if robot step size is too large + goal_radius = max(0.1, getattr(config.habitat.simulator, "forward_step_size", 0.1)) + + follower = ShortestPathFollower( + ovmm_env.habitat_env.env._env.habitat_env.sim, goal_radius, False + ) + + for _ in range(ovmm_env.number_of_episodes): + ovmm_env.reset() + episode_id = ovmm_env.get_current_episode().episode_id + dirname = os.path.join( + IMAGE_DIR, + "shortest_path_example_ovmm", + f"{episode_id}", + ) + if not os.path.exists(dirname): + os.makedirs(dirname) + log_file_name = f"logs_ep_{episode_id}.txt" + + with open(os.path.join(os.getcwd(), dirname, log_file_name), "w") as f: + f.write("Environment creation successful\n") + f.write("Agent stepping around inside environment.\n") + images_third_person = [] + steps, max_steps = 0, 1000 + info = None + object_pos = ovmm_env.habitat_env.env._env.habitat_env.current_episode.candidate_objects[ + 0 + ].position + goal_pos = ovmm_env.habitat_env.env._env.habitat_env.current_episode.candidate_goal_receps[ + 0 + ].position + + goal_pos_viewpoints = ovmm_env.habitat_env.env._env.habitat_env.current_episode.candidate_goal_receps[ + 0 + ].view_points + ious = [viewpoint.iou for viewpoint in goal_pos_viewpoints] + max_iou_idx = ious.index(max(ious)) + goal_pos_max_iou_viewpoint = goal_pos_viewpoints[ + max_iou_idx + ].agent_state.position + + # for idx,
viewpoint in enumerate(goal_pos_viewpoints): + # print(f"\nViewpoint {idx}") + # print(f"Viewpoint position: {viewpoint.agent_state.position}") + # print(f"Viewpoint rotation: {viewpoint.agent_state.rotation}") + # print(f"Viewpoint IOU: {viewpoint.iou}") + + # print(f"\nMax IOU viewpoint {max_iou_idx}") + # print(f"Max IOU viewpoint position: {goal_pos_max_iou_viewpoint}") + goal = mn.Vector3(goal_pos_max_iou_viewpoint) + goal_orientation = quaternion_from_coeff( + goal_pos_viewpoints[max_iou_idx].agent_state.rotation + ) + while ( + not ovmm_env.habitat_env.env._env.habitat_env.episode_over + and steps < max_steps + ): + if steps != 0: + # curr_quat = follower._sim.robot.sim_obj.rotation + # curr_rotation = [ + # curr_quat.vector.x, + # curr_quat.vector.y, + # curr_quat.vector.z, + # curr_quat.scalar, + # ] + # curr_quat = quaternion_from_coeff( + # curr_rotation + # ) + # # get heading angle + # rot = _quat_to_xy_heading( + # curr_quat.inverse() + # ) + # rot = rot - np.pi / 2 + # # convert back to quaternion + # ang_pos = rot[0] + # curr_rot = mn.Quaternion( + # mn.Vector3(0, np.sin(ang_pos / 2), 0), np.cos(ang_pos / 2) + # ) + + f.write( + f"Current agent location:\t{follower._sim.robot.base_pos}\n" + # f"Current agent orientation:\t{curr_quat}\n" + # f"Current agent orientation:\t{curr_rot}\n" + f"Navigation goal location:\t{goal}\n" + f"Navigation goal orientation:\t{goal_orientation}\n" + # f"Difference between orientations:\t{curr_quat * goal_orientation.inverse()}\n" + f"info['ovmm_dist_to_pick_goal']:\t{info['ovmm_dist_to_pick_goal']}\n" + f"info['ovmm_dist_to_keep_goal']:\t{info['ovmm_dist_to_place_goal']}\n" + ) + + f.write(f"\nTimestep: {steps}\n") + print(f"Timestep: {steps}") + best_action = DISCRETE_ACTION_MAP[follower.get_next_action(goal)] + f.write(f"Agent action taken: {best_action}\n") + if best_action is None: + break + + observations, done, info = ovmm_env.apply_action(best_action, info) + steps += 1 + info["timestep"] = steps + if 
config.PRINT_IMAGES and config.GROUND_TRUTH_SEMANTICS: + images_third_person.append(observations.third_person_image) + + if len(images_third_person): + images_to_video(images_third_person, dirname, "trajectory_third_person") + if steps >= max_steps: + f.write("Max steps reached! Aborting episode...") + else: + f.write("Episode finished succesfully") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--evaluation_type", + type=str, + choices=["local", "local_vectorized", "remote"], + default="local", + ) + parser.add_argument("--num_episodes", type=int, default=None) + parser.add_argument( + "--habitat_config_path", + type=str, + default="ovmm/ovmm_eval.yaml", + help="Path to config yaml", + ) + parser.add_argument( + "--env_config_path", + type=str, + default="projects/habitat_ovmm/configs/env/hssd_eval.yaml", + help="Path to config yaml", + ) + parser.add_argument( + "overrides", + default=None, + nargs=argparse.REMAINDER, + help="Modify config options from command line", + ) + args = parser.parse_args() + + # get habitat config + habitat_config, _ = get_habitat_config( + args.habitat_config_path, + overrides=args.overrides + + [ + "+habitat/task/measurements@habitat.task.measurements.top_down_map=top_down_map" + ], + ) + + # get env config + env_config = get_omega_config(args.env_config_path) + + # merge habitat and env config to create env config + env_config = create_env_config( + habitat_config, env_config, evaluation_type=args.evaluation_type + ) + + shortest_path_example(env_config) diff --git a/projects/habitat_ovmm/utils/drive_utils.py b/projects/habitat_ovmm/utils/drive_utils.py new file mode 100644 index 000000000..c16078123 --- /dev/null +++ b/projects/habitat_ovmm/utils/drive_utils.py @@ -0,0 +1,72 @@ +import os + +from google.oauth2.credentials import Credentials +from google_auth_oauthlib.flow import InstalledAppFlow +from googleapiclient.discovery import build +from googleapiclient.http import 
MediaFileUpload + +# Google Drive API credentials +SCOPES = ["https://www.googleapis.com/auth/drive"] +CLIENT_SECRET_FILE = "client_secret.json" # Update with your file name +API_NAME = "drive" +API_VERSION = "v3" + +# Folder ID of the destination folder in Google Drive +DESTINATION_FOLDER_ID = ( + "1Hvy2AEZ9C1_a6AFwqEPoe3K7Q7v5di-K" # Update with your folder ID +) + + +def authenticate(): + creds = None + if os.path.exists("token.json"): + creds = Credentials.from_authorized_user_file("token.json", SCOPES) + if not creds or not creds.valid: + flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRET_FILE, SCOPES) + creds = flow.run_local_server(port=0) + with open("token.json", "w") as token: + token.write(creds.to_authorized_user_file()) + return creds + + +def upload_to_drive(service, local_path, parent_id): + file_name = os.path.basename(local_path) + media = MediaFileUpload(local_path) + + file_metadata = {"name": file_name, "parents": [parent_id]} + + uploaded_file = ( + service.files() + .create(body=file_metadata, media_body=media, fields="id") + .execute() + ) + + print(f'Uploaded {file_name} with ID: {uploaded_file["id"]}') + + +def upload_folder_contents(service, local_folder, parent_id): + for item in os.listdir(local_folder): + item_path = os.path.join(local_folder, item) + if os.path.isfile(item_path): + upload_to_drive(service, item_path, parent_id) + elif os.path.isdir(item_path): + folder_name = os.path.basename(item_path) + folder_metadata = { + "name": folder_name, + "mimeType": "application/vnd.google-apps.folder", + "parents": [parent_id], + } + created_folder = ( + service.files().create(body=folder_metadata, fields="id").execute() + ) + upload_folder_contents(service, item_path, created_folder["id"]) + + +def main(): + creds = authenticate() + service = build(API_NAME, API_VERSION, credentials=creds) + upload_folder_contents(service, "video_dir", DESTINATION_FOLDER_ID) + + +if __name__ == "__main__": + main() diff --git 
a/projects/habitat_ovmm/utils/metrics_utils.py b/projects/habitat_ovmm/utils/metrics_utils.py index f6d266ac9..e6baa9465 100644 --- a/projects/habitat_ovmm/utils/metrics_utils.py +++ b/projects/habitat_ovmm/utils/metrics_utils.py @@ -22,12 +22,22 @@ def aggregate_metrics(episode_metrics_df: pd.DataFrame) -> pd.DataFrame: """ # Drop the columns with string values episode_metrics_df = episode_metrics_df.drop( - columns=["episode_id", "goal_name", "END.ovmm_place_object_phase_success"], + columns=[ + "episode_id", + "goal_name", + "data_split", + "scene_name", + "episode_completion_stage", + "experiment_name", + "experiment_type", + ], errors="ignore", ) # Compute aggregated metrics for each column, excluding NaN values, to get mean, min, max, and count - aggregated_metrics = episode_metrics_df.agg(["mean", "min", "max", "count"], axis=0) + aggregated_metrics = episode_metrics_df.agg( + ["mean", "min", "max", "sum", "count"], axis=0 + ) return aggregated_metrics.T @@ -49,6 +59,13 @@ def compute_stats(aggregated_metrics: pd.DataFrame) -> dict: """ stats = {} stats["episode_count"] = aggregated_metrics.loc["END.ovmm_place_success"]["count"] + stats["episode_count_overall_success"] = aggregated_metrics.loc[ + "END.ovmm_place_success" + ]["sum"] + stats["episode_count_partial_success"] = aggregated_metrics.loc[ + "END.obj_anywhere_on_goal.0" + ]["sum"] + stats["does_want_terminate"] = aggregated_metrics.loc["END.does_want_terminate"][ "mean" ] @@ -56,9 +73,13 @@ def compute_stats(aggregated_metrics: pd.DataFrame) -> dict: # find indices in the DataFrame with stage success in their name and compute success rate for k in aggregated_metrics.index: - if ("phase_success" in k and "END" in k) or "overall_success" in k: + if "phase_success" in k and "END" in k: stats[k.replace("END.ovmm_", "")] = aggregated_metrics.loc[k]["mean"] + stats["place_object_anywhere_on_goal_phase_success"] = aggregated_metrics.loc[ + "END.obj_anywhere_on_goal.0" + ]["mean"] + stats["overall_success"] 
= aggregated_metrics.loc["overall_success"]["mean"] stats["partial_success"] = aggregated_metrics.loc["partial_success"]["mean"] return stats @@ -91,6 +112,7 @@ def get_stats_from_episode_metrics( episode_ids = episode_ids.astype(str) # The task is considered successful if the agent places the object without robot collisions + enough_success = episode_metrics["END.obj_anywhere_on_goal.0"] == 1 overall_success = ( episode_metrics["END.robot_collisions.robot_scene_colls"] == 0 ) * (episode_metrics["END.ovmm_place_success"] == 1) @@ -100,8 +122,9 @@ def get_stats_from_episode_metrics( episode_metrics["END.ovmm_find_object_phase_success"] + episode_metrics["END.ovmm_pick_object_phase_success"] + episode_metrics["END.ovmm_find_recep_phase_success"] + + enough_success + overall_success - ) / 4.0 + ) / 5.0 episode_metrics = episode_metrics.assign( episode_id=episode_ids, diff --git a/projects/habitat_ovmm/utils/video_utils.py b/projects/habitat_ovmm/utils/video_utils.py new file mode 100644 index 000000000..09449ce46 --- /dev/null +++ b/projects/habitat_ovmm/utils/video_utils.py @@ -0,0 +1,48 @@ +import glob +import os +import shutil + +import cv2 +from natsort import natsorted + + +def get_snapshots_from_disk(source_dir: str, snapshot_file_prefix: str = "snapshot"): + frames = [] + image_paths = natsorted(glob.glob(f"{source_dir}/{snapshot_file_prefix}*.png")) + if len(image_paths) == 0: + return frames + + for filename in image_paths: + frames.append(cv2.imread(filename)) + return frames + + +def record_video(source_dir: str, target_dir: str, target_file: str): + # shutil.rmtree(target_dir, ignore_errors=True) + raise NotImplementedError + os.makedirs(target_dir, exist_ok=True) + print(f"Recording video {target_dir}/{target_file}") + + frames = get_snapshots_from_disk(source_dir, snapshot_file_prefix="snapshot") + # Get the dimensions of the first image (assuming all images have the same dimensions) + first_image = frames[0] + height, width, _ = first_image.shape + size 
= (width, height) + + out = cv2.VideoWriter( + f"{target_dir}/{target_file}.mp4", + cv2.VideoWriter_fourcc(*"mp4v"), + 15, + size, + ) + for frame in frames: + out.write(frame) + out.release() + + +if __name__ == "__main__": + record_video( + source_dir="datadump/images/eval_hssd/107733960_175999701_3", + target_dir="video_dir", + target_file="test", + ) diff --git a/src/home_robot/home_robot/agent/objectnav_agent/oracle_nav_agent.py b/src/home_robot/home_robot/agent/objectnav_agent/oracle_nav_agent.py new file mode 100644 index 000000000..624225afb --- /dev/null +++ b/src/home_robot/home_robot/agent/objectnav_agent/oracle_nav_agent.py @@ -0,0 +1,101 @@ +import os +from typing import Union + +import numpy as np +from habitat.core.agent import Agent +from habitat.sims.habitat_simulator.actions import HabitatSimActions +from habitat.tasks.nav.shortest_path_follower import ShortestPathFollower + +from home_robot.core.interfaces import DiscreteNavigationAction + +# Quiet the Habitat simulator logging +os.environ["MAGNUM_LOG"] = "quiet" +os.environ["HABITAT_SIM_LOG"] = "quiet" + + +def euclidean(v1, v2): + return sum((p - q) ** 2 for p, q in zip(v1, v2)) ** 0.5 + + +class ShortestPathFollowerAgent(Agent): + r"""Implementation of the :ref:`habitat.core.agent.Agent` interface that + uses :ref`habitat.tasks.nav.shortest_path_follower.ShortestPathFollower` utility class + for extracting the action on the shortest path to the goal. 
+ """ + + def __init__(self): + self.env = None + self.shortest_path_follower = None + self.goal_coordinates = None # needs to be a list, and the agent is implemented to follow one after the other in order + self.discrete_action_map = { + HabitatSimActions.stop: DiscreteNavigationAction.STOP, + HabitatSimActions.move_forward: DiscreteNavigationAction.MOVE_FORWARD, + HabitatSimActions.turn_left: DiscreteNavigationAction.TURN_LEFT, + HabitatSimActions.turn_right: DiscreteNavigationAction.TURN_RIGHT, + } + self.current_goal = ( + 0 # index of the current goal in the list of goal coordinates + ) + self.coarse_navigation = False + + def set_oracle_info(self, env, goal_coordinates, goal_radius=0.5): + """Instantiate shortest path follower + + Args: + env: Habitat env + goal_coordinates: List of xyz goal coordinates. Agent implemented to follow one after the other in order + """ + self.env = env + self.shortest_path_follower = ShortestPathFollower( + sim=env.habitat_env.sim, + goal_radius=goal_radius, + return_one_hot=False, + ) + + filtered_goal_coordinates = [] + final_reference_goal = goal_coordinates[-1] + for goal in goal_coordinates: + euclidean_distance_to_final_goal = euclidean(goal, final_reference_goal) + if euclidean_distance_to_final_goal < 4: + # only add the goal if it is within 4m of the final goal + filtered_goal_coordinates.append(goal) + if not len(filtered_goal_coordinates) and len(goal_coordinates): + filtered_goal_coordinates = goal_coordinates + + self.goal_coordinates = filtered_goal_coordinates + self.current_goal = ( + 0 # index of the current goal in the list of goal coordinates + ) + + def act(self, observations, info) -> Union[int, np.ndarray]: + action = self.discrete_action_map[ + self.shortest_path_follower.get_next_action( + self.goal_coordinates[self.current_goal] + ) + ] + # print(f"Oracle action: {action}") + # print(f"Goal: {self.goal_coordinates[self.current_goal]}") + # print(f"Agent: 
{self.shortest_path_follower._sim.robot.base_pos}") + + terminate = False + if action == DiscreteNavigationAction.STOP: + if self.current_goal >= len(self.goal_coordinates) - 1: + terminate = True # completed all goals + else: + print() + print("Reached goal! Moving to next goal...") + print(f"Curr goal: {self.goal_coordinates[self.current_goal]}") + print(f"Next goal: {self.goal_coordinates[self.current_goal+1]}") + self.current_goal += 1 # move to next goal + return self.act(observations, info) + + return action, terminate + + def reset(self) -> None: + self.env = None + self.shortest_path_follower = None + self.goal_coordinates = None + self.goal_candidate = 0 + + def reset_vectorized(self) -> None: + self.reset() # or NotImplementedError, really. diff --git a/src/home_robot/home_robot/agent/ovmm_agent/ovmm_agent.py b/src/home_robot/home_robot/agent/ovmm_agent/ovmm_agent.py index fa95ddd42..9aac589da 100644 --- a/src/home_robot/home_robot/agent/ovmm_agent/ovmm_agent.py +++ b/src/home_robot/home_robot/agent/ovmm_agent/ovmm_agent.py @@ -8,10 +8,13 @@ from enum import IntEnum, auto from typing import Any, Dict, Optional, Tuple +import magnum as mn import numpy as np import torch +from habitat.tasks.nav.object_nav_task import ObjectGoal from home_robot.agent.objectnav_agent.objectnav_agent import ObjectNavAgent +from home_robot.agent.objectnav_agent.oracle_nav_agent import ShortestPathFollowerAgent from home_robot.agent.ovmm_agent.ovmm_perception import ( OvmmPerception, build_vocab_from_category_map, @@ -20,6 +23,9 @@ from home_robot.core.interfaces import DiscreteNavigationAction, Observations from home_robot.manipulation import HeuristicPickPolicy, HeuristicPlacePolicy from home_robot.perception.constants import RearrangeBasicCategories +from home_robot_sim.env.habitat_ovmm_env.habitat_ovmm_env import ( + HabitatOpenVocabManipEnv, +) class Skill(IntEnum): @@ -59,11 +65,14 @@ def __init__(self, config, device_id: int = 0): self.gaze_agent = None 
self.nav_to_obj_agent = None self.nav_to_rec_agent = None + self.nav_to_obj_closeness_check = False + self.nav_to_rec_closeness_check = False self.pick_agent = None self.place_agent = None self.pick_policy = None self.place_policy = None self.semantic_sensor = None + self._env = None if config.GROUND_TRUTH_SEMANTICS == 1 and self.store_all_categories_in_map: # currently we get ground truth semantics of only the target object category and all scene receptacles from the simulator @@ -81,9 +90,7 @@ def __init__(self, config, device_id: int = 0): config, self.device, verbose=self.verbose ) if config.AGENT.SKILLS.PLACE.type == "heuristic" and not self.skip_skills.place: - self.place_policy = HeuristicPlacePolicy( - config, self.device, verbose=self.verbose - ) + self.place_policy = HeuristicPlacePolicy(config, self.device, verbose=True) elif config.AGENT.SKILLS.PLACE.type == "rl" and not self.skip_skills.place: from home_robot.agent.ovmm_agent.ppo_agent import PPOAgent @@ -112,6 +119,12 @@ def __init__(self, config, device_id: int = 0): config.AGENT.SKILLS.NAV_TO_OBJ, device_id=device_id, ) + if ( + config.AGENT.SKILLS.NAV_TO_OBJ.type == "oracle" + and not self.skip_skills.nav_to_obj + ): + self.nav_to_obj_agent = ShortestPathFollowerAgent() + if ( config.AGENT.SKILLS.NAV_TO_REC.type == "rl" and not self.skip_skills.nav_to_rec @@ -123,9 +136,96 @@ def __init__(self, config, device_id: int = 0): config.AGENT.SKILLS.NAV_TO_REC, device_id=device_id, ) + if ( + config.AGENT.SKILLS.NAV_TO_REC.type == "oracle" + and not self.skip_skills.nav_to_rec + ): + self.nav_to_rec_agent = ShortestPathFollowerAgent() + self._fall_wait_steps = getattr(config.AGENT, "fall_wait_steps", 0) self.config = config + def get_position_for_max_iou_viewpoint(self, object_goal: ObjectGoal): + """ + Returns the position of the viewpoint with the maximum IOU with the object. 
+ """ + ious = [viewpoint.iou for viewpoint in object_goal.view_points] + max_iou_idx = ious.index(max(ious)) + max_iou_viewpoint_position = object_goal.view_points[ + max_iou_idx + ].agent_state.position + return max_iou_viewpoint_position + + def set_oracle_info(self, ovmm_env: HabitatOpenVocabManipEnv): + self._env = ovmm_env + + self.use_segmentation = False + if "withsegmentation" in self.config.EXPERIMENT.type: + self.use_segmentation = True + + if ( + self.config.AGENT.SKILLS.NAV_TO_OBJ.type == "oracle" + and not self.skip_skills.nav_to_obj + ): + # Extract the habitat_env from the ovmm env and provide it to the agent + episode_id = ovmm_env.get_current_episode().episode_id + print( + f"Providing oracle environment information to NAV_TO_OBJ agent for episode {episode_id}" + ) + object_goal = ovmm_env.habitat_env.env._env.habitat_env.current_episode.candidate_objects[ + 0 + ] + object_vp_nav_goal = self.get_position_for_max_iou_viewpoint(object_goal) + object_nav_goal = object_goal.position + + if self.config.EXPERIMENT.NAV_TO_OBJ.goal == "exact": + goal_coordinates = [object_nav_goal] + elif self.config.EXPERIMENT.NAV_TO_OBJ.goal == "max_iou_viewpoint": + goal_coordinates = [object_vp_nav_goal] + elif self.config.EXPERIMENT.NAV_TO_OBJ.goal == "vp_then_exact": + goal_coordinates = [object_vp_nav_goal, object_nav_goal] + + self.nav_to_obj_agent.set_oracle_info( + ovmm_env.habitat_env.env._env, + goal_coordinates=goal_coordinates, + goal_radius=self.config.EXPERIMENT.NAV_TO_OBJ.goal_radius, + ) + + if ( + self.config.AGENT.SKILLS.NAV_TO_REC.type == "oracle" + and not self.skip_skills.nav_to_rec + ): + # Extract the habitat_env from the ovmm env and provide it to the agent + print( + f"Providing oracle environment information to NAV_TO_REC agent for episode {episode_id}" + ) + # candidate_goal_receps = ovmm_env.habitat_env.env._env.habitat_env.current_episode.candidate_goal_receps + # candidate_goal_recep_coords = [] + # for goal_recep in candidate_goal_receps: 
+ # candidate_goal_recep_coords.append(mn.Vector3(self.get_position_for_max_iou_viewpoint(goal_recep))) + # print(f"Candidate goal position: {ovmm_env.habitat_env.env._env.habitat_env.current_episode.candidate_goal_receps[0].position}") + # for vp_idx, vp in enumerate(ovmm_env.habitat_env.env._env.habitat_env.current_episode.candidate_goal_receps[0].view_points): + # print(f"Viewpoint {vp_idx}: {vp.agent_state.position}") + # exit(1) + object_goal = ovmm_env.habitat_env.env._env.habitat_env.current_episode.candidate_goal_receps[ + 0 + ] + object_vp_nav_goal = self.get_position_for_max_iou_viewpoint(object_goal) + object_nav_goal = object_goal.position + + if self.config.EXPERIMENT.NAV_TO_REC.goal == "exact": + goal_coordinates = [object_nav_goal] + elif self.config.EXPERIMENT.NAV_TO_REC.goal == "max_iou_viewpoint": + goal_coordinates = [object_vp_nav_goal] + elif self.config.EXPERIMENT.NAV_TO_REC.goal == "vp_then_exact": + goal_coordinates = [object_vp_nav_goal, object_nav_goal] + + self.nav_to_rec_agent.set_oracle_info( + ovmm_env.habitat_env.env._env, + goal_coordinates=goal_coordinates, + goal_radius=self.config.EXPERIMENT.NAV_TO_REC.goal_radius, + ) + def _get_info(self, obs: Observations) -> Dict[str, torch.Tensor]: """Get inputs for visual skill.""" use_detic_viz = self.config.ENVIRONMENT.use_detic_viz @@ -331,6 +431,21 @@ def _heuristic_nav( terminate = False return action, info, terminate + def _oracle_nav( + self, + obs: Observations, + info: Dict[str, Any], + oracle_agent: ShortestPathFollowerAgent, + ) -> Tuple[DiscreteNavigationAction, Any]: + if self.use_segmentation: + _, planner_info = super().act(obs) + # info overwrites planner_info entries for keys with same name + info = {**planner_info, **info} + self.timesteps[0] -= 1 # objectnav agent increments timestep + info["timestep"] = self.timesteps[0] + action, terminate = oracle_agent.act(obs, info) + return action, info, terminate + def _heuristic_pick( self, obs: Observations, info: Dict[str, Any] ) 
-> Tuple[DiscreteNavigationAction, Any]: @@ -401,6 +516,24 @@ def _nav_to_obj( action, info, terminate = self._heuristic_nav(obs, info) elif nav_to_obj_type == "rl": action, info, terminate = self.nav_to_obj_agent.act(obs, info) + elif nav_to_obj_type == "oracle": + action, info, terminate = self._oracle_nav(obs, info, self.nav_to_obj_agent) + if ( + terminate + and self.config.EXPERIMENT.NAV_TO_OBJ.fallback == "heuristic_nav" + ): # Fallback to heuristic nav after oracle nav is done + print("[OVMM AGENT] Fallback to heuristic nav after oracle nav is done") + action, info, terminate = self._heuristic_nav(obs, info) + # if not self.nav_to_obj_closeness_check: + # action, info, terminate = self._oracle_nav(obs, info, self.nav_to_obj_agent) + # if terminate: + # terminate = False + # self.nav_to_obj_closeness_check = True + # if self.nav_to_obj_closeness_check: # Fallback to heuristic nav after oracle nav is done + # action, info, terminate = self._heuristic_nav(obs, info) + # self.timesteps[0] += 1 + # info["timestep"] = self.timesteps[0] + else: raise ValueError( f"Got unexpected value for NAV_TO_OBJ.type: {nav_to_obj_type}" @@ -486,6 +619,8 @@ def _nav_to_rec( action, info, terminate = self._heuristic_nav(obs, info) elif nav_to_rec_type == "rl": action, info, terminate = self.nav_to_rec_agent.act(obs, info) + elif nav_to_rec_type == "oracle": + action, info, terminate = self._oracle_nav(obs, info, self.nav_to_rec_agent) else: raise ValueError( f"Got unexpected value for NAV_TO_REC.type: {nav_to_rec_type}" diff --git a/src/home_robot/home_robot/navigation_planner/fmm_planner.py b/src/home_robot/home_robot/navigation_planner/fmm_planner.py index fbfb88bc6..f79ccdfd3 100644 --- a/src/home_robot/home_robot/navigation_planner/fmm_planner.py +++ b/src/home_robot/home_robot/navigation_planner/fmm_planner.py @@ -152,7 +152,7 @@ def set_multi_goal( if self.print_images and timestep is not None: cv2.imwrite( - os.path.join(self.vis_dir, f"planner_snapshot_{timestep}.png"), + 
os.path.join(self.vis_dir, f"planner_snapshot_{timestep:04d}.png"), (dist_vis * 255).astype(int), ) return dd diff --git a/src/home_robot_hw/home_robot_hw/env/visualizer.py b/src/home_robot_hw/home_robot_hw/env/visualizer.py index 2caf7b662..47e8cc8d7 100644 --- a/src/home_robot_hw/home_robot_hw/env/visualizer.py +++ b/src/home_robot_hw/home_robot_hw/env/visualizer.py @@ -268,7 +268,7 @@ def visualize( if self.print_images: cv2.imwrite( - os.path.join(self.vis_dir, "snapshot_{:03d}.png".format(timestep)), + os.path.join(self.vis_dir, "snapshot_{:04d}.png".format(timestep)), self.image_vis, ) @@ -486,11 +486,11 @@ def visualize( cv2.imshow("Visualization", self.image_vis) cv2.waitKey(1) - if self.print_images: - cv2.imwrite( - os.path.join(self.vis_dir, "snapshot_{:03d}.png".format(timestep)), - self.image_vis, - ) + # if self.print_images: + # cv2.imwrite( + # os.path.join(self.vis_dir, "snapshot_{:04d}.png".format(timestep)), + # self.image_vis, + # ) def _init_vis_image(self, goal_name: str): vis_image = np.ones((655, 1165, 3)).astype(np.uint8) * 255 diff --git a/src/home_robot_sim/home_robot_sim/env/habitat_objectnav_env/visualizer.py b/src/home_robot_sim/home_robot_sim/env/habitat_objectnav_env/visualizer.py index 57eb33bc3..778125364 100644 --- a/src/home_robot_sim/home_robot_sim/env/habitat_objectnav_env/visualizer.py +++ b/src/home_robot_sim/home_robot_sim/env/habitat_objectnav_env/visualizer.py @@ -449,11 +449,15 @@ def visualize( if self.show_images: cv2.imshow("Visualization", image_vis) cv2.waitKey(1) - if self.print_images: - cv2.imwrite( - os.path.join(self.vis_dir, "snapshot_{:03d}.png".format(timestep)), - image_vis, - ) + # if self.print_images: + # cv2.imwrite( + # os.path.join(self.vis_dir, "snapshot_{:04d}.png".format(timestep)), + # image_vis, + # ) + # cv2.imwrite( + # os.path.join(self.vis_dir, "tp_snapshot_{:04d}.png".format(timestep)), + # image_vis[V.Y1 : V.Y2, V.THIRD_PERSON_X1 : V.THIRD_PERSON_X2], + # ) def _visualize_semantic_frame( 
self, image_vis: np.ndarray, semantic_frame: np.ndarray, palette: List @@ -506,7 +510,7 @@ def _visualize_instance_counts( num_views_per_instance[instance.category_id.item()].append( len(instance.instance_views) ) - text = "Instance counts" + text = "Stretch Robot" offset = 48 y_pos = offset diff --git a/src/home_robot_sim/home_robot_sim/env/habitat_ovmm_env/habitat_ovmm_env.py b/src/home_robot_sim/home_robot_sim/env/habitat_ovmm_env/habitat_ovmm_env.py index ca635a776..ad9d49c6d 100644 --- a/src/home_robot_sim/home_robot_sim/env/habitat_ovmm_env/habitat_ovmm_env.py +++ b/src/home_robot_sim/home_robot_sim/env/habitat_ovmm_env/habitat_ovmm_env.py @@ -355,9 +355,10 @@ def apply_action( habitat_action = self._preprocess_action(action, self._last_habitat_obs) habitat_obs, _, dones, infos = self.habitat_env.step(habitat_action) # copy the keys in info starting with the prefix "is_curr_skill" into infos - for key in info: - if key.startswith("is_curr_skill"): - infos[key] = info[key] + if info is not None: + for key in info: + if key.startswith("is_curr_skill"): + infos[key] = info[key] self._last_habitat_obs = habitat_obs self._last_obs = self._preprocess_obs(habitat_obs) return self._last_obs, dones, infos