facebookresearch · kimbring2 · Apr 16, 2022 · Apr 16, 2022 · Apr 17, 2022 · Apr 18, 2022
diff --git a/introtonethack.ipynb b/introtonethack.ipynb
diff --git a/nle/env/base.py b/nle/env/base.py
@@ -233,6 +233,7 @@ def __init__(
 
         if actions is None:
             actions = FULL_ACTIONS
+
         self.actions = actions
 
         self.last_observation = ()
@@ -354,6 +355,8 @@ def step(self, action: int):
         # Careful: By default we re-use Numpy arrays, so copy before!
         last_observation = tuple(a.copy() for a in self.last_observation)
 
+        # Translate the agent's action index into the underlying NetHack action.
+
         observation, done = self.nethack.step(self.actions[action])
         is_game_over = observation[self._program_state_index][0] == 1
         if is_game_over or not self._allow_all_modes:

diff --git a/nle/env/tasks.py b/nle/env/tasks.py
@@ -140,6 +140,7 @@ def _is_episode_end(self, observation):
             neighbors = glyphs[y - 1 : y + 2, x - 1 : x + 2]
             if np.any(nethack.glyph_is_pet(neighbors)):
                 return self.StepStatus.TASK_SUCCESSFUL
+
         return self.StepStatus.RUNNING
 
 
@@ -157,6 +158,7 @@ def __init__(self, *args, **kwargs):
             if nethack.permonst(nethack.glyph_to_mon(glyph)).mname == "Oracle":
                 self.oracle_glyph = glyph
                 break
+
         assert self.oracle_glyph is not None
 
     def _is_episode_end(self, observation):
@@ -167,6 +169,7 @@ def _is_episode_end(self, observation):
         neighbors = glyphs[y - 1 : y + 2, x - 1 : x + 2]
         if np.any(neighbors == self.oracle_glyph):
             return self.StepStatus.TASK_SUCCESSFUL
+
         return self.StepStatus.RUNNING
 
 
@@ -278,9 +281,11 @@ def _reward_fn(self, last_observation, action, observation, end_status):
         explored_old = 0
         if key in self.dungeon_explored:
             explored_old = self.dungeon_explored[key]
+
         reward = explored - explored_old
         self.dungeon_explored[key] = explored
         time_penalty = self._get_time_penalty(last_observation, observation)
+
         return reward + time_penalty
 
 

diff --git a/nle/scripts/play.py b/nle/scripts/play.py
@@ -16,6 +16,10 @@
 import nle  # noqa: F401
 from nle import nethack
 
+import nle.visualization.utils as vis_utils
+from nle.nethack import actions as A
+
+
 _ACTIONS = tuple(
     [nethack.MiscAction.MORE]
     + list(nethack.CompassDirection)
@@ -53,6 +57,7 @@ def get_action(env, is_raw_env):
         while True:
             with no_echo():
                 ch = ord(os.read(0, 1))
+
             if ch in [nethack.C("c")]:
                 print("Received exit code {}. Aborting.".format(ch))
                 return None
@@ -61,6 +66,7 @@ def get_action(env, is_raw_env):
                     action = ch
                 else:
                     action = env.actions.index(ch)
+
                 break
             except ValueError:
                 print(
@@ -69,7 +75,9 @@ def get_action(env, is_raw_env):
                 )
                 if not FLAGS.print_frames_separately:
                     print("\033[2A")  # Go up 2 lines.
+
                 continue
+
     return action
 
 
@@ -82,7 +90,7 @@ def play():
             ttyrec = os.path.join(FLAGS.savedir, "nle.ttyrec.bz2")
         else:
             ttyrec = "/dev/null"
-        env = nethack.Nethack(ttyrec=ttyrec, wizard=FLAGS.wizard)
+        env = nethack.Nethack(ttyrec=ttyrec, wizard=FLAGS.wizard, character=FLAGS.character)
     else:
         env = gym.make(
             FLAGS.env,
@@ -92,7 +100,9 @@ def play():
             allow_all_yn_questions=True,
             allow_all_modes=True,
             wizard=FLAGS.wizard,
+            character=FLAGS.character,
         )
+
         if FLAGS.seeds is not None:
             env.seed(FLAGS.seeds)
 
@@ -109,6 +119,28 @@ def play():
     total_start_time = timeit.default_timer()
     start_time = total_start_time
 
+    agent = vis_utils.Agent()
+    last_obs = obs
+    action_history, message_history, popup_history = [], [], []
+    attribute_flag = False  # Becomes True once ATTRIBUTES has been sent.
+
+    glyphs = obs["glyphs"]
+    blstats = vis_utils.BLStats(*obs["blstats"][:-1])
+    agent.update_blstats(blstats)
+
+    tty_chars = bytes(obs["tty_chars"].reshape(-1)).decode("ascii")
+    if "attribute" in tty_chars:
+        gender, race, role, alignment = vis_utils.parse_attribute(tty_chars)
+        agent.update_chracters(role, alignment, race, gender)
+
+    agent.update_items(
+        obs["inv_letters"], obs["inv_glyphs"], obs["inv_strs"], obs["inv_oclasses"]
+    )
+    message, popup, _ = vis_utils.get_message_and_popup(last_obs)
+    vis_utils.draw_all(
+        glyphs, agent, last_obs, steps, action_history, message_history, popup_history
+    )
+
     while True:
         if not FLAGS.no_render:
             if not is_raw_env:
@@ -117,7 +149,7 @@ def play():
                 act_str = repr(env.actions[action]) if action is not None else ""
                 print(f"Previous action: {str(act_str):64s}")
                 print("-" * 8)
-                env.render(FLAGS.render_mode)
+                #env.render(FLAGS.render_mode)
                 print("-" * 8)
                 print(obs["blstats"])
                 if not FLAGS.print_frames_separately:
@@ -130,25 +162,60 @@ def play():
                         obs["tty_chars"], obs["tty_colors"], obs["tty_cursor"]
                     )
                 )
+
                 if not FLAGS.print_frames_separately:
                     go_back(num_lines=len(obs["tty_chars"]) + 3)
 
         action = get_action(env, is_raw_env)
 
+        # First iteration only: override the key press with ATTRIBUTES.
+        if not attribute_flag:
+            action = env.actions.index(A.Command.ATTRIBUTES)
+            attribute_flag = True
+
+        if action is not None:  # None: user requested exit (see get_action).
+            action_history.append(str(env.actions[int(action)]))
         if action is None:
             break
 
         if is_raw_env:
             obs, done = env.step(action)
         else:
             obs, reward, done, info = env.step(action)
+
         steps += 1
 
+        # Refresh the visualization state from the observation returned by step().
+
+        glyphs = obs['glyphs']
+
+        tty_chars = bytes(obs['tty_chars'].reshape(-1)).decode('ascii')
+        if "attribute" in tty_chars:
+            gender, race, role, alignment = vis_utils.parse_attribute(tty_chars)
+            agent.update_chracters(role, alignment, race, gender)
+
+        blstats = vis_utils.BLStats(*obs['blstats'][:-1])
+        agent.update_blstats(blstats)
+
+        agent.update_items(obs['inv_letters'], obs['inv_glyphs'], obs['inv_strs'], obs['inv_oclasses'])
+
+        # NOTE(review): reads last_obs (pre-step observation), not obs -- confirm.
+        message, popup, _ = vis_utils.get_message_and_popup(last_obs)
+        # Keep per-step histories; both lists are passed to draw_all below.
+
+        message_history.append(message)
+        popup_history.append(popup)
+        # draw_all gets the new glyphs but last_obs, which is reassigned only
+        # after this call -- NOTE(review): confirm the one-step lag is intended.
+
+        vis_utils.draw_all(glyphs, agent, last_obs, steps, action_history, message_history, popup_history)
+
         if is_raw_env:
             done = done or steps >= FLAGS.max_steps  # NLE does this by default.
         else:
             mean_reward += (reward - mean_reward) / steps
 
+        last_obs = obs
         if not done:
             continue
 
@@ -172,7 +239,9 @@ def play():
 
         if episodes == FLAGS.ngames:
             break
+
         env.reset()
+
     env.close()
     print(
         "Finished after %i episodes and %f seconds. Mean sps: %f"
@@ -252,6 +321,13 @@ def main():
         action="store_true",
         help="Use wizard mode.",
     )
+    parser.add_argument(
+        "--character",
+        default="kni-hum-neu-mal",
+        help="(str): name of character. "
+        "Defaults to '%(default)s'.",
+    )
+
     global FLAGS
     FLAGS = parser.parse_args()
 

diff --git a/nle/visualization/3.6.1tiles32.png b/nle/visualization/3.6.1tiles32.png