google-deepmind · Theomat · Dec 7, 2020
diff --git a/hanabi_learning_environment/hanabi_lib/hanabi_hand.cc b/hanabi_learning_environment/hanabi_lib/hanabi_hand.cc
@@ -84,6 +84,11 @@ void HanabiHand::AddCard(HanabiCard card,
   card_knowledge_.push_back(initial_knowledge);
 }
 
+// Inserts `card` into the hand so that it ends up at position `card_index`.
+// Only cards_ is modified; card_knowledge_ is left untouched, so knowledge
+// entries already stored for the hand are kept exactly as they are.
+void HanabiHand::InsertCard(HanabiCard card, int card_index) {
+  REQUIRE(card.IsValid());
+  // Inserting at an iterator outside [begin(), end()] is undefined behaviour,
+  // so reject bad positions up front. card_index == size() appends the card.
+  REQUIRE(card_index >= 0 && card_index <= static_cast<int>(cards_.size()));
+  cards_.insert(cards_.begin() + card_index, card);
+}
+
 void HanabiHand::RemoveFromHand(int card_index,
                                 std::vector<HanabiCard>* discard_pile) {
   if (discard_pile != nullptr) {
@@ -93,6 +98,11 @@ void HanabiHand::RemoveFromHand(int card_index,
   card_knowledge_.erase(card_knowledge_.begin() + card_index);
 }
 
+// Removes the card at `card_index` from cards_. card_knowledge_ is not
+// modified and the card is not pushed onto any pile by this method.
+void HanabiHand::ReturnFromHand(int card_index) {
+  // Erasing at an iterator outside [begin(), end()) is undefined behaviour.
+  REQUIRE(card_index >= 0 && card_index < static_cast<int>(cards_.size()));
+  // Adding to deck is handled by ApplyMove in hanabi_state
+  cards_.erase(cards_.begin() + card_index);
+}
+
 uint8_t HanabiHand::RevealColor(const int color) {
   uint8_t mask = 0;
   assert(cards_.size() <= 8);  // More than 8 cards is currently not supported.

diff --git a/hanabi_learning_environment/hanabi_lib/hanabi_hand.h b/hanabi_learning_environment/hanabi_lib/hanabi_hand.h
@@ -102,9 +102,13 @@ class HanabiHand {
     return card_knowledge_;
   }
   void AddCard(HanabiCard card, const CardKnowledge& initial_knowledge);
+  // Insert the specified card at position card_index. Only the card list is
+  // changed: no knowledge entry is added, so existing knowledge is kept.
+  void InsertCard(HanabiCard card, int card_index);
   // Remove card_index card from hand. Put in discard_pile if not nullptr
   // (pushes the card to the back of the discard_pile vector).
   void RemoveFromHand(int card_index, std::vector<HanabiCard>* discard_pile);
+  // Remove card_index card from hand without discarding it and without
+  // removing its knowledge entry. This method does not touch the deck; the
+  // caller is responsible for putting the card back (HanabiState::ApplyMove
+  // does so for kReturn moves).
+  void ReturnFromHand(int card_index);
   // Make cards with the given rank visible.
   // Returns new information bitmask, bit_i set if card_i color was revealed
   // and was previously unknown.

diff --git a/hanabi_learning_environment/hanabi_lib/hanabi_move.cc b/hanabi_learning_environment/hanabi_lib/hanabi_move.cc
@@ -32,8 +32,12 @@ bool HanabiMove::operator==(const HanabiMove& other_move) const {
     case kRevealRank:
       return TargetOffset() == other_move.TargetOffset() &&
              Rank() == other_move.Rank();
+    case kDealSpecific:
+      // A specific deal also names the receiving hand and the slot, so two
+      // deals of the same card to different places are different moves.
+      return CardIndex() == other_move.CardIndex() &&
+             TargetOffset() == other_move.TargetOffset() &&
+             Color() == other_move.Color() && Rank() == other_move.Rank();
     case kDeal:
       return Color() == other_move.Color() && Rank() == other_move.Rank();
+    case kReturn:
+      return CardIndex() == other_move.CardIndex() &&
+             TargetOffset() == other_move.TargetOffset();
     default:
       return true;
   }
@@ -58,6 +62,16 @@ std::string HanabiMove::ToString() const {
       } else {
         return std::string("(Deal XX)");
       }
+    case kDealSpecific:
+      if (color_ >= 0) {
+        return std::string("(Deal ") + ColorIndexToChar(Color()) +
+               RankIndexToChar(Rank()) + ")";
+      } else {
+        return std::string("(Deal XX)");
+      }
+    case kReturn:
+      // Keep a space before "from" so the card index and the word do not run
+      // together, e.g. "(Return 0 from Player 1)".
+      return "(Return " + std::to_string(CardIndex()) + " from Player " +
+             std::to_string(TargetOffset()) + ")";
     default:
       return "(INVALID)";
   }

diff --git a/hanabi_learning_environment/hanabi_lib/hanabi_move.h b/hanabi_learning_environment/hanabi_lib/hanabi_move.h
@@ -31,7 +31,7 @@ namespace hanabi_learning_env {
 class HanabiMove {
   // HanabiMove is small, and intended to be passed by value.
  public:
-  enum Type { kInvalid, kPlay, kDiscard, kRevealColor, kRevealRank, kDeal };
+  enum Type { kInvalid, kPlay, kDiscard, kRevealColor, kRevealRank, kDeal, kReturn, kDealSpecific};
 
   HanabiMove(Type move_type, int8_t card_index, int8_t target_offset,
              int8_t color, int8_t rank)

diff --git a/hanabi_learning_environment/hanabi_lib/hanabi_state.cc b/hanabi_learning_environment/hanabi_lib/hanabi_state.cc
@@ -87,6 +87,12 @@ HanabiCard HanabiState::HanabiDeck::DealCard(int color, int rank) {
   return HanabiCard(IndexToColor(index), IndexToRank(index));
 }
 
+// Puts one card of the given color and rank back into the deck by bumping
+// both that card's counter and the deck's total count.
+void HanabiState::HanabiDeck::ReturnCard(int color, int rank) {
+  const int slot = CardToIndex(color, rank);
+  card_count_[slot] += 1;
+  total_count_ += 1;
+}
+
 HanabiState::HanabiState(const HanabiGame* parent_game, int start_player)
     : parent_game_(parent_game),
       deck_(*parent_game),
@@ -165,6 +171,7 @@ int HanabiState::PlayerToDeal() const {
 
 bool HanabiState::MoveIsLegal(HanabiMove move) const {
   switch (move.MoveType()) {
+    case HanabiMove::kDealSpecific:
     case HanabiMove::kDeal:
       if (cur_player_ != kChancePlayerId) {
         return false;
@@ -212,6 +219,11 @@ bool HanabiState::MoveIsLegal(HanabiMove move) const {
       }
       break;
     }
+    case HanabiMove::kReturn:
+      // TargetOffset() is used directly as an index into hands_ for this
+      // move, so it has to be range-checked before the hand is looked up.
+      if (move.TargetOffset() < 0 ||
+          move.TargetOffset() >= static_cast<int>(hands_.size())) {
+        return false;
+      }
+      // Compare as signed ints so a negative card index is rejected
+      // explicitly instead of relying on unsigned wrap-around.
+      if (move.CardIndex() < 0 ||
+          move.CardIndex() >=
+              static_cast<int>(hands_[move.TargetOffset()].Cards().size())) {
+        return false;
+      }
+      break;
     default:
       return false;
   }
@@ -220,7 +232,10 @@ bool HanabiState::MoveIsLegal(HanabiMove move) const {
 
 void HanabiState::ApplyMove(HanabiMove move) {
   REQUIRE(MoveIsLegal(move));
-  if (deck_.Empty()) {
+  // Special moves are virtual moves used to manipulate the game.
+  bool special_move = move.MoveType() == HanabiMove::kDealSpecific ||
+                      move.MoveType() == HanabiMove::kReturn;
+  if (deck_.Empty() && !special_move) {
     --turns_to_play_;
   }
   HanabiHistoryItem history(move);
@@ -239,12 +254,21 @@ void HanabiState::ApplyMove(HanabiMove move) {
             card_knowledge);
       }
       break;
+    case HanabiMove::kDealSpecific:
+      hands_[move.TargetOffset()].InsertCard(
+          deck_.DealCard(move.Color(), move.Rank()), move.CardIndex());
+      break;
     case HanabiMove::kDiscard:
       history.information_token = IncrementInformationTokens();
       history.color = hands_[cur_player_].Cards()[move.CardIndex()].Color();
       history.rank = hands_[cur_player_].Cards()[move.CardIndex()].Rank();
       hands_[cur_player_].RemoveFromHand(move.CardIndex(), &discard_pile_);
       break;
+    case HanabiMove::kReturn:
+      deck_.ReturnCard(hands_[move.TargetOffset()].Cards()[move.CardIndex()].Color()
+                      ,hands_[move.TargetOffset()].Cards()[move.CardIndex()].Rank());
+      hands_[move.TargetOffset()].ReturnFromHand(move.CardIndex());
+      break;
     case HanabiMove::kPlay:
       history.color = hands_[cur_player_].Cards()[move.CardIndex()].Color();
       history.rank = hands_[cur_player_].Cards()[move.CardIndex()].Rank();
@@ -270,7 +294,9 @@ void HanabiState::ApplyMove(HanabiMove move) {
     default:
       std::abort();  // Should not be possible.
   }
-  move_history_.push_back(history);
+  if(!special_move){
+    move_history_.push_back(history);
+  }
   AdvanceToNextPlayer();
 }
 

diff --git a/hanabi_learning_environment/hanabi_lib/hanabi_state.h b/hanabi_learning_environment/hanabi_lib/hanabi_state.h
@@ -35,6 +35,7 @@ class HanabiState {
    public:
     explicit HanabiDeck(const HanabiGame& game);
+    // Puts one card of the given color and rank back into the deck.
+    void ReturnCard(int color, int rank);
     // DealCard returns invalid card on failure.
     HanabiCard DealCard(int color, int rank);
     HanabiCard DealCard(std::mt19937* rng);
     int Size() const { return total_count_; }

diff --git a/hanabi_learning_environment/pyhanabi.cc b/hanabi_learning_environment/pyhanabi.cc
@@ -165,6 +165,14 @@ int MoveRank(pyhanabi_move_t* move) {
       ->Rank();
 }
 
+// Stores a newly allocated kDealSpecific move, built from the given card
+// index, player, color and rank, in move->move. Returns whether move->move is
+// non-null afterwards.
+bool GetDealSpecificMove(int card_index, int player, int color, int rank,
+                         pyhanabi_move_t* move) {
+  using MoveT = hanabi_learning_env::HanabiMove;
+  REQUIRE(move != nullptr);
+  move->move = new MoveT(MoveT::kDealSpecific, card_index, player, color, rank);
+  return move->move != nullptr;
+}
+
 bool GetDiscardMove(int card_index, pyhanabi_move_t* move) {
   REQUIRE(move != nullptr);
   move->move = new hanabi_learning_env::HanabiMove(
@@ -179,6 +187,13 @@ bool GetPlayMove(int card_index, pyhanabi_move_t* move) {
   return move->move != nullptr;
 }
 
+// Stores a newly allocated kReturn move for the given card index and player
+// in move->move; color and rank are both passed as -1. Returns whether
+// move->move is non-null afterwards.
+bool GetReturnMove(int card_index, int player, pyhanabi_move_t* move) {
+  using MoveT = hanabi_learning_env::HanabiMove;
+  REQUIRE(move != nullptr);
+  move->move = new MoveT(MoveT::kReturn, card_index, player, -1, -1);
+  return move->move != nullptr;
+}
+
 bool GetRevealColorMove(int target_offset, int color, pyhanabi_move_t* move) {
   REQUIRE(move != nullptr);
   move->move = new hanabi_learning_env::HanabiMove(

diff --git a/hanabi_learning_environment/pyhanabi.h b/hanabi_learning_environment/pyhanabi.h
@@ -89,8 +89,11 @@ int CardIndex(pyhanabi_move_t* move);
 int TargetOffset(pyhanabi_move_t* move);
 int MoveColor(pyhanabi_move_t* move);
 int MoveRank(pyhanabi_move_t* move);
+bool GetDealSpecificMove(int card_index, int player, int color, int rank,
+                         pyhanabi_move_t* move);
 bool GetDiscardMove(int card_index, pyhanabi_move_t* move);
 bool GetPlayMove(int card_index, pyhanabi_move_t* move);
+bool GetReturnMove(int card_index, int player, pyhanabi_move_t* move);
 bool GetRevealColorMove(int target_offset, int color, pyhanabi_move_t* move);
 bool GetRevealRankMove(int target_offset, int rank, pyhanabi_move_t* move);
 

diff --git a/hanabi_learning_environment/pyhanabi.py b/hanabi_learning_environment/pyhanabi.py
@@ -291,6 +291,8 @@ class HanabiMoveType(enum.IntEnum):
   REVEAL_COLOR = 3
   REVEAL_RANK = 4
   DEAL = 5
+  RETURN = 6
+  DEAL_SPECIFIC = 7
 
 
 class HanabiMove(object):
@@ -326,6 +328,12 @@ def rank(self):
     """Returns 0-based rank index for REVEAL_RANK and DEAL moves."""
     return lib.MoveRank(self._move)
 
+  @staticmethod
+  def get_deal_specific_move(card_index, player, color, rank):
+    """Returns a DEAL_SPECIFIC move for the given slot, player and card.
+
+    Args:
+      card_index: card index stored in the move.
+      player: player stored in the move.
+      color: color index of the card.
+      rank: rank index of the card.
+    """
+    c_move = ffi.new("pyhanabi_move_t*")
+    # Make the call outside the assert: `python -O` strips assert statements,
+    # which would otherwise skip the call and leave c_move unfilled.
+    created = lib.GetDealSpecificMove(card_index, player, color, rank, c_move)
+    assert created
+    return HanabiMove(c_move)
+
   @staticmethod
   def get_discard_move(card_index):
     c_move = ffi.new("pyhanabi_move_t*")
@@ -338,6 +346,12 @@ def get_play_move(card_index):
     assert lib.GetPlayMove(card_index, c_move)
     return HanabiMove(c_move)
 
+  @staticmethod
+  def get_return_move(card_index, player):
+    """Returns a RETURN move for the card at card_index in player's hand.
+
+    Args:
+      card_index: card index stored in the move.
+      player: player stored in the move.
+    """
+    c_move = ffi.new("pyhanabi_move_t*")
+    # Make the call outside the assert: `python -O` strips assert statements,
+    # which would otherwise skip the call and leave c_move unfilled.
+    created = lib.GetReturnMove(card_index, player, c_move)
+    assert created
+    return HanabiMove(c_move)
+
   @staticmethod
   def get_reveal_color_move(target_offset, color):
     """current player is 0, next player clockwise is target_offset 1, etc."""
@@ -390,6 +404,11 @@ def to_dict(self):
     elif move_type == HanabiMoveType.DEAL:
       move_dict["color"] = color_idx_to_char(self.color())
       move_dict["rank"] = self.rank()
+    elif move_type == HanabiMoveType.DEAL_SPECIFIC:
+      move_dict["color"] = color_idx_to_char(self.color())
+      move_dict["rank"] = self.rank()
+    elif move_type == HanabiMoveType.RETURN:
+      move_dict["card_index"] = self.card_index()
     else:
       raise ValueError("Unsupported move: {}".format(self))
 
@@ -568,6 +587,34 @@ def deal_random_card(self):
     """If cur_player == CHANCE_PLAYER_ID, make a random card-deal move."""
     lib.StateDealRandomCard(self._state)
 
+  def deal_specific_card(self, player_id, color, rank, card_index):
+    """Deals the card (color, rank) into slot card_index of player_id's hand.
+
+    Only allowed while cur_player == CHANCE_PLAYER_ID; any other current
+    player trips the assertion below.
+    """
+    assert self.cur_player() == CHANCE_PLAYER_ID
+    self.apply_move(
+        HanabiMove.get_deal_specific_move(
+            card_index=card_index, player=player_id, color=color, rank=rank))
+
+  def return_card(self, player_id, card_index):
+    """Returns one card from the specified player's hand to the deck.
+
+    The knowledge stored for the hand is not removed.
+
+    Args:
+      player_id: player whose hand the card is taken from.
+      card_index: position of the card in that hand; must satisfy
+        0 <= card_index < current hand size.
+    """
+    hand_size = lib.StateGetHandSize(self._state, player_id)
+    # Check the lower bound as well, so a negative index is caught here
+    # rather than being handed on to the C++ library.
+    assert 0 <= card_index < hand_size
+    move = HanabiMove.get_return_move(card_index=card_index, player=player_id)
+    self.apply_move(move)
+
+  def set_hand(self, player_id, hand):
+    """Replaces the cards in player_id's hand with the cards listed in `hand`.
+
+    Every card currently held is first returned to the deck, then the
+    requested cards are dealt in order into slots 0, 1, 2, ...
+
+    Args:
+      player_id: player whose hand is replaced.
+      hand: list of dicts in the same form as the hands in the observations,
+        each with key 'color' (string) and key 'rank' (int). Must contain as
+        many cards as the player currently holds.
+    """
+    hand_size = lib.StateGetHandSize(self._state, player_id)
+    # Returning and re-dealing cards leaves the hand's knowledge entries in
+    # place, so the new hand must have the same number of cards as before.
+    assert len(hand) == hand_size
+    for _ in range(hand_size):
+      # Always take slot 0: the remaining cards shift down after each return.
+      self.return_card(player_id, 0)
+    for card_index, card in enumerate(hand):
+      color = color_char_to_idx(card["color"])
+      rank = card["rank"]
+      self.deal_specific_card(player_id, color, rank, card_index)
+
   def player_hands(self):
     """Returns a list of all hands, with cards ordered oldest to newest."""
     hand_list = []