Skip to content
This repository has been archived by the owner on Apr 18, 2024. It is now read-only.

Feature: change the hands of players #40

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions hanabi_learning_environment/hanabi_lib/hanabi_hand.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ void HanabiHand::AddCard(HanabiCard card,
card_knowledge_.push_back(initial_knowledge);
}

// Inserts `card` into the hand so that it ends up at position `card_index`.
//
// Only cards_ is modified here; card_knowledge_ is not touched by this
// function.
//
// Preconditions (enforced with REQUIRE):
//   * `card` is a valid card.
//   * 0 <= card_index <= cards_.size(). Forming an iterator outside that
//     range and passing it to insert() would be undefined behaviour.
void HanabiHand::InsertCard(HanabiCard card, int card_index) {
  REQUIRE(card.IsValid());
  REQUIRE(card_index >= 0 &&
          card_index <= static_cast<int>(cards_.size()));
  cards_.insert(cards_.begin() + card_index, card);
}

void HanabiHand::RemoveFromHand(int card_index,
std::vector<HanabiCard>* discard_pile) {
if (discard_pile != nullptr) {
Expand All @@ -93,6 +98,11 @@ void HanabiHand::RemoveFromHand(int card_index,
card_knowledge_.erase(card_knowledge_.begin() + card_index);
}

// Removes the card at `card_index` from cards_ without discarding it.
//
// This function only erases the card; putting it back into the deck is
// handled by ApplyMove in hanabi_state. card_knowledge_ is not modified.
//
// Precondition: 0 <= card_index < cards_.size(). Erasing through an
// out-of-range iterator would be undefined behaviour, so the index is
// asserted before use.
void HanabiHand::ReturnFromHand(int card_index) {
  assert(card_index >= 0 &&
         card_index < static_cast<int>(cards_.size()));
  cards_.erase(cards_.begin() + card_index);
}

uint8_t HanabiHand::RevealColor(const int color) {
uint8_t mask = 0;
assert(cards_.size() <= 8); // More than 8 cards is currently not supported.
Expand Down
4 changes: 4 additions & 0 deletions hanabi_learning_environment/hanabi_lib/hanabi_hand.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,13 @@ class HanabiHand {
return card_knowledge_;
}
void AddCard(HanabiCard card, const CardKnowledge& initial_knowledge);
// Insert the specified card at position card_index. The stored card
// knowledge is left unchanged by this call.
void InsertCard(HanabiCard card, int card_index);
// Remove card_index card from hand. Put in discard_pile if not nullptr
// (pushes the card to the back of the discard_pile vector).
void RemoveFromHand(int card_index, std::vector<HanabiCard>* discard_pile);
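// Remove card_index card from hand without discarding it. The card is not
// added to the deck here; that is handled by ApplyMove in hanabi_state.
// The stored card knowledge is left unchanged by this call.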
void ReturnFromHand(int card_index);
// Make cards with the given rank visible.
// Returns new information bitmask, bit_i set if card_i color was revealed
// and was previously unknown.
Expand Down
14 changes: 14 additions & 0 deletions hanabi_learning_environment/hanabi_lib/hanabi_move.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,12 @@ bool HanabiMove::operator==(const HanabiMove& other_move) const {
case kRevealRank:
return TargetOffset() == other_move.TargetOffset() &&
Rank() == other_move.Rank();
case kDealSpecific:
case kDeal:
return Color() == other_move.Color() && Rank() == other_move.Rank();
case kReturn:
return CardIndex() == other_move.CardIndex() &&
TargetOffset() == other_move.TargetOffset();
default:
return true;
}
Expand All @@ -58,6 +62,16 @@ std::string HanabiMove::ToString() const {
} else {
return std::string("(Deal XX)");
}
case kDealSpecific:
if (color_ >= 0) {
return std::string("(Deal ") + ColorIndexToChar(Color()) +
RankIndexToChar(Rank()) + ")";
} else {
return std::string("(Deal XX)");
}
case kReturn:
return "(Return " + std::to_string(CardIndex()) + "from Player " +
std::to_string(TargetOffset()) + ")";
default:
return "(INVALID)";
}
Expand Down
2 changes: 1 addition & 1 deletion hanabi_learning_environment/hanabi_lib/hanabi_move.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ namespace hanabi_learning_env {
class HanabiMove {
// HanabiMove is small, and intended to be passed by value.
public:
enum Type { kInvalid, kPlay, kDiscard, kRevealColor, kRevealRank, kDeal };
enum Type { kInvalid, kPlay, kDiscard, kRevealColor, kRevealRank, kDeal, kReturn, kDealSpecific};

HanabiMove(Type move_type, int8_t card_index, int8_t target_offset,
int8_t color, int8_t rank)
Expand Down
30 changes: 28 additions & 2 deletions hanabi_learning_environment/hanabi_lib/hanabi_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ HanabiCard HanabiState::HanabiDeck::DealCard(int color, int rank) {
return HanabiCard(IndexToColor(index), IndexToRank(index));
}

// Puts one copy of the (color, rank) card back into the deck: the per-card
// counter for that card and the overall deck size are each increased by one.
// NOTE(review): no range check is performed on color/rank here; callers are
// presumably expected to pass a card that was previously dealt -- confirm.
void HanabiState::HanabiDeck::ReturnCard(int color, int rank) {
  const int slot = CardToIndex(color, rank);
  card_count_[slot] += 1;
  total_count_ += 1;
}

HanabiState::HanabiState(const HanabiGame* parent_game, int start_player)
: parent_game_(parent_game),
deck_(*parent_game),
Expand Down Expand Up @@ -165,6 +171,7 @@ int HanabiState::PlayerToDeal() const {

bool HanabiState::MoveIsLegal(HanabiMove move) const {
switch (move.MoveType()) {
case HanabiMove::kDealSpecific:
case HanabiMove::kDeal:
if (cur_player_ != kChancePlayerId) {
return false;
Expand Down Expand Up @@ -212,6 +219,11 @@ bool HanabiState::MoveIsLegal(HanabiMove move) const {
}
break;
}
case HanabiMove::kReturn:
if (move.CardIndex() >= hands_[move.TargetOffset()].Cards().size()) {
return false;
}
break;
default:
return false;
}
Expand All @@ -220,7 +232,10 @@ bool HanabiState::MoveIsLegal(HanabiMove move) const {

void HanabiState::ApplyMove(HanabiMove move) {
REQUIRE(MoveIsLegal(move));
if (deck_.Empty()) {
// Special moves are virtual moves used to manipulate the game.
bool special_move = move.MoveType() == HanabiMove::kDealSpecific ||
move.MoveType() == HanabiMove::kReturn;
if (deck_.Empty() && !special_move) {
--turns_to_play_;
}
HanabiHistoryItem history(move);
Expand All @@ -239,12 +254,21 @@ void HanabiState::ApplyMove(HanabiMove move) {
card_knowledge);
}
break;
case HanabiMove::kDealSpecific:
hands_[move.TargetOffset()].InsertCard(
deck_.DealCard(move.Color(), move.Rank()), move.CardIndex());
break;
case HanabiMove::kDiscard:
history.information_token = IncrementInformationTokens();
history.color = hands_[cur_player_].Cards()[move.CardIndex()].Color();
history.rank = hands_[cur_player_].Cards()[move.CardIndex()].Rank();
hands_[cur_player_].RemoveFromHand(move.CardIndex(), &discard_pile_);
break;
case HanabiMove::kReturn:
deck_.ReturnCard(hands_[move.TargetOffset()].Cards()[move.CardIndex()].Color()
,hands_[move.TargetOffset()].Cards()[move.CardIndex()].Rank());
hands_[move.TargetOffset()].ReturnFromHand(move.CardIndex());
break;
case HanabiMove::kPlay:
history.color = hands_[cur_player_].Cards()[move.CardIndex()].Color();
history.rank = hands_[cur_player_].Cards()[move.CardIndex()].Rank();
Expand All @@ -270,7 +294,9 @@ void HanabiState::ApplyMove(HanabiMove move) {
default:
std::abort(); // Should not be possible.
}
move_history_.push_back(history);
if(!special_move){
move_history_.push_back(history);
}
AdvanceToNextPlayer();
}

Expand Down
1 change: 1 addition & 0 deletions hanabi_learning_environment/hanabi_lib/hanabi_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class HanabiState {
public:
explicit HanabiDeck(const HanabiGame& game);
// DealCard returns invalid card on failure.
void ReturnCard(int color, int rank);
HanabiCard DealCard(int color, int rank);
HanabiCard DealCard(std::mt19937* rng);
int Size() const { return total_count_; }
Expand Down
15 changes: 15 additions & 0 deletions hanabi_learning_environment/pyhanabi.cc
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,14 @@ int MoveRank(pyhanabi_move_t* move) {
->Rank();
}

bool GetDealSpecificMove(int card_index, int player, int color, int rank,
pyhanabi_move_t* move) {
REQUIRE(move != nullptr);
move->move = new hanabi_learning_env::HanabiMove(
hanabi_learning_env::HanabiMove::kDealSpecific, card_index, player, color, rank);
return move->move != nullptr;
}

bool GetDiscardMove(int card_index, pyhanabi_move_t* move) {
REQUIRE(move != nullptr);
move->move = new hanabi_learning_env::HanabiMove(
Expand All @@ -179,6 +187,13 @@ bool GetPlayMove(int card_index, pyhanabi_move_t* move) {
return move->move != nullptr;
}

bool GetReturnMove(int card_index,int player, pyhanabi_move_t* move) {
REQUIRE(move != nullptr);
move->move = new hanabi_learning_env::HanabiMove(
hanabi_learning_env::HanabiMove::kReturn, card_index, player, -1, -1);
return move->move != nullptr;
}

bool GetRevealColorMove(int target_offset, int color, pyhanabi_move_t* move) {
REQUIRE(move != nullptr);
move->move = new hanabi_learning_env::HanabiMove(
Expand Down
3 changes: 3 additions & 0 deletions hanabi_learning_environment/pyhanabi.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,11 @@ int CardIndex(pyhanabi_move_t* move);
int TargetOffset(pyhanabi_move_t* move);
int MoveColor(pyhanabi_move_t* move);
int MoveRank(pyhanabi_move_t* move);
bool GetDealSpecificMove(int card_index, int player, int color, int rank,
pyhanabi_move_t* move);
bool GetDiscardMove(int card_index, pyhanabi_move_t* move);
bool GetPlayMove(int card_index, pyhanabi_move_t* move);
bool GetReturnMove(int card_index, int player, pyhanabi_move_t* move);
bool GetRevealColorMove(int target_offset, int color, pyhanabi_move_t* move);
bool GetRevealRankMove(int target_offset, int rank, pyhanabi_move_t* move);

Expand Down
47 changes: 47 additions & 0 deletions hanabi_learning_environment/pyhanabi.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,8 @@ class HanabiMoveType(enum.IntEnum):
REVEAL_COLOR = 3
REVEAL_RANK = 4
DEAL = 5
RETURN = 6
DEAL_SPECIFIC = 7


class HanabiMove(object):
Expand Down Expand Up @@ -326,6 +328,12 @@ def rank(self):
"""Returns 0-based rank index for REVEAL_RANK and DEAL moves."""
return lib.MoveRank(self._move)

@staticmethod
def get_deal_specific_move(card_index, player, color, rank):
  """Builds a DEAL_SPECIFIC move.

  Args:
    card_index: position in the hand at which the card is inserted.
    player: index of the player receiving the card.
    color: 0-based color index of the card to deal.
    rank: 0-based rank index of the card to deal.

  Returns:
    A HanabiMove wrapping the newly created move.

  Raises:
    AssertionError: if the library reports that the move was not created.
  """
  c_move = ffi.new("pyhanabi_move_t*")
  # Call the library outside of the assert statement: under `python -O`
  # assert statements are stripped, and the move would never be created.
  created = lib.GetDealSpecificMove(card_index, player, color, rank, c_move)
  assert created
  return HanabiMove(c_move)

@staticmethod
def get_discard_move(card_index):
c_move = ffi.new("pyhanabi_move_t*")
Expand All @@ -338,6 +346,12 @@ def get_play_move(card_index):
assert lib.GetPlayMove(card_index, c_move)
return HanabiMove(c_move)

@staticmethod
def get_return_move(card_index, player):
  """Builds a RETURN move.

  Args:
    card_index: position in the hand of the card to return.
    player: index of the player whose card is returned.

  Returns:
    A HanabiMove wrapping the newly created move.

  Raises:
    AssertionError: if the library reports that the move was not created.
  """
  c_move = ffi.new("pyhanabi_move_t*")
  # Call the library outside of the assert statement: under `python -O`
  # assert statements are stripped, and the move would never be created.
  created = lib.GetReturnMove(card_index, player, c_move)
  assert created
  return HanabiMove(c_move)

@staticmethod
def get_reveal_color_move(target_offset, color):
"""current player is 0, next player clockwise is target_offset 1, etc."""
Expand Down Expand Up @@ -390,6 +404,11 @@ def to_dict(self):
elif move_type == HanabiMoveType.DEAL:
move_dict["color"] = color_idx_to_char(self.color())
move_dict["rank"] = self.rank()
elif move_type == HanabiMoveType.DEAL_SPECIFIC:
move_dict["color"] = color_idx_to_char(self.color())
move_dict["rank"] = self.rank()
elif move_type == HanabiMoveType.RETURN:
move_dict["card_index"] = self.card_index()
else:
raise ValueError("Unsupported move: {}".format(self))

Expand Down Expand Up @@ -568,6 +587,34 @@ def deal_random_card(self):
"""If cur_player == CHANCE_PLAYER_ID, make a random card-deal move."""
lib.StateDealRandomCard(self._state)

def deal_specific_card(self, player_id, color, rank, card_index):
  """Deals the (color, rank) card into player_id's hand at card_index.

  Asserts that cur_player == CHANCE_PLAYER_ID before building and applying
  the specific card-deal move.

  Args:
    player_id: index of the player receiving the card.
    color: 0-based color index of the card.
    rank: 0-based rank index of the card.
    card_index: position in the hand at which the card is inserted.
  """
  assert self.cur_player() == CHANCE_PLAYER_ID
  self.apply_move(
      HanabiMove.get_deal_specific_move(card_index, player_id, color, rank))

def return_card(self, player_id, card_index):
  """Returns a card from a player's hand to the deck.

  The card at card_index is taken out of player_id's hand and returned to
  the deck; the knowledge about the card is not removed.

  Args:
    player_id: index of the player whose card is returned.
    card_index: position of the card in that player's hand; must satisfy
      0 <= card_index < hand size.

  Raises:
    AssertionError: if card_index is outside the player's hand.
  """
  hand_size = lib.StateGetHandSize(self._state, player_id)
  # Reject negative indices as well: they would pass a plain
  # `card_index < hand_size` check and reach the native move code.
  assert 0 <= card_index < hand_size
  move = HanabiMove.get_return_move(card_index=card_index, player=player_id)
  self.apply_move(move)

def set_hand(self, player_id, hand):
  """Replaces the hand of player_id with the given cards.

  Every card currently held is first returned to the deck (always taking
  the card at index 0), then the requested cards are dealt one by one into
  positions 0, 1, 2, ... in the order given.

  Args:
    player_id: index of the player whose hand is replaced.
    hand: list of dicts in the same format as in the observations vector,
      each with key 'color' (string value) and key 'rank' (int value).
  """
  cards_to_return = lib.StateGetHandSize(self._state, player_id)
  while cards_to_return > 0:
    self.return_card(player_id, 0)
    cards_to_return -= 1
  for slot, new_card in enumerate(hand):
    self.deal_specific_card(
        player_id, color_char_to_idx(new_card["color"]), new_card["rank"],
        slot)

def player_hands(self):
"""Returns a list of all hands, with cards ordered oldest to newest."""
hand_list = []
Expand Down