Skip to content

Commit

Permalink
Merge pull request #1202 from google-deepmind:lanctot-v1.5-python3.12
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 624921284
Change-Id: Ib03c3ed3797ed1e1e75ba7cb6e35e980cfb31058
  • Loading branch information
lanctot committed Apr 15, 2024
2 parents d2326da + cb36957 commit e5d1f09
Show file tree
Hide file tree
Showing 12 changed files with 90 additions and 28 deletions.
17 changes: 14 additions & 3 deletions .github/workflows/actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,20 @@ jobs:
strategy:
matrix:
include:
# Most current platform.
# Most current platforms and Python versions.
# TODO: change this first one to Ubuntu 24.04 when ready
- os: ubuntu-22.04
OS_PYTHON_VERSION: "3.12"
DEFAULT_OPTIONAL_DEPENDENCY: "OFF"
BUILD_SHARED_LIB: "OFF"
OPEN_SPIEL_BUILD_WITH_ORTOOLS: "OFF"
OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: ""
- os: macos-14
OS_PYTHON_VERSION: "3.12"
DEFAULT_OPTIONAL_DEPENDENCY: "OFF"
BUILD_SHARED_LIB: "OFF"
OPEN_SPIEL_BUILD_WITH_ORTOOLS: "OFF"
OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: ""
- os: ubuntu-22.04
OS_PYTHON_VERSION: "3.11"
DEFAULT_OPTIONAL_DEPENDENCY: "ON"
Expand Down Expand Up @@ -39,14 +52,12 @@ jobs:
OPEN_SPIEL_BUILD_WITH_ORTOOLS: "OFF"
OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: ""
# Standard or older platforms with older Python versions.
# Older Python version on Ubuntu 20.04
- os: macos-12
OS_PYTHON_VERSION: "3.9"
DEFAULT_OPTIONAL_DEPENDENCY: "OFF"
BUILD_SHARED_LIB: "OFF"
OPEN_SPIEL_BUILD_WITH_ORTOOLS: "OFF"
OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: ""
# Older Python version on Ubuntu 20.04
- os: ubuntu-20.04
OS_PYTHON_VERSION: "3.9"
DEFAULT_OPTIONAL_DEPENDENCY: "ON"
Expand Down
17 changes: 13 additions & 4 deletions open_spiel/algorithms/oos_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@

#include "open_spiel/algorithms/oos.h"

#include <memory>
#include <utility>
#include <vector>

#include "open_spiel/algorithms/tabular_exploitability.h"
#include "open_spiel/policy.h"
#include "open_spiel/spiel.h"
#include "open_spiel/spiel_utils.h"

Expand All @@ -41,6 +44,8 @@ namespace open_spiel {
namespace algorithms {
namespace {

constexpr double kFloatTolerance = 1e-10;

void EpsExploreSamplingPolicyTest() {
std::shared_ptr<const Game> game = LoadGame("kuhn_poker");

Expand Down Expand Up @@ -78,10 +83,14 @@ void EpsExploreSamplingPolicyTest() {
table[pl1_info_state].current_policy = current_policy;

auto p = ExplorativeSamplingPolicy(table, 0.4);
SPIEL_CHECK_EQ(p.GetStatePolicy(*card_to_player0), chn_3cards_dist);
SPIEL_CHECK_EQ(p.GetStatePolicy(*card_to_player1), chn_2cards_dist);
SPIEL_CHECK_EQ(p.GetStatePolicy(*player0_plays), expected_mix);
SPIEL_CHECK_EQ(p.GetStatePolicy(*player1_plays), expected_mix);
SPIEL_CHECK_TRUE(StatePoliciesEqual(p.GetStatePolicy(*card_to_player0),
chn_3cards_dist, kFloatTolerance));
SPIEL_CHECK_TRUE(StatePoliciesEqual(p.GetStatePolicy(*card_to_player1),
chn_2cards_dist, kFloatTolerance));
SPIEL_CHECK_TRUE(StatePoliciesEqual(p.GetStatePolicy(*player0_plays),
expected_mix, kFloatTolerance));
SPIEL_CHECK_TRUE(StatePoliciesEqual(p.GetStatePolicy(*player1_plays),
expected_mix, kFloatTolerance));
}

std::vector<std::unique_ptr<State>> CollectStatesInGame(
Expand Down
2 changes: 1 addition & 1 deletion open_spiel/algorithms/tabular_exploitability.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ double NashConv(const Game& game, const Policy& policy,
double nash_conv = 0;
for (auto p = Player{0}; p < game.NumPlayers(); ++p) {
double deviation_incentive = best_response_values[p] - on_policy_values[p];
if (deviation_incentive < -FloatingPointDefaultThresholdRatio()) {
if (deviation_incentive < -FloatingPointDefaultTolerance()) {
SpielFatalError(
absl::StrCat("Negative Nash deviation incentive for player ", p, ": ",
deviation_incentive, ". Does you game have imperfect ",
Expand Down
2 changes: 1 addition & 1 deletion open_spiel/games/pathfinding/pathfinding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ int PathfindingGame::MaxChanceOutcomes() const {

double PathfindingGame::MinUtility() const {
// Subtract a small constant here due to numerical issues.
return horizon_ * step_reward_ - FloatingPointDefaultThresholdRatio();
return horizon_ * step_reward_ - FloatingPointDefaultTolerance();
}

double PathfindingGame::MaxUtility() const {
Expand Down
21 changes: 21 additions & 0 deletions open_spiel/policy.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,27 @@ ActionsAndProbs ToDeterministicPolicy(const ActionsAndProbs& actions_and_probs,
return new_policy;
}

bool StatePoliciesEqual(const ActionsAndProbs& state_policy1,
const ActionsAndProbs& state_policy2,
double float_tolerance) {
if (state_policy1.size() != state_policy2.size()) {
return false;
}

for (int i = 0; i < state_policy1.size(); ++i) {
if (state_policy1[i].first != state_policy2[i].first) {
return false;
}

if (!Near(state_policy1[i].second, state_policy2[i].second,
float_tolerance)) {
return false;
}
}

return true;
}

ActionsAndProbs GetDeterministicPolicy(const std::vector<Action>& legal_actions,
Action action) {
ActionsAndProbs new_policy;
Expand Down
6 changes: 6 additions & 0 deletions open_spiel/policy.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ ActionsAndProbs ToDeterministicPolicy(const ActionsAndProbs& actions_and_probs,
ActionsAndProbs GetDeterministicPolicy(const std::vector<Action>& legal_actions,
Action action);

// Checks that two state policies are equal: the actions must match exactly and
// appear in the same order, and the corresponding probabilities must be equal
// to within float_tolerance.
bool StatePoliciesEqual(const ActionsAndProbs& state_policy1,
const ActionsAndProbs& state_policy2,
double float_tolerance);

// A general policy object. A policy is a mapping from states to list of
// (action, prob) pairs for all the legal actions at the state.
class Policy {
Expand Down
9 changes: 6 additions & 3 deletions open_spiel/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -291,9 +291,11 @@ endif()
if (OPEN_SPIEL_ENABLE_TENSORFLOW)
set(PYTHON_TESTS ${PYTHON_TESTS}
algorithms/alpha_zero/evaluator_test.py
algorithms/alpha_zero/model_test.py
# Broken in Python 3.12. Must port to Keras 3. See https://github.com/google-deepmind/open_spiel/issues/1206.
# algorithms/alpha_zero/model_test.py
algorithms/deep_cfr_test.py
algorithms/deep_cfr_tf2_test.py
# Broken in Python 3.12. Must port to Keras 3. See https://github.com/google-deepmind/open_spiel/issues/1208.
# algorithms/deep_cfr_tf2_test.py
algorithms/discounted_cfr_test.py
algorithms/dqn_test.py
algorithms/eva_test.py
Expand All @@ -305,7 +307,8 @@ if (OPEN_SPIEL_ENABLE_TENSORFLOW)
algorithms/nfsp_test.py
algorithms/policy_gradient_test.py
algorithms/psro_v2/strategy_selectors_test.py
algorithms/rcfr_test.py
# Broken in Python 3.12. Must port to Keras 3. See https://github.com/google-deepmind/open_spiel/issues/1207.
# algorithms/rcfr_test.py
)
if (OPEN_SPIEL_ENABLE_PYTHON_MISC)
set(PYTHON_TESTS ${PYTHON_TESTS}
Expand Down
11 changes: 7 additions & 4 deletions open_spiel/scripts/ci_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,22 @@ PYBIN=`which $PYBIN`

source ./open_spiel/scripts/python_extra_deps.sh $PYBIN

${PYBIN} -m pip install --upgrade pip
${PYBIN} -m pip install --upgrade setuptools

if [[ "$OS" = "Linux" && ( "$OS_PYTHON_VERSION" = "3.9" || "$OS_PYTHON_VERSION" = "3.10" || "$OS_PYTHON_VERSION" = "3.11" ) ]]; then
if [[ "$OS" = "Linux" && ( "$OS_PYTHON_VERSION" = "3.9" || "$OS_PYTHON_VERSION" = "3.10" || "$OS_PYTHON_VERSION" = "3.11" || "$OS_PYTHON_VERSION" = "3.12" ) ]]; then
# Ubuntu 22.04 must execute the virtual env this way:
${PYBIN} -m venv ./venv
elif [[ "$OS" = "Darwin" && "$OS_PYTHON_VERSION" = "3.12" ]]; then
${PYBIN} -m venv ./venv
else
# Ubuntu 20.04 and earlier
${PYBIN} -m pip install virtualenv
virtualenv -p ${PYBIN} ./venv
fi

source ./venv/bin/activate

pip install --upgrade pip
pip install --upgrade setuptools

# Can use python and pip directly after here because we're in the virtual env

python --version
Expand Down
1 change: 0 additions & 1 deletion open_spiel/scripts/find_tensorflow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

read -r -d '' TESTSCRIPT << EOT
import tensorflow as tf
import tensorflow_probability
print(tf.__version__)
EOT

Expand Down
10 changes: 8 additions & 2 deletions open_spiel/scripts/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,12 @@ if [[ "$OSTYPE" == "linux-gnu" ]]; then
echo "Adding Python 3.11 ppa repos"
sudo add-apt-repository ppa:deadsnakes/ppa
PYTHON_PKGS="python3.11 python3.11-dev python3-pip python3-setuptools python3-wheel python3-tk python3.11-venv"
elif [[ "$OS_PYTHON_VERSION" == "3.12" ]]; then
# Need to special-case this until it's installed by default.
# https://ubuntuhandbook.org/index.php/2023/05/install-python-3-12-ubuntu/
echo "Adding Python 3.12 ppa repos"
sudo add-apt-repository ppa:deadsnakes/ppa
PYTHON_PKGS="python3.12 python3.12-dev python3-pip python3-setuptools python3-wheel python3-tk python3.12-venv"
fi
EXT_DEPS="virtualenv clang cmake curl $PYTHON_PKGS"
if [[ ${OPEN_SPIEL_BUILD_WITH_GO:-"OFF"} == "ON" ]]; then
Expand Down Expand Up @@ -300,8 +306,8 @@ elif [[ "$OSTYPE" == "darwin"* ]]; then # Mac OSX
fi
fi
# Removed getting pip via git-pip.py. See #1200.
# brew install virtualenv # May be the required way to do this as of Python 3.12?
${PYBIN} -m pip install virtualenv
brew install virtualenv # May be the required way to do this as of Python 3.12?
# ${PYBIN} -m pip install virtualenv
else
echo "The OS '$OSTYPE' is not supported (Only Linux and MacOS is). " \
"Feel free to contribute the install for a new OS."
Expand Down
12 changes: 9 additions & 3 deletions open_spiel/scripts/python_extra_deps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,23 @@ verlt() {

echo "Set Python version: $PY_VER"
if verlt $PY_VER 3.10; then
echo "Python < 3.10 detected"
echo "Detected Python version < 3.10"
export OPEN_SPIEL_PYTHON_PYTORCH_DEPS="torch==1.13.1"
export OPEN_SPIEL_PYTHON_JAX_DEPS="jax==0.4.6 jaxlib==0.4.6 dm-haiku==0.0.10 optax==0.1.7 chex==0.1.7 rlax==0.1.5 distrax==0.1.3"
export OPEN_SPIEL_PYTHON_TENSORFLOW_DEPS="numpy==1.23.5 tensorflow==2.13.1 tensorflow-probability==0.19.0 tensorflow_datasets==4.9.2 keras==2.13.1"
export OPEN_SPIEL_PYTHON_MISC_DEPS="IPython==5.8.0 networkx==2.4 matplotlib==3.5.2 mock==4.0.2 nashpy==0.0.19 scipy==1.10.1 testresources==2.0.1 cvxopt==1.3.1 cvxpy==1.2.0 ecos==2.0.10 osqp==0.6.2.post5 clu==0.0.6 flax==0.5.3"
else
echo "Python >= 3.10 detected"
elif verlt $PY_VER 3.12; then
echo "Detected Python version in {3.10, 3.11}"
export OPEN_SPIEL_PYTHON_PYTORCH_DEPS="torch==2.1.0"
export OPEN_SPIEL_PYTHON_JAX_DEPS="jax==0.4.20 jaxlib==0.4.20 dm-haiku==0.0.10 optax==0.1.7 chex==0.1.84 rlax==0.1.6 distrax==0.1.4"
export OPEN_SPIEL_PYTHON_TENSORFLOW_DEPS="numpy==1.26.1 tensorflow==2.14.0 tensorflow-probability==0.22.1 tensorflow_datasets==4.9.2 keras==2.14.0"
export OPEN_SPIEL_PYTHON_MISC_DEPS="IPython==5.8.0 networkx==3.2 matplotlib==3.5.2 mock==4.0.2 nashpy==0.0.19 scipy==1.11.3 testresources==2.0.1 cvxopt==1.3.1 cvxpy==1.4.1 ecos==2.0.10 osqp==0.6.2.post5 clu==0.0.6 flax==0.5.3"
else
echo "Detected Python version >= 3.12"
export OPEN_SPIEL_PYTHON_PYTORCH_DEPS="torch==2.2.2"
export OPEN_SPIEL_PYTHON_JAX_DEPS="jax==0.4.26 jaxlib==0.4.26 dm-haiku==0.0.12 optax==0.2.2 chex==0.1.86 rlax==0.1.6 distrax==0.1.5"
export OPEN_SPIEL_PYTHON_TENSORFLOW_DEPS="numpy==1.26.4 tensorflow==2.16.1 tensorflow_datasets==4.9.4 keras==3.1.1"
export OPEN_SPIEL_PYTHON_MISC_DEPS="IPython==8.23.0 networkx==3.3 matplotlib==3.8.4 mock==5.1.0 nashpy==0.0.41 scipy==1.11.4 testresources==2.0.1 cvxopt==1.3.2 cvxpy==1.4.2 ecos==2.0.13 osqp==0.6.5 clu==0.0.11 flax==0.8.2"
fi


Expand Down
10 changes: 4 additions & 6 deletions open_spiel/spiel_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,8 @@ std::string SpielStrCat(Args&&... args) {
using Player = int;
using Action = int64_t;

// Floating point comparisons use this as a multiplier on the larger of the two
// numbers as the threshold.
inline constexpr float FloatingPointDefaultThresholdRatio() { return 1e-5; }
// Default floating point tolerance between two numbers.
inline constexpr float FloatingPointDefaultTolerance() { return 1e-6; }

// Default tolerance applied when validating variables are valid probability.
inline constexpr float ProbabilityDefaultTolerance() { return 1e-9; }
Expand Down Expand Up @@ -181,13 +180,12 @@ std::string VectorOfPairsToString(const std::vector<std::pair<A, B>>& vec,
const std::string& pair_delimiter);

// Returns whether the absolute difference between floating point values a and
// b is less than or equal to FloatingPointThresholdRatio() * max(|a|, |b|).
// b is less than or equal to FloatingPointDefaultTolerance().
template <typename T>
bool Near(T a, T b) {
static_assert(std::is_floating_point<T>::value,
"Near() is only for floating point args.");
return fabs(a - b) <=
(std::max(fabs(a), fabs(b)) * FloatingPointDefaultThresholdRatio());
return fabs(a - b) <= FloatingPointDefaultTolerance();
}

// Returns whether |a - b| <= epsilon.
Expand Down

0 comments on commit e5d1f09

Please sign in to comment.