diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml index ea11320b99..234f214827 100644 --- a/.github/workflows/actions.yml +++ b/.github/workflows/actions.yml @@ -11,7 +11,20 @@ jobs: strategy: matrix: include: - # Most current platform. + # Most current platforms and Python versions. + # TODO: change this first one to Ubuntu 24.04 when ready + - os: ubuntu-22.04 + OS_PYTHON_VERSION: "3.12" + DEFAULT_OPTIONAL_DEPENDENCY: "OFF" + BUILD_SHARED_LIB: "OFF" + OPEN_SPIEL_BUILD_WITH_ORTOOLS: "OFF" + OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: "" + - os: macos-14 + OS_PYTHON_VERSION: "3.12" + DEFAULT_OPTIONAL_DEPENDENCY: "OFF" + BUILD_SHARED_LIB: "OFF" + OPEN_SPIEL_BUILD_WITH_ORTOOLS: "OFF" + OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: "" - os: ubuntu-22.04 OS_PYTHON_VERSION: "3.11" DEFAULT_OPTIONAL_DEPENDENCY: "ON" @@ -39,14 +52,12 @@ jobs: OPEN_SPIEL_BUILD_WITH_ORTOOLS: "OFF" OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: "" # Standard or older platforms with older Python versions. - # Older Python version on Ubuntu 20.04 - os: macos-12 OS_PYTHON_VERSION: "3.9" DEFAULT_OPTIONAL_DEPENDENCY: "OFF" BUILD_SHARED_LIB: "OFF" OPEN_SPIEL_BUILD_WITH_ORTOOLS: "OFF" OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: "" - # Older Python version on Ubuntu 20.04 - os: ubuntu-20.04 OS_PYTHON_VERSION: "3.9" DEFAULT_OPTIONAL_DEPENDENCY: "ON" diff --git a/open_spiel/algorithms/oos_test.cc b/open_spiel/algorithms/oos_test.cc index a52c1d358c..60126d9a3b 100644 --- a/open_spiel/algorithms/oos_test.cc +++ b/open_spiel/algorithms/oos_test.cc @@ -41,6 +41,8 @@ namespace open_spiel { namespace algorithms { namespace { +constexpr double kFloatTolerance = 1e-10; + void EpsExploreSamplingPolicyTest() { std::shared_ptr game = LoadGame("kuhn_poker"); @@ -78,10 +80,14 @@ void EpsExploreSamplingPolicyTest() { table[pl1_info_state].current_policy = current_policy; auto p = ExplorativeSamplingPolicy(table, 0.4); - SPIEL_CHECK_EQ(p.GetStatePolicy(*card_to_player0), chn_3cards_dist); - SPIEL_CHECK_EQ(p.GetStatePolicy(*card_to_player1), chn_2cards_dist); - SPIEL_CHECK_EQ(p.GetStatePolicy(*player0_plays), expected_mix); - SPIEL_CHECK_EQ(p.GetStatePolicy(*player1_plays), expected_mix); + SPIEL_CHECK_TRUE(StatePoliciesEqual(p.GetStatePolicy(*card_to_player0), + chn_3cards_dist, kFloatTolerance)); + SPIEL_CHECK_TRUE(StatePoliciesEqual(p.GetStatePolicy(*card_to_player1), + chn_2cards_dist, kFloatTolerance)); + SPIEL_CHECK_TRUE(StatePoliciesEqual(p.GetStatePolicy(*player0_plays), + expected_mix, kFloatTolerance)); + SPIEL_CHECK_TRUE(StatePoliciesEqual(p.GetStatePolicy(*player1_plays), + expected_mix, kFloatTolerance)); } std::vector> CollectStatesInGame( diff --git a/open_spiel/algorithms/tabular_exploitability.cc b/open_spiel/algorithms/tabular_exploitability.cc index 55912fd71f..dcf775d69a 100644 --- a/open_spiel/algorithms/tabular_exploitability.cc +++ b/open_spiel/algorithms/tabular_exploitability.cc @@ -76,7 +76,7 @@ double NashConv(const Game& game, const Policy& policy, double nash_conv = 0; for (auto p = Player{0}; p < game.NumPlayers(); ++p) { double deviation_incentive = best_response_values[p] - on_policy_values[p]; - if (deviation_incentive < -FloatingPointDefaultThresholdRatio()) { + if (deviation_incentive < -FloatingPointDefaultTolerance()) { SpielFatalError( absl::StrCat("Negative Nash deviation incentive for player ", p, ": ", deviation_incentive, ". Does you game have imperfect ", diff --git a/open_spiel/games/pathfinding/pathfinding.cc b/open_spiel/games/pathfinding/pathfinding.cc index df14e56c7f..ae6457eb16 100644 --- a/open_spiel/games/pathfinding/pathfinding.cc +++ b/open_spiel/games/pathfinding/pathfinding.cc @@ -542,7 +542,7 @@ int PathfindingGame::MaxChanceOutcomes() const { double PathfindingGame::MinUtility() const { // Add a small constant here due to numeral issues. - return horizon_ * step_reward_ - FloatingPointDefaultThresholdRatio(); + return horizon_ * step_reward_ - FloatingPointDefaultTolerance(); } double PathfindingGame::MaxUtility() const { diff --git a/open_spiel/policy.cc b/open_spiel/policy.cc index fa408015f5..1887f28f39 100644 --- a/open_spiel/policy.cc +++ b/open_spiel/policy.cc @@ -76,6 +76,27 @@ ActionsAndProbs ToDeterministicPolicy(const ActionsAndProbs& actions_and_probs, return new_policy; } +bool StatePoliciesEqual(const ActionsAndProbs& state_policy1, + const ActionsAndProbs& state_policy2, + double float_tolerance) { + if (state_policy1.size() != state_policy2.size()) { + return false; + } + + for (int i = 0; i < state_policy1.size(); ++i) { + if (state_policy1[i].first != state_policy2[i].first) { + return false; + } + + if (!Near(state_policy1[i].second, state_policy2[i].second, + float_tolerance)) { + return false; + } + } + + return true; +} + ActionsAndProbs GetDeterministicPolicy(const std::vector& legal_actions, Action action) { ActionsAndProbs new_policy; diff --git a/open_spiel/policy.h b/open_spiel/policy.h index 7402c72064..79e043fddb 100644 --- a/open_spiel/policy.h +++ b/open_spiel/policy.h @@ -58,6 +58,12 @@ ActionsAndProbs ToDeterministicPolicy(const ActionsAndProbs& actions_and_probs, ActionsAndProbs GetDeterministicPolicy(const std::vector& legal_actions, Action action); +// Check that two state policies are equal (within a float tolerance). Does an +// exact check, so the actions must be in the same order. +bool StatePoliciesEqual(const ActionsAndProbs& state_policy1, + const ActionsAndProbs& state_policy2, + double float_tolerance); + // A general policy object. A policy is a mapping from states to list of // (action, prob) pairs for all the legal actions at the state. class Policy { diff --git a/open_spiel/python/CMakeLists.txt b/open_spiel/python/CMakeLists.txt index e695ce05e3..c75a963991 100644 --- a/open_spiel/python/CMakeLists.txt +++ b/open_spiel/python/CMakeLists.txt @@ -305,9 +305,11 @@ endif() if (OPEN_SPIEL_ENABLE_TENSORFLOW) set(PYTHON_TESTS ${PYTHON_TESTS} algorithms/alpha_zero/evaluator_test.py - algorithms/alpha_zero/model_test.py + # Broken in Python 3.12. Must port to Keras 3. See https://github.com/google-deepmind/open_spiel/issues/1206. + # algorithms/alpha_zero/model_test.py algorithms/deep_cfr_test.py - algorithms/deep_cfr_tf2_test.py + # Broken in Python 3.12. Must port to Keras 3. See https://github.com/google-deepmind/open_spiel/issues/1208. + # algorithms/deep_cfr_tf2_test.py algorithms/discounted_cfr_test.py algorithms/dqn_test.py algorithms/eva_test.py @@ -319,7 +321,8 @@ if (OPEN_SPIEL_ENABLE_TENSORFLOW) algorithms/nfsp_test.py algorithms/policy_gradient_test.py algorithms/psro_v2/strategy_selectors_test.py - algorithms/rcfr_test.py + # Broken in Python 3.12. Must port to Keras 3. https://github.com/google-deepmind/open_spiel/issues/1207. + # algorithms/rcfr_test.py ) if (OPEN_SPIEL_ENABLE_PYTHON_MISC) set(PYTHON_TESTS ${PYTHON_TESTS} diff --git a/open_spiel/scripts/ci_script.sh b/open_spiel/scripts/ci_script.sh index bb7c7b32ac..2599cd1652 100755 --- a/open_spiel/scripts/ci_script.sh +++ b/open_spiel/scripts/ci_script.sh @@ -41,19 +41,22 @@ PYBIN=`which $PYBIN` source ./open_spiel/scripts/python_extra_deps.sh $PYBIN -${PYBIN} -m pip install --upgrade pip -${PYBIN} -m pip install --upgrade setuptools - -if [[ "$OS" = "Linux" && ( "$OS_PYTHON_VERSION" = "3.9" || "$OS_PYTHON_VERSION" = "3.10" || "$OS_PYTHON_VERSION" = "3.11" ) ]]; then +if [[ "$OS" = "Linux" && ( "$OS_PYTHON_VERSION" = "3.9" || "$OS_PYTHON_VERSION" = "3.10" || "$OS_PYTHON_VERSION" = "3.11" || "$OS_PYTHON_VERSION" = "3.12" ) ]]; then # Ubuntu 22.04 must execute the virtual env this way: ${PYBIN} -m venv ./venv +elif [[ "$OS" = "Darwin" && "$OS_PYTHON_VERSION" = "3.12" ]]; then + ${PYBIN} -m venv ./venv else # Ubuntu 20.04 and earlier ${PYBIN} -m pip install virtualenv virtualenv -p ${PYBIN} ./venv fi + source ./venv/bin/activate +pip install --upgrade pip +pip install --upgrade setuptools + # Can use python and pip directly after here because we're in the virtual env python --version diff --git a/open_spiel/scripts/find_tensorflow.sh b/open_spiel/scripts/find_tensorflow.sh index 41b1f726d5..8f8b1f80ff 100755 --- a/open_spiel/scripts/find_tensorflow.sh +++ b/open_spiel/scripts/find_tensorflow.sh @@ -16,7 +16,6 @@ read -r -d '' TESTSCRIPT << EOT import tensorflow as tf -import tensorflow_probability print(tf.__version__) EOT diff --git a/open_spiel/scripts/install.sh b/open_spiel/scripts/install.sh index f6d137311a..5356c4e29e 100755 --- a/open_spiel/scripts/install.sh +++ b/open_spiel/scripts/install.sh @@ -242,6 +242,12 @@ if [[ "$OSTYPE" == "linux-gnu" ]]; then echo "Adding Python 3.11 ppa repos" sudo add-apt-repository ppa:deadsnakes/ppa PYTHON_PKGS="python3.11 python3.11-dev python3-pip python3-setuptools python3-wheel python3-tk python3.11-venv" + elif [[ "$OS_PYTHON_VERSION" == "3.12" ]]; then + # Need to special-case this until it's installed by default. + # https://ubuntuhandbook.org/index.php/2023/05/install-python-3-12-ubuntu/ + echo "Adding Python 3.12 ppa repos" + sudo add-apt-repository ppa:deadsnakes/ppa + PYTHON_PKGS="python3.12 python3.12-dev python3-pip python3-setuptools python3-wheel python3-tk python3.12-venv" fi EXT_DEPS="virtualenv clang cmake curl $PYTHON_PKGS" if [[ ${OPEN_SPIEL_BUILD_WITH_GO:-"OFF"} == "ON" ]]; then @@ -307,8 +313,8 @@ elif [[ "$OSTYPE" == "darwin"* ]]; then # Mac OSX fi fi # Removed getting pip via git-pip.py. See #1200. - # brew install virtualenv # May be the required way to do this as of Python 3.12? - ${PYBIN} -m pip install virtualenv + brew install virtualenv # May be the required way to do this as of Python 3.12? + # ${PYBIN} -m pip install virtualenv else echo "The OS '$OSTYPE' is not supported (Only Linux and MacOS is). " \ "Feel free to contribute the install for a new OS." diff --git a/open_spiel/scripts/python_extra_deps.sh b/open_spiel/scripts/python_extra_deps.sh index d0448669cd..75030afb9a 100644 --- a/open_spiel/scripts/python_extra_deps.sh +++ b/open_spiel/scripts/python_extra_deps.sh @@ -51,17 +51,23 @@ verlt() { echo "Set Python version: $PY_VER" if verlt $PY_VER 3.10; then - echo "Python < 3.10 detected" + echo "Detected Python version < 3.10" export OPEN_SPIEL_PYTHON_PYTORCH_DEPS="torch==1.13.1" export OPEN_SPIEL_PYTHON_JAX_DEPS="jax==0.4.6 jaxlib==0.4.6 dm-haiku==0.0.10 optax==0.1.7 chex==0.1.7 rlax==0.1.5 distrax==0.1.3" export OPEN_SPIEL_PYTHON_TENSORFLOW_DEPS="numpy==1.23.5 tensorflow==2.13.1 tensorflow-probability==0.19.0 tensorflow_datasets==4.9.2 keras==2.13.1" export OPEN_SPIEL_PYTHON_MISC_DEPS="IPython==5.8.0 networkx==2.4 matplotlib==3.5.2 mock==4.0.2 nashpy==0.0.19 scipy==1.10.1 testresources==2.0.1 cvxopt==1.3.1 cvxpy==1.2.0 ecos==2.0.10 osqp==0.6.2.post5 clu==0.0.6 flax==0.5.3" -else - echo "Python >= 3.10 detected" +elif verlt $PY_VER 3.12; then + echo "Detected Python version in {3.10, 3.11}" export OPEN_SPIEL_PYTHON_PYTORCH_DEPS="torch==2.1.0" export OPEN_SPIEL_PYTHON_JAX_DEPS="jax==0.4.20 jaxlib==0.4.20 dm-haiku==0.0.10 optax==0.1.7 chex==0.1.84 rlax==0.1.6 distrax==0.1.4" export OPEN_SPIEL_PYTHON_TENSORFLOW_DEPS="numpy==1.26.1 tensorflow==2.14.0 tensorflow-probability==0.22.1 tensorflow_datasets==4.9.2 keras==2.14.0" export OPEN_SPIEL_PYTHON_MISC_DEPS="IPython==5.8.0 networkx==3.2 matplotlib==3.5.2 mock==4.0.2 nashpy==0.0.19 scipy==1.11.3 testresources==2.0.1 cvxopt==1.3.1 cvxpy==1.4.1 ecos==2.0.10 osqp==0.6.2.post5 clu==0.0.6 flax==0.5.3" +else + echo "Detected Python version >= 3.12" + export OPEN_SPIEL_PYTHON_PYTORCH_DEPS="torch==2.2.2" + export OPEN_SPIEL_PYTHON_JAX_DEPS="jax==0.4.26 jaxlib==0.4.26 dm-haiku==0.0.12 optax==0.2.2 chex==0.1.86 rlax==0.1.6 distrax==0.1.5" + export OPEN_SPIEL_PYTHON_TENSORFLOW_DEPS="numpy==1.26.4 tensorflow==2.16.1 tensorflow_datasets==4.9.4 keras==3.1.1" + export OPEN_SPIEL_PYTHON_MISC_DEPS="IPython==8.23.0 networkx==3.3 matplotlib==3.8.4 mock==5.1.0 nashpy==0.0.41 scipy==1.11.4 testresources==2.0.1 cvxopt==1.3.2 cvxpy==1.4.2 ecos==2.0.13 osqp==0.6.5 clu==0.0.11 flax==0.8.2" fi diff --git a/open_spiel/spiel_utils.h b/open_spiel/spiel_utils.h index 58424e8414..5e90968076 100644 --- a/open_spiel/spiel_utils.h +++ b/open_spiel/spiel_utils.h @@ -132,9 +132,8 @@ std::string SpielStrCat(Args&&... args) { using Player = int; using Action = int64_t; -// Floating point comparisons use this as a multiplier on the larger of the two -// numbers as the threshold. -inline constexpr float FloatingPointDefaultThresholdRatio() { return 1e-5; } +// Default floating point tolerance between two numbers. +inline constexpr float FloatingPointDefaultTolerance() { return 1e-6; } // Default tolerance applied when validating variables are valid probability. inline constexpr float ProbabilityDefaultTolerance() { return 1e-9; } @@ -181,13 +180,12 @@ std::string VectorOfPairsToString(const std::vector>& vec, const std::string& pair_delimiter); // Returns whether the absolute difference between floating point values a and -// b is less than or equal to FloatingPointThresholdRatio() * max(|a|, |b|). +// b is less than or equal to template bool Near(T a, T b) { static_assert(std::is_floating_point::value, "Near() is only for floating point args."); - return fabs(a - b) <= - (std::max(fabs(a), fabs(b)) * FloatingPointDefaultThresholdRatio()); + return fabs(a - b) <= FloatingPointDefaultTolerance(); } // Returns whether |a - b| <= epsilon.