google-deepmind · lanctot · Apr 15, 2024 · Apr 7, 2024 · Apr 7, 2024 · Apr 6, 2024
diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml
@@ -11,7 +11,20 @@ jobs:
     strategy:
       matrix:
         include:
-        # Most current platform.
+        # Most current platforms and Python versions.
+        # TODO: change this first one to Ubuntu 24.04 when ready
+        - os: ubuntu-22.04
+          OS_PYTHON_VERSION: "3.12"
+          DEFAULT_OPTIONAL_DEPENDENCY: "OFF"
+          BUILD_SHARED_LIB: "OFF"
+          OPEN_SPIEL_BUILD_WITH_ORTOOLS: "OFF"
+          OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: ""
+        - os: macos-14
+          OS_PYTHON_VERSION: "3.12"
+          DEFAULT_OPTIONAL_DEPENDENCY: "OFF"
+          BUILD_SHARED_LIB: "OFF"
+          OPEN_SPIEL_BUILD_WITH_ORTOOLS: "OFF"
+          OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: ""
         - os: ubuntu-22.04
           OS_PYTHON_VERSION: "3.11"
           DEFAULT_OPTIONAL_DEPENDENCY: "ON"
@@ -39,14 +52,12 @@ jobs:
           OPEN_SPIEL_BUILD_WITH_ORTOOLS: "OFF"
           OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: ""
         # Standard or older platforms with older Python versions.
-        # Older Python version on Ubuntu 20.04
         - os: macos-12
           OS_PYTHON_VERSION: "3.9"
           DEFAULT_OPTIONAL_DEPENDENCY: "OFF"
           BUILD_SHARED_LIB: "OFF"
           OPEN_SPIEL_BUILD_WITH_ORTOOLS: "OFF"
           OPEN_SPIEL_BUILD_WITH_ORTOOLS_DOWNLOAD_URL: ""
-        # Older Python version on Ubuntu 20.04
         - os: ubuntu-20.04
           OS_PYTHON_VERSION: "3.9"
           DEFAULT_OPTIONAL_DEPENDENCY: "ON"

diff --git a/open_spiel/algorithms/oos_test.cc b/open_spiel/algorithms/oos_test.cc
@@ -41,6 +41,8 @@ namespace open_spiel {
 namespace algorithms {
 namespace {
 
+constexpr double kFloatTolerance = 1e-10;
+
 void EpsExploreSamplingPolicyTest() {
   std::shared_ptr<const Game> game = LoadGame("kuhn_poker");
 
@@ -78,10 +80,14 @@ void EpsExploreSamplingPolicyTest() {
   table[pl1_info_state].current_policy = current_policy;
 
   auto p = ExplorativeSamplingPolicy(table, 0.4);
-  SPIEL_CHECK_EQ(p.GetStatePolicy(*card_to_player0), chn_3cards_dist);
-  SPIEL_CHECK_EQ(p.GetStatePolicy(*card_to_player1), chn_2cards_dist);
-  SPIEL_CHECK_EQ(p.GetStatePolicy(*player0_plays), expected_mix);
-  SPIEL_CHECK_EQ(p.GetStatePolicy(*player1_plays), expected_mix);
+  SPIEL_CHECK_TRUE(StatePoliciesEqual(p.GetStatePolicy(*card_to_player0),
+                                      chn_3cards_dist, kFloatTolerance));
+  SPIEL_CHECK_TRUE(StatePoliciesEqual(p.GetStatePolicy(*card_to_player1),
+		                      chn_2cards_dist, kFloatTolerance));
+  SPIEL_CHECK_TRUE(StatePoliciesEqual(p.GetStatePolicy(*player0_plays),
+                                      expected_mix, kFloatTolerance));
+  SPIEL_CHECK_TRUE(StatePoliciesEqual(p.GetStatePolicy(*player1_plays),
+                                      expected_mix, kFloatTolerance));
 }
 
 std::vector<std::unique_ptr<State>> CollectStatesInGame(

diff --git a/open_spiel/algorithms/tabular_exploitability.cc b/open_spiel/algorithms/tabular_exploitability.cc
@@ -76,7 +76,7 @@ double NashConv(const Game& game, const Policy& policy,
   double nash_conv = 0;
   for (auto p = Player{0}; p < game.NumPlayers(); ++p) {
     double deviation_incentive = best_response_values[p] - on_policy_values[p];
-    if (deviation_incentive < -FloatingPointDefaultThresholdRatio()) {
+    if (deviation_incentive < -FloatingPointDefaultTolerance()) {
       SpielFatalError(
           absl::StrCat("Negative Nash deviation incentive for player ", p, ": ",
                        deviation_incentive, ". Does you game have imperfect ",

diff --git a/open_spiel/games/pathfinding/pathfinding.cc b/open_spiel/games/pathfinding/pathfinding.cc
@@ -542,7 +542,7 @@ int PathfindingGame::MaxChanceOutcomes() const {
 
 double PathfindingGame::MinUtility() const {
   // Add a small constant here due to numeral issues.
-  return horizon_ * step_reward_ - FloatingPointDefaultThresholdRatio();
+  return horizon_ * step_reward_ - FloatingPointDefaultTolerance();
 }
 
 double PathfindingGame::MaxUtility() const {

diff --git a/open_spiel/policy.cc b/open_spiel/policy.cc
@@ -76,6 +76,27 @@ ActionsAndProbs ToDeterministicPolicy(const ActionsAndProbs& actions_and_probs,
   return new_policy;
 }
 
+// Returns true iff both policies list the same actions in the same order and
+// each pair of corresponding probabilities differs by at most float_tolerance.
+bool StatePoliciesEqual(const ActionsAndProbs& state_policy1,
+                        const ActionsAndProbs& state_policy2,
+                        double float_tolerance) {
+  if (state_policy1.size() != state_policy2.size()) {
+    return false;
+  }
+
+  // Index with size_t so the comparison against size() is not signed/unsigned.
+  for (size_t i = 0; i < state_policy1.size(); ++i) {
+    if (state_policy1[i].first != state_policy2[i].first ||
+        !Near(state_policy1[i].second, state_policy2[i].second,
+              float_tolerance)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
 ActionsAndProbs GetDeterministicPolicy(const std::vector<Action>& legal_actions,
                                        Action action) {
   ActionsAndProbs new_policy;

diff --git a/open_spiel/policy.h b/open_spiel/policy.h
@@ -58,6 +58,12 @@ ActionsAndProbs ToDeterministicPolicy(const ActionsAndProbs& actions_and_probs,
 ActionsAndProbs GetDeterministicPolicy(const std::vector<Action>& legal_actions,
                                        Action action);
 
+// Checks that two state policies are equal: actions must match exactly and in
+// the same order; probabilities must match within float_tolerance.
+bool StatePoliciesEqual(const ActionsAndProbs& state_policy1,
+                        const ActionsAndProbs& state_policy2,
+                        double float_tolerance);
+
 // A general policy object. A policy is a mapping from states to list of
 // (action, prob) pairs for all the legal actions at the state.
 class Policy {

diff --git a/open_spiel/python/CMakeLists.txt b/open_spiel/python/CMakeLists.txt
@@ -305,9 +305,11 @@ endif()
 if (OPEN_SPIEL_ENABLE_TENSORFLOW)
   set(PYTHON_TESTS ${PYTHON_TESTS}
       algorithms/alpha_zero/evaluator_test.py
-      algorithms/alpha_zero/model_test.py
+      # Broken in Python 3.12. Must port to Keras 3. See https://github.com/google-deepmind/open_spiel/issues/1206.
+      # algorithms/alpha_zero/model_test.py
       algorithms/deep_cfr_test.py
-      algorithms/deep_cfr_tf2_test.py
+      # Broken in Python 3.12. Must port to Keras 3. See https://github.com/google-deepmind/open_spiel/issues/1208.
+      # algorithms/deep_cfr_tf2_test.py
       algorithms/discounted_cfr_test.py
       algorithms/dqn_test.py
       algorithms/eva_test.py
@@ -319,7 +321,8 @@ if (OPEN_SPIEL_ENABLE_TENSORFLOW)
       algorithms/nfsp_test.py
       algorithms/policy_gradient_test.py
       algorithms/psro_v2/strategy_selectors_test.py
-      algorithms/rcfr_test.py
+      # Broken in Python 3.12. Must port to Keras 3. See https://github.com/google-deepmind/open_spiel/issues/1207.
+      # algorithms/rcfr_test.py
   )
   if (OPEN_SPIEL_ENABLE_PYTHON_MISC)
     set(PYTHON_TESTS ${PYTHON_TESTS}

diff --git a/open_spiel/scripts/ci_script.sh b/open_spiel/scripts/ci_script.sh
@@ -41,19 +41,22 @@ PYBIN=`which $PYBIN`
 
 source ./open_spiel/scripts/python_extra_deps.sh $PYBIN
 
-${PYBIN} -m pip install --upgrade pip
-${PYBIN} -m pip install --upgrade setuptools
-
-if [[ "$OS" = "Linux" && ( "$OS_PYTHON_VERSION" = "3.9" || "$OS_PYTHON_VERSION" = "3.10" || "$OS_PYTHON_VERSION" = "3.11" ) ]]; then
+if [[ "$OS" = "Linux" && ( "$OS_PYTHON_VERSION" = "3.9" || "$OS_PYTHON_VERSION" = "3.10" || "$OS_PYTHON_VERSION" = "3.11" || "$OS_PYTHON_VERSION" = "3.12" ) ]]; then
   # Ubuntu 22.04 must execute the virtual env this way:
   ${PYBIN} -m venv ./venv
+elif [[ "$OS" = "Darwin" && "$OS_PYTHON_VERSION" = "3.12" ]]; then
+  ${PYBIN} -m venv ./venv
 else
   # Ubuntu 20.04 and earlier
   ${PYBIN} -m pip install virtualenv
   virtualenv -p ${PYBIN} ./venv
 fi
+
 source ./venv/bin/activate
 
+pip install --upgrade pip
+pip install --upgrade setuptools
+
 # Can use python and pip directly after here because we're in the virtual env
 
 python --version

diff --git a/open_spiel/scripts/find_tensorflow.sh b/open_spiel/scripts/find_tensorflow.sh
@@ -16,7 +16,6 @@
 
 read -r -d '' TESTSCRIPT << EOT
 import tensorflow as tf
-import tensorflow_probability
 print(tf.__version__)
 EOT
 

diff --git a/open_spiel/scripts/install.sh b/open_spiel/scripts/install.sh
@@ -242,6 +242,12 @@ if [[ "$OSTYPE" == "linux-gnu" ]]; then
     echo "Adding Python 3.11 ppa repos"
     sudo add-apt-repository ppa:deadsnakes/ppa
     PYTHON_PKGS="python3.11 python3.11-dev python3-pip python3-setuptools python3-wheel python3-tk python3.11-venv"
+  elif [[ "$OS_PYTHON_VERSION" == "3.12" ]]; then
+    # Need to special-case this until it's installed by default.
+    # https://ubuntuhandbook.org/index.php/2023/05/install-python-3-12-ubuntu/
+    echo "Adding Python 3.12 ppa repos"
+    sudo add-apt-repository ppa:deadsnakes/ppa
+    PYTHON_PKGS="python3.12 python3.12-dev python3-pip python3-setuptools python3-wheel python3-tk python3.12-venv"
   fi
   EXT_DEPS="virtualenv clang cmake curl $PYTHON_PKGS"
   if [[ ${OPEN_SPIEL_BUILD_WITH_GO:-"OFF"} == "ON" ]]; then
@@ -307,8 +313,8 @@ elif [[ "$OSTYPE" == "darwin"* ]]; then  # Mac OSX
     fi
   fi
   # Removed getting pip via git-pip.py. See #1200.
-  # brew install virtualenv   # May be the required way to do this as of Python 3.12?
-  ${PYBIN} -m pip install virtualenv
+  brew install virtualenv   # May be the required way to do this as of Python 3.12?
+  # ${PYBIN} -m pip install virtualenv
 else
   echo "The OS '$OSTYPE' is not supported (Only Linux and MacOS is). " \
        "Feel free to contribute the install for a new OS."

diff --git a/open_spiel/scripts/python_extra_deps.sh b/open_spiel/scripts/python_extra_deps.sh
@@ -51,17 +51,23 @@ verlt() {
 
 echo "Set Python version: $PY_VER"
 if verlt $PY_VER 3.10; then
-  echo "Python < 3.10 detected"
+  echo "Detected Python version < 3.10"
   export OPEN_SPIEL_PYTHON_PYTORCH_DEPS="torch==1.13.1"
   export OPEN_SPIEL_PYTHON_JAX_DEPS="jax==0.4.6 jaxlib==0.4.6 dm-haiku==0.0.10 optax==0.1.7 chex==0.1.7 rlax==0.1.5 distrax==0.1.3"
   export OPEN_SPIEL_PYTHON_TENSORFLOW_DEPS="numpy==1.23.5 tensorflow==2.13.1 tensorflow-probability==0.19.0 tensorflow_datasets==4.9.2 keras==2.13.1"
   export OPEN_SPIEL_PYTHON_MISC_DEPS="IPython==5.8.0 networkx==2.4 matplotlib==3.5.2 mock==4.0.2 nashpy==0.0.19 scipy==1.10.1 testresources==2.0.1 cvxopt==1.3.1 cvxpy==1.2.0 ecos==2.0.10 osqp==0.6.2.post5 clu==0.0.6 flax==0.5.3"
-else
-  echo "Python >= 3.10 detected"
+elif verlt $PY_VER 3.12; then
+  echo "Detected Python version in {3.10, 3.11}"
   export OPEN_SPIEL_PYTHON_PYTORCH_DEPS="torch==2.1.0"
   export OPEN_SPIEL_PYTHON_JAX_DEPS="jax==0.4.20 jaxlib==0.4.20 dm-haiku==0.0.10 optax==0.1.7 chex==0.1.84 rlax==0.1.6 distrax==0.1.4"
   export OPEN_SPIEL_PYTHON_TENSORFLOW_DEPS="numpy==1.26.1 tensorflow==2.14.0 tensorflow-probability==0.22.1 tensorflow_datasets==4.9.2 keras==2.14.0"
   export OPEN_SPIEL_PYTHON_MISC_DEPS="IPython==5.8.0 networkx==3.2 matplotlib==3.5.2 mock==4.0.2 nashpy==0.0.19 scipy==1.11.3 testresources==2.0.1 cvxopt==1.3.1 cvxpy==1.4.1 ecos==2.0.10 osqp==0.6.2.post5 clu==0.0.6 flax==0.5.3"
+else
+  echo "Detected Python version >= 3.12"
+  export OPEN_SPIEL_PYTHON_PYTORCH_DEPS="torch==2.2.2"
+  export OPEN_SPIEL_PYTHON_JAX_DEPS="jax==0.4.26 jaxlib==0.4.26 dm-haiku==0.0.12 optax==0.2.2 chex==0.1.86 rlax==0.1.6 distrax==0.1.5"
+  export OPEN_SPIEL_PYTHON_TENSORFLOW_DEPS="numpy==1.26.4 tensorflow==2.16.1 tensorflow_datasets==4.9.4 keras==3.1.1"
+  export OPEN_SPIEL_PYTHON_MISC_DEPS="IPython==8.23.0 networkx==3.3 matplotlib==3.8.4 mock==5.1.0 nashpy==0.0.41 scipy==1.11.4 testresources==2.0.1 cvxopt==1.3.2 cvxpy==1.4.2 ecos==2.0.13 osqp==0.6.5 clu==0.0.11 flax==0.8.2"
 fi
 
 

diff --git a/open_spiel/spiel_utils.h b/open_spiel/spiel_utils.h
@@ -132,9 +132,8 @@ std::string SpielStrCat(Args&&... args) {
 using Player = int;
 using Action = int64_t;
 
-// Floating point comparisons use this as a multiplier on the larger of the two
-// numbers as the threshold.
-inline constexpr float FloatingPointDefaultThresholdRatio() { return 1e-5; }
+// Default absolute tolerance used when comparing two floating point numbers.
+inline constexpr float FloatingPointDefaultTolerance() { return 1e-6; }
 
 // Default tolerance applied when validating variables are valid probability.
 inline constexpr float ProbabilityDefaultTolerance() { return 1e-9; }
@@ -181,13 +180,12 @@ std::string VectorOfPairsToString(const std::vector<std::pair<A, B>>& vec,
                                   const std::string& pair_delimiter);
 
 // Returns whether the absolute difference between floating point values a and
-// b is less than or equal to FloatingPointThresholdRatio() * max(|a|, |b|).
+// b is less than or equal to FloatingPointDefaultTolerance().
 template <typename T>
 bool Near(T a, T b) {
   static_assert(std::is_floating_point<T>::value,
                 "Near() is only for floating point args.");
-  return fabs(a - b) <=
-         (std::max(fabs(a), fabs(b)) * FloatingPointDefaultThresholdRatio());
+  return fabs(a - b) <= FloatingPointDefaultTolerance();
 }
 
 // Returns whether |a - b| <= epsilon.