Changelog for v0.17.0-rc1 (LeelaChessZero#273)
* Changelog for v0.17.0-rc1

* More details in changelog.

* Code review fixes.

* Mistype fix.

* Clang-format all files.

* Fix comment.

* Add "for FPU reduction" to changelog.

* Fix typo.

* Typo.

* typo

* Typo LeelaChessZero#5.

* Fix number_out_of_order.
mooskagh authored and borg323 committed Sep 6, 2018
1 parent 6901a44 commit c9d20a3
Showing 32 changed files with 154 additions and 95 deletions.
62 changes: 62 additions & 0 deletions changelog.txt
@@ -1,3 +1,65 @@
v0.17.0-rc1 (2018-08-19)
~~~~~~~~~~~

New visible features:
* Implemented ponder support.
* Tablebases are now supported (only WDL probing for now).
The command line parameter is
--syzygy-paths=/path/to/syzygy/
* The old smart pruning flag is gone. Instead there is a
--futile-search-aversion flag.
--futile-search-aversion=0 is equivalent to the old --no-smart-pruning.
--futile-search-aversion=1 is equivalent to the old --smart-pruning.
The default is now 1.47, which means the engine will sometimes decide to stop
the search early even when there is a theoretical (but not very probable)
chance that the best move decision could change with more thinking time.
* Lc0 now supports configuration files. Options can be listed there instead
of as command line flags / UCI params.
The config file should be named lc0.config and located in the same directory
as lc0. It should list one command line option per line, the leading '--'
being optional, for example (a fuller illustration follows this list):

syzygy-paths=/path/to/syzygy/

* In UCI info, "depth" is now the average depth rather than the full depth
(which was always 4).
Also, depth values do not include the reused tree; only nodes visited during
the current search session are counted.
* New experimental --sticky-checkmates flag (default off), intended to find
shorter checkmate sequences.
* More features in the "check" backend.
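
As a concrete illustration of the configuration-file support above (a sketch;
the values are examples, not recommendations), a lc0.config enabling the new
tablebase and search options from this release could look like:

  # lc0.config -- one command line option per line, leading '--' optional
  syzygy-paths=/path/to/syzygy/
  futile-search-aversion=1.47

Per the description above, lc0 reads this file from its own directory, so
these options no longer need to be repeated as command line flags.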


Performance optimizations:
* Release Windows executables are built with "whole program optimization".
* Added the --out-of-order-eval flag (default off).
Switching it on gives cached/terminal nodes higher priority, which increases
nps.
* The OpenCL backend now supports batches (up to 5x speedup!).
* Performance optimizations for the BLAS backend.
* The total visited policy (for FPU reduction) is now cached.
* Values of priors (P) are now stored as 16-bit floats rather than 32-bit
floats, which saves a considerable amount of RAM. (A sketch of this
conversion follows this list.)
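
To illustrate the 16-bit prior storage, below is a minimal self-contained
sketch of a float32-to-float16 round trip, assuming IEEE half precision. This
is not lc0's actual conversion code (which may round differently and handle
subnormals); it only shows why the per-prior memory cost halves:

  #include <cstdint>
  #include <cstring>

  // Sketch only: truncating float32 -> float16, adequate for priors in
  // [0, 1]. Values below the half-precision normal range flush to zero.
  std::uint16_t FloatToHalf(float f) {
    std::uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    const std::uint16_t sign = (bits >> 16) & 0x8000;
    const std::int32_t exponent = ((bits >> 23) & 0xFF) - 127 + 15;
    const std::uint32_t mantissa = bits & 0x7FFFFF;
    if (exponent <= 0) return sign;            // too small: flush to zero
    if (exponent >= 31) return sign | 0x7C00;  // too large: infinity
    return sign | (exponent << 10) | (mantissa >> 13);
  }

  float HalfToFloat(std::uint16_t h) {
    const std::uint32_t sign = static_cast<std::uint32_t>(h & 0x8000) << 16;
    const std::uint32_t exponent = (h >> 10) & 0x1F;
    const std::uint32_t mantissa = h & 0x3FF;
    // Subnormal halves are not reconstructed in this sketch.
    const std::uint32_t bits =
        exponent == 0
            ? sign
            : sign | ((exponent - 15 + 127) << 23) | (mantissa << 13);
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
  }

Storing each edge's P as a 16-bit value instead of a float halves that field,
which adds up over millions of nodes in the search tree.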


Bugfixes:
* Fixed an en passant detection bug which caused the position after a pawn
moving by two squares not to count towards threefold repetition even when en
passant was not possible. (A sketch of the corrected check follows this
list.)
* Fixed a bug which caused --cache-history-length values 2..7 to work the
same as --cache-history-length=1.
The default is temporarily changed to --cache-history-length=1 during play.
(For training games, it is 7.)
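
For context on the en passant fix, here is a hedged sketch of the standard
technique (the Board type and helper names are hypothetical, not lc0's actual
API). A repetition key should include the en passant square only when the
capture is actually playable; including an unusable right makes the position
after a double pawn push look unique, so threefold repetition is missed:

  #include <cstdint>

  // Hypothetical helpers for illustration; not lc0's API.
  struct Board {
    std::uint64_t Hash() const;       // pieces, side to move, castling
    bool HasLegalEnPassantCapture() const;
    int EnPassantSquare() const;      // square behind the double-pushed pawn
  };

  // Positions repeat only if their *usable* en passant rights also match.
  std::uint64_t RepetitionKey(const Board& board) {
    std::uint64_t key = board.Hash();
    if (board.HasLegalEnPassantCapture()) {
      key ^= 0x9E3779B97F4A7C15ULL * (board.EnPassantSquare() + 1);
    }
    return key;
  }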


Removed features:
* Backpropagation beta / backpropagation gamma parameters have been removed.


Other changes:
* The release lc0-windows-cuda.zip package now contains Nvidia CUDA and cuDNN .dlls.


v0.16.0 (2018-07-20)
~~~~~~~

2 changes: 1 addition & 1 deletion src/engine.h
@@ -33,8 +33,8 @@
#include "neural/network.h"
#include "syzygy/syzygy.h"
#include "utils/mutex.h"
#include "utils/optionsparser.h"
#include "utils/optional.h"
#include "utils/optionsparser.h"

// CUDNN eval
// comment/disable this to enable tensor flow path
3 changes: 2 additions & 1 deletion src/main.cc
@@ -34,7 +34,8 @@
int main(int argc, const char** argv) {
std::cerr << " _" << std::endl;
std::cerr << "| _ | |" << std::endl;
std::cerr << "|_ |_ |_| v" << GetVersionStr() << " built " << __DATE__ << std::endl;
std::cerr << "|_ |_ |_| v" << GetVersionStr() << " built " << __DATE__
<< std::endl;
using namespace lczero;
CommandLine::Init(argc, argv);
CommandLine::RegisterMode("uci", "(default) Act as UCI engine");
4 changes: 1 addition & 3 deletions src/mcts/node.cc
@@ -197,8 +197,7 @@ void Node::CreateEdges(const MoveList& moves) {
Node::ConstIterator Node::Edges() const { return {edges_, &child_}; }
Node::Iterator Node::Edges() { return {edges_, &child_}; }

float Node::GetVisitedPolicy() const { return visited_policy_; }

float Node::GetVisitedPolicy() const { return visited_policy_; }

Edge* Node::GetEdgeToNode(const Node* node) const {
assert(node->parent_ == this);
@@ -247,7 +246,6 @@ void Node::FinalizeScoreUpdate(float v) {
--n_in_flight_;
}


Node::NodeRange Node::ChildNodes() const { return child_.get(); }

void Node::ReleaseChildren() { gNodeGc.AddToGcQueue(std::move(child_)); }
13 changes: 7 additions & 6 deletions src/mcts/search.cc
@@ -390,8 +390,8 @@ std::pair<Move, Move> Search::GetBestMoveInternal() const
Move ponder_move; // Default is "null move" which means "don't display
// anything".
if (best_node.HasNode() && best_node.node()->HasChildren()) {
ponder_move =
GetBestChildNoTemperature(best_node.node()).GetMove(!played_history_.IsBlackToMove());
ponder_move = GetBestChildNoTemperature(best_node.node())
.GetMove(!played_history_.IsBlackToMove());
}
return {best_node.GetMove(played_history_.IsBlackToMove()), ponder_move};
}
@@ -604,6 +604,7 @@ void SearchWorker::GatherMinibatch() {
if (picked_node.nn_queried) computation_->PopCacheHit();
minibatch_.pop_back();
--minibatch_size;
++number_out_of_order;
}
}
}
@@ -735,12 +736,12 @@ void SearchWorker::ExtendNode(Node* node) {
node->MakeTerminal(GameResult::DRAW);
return;
}

// Neither by-position or by-rule termination, but maybe it's a TB position.
if (search_->syzygy_tb_ && board.castlings().no_legal_castle() &&
history_.Last().GetNoCaptureNoPawnPly() == 0 &&
(board.ours() + board.theirs()).count() <=
search_->syzygy_tb_->max_cardinality()) {
search_->syzygy_tb_->max_cardinality()) {
ProbeState state;
WDLScore wdl = search_->syzygy_tb_->probe_wdl(history_.Last(), &state);
// Only fail state means the WDL is wrong, probe_wdl may produce correct
@@ -751,8 +752,8 @@ void SearchWorker::ExtendNode(Node* node) {
node->MakeTerminal(GameResult::BLACK_WON);
} else if (wdl == WDL_LOSS) {
node->MakeTerminal(GameResult::WHITE_WON);
} else { // Cursed wins and blessed losses count as draws.
node->MakeTerminal(GameResult::DRAW);
} else { // Cursed wins and blessed losses count as draws.
node->MakeTerminal(GameResult::DRAW);
}
search_->tb_hits_.fetch_add(1, std::memory_order_acq_rel);
return;
1 change: 0 additions & 1 deletion src/neural/blas/convolution1.cc
@@ -59,7 +59,6 @@ void Convolution1::Forward(const size_t batch_size, const size_t input_channels,
0.0f, // beta
batch_output, // C
kSquares); // ldc, leading rank of B

}
}

2 changes: 1 addition & 1 deletion src/neural/blas/convolution1.h
@@ -38,4 +38,4 @@ class Convolution1 {
static constexpr auto kHeight = 8;
static constexpr auto kSquares = kWidth * kHeight;
};
}
} // namespace lczero
2 changes: 1 addition & 1 deletion src/neural/blas/fully_connected_layer.h
@@ -46,4 +46,4 @@ class FullyConnectedLayer {
static constexpr auto kSquares = kWidth * kHeight;
};

} // lczero
} // namespace lczero
2 changes: 1 addition & 1 deletion src/neural/blas/network_blas.cc
@@ -16,13 +16,13 @@
along with Leela Chess. If not, see <http://www.gnu.org/licenses/>.
*/

#include "neural/network.h"
#include "neural/blas/batchnorm.h"
#include "neural/blas/blas.h"
#include "neural/blas/convolution1.h"
#include "neural/blas/fully_connected_layer.h"
#include "neural/blas/winograd_convolution3.h"
#include "neural/factory.h"
#include "neural/network.h"

#include <algorithm>
#include <cassert>
2 changes: 1 addition & 1 deletion src/neural/blas/winograd_convolution3.h
@@ -83,4 +83,4 @@ class WinogradConvolution3 {
std::vector<float> V_;
std::vector<float> M_;
};
}
} // namespace lczero
34 changes: 14 additions & 20 deletions src/neural/blas/winograd_transform.ispc
@@ -22,18 +22,16 @@ uniform const size_t kWidth = 8;
uniform const size_t kHeight = 8;
uniform const size_t kSquares = kWidth * kHeight;

uniform const size_t kWtiles = 4; //(kWidth + 1) / 2;
uniform const size_t kTiles = kWtiles * kWtiles; // 16
uniform const size_t kWtiles = 4; //(kWidth + 1) / 2;
uniform const size_t kTiles = kWtiles * kWtiles; // 16

uniform const size_t kWinogradAlpha = 4;
uniform const size_t kWinogradTile = kWinogradAlpha * kWinogradAlpha;

export void winograd_TransformIn_ispc(uniform size_t batch_size,
const uniform float input[],
uniform size_t channels,
uniform float output[])
{

uniform float output[]) {
float x[kWinogradAlpha][kWinogradAlpha];
float T1[kWinogradAlpha][kWinogradAlpha];

@@ -47,18 +45,16 @@ export void winograd_TransformIn_ispc(uniform size_t batch_size,
const uniform int yin = 2 * block_y - 1;
const uniform int xin = 2 * block_x - 1;

foreach(channel = 0 ... channels) {
foreach (channel = 0 ... channels) {
size_t V_channel = V_batch + channel;
size_t input_channel = input_batch + channel * (kWidth * kHeight);

for (uniform int i = 0; i < kWinogradAlpha; i++) {
for (uniform int j = 0; j < kWinogradAlpha; j++) {
if ((yin + i) >= 0 && (xin + j) >= 0 &&
(yin + i) < kHeight && (xin + j) < kWidth) {
x[i][j] = input[input_channel +
(yin + i) * kWidth + (xin + j)];
}
else {
if ((yin + i) >= 0 && (xin + j) >= 0 && (yin + i) < kHeight &&
(xin + j) < kWidth) {
x[i][j] = input[input_channel + (yin + i) * kWidth + (xin + j)];
} else {
x[i][j] = 0.0f;
}
}
@@ -82,8 +78,8 @@ export void winograd_TransformIn_ispc(uniform size_t batch_size,
T1[3][3] = x[1][3] - x[3][3];

const size_t V_incr = channels * kTiles * batch_size;
const size_t wTile_V = V_channel +
channels * (block_y * kWtiles + block_x);
const size_t wTile_V =
V_channel + channels * (block_y * kWtiles + block_x);

output[wTile_V + V_incr * 0] = T1[0][0] - T1[0][2];
output[wTile_V + V_incr * 1] = T1[0][1] + T1[0][2];
@@ -107,11 +103,10 @@ export void winograd_TransformIn_ispc(uniform size_t batch_size,
}
}


export void winograd_TransformOut_ispc(uniform size_t batch_size,
const uniform float input[], uniform size_t channels,
uniform float output[])
{
const uniform float input[],
uniform size_t channels,
uniform float output[]) {
float m[kWinogradTile];

for (uniform size_t batch_index = 0; batch_index < batch_size;
@@ -132,7 +127,7 @@ export void winograd_TransformOut_ispc(uniform size_t batch_size,
const uniform int M_incr = channels * kTiles * batch_size;

for (uniform int wTile = 0; wTile < kWinogradTile; wTile++) {
m[wTile] = input[M_wtile + wTile*M_incr];
m[wTile] = input[M_wtile + wTile * M_incr];
}

float o11 = m[0 * 4 + 0] + m[0 * 4 + 1] + m[0 * 4 + 2] +
@@ -160,4 +155,3 @@ export void winograd_TransformOut_ispc(uniform size_t batch_size,
}
}
}

6 changes: 3 additions & 3 deletions src/neural/loader.cc
@@ -42,7 +42,6 @@

namespace lczero {


namespace {
const std::uint32_t kWeightMagic = 0x1c0;

@@ -125,7 +124,8 @@ FloatVectors LoadFloatsFromPbFile(const std::string& buffer) {
net.min_version().patch());

if (net_ver > lc0_ver)
throw Exception("Invalid weight file: lc0 version >= " + min_version + " required.");
throw Exception("Invalid weight file: lc0 version >= " + min_version +
" required.");

if (net.format().weights_encoding() != pblczero::Format::LINEAR16)
throw Exception("Invalid weight file: wrong encoding.");
@@ -258,7 +258,7 @@ std::string DiscoverWeightsFile() {

// First byte of the protobuf stream is 0x0d for fixed32, so we ignore it as
// our own magic should suffice.
auto magic = reinterpret_cast<std::uint32_t*>(buf+1);
auto magic = reinterpret_cast<std::uint32_t*>(buf + 1);
if (*magic == kWeightMagic) {
std::cerr << "Found pb network file: " << candidate.second << std::endl;
return candidate.second;
2 changes: 1 addition & 1 deletion src/neural/network_check.cc
@@ -25,8 +25,8 @@
Program grant you additional permission to convey the resulting work.
*/

#include "neural/network.h"
#include "neural/factory.h"
#include "neural/network.h"
#include "utils/histogram.h"
#include "utils/random.h"

2 changes: 1 addition & 1 deletion src/neural/network_mux.cc
@@ -208,5 +208,5 @@ void MuxingComputation::ComputeBlocking() {
} // namespace

REGISTER_NETWORK("multiplexing", MuxingNetwork, -1000)

} // namespace lczero
7 changes: 4 additions & 3 deletions src/neural/network_random.cc
@@ -35,12 +35,13 @@ namespace lczero {

class RandomNetworkComputation : public NetworkComputation {
public:
RandomNetworkComputation(int delay, int seed) : delay_ms_(delay), seed_(seed) {}
RandomNetworkComputation(int delay, int seed)
: delay_ms_(delay), seed_(seed) {}
void AddInput(InputPlanes&& input) override {
std::uint64_t hash = seed_;
for (const auto& plane : input) {
hash = HashCat({hash, plane.mask});
std::uint64_t value_hash =
std::uint64_t value_hash =
*reinterpret_cast<const std::uint32_t*>(&plane.value);
hash = HashCat({hash, value_hash});
}
@@ -71,7 +72,7 @@ class RandomNetworkComputation : public NetworkComputation {
class RandomNetwork : public Network {
public:
RandomNetwork(const Weights& /*weights*/, const OptionsDict& options)
: delay_ms_(options.GetOrDefault<int>("delay", 0)),
: delay_ms_(options.GetOrDefault<int>("delay", 0)),
seed_(options.GetOrDefault<int>("seed", 0)) {}
std::unique_ptr<NetworkComputation> NewComputation() override {
return std::make_unique<RandomNetworkComputation>(delay_ms_, seed_);
4 changes: 2 additions & 2 deletions src/neural/network_st_batch.h
@@ -69,8 +69,8 @@ class SingleThreadBatchingNetworkComputation : public NetworkComputation {

// Adds a sample to the parent batch.
void AddInput(InputPlanes&& input) override;
// May not actually compute immediately. Instead computes when all computations
// of the network called this.
// May not actually compute immediately. Instead computes when all
// computations of the network called this.
void ComputeBlocking() override;
// Returns how many times AddInput() was called.
int GetBatchSize() const override { return batch_size_; }
1 change: 0 additions & 1 deletion src/neural/opencl/OpenCLParams.h
@@ -26,5 +26,4 @@ struct OpenCLParams {
bool force_tune = false;
bool tune_exhaustive = false;
int tune_batch_size = 1;

};
4 changes: 2 additions & 2 deletions src/neural/opencl/OpenCLTuner.cc
@@ -350,8 +350,8 @@ std::string Tuner::tune_sgemm(const int m, const int n, const int k,
}
}
if (best_time == 0) {
std::cerr << "Failed to find a working configuration." << std::endl <<
"Check your OpenCL drivers." << std::endl;
std::cerr << "Failed to find a working configuration." << std::endl
<< "Check your OpenCL drivers." << std::endl;
throw std::runtime_error("Tuner failed to find working configuration.");
}
return best_params;