diff --git a/appveyor.yml b/appveyor.yml
index b8afe0fb1a..f773ea3859 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -17,7 +17,8 @@ install:
- cmd: IF %NAME%==opencl set OPENCL=true
- cmd: IF %NAME%==blas set BLAS=true
- cmd: IF %NAME%==blas set GTEST=true
-- cmd: IF %BLAS%==true nuget install OpenBLAS -Version 0.2.14.1 -OutputDirectory C:\cache
+- cmd: IF %BLAS%==true IF NOT EXIST C:\cache\OpenBLAS appveyor DownloadFile https://sjeng.org/ftp/OpenBLAS-0.3.3-win-oldthread.zip
+- cmd: IF %BLAS%==true IF NOT EXIST C:\cache\OpenBLAS 7z x OpenBLAS-0.3.3-win-oldthread.zip -oC:\cache\OpenBLAS
- cmd: IF %OPENCL%==true nuget install opencl-nug -Version 0.777.12 -OutputDirectory C:\cache
- cmd: IF %BLAS%==true IF NOT EXIST C:\cache\ispc-v1.9.2-windows appveyor DownloadFile https://sourceforge.net/projects/ispcmirror/files/v1.9.2/ispc-v1.9.2-windows.zip
- cmd: IF %BLAS%==true IF NOT EXIST C:\cache\ispc-v1.9.2-windows 7z x ispc-v1.9.2-windows.zip -oC:\cache
@@ -52,12 +53,12 @@ cache:
- C:\projects\lc0\subprojects\packagecache
before_build:
- cmd: git submodule update --init --recursive
-- cmd: meson build --backend vs2017 --buildtype release -Dgtest=%GTEST% -Dopencl=%OPENCL% -Dblas=%BLAS% -Dcudnn=%CUDA% -Dispc_native_only=false -Dpopcnt=false -Dcudnn_include="%CUDA_PATH%\include","%PKG_FOLDER%\cuda\include" -Dcudnn_libdirs="%CUDA_PATH%\lib\x64","%PKG_FOLDER%\cuda\lib\x64" -Dprotobuf_include="%PKG_FOLDER%\protobuf\include" -Dprotobuf_libdir="%PKG_FOLDER%\protobuf\lib" -Dopenblas_include="%PKG_FOLDER%\OpenBLAS.0.2.14.1\lib\native\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS.0.2.14.1\lib\native\lib\x64" -Dopencl_include="%PKG_FOLDER%\opencl-nug.0.777.12\build\native\include" -Dopencl_libdirs="%PKG_FOLDER%\opencl-nug.0.777.12\build\native\lib\x64" -Ddefault_library=static
+- cmd: meson build --backend vs2017 --buildtype release -Dgtest=%GTEST% -Dopencl=%OPENCL% -Dblas=%BLAS% -Dcudnn=%CUDA% -Dispc_native_only=false -Dpopcnt=false -Dcudnn_include="%CUDA_PATH%\include","%PKG_FOLDER%\cuda\include" -Dcudnn_libdirs="%CUDA_PATH%\lib\x64","%PKG_FOLDER%\cuda\lib\x64" -Dprotobuf_include="%PKG_FOLDER%\protobuf\include" -Dprotobuf_libdir="%PKG_FOLDER%\protobuf\lib" -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\dist64\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\dist64\lib" -Dopencl_include="%PKG_FOLDER%\opencl-nug.0.777.12\build\native\include" -Dopencl_libdirs="%PKG_FOLDER%\opencl-nug.0.777.12\build\native\lib\x64" -Ddefault_library=static
build_script:
- cmd: IF %APPVEYOR_REPO_TAG%==false msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=true /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
- cmd: IF %APPVEYOR_REPO_TAG%==true msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=PGInstrument /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll"
- cmd: cd build
-- cmd: IF %APPVEYOR_REPO_TAG%==true IF %BLAS%==true copy C:\cache\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.dll
+- cmd: IF %APPVEYOR_REPO_TAG%==true IF %BLAS%==true copy C:\cache\OpenBLAS\dist64\bin\libopenblas.dll
- cmd: IF %APPVEYOR_REPO_TAG%==true IF %OPENCL%==true copy C:\cache\opencl-nug.0.777.12\build\native\bin\OpenCL.dll
- cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true copy "%CUDA_PATH%"\bin\*.dll
- cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true copy %PKG_FOLDER%\cuda\bin\cudnn64_7.dll
@@ -68,7 +69,7 @@ after_build:
- cmd: IF %APPVEYOR_REPO_TAG%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip %APPVEYOR_BUILD_FOLDER%\build\lc0.exe
- cmd: IF %APPVEYOR_REPO_TAG%==true appveyor DownloadFile "https://ci.appveyor.com/api/projects/LeelaChessZero/lczero-client/artifacts/client.exe?branch=release&pr=false&job=Environment%%3A%%20NAME%%3D.exe%%2C%%20GOOS%%3Dwindows"
- cmd: IF %APPVEYOR_REPO_TAG%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip client.exe
-- cmd: IF %APPVEYOR_REPO_TAG%==true IF %BLAS%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip C:\cache\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.dll
+- cmd: IF %APPVEYOR_REPO_TAG%==true IF %BLAS%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip C:\cache\OpenBLAS\dist64\bin\libopenblas.dll
- cmd: IF %APPVEYOR_REPO_TAG%==true IF %OPENCL%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip C:\cache\opencl-nug.0.777.12\build\native\bin\OpenCL.dll
- cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip "%CUDA_PATH%\bin\cudart64_100.dll" "%CUDA_PATH%\bin\cublas64_100.dll"
- cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip "%PKG_FOLDER%\cuda\bin\cudnn64_7.dll"
@@ -91,7 +92,7 @@ deploy:
appveyor_repo_tag: true
test_script:
- cmd: cd build
-- cmd: IF %GTEST%==true copy C:\cache\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.dll
+- cmd: IF %GTEST%==true copy C:\cache\OpenBLAS\dist64\bin\libopenblas.dll
- cmd: IF %GTEST%==true xcopy /s /i C:\cache\syzygy syzygy
- cmd: IF %GTEST%==true meson test --print-errorlogs
- cmd: cd ..
diff --git a/changelog.txt b/changelog.txt
index bbf4629767..fbb902a7bb 100644
--- a/changelog.txt
+++ b/changelog.txt
@@ -1,3 +1,72 @@
+v0.20.0-rc1 (2018-12-22)
+~~~~~~~~~~~
+
+* Squeeze-and-Excitation Networks are now supported! (lc0.org/se)
+* Older text network files are no longer supported.
+* Various performance fixes (the most significant being fast approximate math
+  functions).
+* For systems with multiple GPUs, in addition to the "multiplexing" backend
+  there are now also "demux" and "roundrobin" backends.
+* Compiler settings tweaks (VS2017 is used for Windows builds, LTO is always
+  enabled, and Windows releases have PGO enabled).
+* Benchmark mode has more options now (e.g. movetime) and saner defaults.
+* Added an option to prevent the engine from resigning too early (used in
+  training).
+* Fixed a bug where the number of visits could be too high in collision nodes.
+  The fix is pretty hacky; a better fix will come later.
+* 32-bit version compiles again.
+
+v0.19.1 (2018-12-10)
+~~~~~~~
+
+(no changes relative to v0.19.1-rc2)
+
+v0.19.1-rc2 (2018-12-07)
+~~~~~~~~~~~
+
+* Temperature and FPU related params. (#568)
+* Rework Cpuct related params. (#567)
+
+v0.19.1-rc1 (2018-12-06)
+~~~~~~~~~~~
+
+* Updated cpuct formula from alphazero paper. (#563)
+* remove UpdateFromUciOptions() from EnsureReady() (#558)
+* revert IsSearchActive() and better fix for one of #500 crashes (#555)
+
+v0.19.0 (2018-11-19)
+~~~~~~~
+
+* remove Wait() from EngineController::Stop() (#522)
+
+v0.19.0-rc5 (2018-11-17)
+~~~~~~~~~~~
+
+* OpenCL: replace thread_local with a resource pool. (#516)
+* optional wtime and btime (#515)
+* Make convolve1 work with workgroup size of 128 (#514)
+* adjust average depth calculation for multivisits (#510)
+
+v0.19.0-rc4 (2018-11-12)
+~~~~~~~~~~~
+
+* Microseconds have 6 digits, not 3! (#505)
+* use bestmove_is_sent_ for Search::IsSearchActive() (#502)
+
+v0.19.0-rc3 (2018-11-07)
+~~~~~~~~~~~
+
+* Fix OpenCL tuner always loading the first saved tuning (#491)
+* Do not show warning when ComputeBlocking() takes too much time. (#494)
+* Output microseconds in log rather than milliseconds. (#495)
+* Add benchmark features (#483)
+* Fix EncodePositionForNN test failure (#490)
+
+v0.19.0-rc2 (2018-11-03)
+~~~~~~~~~~~
+
+* Version v0.19.0-rc1 reported its version as v0.19.0-dev.
+  Therefore v0.19.0-rc2 is released with this issue fixed.
+
v0.19.0-rc1 (2018-11-03)
~~~~~~~~~~~
diff --git a/meson.build b/meson.build
index c30d4592b6..9bbebb794c 100644
--- a/meson.build
+++ b/meson.build
@@ -15,7 +15,7 @@
# along with Leela Chess. If not, see <http://www.gnu.org/licenses/>.
project('lc0', 'cpp',
- default_options : ['cpp_std=c++14', 'b_ndebug=if-release', 'b_lto=true'],
+ default_options : ['cpp_std=c++14', 'b_ndebug=if-release'],
meson_version: '>=0.45')
cc = meson.get_compiler('cpp')
@@ -26,7 +26,6 @@ endif
if cc.get_id() == 'clang' or cc.get_id() == 'gcc'
add_project_arguments('-Wextra', language : 'cpp')
add_project_arguments('-pedantic', language : 'cpp')
- add_project_arguments('-ffast-math', language : 'cpp')
if get_option('buildtype') == 'release'
add_project_arguments('-march=native', language : 'cpp')
@@ -51,7 +50,10 @@ else
endif
protoc = find_program('protoc', required : false)
# For tensorflow skip system protobuf, chances are it will not work.
-if not protobuf_dep.found() or not protoc.found() or get_option('tensorflow')
+if get_option('protobuf-3-6-0')
+ deps += subproject('protobuf-3.6.0').get_variable('protobuf_dep')
+ protoc = subproject('protobuf-3.6.0').get_variable('protoc')
+elif not protobuf_dep.found() or not protoc.found() or get_option('tensorflow')
deps += subproject('protobuf').get_variable('protobuf_dep')
protoc = subproject('protobuf').get_variable('protoc')
else
@@ -98,9 +100,11 @@ files += [
'src/neural/factory.cc',
'src/neural/loader.cc',
'src/neural/network_check.cc',
+ 'src/neural/network_demux.cc',
'src/neural/network_legacy.cc',
'src/neural/network_mux.cc',
'src/neural/network_random.cc',
+ 'src/neural/network_rr.cc',
'src/neural/network_st_batch.cc',
'src/neural/writer.cc',
'src/selfplay/game.cc',
@@ -155,6 +159,7 @@ if get_option('build_backends')
tensorflow_include,
tensorflow_include[0] + '/bazel-genfiles',
tensorflow_include[0] + '/tensorflow/contrib/makefile/downloads',
+ tensorflow_include[0] + '/tensorflow/contrib/makefile/downloads/absl',
tensorflow_include[0] + '/tensorflow/contrib/makefile/downloads/eigen',
tensorflow_include[0] + '/tensorflow/contrib/makefile/downloads/gemmlowp',
tensorflow_include[0] + '/tensorflow/contrib/makefile/downloads/nsync/public',
@@ -179,6 +184,9 @@ if get_option('build_backends')
mkl_libdirs = get_option('mkl_libdirs')
mkl_lib = cc.find_library('mkl_rt', dirs: mkl_libdirs, required: false)
+ if not mkl_lib.found()
+ mkl_lib = cc.find_library('mklml', dirs: mkl_libdirs, required: false)
+ endif
openblas_libdirs = get_option('openblas_libdirs')
openblas_lib = cc.find_library('openblas.dll', dirs: openblas_libdirs, required: false)
@@ -394,9 +402,9 @@ endif # if get_option('build_backends')
if not has_backends and get_option('build_backends')
error('''
- No usable computation backends (cudnn/tensorflow/etc) are found.
- If you want to build it with random only backend, pass
- -D build_backends=false to a meson build.''')
+ No usable computation backends (cudnn/opencl/blas/etc) enabled.
+ If you want to build with the random backend only, add
+ -Dbuild_backends=false to the build command line.''')
endif
diff --git a/meson_options.txt b/meson_options.txt
index 5d8bd7d012..04993f626f 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -117,3 +117,8 @@ option('gtest',
type: 'boolean',
value: true,
description: 'Build gtest tests')
+
+option('protobuf-3-6-0',
+ type: 'boolean',
+ value: false,
+ description: 'Use the protobuf 3.6.0 subproject')
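
The new option is a standard meson boolean, so a build that wants the bundled
protobuf 3.6.0 subproject would opt in on the configure line; a usage sketch
(flags other than -Dprotobuf-3-6-0 are illustrative):

    meson build --buildtype release -Dprotobuf-3-6-0=true
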
diff --git a/src/benchmark/benchmark.cc b/src/benchmark/benchmark.cc
index eb83368fea..234f0b2b5c 100644
--- a/src/benchmark/benchmark.cc
+++ b/src/benchmark/benchmark.cc
@@ -56,17 +56,6 @@ void Benchmark::Run() {
options.Add<IntOption>(kNNCacheSizeId, 0, 999999999) = 200000;
options.Add<IntOption>(kThreadsOptionId, 1, 128) = kDefaultThreads;
- auto defaults = options.GetMutableDefaultsOptions();
-
- defaults->Set<int>(SearchParams::kMiniBatchSizeId.GetId(), 256);
- defaults->Set<float>(SearchParams::kFpuReductionId.GetId(), 1.2f);
- defaults->Set<float>(SearchParams::kCpuctId.GetId(), 3.4f);
- defaults->Set<float>(SearchParams::kPolicySoftmaxTempId.GetId(), 2.2f);
- defaults->Set<int>(SearchParams::kMaxCollisionVisitsId.GetId(), 9999);
- defaults->Set<int>(SearchParams::kMaxCollisionEventsId.GetId(), 32);
- defaults->Set<int>(SearchParams::kCacheHistoryLengthId.GetId(), 0);
- defaults->Set<bool>(SearchParams::kOutOfOrderEvalId.GetId(), true);
-
if (!options.ProcessAllFlags()) return;
try {
diff --git a/src/chess/board.cc b/src/chess/board.cc
index 609ae57ff2..d9689ab26c 100644
--- a/src/chess/board.cc
+++ b/src/chess/board.cc
@@ -188,6 +188,7 @@ BitBoard ChessBoard::en_passant() const { return pawns_ - pawns(); }
MoveList ChessBoard::GeneratePseudolegalMoves() const {
MoveList result;
+ result.reserve(60);
for (auto source : our_pieces_) {
// King
if (source == our_king_) {
@@ -336,8 +337,8 @@ MoveList ChessBoard::GeneratePseudolegalMoves() const {
}
// Knight.
{
- for (const auto destination : kKnightAttacks[source.as_int()]) {
- if (our_pieces_.get(destination)) continue;
+ for (const auto destination :
+ kKnightAttacks[source.as_int()] - our_pieces_) {
result.emplace_back(source, destination);
}
}
@@ -405,9 +406,6 @@ bool ChessBoard::ApplyMove(Move move) {
return reset_50_moves;
}
- // Now destination square for our piece is known.
- our_pieces_.set(to);
-
// Promotion
if (move.promotion() != Move::Promotion::None) {
switch (move.promotion()) {
@@ -456,13 +454,13 @@ bool ChessBoard::ApplyMove(Move move) {
bool ChessBoard::IsUnderAttack(BoardSquare square) const {
const int row = square.row();
const int col = square.col();
- // Check king
+ // Check king.
{
const int krow = their_king_.row();
const int kcol = their_king_.col();
if (std::abs(krow - row) <= 1 && std::abs(kcol - col) <= 1) return true;
}
- // Check Rooks (and queen)
+ // Check rooks (and queens).
if (kRookAttacks[square.as_int()].intersects(their_pieces_ * rooks_)) {
for (const auto& direction : kRookDirections) {
auto dst_row = row;
@@ -480,7 +478,7 @@ bool ChessBoard::IsUnderAttack(BoardSquare square) const {
}
}
}
- // Check Bishops
+ // Check bishops.
if (kBishopAttacks[square.as_int()].intersects(their_pieces_ * bishops_)) {
for (const auto& direction : kBishopDirections) {
auto dst_row = row;
@@ -498,11 +496,11 @@ bool ChessBoard::IsUnderAttack(BoardSquare square) const {
}
}
}
- // Check pawns
+ // Check pawns.
if (kPawnAttacks[square.as_int()].intersects(their_pieces_ * pawns_)) {
return true;
}
- // Check knights
+ // Check knights.
{
if (kKnightAttacks[square.as_int()].intersects(their_pieces_ - their_king_ -
rooks_ - bishops_ -
@@ -513,18 +511,135 @@ bool ChessBoard::IsUnderAttack(BoardSquare square) const {
return false;
}
-bool ChessBoard::IsLegalMove(Move move, bool was_under_check) const {
- const auto& from = move.from();
- const auto& to = move.to();
+KingAttackInfo ChessBoard::GenerateKingAttackInfo() const {
+ KingAttackInfo king_attack_info;
- // If we are already under check, also apply move and check if valid.
- // TODO(mooskagh) Optimize this case
- if (was_under_check) {
- ChessBoard board(*this);
- board.ApplyMove(move);
- return !board.IsUnderCheck();
+ // Number of attackers that give check (used for double check detection).
+ unsigned num_king_attackers = 0;
+
+ const int row = our_king_.row();
+ const int col = our_king_.col();
+ // King checks are unnecessary, as kings cannot give check.
+ // Check rooks (and queens).
+ if (kRookAttacks[our_king_.as_int()].intersects(their_pieces_ * rooks_)) {
+ for (const auto& direction : kRookDirections) {
+ auto dst_row = row;
+ auto dst_col = col;
+ BitBoard attack_line(0);
+ bool possible_pinned_piece_found = false;
+ BoardSquare possible_pinned_piece;
+ while (true) {
+ dst_row += direction.first;
+ dst_col += direction.second;
+ if (!BoardSquare::IsValid(dst_row, dst_col)) break;
+ const BoardSquare destination(dst_row, dst_col);
+ if (our_pieces_.get(destination)) {
+ if (possible_pinned_piece_found) {
+ // No pieces pinned.
+ break;
+ } else {
+ // This is a possible pinned piece.
+ possible_pinned_piece_found = true;
+ possible_pinned_piece = destination;
+ }
+ }
+ if (!possible_pinned_piece_found) {
+ attack_line.set(destination);
+ }
+ if (their_pieces_.get(destination)) {
+ if (rooks_.get(destination)) {
+ if (possible_pinned_piece_found) {
+ // Store the pinned piece.
+ king_attack_info.pinned_pieces_.set(possible_pinned_piece);
+ } else {
+ // Update attacking lines.
+ king_attack_info.attacking_lines_ =
+ king_attack_info.attacking_lines_ + attack_line;
+ num_king_attackers++;
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+ // Check bishops.
+ if (kBishopAttacks[our_king_.as_int()].intersects(their_pieces_ * bishops_)) {
+ for (const auto& direction : kBishopDirections) {
+ auto dst_row = row;
+ auto dst_col = col;
+ BitBoard attack_line(0);
+ bool possible_pinned_piece_found = false;
+ BoardSquare possible_pinned_piece;
+ while (true) {
+ dst_row += direction.first;
+ dst_col += direction.second;
+ if (!BoardSquare::IsValid(dst_row, dst_col)) break;
+ const BoardSquare destination(dst_row, dst_col);
+ if (our_pieces_.get(destination)) {
+ if (possible_pinned_piece_found) {
+ // No pieces pinned.
+ break;
+ } else {
+ // This is a possible pinned piece.
+ possible_pinned_piece_found = true;
+ possible_pinned_piece = destination;
+ }
+ }
+ if (!possible_pinned_piece_found) {
+ attack_line.set(destination);
+ }
+ if (their_pieces_.get(destination)) {
+ if (bishops_.get(destination)) {
+ if (possible_pinned_piece_found) {
+ // Store the pinned piece.
+ king_attack_info.pinned_pieces_.set(possible_pinned_piece);
+ } else {
+ // Update attacking lines.
+ king_attack_info.attacking_lines_ =
+ king_attack_info.attacking_lines_ + attack_line;
+ num_king_attackers++;
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+ // Check pawns.
+ const BitBoard attacking_pawns =
+ kPawnAttacks[our_king_.as_int()] * their_pieces_ * pawns_;
+ king_attack_info.attacking_lines_ =
+ king_attack_info.attacking_lines_ + attacking_pawns;
+
+ if (attacking_pawns.as_int()) {
+ // No more than one pawn can give check.
+ num_king_attackers++;
}
+ // Check knights.
+ const BitBoard attacking_knights =
+ kKnightAttacks[our_king_.as_int()] *
+ (their_pieces_ - their_king_ - rooks_ - bishops_ - (pawns_ * kPawnMask));
+ king_attack_info.attacking_lines_ =
+ king_attack_info.attacking_lines_ + attacking_knights;
+
+ if (attacking_knights.as_int()) {
+ // No more than one knight can give check.
+ num_king_attackers++;
+ }
+
+ assert(num_king_attackers <= 2);
+ king_attack_info.double_check_ = (num_king_attackers == 2);
+
+ return king_attack_info;
+}
+
+bool ChessBoard::IsLegalMove(Move move,
+ const KingAttackInfo& king_attack_info) const {
+ const auto& from = move.from();
+ const auto& to = move.to();
+
// En passant. Complex but rare. Just apply
// and check that we are not under check.
if (from.row() == 4 && pawns_.get(from) && from.col() != to.col() &&
@@ -534,83 +649,72 @@ bool ChessBoard::IsLegalMove(Move move, bool was_under_check) const {
return !board.IsUnderCheck();
}
- // If it's kings move, check that destination
- // is not under attack.
+ // Check if we are already under check.
+ if (king_attack_info.in_check()) {
+ // King move.
+ if (from == our_king_) {
+ // Just apply and check that we are not under check.
+ ChessBoard board(*this);
+ board.ApplyMove(move);
+ return !board.IsUnderCheck();
+ }
+
+ // Pinned pieces can never resolve a check.
+ if (king_attack_info.is_pinned(from)) {
+ return false;
+ }
+
+ // The piece to move is not a king and is not pinned.
+ if (king_attack_info.in_double_check()) {
+ // Only a king move can resolve the double check.
+ return false;
+ } else {
+ // Only one attacking piece gives check.
+ // Our piece is free to move (not pinned). Check if the attacker is
+ // captured or interposed after the piece has moved to its destination
+ // square.
+ return king_attack_info.is_on_attack_line(to);
+ }
+ }
+
+ // Castlings were checked earlier.
+ // Moreover, no pseudolegal king moves to an attacked square are generated.
+ // If it's a king move at this point, it's certainly legal.
if (from == our_king_) {
- // Castlings were checked earlier.
- if (std::abs(static_cast<int>(from.col()) - static_cast<int>(to.col())) > 1)
- return true;
- return !IsUnderAttack(to);
+ return true;
}
- // Not check that piece was pinned. And it was, check that after the move
- // it is still on like of attack.
- int dx = from.col() - our_king_.col();
- int dy = from.row() - our_king_.row();
-
- // If it's not on the same file/rank/diagonal as our king, cannot be pinned.
- if (dx != 0 && dy != 0 && std::abs(dx) != std::abs(dy)) return true;
- dx = (dx > 0) - (dx < 0); // Sign.
- dy = (dy > 0) - (dy < 0);
- auto col = our_king_.col();
- auto row = our_king_.row();
- while (true) {
- col += dx;
- row += dy;
- // Attacking line left board, good.
- if (!BoardSquare::IsValid(row, col)) return true;
- const BoardSquare square(row, col);
- // The source square of the move is now free.
- if (square == from) continue;
- // The destination square if the move is our piece. King is not under
- // attack.
- if (square == to) return true;
- // Our piece on the line. Not under attack.
- if (our_pieces_.get(square)) return true;
- if (their_pieces_.get(square)) {
- if (dx == 0 || dy == 0) {
- // Have to be afraid of rook-like piece.
- return !rooks_.get(square);
- } else {
- // Have to be afraid of bishop-like piece.
- return !bishops_.get(square);
- }
- return true;
- }
+ // If we get here, we are not under check.
+ // If the piece is not pinned, it is free to move anywhere.
+ if (!king_attack_info.is_pinned(from)) return true;
+
+ // The piece is pinned. Now check that it stays on the same line w.r.t. the
+ // king.
+ int dx_from = from.col() - our_king_.col();
+ int dy_from = from.row() - our_king_.row();
+ int dx_to = to.col() - our_king_.col();
+ int dy_to = to.row() - our_king_.row();
+
+ if (dx_from == 0 || dx_to == 0) {
+ return (dx_from == dx_to);
+ } else {
+ return (dx_from * dy_to == dx_to * dy_from);
}
}
MoveList ChessBoard::GenerateLegalMoves() const {
- const bool was_under_check = IsUnderCheck();
+ const KingAttackInfo king_attack_info = GenerateKingAttackInfo();
MoveList move_list = GeneratePseudolegalMoves();
MoveList result;
result.reserve(move_list.size());
for (Move m : move_list) {
- if (IsLegalMove(m, was_under_check)) result.emplace_back(m);
+ if (IsLegalMove(m, king_attack_info)) result.emplace_back(m);
}
return result;
}
-std::vector<MoveExecution> ChessBoard::GenerateLegalMovesAndPositions() const {
- MoveList move_list = GeneratePseudolegalMoves();
- std::vector<MoveExecution> result;
-
- for (const auto& move : move_list) {
- result.emplace_back();
- auto& newboard = result.back().board;
- newboard = *this;
- result.back().reset_50_moves = newboard.ApplyMove(move);
- if (newboard.IsUnderCheck()) {
- result.pop_back();
- continue;
- }
- result.back().move = move;
- }
- return result;
-}
-
void ChessBoard::SetFromFen(const std::string& fen, int* no_capture_ply,
int* moves) {
Clear();
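
The rewritten pinned-piece branch of IsLegalMove above is a plain 2-D
collinearity test: a pinned piece may only move along the ray through the
king. A minimal standalone sketch of the same arithmetic, with hypothetical
coordinates rather than the engine's BoardSquare type:

    #include <cassert>

    // Returns true if (to_col, to_row) stays on the king->from line.
    // Mirrors the dx/dy cross-product test in ChessBoard::IsLegalMove.
    bool StaysOnPinLine(int king_col, int king_row, int from_col, int from_row,
                        int to_col, int to_row) {
      const int dx_from = from_col - king_col;
      const int dy_from = from_row - king_row;
      const int dx_to = to_col - king_col;
      const int dy_to = to_row - king_row;
      if (dx_from == 0 || dx_to == 0) return dx_from == dx_to;
      return dx_from * dy_to == dx_to * dy_from;  // Collinear direction vectors.
    }

    int main() {
      // A rook pinned on the e-file may slide along the file...
      assert(StaysOnPinLine(4, 0, 4, 3, 4, 6));
      // ...but may not leave it.
      assert(!StaysOnPinLine(4, 0, 4, 3, 6, 3));
    }
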
diff --git a/src/chess/board.h b/src/chess/board.h
index 830e4c35cf..9188c5f6f3 100644
--- a/src/chess/board.h
+++ b/src/chess/board.h
@@ -33,7 +33,22 @@
namespace lczero {
-struct MoveExecution;
+// Represents king attack info used during legal move detection.
+class KingAttackInfo {
+ public:
+ bool in_check() const { return attacking_lines_.as_int(); }
+ bool in_double_check() const { return double_check_; }
+ bool is_pinned(const BoardSquare square) const {
+ return pinned_pieces_.get(square);
+ }
+ bool is_on_attack_line(const BoardSquare square) const {
+ return attacking_lines_.get(square);
+ }
+
+ bool double_check_ = false;
+ BitBoard pinned_pieces_ = {0};
+ BitBoard attacking_lines_ = {0};
+};
// Represents a board position.
// Unlike most chess engines, the board is mirrored for black.
@@ -66,23 +81,25 @@ class ChessBoard {
bool ApplyMove(Move move);
// Checks if the square is under attack from "theirs" (black).
bool IsUnderAttack(BoardSquare square) const;
+ // Generates the king attack info used for legal move detection.
+ KingAttackInfo GenerateKingAttackInfo() const;
// Checks if "our" (white) king is under check.
bool IsUnderCheck() const { return IsUnderAttack(our_king_); }
- // Checks whether at least one of the sides has mating material.
+ // Checks whether at least one of the sides has mating material.
bool HasMatingMaterial() const;
// Generates legal moves.
MoveList GenerateLegalMoves() const;
// Check whether pseudolegal move is legal.
- bool IsLegalMove(Move move, bool was_under_check) const;
- // Returns a list of legal moves and board positions after the move is made.
- std::vector<MoveExecution> GenerateLegalMovesAndPositions() const;
+ bool IsLegalMove(Move move, const KingAttackInfo& king_attack_info) const;
uint64_t Hash() const {
return HashCat({our_pieces_.as_int(), their_pieces_.as_int(),
rooks_.as_int(), bishops_.as_int(), pawns_.as_int(),
- our_king_.as_int(), their_king_.as_int(),
- castlings_.as_int(), flipped_});
+ (static_cast<uint32_t>(our_king_.as_int()) << 24) |
+ (static_cast<uint32_t>(their_king_.as_int()) << 16) |
+ (static_cast<uint32_t>(castlings_.as_int()) << 8) |
+ static_cast<uint32_t>(flipped_)});
}
class Castlings {
@@ -168,8 +185,8 @@ class ChessBoard {
// Pawns.
// Ranks 1 and 8 have special meaning. Pawn at rank 1 means that
// corresponding white pawn on rank 4 can be taken en passant. Rank 8 is the
- // same for black pawns. Those "fake" pawns are not present in white_ and
- // black_ bitboards.
+ // same for black pawns. Those "fake" pawns are not present in our_pieces_ and
+ // their_pieces_ bitboards.
BitBoard pawns_;
BoardSquare our_king_;
BoardSquare their_king_;
@@ -177,11 +194,4 @@ class ChessBoard {
bool flipped_ = false; // aka "Black to move".
};
-// Stores the move and state of the board after the move is done.
-struct MoveExecution {
- Move move;
- ChessBoard board;
- bool reset_50_moves;
-};
-
} // namespace lczero
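
The reworked Hash() packs the two 6-bit king squares, the 4 castling bits,
and the flipped flag into one word, so HashCat mixes six 64-bit values
instead of nine. A quick standalone check that the chosen shifts (24/16/8)
keep the fields from overlapping, assuming the field widths stated in
board.h:

    #include <cassert>
    #include <cstdint>

    int main() {
      const std::uint32_t our_king = 63;    // 6-bit square index (max value).
      const std::uint32_t their_king = 63;  // 6-bit square index.
      const std::uint32_t castlings = 0xf;  // 4 castling-rights bits.
      const std::uint32_t flipped = 1;      // 1 bit.
      const std::uint32_t packed =
          (our_king << 24) | (their_king << 16) | (castlings << 8) | flipped;
      // Even with every field at its maximum, nothing bleeds into a neighbor.
      assert(packed == 0x3f3f0f01u);
    }
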
diff --git a/src/engine.cc b/src/engine.cc
index ec80370dbb..6d160f8fcd 100644
--- a/src/engine.cc
+++ b/src/engine.cc
@@ -58,19 +58,17 @@ const OptionId kMoveOverheadId{
"Amount of time, in milliseconds, that the engine subtracts from it's "
"total available time (to compensate for slow connection, interprocess "
"communication, etc)."};
-const OptionId kTimePeakPlyId{"time-peak-halfmove", "TimePeakHalfmove",
- "For which halfmove the time budgeting algorithm "
- "should allocate the maximum amount of time."};
-const OptionId kTimeLeftWidthId{
- "time-left-width", "TimeLeftWidth",
- "\"Width\" of time budget graph to the left of the peak value. For small "
- "values, moves far from the peak will get little time; for larger values, "
- "they will get almost the same time as the peak move."};
-const OptionId kTimeRightWidthId{
- "time-right-width", "TimeRightWidth",
- "\"Width\" of time budget graph to the right of the peak value. For small "
- "values, moves far from the peak will get little time; for larger values, "
- "they will get almost the same time as the peak move."};
+const OptionId kTimeMidpointMoveId{
+ "time-midpoint-move", "TimeMidpointMove",
+ "The move where the time budgeting algorithm guesses half of all "
+ "games to be completed by. Half of the time allocated for the first move "
+ "is allocated at approximately this move."};
+const OptionId kTimeSteepnessId{
+ "time-steepness", "TimeSteepness",
+ "\"Steepness\" of the function the time budgeting algorithm uses to "
+ "consider when games are completed. Lower values leave more time for "
+ "the endgame, higher values use more time for each move before the "
+ "midpoint."};
const OptionId kSyzygyTablebaseId{
"syzygy-paths", "SyzygyPath",
"List of Syzygy tablebase directories, list entries separated by system "
@@ -99,13 +97,22 @@ const size_t kAvgCacheItemSize =
NNCache::GetItemStructSize() + sizeof(CachedNNRequest) +
sizeof(CachedNNRequest::IdxAndProb) * kAvgMovesPerPosition;
-float ComputeMoveWeight(int ply, float peak, float left_width,
- float right_width) {
- // Inflection points of the function are at ply = peak +/- width.
- // At these points the function is at 2/3 of its max value.
- const float width = ply > peak ? right_width : left_width;
- constexpr float width_scaler = 1.518651485f; // 2 / log(2 + sqrt(3))
- return std::pow(std::cosh((ply - peak) / width / width_scaler), -2.0f);
+float ComputeEstimatedMovesToGo(int ply, float midpoint, float steepness) {
+ // An analysis of chess games shows that the distribution of game lengths
+ // looks like a log-logistic distribution. The mean residual time function
+ // calculates how many more moves are expected in the game given that we are
+ // at the current ply. Given that this function can be expensive to compute,
+ // we calculate the median residual time function instead. This is derived and
+ // shown to be similar to the mean residual time in "Some Useful Properties of
+ // Log-Logistic Random Variables for Health Care Simulations" (Clark &
+ // El-Taha, 2015).
+ // midpoint: The median length of games.
+ // steepness: How quickly the function drops off from its maximum value,
+ // around the midpoint.
+ float move = ply / 2.0f;
+ return midpoint * std::pow(1 + 2 * std::pow(move / midpoint, steepness),
+ 1 / steepness) -
+ move;
}
} // namespace
@@ -125,36 +132,22 @@ void EngineController::PopulateOptions(OptionsParser* options) {
options->Add<IntOption>(kNNCacheSizeId, 0, 999999999) = 200000;
options->Add<FloatOption>(kSlowMoverId, 0.0f, 100.0f) = 1.0f;
options->Add<IntOption>(kMoveOverheadId, 0, 100000000) = 200;
- options->Add<FloatOption>(kTimePeakPlyId, -1000.0f, 1000.0f) = 26.2f;
- options->Add<FloatOption>(kTimeLeftWidthId, 0.0f, 1000.0f) = 82.0f;
- options->Add<FloatOption>(kTimeRightWidthId, 0.0f, 1000.0f) = 74.0f;
+ options->Add<FloatOption>(kTimeMidpointMoveId, 1.0f, 100.0f) = 51.5f;
+ options->Add<FloatOption>(kTimeSteepnessId, 1.0f, 100.0f) = 7.0f;
options->Add<StringOption>(kSyzygyTablebaseId);
// Add "Ponder" option to signal to GUIs that we support pondering.
// This option is currently not used by lc0 in any way.
options->Add<BoolOption>(kPonderId) = true;
- options->Add<FloatOption>(kSpendSavedTimeId, 0.0f, 1.0f) = 0.6f;
+ options->Add<FloatOption>(kSpendSavedTimeId, 0.0f, 1.0f) = 1.0f;
options->Add<IntOption>(kRamLimitMbId, 0, 100000000) = 0;
// Hide time curve options.
- options->HideOption(kTimePeakPlyId);
- options->HideOption(kTimeLeftWidthId);
- options->HideOption(kTimeRightWidthId);
+ options->HideOption(kTimeMidpointMoveId);
+ options->HideOption(kTimeSteepnessId);
NetworkFactory::PopulateOptions(options);
SearchParams::Populate(options);
ConfigFile::PopulateOptions(options);
-
- auto defaults = options->GetMutableDefaultsOptions();
-
- defaults->Set<int>(SearchParams::kMiniBatchSizeId.GetId(), 256);
- defaults->Set<float>(SearchParams::kFpuReductionId.GetId(), 1.2f);
- defaults->Set<float>(SearchParams::kCpuctId.GetId(), 3.0f);
- defaults->Set<float>(SearchParams::kCpuctFactorId.GetId(), 2.0f);
- defaults->Set<float>(SearchParams::kPolicySoftmaxTempId.GetId(), 2.2f);
- defaults->Set<int>(SearchParams::kMaxCollisionVisitsId.GetId(), 9999);
- defaults->Set<int>(SearchParams::kMaxCollisionEventsId.GetId(), 32);
- defaults->Set<int>(SearchParams::kCacheHistoryLengthId.GetId(), 0);
- defaults->Set<bool>(SearchParams::kOutOfOrderEvalId.GetId(), true);
}
SearchLimits EngineController::PopulateSearchLimits(
@@ -162,11 +155,6 @@ SearchLimits EngineController::PopulateSearchLimits(
std::chrono::steady_clock::time_point start_time) {
SearchLimits limits;
int64_t move_overhead = options_.Get<int>(kMoveOverheadId.GetId());
- if (params.movetime) {
- limits.search_deadline = start_time + std::chrono::milliseconds(
- *params.movetime - move_overhead);
- }
-
const optional<int64_t>& time = (is_black ? params.btime : params.wtime);
if (!params.searchmoves.empty()) {
limits.searchmoves.reserve(params.searchmoves.size());
@@ -175,6 +163,10 @@ SearchLimits EngineController::PopulateSearchLimits(
}
}
limits.infinite = params.infinite || params.ponder;
+ if (params.movetime && !limits.infinite) {
+ limits.search_deadline = start_time + std::chrono::milliseconds(
+ *params.movetime - move_overhead);
+ }
if (params.nodes) limits.visits = *params.nodes;
int ram_limit = options_.Get<int>(kRamLimitMbId.GetId());
if (ram_limit) {
@@ -192,19 +184,26 @@ SearchLimits EngineController::PopulateSearchLimits(
const optional<int64_t>& inc = is_black ? params.binc : params.winc;
int increment = inc ? std::max(int64_t(0), *inc) : 0;
- int movestogo = params.movestogo.value_or(50);
- // Fix non-standard uci command.
- if (movestogo == 0) movestogo = 1;
-
// How to scale moves time.
float slowmover = options_.Get<float>(kSlowMoverId.GetId());
- float time_curve_peak = options_.Get<float>(kTimePeakPlyId.GetId());
- float time_curve_left_width = options_.Get<float>(kTimeLeftWidthId.GetId());
- float time_curve_right_width = options_.Get<float>(kTimeRightWidthId.GetId());
+ float time_curve_midpoint = options_.Get<float>(kTimeMidpointMoveId.GetId());
+ float time_curve_steepness = options_.Get<float>(kTimeSteepnessId.GetId());
+
+ float movestogo =
+ ComputeEstimatedMovesToGo(ply, time_curve_midpoint, time_curve_steepness);
+
+ // If the number of moves remaining until the time control is less than
+ // the estimated number of moves left in the game, then use the number of
+ // moves until the time control instead.
+ if (params.movestogo &&
+ *params.movestogo > 0 && // Ignore non-standard uci command.
+ *params.movestogo < movestogo) {
+ movestogo = *params.movestogo;
+ }
- // Total time till control including increments.
+ // Total time, including increments, until time control.
auto total_moves_time =
- std::max(int64_t{0}, *time + increment * (movestogo - 1) - move_overhead);
+ std::max(0.0f, *time + increment * (movestogo - 1) - move_overhead);
// If there is time spared from previous searches, the `time_to_squander` part
// of it will be used immediately, remove that from planning.
@@ -216,20 +215,12 @@ SearchLimits EngineController::PopulateSearchLimits(
total_moves_time -= time_to_squander;
}
- constexpr int kSmartPruningToleranceMs = 200;
- float this_move_weight = ComputeMoveWeight(
- ply, time_curve_peak, time_curve_left_width, time_curve_right_width);
- float other_move_weights = 0.0f;
- for (int i = 1; i < movestogo; ++i)
- other_move_weights +=
- ComputeMoveWeight(ply + 2 * i, time_curve_peak, time_curve_left_width,
- time_curve_right_width);
- // Compute the move time without slowmover.
- float this_move_time = total_moves_time * this_move_weight /
- (this_move_weight + other_move_weights);
+ // Evenly split total time between all moves.
+ float this_move_time = total_moves_time / movestogo;
// Only extend thinking time with slowmover if smart pruning can potentially
// reduce it.
+ constexpr int kSmartPruningToleranceMs = 200;
if (slowmover < 1.0 ||
this_move_time * slowmover > kSmartPruningToleranceMs) {
this_move_time *= slowmover;
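
For intuition about the new time manager: ComputeEstimatedMovesToGo is the
median residual move count of a log-logistic game-length distribution, so it
starts at the midpoint, decreases toward a minimum of a few moves, and never
reaches zero, always leaving some time for the endgame. A self-contained
sketch reproducing the formula with the new defaults (midpoint 51.5,
steepness 7.0); the printed values are approximate:

    #include <cmath>
    #include <cstdio>

    // Same formula as ComputeEstimatedMovesToGo above: the median residual
    // move count of a log-logistic game-length distribution.
    float EstimatedMovesToGo(int ply, float midpoint, float steepness) {
      const float move = ply / 2.0f;
      return midpoint * std::pow(1 + 2 * std::pow(move / midpoint, steepness),
                                 1 / steepness) -
             move;
    }

    int main() {
      std::printf("%.1f\n", EstimatedMovesToGo(0, 51.5f, 7.0f));    // ~51.5
      std::printf("%.1f\n", EstimatedMovesToGo(60, 51.5f, 7.0f));   // ~21.8
      std::printf("%.1f\n", EstimatedMovesToGo(103, 51.5f, 7.0f));  // ~8.8
    }
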
diff --git a/src/mcts/node.cc b/src/mcts/node.cc
index 2d754dd161..d968377be2 100644
--- a/src/mcts/node.cc
+++ b/src/mcts/node.cc
@@ -233,7 +233,10 @@ bool Node::TryStartScoreUpdate() {
return true;
}
-void Node::CancelScoreUpdate(int multivisit) { n_in_flight_ -= multivisit; }
+void Node::CancelScoreUpdate(int multivisit) {
+ n_in_flight_ -= multivisit;
+ best_child_cached_ = nullptr;
+}
void Node::FinalizeScoreUpdate(float v, int multivisit) {
// Recompute Q.
@@ -246,6 +249,18 @@ void Node::FinalizeScoreUpdate(float v, int multivisit) {
n_ += multivisit;
// Decrement virtual loss.
n_in_flight_ -= multivisit;
+ // Best child is potentially no longer valid.
+ best_child_cached_ = nullptr;
+}
+
+void Node::UpdateBestChild(const Iterator& best_edge, int visits_allowed) {
+ best_child_cached_ = best_edge.node();
+ // An edge can point to an unexpanded node with n==0. These nodes don't
+ // increment their n_in_flight_ the same way and thus are not safe to cache.
+ if (best_child_cached_ && best_child_cached_->GetN() == 0) {
+ best_child_cached_ = nullptr;
+ }
+ best_child_cache_in_flight_limit_ = visits_allowed + n_in_flight_;
}
Node::NodeRange Node::ChildNodes() const { return child_.get(); }
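
The caching contract in node.cc above is: UpdateBestChild records, at cache
time, how many additional in-flight visits the chosen child can absorb before
the PUCT ranking might change, and FinalizeScoreUpdate/CancelScoreUpdate
invalidate the cache. A toy model of that invariant (plain ints, not the real
Node):

    #include <cassert>

    // Toy model of the Node cache invariant: a best child recorded when
    // n_in_flight == n0 with `visits_allowed` more visits permitted stays
    // valid while n_in_flight < n0 + visits_allowed.
    struct BestChildCache {
      int limit = 0;        // best_child_cache_in_flight_limit_
      bool cached = false;  // best_child_cached_ != nullptr

      void Update(int n_in_flight, int visits_allowed) {
        cached = true;
        limit = n_in_flight + visits_allowed;
      }
      bool Valid(int n_in_flight) const { return cached && n_in_flight < limit; }
      void Invalidate() { cached = false; }  // Finalize/CancelScoreUpdate.
    };

    int main() {
      BestChildCache c;
      c.Update(/*n_in_flight=*/3, /*visits_allowed=*/5);
      assert(c.Valid(7));   // 7 < 3 + 5: still the best child.
      assert(!c.Valid(8));  // Budget exhausted: a full rescan is needed.
    }
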
diff --git a/src/mcts/node.h b/src/mcts/node.h
index de7f87d0df..26d020b2ef 100644
--- a/src/mcts/node.h
+++ b/src/mcts/node.h
@@ -183,6 +183,23 @@ class Node {
// Updates max depth, if new depth is larger.
void UpdateMaxDepth(int depth);
+ // Caches the best child if possible.
+ void UpdateBestChild(const Iterator& best_edge, int collisions_allowed);
+
+ // Gets a cached best child if it is still valid.
+ Node* GetCachedBestChild() {
+ if (n_in_flight_ < best_child_cache_in_flight_limit_) {
+ return best_child_cached_;
+ }
+ return nullptr;
+ }
+
+ // Gets how many more visits the cached value is valid for. Only valid if
+ // GetCachedBestChild returns a value.
+ int GetRemainingCacheVisits() {
+ return best_child_cache_in_flight_limit_ - n_in_flight_;
+ }
+
// Calculates the full depth if new depth is larger, updates it, returns
// in depth parameter, and returns true if it was indeed updated.
bool UpdateFullDepth(uint16_t* depth);
@@ -216,6 +233,13 @@ class Node {
std::string DebugString() const;
private:
+ // Re-initializes the node as if it had just been constructed. For use
+ // only with a node that has not been used beyond its construction.
+ void Reinit(Node* parent, uint16_t index) {
+ parent_ = parent;
+ index_ = index;
+ }
+
// To minimize the number of padding bytes and to avoid having unnecessary
// padding when new fields are added, we arrange the fields by size, largest
// to smallest.
@@ -231,6 +255,9 @@ class Node {
std::unique_ptr<Node> child_;
// Pointer to a next sibling. nullptr if there are no further siblings.
std::unique_ptr<Node> sibling_;
+ // Cached pointer to the best child, valid while n_in_flight_ <
+ // best_child_cache_in_flight_limit_.
+ Node* best_child_cached_ = nullptr;
// 4 byte fields.
// Average value (from value head of neural network) of all visited nodes in
@@ -246,6 +273,9 @@ class Node {
// but not finished). This value is added to n during selection which node
// to pick in MCTS, and also when selecting the best move.
uint32_t n_in_flight_ = 0;
+ // If best_child_cached_ is non-null, and n_in_flight_ < this,
+ // best_child_cached_ is still the best child.
+ uint32_t best_child_cache_in_flight_limit_ = 0;
// 2 byte fields.
// Index of this node is parent's edge list.
@@ -273,9 +303,9 @@ class Node {
// A basic sanity check. This must be adjusted when Node members are adjusted.
#if defined(__i386__) || (defined(__arm__) && !defined(__aarch64__))
-static_assert(sizeof(Node) == 40, "Unexpected size of Node for 32bit compile");
+static_assert(sizeof(Node) == 48, "Unexpected size of Node for 32bit compile");
#else
-static_assert(sizeof(Node) == 64, "Unexpected size of Node");
+static_assert(sizeof(Node) == 72, "Unexpected size of Node");
#endif
// Contains Edge and Node pair and set of proxy functions to simplify access
@@ -392,7 +422,8 @@ class Edge_Iterator : public EdgeAndNode {
Edge_Iterator& operator*() { return *this; }
// If there is node, return it. Otherwise spawn a new one and return it.
- Node* GetOrSpawnNode(Node* parent) {
+ Node* GetOrSpawnNode(Node* parent,
+ std::unique_ptr<Node>* node_source = nullptr) {
if (node_) return node_; // If there is already a node, return it.
Actualize(); // But maybe other thread already did that.
if (node_) return node_; // If it did, return.
@@ -408,7 +439,12 @@ class Edge_Iterator : public EdgeAndNode {
// 2. Create fresh Node(idx_.5):
// node_ptr_ -> &Node(idx_.3).sibling_ -> Node(idx_.5)
// tmp -> Node(idx_.7)
- *node_ptr_ = std::make_unique<Node>(parent, current_idx_);
+ if (node_source && *node_source) {
+ (*node_source)->Reinit(parent, current_idx_);
+ *node_ptr_ = std::move(*node_source);
+ } else {
+ *node_ptr_ = std::make_unique<Node>(parent, current_idx_);
+ }
// 3. Attach stored pointer back to a list:
// node_ptr_ ->
// &Node(idx_.3).sibling_ -> Node(idx_.5).sibling_ -> Node(idx_.7)
diff --git a/src/mcts/params.cc b/src/mcts/params.cc
index 6aa7d5ae8b..85dcb5ff5e 100644
--- a/src/mcts/params.cc
+++ b/src/mcts/params.cc
@@ -162,14 +162,13 @@ const OptionId SearchParams::kHistoryFillId{
"synthesize them (always, never, or only at non-standard fen position)."};
void SearchParams::Populate(OptionsParser* options) {
- // Here the "safe defaults" are listed.
- // Many of them are overridden with optimized defaults in engine.cc and
- // tournament.cc
- options->Add<IntOption>(kMiniBatchSizeId, 1, 1024) = 1;
+ // Here the "uci optimized" defaults are set.
+ // Many of them are overridden with training-specific values in tournament.cc.
+ options->Add<IntOption>(kMiniBatchSizeId, 1, 1024) = 256;
options->Add<IntOption>(kMaxPrefetchBatchId, 0, 1024) = 32;
- options->Add<FloatOption>(kCpuctId, 0.0f, 100.0f) = 1.2f;
+ options->Add<FloatOption>(kCpuctId, 0.0f, 100.0f) = 3.0f;
options->Add<FloatOption>(kCpuctBaseId, 1.0f, 1000000000.0f) = 19652.0f;
- options->Add<FloatOption>(kCpuctFactorId, 0.0f, 1000.0f) = 0.0f;
+ options->Add<FloatOption>(kCpuctFactorId, 0.0f, 1000.0f) = 2.0f;
options->Add<FloatOption>(kTemperatureId, 0.0f, 100.0f) = 0.0f;
options->Add<IntOption>(kTempDecayMovesId, 0, 100) = 0;
options->Add<IntOption>(kTemperatureCutoffMoveId, 0, 1000) = 0;
@@ -182,13 +181,13 @@ void SearchParams::Populate(OptionsParser* options) {
options->Add<FloatOption>(kSmartPruningFactorId, 0.0f, 10.0f) = 1.33f;
std::vector<std::string> fpu_strategy = {"reduction", "absolute"};
options->Add<ChoiceOption>(kFpuStrategyId, fpu_strategy) = "reduction";
- options->Add<FloatOption>(kFpuReductionId, -100.0f, 100.0f) = 0.0f;
+ options->Add<FloatOption>(kFpuReductionId, -100.0f, 100.0f) = 1.2f;
options->Add<FloatOption>(kFpuValueId, -1.0f, 1.0f) = -1.0f;
- options->Add<IntOption>(kCacheHistoryLengthId, 0, 7) = 7;
- options->Add<FloatOption>(kPolicySoftmaxTempId, 0.1f, 10.0f) = 1.0f;
- options->Add<IntOption>(kMaxCollisionEventsId, 1, 1024) = 1;
- options->Add<IntOption>(kMaxCollisionVisitsId, 1, 1000000) = 1;
- options->Add<BoolOption>(kOutOfOrderEvalId) = false;
+ options->Add<IntOption>(kCacheHistoryLengthId, 0, 7) = 0;
+ options->Add<FloatOption>(kPolicySoftmaxTempId, 0.1f, 10.0f) = 2.2f;
+ options->Add<IntOption>(kMaxCollisionEventsId, 1, 1024) = 32;
+ options->Add<IntOption>(kMaxCollisionVisitsId, 1, 1000000) = 9999;
+ options->Add<BoolOption>(kOutOfOrderEvalId) = true;
options->Add<IntOption>(kMultiPvId, 1, 500) = 1;
std::vector<std::string> score_type = {"centipawn", "win_percentage", "Q"};
options->Add<ChoiceOption>(kScoreTypeId, score_type) = "centipawn";
@@ -213,7 +212,8 @@ SearchParams::SearchParams(const OptionsDict& options)
kMaxCollisionVisits(options.Get<int>(kMaxCollisionVisitsId.GetId())),
kOutOfOrderEval(options.Get<bool>(kOutOfOrderEvalId.GetId())),
kHistoryFill(
- EncodeHistoryFill(options.Get<std::string>(kHistoryFillId.GetId()))) {
+ EncodeHistoryFill(options.Get<std::string>(kHistoryFillId.GetId()))),
+ kMiniBatchSize(options.Get<int>(kMiniBatchSizeId.GetId())) {
}
} // namespace lczero
diff --git a/src/mcts/params.h b/src/mcts/params.h
index 1218416ccb..bcbe780f46 100644
--- a/src/mcts/params.h
+++ b/src/mcts/params.h
@@ -43,7 +43,7 @@ class SearchParams {
// Parameter getters.
int GetMiniBatchSize() const {
- return options_.Get<int>(kMiniBatchSizeId.GetId());
+ return kMiniBatchSize;
}
int GetMaxPrefetchBatch() const {
return options_.Get<int>(kMaxPrefetchBatchId.GetId());
@@ -138,6 +138,7 @@ class SearchParams {
const int kMaxCollisionVisits;
const bool kOutOfOrderEval;
const FillEmptyHistory kHistoryFill;
+ const int kMiniBatchSize;
};
} // namespace lczero
diff --git a/src/mcts/search.cc b/src/mcts/search.cc
index b4b9ea28b4..d946741f08 100644
--- a/src/mcts/search.cc
+++ b/src/mcts/search.cc
@@ -411,6 +411,10 @@ void Search::UpdateRemainingMoves() {
}
// Even if we exceeded limits, don't go crazy by not allowing any playouts.
if (remaining_playouts_ <= 1) remaining_playouts_ = 1;
+ // Since remaining_playouts_ has changed, the logic for selecting visited root
+ // nodes may also change. Use a 0-visit CancelScoreUpdate to clear out any
+ // cached best edge.
+ root_node_->CancelScoreUpdate(0);
}
// Return the evaluation of the actual best child, regardless of temperature
@@ -656,7 +660,7 @@ void Search::Stop() {
void Search::Abort() {
Mutex::Lock lock(counters_mutex_);
- if (!stop_.load(std::memory_order_acquire)) {
+ if (!stop_.load(std::memory_order_acquire) || !bestmove_is_sent_) {
bestmove_is_sent_ = true;
FireStopInternal();
}
@@ -812,8 +816,12 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend(
Node* node = search_->root_node_;
Node::Iterator best_edge;
Node::Iterator second_best_edge;
- // Initialize position sequence with pre-move position.
- history_.Trim(search_->played_history_.GetLength());
+
+ // Precache a newly constructed node to avoid memory allocations being
+ // performed while the mutex is held.
+ if (!precached_node_) {
+ precached_node_ = std::make_unique<Node>(nullptr, 0);
+ }
SharedMutex::Lock lock(search_->nodes_mutex_);
@@ -823,6 +831,7 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend(
// True on first iteration, false as we dive deeper.
bool is_root_node = true;
uint16_t depth = 0;
+ bool node_already_updated = true;
while (true) {
// First, terminate if we find collisions or leaf nodes.
@@ -832,7 +841,9 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend(
// in the beginning (and there would be no need for "if
// (!is_root_node)"), but that would mean extra mutex lock.
// Will revisit that after rethinking locking strategy.
- if (!is_root_node) node = best_edge.GetOrSpawnNode(/* parent */ node);
+ if (!node_already_updated) {
+ node = best_edge.GetOrSpawnNode(/* parent */ node, &precached_node_);
+ }
best_edge.Reset();
depth++;
// n_in_flight_ is incremented. If the method returns false, then there is
@@ -852,6 +863,18 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend(
return NodeToProcess::Extension(node, depth);
}
}
+ Node* possible_shortcut_child = node->GetCachedBestChild();
+ if (possible_shortcut_child) {
+ // Add two here to reverse the conservatism that goes into calculating the
+ // remaining cache visits.
+ collision_limit =
+ std::min(collision_limit, node->GetRemainingCacheVisits() + 2);
+ is_root_node = false;
+ node = possible_shortcut_child;
+ node_already_updated = true;
+ continue;
+ }
+ node_already_updated = false;
// If we fall through, then n_in_flight_ has been incremented but this
// playout remains incomplete; we must go deeper.
@@ -895,14 +918,19 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend(
}
if (second_best_edge) {
+ int estimated_visits_to_change_best =
+ best_edge.GetVisitsToReachU(second_best, puct_mult, fpu);
+ // Only cache for n-2 steps as the estimate created by GetVisitsToReachU
+ // has potential rounding errors and some conservative logic that can push
+ // it up to 2 away from the real value.
+ node->UpdateBestChild(best_edge,
+ std::max(0, estimated_visits_to_change_best - 2));
collision_limit =
- std::min(collision_limit,
- best_edge.GetVisitsToReachU(second_best, puct_mult, fpu));
+ std::min(collision_limit, estimated_visits_to_change_best);
assert(collision_limit >= 1);
second_best_edge.Reset();
}
- history_.Append(best_edge.GetMove());
if (is_root_node && possible_moves <= 1 && !search_->limits_.infinite) {
// If there is only one move theoretically possible within remaining time,
// output it.
@@ -914,6 +942,22 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend(
}
void SearchWorker::ExtendNode(Node* node) {
+ // Initialize position sequence with pre-move position.
+ history_.Trim(search_->played_history_.GetLength());
+ std::vector<Move> to_add;
+ // Could instead reserve one more than the difference between history_.size()
+ // and history_.capacity().
+ to_add.reserve(60);
+ Node* cur = node;
+ while (cur != search_->root_node_) {
+ Node* prev = cur->GetParent();
+ to_add.push_back(prev->GetEdgeToNode(cur)->GetMove());
+ cur = prev;
+ }
+ for (int i = to_add.size() - 1; i >= 0; i--) {
+ history_.Append(to_add[i]);
+ }
+
// We don't need the mutex because other threads will see that N=0 and
// N-in-flight=1 and will not touch this node.
const auto& board = history_.Last().GetBoard();
@@ -992,7 +1036,8 @@ bool SearchWorker::AddNodeToComputation(Node* node, bool add_if_cached) {
if (node && node->HasChildren()) {
// Legal moves are known, use them.
- for (auto edge : node->Edges()) {
+ moves.reserve(node->GetNumEdges());
+ for (const auto& edge : node->Edges()) {
moves.emplace_back(edge.GetMove().as_nn_index());
}
} else {
diff --git a/src/mcts/search.h b/src/mcts/search.h
index 128c7d8109..14b5bcd51f 100644
--- a/src/mcts/search.h
+++ b/src/mcts/search.h
@@ -294,6 +294,7 @@ class SearchWorker {
bool root_move_filter_populated_ = false;
int number_out_of_order_ = 0;
const SearchParams& params_;
+ std::unique_ptr<Node> precached_node_;
};
} // namespace lczero
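
precached_node_ exists so that the std::make_unique allocation happens before
nodes_mutex_ is taken; GetOrSpawnNode then consumes the ready-made node inside
the critical section (and keeps it for next time if another thread won the
race). A generic sketch of this allocate-outside-the-lock pattern (a
hypothetical helper, not lc0 code):

    #include <memory>
    #include <mutex>
    #include <utility>

    // Allocate outside the lock; consume (or keep the spare) inside it.
    template <typename T, typename... Args>
    void InsertUnderLock(std::mutex& m, std::unique_ptr<T>& spare,
                         std::unique_ptr<T>& slot, Args&&... args) {
      if (!spare) spare = std::make_unique<T>(std::forward<Args>(args)...);
      std::lock_guard<std::mutex> lock(m);
      if (!slot) slot = std::move(spare);  // The spare survives for next time.
    }

    int main() {
      std::mutex m;
      std::unique_ptr<int> spare, slot;
      InsertUnderLock(m, spare, slot, 42);  // slot now owns an int(42).
    }
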
diff --git a/src/neural/cuda/layers.cc b/src/neural/cuda/layers.cc
index 53a3cce8d1..25ff7a2fae 100644
--- a/src/neural/cuda/layers.cc
+++ b/src/neural/cuda/layers.cc
@@ -117,7 +117,7 @@ ConvLayer<DataType>::ConvLayer(BaseLayer<DataType>* ip, int C, int H, int W,
cudnnSetConvolutionMathType(conv_desc_, CUDNN_TENSOR_OP_MATH));
// TODO: dynamic selection of algorithm!
- if ((C > 32) && (!fp16)) {
+ if ((C > 32) && (!fp16) && (filter_size_ > 1)) {
conv_algo_ = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED;
} else {
conv_algo_ = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
diff --git a/src/neural/cuda/network_cudnn.cc b/src/neural/cuda/network_cudnn.cc
index 013ac2200a..69f4d2e833 100644
--- a/src/neural/cuda/network_cudnn.cc
+++ b/src/neural/cuda/network_cudnn.cc
@@ -59,11 +59,15 @@ struct InputsOutputs {
ReportCUDAErrors(
cudaHostGetDevicePointer(&input_val_mem_gpu_, input_val_mem_, 0));
+
ReportCUDAErrors(cudaHostAlloc(
- &op_policy_mem_, maxBatchSize * kNumOutputPolicy * sizeof(float),
- cudaHostAllocMapped));
- ReportCUDAErrors(
- cudaHostGetDevicePointer(&op_policy_mem_gpu_, op_policy_mem_, 0));
+ &op_policy_mem_, maxBatchSize * kNumOutputPolicy * sizeof(float), 0));
+
+ // Separate device memory copy for policy output.
+ // It's faster to write to device memory and then copy to host memory
+ // than having the kernel write directly to it.
+ ReportCUDAErrors(cudaMalloc(&op_policy_mem_gpu_,
+ maxBatchSize * kNumOutputPolicy * sizeof(float)));
ReportCUDAErrors(cudaHostAlloc(&op_value_mem_, maxBatchSize * sizeof(float),
cudaHostAllocMapped));
@@ -74,6 +78,7 @@ struct InputsOutputs {
ReportCUDAErrors(cudaFreeHost(input_masks_mem_));
ReportCUDAErrors(cudaFreeHost(input_val_mem_));
ReportCUDAErrors(cudaFreeHost(op_policy_mem_));
+ ReportCUDAErrors(cudaFree(op_policy_mem_gpu_));
ReportCUDAErrors(cudaFreeHost(op_value_mem_));
}
uint64_t* input_masks_mem_;
@@ -81,11 +86,13 @@ struct InputsOutputs {
float* op_policy_mem_;
float* op_value_mem_;
- // GPU pointers for the above allocations
+ // GPU pointers for the above allocations.
uint64_t* input_masks_mem_gpu_;
float* input_val_mem_gpu_;
- float* op_policy_mem_gpu_;
float* op_value_mem_gpu_;
+
+ // This is a separate copy.
+ float* op_policy_mem_gpu_;
};
template <typename DataType>
@@ -173,16 +180,21 @@ class CudnnNetwork : public Network {
has_se_ = false;
// 0. Process weights.
- processConvBlock(weights.input, true);
+
+ // TODO: Get filter sizes from proto file?
+ // Hardcoded right now:
+ // 3 for input and residual block convolutions.
+ // 1 for policy and value head convolutions.
+ processConvBlock(weights.input, true, 3);
for (int i = 0; i < numBlocks_; i++) {
if (weights.residual[i].has_se) {
has_se_ = true;
}
- processConvBlock(weights.residual[i].conv1, true);
- processConvBlock(weights.residual[i].conv2, true);
+ processConvBlock(weights.residual[i].conv1, true, 3);
+ processConvBlock(weights.residual[i].conv2, true, 3);
}
- processConvBlock(weights.policy);
- processConvBlock(weights.value);
+ processConvBlock(weights.policy, true, 1);
+ processConvBlock(weights.value, true, 1);
// 1. Allocate scratch space (used internally by cudnn to run convolutions,
// and also for format/layout conversion for weights).
@@ -283,15 +295,11 @@ class CudnnNetwork : public Network {
// Policy head.
{
auto convPol = std::make_unique<ConvLayer<DataType>>(
- resi_last_, weights.policy.bn_means.size(), 8, 8, 1, kNumFilters);
- convPol->LoadWeights(&weights.policy.weights[0], nullptr, scratch_mem_);
+ resi_last_, weights.policy.bn_means.size(), 8, 8, 1, kNumFilters, true, true);
+ convPol->LoadWeights(&weights.policy.weights[0],
+ &weights.policy.biases[0], scratch_mem_);
network_.emplace_back(std::move(convPol));
- auto BNPol = std::make_unique<BNLayer<DataType>>(getLastLayer(), true);
- BNPol->LoadWeights(&weights.policy.bn_means[0],
- &weights.policy.bn_stddivs[0]);
- network_.emplace_back(std::move(BNPol));
-
auto FCPol = std::make_unique<FCLayer<DataType>>(
getLastLayer(), weights.ip_pol_b.size(), 1, 1, false, true);
FCPol->LoadWeights(&weights.ip_pol_w[0], &weights.ip_pol_b[0],
@@ -307,15 +315,11 @@ class CudnnNetwork : public Network {
// Value head.
{
auto convVal = std::make_unique<ConvLayer<DataType>>(
- resi_last_, weights.value.bn_means.size(), 8, 8, 1, kNumFilters);
- convVal->LoadWeights(&weights.value.weights[0], nullptr, scratch_mem_);
+ resi_last_, weights.value.biases.size(), 8, 8, 1, kNumFilters, true, true);
+ convVal->LoadWeights(&weights.value.weights[0], &weights.value.biases[0],
+ scratch_mem_);
network_.emplace_back(std::move(convVal));
- auto BNVal = std::make_unique<BNLayer<DataType>>(getLastLayer(), true);
- BNVal->LoadWeights(&weights.value.bn_means[0],
- &weights.value.bn_stddivs[0]);
- network_.emplace_back(std::move(BNVal));
-
auto FCVal1 = std::make_unique<FCLayer<DataType>>(
getLastLayer(), weights.ip1_val_b.size(), 1, 1, true, true);
FCVal1->LoadWeights(&weights.ip1_val_w[0], &weights.ip1_val_b[0],
@@ -403,43 +407,44 @@ class CudnnNetwork : public Network {
scratch_mem_, scratch_size_, cudnn_,
cublas_); // pol conv
network_[l++]->Eval(batchSize, tensor_mem_[1], tensor_mem_[0], nullptr,
- scratch_mem_, scratch_size_, cudnn_,
- cublas_); // pol BN
- network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[1], nullptr,
scratch_mem_, scratch_size_, cudnn_,
cublas_); // pol FC
if (std::is_same<half, DataType>::value) {
// TODO: consider softmax layer that writes directly to fp32.
- network_[l++]->Eval(batchSize, tensor_mem_[1], tensor_mem_[0], nullptr,
+ network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[1], nullptr,
scratch_mem_, scratch_size_, cudnn_,
cublas_); // pol softmax
- copyTypeConverted(opPol, (half*)(tensor_mem_[1]),
+ copyTypeConverted(opPol, (half*)(tensor_mem_[0]),
batchSize * kNumOutputPolicy); // POLICY
} else {
- network_[l++]->Eval(batchSize, (DataType*)opPol, tensor_mem_[0], nullptr,
+ network_[l++]->Eval(batchSize, (DataType*)opPol, tensor_mem_[1], nullptr,
scratch_mem_, scratch_size_, cudnn_,
cublas_); // pol softmax // POLICY
}
+ // Copy policy output from device memory to host memory.
+ ReportCUDAErrors(cudaMemcpyAsync(io->op_policy_mem_,
+ io->op_policy_mem_gpu_,
+ sizeof(float) * kNumOutputPolicy *
+ batchSize, cudaMemcpyDeviceToHost));
+
// value head
network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[2], nullptr,
scratch_mem_, scratch_size_, cudnn_,
cublas_); // value conv
- network_[l++]->Eval(batchSize, tensor_mem_[2], tensor_mem_[0], nullptr,
- scratch_mem_, scratch_size_, cudnn_,
- cublas_); // value BN
- network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[2], nullptr,
+
+ network_[l++]->Eval(batchSize, tensor_mem_[1], tensor_mem_[0], nullptr,
scratch_mem_, scratch_size_, cudnn_,
cublas_); // value FC1
if (std::is_same<half, DataType>::value) {
// TODO: consider fusing the bias-add of FC2 with format conversion.
- network_[l++]->Eval(batchSize, tensor_mem_[2], tensor_mem_[0], nullptr,
+ network_[l++]->Eval(batchSize, tensor_mem_[2], tensor_mem_[1], nullptr,
scratch_mem_, scratch_size_, cudnn_,
cublas_); // value FC2
copyTypeConverted(opVal, (half*)(tensor_mem_[2]), batchSize); // VALUE
} else {
- network_[l++]->Eval(batchSize, (DataType*)opVal, tensor_mem_[0], nullptr,
+ network_[l++]->Eval(batchSize, (DataType*)opVal, tensor_mem_[1], nullptr,
scratch_mem_, scratch_size_, cudnn_,
cublas_); // value FC2 // VALUE
}
@@ -535,7 +540,8 @@ class CudnnNetwork : public Network {
mutable std::mutex inputs_outputs_lock_;
std::list<std::unique_ptr<InputsOutputs>> free_inputs_outputs_;
- void processConvBlock(LegacyWeights::ConvBlock& block, bool foldBNLayer = false) {
+ void processConvBlock(LegacyWeights::ConvBlock& block, bool foldBNLayer,
+ int filterSize) {
const float epsilon = 1e-5f;
// Compute reciprocal of std-dev from the variances (so that it can be
@@ -557,13 +563,15 @@ class CudnnNetwork : public Network {
// convolution idea proposed by Henrik Forstén and first implemented in
// leela go zero.
if (foldBNLayer) {
+ const int spatialSize = filterSize * filterSize;
const int outputs = block.biases.size();
- const int channels = block.weights.size() / (outputs * 3 * 3);
-
+ const int channels = block.weights.size() / (outputs * spatialSize);
+
for (auto o = 0; o < outputs; o++) {
for (auto c = 0; c < channels; c++) {
- for (auto i = 0; i < 9; i++) {
- block.weights[o * channels * 9 + c * 9 + i] *= block.bn_stddivs[o];
+ for (auto i = 0; i < spatialSize; i++) {
+ block.weights[o * channels * spatialSize + c * spatialSize + i] *=
+ block.bn_stddivs[o];
}
}
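
The generalized fold above relies on bn_stddivs already holding the
reciprocal standard deviation (computed with epsilon earlier in
processConvBlock); every weight feeding output channel o is then scaled by
that factor, now for any square filter size. A standalone sketch of the
scaling over an O x C x K x K weight layout:

    #include <vector>

    // Scale conv weights by the per-output-channel BN 1/stddev, as in
    // processConvBlock, for an arbitrary square filter size.
    void FoldBatchNorm(std::vector<float>* weights,
                       const std::vector<float>& bn_stddivs, int filter_size) {
      const int spatial = filter_size * filter_size;
      const int outputs = static_cast<int>(bn_stddivs.size());
      const int channels =
          static_cast<int>(weights->size()) / (outputs * spatial);
      for (int o = 0; o < outputs; ++o)
        for (int c = 0; c < channels; ++c)
          for (int i = 0; i < spatial; ++i)
            (*weights)[(o * channels + c) * spatial + i] *= bn_stddivs[o];
    }

    int main() {
      std::vector<float> w(2 * 1 * 9, 1.0f);  // O=2, C=1, K=3.
      FoldBatchNorm(&w, {0.5f, 2.0f}, 3);
      // First output channel scaled by 0.5, second by 2.0.
    }
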
diff --git a/src/neural/loader.cc b/src/neural/loader.cc
index c9a2b076e7..9921babee0 100644
--- a/src/neural/loader.cc
+++ b/src/neural/loader.cc
@@ -164,7 +164,7 @@ std::string DiscoverWeightsFile() {
gzFile file = gzopen(candidate.second.c_str(), "rb");
if (!file) continue;
- char buf[256];
+ unsigned char buf[256];
int sz = gzread(file, buf, 256);
gzclose(file);
if (sz < 0) continue;
@@ -180,8 +180,10 @@ std::string DiscoverWeightsFile() {
// First byte of the protobuf stream is 0x0d for fixed32, so we ignore it as
// our own magic should suffice.
- auto magic = reinterpret_cast<uint32_t*>(buf + 1);
- if (*magic == kWeightMagic) {
+ auto magic = buf[1] | (static_cast<uint32_t>(buf[2]) << 8) |
+ (static_cast<uint32_t>(buf[3]) << 16) |
+ (static_cast<uint32_t>(buf[4]) << 24);
+ if (magic == kWeightMagic) {
CERR << "Found pb network file: " << candidate.second;
return candidate.second;
}
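
The loader change swaps a type-punned uint32 read (misaligned,
aliasing-unsafe, and wrong on big-endian hosts) for explicit little-endian
byte assembly. The same idiom in isolation:

    #include <cassert>
    #include <cstdint>

    // Assemble a little-endian uint32 from bytes, independent of host
    // endianness and alignment, mirroring the loader.cc fix.
    std::uint32_t ReadLE32(const unsigned char* p) {
      return static_cast<std::uint32_t>(p[0]) |
             (static_cast<std::uint32_t>(p[1]) << 8) |
             (static_cast<std::uint32_t>(p[2]) << 16) |
             (static_cast<std::uint32_t>(p[3]) << 24);
    }

    int main() {
      const unsigned char buf[] = {0x78, 0x56, 0x34, 0x12};
      assert(ReadLE32(buf) == 0x12345678u);
    }
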
diff --git a/src/neural/network_demux.cc b/src/neural/network_demux.cc
new file mode 100644
index 0000000000..c791d1f11b
--- /dev/null
+++ b/src/neural/network_demux.cc
@@ -0,0 +1,223 @@
+/*
+ This file is part of Leela Chess Zero.
+ Copyright (C) 2018 The LCZero Authors
+
+ Leela Chess is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Leela Chess is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Leela Chess. If not, see <http://www.gnu.org/licenses/>.
+
+ Additional permission under GNU GPL version 3 section 7
+
+ If you modify this Program, or any covered work, by linking or
+ combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
+ Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
+ modified version of those libraries), containing parts covered by the
+ terms of the respective license agreement, the licensors of this
+ Program grant you additional permission to convey the resulting work.
+*/
+
+#include "neural/factory.h"
+
+#include <condition_variable>
+#include <queue>
+#include <thread>
+#include "utils/exception.h"
+
+namespace lczero {
+namespace {
+
+class DemuxingNetwork;
+class DemuxingComputation : public NetworkComputation {
+ public:
+ DemuxingComputation(DemuxingNetwork* network) : network_(network) {}
+
+  void AddInput(InputPlanes&& input) override {
+    planes_.emplace_back(std::move(input));
+  }
+
+ void ComputeBlocking() override;
+
+ int GetBatchSize() const override { return planes_.size(); }
+
+ float GetQVal(int sample) const override {
+ int idx = sample / partial_size_;
+ int offset = sample % partial_size_;
+ return parents_[idx]->GetQVal(offset);
+ }
+
+ float GetPVal(int sample, int move_id) const override {
+ int idx = sample / partial_size_;
+ int offset = sample % partial_size_;
+ return parents_[idx]->GetPVal(offset, move_id);
+ }
+
+ void NotifyComplete() {
+    std::unique_lock<std::mutex> lock(mutex_);
+ dataready_--;
+ if (dataready_ == 0) {
+ dataready_cv_.notify_one();
+ }
+ }
+
+ NetworkComputation* AddParentFromNetwork(Network* network) {
+    std::unique_lock<std::mutex> lock(mutex_);
+ parents_.emplace_back(network->NewComputation());
+ int cur_idx = (parents_.size() - 1) * partial_size_;
+ for (int i = cur_idx; i < std::min(GetBatchSize(), cur_idx + partial_size_);
+ i++) {
+ parents_.back()->AddInput(std::move(planes_[i]));
+ }
+ return parents_.back().get();
+ }
+
+ private:
+  std::vector<InputPlanes> planes_;
+ DemuxingNetwork* network_;
+  std::vector<std::unique_ptr<NetworkComputation>> parents_;
+
+ std::mutex mutex_;
+ std::condition_variable dataready_cv_;
+ int dataready_ = 0;
+ int partial_size_ = 0;
+};
+
+class DemuxingNetwork : public Network {
+ public:
+ DemuxingNetwork(const WeightsFile& weights, const OptionsDict& options) {
+    minimum_split_size_ = options.GetOrDefault<int>("minimum-split-size", 0);
+ const auto parents = options.ListSubdicts();
+ if (parents.empty()) {
+      // If no subdicts are given (or the demuxer is configured on the root
+      // object), initialize a single backend from the root object using the
+      // default backend.
+ auto backends = NetworkFactory::Get()->GetBackendsList();
+ AddBackend(backends[0], weights, options);
+ }
+
+ for (const auto& name : parents) {
+ AddBackend(name, weights, options.GetSubdict(name));
+ }
+ }
+
+ void AddBackend(const std::string& name, const WeightsFile& weights,
+ const OptionsDict& opts) {
+    const int nn_threads = opts.GetOrDefault<int>("threads", 1);
+    const std::string backend = opts.GetOrDefault<std::string>("backend", name);
+
+ networks_.emplace_back(
+ NetworkFactory::Get()->Create(backend, weights, opts));
+
+ for (int i = 0; i < nn_threads; ++i) {
+ threads_.emplace_back([this]() { Worker(); });
+ }
+ }
+
+  std::unique_ptr<NetworkComputation> NewComputation() override {
+    return std::make_unique<DemuxingComputation>(this);
+ }
+
+ void Enqueue(DemuxingComputation* computation) {
+    std::lock_guard<std::mutex> lock(mutex_);
+ queue_.push(computation);
+ cv_.notify_one();
+ }
+
+ ~DemuxingNetwork() {
+ Abort();
+ Wait();
+    // Unblock any computations still waiting on results.
+ while (!queue_.empty()) {
+ queue_.front()->NotifyComplete();
+ queue_.pop();
+ }
+ }
+
+ void Worker() {
+    // Loop until Abort() is called (it can only be called from the destructor).
+ while (!abort_) {
+      {
+        std::unique_lock<std::mutex> lock(mutex_);
+        // Wait until there's some work to compute.
+ cv_.wait(lock, [&] { return abort_ || !queue_.empty(); });
+ if (abort_) break;
+ }
+
+      // While there is work in the queue, process it.
+      while (true) {
+        DemuxingComputation* to_notify;
+ {
+          std::unique_lock<std::mutex> lock(mutex_);
+ if (queue_.empty()) break;
+ to_notify = queue_.front();
+ queue_.pop();
+ }
+        long long net_idx = ++counter_ % networks_.size();
+        NetworkComputation* to_compute =
+            to_notify->AddParentFromNetwork(networks_[net_idx].get());
+ to_compute->ComputeBlocking();
+ to_notify->NotifyComplete();
+ }
+ }
+ }
+
+ void Abort() {
+ {
+      std::lock_guard<std::mutex> lock(mutex_);
+ abort_ = true;
+ }
+ cv_.notify_all();
+ }
+
+ void Wait() {
+ while (!threads_.empty()) {
+ threads_.back().join();
+ threads_.pop_back();
+ }
+ }
+
+  std::vector<std::unique_ptr<Network>> networks_;
+  std::queue<DemuxingComputation*> queue_;
+  int minimum_split_size_ = 0;
+  std::atomic<long long> counter_{0};
+ bool abort_ = false;
+
+ std::mutex mutex_;
+ std::condition_variable cv_;
+
+  std::vector<std::thread> threads_;
+};
+
+void DemuxingComputation::ComputeBlocking() {
+ if (GetBatchSize() == 0) return;
+ partial_size_ = (GetBatchSize() + network_->networks_.size() - 1) /
+ network_->networks_.size();
+ if (partial_size_ < network_->minimum_split_size_) {
+ partial_size_ = std::min(GetBatchSize(), network_->minimum_split_size_);
+ }
+ int splits = (GetBatchSize() + partial_size_ - 1) / partial_size_;
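+  // For example, a batch of 100 over 3 backends gives partial_size_ = 34 and
+  // splits = 3 (two splits of 34 positions plus a final one of 32).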
+
+  std::unique_lock<std::mutex> lock(mutex_);
+ dataready_ = splits;
+  for (int j = 0; j < splits; j++) {
+ network_->Enqueue(this);
+ }
+ dataready_cv_.wait(lock, [this]() { return dataready_ == 0; });
+}
+
+std::unique_ptr<Network> MakeDemuxingNetwork(const WeightsFile& weights,
+                                             const OptionsDict& options) {
+  return std::make_unique<DemuxingNetwork>(weights, options);
+}
+
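+// Usage sketch (hypothetical command line; the exact --backend-opts syntax
+// may differ):
+//   lc0 --backend=demux --backend-opts=minimum-split-size=32,a(backend=cudnn),b(backend=cudnn)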
+REGISTER_NETWORK("demux", MakeDemuxingNetwork, -1001)
+
+} // namespace
+} // namespace lczero
diff --git a/src/neural/network_rr.cc b/src/neural/network_rr.cc
new file mode 100644
index 0000000000..5979b9b2c7
--- /dev/null
+++ b/src/neural/network_rr.cc
@@ -0,0 +1,82 @@
+/*
+ This file is part of Leela Chess Zero.
+ Copyright (C) 2018 The LCZero Authors
+
+ Leela Chess is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Leela Chess is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Leela Chess. If not, see <http://www.gnu.org/licenses/>.
+
+ Additional permission under GNU GPL version 3 section 7
+
+ If you modify this Program, or any covered work, by linking or
+ combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA
+ Toolkit and the NVIDIA CUDA Deep Neural Network library (or a
+ modified version of those libraries), containing parts covered by the
+ terms of the respective license agreement, the licensors of this
+ Program grant you additional permission to convey the resulting work.
+*/
+
+#include "neural/factory.h"
+
+#include <atomic>
+#include <memory>
+#include <vector>
+#include "utils/exception.h"
+
+namespace lczero {
+namespace {
+
+class RoundRobinNetwork : public Network {
+ public:
+ RoundRobinNetwork(const WeightsFile& weights, const OptionsDict& options) {
+ const auto parents = options.ListSubdicts();
+ if (parents.empty()) {
+      // If no subdicts are given (or the round-robin dispatcher is configured
+      // on the root object), initialize a single backend from the root object
+      // using the default backend.
+ auto backends = NetworkFactory::Get()->GetBackendsList();
+ AddBackend(backends[0], weights, options);
+ }
+
+ for (const auto& name : parents) {
+ AddBackend(name, weights, options.GetSubdict(name));
+ }
+ }
+
+ void AddBackend(const std::string& name, const WeightsFile& weights,
+ const OptionsDict& opts) {
+    const std::string backend = opts.GetOrDefault<std::string>("backend", name);
+
+ networks_.emplace_back(
+ NetworkFactory::Get()->Create(backend, weights, opts));
+ }
+
+  std::unique_ptr<NetworkComputation> NewComputation() override {
+ long long val = ++counter_;
+ return networks_[val % networks_.size()]->NewComputation();
+ }
+
+ ~RoundRobinNetwork() {}
+
+ private:
+  std::vector<std::unique_ptr<Network>> networks_;
+  std::atomic<long long> counter_{0};
+};
+
+std::unique_ptr<Network> MakeRoundRobinNetwork(const WeightsFile& weights,
+                                               const OptionsDict& options) {
+  return std::make_unique<RoundRobinNetwork>(weights, options);
+}
+
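+// Usage sketch (hypothetical command line; the exact --backend-opts syntax
+// may differ):
+//   lc0 --backend=roundrobin --backend-opts=a(backend=cudnn,gpu=0),b(backend=cudnn,gpu=1)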
+REGISTER_NETWORK("roundrobin", MakeRoundRobinNetwork, -999)
+
+} // namespace
+} // namespace lczero
diff --git a/src/selfplay/game.cc b/src/selfplay/game.cc
index 2b64db9bf5..ae199ec023 100644
--- a/src/selfplay/game.cc
+++ b/src/selfplay/game.cc
@@ -38,11 +38,15 @@ const OptionId kReuseTreeId{"reuse-tree", "ReuseTree",
const OptionId kResignPercentageId{
"resign-percentage", "ResignPercentage",
"Resign when win percentage drops below specified value."};
+const OptionId kResignEarliestMoveId{"resign-earliest-move",
+ "ResignEarliestMove",
+ "Earliest move that resign is allowed."};
} // namespace
void SelfPlayGame::PopulateUciParams(OptionsParser* options) {
  options->Add<BoolOption>(kReuseTreeId) = false;
  options->Add<FloatOption>(kResignPercentageId, 0.0f, 100.0f) = 0.0f;
+  options->Add<IntOption>(kResignEarliestMoveId, 0, 1000) = 0;
}
SelfPlayGame::SelfPlayGame(PlayerOptions player1, PlayerOptions player2,
@@ -104,7 +108,9 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
float eval = search_->GetBestEval();
eval = (eval + 1) / 2;
if (eval < min_eval_[idx]) min_eval_[idx] = eval;
- if (enable_resign) {
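+      // The position history length is in plies (half-moves); convert it to a
+      // full-move number before comparing with the option.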
+ int move_number = tree_[0]->GetPositionHistory().GetLength() / 2 + 1;
+      if (enable_resign &&
+          move_number >= options_[idx].uci_options->Get<int>(
+                             kResignEarliestMoveId.GetId())) {
const float resignpct =
            options_[idx].uci_options->Get<float>(kResignPercentageId.GetId()) /
100;
diff --git a/src/selfplay/tournament.cc b/src/selfplay/tournament.cc
index 81dee9f5c5..8e319cb4b4 100644
--- a/src/selfplay/tournament.cc
+++ b/src/selfplay/tournament.cc
@@ -86,6 +86,13 @@ void SelfPlayTournament::PopulateOptions(OptionsParser* options) {
SelfPlayGame::PopulateUciParams(options);
auto defaults = options->GetMutableDefaultsOptions();
  defaults->Set<int>(SearchParams::kMiniBatchSizeId.GetId(), 32);
+  defaults->Set<float>(SearchParams::kCpuctId.GetId(), 1.2f);
+  defaults->Set<float>(SearchParams::kCpuctFactorId.GetId(), 0.0f);
+  defaults->Set<float>(SearchParams::kPolicySoftmaxTempId.GetId(), 1.0f);
+  defaults->Set<int>(SearchParams::kMaxCollisionVisitsId.GetId(), 1);
+  defaults->Set<int>(SearchParams::kMaxCollisionEventsId.GetId(), 1);
+  defaults->Set<int>(SearchParams::kCacheHistoryLengthId.GetId(), 7);
+  defaults->Set<bool>(SearchParams::kOutOfOrderEvalId.GetId(), false);
  defaults->Set<float>(SearchParams::kSmartPruningFactorId.GetId(), 0.0f);
  defaults->Set<float>(SearchParams::kTemperatureId.GetId(), 1.0f);
  defaults->Set<bool>(SearchParams::kNoiseId.GetId(), true);
diff --git a/src/utils/cache.h b/src/utils/cache.h
index 2e2556e2d9..e820055c63 100644
--- a/src/utils/cache.h
+++ b/src/utils/cache.h
@@ -57,11 +57,10 @@ class LruCache {
}
// Inserts the element under key @key with value @val.
- // If the element is pinned, old value is still kept (until fully unpinned),
- // but new lookups will return updated value.
- // If @pinned, pins inserted element, Unpin has to be called to unpin.
- // In any case, puts element to front of the queue (makes it last to evict).
-  V* Insert(K key, std::unique_ptr<V> val, bool pinned = false) {
+ // Puts element to front of the queue (makes it last to evict).
+  void Insert(K key, std::unique_ptr<V> val) {
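+    // A capacity of 0 disables the cache; the relaxed atomic load lets this
+    // fast path avoid taking the mutex.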
+ if (capacity_.load(std::memory_order_relaxed) == 0) return;
+
Mutex::Lock lock(mutex_);
auto hash = hasher_(key) % hash_.size();
@@ -76,16 +75,17 @@ class LruCache {
ShrinkToCapacity(capacity_ - 1);
++size_;
++allocated_;
- Item* new_item = new Item(key, std::move(val), pinned ? 1 : 0);
+ Item* new_item = new Item(key, std::move(val));
new_item->next_in_hash = hash_head;
hash_head = new_item;
InsertIntoLru(new_item);
- return new_item->value.get();
}
// Checks whether a key exists. Doesn't pin the entry, so of course the next
// moment the key may be evicted.
bool ContainsKey(K key) {
+ if (capacity_.load(std::memory_order_relaxed) == 0) return false;
+
Mutex::Lock lock(mutex_);
auto hash = hasher_(key) % hash_.size();
for (Item* iter = hash_[hash]; iter; iter = iter->next_in_hash) {
@@ -99,6 +99,8 @@ class LruCache {
// evict); furthermore, a call to Unpin must be made for each such element.
// Use of LruCacheLock is recommended to automate this pin management.
V* LookupAndPin(K key) {
+ if (capacity_.load(std::memory_order_relaxed) == 0) return nullptr;
+
Mutex::Lock lock(mutex_);
auto hash = hasher_(key) % hash_.size();
@@ -149,9 +151,9 @@ class LruCache {
void SetCapacity(int capacity) {
Mutex::Lock lock(mutex_);
- if (capacity_ == capacity) return;
+ if (capacity_.load(std::memory_order_relaxed) == capacity) return;
ShrinkToCapacity(capacity);
- capacity_ = capacity;
+ capacity_.store(capacity);
    std::vector<Item*> new_hash(
        static_cast<size_t>(capacity * kLoadFactor + 1));
@@ -179,16 +181,15 @@ class LruCache {
Mutex::Lock lock(mutex_);
return size_;
}
- int GetCapacity() const {
- Mutex::Lock lock(mutex_);
- return capacity_;
+ int GetCapacity() const {
+ return capacity_.load(std::memory_order_relaxed);
}
static constexpr size_t GetItemStructSize() { return sizeof(Item); }
private:
struct Item {
-    Item(K key, std::unique_ptr<V> value, int pins)
-        : key(key), value(std::move(value)), pins(pins) {}
+    Item(K key, std::unique_ptr<V> value)
+        : key(key), value(std::move(value)) {}
K key;
    std::unique_ptr<V> value;
int pins = 0;
@@ -268,7 +269,7 @@ class LruCache {
}
// Fresh in front, stale on back.
- int capacity_ GUARDED_BY(mutex_);
+  std::atomic<int> capacity_;
int size_ GUARDED_BY(mutex_) = 0;
int allocated_ GUARDED_BY(mutex_) = 0;
Item* lru_head_ GUARDED_BY(mutex_) = nullptr; // Newest elements.
diff --git a/src/utils/fastmath.h b/src/utils/fastmath.h
index 9f182e423a..ba0855a5eb 100644
--- a/src/utils/fastmath.h
+++ b/src/utils/fastmath.h
@@ -36,9 +36,9 @@ namespace lczero {
// The approximation used here is log2(2^N*(1+f)) ~ N+f*(1.342671-0.342671*f)
// where N is the integer and f the fractional part, f>=0.
inline float FastLog2(const float a) {
- int32_t tmp;
+ uint32_t tmp;
std::memcpy(&tmp, &a, sizeof(float));
- int expb = (tmp >> 23);
+ uint32_t expb = tmp >> 23;
tmp = (tmp & 0x7fffff) | (0x7f << 23);
float out;
std::memcpy(&out, &tmp, sizeof(float));
@@ -50,12 +50,12 @@ inline float FastLog2(const float a) {
// where N is the integer and f the fractional part, f>=0.
inline float FastPow2(const float a) {
if (a < -126) return 0.0;
- int exp = floor(a);
+ int32_t exp = floor(a);
float out = a - exp;
out = 1.0f + out * (0.656366f + 0.343634f * out);
int32_t tmp;
std::memcpy(&tmp, &out, sizeof(float));
- tmp += exp << 23;
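+  // Do the shift in unsigned arithmetic: left-shifting a negative exponent
+  // would be undefined behaviour.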
+  tmp += static_cast<int32_t>(static_cast<uint32_t>(exp) << 23);
std::memcpy(&out, &tmp, sizeof(float));
return out;
}
diff --git a/src/version.inc b/src/version.inc
index 02885e82d8..5662d344ef 100644
--- a/src/version.inc
+++ b/src/version.inc
@@ -1,4 +1,4 @@
#define LC0_VERSION_MAJOR 0
-#define LC0_VERSION_MINOR 20
+#define LC0_VERSION_MINOR 21
#define LC0_VERSION_PATCH 0
#define LC0_VERSION_POSTFIX "dev"
diff --git a/subprojects/protobuf-3.6.0.wrap b/subprojects/protobuf-3.6.0.wrap
new file mode 100644
index 0000000000..5a10b720b8
--- /dev/null
+++ b/subprojects/protobuf-3.6.0.wrap
@@ -0,0 +1,10 @@
+[wrap-file]
+directory = protobuf-3.6.0
+
+source_url = https://github.com/protocolbuffers/protobuf/releases/download/v3.6.0/protobuf-all-3.6.0.tar.gz
+source_filename = protobuf-all-3.6.0.tar.gz
+source_hash = 1532154addf85080330fdd037949d4653dfce16550df5c70ea0cd212d8aff3af
+
+patch_url = https://github.com/borg323/protobuf/releases/download/3.6.0/protobuf-3.6.0-wrap.zip
+patch_filename = protobuf-3.6.0-wrap.zip
+patch_hash = a14730d2e3702c4a0d7b3f05a380ec6b2c0b138a5b00539705b5c3a8df9885e3
diff --git a/tensorflow.md b/tensorflow.md
new file mode 100644
index 0000000000..d5dd9831f3
--- /dev/null
+++ b/tensorflow.md
@@ -0,0 +1,11 @@
+To build with tensorflow under Linux you need to install Tensorflow_cc from
+<https://github.com/FloopCZ/tensorflow_cc>; either release v1.9.0 or v1.12.0
+will work. Tensorflow_cc requires a specific version of protobuf, which
+constrains the build. Release v1.9.0 works out of the box, since the default
+protobuf subproject (v3.5.1) is compatible and is used instead of a system
+installed version. In contrast, release v1.12.0 needs protobuf v3.6.0, which
+can be selected by adding `-Dprotobuf-3-6-0=true` to the build command line.
+Note that this protobuf version has issues with static builds and crashes, so
+it is not recommended for normal use. The crashes look very similar to:
+*
+*
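+
+A minimal build sketch (assuming the standard meson workflow and that the
+`-Dtensorflow=true` option enables the Tensorflow_cc backend):
+
+    meson build --buildtype release -Dtensorflow=true -Dprotobuf-3-6-0=true
+    ninja -C build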