diff --git a/appveyor.yml b/appveyor.yml index b8afe0fb1a..f773ea3859 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -17,7 +17,8 @@ install: - cmd: IF %NAME%==opencl set OPENCL=true - cmd: IF %NAME%==blas set BLAS=true - cmd: IF %NAME%==blas set GTEST=true -- cmd: IF %BLAS%==true nuget install OpenBLAS -Version 0.2.14.1 -OutputDirectory C:\cache +- cmd: IF %BLAS%==true IF NOT EXIST C:\cache\OpenBLAS appveyor DownloadFile https://sjeng.org/ftp/OpenBLAS-0.3.3-win-oldthread.zip +- cmd: IF %BLAS%==true IF NOT EXIST C:\cache\OpenBLAS 7z x OpenBLAS-0.3.3-win-oldthread.zip -oC:\cache\OpenBLAS - cmd: IF %OPENCL%==true nuget install opencl-nug -Version 0.777.12 -OutputDirectory C:\cache - cmd: IF %BLAS%==true IF NOT EXIST C:\cache\ispc-v1.9.2-windows appveyor DownloadFile https://sourceforge.net/projects/ispcmirror/files/v1.9.2/ispc-v1.9.2-windows.zip - cmd: IF %BLAS%==true IF NOT EXIST C:\cache\ispc-v1.9.2-windows 7z x ispc-v1.9.2-windows.zip -oC:\cache @@ -52,12 +53,12 @@ cache: - C:\projects\lc0\subprojects\packagecache before_build: - cmd: git submodule update --init --recursive -- cmd: meson build --backend vs2017 --buildtype release -Dgtest=%GTEST% -Dopencl=%OPENCL% -Dblas=%BLAS% -Dcudnn=%CUDA% -Dispc_native_only=false -Dpopcnt=false -Dcudnn_include="%CUDA_PATH%\include","%PKG_FOLDER%\cuda\include" -Dcudnn_libdirs="%CUDA_PATH%\lib\x64","%PKG_FOLDER%\cuda\lib\x64" -Dprotobuf_include="%PKG_FOLDER%\protobuf\include" -Dprotobuf_libdir="%PKG_FOLDER%\protobuf\lib" -Dopenblas_include="%PKG_FOLDER%\OpenBLAS.0.2.14.1\lib\native\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS.0.2.14.1\lib\native\lib\x64" -Dopencl_include="%PKG_FOLDER%\opencl-nug.0.777.12\build\native\include" -Dopencl_libdirs="%PKG_FOLDER%\opencl-nug.0.777.12\build\native\lib\x64" -Ddefault_library=static +- cmd: meson build --backend vs2017 --buildtype release -Dgtest=%GTEST% -Dopencl=%OPENCL% -Dblas=%BLAS% -Dcudnn=%CUDA% -Dispc_native_only=false -Dpopcnt=false 
-Dcudnn_include="%CUDA_PATH%\include","%PKG_FOLDER%\cuda\include" -Dcudnn_libdirs="%CUDA_PATH%\lib\x64","%PKG_FOLDER%\cuda\lib\x64" -Dprotobuf_include="%PKG_FOLDER%\protobuf\include" -Dprotobuf_libdir="%PKG_FOLDER%\protobuf\lib" -Dopenblas_include="%PKG_FOLDER%\OpenBLAS\dist64\include" -Dopenblas_libdirs="%PKG_FOLDER%\OpenBLAS\dist64\lib" -Dopencl_include="%PKG_FOLDER%\opencl-nug.0.777.12\build\native\include" -Dopencl_libdirs="%PKG_FOLDER%\opencl-nug.0.777.12\build\native\lib\x64" -Ddefault_library=static build_script: - cmd: IF %APPVEYOR_REPO_TAG%==false msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=true /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" - cmd: IF %APPVEYOR_REPO_TAG%==true msbuild "C:\projects\lc0\build\lc0.sln" /m /p:WholeProgramOptimization=PGInstrument /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" - cmd: cd build -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %BLAS%==true copy C:\cache\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.dll +- cmd: IF %APPVEYOR_REPO_TAG%==true IF %BLAS%==true copy C:\cache\OpenBLAS\dist64\bin\libopenblas.dll - cmd: IF %APPVEYOR_REPO_TAG%==true IF %OPENCL%==true copy C:\cache\opencl-nug.0.777.12\build\native\bin\OpenCL.dll - cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true copy "%CUDA_PATH%"\bin\*.dll - cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true copy %PKG_FOLDER%\cuda\bin\cudnn64_7.dll @@ -68,7 +69,7 @@ after_build: - cmd: IF %APPVEYOR_REPO_TAG%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip %APPVEYOR_BUILD_FOLDER%\build\lc0.exe - cmd: IF %APPVEYOR_REPO_TAG%==true appveyor DownloadFile "https://ci.appveyor.com/api/projects/LeelaChessZero/lczero-client/artifacts/client.exe?branch=release&pr=false&job=Environment%%3A%%20NAME%%3D.exe%%2C%%20GOOS%%3Dwindows" - cmd: IF %APPVEYOR_REPO_TAG%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip client.exe -- cmd: IF %APPVEYOR_REPO_TAG%==true IF %BLAS%==true 7z a 
lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip C:\cache\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.dll +- cmd: IF %APPVEYOR_REPO_TAG%==true IF %BLAS%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip C:\cache\OpenBLAS\dist64\bin\libopenblas.dll - cmd: IF %APPVEYOR_REPO_TAG%==true IF %OPENCL%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip C:\cache\opencl-nug.0.777.12\build\native\bin\OpenCL.dll - cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip "%CUDA_PATH%\bin\cudart64_100.dll" "%CUDA_PATH%\bin\cublas64_100.dll" - cmd: IF %APPVEYOR_REPO_TAG%==true IF %CUDA%==true 7z a lc0-%APPVEYOR_REPO_TAG_NAME%-windows-%NAME%.zip "%PKG_FOLDER%\cuda\bin\cudnn64_7.dll" @@ -91,7 +92,7 @@ deploy: appveyor_repo_tag: true test_script: - cmd: cd build -- cmd: IF %GTEST%==true copy C:\cache\OpenBLAS.0.2.14.1\lib\native\bin\x64\*.dll +- cmd: IF %GTEST%==true copy C:\cache\OpenBLAS\dist64\bin\libopenblas.dll - cmd: IF %GTEST%==true xcopy /s /i C:\cache\syzygy syzygy - cmd: IF %GTEST%==true meson test --print-errorlogs - cmd: cd .. diff --git a/changelog.txt b/changelog.txt index bbf4629767..fbb902a7bb 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,72 @@ +v0.20.0-rc1 (2018-12-22) +~~~~~~~~~~~ + +* Squeeze-and-Excitation Networks are now supported! (lc0.org/se) +* Older text network files are no longer supported. +* Various performance fixes (most major being having fast approximate math + functions). +* For systems with multiple GPUs, in addition to "multiplexing" backend + we now also have "demux" backend and "roundrobin" backend. +* Compiler settings tweaks (use VS2017 for windows builds, always have LTO + enabled, windows releases have PGO enabled). +* Benchmark mode has more options now (e.g. movetime) and saner defaults. +* Added an option to prevent engine to resign too early (used in training). +* Fixed a bug when number of visits could be too high in collision nodes. 
+ The fix is pretty hacky, there will be better fix later. +* 32-bit version compiles again. + +v0.19.1 (2018-12-10) +~~~~~~~ + +(no changes relative to v0.19.1-rc2) + +v0.19.1-rc2 (2018-12-07) +~~~~~~~~~~~ + +* Temperature and FPU related params. (#568) +* Rework Cpuct related params. (#567) + +v0.19.1-rc1 (2018-12-06) +~~~~~~~~~~~ + +* Updated cpuct formula from alphazero paper. (#563) +* remove UpdateFromUciOptions() from EnsureReady() (#558) +* revert IsSearchActive() and better fix for one of #500 crashes (#555) + +v0.19.0 (2018-11-19) +~~~~~~~ + +* remove Wait() from EngineController::Stop() (#522) + +v0.19.0-rc5 (2018-11-17) +~~~~~~~~~~~ + +* OpenCL: replace thread_local with a resource pool. (#516) +* optional wtime and btime (#515) +* Make convolve1 work with workgroup size of 128 (#514) +* adjust average depth calculation for multivisits (#510) + +v0.19.0-rc4 (2018-11-12) +~~~~~~~~~~~ + +* Microseconds have 6 digits, not 3! (#505) +* use bestmove_is_sent_ for Search::IsSearchActive() (#502) + +v0.19.0-rc3 (2018-11-07) +~~~~~~~~~~~ + +* Fix OpenCL tuner always loading the first saved tuning (#491) +* Do not show warning when ComputeBlocking() takes too much time. (#494) +* Output microseconds in log rather than milliseconds. (#495) +* Add benchmark features (#483) +* Fix EncodePositionForNN test failure (#490) + +v0.19.0-rc2 (2018-11-03) +~~~~~~~~~~~ + +* Version v0.19.0-rc1 reported it's version as v0.19.0-dev + Therefore v0.19.0-rc2 is released with this issue fixed. + v0.19.0-rc1 (2018-11-03) ~~~~~~~~~~~ diff --git a/meson.build b/meson.build index c30d4592b6..9bbebb794c 100644 --- a/meson.build +++ b/meson.build @@ -15,7 +15,7 @@ # along with Leela Chess. If not, see . 
project('lc0', 'cpp', - default_options : ['cpp_std=c++14', 'b_ndebug=if-release', 'b_lto=true'], + default_options : ['cpp_std=c++14', 'b_ndebug=if-release'], meson_version: '>=0.45') cc = meson.get_compiler('cpp') @@ -26,7 +26,6 @@ endif if cc.get_id() == 'clang' or cc.get_id() == 'gcc' add_project_arguments('-Wextra', language : 'cpp') add_project_arguments('-pedantic', language : 'cpp') - add_project_arguments('-ffast-math', language : 'cpp') if get_option('buildtype') == 'release' add_project_arguments('-march=native', language : 'cpp') @@ -51,7 +50,10 @@ else endif protoc = find_program('protoc', required : false) # For tensorflow skip system protobuf, chances are it will not work. -if not protobuf_dep.found() or not protoc.found() or get_option('tensorflow') +if get_option('protobuf-3-6-0') + deps += subproject('protobuf-3.6.0').get_variable('protobuf_dep') + protoc = subproject('protobuf-3.6.0').get_variable('protoc') +elif not protobuf_dep.found() or not protoc.found() or get_option('tensorflow') deps += subproject('protobuf').get_variable('protobuf_dep') protoc = subproject('protobuf').get_variable('protoc') else @@ -98,9 +100,11 @@ files += [ 'src/neural/factory.cc', 'src/neural/loader.cc', 'src/neural/network_check.cc', + 'src/neural/network_demux.cc', 'src/neural/network_legacy.cc', 'src/neural/network_mux.cc', 'src/neural/network_random.cc', + 'src/neural/network_rr.cc', 'src/neural/network_st_batch.cc', 'src/neural/writer.cc', 'src/selfplay/game.cc', @@ -155,6 +159,7 @@ if get_option('build_backends') tensorflow_include, tensorflow_include[0] + '/bazel-genfiles', tensorflow_include[0] + '/tensorflow/contrib/makefile/downloads', + tensorflow_include[0] + '/tensorflow/contrib/makefile/downloads/absl', tensorflow_include[0] + '/tensorflow/contrib/makefile/downloads/eigen', tensorflow_include[0] + '/tensorflow/contrib/makefile/downloads/gemmlowp', tensorflow_include[0] + '/tensorflow/contrib/makefile/downloads/nsync/public', @@ -179,6 +184,9 @@ if 
get_option('build_backends') mkl_libdirs = get_option('mkl_libdirs') mkl_lib = cc.find_library('mkl_rt', dirs: mkl_libdirs, required: false) + if not mkl_lib.found() + mkl_lib = cc.find_library('mklml', dirs: mkl_libdirs, required: false) + endif openblas_libdirs = get_option('openblas_libdirs') openblas_lib = cc.find_library('openblas.dll', dirs: openblas_libdirs, required: false) @@ -394,9 +402,9 @@ endif # if get_option('build_backends') if not has_backends and get_option('build_backends') error(''' - No usable computation backends (cudnn/tensorflow/etc) are found. - If you want to build it with random only backend, pass - -D build_backends=false to a meson build.''') + No usable computation backends (cudnn/opencl/blas/etc) enabled. + If you want to build with the random backend only, add + -Dbuild_backends=false to the build command line.''') endif diff --git a/meson_options.txt b/meson_options.txt index 5d8bd7d012..04993f626f 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -117,3 +117,8 @@ option('gtest', type: 'boolean', value: true, description: 'Build gtest tests') + +option('protobuf-3-6-0', + type: 'boolean', + value: false, + description: 'Use the protobuf 3.6.0 subproject') diff --git a/src/benchmark/benchmark.cc b/src/benchmark/benchmark.cc index eb83368fea..234f0b2b5c 100644 --- a/src/benchmark/benchmark.cc +++ b/src/benchmark/benchmark.cc @@ -56,17 +56,6 @@ void Benchmark::Run() { options.Add(kNNCacheSizeId, 0, 999999999) = 200000; options.Add(kThreadsOptionId, 1, 128) = kDefaultThreads; - auto defaults = options.GetMutableDefaultsOptions(); - - defaults->Set(SearchParams::kMiniBatchSizeId.GetId(), 256); - defaults->Set(SearchParams::kFpuReductionId.GetId(), 1.2f); - defaults->Set(SearchParams::kCpuctId.GetId(), 3.4f); - defaults->Set(SearchParams::kPolicySoftmaxTempId.GetId(), 2.2f); - defaults->Set(SearchParams::kMaxCollisionVisitsId.GetId(), 9999); - defaults->Set(SearchParams::kMaxCollisionEventsId.GetId(), 32); - 
defaults->Set(SearchParams::kCacheHistoryLengthId.GetId(), 0); - defaults->Set(SearchParams::kOutOfOrderEvalId.GetId(), true); - if (!options.ProcessAllFlags()) return; try { diff --git a/src/chess/board.cc b/src/chess/board.cc index 609ae57ff2..d9689ab26c 100644 --- a/src/chess/board.cc +++ b/src/chess/board.cc @@ -188,6 +188,7 @@ BitBoard ChessBoard::en_passant() const { return pawns_ - pawns(); } MoveList ChessBoard::GeneratePseudolegalMoves() const { MoveList result; + result.reserve(60); for (auto source : our_pieces_) { // King if (source == our_king_) { @@ -336,8 +337,8 @@ MoveList ChessBoard::GeneratePseudolegalMoves() const { } // Knight. { - for (const auto destination : kKnightAttacks[source.as_int()]) { - if (our_pieces_.get(destination)) continue; + for (const auto destination : + kKnightAttacks[source.as_int()] - our_pieces_) { result.emplace_back(source, destination); } } @@ -405,9 +406,6 @@ bool ChessBoard::ApplyMove(Move move) { return reset_50_moves; } - // Now destination square for our piece is known. - our_pieces_.set(to); - // Promotion if (move.promotion() != Move::Promotion::None) { switch (move.promotion()) { @@ -456,13 +454,13 @@ bool ChessBoard::ApplyMove(Move move) { bool ChessBoard::IsUnderAttack(BoardSquare square) const { const int row = square.row(); const int col = square.col(); - // Check king + // Check king. { const int krow = their_king_.row(); const int kcol = their_king_.col(); if (std::abs(krow - row) <= 1 && std::abs(kcol - col) <= 1) return true; } - // Check Rooks (and queen) + // Check rooks (and queens). if (kRookAttacks[square.as_int()].intersects(their_pieces_ * rooks_)) { for (const auto& direction : kRookDirections) { auto dst_row = row; @@ -480,7 +478,7 @@ bool ChessBoard::IsUnderAttack(BoardSquare square) const { } } } - // Check Bishops + // Check bishops. 
if (kBishopAttacks[square.as_int()].intersects(their_pieces_ * bishops_)) { for (const auto& direction : kBishopDirections) { auto dst_row = row; @@ -498,11 +496,11 @@ bool ChessBoard::IsUnderAttack(BoardSquare square) const { } } } - // Check pawns + // Check pawns. if (kPawnAttacks[square.as_int()].intersects(their_pieces_ * pawns_)) { return true; } - // Check knights + // Check knights. { if (kKnightAttacks[square.as_int()].intersects(their_pieces_ - their_king_ - rooks_ - bishops_ - @@ -513,18 +511,135 @@ bool ChessBoard::IsUnderAttack(BoardSquare square) const { return false; } -bool ChessBoard::IsLegalMove(Move move, bool was_under_check) const { - const auto& from = move.from(); - const auto& to = move.to(); +KingAttackInfo ChessBoard::GenerateKingAttackInfo() const { + KingAttackInfo king_attack_info; - // If we are already under check, also apply move and check if valid. - // TODO(mooskagh) Optimize this case - if (was_under_check) { - ChessBoard board(*this); - board.ApplyMove(move); - return !board.IsUnderCheck(); + // Number of attackers that give check (used for double check detection). + unsigned num_king_attackers = 0; + + const int row = our_king_.row(); + const int col = our_king_.col(); + // King checks are unnecessary, as kings cannot give check. + // Check rooks (and queens). + if (kRookAttacks[our_king_.as_int()].intersects(their_pieces_ * rooks_)) { + for (const auto& direction : kRookDirections) { + auto dst_row = row; + auto dst_col = col; + BitBoard attack_line(0); + bool possible_pinned_piece_found = false; + BoardSquare possible_pinned_piece; + while (true) { + dst_row += direction.first; + dst_col += direction.second; + if (!BoardSquare::IsValid(dst_row, dst_col)) break; + const BoardSquare destination(dst_row, dst_col); + if (our_pieces_.get(destination)) { + if (possible_pinned_piece_found) { + // No pieces pinned. + break; + } else { + // This is a possible pinned piece. 
+ possible_pinned_piece_found = true; + possible_pinned_piece = destination; + } + } + if (!possible_pinned_piece_found) { + attack_line.set(destination); + } + if (their_pieces_.get(destination)) { + if (rooks_.get(destination)) { + if (possible_pinned_piece_found) { + // Store the pinned piece. + king_attack_info.pinned_pieces_.set(possible_pinned_piece); + } else { + // Update attacking lines. + king_attack_info.attacking_lines_ = + king_attack_info.attacking_lines_ + attack_line; + num_king_attackers++; + } + } + break; + } + } + } + } + // Check bishops. + if (kBishopAttacks[our_king_.as_int()].intersects(their_pieces_ * bishops_)) { + for (const auto& direction : kBishopDirections) { + auto dst_row = row; + auto dst_col = col; + BitBoard attack_line(0); + bool possible_pinned_piece_found = false; + BoardSquare possible_pinned_piece; + while (true) { + dst_row += direction.first; + dst_col += direction.second; + if (!BoardSquare::IsValid(dst_row, dst_col)) break; + const BoardSquare destination(dst_row, dst_col); + if (our_pieces_.get(destination)) { + if (possible_pinned_piece_found) { + // No pieces pinned. + break; + } else { + // This is a possible pinned piece. + possible_pinned_piece_found = true; + possible_pinned_piece = destination; + } + } + if (!possible_pinned_piece_found) { + attack_line.set(destination); + } + if (their_pieces_.get(destination)) { + if (bishops_.get(destination)) { + if (possible_pinned_piece_found) { + // Store the pinned piece. + king_attack_info.pinned_pieces_.set(possible_pinned_piece); + } else { + // Update attacking lines. + king_attack_info.attacking_lines_ = + king_attack_info.attacking_lines_ + attack_line; + num_king_attackers++; + } + } + break; + } + } + } + } + // Check pawns. 
+ const BitBoard attacking_pawns = + kPawnAttacks[our_king_.as_int()] * their_pieces_ * pawns_; + king_attack_info.attacking_lines_ = + king_attack_info.attacking_lines_ + attacking_pawns; + + if (attacking_pawns.as_int()) { + // No more than one pawn can give check. + num_king_attackers++; } + // Check knights. + const BitBoard attacking_knights = + kKnightAttacks[our_king_.as_int()] * + (their_pieces_ - their_king_ - rooks_ - bishops_ - (pawns_ * kPawnMask)); + king_attack_info.attacking_lines_ = + king_attack_info.attacking_lines_ + attacking_knights; + + if (attacking_knights.as_int()) { + // No more than one knight can give check. + num_king_attackers++; + } + + assert(num_king_attackers <= 2); + king_attack_info.double_check_ = (num_king_attackers == 2); + + return king_attack_info; +} + +bool ChessBoard::IsLegalMove(Move move, + const KingAttackInfo& king_attack_info) const { + const auto& from = move.from(); + const auto& to = move.to(); + // En passant. Complex but rare. Just apply // and check that we are not under check. if (from.row() == 4 && pawns_.get(from) && from.col() != to.col() && @@ -534,83 +649,72 @@ bool ChessBoard::IsLegalMove(Move move, bool was_under_check) const { return !board.IsUnderCheck(); } - // If it's kings move, check that destination - // is not under attack. + // Check if we are already under check. + if (king_attack_info.in_check()) { + // King move. + if (from == our_king_) { + // Just apply and check that we are not under check. + ChessBoard board(*this); + board.ApplyMove(move); + return !board.IsUnderCheck(); + } + + // Pinned pieces can never resolve a check. + if (king_attack_info.is_pinned(from)) { + return false; + } + + // The piece to move is no king and is not pinned. + if (king_attack_info.in_double_check()) { + // Only a king move can resolve the double check. + return false; + } else { + // Only one attacking piece gives check. + // Our piece is free to move (not pinned). 
Check if the attacker is + // captured or interposed after the piece has moved to its destination + // square. + return king_attack_info.is_on_attack_line(to); + } + } + + // Castlings were checked earlier. + // Moreover, no pseudolegal king moves to an attacked square are generated. + // If it's king's move at this moment, its certainly legal. if (from == our_king_) { - // Castlings were checked earlier. - if (std::abs(static_cast(from.col()) - static_cast(to.col())) > 1) - return true; - return !IsUnderAttack(to); + return true; } - // Not check that piece was pinned. And it was, check that after the move - // it is still on like of attack. - int dx = from.col() - our_king_.col(); - int dy = from.row() - our_king_.row(); - - // If it's not on the same file/rank/diagonal as our king, cannot be pinned. - if (dx != 0 && dy != 0 && std::abs(dx) != std::abs(dy)) return true; - dx = (dx > 0) - (dx < 0); // Sign. - dy = (dy > 0) - (dy < 0); - auto col = our_king_.col(); - auto row = our_king_.row(); - while (true) { - col += dx; - row += dy; - // Attacking line left board, good. - if (!BoardSquare::IsValid(row, col)) return true; - const BoardSquare square(row, col); - // The source square of the move is now free. - if (square == from) continue; - // The destination square if the move is our piece. King is not under - // attack. - if (square == to) return true; - // Our piece on the line. Not under attack. - if (our_pieces_.get(square)) return true; - if (their_pieces_.get(square)) { - if (dx == 0 || dy == 0) { - // Have to be afraid of rook-like piece. - return !rooks_.get(square); - } else { - // Have to be afraid of bishop-like piece. - return !bishops_.get(square); - } - return true; - } + // If we get here, we are not under check. + // If the piece is not pinned, it is free to move anywhere. + if (!king_attack_info.is_pinned(from)) return true; + + // The piece is pinned. Now check that it stays on the same line w.r.t. the + // king. 
+ int dx_from = from.col() - our_king_.col(); + int dy_from = from.row() - our_king_.row(); + int dx_to = to.col() - our_king_.col(); + int dy_to = to.row() - our_king_.row(); + + if (dx_from == 0 || dx_to == 0) { + return (dx_from == dx_to); + } else { + return (dx_from * dy_to == dx_to * dy_from); } } MoveList ChessBoard::GenerateLegalMoves() const { - const bool was_under_check = IsUnderCheck(); + const KingAttackInfo king_attack_info = GenerateKingAttackInfo(); MoveList move_list = GeneratePseudolegalMoves(); MoveList result; result.reserve(move_list.size()); for (Move m : move_list) { - if (IsLegalMove(m, was_under_check)) result.emplace_back(m); + if (IsLegalMove(m, king_attack_info)) result.emplace_back(m); } return result; } -std::vector ChessBoard::GenerateLegalMovesAndPositions() const { - MoveList move_list = GeneratePseudolegalMoves(); - std::vector result; - - for (const auto& move : move_list) { - result.emplace_back(); - auto& newboard = result.back().board; - newboard = *this; - result.back().reset_50_moves = newboard.ApplyMove(move); - if (newboard.IsUnderCheck()) { - result.pop_back(); - continue; - } - result.back().move = move; - } - return result; -} - void ChessBoard::SetFromFen(const std::string& fen, int* no_capture_ply, int* moves) { Clear(); diff --git a/src/chess/board.h b/src/chess/board.h index 830e4c35cf..9188c5f6f3 100644 --- a/src/chess/board.h +++ b/src/chess/board.h @@ -33,7 +33,22 @@ namespace lczero { -struct MoveExecution; +// Represents king attack info used during legal move detection. 
+class KingAttackInfo { + public: + bool in_check() const { return attacking_lines_.as_int(); } + bool in_double_check() const { return double_check_; } + bool is_pinned(const BoardSquare square) const { + return pinned_pieces_.get(square); + } + bool is_on_attack_line(const BoardSquare square) const { + return attacking_lines_.get(square); + } + + bool double_check_ = 0; + BitBoard pinned_pieces_ = {0}; + BitBoard attacking_lines_ = {0}; +}; // Represents a board position. // Unlike most chess engines, the board is mirrored for black. @@ -66,23 +81,25 @@ class ChessBoard { bool ApplyMove(Move move); // Checks if the square is under attack from "theirs" (black). bool IsUnderAttack(BoardSquare square) const; + // Generates the king attack info used for legal move detection. + KingAttackInfo GenerateKingAttackInfo() const; // Checks if "our" (white) king is under check. bool IsUnderCheck() const { return IsUnderAttack(our_king_); } - // Checks whether at least one of the sides has mating material. + // Checks whether at least one of the sides has mating material. bool HasMatingMaterial() const; // Generates legal moves. MoveList GenerateLegalMoves() const; // Check whether pseudolegal move is legal. - bool IsLegalMove(Move move, bool was_under_check) const; - // Returns a list of legal moves and board positions after the move is made. - std::vector GenerateLegalMovesAndPositions() const; + bool IsLegalMove(Move move, const KingAttackInfo& king_attack_info) const; uint64_t Hash() const { return HashCat({our_pieces_.as_int(), their_pieces_.as_int(), rooks_.as_int(), bishops_.as_int(), pawns_.as_int(), - our_king_.as_int(), their_king_.as_int(), - castlings_.as_int(), flipped_}); + (static_cast(our_king_.as_int()) << 24) | + (static_cast(their_king_.as_int()) << 16) | + (static_cast(castlings_.as_int()) << 8) | + static_cast(flipped_)}); } class Castlings { @@ -168,8 +185,8 @@ class ChessBoard { // Pawns. // Ranks 1 and 8 have special meaning. 
Pawn at rank 1 means that // corresponding white pawn on rank 4 can be taken en passant. Rank 8 is the - // same for black pawns. Those "fake" pawns are not present in white_ and - // black_ bitboards. + // same for black pawns. Those "fake" pawns are not present in our_pieces_ and + // their_pieces_ bitboards. BitBoard pawns_; BoardSquare our_king_; BoardSquare their_king_; @@ -177,11 +194,4 @@ class ChessBoard { bool flipped_ = false; // aka "Black to move". }; -// Stores the move and state of the board after the move is done. -struct MoveExecution { - Move move; - ChessBoard board; - bool reset_50_moves; -}; - } // namespace lczero diff --git a/src/engine.cc b/src/engine.cc index ec80370dbb..6d160f8fcd 100644 --- a/src/engine.cc +++ b/src/engine.cc @@ -58,19 +58,17 @@ const OptionId kMoveOverheadId{ "Amount of time, in milliseconds, that the engine subtracts from it's " "total available time (to compensate for slow connection, interprocess " "communication, etc)."}; -const OptionId kTimePeakPlyId{"time-peak-halfmove", "TimePeakHalfmove", - "For which halfmove the time budgeting algorithm " - "should allocate the maximum amount of time."}; -const OptionId kTimeLeftWidthId{ - "time-left-width", "TimeLeftWidth", - "\"Width\" of time budget graph to the left of the peak value. For small " - "values, moves far from the peak will get little time; for larger values, " - "they will get almost the same time as the peak move."}; -const OptionId kTimeRightWidthId{ - "time-right-width", "TimeRightWidth", - "\"Width\" of time budget graph to the right of the peak value. For small " - "values, moves far from the peak will get little time; for larger values, " - "they will get almost the same time as the peak move."}; +const OptionId kTimeMidpointMoveId{ + "time-midpoint-move", "TimeMidpointMove", + "The move where the time budgeting algorithm guesses half of all " + "games to be completed by. 
Half of the time allocated for the first move " + "is allocated at approximately this move."}; +const OptionId kTimeSteepnessId{ + "time-steepness", "TimeSteepness", + "\"Steepness\" of the function the time budgeting algorithm uses to " + "consider when games are completed. Lower values leave more time for " + "the endgame, higher values use more time for each move before the " + "midpoint."}; const OptionId kSyzygyTablebaseId{ "syzygy-paths", "SyzygyPath", "List of Syzygy tablebase directories, list entries separated by system " @@ -99,13 +97,22 @@ const size_t kAvgCacheItemSize = NNCache::GetItemStructSize() + sizeof(CachedNNRequest) + sizeof(CachedNNRequest::IdxAndProb) * kAvgMovesPerPosition; -float ComputeMoveWeight(int ply, float peak, float left_width, - float right_width) { - // Inflection points of the function are at ply = peak +/- width. - // At these points the function is at 2/3 of its max value. - const float width = ply > peak ? right_width : left_width; - constexpr float width_scaler = 1.518651485f; // 2 / log(2 + sqrt(3)) - return std::pow(std::cosh((ply - peak) / width / width_scaler), -2.0f); +float ComputeEstimatedMovesToGo(int ply, float midpoint, float steepness) { + // An analysis of chess games shows that the distribution of game lengths + // looks like a log-logistic distribution. The mean residual time function + // calculates how many more moves are expected in the game given that we are + // at the current ply. Given that this function can be expensive to compute, + // we calculate the median residual time function instead. This is derived and + // shown to be similar to the mean residual time in "Some Useful Properties of + // Log-Logistic Random Variables for Health Care Simulations" (Clark & + // El-Taha, 2015). + // midpoint: The median length of games. + // steepness: How quickly the function drops off from its maximum value, + // around the midpoint. 
+ float move = ply / 2.0f; + return midpoint * std::pow(1 + 2 * std::pow(move / midpoint, steepness), + 1 / steepness) - + move; } } // namespace @@ -125,36 +132,22 @@ void EngineController::PopulateOptions(OptionsParser* options) { options->Add(kNNCacheSizeId, 0, 999999999) = 200000; options->Add(kSlowMoverId, 0.0f, 100.0f) = 1.0f; options->Add(kMoveOverheadId, 0, 100000000) = 200; - options->Add(kTimePeakPlyId, -1000.0f, 1000.0f) = 26.2f; - options->Add(kTimeLeftWidthId, 0.0f, 1000.0f) = 82.0f; - options->Add(kTimeRightWidthId, 0.0f, 1000.0f) = 74.0f; + options->Add(kTimeMidpointMoveId, 1.0f, 100.0f) = 51.5f; + options->Add(kTimeSteepnessId, 1.0f, 100.0f) = 7.0f; options->Add(kSyzygyTablebaseId); // Add "Ponder" option to signal to GUIs that we support pondering. // This option is currently not used by lc0 in any way. options->Add(kPonderId) = true; - options->Add(kSpendSavedTimeId, 0.0f, 1.0f) = 0.6f; + options->Add(kSpendSavedTimeId, 0.0f, 1.0f) = 1.0f; options->Add(kRamLimitMbId, 0, 100000000) = 0; // Hide time curve options. 
- options->HideOption(kTimePeakPlyId); - options->HideOption(kTimeLeftWidthId); - options->HideOption(kTimeRightWidthId); + options->HideOption(kTimeMidpointMoveId); + options->HideOption(kTimeSteepnessId); NetworkFactory::PopulateOptions(options); SearchParams::Populate(options); ConfigFile::PopulateOptions(options); - - auto defaults = options->GetMutableDefaultsOptions(); - - defaults->Set(SearchParams::kMiniBatchSizeId.GetId(), 256); - defaults->Set(SearchParams::kFpuReductionId.GetId(), 1.2f); - defaults->Set(SearchParams::kCpuctId.GetId(), 3.0f); - defaults->Set(SearchParams::kCpuctFactorId.GetId(), 2.0f); - defaults->Set(SearchParams::kPolicySoftmaxTempId.GetId(), 2.2f); - defaults->Set(SearchParams::kMaxCollisionVisitsId.GetId(), 9999); - defaults->Set(SearchParams::kMaxCollisionEventsId.GetId(), 32); - defaults->Set(SearchParams::kCacheHistoryLengthId.GetId(), 0); - defaults->Set(SearchParams::kOutOfOrderEvalId.GetId(), true); } SearchLimits EngineController::PopulateSearchLimits( @@ -162,11 +155,6 @@ SearchLimits EngineController::PopulateSearchLimits( std::chrono::steady_clock::time_point start_time) { SearchLimits limits; int64_t move_overhead = options_.Get(kMoveOverheadId.GetId()); - if (params.movetime) { - limits.search_deadline = start_time + std::chrono::milliseconds( - *params.movetime - move_overhead); - } - const optional& time = (is_black ? 
params.btime : params.wtime); if (!params.searchmoves.empty()) { limits.searchmoves.reserve(params.searchmoves.size()); @@ -175,6 +163,10 @@ SearchLimits EngineController::PopulateSearchLimits( } } limits.infinite = params.infinite || params.ponder; + if (params.movetime && !limits.infinite) { + limits.search_deadline = start_time + std::chrono::milliseconds( + *params.movetime - move_overhead); + } if (params.nodes) limits.visits = *params.nodes; int ram_limit = options_.Get(kRamLimitMbId.GetId()); if (ram_limit) { @@ -192,19 +184,26 @@ SearchLimits EngineController::PopulateSearchLimits( const optional& inc = is_black ? params.binc : params.winc; int increment = inc ? std::max(int64_t(0), *inc) : 0; - int movestogo = params.movestogo.value_or(50); - // Fix non-standard uci command. - if (movestogo == 0) movestogo = 1; - // How to scale moves time. float slowmover = options_.Get(kSlowMoverId.GetId()); - float time_curve_peak = options_.Get(kTimePeakPlyId.GetId()); - float time_curve_left_width = options_.Get(kTimeLeftWidthId.GetId()); - float time_curve_right_width = options_.Get(kTimeRightWidthId.GetId()); + float time_curve_midpoint = options_.Get(kTimeMidpointMoveId.GetId()); + float time_curve_steepness = options_.Get(kTimeSteepnessId.GetId()); + + float movestogo = + ComputeEstimatedMovesToGo(ply, time_curve_midpoint, time_curve_steepness); + + // If the number of moves remaining until the time control are less than + // the estimated number of moves left in the game, then use the number of + // moves until the time control instead. + if (params.movestogo && + *params.movestogo > 0 && // Ignore non-standard uci command. + *params.movestogo < movestogo) { + movestogo = *params.movestogo; + } - // Total time till control including increments. + // Total time, including increments, until time control. 
auto total_moves_time = - std::max(int64_t{0}, *time + increment * (movestogo - 1) - move_overhead); + std::max(0.0f, *time + increment * (movestogo - 1) - move_overhead); // If there is time spared from previous searches, the `time_to_squander` part // of it will be used immediately, remove that from planning. @@ -216,20 +215,12 @@ SearchLimits EngineController::PopulateSearchLimits( total_moves_time -= time_to_squander; } - constexpr int kSmartPruningToleranceMs = 200; - float this_move_weight = ComputeMoveWeight( - ply, time_curve_peak, time_curve_left_width, time_curve_right_width); - float other_move_weights = 0.0f; - for (int i = 1; i < movestogo; ++i) - other_move_weights += - ComputeMoveWeight(ply + 2 * i, time_curve_peak, time_curve_left_width, - time_curve_right_width); - // Compute the move time without slowmover. - float this_move_time = total_moves_time * this_move_weight / - (this_move_weight + other_move_weights); + // Evenly split total time between all moves. + float this_move_time = total_moves_time / movestogo; // Only extend thinking time with slowmover if smart pruning can potentially // reduce it. + constexpr int kSmartPruningToleranceMs = 200; if (slowmover < 1.0 || this_move_time * slowmover > kSmartPruningToleranceMs) { this_move_time *= slowmover; diff --git a/src/mcts/node.cc b/src/mcts/node.cc index 2d754dd161..d968377be2 100644 --- a/src/mcts/node.cc +++ b/src/mcts/node.cc @@ -233,7 +233,10 @@ bool Node::TryStartScoreUpdate() { return true; } -void Node::CancelScoreUpdate(int multivisit) { n_in_flight_ -= multivisit; } +void Node::CancelScoreUpdate(int multivisit) { + n_in_flight_ -= multivisit; + best_child_cached_ = nullptr; +} void Node::FinalizeScoreUpdate(float v, int multivisit) { // Recompute Q. @@ -246,6 +249,18 @@ void Node::FinalizeScoreUpdate(float v, int multivisit) { n_ += multivisit; // Decrement virtual loss. n_in_flight_ -= multivisit; + // Best child is potentially no longer valid. 
+ best_child_cached_ = nullptr; +} + +void Node::UpdateBestChild(const Iterator& best_edge, int visits_allowed) { + best_child_cached_ = best_edge.node(); + // An edge can point to an unexpanded node with n==0. These nodes don't + // increment their n_in_flight_ the same way and thus are not safe to cache. + if (best_child_cached_ && best_child_cached_->GetN() == 0) { + best_child_cached_ = nullptr; + } + best_child_cache_in_flight_limit_ = visits_allowed + n_in_flight_; } Node::NodeRange Node::ChildNodes() const { return child_.get(); } diff --git a/src/mcts/node.h b/src/mcts/node.h index de7f87d0df..26d020b2ef 100644 --- a/src/mcts/node.h +++ b/src/mcts/node.h @@ -183,6 +183,23 @@ class Node { // Updates max depth, if new depth is larger. void UpdateMaxDepth(int depth); + // Caches the best child if possible. + void UpdateBestChild(const Iterator& best_edge, int collisions_allowed); + + // Gets a cached best child if it is still valid. + Node* GetCachedBestChild() { + if (n_in_flight_ < best_child_cache_in_flight_limit_) { + return best_child_cached_; + } + return nullptr; + } + + // Gets how many more visits the cached value is valid for. Only valid if + // GetCachedBestChild returns a value. + int GetRemainingCacheVisits() { + return best_child_cache_in_flight_limit_ - n_in_flight_; + } + // Calculates the full depth if new depth is larger, updates it, returns // in depth parameter, and returns true if it was indeed updated. bool UpdateFullDepth(uint16_t* depth); @@ -216,6 +233,13 @@ class Node { std::string DebugString() const; private: + // Performs construction time type initialization. For use only with a node + // that has not been used beyond its construction. + void Reinit(Node* parent, uint16_t index) { + parent_ = parent; + index_ = index; + } + // To minimize the number of padding bytes and to avoid having unnecessary // padding when new fields are added, we arrange the fields by size, largest // to smallest. 
@@ -231,6 +255,9 @@ class Node { std::unique_ptr child_; // Pointer to a next sibling. nullptr if there are no further siblings. std::unique_ptr sibling_; + // Cached pointer to best child, valid while n_in_flight < + // best_child_cache_in_flight_limit_ + Node* best_child_cached_ = nullptr; // 4 byte fields. // Average value (from value head of neural network) of all visited nodes in @@ -246,6 +273,9 @@ class Node { // but not finished). This value is added to n during selection which node // to pick in MCTS, and also when selecting the best move. uint32_t n_in_flight_ = 0; + // If best_child_cached_ is non-null, and n_in_flight_ < this, + // best_child_cached_ is still the best child. + uint32_t best_child_cache_in_flight_limit_ = 0; // 2 byte fields. // Index of this node is parent's edge list. @@ -273,9 +303,9 @@ class Node { // A basic sanity check. This must be adjusted when Node members are adjusted. #if defined(__i386__) || (defined(__arm__) && !defined(__aarch64__)) -static_assert(sizeof(Node) == 40, "Unexpected size of Node for 32bit compile"); +static_assert(sizeof(Node) == 48, "Unexpected size of Node for 32bit compile"); #else -static_assert(sizeof(Node) == 64, "Unexpected size of Node"); +static_assert(sizeof(Node) == 72, "Unexpected size of Node"); #endif // Contains Edge and Node pair and set of proxy functions to simplify access @@ -392,7 +422,8 @@ class Edge_Iterator : public EdgeAndNode { Edge_Iterator& operator*() { return *this; } // If there is node, return it. Otherwise spawn a new one and return it. - Node* GetOrSpawnNode(Node* parent) { + Node* GetOrSpawnNode(Node* parent, + std::unique_ptr* node_source = nullptr) { if (node_) return node_; // If there is already a node, return it. Actualize(); // But maybe other thread already did that. if (node_) return node_; // If it did, return. @@ -408,7 +439,12 @@ class Edge_Iterator : public EdgeAndNode { // 2. 
Create fresh Node(idx_.5): // node_ptr_ -> &Node(idx_.3).sibling_ -> Node(idx_.5) // tmp -> Node(idx_.7) - *node_ptr_ = std::make_unique(parent, current_idx_); + if (node_source && *node_source) { + (*node_source)->Reinit(parent, current_idx_); + *node_ptr_ = std::move(*node_source); + } else { + *node_ptr_ = std::make_unique(parent, current_idx_); + } // 3. Attach stored pointer back to a list: // node_ptr_ -> // &Node(idx_.3).sibling_ -> Node(idx_.5).sibling_ -> Node(idx_.7) diff --git a/src/mcts/params.cc b/src/mcts/params.cc index 6aa7d5ae8b..85dcb5ff5e 100644 --- a/src/mcts/params.cc +++ b/src/mcts/params.cc @@ -162,14 +162,13 @@ const OptionId SearchParams::kHistoryFillId{ "synthesize them (always, never, or only at non-standard fen position)."}; void SearchParams::Populate(OptionsParser* options) { - // Here the "safe defaults" are listed. - // Many of them are overridden with optimized defaults in engine.cc and - // tournament.cc + // Here the "uci optimized defaults" are set. + // Many of them are overridden with training specific values in tournament.cc.
+ options->Add(kMiniBatchSizeId, 1, 1024) = 256; options->Add(kMaxPrefetchBatchId, 0, 1024) = 32; - options->Add(kCpuctId, 0.0f, 100.0f) = 1.2f; + options->Add(kCpuctId, 0.0f, 100.0f) = 3.0f; options->Add(kCpuctBaseId, 1.0f, 1000000000.0f) = 19652.0f; - options->Add(kCpuctFactorId, 0.0f, 1000.0f) = 0.0f; + options->Add(kCpuctFactorId, 0.0f, 1000.0f) = 2.0f; options->Add(kTemperatureId, 0.0f, 100.0f) = 0.0f; options->Add(kTempDecayMovesId, 0, 100) = 0; options->Add(kTemperatureCutoffMoveId, 0, 1000) = 0; @@ -182,13 +181,13 @@ void SearchParams::Populate(OptionsParser* options) { options->Add(kSmartPruningFactorId, 0.0f, 10.0f) = 1.33f; std::vector fpu_strategy = {"reduction", "absolute"}; options->Add(kFpuStrategyId, fpu_strategy) = "reduction"; - options->Add(kFpuReductionId, -100.0f, 100.0f) = 0.0f; + options->Add(kFpuReductionId, -100.0f, 100.0f) = 1.2f; options->Add(kFpuValueId, -1.0f, 1.0f) = -1.0f; - options->Add(kCacheHistoryLengthId, 0, 7) = 7; - options->Add(kPolicySoftmaxTempId, 0.1f, 10.0f) = 1.0f; - options->Add(kMaxCollisionEventsId, 1, 1024) = 1; - options->Add(kMaxCollisionVisitsId, 1, 1000000) = 1; - options->Add(kOutOfOrderEvalId) = false; + options->Add(kCacheHistoryLengthId, 0, 7) = 0; + options->Add(kPolicySoftmaxTempId, 0.1f, 10.0f) = 2.2f; + options->Add(kMaxCollisionEventsId, 1, 1024) = 32; + options->Add(kMaxCollisionVisitsId, 1, 1000000) = 9999; + options->Add(kOutOfOrderEvalId) = true; options->Add(kMultiPvId, 1, 500) = 1; std::vector score_type = {"centipawn", "win_percentage", "Q"}; options->Add(kScoreTypeId, score_type) = "centipawn"; @@ -213,7 +212,8 @@ SearchParams::SearchParams(const OptionsDict& options) kMaxCollisionVisits(options.Get(kMaxCollisionVisitsId.GetId())), kOutOfOrderEval(options.Get(kOutOfOrderEvalId.GetId())), kHistoryFill( - EncodeHistoryFill(options.Get(kHistoryFillId.GetId()))) { + EncodeHistoryFill(options.Get(kHistoryFillId.GetId()))), + kMiniBatchSize(options.Get(kMiniBatchSizeId.GetId())){ } } // namespace lczero 
diff --git a/src/mcts/params.h b/src/mcts/params.h index 1218416ccb..bcbe780f46 100644 --- a/src/mcts/params.h +++ b/src/mcts/params.h @@ -43,7 +43,7 @@ class SearchParams { // Parameter getters. int GetMiniBatchSize() const { - return options_.Get(kMiniBatchSizeId.GetId()); + return kMiniBatchSize; } int GetMaxPrefetchBatch() const { return options_.Get(kMaxPrefetchBatchId.GetId()); @@ -138,6 +138,7 @@ class SearchParams { const int kMaxCollisionVisits; const bool kOutOfOrderEval; const FillEmptyHistory kHistoryFill; + const int kMiniBatchSize; }; } // namespace lczero diff --git a/src/mcts/search.cc b/src/mcts/search.cc index b4b9ea28b4..d946741f08 100644 --- a/src/mcts/search.cc +++ b/src/mcts/search.cc @@ -411,6 +411,10 @@ void Search::UpdateRemainingMoves() { } // Even if we exceeded limits, don't go crazy by not allowing any playouts. if (remaining_playouts_ <= 1) remaining_playouts_ = 1; + // Since remaining_playouts_ has changed, the logic for selecting visited root + // nodes may also change. Use a 0 visit cancel score update to clear out any + // cached best edge. + root_node_->CancelScoreUpdate(0); } // Return the evaluation of the actual best child, regardless of temperature @@ -656,7 +660,7 @@ void Search::Stop() { void Search::Abort() { Mutex::Lock lock(counters_mutex_); - if (!stop_.load(std::memory_order_acquire)) { + if (!stop_.load(std::memory_order_acquire) || !bestmove_is_sent_) { bestmove_is_sent_ = true; FireStopInternal(); } @@ -812,8 +816,12 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( Node* node = search_->root_node_; Node::Iterator best_edge; Node::Iterator second_best_edge; - // Initialize position sequence with pre-move position. - history_.Trim(search_->played_history_.GetLength()); + + // Precache a newly constructed node to avoid memory allocations being + // performed while the mutex is held. 
+ if (!precached_node_) { + precached_node_ = std::make_unique(nullptr, 0); + } SharedMutex::Lock lock(search_->nodes_mutex_); @@ -823,6 +831,7 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( // True on first iteration, false as we dive deeper. bool is_root_node = true; uint16_t depth = 0; + bool node_already_updated = true; while (true) { // First, terminate if we find collisions or leaf nodes. @@ -832,7 +841,9 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( // in the beginning (and there would be no need for "if // (!is_root_node)"), but that would mean extra mutex lock. // Will revisit that after rethinking locking strategy. - if (!is_root_node) node = best_edge.GetOrSpawnNode(/* parent */ node); + if (!node_already_updated) { + node = best_edge.GetOrSpawnNode(/* parent */ node, &precached_node_); + } best_edge.Reset(); depth++; // n_in_flight_ is incremented. If the method returns false, then there is @@ -852,6 +863,18 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( return NodeToProcess::Extension(node, depth); } } + Node* possible_shortcut_child = node->GetCachedBestChild(); + if (possible_shortcut_child) { + // Add two here to reverse the conservatism that goes into calculating the + // remaining cache visits. + collision_limit = + std::min(collision_limit, node->GetRemainingCacheVisits() + 2); + is_root_node = false; + node = possible_shortcut_child; + node_already_updated = true; + continue; + } + node_already_updated = false; // If we fall through, then n_in_flight_ has been incremented but this // playout remains incomplete; we must go deeper. 
@@ -895,14 +918,19 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( } if (second_best_edge) { + int estimated_visits_to_change_best = + best_edge.GetVisitsToReachU(second_best, puct_mult, fpu); + // Only cache for n-2 steps as the estimate created by GetVisitsToReachU + // has potential rounding errors and some conservative logic that can push + // it up to 2 away from the real value. + node->UpdateBestChild(best_edge, + std::max(0, estimated_visits_to_change_best - 2)); collision_limit = - std::min(collision_limit, - best_edge.GetVisitsToReachU(second_best, puct_mult, fpu)); + std::min(collision_limit, estimated_visits_to_change_best); assert(collision_limit >= 1); second_best_edge.Reset(); } - history_.Append(best_edge.GetMove()); if (is_root_node && possible_moves <= 1 && !search_->limits_.infinite) { // If there is only one move theoretically possible within remaining time, // output it. @@ -914,6 +942,22 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend( } void SearchWorker::ExtendNode(Node* node) { + // Initialize position sequence with pre-move position. + history_.Trim(search_->played_history_.GetLength()); + std::vector to_add; + // Could instead reserve one more than the difference between history_.size() + // and history_.capacity(). + to_add.reserve(60); + Node* cur = node; + while (cur != search_->root_node_) { + Node* prev = cur->GetParent(); + to_add.push_back(prev->GetEdgeToNode(cur)->GetMove()); + cur = prev; + } + for (int i = to_add.size() - 1; i >= 0; i--) { + history_.Append(to_add[i]); + } + // We don't need the mutex because other threads will see that N=0 and // N-in-flight=1 and will not touch this node. const auto& board = history_.Last().GetBoard(); @@ -992,7 +1036,8 @@ bool SearchWorker::AddNodeToComputation(Node* node, bool add_if_cached) { if (node && node->HasChildren()) { // Legal moves are known, use them. 
- for (auto edge : node->Edges()) { + moves.reserve(node->GetNumEdges()); + for (const auto& edge : node->Edges()) { moves.emplace_back(edge.GetMove().as_nn_index()); } } else { diff --git a/src/mcts/search.h b/src/mcts/search.h index 128c7d8109..14b5bcd51f 100644 --- a/src/mcts/search.h +++ b/src/mcts/search.h @@ -294,6 +294,7 @@ class SearchWorker { bool root_move_filter_populated_ = false; int number_out_of_order_ = 0; const SearchParams& params_; + std::unique_ptr precached_node_; }; } // namespace lczero diff --git a/src/neural/cuda/layers.cc b/src/neural/cuda/layers.cc index 53a3cce8d1..25ff7a2fae 100644 --- a/src/neural/cuda/layers.cc +++ b/src/neural/cuda/layers.cc @@ -117,7 +117,7 @@ ConvLayer::ConvLayer(BaseLayer* ip, int C, int H, int W, cudnnSetConvolutionMathType(conv_desc_, CUDNN_TENSOR_OP_MATH)); // TODO: dynamic selection of algorithm! - if ((C > 32) && (!fp16)) { + if ((C > 32) && (!fp16) && (filter_size_ > 1)) { conv_algo_ = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED; } else { conv_algo_ = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; diff --git a/src/neural/cuda/network_cudnn.cc b/src/neural/cuda/network_cudnn.cc index 013ac2200a..69f4d2e833 100644 --- a/src/neural/cuda/network_cudnn.cc +++ b/src/neural/cuda/network_cudnn.cc @@ -59,11 +59,15 @@ struct InputsOutputs { ReportCUDAErrors( cudaHostGetDevicePointer(&input_val_mem_gpu_, input_val_mem_, 0)); + ReportCUDAErrors(cudaHostAlloc( - &op_policy_mem_, maxBatchSize * kNumOutputPolicy * sizeof(float), - cudaHostAllocMapped)); - ReportCUDAErrors( - cudaHostGetDevicePointer(&op_policy_mem_gpu_, op_policy_mem_, 0)); + &op_policy_mem_, maxBatchSize * kNumOutputPolicy * sizeof(float), 0)); + + // Separate device memory copy for policy output. + // It's faster to write to device memory and then copy to host memory + // than having the kernel write directly to it.
+ ReportCUDAErrors(cudaMalloc(&op_policy_mem_gpu_, + maxBatchSize * kNumOutputPolicy * sizeof(float))); ReportCUDAErrors(cudaHostAlloc(&op_value_mem_, maxBatchSize * sizeof(float), cudaHostAllocMapped)); @@ -74,6 +78,7 @@ struct InputsOutputs { ReportCUDAErrors(cudaFreeHost(input_masks_mem_)); ReportCUDAErrors(cudaFreeHost(input_val_mem_)); ReportCUDAErrors(cudaFreeHost(op_policy_mem_)); + ReportCUDAErrors(cudaFree(op_policy_mem_gpu_)); ReportCUDAErrors(cudaFreeHost(op_value_mem_)); } uint64_t* input_masks_mem_; @@ -81,11 +86,13 @@ struct InputsOutputs { float* op_policy_mem_; float* op_value_mem_; - // GPU pointers for the above allocations + // GPU pointers for the above allocations. uint64_t* input_masks_mem_gpu_; float* input_val_mem_gpu_; - float* op_policy_mem_gpu_; float* op_value_mem_gpu_; + + // This is a separate copy. + float* op_policy_mem_gpu_; }; template @@ -173,16 +180,21 @@ class CudnnNetwork : public Network { has_se_ = false; // 0. Process weights. - processConvBlock(weights.input, true); + + // TODO: Get filter sizes from proto file? + // Hardcoded right now: + // 3 for input and residual block convolutions. + // 1 for policy and value head convolutions. + processConvBlock(weights.input, true, 3); for (int i = 0; i < numBlocks_; i++) { if (weights.residual[i].has_se) { has_se_ = true; } - processConvBlock(weights.residual[i].conv1, true); - processConvBlock(weights.residual[i].conv2, true); + processConvBlock(weights.residual[i].conv1, true, 3); + processConvBlock(weights.residual[i].conv2, true, 3); } - processConvBlock(weights.policy); - processConvBlock(weights.value); + processConvBlock(weights.policy, true, 1); + processConvBlock(weights.value, true, 1); // 1. Allocate scratch space (used internally by cudnn to run convolutions, // and also for format/layout conversion for weights). @@ -283,15 +295,11 @@ class CudnnNetwork : public Network { // Policy head.
{ auto convPol = std::make_unique>( - resi_last_, weights.policy.bn_means.size(), 8, 8, 1, kNumFilters); - convPol->LoadWeights(&weights.policy.weights[0], nullptr, scratch_mem_); + resi_last_, weights.policy.bn_means.size(), 8, 8, 1, kNumFilters, true, true); + convPol->LoadWeights(&weights.policy.weights[0], + &weights.policy.biases[0], scratch_mem_); network_.emplace_back(std::move(convPol)); - auto BNPol = std::make_unique>(getLastLayer(), true); - BNPol->LoadWeights(&weights.policy.bn_means[0], - &weights.policy.bn_stddivs[0]); - network_.emplace_back(std::move(BNPol)); - auto FCPol = std::make_unique>( getLastLayer(), weights.ip_pol_b.size(), 1, 1, false, true); FCPol->LoadWeights(&weights.ip_pol_w[0], &weights.ip_pol_b[0], @@ -307,15 +315,11 @@ class CudnnNetwork : public Network { // Value head. { auto convVal = std::make_unique>( - resi_last_, weights.value.bn_means.size(), 8, 8, 1, kNumFilters); - convVal->LoadWeights(&weights.value.weights[0], nullptr, scratch_mem_); + resi_last_, weights.value.biases.size(), 8, 8, 1, kNumFilters, true, true); + convVal->LoadWeights(&weights.value.weights[0], &weights.value.biases[0], + scratch_mem_); network_.emplace_back(std::move(convVal)); - auto BNVal = std::make_unique>(getLastLayer(), true); - BNVal->LoadWeights(&weights.value.bn_means[0], - &weights.value.bn_stddivs[0]); - network_.emplace_back(std::move(BNVal)); - auto FCVal1 = std::make_unique>( getLastLayer(), weights.ip1_val_b.size(), 1, 1, true, true); FCVal1->LoadWeights(&weights.ip1_val_w[0], &weights.ip1_val_b[0], @@ -403,43 +407,44 @@ class CudnnNetwork : public Network { scratch_mem_, scratch_size_, cudnn_, cublas_); // pol conv network_[l++]->Eval(batchSize, tensor_mem_[1], tensor_mem_[0], nullptr, - scratch_mem_, scratch_size_, cudnn_, - cublas_); // pol BN - network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[1], nullptr, scratch_mem_, scratch_size_, cudnn_, cublas_); // pol FC if (std::is_same::value) { // TODO: consider softmax layer that 
writes directly to fp32. - network_[l++]->Eval(batchSize, tensor_mem_[1], tensor_mem_[0], nullptr, + network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[1], nullptr, scratch_mem_, scratch_size_, cudnn_, cublas_); // pol softmax - copyTypeConverted(opPol, (half*)(tensor_mem_[1]), + copyTypeConverted(opPol, (half*)(tensor_mem_[0]), batchSize * kNumOutputPolicy); // POLICY } else { - network_[l++]->Eval(batchSize, (DataType*)opPol, tensor_mem_[0], nullptr, + network_[l++]->Eval(batchSize, (DataType*)opPol, tensor_mem_[1], nullptr, scratch_mem_, scratch_size_, cudnn_, cublas_); // pol softmax // POLICY } + // Copy policy output from device memory to host memory. + ReportCUDAErrors(cudaMemcpyAsync(io->op_policy_mem_, + io->op_policy_mem_gpu_, + sizeof(float) * kNumOutputPolicy * + batchSize, cudaMemcpyDeviceToHost)); + // value head network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[2], nullptr, scratch_mem_, scratch_size_, cudnn_, cublas_); // value conv - network_[l++]->Eval(batchSize, tensor_mem_[2], tensor_mem_[0], nullptr, - scratch_mem_, scratch_size_, cudnn_, - cublas_); // value BN - network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[2], nullptr, + + network_[l++]->Eval(batchSize, tensor_mem_[1], tensor_mem_[0], nullptr, scratch_mem_, scratch_size_, cudnn_, cublas_); // value FC1 if (std::is_same::value) { // TODO: consider fusing the bias-add of FC2 with format conversion. 
- network_[l++]->Eval(batchSize, tensor_mem_[2], tensor_mem_[0], nullptr, + network_[l++]->Eval(batchSize, tensor_mem_[2], tensor_mem_[1], nullptr, scratch_mem_, scratch_size_, cudnn_, cublas_); // value FC2 copyTypeConverted(opVal, (half*)(tensor_mem_[2]), batchSize); // VALUE } else { - network_[l++]->Eval(batchSize, (DataType*)opVal, tensor_mem_[0], nullptr, + network_[l++]->Eval(batchSize, (DataType*)opVal, tensor_mem_[1], nullptr, scratch_mem_, scratch_size_, cudnn_, cublas_); // value FC2 // VALUE } @@ -535,7 +540,8 @@ class CudnnNetwork : public Network { mutable std::mutex inputs_outputs_lock_; std::list> free_inputs_outputs_; - void processConvBlock(LegacyWeights::ConvBlock& block, bool foldBNLayer = false) { + void processConvBlock(LegacyWeights::ConvBlock& block, bool foldBNLayer, + int filterSize) { const float epsilon = 1e-5f; // Compute reciprocal of std-dev from the variances (so that it can be @@ -557,13 +563,15 @@ class CudnnNetwork : public Network { // convolution idea proposed by Henrik Forstén and first implemented in // leela go zero. 
if (foldBNLayer) { + const int spatialSize = filterSize * filterSize; const int outputs = block.biases.size(); - const int channels = block.weights.size() / (outputs * 3 * 3); - + const int channels = block.weights.size() / (outputs * spatialSize); + for (auto o = 0; o < outputs; o++) { for (auto c = 0; c < channels; c++) { - for (auto i = 0; i < 9; i++) { - block.weights[o * channels * 9 + c * 9 + i] *= block.bn_stddivs[o]; + for (auto i = 0; i < spatialSize; i++) { + block.weights[o * channels * spatialSize + c * spatialSize + i] *= + block.bn_stddivs[o]; } } diff --git a/src/neural/loader.cc b/src/neural/loader.cc index c9a2b076e7..9921babee0 100644 --- a/src/neural/loader.cc +++ b/src/neural/loader.cc @@ -164,7 +164,7 @@ std::string DiscoverWeightsFile() { gzFile file = gzopen(candidate.second.c_str(), "rb"); if (!file) continue; - char buf[256]; + unsigned char buf[256]; int sz = gzread(file, buf, 256); gzclose(file); if (sz < 0) continue; @@ -180,8 +180,10 @@ std::string DiscoverWeightsFile() { // First byte of the protobuf stream is 0x0d for fixed32, so we ignore it as // our own magic should suffice. - auto magic = reinterpret_cast(buf + 1); - if (*magic == kWeightMagic) { + auto magic = buf[1] | (static_cast(buf[2]) << 8) | + (static_cast(buf[3]) << 16) | + (static_cast(buf[4]) << 24); + if (magic == kWeightMagic) { CERR << "Found pb network file: " << candidate.second; return candidate.second; } diff --git a/src/neural/network_demux.cc b/src/neural/network_demux.cc new file mode 100644 index 0000000000..c791d1f11b --- /dev/null +++ b/src/neural/network_demux.cc @@ -0,0 +1,223 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2018 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#include "neural/factory.h" + +#include +#include +#include +#include "utils/exception.h" + +namespace lczero { +namespace { + +class DemuxingNetwork; +class DemuxingComputation : public NetworkComputation { + public: + DemuxingComputation(DemuxingNetwork* network) : network_(network) {} + + void AddInput(InputPlanes&& input) override { planes_.emplace_back(input); } + + void ComputeBlocking() override; + + int GetBatchSize() const override { return planes_.size(); } + + float GetQVal(int sample) const override { + int idx = sample / partial_size_; + int offset = sample % partial_size_; + return parents_[idx]->GetQVal(offset); + } + + float GetPVal(int sample, int move_id) const override { + int idx = sample / partial_size_; + int offset = sample % partial_size_; + return parents_[idx]->GetPVal(offset, move_id); + } + + void NotifyComplete() { + std::unique_lock lock(mutex_); + dataready_--; + if (dataready_ == 0) { + dataready_cv_.notify_one(); + } + } + + NetworkComputation* AddParentFromNetwork(Network* network) { + std::unique_lock lock(mutex_); + parents_.emplace_back(network->NewComputation()); + int cur_idx = (parents_.size() - 1) * 
partial_size_; + for (int i = cur_idx; i < std::min(GetBatchSize(), cur_idx + partial_size_); + i++) { + parents_.back()->AddInput(std::move(planes_[i])); + } + return parents_.back().get(); + } + + private: + std::vector planes_; + DemuxingNetwork* network_; + std::vector> parents_; + + std::mutex mutex_; + std::condition_variable dataready_cv_; + int dataready_ = 0; + int partial_size_ = 0; +}; + +class DemuxingNetwork : public Network { + public: + DemuxingNetwork(const WeightsFile& weights, const OptionsDict& options) { + minimum_split_size_ = options.GetOrDefault("minimum-split-size", 0); + const auto parents = options.ListSubdicts(); + if (parents.empty()) { + // If options are empty, or multiplexer configured in root object, + // initialize on root object and default backend. + auto backends = NetworkFactory::Get()->GetBackendsList(); + AddBackend(backends[0], weights, options); + } + + for (const auto& name : parents) { + AddBackend(name, weights, options.GetSubdict(name)); + } + } + + void AddBackend(const std::string& name, const WeightsFile& weights, + const OptionsDict& opts) { + const int nn_threads = opts.GetOrDefault("threads", 1); + const std::string backend = opts.GetOrDefault("backend", name); + + networks_.emplace_back( + NetworkFactory::Get()->Create(backend, weights, opts)); + + for (int i = 0; i < nn_threads; ++i) { + threads_.emplace_back([this]() { Worker(); }); + } + } + + std::unique_ptr NewComputation() override { + return std::make_unique(this); + } + + void Enqueue(DemuxingComputation* computation) { + std::lock_guard lock(mutex_); + queue_.push(computation); + cv_.notify_one(); + } + + ~DemuxingNetwork() { + Abort(); + Wait(); + // Unstuck waiting computations. + while (!queue_.empty()) { + queue_.front()->NotifyComplete(); + queue_.pop(); + } + } + + void Worker() { + // While Abort() is not called (and it can only be called from destructor). 
+ while (!abort_) { + { + { + std::unique_lock lock(mutex_); + // Wait until there's some work to compute. + cv_.wait(lock, [&] { return abort_ || !queue_.empty(); }); + if (abort_) break; + } + + // While there is work in the queue, process it. + while (true) { + + DemuxingComputation* to_notify; + { + std::unique_lock lock(mutex_); + if (queue_.empty()) break; + to_notify = queue_.front(); + queue_.pop(); + } + long long net_idx = ++(counter_) % networks_.size(); + NetworkComputation* to_compute = to_notify->AddParentFromNetwork(networks_[net_idx].get()); + to_compute->ComputeBlocking(); + to_notify->NotifyComplete(); + } + } + } + } + + void Abort() { + { + std::lock_guard lock(mutex_); + abort_ = true; + } + cv_.notify_all(); + } + + void Wait() { + while (!threads_.empty()) { + threads_.back().join(); + threads_.pop_back(); + } + } + + std::vector> networks_; + std::queue queue_; + int minimum_split_size_ = 0; + std::atomic counter_; + bool abort_ = false; + + std::mutex mutex_; + std::condition_variable cv_; + + std::vector threads_; +}; + +void DemuxingComputation::ComputeBlocking() { + if (GetBatchSize() == 0) return; + partial_size_ = (GetBatchSize() + network_->networks_.size() - 1) / + network_->networks_.size(); + if (partial_size_ < network_->minimum_split_size_) { + partial_size_ = std::min(GetBatchSize(), network_->minimum_split_size_); + } + int splits = (GetBatchSize() + partial_size_ - 1) / partial_size_; + + std::unique_lock lock(mutex_); + dataready_ = splits; + for (int j=0; j < splits; j++) { + network_->Enqueue(this); + } + dataready_cv_.wait(lock, [this]() { return dataready_ == 0; }); +} + +std::unique_ptr MakeDemuxingNetwork(const WeightsFile& weights, + const OptionsDict& options) { + return std::make_unique(weights, options); +} + +REGISTER_NETWORK("demux", MakeDemuxingNetwork, -1001) + +} // namespace +} // namespace lczero diff --git a/src/neural/network_rr.cc b/src/neural/network_rr.cc new file mode 100644 index 0000000000..5979b9b2c7
--- /dev/null +++ b/src/neural/network_rr.cc @@ -0,0 +1,82 @@ +/* + This file is part of Leela Chess Zero. + Copyright (C) 2018 The LCZero Authors + + Leela Chess is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Leela Chess is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Leela Chess. If not, see . + + Additional permission under GNU GPL version 3 section 7 + + If you modify this Program, or any covered work, by linking or + combining it with NVIDIA Corporation's libraries from the NVIDIA CUDA + Toolkit and the NVIDIA CUDA Deep Neural Network library (or a + modified version of those libraries), containing parts covered by the + terms of the respective license agreement, the licensors of this + Program grant you additional permission to convey the resulting work. +*/ + +#include "neural/factory.h" + +#include +#include +#include +#include "utils/exception.h" + +namespace lczero { +namespace { + +class RoundRobinNetwork : public Network { + public: + RoundRobinNetwork(const WeightsFile& weights, const OptionsDict& options) { + const auto parents = options.ListSubdicts(); + if (parents.empty()) { + // If options are empty, or multiplexer configured in root object, + // initialize on root object and default backend. 
+ auto backends = NetworkFactory::Get()->GetBackendsList(); + AddBackend(backends[0], weights, options); + } + + for (const auto& name : parents) { + AddBackend(name, weights, options.GetSubdict(name)); + } + } + + void AddBackend(const std::string& name, const WeightsFile& weights, + const OptionsDict& opts) { + const std::string backend = opts.GetOrDefault("backend", name); + + networks_.emplace_back( + NetworkFactory::Get()->Create(backend, weights, opts)); + } + + std::unique_ptr NewComputation() override { + long long val = ++counter_; + return networks_[val % networks_.size()]->NewComputation(); + } + + ~RoundRobinNetwork() {} + + private: + std::vector> networks_; + std::atomic counter_; +}; + +std::unique_ptr MakeRoundRobinNetwork(const WeightsFile& weights, + const OptionsDict& options) { + return std::make_unique(weights, options); +} + +REGISTER_NETWORK("roundrobin", MakeRoundRobinNetwork, -999) + +} // namespace +} // namespace lczero diff --git a/src/selfplay/game.cc b/src/selfplay/game.cc index 2b64db9bf5..ae199ec023 100644 --- a/src/selfplay/game.cc +++ b/src/selfplay/game.cc @@ -38,11 +38,15 @@ const OptionId kReuseTreeId{"reuse-tree", "ReuseTree", const OptionId kResignPercentageId{ "resign-percentage", "ResignPercentage", "Resign when win percentage drops below specified value."}; +const OptionId kResignEarliestMoveId{"resign-earliest-move", + "ResignEarliestMove", + "Earliest move that resign is allowed."}; } // namespace void SelfPlayGame::PopulateUciParams(OptionsParser* options) { options->Add(kReuseTreeId) = false; options->Add(kResignPercentageId, 0.0f, 100.0f) = 0.0f; + options->Add(kResignEarliestMoveId, 0, 1000) = 0; } SelfPlayGame::SelfPlayGame(PlayerOptions player1, PlayerOptions player2, @@ -104,7 +108,9 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training, float eval = search_->GetBestEval(); eval = (eval + 1) / 2; if (eval < min_eval_[idx]) min_eval_[idx] = eval; - if (enable_resign) { + int move_number = 
tree_[0]->GetPositionHistory().GetLength() / 2 + 1; + if (enable_resign && move_number >= options_[idx].uci_options->Get( + kResignEarliestMoveId.GetId())) { const float resignpct = options_[idx].uci_options->Get(kResignPercentageId.GetId()) / 100; diff --git a/src/selfplay/tournament.cc b/src/selfplay/tournament.cc index 81dee9f5c5..8e319cb4b4 100644 --- a/src/selfplay/tournament.cc +++ b/src/selfplay/tournament.cc @@ -86,6 +86,13 @@ void SelfPlayTournament::PopulateOptions(OptionsParser* options) { SelfPlayGame::PopulateUciParams(options); auto defaults = options->GetMutableDefaultsOptions(); defaults->Set(SearchParams::kMiniBatchSizeId.GetId(), 32); + defaults->Set(SearchParams::kCpuctId.GetId(), 1.2f); + defaults->Set(SearchParams::kCpuctFactorId.GetId(), 0.0f); + defaults->Set(SearchParams::kPolicySoftmaxTempId.GetId(), 1.0f); + defaults->Set(SearchParams::kMaxCollisionVisitsId.GetId(), 1); + defaults->Set(SearchParams::kMaxCollisionEventsId.GetId(), 1); + defaults->Set(SearchParams::kCacheHistoryLengthId.GetId(), 7); + defaults->Set(SearchParams::kOutOfOrderEvalId.GetId(), false); defaults->Set(SearchParams::kSmartPruningFactorId.GetId(), 0.0f); defaults->Set(SearchParams::kTemperatureId.GetId(), 1.0f); defaults->Set(SearchParams::kNoiseId.GetId(), true); diff --git a/src/utils/cache.h b/src/utils/cache.h index 2e2556e2d9..e820055c63 100644 --- a/src/utils/cache.h +++ b/src/utils/cache.h @@ -57,11 +57,10 @@ class LruCache { } // Inserts the element under key @key with value @val. - // If the element is pinned, old value is still kept (until fully unpinned), - // but new lookups will return updated value. - // If @pinned, pins inserted element, Unpin has to be called to unpin. - // In any case, puts element to front of the queue (makes it last to evict). - V* Insert(K key, std::unique_ptr val, bool pinned = false) { + // Puts element to front of the queue (makes it last to evict). 
+ void Insert(K key, std::unique_ptr val) { + if (capacity_.load(std::memory_order_relaxed) == 0) return; + Mutex::Lock lock(mutex_); auto hash = hasher_(key) % hash_.size(); @@ -76,16 +75,17 @@ class LruCache { ShrinkToCapacity(capacity_ - 1); ++size_; ++allocated_; - Item* new_item = new Item(key, std::move(val), pinned ? 1 : 0); + Item* new_item = new Item(key, std::move(val)); new_item->next_in_hash = hash_head; hash_head = new_item; InsertIntoLru(new_item); - return new_item->value.get(); } // Checks whether a key exists. Doesn't lock. Of course the next moment the // key may be evicted. bool ContainsKey(K key) { + if (capacity_.load(std::memory_order_relaxed) == 0) return false; + Mutex::Lock lock(mutex_); auto hash = hasher_(key) % hash_.size(); for (Item* iter = hash_[hash]; iter; iter = iter->next_in_hash) { @@ -99,6 +99,8 @@ class LruCache { // evict); furthermore, a call to Unpin must be made for each such element. // Use of LruCacheLock is recommended to automate this pin management. 
V* LookupAndPin(K key) { + if (capacity_.load(std::memory_order_relaxed) == 0) return nullptr; + Mutex::Lock lock(mutex_); auto hash = hasher_(key) % hash_.size(); @@ -149,9 +151,9 @@ class LruCache { void SetCapacity(int capacity) { Mutex::Lock lock(mutex_); - if (capacity_ == capacity) return; + if (capacity_.load(std::memory_order_relaxed) == capacity) return; ShrinkToCapacity(capacity); - capacity_ = capacity; + capacity_.store(capacity); std::vector new_hash( static_cast(capacity * kLoadFactor + 1)); @@ -179,16 +181,15 @@ class LruCache { Mutex::Lock lock(mutex_); return size_; } - int GetCapacity() const { - Mutex::Lock lock(mutex_); - return capacity_; + int GetCapacity() const { + return capacity_.load(std::memory_order_relaxed); } static constexpr size_t GetItemStructSize() { return sizeof(Item); } private: struct Item { - Item(K key, std::unique_ptr value, int pins) - : key(key), value(std::move(value)), pins(pins) {} + Item(K key, std::unique_ptr value) + : key(key), value(std::move(value)) {} K key; std::unique_ptr value; int pins = 0; @@ -268,7 +269,7 @@ class LruCache { } // Fresh in front, stale on back. - int capacity_ GUARDED_BY(mutex_); + std::atomic capacity_; int size_ GUARDED_BY(mutex_) = 0; int allocated_ GUARDED_BY(mutex_) = 0; Item* lru_head_ GUARDED_BY(mutex_) = nullptr; // Newest elements. diff --git a/src/utils/fastmath.h b/src/utils/fastmath.h index 9f182e423a..ba0855a5eb 100644 --- a/src/utils/fastmath.h +++ b/src/utils/fastmath.h @@ -36,9 +36,9 @@ namespace lczero { // The approximation used here is log2(2^N*(1+f)) ~ N+f*(1.342671-0.342671*f) // where N is the integer and f the fractional part, f>=0. 
inline float FastLog2(const float a) { - int32_t tmp; + uint32_t tmp; std::memcpy(&tmp, &a, sizeof(float)); - int expb = (tmp >> 23); + uint32_t expb = tmp >> 23; tmp = (tmp & 0x7fffff) | (0x7f << 23); float out; std::memcpy(&out, &tmp, sizeof(float)); @@ -50,12 +50,12 @@ inline float FastLog2(const float a) { // where N is the integer and f the fractional part, f>=0. inline float FastPow2(const float a) { if (a < -126) return 0.0; - int exp = floor(a); + int32_t exp = floor(a); float out = a - exp; out = 1.0f + out * (0.656366f + 0.343634f * out); int32_t tmp; std::memcpy(&tmp, &out, sizeof(float)); - tmp += exp << 23; + tmp += static_cast(static_cast(exp) << 23); std::memcpy(&out, &tmp, sizeof(float)); return out; } diff --git a/src/version.inc b/src/version.inc index 02885e82d8..5662d344ef 100644 --- a/src/version.inc +++ b/src/version.inc @@ -1,4 +1,4 @@ #define LC0_VERSION_MAJOR 0 -#define LC0_VERSION_MINOR 20 +#define LC0_VERSION_MINOR 21 #define LC0_VERSION_PATCH 0 #define LC0_VERSION_POSTFIX "dev" diff --git a/subprojects/protobuf-3.6.0.wrap b/subprojects/protobuf-3.6.0.wrap new file mode 100644 index 0000000000..5a10b720b8 --- /dev/null +++ b/subprojects/protobuf-3.6.0.wrap @@ -0,0 +1,10 @@ +[wrap-file] +directory = protobuf-3.6.0 + +source_url = https://github.com/protocolbuffers/protobuf/releases/download/v3.6.0/protobuf-all-3.6.0.tar.gz +source_filename = protobuf-all-3.6.0.tar.gz +source_hash = 1532154addf85080330fdd037949d4653dfce16550df5c70ea0cd212d8aff3af + +patch_url = https://github.com/borg323/protobuf/releases/download/3.6.0/protobuf-3.6.0-wrap.zip +patch_filename = protobuf-3.6.0-wrap.zip +patch_hash = a14730d2e3702c4a0d7b3f05a380ec6b2c0b138a5b00539705b5c3a8df9885e3 diff --git a/tensorflow.md b/tensorflow.md new file mode 100644 index 0000000000..d5dd9831f3 --- /dev/null +++ b/tensorflow.md @@ -0,0 +1,11 @@ +To build with tensorflow under linux you need to install Tensorflow_cc from +. Either release v1.9.0 or v1.12.0. 
+Tensorflow_cc requires a specific version of protobuf, which constrains the +build. Release v1.9.0 works out of the box, since the default protobuf +subproject (v3.5.1) is compatible and is used instead of a system installed +version. In contrast release v1.12.0 needs protobuf v3.6.0 which can be built +by adding `-Dprotobuf-3-6-0=true` to the build command line. Note that this +protobuf version has issues with static builds and crashes so is not +recommended for normal use. The crashes look very similar to: +* +*