# NOTE(review): this patch was re-laid-out from a whitespace-collapsed copy.
# Line breaks and indentation are a best-effort reconstruction. Blank context
# lines and any text that stood inside angle brackets (the target of the bare
# "#include" in fastmath.h, template arguments such as the one missing from
# "std::numeric_limits::max()") are absent from the source and were NOT
# re-invented, so hunk line counts may no longer match. Regenerate the patch
# from the repository before trying to apply it.
#
# appveyor.yml: renames the three build-matrix NAME values
# (cuda -> gpu-nvidia-cuda, opencl -> gpu-opencl, blas -> cpu-openblas) and
# updates the IF %NAME%== checks that set CUDA/OPENCL/BLAS/GTEST to match.
diff --git a/appveyor.yml b/appveyor.yml
index a1a7b91103..ac296701a4 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -5,19 +5,19 @@ image:
 - Visual Studio 2017
 environment:
   matrix:
-    - NAME: cuda
-    - NAME: opencl
-    - NAME: blas
+    - NAME: gpu-nvidia-cuda
+    - NAME: gpu-opencl
+    - NAME: cpu-openblas
 clone_folder: c:\projects\lc0
 install:
 - cmd: set CUDA=false
 - cmd: set OPENCL=false
 - cmd: set BLAS=false
 - cmd: set GTEST=false
-- cmd: IF %NAME%==cuda set CUDA=true
-- cmd: IF %NAME%==opencl set OPENCL=true
-- cmd: IF %NAME%==blas set BLAS=true
-- cmd: IF %NAME%==blas set GTEST=true
+- cmd: IF %NAME%==gpu-nvidia-cuda set CUDA=true
+- cmd: IF %NAME%==gpu-opencl set OPENCL=true
+- cmd: IF %NAME%==cpu-openblas set BLAS=true
+- cmd: IF %NAME%==cpu-openblas set GTEST=true
 - cmd: IF %BLAS%==true IF NOT EXIST C:\cache\OpenBLAS appveyor DownloadFile https://sjeng.org/ftp/OpenBLAS-0.3.3-win-oldthread.zip
 - cmd: IF %BLAS%==true IF NOT EXIST C:\cache\OpenBLAS 7z x OpenBLAS-0.3.3-win-oldthread.zip -oC:\cache\OpenBLAS
 - cmd: IF %OPENCL%==true nuget install opencl-nug -Version 0.777.77 -OutputDirectory C:\cache
# build-cl.cmd: invokes "meson" instead of "meson.py", passes -Dblas=false
# (with a rem line explaining how to turn the blas backend back on) and drops
# a trailing blank line.
diff --git a/build-cl.cmd b/build-cl.cmd
index 762c2c8ac4..785a439629 100644
--- a/build-cl.cmd
+++ b/build-cl.cmd
@@ -6,7 +6,8 @@ set MSBuild="C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBui
 rem call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64
 call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64
-meson.py build --backend vs2017 --buildtype release ^
+rem change to '-Dblas=true' to also build the blas backend with mkl
+meson build --backend vs2017 --buildtype release -Dblas=false ^
    -Dmkl_include="C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\include" ^
    -Dmkl_libdirs="C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\lib\intel64" ^
    -Dopencl_libdirs="C:\Program Files (x86)\AMD APP SDK\3.0\lib\x86_64" ^
@@ -28,4 +29,3 @@ cd build
 %MSBuild% /p:Configuration=Release /p:Platform=x64 ^
 /p:PreferredToolArchitecture=x64 lc0@exe.vcxproj ^
 /filelogger
-
# meson.build:
#  * git is looked up once via find_program(required: false); when the
#    lczero-common proto directory is missing and git was not found the
#    configure step now stops with error().
#  * BUILD_IDENTIFIER is defined for C++ as 'git.' + short_rev, where short_rev
#    is the rev-parse output, 'dirty' when diff-index reports changes, or
#    'unknown' when git is missing or rev-parse fails.
#  * the hard-coded Accelerate include directory is removed.
#  * opencl_lib is only used when CL/opencl.h is found under opencl_include,
#    and opencl_include is only added to the include path when the OpenCL
#    framework was not found.
diff --git a/meson.build b/meson.build
index b75c9a241e..651e0d4979 100644
--- a/meson.build
+++ b/meson.build
@@ -66,20 +66,48 @@ endif
 gen = generator(protoc, output: ['@BASENAME@.pb.cc', '@BASENAME@.pb.h'],
   arguments : ['--proto_path=@CURRENT_SOURCE_DIR@/libs/lczero-common', '--cpp_out=@BUILD_DIR@', '@INPUT@'])
+# Handle submodules.
+git = find_program('git', required: false)
 if run_command('checkdir.py', 'libs/lczero-common/proto').returncode() != 0
-  if run_command('git', 'status').returncode() == 0
-    message('updating git submodule libs/lczero-common')
-    run_command('git', 'submodule', 'update', '--init', '--recursive')
+  if git.found()
+    if run_command(git, 'status').returncode() == 0
+      message('updating git submodule libs/lczero-common')
+      run_command(git, 'submodule', 'update', '--init', '--recursive')
+    else
+      message('cloning lczero-common.git into libs/lczero-common')
+      run_command(git, 'clone', '--depth=1',
+                  'https://github.com/LeelaChessZero/lczero-common.git',
+                  'libs/lczero-common/')
+    endif
   else
-    message('cloning lczero-common.git into libs/lczero-common')
-    run_command('git', 'clone', '--depth=1',
-                'https://github.com/LeelaChessZero/lczero-common.git',
-                'libs/lczero-common/')
+    error('Please install git to automatically fetch submodules or download the archives manually from GitHub.')
   endif
 endif
 files += gen.process('libs/lczero-common/proto/net.proto',
   preserve_path_from : meson.current_source_dir() + '/libs/lczero-common/')
+
+# Extract git short revision.
+short_rev = 'unknown'
+if git.found()
+  r = run_command(git, 'rev-parse', '--short', 'HEAD')
+  if r.returncode() == 0
+    # Now let's check if the working directory is clean.
+    if run_command(git, 'diff-index', '--quiet', 'HEAD').returncode() == 0
+      short_rev = r.stdout().strip()
+    else
+      short_rev = 'dirty'
+      warning('Cannot extract valid git short revision from dirty working directory.')
+    endif
+  else
+    warning('Failed to parse short revision. Use git clone instead of downloading the archive from GitHub.')
+  endif
+endif
+
+# Construct build identifier.
+build_identifier = 'git.' + short_rev
+add_project_arguments('-DBUILD_IDENTIFIER="' + build_identifier + '"', language : 'cpp')
+message('Using build identifier "' + build_identifier + '".')
 #############################################################################
 ## Main files
@@ -207,7 +235,6 @@ if get_option('build_backends')
       has_blas = true
     elif get_option('accelerate') and accelerate_lib.found()
-      includes += include_directories('/System/Library/Frameworks/Accelerate.framework/Frameworks/vecLib.framework/Headers')
       deps += [ accelerate_lib ]
       has_blas = true
@@ -311,7 +338,7 @@ if get_option('build_backends')
       deps += [ opencl_framework ]
       has_opencl = true
-    elif opencl_lib.found()
+    elif opencl_lib.found() and cc.has_header('CL/opencl.h', args: '-I' + get_option('opencl_include'))
       deps += [ opencl_lib ]
       has_opencl = true
@@ -332,7 +359,9 @@ if get_option('build_backends')
       'src/neural/shared/winograd_filter.cc',
     ]
-    includes += include_directories(get_option('opencl_include'))
+    if not opencl_framework.found()
+      includes += include_directories(get_option('opencl_include'))
+    endif
     files += opencl_files
     has_backends = true
# meson_options.txt: opencl_include changes from an array option to a single
# string path; the meson.build hunk above concatenates it with '-I', which
# needs a string.
diff --git a/meson_options.txt b/meson_options.txt
index 872e2e8a6d..0c783209e4 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -14,9 +14,9 @@ option('openblas_include',
   description: 'Paths to openblas include directories')
 option('opencl_include',
-  type: 'array',
-  value: ['/usr/include/'],
-  description: 'Paths to OpenCL include directories')
+  type: 'string',
+  value: '/usr/include/',
+  description: 'Path to OpenCL include directory')
 option('tensorflow_libdir',
   type: 'array',
# src/chess/board.cc: SetFromFen now sets a castling flag only when the
# relevant king reports "e1"/"e8" and both the side's piece set and rooks_
# have the queried corner square set; otherwise the FEN castling letter is
# accepted but ignored (there is no else branch).
diff --git a/src/chess/board.cc b/src/chess/board.cc
index 3c0b6383c8..21f5b9b7a3 100644
--- a/src/chess/board.cc
+++ b/src/chess/board.cc
@@ -982,16 +982,28 @@ void ChessBoard::SetFromFen(const std::string& fen, int* no_capture_ply,
   for (char c : castlings) {
     switch (c) {
       case 'K':
-        castlings_.set_we_can_00();
+        if (our_king_.as_string() == "e1" && our_pieces_.get(0, 7) &&
+            rooks_.get(0, 7)) {
+          castlings_.set_we_can_00();
+        }
         break;
       case 'k':
-        castlings_.set_they_can_00();
+        if (their_king_.as_string() == "e8" && their_pieces_.get(7, 7) &&
+            rooks_.get(7, 7)) {
+          castlings_.set_they_can_00();
+        }
         break;
       case 'Q':
-        castlings_.set_we_can_000();
+        if (our_king_.as_string() == "e1" && our_pieces_.get(0, 0) &&
+            rooks_.get(0, 0)) {
+          castlings_.set_we_can_000();
+        }
         break;
       case 'q':
-        castlings_.set_they_can_000();
+        if (their_king_.as_string() == "e8" && their_pieces_.get(7, 0) &&
+            rooks_.get(7, 0)) {
+          castlings_.set_they_can_000();
+        }
         break;
       default:
         throw Exception("Bad fen string: " + fen);
# src/mcts/node.h: GetQ gains an optional logit_q flag (default false) that,
# for nodes with N > 0, returns FastLogit(0.9999999f * Q) instead of Q.
# GetVisitsToReachU gains a logit_q parameter WITHOUT a default and forwards
# it to GetQ, so every caller has to be updated together with this hunk.
diff --git a/src/mcts/node.h b/src/mcts/node.h
index 24488fa5d7..280b44f8b6 100644
--- a/src/mcts/node.h
+++ b/src/mcts/node.h
@@ -37,6 +37,7 @@
 #include "chess/position.h"
 #include "neural/encoder.h"
 #include "neural/writer.h"
+#include "utils/fastmath.h"
 #include "utils/mutex.h"
 namespace lczero {
@@ -336,8 +337,12 @@ class EdgeAndNode {
   Node* node() const { return node_; }
   // Proxy functions for easier access to node/edge.
-  float GetQ(float default_q) const {
-    return (node_ && node_->GetN() > 0) ? node_->GetQ() : default_q;
+  float GetQ(float default_q, bool logit_q = false) const {
+    return (node_ && node_->GetN() > 0)
+               ?
+               // Scale Q slightly to avoid logit(1) = infinity.
+               (logit_q ? FastLogit(0.9999999f * node_->GetQ()) : node_->GetQ())
+               : default_q;
   }
   float GetD() const {
     return (node_ && node_->GetN() > 0) ? node_->GetD() : 0.0f;
@@ -362,9 +367,9 @@ class EdgeAndNode {
     return numerator * GetP() / (1 + GetNStarted());
   }
-  int GetVisitsToReachU(float target_score, float numerator,
-                        float default_q) const {
-    const auto q = GetQ(default_q);
+  int GetVisitsToReachU(float target_score, float numerator, float default_q,
+                        bool logit_q) const {
+    const auto q = GetQ(default_q, logit_q);
     if (q >= target_score) return std::numeric_limits::max();
     const auto n1 = GetNStarted() + 1;
     return std::max(
# src/mcts/params.cc: registers three options - LogitQ (default false),
# DirichletNoiseEpsilon (range 0..1, default 0) and DirichletNoiseAlpha
# (default 0.3); the two Dirichlet options are hidden. The cached kNoise flag
# is replaced by kNoiseEpsilon, which is 0.25 when the existing noise option
# is set and the epsilon option's value otherwise.
diff --git a/src/mcts/params.cc b/src/mcts/params.cc
index 1abce86784..6b2522370c 100644
--- a/src/mcts/params.cc
+++ b/src/mcts/params.cc
@@ -49,6 +49,10 @@ const OptionId SearchParams::kMaxPrefetchBatchId{
     "When the engine cannot gather a large enough batch for immediate use, try "
     "to prefetch up to X positions which are likely to be useful soon, and put "
     "them into cache."};
+const OptionId SearchParams::kLogitQId{
+    "logit-q", "LogitQ",
+    "Apply logit to Q when determining Q+U best child. This makes the U term "
+    "less dominant when Q is near -1 or +1."};
 const OptionId SearchParams::kCpuctId{
     "cpuct", "CPuct",
     "cpuct_init constant from \"UCT search\" algorithm. Higher values promote "
@@ -93,6 +97,15 @@ const OptionId SearchParams::kNoiseId{
     "engine to discover new ideas during training by exploring moves which are "
     "known to be bad. Not normally used during play.",
     'n'};
+const OptionId SearchParams::kNoiseEpsilonId{
+    "noise-epsilon", "DirichletNoiseEpsilon",
+    "Amount of Dirichlet noise to combine with root priors. This allows the "
+    "engine to discover new ideas during training by exploring moves which are "
+    "known to be bad. Not normally used during play."};
+const OptionId SearchParams::kNoiseAlphaId{
+    "noise-alpha", "DirichletNoiseAlpha",
+    "Alpha of Dirichlet noise to control the sharpness of move probabilities. "
+    "Larger values result in flatter / more evenly distributed values."};
 const OptionId SearchParams::kVerboseStatsId{
     "verbose-move-stats", "VerboseMoveStats",
     "Display Q, V, N, U and P values of every move candidate after each move."};
@@ -192,6 +205,7 @@ void SearchParams::Populate(OptionsParser* options) {
   // Many of them are overridden with training specific values in tournament.cc.
   options->Add(kMiniBatchSizeId, 1, 1024) = 256;
   options->Add(kMaxPrefetchBatchId, 0, 1024) = 32;
+  options->Add(kLogitQId) = false;
   options->Add(kCpuctId, 0.0f, 100.0f) = 3.0f;
   options->Add(kCpuctBaseId, 1.0f, 1000000000.0f) = 19652.0f;
   options->Add(kCpuctFactorId, 0.0f, 1000.0f) = 2.0f;
@@ -203,6 +217,8 @@ void SearchParams::Populate(OptionsParser* options) {
   options->Add(kTemperatureVisitOffsetId, -1000.0f, 1000.0f) = 0.0f;
   options->Add(kNoiseId) = false;
+  options->Add(kNoiseEpsilonId, 0.0f, 1.0f) = 0.0f;
+  options->Add(kNoiseAlphaId, 0.0f, 10000000.0f) = 0.3f;
   options->Add(kVerboseStatsId) = false;
   options->Add(kLogLiveStatsId) = false;
   options->Add(kSmartPruningFactorId, 0.0f, 10.0f) = 1.33f;
@@ -228,15 +244,21 @@ void SearchParams::Populate(OptionsParser* options) {
   options->Add(kKLDGainAverageInterval, 1, 10000000) = 100;
   options->Add(kMinimumKLDGainPerNode, 0.0f, 1.0f) = 0.0f;
+  options->HideOption(kNoiseEpsilonId);
+  options->HideOption(kNoiseAlphaId);
   options->HideOption(kLogLiveStatsId);
 }
 SearchParams::SearchParams(const OptionsDict& options)
     : options_(options),
+      kLogitQ(options.Get(kLogitQId.GetId())),
       kCpuct(options.Get(kCpuctId.GetId())),
       kCpuctBase(options.Get(kCpuctBaseId.GetId())),
       kCpuctFactor(options.Get(kCpuctFactorId.GetId())),
-      kNoise(options.Get(kNoiseId.GetId())),
+      kNoiseEpsilon(options.Get(kNoiseId.GetId())
+                        ? 0.25f
+                        : options.Get(kNoiseEpsilonId.GetId())),
+      kNoiseAlpha(options.Get(kNoiseAlphaId.GetId())),
       kSmartPruningFactor(options.Get(kSmartPruningFactorId.GetId())),
       kFpuAbsolute(options.Get(kFpuStrategyId.GetId()) == "absolute"),
# src/mcts/params.h: declares the option ids, cached members and getters that
# go with the params.cc hunks. GetNoise() is removed in favour of
# GetNoiseEpsilon()/GetNoiseAlpha(), both returning float.
diff --git a/src/mcts/params.h b/src/mcts/params.h
index 14f1189435..b5b448d4e4 100644
--- a/src/mcts/params.h
+++ b/src/mcts/params.h
@@ -48,6 +48,7 @@ class SearchParams {
   int GetMaxPrefetchBatch() const {
     return options_.Get(kMaxPrefetchBatchId.GetId());
   }
+  bool GetLogitQ() const { return kLogitQ; }
   float GetCpuct() const { return kCpuct; }
   float GetCpuctBase() const { return kCpuctBase; }
   float GetCpuctFactor() const { return kCpuctFactor; }
@@ -70,7 +71,8 @@ class SearchParams {
     return options_.Get(kTemperatureWinpctCutoffId.GetId());
   }
-  bool GetNoise() const { return kNoise; }
+  float GetNoiseEpsilon() const { return kNoiseEpsilon; }
+  float GetNoiseAlpha() const { return kNoiseAlpha; }
   bool GetVerboseStats() const {
     return options_.Get(kVerboseStatsId.GetId());
   }
@@ -102,6 +104,7 @@ class SearchParams {
   // Search parameter IDs.
   static const OptionId kMiniBatchSizeId;
   static const OptionId kMaxPrefetchBatchId;
+  static const OptionId kLogitQId;
   static const OptionId kCpuctId;
   static const OptionId kCpuctBaseId;
   static const OptionId kCpuctFactorId;
@@ -112,6 +115,8 @@ class SearchParams {
   static const OptionId kTemperatureWinpctCutoffId;
   static const OptionId kTemperatureVisitOffsetId;
   static const OptionId kNoiseId;
+  static const OptionId kNoiseEpsilonId;
+  static const OptionId kNoiseAlphaId;
   static const OptionId kVerboseStatsId;
   static const OptionId kLogLiveStatsId;
   static const OptionId kSmartPruningFactorId;
@@ -140,10 +145,12 @@ class SearchParams {
   // 2. Parameter has to stay the say during the search.
   // TODO(crem) Some of those parameters can be converted to be dynamic after
   // trivial search optimiations.
+  const bool kLogitQ;
   const float kCpuct;
   const float kCpuctBase;
   const float kCpuctFactor;
-  const bool kNoise;
+  const float kNoiseEpsilon;
+  const float kNoiseAlpha;
   const float kSmartPruningFactor;
   const bool kFpuAbsolute;
   const float kFpuValue;
# src/mcts/search.cc: passes the LogitQ setting into every GetQ /
# GetVisitsToReachU call shown here; the root "possible_moves <= 1" branch
# additionally requires GetSmartPruningFactor() to be non-zero; Dirichlet
# noise is applied at the root when GetNoiseEpsilon() is non-zero, using the
# configured epsilon/alpha instead of the literals 0.25 and 0.3.
# NOTE(review): GetNoiseEpsilon() returns float and is used directly as a
# condition - an explicit comparison would make the intent clearer.
diff --git a/src/mcts/search.cc b/src/mcts/search.cc
index 31e70e30fd..4f721a205b 100644
--- a/src/mcts/search.cc
+++ b/src/mcts/search.cc
@@ -212,16 +212,19 @@ std::vector Search::GetVerboseStats(Node* node,
   const float fpu = GetFpu(params_, node, node == root_node_);
   const float cpuct = ComputeCpuct(params_, node->GetN());
   const float U_coeff =
-      cpuct * std::sqrt(std::max(node->GetChildrenVisits(), 1u));
+      cpuct * std::sqrt(std::max(node->GetChildrenVisits(), 1u));
+  const bool logit_q = params_.GetLogitQ();
   std::vector edges;
   for (const auto& edge : node->Edges()) edges.push_back(edge);
   std::sort(
       edges.begin(), edges.end(),
-      [&fpu, &U_coeff](EdgeAndNode a, EdgeAndNode b) {
-        return std::forward_as_tuple(a.GetN(), a.GetQ(fpu) + a.GetU(U_coeff)) <
-               std::forward_as_tuple(b.GetN(), b.GetQ(fpu) + b.GetU(U_coeff));
+      [&fpu, &U_coeff, &logit_q](EdgeAndNode a, EdgeAndNode b) {
+        return std::forward_as_tuple(
+                   a.GetN(), a.GetQ(fpu, logit_q) + a.GetU(U_coeff)) <
+               std::forward_as_tuple(
+                   b.GetN(), b.GetQ(fpu, logit_q) + b.GetU(U_coeff));
       });
   std::vector infos;
@@ -250,7 +253,8 @@ std::vector Search::GetVerboseStats(Node* node,
         << ") ";
     oss << "(Q+U: " << std::setw(8) << std::setprecision(5)
-        << edge.GetQ(fpu) + edge.GetU(U_coeff) << ") ";
+        << edge.GetQ(fpu, logit_q) + edge.GetU(U_coeff)
+        << ") ";
     oss << "(V: ";
     optional v;
@@ -955,7 +959,7 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend(
         }
         ++possible_moves;
       }
-      const float Q = child.GetQ(fpu);
+      const float Q = child.GetQ(fpu, params_.GetLogitQ());
       const float score = child.GetU(puct_mult) + Q;
       if (score > best) {
         second_best = best;
@@ -970,7 +974,8 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend(
     if (second_best_edge) {
       int estimated_visits_to_change_best =
-          best_edge.GetVisitsToReachU(second_best, puct_mult, fpu);
+          best_edge.GetVisitsToReachU(second_best, puct_mult, fpu,
+                                      params_.GetLogitQ());
       // Only cache for n-2 steps as the estimate created by GetVisitsToReachU
       // has potential rounding errors and some conservative logic that can push
       // it up to 2 away from the real value.
@@ -982,7 +987,8 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend(
       second_best_edge.Reset();
     }
-    if (is_root_node && possible_moves <= 1 && !search_->limits_.infinite) {
+    if (is_root_node && possible_moves <= 1 && !search_->limits_.infinite &&
+        params_.GetSmartPruningFactor()) {
       // If there is only one move theoretically possible within remaining time,
       // output it.
       Mutex::Lock counters_lock(search_->counters_mutex_);
@@ -1262,8 +1268,9 @@ void SearchWorker::FetchSingleNodeResult(NodeToProcess* node_to_process,
     for (auto edge : node->Edges()) edge.edge()->SetP(edge.GetP() * scale);
   }
   // Add Dirichlet noise if enabled and at root.
-  if (params_.GetNoise() && node == search_->root_node_) {
-    ApplyDirichletNoise(node, 0.25, 0.3);
+  if (params_.GetNoiseEpsilon() && node == search_->root_node_) {
+    ApplyDirichletNoise(node, params_.GetNoiseEpsilon(),
+                        params_.GetNoiseAlpha());
   }
 }
# src/selfplay/tournament.cc: the self-play defaults set DirichletNoiseEpsilon
# to 0.25 in place of switching the noise flag on, and set LogitQ to false.
diff --git a/src/selfplay/tournament.cc b/src/selfplay/tournament.cc
index c4b2ecd0c1..dea95279ea 100644
--- a/src/selfplay/tournament.cc
+++ b/src/selfplay/tournament.cc
@@ -97,12 +97,13 @@ void SelfPlayTournament::PopulateOptions(OptionsParser* options) {
   defaults->Set(SearchParams::kOutOfOrderEvalId.GetId(), false);
   defaults->Set(SearchParams::kSmartPruningFactorId.GetId(), 0.0f);
   defaults->Set(SearchParams::kTemperatureId.GetId(), 1.0f);
-  defaults->Set(SearchParams::kNoiseId.GetId(), true);
+  defaults->Set(SearchParams::kNoiseEpsilonId.GetId(), 0.25f);
   defaults->Set(SearchParams::kFpuValueId.GetId(), 0.0f);
   defaults->Set(SearchParams::kHistoryFillId.GetId(), "no");
   defaults->Set(NetworkFactory::kBackendId.GetId(), "multiplexing");
   defaults->Set(SearchParams::kStickyEndgamesId.GetId(), false);
+  defaults->Set(SearchParams::kLogitQId.GetId(), false);
 }
 SelfPlayTournament::SelfPlayTournament(const OptionsDict& options,
# src/utils/fastmath.h: FastLog2 now subtracts 1 from the reconstructed
# mantissa before evaluating the polynomial and uses new constants; FastPow2
# gets new constants; both header comments are rewritten in terms of a tuning
# constant k. Adds FastLogit(a) = 0.5 * FastLog((1 + a) / (1 - a)).
# NOTE(review): the "#include" context line below has lost its target in this
# copy - confirm against the repository.
diff --git a/src/utils/fastmath.h b/src/utils/fastmath.h
index c81523e5c8..9d1e2cd3fe 100644
--- a/src/utils/fastmath.h
+++ b/src/utils/fastmath.h
@@ -29,12 +29,16 @@
 #include
+
+
 namespace lczero {
 // These stunts are performed by trained professionals, do not try this at home.
 // Fast approximate log2(x). Does no range checking.
-// The approximation used here is log2(2^N*(1+f)) ~ N+f*(1.342671-0.342671*f)
-// where N is the integer and f the fractional part, f>=0.
+// The approximation used here is log2(2^N*(1+f)) ~ N+f*(1+k-k*f) where N is the
+// exponent and f the fraction (mantissa), f>=0. The constant k is used to tune
+// the approximation accuracy. In the final version some constants were slightly
+// modified for better accuracy with 32 bit floating point math.
 inline float FastLog2(const float a) {
   uint32_t tmp;
   std::memcpy(&tmp, &a, sizeof(float));
@@ -42,17 +46,22 @@ inline float FastLog2(const float a) {
   tmp = (tmp & 0x7fffff) | (0x7f << 23);
   float out;
   std::memcpy(&out, &tmp, sizeof(float));
-  return out * (2.028011f - 0.342671f * out) - 128.68534f + expb;
+  out -= 1.0f;
+  // Minimize max absolute error.
+  return out * (1.3465552f - 0.34655523f * out) - 127 + expb;
 }
 // Fast approximate 2^x. Does only limited range checking.
-// The approximation used here is 2^(N+f) ~ 2^N*(1+f*(0.656366+0.343634*f))
-// where N is the integer and f the fractional part, f>=0.
+// The approximation used here is 2^(N+f) ~ 2^N*(1+f*(1-k+k*f)) where N is the
+// integer and f the fractional part, f>=0. The constant k is used to tune the
+// approximation accuracy. In the final version some constants were slightly
+// modified for better accuracy with 32 bit floating point math.
 inline float FastPow2(const float a) {
   if (a < -126) return 0.0;
   int32_t exp = floor(a);
   float out = a - exp;
-  out = 1.0f + out * (0.656366f + 0.343634f * out);
+  // Minimize max relative error.
+  out = 1.0f + out * (0.6602339f + 0.33976606f * out);
   int32_t tmp;
   std::memcpy(&tmp, &out, sizeof(float));
   tmp += static_cast(static_cast(exp) << 23);
@@ -68,4 +77,9 @@ inline float FastLog(const float a) {
 // Fast approximate exp(x). Does only limited range checking.
 inline float FastExp(const float a) { return FastPow2(1.442695040f * a); }
+// Fast logit for more readable code.
+inline float FastLogit(const float a) {
+  return 0.5f * FastLog((1.0f + a) / (1.0f - a));
+}
+
 } // namespace lczero
# src/version.cc: GetVersionStr takes an extra build_id argument and always
# appends "+" + build_id, after the optional "-" + postfix.
diff --git a/src/version.cc b/src/version.cc
index 9c4e2d0138..b460a5ab89 100644
--- a/src/version.cc
+++ b/src/version.cc
@@ -31,9 +31,10 @@ std::uint32_t GetVersionInt(int major, int minor, int patch) {
 }
 std::string GetVersionStr(int major, int minor, int patch,
-                          const std::string& postfix) {
+                          const std::string& postfix,
+                          const std::string& build_id) {
   auto v = std::to_string(major) + "." + std::to_string(minor) + "." +
            std::to_string(patch);
-  if (postfix.empty()) return v;
-  return v + "-" + postfix;
+  if (postfix.empty()) return v + "+" + build_id;
+  return v + "-" + postfix + "+" + build_id;
 }
# src/version.h: the declaration gains the matching build_id parameter, which
# defaults to the BUILD_IDENTIFIER macro defined by the meson.build hunk.
diff --git a/src/version.h b/src/version.h
index ee07ebb406..cc9eff3108 100644
--- a/src/version.h
+++ b/src/version.h
@@ -39,4 +39,5 @@ std::uint32_t GetVersionInt(int major = LC0_VERSION_MAJOR,
 std::string GetVersionStr(int major = LC0_VERSION_MAJOR,
                           int minor = LC0_VERSION_MINOR,
                           int patch = LC0_VERSION_PATCH,
-                          const std::string& postfix = LC0_VERSION_POSTFIX);
+                          const std::string& postfix = LC0_VERSION_POSTFIX,
+                          const std::string& build_id = BUILD_IDENTIFIER);