diff --git a/appveyor.yml b/appveyor.yml
index a1a7b91103..ac296701a4 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -5,19 +5,19 @@ image:
 - Visual Studio 2017
 environment:
   matrix:
-  - NAME: cuda
-  - NAME: opencl
-  - NAME: blas
+  - NAME: gpu-nvidia-cuda
+  - NAME: gpu-opencl
+  - NAME: cpu-openblas
 clone_folder: c:\projects\lc0
 install:
 - cmd: set CUDA=false
 - cmd: set OPENCL=false
 - cmd: set BLAS=false
 - cmd: set GTEST=false
-- cmd: IF %NAME%==cuda set CUDA=true
-- cmd: IF %NAME%==opencl set OPENCL=true
-- cmd: IF %NAME%==blas set BLAS=true
-- cmd: IF %NAME%==blas set GTEST=true
+- cmd: IF %NAME%==gpu-nvidia-cuda set CUDA=true
+- cmd: IF %NAME%==gpu-opencl set OPENCL=true
+- cmd: IF %NAME%==cpu-openblas set BLAS=true
+- cmd: IF %NAME%==cpu-openblas set GTEST=true
 - cmd: IF %BLAS%==true IF NOT EXIST C:\cache\OpenBLAS appveyor DownloadFile https://sjeng.org/ftp/OpenBLAS-0.3.3-win-oldthread.zip
 - cmd: IF %BLAS%==true IF NOT EXIST C:\cache\OpenBLAS 7z x OpenBLAS-0.3.3-win-oldthread.zip -oC:\cache\OpenBLAS
 - cmd: IF %OPENCL%==true nuget install opencl-nug -Version 0.777.77 -OutputDirectory C:\cache
diff --git a/build-cl.cmd b/build-cl.cmd
index 762c2c8ac4..785a439629 100644
--- a/build-cl.cmd
+++ b/build-cl.cmd
@@ -6,7 +6,8 @@ set MSBuild="C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\MSBui
 rem call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64
 call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64
 
-meson.py build --backend vs2017 --buildtype release ^
+rem Change '-Dblas=false' below to '-Dblas=true' to also build the blas backend with mkl.
+meson build --backend vs2017 --buildtype release -Dblas=false ^
 -Dmkl_include="C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\include" ^
 -Dmkl_libdirs="C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\lib\intel64" ^
 -Dopencl_libdirs="C:\Program Files (x86)\AMD APP SDK\3.0\lib\x86_64" ^
@@ -28,4 +29,3 @@ cd build
 %MSBuild% /p:Configuration=Release /p:Platform=x64 ^
 /p:PreferredToolArchitecture=x64 lc0@exe.vcxproj ^
 /filelogger
-
diff --git a/meson.build b/meson.build
index b75c9a241e..651e0d4979 100644
--- a/meson.build
+++ b/meson.build
@@ -66,20 +66,48 @@ endif
 gen = generator(protoc, output: ['@BASENAME@.pb.cc', '@BASENAME@.pb.h'],
   arguments : ['--proto_path=@CURRENT_SOURCE_DIR@/libs/lczero-common', '--cpp_out=@BUILD_DIR@', '@INPUT@'])
 
+# Handle submodules.
+git = find_program('git', required: false)
 if run_command('checkdir.py', 'libs/lczero-common/proto').returncode() != 0
-  if run_command('git', 'status').returncode() == 0
-    message('updating git submodule libs/lczero-common')
-    run_command('git', 'submodule', 'update', '--init', '--recursive')
+  if git.found()
+    if run_command(git, 'status').returncode() == 0
+      message('updating git submodule libs/lczero-common')
+      run_command(git, 'submodule', 'update', '--init', '--recursive')
+    else
+      message('cloning lczero-common.git into libs/lczero-common')
+      run_command(git, 'clone', '--depth=1',
+                  'https://github.com/LeelaChessZero/lczero-common.git',
+                  'libs/lczero-common/')
+    endif
   else
-    message('cloning lczero-common.git into libs/lczero-common')
-    run_command('git', 'clone', '--depth=1',
-                'https://github.com/LeelaChessZero/lczero-common.git',
-                'libs/lczero-common/')
+    error('Please install git to automatically fetch submodules or download the archives manually from GitHub.')
   endif
 endif
 
 files += gen.process('libs/lczero-common/proto/net.proto',
   preserve_path_from : meson.current_source_dir() + '/libs/lczero-common/')
+  
+# Extract git short revision.
+short_rev = 'unknown'
+if git.found()
+  r = run_command(git, 'rev-parse', '--short', 'HEAD')
+  if r.returncode() == 0
+    # Now let's check if the working directory is clean.
+    if run_command(git, 'diff-index', '--quiet', 'HEAD').returncode() == 0
+      short_rev = r.stdout().strip()
+    else
+      short_rev = 'dirty'
+      warning('Cannot extract valid git short revision from dirty working directory.')
+    endif
+  else
+    warning('Failed to parse short revision. Use git clone instead of downloading the archive from GitHub.')
+  endif
+endif
+
+# Construct build identifier.
+build_identifier = 'git.' + short_rev
+add_project_arguments('-DBUILD_IDENTIFIER="' + build_identifier + '"', language : 'cpp')
+message('Using build identifier "' + build_identifier + '".')
 
 #############################################################################
 ## Main files
@@ -207,7 +235,6 @@ if get_option('build_backends')
       has_blas = true
 
     elif get_option('accelerate') and accelerate_lib.found()
-      includes += include_directories('/System/Library/Frameworks/Accelerate.framework/Frameworks/vecLib.framework/Headers')
       deps += [ accelerate_lib ]
       has_blas = true
 
@@ -311,7 +338,7 @@ if get_option('build_backends')
       deps += [ opencl_framework ]
       has_opencl = true
 
-  elif opencl_lib.found()
+  elif opencl_lib.found() and cc.has_header('CL/opencl.h', args: '-I' + get_option('opencl_include'))
 
       deps += [ opencl_lib ]
       has_opencl = true
@@ -332,7 +359,9 @@ if get_option('build_backends')
     'src/neural/shared/winograd_filter.cc',
     ]
 
-    includes += include_directories(get_option('opencl_include'))
+    if not opencl_framework.found()
+      includes += include_directories(get_option('opencl_include'))
+    endif
     files += opencl_files
     has_backends = true
 
diff --git a/meson_options.txt b/meson_options.txt
index 872e2e8a6d..0c783209e4 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -14,9 +14,9 @@ option('openblas_include',
        description: 'Paths to openblas include directories')
 
 option('opencl_include',
-       type: 'array',
-       value: ['/usr/include/'],
-       description: 'Paths to OpenCL include directories')
+       type: 'string',
+       value: '/usr/include/',
+       description: 'Path to OpenCL include directory')
 
 option('tensorflow_libdir',
        type: 'array',
diff --git a/src/chess/board.cc b/src/chess/board.cc
index 3c0b6383c8..21f5b9b7a3 100644
--- a/src/chess/board.cc
+++ b/src/chess/board.cc
@@ -982,16 +982,28 @@ void ChessBoard::SetFromFen(const std::string& fen, int* no_capture_ply,
     for (char c : castlings) {
       switch (c) {
         case 'K':
-          castlings_.set_we_can_00();
+          if (our_king_.as_string() == "e1" && our_pieces_.get(0, 7) &&
+              rooks_.get(0, 7)) {
+            castlings_.set_we_can_00();
+          }
           break;
         case 'k':
-          castlings_.set_they_can_00();
+          if (their_king_.as_string() == "e8" && their_pieces_.get(7, 7) &&
+              rooks_.get(7, 7)) {
+            castlings_.set_they_can_00();
+          }
           break;
         case 'Q':
-          castlings_.set_we_can_000();
+          if (our_king_.as_string() == "e1" && our_pieces_.get(0, 0) &&
+              rooks_.get(0, 0)) {
+            castlings_.set_we_can_000();
+          }
           break;
         case 'q':
-          castlings_.set_they_can_000();
+          if (their_king_.as_string() == "e8" && their_pieces_.get(7, 0) &&
+              rooks_.get(7, 0)) {
+            castlings_.set_they_can_000();
+          }
           break;
         default:
           throw Exception("Bad fen string: " + fen);
diff --git a/src/mcts/node.h b/src/mcts/node.h
index 24488fa5d7..280b44f8b6 100644
--- a/src/mcts/node.h
+++ b/src/mcts/node.h
@@ -37,6 +37,7 @@
 #include "chess/position.h"
 #include "neural/encoder.h"
 #include "neural/writer.h"
+#include "utils/fastmath.h"
 #include "utils/mutex.h"
 
 namespace lczero {
@@ -336,8 +337,12 @@ class EdgeAndNode {
   Node* node() const { return node_; }
 
   // Proxy functions for easier access to node/edge.
-  float GetQ(float default_q) const {
-    return (node_ && node_->GetN() > 0) ? node_->GetQ() : default_q;
+  float GetQ(float default_q, bool logit_q = false) const {
+    return (node_ && node_->GetN() > 0)
+               ?
+               // Scale Q slightly to avoid logit(1) = infinity.
+               (logit_q ? FastLogit(0.9999999f * node_->GetQ()) : node_->GetQ())
+               : default_q;
   }
   float GetD() const {
     return (node_ && node_->GetN() > 0) ? node_->GetD() : 0.0f;
@@ -362,9 +367,9 @@ class EdgeAndNode {
     return numerator * GetP() / (1 + GetNStarted());
   }
 
-  int GetVisitsToReachU(float target_score, float numerator,
-                        float default_q) const {
-    const auto q = GetQ(default_q);
+  int GetVisitsToReachU(float target_score, float numerator, float default_q,
+                        bool logit_q) const {
+    const auto q = GetQ(default_q, logit_q);
     if (q >= target_score) return std::numeric_limits<int>::max();
     const auto n1 = GetNStarted() + 1;
     return std::max(
diff --git a/src/mcts/params.cc b/src/mcts/params.cc
index 1abce86784..6b2522370c 100644
--- a/src/mcts/params.cc
+++ b/src/mcts/params.cc
@@ -49,6 +49,10 @@ const OptionId SearchParams::kMaxPrefetchBatchId{
     "When the engine cannot gather a large enough batch for immediate use, try "
     "to prefetch up to X positions which are likely to be useful soon, and put "
     "them into cache."};
+const OptionId SearchParams::kLogitQId{
+    "logit-q", "LogitQ",
+    "Apply logit to Q when determining Q+U best child. This makes the U term "
+    "less dominant when Q is near -1 or +1."};
 const OptionId SearchParams::kCpuctId{
     "cpuct", "CPuct",
     "cpuct_init constant from \"UCT search\" algorithm. Higher values promote "
@@ -93,6 +97,15 @@ const OptionId SearchParams::kNoiseId{
     "engine to discover new ideas during training by exploring moves which are "
     "known to be bad. Not normally used during play.",
     'n'};
+const OptionId SearchParams::kNoiseEpsilonId{
+    "noise-epsilon", "DirichletNoiseEpsilon",
+    "Amount of Dirichlet noise to combine with root priors. This allows the "
+    "engine to discover new ideas during training by exploring moves which are "
+    "known to be bad. Not normally used during play."};
+const OptionId SearchParams::kNoiseAlphaId{
+    "noise-alpha", "DirichletNoiseAlpha",
+    "Alpha of Dirichlet noise to control the sharpness of move probabilities. "
+    "Larger values result in flatter / more evenly distributed values."};
 const OptionId SearchParams::kVerboseStatsId{
     "verbose-move-stats", "VerboseMoveStats",
     "Display Q, V, N, U and P values of every move candidate after each move."};
@@ -192,6 +205,7 @@ void SearchParams::Populate(OptionsParser* options) {
   // Many of them are overridden with training specific values in tournament.cc.
   options->Add<IntOption>(kMiniBatchSizeId, 1, 1024) = 256;
   options->Add<IntOption>(kMaxPrefetchBatchId, 0, 1024) = 32;
+  options->Add<BoolOption>(kLogitQId) = false;
   options->Add<FloatOption>(kCpuctId, 0.0f, 100.0f) = 3.0f;
   options->Add<FloatOption>(kCpuctBaseId, 1.0f, 1000000000.0f) = 19652.0f;
   options->Add<FloatOption>(kCpuctFactorId, 0.0f, 1000.0f) = 2.0f;
@@ -203,6 +217,8 @@ void SearchParams::Populate(OptionsParser* options) {
   options->Add<FloatOption>(kTemperatureVisitOffsetId, -1000.0f, 1000.0f) =
       0.0f;
   options->Add<BoolOption>(kNoiseId) = false;
+  options->Add<FloatOption>(kNoiseEpsilonId, 0.0f, 1.0f) = 0.0f;
+  options->Add<FloatOption>(kNoiseAlphaId, 0.0f, 10000000.0f) = 0.3f;
   options->Add<BoolOption>(kVerboseStatsId) = false;
   options->Add<BoolOption>(kLogLiveStatsId) = false;
   options->Add<FloatOption>(kSmartPruningFactorId, 0.0f, 10.0f) = 1.33f;
@@ -228,15 +244,21 @@ void SearchParams::Populate(OptionsParser* options) {
   options->Add<IntOption>(kKLDGainAverageInterval, 1, 10000000) = 100;
   options->Add<FloatOption>(kMinimumKLDGainPerNode, 0.0f, 1.0f) = 0.0f;
 
+  options->HideOption(kNoiseEpsilonId);
+  options->HideOption(kNoiseAlphaId);
   options->HideOption(kLogLiveStatsId);
 }
 
 SearchParams::SearchParams(const OptionsDict& options)
     : options_(options),
+      kLogitQ(options.Get<bool>(kLogitQId.GetId())),
       kCpuct(options.Get<float>(kCpuctId.GetId())),
       kCpuctBase(options.Get<float>(kCpuctBaseId.GetId())),
       kCpuctFactor(options.Get<float>(kCpuctFactorId.GetId())),
-      kNoise(options.Get<bool>(kNoiseId.GetId())),
+      kNoiseEpsilon(options.Get<bool>(kNoiseId.GetId())
+                        ? 0.25f
+                        : options.Get<float>(kNoiseEpsilonId.GetId())),
+      kNoiseAlpha(options.Get<float>(kNoiseAlphaId.GetId())),
       kSmartPruningFactor(options.Get<float>(kSmartPruningFactorId.GetId())),
       kFpuAbsolute(options.Get<std::string>(kFpuStrategyId.GetId()) ==
                    "absolute"),
diff --git a/src/mcts/params.h b/src/mcts/params.h
index 14f1189435..b5b448d4e4 100644
--- a/src/mcts/params.h
+++ b/src/mcts/params.h
@@ -48,6 +48,7 @@ class SearchParams {
   int GetMaxPrefetchBatch() const {
     return options_.Get<int>(kMaxPrefetchBatchId.GetId());
   }
+  bool GetLogitQ() const { return kLogitQ; }
   float GetCpuct() const { return kCpuct; }
   float GetCpuctBase() const { return kCpuctBase; }
   float GetCpuctFactor() const { return kCpuctFactor; }
@@ -70,7 +71,8 @@ class SearchParams {
     return options_.Get<float>(kTemperatureWinpctCutoffId.GetId());
   }
 
-  bool GetNoise() const { return kNoise; }
+  float GetNoiseEpsilon() const { return kNoiseEpsilon; }
+  float GetNoiseAlpha() const { return kNoiseAlpha; }
   bool GetVerboseStats() const {
     return options_.Get<bool>(kVerboseStatsId.GetId());
   }
@@ -102,6 +104,7 @@ class SearchParams {
   // Search parameter IDs.
   static const OptionId kMiniBatchSizeId;
   static const OptionId kMaxPrefetchBatchId;
+  static const OptionId kLogitQId;
   static const OptionId kCpuctId;
   static const OptionId kCpuctBaseId;
   static const OptionId kCpuctFactorId;
@@ -112,6 +115,8 @@ class SearchParams {
   static const OptionId kTemperatureWinpctCutoffId;
   static const OptionId kTemperatureVisitOffsetId;
   static const OptionId kNoiseId;
+  static const OptionId kNoiseEpsilonId;
+  static const OptionId kNoiseAlphaId;
   static const OptionId kVerboseStatsId;
   static const OptionId kLogLiveStatsId;
   static const OptionId kSmartPruningFactorId;
@@ -140,10 +145,12 @@ class SearchParams {
   // 2. Parameter has to stay the say during the search.
   // TODO(crem) Some of those parameters can be converted to be dynamic after
   //            trivial search optimiations.
+  const bool kLogitQ;
   const float kCpuct;
   const float kCpuctBase;
   const float kCpuctFactor;
-  const bool kNoise;
+  const float kNoiseEpsilon;
+  const float kNoiseAlpha;
   const float kSmartPruningFactor;
   const bool kFpuAbsolute;
   const float kFpuValue;
diff --git a/src/mcts/search.cc b/src/mcts/search.cc
index 31e70e30fd..4f721a205b 100644
--- a/src/mcts/search.cc
+++ b/src/mcts/search.cc
@@ -212,16 +212,19 @@ std::vector<std::string> Search::GetVerboseStats(Node* node,
   const float fpu = GetFpu(params_, node, node == root_node_);
   const float cpuct = ComputeCpuct(params_, node->GetN());
   const float U_coeff =
-      cpuct * std::sqrt(std::max(node->GetChildrenVisits(), 1u));
+    cpuct * std::sqrt(std::max(node->GetChildrenVisits(), 1u));
+  const bool logit_q = params_.GetLogitQ();
 
   std::vector<EdgeAndNode> edges;
   for (const auto& edge : node->Edges()) edges.push_back(edge);
 
   std::sort(
       edges.begin(), edges.end(),
-      [&fpu, &U_coeff](EdgeAndNode a, EdgeAndNode b) {
-        return std::forward_as_tuple(a.GetN(), a.GetQ(fpu) + a.GetU(U_coeff)) <
-               std::forward_as_tuple(b.GetN(), b.GetQ(fpu) + b.GetU(U_coeff));
+      [&fpu, &U_coeff, &logit_q](EdgeAndNode a, EdgeAndNode b) {
+        return std::forward_as_tuple(
+          a.GetN(), a.GetQ(fpu, logit_q) + a.GetU(U_coeff)) <
+          std::forward_as_tuple(
+          b.GetN(), b.GetQ(fpu, logit_q) + b.GetU(U_coeff));
       });
 
   std::vector<std::string> infos;
@@ -250,7 +253,8 @@ std::vector<std::string> Search::GetVerboseStats(Node* node,
         << ") ";
 
     oss << "(Q+U: " << std::setw(8) << std::setprecision(5)
-        << edge.GetQ(fpu) + edge.GetU(U_coeff) << ") ";
+        << edge.GetQ(fpu, logit_q) + edge.GetU(U_coeff)
+        << ") ";
 
     oss << "(V: ";
     optional<float> v;
@@ -955,7 +959,7 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend(
         }
         ++possible_moves;
       }
-      const float Q = child.GetQ(fpu);
+      const float Q = child.GetQ(fpu, params_.GetLogitQ());
       const float score = child.GetU(puct_mult) + Q;
       if (score > best) {
         second_best = best;
@@ -970,7 +974,8 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend(
 
     if (second_best_edge) {
       int estimated_visits_to_change_best =
-          best_edge.GetVisitsToReachU(second_best, puct_mult, fpu);
+          best_edge.GetVisitsToReachU(second_best, puct_mult, fpu,
+                                      params_.GetLogitQ());
       // Only cache for n-2 steps as the estimate created by GetVisitsToReachU
       // has potential rounding errors and some conservative logic that can push
       // it up to 2 away from the real value.
@@ -982,7 +987,8 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend(
       second_best_edge.Reset();
     }
 
-    if (is_root_node && possible_moves <= 1 && !search_->limits_.infinite) {
+    if (is_root_node && possible_moves <= 1 && !search_->limits_.infinite &&
+        params_.GetSmartPruningFactor()) {
       // If there is only one move theoretically possible within remaining time,
       // output it.
       Mutex::Lock counters_lock(search_->counters_mutex_);
@@ -1262,8 +1268,9 @@ void SearchWorker::FetchSingleNodeResult(NodeToProcess* node_to_process,
     for (auto edge : node->Edges()) edge.edge()->SetP(edge.GetP() * scale);
   }
   // Add Dirichlet noise if enabled and at root.
-  if (params_.GetNoise() && node == search_->root_node_) {
-    ApplyDirichletNoise(node, 0.25, 0.3);
+  if (params_.GetNoiseEpsilon() && node == search_->root_node_) {
+    ApplyDirichletNoise(node, params_.GetNoiseEpsilon(),
+                        params_.GetNoiseAlpha());
   }
 }
 
diff --git a/src/selfplay/tournament.cc b/src/selfplay/tournament.cc
index c4b2ecd0c1..dea95279ea 100644
--- a/src/selfplay/tournament.cc
+++ b/src/selfplay/tournament.cc
@@ -97,12 +97,13 @@ void SelfPlayTournament::PopulateOptions(OptionsParser* options) {
   defaults->Set<bool>(SearchParams::kOutOfOrderEvalId.GetId(), false);
   defaults->Set<float>(SearchParams::kSmartPruningFactorId.GetId(), 0.0f);
   defaults->Set<float>(SearchParams::kTemperatureId.GetId(), 1.0f);
-  defaults->Set<bool>(SearchParams::kNoiseId.GetId(), true);
+  defaults->Set<float>(SearchParams::kNoiseEpsilonId.GetId(), 0.25f);
   defaults->Set<float>(SearchParams::kFpuValueId.GetId(), 0.0f);
   defaults->Set<std::string>(SearchParams::kHistoryFillId.GetId(), "no");
   defaults->Set<std::string>(NetworkFactory::kBackendId.GetId(),
                              "multiplexing");
   defaults->Set<bool>(SearchParams::kStickyEndgamesId.GetId(), false);
+  defaults->Set<bool>(SearchParams::kLogitQId.GetId(), false);
 }
 
 SelfPlayTournament::SelfPlayTournament(const OptionsDict& options,
diff --git a/src/utils/fastmath.h b/src/utils/fastmath.h
index c81523e5c8..9d1e2cd3fe 100644
--- a/src/utils/fastmath.h
+++ b/src/utils/fastmath.h
@@ -29,12 +29,16 @@
 
 #include <cstring>
 
+
+
 namespace lczero {
 // These stunts are performed by trained professionals, do not try this at home.
 
 // Fast approximate log2(x). Does no range checking.
-// The approximation used here is log2(2^N*(1+f)) ~ N+f*(1.342671-0.342671*f)
-// where N is the integer and f the fractional part, f>=0.
+// The approximation used here is log2(2^N*(1+f)) ~ N+f*(1+k-k*f) where N is the
+// exponent and f the fraction (mantissa), f>=0. The constant k is used to tune
+// the approximation accuracy. In the final version some constants were slightly
+// modified for better accuracy with 32 bit floating point math.
 inline float FastLog2(const float a) {
   uint32_t tmp;
   std::memcpy(&tmp, &a, sizeof(float));
@@ -42,17 +46,22 @@ inline float FastLog2(const float a) {
   tmp = (tmp & 0x7fffff) | (0x7f << 23);
   float out;
   std::memcpy(&out, &tmp, sizeof(float));
-  return out * (2.028011f - 0.342671f * out) - 128.68534f + expb;
+  out -= 1.0f;
+  // Minimize max absolute error.
+  return out * (1.3465552f - 0.34655523f * out) - 127 + expb;
 }
 
 // Fast approximate 2^x. Does only limited range checking.
-// The approximation used here is 2^(N+f) ~ 2^N*(1+f*(0.656366+0.343634*f))
-// where N is the integer and f the fractional part, f>=0.
+// The approximation used here is 2^(N+f) ~ 2^N*(1+f*(1-k+k*f)) where N is the
+// integer and f the fractional part, f>=0. The constant k is used to tune the
+// approximation accuracy. In the final version some constants were slightly
+// modified for better accuracy with 32 bit floating point math.
 inline float FastPow2(const float a) {
   if (a < -126) return 0.0;
   int32_t exp = floor(a);
   float out = a - exp;
-  out = 1.0f + out * (0.656366f + 0.343634f * out);
+  // Minimize max relative error.
+  out = 1.0f + out * (0.6602339f + 0.33976606f * out);
   int32_t tmp;
   std::memcpy(&tmp, &out, sizeof(float));
   tmp += static_cast<int32_t>(static_cast<uint32_t>(exp) << 23);
@@ -68,4 +77,9 @@ inline float FastLog(const float a) {
 // Fast approximate exp(x). Does only limited range checking.
 inline float FastExp(const float a) { return FastPow2(1.442695040f * a); }
 
+// Fast logit: 0.5 * FastLog((1 + a) / (1 - a)). Does no range checking.
+inline float FastLogit(const float a) {
+  return 0.5f * FastLog((1.0f + a) / (1.0f - a));
+}
+
 }  // namespace lczero
diff --git a/src/version.cc b/src/version.cc
index 9c4e2d0138..b460a5ab89 100644
--- a/src/version.cc
+++ b/src/version.cc
@@ -31,9 +31,10 @@ std::uint32_t GetVersionInt(int major, int minor, int patch) {
 }
 
 std::string GetVersionStr(int major, int minor, int patch,
-                          const std::string& postfix) {
+                          const std::string& postfix,
+                          const std::string& build_id) {
   auto v = std::to_string(major) + "." + std::to_string(minor) + "." +
            std::to_string(patch);
-  if (postfix.empty()) return v;
-  return v + "-" + postfix;
+  if (postfix.empty()) return v + "+" + build_id;
+  return v + "-" + postfix + "+" + build_id;
 }
diff --git a/src/version.h b/src/version.h
index ee07ebb406..cc9eff3108 100644
--- a/src/version.h
+++ b/src/version.h
@@ -39,4 +39,5 @@ std::uint32_t GetVersionInt(int major = LC0_VERSION_MAJOR,
 std::string GetVersionStr(int major = LC0_VERSION_MAJOR,
                           int minor = LC0_VERSION_MINOR,
                           int patch = LC0_VERSION_PATCH,
-                          const std::string& postfix = LC0_VERSION_POSTFIX);
+                          const std::string& postfix = LC0_VERSION_POSTFIX,
+                          const std::string& build_id = BUILD_IDENTIFIER);