Merge commits from last week #3
Merged 7 commits, Jun 1, 2018. Showing changes from all commits.
674 changes: 674 additions & 0 deletions COPYING

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion meson.build
@@ -158,7 +158,7 @@ if get_option('build_backends')
if has_blas

blas_files = [
'src/neural/transforms.cc',
'src/neural/CL/transforms.cc',
'src/neural/network_blas.cc'
]

84 changes: 57 additions & 27 deletions src/mcts/search.cc
@@ -46,7 +46,9 @@ const char* Search::kFpuReductionStr = "First Play Urgency Reduction";
const char* Search::kCacheHistoryLengthStr =
"Length of history to include in cache";
const char* Search::kExtraVirtualLossStr = "Extra virtual loss";
const char* Search::KPolicySoftmaxTempStr = "Policy softmax temperature";
const char* Search::kPolicySoftmaxTempStr = "Policy softmax temperature";
const char* Search::kAllowedNodeCollisionsStr =
"Allowed node collisions, per batch";

namespace {
const int kSmartPruningToleranceNodes = 100;
@@ -70,7 +72,10 @@ void Search::PopulateUciParams(OptionsParser* options) {
"cache-history-length") = 7;
options->Add<FloatOption>(kExtraVirtualLossStr, 0.0, 100.0,
"extra-virtual-loss") = 0.0f;
options->Add<FloatOption>(KPolicySoftmaxTempStr, 0.1, 10.0, "policy-softmax-temp") = 1.0f;
options->Add<FloatOption>(kPolicySoftmaxTempStr, 0.1, 10.0,
"policy-softmax-temp") = 1.0f;
options->Add<IntOption>(kAllowedNodeCollisionsStr, 0, 1024,
"allowed-node-collisions") = 32;
}

Search::Search(const NodeTree& tree, Network* network,
@@ -98,7 +103,8 @@ Search::Search(const NodeTree& tree, Network* network,
kFpuReduction(options.Get<float>(kFpuReductionStr)),
kCacheHistoryLength(options.Get<int>(kCacheHistoryLengthStr)),
kExtraVirtualLoss(options.Get<float>(kExtraVirtualLossStr)),
KPolicySoftmaxTemp(options.Get<float>(KPolicySoftmaxTempStr)) {}
kPolicySoftmaxTemp(options.Get<float>(kPolicySoftmaxTempStr)),
kAllowedNodeCollisions(options.Get<int>(kAllowedNodeCollisionsStr)) {}

// Returns whether node was already in cache.
bool Search::AddNodeToCompute(Node* node, CachingComputation* computation,
@@ -160,22 +166,40 @@ void ApplyDirichletNoise(Node* node, float eps, double alpha) {
} // namespace

void Search::Worker() {
// Nodes to be extended or to have their counters updated.
std::vector<Node*> nodes_to_process;
// Nodes for which a collision happened. For those, only n_in_flight has to
// be rolled back.
std::vector<Node*> node_collisions;
PositionHistory history(played_history_);

// Exit check is at the end of the loop as at least one iteration is
// necessary.
while (true) {
nodes_to_process.clear();
node_collisions.clear();
auto computation = CachingComputation(network_->NewComputation(), cache_);

// Gather nodes to process in the current batch.
for (int i = 0; i < kMiniBatchSize; ++i) {
while (static_cast<int>(nodes_to_process.size()) < kMiniBatchSize) {
// Initialize position sequence with pre-move position.
history.Trim(played_history_.GetLength());
// If there's something to do without touching slow neural net, do it.
if (i > 0 && computation.GetCacheMisses() == 0) break;
Node* node = PickNodeToExtend(root_node_, &history);
if (!nodes_to_process.empty() && computation.GetCacheMisses() == 0) break;
// Returns a <Node, whether it's computable> pair. The node is not
// computable if there was a collision.
auto node_and_computable = PickNodeToExtend(root_node_, &history);
Node* node = node_and_computable.first;
const bool computable = node_and_computable.second;

// If there is a collision, add the node to a vector to undo the virtual loss later.
if (!computable) {
node_collisions.emplace_back(node);
if (static_cast<int>(node_collisions.size()) > kAllowedNodeCollisions)
break;
continue;
}

// If we hit a node that is already being processed (by our batch or by
// another thread), stop gathering and process a smaller batch.
if (!node) break;
@@ -194,6 +218,7 @@ void Search::Worker() {
}
}

// TODO(mooskagh) Remove prefetch into cache if node collisions work well.
// If there are requests to NN, but the batch is not full, try to prefetch
// nodes which are likely useful in future.
if (computation.GetCacheMisses() > 0 &&
@@ -219,8 +244,8 @@
for (Node* n : node->Children()) {
float p = computation.GetPVal(idx_in_computation,
n->GetMove().as_nn_index());
if(KPolicySoftmaxTemp != 1.0f){
p = pow(p, 1/KPolicySoftmaxTemp);
if (kPolicySoftmaxTemp != 1.0f) {
p = pow(p, 1 / kPolicySoftmaxTemp);
}
total += p;
n->SetP(p);
@@ -270,6 +295,14 @@
}
}
total_playouts_ += nodes_to_process.size();

// Remove virtual loss from node collisions.
for (Node* node : node_collisions) {
for (node = node->GetParent(); node != root_node_->GetParent();
node = node->GetParent()) {
node->CancelScoreUpdate();
}
}
}
UpdateRemainingMoves(); // Update remaining moves using smart pruning.
MaybeOutputInfo();
@@ -375,8 +408,8 @@ Node* GetBestChild(Node* parent) {
// * If that number is larger than 0, the one with larger eval wins.
std::tuple<int, float, float> best(-1, 0.0, 0.0);
for (Node* node : parent->Children()) {
std::tuple<int, float, float> val(node->GetNStarted(),
node->GetQ(-10.0, 0.0), node->GetP());
std::tuple<int, float, float> val(node->GetN(), node->GetQ(-10.0, 0.0),
node->GetP());
if (val > best) {
best = val;
best_node = node;
@@ -391,7 +424,7 @@ Node* GetBestChildWithTemperature(Node* parent, float temperature) {
const float n_parent = parent->GetN();

for (Node* node : parent->Children()) {
sum += std::pow(node->GetNStarted() / n_parent, 1 / temperature);
sum += std::pow(node->GetN() / n_parent, 1 / temperature);
cumulative_sums.push_back(sum);
}

@@ -459,9 +492,8 @@ void Search::SendMovesStats() const {
for (Node* iter : root_node_->Children()) {
nodes.emplace_back(iter);
}
std::sort(nodes.begin(), nodes.end(), [](const Node* a, const Node* b) {
return a->GetNStarted() < b->GetNStarted();
});
std::sort(nodes.begin(), nodes.end(),
[](const Node* a, const Node* b) { return a->GetN() < b->GetN(); });

const bool is_black_to_move = played_history_.IsBlackToMove();
ThinkingInfo info;
@@ -610,12 +642,15 @@ void Search::ExtendNode(Node* node, const PositionHistory& history) {
for (const auto& move : legal_moves) node->CreateChild(move);
}

Node* Search::PickNodeToExtend(Node* node, PositionHistory* history) {
// Returns node and whether it should be processed.
// (false if it is a collision).
std::pair<Node*, bool> Search::PickNodeToExtend(Node* node,
PositionHistory* history) {
// Fetch the current best root node visits for possible smart pruning.
int best_node_n = 0;
{
SharedMutex::Lock lock(nodes_mutex_);
if (best_move_node_) best_node_n = best_move_node_->GetNStarted();
if (best_move_node_) best_node_n = best_move_node_->GetN();
}

// True on first iteration, false as we dive deeper.
@@ -624,17 +659,9 @@ Node* Search::PickNodeToExtend(Node* node, PositionHistory* history) {
{
SharedMutex::Lock lock(nodes_mutex_);
// Check whether we are at a leaf.
if (!node->TryStartScoreUpdate()) {
// The node is currently being processed by another thread.
// Undo the increments of ancestor nodes, and return null.
for (node = node->GetParent(); node != root_node_->GetParent();
node = node->GetParent()) {
node->CancelScoreUpdate();
}
return nullptr;
}
if (!node->TryStartScoreUpdate()) return {node, false};
// Found a leaf, and we are the first to visit it.
if (!node->HasChildren()) return node;
if (!node->HasChildren()) return {node, true};
}

// Now we are not at a leaf, we need to go deeper.
@@ -655,7 +682,8 @@ Node* Search::PickNodeToExtend(Node* node, PositionHistory* history) {
// To ensure we have at least one node to expand, always include
// current best node.
if (iter != best_move_node_ &&
remaining_playouts_ < best_node_n - iter->GetNStarted()) {
remaining_playouts_ <
best_node_n - static_cast<int>(iter->GetN())) {
continue;
}
++possible_moves;
@@ -709,10 +737,12 @@ std::pair<Move, Move> Search::GetBestMoveInternal() const
: GetBestChild(root_node_);

Move ponder_move;
/* // Doesn't seem to work for now, so disabling.
if (best_node->HasChildren()) {
ponder_move =
GetBestChild(best_node)->GetMove(!played_history_.IsBlackToMove());
}
*/
return {best_node->GetMove(played_history_.IsBlackToMove()), ponder_move};
}

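Note on the search.cc changes above: the core behavioural change is how node collisions are handled. PickNodeToExtend no longer rolls back the virtual loss itself and returns null on a collision; it now returns a <node, computable> pair, the Worker collects collided nodes in node_collisions (up to the new kAllowedNodeCollisions per batch), and the virtual loss is undone in one pass after the batch has been processed. A minimal standalone sketch of that control flow, using simplified stand-in types rather than lc0's real Node/Search classes (PickChild is a placeholder for the PUCT child selection):

#include <utility>
#include <vector>

struct Node {
  Node* parent = nullptr;
  std::vector<Node*> children;
  int n_in_flight = 0;  // virtual loss counter

  // Fails if a childless node is already being evaluated (a collision).
  bool TryStartScoreUpdate() {
    if (children.empty() && n_in_flight > 0) return false;
    ++n_in_flight;
    return true;
  }
  void CancelScoreUpdate() { --n_in_flight; }
};

Node* PickChild(Node* node) { return node->children.front(); }  // placeholder

// Returns <node, computable>; computable == false means a collision.
std::pair<Node*, bool> PickNodeToExtend(Node* node) {
  while (true) {
    if (!node->TryStartScoreUpdate()) return {node, false};
    if (node->children.empty()) return {node, true};  // leaf to extend
    node = PickChild(node);
  }
}

void GatherBatch(Node* root, int mini_batch_size, int allowed_collisions) {
  std::vector<Node*> nodes_to_process;
  std::vector<Node*> node_collisions;

  while (static_cast<int>(nodes_to_process.size()) < mini_batch_size) {
    auto picked = PickNodeToExtend(root);
    if (!picked.second) {
      node_collisions.push_back(picked.first);
      if (static_cast<int>(node_collisions.size()) > allowed_collisions) break;
      continue;
    }
    nodes_to_process.push_back(picked.first);
  }

  // ... evaluate nodes_to_process with the network and back up results ...

  // Collided nodes added virtual loss to their ancestors during the descent;
  // remove it again, walking from the parent up to and including the root,
  // as the rollback loop in Worker() does.
  for (Node* node : node_collisions) {
    for (node = node->parent; node != root->parent; node = node->parent) {
      node->CancelScoreUpdate();
    }
  }
}

The upside of deferring the rollback is that a collision no longer ends the pick early: the batch keeps growing until either kMiniBatchSize computable nodes or more than kAllowedNodeCollisions collisions have been gathered.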
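The other new knob above, kPolicySoftmaxTemp (policy-softmax-temp), reshapes the raw network priors before they are normalized: each prior is raised to the power 1/T and the results are renormalized, so P_i = p_i^(1/T) / sum_j p_j^(1/T). T = 1 leaves the policy unchanged, T > 1 flattens it, T < 1 sharpens it. A small self-contained illustration of that reshaping (an assumed helper for demonstration, not lc0's actual function; the real code spreads the pow and the normalization across the batch loop):

#include <cmath>
#include <vector>

// Reshape priors as p^(1/T), then renormalize so they sum to 1 again.
std::vector<float> ApplySoftmaxTemp(std::vector<float> priors, float temp) {
  float total = 0.0f;
  for (float& p : priors) {
    if (temp != 1.0f) p = std::pow(p, 1.0f / temp);
    total += p;
  }
  if (total > 0.0f) {
    for (float& p : priors) p /= total;
  }
  return priors;
}

For example, with temp = 2.0 the priors {0.64, 0.32, 0.04} become roughly {0.51, 0.36, 0.13}: the ordering is preserved but the gap between the best move and the rest shrinks, which encourages wider exploration.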
8 changes: 5 additions & 3 deletions src/mcts/search.h
@@ -84,7 +84,8 @@ class Search {
static const char* kFpuReductionStr;
static const char* kCacheHistoryLengthStr;
static const char* kExtraVirtualLossStr;
static const char* KPolicySoftmaxTempStr;
static const char* kPolicySoftmaxTempStr;
static const char* kAllowedNodeCollisionsStr;

private:
// Can run several copies of it in separate threads.
@@ -104,7 +105,7 @@

void SendUciInfo(); // Requires nodes_mutex_ to be held.

Node* PickNodeToExtend(Node* node, PositionHistory* history);
std::pair<Node*, bool> PickNodeToExtend(Node* node, PositionHistory* history);
void ExtendNode(Node* node, const PositionHistory& history);

mutable Mutex counters_mutex_ ACQUIRED_AFTER(nodes_mutex_);
@@ -156,7 +157,8 @@ class Search {
const float kFpuReduction;
const bool kCacheHistoryLength;
const float kExtraVirtualLoss;
const float KPolicySoftmaxTemp;
const float kPolicySoftmaxTemp;
const int kAllowedNodeCollisions;
};

} // namespace lczero
29 changes: 14 additions & 15 deletions src/neural/transforms.cc → src/neural/CL/transforms.cc
@@ -27,9 +27,9 @@
namespace lczero {

std::vector<float> Transforms::ZeropadU(const std::vector<float>& U,
const int outputs, const int channels,
const int outputs_pad,
const int channels_pad) {
const int outputs, const int channels,
const int outputs_pad,
const int channels_pad) {
// Fill with zeroes
auto Upad = std::vector<float>(kWinogradTile * outputs_pad * channels_pad);

@@ -50,8 +50,8 @@ std::vector<float> Transforms::ZeropadU(const std::vector<float>& U,
}

std::vector<float> Transforms::WinogradTransformF(const std::vector<float>& f,
const int outputs,
const int channels) {
const int outputs,
const int channels) {
// F(2x2, 3x3) Winograd filter transformation
// transpose(G.dot(f).dot(G.transpose()))
// U matrix is transposed for better memory layout in SGEMM
@@ -89,7 +89,7 @@ std::vector<float> Transforms::WinogradTransformF(const std::vector<float>& f,
}

void Transforms::WinogradTransformIn(const std::vector<float>& in,
std::vector<float>& V, const int C) {
std::vector<float>& V, const int C) {
constexpr auto W = 8;
constexpr auto H = 8;
constexpr auto wtiles = (W + 1) / 2;
@@ -173,8 +173,8 @@ void Transforms::WinogradTransformIn(const std::vector<float>& in,
}

void Transforms::WinogradSgemm(const std::vector<float>& U,
std::vector<float>& V, std::vector<float>& M,
const int C, const int K) {
std::vector<float>& V, std::vector<float>& M,
const int C, const int K) {
constexpr auto P = 8 * 8 / kWinogradAlpha;

for (auto b = 0; b < kWinogradTile; b++) {
@@ -188,7 +188,7 @@ void Transforms::WinogradSgemm(const std::vector<float>& U,
}

void Transforms::WinogradTransformOut(const std::vector<float>& M,
std::vector<float>& Y, const int K) {
std::vector<float>& Y, const int K) {
constexpr auto W = 8;
constexpr auto H = 8;
constexpr auto wtiles = (W + 1) / 2;
@@ -247,11 +247,10 @@ void Transforms::WinogradTransformOut(const std::vector<float>& M,
}

void Transforms::WinogradConvolve3(const int outputs,
const std::vector<float>& input,
const std::vector<float>& U,
std::vector<float>& V,
std::vector<float>& M,
std::vector<float>& output) {
const std::vector<float>& input,
const std::vector<float>& U,
std::vector<float>& V, std::vector<float>& M,
std::vector<float>& output) {
constexpr unsigned int filter_len = kWinogradAlpha * kWinogradAlpha;
const auto input_channels = U.size() / (outputs * filter_len);

@@ -419,7 +418,7 @@ void Transforms::OffsetBatchNormMeans(std::vector<float>& bn_means,
// still have non-zero biases.
// Move biases to batchnorm means to make the output match without having
// to separately add the biases.
for (auto i = 0; i < bn_means.size(); i++) bn_means[i] -= biases[i];
for (size_t i = 0; i < bn_means.size(); i++) bn_means[i] -= biases[i];
}

void Transforms::InvertBatchNormStddev(std::vector<float>& weights) {
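For context on WinogradTransformF in the renamed file above: the F(2x2, 3x3) filter transform it implements computes, per (output, input) channel pair, the 4x4 tile U = G g G^T for each 3x3 filter g, where G is the standard Winograd matrix [[1, 0, 0], [1/2, 1/2, 1/2], [1/2, -1/2, 1/2], [0, 0, 1]]; the existing comment "transpose(G.dot(f).dot(G.transpose()))" describes exactly that, with the result stored transposed for a friendlier SGEMM memory layout. A reference sketch for a single filter (illustration only, not code from this PR):

#include <array>

// F(2x2,3x3) filter transform U = G * g * G^T for one 3x3 filter g,
// using the standard 4x3 Winograd G matrix.
std::array<float, 16> WinogradFilterTile(const std::array<float, 9>& g) {
  static const float G[4][3] = {{1.0f, 0.0f, 0.0f},
                                {0.5f, 0.5f, 0.5f},
                                {0.5f, -0.5f, 0.5f},
                                {0.0f, 0.0f, 1.0f}};
  float tmp[4][3] = {};  // tmp = G * g
  for (int i = 0; i < 4; ++i)
    for (int j = 0; j < 3; ++j)
      for (int k = 0; k < 3; ++k) tmp[i][j] += G[i][k] * g[k * 3 + j];
  std::array<float, 16> U{};  // U = tmp * G^T
  for (int i = 0; i < 4; ++i)
    for (int j = 0; j < 4; ++j)
      for (int k = 0; k < 3; ++k) U[i * 4 + j] += tmp[i][k] * G[j][k];
  return U;
}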
File renamed without changes.
23 changes: 7 additions & 16 deletions src/neural/network_blas.cc
@@ -16,9 +16,9 @@
along with Leela Chess. If not, see <http://www.gnu.org/licenses/>.
*/

#include "neural/network.h"
#include "neural/CL/transforms.h"
#include "neural/factory.h"
#include "neural/transforms.h"
#include "neural/network.h"

#include <algorithm>
#include <cassert>
@@ -84,17 +84,8 @@ class BlasComputation : public NetworkComputation {
constexpr int height = 8;
constexpr int tiles = width * height / 4;

/*
static constexpr int NUM_VALUE_INPUT_PLANES = 32;
static constexpr int NUM_POLICY_INPUT_PLANES = 32;
static constexpr int NUM_OUTPUT_POLICY = 1858;
static constexpr int NUM_VALUE_CHANNELS = 128;
*/

int NUM_VALUE_INPUT_PLANES = weights_.value.bn_means.size();
int NUM_POLICY_INPUT_PLANES = weights_.policy.bn_means.size();
int NUM_OUTPUT_POLICY = weights_.ip_pol_b.size();
int NUM_VALUE_CHANNELS = weights_.ip1_val_b.size();

static constexpr auto kWinogradAlpha = 4;
static constexpr auto kWinogradTile = kWinogradAlpha * kWinogradAlpha;
@@ -116,7 +107,7 @@
std::vector<float> value_data(NUM_VALUE_INPUT_PLANES * width * height);

Transforms::WinogradConvolve3(output_channels, input,
weights_.input.weights, V, M, conv_out);
weights_.input.weights, V, M, conv_out);
Transforms::Batchnorm<64>(output_channels, conv_out,
weights_.input.bn_means.data(),
weights_.input.bn_stddivs.data());
@@ -132,15 +123,15 @@
std::copy(begin(conv_in), end(conv_in), begin(res));

Transforms::WinogradConvolve3(output_channels, conv_in, conv1.weights, V,
M, conv_out);
M, conv_out);
Transforms::Batchnorm<64>(output_channels, conv_out,
conv1.bn_means.data(), conv1.bn_stddivs.data());

auto& conv2 = residual.conv2;
output_channels = conv2.biases.size();
std::swap(conv_out, conv_in);
Transforms::WinogradConvolve3(output_channels, conv_in, conv2.weights, V,
M, conv_out);
M, conv_out);
Transforms::Batchnorm<64>(output_channels, conv_out,
conv2.bn_means.data(), conv2.bn_stddivs.data(),
res.data());
@@ -214,7 +205,7 @@ class BlasNetwork : public Network {
Transforms::InvertBatchNormStddev(input_batchnorm_stddivs);

// residual blocks
for (auto i = 0; i < residual_blocks; i++) {
for (size_t i = 0; i < residual_blocks; i++) {
auto& residual = weights_.residual[i];
auto& conv1 = residual.conv1;
auto& conv2 = residual.conv2;
@@ -275,4 +266,4 @@

REGISTER_NETWORK("blas", BlasNetwork, 50)

} // namespace lc0
} // namespace lczero
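A note on the weight pre-processing used above: Transforms::OffsetBatchNormMeans folds each convolution bias into the following batchnorm mean (mean' = mean - bias, since (x + b) - mean == x - (mean - b)), and Transforms::InvertBatchNormStddev pre-inverts the divisor, so the per-element batchnorm in the inference loop reduces to a subtract and a multiply. A short sketch of that folding with assumed helper names (the real lc0 transforms may also fold in an epsilon and operate on variances rather than stddevs):

#include <vector>

// Fold conv biases into batchnorm means: (x + b - mean) == (x - (mean - b)).
void FoldBiasIntoMeans(std::vector<float>& bn_means,
                       const std::vector<float>& biases) {
  for (size_t i = 0; i < bn_means.size(); ++i) bn_means[i] -= biases[i];
}

// Pre-invert so per-element batchnorm becomes y = (x - mean) * inv_stddev.
void InvertStddevs(std::vector<float>& bn_stddivs) {
  for (float& s : bn_stddivs) s = 1.0f / s;
}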