Skip to content

Commit

Permalink
Refine paddle.manual_seed (#26496)
Browse files Browse the repository at this point in the history
* refine manual seed

* fix ci problem

* fix unittests

* fix unittest

* set is_init_py=false in manual_seed

* fix unittest

* fix bernoulli_op

* fix(unittest): change random_seed to manual_seed

* 🐞fix(unittest): fix manual_seed

* trigger ci

* fix test_sentiment

* fix test_imperative_save_load

* fix test_uniform_random_op

* fix test_uniform_random_op

* fix test_jit_save_load

* merge develop

* fix manual_seed

* fix manual_seed

* use global engine

* use shared_ptr

* fix double free

* fix bug

* fix bug

* fix bug

* fix test bug

* fix test bug

* fix test bug

* fix ci
  • Loading branch information
zhiqiu authored Aug 28, 2020
1 parent 31f422a commit 844583c
Show file tree
Hide file tree
Showing 78 changed files with 700 additions and 646 deletions.
107 changes: 81 additions & 26 deletions paddle/fluid/framework/generator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,67 +12,122 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/generator.h"

#include <glog/logging.h>

#include <deque>
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <utility>

#include "paddle/fluid/framework/generator.h"

namespace paddle {
namespace framework {

std::shared_ptr<Generator> Generator::gen_instance_ = NULL;
const std::shared_ptr<Generator>& DefaultCPUGenerator() {
static auto default_cpu_generator =
std::make_shared<Generator>(GetRandomSeed());
VLOG(4) << "initial seed: " << default_cpu_generator->GetCurrentSeed()
<< ", cpu engine: " << default_cpu_generator->GetCPUEngine().get();
return default_cpu_generator;
}

std::shared_ptr<std::mt19937_64> OpDefaultCPUEngine() {
static auto op_default_cpu_engine = std::make_shared<std::mt19937_64>();
return op_default_cpu_engine;
}

// NOTE(zhiqiu): there are 3 conditions:
// (1) op seed is not set and DefaultCPUGenerator is inited, use
// DefaultCPUGenerator
// (2) op seed is not set and DefaultCPUGenerator is not inited, use se
// OpDefaultCPUEngine() and set a radnom seed
// (3) op seed is set, use OpDefaultCPUEngine() and set the seed
std::shared_ptr<std::mt19937_64> GetCPURandomEngine(uint64_t seed) {
if (DefaultCPUGenerator()->GetIsInitPy() && seed == 0) {
VLOG(4) << "Use random engine from generator";
return DefaultCPUGenerator()->GetCPUEngine();
} else {
// NOTE(zhiqiu): creating an engine instance everytime instead of using
// OpDefaultCPUEngine(), this is the legacy behavior of random operators.
// The benefit is that when runing PE with fixed-seed in multiple thrads,
// each thread has their own engine, and doesn't affect each other.
//
// And we need to measure the determinacy of Generator in PE.
auto engine = std::make_shared<std::mt19937_64>();
if (seed == 0) {
seed = GetRandomSeed();
VLOG(4) << "Use default random engine with random seed = " << seed;
} else {
VLOG(4) << "Use default random engine with fixed random seed = " << seed;
}
static std::mutex mu_;
{
std::lock_guard<std::mutex> lock(mu_);
engine->seed(seed);
}
return engine;
}
}

GeneratorState* Generator::GetState() {
std::lock_guard<std::mutex> lock(this->mutex);
return this->state_.get();
GeneratorState Generator::GetState() {
std::lock_guard<std::mutex> lock(this->mu_);
state_.cpu_engine = *engine_;
return this->state_;
}

void Generator::SetState(GeneratorState* state_in) {
std::lock_guard<std::mutex> lock(this->mutex);
*this->state_ = *state_in;
void Generator::SetState(const GeneratorState& state) {
std::lock_guard<std::mutex> lock(this->mu_);
this->state_ = state;
this->engine_ = std::make_shared<std::mt19937_64>(state.cpu_engine);
}

uint64_t Generator::GetCurrentSeed() {
std::lock_guard<std::mutex> lock(this->mutex);
return this->state_->current_seed;
std::lock_guard<std::mutex> lock(this->mu_);
return this->state_.current_seed;
}

uint64_t Generator::Seed() {
std::lock_guard<std::mutex> lock(this->mutex);
std::lock_guard<std::mutex> lock(this->mu_);
uint64_t seed;
std::random_device de;
seed = ((((uint64_t)de()) << 32) + de()) & 0x1FFFFFFFFFFFFF;
this->state_->current_seed = seed;
this->state_.current_seed = seed;
std::seed_seq seq({seed});
this->state_->cpu_engine.seed(seq);
this->engine_->seed(seq);

return this->state_->current_seed;
return this->state_.current_seed;
}

void Generator::SetCurrentSeed(uint64_t seed) {
std::lock_guard<std::mutex> lock(this->mutex);
this->state_->current_seed = uint64_t(seed);
std::lock_guard<std::mutex> lock(this->mu_);
this->state_.current_seed = seed;
std::seed_seq seq({seed});
this->state_->cpu_engine.seed(seq);
this->engine_->seed(seq);
}

std::mt19937_64& Generator::GetCPUEngine() {
std::lock_guard<std::mutex> lock(this->mutex);
return this->state_->cpu_engine;
std::shared_ptr<std::mt19937_64> Generator::GetCPUEngine() {
std::lock_guard<std::mutex> lock(this->mu_);
return this->engine_;
}

void Generator::SetCPUEngine(std::mt19937_64 engine) {
std::lock_guard<std::mutex> lock(this->mutex);
this->state_->cpu_engine = std::mt19937_64(engine);
void Generator::SetCPUEngine(std::shared_ptr<std::mt19937_64> engine) {
std::lock_guard<std::mutex> lock(this->mu_);
this->engine_ = engine;
}

uint64_t Generator::Random64() {
std::lock_guard<std::mutex> lock(this->mutex);
return this->state_->cpu_engine();
std::lock_guard<std::mutex> lock(this->mu_);
auto engine = this->engine_;
return (*engine)();
}

void Generator::SetIsInitPy(bool is_init_py) {
this->is_init_py_ = is_init_py;
VLOG(4) << "SetIsInitPy:" << this->is_init_py_;
}
bool Generator::GetIsInitPy() const { return this->is_init_py_; }

} // namespace framework
} // namespace paddle
87 changes: 50 additions & 37 deletions paddle/fluid/framework/generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ limitations under the License. */

#pragma once

#include <glog/logging.h>
#include <stdint.h>

#include <atomic>
#include <deque>
#include <iostream> // temp for debug
Expand All @@ -27,6 +29,12 @@ limitations under the License. */
namespace paddle {
namespace framework {

static uint64_t GetRandomSeed() {
std::random_device rd;
// double has 53 bit significant, so limit uint64 to 53 bits
return ((((uint64_t)rd()) << 32) + rd()) & 0x1FFFFFFFFFFFFF;
}

struct GeneratorState {
int64_t device = -1;
uint64_t current_seed = 34342423252;
Expand All @@ -35,62 +43,67 @@ struct GeneratorState {

struct Generator {
Generator() {
GeneratorState default_gen_state_cpu;
default_gen_state_cpu.device = -1;
default_gen_state_cpu.current_seed = 34342423252;
std::seed_seq seq({34342423252});
default_gen_state_cpu.cpu_engine = std::mt19937_64(seq);
this->state_ = std::make_shared<GeneratorState>(default_gen_state_cpu);
auto seed = GetRandomSeed();
std::seed_seq seq({seed});
auto engine = std::make_shared<std::mt19937_64>(seq);
this->state_.cpu_engine = *engine;
this->state_.device = -1;
this->state_.current_seed = seed;
this->engine_ = engine;
VLOG(4) << "initial seed: " << this->state_.current_seed
<< ", cpu engine: " << &this->state_.cpu_engine;
}
explicit Generator(uint64_t seed) {
std::seed_seq seq({seed});
auto engine = std::make_shared<std::mt19937_64>(seq);
this->state_.cpu_engine = *engine;
this->state_.device = -1;
this->state_.current_seed = seed;
this->engine_ = engine;
VLOG(4) << "initial seed: " << this->state_.current_seed
<< ", cpu engine: " << &this->state_.cpu_engine;
this->is_init_py_ = true; // TODO(zhiqiu): remove it in future
}
explicit Generator(GeneratorState state_in)
: state_{std::make_shared<GeneratorState>(state_in)} {}
Generator(const Generator& other)
: Generator(other, std::lock_guard<std::mutex>(other.mutex)) {}
Generator(const Generator& other) = delete;

// get random state
GeneratorState* GetState();
GeneratorState GetState();
// set random state
void SetState(GeneratorState* state_in);
void SetState(const GeneratorState&);
// get current seed
uint64_t GetCurrentSeed();
// random a seed and get
uint64_t Seed();

// set seed
void SetCurrentSeed(uint64_t seed);
// get cpu engine
std::mt19937_64& GetCPUEngine();
std::shared_ptr<std::mt19937_64> GetCPUEngine();
// set cpu engine
void SetCPUEngine(std::mt19937_64 engine);
void SetCPUEngine(std::shared_ptr<std::mt19937_64>);

uint64_t Random64();

bool is_init_py = false;
void SetIsInitPy(bool);
bool GetIsInitPy() const;

// CPU Generator singleton
static std::shared_ptr<Generator> GetInstance() {
if (NULL == gen_instance_) {
gen_instance_.reset(new paddle::framework::Generator());
}
return gen_instance_;
}
private:
GeneratorState state_;
std::shared_ptr<std::mt19937_64> engine_;
mutable std::mutex mu_;

// NOTE(zhiqiu): is_init_py_ is used to make generator be compatible with
// old seed, and it should be removed after all random-related operators
// and unittests upgrades to use generator.
bool is_init_py_ = false;
};

static std::shared_ptr<Generator> GetInstanceX() {
if (NULL == gen_instance_) {
gen_instance_.reset(new paddle::framework::Generator());
}
gen_instance_->is_init_py = true;
return gen_instance_;
}
// The DefaultCPUGenerator is used in manual_seed()
const std::shared_ptr<Generator>& DefaultCPUGenerator();

private:
static std::shared_ptr<Generator> gen_instance_;
std::shared_ptr<GeneratorState> state_;
mutable std::mutex mutex;
// If op seed is set or global is not set, the OpDefaultCPUEngine is used.
std::shared_ptr<std::mt19937_64> OpDefaultCPUEngine();

Generator(const Generator& other, const std::lock_guard<std::mutex>&)
: state_(std::make_shared<GeneratorState>(*(other.state_))) {}
};
std::shared_ptr<std::mt19937_64> GetCPURandomEngine(uint64_t);

} // namespace framework
} // namespace paddle
6 changes: 3 additions & 3 deletions paddle/fluid/operators/bernoulli_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ class BernoulliOpKernel<platform::CPUDeviceContext, T>

int64_t size = x->numel();
std::uniform_real_distribution<T> dist(0.0, 1.0);
auto gen_ptr = framework::Generator::GetInstance();
std::mt19937_64 &gen_engine = gen_ptr->GetCPUEngine();
auto gen_ptr = framework::DefaultCPUGenerator();
auto engine = gen_ptr->GetCPUEngine();

for (int64_t i = 0; i < size; ++i) {
out_data[i] = BernoulliFunctor(in_data[i], dist(gen_engine));
out_data[i] = BernoulliFunctor(in_data[i], dist(*engine));
}
}
}; // namespace operators
Expand Down
34 changes: 8 additions & 26 deletions paddle/fluid/operators/distributed/large_scale_kv.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,19 @@

#pragma once

#include <ThreadPool.h>
#include <gflags/gflags.h>

#include <functional>
#include <future> // NOLINT
#include <memory>
#include <string>
#include <thread> // NOLINT
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include <thread> // NOLINT

#include <ThreadPool.h>
#include "paddle/fluid/framework/generator.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/rw_lock.h"
Expand Down Expand Up @@ -89,26 +88,17 @@ class UniformInitializer : public Initializer {
min_ = std::stof(attrs[2]);
max_ = std::stof(attrs[3]);

if (seed_ == 0) {
seed_ = std::random_device()();
}

random_engine_.seed(seed_);
dist_ = std::uniform_real_distribution<float>(min_, max_);
random_engine_ = framework::GetCPURandomEngine(seed_);
}

float GetValue() override {
return framework::Generator::GetInstance()->is_init_py
? dist_(framework::Generator::GetInstance()->GetCPUEngine())
: dist_(random_engine_);
// return dist_(random_engine_);
}
float GetValue() override { return dist_(*random_engine_); }

private:
float min_;
float max_;

std::minstd_rand random_engine_;
std::shared_ptr<std::mt19937_64> random_engine_;
std::uniform_real_distribution<float> dist_;
};

Expand Down Expand Up @@ -139,26 +129,18 @@ class GaussianInitializer : public Initializer {
mean_ = std::stof(attrs[2]);
std_ = std::stof(attrs[3]);

if (seed_ == 0) {
seed_ = std::random_device()();
}
random_engine_ = framework::GetCPURandomEngine(seed_);

random_engine_.seed(seed_);
dist_ = std::normal_distribution<float>(mean_, std_);
}

float GetValue() override {
return framework::Generator::GetInstance()->is_init_py
? dist_(framework::Generator::GetInstance()->GetCPUEngine())
: dist_(random_engine_);
// return dist_(random_engine_);
}
float GetValue() override { return dist_(*random_engine_); }

private:
float std_;
float mean_;

std::minstd_rand random_engine_;
std::shared_ptr<std::mt19937_64> random_engine_;
std::normal_distribution<float> dist_;
};

Expand Down
Loading

0 comments on commit 844583c

Please sign in to comment.