From 36c19d2631bc2f6b572ee3d24bb43b94d6dd88c4 Mon Sep 17 00:00:00 2001
From: Anastasia Popova
Date: Mon, 6 Sep 2021 11:49:27 +0300
Subject: [PATCH] RandomUniform reference implementation. (#7012)

* Added RandomUniform reference implementation.
* Corrected comments.
* Small correction.
* Code style correction.
* Added has_evaluate() method.
* Added comments, added names to consts.
* Small fix.
* Replaced arrays with vectors.
* Apply suggestions from code review

Co-authored-by: Ilya Churaev

* Code refactoring.
* Corrected tests, code style.
* Added comment.
* Added comments.
* Temporarily added debug output.
* Temporarily added debug output.
* Removed debug output.
* Added comment.
* Added comment.
* Enabled state saving for RandomUniform.
* Code style.
* Used template to convert types.
* Added comments.

Co-authored-by: Ilya Churaev
---
 .../op_reference/random_uniform.cpp           | 205 +++++++++++
 ...isable_random_uniform_constant_folding.hpp |  27 ++
 .../common_optimizations.cpp                  |   2 +
 ...isable_random_uniform_constant_folding.cpp |  24 ++
 .../core/include/ngraph/op/random_uniform.hpp |  16 +-
 .../runtime/reference/random_uniform.hpp      |  39 ++
 .../src/runtime/reference/random_uniform.cpp  | 333 ++++++++++++++++++
 ngraph/core/src/op/random_uniform.cpp         |  94 ++++-
 8 files changed, 737 insertions(+), 3 deletions(-)
 create mode 100644 docs/template_plugin/tests/functional/op_reference/random_uniform.cpp
 create mode 100644 inference-engine/src/transformations/include/transformations/common_optimizations/disable_random_uniform_constant_folding.hpp
 create mode 100644 inference-engine/src/transformations/src/transformations/common_optimizations/disable_random_uniform_constant_folding.cpp
 create mode 100644 ngraph/core/reference/include/ngraph/runtime/reference/random_uniform.hpp
 create mode 100644 ngraph/core/reference/src/runtime/reference/random_uniform.cpp

diff --git a/docs/template_plugin/tests/functional/op_reference/random_uniform.cpp b/docs/template_plugin/tests/functional/op_reference/random_uniform.cpp
new file mode 100644
index 00000000000000..2e454e4a1453ee
--- /dev/null
+++ b/docs/template_plugin/tests/functional/op_reference/random_uniform.cpp
@@ -0,0 +1,205 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+#include "base_reference_test.hpp"
+#include "ngraph/opsets/opset8.hpp"
+#include "ngraph/util.hpp"
+
+using namespace ngraph;
+
+namespace reference_tests {
+namespace {
+
+struct RandomUniformParams {
+    RandomUniformParams(const std::vector<int64_t>& paramOutShape,
+                        const Tensor& paramMinValue,
+                        const Tensor& paramMaxValue,
+                        ngraph::element::Type paramOutType,
+                        int64_t paramGlobalSeed,
+                        int64_t paramOpSeed,
+                        const Tensor& paramExpected,
+                        const std::string& test_name)
+        : out_shape(paramOutShape),
+          min_val(paramMinValue),
+          max_val(paramMaxValue),
+          out_type(paramOutType),
+          global_seed(paramGlobalSeed),
+          op_seed(paramOpSeed),
+          expected(paramExpected),
+          test_case_name(test_name) {}
+    std::vector<int64_t> out_shape;
+    Tensor min_val;
+    Tensor max_val;
+    ngraph::element::Type out_type;
+    int64_t global_seed;
+    int64_t op_seed;
+    Tensor expected;
+    std::string test_case_name;
+};
+
+class ReferenceRandomUniformLayerTest : public testing::TestWithParam<RandomUniformParams>, public CommonReferenceTest {
+public:
+    void SetUp() override {
+        auto params = GetParam();
+        function = CreateFunction(params.out_shape,
+                                  params.min_val,
+                                  params.max_val,
+                                  params.out_type,
+                                  params.global_seed,
+                                  params.op_seed);
+        inputData = {params.min_val.data,
+                     params.max_val.data};
+        refOutData = {params.expected.data};
+    }
+    static std::string getTestCaseName(const testing::TestParamInfo<RandomUniformParams>& obj) {
+        auto param = obj.param;
+        return param.test_case_name;
+    }
+
+private:
+    static std::shared_ptr<Function> CreateFunction(const std::vector<int64_t>& out_shape,
+                                                    const Tensor& min_val,
+                                                    const Tensor& max_val,
+                                                    const ngraph::element::Type& out_type,
+                                                    int64_t global_seed,
+                                                    int64_t op_seed) {
+        const auto min_val_param = std::make_shared<opset8::Parameter>(min_val.type, min_val.shape);
+        const auto max_val_param = std::make_shared<opset8::Parameter>(max_val.type, max_val.shape);
+        auto out_shape_ = std::make_shared<opset8::Constant>(element::i64, Shape{out_shape.size()}, out_shape);
+
+        return std::make_shared<Function>(NodeVector{std::make_shared<opset8::RandomUniform>(out_shape_,
+                                                                                             min_val_param,
+                                                                                             max_val_param,
+                                                                                             out_type,
+                                                                                             global_seed,
+                                                                                             op_seed)},
+                                          ParameterVector{min_val_param, max_val_param});
+    }
+};
+
+TEST_P(ReferenceRandomUniformLayerTest, RandomUniformWithHardcodedRefs) {
+    Exec();
+}
+
+} // namespace
+
+// Reference values for the following tests are obtained from a single-layer TensorFlow model with tf.random.uniform().
+INSTANTIATE_TEST_SUITE_P(
+    smoke_RandomUniform_With_Hardcoded_Refs,
+    ReferenceRandomUniformLayerTest,
+    ::testing::Values(
+        RandomUniformParams(std::vector<int64_t>{3, 2, 4},
+                            Tensor{{1}, element::f32, std::vector<float>{0}},
+                            Tensor{{1}, element::f32, std::vector<float>{1}},
+                            element::Type_t::f32,
+                            150,
+                            10,
+                            Tensor{{3, 2, 4},
+                                   element::f32,
+                                   std::vector<float>{0.70112360, 0.30539632, 0.93931055, 0.94560349, 0.11694777,
+                                                      0.50770056, 0.51971972, 0.22727466, 0.99137402, 0.35519040,
+                                                      0.82692313, 0.59864855, 0.31364107, 0.57481313, 0.41399086,
+                                                      0.96308255, 0.37140799, 0.85253167, 0.09358585, 0.08200955,
+                                                      0.23655081, 0.81056309, 0.74226606, 0.76106691}},
+                            "float32_default_min_max"),
+        RandomUniformParams(std::vector<int64_t>{3, 2, 4},
+                            Tensor{{1}, element::f16, std::vector<float16>{0}},
+                            Tensor{{1}, element::f16, std::vector<float16>{1}},
+                            element::Type_t::f16,
+                            150,
+                            10,
+                            Tensor{{3, 2, 4},
+                                   element::f16,
+                                   std::vector<float16>{0.60449219, 0.80664062, 0.83203125, 0.38378906, 0.03613281,
+                                                        0.08300781, 0.54394531, 0.83398438, 0.33593750, 0.71972656,
+                                                        0.15429688, 0.12890625, 0.34765625, 0.86914062, 0.41308594,
+                                                        0.57226562, 0.57421875, 0.93945312, 0.65527344, 0.82226562,
+                                                        0.82421875, 0.13281250, 0.64355469, 0.66015625}},
+                            "float16_default_min_max"),
+        RandomUniformParams(std::vector<int64_t>{3, 2, 4},
+                            Tensor{{1}, element::f32, std::vector<float>{-650}},
+                            Tensor{{1}, element::f32, std::vector<float>{450}},
+                            element::Type_t::f32,
+                            150,
+                            10,
+                            Tensor{{3, 2, 4},
+                                   element::f32,
+                                   std::vector<float>{121.23596191,  -314.06405640, 383.24157715,  390.16381836,
+                                                      -521.35742188, -91.52935791,  -78.30828857,  -399.99786377,
+                                                      440.51147461,  -259.29055786, 259.61541748,  8.51342773,
+                                                      -304.99481201, -17.70556641,  -194.61004639, 409.39074707,
+                                                      -241.45120239, 287.78485107,  -547.05554199, -559.78948975,
+                                                      -389.79409790, 241.61938477,  166.49267578,  187.17358398}},
+                            "float32_non_default_min_max"),
+        RandomUniformParams(std::vector<int64_t>{3, 2, 4},
+                            Tensor{{1}, element::f16, std::vector<float16>{-1.5}},
+                            Tensor{{1}, element::f16, std::vector<float16>{-1.0}},
+                            element::Type_t::f16,
+                            150,
+                            10,
+                            Tensor{{3, 2, 4},
+                                   element::f16,
+                                   std::vector<float16>{-1.19726562, -1.09667969, -1.08398438, -1.30859375, -1.48242188,
+                                                        -1.45898438, -1.22851562, -1.08300781, -1.33203125, -1.14062500,
+                                                        -1.42285156, -1.43554688, -1.32617188, -1.06542969, -1.29296875,
+                                                        -1.21386719, -1.21289062, -1.03027344, -1.17187500, -1.08886719,
+                                                        -1.08789062, -1.43359375, -1.17773438, -1.16992188}},
+                            "float16_non_default_min_max"),
+        RandomUniformParams(std::vector<int64_t>{2, 3, 4},
+                            Tensor{{1}, element::i32, std::vector<int32_t>{-100}},
+                            Tensor{{1}, element::i32, std::vector<int32_t>{50}},
+                            element::Type_t::i32,
+                            100,
+                            350,
+                            Tensor{{2, 3, 4},
+                                   element::i32,
+                                   std::vector<int32_t>{
+                                       22, -56, -33, -89, -98, -33, -3,  -48, -82, 5,  -66, 21,
+                                       29, -42, -73, -37, 3,   36,  -35, 20,  -11, -8, -78, 47,
+                                   }},
+                            "int32"),
+        RandomUniformParams(std::vector<int64_t>{5, 4, 3},
+                            Tensor{{1}, element::i64, std::vector<int64_t>{-2600}},
+                            Tensor{{1}, element::i64, std::vector<int64_t>{3700}},
+                            element::Type_t::i64,
+                            755,
+                            951,
+                            Tensor{{5, 4, 3},
+                                   element::i64,
+                                   std::vector<int64_t>{
+                                       2116, -1581, 2559,  -339,  -1660, 519,   90,   2027,  -210,  3330, 1831,  -1737,
+                                       2683, 2661,  3473,  1220,  3534,  -2384, 2199, 1935,  499,   2861, 2743,  3223,
+                                       -531, -836,  -65,   3435,  632,   1765,  2613, 1891,  1698,  3069, 169,   -792,
+                                       -32,  2976,  -1552, -2588, 3327,  -1756, 2637, -1084, 3567,  -778, -1465, 2967,
+                                       1242, 2672,  -1585, -2271, 3536,  -1502, 400,  2241,  3126,  908,  1073,  -2110}},
+                            "int64"),
+        RandomUniformParams(std::vector<int64_t>{7, 3},
+                            Tensor{{1}, element::bf16, std::vector<bfloat16>{0}},
+                            Tensor{{1}, element::bf16, std::vector<bfloat16>{1}},
+                            element::Type_t::bf16,
+                            4978,
+                            5164,
+                            Tensor{{7, 3},
+                                   element::bf16,
+                                   std::vector<bfloat16>{0.8984375, 0.84375,   0.1640625, 0.1875,    0.46875,   0.6875,
+                                                         0.5234375, 0.3046875, 0.9140625, 0.453125,  0.953125,  0.328125,
+                                                         0.359375,  0.1875,    0.9453125, 0.390625,  0.21875,   0.9921875,
+                                                         0.8203125, 0.453125,  0.875}},
+                            "bfloat16_default_min_max"),
+        RandomUniformParams(std::vector<int64_t>{7, 3},
+                            Tensor{{1}, element::bf16, std::vector<bfloat16>{-150}},
+                            Tensor{{1}, element::bf16, std::vector<bfloat16>{200}},
+                            element::Type_t::bf16,
+                            4978,
+                            5164,
+                            Tensor{{7, 3},
+                                   element::bf16,
+                                   std::vector<bfloat16>{164, 146, -92.5, -84.5, 14,    90,  33,  -43.5, 170, 8,   182,
+                                                         -35, -24, -84.5, 180,   -14, -73.5, 198, 138,   8,   156}},
+                            "bfloat16_non_default_min_max")),
+    ReferenceRandomUniformLayerTest::getTestCaseName);
+} // namespace reference_tests
diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/disable_random_uniform_constant_folding.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/disable_random_uniform_constant_folding.hpp
new file mode 100644
index 00000000000000..e9c59587eb67de
--- /dev/null
+++ b/inference-engine/src/transformations/include/transformations/common_optimizations/disable_random_uniform_constant_folding.hpp
@@ -0,0 +1,27 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+
+namespace ngraph {
+namespace pass {
+
+class DisableRandomUniformConstantFolding;
+
+} // namespace pass
+} // namespace ngraph
+
+/**
+ * @ingroup ie_transformation_common_api
+ * @brief Disables ConstantFolding for the RandomUniform operation. It is required because RandomUniform
+ * must generate a new sequence on each run.
+ */
+class ngraph::pass::DisableRandomUniformConstantFolding : public ngraph::pass::MatcherPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    DisableRandomUniformConstantFolding();
+};
diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp
index 253c4f113ab073..4e176543504b20 100644
--- a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp
+++ b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp
@@ -27,6 +27,7 @@
 #include "transformations/common_optimizations/hswish_fusion.hpp"
 #include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
 #include "transformations/common_optimizations/relu_fake_quantize_fusion.hpp"
+#include "transformations/common_optimizations/disable_random_uniform_constant_folding.hpp"
 #include "transformations/common_optimizations/add_fake_quantize_fusion.hpp"
 #include "transformations/common_optimizations/mul_fake_quantize_fusion.hpp"
 #include "transformations/common_optimizations/clamp_fusion.hpp"
@@ -88,6 +89,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::Function> f) {
     manager.register_pass();
+    manager.register_pass<ngraph::pass::DisableRandomUniformConstantFolding>();
     manager.register_pass();
     manager.register_pass();
     manager.register_pass(); // Resolves dynamism (replaces NonZero), CF needed
diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/disable_random_uniform_constant_folding.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/disable_random_uniform_constant_folding.cpp
new file mode 100644
index 00000000000000..7c93745d3fa449
--- /dev/null
+++ b/inference-engine/src/transformations/src/transformations/common_optimizations/disable_random_uniform_constant_folding.cpp
@@ -0,0 +1,24 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/common_optimizations/disable_random_uniform_constant_folding.hpp"
+
+#include
+#include
+#include
+#include
+
+NGRAPH_RTTI_DEFINITION(ngraph::pass::DisableRandomUniformConstantFolding, "DisableRandomUniformConstantFolding", 0);
+
+ngraph::pass::DisableRandomUniformConstantFolding::DisableRandomUniformConstantFolding() {
+    auto random_uniform = pattern::wrap_type<opset8::RandomUniform>();
+
+    ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) {
+        disable_constant_folding(m.get_match_root());
+        return true;
+    };
+
+    auto m = std::make_shared<pattern::Matcher>(random_uniform, "DisableRandomUniformConstantFolding");
+    this->register_matcher(m, callback);
+}
diff --git a/ngraph/core/include/ngraph/op/random_uniform.hpp b/ngraph/core/include/ngraph/op/random_uniform.hpp
index 242294cc748d89..f20ddecccab431 100644
--- a/ngraph/core/include/ngraph/op/random_uniform.hpp
+++ b/ngraph/core/include/ngraph/op/random_uniform.hpp
@@ -30,8 +30,8 @@ class NGRAPH_API RandomUniform : public Op {
                   const Output<Node>& min_val,
                   const Output<Node>& max_val,
                   const ngraph::element::Type& out_type,
-                  uint64_t global_seed,
-                  uint64_t op_seed);
+                  uint64_t global_seed = 0,
+                  uint64_t op_seed = 0);
 
     void validate_and_infer_types() override;
 
@@ -39,6 +39,11 @@ class NGRAPH_API RandomUniform : public Op {
     std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
 
+    /// \return Turns off constant folding for RandomUniform operation.
+    bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override {
+        return false;
+    }
+
     /// \return The output tensor type.
     const ngraph::element::Type& get_out_type() const {
         return m_output_type;
     }
@@ -63,10 +68,17 @@ class NGRAPH_API RandomUniform : public Op {
         m_op_seed = seed2;
     }
 
+    bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override;
+
+    bool has_evaluate() const override;
+
 protected:
     ngraph::element::Type m_output_type;
     uint64_t m_global_seed;
     uint64_t m_op_seed;
+
+    mutable std::mutex m_state_mutex;
+    mutable std::pair<uint64_t, uint64_t> m_state;
 };
 } // namespace v8
 } // namespace op
diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/random_uniform.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/random_uniform.hpp
new file mode 100644
index 00000000000000..43df6529de33a6
--- /dev/null
+++ b/ngraph/core/reference/include/ngraph/runtime/reference/random_uniform.hpp
@@ -0,0 +1,39 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+
+#include "ngraph/shape.hpp"
+
+namespace ngraph {
+namespace runtime {
+namespace reference {
+std::pair<uint64_t, uint64_t> random_uniform(const uint64_t* out_shape,
+                                             const char* min_val,
+                                             const char* max_val,
+                                             char* out,
+                                             const Shape& out_shape_shape,
+                                             const ngraph::element::Type& elem_type,
+                                             uint64_t seed,
+                                             uint64_t seed2,
+                                             std::pair<uint64_t, uint64_t> prev_state);
+
+// The following const values are taken from the original paper:
+// https://www.thesalmons.org/john/random123/papers/random123sc11.pdf
+const uint32_t crush_resistance_const_lower_value = 0x9E3779B9;
+const uint32_t crush_resistance_const_upper_value = 0xBB67AE85;
+const uint64_t statistic_maximizing_multiplier_n = 0xD2511F53;
+const uint64_t statistic_maximizing_multiplier_counter = 0xCD9E8D57;
+const size_t rounds_number = 10;
+
+// Determines how many elements of the RNG sequence are skipped between runs.
+// Can be any positive value; 256 is chosen for parity with TensorFlow.
+const uint64_t skip_const = 256;
+
+} // namespace reference
+} // namespace runtime
+} // namespace ngraph
diff --git a/ngraph/core/reference/src/runtime/reference/random_uniform.cpp b/ngraph/core/reference/src/runtime/reference/random_uniform.cpp
new file mode 100644
index 00000000000000..6e6f1f7c95d06a
--- /dev/null
+++ b/ngraph/core/reference/src/runtime/reference/random_uniform.cpp
@@ -0,0 +1,333 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ngraph/runtime/reference/random_uniform.hpp"
+
+#include
+
+#include "ngraph/shape.hpp"
+
+namespace ngraph {
+namespace runtime {
+namespace reference {
+
+// Splits a uint64 value into two uint32 values holding the low and high halves of the original value.
+std::pair<uint32_t, uint32_t> split_high_low(uint64_t value) {
+    uint32_t low = static_cast<uint32_t>(value);
+    uint32_t high = static_cast<uint32_t>(value >> 32);
+    return {low, high};
+}
+
+// Concatenates two uint32 values into a single uint64 value.
+uint64_t unite_high_low(uint32_t high, uint32_t low) {
+    return (static_cast<uint64_t>(high) << 32) + low;
+}
+
+// Runs a single "round" of the Philox algorithm.
+void calculate_round(uint64_t key, uint64_t& counter, uint64_t& n) {
+    // Split the key, counter and n into pairs of uint32 values.
+    auto counter_lr = split_high_low(counter);
+    auto key_lr = split_high_low(key);
+    auto n_lr = split_high_low(n);
+
+    // Each round performs the following update of n and counter:
+    //     left uint32 part  = mullo(R, M)
+    //     right uint32 part = mulhi(R, M) xor k xor L
+    //     mulhi(a, b) = floor((a * b) / 2^32)
+    //     mullo(a, b) = (a * b) mod 2^32,
+    // where M is the statistic_maximizing_multiplier constant.
+    auto prod0 = split_high_low(statistic_maximizing_multiplier_n * n_lr.first);
+    auto prod1 = split_high_low(statistic_maximizing_multiplier_counter * counter_lr.first);
+    n_lr.first = prod1.second ^ n_lr.second ^ key_lr.first;
+    n_lr.second = prod1.first;
+    counter_lr.first = prod0.second ^ counter_lr.second ^ key_lr.second;
+    counter_lr.second = prod0.first;
+
+    // Unite counter and n back into uint64 values.
+    counter = unite_high_low(counter_lr.second, counter_lr.first);
+    n = unite_high_low(n_lr.second, n_lr.first);
+}
+
+// Increases the key value.
+void raise_key(uint64_t& key) {
+    auto key_lr = split_high_low(key);
+    key_lr.first += crush_resistance_const_lower_value;
+    key_lr.second += crush_resistance_const_upper_value;
+    key = unite_high_low(key_lr.second, key_lr.first);
+}
+
+// Helper function for converting uint32 values to float32. Sets the fractional part of the
+// floating-point value with bits from the uint32 value. The resulting value is in the interval [0,1).
+float uint32_to_float(uint32_t x) {
+    // float32 is formatted as follows: sign(1 bit) exponent(8 bits) mantissa(23 bits).
+    // The value is interpreted using the following formula:
+    //     (-1)^sign * 1.mantissa * 2^(exponent - 127)
+    // Here we set the following values:
+    //     sign = 0
+    //     exponent = 127, for obtaining a zero exponent.
+    //     mantissa = 23 right bits from the generated uint32 random value.
+
+    uint32_t x_uint32 = (static_cast<uint32_t>(127) << 23) | (x & 0x7fffffu);
+
+    float x_float;
+    memcpy(&x_float, &x_uint32, sizeof(x_uint32));
+    return x_float - 1.0f;
+}
+
+// Helper function for converting uint32 values to float16. Sets the fractional part of the
+// floating-point value with bits from the uint32 value. The resulting value is in the interval [0,1).
+float16 uint32_to_float16(uint32_t x) {
+    // float16 is formatted as follows: sign(1 bit) exponent(5 bits) mantissa(10 bits).
+    // The value is interpreted using the following formula:
+    //     (-1)^sign * 1.mantissa * 2^(exponent - 15)
+    // Here we set the following values:
+    //     sign = 0
+    //     exponent = 15, for obtaining a zero exponent.
+    //     mantissa = 10 right bits from the generated uint32 random value.
+
+    uint16_t x_uint16 = static_cast<uint16_t>(x);
+    x_uint16 = (static_cast<uint16_t>(15) << 10) | (x_uint16 & 0x3ffu);
+
+    float16 x_float16;
+    memcpy(&x_float16, &x_uint16, sizeof(x_uint16));
+    return x_float16 - static_cast<float16>(1);
+}
+
+// Helper function for converting uint32 values to double. Sets the fractional part of the
+// double value with bits from two uint32 values. The resulting value is in the interval [0,1).
+double uint32_to_double(uint32_t x1, uint32_t x2) {
+    // float64 is formatted as follows: sign(1 bit) exponent(11 bits) mantissa(52 bits).
+    // The value is interpreted using the following formula:
+    //     (-1)^sign * 1.mantissa * 2^(exponent - 1023)
+    // Here we set the following values:
+    //     sign = 0
+    //     exponent = 1023, for obtaining a zero exponent.
+    //     mantissa = 52 right bits from two concatenated uint32 values from the random integer generator.
+
+    uint64_t significant = ((static_cast<uint64_t>(x1) & 0xfffffu) << 32) | static_cast<uint64_t>(x2);
+    uint64_t x_uint64 = ((static_cast<uint64_t>(1023) << 52) | significant);
+
+    double x_double;
+    memcpy(&x_double, &x_uint64, sizeof(x_uint64));
+    return x_double - 1.0;
+}
+
+// Helper function for converting uint32 values to bfloat16. Sets the fractional part of the
+// floating-point value with bits from the uint32 value. The resulting value is in the interval [0,1).
+bfloat16 uint32_to_bfloat16(uint32_t x) {
+    // bfloat16 is formatted as follows: sign(1 bit) exponent(8 bits) mantissa(7 bits).
+    // The value is interpreted using the following formula:
+    //     (-1)^sign * 1.mantissa * 2^(exponent - 127)
+    // Here we set the following values:
+    //     sign = 0
+    //     exponent = 127, for obtaining a zero exponent.
+    //     mantissa = 7 right bits from the generated uint32 random value.
+
+    uint16_t x_uint16 = static_cast<uint16_t>(x);
+    x_uint16 = (static_cast<uint16_t>(127) << 7) | (x_uint16 & 0x7fu);
+
+    bfloat16 x_bfloat16;
+    memcpy(&x_bfloat16, &x_uint16, sizeof(x_uint16));
+    return x_bfloat16 - static_cast<bfloat16>(1);
+}
+
+// Runs the Philox algorithm.
+void run_philox(uint64_t key, uint64_t counter, uint64_t n, size_t n_rounds, std::vector<uint32_t>& res) {
+    for (size_t i = 0; i < n_rounds; i++) {
+        calculate_round(key, counter, n);
+        if (i < n_rounds - 1)
+            raise_key(key);
+    }
+    auto res1 = split_high_low(n);
+    auto res2 = split_high_low(counter);
+    res[0] = res1.first;
+    res[1] = res1.second;
+    res[2] = res2.first;
+    res[3] = res2.second;
+}
+
+// Converts uint32 values to the destination type and normalizes them to the required range.
+template <typename T>
+void convert_to_output_type(const std::vector<uint32_t>& res,
+                            size_t step,
+                            const ngraph::element::Type& elem_type,
+                            const char* min_val,
+                            const char* max_val,
+                            char* out,
+                            size_t k,
+                            size_t elem_count,
+                            T (*convert_single_input)(uint32_t) = nullptr,
+                            T (*convert_two_inputs)(uint32_t, uint32_t, T, T) = nullptr,
+                            T (*mod_func)(uint32_t, T, T) = nullptr) {
+    // Get the min and max values.
+    T mn[1];
+    T mx[1];
+    memcpy(mn, min_val, elem_type.size());
+    memcpy(mx, max_val, elem_type.size());
+
+    std::vector<T> res_out_type(step);
+    if (elem_type.size() > 4) {
+        // Each element of the resulting sequence is formed using two uint32 values.
+        res_out_type[0] = convert_two_inputs(res[0], res[1], mn[0], mx[0]);
+        res_out_type[1] = convert_two_inputs(res[2], res[3], mn[0], mx[0]);
+    } else {
+        // Each element of the resulting sequence is formed using a single uint32 value.
+        std::transform(res.data(),
+                       res.data() + step,
+                       res_out_type.data(),
+                       [&mn, &mx, &convert_single_input, &mod_func](uint32_t elem) {
+                           if (convert_single_input != nullptr) {
+                               return convert_single_input(elem) * (mx[0] - mn[0]) + mn[0];
+                           } else {
+                               return mod_func(elem, mn[0], mx[0]);
+                           }
+                       });
+    }
+
+    memcpy(out + k * elem_type.size(), res_out_type.data(), std::min(step, elem_count - k) * elem_type.size());
+}
+
+// Implementation of RandomUniform that uses the Philox algorithm as the inner random unsigned integer generator.
+std::pair<uint64_t, uint64_t> random_uniform(const uint64_t* out_shape,
+                                             const char* min_val,
+                                             const char* max_val,
+                                             char* out,
+                                             const Shape& out_shape_shape,
+                                             const ngraph::element::Type& elem_type,
+                                             uint64_t seed,
+                                             uint64_t seed2,
+                                             std::pair<uint64_t, uint64_t> prev_state) {
+    // When both seed values are equal to zero, RandomUniform should generate a non-deterministic sequence.
+    // The implementation in plugins may differ for this case.
+    if (seed == 0 && seed2 == 0) {
+        std::srand(std::time(nullptr));
+        seed = std::rand();
+    }
+
+    // Get the previous counter state.
+    uint64_t n_state = prev_state.first;
+    uint64_t counter_state = prev_state.second;
+
+    // Initialize the Philox key and counters.
+    uint64_t key = seed;
+    uint64_t counter = counter_state > 0 ? counter_state : seed2;
+    uint64_t n = n_state;
+
+    // Calculate the total element count for generation.
+    size_t shape_count = shape_size(out_shape_shape);
+    size_t elem_count = 1;
+    for (size_t i = 0; i < shape_count; i++) {
+        elem_count *= out_shape[i];
+    }
+
+    // The Philox algorithm returns 4 elements of the RNG sequence per invocation.
+    const size_t philox_output_size = 4;
+
+    // Each run of the Philox algorithm generates 4 uint32 values.
+    // If output_type is int32, f32, bf16, or f16, each value is converted to the
+    // corresponding type, so we have 4 result values. For f64 and i64 we use
+    // a pair of values for conversion, so we have 2 result values.
+    // Step indicates how many values we generate in one iteration.
+    const size_t step = elem_type.size() > 4 ? 2 : 4;
+
+    for (size_t k = 0; k < elem_count; k += step) {
+        // Generate 4 random uint32 values using the Philox algorithm.
+        std::vector<uint32_t> res(philox_output_size);
+        run_philox(key, counter, n, rounds_number, res);
+
+        // Convert the values to the corresponding output_type.
+        switch (elem_type) {
+        case ngraph::element::Type_t::f32: {
+            convert_to_output_type<float>(res, step, elem_type, min_val, max_val, out, k, elem_count, uint32_to_float);
+            break;
+        }
+        case ngraph::element::Type_t::f16: {
+            convert_to_output_type<float16>(res,
+                                            step,
+                                            elem_type,
+                                            min_val,
+                                            max_val,
+                                            out,
+                                            k,
+                                            elem_count,
+                                            uint32_to_float16);
+            break;
+        }
+        case ngraph::element::Type_t::bf16: {
+            convert_to_output_type<bfloat16>(res,
+                                             step,
+                                             elem_type,
+                                             min_val,
+                                             max_val,
+                                             out,
+                                             k,
+                                             elem_count,
+                                             uint32_to_bfloat16);
+            break;
+        }
+        case ngraph::element::Type_t::f64: {
+            convert_to_output_type<double>(res,
+                                           step,
+                                           elem_type,
+                                           min_val,
+                                           max_val,
+                                           out,
+                                           k,
+                                           elem_count,
+                                           nullptr,
+                                           [](uint32_t a, uint32_t b, double mn, double mx) {
+                                               return uint32_to_double(a, b) * (mx - mn) + mn;
+                                           });
+            break;
+        }
+        case ngraph::element::Type_t::i32: {
+            convert_to_output_type<int>(res,
+                                        step,
+                                        elem_type,
+                                        min_val,
+                                        max_val,
+                                        out,
+                                        k,
+                                        elem_count,
+                                        nullptr,
+                                        nullptr,
+                                        [](uint32_t x, int mn, int mx) {
+                                            return static_cast<int>(x % (mx - mn) + mn);
+                                        });
+            break;
+        }
+        case ngraph::element::Type_t::i64: {
+            convert_to_output_type<int64_t>(res,
+                                            step,
+                                            elem_type,
+                                            min_val,
+                                            max_val,
+                                            out,
+                                            k,
+                                            elem_count,
+                                            nullptr,
+                                            [](uint32_t a, uint32_t b, int64_t mn, int64_t mx) {
+                                                return static_cast<int64_t>(unite_high_low(b, a) % (mx - mn) + mn);
+                                            });
+            break;
+        }
+        default:
+            throw ngraph_error("Unsupported type of RandomUniform: " + elem_type.get_type_name());
+        }
+        if (++n == 0)
+            ++counter;
+    }
+
+    // Calculate the counter values for the next RandomUniform run.
+    uint64_t skip_count = elem_count * skip_const;
+    n_state += skip_count;
+    if (n_state < skip_count)
+        counter_state++;
+
+    return {n_state, counter_state};
+}
+
+} // namespace reference
+} // namespace runtime
+} // namespace ngraph
diff --git a/ngraph/core/src/op/random_uniform.cpp b/ngraph/core/src/op/random_uniform.cpp
index 9b0b6cdc9fcb93..90a356cdef350b 100644
--- a/ngraph/core/src/op/random_uniform.cpp
+++ b/ngraph/core/src/op/random_uniform.cpp
@@ -7,6 +7,7 @@
 #include
 
 #include "itt.hpp"
+#include "ngraph/runtime/reference/random_uniform.hpp"
 
 using namespace std;
 using namespace ngraph;
@@ -116,7 +117,7 @@ bool op::v8::RandomUniform::visit_attributes(AttributeVisitor& visitor) {
 }
 
 shared_ptr<Node> op::v8::RandomUniform::clone_with_new_inputs(const OutputVector& new_args) const {
-    NGRAPH_OP_SCOPE(v8_Roll_clone_with_new_inputs);
+    NGRAPH_OP_SCOPE(v8_RandomUniform_clone_with_new_inputs);
     check_new_args_count(this, new_args);
     return make_shared<v8::RandomUniform>(new_args[0],
                                           new_args[1],
@@ -125,3 +126,94 @@ shared_ptr<Node> op::v8::RandomUniform::clone_with_new_inputs(const OutputVector& new_args) const {
                                           m_global_seed,
                                           m_op_seed);
 }
+
+bool op::v8::RandomUniform::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const {
+    NGRAPH_OP_SCOPE(v8_RandomUniform_evaluate);
+    const uint64_t* out_shape;
+    std::vector<uint64_t> out_shape_uint64(shape_size(inputs[0]->get_shape()));
+
+    if (inputs[0]->get_element_type() == element::Type_t::u64) {
+        out_shape = inputs[0]->get_data_ptr<const uint64_t>();
+    } else if (inputs[0]->get_element_type() == element::Type_t::i32) {
+        auto out_shape_i32 = inputs[0]->get_data_ptr<const int32_t>();
+        std::transform(out_shape_i32,
+                       out_shape_i32 + shape_size(inputs[0]->get_shape()),
+                       out_shape_uint64.begin(),
+                       [](const int32_t& elem) {
+                           return static_cast<uint64_t>(elem);
+                       });
+        out_shape = out_shape_uint64.data();
+    } else if (inputs[0]->get_element_type() == element::Type_t::i64) {
+        auto out_shape_i64 = inputs[0]->get_data_ptr<const int64_t>();
+        std::transform(out_shape_i64,
+                       out_shape_i64 + shape_size(inputs[0]->get_shape()),
+                       out_shape_uint64.begin(),
+                       [](const int64_t& elem) {
+                           return static_cast<uint64_t>(elem);
+                       });
+        out_shape = out_shape_uint64.data();
+    } else {
+        throw ngraph_error("Unsupported type of out shape in RandomUniform operation: " +
+                           inputs[0]->get_element_type().get_type_name());
+    }
+
+    element::Type_t t_out = get_out_type();
+    char* out;
+    switch (t_out) {
+    case element::Type_t::i32:
+        out = (char*)outputs[0]->get_data_ptr<int32_t>();
+        break;
+    case element::Type_t::i64:
+        out = (char*)outputs[0]->get_data_ptr<int64_t>();
+        break;
+    case element::Type_t::f16:
+        out = (char*)outputs[0]->get_data_ptr<float16>();
+        break;
+    case element::Type_t::bf16:
+        out = (char*)outputs[0]->get_data_ptr<bfloat16>();
+        break;
+    case element::Type_t::f32:
+        out = (char*)outputs[0]->get_data_ptr<float>();
+        break;
+    case element::Type_t::f64:
+        out = (char*)outputs[0]->get_data_ptr<double>();
+        break;
+    default:
+        throw ngraph_error("Unsupported type of RandomUniform: " + get_out_type().get_type_name());
+    }
+
+    auto state = runtime::reference::random_uniform(out_shape,
+                                                    inputs[1]->get_data_ptr<const char>(),
+                                                    inputs[2]->get_data_ptr<const char>(),
+                                                    out,
+                                                    inputs[0]->get_shape(),
+                                                    get_out_type(),
+                                                    get_global_seed(),
+                                                    get_op_seed(),
+                                                    m_state);
+
+    // Update the RandomUniform state.
+    std::lock_guard<std::mutex> guard(m_state_mutex);
+    m_state = state;
+    return true;
+}
+
+bool op::v8::RandomUniform::has_evaluate() const {
+    NGRAPH_OP_SCOPE(v8_RandomUniform_has_evaluate);
+    if (get_input_element_type(0) != ngraph::element::i32 && get_input_element_type(0) != ngraph::element::i64) {
+        return false;
+    }
+
+    switch (get_out_type()) {
+    case ngraph::element::i32:
+    case ngraph::element::i64:
+    case ngraph::element::f16:
+    case ngraph::element::bf16:
+    case ngraph::element::f32:
+    case ngraph::element::f64:
+        return true;
+    default:
+        break;
+    }
+    return false;
+}
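
Editor's sketch (not part of the patch): the Philox round logic above can be exercised in isolation. The snippet below mirrors split_high_low(), calculate_round() and raise_key() from the reference implementation, using the same constants from the random123 paper; the key/counter values and all names here are illustrative only.

#include <cstdint>
#include <cstdio>
#include <utility>

static std::pair<uint32_t, uint32_t> split_high_low(uint64_t v) {
    return {static_cast<uint32_t>(v), static_cast<uint32_t>(v >> 32)};
}

static uint64_t unite(uint32_t high, uint32_t low) {
    return (static_cast<uint64_t>(high) << 32) + low;
}

int main() {
    // Same constants as in the reference header.
    const uint64_t mult_n = 0xD2511F53, mult_counter = 0xCD9E8D57;
    const uint32_t key_lo_inc = 0x9E3779B9, key_hi_inc = 0xBB67AE85;

    uint64_t key = 150, counter = 10, n = 0;  // illustrative seed values
    for (int round = 0; round < 10; ++round) {
        auto counter_lr = split_high_low(counter);
        auto key_lr = split_high_low(key);
        auto n_lr = split_high_low(n);
        // A 64-bit multiply yields mullo in the low half and mulhi in the high half.
        auto prod0 = split_high_low(mult_n * n_lr.first);
        auto prod1 = split_high_low(mult_counter * counter_lr.first);
        n_lr.first = prod1.second ^ n_lr.second ^ key_lr.first;
        n_lr.second = prod1.first;
        counter_lr.first = prod0.second ^ counter_lr.second ^ key_lr.second;
        counter_lr.second = prod0.first;
        counter = unite(counter_lr.second, counter_lr.first);
        n = unite(n_lr.second, n_lr.first);
        if (round < 9) {  // bump the key between rounds, as raise_key() does
            key_lr = split_high_low(key);
            key_lr.first += key_lo_inc;
            key_lr.second += key_hi_inc;
            key = unite(key_lr.second, key_lr.first);
        }
    }
    // The four 32-bit generator outputs are the halves of n and counter.
    std::printf("%08x %08x %08x %08x\n",
                split_high_low(n).first,
                split_high_low(n).second,
                split_high_low(counter).first,
                split_high_low(counter).second);
    return 0;
}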
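
A second minimal, self-contained sketch of the exponent/mantissa trick that the uint32_to_float() comments describe: forcing the sign to 0 and the exponent field to the bias (127) yields a float in [1, 2), and subtracting 1.0f maps it to [0, 1). Only the C++ standard library is assumed; the function name is hypothetical.

#include <cstdint>
#include <cstring>
#include <iostream>

// Build a float in [1, 2) from 23 random mantissa bits, then shift it to [0, 1).
float uint32_to_unit_float(uint32_t x) {
    const uint32_t bits = (127u << 23) | (x & 0x7fffffu);  // sign 0, exponent 127, random mantissa
    float f;
    std::memcpy(&f, &bits, sizeof(bits));  // type-pun without undefined behavior
    return f - 1.0f;
}

int main() {
    std::cout << uint32_to_unit_float(0x00000000u) << '\n';  // prints 0
    std::cout << uint32_to_unit_float(0xffffffffu) << '\n';  // prints a value just below 1
    return 0;
}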
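
Finally, a hedged usage sketch of the new reference entry point. It assumes the headers added by this patch are on the include path and ngraph is linked; the shape, buffer sizes and seeds are arbitrary. Carrying the returned (n, counter) state into the next call is what makes a second run continue the sequence instead of repeating it, which is the same reason the DisableRandomUniformConstantFolding pass keeps the op from being folded into a constant.

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

#include "ngraph/runtime/reference/random_uniform.hpp"

int main() {
    const std::vector<uint64_t> out_shape = {2, 3};         // generate 2x3 = 6 elements
    const ngraph::Shape out_shape_shape{out_shape.size()};  // shape of the "shape" input itself
    float min_val = 0.0f, max_val = 1.0f;
    std::vector<float> out(6);

    std::pair<uint64_t, uint64_t> state{0, 0};  // (n, counter) state carried between runs
    for (int run = 0; run < 2; ++run) {
        state = ngraph::runtime::reference::random_uniform(out_shape.data(),
                                                           reinterpret_cast<const char*>(&min_val),
                                                           reinterpret_cast<const char*>(&max_val),
                                                           reinterpret_cast<char*>(out.data()),
                                                           out_shape_shape,
                                                           ngraph::element::f32,
                                                           150,  // global seed
                                                           10,   // op seed
                                                           state);
        for (float v : out)
            std::cout << v << ' ';
        std::cout << '\n';  // the two runs print different values
    }
    return 0;
}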