From 36c19d2631bc2f6b572ee3d24bb43b94d6dd88c4 Mon Sep 17 00:00:00 2001
From: Anastasia Popova
Date: Mon, 6 Sep 2021 11:49:27 +0300
Subject: [PATCH] RandomUniform reference implementation. (#7012)

* Added RandomUniform reference implementation.
* Corrected comments.
* Small correction.
* Code style correction.
* Added has_evaluate() method.
* Added comments, added names to consts.
* Small fix.
* Replaced arrays with vectors.
* Apply suggestions from code review

Co-authored-by: Ilya Churaev

* Code refactoring.
* Corrected tests, code style.
* Added comment.
* Added comments.
* Temporarily added debug output.
* Temporarily added debug output.
* Removed debug output.
* Added comment.
* Added comment.
* Enabled state saving for RandomUniform.
* Code style.
* Used template to convert types.
* Added comments.

Co-authored-by: Ilya Churaev
---
 .../op_reference/random_uniform.cpp           | 205 +++++++++++
 ...isable_random_uniform_constant_folding.hpp |  27 ++
 .../common_optimizations.cpp                  |   2 +
 ...isable_random_uniform_constant_folding.cpp |  24 ++
 .../core/include/ngraph/op/random_uniform.hpp |  16 +-
 .../runtime/reference/random_uniform.hpp      |  39 ++
 .../src/runtime/reference/random_uniform.cpp  | 333 ++++++++++++++++++
 ngraph/core/src/op/random_uniform.cpp         |  94 ++++-
 8 files changed, 737 insertions(+), 3 deletions(-)
 create mode 100644 docs/template_plugin/tests/functional/op_reference/random_uniform.cpp
 create mode 100644 inference-engine/src/transformations/include/transformations/common_optimizations/disable_random_uniform_constant_folding.hpp
 create mode 100644 inference-engine/src/transformations/src/transformations/common_optimizations/disable_random_uniform_constant_folding.cpp
 create mode 100644 ngraph/core/reference/include/ngraph/runtime/reference/random_uniform.hpp
 create mode 100644 ngraph/core/reference/src/runtime/reference/random_uniform.cpp

diff --git a/docs/template_plugin/tests/functional/op_reference/random_uniform.cpp b/docs/template_plugin/tests/functional/op_reference/random_uniform.cpp
new file mode 100644
index 00000000000000..2e454e4a1453ee
--- /dev/null
+++ b/docs/template_plugin/tests/functional/op_reference/random_uniform.cpp
@@ -0,0 +1,205 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+#include "base_reference_test.hpp"
+#include "ngraph/opsets/opset8.hpp"
+#include "ngraph/util.hpp"
+
+using namespace ngraph;
+
+namespace reference_tests {
+namespace {
+
+struct RandomUniformParams {
+    RandomUniformParams(const std::vector<int64_t>& paramOutShape,
+                        const Tensor& paramMinValue,
+                        const Tensor& paramMaxValue,
+                        ngraph::element::Type paramOutType,
+                        int64_t paramGlobalSeed,
+                        int64_t paramOpSeed,
+                        const Tensor& paramExpected,
+                        const std::string& test_name)
+        : out_shape(paramOutShape),
+          min_val(paramMinValue),
+          max_val(paramMaxValue),
+          out_type(paramOutType),
+          global_seed(paramGlobalSeed),
+          op_seed(paramOpSeed),
+          expected(paramExpected),
+          test_case_name(test_name) {}
+    std::vector<int64_t> out_shape;
+    Tensor min_val;
+    Tensor max_val;
+    ngraph::element::Type out_type;
+    int64_t global_seed;
+    int64_t op_seed;
+    Tensor expected;
+    std::string test_case_name;
+};
+
+class ReferenceRandomUniformLayerTest : public testing::TestWithParam<RandomUniformParams>, public CommonReferenceTest {
+public:
+    void SetUp() override {
+        auto params = GetParam();
+        function = CreateFunction(params.out_shape,
+                                  params.min_val,
+                                  params.max_val,
+                                  params.out_type,
+                                  params.global_seed,
+                                  params.op_seed);
+        inputData = {params.min_val.data,
+                     params.max_val.data};
+        refOutData = {params.expected.data};
+    }
+    static std::string getTestCaseName(const testing::TestParamInfo<RandomUniformParams>& obj) {
+        auto param = obj.param;
+        return param.test_case_name;
+    }
+
+private:
+    static std::shared_ptr<Function> CreateFunction(const std::vector<int64_t>& out_shape,
+                                                    const Tensor& min_val,
+                                                    const Tensor& max_val,
+                                                    const ngraph::element::Type& out_type,
+                                                    int64_t global_seed,
+                                                    int64_t op_seed) {
+        const auto min_val_param = std::make_shared<opset8::Parameter>(min_val.type, min_val.shape);
+        const auto max_val_param = std::make_shared<opset8::Parameter>(max_val.type, max_val.shape);
+        auto out_shape_ = std::make_shared<opset8::Constant>(element::i64, Shape{out_shape.size()}, out_shape);
+
+        return std::make_shared<Function>(NodeVector{std::make_shared<opset8::RandomUniform>(out_shape_,
+                                                                                             min_val_param,
+                                                                                             max_val_param,
+                                                                                             out_type,
+                                                                                             global_seed,
+                                                                                             op_seed)},
+                                          ParameterVector{min_val_param, max_val_param});
+    }
+};
+
+TEST_P(ReferenceRandomUniformLayerTest, RandomUniformWithHardcodedRefs) {
+    Exec();
+}
+
+} // namespace
+
+// Reference values for the following tests are obtained from a single-layer TensorFlow model with tf.random.uniform().
+INSTANTIATE_TEST_SUITE_P(
+    smoke_RandomUniform_With_Hardcoded_Refs,
+    ReferenceRandomUniformLayerTest,
+    ::testing::Values(
+        RandomUniformParams(std::vector<int64_t>{3, 2, 4},
+                            Tensor{{1}, element::f32, std::vector<float>{0}},
+                            Tensor{{1}, element::f32, std::vector<float>{1}},
+                            element::Type_t::f32,
+                            150,
+                            10,
+                            Tensor{{3, 2, 4},
+                                   element::f32,
+                                   std::vector<float>{0.70112360, 0.30539632, 0.93931055, 0.94560349, 0.11694777,
+                                                      0.50770056, 0.51971972, 0.22727466, 0.99137402, 0.35519040,
+                                                      0.82692313, 0.59864855, 0.31364107, 0.57481313, 0.41399086,
+                                                      0.96308255, 0.37140799, 0.85253167, 0.09358585, 0.08200955,
+                                                      0.23655081, 0.81056309, 0.74226606, 0.76106691}},
+                            "float32_default_min_max"),
+        RandomUniformParams(std::vector<int64_t>{3, 2, 4},
+                            Tensor{{1}, element::f16, std::vector<float16>{0}},
+                            Tensor{{1}, element::f16, std::vector<float16>{1}},
+                            element::Type_t::f16,
+                            150,
+                            10,
+                            Tensor{{3, 2, 4},
+                                   element::f16,
+                                   std::vector<float16>{0.60449219, 0.80664062, 0.83203125, 0.38378906, 0.03613281,
+                                                        0.08300781, 0.54394531, 0.83398438, 0.33593750, 0.71972656,
+                                                        0.15429688, 0.12890625, 0.34765625, 0.86914062, 0.41308594,
+                                                        0.57226562, 0.57421875, 0.93945312, 0.65527344, 0.82226562,
+                                                        0.82421875, 0.13281250, 0.64355469, 0.66015625}},
+                            "float16_default_min_max"),
+        RandomUniformParams(std::vector<int64_t>{3, 2, 4},
+                            Tensor{{1}, element::f32, std::vector<float>{-650}},
+                            Tensor{{1}, element::f32, std::vector<float>{450}},
+                            element::Type_t::f32,
+                            150,
+                            10,
+                            Tensor{{3, 2, 4},
+                                   element::f32,
+                                   std::vector<float>{121.23596191,  -314.06405640, 383.24157715,  390.16381836,
+                                                      -521.35742188, -91.52935791,  -78.30828857,  -399.99786377,
+                                                      440.51147461,  -259.29055786, 259.61541748,  8.51342773,
+                                                      -304.99481201, -17.70556641,  -194.61004639, 409.39074707,
+                                                      -241.45120239, 287.78485107,  -547.05554199, -559.78948975,
+                                                      -389.79409790, 241.61938477,  166.49267578,  187.17358398}},
+                            "float32_non_default_min_max"),
+        RandomUniformParams(std::vector<int64_t>{3, 2, 4},
+                            Tensor{{1}, element::f16, std::vector<float16>{-1.5}},
+                            Tensor{{1}, element::f16, std::vector<float16>{-1.0}},
+                            element::Type_t::f16,
+                            150,
+                            10,
+                            Tensor{{3, 2, 4},
+                                   element::f16,
+                                   std::vector<float16>{-1.19726562, -1.09667969, -1.08398438, -1.30859375, -1.48242188,
+                                                        -1.45898438, -1.22851562, -1.08300781, -1.33203125, -1.14062500,
+                                                        -1.42285156, -1.43554688, -1.32617188, -1.06542969, -1.29296875,
+                                                        -1.21386719, -1.21289062, -1.03027344, -1.17187500, -1.08886719,
+                                                        -1.08789062, -1.43359375, -1.17773438, -1.16992188}},
+                            "float16_non_default_min_max"),
+        RandomUniformParams(std::vector<int64_t>{2, 3, 4},
+                            Tensor{{1}, element::i32, std::vector<int32_t>{-100}},
+                            Tensor{{1}, element::i32, std::vector<int32_t>{50}},
+                            element::Type_t::i32,
+                            100,
+                            350,
+                            Tensor{{2, 3, 4},
+                                   element::i32,
+                                   std::vector<int32_t>{
+                                       22, -56, -33, -89, -98, -33, -3,  -48, -82, 5,  -66, 21,
+                                       29, -42, -73, -37, 3,   36,  -35, 20,  -11, -8, -78, 47,
+                                   }},
+                            "int32"),
+        RandomUniformParams(std::vector<int64_t>{5, 4, 3},
+                            Tensor{{1}, element::i64, std::vector<int64_t>{-2600}},
+                            Tensor{{1}, element::i64, std::vector<int64_t>{3700}},
+                            element::Type_t::i64,
+                            755,
+                            951,
+                            Tensor{{5, 4, 3},
+                                   element::i64,
+                                   std::vector<int64_t>{
+                                       2116, -1581, 2559,  -339,  -1660, 519,   90,   2027,  -210,  3330, 1831,  -1737,
+                                       2683, 2661,  3473,  1220,  3534,  -2384, 2199, 1935,  499,   2861, 2743,  3223,
+                                       -531, -836,  -65,   3435,  632,   1765,  2613, 1891,  1698,  3069, 169,   -792,
+                                       -32,  2976,  -1552, -2588, 3327,  -1756, 2637, -1084, 3567,  -778, -1465, 2967,
+                                       1242, 2672,  -1585, -2271, 3536,  -1502, 400,  2241,  3126,  908,  1073,  -2110}},
+                            "int64"),
+        RandomUniformParams(std::vector<int64_t>{7, 3},
+                            Tensor{{1}, element::bf16, std::vector<bfloat16>{0}},
+                            Tensor{{1}, element::bf16, std::vector<bfloat16>{1}},
+                            element::Type_t::bf16,
+                            4978,
+                            5164,
+                            Tensor{{7, 3},
+                                   element::bf16,
+                                   std::vector<bfloat16>{0.8984375, 0.84375,   0.1640625, 0.1875,    0.46875,   0.6875,
+                                                         0.5234375, 0.3046875, 0.9140625, 0.453125,  0.953125,  0.328125,
+                                                         0.359375,  0.1875,    0.9453125, 0.390625,  0.21875,   0.9921875,
+                                                         0.8203125, 0.453125,  0.875}},
+                            "bfloat16_default_min_max"),
+        RandomUniformParams(std::vector<int64_t>{7, 3},
+                            Tensor{{1}, element::bf16, std::vector<bfloat16>{-150}},
+                            Tensor{{1}, element::bf16, std::vector<bfloat16>{200}},
+                            element::Type_t::bf16,
+                            4978,
+                            5164,
+                            Tensor{{7, 3},
+                                   element::bf16,
+                                   std::vector<bfloat16>{164, 146, -92.5, -84.5, 14,    90,  33,  -43.5, 170, 8,   182,
+                                                         -35, -24, -84.5, 180,   -14, -73.5, 198, 138,   8,   156}},
+                            "bfloat16_non_default_min_max")),
+    ReferenceRandomUniformLayerTest::getTestCaseName);
+} // namespace reference_tests
diff --git a/inference-engine/src/transformations/include/transformations/common_optimizations/disable_random_uniform_constant_folding.hpp b/inference-engine/src/transformations/include/transformations/common_optimizations/disable_random_uniform_constant_folding.hpp
new file mode 100644
index 00000000000000..e9c59587eb67de
--- /dev/null
+++ b/inference-engine/src/transformations/include/transformations/common_optimizations/disable_random_uniform_constant_folding.hpp
@@ -0,0 +1,27 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+
+namespace ngraph {
+namespace pass {
+
+class DisableRandomUniformConstantFolding;
+
+} // namespace pass
+} // namespace ngraph
+
+/**
+ * @ingroup ie_transformation_common_api
+ * @brief Disables ConstantFolding for the RandomUniform operation. It is required because RandomUniform
+ * must generate a new sequence on each run.
+ */
+class ngraph::pass::DisableRandomUniformConstantFolding : public ngraph::pass::MatcherPass {
+public:
+    NGRAPH_RTTI_DECLARATION;
+    DisableRandomUniformConstantFolding();
+};
diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp
index 253c4f113ab073..4e176543504b20 100644
--- a/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp
+++ b/inference-engine/src/transformations/src/transformations/common_optimizations/common_optimizations.cpp
@@ -27,6 +27,7 @@
 #include "transformations/common_optimizations/hswish_fusion.hpp"
 #include "transformations/common_optimizations/convert_quantize_dequantize.hpp"
 #include "transformations/common_optimizations/relu_fake_quantize_fusion.hpp"
+#include "transformations/common_optimizations/disable_random_uniform_constant_folding.hpp"
 #include "transformations/common_optimizations/add_fake_quantize_fusion.hpp"
 #include "transformations/common_optimizations/mul_fake_quantize_fusion.hpp"
 #include "transformations/common_optimizations/clamp_fusion.hpp"
@@ -88,6 +89,7 @@ bool ngraph::pass::CommonOptimizations::run_on_function(std::shared_ptr<ngraph::Function> f) {
     manager.register_pass();
+    manager.register_pass<ngraph::pass::DisableRandomUniformConstantFolding>();
     manager.register_pass();
     manager.register_pass();
     manager.register_pass(); // Resolves dynamism (replaces NonZero), CF needed
diff --git a/inference-engine/src/transformations/src/transformations/common_optimizations/disable_random_uniform_constant_folding.cpp b/inference-engine/src/transformations/src/transformations/common_optimizations/disable_random_uniform_constant_folding.cpp
new file mode 100644
index 00000000000000..7c93745d3fa449
--- /dev/null
+++ b/inference-engine/src/transformations/src/transformations/common_optimizations/disable_random_uniform_constant_folding.cpp
@@ -0,0 +1,24 @@
+// Copyright (C) 2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/common_optimizations/disable_random_uniform_constant_folding.hpp"
+
+#include
+#include
+#include
+#include
+
+NGRAPH_RTTI_DEFINITION(ngraph::pass::DisableRandomUniformConstantFolding, "DisableRandomUniformConstantFolding", 0);
+
+ngraph::pass::DisableRandomUniformConstantFolding::DisableRandomUniformConstantFolding() {
+    auto random_uniform = pattern::wrap_type<opset8::RandomUniform>();
+
+    ngraph::matcher_pass_callback callback = [=](pattern::Matcher& m) {
+        disable_constant_folding(m.get_match_root());
+        return true;
+    };
+
+    auto m = std::make_shared<pattern::Matcher>(random_uniform, "DisableRandomUniformConstantFolding");
+    this->register_matcher(m, callback);
+}
diff --git a/ngraph/core/include/ngraph/op/random_uniform.hpp b/ngraph/core/include/ngraph/op/random_uniform.hpp
index 242294cc748d89..f20ddecccab431 100644
--- a/ngraph/core/include/ngraph/op/random_uniform.hpp
+++ b/ngraph/core/include/ngraph/op/random_uniform.hpp
@@ -30,8 +30,8 @@ class NGRAPH_API RandomUniform : public Op {
                   const Output<Node>& min_val,
                   const Output<Node>& max_val,
                   const ngraph::element::Type& out_type,
-                  uint64_t global_seed,
-                  uint64_t op_seed);
+                  uint64_t global_seed = 0,
+                  uint64_t op_seed = 0);
 
     void validate_and_infer_types() override;
 
@@ -39,6 +39,11 @@ class NGRAPH_API RandomUniform : public Op {
     std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
 
+    /// \return Turns off constant folding for RandomUniform operation.
+    bool constant_fold(OutputVector& output_values, const OutputVector& inputs_values) override {
+        return false;
+    }
+
     /// \return The output tensor type.
     const ngraph::element::Type& get_out_type() const {
         return m_output_type;
     }
@@ -63,10 +68,17 @@ class NGRAPH_API RandomUniform : public Op {
         m_op_seed = seed2;
     }
 
+    bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override;
+
+    bool has_evaluate() const override;
+
 protected:
     ngraph::element::Type m_output_type;
     uint64_t m_global_seed;
     uint64_t m_op_seed;
+
+    mutable std::mutex m_state_mutex;
+    mutable std::pair<uint64_t, uint64_t> m_state;
 };
 } // namespace v8
 } // namespace op
diff --git a/ngraph/core/reference/include/ngraph/runtime/reference/random_uniform.hpp b/ngraph/core/reference/include/ngraph/runtime/reference/random_uniform.hpp
new file mode 100644
index 00000000000000..43df6529de33a6
--- /dev/null
+++ b/ngraph/core/reference/include/ngraph/runtime/reference/random_uniform.hpp
@@ -0,0 +1,39 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+#include
+
+#include "ngraph/shape.hpp"
+
+namespace ngraph {
+namespace runtime {
+namespace reference {
+std::pair<uint64_t, uint64_t> random_uniform(const uint64_t* out_shape,
+                                             const char* min_val,
+                                             const char* max_val,
+                                             char* out,
+                                             const Shape& out_shape_shape,
+                                             const ngraph::element::Type& elem_type,
+                                             uint64_t seed,
+                                             uint64_t seed2,
+                                             std::pair<uint64_t, uint64_t> prev_state);
+
+// The following const values are taken from the original paper:
+// https://www.thesalmons.org/john/random123/papers/random123sc11.pdf
+const uint32_t crush_resistance_const_lower_value = 0x9E3779B9;
+const uint32_t crush_resistance_const_upper_value = 0xBB67AE85;
+const uint64_t statistic_maximizing_multiplier_n = 0xD2511F53;
+const uint64_t statistic_maximizing_multiplier_counter = 0xCD9E8D57;
+const size_t rounds_number = 10;
+
+// Determines how many elements of the RNG sequence are skipped between runs.
+// Can be any positive value; 256 is chosen for parity with TensorFlow.
+const uint64_t skip_const = 256;
+
+} // namespace reference
+} // namespace runtime
+} // namespace ngraph
diff --git a/ngraph/core/reference/src/runtime/reference/random_uniform.cpp b/ngraph/core/reference/src/runtime/reference/random_uniform.cpp
new file mode 100644
index 00000000000000..6e6f1f7c95d06a
--- /dev/null
+++ b/ngraph/core/reference/src/runtime/reference/random_uniform.cpp
@@ -0,0 +1,333 @@
+// Copyright (C) 2018-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ngraph/runtime/reference/random_uniform.hpp"
+
+#include
+
+#include "ngraph/shape.hpp"
+
+namespace ngraph {
+namespace runtime {
+namespace reference {
+
+// Splits a uint64 value into two uint32 values holding the low and high halves of the original value.
+std::pair<uint32_t, uint32_t> split_high_low(uint64_t value) {
+    uint32_t low = static_cast<uint32_t>(value);
+    uint32_t high = static_cast<uint32_t>(value >> 32);
+    return {low, high};
+}
+
+// Concatenates two uint32 values into a single uint64 value.
+uint64_t unite_high_low(uint32_t high, uint32_t low) {
+    return (static_cast<uint64_t>(high) << 32) + low;
+}
+
+// Runs a single "round" of the Philox algorithm.
+void calculate_round(uint64_t key, uint64_t& counter, uint64_t& n) {
+    // Split the key, counter and n into pairs of uint32 values.
+    auto counter_lr = split_high_low(counter);
+    auto key_lr = split_high_low(key);
+    auto n_lr = split_high_low(n);
+
+    // Each round performs the following update of n and counter:
+    //     left uint32 part  = mullo(R, M)
+    //     right uint32 part = mulhi(R, M) xor k xor L
+    //     mulhi(a, b) = floor((a * b) / 2^32)
+    //     mullo(a, b) = (a * b) mod 2^32,
+    // where M is the statistic_maximizing_multiplier constant.
+    auto prod0 = split_high_low(statistic_maximizing_multiplier_n * n_lr.first);
+    auto prod1 = split_high_low(statistic_maximizing_multiplier_counter * counter_lr.first);
+    n_lr.first = prod1.second ^ n_lr.second ^ key_lr.first;
+    n_lr.second = prod1.first;
+    counter_lr.first = prod0.second ^ counter_lr.second ^ key_lr.second;
+    counter_lr.second = prod0.first;
+
+    // Unite counter and n back into uint64 values.
+    counter = unite_high_low(counter_lr.second, counter_lr.first);
+    n = unite_high_low(n_lr.second, n_lr.first);
+}
+
+// Increases the key value.
+void raise_key(uint64_t& key) {
+    auto key_lr = split_high_low(key);
+    key_lr.first += crush_resistance_const_lower_value;
+    key_lr.second += crush_resistance_const_upper_value;
+    key = unite_high_low(key_lr.second, key_lr.first);
+}
+
+// Helper function for converting uint32 values to float32. Sets the fractional part of the
+// floating-point value with bits from the uint32 value. The resulting value is in the interval [0,1).
+float uint32_to_float(uint32_t x) {
+    // float32 is formatted as follows: sign(1 bit) exponent(8 bits) mantissa(23 bits).
+    // The value is interpreted using the following formula:
+    //     (-1)^sign * 1.mantissa * 2^(exponent - 127)
+    // Here we set the following values:
+    //     sign = 0
+    //     exponent = 127, for obtaining a zero exponent.
+    //     mantissa = 23 right bits from the generated uint32 random value.
+
+    uint32_t x_uint32 = (static_cast<uint32_t>(127) << 23) | (x & 0x7fffffu);
+
+    float x_float;
+    memcpy(&x_float, &x_uint32, sizeof(x_uint32));
+    return x_float - 1.0f;
+}
+
+// Helper function for converting uint32 values to float16. Sets the fractional part of the
+// floating-point value with bits from the uint32 value. The resulting value is in the interval [0,1).
+float16 uint32_to_float16(uint32_t x) {
+    // float16 is formatted as follows: sign(1 bit) exponent(5 bits) mantissa(10 bits).
+    // The value is interpreted using the following formula:
+    //     (-1)^sign * 1.mantissa * 2^(exponent - 15)
+    // Here we set the following values:
+    //     sign = 0
+    //     exponent = 15, for obtaining a zero exponent.
+    //     mantissa = 10 right bits from the generated uint32 random value.
+
+    uint16_t x_uint16 = static_cast<uint16_t>(x);
+    x_uint16 = (static_cast<uint16_t>(15) << 10) | (x_uint16 & 0x3ffu);
+
+    float16 x_float16;
+    memcpy(&x_float16, &x_uint16, sizeof(x_uint16));
+    return x_float16 - static_cast<float16>(1);
+}
+
+// Helper function for converting uint32 values to double. Sets the fractional part of the
+// double value with bits from two uint32 values. The resulting value is in the interval [0,1).
+double uint32_to_double(uint32_t x1, uint32_t x2) {
+    // float64 is formatted as follows: sign(1 bit) exponent(11 bits) mantissa(52 bits).
+    // The value is interpreted using the following formula:
+    //     (-1)^sign * 1.mantissa * 2^(exponent - 1023)
+    // Here we set the following values:
+    //     sign = 0
+    //     exponent = 1023, for obtaining a zero exponent.
+    //     mantissa = 52 right bits from two concatenated uint32 values from the random integer generator.
+
+    uint64_t significant = ((static_cast<uint64_t>(x1) & 0xfffffu) << 32) | static_cast<uint64_t>(x2);
+    uint64_t x_uint64 = ((static_cast<uint64_t>(1023) << 52) | significant);
+
+    double x_double;
+    memcpy(&x_double, &x_uint64, sizeof(x_uint64));
+    return x_double - 1.0;
+}
+
+// Helper function for converting uint32 values to bfloat16. Sets the fractional part of the
+// floating-point value with bits from the uint32 value. The resulting value is in the interval [0,1).
+bfloat16 uint32_to_bfloat16(uint32_t x) {
+    // bfloat16 is formatted as follows: sign(1 bit) exponent(8 bits) mantissa(7 bits).
+    // The value is interpreted using the following formula:
+    //     (-1)^sign * 1.mantissa * 2^(exponent - 127)
+    // Here we set the following values:
+    //     sign = 0
+    //     exponent = 127, for obtaining a zero exponent.
+    //     mantissa = 7 right bits from the generated uint32 random value.
+
+    uint16_t x_uint16 = static_cast<uint16_t>(x);
+    x_uint16 = (static_cast<uint16_t>(127) << 7) | (x_uint16 & 0x7fu);
+
+    bfloat16 x_bfloat16;
+    memcpy(&x_bfloat16, &x_uint16, sizeof(x_uint16));
+    return x_bfloat16 - static_cast<bfloat16>(1);
+}
+
+// Runs the Philox algorithm.
+void run_philox(uint64_t key, uint64_t counter, uint64_t n, size_t n_rounds, std::vector<uint32_t>& res) {
+    for (size_t i = 0; i < n_rounds; i++) {
+        calculate_round(key, counter, n);
+        if (i < n_rounds - 1)
+            raise_key(key);
+    }
+    auto res1 = split_high_low(n);
+    auto res2 = split_high_low(counter);
+    res[0] = res1.first;
+    res[1] = res1.second;
+    res[2] = res2.first;
+    res[3] = res2.second;
+}
+
+// Converts uint32 values to the destination type and normalizes them to the required range.
+template <typename T>
+void convert_to_output_type(const std::vector<uint32_t>& res,
+                            size_t step,
+                            const ngraph::element::Type& elem_type,
+                            const char* min_val,
+                            const char* max_val,
+                            char* out,
+                            size_t k,
+                            size_t elem_count,
+                            T (*convert_single_input)(uint32_t) = nullptr,
+                            T (*convert_two_inputs)(uint32_t, uint32_t, T, T) = nullptr,
+                            T (*mod_func)(uint32_t, T, T) = nullptr) {
+    // Get the min and max values.
+    T mn[1];
+    T mx[1];
+    memcpy(mn, min_val, elem_type.size());
+    memcpy(mx, max_val, elem_type.size());
+
+    std::vector<T> res_out_type(step);
+    if (elem_type.size() > 4) {
+        // Each element of the resulting sequence is formed using two uint32 values.
+        res_out_type[0] = convert_two_inputs(res[0], res[1], mn[0], mx[0]);
+        res_out_type[1] = convert_two_inputs(res[2], res[3], mn[0], mx[0]);
+    } else {
+        // Each element of the resulting sequence is formed using a single uint32 value.
+        std::transform(res.data(),
+                       res.data() + step,
+                       res_out_type.data(),
+                       [&mn, &mx, &convert_single_input, &mod_func](uint32_t elem) {
+                           if (convert_single_input != nullptr) {
+                               return convert_single_input(elem) * (mx[0] - mn[0]) + mn[0];
+                           } else {
+                               return mod_func(elem, mn[0], mx[0]);
+                           }
+                       });
+    }
+
+    memcpy(out + k * elem_type.size(), res_out_type.data(), std::min(step, elem_count - k) * elem_type.size());
+}
+
+// Implementation of RandomUniform that uses the Philox algorithm as the inner random unsigned integer generator.
+std::pair<uint64_t, uint64_t> random_uniform(const uint64_t* out_shape,
+                                             const char* min_val,
+                                             const char* max_val,
+                                             char* out,
+                                             const Shape& out_shape_shape,
+                                             const ngraph::element::Type& elem_type,
+                                             uint64_t seed,
+                                             uint64_t seed2,
+                                             std::pair<uint64_t, uint64_t> prev_state) {
+    // When both seed values are equal to zero, RandomUniform should generate a non-deterministic sequence.
+    // The implementation in plugins may differ for this case.
+    if (seed == 0 && seed2 == 0) {
+        std::srand(std::time(nullptr));
+        seed = std::rand();
+    }
+
+    // Get the previous counter state.
+    uint64_t n_state = prev_state.first;
+    uint64_t counter_state = prev_state.second;
+
+    // Initialize the Philox key and counters.
+    uint64_t key = seed;
+    uint64_t counter = counter_state > 0 ? counter_state : seed2;
+    uint64_t n = n_state;
+
+    // Calculate the total element count for generation.
+    size_t shape_count = shape_size(out_shape_shape);
+    size_t elem_count = 1;
+    for (size_t i = 0; i < shape_count; i++) {
+        elem_count *= out_shape[i];
+    }
+
+    // The Philox algorithm returns 4 elements of the RNG sequence per invocation.
+    const size_t philox_output_size = 4;
+
+    // Each run of the Philox algorithm generates 4 uint32 values.
+    // If output_type is int32, f32, bf16, or f16, each value is converted to the
+    // corresponding type, so we have 4 result values. For f64 and i64 we use
+    // a pair of values for conversion, so we have 2 result values.
+    // Step indicates how many values we generate in one iteration.
+    const size_t step = elem_type.size() > 4 ? 2 : 4;
+
+    for (size_t k = 0; k < elem_count; k += step) {
+        // Generate 4 random uint32 values using the Philox algorithm.
+        std::vector<uint32_t> res(philox_output_size);
+        run_philox(key, counter, n, rounds_number, res);
+
+        // Convert the values to the corresponding output_type.
+        switch (elem_type) {
+        case ngraph::element::Type_t::f32: {
+            convert_to_output_type<float>(res, step, elem_type, min_val, max_val, out, k, elem_count, uint32_to_float);
+            break;
+        }
+        case ngraph::element::Type_t::f16: {
+            convert_to_output_type<float16>(res,
+                                            step,
+                                            elem_type,
+                                            min_val,
+                                            max_val,
+                                            out,
+                                            k,
+                                            elem_count,
+                                            uint32_to_float16);
+            break;
+        }
+        case ngraph::element::Type_t::bf16: {
+            convert_to_output_type<bfloat16>(res,
+                                             step,
+                                             elem_type,
+                                             min_val,
+                                             max_val,
+                                             out,
+                                             k,
+                                             elem_count,
+                                             uint32_to_bfloat16);
+            break;
+        }
+        case ngraph::element::Type_t::f64: {
+            convert_to_output_type<double>(res,
+                                           step,
+                                           elem_type,
+                                           min_val,
+                                           max_val,
+                                           out,
+                                           k,
+                                           elem_count,
+                                           nullptr,
+                                           [](uint32_t a, uint32_t b, double mn, double mx) {
+                                               return uint32_to_double(a, b) * (mx - mn) + mn;
+                                           });
+            break;
+        }
+        case ngraph::element::Type_t::i32: {
+            convert_to_output_type<int>(res,
+                                        step,
+                                        elem_type,
+                                        min_val,
+                                        max_val,
+                                        out,
+                                        k,
+                                        elem_count,
+                                        nullptr,
+                                        nullptr,
+                                        [](uint32_t x, int mn, int mx) {
+                                            return static_cast<int>(x % (mx - mn) + mn);
+                                        });
+            break;
+        }
+        case ngraph::element::Type_t::i64: {
+            convert_to_output_type<int64_t>(res,
+                                            step,
+                                            elem_type,
+                                            min_val,
+                                            max_val,
+                                            out,
+                                            k,
+                                            elem_count,
+                                            nullptr,
+                                            [](uint32_t a, uint32_t b, int64_t mn, int64_t mx) {
+                                                return static_cast<int64_t>(unite_high_low(b, a) % (mx - mn) + mn);
+                                            });
+            break;
+        }
+        default:
+            throw ngraph_error("Unsupported type of RandomUniform: " + elem_type.get_type_name());
+        }
+        if (++n == 0)
+            ++counter;
+    }
+
+    // Calculate the counter values for the next RandomUniform run.
+    uint64_t skip_count = elem_count * skip_const;
+    n_state += skip_count;
+    if (n_state < skip_count)
+        counter_state++;
+
+    return {n_state, counter_state};
+}
+
+} // namespace reference
+} // namespace runtime
+} // namespace ngraph
diff --git a/ngraph/core/src/op/random_uniform.cpp b/ngraph/core/src/op/random_uniform.cpp
index 9b0b6cdc9fcb93..90a356cdef350b 100644
--- a/ngraph/core/src/op/random_uniform.cpp
+++ b/ngraph/core/src/op/random_uniform.cpp
@@ -7,6 +7,7 @@
 #include
 
 #include "itt.hpp"
+#include "ngraph/runtime/reference/random_uniform.hpp"
 
 using namespace std;
 using namespace ngraph;
@@ -116,7 +117,7 @@ bool op::v8::RandomUniform::visit_attributes(AttributeVisitor& visitor) {
 }
 
 shared_ptr<Node> op::v8::RandomUniform::clone_with_new_inputs(const OutputVector& new_args) const {
-    NGRAPH_OP_SCOPE(v8_Roll_clone_with_new_inputs);
+    NGRAPH_OP_SCOPE(v8_RandomUniform_clone_with_new_inputs);
     check_new_args_count(this, new_args);
     return make_shared<v8::RandomUniform>(new_args[0],
                                           new_args[1],
@@ -125,3 +126,94 @@ shared_ptr<Node> op::v8::RandomUniform::clone_with_new_inputs(const OutputVector& new_args) const {
                                           m_global_seed,
                                           m_op_seed);
 }
+
+bool op::v8::RandomUniform::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const {
+    NGRAPH_OP_SCOPE(v8_RandomUniform_evaluate);
+    const uint64_t* out_shape;
+    std::vector<uint64_t> out_shape_uint64(shape_size(inputs[0]->get_shape()));
+
+    if (inputs[0]->get_element_type() == element::Type_t::u64) {
+        out_shape = inputs[0]->get_data_ptr<const uint64_t>();
+    } else if (inputs[0]->get_element_type() == element::Type_t::i32) {
+        auto out_shape_i32 = inputs[0]->get_data_ptr<const int32_t>();
+        std::transform(out_shape_i32,
+                       out_shape_i32 + shape_size(inputs[0]->get_shape()),
+                       out_shape_uint64.begin(),
+                       [](const int32_t& elem) {
+                           return static_cast<uint64_t>(elem);
+                       });
+        out_shape = out_shape_uint64.data();
+    } else if (inputs[0]->get_element_type() == element::Type_t::i64) {
+        auto out_shape_i64 = inputs[0]->get_data_ptr<const int64_t>();
+        std::transform(out_shape_i64,
+                       out_shape_i64 + shape_size(inputs[0]->get_shape()),
+                       out_shape_uint64.begin(),
+                       [](const int64_t& elem) {
+                           return static_cast<uint64_t>(elem);
+                       });
+        out_shape = out_shape_uint64.data();
+    } else {
+        throw ngraph_error("Unsupported type of out shape in RandomUniform operation: " +
+                           inputs[0]->get_element_type().get_type_name());
+    }
+
+    element::Type_t t_out = get_out_type();
+    char* out;
+    switch (t_out) {
+    case element::Type_t::i32:
+        out = (char*)outputs[0]->get_data_ptr<int32_t>();
+        break;
+    case element::Type_t::i64:
+        out = (char*)outputs[0]->get_data_ptr<int64_t>();
+        break;
+    case element::Type_t::f16:
+        out = (char*)outputs[0]->get_data_ptr<float16>();
+        break;
+    case element::Type_t::bf16:
+        out = (char*)outputs[0]->get_data_ptr<bfloat16>();
+        break;
+    case element::Type_t::f32:
+        out = (char*)outputs[0]->get_data_ptr<float>();
+        break;
+    case element::Type_t::f64:
+        out = (char*)outputs[0]->get_data_ptr<double>();
+        break;
+    default:
+        throw ngraph_error("Unsupported type of RandomUniform: " + get_out_type().get_type_name());
+    }
+
+    auto state = runtime::reference::random_uniform(out_shape,
+                                                    inputs[1]->get_data_ptr<const char>(),
+                                                    inputs[2]->get_data_ptr<const char>(),
+                                                    out,
+                                                    inputs[0]->get_shape(),
+                                                    get_out_type(),
+                                                    get_global_seed(),
+                                                    get_op_seed(),
+                                                    m_state);
+
+    // Update the RandomUniform state.
+    std::lock_guard<std::mutex> guard(m_state_mutex);
+    m_state = state;
+    return true;
+}
+
+bool op::v8::RandomUniform::has_evaluate() const {
+    NGRAPH_OP_SCOPE(v8_RandomUniform_has_evaluate);
+    if (get_input_element_type(0) != ngraph::element::i32 && get_input_element_type(0) != ngraph::element::i64) {
+        return false;
+    }
+
+    switch (get_out_type()) {
+    case ngraph::element::i32:
+    case ngraph::element::i64:
+    case ngraph::element::f16:
+    case ngraph::element::bf16:
+    case ngraph::element::f32:
+    case ngraph::element::f64:
+        return true;
+    default:
+        break;
+    }
+    return false;
+}
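
Editor's sketch (not part of the patch): the Philox round logic above can be exercised in isolation. The snippet below mirrors split_high_low(), calculate_round() and raise_key() from the reference implementation, using the same constants from the random123 paper; the key/counter values and all names here are illustrative only.

#include <cstdint>
#include <cstdio>
#include <utility>

static std::pair<uint32_t, uint32_t> split_high_low(uint64_t v) {
    return {static_cast<uint32_t>(v), static_cast<uint32_t>(v >> 32)};
}

static uint64_t unite(uint32_t high, uint32_t low) {
    return (static_cast<uint64_t>(high) << 32) + low;
}

int main() {
    // Same constants as in the reference header.
    const uint64_t mult_n = 0xD2511F53, mult_counter = 0xCD9E8D57;
    const uint32_t key_lo_inc = 0x9E3779B9, key_hi_inc = 0xBB67AE85;

    uint64_t key = 150, counter = 10, n = 0;  // illustrative seed values
    for (int round = 0; round < 10; ++round) {
        auto counter_lr = split_high_low(counter);
        auto key_lr = split_high_low(key);
        auto n_lr = split_high_low(n);
        // A 64-bit multiply yields mullo in the low half and mulhi in the high half.
        auto prod0 = split_high_low(mult_n * n_lr.first);
        auto prod1 = split_high_low(mult_counter * counter_lr.first);
        n_lr.first = prod1.second ^ n_lr.second ^ key_lr.first;
        n_lr.second = prod1.first;
        counter_lr.first = prod0.second ^ counter_lr.second ^ key_lr.second;
        counter_lr.second = prod0.first;
        counter = unite(counter_lr.second, counter_lr.first);
        n = unite(n_lr.second, n_lr.first);
        if (round < 9) {  // bump the key between rounds, as raise_key() does
            key_lr = split_high_low(key);
            key_lr.first += key_lo_inc;
            key_lr.second += key_hi_inc;
            key = unite(key_lr.second, key_lr.first);
        }
    }
    // The four 32-bit generator outputs are the halves of n and counter.
    std::printf("%08x %08x %08x %08x\n",
                split_high_low(n).first,
                split_high_low(n).second,
                split_high_low(counter).first,
                split_high_low(counter).second);
    return 0;
}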
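
A second minimal, self-contained sketch of the exponent/mantissa trick that the uint32_to_float() comments describe: forcing the sign to 0 and the exponent field to the bias (127) yields a float in [1, 2), and subtracting 1.0f maps it to [0, 1). Only the C++ standard library is assumed; the function name is hypothetical.

#include <cstdint>
#include <cstring>
#include <iostream>

// Build a float in [1, 2) from 23 random mantissa bits, then shift it to [0, 1).
float uint32_to_unit_float(uint32_t x) {
    const uint32_t bits = (127u << 23) | (x & 0x7fffffu);  // sign 0, exponent 127, random mantissa
    float f;
    std::memcpy(&f, &bits, sizeof(bits));  // type-pun without undefined behavior
    return f - 1.0f;
}

int main() {
    std::cout << uint32_to_unit_float(0x00000000u) << '\n';  // prints 0
    std::cout << uint32_to_unit_float(0xffffffffu) << '\n';  // prints a value just below 1
    return 0;
}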
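
Finally, a hedged usage sketch of the new reference entry point. It assumes the headers added by this patch are on the include path and ngraph is linked; the shape, buffer sizes and seeds are arbitrary. Carrying the returned (n, counter) state into the next call is what makes a second run continue the sequence instead of repeating it, which is the same reason the DisableRandomUniformConstantFolding pass keeps the op from being folded into a constant.

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

#include "ngraph/runtime/reference/random_uniform.hpp"

int main() {
    const std::vector<uint64_t> out_shape = {2, 3};         // generate 2x3 = 6 elements
    const ngraph::Shape out_shape_shape{out_shape.size()};  // shape of the "shape" input itself
    float min_val = 0.0f, max_val = 1.0f;
    std::vector<float> out(6);

    std::pair<uint64_t, uint64_t> state{0, 0};  // (n, counter) state carried between runs
    for (int run = 0; run < 2; ++run) {
        state = ngraph::runtime::reference::random_uniform(out_shape.data(),
                                                           reinterpret_cast<const char*>(&min_val),
                                                           reinterpret_cast<const char*>(&max_val),
                                                           reinterpret_cast<char*>(out.data()),
                                                           out_shape_shape,
                                                           ngraph::element::f32,
                                                           150,  // global seed
                                                           10,   // op seed
                                                           state);
        for (float v : out)
            std::cout << v << ' ';
        std::cout << '\n';  // the two runs print different values
    }
    return 0;
}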