From f52008153ca1b92571d9a3cf52af0497e68d2d4d Mon Sep 17 00:00:00 2001 From: Changqing Li Date: Mon, 20 Nov 2023 13:52:56 +0800 Subject: [PATCH] [UT] Add layer_norm unit test. (#61) --- include/dtype.h | 1 + include/layers_norm.h | 8 +- src/kernels/layernorm_kernels.cpp | 19 +++-- src/kernels/layernorm_kernels.h | 9 ++- src/layers/layer_norm.cpp | 10 +-- src/layers/rms_norm.cpp | 8 +- tests/ut/CMakeLists.txt | 1 + tests/ut/layers_norm_test.cpp | 117 ++++++++++++++++++++++++++++++ 8 files changed, 150 insertions(+), 23 deletions(-) create mode 100644 tests/ut/layers_norm_test.cpp diff --git a/include/dtype.h b/include/dtype.h index 48bed7f8..ae5653d6 100644 --- a/include/dtype.h +++ b/include/dtype.h @@ -16,6 +16,7 @@ namespace xft { enum class DataType { + fp32, bf16, fp16, int8, diff --git a/include/layers_norm.h b/include/layers_norm.h index 75d1778e..921ea8a9 100644 --- a/include/layers_norm.h +++ b/include/layers_norm.h @@ -18,8 +18,8 @@ namespace xft { -void invokeLayerNorm(DataType dt, void *output, const void *input, const void *gamma, const void *beta, const int rows, - const int size, int iStride = -1, int oStride = -1, const float epsilon = 1e-5); +void invokeLayerNorm(DataType dt, void *output, const void *input, const void *gamma, const void *beta, int rows, + int cols, int iStride = -1, int oStride = -1, float epsilon = 1e-5); void invokeRmsNorm(DataType dt, void *output, const void *input, const void *weight, int rows, int cols, int iStride = -1, int oStride = -1, float epsilon = 1e-6); @@ -30,7 +30,7 @@ class LayerNorm { LayerNorm(); ~LayerNorm(); - void setWeight(const float *gamma, const float *beta, int size); + void setWeight(const float *gamma, const float *beta, int cols); // input and output are in shape of (rows, normSize) // TODO: column-wise parallel @@ -49,7 +49,7 @@ class RmsNorm { RmsNorm(); ~RmsNorm(); - void setWeight(const float *w, const float *, int size); + void setWeight(const float *w, const float *, int cols); // input and 
output are in shape of (rows, normSize) void forward(const float *input, float *output, int rows, int iStride = -1, int oStride = -1, float epsilon = 1e-6); diff --git a/src/kernels/layernorm_kernels.cpp b/src/kernels/layernorm_kernels.cpp index 630419ca..623c887f 100644 --- a/src/kernels/layernorm_kernels.cpp +++ b/src/kernels/layernorm_kernels.cpp @@ -23,9 +23,10 @@ namespace xft { -void invokeLayerNorm(float *output, const float *input, const float *gamma, const float *beta, const int rows, - const int size, int iStride, int oStride, const float epsilon) { +void invokeLayerNorm(float *output, const float *input, const float *gamma, const float *beta, int rows, int cols, + int iStride, int oStride, float epsilon) { + int size = cols; if (iStride == -1) iStride = size; if (oStride == -1) oStride = size; @@ -78,8 +79,9 @@ void invokeLayerNorm(float *output, const float *input, const float *gamma, cons } void invokeLayerNorm(bfloat16_t *output, const bfloat16_t *input, const bfloat16_t *gamma, const bfloat16_t *beta, - const int rows, const int size, int iStride, int oStride, const float epsilon) { + int rows, int cols, int iStride, int oStride, float epsilon) { + int size = cols; if (iStride == -1) iStride = size; if (oStride == -1) oStride = size; @@ -131,11 +133,14 @@ void invokeLayerNorm(bfloat16_t *output, const bfloat16_t *input, const bfloat16 } } -void invokeLayerNorm(DataType dt, void *output, const void *input, const void *gamma, const void *beta, const int rows, - const int size, int iStride, int oStride, const float epsilon) { - if (dt == DataType::bf16) { +void invokeLayerNorm(DataType dt, void *output, const void *input, const void *gamma, const void *beta, int rows, + int cols, int iStride, int oStride, float epsilon) { + if (dt == DataType::fp32) { + invokeLayerNorm((float *)output, (const float *)input, (const float *)gamma, + (const float *)beta, rows, cols, iStride, oStride, epsilon); + } else if (dt == DataType::bf16) { 
invokeLayerNorm((bfloat16_t *)output, (const bfloat16_t *)input, (const bfloat16_t *)gamma, - (const bfloat16_t *)beta, rows, size, iStride, oStride, epsilon); + (const bfloat16_t *)beta, rows, cols, iStride, oStride, epsilon); } } diff --git a/src/kernels/layernorm_kernels.h b/src/kernels/layernorm_kernels.h index a6a03aee..c2851d1b 100644 --- a/src/kernels/layernorm_kernels.h +++ b/src/kernels/layernorm_kernels.h @@ -29,10 +29,13 @@ struct LayerNormWeight { const T *beta = nullptr; }; -void invokeLayerNorm(float *output, const float *input, const float *gamma, const float *beta, const int rows, - const int size, int iStride = -1, int oStride = -1, const float epsilon = 1e-5); +void invokeLayerNorm(float *output, const float *input, const float *gamma, const float *beta, int rows, + int cols, int iStride = -1, int oStride = -1, float epsilon = 1e-5); + +void invokeLayerNorm(float16_t *output, const float16_t *input, const float16_t *gamma, const float16_t *beta, + int rows, int cols, int iStride = -1, int oStride = -1, float epsilon = 1e-5); void invokeLayerNorm(bfloat16_t *output, const bfloat16_t *input, const bfloat16_t *gamma, const bfloat16_t *beta, - const int rows, const int size, int iStride = -1, int oStride = -1, const float epsilon = 1e-5); + int rows, int cols, int iStride = -1, int oStride = -1, float epsilon = 1e-5); } // namespace xft \ No newline at end of file diff --git a/src/layers/layer_norm.cpp b/src/layers/layer_norm.cpp index 26551c3a..63f896f2 100644 --- a/src/layers/layer_norm.cpp +++ b/src/layers/layer_norm.cpp @@ -33,11 +33,11 @@ LayerNorm::~LayerNorm() { if (weights) { free(weights); } } -void LayerNorm::setWeight(const float *gamma, const float *beta, int size) { - this->normSize = size; - this->weights = (float *)aligned_alloc(64, 2 * size * sizeof(float)); - memcpy(weights, gamma, size * sizeof(float)); - memcpy(weights + size, beta, size * sizeof(float)); +void LayerNorm::setWeight(const float *gamma, const float *beta, int cols) { 
+ this->normSize = cols; + this->weights = (float *)aligned_alloc(64, 2 * cols * sizeof(float)); + memcpy(weights, gamma, cols * sizeof(float)); + memcpy(weights + cols, beta, cols * sizeof(float)); } // input and output are in shape of (rows, normSize) diff --git a/src/layers/rms_norm.cpp b/src/layers/rms_norm.cpp index 9acca4d0..381dd220 100644 --- a/src/layers/rms_norm.cpp +++ b/src/layers/rms_norm.cpp @@ -32,10 +32,10 @@ RmsNorm::~RmsNorm() { if (weight) { free(weight); } } -void RmsNorm::setWeight(const float *w, const float *, int size) { - this->normSize = size; - this->weight = (float *)aligned_alloc(64, size * sizeof(float)); - memcpy(weight, w, size * sizeof(float)); +void RmsNorm::setWeight(const float *w, const float *, int cols) { + this->normSize = cols; + this->weight = (float *)aligned_alloc(64, cols * sizeof(float)); + memcpy(weight, w, cols * sizeof(float)); } // input and output are in shape of (rows, normSize) diff --git a/tests/ut/CMakeLists.txt b/tests/ut/CMakeLists.txt index 0e6e4b6d..c3e28ea9 100644 --- a/tests/ut/CMakeLists.txt +++ b/tests/ut/CMakeLists.txt @@ -84,6 +84,7 @@ foreach(src ${sources}) target_link_libraries(${executable} PUBLIC stdc++) target_link_libraries(${executable} PUBLIC mpi) target_link_libraries(${executable} PUBLIC numa) + target_link_libraries(${executable} PUBLIC xfastertransformer) # List of executable names and their corresponding libraries set(executables_need_gemm kv_reorder_test beam_search_test small_gemm_test) diff --git a/tests/ut/layers_norm_test.cpp b/tests/ut/layers_norm_test.cpp new file mode 100644 index 00000000..8ecf7f5c --- /dev/null +++ b/tests/ut/layers_norm_test.cpp @@ -0,0 +1,117 @@ +// Copyright (c) 2023 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// ============================================================================
+#include <cmath>
+#include <cstdlib>
+#include <type_traits>
+
+#include "bfloat16.h"
+#include "float16.h"
+#include "layers_norm.h"
+#include "gtest/gtest.h"
+
+// Scalar reference LayerNorm over a (rows, cols) matrix: every row is normalized with its own mean
+// and biased variance (divided by cols), scaled by gamma, shifted by beta. A stride of -1 means cols.
+template <typename T>
+static void layer_norm_ref(T *output, const T *input, const T *gamma, const T *beta, int rows, int cols,
+        int iStride = -1, int oStride = -1, const float epsilon = 1e-5) {
+    if (iStride == -1) iStride = cols;
+    if (oStride == -1) oStride = cols;
+
+    // Iterate over rows
+    for (int i = 0; i < rows; ++i) {
+        // Compute mean
+        float mean = 0.0;
+        for (int j = 0; j < cols; ++j) {
+            mean += input[i * iStride + j];
+        }
+        mean /= cols;
+
+        // Compute variance
+        float variance = 0.0;
+        for (int j = 0; j < cols; ++j) {
+            T diff = input[i * iStride + j] - mean;
+            variance += diff * diff;
+        }
+        variance /= cols;
+
+        // Normalize
+        T inv_std_dev = static_cast<T>(1.0 / std::sqrt(variance + epsilon));
+        for (int j = 0; j < cols; ++j) {
+            output[i * oStride + j] = gamma[j] * (input[i * iStride + j] - mean) * inv_std_dev + beta[j];
+        }
+    }
+}
+
+// Runs xft::invokeLayerNorm and layer_norm_ref on the same random input and expects them to agree.
+template <typename T>
+static void compareLayerNorm(int rows, int cols) {
+    T *input = (T *)aligned_alloc(64, rows * cols * sizeof(T));
+    T *gamma = (T *)aligned_alloc(64, cols * sizeof(T));
+    T *beta = (T *)aligned_alloc(64, cols * sizeof(T));
+    T *ourOutput = (T *)aligned_alloc(64, rows * cols * sizeof(T));
+    T *refOutput = (T *)aligned_alloc(64, rows * cols * sizeof(T));
+
+    for (int i = 0; i < rows * cols; ++i) {
+        input[i] = static_cast<T>(1.0f * rand() / RAND_MAX);
+    }
+
+    for (int i = 0; i < cols; ++i) { // identity affine parameters: gamma = 1, beta = 0
+        gamma[i] = static_cast<T>(1.0f);
+        beta[i] = static_cast<T>(0.0f);
+    }
+
+    // Pick the runtime type tag matching T for the type-erased entry point; reject anything else.
+    xft::DataType dt;
+    if constexpr (std::is_same<T, float>::value) {
+        dt = xft::DataType::fp32;
+    } else if constexpr (std::is_same<T, float16_t>::value) {
+        dt = xft::DataType::fp16;
+    } else {
+        static_assert(std::is_same<T, bfloat16_t>::value, "compareLayerNorm: unsupported element type");
+        dt = xft::DataType::bf16;
+    }
+    xft::invokeLayerNorm(dt, ourOutput, input, gamma, beta, rows, cols);
+    layer_norm_ref<T>(refOutput, input, gamma, beta, rows, cols);
+
+    // Two-sided check: the error may have either sign, so compare its magnitude to the tolerance.
+    for (int i = 0; i < rows * cols; ++i) {
+        EXPECT_LT(std::fabs(static_cast<float>(refOutput[i]) - static_cast<float>(ourOutput[i])), 0.01);
+    }
+
+    free(input);
+    free(gamma);
+    free(beta);
+    free(ourOutput);
+    free(refOutput);
+}
+
+TEST(LayerNorm, float) {
+    compareLayerNorm<float>(128, 128);
+    compareLayerNorm<float>(5120, 5120);
+    compareLayerNorm<float>(5120, 5120 * 3);
+    compareLayerNorm<float>(rand() % 100 + 100, rand() % 100 + 100);
+}
+
+TEST(LayerNorm, bfloat16_t) {
+    compareLayerNorm<bfloat16_t>(128, 128);
+    compareLayerNorm<bfloat16_t>(5120, 5120);
+    compareLayerNorm<bfloat16_t>(5120, 5120 * 3);
+    compareLayerNorm<bfloat16_t>(rand() % 100 + 100, rand() % 100 + 100);
+}
+
+int main(int argc, char **argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
\ No newline at end of file