[UT] Add layer_norm unit test. (#61)
changqi1 authored Nov 20, 2023
1 parent de72f01 commit f520081
Showing 8 changed files with 150 additions and 23 deletions.
1 change: 1 addition & 0 deletions include/dtype.h
@@ -16,6 +16,7 @@

 namespace xft {
 enum class DataType {
+    fp32,
     bf16,
     fp16,
     int8,
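The new `fp32` enumerator lets the type-erased `invokeLayerNorm(DataType, ...)` entry point dispatch to the existing float kernel (see src/kernels/layernorm_kernels.cpp below).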
8 changes: 4 additions & 4 deletions include/layers_norm.h
@@ -18,8 +18,8 @@

 namespace xft {

-void invokeLayerNorm(DataType dt, void *output, const void *input, const void *gamma, const void *beta, const int rows,
-        const int size, int iStride = -1, int oStride = -1, const float epsilon = 1e-5);
+void invokeLayerNorm(DataType dt, void *output, const void *input, const void *gamma, const void *beta, int rows,
+        int cols, int iStride = -1, int oStride = -1, float epsilon = 1e-5);

 void invokeRmsNorm(DataType dt, void *output, const void *input, const void *weight, int rows, int cols,
         int iStride = -1, int oStride = -1, float epsilon = 1e-6);
@@ -30,7 +30,7 @@ class LayerNorm {
     LayerNorm();
     ~LayerNorm();

-    void setWeight(const float *gamma, const float *beta, int size);
+    void setWeight(const float *gamma, const float *beta, int cols);

     // input and output are in shape of (rows, normSize)
     // TODO: column-wise parallel
@@ -49,7 +49,7 @@ class RmsNorm {
     RmsNorm();
     ~RmsNorm();

-    void setWeight(const float *w, const float *, int size);
+    void setWeight(const float *w, const float *, int cols);

     // input and output are in shape of (rows, normSize)
     void forward(const float *input, float *output, int rows, int iStride = -1, int oStride = -1, float epsilon = 1e-6);
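For context, a minimal sketch of calling the updated type-erased entry point on fp32 data; the buffer names and fill code here are illustrative, not part of the commit:

    #include <cstdlib>

    #include "layers_norm.h"

    int main() {
        const int rows = 4, cols = 768;  // illustrative shapes
        float *input = (float *)aligned_alloc(64, rows * cols * sizeof(float));
        float *output = (float *)aligned_alloc(64, rows * cols * sizeof(float));
        float *gamma = (float *)aligned_alloc(64, cols * sizeof(float));
        float *beta = (float *)aligned_alloc(64, cols * sizeof(float));
        for (int i = 0; i < rows * cols; ++i) input[i] = 1.0f * rand() / RAND_MAX;
        for (int j = 0; j < cols; ++j) { gamma[j] = 1.0f; beta[j] = 0.0f; }

        // iStride/oStride default to -1 (resolved to cols); epsilon defaults to 1e-5.
        xft::invokeLayerNorm(xft::DataType::fp32, output, input, gamma, beta, rows, cols);

        free(input); free(output); free(gamma); free(beta);
        return 0;
    }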
19 changes: 12 additions & 7 deletions src/kernels/layernorm_kernels.cpp
@@ -23,9 +23,10 @@

 namespace xft {

-void invokeLayerNorm(float *output, const float *input, const float *gamma, const float *beta, const int rows,
-        const int size, int iStride, int oStride, const float epsilon) {
+void invokeLayerNorm(float *output, const float *input, const float *gamma, const float *beta, int rows, int cols,
+        int iStride, int oStride, float epsilon) {

+    int size = cols;
     if (iStride == -1) iStride = size;
     if (oStride == -1) oStride = size;

@@ -78,8 +79,9 @@
 }

 void invokeLayerNorm(bfloat16_t *output, const bfloat16_t *input, const bfloat16_t *gamma, const bfloat16_t *beta,
-        const int rows, const int size, int iStride, int oStride, const float epsilon) {
+        int rows, int cols, int iStride, int oStride, float epsilon) {

+    int size = cols;
     if (iStride == -1) iStride = size;
     if (oStride == -1) oStride = size;

@@ -131,11 +133,14 @@
     }
 }

-void invokeLayerNorm(DataType dt, void *output, const void *input, const void *gamma, const void *beta, const int rows,
-        const int size, int iStride, int oStride, const float epsilon) {
-    if (dt == DataType::bf16) {
+void invokeLayerNorm(DataType dt, void *output, const void *input, const void *gamma, const void *beta, int rows,
+        int cols, int iStride, int oStride, float epsilon) {
+    if (dt == DataType::fp32) {
+        invokeLayerNorm((float *)output, (const float *)input, (const float *)gamma,
+                (const float *)beta, rows, cols, iStride, oStride, epsilon);
+    } else if (dt == DataType::bf16) {
         invokeLayerNorm((bfloat16_t *)output, (const bfloat16_t *)input, (const bfloat16_t *)gamma,
-                (const bfloat16_t *)beta, rows, size, iStride, oStride, epsilon);
+                (const bfloat16_t *)beta, rows, cols, iStride, oStride, epsilon);
     }
 }
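All of these overloads compute standard layer normalization: each row of the (rows x cols) input is normalized by its own mean and variance, then scaled and shifted per column, matching the reference implementation in the new unit test:

    y_{ij} = \gamma_j \, \frac{x_{ij} - \mu_i}{\sqrt{\sigma_i^2 + \epsilon}} + \beta_j,
    \quad \mu_i = \frac{1}{C} \sum_{j=1}^{C} x_{ij},
    \quad \sigma_i^2 = \frac{1}{C} \sum_{j=1}^{C} (x_{ij} - \mu_i)^2,

where C = cols and epsilon defaults to 1e-5.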
9 changes: 6 additions & 3 deletions src/kernels/layernorm_kernels.h
@@ -29,10 +29,13 @@ struct LayerNormWeight {
     const T *beta = nullptr;
 };

-void invokeLayerNorm(float *output, const float *input, const float *gamma, const float *beta, const int rows,
-        const int size, int iStride = -1, int oStride = -1, const float epsilon = 1e-5);
+void invokeLayerNorm(float *output, const float *input, const float *gamma, const float *beta, int rows,
+        int cols, int iStride = -1, int oStride = -1, float epsilon = 1e-5);
+
+void invokeLayerNorm(float16_t *output, const float16_t *input, const float16_t *gamma, const float16_t *beta,
+        int rows, int cols, int iStride = -1, int oStride = -1, float epsilon = 1e-5);

 void invokeLayerNorm(bfloat16_t *output, const bfloat16_t *input, const bfloat16_t *gamma, const bfloat16_t *beta,
-        const int rows, const int size, int iStride = -1, int oStride = -1, const float epsilon = 1e-5);
+        int rows, int cols, int iStride = -1, int oStride = -1, float epsilon = 1e-5);

 } // namespace xft
10 changes: 5 additions & 5 deletions src/layers/layer_norm.cpp
@@ -33,11 +33,11 @@ LayerNorm::~LayerNorm() {
     if (weights) { free(weights); }
 }

-void LayerNorm::setWeight(const float *gamma, const float *beta, int size) {
-    this->normSize = size;
-    this->weights = (float *)aligned_alloc(64, 2 * size * sizeof(float));
-    memcpy(weights, gamma, size * sizeof(float));
-    memcpy(weights + size, beta, size * sizeof(float));
+void LayerNorm::setWeight(const float *gamma, const float *beta, int cols) {
+    this->normSize = cols;
+    this->weights = (float *)aligned_alloc(64, 2 * cols * sizeof(float));
+    memcpy(weights, gamma, cols * sizeof(float));
+    memcpy(weights + cols, beta, cols * sizeof(float));
 }

 // input and output are in shape of (rows, normSize)
8 changes: 4 additions & 4 deletions src/layers/rms_norm.cpp
@@ -32,10 +32,10 @@ RmsNorm::~RmsNorm() {
     if (weight) { free(weight); }
 }

-void RmsNorm::setWeight(const float *w, const float *, int size) {
-    this->normSize = size;
-    this->weight = (float *)aligned_alloc(64, size * sizeof(float));
-    memcpy(weight, w, size * sizeof(float));
+void RmsNorm::setWeight(const float *w, const float *, int cols) {
+    this->normSize = cols;
+    this->weight = (float *)aligned_alloc(64, cols * sizeof(float));
+    memcpy(weight, w, cols * sizeof(float));
 }

 // input and output are in shape of (rows, normSize)
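A minimal usage sketch of the class-level API, using the `RmsNorm::setWeight` and `forward` signatures declared in include/layers_norm.h; the surrounding buffers are illustrative:

    xft::RmsNorm rmsNorm;
    rmsNorm.setWeight(w, nullptr, cols);   // second pointer is ignored for RMS norm
    rmsNorm.forward(input, output, rows);  // strides default to -1, epsilon to 1e-6

Here `w`, `input`, and `output` are assumed to be float buffers of length cols, rows * cols, and rows * cols respectively.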
1 change: 1 addition & 0 deletions tests/ut/CMakeLists.txt
@@ -84,6 +84,7 @@ foreach(src ${sources})
     target_link_libraries(${executable} PUBLIC stdc++)
     target_link_libraries(${executable} PUBLIC mpi)
     target_link_libraries(${executable} PUBLIC numa)
+    target_link_libraries(${executable} PUBLIC xfastertransformer)

     # List of executable names and their corresponding libraries
     set(executables_need_gemm kv_reorder_test beam_search_test small_gemm_test)
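Linking every unit-test executable against `xfastertransformer` is what gives the new test access to the library's `xft::invokeLayerNorm` implementation.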
117 changes: 117 additions & 0 deletions tests/ut/layers_norm_test.cpp
@@ -0,0 +1,117 @@
// Copyright (c) 2023 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ============================================================================
#include <cmath>
#include <cstdlib>
#include <type_traits>

#include "bfloat16.h"
#include "float16.h"
#include "layers_norm.h"
#include "gtest/gtest.h"

// Reference implementation. Intermediate values are accumulated in float so
// that low-precision types (e.g. bfloat16_t) do not lose accuracy in the
// reference itself.
template <typename T>
static void layer_norm_ref(T *output, const T *input, const T *gamma, const T *beta, int rows, int cols,
        int iStride = -1, int oStride = -1, const float epsilon = 1e-5) {
    if (iStride == -1) iStride = cols;
    if (oStride == -1) oStride = cols;

    // Iterate over rows
    for (int i = 0; i < rows; ++i) {
        // Compute mean
        float mean = 0.0;
        for (int j = 0; j < cols; ++j) {
            mean += input[i * iStride + j];
        }
        mean /= cols;

        // Compute variance
        float variance = 0.0;
        for (int j = 0; j < cols; ++j) {
            float diff = (float)input[i * iStride + j] - mean;
            variance += diff * diff;
        }
        variance /= cols;

        // Normalize, scale, and shift; cast to T only at the end
        float inv_std_dev = 1.0f / std::sqrt(variance + epsilon);
        for (int j = 0; j < cols; ++j) {
            output[i * oStride + j] = static_cast<T>(
                    (float)gamma[j] * ((float)input[i * iStride + j] - mean) * inv_std_dev + (float)beta[j]);
        }
    }
}

template <typename T>
static void compareLayerNorm(int rows, int cols) {
    T *input = (T *)aligned_alloc(64, rows * cols * sizeof(T));
    T *gamma = (T *)aligned_alloc(64, cols * sizeof(T));
    T *beta = (T *)aligned_alloc(64, cols * sizeof(T));
    T *ourOutput = (T *)aligned_alloc(64, rows * cols * sizeof(T));
    T *refOutput = (T *)aligned_alloc(64, rows * cols * sizeof(T));

    for (int i = 0; i < rows * cols; ++i) {
        input[i] = static_cast<T>(1.0f * rand() / RAND_MAX);
    }

    for (int i = 0; i < cols; ++i) {
        gamma[i] = static_cast<T>(1.0f);
    }

    for (int i = 0; i < cols; ++i) {
        beta[i] = static_cast<T>(0.0f);
    }

    if constexpr (std::is_same<T, float>::value) {
        xft::invokeLayerNorm(xft::DataType::fp32, (void *)ourOutput, (const void *)input, (const void *)gamma,
                (const void *)beta, rows, cols);
        layer_norm_ref<float>(refOutput, (const T *)input, (const T *)gamma, (const T *)beta, rows, cols);
    } else if constexpr (std::is_same<T, float16_t>::value) {
        xft::invokeLayerNorm(xft::DataType::fp16, (void *)ourOutput, (const void *)input, (const void *)gamma,
                (const void *)beta, rows, cols);
        layer_norm_ref<float16_t>(refOutput, (const T *)input, (const T *)gamma, (const T *)beta, rows, cols);
    } else if constexpr (std::is_same<T, bfloat16_t>::value) {
        xft::invokeLayerNorm(xft::DataType::bf16, (void *)ourOutput, (const void *)input, (const void *)gamma,
                (const void *)beta, rows, cols);
        layer_norm_ref<bfloat16_t>(refOutput, (const T *)input, (const T *)gamma, (const T *)beta, rows, cols);
    }

    // Compare with an absolute tolerance; EXPECT_NEAR checks |ref - ours|,
    // so deviations in either direction are caught.
    for (int i = 0; i < rows * cols; ++i) {
        EXPECT_NEAR((float)refOutput[i], (float)ourOutput[i], 0.01);
    }

    free(input);
    free(gamma);
    free(beta);
    free(ourOutput);
    free(refOutput);
}

TEST(LayerNorm, float) {
    compareLayerNorm<float>(128, 128);
    compareLayerNorm<float>(5120, 5120);
    compareLayerNorm<float>(5120, 5120 * 3);
    compareLayerNorm<float>(rand() % 100 + 100, rand() % 100 + 100);
}

TEST(LayerNorm, bfloat16_t) {
    compareLayerNorm<bfloat16_t>(128, 128);
    compareLayerNorm<bfloat16_t>(5120, 5120);
    compareLayerNorm<bfloat16_t>(5120, 5120 * 3);
    compareLayerNorm<bfloat16_t>(rand() % 100 + 100, rand() % 100 + 100);
}

int main(int argc, char **argv) {
    ::testing::InitGoogleTest(&argc, argv);
    return RUN_ALL_TESTS();
}
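Once built, the resulting GoogleTest binary (presumably named layers_norm_test after the source file, per the UT CMake rules above) can be run directly or narrowed with the standard --gtest_filter=LayerNorm.* option.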
