feat: add embeddings, lstm, gelu #68

Merged · 16 commits · Sep 30, 2024
14 changes: 14 additions & 0 deletions crates/core/src/cpu/activation.rs
@@ -7,6 +7,9 @@ pub struct CPUActivation {

type ActivationFn = fn(x: &f32) -> f32;

const ROOT_2_BY_PI: f32 = 0.7978845608028654;
const GELU_APPROX: f32 = 0.044715;

impl CPUActivation {
pub fn from(activation: Activation) -> Self {
let (activate, prime): (ActivationFn, ActivationFn) = match activation {
@@ -15,6 +18,7 @@ impl CPUActivation {
Activation::Linear => (linear, linear_prime),
Activation::Relu => (relu, relu_prime),
Activation::Relu6 => (relu6, relu6_prime),
Activation::Gelu => (gelu, gelu_prime),
Activation::Selu => (selu, selu_prime),
Activation::Sigmoid => (sigmoid, sigmoid_prime),
Activation::Tanh => (tanh, tanh_prime),
@@ -75,6 +79,16 @@ fn relu_prime(x: &f32) -> f32 {
return if *x > 0.0 { 1.0 } else { 0.0 };
}

fn gelu(x: &f32) -> f32 {
return (0.5 * x) * (1.0 + (ROOT_2_BY_PI * (x + GELU_APPROX * x.powi(3))).tanh());
}

fn gelu_prime(x: &f32) -> f32 {
let tanned = (ROOT_2_BY_PI * (x + GELU_APPROX * x.powi(3))).tanh();
return (0.5 * (1.0 + tanned))
+ (0.5 * x * (1.0 - tanned.powi(2))) * ROOT_2_BY_PI * (1.0 + 3.0 * GELU_APPROX * x.powi(2));
}
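
For reference, `ROOT_2_BY_PI` is $\sqrt{2/\pi} \approx 0.7978845608$, and the two functions above follow the tanh approximation of GELU and its derivative. With $t = \tanh\bigl(\sqrt{2/\pi}\,(x + 0.044715\,x^{3})\bigr)$:

$$\operatorname{GELU}(x) \approx 0.5\,x\,(1 + t), \qquad \operatorname{GELU}'(x) \approx 0.5\,(1 + t) + 0.5\,x\,(1 - t^{2})\,\sqrt{\tfrac{2}{\pi}}\,\bigl(1 + 3 \cdot 0.044715\,x^{2}\bigr).$$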

fn relu6(x: &f32) -> f32 {
return x.max(0.0).min(6.0);
}
28 changes: 24 additions & 4 deletions crates/core/src/cpu/backend.rs
@@ -11,6 +11,8 @@ use crate::{
Pool2DCPULayer, PostProcessor, SoftmaxCPULayer, Tensor, Tensors, Timer,
};

use super::{EmbeddingCPULayer, LSTMCPULayer};

pub struct Backend {
pub silent: bool,
pub config: BackendConfig,
@@ -71,18 +73,28 @@ impl Backend {
size = layer.output_size().to_vec();
layers.push(CPULayer::Dense(layer));
}
Layer::Flatten(config) => {
let layer = FlattenCPULayer::new(config, IxDyn(&size));
Layer::Embedding(config) => {
let layer = EmbeddingCPULayer::new(config, IxDyn(&size));
size = layer.output_size().to_vec();
layers.push(CPULayer::Embedding(layer));
}
Layer::Flatten => {
let layer = FlattenCPULayer::new(IxDyn(&size));
size = layer.output_size().to_vec();
layers.push(CPULayer::Flatten(layer));
}
Layer::LSTM(config) => {
let layer = LSTMCPULayer::new(config, IxDyn(&size), None);
size = layer.output_size().to_vec();
layers.push(CPULayer::LSTM(layer));
}
Layer::Pool2D(config) => {
let layer = Pool2DCPULayer::new(config, IxDyn(&size));
size = layer.output_size().to_vec();
layers.push(CPULayer::Pool2D(layer));
}
Layer::Softmax => {
let layer = SoftmaxCPULayer::new(IxDyn(&size));
Layer::Softmax(config) => {
let layer = SoftmaxCPULayer::new(config, IxDyn(&size));
layers.push(CPULayer::Softmax(layer));
}
}
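
The constructor threads the running `size` vector through each layer so the next layer can size itself from the previous one's output. A minimal sketch of that bookkeeping for an assumed Embedding followed by Flatten stack (the concrete numbers are illustrative, not taken from this PR):

fn main() {
    // Mirrors the `size` bookkeeping above; batch, seq_len and embedding_size are made up.
    let mut size: Vec<usize> = vec![32, 10];           // network input: [batch, seq_len]
    let embedding_size = 16;                           // assumed EmbeddingLayer config value
    size = vec![size[0], size[1], embedding_size];     // after Embedding: [32, 10, 16]
    size = vec![size[0], size[1..].iter().product()];  // after Flatten:   [32, 160]
    assert_eq!(size, vec![32, 160]);
}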
@@ -125,7 +137,10 @@
}
}
None => {
// let mut i = 0;
for layer in &mut self.layers {
// i += 1;
// println!("\n\nLayer +{}: {:?}", i, &inputs);
inputs = layer.forward_propagate(inputs, training);
}
}
@@ -138,9 +153,14 @@
outputs: ArrayViewD<'b, f32>,
data: ArrayViewD<'b, f32>,
) -> ArrayD<f32> {
// println!("\n\nOutput: {:?}", &outputs);
let mut d_outputs = (self.cost.prime)(outputs, data);
// println!("\n\nD Output: {:?}", &d_outputs);
// let mut i = 0;
for layer in self.layers.iter_mut().rev() {
// i += 1;
d_outputs = layer.backward_propagate(d_outputs);
// println!("\n\nLayer -{}: {:?}", i, &d_outputs);
}
d_outputs
}
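
In other words, backpropagation seeds the gradient with the derivative of the cost and then lets every layer, visited in reverse order, turn the gradient of its outputs into the gradient of its inputs:

$$\delta^{(L)} = \frac{\partial C(\hat{y}, y)}{\partial \hat{y}}, \qquad \delta^{(l-1)} = \mathrm{layer}_{l}.\mathrm{backward\_propagate}\bigl(\delta^{(l)}\bigr), \quad l = L, L-1, \dots, 1.$$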
45 changes: 45 additions & 0 deletions crates/core/src/cpu/layer_norm.rs
@@ -0,0 +1,45 @@
extern crate ndarray;
use ndarray::{Array1, ArrayD, Axis};

pub struct LayerNorm {
pub gamma: Array1<f32>,
pub beta: Array1<f32>,
pub epsilon: f32,
}

impl LayerNorm {
pub fn new(hidden_size: usize, epsilon: f32) -> Self {
LayerNorm {
gamma: Array1::ones(hidden_size),
beta: Array1::zeros(hidden_size),
epsilon,
}
}

pub fn forward(&self, input: ArrayD<f32>) -> ArrayD<f32> {
let shape = input.shape();
let last_axis = shape.len() - 1;

let mean = input.mean_axis(Axis(last_axis)).unwrap();
let variance = input.var_axis(Axis(last_axis), 0.0);

let mut normalized_input = input.clone();
normalized_input
.axis_iter_mut(Axis(last_axis))
.enumerate()
.for_each(|(i, mut row)| {
let mean_i = mean[i];
let var_i = variance[i].sqrt() + self.epsilon;
row -= mean_i;
row /= var_i;
});

normalized_input
.axis_iter_mut(Axis(last_axis))
.for_each(|mut item| {
let new = &item * &self.gamma + &self.beta;
item.assign(&new);
});
normalized_input
}
}
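
In formula form, the forward pass above is intended to compute a per-sample normalization over the last (hidden) axis, with `epsilon` added to the standard deviation rather than to the variance:

$$\hat{x} = \frac{x - \mu}{\sqrt{\sigma^{2}} + \epsilon}, \qquad \mathrm{LN}(x) = \gamma \odot \hat{x} + \beta,$$

where $\mu$ and $\sigma^{2}$ are the mean and (biased) variance along the last axis, and $\gamma$, $\beta$ are the learnable scale and shift initialized to ones and zeros.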
15 changes: 9 additions & 6 deletions crates/core/src/cpu/layers/activation.rs
@@ -1,7 +1,7 @@
use ndarray::{s, ArrayD, Dimension, IxDyn};
use std::ops::{Div, Mul, Sub};
use std::{f32::EPSILON, ops::{Div, Mul, Sub}};

use crate::{ActivationLayer, CPUActivation};
use crate::{ActivationLayer, CPUActivation, SoftmaxLayer};

pub struct ActivationCPULayer {
pub outputs: ArrayD<f32>,
@@ -45,11 +45,13 @@ impl ActivationCPULayer {

pub struct SoftmaxCPULayer {
pub outputs: ArrayD<f32>,
pub temperature: f32,
}

impl SoftmaxCPULayer {
pub fn new(size: IxDyn) -> Self {
pub fn new(config: SoftmaxLayer, size: IxDyn) -> Self {
Self {
temperature: config.temperature.unwrap_or(1f32),
outputs: ArrayD::zeros(size),
}
}
@@ -68,18 +70,19 @@ impl SoftmaxCPULayer {
self.outputs = inputs.clone();
let batches = self.outputs.dim()[0];
for b in 0..batches {
let exp = inputs.slice(s![b, ..]).map(|x| x.exp());
let current_input = inputs.slice(s![b, ..]).map(|x| x / self.temperature);
let max = current_input.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
let exp = current_input.map(|x| (x - max).exp());
self.outputs
.slice_mut(s![b, ..])
.assign(&exp.clone().div(exp.sum()));
.assign(&exp.clone().div(exp.sum() + EPSILON));
}
self.outputs.clone().into_dyn()
}
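
With temperature $T$ (defaulting to 1) and the max subtraction used above for numerical stability, each batch row is mapped to

$$\mathrm{softmax}(x)_{i} = \frac{e^{x_{i}/T - m}}{\sum_{j} e^{x_{j}/T - m} + \epsilon}, \qquad m = \max_{j} \frac{x_{j}}{T},$$

so larger temperatures flatten the distribution while temperatures below 1 sharpen it.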

pub fn backward_propagate(&mut self, d_outputs: ArrayD<f32>) -> ArrayD<f32> {
let batches = self.outputs.dim()[0];
let array_size = self.outputs.dim().size() / batches;

let mut d_inputs = ArrayD::zeros(self.outputs.dim());
for b in 0..batches {
for y in 0..array_size {
2 changes: 1 addition & 1 deletion crates/core/src/cpu/layers/dropout.rs
@@ -30,7 +30,7 @@ impl Dropout1DCPULayer {

pub fn forward_propagate(&mut self, inputs: ArrayD<f32>, training: bool) -> ArrayD<f32> {
if training {
self.mask = ArrayD::random(self.mask.dim(), Uniform::new(0.0, 1.0))
self.mask = ArrayD::random(inputs.dim(), Uniform::new(0.0, 1.0))
.map(|x| (if x > &self.probability { 1.0 } else { 0.0 }));
inputs.mul(&self.mask).mul(1.0 / 1.0 - self.probability)
} else {
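
For reference, standard inverted dropout builds the mask by thresholding uniform noise, keeping each activation with probability $1 - p$ and rescaling the survivors by $1/(1 - p)$ so that the expected activation is unchanged during training:

$$m_{i} = \mathbf{1}\bigl[u_{i} > p\bigr], \quad u_{i} \sim \mathcal{U}(0, 1), \qquad \mathbb{E}\!\left[\frac{m_{i}\,x_{i}}{1 - p}\right] = x_{i}.$$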
68 changes: 68 additions & 0 deletions crates/core/src/cpu/layers/embedding.rs
@@ -0,0 +1,68 @@
use std::ops::AddAssign;
use ndarray::{Array2, ArrayD, Axis, Ix2, IxDyn};

use crate::{CPUInit, CPURegularizer, EmbeddingLayer, Init};

pub struct EmbeddingCPULayer {
pub input_size: IxDyn,
pub input_indices: Vec<usize>,
pub output_size: Vec<usize>,
pub vocab_size: usize,
pub embedding_size: usize,
pub embeddings: Array2<f32>,
pub d_embeddings: Array2<f32>,
// regularization
pub l_embeddings: Array2<f32>,

pub regularizer: CPURegularizer,
}

impl EmbeddingCPULayer {
pub fn new(config: EmbeddingLayer, size: IxDyn) -> Self {
let init = CPUInit::from(Init::Uniform);
let output_size = vec![size[0], size[1], config.embedding_size];
let embeddings = init.init(IxDyn(&[config.vocab_size, config.embedding_size]), 0, 0).into_dimensionality::<Ix2>().unwrap();
let d_embeddings = Array2::zeros((config.vocab_size, config.embedding_size));
Self {
input_size: size,
input_indices: vec![],
output_size,
vocab_size: config.vocab_size,
embedding_size: config.embedding_size,
embeddings,
d_embeddings,
l_embeddings: Array2::zeros((config.vocab_size, config.embedding_size)),
regularizer: CPURegularizer::from(config.c.unwrap_or(0.0), config.l1_ratio.unwrap_or(1.0))
}
}

pub fn output_size(&self) -> Vec<usize> {
self.output_size.clone()
}

pub fn reset(&mut self, batches: usize) {
self.output_size[0] = batches
}

pub fn forward_propagate(&mut self, inputs: ArrayD<f32>) -> ArrayD<f32> {
let input_indices: Vec<usize> = inputs.iter().map(|&x| x as usize).collect();
self.input_indices = input_indices.clone();
let embeddings = self.embeddings.select(Axis(0), input_indices.as_slice());
// let output_size = IxDyn(&self.output_size);
embeddings.into_shape_with_order(IxDyn(&[inputs.shape()[0], inputs.shape()[1], self.embedding_size])).unwrap()
}

pub fn backward_propagate(&mut self, d_outputs: ArrayD<f32>) -> ArrayD<f32> {
let indices = Array2::from_shape_vec(Ix2(d_outputs.shape()[0], self.input_size[1]), self.input_indices.clone());
self.d_embeddings = Array2::zeros((self.d_embeddings.shape()[0], self.d_embeddings.shape()[1]));
d_outputs.axis_iter(Axis(0)).zip(indices).for_each(|(rec, i)| {
rec.axis_iter(Axis(0)).zip(i).for_each(|(grad, idx)| {
self.d_embeddings.index_axis_mut(Axis(0), idx).add_assign(&grad);
});
});
self.l_embeddings = self.regularizer.coeff(&self.embeddings.clone().into_dyn()).into_dimensionality::<Ix2>().unwrap();
let mut input_size = self.input_size.clone();
input_size[0] = d_outputs.shape()[0];
ArrayD::from_shape_vec(input_size, self.input_indices.iter().map(|x| *x as f32).collect()).unwrap()
}
}
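
The forward pass is a row lookup into the `[vocab_size, embedding_size]` table followed by a reshape to `[batch, seq_len, embedding_size]`; the backward pass scatters the incoming gradients back onto the rows that were looked up. A minimal, self-contained sketch of the lookup itself, with made-up shapes and values (not code from this PR):

use ndarray::{array, Axis, IxDyn};

fn main() {
    // Assumed table: vocab_size = 3, embedding_size = 2.
    let embeddings = array![[0.1_f32, 0.2], [0.3, 0.4], [0.5, 0.6]];
    // A [2, 2] batch of token ids, flattened in row-major order.
    let ids: Vec<usize> = vec![2, 0, 1, 1];
    // Row lookup, then reshape to [batch, seq_len, embedding_size].
    let looked_up = embeddings.select(Axis(0), &ids);
    let output = looked_up
        .into_shape_with_order(IxDyn(&[2, 2, 2]))
        .unwrap();
    assert_eq!(output.shape(), &[2, 2, 2]);
}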
24 changes: 8 additions & 16 deletions crates/core/src/cpu/layers/flatten.rs
@@ -1,26 +1,15 @@
use ndarray::{ArrayD, Dimension, IxDyn};

use crate::FlattenLayer;

pub struct FlattenCPULayer {
pub input_size: IxDyn,
pub output_size: Vec<usize>,
}

impl FlattenCPULayer {
pub fn new(config: FlattenLayer, size: IxDyn) -> Self {
let mut new_size = config.size.clone();
new_size.insert(0, size[0]);
let output_size = IxDyn(&new_size);
if output_size.size() != size.size() {
panic!(
"Shape {:#?} is incompatible with shape {:#?}",
output_size, size
)
}
pub fn new(size: IxDyn) -> Self {
Self {
input_size: size,
output_size: new_size,
input_size: size.clone(),
output_size: vec![size[0], size.size() / size[0]],
}
}

@@ -33,11 +22,14 @@ impl FlattenCPULayer {
}

pub fn forward_propagate(&mut self, inputs: ArrayD<f32>) -> ArrayD<f32> {
let output_size = IxDyn(&self.output_size);
let output_size = IxDyn(&[inputs.shape()[0], self.output_size[1]]);
println!("O {:?} {:?}", inputs.shape(), self.output_size);
inputs.into_shape_with_order(output_size).unwrap()
}

pub fn backward_propagate(&mut self, d_outputs: ArrayD<f32>) -> ArrayD<f32> {
d_outputs.into_shape_with_order(self.input_size.clone()).unwrap()
let mut current_size = self.input_size.clone();
current_size[0] = d_outputs.shape()[0];
d_outputs.to_shape(current_size).unwrap().to_owned()
}
}
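
A quick shape check of the behaviour above, with illustrative dimensions only: everything after the batch axis is collapsed on the way forward, and the gradient is reshaped back to the stored input shape on the way back.

use ndarray::{ArrayD, IxDyn};

fn main() {
    // Forward: [4, 10, 16] -> [4, 160]; backward would reshape the gradient back again.
    let inputs = ArrayD::<f32>::zeros(IxDyn(&[4, 10, 16]));
    let flat = inputs.into_shape_with_order(IxDyn(&[4, 160])).unwrap();
    assert_eq!(flat.shape(), &[4, 160]);
}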