From aacb77e15f0f123b5204565710d7a2ffbce7cff4 Mon Sep 17 00:00:00 2001
From: Kye
Date: Wed, 21 Feb 2024 08:23:18 -0800
Subject: [PATCH] [CLEANUP]

---
 limoe/main.py | 52 ++++++++--------------------------------------------
 1 file changed, 8 insertions(+), 44 deletions(-)

diff --git a/limoe/main.py b/limoe/main.py
index cdd0916..e6b83ee 100644
--- a/limoe/main.py
+++ b/limoe/main.py
@@ -1,9 +1,9 @@
 import torch
 from torch import nn, Tensor
 from zeta.nn import (
-    MixtureOfExperts,
     Attention,
 )
+from swarms_torch import SimpleMoE
 
 
 class DenseEncoderLayer(nn.Module):
@@ -61,14 +61,13 @@ def __init__(
         gpu = "cuda" if torch.cuda.is_available() else "cpu"
 
         # Experts
-        self.experts = MixtureOfExperts(
-            dim=self.dim,
-            num_experts=self.num_experts,
-            # dim_head=self.dim_head,
-            dropout=self.dropout,
-            ff_mult=ff_mult,
+        self.experts = SimpleMoE(
+            dim,
+            dim * ff_mult,
+            dim,
+            num_experts,
+            ff_mult,
         )
-
         # Attention
         self.attn = Attention(
             dim,
@@ -113,39 +112,4 @@ def forward(self, x: Tensor):
     dropout=0.1,
     ff_mult=4,
 )
-print(model(x).shape)
-
-
-# LiMoE: Linear Mixture of Experts
-# class LiMoE(nn.Module):
-#     def __init__(
-#         self,
-#         dim: int,
-#         depth: int,
-#         num_experts: int,
-#         dim_head: int,
-#         dropout: float,
-#         *args,
-#         **kwargs,
-#     ):
-#         super().__init__()
-#         self.dim = dim
-#         self.depth = depth
-#         self.num_experts = num_experts
-#         self.dim_head = dim_head
-#         self.dropout = dropout
-#         self.heads = self.dim // self.dim_head
-#         self.scale = self.dim_head**-0.5
-
-#     def forward(self, x: Tensor):
-#         # Encoder
-#         for _ in range(self.depth):
-#             x = DenseEncoderLayer(
-#                 dim=self.dim,
-#                 depth=self.depth,
-#                 num_experts=self.num_experts,
-#                 dim_head=self.dim_head,
-#                 dropout=self.dropout,
-#             )(x)
-
-#         return x
+print(model(x))
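
Note: a minimal sketch of the new expert construction in isolation, assuming
the positional signature SimpleMoE(dim, hidden_dim, output_dim, num_experts,
mult) that the second hunk relies on. The hyperparameter values and tensor
shapes below are illustrative, not taken from the repo.

    import torch
    from swarms_torch import SimpleMoE

    # Hypothetical values mirroring DenseEncoderLayer's arguments.
    dim, ff_mult, num_experts = 512, 4, 4

    experts = SimpleMoE(
        dim,            # input dimension
        dim * ff_mult,  # expert hidden dimension
        dim,            # output dimension, equal to dim so the layer is shape-preserving
        num_experts,
        ff_mult,
    )

    x = torch.randn(1, 64, dim)  # (batch, seq_len, dim)
    out = experts(x)
    print(out.shape)  # expected: torch.Size([1, 64, 512])

If SimpleMoE follows the usual dense mixture-of-experts pattern (a softmax
gate weighting every expert's feed-forward output), it remains a drop-in,
shape-preserving replacement for the removed zeta.nn MixtureOfExperts, which
is what lets the attention wiring in DenseEncoderLayer go unchanged.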