From 41175b29c9e50964b46178dfba36bb1613cfe029 Mon Sep 17 00:00:00 2001 From: Eddy Oyieko <67474838+mobley-trent@users.noreply.github.com> Date: Sat, 22 Jul 2023 16:23:17 +0000 Subject: [PATCH 01/16] Base Layers / Classes --- ivy_models/regnet/__init__.py | 2 + ivy_models/regnet/layers.py | 477 ++++++++++++++++++++++++++++++++++ ivy_models/regnet/regnet.py | 0 3 files changed, 479 insertions(+) create mode 100644 ivy_models/regnet/__init__.py create mode 100644 ivy_models/regnet/layers.py create mode 100644 ivy_models/regnet/regnet.py diff --git a/ivy_models/regnet/__init__.py b/ivy_models/regnet/__init__.py new file mode 100644 index 00000000..8b3b17a1 --- /dev/null +++ b/ivy_models/regnet/__init__.py @@ -0,0 +1,2 @@ +from . import regnet +from .regnet import * diff --git a/ivy_models/regnet/layers.py b/ivy_models/regnet/layers.py new file mode 100644 index 00000000..c8ddccc2 --- /dev/null +++ b/ivy_models/regnet/layers.py @@ -0,0 +1,477 @@ +from typing import Callable, List, Optional, Sequence, Tuple, Union, Any + +import ivy + +import math +import collections +from collections import OrderedDict +from itertools import repeat +import warnings + + +def _make_ntuple(x: Any, n: int) -> Tuple[Any, ...]: + """ + Make n-tuple from input x. If x is an iterable, then we just convert it to tuple. + Otherwise, we will make a tuple of length n, all with value of x. + reference: https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/utils.py#L8 # noqa: E501 + + Args: + ---- + x (Any): input value + n (int): length of the resulting tuple + """ + if isinstance(x, collections.abc.Iterable): + return tuple(x) + return tuple(repeat(x, n)) + + +def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int: + """ + A function taken from the original tf repo. + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. 
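+    # Worked example (illustrative): _make_divisible(30, 8) rounds 30 up
+    # to 32 (and 32 >= 0.9 * 30, so no bump), while _make_divisible(10, 8)
+    # first rounds down to 8; since 8 < 0.9 * 10, the branch below bumps
+    # it back up to 16.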
+ if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvNormActivation(ivy.Sequential): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: Union[int, Tuple[int, ...]] = 3, + stride: Union[int, Tuple[int, ...]] = 1, + padding: Optional[Union[int, Tuple[int, ...], str]] = None, + groups: int = 1, + norm_layer: Optional[Callable[..., ivy.Module]] = ivy.BatchNorm2D, + activation_layer: Optional[Callable[..., ivy.Module]] = ivy.ReLU, + dilation: Union[int, Tuple[int, ...]] = 1, + inplace: Optional[bool] = True, + bias: Optional[bool] = True, + conv_layer: Callable[..., ivy.Module] = ivy.Conv2D, + ) -> None: + if padding is None: + if isinstance(kernel_size, int) and isinstance(dilation, int): + padding = (kernel_size - 1) // 2 * dilation + else: + _conv_dim = ( + len(kernel_size) + if isinstance(kernel_size, Sequence) + else len(dilation) + ) + kernel_size = _make_ntuple(kernel_size, _conv_dim) + dilation = _make_ntuple(dilation, _conv_dim) + padding = tuple( + (kernel_size[i] - 1) // 2 * dilation[i] for i in range(_conv_dim) + ) + if bias is None: + bias = norm_layer is None + + layers = [ + conv_layer( + in_channels, + out_channels, + kernel_size, + stride, + padding, + dilation=dilation, + groups=groups, + bias=bias, + ) + ] + + if norm_layer is not None: + layers.append(norm_layer(out_channels)) + + if activation_layer is not None: + params = {} if inplace is None else {"inplace": inplace} + layers.append(activation_layer(**params)) + super().__init__(*layers) + # _log_api_usage_once(self) # TODO: Does Ivy have this ? + self.out_channels = out_channels + + if self.__class__ == ConvNormActivation: + warnings.warn( + "Don't use ConvNormActivation directly, please use Conv2dNormActivation and Conv3dNormActivation instead." # noqa: E501 + ) + + +class Conv2dNormActivation(ConvNormActivation): + """ + Configurable block used for Convolution2d-Normalization-Activation blocks. + + Args: + ---- + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block # noqa: E501 + kernel_size: (int, optional): Size of the convolving kernel. Default: 3 + stride (int, optional): Stride of the convolution. Default: 1 + padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None, in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation`` # noqa: E501 + groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1 # noqa: E501 + norm_layer (Callable[..., torch.nn.Module], optional): Norm layer that will be stacked on top of the convolution layer. If ``None`` this layer won't be used. Default: ``torch.nn.BatchNorm2d`` # noqa: E501 + activation_layer (Callable[..., torch.nn.Module], optional): Activation function which will be stacked on top of the normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer won't be used. Default: ``torch.nn.ReLU`` # noqa: E501 + dilation (int): Spacing between kernel elements. Default: 1 + inplace (bool): Parameter for the activation layer, which can optionally do the operation in-place. Default ``True`` # noqa: E501 + bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included if ``norm_layer is None``. 
# noqa: E501 + + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: Union[int, Tuple[int, int]] = 3, + stride: Union[int, Tuple[int, int]] = 1, + padding: Optional[Union[int, Tuple[int, int], str]] = None, + groups: int = 1, + norm_layer: Optional[Callable[..., ivy.Module]] = ivy.BatchNorm2D, + activation_layer: Optional[Callable[..., ivy.Module]] = ivy.ReLU, + dilation: Union[int, Tuple[int, int]] = 1, + inplace: Optional[bool] = True, + bias: Optional[bool] = None, + ) -> None: + super().__init__( + in_channels, + out_channels, + kernel_size, + stride, + padding, + groups, + norm_layer, + activation_layer, + dilation, + inplace, + bias, + ivy.stateful.Conv2D, + ) + + +class SqueezeExcitation(ivy.Module): + """ + The Squeeze-and-Excitation block from https://arxiv.org/abs/1709.01507 (see Fig. 1). # noqa: E501 + Parameters ``activation``, and ``scale_activation`` correspond to ``delta`` and ``sigma`` in eq. 3. # noqa: E501 + + Args: + ---- + input_channels (int): Number of channels in the input image + squeeze_channels (int): Number of squeeze channels + activation (Callable[..., torch.nn.Module], optional): ``delta`` activation. Default: ``torch.nn.ReLU`` # noqa: E501 + scale_activation (Callable[..., torch.nn.Module]): ``sigma`` activation. Default: ``torch.nn.Sigmoid`` # noqa: E501 + """ + + def __init__( + self, + input_channels: int, + squeeze_channels: int, + activation: Callable[..., ivy.Module] = ivy.ReLU, + scale_activation: Callable[..., ivy.Module] = ivy.sigmoid, + ) -> None: + super().__init__() + # _log_api_usage_once(self) + self.avgpool = ivy.AdaptiveAvgPool2d(1) + self.fc1 = ivy.Conv2D(input_channels, squeeze_channels, 1, strides=1, padding=0) + self.fc2 = ivy.Conv2D(squeeze_channels, input_channels, 1, strides=1, padding=0) + self.activation = activation() + self.scale_activation = scale_activation() + + def _scale(self, input: ivy.Array) -> ivy.Array: + scale = self.avgpool(input) + scale = self.fc1(scale) + scale = self.activation(scale) + scale = self.fc2(scale) + return self.scale_activation(scale) + + def forward(self, input: ivy.Array) -> ivy.Array: + scale = self._scale(input) + return scale * input + + +class SimpleStemIN(Conv2dNormActivation): + """Simple stem for ImageNet: 3x3, BN, ReLU.""" + + def __init__( + self, + width_in: int, + width_out: int, + norm_layer: Callable[..., ivy.Module], + activation_layer: Callable[..., ivy.Module], + ) -> None: + super().__init__( + width_in, + width_out, + kernel_size=3, + stride=2, + norm_layer=norm_layer, + activation_layer=activation_layer, + ) + + +class BottleneckTransform(ivy.Sequential): + """Bottleneck transformation: 1x1, 3x3 [+SE], 1x1.""" + + def __init__( + self, + width_in: int, + width_out: int, + stride: int, + norm_layer: Callable[..., ivy.Module], + activation_layer: Callable[..., ivy.Module], + group_width: int, + bottleneck_multiplier: float, + se_ratio: Optional[float], + ) -> None: + layers = OrderedDict() + w_b = int(round(width_out * bottleneck_multiplier)) + g = w_b // group_width + + layers["a"] = Conv2dNormActivation( + width_in, + w_b, + kernel_size=1, + stride=1, + norm_layer=norm_layer, + activation_layer=activation_layer, + ) + + layers["b"] = Conv2dNormActivation( + w_b, + w_b, + kernel_size=3, + stride=stride, + groups=g, + norm_layer=norm_layer, + activation_layer=activation_layer, + ) + + if se_ratio: + # The SE reduction ratio is defined with respect to the + # beginning of the block + width_se_out = int(round(se_ratio * width_in)) + layers["se"] = 
SqueezeExcitation(
+                input_channels=w_b,
+                squeeze_channels=width_se_out,
+                activation=activation_layer,
+            )
+
+        layers["c"] = Conv2dNormActivation(
+            w_b,
+            width_out,
+            kernel_size=1,
+            stride=1,
+            norm_layer=norm_layer,
+            activation_layer=None,
+        )
+        super().__init__(layers)
+
+
+class ResBottleneckBlock(ivy.Module):
+    """Residual bottleneck block: x + F(x), F = bottleneck transform."""
+
+    def __init__(
+        self,
+        width_in: int,
+        width_out: int,
+        stride: int,
+        norm_layer: Callable[..., ivy.Module],
+        activation_layer: Callable[..., ivy.Module],
+        group_width: int = 1,
+        bottleneck_multiplier: float = 1.0,
+        se_ratio: Optional[float] = None,
+    ) -> None:
+        super().__init__()
+
+        # Use skip connection with projection if shape changes
+        self.proj = None
+        should_proj = (width_in != width_out) or (stride != 1)
+        if should_proj:
+            self.proj = Conv2dNormActivation(
+                width_in,
+                width_out,
+                kernel_size=1,
+                stride=stride,
+                norm_layer=norm_layer,
+                activation_layer=None,
+            )
+        self.f = BottleneckTransform(
+            width_in,
+            width_out,
+            stride,
+            norm_layer,
+            activation_layer,
+            group_width,
+            bottleneck_multiplier,
+            se_ratio,
+        )
+        self.activation = activation_layer(inplace=True)
+
+    def forward(self, x: ivy.Array) -> ivy.Array:
+        if self.proj is not None:
+            x = self.proj(x) + self.f(x)
+        else:
+            x = x + self.f(x)
+        return self.activation(x)
+
+
+class AnyStage(ivy.Sequential):
+    """AnyNet stage (sequence of blocks w/ the same output shape)."""
+
+    def __init__(
+        self,
+        width_in: int,
+        width_out: int,
+        stride: int,
+        depth: int,
+        block_constructor: Callable[..., ivy.Module],
+        norm_layer: Callable[..., ivy.Module],
+        activation_layer: Callable[..., ivy.Module],
+        group_width: int,
+        bottleneck_multiplier: float,
+        se_ratio: Optional[float] = None,
+        stage_index: int = 0,
+    ) -> None:
+        super().__init__()
+
+        for i in range(depth):
+            block = block_constructor(  # noqa: F841
+                width_in if i == 0 else width_out,
+                width_out,
+                stride if i == 0 else 1,
+                norm_layer,
+                activation_layer,
+                group_width,
+                bottleneck_multiplier,
+                se_ratio,
+            )
+
+            # self.add_module(f"block{stage_index}-{i}", block)  # TODO: Implement add_module method in ivy.Module # noqa: E501
+
+
+class BlockParams:
+    def __init__(
+        self,
+        depths: List[int],
+        widths: List[int],
+        group_widths: List[int],
+        bottleneck_multipliers: List[float],
+        strides: List[int],
+        se_ratio: Optional[float] = None,
+    ) -> None:
+        self.depths = depths
+        self.widths = widths
+        self.group_widths = group_widths
+        self.bottleneck_multipliers = bottleneck_multipliers
+        self.strides = strides
+        self.se_ratio = se_ratio
+
+    @classmethod
+    def from_init_params(
+        cls,
+        depth: int,
+        w_0: int,
+        w_a: float,
+        w_m: float,
+        group_width: int,
+        bottleneck_multiplier: float = 1.0,
+        se_ratio: Optional[float] = None,
+        **kwargs: Any,
+    ) -> "BlockParams":
+        """
+        Programmatically compute all the per-block settings,
+        given the RegNet parameters.
+
+        The first step is to compute the quantized linear block parameters,
+        in log space. Key parameters are:
+        - `w_a` is the width progression slope
+        - `w_0` is the initial width
+        - `w_m` is the width stepping in the log space
+
+        In other terms
+        `log(block_width) = log(w_0) + block_capacity * log(w_m)`,
+        with `block_capacity` ramping up following the w_0 and w_a params.
+        This block width is finally quantized to multiples of 8.
+
+        The second step is to compute the parameters per stage,
+        taking into account the skip connection and the final 1x1 convolutions.
+ We use the fact that the output width is constant within a stage. + """ + QUANT = 8 + STRIDE = 2 + + if w_a < 0 or w_0 <= 0 or w_m <= 1 or w_0 % 8 != 0: + raise ValueError("Invalid RegNet settings") + # Compute the block widths. Each stage has one unique block width + widths_cont = ivy.arange(depth) * w_a + w_0 + block_capacity = ivy.round(ivy.log(widths_cont / w_0) / math.log(w_m)) + block_widths = ( + (ivy.round(ivy.divide(w_0 * ivy.pow(w_m, block_capacity), QUANT)) * QUANT) + .int() + .tolist() + ) + num_stages = len(set(block_widths)) + + # Convert to per stage parameters + split_helper = zip( + block_widths + [0], + [0] + block_widths, + block_widths + [0], + [0] + block_widths, + ) + splits = [w != wp or r != rp for w, wp, r, rp in split_helper] + + stage_widths = [w for w, t in zip(block_widths, splits[:-1]) if t] + stage_depths = ( + ivy.diff(ivy.array([d for d, t in enumerate(splits) if t])).int().tolist() + ) + + strides = [STRIDE] * num_stages + bottleneck_multipliers = [bottleneck_multiplier] * num_stages + group_widths = [group_width] * num_stages + + # Adjust the compatibility of stage widths and group widths + stage_widths, group_widths = cls._adjust_widths_groups_compatibilty( + stage_widths, bottleneck_multipliers, group_widths + ) + + return cls( + depths=stage_depths, + widths=stage_widths, + group_widths=group_widths, + bottleneck_multipliers=bottleneck_multipliers, + strides=strides, + se_ratio=se_ratio, + ) + + def _get_expanded_params(self): + return zip( + self.widths, + self.strides, + self.depths, + self.group_widths, + self.bottleneck_multipliers, + ) + + @staticmethod + def _adjust_widths_groups_compatibilty( + stage_widths: List[int], bottleneck_ratios: List[float], group_widths: List[int] + ) -> Tuple[List[int], List[int]]: + """ + Adjusts the compatibility of widths and groups, + depending on the bottleneck ratio. 
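+
+        For example (illustrative): with stage_widths=[64],
+        bottleneck_ratios=[1.0] and group_widths=[24], the bottleneck width
+        64 is rounded via _make_divisible(64, 24) to 72, so the stage width
+        becomes 72 while the group width stays 24.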
+ """ + # Compute all widths for the current settings + widths = [int(w * b) for w, b in zip(stage_widths, bottleneck_ratios)] + group_widths_min = [min(g, w_bot) for g, w_bot in zip(group_widths, widths)] + + # Compute the adjusted widths so that stage and group widths fit + ws_bot = [ + _make_divisible(w_bot, g) for w_bot, g in zip(widths, group_widths_min) + ] + stage_widths = [int(w_bot / b) for w_bot, b in zip(ws_bot, bottleneck_ratios)] + return stage_widths, group_widths_min diff --git a/ivy_models/regnet/regnet.py b/ivy_models/regnet/regnet.py new file mode 100644 index 00000000..e69de29b From fbe7c2bf436a24d44b1ac56f3930dd59b89100ee Mon Sep 17 00:00:00 2001 From: Eddy Oyieko <67474838+mobley-trent@users.noreply.github.com> Date: Wed, 26 Jul 2023 10:05:53 +0000 Subject: [PATCH 02/16] Updated regnet.py --- ivy_models/regnet/regnet.py | 81 +++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/ivy_models/regnet/regnet.py b/ivy_models/regnet/regnet.py index e69de29b..211e2aec 100644 --- a/ivy_models/regnet/regnet.py +++ b/ivy_models/regnet/regnet.py @@ -0,0 +1,81 @@ +import ivy +from .layers import BlockParams, SimpleStemIN, ResBottleneckBlock, AnyStage +from typing import Optional, Callable + +from collections import OrderedDict + + +class RegNet(ivy.Module): + def __init__( + self, + block_params: BlockParams, + num_classes: int = 1000, + stem_width: int = 32, + stem_type: Optional[Callable[..., ivy.Module]] = None, + block_type: Optional[Callable[..., ivy.Module]] = None, + norm_layer: Optional[Callable[..., ivy.Module]] = None, + activation: Optional[Callable[..., ivy.Module]] = None, + ) -> None: + super().__init__() + # _log_api_usage_once(self) # TODO: API Logging + + if stem_type is None: + stem_type = SimpleStemIN + if norm_layer is None: + norm_layer = ivy.BatchNorm2D + if block_type is None: + block_type = ResBottleneckBlock + if activation is None: + activation = ivy.ReLU + + self.stem = stem_type( + 3, # width_in + stem_width, + norm_layer, + activation, + ) + + current_width = stem_width + + blocks = [] + for i, ( + width_out, + stride, + depth, + group_width, + bottleneck_multiplier, + ) in enumerate(block_params._get_expanded_params()): + blocks.append( + ( + f"block{i+1}", + AnyStage( + current_width, + width_out, + stride, + depth, + block_type, + norm_layer, + activation, + group_width, + bottleneck_multiplier, + block_params.se_ratio, + stage_index=i + 1, + ), + ) + ) + + current_width = width_out + + self.trunk_output = ivy.Sequential(OrderedDict(blocks)) + + self.avgpool = ivy.AdaptiveAvgPool2d((1, 1)) + self.fc = ivy.Linear(current_width, num_classes) + + def _forward(self, x: ivy.Array) -> ivy.Array: + x = self.stem(x) + x = self.trunk_output(x) + x = self.avgpool(x) + # x = ivy.reshape(x, (x.shape[0], -1)) + x = x.flatten(start_dim=1) + x = self.fc(x) + return x From 2aea6f9e9b0f64c8125242f3eb7af535bc47eb8e Mon Sep 17 00:00:00 2001 From: Eddy Oyieko <67474838+mobley-trent@users.noreply.github.com> Date: Mon, 31 Jul 2023 09:25:11 +0000 Subject: [PATCH 03/16] Updated layers.py, regnet.py --- ivy_models/regnet/layers.py | 6 +++--- ivy_models/regnet/regnet.py | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/ivy_models/regnet/layers.py b/ivy_models/regnet/layers.py index c8ddccc2..4b12aad0 100644 --- a/ivy_models/regnet/layers.py +++ b/ivy_models/regnet/layers.py @@ -94,7 +94,7 @@ def __init__( params = {} if inplace is None else {"inplace": inplace} layers.append(activation_layer(**params)) super().__init__(*layers) 
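+        # NOTE: ivy.Sequential registers the conv -> norm -> activation
+        # layers above and calls them in insertion order on each forward pass.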
- # _log_api_usage_once(self) # TODO: Does Ivy have this ? + self.out_channels = out_channels if self.__class__ == ConvNormActivation: @@ -338,6 +338,7 @@ def __init__( ) -> None: super().__init__() + self.blocks = OrderedDict() for i in range(depth): block = block_constructor( # noqa: F841 width_in if i == 0 else width_out, @@ -349,8 +350,7 @@ def __init__( bottleneck_multiplier, se_ratio, ) - - # self.add_module(f"block{stage_index}-{i}", block) # TODO: Implement add_module method in ivy.Module # noqa: E501 + self.blocks[f"block{stage_index}-{i}"] = block class BlockParams: diff --git a/ivy_models/regnet/regnet.py b/ivy_models/regnet/regnet.py index 211e2aec..c19ad17d 100644 --- a/ivy_models/regnet/regnet.py +++ b/ivy_models/regnet/regnet.py @@ -17,7 +17,6 @@ def __init__( activation: Optional[Callable[..., ivy.Module]] = None, ) -> None: super().__init__() - # _log_api_usage_once(self) # TODO: API Logging if stem_type is None: stem_type = SimpleStemIN @@ -75,7 +74,6 @@ def _forward(self, x: ivy.Array) -> ivy.Array: x = self.stem(x) x = self.trunk_output(x) x = self.avgpool(x) - # x = ivy.reshape(x, (x.shape[0], -1)) x = x.flatten(start_dim=1) x = self.fc(x) return x From 9423da93caaa9f89f5ebd2eba67bad260d136cb1 Mon Sep 17 00:00:00 2001 From: Eddy Oyieko <67474838+mobley-trent@users.noreply.github.com> Date: Tue, 8 Aug 2023 06:29:49 +0000 Subject: [PATCH 04/16] Added test_regnet.py --- ivy_models_tests/regnet/test_regnet.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 ivy_models_tests/regnet/test_regnet.py diff --git a/ivy_models_tests/regnet/test_regnet.py b/ivy_models_tests/regnet/test_regnet.py new file mode 100644 index 00000000..e69de29b From 9c173a6373ce69f68e6cbc37231b4952d03653e5 Mon Sep 17 00:00:00 2001 From: Eddy Oyieko <67474838+mobley-trent@users.noreply.github.com> Date: Tue, 8 Aug 2023 08:24:29 +0000 Subject: [PATCH 05/16] Added testing pipeline --- ivy_models/regnet/regnet.py | 41 ++++++++++++++++++++++ ivy_models_tests/regnet/test_regnet.py | 47 ++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/ivy_models/regnet/regnet.py b/ivy_models/regnet/regnet.py index c19ad17d..101f8f4d 100644 --- a/ivy_models/regnet/regnet.py +++ b/ivy_models/regnet/regnet.py @@ -1,4 +1,5 @@ import ivy +import ivy_models from .layers import BlockParams, SimpleStemIN, ResBottleneckBlock, AnyStage from typing import Optional, Callable @@ -77,3 +78,43 @@ def _forward(self, x: ivy.Array) -> ivy.Array: x = x.flatten(start_dim=1) x = self.fc(x) return x + + +def _regnet_torch_weights_mapping(old_key, new_key): + new_mapping = new_key + if "weight" in old_key: + new_mapping = {"key_chain": new_key, "pattern": "b c h w -> h w c b"} + elif "bias" in old_key: + new_mapping = {"key_chain": new_key, "pattern": "h -> 1 h 1 1"} + + return new_mapping + + +def regnet_y_400mf(pretrained=True): + """RegNet-Y-400MF model""" + model = RegNet + if pretrained: + url = "https://download.pytorch.org/models/regnet_y_400mf-c65dace8.pth" + w_clean = ivy_models.helpers.load_torch_weights( + url, + model, + raw_keys_to_prune=["num_batches_tracked"], + custom_mapping=_regnet_torch_weights_mapping, + ) + model.v = w_clean + return model + + +def regnet_y_800mf(pretrained=True): + """RegNet-Y-800MF model""" + model = RegNet + if pretrained: + url = "https://download.pytorch.org/models/regnet_y_400mf-e6988f5f.pth" + w_clean = ivy_models.helpers.load_torch_weights( + url, + model, + raw_keys_to_prune=["num_batches_tracked"], + custom_mapping=_regnet_torch_weights_mapping, + ) + 
model.v = w_clean + return model diff --git a/ivy_models_tests/regnet/test_regnet.py b/ivy_models_tests/regnet/test_regnet.py index e69de29b..0e0c0f3a 100644 --- a/ivy_models_tests/regnet/test_regnet.py +++ b/ivy_models_tests/regnet/test_regnet.py @@ -0,0 +1,47 @@ +from ivy_models.regnet import regnet_y_400mf, regnet_y_800mf +from ivy_models_tests import helpers +import ivy +import random +import os + + +VARIANTS = { + "regnet_y_400mf": regnet_y_400mf, + "regnet_y_800mf": regnet_y_800mf, +} + +load_weights = random.choice([False, True]) +model_var = random.choice(list(VARIANTS.keys())) +model = VARIANTS[model_var](pretrained=load_weights) +v = ivy.to_numpy(model.v) + + +def test_regnet(device, fw): + num_classes = 1000 + batch_shape = [1] + this_dir = os.path.dirname(os.path.realpath(__file__)) + + # Load image + img = ivy.asarray( + helpers.load_and_preprocess_img( + os.path.join(this_dir, "..", "..", "images", "cat.jpg"), + 256, + 224, + data_format="NHWC", + to_ivy=True, + ) + ) + + # Create model + model.v = ivy.asarray(v) + logits = model(img) + + # Cardinality test + assert logits.shape == tuple([ivy.to_scalar(batch_shape), num_classes]) + + # Value test + if load_weights: + np_out = ivy.to_numpy(logits[0]) + true_indices = ivy.to_numpy(ivy.sort(ivy.array([282, 281, 285, 287]))) + calc_indices = ivy.to_numpy(ivy.sort(ivy.argsort(np_out)[-5:][::-1])) + assert ivy.array_equal(true_indices, calc_indices[:4]) From aa3c4c51064b3d02fb733c20c2128a817b2f84cc Mon Sep 17 00:00:00 2001 From: Eddy Oyieko <67474838+mobley-trent@users.noreply.github.com> Date: Tue, 8 Aug 2023 09:53:08 +0000 Subject: [PATCH 06/16] Updated regnet.py, layers.py --- ivy_models/regnet/layers.py | 1 - ivy_models/regnet/regnet.py | 82 +++++++++++++++++++++++++++++-------- 2 files changed, 66 insertions(+), 17 deletions(-) diff --git a/ivy_models/regnet/layers.py b/ivy_models/regnet/layers.py index 4b12aad0..14f4c63a 100644 --- a/ivy_models/regnet/layers.py +++ b/ivy_models/regnet/layers.py @@ -174,7 +174,6 @@ def __init__( scale_activation: Callable[..., ivy.Module] = ivy.sigmoid, ) -> None: super().__init__() - # _log_api_usage_once(self) self.avgpool = ivy.AdaptiveAvgPool2d(1) self.fc1 = ivy.Conv2D(input_channels, squeeze_channels, 1, strides=1, padding=0) self.fc2 = ivy.Conv2D(squeeze_channels, input_channels, 1, strides=1, padding=0) diff --git a/ivy_models/regnet/regnet.py b/ivy_models/regnet/regnet.py index 101f8f4d..1af16f42 100644 --- a/ivy_models/regnet/regnet.py +++ b/ivy_models/regnet/regnet.py @@ -1,12 +1,13 @@ import ivy import ivy_models +from ivy_models.base import BaseModel, BaseSpec from .layers import BlockParams, SimpleStemIN, ResBottleneckBlock, AnyStage from typing import Optional, Callable from collections import OrderedDict -class RegNet(ivy.Module): +class RegNetSpec(BaseSpec): def __init__( self, block_params: BlockParams, @@ -17,25 +18,71 @@ def __init__( norm_layer: Optional[Callable[..., ivy.Module]] = None, activation: Optional[Callable[..., ivy.Module]] = None, ) -> None: - super().__init__() + super(RegNetSpec, self).__init__( + block_params=block_params, + num_classes=num_classes, + stem_width=stem_width, + stem_type=stem_type, + block_type=block_type, + norm_layer=norm_layer, + activation=activation, + ) + - if stem_type is None: +class RegNet(BaseModel): + def __init__( + self, + block_params: BlockParams, + num_classes: int = 1000, + stem_width: int = 32, + stem_type: Optional[Callable[..., ivy.Module]] = None, + block_type: Optional[Callable[..., ivy.Module]] = None, + norm_layer: 
Optional[Callable[..., ivy.Module]] = None, + activation: Optional[Callable[..., ivy.Module]] = None, + spec=None, + v: ivy.Container = None, + ) -> None: + self.block_params = block_params + self.num_classes = num_classes + self.stem_width = stem_width + self.stem_type = stem_type + self.block_type = block_type + self.norm_layer = norm_layer + self.activation = activation + + self.spec = ( + spec + if spec and isinstance(spec, RegNetSpec) + else RegNetSpec( + block_params, + num_classes, + stem_width, + stem_type, + block_type, + norm_layer, + activation, + ) + ) + super(RegNet, self).__init__(v=v) + + def _build(self, *args, **kwargs): + if self.stem_type is None: stem_type = SimpleStemIN - if norm_layer is None: + if self.norm_layer is None: norm_layer = ivy.BatchNorm2D - if block_type is None: + if self.block_type is None: block_type = ResBottleneckBlock - if activation is None: + if self.activation is None: activation = ivy.ReLU self.stem = stem_type( 3, # width_in - stem_width, + self.stem_width, norm_layer, activation, ) - current_width = stem_width + current_width = self.stem_width blocks = [] for i, ( @@ -44,7 +91,7 @@ def __init__( depth, group_width, bottleneck_multiplier, - ) in enumerate(block_params._get_expanded_params()): + ) in enumerate(self.block_params._get_expanded_params()): blocks.append( ( f"block{i+1}", @@ -58,7 +105,7 @@ def __init__( activation, group_width, bottleneck_multiplier, - block_params.se_ratio, + self.block_params.se_ratio, stage_index=i + 1, ), ) @@ -67,9 +114,12 @@ def __init__( current_width = width_out self.trunk_output = ivy.Sequential(OrderedDict(blocks)) - self.avgpool = ivy.AdaptiveAvgPool2d((1, 1)) - self.fc = ivy.Linear(current_width, num_classes) + self.fc = ivy.Linear(current_width, self.num_classes) + + @classmethod + def get_spec_class(self): + return RegNetSpec def _forward(self, x: ivy.Array) -> ivy.Array: x = self.stem(x) @@ -90,9 +140,9 @@ def _regnet_torch_weights_mapping(old_key, new_key): return new_mapping -def regnet_y_400mf(pretrained=True): +def regnet_y_400mf(num_classes: int = 1000, stem_width: int = 32, pretrained=True): """RegNet-Y-400MF model""" - model = RegNet + model = RegNet(BlockParams, num_classes, stem_width) if pretrained: url = "https://download.pytorch.org/models/regnet_y_400mf-c65dace8.pth" w_clean = ivy_models.helpers.load_torch_weights( @@ -105,9 +155,9 @@ def regnet_y_400mf(pretrained=True): return model -def regnet_y_800mf(pretrained=True): +def regnet_y_800mf(num_classes: int = 1000, stem_width: int = 32, pretrained=True): """RegNet-Y-800MF model""" - model = RegNet + model = RegNet(BlockParams, num_classes, stem_width) if pretrained: url = "https://download.pytorch.org/models/regnet_y_400mf-e6988f5f.pth" w_clean = ivy_models.helpers.load_torch_weights( From 2368ae56364fbff05576265a43ce439eb156e094 Mon Sep 17 00:00:00 2001 From: Eddy Oyieko <67474838+mobley-trent@users.noreply.github.com> Date: Wed, 9 Aug 2023 07:01:31 +0000 Subject: [PATCH 07/16] Updated regnet.py --- ivy_models/regnet/regnet.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/ivy_models/regnet/regnet.py b/ivy_models/regnet/regnet.py index 1af16f42..ce2c7ee2 100644 --- a/ivy_models/regnet/regnet.py +++ b/ivy_models/regnet/regnet.py @@ -67,19 +67,19 @@ def __init__( def _build(self, *args, **kwargs): if self.stem_type is None: - stem_type = SimpleStemIN + self.stem_type = SimpleStemIN if self.norm_layer is None: - norm_layer = ivy.BatchNorm2D + self.norm_layer = ivy.BatchNorm2D if self.block_type is 
None: - block_type = ResBottleneckBlock + self.block_type = ResBottleneckBlock if self.activation is None: - activation = ivy.ReLU + self.activation = ivy.ReLU - self.stem = stem_type( + self.stem = self.stem_type( 3, # width_in self.stem_width, - norm_layer, - activation, + self.norm_layer, + self.activation, ) current_width = self.stem_width @@ -100,9 +100,9 @@ def _build(self, *args, **kwargs): width_out, stride, depth, - block_type, - norm_layer, - activation, + self.block_type, + self.norm_layer, + self.activation, group_width, bottleneck_multiplier, self.block_params.se_ratio, From b34a092b86618d0b86119dff4830bd7c869a922c Mon Sep 17 00:00:00 2001 From: Eddy Oyieko <67474838+mobley-trent@users.noreply.github.com> Date: Wed, 9 Aug 2023 09:04:01 +0000 Subject: [PATCH 08/16] Updated test_regnet.py --- ivy_models_tests/regnet/test_regnet.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ivy_models_tests/regnet/test_regnet.py b/ivy_models_tests/regnet/test_regnet.py index 0e0c0f3a..caf4b617 100644 --- a/ivy_models_tests/regnet/test_regnet.py +++ b/ivy_models_tests/regnet/test_regnet.py @@ -16,7 +16,8 @@ v = ivy.to_numpy(model.v) -def test_regnet(device, fw): +def test_regnet_img_classification(device, fw): + """Test RegNet image classification.""" num_classes = 1000 batch_shape = [1] this_dir = os.path.dirname(os.path.realpath(__file__)) From 6052bce4bba094f03f9ad01b84e8fc0fc0e3fad7 Mon Sep 17 00:00:00 2001 From: Eddy Oyieko <67474838+mobley-trent@users.noreply.github.com> Date: Wed, 9 Aug 2023 10:20:07 +0000 Subject: [PATCH 09/16] Updated layers.py --- ivy_models/regnet/layers.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/ivy_models/regnet/layers.py b/ivy_models/regnet/layers.py index 14f4c63a..038a932e 100644 --- a/ivy_models/regnet/layers.py +++ b/ivy_models/regnet/layers.py @@ -6,7 +6,6 @@ import collections from collections import OrderedDict from itertools import repeat -import warnings def _make_ntuple(x: Any, n: int) -> Tuple[Any, ...]: @@ -98,8 +97,8 @@ def __init__( self.out_channels = out_channels if self.__class__ == ConvNormActivation: - warnings.warn( - "Don't use ConvNormActivation directly, please use Conv2dNormActivation and Conv3dNormActivation instead." # noqa: E501 + ivy.warnings.warn( + "Don't use ConvNormActivation directly, please use Conv2dNormActivation instead." ) @@ -108,18 +107,17 @@ class Conv2dNormActivation(ConvNormActivation): Configurable block used for Convolution2d-Normalization-Activation blocks. Args: - ---- in_channels (int): Number of channels in the input image - out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block # noqa: E501 + out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block kernel_size: (int, optional): Size of the convolving kernel. Default: 3 stride (int, optional): Stride of the convolution. Default: 1 - padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None, in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation`` # noqa: E501 - groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1 # noqa: E501 - norm_layer (Callable[..., torch.nn.Module], optional): Norm layer that will be stacked on top of the convolution layer. If ``None`` this layer won't be used. 
Default: ``torch.nn.BatchNorm2d`` # noqa: E501 - activation_layer (Callable[..., torch.nn.Module], optional): Activation function which will be stacked on top of the normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer won't be used. Default: ``torch.nn.ReLU`` # noqa: E501 + padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None, in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation`` + groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1 + norm_layer (Callable[..., ivy.Module], optional): Norm layer that will be stacked on top of the convolution layer. If ``None`` this layer won't be used. Default: ``ivy.BatchNorm2D`` + activation_layer (Callable[..., ivy.Module], optional): Activation function which will be stacked on top of the normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer won't be used. Default: ``ivy.ReLU`` dilation (int): Spacing between kernel elements. Default: 1 - inplace (bool): Parameter for the activation layer, which can optionally do the operation in-place. Default ``True`` # noqa: E501 - bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included if ``norm_layer is None``. # noqa: E501 + inplace (bool): Parameter for the activation layer, which can optionally do the operation in-place. Default ``True`` + bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included if ``norm_layer is None``. """ @@ -149,7 +147,7 @@ def __init__( dilation, inplace, bias, - ivy.stateful.Conv2D, + ivy.Conv2D, ) From 7c839af7a196094afdde3162ff9c0b31a676dfce Mon Sep 17 00:00:00 2001 From: Eddy Oyieko <67474838+mobley-trent@users.noreply.github.com> Date: Mon, 14 Aug 2023 18:05:33 +0000 Subject: [PATCH 10/16] Updated layers.py, regnet.py --- ivy_models/regnet/layers.py | 45 ++++++++++++++++++++----------------- ivy_models/regnet/regnet.py | 10 ++++++++- 2 files changed, 33 insertions(+), 22 deletions(-) diff --git a/ivy_models/regnet/layers.py b/ivy_models/regnet/layers.py index 038a932e..c6e7f512 100644 --- a/ivy_models/regnet/layers.py +++ b/ivy_models/regnet/layers.py @@ -48,7 +48,6 @@ def __init__( kernel_size: Union[int, Tuple[int, ...]] = 3, stride: Union[int, Tuple[int, ...]] = 1, padding: Optional[Union[int, Tuple[int, ...], str]] = None, - groups: int = 1, norm_layer: Optional[Callable[..., ivy.Module]] = ivy.BatchNorm2D, activation_layer: Optional[Callable[..., ivy.Module]] = ivy.ReLU, dilation: Union[int, Tuple[int, ...]] = 1, @@ -77,28 +76,24 @@ def __init__( conv_layer( in_channels, out_channels, - kernel_size, + [kernel_size], stride, padding, - dilation=dilation, - groups=groups, - bias=bias, + dilations=dilation, + with_bias=bias, ) ] if norm_layer is not None: layers.append(norm_layer(out_channels)) - if activation_layer is not None: - params = {} if inplace is None else {"inplace": inplace} - layers.append(activation_layer(**params)) super().__init__(*layers) self.out_channels = out_channels if self.__class__ == ConvNormActivation: ivy.warnings.warn( - "Don't use ConvNormActivation directly, please use Conv2dNormActivation instead." + "Don't use ConvNormActivation directly, please use Conv2dNormActivation instead." # noqa: E501 ) @@ -107,17 +102,28 @@ class Conv2dNormActivation(ConvNormActivation): Configurable block used for Convolution2d-Normalization-Activation blocks. 
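+
+    Example (illustrative): ``Conv2dNormActivation(32, 64, kernel_size=3,
+    stride=2)`` wraps a ``Conv2D(32, 64, [3], 2, padding=1)`` followed by
+    ``BatchNorm2D(64)``, where the padding falls out of the default
+    ``(kernel_size - 1) // 2 * dilation`` rule.
+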
Args: + ---- in_channels (int): Number of channels in the input image - out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block + out_channels (int): Number of channels produced by + the Convolution-Normalization-Activation block kernel_size: (int, optional): Size of the convolving kernel. Default: 3 stride (int, optional): Stride of the convolution. Default: 1 - padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None, in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation`` - groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1 - norm_layer (Callable[..., ivy.Module], optional): Norm layer that will be stacked on top of the convolution layer. If ``None`` this layer won't be used. Default: ``ivy.BatchNorm2D`` - activation_layer (Callable[..., ivy.Module], optional): Activation function which will be stacked on top of the normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer won't be used. Default: ``ivy.ReLU`` - dilation (int): Spacing between kernel elements. Default: 1 - inplace (bool): Parameter for the activation layer, which can optionally do the operation in-place. Default ``True`` - bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included if ``norm_layer is None``. + padding (int, tuple or str, optional): Padding added to all + four sides of the input. Default: None, in which case it will be + calculated as ``padding = (kernel_size - 1) // 2 * dilation`` + norm_layer (Callable[..., ivy.Module], optional): Norm layer that will + be stacked on top of the convolution layer.If ``None`` this + layer won't be used. Default: ``ivy.BatchNorm2D`` + activation_layer (Callable[..., ivy.Module], optional): Activation function + which will be stacked on top of the normalization layer (if not None), + otherwise on top of the conv layer. If ``None`` this layer won't be + used. Default: ``ivy.ReLU`` + dilation (int): Spacing between kernel elements. + Default: 1 + inplace (bool): Parameter for the activation layer, which can optionally + do the operation in-place. Default ``True`` + bias (bool, optional): Whether to use bias in the convolution layer. By + default, biases are included if ``norm_layer is None``. 
""" @@ -128,7 +134,6 @@ def __init__( kernel_size: Union[int, Tuple[int, int]] = 3, stride: Union[int, Tuple[int, int]] = 1, padding: Optional[Union[int, Tuple[int, int], str]] = None, - groups: int = 1, norm_layer: Optional[Callable[..., ivy.Module]] = ivy.BatchNorm2D, activation_layer: Optional[Callable[..., ivy.Module]] = ivy.ReLU, dilation: Union[int, Tuple[int, int]] = 1, @@ -141,7 +146,6 @@ def __init__( kernel_size, stride, padding, - groups, norm_layer, activation_layer, dilation, @@ -226,7 +230,7 @@ def __init__( ) -> None: layers = OrderedDict() w_b = int(round(width_out * bottleneck_multiplier)) - g = w_b // group_width + w_b // group_width layers["a"] = Conv2dNormActivation( width_in, @@ -242,7 +246,6 @@ def __init__( w_b, kernel_size=3, stride=stride, - groups=g, norm_layer=norm_layer, activation_layer=activation_layer, ) diff --git a/ivy_models/regnet/regnet.py b/ivy_models/regnet/regnet.py index ce2c7ee2..e9084dbf 100644 --- a/ivy_models/regnet/regnet.py +++ b/ivy_models/regnet/regnet.py @@ -91,7 +91,15 @@ def _build(self, *args, **kwargs): depth, group_width, bottleneck_multiplier, - ) in enumerate(self.block_params._get_expanded_params()): + ) in enumerate( + zip( + self.block_params.widths, + self.block_params.strides, + self.block_params.depths, + self.block_params.group_widths, + self.block_params.bottleneck_multipliers, + ) + ): blocks.append( ( f"block{i+1}", From 6bc43258a620fbaef6febb3653f2d45047b77021 Mon Sep 17 00:00:00 2001 From: Eddy Oyieko <67474838+mobley-trent@users.noreply.github.com> Date: Mon, 21 Aug 2023 09:07:56 +0000 Subject: [PATCH 11/16] Added script for testing - scratch.py --- scratch.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 scratch.py diff --git a/scratch.py b/scratch.py new file mode 100644 index 00000000..58305aa5 --- /dev/null +++ b/scratch.py @@ -0,0 +1,46 @@ +from ivy_models.helpers import load_torch_weights +from ivy_models.regnet.regnet import RegNet +from ivy_models.regnet.layers import BlockParams + +# num_classes = 1000 +# dropout = 0.5 +# data_format = "NCHW" +# v=None +# pretrained=True + + +num_classes = 1000 +stem_width = 32 +pretrained = True + + +# model = SqueezeNet( +# "1_0", num_classes, dropout, data_format=data_format, v=v +# ) + +model = RegNet(BlockParams, num_classes, stem_width) + + +# url = "https://download.pytorch.org/models/squeezenet1_0-b66bff10.pth" +url = "https://download.pytorch.org/models/regnet_y_400mf-c65dace8.pth" + + +def _torch_weights_mapping(old_key, new_key): + print(f"====== OLD KEY ======\n{old_key}\n\n\n") + print(f"====== NEW KEY ======\n{new_key}\n\n\n") + # if "weight" in old_key: + # new_mapping = {"key_chain": new_key, "pattern": "b c h w -> h w c b"} + # elif "bias" in old_key: + # new_mapping = {"key_chain": new_key, "pattern": "h -> 1 h 1 1"} + + # return new_mapping + + +w_clean = load_torch_weights( + url, + model, + raw_keys_to_prune=["num_batches_tracked"], + custom_mapping=_torch_weights_mapping, +) + +print(w_clean) From 6083074e53c0389021ca36f33e79ac82d051afcd Mon Sep 17 00:00:00 2001 From: Eddy Oyieko <67474838+mobley-trent@users.noreply.github.com> Date: Thu, 24 Aug 2023 12:47:46 +0000 Subject: [PATCH 12/16] Updated layers.py, regnet.py --- ivy_models/regnet/layers.py | 4 ++-- ivy_models/regnet/regnet.py | 11 +---------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/ivy_models/regnet/layers.py b/ivy_models/regnet/layers.py index c6e7f512..8f29c0a9 100644 --- a/ivy_models/regnet/layers.py +++ 
b/ivy_models/regnet/layers.py @@ -164,8 +164,8 @@ class SqueezeExcitation(ivy.Module): ---- input_channels (int): Number of channels in the input image squeeze_channels (int): Number of squeeze channels - activation (Callable[..., torch.nn.Module], optional): ``delta`` activation. Default: ``torch.nn.ReLU`` # noqa: E501 - scale_activation (Callable[..., torch.nn.Module]): ``sigma`` activation. Default: ``torch.nn.Sigmoid`` # noqa: E501 + activation (Callable[..., ivy.Module], optional): ``delta`` activation. Default: ``ivy.ReLU`` # noqa: E501 + scale_activation (Callable[..., ivy.Module]): ``sigma`` activation. Default: ``ivy.Sigmoid`` # noqa: E501 """ def __init__( diff --git a/ivy_models/regnet/regnet.py b/ivy_models/regnet/regnet.py index e9084dbf..b1eadef1 100644 --- a/ivy_models/regnet/regnet.py +++ b/ivy_models/regnet/regnet.py @@ -65,7 +65,6 @@ def __init__( ) super(RegNet, self).__init__(v=v) - def _build(self, *args, **kwargs): if self.stem_type is None: self.stem_type = SimpleStemIN if self.norm_layer is None: @@ -91,15 +90,7 @@ def _build(self, *args, **kwargs): depth, group_width, bottleneck_multiplier, - ) in enumerate( - zip( - self.block_params.widths, - self.block_params.strides, - self.block_params.depths, - self.block_params.group_widths, - self.block_params.bottleneck_multipliers, - ) - ): + ) in enumerate(self.block_params._get_expanded_params()): blocks.append( ( f"block{i+1}", From 4c4b63011aec98d2fd0331cabb61cc8745dcaf18 Mon Sep 17 00:00:00 2001 From: Eddy Oyieko <67474838+mobley-trent@users.noreply.github.com> Date: Thu, 24 Aug 2023 15:53:55 +0000 Subject: [PATCH 13/16] Updated regnet layers and test --- ivy_models/regnet/layers.py | 42 +++++++++++++++----------- ivy_models/regnet/regnet.py | 40 +++++++++++++++++++++--- ivy_models_tests/regnet/test_regnet.py | 13 ++++---- scratch.py | 36 +++++++++++++++++++--- 4 files changed, 99 insertions(+), 32 deletions(-) diff --git a/ivy_models/regnet/layers.py b/ivy_models/regnet/layers.py index 8f29c0a9..71d8a072 100644 --- a/ivy_models/regnet/layers.py +++ b/ivy_models/regnet/layers.py @@ -285,33 +285,41 @@ def __init__( bottleneck_multiplier: float = 1.0, se_ratio: Optional[float] = None, ) -> None: - super().__init__() - + self.width_in = width_in + self.width_out = width_out + self.stride = stride + self.norm_layer = norm_layer + self.activation_layer = activation_layer + self.group_width = group_width + self.bottleneck_multiplier = bottleneck_multiplier + self.se_ratio = se_ratio # Use skip connection with projection if shape changes self.proj = None - should_proj = (width_in != width_out) or (stride != 1) + super().__init__() + + def _build(self, *args, **kwargs): + should_proj = (self.width_in != self.width_out) or (self.stride != 1) if should_proj: self.proj = Conv2dNormActivation( - width_in, - width_out, + self.width_in, + self.width_out, kernel_size=1, - stride=stride, - norm_layer=norm_layer, + stride=self.stride, + norm_layer=self.norm_layer, activation_layer=None, ) self.f = BottleneckTransform( - width_in, - width_out, - stride, - norm_layer, - activation_layer, - group_width, - bottleneck_multiplier, - se_ratio, + self.width_in, + self.width_out, + self.stride, + self.norm_layer, + self.activation_layer, + self.group_width, + self.bottleneck_multiplier, + self.se_ratio, ) - self.activation = activation_layer(inplace=True) - def forward(self, x: ivy.Array) -> ivy.Array: + def _forward(self, x: ivy.Array) -> ivy.Array: if self.proj is not None: x = self.proj(x) + self.f(x) else: diff --git 
a/ivy_models/regnet/regnet.py b/ivy_models/regnet/regnet.py index b1eadef1..3ed058df 100644 --- a/ivy_models/regnet/regnet.py +++ b/ivy_models/regnet/regnet.py @@ -65,6 +65,7 @@ def __init__( ) super(RegNet, self).__init__(v=v) + def _build(self, *args, **kwargs): if self.stem_type is None: self.stem_type = SimpleStemIN if self.norm_layer is None: @@ -131,10 +132,10 @@ def _forward(self, x: ivy.Array) -> ivy.Array: def _regnet_torch_weights_mapping(old_key, new_key): new_mapping = new_key - if "weight" in old_key: - new_mapping = {"key_chain": new_key, "pattern": "b c h w -> h w c b"} - elif "bias" in old_key: - new_mapping = {"key_chain": new_key, "pattern": "h -> 1 h 1 1"} + # if "weight" in old_key: + # new_mapping = {"key_chain": new_key, "pattern": "b c h w -> h w c b"} + # elif "bias" in old_key: + # new_mapping = {"key_chain": new_key, "pattern": "h -> 1 h 1 1"} return new_mapping @@ -167,3 +168,34 @@ def regnet_y_800mf(num_classes: int = 1000, stem_width: int = 32, pretrained=Tru ) model.v = w_clean return model + + +def regnet_y_1_6gf( + num_classes: int = 1000, + stem_width: int = 32, + pretrained=True, + depth: int = 18, + w_0: int = 200, + w_a: float = 106.23, + w_m: float = 2.48, + group_width: int = 112, +): + """RegNet-Y-1.6GF model""" + block_params = BlockParams.from_init_params( + depth=depth, + w_0=w_0, + w_a=w_a, + w_m=w_m, + group_width=group_width, + ) + model = RegNet(block_params, num_classes, stem_width) + if pretrained: + url = "https://download.pytorch.org/models/regnet_y_1_6gf-b11a554e.pth" + w_clean = ivy_models.helpers.load_torch_weights( + url, + model, + raw_keys_to_prune=["num_batches_tracked"], + custom_mapping=_regnet_torch_weights_mapping, + ) + model.v = w_clean + return model diff --git a/ivy_models_tests/regnet/test_regnet.py b/ivy_models_tests/regnet/test_regnet.py index caf4b617..3b57687c 100644 --- a/ivy_models_tests/regnet/test_regnet.py +++ b/ivy_models_tests/regnet/test_regnet.py @@ -1,13 +1,13 @@ -from ivy_models.regnet import regnet_y_400mf, regnet_y_800mf +from ivy_models.regnet import regnet_y_1_6gf from ivy_models_tests import helpers import ivy import random import os +ivy.set_backend("torch") VARIANTS = { - "regnet_y_400mf": regnet_y_400mf, - "regnet_y_800mf": regnet_y_800mf, + "regnet_y_1_6gf": regnet_y_1_6gf, } load_weights = random.choice([False, True]) @@ -42,7 +42,8 @@ def test_regnet_img_classification(device, fw): # Value test if load_weights: - np_out = ivy.to_numpy(logits[0]) - true_indices = ivy.to_numpy(ivy.sort(ivy.array([282, 281, 285, 287]))) - calc_indices = ivy.to_numpy(ivy.sort(ivy.argsort(np_out)[-5:][::-1])) + output = logits[0] + true_indices = ivy.sort(ivy.array([282, 281, 285, 287])) + calc_indices = ivy.sort(ivy.argsort(output)[-5:][::-1]) + assert ivy.array_equal(true_indices, calc_indices[:4]) diff --git a/scratch.py b/scratch.py index 58305aa5..50b5a9ec 100644 --- a/scratch.py +++ b/scratch.py @@ -1,6 +1,9 @@ from ivy_models.helpers import load_torch_weights from ivy_models.regnet.regnet import RegNet from ivy_models.regnet.layers import BlockParams +import ivy + +ivy.set_backend("torch") # num_classes = 1000 # dropout = 0.5 @@ -13,27 +16,50 @@ stem_width = 32 pretrained = True +depth = 18 +w_0 = 200 +w_a = 106.23 +w_m = 2.48 +group_width = 112 + +_block_params = BlockParams.from_init_params( + depth=depth, + w_0=w_0, + w_a=w_a, + w_m=w_m, + group_width=group_width, +) + # model = SqueezeNet( # "1_0", num_classes, dropout, data_format=data_format, v=v # ) -model = RegNet(BlockParams, num_classes, stem_width) 
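+# NOTE (illustrative): from_init_params expands the scalar recipe above
+# into per-stage depths, widths and strides; RegNet then builds the stem
+# and stages from those lists.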
+model = RegNet(_block_params, num_classes, stem_width) # url = "https://download.pytorch.org/models/squeezenet1_0-b66bff10.pth" -url = "https://download.pytorch.org/models/regnet_y_400mf-c65dace8.pth" +# url = "https://download.pytorch.org/models/regnet_y_400mf-c65dace8.pth" +# url = "https://download.pytorch.org/models/regnet_y_3_2gf-b5a9779c.pth" +url = "https://download.pytorch.org/models/regnet_y_1_6gf-b11a554e.pth" + +# trial = 0 def _torch_weights_mapping(old_key, new_key): - print(f"====== OLD KEY ======\n{old_key}\n\n\n") - print(f"====== NEW KEY ======\n{new_key}\n\n\n") + # global trial + # print(f"========# {trial} ============\n") + # print(f"====== OLD KEY ======\n{old_key}\n\n") + # print(f"====== NEW KEY ======\n{new_key}\n") + # trial += 1 + + new_mapping = new_key # if "weight" in old_key: # new_mapping = {"key_chain": new_key, "pattern": "b c h w -> h w c b"} # elif "bias" in old_key: # new_mapping = {"key_chain": new_key, "pattern": "h -> 1 h 1 1"} - # return new_mapping + return new_mapping w_clean = load_torch_weights( From adf548e1716463f353f3d872deacf8980c1c54f1 Mon Sep 17 00:00:00 2001 From: Eddy Oyieko <67474838+mobley-trent@users.noreply.github.com> Date: Thu, 24 Aug 2023 16:38:37 +0000 Subject: [PATCH 14/16] Updated layers.py, test_regnet.py --- ivy_models/regnet/layers.py | 15 ++++++++------- ivy_models_tests/regnet/test_regnet.py | 1 - 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ivy_models/regnet/layers.py b/ivy_models/regnet/layers.py index 71d8a072..436dffe4 100644 --- a/ivy_models/regnet/layers.py +++ b/ivy_models/regnet/layers.py @@ -417,11 +417,11 @@ def from_init_params( # Compute the block widths. Each stage has one unique block width widths_cont = ivy.arange(depth) * w_a + w_0 block_capacity = ivy.round(ivy.log(widths_cont / w_0) / math.log(w_m)) - block_widths = ( - (ivy.round(ivy.divide(w_0 * ivy.pow(w_m, block_capacity), QUANT)) * QUANT) - .int() - .tolist() - ) + _block_widths = ( + ivy.round(ivy.divide(w_0 * ivy.pow(w_m, block_capacity), QUANT)) * QUANT + ) # noqa: E501 + _block_widths = _block_widths.astype(ivy.int64) + block_widths = (_block_widths).tolist() num_stages = len(set(block_widths)) # Convert to per stage parameters @@ -434,9 +434,10 @@ def from_init_params( splits = [w != wp or r != rp for w, wp, r, rp in split_helper] stage_widths = [w for w, t in zip(block_widths, splits[:-1]) if t] - stage_depths = ( - ivy.diff(ivy.array([d for d, t in enumerate(splits) if t])).int().tolist() + stage_depths = ivy.diff( + ivy.array([d for d, t in enumerate(splits) if t]), ) + stage_depths = stage_depths.astype(ivy.int64).tolist() strides = [STRIDE] * num_stages bottleneck_multipliers = [bottleneck_multiplier] * num_stages diff --git a/ivy_models_tests/regnet/test_regnet.py b/ivy_models_tests/regnet/test_regnet.py index 3b57687c..3304ec52 100644 --- a/ivy_models_tests/regnet/test_regnet.py +++ b/ivy_models_tests/regnet/test_regnet.py @@ -4,7 +4,6 @@ import random import os -ivy.set_backend("torch") VARIANTS = { "regnet_y_1_6gf": regnet_y_1_6gf, From 53ef379dd5fd98efe2838e4d6bb6899fd8e5c89a Mon Sep 17 00:00:00 2001 From: Eddy Oyieko <67474838+mobley-trent@users.noreply.github.com> Date: Fri, 25 Aug 2023 07:24:42 +0000 Subject: [PATCH 15/16] Updated layers.py --- ivy_models/regnet/layers.py | 2 +- scratch.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/ivy_models/regnet/layers.py b/ivy_models/regnet/layers.py index 436dffe4..42370e01 100644 --- a/ivy_models/regnet/layers.py +++ b/ivy_models/regnet/layers.py @@ 
-189,7 +189,7 @@ def _scale(self, input: ivy.Array) -> ivy.Array: scale = self.fc2(scale) return self.scale_activation(scale) - def forward(self, input: ivy.Array) -> ivy.Array: + def _forward(self, input: ivy.Array) -> ivy.Array: scale = self._scale(input) return scale * input diff --git a/scratch.py b/scratch.py index 50b5a9ec..3b39c86f 100644 --- a/scratch.py +++ b/scratch.py @@ -1,9 +1,6 @@ from ivy_models.helpers import load_torch_weights from ivy_models.regnet.regnet import RegNet from ivy_models.regnet.layers import BlockParams -import ivy - -ivy.set_backend("torch") # num_classes = 1000 # dropout = 0.5 From ca812705974b58db60e5cef564f2149892d1c5cd Mon Sep 17 00:00:00 2001 From: Eddy Oyieko <67474838+mobley-trent@users.noreply.github.com> Date: Tue, 26 Sep 2023 07:19:02 +0000 Subject: [PATCH 16/16] Updated regnet.py, test_regnet.py --- ivy_models/regnet/regnet.py | 4 ++++ ivy_models_tests/regnet/test_regnet.py | 1 + 2 files changed, 5 insertions(+) diff --git a/ivy_models/regnet/regnet.py b/ivy_models/regnet/regnet.py index 3ed058df..cc32d10c 100644 --- a/ivy_models/regnet/regnet.py +++ b/ivy_models/regnet/regnet.py @@ -5,6 +5,7 @@ from typing import Optional, Callable from collections import OrderedDict +import builtins class RegNetSpec(BaseSpec): @@ -131,11 +132,14 @@ def _forward(self, x: ivy.Array) -> ivy.Array: def _regnet_torch_weights_mapping(old_key, new_key): + W_KEY = ["conv1/weight", "conv2/weight", "conv3/weight", "downsample/0/weight"] new_mapping = new_key # if "weight" in old_key: # new_mapping = {"key_chain": new_key, "pattern": "b c h w -> h w c b"} # elif "bias" in old_key: # new_mapping = {"key_chain": new_key, "pattern": "h -> 1 h 1 1"} + if builtins.any([kc in old_key for kc in W_KEY]): + new_mapping = {"key_chain": new_key, "pattern": "b c h w -> h w c b"} return new_mapping diff --git a/ivy_models_tests/regnet/test_regnet.py b/ivy_models_tests/regnet/test_regnet.py index 3304ec52..34e8a199 100644 --- a/ivy_models_tests/regnet/test_regnet.py +++ b/ivy_models_tests/regnet/test_regnet.py @@ -31,6 +31,7 @@ def test_regnet_img_classification(device, fw): to_ivy=True, ) ) + img = ivy.squeeze(img, axis=0) # Create model model.v = ivy.asarray(v)
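
A minimal end-to-end usage sketch of the API these patches build up. The
`from_init_params` numbers below are an assumption borrowed from
torchvision's RegNet-Y recipe rather than taken from the patches above,
and `se_ratio=0.25` is what enables the squeeze-excitation path that the
"Y" variants rely on. The tests above preprocess images as NHWC, so a
channels-last dummy batch is used:

    import ivy
    from ivy_models.regnet.layers import BlockParams
    from ivy_models.regnet.regnet import RegNet

    # Expand the scalar recipe into per-stage depths/widths/strides.
    block_params = BlockParams.from_init_params(
        depth=27, w_0=48, w_a=20.71, w_m=2.65, group_width=24, se_ratio=0.25
    )
    model = RegNet(block_params, num_classes=1000, stem_width=32)

    # Run a dummy channels-last batch through the untrained model.
    x = ivy.random_normal(shape=(1, 224, 224, 3))
    logits = model(x)  # expected shape: (1, 1000)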