From c82ce86f8f853f2ceb964018810deedb2280e9a2 Mon Sep 17 00:00:00 2001
From: Ross Wightman <rwightman@gmail.com>
Date: Mon, 14 Oct 2024 12:27:56 -0700
Subject: [PATCH] Add 384x384 mambaout_base_plus model weights

---
 README.md               | 6 ++++--
 timm/models/mambaout.py | 4 ++++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 0157e985dc..388cb20330 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,7 @@
 
 |model                                                                                                                |img_size|top1  |top5  |param_count|
 |---------------------------------------------------------------------------------------------------------------------|--------|------|------|-----------|
+|[mambaout_base_plus_rw.sw_e150_r384_in12k_ft_in1k](http://huggingface.co/timm/mambaout_base_plus_rw.sw_e150_r384_in12k_ft_in1k)|384     |87.506|98.428|101.66     |
 |[mambaout_base_plus_rw.sw_e150_in12k_ft_in1k](http://huggingface.co/timm/mambaout_base_plus_rw.sw_e150_in12k_ft_in1k)|288     |86.912|98.236|101.66     |
 |[mambaout_base_plus_rw.sw_e150_in12k_ft_in1k](http://huggingface.co/timm/mambaout_base_plus_rw.sw_e150_in12k_ft_in1k)|224     |86.632|98.156|101.66     |
 |[mambaout_base_tall_rw.sw_e500_in1k](http://huggingface.co/timm/mambaout_base_tall_rw.sw_e500_in1k)                  |288     |84.974|97.332|86.48      |
@@ -41,8 +42,8 @@
 * SigLIP SO400M ViT fine-tunes on ImageNet-1k @ 378x378, added 378x378 option for existing SigLIP 384x384 models
   *  [vit_so400m_patch14_siglip_378.webli_ft_in1k](https://huggingface.co/timm/vit_so400m_patch14_siglip_378.webli_ft_in1k) - 89.42 top-1
   *  [vit_so400m_patch14_siglip_gap_378.webli_ft_in1k](https://huggingface.co/timm/vit_so400m_patch14_siglip_gap_378.webli_ft_in1k) - 89.03
-* SigLIP SO400M ViT encoder from multi-lingual (i18n) patch16 @ 256x256 added (https://huggingface.co/timm/ViT-SO400M-16-SigLIP-i18n-256). OpenCLIP update pending.
-* Add two ConNeXt 'Zepto' models & weights (one w/ overlapped stem and one w/ patch stem). Uses RMSNorm, smaller than previous 'Atto', 2.2M params
+* SigLIP SO400M ViT encoder from recent multi-lingual (i18n) variant, patch16 @ 256x256 (https://huggingface.co/timm/ViT-SO400M-16-SigLIP-i18n-256). OpenCLIP update pending.
+* Add two ConvNeXt 'Zepto' models & weights (one w/ overlapped stem and one w/ patch stem). Uses RMSNorm, smaller than previous 'Atto', 2.2M params.
   * [convnext_zepto_rms_ols.ra4_e3600_r224_in1k](https://huggingface.co/timm/convnext_zepto_rms_ols.ra4_e3600_r224_in1k) - 73.20 top-1 @ 224
   * [convnext_zepto_rms.ra4_e3600_r224_in1k](https://huggingface.co/timm/convnext_zepto_rms.ra4_e3600_r224_in1k) - 72.81 @ 224
 
@@ -54,6 +55,7 @@
   * [mobilenetv3_large_150d.ra4_e3600_r256_in1k](http://hf.co/timm/mobilenetv3_large_150d.ra4_e3600_r256_in1k) - 81.81 @ 320, 80.94 @ 256
   * [mobilenetv3_large_100.ra4_e3600_r224_in1k](http://hf.co/timm/mobilenetv3_large_100.ra4_e3600_r224_in1k) - 77.16 @ 256, 76.31 @ 224
 
+
 ### Aug 21, 2024
 * Updated SBB ViT models trained on ImageNet-12k and fine-tuned on ImageNet-1k, challenging quite a number of much larger, slower models
 
diff --git a/timm/models/mambaout.py b/timm/models/mambaout.py
index c748e408ea..c077b01ff1 100644
--- a/timm/models/mambaout.py
+++ b/timm/models/mambaout.py
@@ -500,6 +500,10 @@ def _cfg(url='', **kwargs):
     'mambaout_base_plus_rw.sw_e150_in12k_ft_in1k': _cfg(
         hf_hub_id='timm/',
     ),
+    'mambaout_base_plus_rw.sw_e150_r384_in12k_ft_in1k': _cfg(
+        hf_hub_id='timm/',
+        input_size=(3, 384, 384), test_input_size=(3, 384, 384), crop_mode='squash', pool_size=(12, 12),
+    ),
     'mambaout_base_plus_rw.sw_e150_in12k': _cfg(
         hf_hub_id='timm/',
         num_classes=11821,