From c82ce86f8f853f2ceb964018810deedb2280e9a2 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Mon, 14 Oct 2024 12:27:56 -0700 Subject: [PATCH] Add 384x384 mambaout_base_plus model weights --- README.md | 6 ++++-- timm/models/mambaout.py | 4 ++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 0157e985dc..388cb20330 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ |model |img_size|top1 |top5 |param_count| |---------------------------------------------------------------------------------------------------------------------|--------|------|------|-----------| +|[mambaout_base_plus_rw.sw_e150_r384_in12k_ft_in1k](http://huggingface.co/timm/mambaout_base_plus_rw.sw_e150_r384_in12k_ft_in1k)|384 |87.506|98.428|101.66 | |[mambaout_base_plus_rw.sw_e150_in12k_ft_in1k](http://huggingface.co/timm/mambaout_base_plus_rw.sw_e150_in12k_ft_in1k)|288 |86.912|98.236|101.66 | |[mambaout_base_plus_rw.sw_e150_in12k_ft_in1k](http://huggingface.co/timm/mambaout_base_plus_rw.sw_e150_in12k_ft_in1k)|224 |86.632|98.156|101.66 | |[mambaout_base_tall_rw.sw_e500_in1k](http://huggingface.co/timm/mambaout_base_tall_rw.sw_e500_in1k) |288 |84.974|97.332|86.48 | @@ -41,8 +42,8 @@ * SigLIP SO400M ViT fine-tunes on ImageNet-1k @ 378x378, added 378x378 option for existing SigLIP 384x384 models * [vit_so400m_patch14_siglip_378.webli_ft_in1k](https://huggingface.co/timm/vit_so400m_patch14_siglip_378.webli_ft_in1k) - 89.42 top-1 * [vit_so400m_patch14_siglip_gap_378.webli_ft_in1k](https://huggingface.co/timm/vit_so400m_patch14_siglip_gap_378.webli_ft_in1k) - 89.03 -* SigLIP SO400M ViT encoder from multi-lingual (i18n) patch16 @ 256x256 added (https://huggingface.co/timm/ViT-SO400M-16-SigLIP-i18n-256). OpenCLIP update pending. -* Add two ConNeXt 'Zepto' models & weights (one w/ overlapped stem and one w/ patch stem). Uses RMSNorm, smaller than previous 'Atto', 2.2M params +* SigLIP SO400M ViT encoder from recent multi-lingual (i18n) variant, patch16 @ 256x256 (https://huggingface.co/timm/ViT-SO400M-16-SigLIP-i18n-256). OpenCLIP update pending. +* Add two ConvNeXt 'Zepto' models & weights (one w/ overlapped stem and one w/ patch stem). Uses RMSNorm, smaller than previous 'Atto', 2.2M params. * [convnext_zepto_rms_ols.ra4_e3600_r224_in1k](https://huggingface.co/timm/convnext_zepto_rms_ols.ra4_e3600_r224_in1k) - 73.20 top-1 @ 224 * [convnext_zepto_rms.ra4_e3600_r224_in1k](https://huggingface.co/timm/convnext_zepto_rms.ra4_e3600_r224_in1k) - 72.81 @ 224 @@ -54,6 +55,7 @@ * [mobilenetv3_large_150d.ra4_e3600_r256_in1k](http://hf.co/timm/mobilenetv3_large_150d.ra4_e3600_r256_in1k) - 81.81 @ 320, 80.94 @ 256 * [mobilenetv3_large_100.ra4_e3600_r224_in1k](http://hf.co/timm/mobilenetv3_large_100.ra4_e3600_r224_in1k) - 77.16 @ 256, 76.31 @ 224 + ### Aug 21, 2024 * Updated SBB ViT models trained on ImageNet-12k and fine-tuned on ImageNet-1k, challenging quite a number of much larger, slower models diff --git a/timm/models/mambaout.py b/timm/models/mambaout.py index c748e408ea..c077b01ff1 100644 --- a/timm/models/mambaout.py +++ b/timm/models/mambaout.py @@ -500,6 +500,10 @@ def _cfg(url='', **kwargs): 'mambaout_base_plus_rw.sw_e150_in12k_ft_in1k': _cfg( hf_hub_id='timm/', ), + 'mambaout_base_plus_rw.sw_e150_r384_in12k_ft_in1k': _cfg( + hf_hub_id='timm/', + input_size=(3, 384, 384), test_input_size=(3, 384, 384), crop_mode='squash', pool_size=(12, 12), + ), 'mambaout_base_plus_rw.sw_e150_in12k': _cfg( hf_hub_id='timm/', num_classes=11821,