From dfafc140dd17d77c1afd3d10bb2ccb1c06445ad4 Mon Sep 17 00:00:00 2001
From: "wangluting.wlt" <wangluting.wlt@bytedance.com>
Date: Thu, 19 Sep 2024 09:39:44 +0800
Subject: [PATCH] feat: add ram_plus OAKE model and ram_cuda config

---
 .todd_version                |  2 +-
 README.md                    | 20 +++++++++++---------
 configs/oake/ram_cuda.py     | 26 ++++++++++++++++++++++++++
 oadp/oake/models/__init__.py |  1 +
 oadp/oake/models/ram.py      | 36 ++++++++++++++++++++++++++++++++++++
 5 files changed, 75 insertions(+), 10 deletions(-)
 create mode 100644 configs/oake/ram_cuda.py
 create mode 100644 oadp/oake/models/ram.py

diff --git a/.todd_version b/.todd_version
index 32068c5..878d720 100755
--- a/.todd_version
+++ b/.todd_version
@@ -1 +1 @@
-7da60bb75eb45b3834f33479d4046ec7fab08345
+db442d303be0dd469d9caa556f1fa6d6a93cbf89
diff --git a/README.md b/README.md
index cb28678..d8ac9f0 100644
--- a/README.md
+++ b/README.md
@@ -83,15 +83,17 @@ The following scripts extract features with CLIP, which can be very time-consumi
 Extract globals and blocks features, which can be used for both coco and lvis
 
 ```bash
-[DRY_RUN=True] bash tools/torchrun.sh -m oadp.oake.val oake/coco/clip_globals_cuda configs/oake/clip_globals_cuda.py --config-options dataset::COCO [--auto-fix]
-[DRY_RUN=True] bash tools/torchrun.sh -m oadp.oake.val oake/coco/clip_blocks_cuda configs/oake/clip_blocks_cuda.py --config-options dataset::COCO [--auto-fix]
-[DRY_RUN=True] bash tools/torchrun.sh -m oadp.oake.val oake/coco/clip_objects_cuda configs/oake/clip_objects_cuda.py --config-options dataset::COCO [--auto-fix]
-[DRY_RUN=True] bash tools/torchrun.sh -m oadp.oake.val oake/lvis/clip_objects_cuda configs/oake/clip_objects_cuda.py --config-options dataset::LVIS [--auto-fix]
-
-[DRY_RUN=True] bash tools/torchrun.sh -m oadp.oake.val oake/coco/dino_globals_cuda configs/oake/dino_globals_cuda.py --config-options dataset::COCO [--auto-fix]
-[DRY_RUN=True] bash tools/torchrun.sh -m oadp.oake.val oake/coco/dino_blocks_cuda configs/oake/dino_blocks_cuda.py --config-options dataset::COCO [--auto-fix]
-[DRY_RUN=True] bash tools/torchrun.sh -m oadp.oake.val oake/coco/dino_objects_cuda configs/oake/dino_objects_cuda.py --config-options dataset::COCO [--auto-fix]
-[DRY_RUN=True] bash tools/torchrun.sh -m oadp.oake.val oake/lvis/dino_objects_cuda configs/oake/dino_objects_cuda.py --config-options dataset::LVIS [--auto-fix]
+bash tools/torchrun.sh -m oadp.oake.val oake/coco/clip_globals_cuda configs/oake/clip_globals_cuda.py --config-options dataset::COCO [--auto-fix]
+bash tools/torchrun.sh -m oadp.oake.val oake/coco/clip_blocks_cuda configs/oake/clip_blocks_cuda.py --config-options dataset::COCO [--auto-fix]
+bash tools/torchrun.sh -m oadp.oake.val oake/coco/clip_objects_cuda configs/oake/clip_objects_cuda.py --config-options dataset::COCO [--auto-fix]
+bash tools/torchrun.sh -m oadp.oake.val oake/lvis/clip_objects_cuda configs/oake/clip_objects_cuda.py --config-options dataset::LVIS [--auto-fix]
+
+bash tools/torchrun.sh -m oadp.oake.val oake/coco/dino_globals_cuda configs/oake/dino_globals_cuda.py --config-options dataset::COCO [--auto-fix]
+bash tools/torchrun.sh -m oadp.oake.val oake/coco/dino_blocks_cuda configs/oake/dino_blocks_cuda.py --config-options dataset::COCO [--auto-fix]
+bash tools/torchrun.sh -m oadp.oake.val oake/coco/dino_objects_cuda configs/oake/dino_objects_cuda.py --config-options dataset::COCO [--auto-fix]
+bash tools/torchrun.sh -m oadp.oake.val oake/lvis/dino_objects_cuda configs/oake/dino_objects_cuda.py --config-options dataset::LVIS [--auto-fix]
+
+bash tools/torchrun.sh -m oadp.oake.val oake/coco/ram_cuda configs/oake/ram_cuda.py --config-options dataset::COCO
 ```
 
 The number of files generated by OAKE-objects may be less than the number of images in the dataset.
diff --git a/configs/oake/ram_cuda.py b/configs/oake/ram_cuda.py
new file mode 100644
index 0000000..f718306
--- /dev/null
+++ b/configs/oake/ram_cuda.py
@@ -0,0 +1,26 @@
+from typing import Any
+
+from todd.configs import PyConfig
+
+_kwargs_: dict[str, Any]  # presumably injected by the loader - confirm
+_kwargs_ = dict(_kwargs_)  # shallow copy before adding defaults
+
+_kwargs_.setdefault('branch', 'Global')  # used only if not already set
+_kwargs_.setdefault('strategy', 'cuda')
+
+_base_ = [  # interface.py, loaded with the kwargs above
+    PyConfig.load('configs/oake/interface.py', **_kwargs_),
+]
+
+runner = dict(  # exported below as both trainer and validator
+    model=dict(type='ram_plus', expand_mask_size=None, adaptive=False),
+)
+custom_imports = [
+    'oadp.oake.globals_',
+]
+
+_export_ = dict(
+    trainer=runner,  # same dict object as validator
+    validator=runner,
+    custom_imports=custom_imports,
+)
diff --git a/oadp/oake/models/__init__.py b/oadp/oake/models/__init__.py
index d01c6a6..fe99f93 100644
--- a/oadp/oake/models/__init__.py
+++ b/oadp/oake/models/__init__.py
@@ -1,3 +1,4 @@
 from .clip import *
 from .dino import *
 from .expanders import *
+from .ram import *
diff --git a/oadp/oake/models/ram.py b/oadp/oake/models/ram.py
new file mode 100644
index 0000000..76893e6
--- /dev/null
+++ b/oadp/oake/models/ram.py
@@ -0,0 +1,58 @@
+__all__ = [
+    'ram_plus',
+]
+
+import todd.tasks.image_classification as ic
+import torch
+import torchvision.transforms.v2 as tf_v2
+from todd.datasets import IMAGENET_MEAN, IMAGENET_STD
+from todd.tasks.image_classification.models.ram import Categories
+from torch import nn
+
+from ..registries import OAKEModelRegistry
+
+
+@OAKEModelRegistry.register_()
+def ram_plus(
+    expand_mask_size: int | None,
+    adaptive: bool,
+) -> tuple[nn.Module, tf_v2.Compose]:
+    """Build a frozen RAMplus model and its preprocessing transforms.
+
+    Args:
+        expand_mask_size: must be ``None``; other values are rejected.
+        adaptive: must be falsy; truthy values are rejected.
+
+    Returns:
+        The model, with gradients disabled and in eval mode, and the
+        transforms that resize to 384x384, convert to a scaled float32
+        image, and normalize with ``IMAGENET_MEAN``/``IMAGENET_STD``.
+
+    Raises:
+        ValueError: if ``expand_mask_size`` is not ``None`` or
+            ``adaptive`` is truthy.
+    """
+    # Raise explicitly instead of asserting, so that unsupported options
+    # are still rejected when Python runs with -O.
+    if expand_mask_size is not None:
+        raise ValueError(
+            'expand_mask_size must be None, '
+            f'got {expand_mask_size!r}',
+        )
+    if adaptive:
+        raise ValueError(f'adaptive must be False, got {adaptive!r}')
+
+    categories = Categories.load()
+    model = ic.models.RAMplus(num_categories=len(categories))
+    model.load_pretrained('pretrained/ram/ram_plus_swin_large_14m.pth')
+    model.requires_grad_(False)
+    model.eval()
+
+    transforms = tf_v2.Compose([
+        tf_v2.Resize((384, 384)),
+        tf_v2.ToImage(),
+        tf_v2.ToDtype(torch.float32, scale=True),
+        tf_v2.Normalize(IMAGENET_MEAN, IMAGENET_STD),
+    ])
+
+    return model, transforms