Merge pull request #88 from shinning0821/main
update cfg options
shinning0821 authored Jan 28, 2024
2 parents 36510b3 + 48a0e3d commit 53b7a9e
Showing 22 changed files with 83 additions and 39 deletions.
Binary file modified __pycache__/cfg.cpython-37.pyc
1 change: 1 addition & 0 deletions cfg.py
@@ -43,6 +43,7 @@ def parse_args():
parser.add_argument('-num_sample', type=int, default=4 , help='sample pos and neg')
parser.add_argument('-roi_size', type=int, default=96 , help='resolution of roi')
parser.add_argument('-evl_chunk', type=int, default=None , help='evaluation chunk')
parser.add_argument('-mid_dim', type=int, default=None , help='middle dim of adapter or the rank of lora matrix')
parser.add_argument(
'-data_path',
type=str,
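The new -mid_dim option lets a single flag set both the adapter bottleneck width and the LoRA/AdaLoRA rank in the image-encoder blocks; when it stays at its None default, the blocks fall back to their previous hard-coded values (rank 4 for the LoRA variants, the block width dim for the adapters). A minimal sketch of how the option resolves, mirroring the logic added to the encoder blocks later in this diff; the block width of 768 is an assumed example, not part of the change:

    from types import SimpleNamespace

    # Stand-in for the argparse result of cfg.parse_args(); running with "-mid_dim 8" would yield mid_dim=8.
    args = SimpleNamespace(mid_dim=8)
    dim = 768   # example block width (assumed); the real value comes from the encoder config

    lora_rank = args.mid_dim if args.mid_dim is not None else 4      # LoRA / AdaLoRA blocks
    adapter_dim = args.mid_dim if args.mid_dim is not None else dim  # Adapter blocks

    print(lora_rank, adapter_dim)   # 8 8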
Binary file modified models/ImageEncoder/tinyvit/__pycache__/lora_block.cpython-37.pyc
20 changes: 13 additions & 7 deletions models/ImageEncoder/tinyvit/adalora_block.py
@@ -11,13 +11,13 @@

class Mlp(nn.Module):
def __init__(self, in_features, hidden_features=None,
out_features=None, act_layer=nn.GELU, drop=0.):
out_features=None, act_layer=nn.GELU, drop=0., lora_rank=4):
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.norm = nn.LayerNorm(in_features)
self.fc1 = lora.SVDLinear(in_features, hidden_features,r=4)
self.fc2 = lora.SVDLinear(hidden_features, out_features,r=4)
self.fc1 = lora.SVDLinear(in_features, hidden_features,r=lora_rank)
self.fc2 = lora.SVDLinear(hidden_features, out_features,r=lora_rank)
self.act = act_layer()
self.drop = nn.Dropout(drop)

@@ -59,6 +59,7 @@ class Attention(torch.nn.Module):
def __init__(self, dim, key_dim, num_heads=8,
attn_ratio=4,
resolution=(14, 14),
lora_rank=4,
):
super().__init__()
# (h, w)
@@ -73,8 +74,8 @@ def __init__(self, dim, key_dim, num_heads=8,
h = self.dh + nh_kd * 2

self.norm = nn.LayerNorm(dim)
self.qkv = lora.SVDLinear(dim, h, r=4)
self.proj = lora.SVDLinear(self.dh, dim,r=4)
self.qkv = lora.SVDLinear(dim, h, r=lora_rank)
self.proj = lora.SVDLinear(self.dh, dim,r=lora_rank)

points = list(itertools.product(
range(resolution[0]), range(resolution[1])))
@@ -157,6 +158,11 @@ def __init__(self, args, dim, input_resolution, num_heads, window_size=7,
self.window_size = window_size
self.mlp_ratio = mlp_ratio

if(args.mid_dim != None):
lora_rank = args.mid_dim
else:
lora_rank = 4

self.drop_path = DropPath(
drop_path) if drop_path > 0. else nn.Identity()

@@ -165,12 +171,12 @@ def __init__(self, args, dim, input_resolution, num_heads, window_size=7,

window_resolution = (window_size, window_size)
self.attn = Attention(dim, head_dim, num_heads,
attn_ratio=1, resolution=window_resolution)
attn_ratio=1, resolution=window_resolution,lora_rank=lora_rank)

mlp_hidden_dim = int(dim * mlp_ratio)
mlp_activation = activation
self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim,
act_layer=mlp_activation, drop=drop)
act_layer=mlp_activation, drop=drop,lora_rank=lora_rank)

pad = local_conv_size // 2
self.local_conv = Conv2d_BN(
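Threading lora_rank through qkv, proj, fc1 and fc2 means one value now sizes every low-rank update in the block. As a rough, library-free illustration of what the rank controls (a generic rank-r update, not the repo's SVDLinear class):

    import torch
    import torch.nn as nn

    class LowRankUpdate(nn.Module):
        """Frozen base projection plus a trainable rank-r correction: W x + B (A x)."""
        def __init__(self, in_features, out_features, r):
            super().__init__()
            self.base = nn.Linear(in_features, out_features)
            for p in self.base.parameters():
                p.requires_grad_(False)                  # pretrained weight stays frozen
            self.A = nn.Parameter(torch.randn(r, in_features) * 0.01)
            self.B = nn.Parameter(torch.zeros(out_features, r))

        def forward(self, x):
            return self.base(x) + x @ self.A.T @ self.B.T

    for r in (4, 8):                                     # old hard-coded rank vs. e.g. -mid_dim 8
        m = LowRankUpdate(320, 320, r)
        trainable = sum(p.numel() for p in m.parameters() if p.requires_grad)
        print(r, trainable)                              # trainable parameter count grows linearly with r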
11 changes: 8 additions & 3 deletions models/ImageEncoder/tinyvit/adapter_block.py
@@ -111,6 +111,11 @@ def __init__(self, args, dim, input_resolution, num_heads, window_size=7,
self.window_size = window_size
self.mlp_ratio = mlp_ratio

if(args.mid_dim != None):
adapter_dim = args.mid_dim
else:
adapter_dim = dim

self.drop_path = DropPath(
drop_path) if drop_path > 0. else nn.Identity()

@@ -126,9 +131,9 @@ def __init__(self, args, dim, input_resolution, num_heads, window_size=7,
self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim,
act_layer=mlp_activation, drop=drop)

self.MLP_Adapter = Adapter(dim, skip_connect=False) # MLP-adapter, no skip connection
self.Space_Adapter = Adapter(dim) # with skip connection
self.Depth_Adapter = Adapter(dim, skip_connect=False) # no skip connection
self.MLP_Adapter = Adapter(adapter_dim, skip_connect=False) # MLP-adapter, no skip connection
self.Space_Adapter = Adapter(adapter_dim) # with skip connection
self.Depth_Adapter = Adapter(adapter_dim, skip_connect=False) # no skip connection

pad = local_conv_size // 2
self.local_conv = Conv2d_BN(
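For the adapter variant, -mid_dim now overrides the width handed to the three Adapter modules (MLP_Adapter, Space_Adapter, Depth_Adapter), which previously always received the block width dim. The repo's Adapter class lives elsewhere (models/common.py); the sketch below is only a typical bottleneck adapter to show what that width governs, and its signature is an assumption rather than the project's exact implementation:

    import torch
    import torch.nn as nn

    class BottleneckAdapter(nn.Module):
        """Generic adapter: down-project, non-linearity, up-project, optional residual."""
        def __init__(self, features, hidden, skip_connect=True):
            super().__init__()
            self.skip_connect = skip_connect
            self.down = nn.Linear(features, hidden)
            self.act = nn.GELU()
            self.up = nn.Linear(hidden, features)

        def forward(self, x):
            out = self.up(self.act(self.down(x)))
            return x + out if self.skip_connect else out

    x = torch.randn(2, 49, 320)
    print(BottleneckAdapter(320, hidden=64)(x).shape)    # torch.Size([2, 49, 320])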
23 changes: 15 additions & 8 deletions models/ImageEncoder/tinyvit/lora_block.py
@@ -11,13 +11,15 @@

class Mlp(nn.Module):
def __init__(self, in_features, hidden_features=None,
out_features=None, act_layer=nn.GELU, drop=0.):
out_features=None, act_layer=nn.GELU, drop=0., lora_rank=4):
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.norm = nn.LayerNorm(in_features)
self.fc1 = lora.Linear(in_features, hidden_features,r=4)
self.fc2 = lora.Linear(hidden_features, out_features,r=4)
self.fc1 = lora.Linear(in_features, hidden_features,r=lora_rank)
self.fc2 = lora.Linear(hidden_features, out_features,r=lora_rank)
# self.fc1 = nn.Linear(in_features, hidden_features)
# self.fc2 = nn.Linear(hidden_features, out_features)
self.act = act_layer()
self.drop = nn.Dropout(drop)

@@ -59,6 +61,7 @@ class Attention(torch.nn.Module):
def __init__(self, dim, key_dim, num_heads=8,
attn_ratio=4,
resolution=(14, 14),
lora_rank = 4,
):
super().__init__()
# (h, w)
@@ -73,8 +76,8 @@ def __init__(self, dim, key_dim, num_heads=8,
h = self.dh + nh_kd * 2

self.norm = nn.LayerNorm(dim)
self.qkv = lora.Linear(dim, h, r=4)
self.proj = lora.Linear(self.dh, dim,r=4)
self.qkv = lora.MergedLinear(dim, h, r=lora_rank, enable_lora=[True, False, True])
self.proj = nn.Linear(self.dh, dim)

points = list(itertools.product(
range(resolution[0]), range(resolution[1])))
@@ -108,7 +111,6 @@ def forward(self, x): # x (B,N,C)

# Normalization
x = self.norm(x)

qkv = self.qkv(x)
# (B, N, num_heads, d)
q, k, v = qkv.view(B, N, self.num_heads, -
@@ -157,6 +159,11 @@ def __init__(self, args, dim, input_resolution, num_heads, window_size=7,
self.window_size = window_size
self.mlp_ratio = mlp_ratio

if(args.mid_dim != None):
lora_rank = args.mid_dim
else:
lora_rank = 4

self.drop_path = DropPath(
drop_path) if drop_path > 0. else nn.Identity()

@@ -165,12 +172,12 @@ def __init__(self, args, dim, input_resolution, num_heads, window_size=7,

window_resolution = (window_size, window_size)
self.attn = Attention(dim, head_dim, num_heads,
attn_ratio=1, resolution=window_resolution)
attn_ratio=1, resolution=window_resolution,lora_rank=lora_rank)

mlp_hidden_dim = int(dim * mlp_ratio)
mlp_activation = activation
self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim,
act_layer=mlp_activation, drop=drop)
act_layer=mlp_activation, drop=drop,lora_rank=lora_rank)

pad = local_conv_size // 2
self.local_conv = Conv2d_BN(
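Beyond the configurable rank, this file swaps the fused q/k/v projection from lora.Linear to lora.MergedLinear(..., enable_lora=[True, False, True]) and reverts proj to a plain nn.Linear. In Microsoft's loralib, that enable_lora mask applies the low-rank update only to the first and last thirds of the output, i.e. to the query and value projections, while the key projection and the output projection keep their pretrained weights. A hedged usage sketch, assuming the standard loralib package (the repo's lora alias may be a vendored copy with the same interface):

    import torch
    import loralib as lora

    dim, rank = 320, 8
    # Fused q/k/v projection: LoRA on the q and v slices only; the k slice stays a frozen linear map.
    qkv = lora.MergedLinear(dim, 3 * dim, r=rank, enable_lora=[True, False, True])

    x = torch.randn(2, 49, dim)              # (batch, tokens, features)
    out = qkv(x)                             # (2, 49, 3 * dim), same shape as nn.Linear(dim, 3 * dim)

    # Typical LoRA training setup: only the low-rank factors receive gradients.
    lora.mark_only_lora_as_trainable(qkv)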
Binary file modified models/ImageEncoder/vit/__pycache__/adalora_block.cpython-37.pyc
Binary file modified models/ImageEncoder/vit/__pycache__/adapter_block.cpython-37.pyc
Binary file modified models/ImageEncoder/vit/__pycache__/block.cpython-37.pyc
Binary file modified models/ImageEncoder/vit/__pycache__/lora_block.cpython-37.pyc
20 changes: 14 additions & 6 deletions models/ImageEncoder/vit/adalora_block.py
@@ -41,17 +41,23 @@ def __init__(
"""
super().__init__()
self.norm1 = norm_layer(dim)
if(args.mid_dim != None):
lora_rank = args.mid_dim
else:
lora_rank = 4

self.attn = Attention(
dim,
num_heads=num_heads,
qkv_bias=qkv_bias,
use_rel_pos=use_rel_pos,
rel_pos_zero_init=rel_pos_zero_init,
input_size=input_size if window_size == 0 else (window_size, window_size),
lora_rank = lora_rank,
input_size=(64,64) if window_size == 0 else (window_size, window_size),
)

self.norm2 = norm_layer(dim)
self.mlp = MLPBlock(embedding_dim=dim, mlp_dim=int(dim * mlp_ratio), act=act_layer)
self.mlp = MLPBlock(embedding_dim=dim, mlp_dim=int(dim * mlp_ratio), act=act_layer,lora_rank = lora_rank)

self.window_size = window_size

@@ -79,10 +85,11 @@ def __init__(
embedding_dim: int,
mlp_dim: int,
act: Type[nn.Module] = nn.GELU,
lora_rank: int = 4,
) -> None:
super().__init__()
self.lin1 = lora.SVDLinear(embedding_dim, mlp_dim, r=4)
self.lin2 = lora.SVDLinear(mlp_dim, embedding_dim, r=4)
self.lin1 = lora.SVDLinear(embedding_dim, mlp_dim, r=lora_rank)
self.lin2 = lora.SVDLinear(mlp_dim, embedding_dim, r=lora_rank)
self.act = act()

def forward(self, x: torch.Tensor) -> torch.Tensor:
@@ -99,6 +106,7 @@ def __init__(
qkv_bias: bool = True,
use_rel_pos: bool = False,
rel_pos_zero_init: bool = True,
lora_rank: int = 4,
input_size: Optional[Tuple[int, int]] = None,
) -> None:
"""
@@ -116,8 +124,8 @@ def __init__(
head_dim = dim // num_heads
self.scale = head_dim**-0.5

self.qkv = lora.SVDLinear(dim, dim * 3, bias=qkv_bias, r=4)
self.proj = lora.SVDLinear(dim, dim, r=4)
self.qkv = lora.SVDLinear(dim, dim * 3, bias=qkv_bias, r=lora_rank)
self.proj = nn.Linear(dim, dim)

self.use_rel_pos = use_rel_pos
if self.use_rel_pos:
15 changes: 11 additions & 4 deletions models/ImageEncoder/vit/adapter_block.py
@@ -51,12 +51,18 @@ def __init__(
qkv_bias=qkv_bias,
use_rel_pos=use_rel_pos,
rel_pos_zero_init=rel_pos_zero_init,
input_size=input_size if window_size == 0 else (window_size, window_size),
input_size=(64,64) if window_size == 0 else (window_size, window_size),
)
self.MLP_Adapter = Adapter(dim, skip_connect=False) # MLP-adapter, no skip connection
self.Space_Adapter = Adapter(dim) # with skip connection

if(args.mid_dim != None):
adapter_dim = args.mid_dim
else:
adapter_dim = dim

self.MLP_Adapter = Adapter(adapter_dim, skip_connect=False) # MLP-adapter, no skip connection
self.Space_Adapter = Adapter(adapter_dim) # with skip connection
self.scale = scale
self.Depth_Adapter = Adapter(dim, skip_connect=False) # no skip connection
self.Depth_Adapter = Adapter(adapter_dim, skip_connect=False) # no skip connection
self.norm2 = norm_layer(dim)
self.mlp = MLPBlock(embedding_dim=dim, mlp_dim=int(dim * mlp_ratio), act=act_layer)

@@ -138,6 +144,7 @@ def __init__(
self.rel_pos_h = nn.Parameter(torch.zeros(2 * input_size[0] - 1, head_dim))
self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim))


def forward(self, x: torch.Tensor) -> torch.Tensor:
B, H, W, _ = x.shape
# qkv with shape (3, B, nHead, H * W, C)
2 changes: 1 addition & 1 deletion models/ImageEncoder/vit/block.py
@@ -45,7 +45,7 @@ def __init__(
qkv_bias=qkv_bias,
use_rel_pos=use_rel_pos,
rel_pos_zero_init=rel_pos_zero_init,
input_size=input_size if window_size == 0 else (window_size, window_size),
input_size=(64,64) if window_size == 0 else (window_size, window_size),
)

self.norm2 = norm_layer(dim)
23 changes: 16 additions & 7 deletions models/ImageEncoder/vit/lora_block.py
@@ -41,17 +41,23 @@ def __init__(
"""
super().__init__()
self.norm1 = norm_layer(dim)
if(args.mid_dim != None):
lora_rank = args.mid_dim
else:
lora_rank = 4

self.attn = Attention(
dim,
num_heads=num_heads,
qkv_bias=qkv_bias,
use_rel_pos=use_rel_pos,
rel_pos_zero_init=rel_pos_zero_init,
input_size=input_size if window_size == 0 else (window_size, window_size),
lora_rank = lora_rank,
input_size=(64,64) if window_size == 0 else (window_size, window_size),
)

self.norm2 = norm_layer(dim)
self.mlp = MLPBlock(embedding_dim=dim, mlp_dim=int(dim * mlp_ratio), act=act_layer)
self.mlp = MLPBlock(embedding_dim=dim, mlp_dim=int(dim * mlp_ratio), act=act_layer,lora_rank=lora_rank)

self.window_size = window_size

@@ -79,10 +85,11 @@ def __init__(
embedding_dim: int,
mlp_dim: int,
act: Type[nn.Module] = nn.GELU,
lora_rank: int = 4,
) -> None:
super().__init__()
self.lin1 = lora.Linear(embedding_dim, mlp_dim, r=4)
self.lin2 = lora.Linear(mlp_dim, embedding_dim, r=4)
self.lin1 = lora.Linear(embedding_dim, mlp_dim, r=lora_rank)
self.lin2 = lora.Linear(mlp_dim, embedding_dim, r=lora_rank)
self.act = act()

def forward(self, x: torch.Tensor) -> torch.Tensor:
@@ -99,6 +106,7 @@ def __init__(
qkv_bias: bool = True,
use_rel_pos: bool = False,
rel_pos_zero_init: bool = True,
lora_rank: int = 4,
input_size: Optional[Tuple[int, int]] = None,
) -> None:
"""
@@ -116,8 +124,8 @@ def __init__(
head_dim = dim // num_heads
self.scale = head_dim**-0.5

self.qkv = lora.Linear(dim, dim * 3, bias=qkv_bias, r=4)
self.proj = lora.Linear(dim, dim, r=4)
self.qkv = lora.MergedLinear(dim, dim * 3, bias=qkv_bias, r=lora_rank, enable_lora=[True, False, True])
self.proj = nn.Linear(dim, dim)

self.use_rel_pos = use_rel_pos
if self.use_rel_pos:
@@ -129,8 +137,9 @@ def __init__(
self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim))

def forward(self, x: torch.Tensor) -> torch.Tensor:
B, H, W, _ = x.shape
B, H, W, n = x.shape
# qkv with shape (3, B, nHead, H * W, C)
x = x.reshape(B,H*W,n)
qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
# q, k, v with shape (B * nHead, H * W, C)
q, k, v = qkv.reshape(3, B * self.num_heads, H * W, -1).unbind(0)
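The new x = x.reshape(B, H*W, n) flattens the (B, H, W, C) attention input to (batch, tokens, features) before the merged-LoRA qkv projection; the grouped low-rank path in the standard loralib implementation expects 3-D input, and the subsequent reshape to (B, H*W, 3, num_heads, -1) already assumed flattened tokens. A small shape check of that path, with head handling omitted and nn.Linear standing in for the LoRA layer:

    import torch
    import torch.nn as nn

    B, H, W, C = 2, 14, 14, 320
    x = torch.randn(B, H, W, C)

    tokens = x.reshape(B, H * W, C)          # (B, N, C): what the LoRA qkv projection expects
    qkv = nn.Linear(C, 3 * C)(tokens)        # stand-in for lora.MergedLinear; output (B, N, 3C)
    q, k, v = qkv.reshape(B, H * W, 3, C).permute(2, 0, 1, 3).unbind(0)
    print(q.shape)                           # torch.Size([2, 196, 320])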
1 change: 1 addition & 0 deletions models/efficient_sam/efficient_sam_encoder.py
@@ -132,6 +132,7 @@ def __init__(
args = self.args,
dim=patch_embed_dim,
num_heads=num_heads,
use_rel_pos=True,
mlp_ratio=mlp_ratio,
input_size=(img_size // patch_size, img_size // patch_size),
)
Binary file modified models/sam/__pycache__/build_sam.cpython-37.pyc
4 changes: 2 additions & 2 deletions models/sam/build_sam.py
@@ -81,8 +81,8 @@ def _build_sam(
num_heads=encoder_num_heads,
patch_size=vit_patch_size,
qkv_bias=True,
# use_rel_pos=True,
use_rel_pos=False,
use_rel_pos=True,
# use_rel_pos=False,
global_attn_indexes=encoder_global_attn_indexes,
window_size=14,
out_chans=prompt_embed_dim,
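Flipping use_rel_pos back to True (here and in efficient_sam_encoder.py above) re-enables decomposed relative position embeddings, which pairs with the input_size=(64,64) edits in the ViT block files: for global-attention blocks (window_size == 0), a 64x64 token grid matches the default 1024-pixel input at a 16-pixel patch size, so each per-axis table holds 2*64-1 offsets. A quick sizing check, with head_dim=64 assumed from the standard SAM ViT-B configuration:

    import torch
    import torch.nn as nn

    img_size, patch_size, head_dim = 1024, 16, 64
    grid = img_size // patch_size                                    # 64 tokens per axis in global-attention blocks

    rel_pos_h = nn.Parameter(torch.zeros(2 * grid - 1, head_dim))    # 127 x 64 learnable offsets (height axis)
    rel_pos_w = nn.Parameter(torch.zeros(2 * grid - 1, head_dim))    # 127 x 64 (width axis)
    print(rel_pos_h.shape, rel_pos_w.shape)

    # Windowed blocks keep (window_size, window_size): window_size=14 gives 2*14-1 = 27 offsets per axis.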
Binary file modified models/sam/modeling/__pycache__/image_encoder.cpython-37.pyc
2 changes: 1 addition & 1 deletion models/sam/modeling/image_encoder.py
@@ -12,7 +12,7 @@
import torch.nn.functional as F
from einops import rearrange

from ...common import Adapter, LayerNorm2d
from ...common import LayerNorm2d
from ...ImageEncoder import AdapterBlock, Block, LoraBlock


