Commit f8b564e
Showing 6 changed files with 580 additions and 0 deletions.
LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2018 lyakaap

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

README.md
@@ -0,0 +1,58 @@
# NetVLAD-pytorch
PyTorch implementation of NetVLAD & Online Hardest Triplet Loss.
In NetVLAD, broadcasting is used to compute the residuals between descriptors and cluster centroids, which makes the whole computation much faster than looping over clusters.
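As a minimal sketch of that broadcasting step (shapes are assumed: N descriptors of dimension D, K clusters; names are illustrative):
```
import torch

N, D, K = 10, 128, 8          # descriptors, feature dim, clusters
x = torch.rand(N, D)          # local descriptors
centroids = torch.rand(K, D)  # cluster centers

# (N, 1, D) - (1, K, D) broadcasts to (N, K, D): every descriptor minus
# every centroid in a single op, with no Python loop over the K clusters.
residuals = x.unsqueeze(1) - centroids.unsqueeze(0)
print(residuals.shape)  # torch.Size([10, 8, 128])
```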

NetVLAD: https://arxiv.org/abs/1511.07247

In Defense of the Triplet Loss for Person Re-Identification: https://arxiv.org/abs/1703.07737 (see also https://omoindrot.github.io/triplet-loss)
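For reference, batch-hard ("online hardest") triplet mining picks, for each anchor, the farthest positive and the nearest negative within the batch. A minimal sketch of the idea (an assumed toy implementation, not this repository's `HardTripletLoss`):
```
import torch

def batch_hard_triplet_loss(emb, labels, margin=0.1):
    # Pairwise Euclidean distances, (B, B).
    dist = torch.cdist(emb, emb, p=2)
    same = labels.unsqueeze(0) == labels.unsqueeze(1)  # (B, B) same-label mask
    # Hardest positive: largest distance among same-label pairs.
    hardest_pos = (dist * same.float()).max(dim=1).values
    # Hardest negative: smallest distance among different-label pairs
    # (assumes every anchor has at least one negative in the batch).
    hardest_neg = (dist + same.float() * 1e6).min(dim=1).values
    return torch.relu(hardest_pos - hardest_neg + margin).mean()

emb = torch.rand(40, 1024)
labels = torch.randint(0, 10, (40,))
print(batch_hard_triplet_loss(emb, labels))
```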

## Usage
```
import torch
import torch.nn as nn
from netvlad import NetVLAD
from netvlad import EmbedNet
from hard_triplet_loss import HardTripletLoss
from torchvision.models import resnet18

# Discard the classification layers at the end of the base network.
encoder = resnet18(pretrained=True)
base_model = nn.Sequential(
    encoder.conv1,
    encoder.bn1,
    encoder.relu,
    encoder.maxpool,
    encoder.layer1,
    encoder.layer2,
    encoder.layer3,
    encoder.layer4,
)
dim = list(base_model.parameters())[-1].shape[0]  # last channels (512)

# Define the model for embedding.
net_vlad = NetVLAD(num_clusters=32, dim=dim, alpha=1.0)
model = EmbedNet(base_model, net_vlad).cuda()

# Define the loss.
criterion = HardTripletLoss(margin=0.1).cuda()

# This is just a toy example. Typically, the number of samples per class is 4.
labels = torch.randint(0, 10, (40,)).long()
x = torch.rand(40, 3, 128, 128).cuda()
output = model(x)
triplet_loss = criterion(output, labels.cuda())
```
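Batch-hard mining needs positives for every anchor, so batches are commonly built by PK sampling: P classes with K samples each (the 40-sample batch above would correspond to P=10, K=4). A minimal sketch of such labels, assuming you control batch construction:
```
P, K = 10, 4
labels = torch.arange(P).repeat_interleave(K)  # 0,0,0,0,1,1,1,1,...
```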
# ghostVLAD
Uses FC features (one vector per face) and contains both NetVLAD and ghostVLAD modules.

Run:
```
python ghostVLAD.py
```
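For background: GhostVLAD (https://arxiv.org/abs/1810.09951) adds extra "ghost" clusters that take part in the soft assignment, soaking up uninformative (e.g. low-quality) face descriptors, but are dropped from the final aggregated descriptor; the `ghostVLAD`/`ghostVLAD2` classes below follow that idea.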

ghostVLAD.py
@@ -0,0 +1,231 @@
# coding=utf-8
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import resnet18
'''
For face recognition, where one person has several face photos: extract a feature
from each face and fuse the per-face features into a single descriptor. VLAD is
modified by replacing the convolutional layer with an FC layer.
'''
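# Background (NetVLAD, https://arxiv.org/abs/1511.07247): given descriptors x_n,
# centroids c_k, and soft assignments a_k(x_n), the aggregated descriptor is
#   V(k) = sum_n a_k(x_n) * (x_n - c_k),
# followed by intra-cluster and then global L2 normalization. The classes below
# compute a_k with an FC layer on per-face feature vectors instead of a 1x1 conv
# on feature maps.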
class netVLAD(nn.Module):
    '''
    Parameters: an FC layer (dim x num_clusters) plus num_clusters x dim
    centroids, i.e. 8 * 128 centroid weights with the defaults.
    '''
    def __init__(self, num_clusters=8, dim=128, normalize_input=True):
        super(netVLAD, self).__init__()
        self.num_clusters = num_clusters
        self.dim = dim
        self.normalize_input = normalize_input
        self.fc = nn.Linear(dim, num_clusters)
        self.centroids = nn.Parameter(torch.rand(num_clusters, dim))
        self._init_params()

    def _init_params(self):
        nn.init.xavier_normal_(self.fc.weight.data)
        nn.init.constant_(self.fc.bias.data, 0.0)
        # Alternative centroid-based init carried over from the conv version
        # of NetVLAD (weight shapes would need adapting for nn.Linear):
        # self.alpha = 100.
        # self.fc.weight = nn.Parameter(
        #     (2.0 * self.alpha * self.centroids).unsqueeze(-1).unsqueeze(-1))
        # self.fc.bias = nn.Parameter(-self.alpha * self.centroids.norm(dim=1))

    def forward(self, x):
        '''
        x: (N, dim), e.g. (10, 128) -- one feature vector per face.
        '''
        N, C = x.shape[:2]
        assert C == self.dim, "feature dim not correct"
        if self.normalize_input:
            x = F.normalize(x, p=2, dim=1)  # L2-normalize each descriptor
        soft_assign = self.fc(x).unsqueeze(0).permute(0, 2, 1)  # (N,K)->(1,N,K)->(1,K,N)
        soft_assign = F.softmax(soft_assign, dim=1)
        # (1, C, N); x.view(1, C, -1) would scramble the memory layout here.
        x_flatten = x.unsqueeze(0).permute(0, 2, 1)
        # [(1,C,N)->(1,K,C,N)] - [(K,C)->(K,C,N)->(1,K,C,N)]
        residual = x_flatten.expand(self.num_clusters, -1, -1, -1).permute(1, 0, 2, 3) - \
            self.centroids.expand(x_flatten.size(-1), -1, -1).permute(1, 2, 0).unsqueeze(0)
        residual *= soft_assign.unsqueeze(2)  # weight residuals by soft assignment
        vlad = residual.sum(dim=-1)           # (1, K, C)
        vlad = F.normalize(vlad, p=2, dim=2)  # intra-cluster normalization
        vlad = vlad.view(1, -1)
        vlad = F.normalize(vlad, p=2, dim=1)  # (1, K*C)
        return vlad
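# Example (assumed usage): netVLAD(num_clusters=8, dim=128)(torch.rand(10, 128))
# fuses 10 per-face descriptors into a single (1, 8*128) set-level descriptor.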

class netVLAD2(nn.Module):
    '''
    Same aggregation as netVLAD, written two ways: forward() expands the sum
    V(k) = sum_n a_k(x_n) x_n - (sum_n a_k(x_n)) c_k, while forward2() builds
    the residuals explicitly via broadcasting and also applies per-cluster
    (intra) normalization, which forward() skips.
    '''
    def __init__(self, num_clusters=8, dim=128, normalize_input=True):
        super(netVLAD2, self).__init__()
        self.num_clusters = num_clusters
        self.dim = dim
        self.normalize_input = normalize_input
        self.fc = nn.Linear(dim, num_clusters)
        self.batch_norm = nn.BatchNorm1d(num_clusters, eps=1e-3, momentum=0.01)
        self.softmax = nn.Softmax(dim=1)
        self.centroids = nn.Parameter(torch.rand(num_clusters, dim))
        self._init_params()

    def _init_params(self):
        nn.init.xavier_normal_(self.fc.weight.data)
        nn.init.constant_(self.fc.bias.data, 0.0)

    def forward(self, x):
        N, C = x.shape[:2]
        if self.normalize_input:
            x = F.normalize(x, p=2, dim=1)
        soft_assign = self.fc(x)
        soft_assign = self.softmax(soft_assign).unsqueeze(0)  # (1, N, K)
        a_sum = soft_assign.sum(-2).unsqueeze(1)              # (1, 1, K)
        a = torch.mul(a_sum, self.centroids.transpose(1, 0).unsqueeze(0))  # (1, C, K)
        soft_assign = soft_assign.permute(0, 2, 1).contiguous()  # (1, K, N)
        x = x.view([-1, N, self.dim])                            # (1, N, C)
        vlad = torch.matmul(soft_assign, x).permute(0, 2, 1).contiguous()  # (1, C, K)
        # Note: unlike forward2(), no per-cluster (intra) normalization here.
        vlad = vlad.sub(a).view([-1, self.num_clusters * self.dim])
        vlad = F.normalize(vlad, p=2, dim=1)
        return vlad

    def forward2(self, x):
        '''
        x: (N, dim), e.g. (10, 128)
        '''
        N, C = x.shape[:2]
        assert C == self.dim, "feature dim not correct"
        if self.normalize_input:
            x = F.normalize(x, p=2, dim=1)
        soft_assign = self.fc(x).unsqueeze(0).permute(0, 2, 1)  # (N,K)->(1,N,K)->(1,K,N)
        soft_assign = F.softmax(soft_assign, dim=1)
        x_flatten = x.unsqueeze(0).permute(0, 2, 1)             # (1, C, N)
        # [(1,C,N)->(8,1,C,N)->(1,8,C,N)] - [(8,C)->(N,8,C)->(8,C,N)->(1,8,C,N)]
        residual = x_flatten.expand(self.num_clusters, -1, -1, -1).permute(1, 0, 2, 3) - \
            self.centroids.expand(x_flatten.size(-1), -1, -1).permute(1, 2, 0).unsqueeze(0)
        residual *= soft_assign.unsqueeze(2)  # (1,K,C,N) * (1,K,1,N)
        vlad = residual.sum(dim=-1)           # (1, K, C)
        vlad = F.normalize(vlad, p=2, dim=2)  # intra-cluster normalization
        vlad = vlad.view(1, -1)
        vlad = F.normalize(vlad, p=2, dim=1)  # (1, K*C)
        return vlad

class ghostVLAD(nn.Module):
    def __init__(self, num_clusters=8, ghost=1, dim=128, normalize_input=True):
        super(ghostVLAD, self).__init__()
        self.num_clusters = num_clusters
        self.dim = dim
        self.ghost = ghost
        self.normalize_input = normalize_input
        # Assign over num_clusters + ghost clusters, but keep centroids only
        # for the real clusters: ghost assignments are discarded below.
        self.fc = nn.Linear(dim, num_clusters + ghost)
        self.centroids = nn.Parameter(torch.rand(num_clusters, dim))
        self._init_params()

    def _init_params(self):
        nn.init.xavier_normal_(self.fc.weight.data)
        nn.init.constant_(self.fc.bias.data, 0.0)

    def forward(self, x):
        '''
        x: (N, dim)
        '''
        N, C = x.shape[:2]
        assert C == self.dim, "feature dim not correct"
        if self.normalize_input:
            x = F.normalize(x, p=2, dim=1)
        soft_assign = self.fc(x).unsqueeze(0).permute(0, 2, 1)  # (N,K+G)->(1,K+G,N)
        soft_assign = F.softmax(soft_assign, dim=1)  # softmax over real + ghost clusters

        soft_assign = soft_assign[:, :self.num_clusters, :]  # drop ghost rows -> (1, K, N)

        x_flatten = x.unsqueeze(0).permute(0, 2, 1)  # (1, C, N)
        residual = x_flatten.expand(self.num_clusters, -1, -1, -1).permute(1, 0, 2, 3) - \
            self.centroids.expand(x_flatten.size(-1), -1, -1).permute(1, 2, 0).unsqueeze(0)
        residual *= soft_assign.unsqueeze(2)
        vlad = residual.sum(dim=-1)           # (1, K, C)
        vlad = F.normalize(vlad, p=2, dim=2)
        vlad = vlad.view(1, -1)
        vlad = F.normalize(vlad, p=2, dim=1)  # (1, K*C)
        return vlad

class ghostVLAD2(nn.Module):
    def __init__(self, num_clusters=8, ghost=1, dim=128, normalize_input=True):
        super(ghostVLAD2, self).__init__()
        self.num_clusters = num_clusters
        self.dim = dim
        self.ghost = ghost
        self.normalize_input = normalize_input
        self.fc = nn.Linear(dim, num_clusters + ghost)
        # Here the ghost clusters also get centroids; they are dropped from
        # the aggregated descriptor after the residual sum.
        self.centroids = nn.Parameter(torch.rand(num_clusters + ghost, dim))
        self._init_params()

    def _init_params(self):
        nn.init.xavier_normal_(self.fc.weight.data)
        nn.init.constant_(self.fc.bias.data, 0.0)

    def forward(self, x):
        '''
        x: (N, dim)
        '''
        N, C = x.shape[:2]
        assert C == self.dim, "feature dim not correct"
        if self.normalize_input:
            x = F.normalize(x, p=2, dim=1)
        soft_assign = self.fc(x).unsqueeze(0).permute(0, 2, 1)  # (N,K+G)->(1,K+G,N)
        soft_assign = F.softmax(soft_assign, dim=1)

        # (1, C, N); x.view(1, C, -1) would scramble the memory layout.
        x_flatten = x.unsqueeze(0).permute(0, 2, 1)
        residual = x_flatten.expand(self.num_clusters + self.ghost, -1, -1, -1).permute(1, 0, 2, 3) - \
            self.centroids.expand(x_flatten.size(-1), -1, -1).permute(1, 2, 0).unsqueeze(0)
        residual *= soft_assign.unsqueeze(2)
        vlad = residual.sum(dim=-1)            # (1, K+G, C)
        vlad = vlad[:, :self.num_clusters, :]  # drop ghost clusters -> (1, K, C)
        vlad = F.normalize(vlad, p=2, dim=2)
        vlad = vlad.view(1, -1)
        vlad = F.normalize(vlad, p=2, dim=1)   # (1, K*C)
        return vlad

class EmbedNet(nn.Module):
    def __init__(self, base_model, net_vlad, dim_in=512, dim_out=128):
        super(EmbedNet, self).__init__()
        self.base_model = base_model
        self.net_vlad = net_vlad
        # 1x1 conv projects the backbone channels (dim_in) down to dim_out.
        self.conv = nn.Conv2d(dim_in, dim_out, kernel_size=(1, 1), bias=True)
        self.avgp = nn.AdaptiveAvgPool2d(1)

    def forward(self, x):
        x = self.base_model(x)
        x = self.conv(x)
        x = self.avgp(x)
        x = x.squeeze()  # (N, dim_out); note this also squeezes N when N == 1
        embedded_x = self.net_vlad.forward(x)
        # forward2() only exists on netVLAD2; use that class with this EmbedNet.
        emb2 = self.net_vlad.forward2(x)
        return embedded_x, emb2

def test():
    encoder = resnet18(pretrained=False)
    base_model = nn.Sequential(
        encoder.conv1,
        encoder.bn1,
        encoder.relu,
        encoder.maxpool,
        encoder.layer1,
        encoder.layer2,
        encoder.layer3,
        encoder.layer4,
    )
    dim_in = list(base_model.parameters())[-1].shape[0]  # 512
    dim_out = 128
    net_vlad = netVLAD2(dim=dim_out)
    # net_vlad = ghostVLAD(dim=dim_out)
    model = EmbedNet(base_model, net_vlad, dim_in=dim_in, dim_out=dim_out)

    x = torch.rand(10, 3, 128, 128)  # e.g. 10 face crops of one person
    output1, output2 = model(x)
    print(output1.shape, output2.shape)  # both (1, 8*128)
    print(output1)
    print(output2.detach().numpy())


if __name__ == '__main__':
    test()