diff --git a/image_resnet/fluid/reader.py b/image_resnet/fluid/reader.py new file mode 100644 index 0000000..00a23fc --- /dev/null +++ b/image_resnet/fluid/reader.py @@ -0,0 +1,237 @@ +import os +import math +import random +import cPickle +import functools +import numpy as np +import paddle.v2 as paddle +from PIL import Image, ImageEnhance + +random.seed(0) + +DATA_DIM = 224 + +THREAD = 8 +BUF_SIZE = 10240 + +img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) +img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + + +def resize_short(img, target_size): + percent = float(target_size) / min(img.size[0], img.size[1]) + resized_width = int(round(img.size[0] * percent)) + resized_height = int(round(img.size[1] * percent)) + img = img.resize((resized_width, resized_height), Image.BILINEAR) + return img + + +def Scale(img, size): + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), Image.BILINEAR) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), Image.BILINEAR) + + +def CenterCrop(img, size): + w, h = img.size + th, tw = int(size), int(size) + x1 = int(round((w - tw) / 2.)) + y1 = int(round((h - th) / 2.)) + return img.crop((x1, y1, x1 + tw, y1 + th)) + + +def crop_image(img, target_size, center): + width, height = img.size + size = target_size + if center == True: + w_start = (width - size) / 2 + h_start = (height - size) / 2 + else: + w_start = random.randint(0, width - size) + h_start = random.randint(0, height - size) + w_end = w_start + size + h_end = h_start + size + img = img.crop((w_start, h_start, w_end, h_end)) + return img + + +def random_crop_ycx(img, size): + for attempt in range(10): + area = img.size[0] * img.size[1] + target_area = random.uniform(0.08, 1.0) * area + aspect_ratio = random.uniform(3. / 4, 4. 
/ 3) + + w = int(round(math.sqrt(target_area * aspect_ratio))) + h = int(round(math.sqrt(target_area / aspect_ratio))) + + if random.random() < 0.5: + w, h = h, w + + if w <= img.size[0] and h <= img.size[1]: + x1 = random.randint(0, img.size[0] - w) + y1 = random.randint(0, img.size[1] - h) + + img = img.crop((x1, y1, x1 + w, y1 + h)) + assert (img.size == (w, h)) + + return img.resize((size, size), Image.BILINEAR) + + img = Scale(img, size) + img = CenterCrop(img, size) + return img + + +def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]): + aspect_ratio = math.sqrt(random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. / aspect_ratio + + bound = min((float(img.size[0]) / img.size[1]) / (w**2), + (float(img.size[1]) / img.size[0]) / (h**2)) + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img.size[0] * img.size[1] * random.uniform(scale_min, + scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + + i = random.randint(0, img.size[0] - w) + j = random.randint(0, img.size[1] - h) + + img = img.crop((i, j, i + w, j + h)) + img = img.resize((size, size), Image.BILINEAR) + return img + + +def rotate_image(img): + angle = random.randint(-10, 10) + img = img.rotate(angle) + return img + + +def distort_color(img): + def random_brightness(img, lower=0.5, upper=1.5): + e = random.uniform(lower, upper) + return ImageEnhance.Brightness(img).enhance(e) + + def random_contrast(img, lower=0.5, upper=1.5): + e = random.uniform(lower, upper) + return ImageEnhance.Contrast(img).enhance(e) + + def random_color(img, lower=0.5, upper=1.5): + e = random.uniform(lower, upper) + return ImageEnhance.Color(img).enhance(e) + + ops = [random_brightness, random_contrast, random_color] + random.shuffle(ops) + + img = ops[0](img) + img = ops[1](img) + img = ops[2](img) + + return img + + +def process_image_imagepath2(sample, mode): + img = np.random.rand(3, 224, 224) + img -= 
img_mean + img /= img_std + + lab = np.random.randint(0, 999) + + if mode == 'train' or mode == 'test': + return img, lab + elif mode == 'infer': + return img + + +def fake_reader(): + img, lab = process_image_imagepath2(None, "train") + while True: + yield img, lab + + +def process_image_imagepath(sample, mode, color_jitter, rotate): + imgpath = sample[0] + img = Image.open(imgpath) + if mode == 'train': + if rotate: img = rotate_image(img) + img = random_crop_ycx(img, DATA_DIM) + else: + img = Scale(img, 256) + img = CenterCrop(img, DATA_DIM) + if mode == 'train': + if color_jitter: + img = distort_color(img) + if random.randint(0, 1) == 1: + img = img.transpose(Image.FLIP_LEFT_RIGHT) + + if img.mode != 'RGB': + img = img.convert('RGB') + + img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img -= img_mean + img /= img_std + + if mode == 'train' or mode == 'test': + return img, sample[1] + elif mode == 'infer': + return img + + +def _reader_creator_imagepath(data, + mode, + shuffle=False, + color_jitter=False, + rotate=False): + def reader(): + index = range(0, len(data['image'])) + if shuffle: + random.shuffle(index) + for idx in index: + if mode == 'train' or mode == 'test': + yield data['image'][idx], data['label'][idx] + elif mode == 'infer': + yield [data['image'][idx]] + + mapper = functools.partial(process_image_imagepath2, mode=mode) + #mapper = functools.partial( + # process_image_imagepath, mode=mode, color_jitter=color_jitter, rotate=rotate) + + return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE) + + +def _reader_creator(data, mode, shuffle=False, color_jitter=False, + rotate=False): + def reader(): + index = range(0, len(data['image'])) + if shuffle: + random.shuffle(index) + for idx in index: + if mode == 'train' or mode == 'test': + yield data['image'][idx], data['label'][idx] + elif mode == 'infer': + yield [data['image'][idx]] + + mapper = functools.partial( + process_image, mode=mode, color_jitter=color_jitter, 
rotate=rotate) + + return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE) + + +def train(): + return fake_reader + + +def test(): + return fake_reader diff --git a/image_resnet/fluid/run.sh b/image_resnet/fluid/run.sh new file mode 100644 index 0000000..a76b1fa --- /dev/null +++ b/image_resnet/fluid/run.sh @@ -0,0 +1,2 @@ +export CUDA_VISIBLE_DEVICES=0 +python se_resnext152_parallel.py diff --git a/image_resnet/fluid/se_resnext152_parallel.py b/image_resnet/fluid/se_resnext152_parallel.py new file mode 100644 index 0000000..184ee5d --- /dev/null +++ b/image_resnet/fluid/se_resnext152_parallel.py @@ -0,0 +1,270 @@ +import os +import numpy as np +import sys +import time +import paddle.v2 as paddle +import paddle.fluid as fluid +import reader + +#fluid.default_startup_program().random_seed = 111 + + +def load_persistables_if_exist(executor, dirname, main_program=None): + filenames = next(os.walk(dirname))[2] + filenames = set(filenames) + + def _is_presistable_and_exist_(var): + if not fluid.io.is_persistable(var): + return False + else: + return var.name in filenames + + fluid.io.load_vars( + executor, + dirname, + main_program=main_program, + vars=None, + predicate=_is_presistable_and_exist_) + + +def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1, + act=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) / 2, + groups=groups, + act=None, + bias_attr=False) + return fluid.layers.batch_norm(input=conv, act=act, momentum=0.1) + + +def squeeze_excitation(input, num_channels, reduction_ratio): + pool = fluid.layers.pool2d( + input=input, pool_size=0, pool_type='avg', global_pooling=True) + squeeze = fluid.layers.fc(input=pool, + size=num_channels / reduction_ratio, + act='relu') + excitation = fluid.layers.fc(input=squeeze, + size=num_channels, + act='sigmoid') + scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) + return 
scale + + +def shortcut(input, ch_out, stride): + ch_in = input.shape[1] + if ch_in != ch_out: + if stride == 1: + filter_size = 1 + else: + filter_size = 3 + return conv_bn_layer(input, ch_out, filter_size, stride) + else: + return input + + +def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio): + # The number of first 1x1 convolutional channels for each bottleneck building block + # was halved to reduce the computation cost. + conv0 = conv_bn_layer( + input=input, num_filters=num_filters, filter_size=1, act='relu') + conv1 = conv_bn_layer( + input=conv0, + num_filters=num_filters * 2, + filter_size=3, + stride=stride, + groups=cardinality, + act='relu') + conv2 = conv_bn_layer( + input=conv1, num_filters=num_filters * 2, filter_size=1, act=None) + scale = squeeze_excitation( + input=conv2, + num_channels=num_filters * 2, + reduction_ratio=reduction_ratio) + + short = shortcut(input, num_filters * 2, stride) + + return fluid.layers.elementwise_add(x=short, y=scale, act='relu') + + +def SE_ResNeXt152(input, class_dim): + cardinality = 64 + reduction_ratio = 16 + depth = [3, 8, 36, 3] + num_filters = [128, 256, 512, 1024] + + conv = conv_bn_layer( + input=input, num_filters=64, filter_size=3, stride=2, act='relu') + conv = conv_bn_layer( + input=conv, num_filters=64, filter_size=3, stride=1, act='relu') + conv = conv_bn_layer( + input=conv, num_filters=128, filter_size=3, stride=1, act='relu') + conv = fluid.layers.pool2d( + input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + for block in range(len(depth)): + for i in range(depth[block]): + conv = bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + cardinality=cardinality, + reduction_ratio=reduction_ratio) + + pool = fluid.layers.pool2d( + input=conv, pool_size=0, pool_type='avg', global_pooling=True) + #yancanxiang: A dropout layer (with a drop ratio of 0.2) was inserted before the classifier layer.
+ dropout = fluid.layers.dropout(x=pool, dropout_prob=0.2) + # Classifier layer: + out = fluid.layers.fc(input=dropout, size=class_dim, act='softmax') + return out + + +def net_conf(image, label, class_dim): + out = SE_ResNeXt152(input=image, class_dim=class_dim) + cost = fluid.layers.cross_entropy(input=out, label=label) + avg_cost = fluid.layers.mean(x=cost) + #accuracy = fluid.evaluator.Accuracy(input=out, label=label) + #accuracy5 = fluid.evaluator.Accuracy(input=out, label=label, k=5) + accuracy = fluid.layers.accuracy(input=out, label=label) + accuracy5 = fluid.layers.accuracy(input=out, label=label, k=5) + return out, avg_cost, accuracy, accuracy5 + + +def train(learning_rate, + batch_size, + num_passes, + init_model=None, + model_save_dir='model'): + class_dim = 1000 + image_shape = [3, 224, 224] + + image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + + parallel = False + if parallel: + places = fluid.layers.get_places() + pd = fluid.layers.ParallelDo(places) + with pd.do(): + img_ = pd.read_input(image) + label_ = pd.read_input(label) + prediction, avg_cost, accuracy, accuracy5 = net_conf(img_, label_, + class_dim) + + for o in [avg_cost, accuracy, accuracy5]: + pd.write_output(o) + + avg_cost, accuracy, accuracy5 = pd() + # get mean loss and acc across all devices.
+ avg_cost = fluid.layers.mean(x=avg_cost) + accuracy = fluid.layers.mean(x=accuracy) + accuracy5 = fluid.layers.mean(x=accuracy5) + else: + prediction, avg_cost, accuracy, accuracy5 = net_conf(image, label, + class_dim) + + #print("network:", fluid.default_main_program()) + #print("network:", fluid.default_startup_program()) + + inference_program = fluid.default_main_program().clone() + + epoch = [30, 60, 90] + total_images = 1281167 + pass_each_epoch = int(total_images / batch_size + 1) + bd = [e * pass_each_epoch for e in epoch] + lr = [0.1, 0.01, 0.001, 0.0001] + + print("Training with learning rates:", bd, lr) + + optimizer = fluid.optimizer.Momentum( + learning_rate=fluid.layers.piecewise_decay( + boundaries=bd, values=lr), + momentum=0.9, + regularization=fluid.regularizer.L2Decay(1e-4)) + opts = optimizer.minimize(avg_cost) + fluid.memory_optimize(fluid.default_main_program()) + + #print(inference_program) + #with fluid.program_guard(inference_program): + # test_target = [avg_cost, accuracy, accuracy5] + # inference_program = fluid.io.get_inference_program(test_target) + + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + if init_model is not None: + load_persistables_if_exist(exe, init_model) + #fluid.io.load_persistables(exe, init_model) + + train_reader = paddle.batch(reader.train(), batch_size=batch_size) + test_reader = paddle.batch(reader.test(), batch_size=batch_size) + feeder = fluid.DataFeeder(place=place, feed_list=[image, label]) + + for pass_id in range(0, num_passes): + train_info = [[], [], []] + test_info = [[], [], []] + for batch_id, data in enumerate(train_reader()): + t1 = time.time() + loss, acc, acc5 = exe.run( + fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[avg_cost, accuracy, accuracy5]) + t2 = time.time() + period = t2 - t1 + train_info[0].append(loss[0]) + train_info[1].append(acc[0]) + train_info[2].append(acc5[0]) + if batch_id % 10 == 0: + print( + 
"Pass {0}, trainbatch {1}, loss {2}, acc {3}, acc5 {4} time{5}". + format(pass_id, batch_id, loss[0], acc[0], acc5[0], + "%2.2f sec" % period)) + sys.stdout.flush() + #if batch_id == 10: + # break + train_loss = np.array(train_info[0]).mean() + train_acc = np.array(train_info[1]).mean() + train_acc5 = np.array(train_info[2]).mean() + + for batch_id, data in enumerate(test_reader()): + t1 = time.time() + loss, acc, acc5 = exe.run( + inference_program, + feed=feeder.feed(data), + fetch_list=[avg_cost, accuracy, accuracy5]) + t2 = time.time() + period = t2 - t1 + test_info[0].append(loss[0]) + test_info[1].append(acc[0]) + test_info[2].append(acc5[0]) + if batch_id % 10 == 0: + print( + "Pass {0}, testbatch {1}, loss {2}, acc {3}, acc5 {4} time{5}". + format(pass_id, batch_id, loss[0], acc[0], acc5[0], + "%2.2f sec" % period)) + sys.stdout.flush() + #if batch_id == 10: + # break + + test_loss = np.array(test_info[0]).mean() + test_acc = np.array(test_info[1]).mean() + test_acc5 = np.array(test_info[2]).mean() + print( + "End pass {0}, train_loss {1}, train_acc {2}, train_acc5 {3}, test_loss {4}, test_acc {5}, test_acc5 {6}". 
+ format(pass_id, train_loss, train_acc, train_acc5, test_loss, + test_acc, test_acc5)) + sys.stdout.flush() + + model_path = os.path.join(model_save_dir, str(pass_id)) + if not os.path.isdir(model_path): + os.makedirs(model_path) + fluid.io.save_persistables(exe, model_path) + + +if __name__ == '__main__': + train(learning_rate=0.1, batch_size=12, num_passes=90, init_model=None) diff --git a/image_resnet/pytorch/install_dependency.sh b/image_resnet/pytorch/install_dependency.sh new file mode 100644 index 0000000..6cef28f --- /dev/null +++ b/image_resnet/pytorch/install_dependency.sh @@ -0,0 +1,6 @@ + +pip install http://download.pytorch.org/whl/cu80/torch-0.3.1-cp27-cp27mu-linux_x86_64.whl +pip install torchvision + +# if the above command does not work, then you have python 2.7 UCS2, use this command +#pip install http://download.pytorch.org/whl/cu80/torch-0.3.1-cp27-cp27m-linux_x86_64.whl diff --git a/image_resnet/pytorch/resnet.py b/image_resnet/pytorch/resnet.py new file mode 100644 index 0000000..be9cc20 --- /dev/null +++ b/image_resnet/pytorch/resnet.py @@ -0,0 +1,256 @@ +import torch.nn as nn +import math +import torch.utils.model_zoo as model_zoo + +__all__ = [ + 'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152' +] + +model_urls = { + 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', + 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', + 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', + 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', + 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', +} + + +def conv3x3(in_planes, out_planes, stride=1): + "3x3 convolution with padding" + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, 
self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.bottlenect_planes = planes + self.outplanes = planes + self.conv1 = nn.Conv2d( + inplanes, self.bottlenect_planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(self.bottlenect_planes) + self.conv2 = nn.Conv2d( + self.bottlenect_planes, + self.bottlenect_planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False, + groups=64) + self.bn2 = nn.BatchNorm2d(self.bottlenect_planes) + self.conv3 = nn.Conv2d( + self.bottlenect_planes, self.outplanes, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(self.outplanes) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + self.se_gpool = nn.AdaptiveAvgPool2d(1) + self.se_s = nn.Linear(self.outplanes, self.outplanes / 16) + self.se_e = nn.Linear(self.outplanes / 16, self.outplanes) + self.sigmoid = nn.Sigmoid() + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + att = self.se_gpool(out) + att = att.view(att.size(0), -1) + att = self.sigmoid(self.se_e(self.relu(self.se_s(att)))) + att = att.unsqueeze(2).unsqueeze(3).expand_as(out) + + out = out * att + + if self.downsample is not None: + 
residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + def __init__(self, block, layers, num_classes=1000): + self.inplanes = 64 + super(ResNet, self).__init__() + self.conv1 = nn.Conv2d( + 3, 64, kernel_size=3, stride=2, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(64) + self.relu = nn.ReLU(inplace=True) + self.conv2 = nn.Conv2d( + 64, 64, kernel_size=3, stride=1, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(64) + self.conv3 = nn.Conv2d( + 64, 128, kernel_size=3, stride=1, padding=1, bias=False) + self.bn3 = nn.BatchNorm2d(128) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 128, 256, layers[0]) + self.layer2 = self._make_layer(block, 256, 512, layers[1], stride=2) + self.layer3 = self._make_layer(block, 512, 1024, layers[2], stride=2) + self.layer4 = self._make_layer(block, 1024, 2048, layers[3], stride=2) + self.avgpool = nn.AvgPool2d(7) + self.fc = nn.Linear(2048, num_classes) + self.drop = nn.Dropout(p=0.2) + self.softmax = nn.Softmax(dim=1) + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. 
/ n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, inplanes, outplanes, blocks, stride=1): + downsample = None + if stride != 1 or inplanes != outplanes: + kernel_size = 3 + pad = 1 + if stride == 1: + kernel_size = 1 + pad = 0 + downsample = nn.Sequential( + nn.Conv2d( + inplanes, + outplanes, + kernel_size=kernel_size, + stride=stride, + padding=pad, + bias=False), + nn.BatchNorm2d(outplanes), ) + + layers = [] + layers.append(block(inplanes, outplanes, stride, downsample)) + #self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(outplanes, outplanes)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.relu(self.bn2(self.conv2(x))) + x = self.relu(self.bn3(self.conv3(x))) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.avgpool(x) + x = x.view(x.size(0), -1) + x = self.drop(x) + x = self.fc(x) + return x + + +def resnet18(pretrained=False, **kwargs): + """Constructs a ResNet-18 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) + return model + + +def resnet34(pretrained=False, **kwargs): + """Constructs a ResNet-34 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) + return model + + +def resnet50(pretrained=False, **kwargs): + """Constructs a ResNet-50 model. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) + return model + + +def resnet101(pretrained=False, **kwargs): + """Constructs a ResNet-101 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) + return model + + +def resnet152(pretrained=False, **kwargs): + """Constructs a ResNet-152 model. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + """ + model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) + if pretrained: + model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) + return model diff --git a/image_resnet/pytorch/run.sh b/image_resnet/pytorch/run.sh new file mode 100644 index 0000000..d76ca1d --- /dev/null +++ b/image_resnet/pytorch/run.sh @@ -0,0 +1,2 @@ +export CUDA_VISIBLE_DEVICES=0 +python train_resnet.py --batch-size=12 --datadir=. 
diff --git a/image_resnet/pytorch/train_resnet.py b/image_resnet/pytorch/train_resnet.py new file mode 100644 index 0000000..34ced82 --- /dev/null +++ b/image_resnet/pytorch/train_resnet.py @@ -0,0 +1,527 @@ +import argparse +import os +import shutil +import time +import math + +import torch +import torch.nn as nn +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.optim +import torch.utils.data +import torchvision.transforms as transforms +import torchvision.datasets as datasets +import torchvision.models as models +import resnet + +model_names = sorted(name for name in models.__dict__ + if name.islower() and not name.startswith("__") and + callable(models.__dict__[name])) + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') +parser.add_argument( + '-j', + '--workers', + default=8, + type=int, + metavar='N', + help='number of data loading workers (default: 4)') +parser.add_argument( + '--epochs', + default=120, + type=int, + metavar='N', + help='number of total epochs to run') +parser.add_argument( + '--start-epoch', + default=0, + type=int, + metavar='N', + help='manual epoch number (useful on restarts)') +parser.add_argument( + '-b', + '--batch-size', + default=48, + type=int, + metavar='N', + help='mini-batch size (default: 256)') +parser.add_argument( + '--lr', + '--learning-rate', + default=0.1, + type=float, + metavar='LR', + help='initial learning rate') +parser.add_argument( + '--momentum', default=0.9, type=float, metavar='M', help='momentum') +parser.add_argument( + '--weight-decay', + '--wd', + default=1e-4, + type=float, + metavar='W', + help='weight decay (default: 1e-4)') +parser.add_argument( + '--print-freq', + '-p', + default=10, + type=int, + metavar='N', + help='print frequency (default: 10)') +parser.add_argument( + '--resume', + default='', + type=str, + metavar='PATH', + help='path to latest checkpoint (default: none)') +parser.add_argument( + '--datadir', + default=None, + type=str, + 
metavar='PATH', + help='path to train data(default: ./data)') +parser.add_argument( + '-e', + '--evaluate', + dest='evaluate', + action='store_true', + help='evaluate model on validation set') +parser.add_argument( + '--pretrained', + dest='pretrained', + action='store_true', + help='use pre-trained model') +parser.add_argument( + '--weights', + default='', + type=str, + metavar='weights', + help='pretrained model') + +parser.add_argument( + '--n_iterate', + type=int, + default=26000, + help='max number of iterate per epoch') +parser.add_argument( + '--valid_iterate', + type=int, + default=1800, + help='max number of iterate per epoch') + +best_prec1 = 0 + + +class ReduceLROnPlateau(object): + """Reduce learning rate when a metric has stopped improving. + Models often benefit from reducing the learning rate by a factor + of 2-10 once learning stagnates. This scheduler reads a metrics + quantity and if no improvement is seen for a 'patience' number + of epochs, the learning rate is reduced. + + Args: + optimizer (Optimizer): Wrapped optimizer. + mode (str): One of `min`, `max`. In `min` mode, lr will + be reduced when the quantity monitored has stopped + decreasing; in `max` mode it will be reduced when the + quantity monitored has stopped increasing. Default: 'min'. + factor (float): Factor by which the learning rate will be + reduced. new_lr = lr * factor. Default: 0.1. + patience (int): Number of epochs with no improvement after + which learning rate will be reduced. Default: 10. + verbose (bool): If True, prints a message to stdout for + each update. Default: False. + threshold (float): Threshold for measuring the new optimum, + to only focus on significant changes. Default: 1e-4. + threshold_mode (str): One of `rel`, `abs`. In `rel` mode, + dynamic_threshold = best * ( 1 + threshold ) in 'max' + mode or best * ( 1 - threshold ) in `min` mode. + In `abs` mode, dynamic_threshold = best + threshold in + `max` mode or best - threshold in `min` mode. Default: 'rel'. 
+ cooldown (int): Number of epochs to wait before resuming + normal operation after lr has been reduced. Default: 0. + min_lr (float or list): A scalar or a list of scalars. A + lower bound on the learning rate of all param groups + or each group respectively. Default: 0. + eps (float): Minimal decay applied to lr. If the difference + between new and old lr is smaller than eps, the update is + ignored. Default: 1e-8. + + Example: + >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9) + >>> scheduler = torch.optim.ReduceLROnPlateau(optimizer, 'min') + >>> for epoch in range(10): + >>> train(...) + >>> val_loss = validate(...) + >>> # Note that step should be called after validate() + >>> scheduler.step(val_loss) + """ + + def __init__(self, + optimizer, + mode='min', + factor=0.1, + patience=10, + verbose=False, + threshold=1e-4, + threshold_mode='rel', + cooldown=0, + min_lr=0, + eps=1e-8): + + if factor >= 1.0: + raise ValueError('Factor should be < 1.0.') + self.factor = factor + self.optimizer = optimizer + + if isinstance(min_lr, list) or isinstance(min_lr, tuple): + if len(min_lr) != len(optimizer.param_groups): + raise ValueError("expected {} min_lrs, got {}".format( + len(optimizer.param_groups), len(min_lr))) + self.min_lrs = list(min_lr) + else: + self.min_lrs = [min_lr] * len(optimizer.param_groups) + + self.patience = patience + self.verbose = verbose + self.cooldown = cooldown + self.cooldown_counter = 0 + self.mode = mode + self.threshold = threshold + self.threshold_mode = threshold_mode + self.best = None + self.num_bad_epochs = None + self.mode_worse = None # the worse value for the chosen mode + self.is_better = None + self.eps = eps + self.last_epoch = -1 + self._init_is_better( + mode=mode, threshold=threshold, threshold_mode=threshold_mode) + self._reset() + + def _reset(self): + """Resets num_bad_epochs counter and cooldown counter.""" + self.best = self.mode_worse + self.cooldown_counter = 0 + self.num_bad_epochs = 0 + + 
def step(self, metrics, epoch=None): + current = metrics + if epoch is None: + epoch = self.last_epoch = self.last_epoch + 1 + self.last_epoch = epoch + + if self.is_better(current, self.best): + self.best = current + self.num_bad_epochs = 0 + else: + self.num_bad_epochs += 1 + + if self.in_cooldown: + self.cooldown_counter -= 1 + self.num_bad_epochs = 0 # ignore any bad epochs in cooldown + + if self.num_bad_epochs > self.patience: + self._reduce_lr(epoch) + self.cooldown_counter = self.cooldown + self.num_bad_epochs = 0 + + def _reduce_lr(self, epoch): + for i, param_group in enumerate(self.optimizer.param_groups): + old_lr = float(param_group['lr']) + new_lr = max(old_lr * self.factor, self.min_lrs[i]) + if old_lr - new_lr > self.eps: + param_group['lr'] = new_lr + if self.verbose: + print('Epoch {:5d}: reducing learning rate' + ' of group {} to {:.4e}.'.format(epoch, i, new_lr)) + + @property + def in_cooldown(self): + return self.cooldown_counter > 0 + + def _init_is_better(self, mode, threshold, threshold_mode): + if mode not in {'min', 'max'}: + raise ValueError('mode ' + mode + ' is unknown!') + if threshold_mode not in {'rel', 'abs'}: + raise ValueError('threshold mode ' + mode + ' is unknown!') + if mode == 'min' and threshold_mode == 'rel': + rel_epsilon = 1. - threshold + self.is_better = lambda a, best: a < best * rel_epsilon + self.mode_worse = float('Inf') + elif mode == 'min' and threshold_mode == 'abs': + self.is_better = lambda a, best: a < best - threshold + self.mode_worse = float('Inf') + elif mode == 'max' and threshold_mode == 'rel': + rel_epsilon = threshold + 1. 
+ self.is_better = lambda a, best: a > best * rel_epsilon + self.mode_worse = -float('Inf') + else: # mode == 'max' and epsilon_mode == 'abs': + self.is_better = lambda a, best: a > best + threshold + self.mode_worse = -float('Inf') + + +def main(): + global args, best_prec1 + args = parser.parse_args() + model = resnet.resnet152(num_classes=1000) + + # create model + normalize = transforms.Normalize( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + + model = torch.nn.DataParallel(model).cuda() + #model = model.cuda() + torch.backends.cudnn.benchmark = True + + # define loss function (criterion) and optimizer + criterion = nn.CrossEntropyLoss().cuda() + optimizer = torch.optim.SGD(model.parameters(), + args.lr, + momentum=args.momentum, + weight_decay=args.weight_decay) + + # optionally resume from a checkpoint + if args.resume: + if os.path.isfile(args.resume): + print("=> loading checkpoint '{}'".format(args.resume)) + + checkpoint = torch.load(args.resume) + args.start_epoch = 1 + + best_prec1 = checkpoint['best_prec1'] + #optimizer.load_state_dict(checkpoint['optimizer']) + + model_dict = model.state_dict() + checkpoint = { + k: v + for k, v in checkpoint['state_dict'].items() if k in model_dict + } + + model_dict.update(checkpoint) + model.load_state_dict(model_dict) + #print("=> loaded checkpoint '{}' (epoch {})" + # .format(args.resume, checkpoint['epoch'])) + else: + print("=> no checkpoint found at '{}'".format(args.resume)) + cudnn.benchmark = True + + traindir = args.datadir + valdir = traindir + if traindir is None: + print("please set --datadir") + exit(1) + + train_dataset = datasets.ImageFolder(traindir, + transforms.Compose([ + transforms.RandomSizedCrop(224), + transforms.RandomHorizontalFlip(), + transforms.ToTensor(), + normalize, + ])) + train_loader = torch.utils.data.DataLoader( + train_dataset, + batch_size=args.batch_size, + shuffle=True, + num_workers=args.workers, + pin_memory=True) + + val_loader = torch.utils.data.DataLoader( + 
datasets.ImageFolder(valdir, + transforms.Compose([ + transforms.Scale(243), + transforms.CenterCrop(224), + transforms.ToTensor(), + normalize, + ])), + batch_size=args.batch_size, + shuffle=False, + num_workers=args.workers, + pin_memory=True) + + if args.evaluate: + validate(val_loader, model, criterion) + return + + for epoch in range(args.start_epoch, args.epochs): + adjust_learning_rate(optimizer, epoch, args.epochs) + + #print_learning_rate(optimizer,epoch) + # train for one epoch + + train(train_loader, model, criterion, optimizer, epoch) + + # evaluate on validation set + + prec1, val_loss = validate(val_loader, model, criterion) + #scheduler.step(val_loss) + # remember best prec@1 and save checkpoint + is_best = prec1 > best_prec1 + best_prec1 = max(prec1, best_prec1) + print "best top1 :" + str(best_prec1) + save_checkpoint({ + 'epoch': epoch + 1, + 'state_dict': model.state_dict(), + 'best_prec1': best_prec1, + 'optimizer': optimizer.state_dict(), + }, is_best) + + +def train(train_loader, model, criterion, optimizer, epoch): + batch_time = AverageMeter() + data_time = AverageMeter() + losses = AverageMeter() + top1 = AverageMeter() + top5 = AverageMeter() + + # switch to train mode + model.train() + + end = time.time() + for i, (input, target) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + input_image = input.cuda() + #print input.size() + target = target.cuda(async=True) + + input_var = torch.autograd.Variable(input_image) + target_var = torch.autograd.Variable(target) + + # compute output + output = model(input_var) + loss = criterion(output, target_var) + # measure accuracy and record loss + prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) + losses.update(loss.data[0], input.size(0)) + top1.update(prec1[0], input.size(0)) + top5.update(prec5[0], input.size(0)) + + # compute gradient and do SGD step + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # measure elapsed time + 
batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + print('Epoch: [{0}][{1}]\t' + 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' + 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' + 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' + 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' + 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( + epoch, + i, + batch_time=batch_time, + data_time=data_time, + loss=losses, + top1=top1, + top5=top5)) + + +def validate(val_loader, model, criterion): + batch_time = AverageMeter() + losses = AverageMeter() + output_top1 = AverageMeter() + output_top5 = AverageMeter() + # switch to evaluate mode + model.eval() + + end = time.time() + for i, (input, target) in enumerate(val_loader): + target = target.cuda(async=True) + input_var = torch.autograd.Variable(input, volatile=True) + target_var = torch.autograd.Variable(target, volatile=True) + + # compute output + output = model(input_var) + loss = criterion(output, target_var) + + losses.update(loss.data[0], input.size(0)) + #softmax=torch.nn.Softmax() + + # measure accuracy and record loss + prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) + output_top1.update(prec1[0], input.size(0)) + output_top5.update(prec5[0], input.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % args.print_freq == 0: + print('Test: [{0}]\t' + 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' + 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' + 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' + 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( + i, + batch_time=batch_time, + loss=losses, + top1=output_top1, + top5=output_top5)) + print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format( + top1=output_top1, top5=output_top5)) + return output_top1.avg, losses.avg + + +def save_checkpoint(state, is_best, filename='output/checkpoint.pth.tar'): + torch.save(state, filename) + if is_best: + shutil.copyfile(filename, 
'output/model_best.pth.tar') + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + +def adjust_learning_rate(optimizer, epoch, nepoch): + """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" + lr = 0.1**(epoch // 30) + for i, param_group in enumerate(optimizer.param_groups): + param_group['lr'] = lr + + +def print_learning_rate(optimizer, epoch): + for param_group in optimizer.param_groups: + print str(epoch) + ' epoch learning_rate:' + str(param_group['lr']) + + +def accuracy(output, target, topk=(1, )): + """Computes the precision@k for the specified values of k""" + maxk = max(topk) + batch_size = target.size(0) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].view(-1).float().sum(0) + res.append(correct_k.mul_(100.0 / batch_size)) + return res + + +if __name__ == '__main__': + main()