diff --git a/image_resnet/fluid/reader.py b/image_resnet/fluid/reader.py
new file mode 100644
index 0000000..00a23fc
--- /dev/null
+++ b/image_resnet/fluid/reader.py
@@ -0,0 +1,237 @@
+import os
+import math
+import random
+import cPickle
+import functools
+import numpy as np
+import paddle.v2 as paddle
+from PIL import Image, ImageEnhance
+
+random.seed(0)  # fixed seed so the stdlib `random` draws in this module are reproducible
+
+DATA_DIM = 224  # side length used for the training and evaluation crops below
+
+THREAD = 8  # passed to xmap_readers as its third argument
+BUF_SIZE = 10240  # passed to xmap_readers as its fourth argument
+
+img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))  # subtracted per channel
+img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))  # divided per channel
+
+
+def resize_short(img, target_size):  # rescale so the shorter side becomes target_size
+    width, height = img.size[0], img.size[1]
+    ratio = float(target_size) / min(width, height)
+    new_size = (int(round(width * ratio)), int(round(height * ratio)))
+    img = img.resize(new_size, Image.BILINEAR)
+    return img
+
+
+def Scale(img, size):  # resize so the shorter side equals `size`, keeping the aspect ratio
+    w, h = img.size
+    # Nothing to do when the shorter side already has the requested length.
+    if min(w, h) == size:
+        return img
+    if w < h:
+        # Width is the shorter side.
+        new_size = (size, int(size * h / w))
+    else:
+        # Height is the shorter side (or the image is square).
+        new_size = (int(size * w / h), size)
+    return img.resize(new_size, Image.BILINEAR)
+
+
+def CenterCrop(img, size):  # crop a size x size window around the image centre
+    w, h = img.size
+    side = int(size)
+    left = int(round((w - side) / 2.))
+    top = int(round((h - side) / 2.))
+    return img.crop((left, top, left + side, top + side))
+
+
+def crop_image(img, target_size, center):  # crop a target_size square, centred or random
+    width, height = img.size
+    size = target_size
+    if center:
+        w_start = (width - size) // 2  # floor division keeps the pixel offsets integral
+        h_start = (height - size) // 2
+    else:
+        w_start = random.randint(0, width - size)  # random top-left corner inside the image
+        h_start = random.randint(0, height - size)
+    w_end = w_start + size
+    h_end = h_start + size
+    img = img.crop((w_start, h_start, w_end, h_end))  # box is (left, upper, right, lower)
+    return img
+
+
+def random_crop_ycx(img, size):  # random area/aspect-ratio crop, resized to size x size
+    for attempt in range(10):  # up to 10 tries to draw a crop that fits inside the image
+        area = img.size[0] * img.size[1]
+        target_area = random.uniform(0.08, 1.0) * area  # 8%..100% of the image area
+        aspect_ratio = random.uniform(3. / 4, 4. / 3)
+
+        w = int(round(math.sqrt(target_area * aspect_ratio)))
+        h = int(round(math.sqrt(target_area / aspect_ratio)))
+
+        if random.random() < 0.5:  # swap width and height half of the time
+            w, h = h, w
+
+        if w <= img.size[0] and h <= img.size[1]:
+            x1 = random.randint(0, img.size[0] - w)
+            y1 = random.randint(0, img.size[1] - h)
+
+            img = img.crop((x1, y1, x1 + w, y1 + h))
+            assert (img.size == (w, h))
+
+            return img.resize((size, size), Image.BILINEAR)
+
+    img = Scale(img, size)  # fallback when no attempt fits: short-side resize + centre crop
+    img = CenterCrop(img, size)
+    return img
+
+
+def random_crop(img, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.)):
+    aspect_ratio = math.sqrt(random.uniform(*ratio))  # defaults are tuples: never shared mutable state
+    w = 1. * aspect_ratio
+    h = 1. / aspect_ratio
+
+    bound = min((float(img.size[0]) / img.size[1]) / (w**2),
+                (float(img.size[1]) / img.size[0]) / (h**2))
+    scale_max = min(scale[1], bound)  # cap the area fraction so the crop fits in the image
+    scale_min = min(scale[0], bound)
+
+    target_area = img.size[0] * img.size[1] * random.uniform(scale_min,
+                                                             scale_max)
+    target_size = math.sqrt(target_area)
+    w = int(target_size * w)  # crop width in pixels
+    h = int(target_size * h)  # crop height in pixels
+
+    i = random.randint(0, img.size[0] - w)  # random left edge
+    j = random.randint(0, img.size[1] - h)  # random upper edge
+
+    img = img.crop((i, j, i + w, j + h))
+    img = img.resize((size, size), Image.BILINEAR)  # final output is size x size
+    return img
+
+
+def rotate_image(img):  # rotate by a random whole-degree angle drawn from [-10, 10]
+    degrees = random.randint(-10, 10)
+    rotated = img.rotate(degrees)
+    return rotated
+
+
+def distort_color(img):  # apply brightness, contrast and colour jitter in a random order
+    def random_brightness(img, lower=0.5, upper=1.5):
+        factor = random.uniform(lower, upper)
+        return ImageEnhance.Brightness(img).enhance(factor)
+
+    def random_contrast(img, lower=0.5, upper=1.5):
+        factor = random.uniform(lower, upper)
+        return ImageEnhance.Contrast(img).enhance(factor)
+
+    def random_color(img, lower=0.5, upper=1.5):
+        factor = random.uniform(lower, upper)
+        return ImageEnhance.Color(img).enhance(factor)
+
+    ops = [random_brightness, random_contrast, random_color]
+    random.shuffle(ops)
+
+    # Each enhancement draws its own factor at the moment it is applied.
+    for op in ops:
+        img = op(img)
+
+    return img
+
+
+def process_image_imagepath2(sample, mode):  # synthetic sample; `sample` is ignored
+    img = np.random.rand(3, 224, 224)  # uniform noise in [0, 1), channel-first
+    img -= img_mean
+    img /= img_std
+
+    lab = np.random.randint(0, 1000)  # `high` is exclusive, so labels cover 0..999
+
+    if mode == 'train' or mode == 'test':
+        return img, lab
+    elif mode == 'infer':  # any other mode falls through and returns None
+        return img
+
+
+def fake_reader():  # endless generator that keeps yielding one synthetic (image, label) pair
+    img, lab = process_image_imagepath2(None, "train")  # generated once, reused below
+    while True:  # never terminates on its own; the consumer has to stop iterating
+        yield img, lab
+
+
+def process_image_imagepath(sample, mode, color_jitter, rotate):  # load, augment, normalise
+    imgpath = sample[0]  # first field of the sample is the image path
+    img = Image.open(imgpath)
+    if mode == 'train':
+        if rotate: img = rotate_image(img)
+        img = random_crop_ycx(img, DATA_DIM)
+    else:  # every non-train mode: deterministic short-side resize to 256, then centre crop
+        img = Scale(img, 256)
+        img = CenterCrop(img, DATA_DIM)
+    if mode == 'train':
+        if color_jitter:
+            img = distort_color(img)
+        if random.randint(0, 1) == 1:  # horizontal flip for one of the two outcomes
+            img = img.transpose(Image.FLIP_LEFT_RIGHT)
+
+    if img.mode != 'RGB':
+        img = img.convert('RGB')
+
+    img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255  # channel axis first, scaled by 1/255
+    img -= img_mean
+    img /= img_std
+
+    if mode == 'train' or mode == 'test':
+        return img, sample[1]  # second field of the sample is returned unchanged
+    elif mode == 'infer':  # any other mode falls through and returns None
+        return img
+
+
+def _reader_creator_imagepath(data,
+                              mode,
+                              shuffle=False,
+                              color_jitter=False,
+                              rotate=False):  # color_jitter/rotate are not used by the active mapper
+    def reader():  # yields (image, label) pairs, or [image] in 'infer' mode
+        index = range(0, len(data['image']))
+        if shuffle:
+            random.shuffle(index)  # in-place shuffle: relies on `range` returning a list
+        for idx in index:
+            if mode == 'train' or mode == 'test':
+                yield data['image'][idx], data['label'][idx]
+            elif mode == 'infer':
+                yield [data['image'][idx]]
+
+    mapper = functools.partial(process_image_imagepath2, mode=mode)  # synthetic-data mapper
+    #mapper = functools.partial(
+    #    process_image_imagepath, mode=mode, color_jitter=color_jitter, rotate=rotate)
+
+    return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
+
+
+def _reader_creator(data, mode, shuffle=False, color_jitter=False,
+                    rotate=False):  # wraps `data` in an xmap_readers pipeline
+    def reader():  # yields (image, label) pairs, or [image] in 'infer' mode
+        index = range(0, len(data['image']))
+        if shuffle:
+            random.shuffle(index)  # in-place shuffle: relies on `range` returning a list
+        for idx in index:
+            if mode == 'train' or mode == 'test':
+                yield data['image'][idx], data['label'][idx]
+            elif mode == 'infer':
+                yield [data['image'][idx]]
+
+    mapper = functools.partial(  # NOTE(review): `process_image` is not defined in this module -> NameError when called
+        process_image, mode=mode, color_jitter=color_jitter, rotate=rotate)
+
+    return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
+
+
+def train():  # training reader creator: currently the synthetic, endless fake_reader
+    return fake_reader
+
+
+def test():  # test reader creator: currently the synthetic, endless fake_reader
+    return fake_reader
diff --git a/image_resnet/fluid/run.sh b/image_resnet/fluid/run.sh
new file mode 100644
index 0000000..a76b1fa
--- /dev/null
+++ b/image_resnet/fluid/run.sh
@@ -0,0 +1,2 @@
+export CUDA_VISIBLE_DEVICES=0  # expose only GPU 0 to the training process
+python se_resnext152_parallel.py  # run the fluid SE-ResNeXt-152 training script
diff --git a/image_resnet/fluid/se_resnext152_parallel.py b/image_resnet/fluid/se_resnext152_parallel.py
new file mode 100644
index 0000000..184ee5d
--- /dev/null
+++ b/image_resnet/fluid/se_resnext152_parallel.py
@@ -0,0 +1,270 @@
+import os
+import numpy as np
+import sys
+import time
+import paddle.v2 as paddle
+import paddle.fluid as fluid
+import reader
+
+#fluid.default_startup_program().random_seed = 111
+
+
+def load_persistables_if_exist(executor, dirname, main_program=None):
+    _, _, files_here = next(os.walk(dirname))  # files directly inside dirname
+    saved_names = set(files_here)
+
+    def _persistable_and_saved(var):  # accept only persistables that have a saved file
+        if not fluid.io.is_persistable(var):
+            return False
+        return var.name in saved_names
+
+    # Load every variable accepted by the predicate from `dirname`.
+    fluid.io.load_vars(
+        executor,
+        dirname,
+        main_program=main_program,
+        vars=None,
+        predicate=_persistable_and_saved)
+
+
+def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1,
+                  act=None):  # bias-free conv2d followed by batch_norm; `act` is applied by batch_norm
+    conv = fluid.layers.conv2d(
+        input=input,
+        num_filters=num_filters,
+        filter_size=filter_size,
+        stride=stride,
+        padding=(filter_size - 1) // 2,  # floor division: padding stays an int under true division
+        groups=groups,
+        act=None,
+        bias_attr=False)
+    return fluid.layers.batch_norm(input=conv, act=act, momentum=0.1)
+
+
+def squeeze_excitation(input, num_channels, reduction_ratio):  # per-channel gating of `input`
+    pool = fluid.layers.pool2d(
+        input=input, pool_size=0, pool_type='avg', global_pooling=True)
+    squeeze = fluid.layers.fc(input=pool,
+                              size=num_channels // reduction_ratio,  # layer width must be an int
+                              act='relu')
+    excitation = fluid.layers.fc(input=squeeze,
+                                 size=num_channels,
+                                 act='sigmoid')
+    scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)  # input times sigmoid gates
+    return scale
+
+
+def shortcut(input, ch_out, stride):
+    # Residual branch: the input itself when the channel counts already match,
+    # otherwise a conv+BN projection to `ch_out` channels.
+    ch_in = input.shape[1]
+    if ch_in == ch_out:
+        return input
+
+    # 1x1 kernel when the stride is 1, 3x3 kernel for any other stride.
+    filter_size = 1 if stride == 1 else 3
+    return conv_bn_layer(input, ch_out, filter_size, stride)
+
+
+def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio):
+    # The number of channels of the first 1x1 convolution in each bottleneck building
+    # block was halved to reduce the computation cost.
+    conv0 = conv_bn_layer(
+        input=input, num_filters=num_filters, filter_size=1, act='relu')
+    conv1 = conv_bn_layer(  # grouped 3x3 convolution; carries the block's stride
+        input=conv0,
+        num_filters=num_filters * 2,
+        filter_size=3,
+        stride=stride,
+        groups=cardinality,
+        act='relu')
+    conv2 = conv_bn_layer(  # 1x1 convolution without activation
+        input=conv1, num_filters=num_filters * 2, filter_size=1, act=None)
+    scale = squeeze_excitation(
+        input=conv2,
+        num_channels=num_filters * 2,
+        reduction_ratio=reduction_ratio)
+
+    short = shortcut(input, num_filters * 2, stride)  # identity or projection of the block input
+
+    return fluid.layers.elementwise_add(x=short, y=scale, act='relu')
+
+
+def SE_ResNeXt152(input, class_dim):  # builds the network and returns softmax class scores
+    cardinality = 64  # `groups` of the 3x3 convolution in every bottleneck block
+    reduction_ratio = 16  # squeeze-excitation channel reduction
+    depth = [3, 8, 36, 3]  # number of bottleneck blocks per stage
+    num_filters = [128, 256, 512, 1024]  # per-stage `num_filters` passed to bottleneck_block
+
+    conv = conv_bn_layer(  # stem: three 3x3 conv+BN layers, the first with stride 2
+        input=input, num_filters=64, filter_size=3, stride=2, act='relu')
+    conv = conv_bn_layer(
+        input=conv, num_filters=64, filter_size=3, stride=1, act='relu')
+    conv = conv_bn_layer(
+        input=conv, num_filters=128, filter_size=3, stride=1, act='relu')
+    conv = fluid.layers.pool2d(
+        input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
+
+    for block in range(len(depth)):
+        for i in range(depth[block]):
+            conv = bottleneck_block(
+                input=conv,
+                num_filters=num_filters[block],
+                stride=2 if i == 0 and block != 0 else 1,  # stride 2 only on the first block of stages 1..3
+                cardinality=cardinality,
+                reduction_ratio=reduction_ratio)
+
+    pool = fluid.layers.pool2d(
+        input=conv, pool_size=0, pool_type='avg', global_pooling=True)
+    # A dropout layer (drop ratio 0.2) is inserted before the classifier layer.
+    dropout = fluid.layers.dropout(x=pool, dropout_prob=0.2)
+    # Classifier layer:
+    out = fluid.layers.fc(input=dropout, size=class_dim, act='softmax')
+    return out
+
+
+def net_conf(image, label, class_dim):  # returns (prediction, mean loss, top-1 acc, top-5 acc)
+    out = SE_ResNeXt152(input=image, class_dim=class_dim)
+    cost = fluid.layers.cross_entropy(input=out, label=label)
+    avg_cost = fluid.layers.mean(x=cost)  # mean cross-entropy over the batch
+    #accuracy = fluid.evaluator.Accuracy(input=out, label=label)
+    #accuracy5 = fluid.evaluator.Accuracy(input=out, label=label, k=5)
+    accuracy = fluid.layers.accuracy(input=out, label=label)
+    accuracy5 = fluid.layers.accuracy(input=out, label=label, k=5)
+    return out, avg_cost, accuracy, accuracy5
+
+
+def train(learning_rate,  # NOTE(review): never read below; the schedule in `lr` is hard-coded
+          batch_size,
+          num_passes,
+          init_model=None,
+          model_save_dir='model'):
+    class_dim = 1000
+    image_shape = [3, 224, 224]
+
+    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+
+    parallel = False  # the ParallelDo branch below is disabled by this flag
+    if parallel:
+        places = fluid.layers.get_places()
+        pd = fluid.layers.ParallelDo(places)
+        with pd.do():
+            img_ = pd.read_input(image)
+            label_ = pd.read_input(label)
+            prediction, avg_cost, accuracy, accuracy5 = net_conf(img_, label_,
+                                                                 class_dim)
+
+            for o in [avg_cost, accuracy, accuracy5]:
+                pd.write_output(o)
+
+        avg_cost, accuracy, accuracy5 = pd()
+        # Average the loss and accuracies over all devices.
+        avg_cost = fluid.layers.mean(x=avg_cost)
+        accuracy = fluid.layers.mean(x=accuracy)
+        accuracy5 = fluid.layers.mean(x=accuracy5)
+    else:
+        prediction, avg_cost, accuracy, accuracy5 = net_conf(image, label,
+                                                             class_dim)
+
+    #print("network:", fluid.default_main_program())
+    #print("network:", fluid.default_startup_program())
+
+    inference_program = fluid.default_main_program().clone()  # cloned before the optimizer is attached
+
+    epoch = [30, 60, 90]  # passes after which the learning rate steps down
+    total_images = 1281167
+    pass_each_epoch = int(total_images / batch_size + 1)  # batches per pass over total_images
+    bd = [e * pass_each_epoch for e in epoch]  # decay boundaries expressed in batches
+    lr = [0.1, 0.01, 0.001, 0.0001]
+
+    print("Training with learning rates:", bd, lr)
+
+    optimizer = fluid.optimizer.Momentum(
+        learning_rate=fluid.layers.piecewise_decay(
+            boundaries=bd, values=lr),
+        momentum=0.9,
+        regularization=fluid.regularizer.L2Decay(1e-4))
+    opts = optimizer.minimize(avg_cost)
+    fluid.memory_optimize(fluid.default_main_program())
+
+    #print(inference_program)
+    #with fluid.program_guard(inference_program):
+    #    test_target = [avg_cost, accuracy, accuracy5]
+    #    inference_program = fluid.io.get_inference_program(test_target)
+
+    place = fluid.CUDAPlace(0)  # always runs on CUDA device 0
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+
+    if init_model is not None:
+        load_persistables_if_exist(exe, init_model)
+        #fluid.io.load_persistables(exe, init_model)
+
+    train_reader = paddle.batch(reader.train(), batch_size=batch_size)
+    test_reader = paddle.batch(reader.test(), batch_size=batch_size)
+    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
+
+    for pass_id in range(0, num_passes):
+        train_info = [[], [], []]  # per-batch loss, acc, acc5
+        test_info = [[], [], []]  # per-batch loss, acc, acc5
+        for batch_id, data in enumerate(train_reader()):
+            t1 = time.time()
+            loss, acc, acc5 = exe.run(
+                fluid.default_main_program(),
+                feed=feeder.feed(data),
+                fetch_list=[avg_cost, accuracy, accuracy5])
+            t2 = time.time()
+            period = t2 - t1  # wall-clock seconds spent in this exe.run call
+            train_info[0].append(loss[0])
+            train_info[1].append(acc[0])
+            train_info[2].append(acc5[0])
+            if batch_id % 10 == 0:  # log every 10th batch
+                print(
+                    "Pass {0}, trainbatch {1}, loss {2}, acc {3}, acc5 {4} time{5}".
+                    format(pass_id, batch_id, loss[0], acc[0], acc5[0],
+                           "%2.2f sec" % period))
+                sys.stdout.flush()
+            #if batch_id == 10:
+            #    break
+        train_loss = np.array(train_info[0]).mean()
+        train_acc = np.array(train_info[1]).mean()
+        train_acc5 = np.array(train_info[2]).mean()
+
+        for batch_id, data in enumerate(test_reader()):
+            t1 = time.time()
+            loss, acc, acc5 = exe.run(
+                inference_program,
+                feed=feeder.feed(data),
+                fetch_list=[avg_cost, accuracy, accuracy5])
+            t2 = time.time()
+            period = t2 - t1  # wall-clock seconds spent in this exe.run call
+            test_info[0].append(loss[0])
+            test_info[1].append(acc[0])
+            test_info[2].append(acc5[0])
+            if batch_id % 10 == 0:  # log every 10th batch
+                print(
+                    "Pass {0}, testbatch {1}, loss {2}, acc {3}, acc5 {4} time{5}".
+                    format(pass_id, batch_id, loss[0], acc[0], acc5[0],
+                           "%2.2f sec" % period))
+                sys.stdout.flush()
+            #if batch_id == 10:
+            #    break
+
+        test_loss = np.array(test_info[0]).mean()
+        test_acc = np.array(test_info[1]).mean()
+        test_acc5 = np.array(test_info[2]).mean()
+        print(
+            "End pass {0}, train_loss {1}, train_acc {2}, train_acc5 {3}, test_loss {4}, test_acc {5}, test_acc5 {6}".
+            format(pass_id, train_loss, train_acc, train_acc5, test_loss,
+                   test_acc, test_acc5))
+        sys.stdout.flush()
+
+        model_path = os.path.join(model_save_dir, str(pass_id))  # one sub-directory per pass
+        if not os.path.isdir(model_path):
+            os.makedirs(model_path)
+        fluid.io.save_persistables(exe, model_path)
+
+
+if __name__ == '__main__':  # script entry point: start training from scratch
+    train(learning_rate=0.1, batch_size=12, num_passes=90, init_model=None)
diff --git a/image_resnet/pytorch/install_dependency.sh b/image_resnet/pytorch/install_dependency.sh
new file mode 100644
index 0000000..6cef28f
--- /dev/null
+++ b/image_resnet/pytorch/install_dependency.sh
@@ -0,0 +1,6 @@
+
+pip install http://download.pytorch.org/whl/cu80/torch-0.3.1-cp27-cp27mu-linux_x86_64.whl 
+pip install torchvision 
+
+# If the command above fails, your Python 2.7 is probably a UCS2 build; use this wheel instead:
+#pip install http://download.pytorch.org/whl/cu80/torch-0.3.1-cp27-cp27m-linux_x86_64.whl
diff --git a/image_resnet/pytorch/resnet.py b/image_resnet/pytorch/resnet.py
new file mode 100644
index 0000000..be9cc20
--- /dev/null
+++ b/image_resnet/pytorch/resnet.py
@@ -0,0 +1,256 @@
+import torch.nn as nn
+import math
+import torch.utils.model_zoo as model_zoo
+
+__all__ = [
+    'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'
+]
+
+model_urls = {  # NOTE(review): presumably stock ResNet checkpoints; confirm they match the layers defined below
+    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
+    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
+    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
+    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
+    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
+}
+
+
+def conv3x3(in_planes, out_planes, stride=1):
+    """Build a 3x3 convolution with padding 1 and no bias term."""
+    layer = nn.Conv2d(
+        in_planes, out_planes,
+        kernel_size=3,
+        stride=stride,
+        padding=1,
+        bias=False)
+    return layer
+
+
+class BasicBlock(nn.Module):  # two 3x3 conv-BN layers plus a residual connection
+    expansion = 1
+
+    def __init__(self, inplanes, planes, stride=1, downsample=None):
+        super(BasicBlock, self).__init__()
+        self.conv1 = conv3x3(inplanes, planes, stride)
+        self.bn1 = nn.BatchNorm2d(planes)
+        self.relu = nn.ReLU(inplace=True)
+        self.conv2 = conv3x3(planes, planes)
+        self.bn2 = nn.BatchNorm2d(planes)
+        self.downsample = downsample
+        self.stride = stride
+
+    def forward(self, x):
+        """Return relu(main_branch(x) + skip(x)).
+
+        The skip branch is ``x`` itself unless a ``downsample`` module was
+        passed to the constructor, in which case it is ``downsample(x)``.
+        """
+        # Main branch: conv-bn-relu followed by conv-bn.
+        out = self.relu(self.bn1(self.conv1(x)))
+        out = self.bn2(self.conv2(out))
+
+        if self.downsample is None:
+            residual = x
+        else:
+            residual = self.downsample(x)
+
+        out += residual
+        return self.relu(out)
+
+
+class Bottleneck(nn.Module):  # grouped-conv bottleneck with a squeeze-and-excitation gate
+    expansion = 4
+
+    def __init__(self, inplanes, planes, stride=1, downsample=None):
+        super(Bottleneck, self).__init__()
+        self.bottlenect_planes = planes  # attribute name (typo included) kept for compatibility
+        self.outplanes = planes
+        self.conv1 = nn.Conv2d(
+            inplanes, self.bottlenect_planes, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(self.bottlenect_planes)
+        self.conv2 = nn.Conv2d(
+            self.bottlenect_planes,
+            self.bottlenect_planes,
+            kernel_size=3,
+            stride=stride,
+            padding=1,
+            bias=False,
+            groups=64)  # fixed cardinality: `planes` must be a multiple of 64
+        self.bn2 = nn.BatchNorm2d(self.bottlenect_planes)
+        self.conv3 = nn.Conv2d(
+            self.bottlenect_planes, self.outplanes, kernel_size=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(self.outplanes)
+        self.relu = nn.ReLU(inplace=True)
+        self.downsample = downsample
+        self.stride = stride
+
+        self.se_gpool = nn.AdaptiveAvgPool2d(1)  # squeeze: global average pool to 1x1
+        self.se_s = nn.Linear(self.outplanes, self.outplanes // 16)  # `//`: feature counts must be ints
+        self.se_e = nn.Linear(self.outplanes // 16, self.outplanes)
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):  # returns relu(SE-gated main branch + skip branch)
+        residual = x
+
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+
+        out = self.conv2(out)
+        out = self.bn2(out)
+        out = self.relu(out)
+
+        out = self.conv3(out)
+        out = self.bn3(out)
+
+        att = self.se_gpool(out)
+        att = att.view(att.size(0), -1)  # flatten to (batch, channels) for the linear layers
+        att = self.sigmoid(self.se_e(self.relu(self.se_s(att))))
+        att = att.unsqueeze(2).unsqueeze(3).expand_as(out)  # broadcast the gates over H and W
+
+        out = out * att
+
+        if self.downsample is not None:  # project the skip branch when a module was supplied
+            residual = self.downsample(x)
+
+        out += residual
+        out = self.relu(out)
+
+        return out
+
+
+class ResNet(nn.Module):  # 3-conv stem, four stages of `block`, pooling, dropout, linear classifier
+    def __init__(self, block, layers, num_classes=1000):
+        self.inplanes = 64  # not read anywhere else in this class
+        super(ResNet, self).__init__()
+        self.conv1 = nn.Conv2d(
+            3, 64, kernel_size=3, stride=2, padding=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(64)
+        self.relu = nn.ReLU(inplace=True)
+        self.conv2 = nn.Conv2d(
+            64, 64, kernel_size=3, stride=1, padding=1, bias=False)
+        self.bn2 = nn.BatchNorm2d(64)
+        self.conv3 = nn.Conv2d(
+            64, 128, kernel_size=3, stride=1, padding=1, bias=False)
+        self.bn3 = nn.BatchNorm2d(128)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        self.layer1 = self._make_layer(block, 128, 256, layers[0])  # stage widths are hard-coded
+        self.layer2 = self._make_layer(block, 256, 512, layers[1], stride=2)
+        self.layer3 = self._make_layer(block, 512, 1024, layers[2], stride=2)
+        self.layer4 = self._make_layer(block, 1024, 2048, layers[3], stride=2)
+        self.avgpool = nn.AvgPool2d(7)
+        self.fc = nn.Linear(2048, num_classes)
+        self.drop = nn.Dropout(p=0.2)
+        self.softmax = nn.Softmax(dim=1)  # created but not applied in forward()
+        for m in self.modules():  # initialise conv weights and batch-norm affine parameters
+            if isinstance(m, nn.Conv2d):
+                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+                m.weight.data.normal_(0, math.sqrt(2. / n))
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1)
+                m.bias.data.zero_()
+
+    def _make_layer(self, block, inplanes, outplanes, blocks, stride=1):  # one stage of `blocks` blocks
+        downsample = None
+        if stride != 1 or inplanes != outplanes:
+            kernel_size = 3  # strided projection uses a 3x3 kernel with padding 1
+            pad = 1
+            if stride == 1:  # stride-1 projection uses a 1x1 kernel without padding
+                kernel_size = 1
+                pad = 0
+            downsample = nn.Sequential(
+                nn.Conv2d(
+                    inplanes,
+                    outplanes,
+                    kernel_size=kernel_size,
+                    stride=stride,
+                    padding=pad,
+                    bias=False),
+                nn.BatchNorm2d(outplanes), )
+
+        layers = []
+        layers.append(block(inplanes, outplanes, stride, downsample))  # only the first block strides/projects
+        #self.inplanes = planes * block.expansion
+        for i in range(1, blocks):
+            layers.append(block(outplanes, outplanes))
+
+        return nn.Sequential(*layers)
+
+    def forward(self, x):  # returns the output of the final linear layer
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.relu(self.bn2(self.conv2(x)))
+        x = self.relu(self.bn3(self.conv3(x)))
+        x = self.maxpool(x)
+
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+
+        x = self.avgpool(x)
+        x = x.view(x.size(0), -1)  # flatten everything except the batch dimension
+        x = self.drop(x)
+        x = self.fc(x)
+        return x
+
+
+def resnet18(pretrained=False, **kwargs):
+    """Build this module's ResNet with BasicBlock and stage depths [2, 2, 2, 2].
+
+    Args:
+        pretrained (bool): if True, load the ``resnet18`` entry of ``model_urls``
+    """
+    net = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
+    if pretrained:
+        net.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
+    return net
+
+
+def resnet34(pretrained=False, **kwargs):
+    """Build this module's ResNet with BasicBlock and stage depths [3, 4, 6, 3].
+
+    Args:
+        pretrained (bool): if True, load the ``resnet34`` entry of ``model_urls``
+    """
+    net = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
+    if pretrained:
+        net.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
+    return net
+
+
+def resnet50(pretrained=False, **kwargs):
+    """Build this module's ResNet with Bottleneck and stage depths [3, 4, 6, 3].
+
+    Args:
+        pretrained (bool): if True, load the ``resnet50`` entry of ``model_urls``
+    """
+    net = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
+    if pretrained:
+        net.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
+    return net
+
+
+def resnet101(pretrained=False, **kwargs):
+    """Build this module's ResNet with Bottleneck and stage depths [3, 4, 23, 3].
+
+    Args:
+        pretrained (bool): if True, load the ``resnet101`` entry of ``model_urls``
+    """
+    net = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
+    if pretrained:
+        net.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
+    return net
+
+
+def resnet152(pretrained=False, **kwargs):
+    """Build this module's ResNet with Bottleneck and stage depths [3, 8, 36, 3].
+
+    Args:
+        pretrained (bool): if True, load the ``resnet152`` entry of ``model_urls``
+    """
+    net = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
+    if pretrained:
+        net.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
+    return net
diff --git a/image_resnet/pytorch/run.sh b/image_resnet/pytorch/run.sh
new file mode 100644
index 0000000..d76ca1d
--- /dev/null
+++ b/image_resnet/pytorch/run.sh
@@ -0,0 +1,2 @@
+export CUDA_VISIBLE_DEVICES=0  # expose only GPU 0 to the training process
+python train_resnet.py --batch-size=12 --datadir=.  # batch size 12, data directory = current directory
diff --git a/image_resnet/pytorch/train_resnet.py b/image_resnet/pytorch/train_resnet.py
new file mode 100644
index 0000000..34ced82
--- /dev/null
+++ b/image_resnet/pytorch/train_resnet.py
@@ -0,0 +1,527 @@
+import argparse
+import os
+import shutil
+import time
+import math
+
+import torch
+import torch.nn as nn
+import torch.nn.parallel
+import torch.backends.cudnn as cudnn
+import torch.optim
+import torch.utils.data
+import torchvision.transforms as transforms
+import torchvision.datasets as datasets
+import torchvision.models as models
+import resnet
+
+# Sorted names of the lower-case, non-dunder callables that torchvision.models
+# exposes at module level.
+model_names = sorted(
+    name for name, member in models.__dict__.items()
+    if name.islower() and not name.startswith("__") and callable(member))
+
+# Command-line interface of the training script. Help texts that quote a
+# default use argparse's %(default)s placeholder so they cannot drift away from
+# the actual default value.
+parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
+parser.add_argument(
+    '-j',
+    '--workers',
+    default=8,
+    type=int,
+    metavar='N',
+    help='number of data loading workers (default: %(default)s)')
+parser.add_argument(
+    '--epochs',
+    default=120,
+    type=int,
+    metavar='N',
+    help='number of total epochs to run')
+parser.add_argument(
+    '--start-epoch',
+    default=0,
+    type=int,
+    metavar='N',
+    help='manual epoch number (useful on restarts)')
+parser.add_argument(
+    '-b',
+    '--batch-size',
+    default=48,
+    type=int,
+    metavar='N',
+    help='mini-batch size (default: %(default)s)')
+parser.add_argument(
+    '--lr',
+    '--learning-rate',
+    default=0.1,
+    type=float,
+    metavar='LR',
+    help='initial learning rate')
+parser.add_argument(
+    '--momentum', default=0.9, type=float, metavar='M', help='momentum')
+parser.add_argument(
+    '--weight-decay',
+    '--wd',
+    default=1e-4,
+    type=float,
+    metavar='W',
+    help='weight decay (default: 1e-4)')
+parser.add_argument(
+    '--print-freq',
+    '-p',
+    default=10,
+    type=int,
+    metavar='N',
+    help='print frequency (default: 10)')
+parser.add_argument(
+    '--resume',
+    default='',
+    type=str,
+    metavar='PATH',
+    help='path to latest checkpoint (default: none)')
+# No default: main() refuses to run when --datadir is not given.
+parser.add_argument(
+    '--datadir',
+    default=None,
+    type=str,
+    metavar='PATH',
+    help='path to train data (required)')
+parser.add_argument(
+    '-e',
+    '--evaluate',
+    dest='evaluate',
+    action='store_true',
+    help='evaluate model on validation set')
+parser.add_argument(
+    '--pretrained',
+    dest='pretrained',
+    action='store_true',
+    help='use pre-trained model')
+parser.add_argument(
+    '--weights',
+    default='',
+    type=str,
+    metavar='weights',
+    help='pretrained model')
+
+# NOTE(review): --n_iterate and --valid_iterate are parsed but never read by
+# the functions in this script.
+parser.add_argument(
+    '--n_iterate',
+    type=int,
+    default=26000,
+    help='max number of iterate per epoch')
+parser.add_argument(
+    '--valid_iterate',
+    type=int,
+    default=1800,
+    help='max number of validation iterations per epoch')
+
+# Best top-1 precision seen so far; updated by main() after every epoch.
+best_prec1 = 0
+
+
+class ReduceLROnPlateau(object):
+    """Reduce learning rate when a metric has stopped improving.
+    Models often benefit from reducing the learning rate by a factor
+    of 2-10 once learning stagnates. This scheduler reads a metrics
+    quantity and if no improvement is seen for a 'patience' number
+    of epochs, the learning rate is reduced.
+
+    Args:
+        optimizer (Optimizer): Wrapped optimizer.
+        mode (str): One of `min`, `max`. In `min` mode, lr will
+            be reduced when the quantity monitored has stopped
+            decreasing; in `max` mode it will be reduced when the
+            quantity monitored has stopped increasing. Default: 'min'.
+        factor (float): Factor by which the learning rate will be
+            reduced. new_lr = lr * factor. Default: 0.1.
+        patience (int): Number of epochs with no improvement after
+            which learning rate will be reduced. Default: 10.
+        verbose (bool): If True, prints a message to stdout for
+            each update. Default: False.
+        threshold (float): Threshold for measuring the new optimum,
+            to only focus on significant changes. Default: 1e-4.
+        threshold_mode (str): One of `rel`, `abs`. In `rel` mode,
+            dynamic_threshold = best * ( 1 + threshold ) in 'max'
+            mode or best * ( 1 - threshold ) in `min` mode.
+            In `abs` mode, dynamic_threshold = best + threshold in
+            `max` mode or best - threshold in `min` mode. Default: 'rel'.
+        cooldown (int): Number of epochs to wait before resuming
+            normal operation after lr has been reduced. Default: 0.
+        min_lr (float or list): A scalar or a list of scalars. A
+            lower bound on the learning rate of all param groups
+            or each group respectively. Default: 0.
+        eps (float): Minimal decay applied to lr. If the difference
+            between new and old lr is smaller than eps, the update is
+            ignored. Default: 1e-8.
+
+    Raises:
+        ValueError: If ``factor >= 1.0``, if ``min_lr`` is a sequence whose
+            length differs from the number of param groups, or if ``mode`` /
+            ``threshold_mode`` is not one of the accepted values.
+
+    Example:
+        >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
+        >>> scheduler = ReduceLROnPlateau(optimizer, 'min')
+        >>> for epoch in range(10):
+        >>>     train(...)
+        >>>     val_loss = validate(...)
+        >>>     # Note that step should be called after validate()
+        >>>     scheduler.step(val_loss)
+    """
+
+    def __init__(self,
+                 optimizer,
+                 mode='min',
+                 factor=0.1,
+                 patience=10,
+                 verbose=False,
+                 threshold=1e-4,
+                 threshold_mode='rel',
+                 cooldown=0,
+                 min_lr=0,
+                 eps=1e-8):
+
+        if factor >= 1.0:
+            raise ValueError('Factor should be < 1.0.')
+        self.factor = factor
+        self.optimizer = optimizer
+
+        # Normalise min_lr to one lower bound per param group.
+        if isinstance(min_lr, (list, tuple)):
+            if len(min_lr) != len(optimizer.param_groups):
+                raise ValueError("expected {} min_lrs, got {}".format(
+                    len(optimizer.param_groups), len(min_lr)))
+            self.min_lrs = list(min_lr)
+        else:
+            self.min_lrs = [min_lr] * len(optimizer.param_groups)
+
+        self.patience = patience
+        self.verbose = verbose
+        self.cooldown = cooldown
+        self.cooldown_counter = 0
+        self.mode = mode
+        self.threshold = threshold
+        self.threshold_mode = threshold_mode
+        self.best = None
+        self.num_bad_epochs = None
+        self.mode_worse = None  # the worse value for the chosen mode
+        self.is_better = None
+        self.eps = eps
+        self.last_epoch = -1
+        self._init_is_better(
+            mode=mode, threshold=threshold, threshold_mode=threshold_mode)
+        self._reset()
+
+    def _reset(self):
+        """Resets num_bad_epochs counter and cooldown counter."""
+        self.best = self.mode_worse
+        self.cooldown_counter = 0
+        self.num_bad_epochs = 0
+
+    def step(self, metrics, epoch=None):
+        """Record one epoch's metric and reduce the LR once patience runs out.
+
+        Args:
+            metrics: Value of the monitored quantity for this epoch.
+            epoch (int, optional): Epoch index; when omitted, the internal
+                counter ``last_epoch`` is advanced by one and used instead.
+        """
+        current = metrics
+        if epoch is None:
+            epoch = self.last_epoch + 1
+        self.last_epoch = epoch
+
+        if self.is_better(current, self.best):
+            self.best = current
+            self.num_bad_epochs = 0
+        else:
+            self.num_bad_epochs += 1
+
+        if self.in_cooldown:
+            self.cooldown_counter -= 1
+            self.num_bad_epochs = 0  # ignore any bad epochs in cooldown
+
+        if self.num_bad_epochs > self.patience:
+            self._reduce_lr(epoch)
+            self.cooldown_counter = self.cooldown
+            self.num_bad_epochs = 0
+
+    def _reduce_lr(self, epoch):
+        """Scale every group's LR by ``factor``, clamped at its ``min_lr``."""
+        for i, param_group in enumerate(self.optimizer.param_groups):
+            old_lr = float(param_group['lr'])
+            new_lr = max(old_lr * self.factor, self.min_lrs[i])
+            # Skip updates smaller than eps.
+            if old_lr - new_lr > self.eps:
+                param_group['lr'] = new_lr
+                if self.verbose:
+                    print('Epoch {:5d}: reducing learning rate'
+                          ' of group {} to {:.4e}.'.format(epoch, i, new_lr))
+
+    @property
+    def in_cooldown(self):
+        """True while the post-reduction cooldown counter is still positive."""
+        return self.cooldown_counter > 0
+
+    def _init_is_better(self, mode, threshold, threshold_mode):
+        """Install ``is_better(a, best)`` and ``mode_worse`` for the mode pair.
+
+        Raises:
+            ValueError: If ``mode`` is not 'min'/'max' or ``threshold_mode``
+                is not 'rel'/'abs'.
+        """
+        if mode not in {'min', 'max'}:
+            raise ValueError('mode ' + mode + ' is unknown!')
+        if threshold_mode not in {'rel', 'abs'}:
+            # Report the offending threshold_mode, not the (valid) mode.
+            raise ValueError('threshold mode ' + threshold_mode +
+                             ' is unknown!')
+        if mode == 'min' and threshold_mode == 'rel':
+            rel_epsilon = 1. - threshold
+            self.is_better = lambda a, best: a < best * rel_epsilon
+            self.mode_worse = float('Inf')
+        elif mode == 'min' and threshold_mode == 'abs':
+            self.is_better = lambda a, best: a < best - threshold
+            self.mode_worse = float('Inf')
+        elif mode == 'max' and threshold_mode == 'rel':
+            rel_epsilon = threshold + 1.
+            self.is_better = lambda a, best: a > best * rel_epsilon
+            self.mode_worse = -float('Inf')
+        else:  # mode == 'max' and threshold_mode == 'abs':
+            self.is_better = lambda a, best: a > best + threshold
+            self.mode_worse = -float('Inf')
+
+
+def main():
+    """Entry point: parse the CLI, build ResNet-152 and run training.
+
+    Builds the model, loss and SGD optimizer, optionally warm-starts the
+    weights from ``--resume``, creates the train/validation loaders from
+    ``--datadir`` and then either evaluates once (``--evaluate``) or, for
+    every epoch, adjusts the learning rate, trains, validates and writes a
+    checkpoint. Updates the module-level ``args`` and ``best_prec1``.
+    """
+    global args, best_prec1
+    args = parser.parse_args()
+
+    # Fail fast: check the one mandatory option before the model is built and
+    # moved to the GPU.
+    traindir = args.datadir
+    if traindir is None:
+        print("please set --datadir")
+        exit(1)
+    # Validation reads the same image folder as training.
+    valdir = traindir
+
+    # create model
+    model = resnet.resnet152(num_classes=1000)
+    model = torch.nn.DataParallel(model).cuda()
+    cudnn.benchmark = True
+
+    normalize = transforms.Normalize(
+        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+
+    # define loss function (criterion) and optimizer
+    criterion = nn.CrossEntropyLoss().cuda()
+    optimizer = torch.optim.SGD(model.parameters(),
+                                args.lr,
+                                momentum=args.momentum,
+                                weight_decay=args.weight_decay)
+
+    # optionally resume from a checkpoint
+    if args.resume:
+        if os.path.isfile(args.resume):
+            print("=> loading checkpoint '{}'".format(args.resume))
+
+            checkpoint = torch.load(args.resume)
+            # NOTE(review): the restart epoch is pinned to 1 rather than read
+            # from checkpoint['epoch'], and the optimizer state is not
+            # restored -- confirm this is intended.
+            args.start_epoch = 1
+            best_prec1 = checkpoint['best_prec1']
+
+            # Load only the checkpoint entries whose names exist in this
+            # model; every other parameter keeps its current value.
+            model_dict = model.state_dict()
+            matched = {
+                k: v
+                for k, v in checkpoint['state_dict'].items() if k in model_dict
+            }
+            model_dict.update(matched)
+            model.load_state_dict(model_dict)
+        else:
+            print("=> no checkpoint found at '{}'".format(args.resume))
+
+    train_dataset = datasets.ImageFolder(traindir,
+                                         transforms.Compose([
+                                             transforms.RandomSizedCrop(224),
+                                             transforms.RandomHorizontalFlip(),
+                                             transforms.ToTensor(),
+                                             normalize,
+                                         ]))
+    train_loader = torch.utils.data.DataLoader(
+        train_dataset,
+        batch_size=args.batch_size,
+        shuffle=True,
+        num_workers=args.workers,
+        pin_memory=True)
+
+    val_loader = torch.utils.data.DataLoader(
+        datasets.ImageFolder(valdir,
+                             transforms.Compose([
+                                 transforms.Scale(243),
+                                 transforms.CenterCrop(224),
+                                 transforms.ToTensor(),
+                                 normalize,
+                             ])),
+        batch_size=args.batch_size,
+        shuffle=False,
+        num_workers=args.workers,
+        pin_memory=True)
+
+    if args.evaluate:
+        validate(val_loader, model, criterion)
+        return
+
+    for epoch in range(args.start_epoch, args.epochs):
+        adjust_learning_rate(optimizer, epoch, args.epochs)
+
+        # train for one epoch
+        train(train_loader, model, criterion, optimizer, epoch)
+
+        # evaluate on validation set
+        prec1, val_loss = validate(val_loader, model, criterion)
+
+        # remember best prec@1 and save checkpoint
+        is_best = prec1 > best_prec1
+        best_prec1 = max(prec1, best_prec1)
+        # Call form of print: same output, and consistent with the other
+        # print calls in this file.
+        print("best top1 :" + str(best_prec1))
+        save_checkpoint({
+            'epoch': epoch + 1,
+            'state_dict': model.state_dict(),
+            'best_prec1': best_prec1,
+            'optimizer': optimizer.state_dict(),
+        }, is_best)
+
+
+def train(train_loader, model, criterion, optimizer, epoch):
+    """Train ``model`` for one pass over ``train_loader``.
+
+    For every batch this runs the forward pass, computes the loss and the
+    top-1/top-5 precision, and takes one optimizer step. Running timing, loss
+    and precision statistics are printed every ``args.print_freq`` batches.
+
+    Args:
+        train_loader: Iterable yielding ``(input, target)`` batches.
+        model: Network to optimise; switched to training mode here.
+        criterion: Loss, called as ``criterion(output, target)``.
+        optimizer: Optimizer stepped once per batch.
+        epoch: Epoch index, used only in the progress line.
+    """
+    iter_timer = AverageMeter()
+    load_timer = AverageMeter()
+    loss_meter = AverageMeter()
+    top1_meter = AverageMeter()
+    top5_meter = AverageMeter()
+    progress = ('Epoch: [{0}][{1}]\t'
+                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
+                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
+                'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
+                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
+                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})')
+
+    # switch to train mode
+    model.train()
+
+    tick = time.time()
+    for step, (images, labels) in enumerate(train_loader):
+        # time spent waiting for the loader to deliver this batch
+        load_timer.update(time.time() - tick)
+
+        n_samples = images.size(0)
+        images_gpu = images.cuda()
+        labels = labels.cuda(async=True)
+
+        # forward pass and loss
+        logits = model(torch.autograd.Variable(images_gpu))
+        loss = criterion(logits, torch.autograd.Variable(labels))
+
+        # record loss and precision, each weighted by the batch size
+        prec1, prec5 = accuracy(logits.data, labels, topk=(1, 5))
+        loss_meter.update(loss.data[0], n_samples)
+        top1_meter.update(prec1[0], n_samples)
+        top5_meter.update(prec5[0], n_samples)
+
+        # backward pass and SGD step
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+
+        # wall-clock time of the whole iteration, data loading included
+        iter_timer.update(time.time() - tick)
+        tick = time.time()
+
+        if step % args.print_freq != 0:
+            continue
+        print(progress.format(
+            epoch,
+            step,
+            batch_time=iter_timer,
+            data_time=load_timer,
+            loss=loss_meter,
+            top1=top1_meter,
+            top5=top5_meter))
+
+
+def validate(val_loader, model, criterion):
+    """Score ``model`` on ``val_loader``.
+
+    Prints running statistics every ``args.print_freq`` batches and a final
+    summary line.
+
+    Args:
+        val_loader: Iterable yielding ``(input, target)`` batches.
+        model: Network to evaluate; switched to evaluation mode here.
+        criterion: Loss, called as ``criterion(output, target)``.
+
+    Returns:
+        tuple: ``(top-1 precision average, loss average)`` over all batches.
+    """
+    iter_timer = AverageMeter()
+    loss_meter = AverageMeter()
+    top1_meter = AverageMeter()
+    top5_meter = AverageMeter()
+    progress = ('Test: [{0}]\t'
+                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
+                'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
+                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
+                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})')
+
+    # switch to evaluate mode
+    model.eval()
+
+    tick = time.time()
+    for step, (images, labels) in enumerate(val_loader):
+        n_samples = images.size(0)
+        labels = labels.cuda(async=True)
+
+        # forward pass and loss on volatile Variables
+        logits = model(torch.autograd.Variable(images, volatile=True))
+        loss = criterion(logits,
+                         torch.autograd.Variable(labels, volatile=True))
+        loss_meter.update(loss.data[0], n_samples)
+
+        # record precision, weighted by the batch size
+        prec1, prec5 = accuracy(logits.data, labels, topk=(1, 5))
+        top1_meter.update(prec1[0], n_samples)
+        top5_meter.update(prec5[0], n_samples)
+
+        # wall-clock time of the whole iteration
+        iter_timer.update(time.time() - tick)
+        tick = time.time()
+
+        if step % args.print_freq != 0:
+            continue
+        print(progress.format(
+            step,
+            batch_time=iter_timer,
+            loss=loss_meter,
+            top1=top1_meter,
+            top5=top5_meter))
+
+    print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
+        top1=top1_meter, top5=top5_meter))
+    return top1_meter.avg, loss_meter.avg
+
+
+def save_checkpoint(state, is_best, filename='output/checkpoint.pth.tar'):
+    """Serialise ``state`` to ``filename`` and keep a copy of the best one.
+
+    Missing parent directories are created first, so a fresh working
+    directory without ``output/`` does not abort the run at the first save.
+
+    Args:
+        state: Object handed to ``torch.save``.
+        is_best (bool): If True, the written file is also copied to
+            ``output/model_best.pth.tar``.
+        filename (str): Destination of the checkpoint.
+    """
+    best_path = 'output/model_best.pth.tar'
+    destinations = [filename, best_path] if is_best else [filename]
+    for path in destinations:
+        parent = os.path.dirname(path)
+        # An empty parent means the current directory, which always exists.
+        if parent and not os.path.isdir(parent):
+            os.makedirs(parent)
+
+    torch.save(state, filename)
+    if is_best:
+        shutil.copyfile(filename, best_path)
+
+
+class AverageMeter(object):
+    """Keep the latest value and the running weighted mean of a series."""
+
+    def __init__(self):
+        self.reset()
+
+    def reset(self):
+        """Zero the last value, the mean, the weighted sum and the count."""
+        self.val = self.avg = self.sum = self.count = 0
+
+    def update(self, val, n=1):
+        """Record ``val`` as the latest value, counted ``n`` times.
+
+        Args:
+            val: New observation; stored as ``self.val``.
+            n: Weight of the observation (e.g. a batch size). Default: 1.
+        """
+        self.val = val
+        self.sum += n * val
+        self.count += n
+        self.avg = self.sum / self.count
+
+
+def adjust_learning_rate(optimizer, epoch, nepoch):
+    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs.
+
+    The initial LR of each param group is the value it holds the first time
+    this function sees the group; it is remembered under the group's
+    ``'initial_lr'`` key so that later calls decay from it and not from an
+    already-decayed value.
+
+    Args:
+        optimizer: Object exposing ``param_groups``, a list of dicts with an
+            ``'lr'`` entry.
+        epoch (int): Current epoch index; the LR is
+            ``initial_lr * 0.1 ** (epoch // 30)``.
+        nepoch: Total number of epochs. Unused; kept for interface
+            compatibility with existing callers.
+    """
+    decay = 0.1**(epoch // 30)
+    for param_group in optimizer.param_groups:
+        base_lr = param_group.setdefault('initial_lr', param_group['lr'])
+        param_group['lr'] = base_lr * decay
+
+
+def print_learning_rate(optimizer, epoch):
+    """Print the current learning rate of every param group, one per line.
+
+    Args:
+        optimizer: Object exposing ``param_groups``, a list of dicts with an
+            ``'lr'`` entry.
+        epoch: Epoch index shown at the start of each line.
+    """
+    for param_group in optimizer.param_groups:
+        # Call form of print with a single string: same output as the print
+        # statement, and consistent with the other print calls in this file.
+        print(str(epoch) + ' epoch learning_rate:' + str(param_group['lr']))
+
+
+def accuracy(output, target, topk=(1, )):
+    """Computes the precision@k for the specified values of k.
+
+    Args:
+        output: Score tensor indexed as ``[sample, class]``; the top
+            ``max(topk)`` entries along dimension 1 are taken per sample.
+        target: Tensor of class indices whose first dimension is the batch.
+        topk (tuple of int): The k values to evaluate.
+
+    Returns:
+        list: One single-element tensor per k, holding the percentage of
+        samples whose target is among the k highest-scoring classes.
+    """
+    maxk = max(topk)
+    batch_size = target.size(0)
+
+    _, pred = output.topk(maxk, 1, True, True)
+    # Transpose so that row r holds every sample's rank-r prediction.
+    pred = pred.t()
+    correct = pred.eq(target.view(1, -1).expand_as(pred))
+
+    res = []
+    for k in topk:
+        # The first k rows of the transposed matrix are not guaranteed to be
+        # contiguous, which view() requires; keepdim keeps a one-element
+        # tensor so callers can keep indexing the result with [0].
+        hits = correct[:k].contiguous().view(-1).float()
+        correct_k = hits.sum(0, keepdim=True)
+        res.append(correct_k.mul_(100.0 / batch_size))
+    return res
+
+
+# Run the training entry point only when executed as a script, not on import.
+if __name__ == '__main__':
+    main()