From 71ea9a66db41ef7fcce355de5afd22e7313cae2e Mon Sep 17 00:00:00 2001 From: Akhil V Date: Wed, 10 Jul 2019 00:28:12 -0400 Subject: [PATCH 01/13] Updated everything for Python3.6+ and TensorFlow1.13+ --- README.md | 5 +- demo/birds_skip_thought_demo.py | 91 ++--- demo/demo.py | 93 ++--- misc/config.py | 5 +- misc/custom_ops.py | 387 +++++++++++++------ misc/datasets.py | 49 +-- misc/preprocess_birds.py | 11 +- misc/preprocess_flowers.py | 7 +- misc/tf_upgrade.py | 665 ++++++++++++++++++++++++++++++++ misc/utils.py | 23 +- stageI/model.py | 322 +++++++--------- stageI/run_exp.py | 41 +- stageI/trainer.py | 268 +++++-------- stageII/cfg/birds.yml | 6 +- stageII/model.py | 445 ++++++++++----------- stageII/run_exp.py | 43 +-- stageII/trainer.py | 407 +++++++------------ 17 files changed, 1702 insertions(+), 1166 deletions(-) create mode 100644 misc/tf_upgrade.py diff --git a/README.md b/README.md index 4800bd2..227ed1b 100755 --- a/README.md +++ b/README.md @@ -11,16 +11,15 @@ Tensorflow implementation for reproducing main results in the paper [StackGAN: T ### Dependencies -python 2.7 +python 3.6+ -[TensorFlow 0.12](https://www.tensorflow.org/get_started/os_setup) +[TensorFlow 1.13+](https://www.tensorflow.org/get_started/os_setup) [Optional] [Torch](http://torch.ch/docs/getting-started.html#_) is needed, if use the pre-trained char-CNN-RNN text encoder. [Optional] [skip-thought](https://github.com/ryankiros/skip-thoughts) is needed, if use the skip-thought text encoder. In addition, please add the project folder to PYTHONPATH and `pip install` the following packages: -- `prettytensor` - `progressbar` - `python-dateutil` - `easydict` diff --git a/demo/birds_skip_thought_demo.py b/demo/birds_skip_thought_demo.py index cddb21f..3d3f475 100644 --- a/demo/birds_skip_thought_demo.py +++ b/demo/birds_skip_thought_demo.py @@ -1,30 +1,30 @@ from __future__ import division from __future__ import print_function -import prettytensor as pt +# import prettytensor as pt import tensorflow as tf import numpy as np -import scipy.misc +import imageio +# import scipy.misc import os import argparse from PIL import Image, ImageDraw, ImageFont -from misc.config import cfg, cfg_from_file -from misc.utils import mkdir_p -from misc import skipthoughts -from stageII.model import CondGAN +import sys +sys.path.append('./misc') +sys.path.append('stageII') + +from config import cfg, cfg_from_file +from utils import mkdir_p +from model import CondGAN +from skimage.transform import resize def parse_args(): parser = argparse.ArgumentParser(description='Train a GAN network') - parser.add_argument('--cfg', dest='cfg_file', - help='optional config file', - default=None, type=str) - parser.add_argument('--gpu', dest='gpu_id', - help='GPU device id to use [0]', - default=-1, type=int) - parser.add_argument('--caption_path', type=str, default=None, - help='Path to the file with text sentences') + parser.add_argument('--cfg', dest='cfg_file', help='optional config file', default=None, type=str) + parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', default=-1, type=int) + parser.add_argument('--caption_path', type=str, default=None, help='Path to the file with text sentences') # if len(sys.argv) == 1: # parser.print_help() # sys.exit(1) @@ -49,21 +49,17 @@ def sample_encoded_context(embeddings, model, bAugmentation=True): def build_model(sess, embedding_dim, batch_size): - model = CondGAN( - lr_imsize=cfg.TEST.LR_IMSIZE, - hr_lr_ratio=int(cfg.TEST.HR_IMSIZE/cfg.TEST.LR_IMSIZE)) - - embeddings = 
tf.placeholder( - tf.float32, [batch_size, embedding_dim], - name='conditional_embeddings') - with pt.defaults_scope(phase=pt.Phase.test): - with tf.variable_scope("g_net"): - c = sample_encoded_context(embeddings, model) - z = tf.random_normal([batch_size, cfg.Z_DIM]) - fake_images = model.get_generator(tf.concat(1, [c, z])) - with tf.variable_scope("hr_g_net"): - hr_c = sample_encoded_context(embeddings, model) - hr_fake_images = model.hr_get_generator(fake_images, hr_c) + model = CondGAN(lr_imsize=cfg.TEST.LR_IMSIZE, hr_lr_ratio=int(cfg.TEST.HR_IMSIZE/cfg.TEST.LR_IMSIZE)) + + embeddings = tf.placeholder(tf.float32, [batch_size, embedding_dim], name='conditional_embeddings') + # with pt.defaults_scope(phase=pt.Phase.test): + with tf.variable_scope("g_net"): + c = sample_encoded_context(embeddings, model) + z = tf.random_normal([batch_size, cfg.Z_DIM]) + fake_images = model.get_generator(tf.concat([c, z], 1,), False) + with tf.variable_scope("hr_g_net"): + hr_c = sample_encoded_context(embeddings, model) + hr_fake_images = model.hr_get_generator(fake_images, hr_c, False) ckt_path = cfg.TEST.PRETRAINED_MODEL if ckt_path.find('.ckpt') != -1: @@ -101,9 +97,7 @@ def drawCaption(img, caption): return img_txt -def save_super_images(sample_batchs, hr_sample_batchs, - captions_batch, batch_size, - startID, save_dir): +def save_super_images(sample_batchs, hr_sample_batchs, captions_batch, batch_size, startID, save_dir): if not os.path.isdir(save_dir): print('Make a new folder: ', save_dir) mkdir_p(save_dir) @@ -119,7 +113,7 @@ def save_super_images(sample_batchs, hr_sample_batchs, lr_img = sample_batchs[i][j] hr_img = hr_sample_batchs[i][j] hr_img = (hr_img + 1.0) * 127.5 - re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2]) + re_sample = resize(lr_img, hr_img.shape[:2]) row1.append(re_sample) row2.append(hr_img) row1 = np.concatenate(row1, axis=1) @@ -134,27 +128,23 @@ def save_super_images(sample_batchs, hr_sample_batchs, lr_img = sample_batchs[i][j] hr_img = hr_sample_batchs[i][j] hr_img = (hr_img + 1.0) * 127.5 - re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2]) + re_sample = resize(lr_img, hr_img.shape[:2]) row1.append(re_sample) row2.append(hr_img) row1 = np.concatenate(row1, axis=1) row2 = np.concatenate(row2, axis=1) super_row = np.concatenate([row1, row2], axis=0) superimage2 = np.zeros_like(superimage) - superimage2[:super_row.shape[0], - :super_row.shape[1], - :super_row.shape[2]] = super_row + superimage2[:super_row.shape[0], :super_row.shape[1], :super_row.shape[2]] = super_row mid_padding = np.zeros((64, superimage.shape[1], 3)) - superimage =\ - np.concatenate([superimage, mid_padding, superimage2], axis=0) + superimage = np.concatenate([superimage, mid_padding, superimage2], axis=0) top_padding = np.zeros((128, superimage.shape[1], 3)) - superimage =\ - np.concatenate([top_padding, superimage], axis=0) + superimage = np.concatenate([top_padding, superimage], axis=0) fullpath = '%s/sentence%d.jpg' % (save_dir, startID + j) superimage = drawCaption(np.uint8(superimage), captions_batch[j]) - scipy.misc.imsave(fullpath, superimage) + imageio.imsave(fullpath, superimage) if __name__ == "__main__": @@ -188,8 +178,8 @@ def save_super_images(sample_batchs, hr_sample_batchs, config = tf.ConfigProto(allow_soft_placement=True) with tf.Session(config=config) as sess: with tf.device("/gpu:%d" % cfg.GPU_ID): - embeddings_holder, fake_images_opt, hr_fake_images_opt =\ - build_model(sess, embeddings.shape[-1], batch_size) + embeddings_holder, fake_images_opt, hr_fake_images_opt = 
build_model(sess, embeddings.shape[-1], + batch_size) count = 0 while count < num_embeddings: @@ -205,19 +195,14 @@ def save_super_images(sample_batchs, hr_sample_batchs, # Generate up to 16 images for each sentence with # randomness from noise z and conditioning augmentation. for i in range(np.minimum(16, cfg.TEST.NUM_COPY)): - hr_samples, samples =\ - sess.run([hr_fake_images_opt, fake_images_opt], - {embeddings_holder: embeddings_batch}) + hr_samples, samples = sess.run([hr_fake_images_opt, fake_images_opt], + {embeddings_holder: embeddings_batch}) samples_batchs.append(samples) hr_samples_batchs.append(hr_samples) - save_super_images(samples_batchs, - hr_samples_batchs, - captions_batch, - batch_size, - count, save_dir) + save_super_images(samples_batchs, hr_samples_batchs, captions_batch, batch_size, count, save_dir) count += batch_size print('Finish generating samples for %d sentences:' % num_embeddings) print('Example sentences:') - for i in xrange(np.minimum(10, num_embeddings)): + for i in range(np.minimum(10, num_embeddings)): print('Sentence %d: %s' % (i, captions_list[i])) diff --git a/demo/demo.py b/demo/demo.py index 6f21a72..c893aac 100644 --- a/demo/demo.py +++ b/demo/demo.py @@ -1,31 +1,32 @@ from __future__ import division from __future__ import print_function -import prettytensor as pt +# import prettytensor as pt import tensorflow as tf import numpy as np -import scipy.misc +import imageio +# import scipy.misc import os import argparse import torchfile from PIL import Image, ImageDraw, ImageFont import re -from misc.config import cfg, cfg_from_file -from misc.utils import mkdir_p -from stageII.model import CondGAN +import sys +sys.path.append('./misc') +sys.path.append('stageII') + +from config import cfg, cfg_from_file +from utils import mkdir_p +from model import CondGAN +from skimage.transform import resize def parse_args(): parser = argparse.ArgumentParser(description='Train a GAN network') - parser.add_argument('--cfg', dest='cfg_file', - help='optional config file', - default=None, type=str) - parser.add_argument('--gpu', dest='gpu_id', - help='GPU device id to use [0]', - default=-1, type=int) - parser.add_argument('--caption_path', type=str, default=None, - help='Path to the file with text sentences') + parser.add_argument('--cfg', dest='cfg_file', help='optional config file', default=None, type=str) + parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', default=-1, type=int) + parser.add_argument('--caption_path', type=str, default=None, help='Path to the file with text sentences') # if len(sys.argv) == 1: # parser.print_help() # sys.exit(1) @@ -50,26 +51,23 @@ def sample_encoded_context(embeddings, model, bAugmentation=True): def build_model(sess, embedding_dim, batch_size): - model = CondGAN( - lr_imsize=cfg.TEST.LR_IMSIZE, - hr_lr_ratio=int(cfg.TEST.HR_IMSIZE/cfg.TEST.LR_IMSIZE)) - - embeddings = tf.placeholder( - tf.float32, [batch_size, embedding_dim], - name='conditional_embeddings') - with pt.defaults_scope(phase=pt.Phase.test): - with tf.variable_scope("g_net"): - c = sample_encoded_context(embeddings, model) - z = tf.random_normal([batch_size, cfg.Z_DIM]) - fake_images = model.get_generator(tf.concat(1, [c, z])) - with tf.variable_scope("hr_g_net"): - hr_c = sample_encoded_context(embeddings, model) - hr_fake_images = model.hr_get_generator(fake_images, hr_c) + model = CondGAN(lr_imsize=cfg.TEST.LR_IMSIZE, hr_lr_ratio=int(cfg.TEST.HR_IMSIZE/cfg.TEST.LR_IMSIZE)) + + embeddings = tf.placeholder(tf.float32, [batch_size, 
embedding_dim], name='conditional_embeddings') + # with pt.defaults_scope(phase=pt.Phase.test): + with tf.variable_scope("g_net"): + c = sample_encoded_context(embeddings, model) + z = tf.random_normal([batch_size, cfg.Z_DIM]) + fake_images = model.get_generator(tf.concat([c, z], 1), False) + with tf.variable_scope("hr_g_net"): + hr_c = sample_encoded_context(embeddings, model) + hr_fake_images = model.hr_get_generator(fake_images, hr_c, False) ckt_path = cfg.TEST.PRETRAINED_MODEL if ckt_path.find('.ckpt') != -1: print("Reading model parameters from %s" % ckt_path) - saver = tf.train.Saver(tf.all_variables()) + saver = tf.train.Saver(tf.global_variables()) + # saver = tf.train.Saver(tf.all_variables()) saver.restore(sess, ckt_path) else: print("Input a valid model path.") @@ -102,9 +100,7 @@ def drawCaption(img, caption): return img_txt -def save_super_images(sample_batchs, hr_sample_batchs, - captions_batch, batch_size, - startID, save_dir): +def save_super_images(sample_batchs, hr_sample_batchs, captions_batch, batch_size, startID, save_dir): if not os.path.isdir(save_dir): print('Make a new folder: ', save_dir) mkdir_p(save_dir) @@ -123,7 +119,7 @@ def save_super_images(sample_batchs, hr_sample_batchs, lr_img = sample_batchs[i][j] hr_img = hr_sample_batchs[i][j] hr_img = (hr_img + 1.0) * 127.5 - re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2]) + re_sample = resize(lr_img, hr_img.shape[:2]) row1.append(re_sample) row2.append(hr_img) row1 = np.concatenate(row1, axis=1) @@ -138,27 +134,23 @@ def save_super_images(sample_batchs, hr_sample_batchs, lr_img = sample_batchs[i][j] hr_img = hr_sample_batchs[i][j] hr_img = (hr_img + 1.0) * 127.5 - re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2]) + re_sample = resize(lr_img, hr_img.shape[:2]) row1.append(re_sample) row2.append(hr_img) row1 = np.concatenate(row1, axis=1) row2 = np.concatenate(row2, axis=1) super_row = np.concatenate([row1, row2], axis=0) superimage2 = np.zeros_like(superimage) - superimage2[:super_row.shape[0], - :super_row.shape[1], - :super_row.shape[2]] = super_row + superimage2[:super_row.shape[0], :super_row.shape[1], :super_row.shape[2]] = super_row mid_padding = np.zeros((64, superimage.shape[1], 3)) - superimage =\ - np.concatenate([superimage, mid_padding, superimage2], axis=0) + superimage = np.concatenate([superimage, mid_padding, superimage2], axis=0) top_padding = np.zeros((128, superimage.shape[1], 3)) - superimage =\ - np.concatenate([top_padding, superimage], axis=0) + superimage = np.concatenate([top_padding, superimage], axis=0) fullpath = '%s/sentence%d.jpg' % (save_dir, startID + j) superimage = drawCaption(np.uint8(superimage), captions_batch[j]) - scipy.misc.imsave(fullpath, superimage) + imageio.imwrite(fullpath, superimage) if __name__ == "__main__": @@ -188,8 +180,8 @@ def save_super_images(sample_batchs, hr_sample_batchs, config = tf.ConfigProto(allow_soft_placement=True) with tf.Session(config=config) as sess: with tf.device("/gpu:%d" % cfg.GPU_ID): - embeddings_holder, fake_images_opt, hr_fake_images_opt =\ - build_model(sess, embeddings.shape[-1], batch_size) + embeddings_holder, fake_images_opt, hr_fake_images_opt = build_model(sess, embeddings.shape[-1], + batch_size) count = 0 while count < num_embeddings: @@ -205,19 +197,14 @@ def save_super_images(sample_batchs, hr_sample_batchs, # Generate up to 16 images for each sentence with # randomness from noise z and conditioning augmentation. 
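                # (Each pass below re-runs the same graph, so samples_batchs[i]
                # and hr_samples_batchs[i] hold the i-th low-/high-resolution
                # batch for the same embeddings -- 64x64 and 256x256 with the
                # default cfg sizes -- differing only in the sampled noise.)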
for i in range(np.minimum(16, cfg.TEST.NUM_COPY)): - hr_samples, samples =\ - sess.run([hr_fake_images_opt, fake_images_opt], - {embeddings_holder: embeddings_batch}) + hr_samples, samples = sess.run([hr_fake_images_opt, fake_images_opt], + {embeddings_holder: embeddings_batch}) samples_batchs.append(samples) hr_samples_batchs.append(hr_samples) - save_super_images(samples_batchs, - hr_samples_batchs, - captions_batch, - batch_size, - count, save_dir) + save_super_images(samples_batchs, hr_samples_batchs, captions_batch, batch_size, count, save_dir) count += batch_size print('Finish generating samples for %d sentences:' % num_embeddings) print('Example sentences:') - for i in xrange(np.minimum(10, num_embeddings)): + for i in range(np.minimum(10, num_embeddings)): print('Sentence %d: %s' % (i, captions_list[i])) diff --git a/misc/config.py b/misc/config.py index 3ff777b..707fabe 100644 --- a/misc/config.py +++ b/misc/config.py @@ -66,9 +66,10 @@ def _merge_a_into_b(a, b): if type(a) is not edict: return - for k, v in a.iteritems(): + for k, v in a.items(): # a must specify keys that are in b - if not b.has_key(k): + if k not in b: + #if not b.has_key(k): raise KeyError('{} is not a valid config key'.format(k)) # the types must match, too diff --git a/misc/custom_ops.py b/misc/custom_ops.py index 11b48e8..6d3994a 100644 --- a/misc/custom_ops.py +++ b/misc/custom_ops.py @@ -5,132 +5,271 @@ from __future__ import division from __future__ import print_function -import prettytensor as pt +# import prettytensor as pt +# from prettytensor.pretty_tensor_class import Phase from tensorflow.python.training import moving_averages import tensorflow as tf -from prettytensor.pretty_tensor_class import Phase import numpy as np -class conv_batch_norm(pt.VarStoreMethod): - """Code modification of: - http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow - and - https://github.com/tensorflow/models/blob/master/inception/inception/slim/ops.py""" - - def __call__(self, input_layer, epsilon=1e-5, decay=0.9, name="batch_norm", - in_dim=None, phase=Phase.train): - shape = input_layer.shape - shp = in_dim or shape[-1] - with tf.variable_scope(name) as scope: - self.mean = self.variable('mean', [shp], init=tf.constant_initializer(0.), train=False) - self.variance = self.variable('variance', [shp], init=tf.constant_initializer(1.0), train=False) - - self.gamma = self.variable("gamma", [shp], init=tf.random_normal_initializer(1., 0.02)) - self.beta = self.variable("beta", [shp], init=tf.constant_initializer(0.)) - - if phase == Phase.train: - mean, variance = tf.nn.moments(input_layer.tensor, [0, 1, 2]) - mean.set_shape((shp,)) - variance.set_shape((shp,)) - - update_moving_mean = moving_averages.assign_moving_average(self.mean, mean, decay) - update_moving_variance = moving_averages.assign_moving_average(self.variance, variance, decay) - - with tf.control_dependencies([update_moving_mean, update_moving_variance]): - normalized_x = tf.nn.batch_norm_with_global_normalization( - input_layer.tensor, mean, variance, self.beta, self.gamma, epsilon, - scale_after_normalization=True) - else: - normalized_x = tf.nn.batch_norm_with_global_normalization( - input_layer.tensor, self.mean, self.variance, - self.beta, self.gamma, epsilon, - scale_after_normalization=True) - return input_layer.with_tensor(normalized_x, parameters=self.vars) - - -pt.Register(assign_defaults=('phase'))(conv_batch_norm) - - -@pt.Register(assign_defaults=('phase')) -class fc_batch_norm(conv_batch_norm): - def 
__call__(self, input_layer, *args, **kwargs): - ori_shape = input_layer.shape - if ori_shape[0] is None: - ori_shape[0] = -1 - new_shape = [ori_shape[0], 1, 1, ori_shape[1]] - x = tf.reshape(input_layer.tensor, new_shape) - normalized_x = super(self.__class__, self).__call__(input_layer.with_tensor(x), *args, **kwargs) # input_layer) - return normalized_x.reshape(ori_shape) - - -def leaky_rectify(x, leakiness=0.01): - assert leakiness <= 1 - ret = tf.maximum(x, leakiness * x) - # import ipdb; ipdb.set_trace() - return ret - - -@pt.Register -class custom_conv2d(pt.VarStoreMethod): - def __call__(self, input_layer, output_dim, - k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, in_dim=None, padding='SAME', - name="conv2d"): - with tf.variable_scope(name): - w = self.variable('w', [k_h, k_w, in_dim or input_layer.shape[-1], output_dim], - init=tf.truncated_normal_initializer(stddev=stddev)) - conv = tf.nn.conv2d(input_layer.tensor, w, strides=[1, d_h, d_w, 1], padding=padding) - - # biases = self.variable('biases', [output_dim], init=tf.constant_initializer(0.0)) - # import ipdb; ipdb.set_trace() - # return input_layer.with_tensor(tf.nn.bias_add(conv, biases), parameters=self.vars) - return input_layer.with_tensor(conv, parameters=self.vars) - - -@pt.Register -class custom_deconv2d(pt.VarStoreMethod): - def __call__(self, input_layer, output_shape, - k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, - name="deconv2d"): - output_shape[0] = input_layer.shape[0] - ts_output_shape = tf.pack(output_shape) - with tf.variable_scope(name): - # filter : [height, width, output_channels, in_channels] - w = self.variable('w', [k_h, k_w, output_shape[-1], input_layer.shape[-1]], - init=tf.random_normal_initializer(stddev=stddev)) - - try: - deconv = tf.nn.conv2d_transpose(input_layer, w, - output_shape=ts_output_shape, - strides=[1, d_h, d_w, 1]) - - # Support for versions of TensorFlow before 0.7.0 - except AttributeError: - deconv = tf.nn.deconv2d(input_layer, w, output_shape=ts_output_shape, - strides=[1, d_h, d_w, 1]) - - # biases = self.variable('biases', [output_shape[-1]], init=tf.constant_initializer(0.0)) - # deconv = tf.reshape(tf.nn.bias_add(deconv, biases), [-1] + output_shape[1:]) - deconv = tf.reshape(deconv, [-1] + output_shape[1:]) - - return deconv - - -@pt.Register -class custom_fully_connected(pt.VarStoreMethod): - def __call__(self, input_layer, output_size, scope=None, in_dim=None, stddev=0.02, bias_start=0.0): - shape = input_layer.shape - input_ = input_layer.tensor - try: - if len(shape) == 4: - input_ = tf.reshape(input_, tf.pack([tf.shape(input_)[0], np.prod(shape[1:])])) - input_.set_shape([None, np.prod(shape[1:])]) - shape = input_.get_shape().as_list() - - with tf.variable_scope(scope or "Linear"): - matrix = self.variable("Matrix", [in_dim or shape[1], output_size], dt=tf.float32, - init=tf.random_normal_initializer(stddev=stddev)) - bias = self.variable("bias", [output_size], init=tf.constant_initializer(bias_start)) - return input_layer.with_tensor(tf.matmul(input_, matrix) + bias, parameters=self.vars) - except Exception: - import ipdb; ipdb.set_trace() +def fc(inputs, num_out, name, activation_fn=None, reuse=None): + shape = inputs.get_shape() + if len(shape) == 4: + inputs = tf.reshape(inputs, tf.stack([tf.shape(inputs)[0], np.prod(shape[1:])])) + inputs.set_shape([None, np.prod(shape[1:])]) + + w_init = tf.random_normal_initializer(stddev=0.02) # tf.keras.layers.Dense + # return tf.layers.dense(inputs=inputs, units=num_out, activation=activation_fn, kernel_initializer=w_init, + # 
use_bias=biased, name=name) + return tf.contrib.layers.fully_connected(inputs, num_out, activation_fn=activation_fn, weights_initializer=w_init, + reuse=reuse, scope=name) + + +def concat(inputs, axis): + return tf.concat(values=inputs, axis=axis) + + +def conv_batch_normalization(inputs, name, epsilon=1e-5, in_dim=None, is_training=True, activation_fn=None, reuse=None): + return tf.contrib.layers.batch_norm(inputs, decay=0.9, center=True, scale=True, epsilon=epsilon, + activation_fn=activation_fn, + param_initializers={'beta': tf.constant_initializer(0.), + 'gamma': tf.random_normal_initializer(1., 0.02)}, + reuse=reuse, is_training=is_training, scope=name) + + # ema = tf.train.ExponentialMovingAverage(decay=0.9) + # shape = inputs.get_shape() + # shp = in_dim or shape[-1] + # mean, variance = tf.nn.moments(inputs, [0, 1, 2]) + # mean.set_shape((shp,)) + # variance.set_shape((shp,)) + # ema_apply_op = ema.apply([mean, variance]) + # + # with tf.variable_scope(name, reuse=reuse): + # gamma = tf.get_variable(name='gamma', shape=[shp], initializer=tf.random_normal_initializer(1., 0.02)) + # beta = tf.get_variable(name='beta', shape=[shp], initializer=tf.constant_initializer(0.)) + # + # if is_training: + # with tf.control_dependencies([ema_apply_op]): + # normalized_x = tf.nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon) + # else: + # normalized_x = tf.nn.batch_normalization(inputs, ema.average(mean), ema.average(variance), + # beta, gamma, epsilon) + # + # if activation_fn is not None: + # normalized_x = activation_fn(normalized_x) + # + # return normalized_x + + +def fc_batch_normalization(inputs, name, epsilon=1e-5, in_dim=None, is_training=True, activation_fn=None, reuse=None): + ori_shape = inputs.get_shape() + if ori_shape[0] is None: + ori_shape = -1 + new_shape = [ori_shape[0], 1, 1, ori_shape[1]] + x = tf.reshape(inputs, new_shape) + normalized_x = conv_batch_normalization(x, name, epsilon=epsilon, in_dim=in_dim, is_training=is_training, + activation_fn=activation_fn, reuse=reuse) + return tf.reshape(normalized_x, ori_shape) + + +def reshape(inputs, shape, name): + return tf.reshape(inputs, shape, name) + + +def Conv2d(inputs, k_h, k_w, c_o, s_h, s_w, name, activation_fn=None, reuse=None, padding='SAME', biased=False): + c_i = inputs.get_shape()[-1] + w_init = tf.random_normal_initializer(stddev=0.02) + + convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) + with tf.variable_scope(name, reuse=reuse) as scope: + kernel = tf.get_variable(name='weights', shape=[k_h, k_w, c_i, c_o], initializer=w_init) + output = convolve(inputs, kernel) + + if biased: + biases = tf.get_variable(name='biases', shape=[c_o]) + output = tf.nn.bias_add(output, biases) + if activation_fn is not None: + output = activation_fn(output, name=scope.name) + + return output + + +def Deconv2d(inputs, output_shape, name, k_h, k_w, s_h=2, s_w=2, reuse=None, activation_fn=None, biased=False): + output_shape[0] = inputs.get_shape()[0] + ts_output_shape = tf.stack(output_shape) + w_init = tf.random_normal_initializer(stddev=0.02) + + deconvolve = lambda i, k: tf.nn.conv2d_transpose(i, k, output_shape=ts_output_shape, strides=[1, s_h, s_w, 1]) + with tf.variable_scope(name, reuse=reuse) as scope: + kernel = tf.get_variable(name='weights', shape=[k_h, k_w, output_shape[-1], inputs.get_shape()[-1]], + initializer=w_init) + output = deconvolve(inputs, kernel) + + if biased: + biases = tf.get_variable(name='biases', shape=[output_shape[-1]]) + output = tf.nn.bias_add(output, 
biases) + if activation_fn is not None: + output = activation_fn(output, name=scope.name) + + deconv = tf.reshape(output, [-1] + output_shape[1:]) + + return deconv + + +def add(inputs, name): + return tf.add_n(inputs, name=name) + + +def UpSample(inputs, size, method, align_corners, name): + return tf.image.resize_images(inputs, size, method, align_corners) + + +def flatten(inputs, name): + input_shape = inputs.get_shape() + dim = 1 + for d in input_shape[1:].as_list(): + dim *= d + inputs = tf.reshape(inputs, [-1, dim]) + + return inputs + + +# class conv_batch_norm(pt.VarStoreMethod): +# """Code modification of: +# http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow +# and +# https://github.com/tensorflow/models/blob/master/inception/inception/slim/ops.py""" +# +# def __call__(self, input_layer, epsilon=1e-5, decay=0.9, name="batch_norm", in_dim=None, phase=Phase.train): +# shape = input_layer.shape +# shp = in_dim or shape[-1] +# +# self.gamma = self.variable("gamma", [shp], init=tf.random_normal_initializer(1., 0.02)) +# self.beta = self.variable("beta", [shp], init=tf.constant_initializer(0.)) +# +# self.mean, self.variance = tf.nn.moments(input_layer, [0, 1, 2]) +# # sigh...tf's shape system is so.. +# self.mean.set_shape((shp,)) +# self.variance.set_shape((shp,)) +# +# normalized_x = tf.nn.batch_normalization(input_layer, self.mean, self.variance, None, None, epsilon) +# return input_layer.with_tensor(normalized_x, parameters=self.vars) +# +# # def __call__(self, input_layer, epsilon=1e-5, momentum=0.1, name="batch_norm", +# # in_dim=None, phase=Phase.train): +# # self.ema = tf.train.ExponentialMovingAverage(decay=0.9) +# # +# # shape = input_layer.shape +# # shp = in_dim or shape[-1] +# # with tf.variable_scope (name) as scope: +# # self.gamma = self.variable ("gamma", [shp], init=tf.random_normal_initializer (1., 0.02)) +# # self.beta = self.variable ("beta", [shp], init=tf.constant_initializer (0.)) +# # +# # self.mean, self.variance = tf.nn.moments(input_layer.tensor, [0, 1, 2]) +# # # sigh...tf's shape system is so.. 
+# # self.mean.set_shape ((shp,)) +# # self.variance.set_shape ((shp,)) +# # self.ema_apply_op = self.ema.apply ([self.mean, self.variance]) +# # +# # if phase == Phase.train: +# # with tf.control_dependencies ([self.ema_apply_op]): +# # normalized_x = tf.nn.batch_norm_with_global_normalization ( +# # input_layer.tensor, self.mean, self.variance, self.beta, self.gamma, epsilon, +# # scale_after_normalization=True) +# # else: +# # normalized_x = tf.nn.batch_norm_with_global_normalization ( +# # x, self.ema.average (self.mean), self.ema.average (self.variance), self.beta, +# # self.gamma, epsilon, +# # scale_after_normalization=True) +# # +# # return input_layer.with_tensor (normalized_x, parameters=self.vars) +# +# # +# pt.Register(assign_defaults=('phase'))(conv_batch_norm) +# +# +# @pt.Register(assign_defaults=('phase')) +# class fc_batch_norm(conv_batch_norm): +# def __call__(self, input_layer, *args, **kwargs): +# ori_shape = input_layer.shape +# if ori_shape[0] is None: +# ori_shape[0] = -1 +# new_shape = [ori_shape[0], 1, 1, ori_shape[1]] +# x = tf.reshape(input_layer.tensor, new_shape) +# normalized_x = super(self.__class__, self).__call__(input_layer.with_tensor(x), *args, **kwargs) # input_layer) +# return normalized_x.reshape(ori_shape) +# +# +# def leaky_rectify(x, leakiness=0.01): +# assert leakiness <= 1 +# ret = tf.maximum(x, leakiness * x) +# # import ipdb; ipdb.set_trace() +# return ret +# +# +# @pt.Register +# class custom_conv2d(pt.VarStoreMethod): +# # class custom_conv2d(object): +# def __call__(self, input_layer, output_dim, k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, in_dim=None, padding='SAME', +# name="conv2d"): +# with tf.variable_scope(name): +# w = self.variable('w', [k_h, k_w, in_dim or input_layer.shape[-1], output_dim], +# init=tf.truncated_normal_initializer(stddev=stddev)) +# # w = tf.get_variable('w', shape=[k_h, k_w, in_dim or input_layer.shape[-1], output_dim], dtype=tf.float32, +# # initializer=tf.truncated_normal_initializer(stddev=stddev)) +# conv = tf.nn.conv2d(input_layer.tensor, w, strides=[1, d_h, d_w, 1], padding=padding) +# +# # biases = self.variable('biases', [output_dim], init=tf.constant_initializer(0.0)) +# # import ipdb; ipdb.set_trace() +# # return input_layer.with_tensor(tf.nn.bias_add(conv, biases), parameters=self.vars) +# return input_layer.with_tensor(conv, parameters=self.vars) +# +# +# @pt.Register +# class custom_deconv2d(pt.VarStoreMethod): +# # class custom_deconv2d(object): +# def __call__(self, input_layer, output_shape, k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, name="deconv2d"): +# output_shape[0] = input_layer.shape[0] +# ts_output_shape = tf.stack(output_shape) +# with tf.variable_scope(name): +# # filter : [height, width, output_channels, in_channels] +# w = self.variable('w', [k_h, k_w, output_shape[-1], input_layer.shape[-1]], +# init=tf.random_normal_initializer(stddev=stddev)) +# +# # w = tf.get_variable('w', shape=[k_h, k_w, output_shape[-1], input_layer.shape[-1]], dtype=tf.float32, +# # initializer=tf.random_normal_initializer(stddev=stddev)) +# +# try: +# deconv = tf.nn.conv2d_transpose(input_layer, w, output_shape=ts_output_shape, strides=[1, d_h, d_w, 1]) +# +# # Support for versions of TensorFlow before 0.7.0 +# except AttributeError: +# deconv = tf.nn.deconv2d(input_layer, w, output_shape=ts_output_shape, +# strides=[1, d_h, d_w, 1]) +# +# # biases = self.variable('biases', [output_shape[-1]], init=tf.constant_initializer(0.0)) +# # deconv = tf.reshape(tf.nn.bias_add(deconv, biases), [-1] + output_shape[1:]) +# 
deconv = tf.reshape(deconv, [-1] + output_shape[1:]) +# +# return deconv +# +# +# @pt.Register +# class custom_fully_connected(pt.VarStoreMethod): +# # class custom_fully_connected(object): +# def __call__(self, input_layer, output_size, scope=None, in_dim=None, stddev=0.02, bias_start=0.0): +# shape = input_layer.shape +# input_ = input_layer.tensor +# try: +# if len(shape) == 4: +# input_ = tf.reshape(input_, tf.stack([tf.shape(input_)[0], np.prod(shape[1:])])) +# input_.set_shape([None, np.prod(shape[1:])]) +# shape = input_.get_shape().as_list() +# +# with tf.variable_scope(scope or "Linear"): +# matrix = self.variable("Matrix", [in_dim or shape[1], output_size], dt=tf.float32, +# init=tf.random_normal_initializer(stddev=stddev)) +# bias = self.variable("bias", [output_size], init=tf.constant_initializer(bias_start)) +# return input_layer.with_tensor(tf.matmul(input_, matrix) + bias, parameters=self.vars) +# except Exception: +# import ipdb; ipdb.set_trace() diff --git a/misc/datasets.py b/misc/datasets.py index 624e6af..a72e865 100644 --- a/misc/datasets.py +++ b/misc/datasets.py @@ -8,9 +8,7 @@ class Dataset(object): - def __init__(self, images, imsize, embeddings=None, - filenames=None, workdir=None, - labels=None, aug_flag=True, + def __init__(self, images, imsize, embeddings=None, filenames=None, workdir=None, labels=None, aug_flag=True, class_id=None, class_range=None): self._images = images self._embeddings = embeddings @@ -59,8 +57,7 @@ def readCaptions(self, filenames, class_id): if name.find('jpg/') != -1: # flowers dataset class_name = 'class_%05d/' % class_id name = name.replace('jpg/', class_name) - cap_path = '%s/text_c10/%s.txt' %\ - (self.workdir, name) + cap_path = '%s/text_c10/%s.txt' % (self.workdir, name) with open(cap_path, "r") as f: captions = f.read().split('\n') captions = [cap for cap in captions if len(cap) > 0] @@ -68,14 +65,13 @@ def readCaptions(self, filenames, class_id): def transform(self, images): if self._aug_flag: - transformed_images =\ - np.zeros([images.shape[0], self._imsize, self._imsize, 3]) + transformed_images = np.zeros([images.shape[0], self._imsize, self._imsize, 3]) ori_size = images.shape[1] for i in range(images.shape[0]): h1 = np.floor((ori_size - self._imsize) * np.random.random()) w1 = np.floor((ori_size - self._imsize) * np.random.random()) - cropped_image =\ - images[i][w1: w1 + self._imsize, h1: h1 + self._imsize, :] + cropped_image = images[i][int(w1): int(w1 + self._imsize), int(h1): int(h1 + self._imsize), :] + # cropped_image = images[i][w1: w1 + self._imsize, h1: h1 + self._imsize, :] if random.random() > 0.5: transformed_images[i] = np.fliplr(cropped_image) else: @@ -93,12 +89,10 @@ def sample_embeddings(self, embeddings, filenames, class_id, sample_num): sampled_embeddings = [] sampled_captions = [] for i in range(batch_size): - randix = np.random.choice(embedding_num, - sample_num, replace=False) + randix = np.random.choice(embedding_num, sample_num, replace=False) if sample_num == 1: randix = int(randix) - captions = self.readCaptions(filenames[i], - class_id[i]) + captions = self.readCaptions(filenames[i], class_id[i]) sampled_captions.append(captions[randix]) sampled_embeddings.append(embeddings[i, randix, :]) else: @@ -128,11 +122,8 @@ def next_batch(self, batch_size, window): current_ids = self._perm[start:end] fake_ids = np.random.randint(self._num_examples, size=batch_size) - collision_flag =\ - (self._class_id[current_ids] == self._class_id[fake_ids]) - fake_ids[collision_flag] =\ - (fake_ids[collision_flag] + - 
np.random.randint(100, 200)) % self._num_examples + collision_flag = (self._class_id[current_ids] == self._class_id[fake_ids]) + fake_ids[collision_flag] = (fake_ids[collision_flag] + np.random.randint(100, 200)) % self._num_examples sampled_images = self._images[current_ids] sampled_wrong_images = self._images[fake_ids, :, :, :] @@ -148,9 +139,8 @@ def next_batch(self, batch_size, window): if self._embeddings is not None: filenames = [self._filenames[i] for i in current_ids] class_id = [self._class_id[i] for i in current_ids] - sampled_embeddings, sampled_captions = \ - self.sample_embeddings(self._embeddings[current_ids], - filenames, class_id, window) + sampled_embeddings, sampled_captions = self.sample_embeddings(self._embeddings[current_ids], filenames, + class_id, window) ret_list.append(sampled_embeddings) ret_list.append(sampled_captions) else: @@ -185,8 +175,7 @@ def next_batch_test(self, batch_size, start, max_captions): sampled_filenames = self._filenames[start:end] sampled_class_id = self._class_id[start:end] for i in range(len(sampled_filenames)): - captions = self.readCaptions(sampled_filenames[i], - sampled_class_id[i]) + captions = self.readCaptions(sampled_filenames[i], sampled_class_id[i]) # print(captions) sampled_captions.append(captions) @@ -194,8 +183,7 @@ def next_batch_test(self, batch_size, start, max_captions): batch = sampled_embeddings[:, i, :] sampled_embeddings_batchs.append(np.squeeze(batch)) - return [sampled_images, sampled_embeddings_batchs, - self._saveIDs[start:end], sampled_captions] + return [sampled_images, sampled_embeddings_batchs, self._saveIDs[start:end], sampled_captions] class TextDataset(object): @@ -207,8 +195,7 @@ def __init__(self, workdir, embedding_type, hr_lr_ratio): elif self.hr_lr_ratio == 4: self.image_filename = '/304images.pickle' - self.image_shape = [lr_imsize * self.hr_lr_ratio, - lr_imsize * self.hr_lr_ratio, 3] + self.image_shape = [lr_imsize * self.hr_lr_ratio, lr_imsize * self.hr_lr_ratio, 3] self.image_dim = self.image_shape[0] * self.image_shape[1] * 3 self.embedding_shape = None self.train = None @@ -226,7 +213,7 @@ def get_data(self, pickle_path, aug_flag=True): print('images: ', images.shape) with open(pickle_path + self.embedding_filename, 'rb') as f: - embeddings = pickle.load(f) + embeddings = pickle.load(f, encoding="latin-1") embeddings = np.array(embeddings) self.embedding_shape = [embeddings.shape[-1]] print('embeddings: ', embeddings.shape) @@ -234,8 +221,6 @@ def get_data(self, pickle_path, aug_flag=True): list_filenames = pickle.load(f) print('list_filenames: ', len(list_filenames), list_filenames[0]) with open(pickle_path + '/class_info.pickle', 'rb') as f: - class_id = pickle.load(f) + class_id = pickle.load(f, encoding="latin-1") - return Dataset(images, self.image_shape[0], embeddings, - list_filenames, self.workdir, None, - aug_flag, class_id) + return Dataset(images, self.image_shape[0], embeddings, list_filenames, self.workdir, None, aug_flag, class_id) diff --git a/misc/preprocess_birds.py b/misc/preprocess_birds.py index c93736a..717f703 100644 --- a/misc/preprocess_birds.py +++ b/misc/preprocess_birds.py @@ -7,10 +7,12 @@ import numpy as np import os import pickle -from misc.utils import get_image -import scipy.misc +from utils import get_image import pandas as pd +# > Python3 +from skimage.transform import resize + # from glob import glob # TODO: 1. 
current label is temporary, need to change according to real label

@@ -43,7 +45,7 @@ def load_bbox(data_dir):
     # filename_bbox = {img_file[:-4]: [] for img_file in filenames}
     numImgs = len(filenames)
 
-    for i in xrange(0, numImgs):
+    for i in range(numImgs):
         # bbox = [x-left, y-top, width, height]
         bbox = df_bounding_boxes.iloc[i][1:].tolist()
 
@@ -64,7 +66,8 @@ def save_data_list(inpath, outpath, filenames, filename_bbox):
         img = get_image(f_name, LOAD_SIZE, is_crop=True, bbox=bbox)
         img = img.astype('uint8')
         hr_images.append(img)
+        # preserve_range keeps the 0-255 scale imresize returned; skimage would otherwise rescale to [0, 1]
+        lr_img = resize(img, [lr_size, lr_size], order=3, preserve_range=True).astype('uint8')
-        lr_img = scipy.misc.imresize(img, [lr_size, lr_size], 'bicubic')
+        #lr_img = scipy.misc.imresize(img, [lr_size, lr_size], 'bicubic')
         lr_images.append(lr_img)
         cnt += 1
         if cnt % 100 == 0:
diff --git a/misc/preprocess_flowers.py b/misc/preprocess_flowers.py
index e6f9789..b273c4e 100644
--- a/misc/preprocess_flowers.py
+++ b/misc/preprocess_flowers.py
@@ -7,9 +7,12 @@
 import numpy as np
 import os
 import pickle
-from misc.utils import get_image
+from utils import get_image
 import scipy.misc
 
+# > Python3
+from skimage.transform import resize
+
 # from glob import glob
 
 # TODO: 1. current label is temporary, need to change according to real label
@@ -39,7 +42,7 @@ def save_data_list(inpath, outpath, filenames):
         img = get_image(f_name, LOAD_SIZE, is_crop=False)
         img = img.astype('uint8')
         hr_images.append(img)
-        lr_img = scipy.misc.imresize(img, [lr_size, lr_size], 'bicubic')
+        lr_img = resize(img, [lr_size, lr_size], order=3, preserve_range=True).astype('uint8')  # keep the 0-255 range
         lr_images.append(lr_img)
         cnt += 1
         if cnt % 100 == 0:
diff --git a/misc/tf_upgrade.py b/misc/tf_upgrade.py
new file mode 100644
index 0000000..11c8378
--- /dev/null
+++ b/misc/tf_upgrade.py
@@ -0,0 +1,665 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Upgrader for Python scripts from pre-1.0 TensorFlow to 1.0 TensorFlow."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import argparse
+import ast
+import collections
+import os
+import shutil
+import sys
+import tempfile
+import traceback
+
+
+class APIChangeSpec(object):
+  """List of maps that describe what changed in the API."""
+
+  def __init__(self):
+    # Maps from a function name to a dictionary that describes how to
+    # map from an old argument keyword to the new argument keyword.
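+    # For example, the {"dim": "axis"} entry under "tf.expand_dims" below is
+    # what lets the upgrader rewrite a pre-1.0 call such as
+    #     tf.expand_dims(t, dim=0)
+    # into the 1.0 spelling
+    #     tf.expand_dims(t, axis=0)
+    # (only keyword usages are touched by this table; positional arguments
+    # are handled by function_reorders further down).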
+ self.function_keyword_renames = { + "tf.count_nonzero": { + "reduction_indices": "axis" + }, + "tf.reduce_all": { + "reduction_indices": "axis" + }, + "tf.reduce_any": { + "reduction_indices": "axis" + }, + "tf.reduce_max": { + "reduction_indices": "axis" + }, + "tf.reduce_mean": { + "reduction_indices": "axis" + }, + "tf.reduce_min": { + "reduction_indices": "axis" + }, + "tf.reduce_prod": { + "reduction_indices": "axis" + }, + "tf.reduce_sum": { + "reduction_indices": "axis" + }, + "tf.reduce_logsumexp": { + "reduction_indices": "axis" + }, + "tf.expand_dims": { + "dim": "axis" + }, + "tf.argmax": { + "dimension": "axis" + }, + "tf.argmin": { + "dimension": "axis" + }, + "tf.reduce_join": { + "reduction_indices": "axis" + }, + "tf.sparse_concat": { + "concat_dim": "axis" + }, + "tf.sparse_split": { + "split_dim": "axis" + }, + "tf.sparse_reduce_sum": { + "reduction_axes": "axis" + }, + "tf.reverse_sequence": { + "seq_dim": "seq_axis", + "batch_dim": "batch_axis" + }, + "tf.sparse_reduce_sum_sparse": { + "reduction_axes": "axis" + }, + "tf.squeeze": { + "squeeze_dims": "axis" + }, + "tf.split": { + "split_dim": "axis", + "num_split": "num_or_size_splits" + }, + "tf.concat": { + "concat_dim": "axis" + }, + } + + # Mapping from function to the new name of the function + self.function_renames = { + "tf.inv": "tf.reciprocal", + "tf.contrib.deprecated.scalar_summary": "tf.summary.scalar", + "tf.contrib.deprecated.histogram_summary": "tf.summary.histogram", + "tf.listdiff": "tf.setdiff1d", + "tf.list_diff": "tf.setdiff1d", + "tf.mul": "tf.multiply", + "tf.neg": "tf.negative", + "tf.sub": "tf.subtract", + "tf.train.SummaryWriter": "tf.summary.FileWriter", + "tf.scalar_summary": "tf.summary.scalar", + "tf.histogram_summary": "tf.summary.histogram", + "tf.audio_summary": "tf.summary.audio", + "tf.image_summary": "tf.summary.image", + "tf.merge_summary": "tf.summary.merge", + "tf.merge_all_summaries": "tf.summary.merge_all", + "tf.image.per_image_whitening": "tf.image.per_image_standardization", + "tf.all_variables": "tf.global_variables", + "tf.VARIABLES": "tf.GLOBAL_VARIABLES", + "tf.initialize_all_variables": "tf.global_variables_initializer", + "tf.initialize_variables": "tf.variables_initializer", + "tf.initialize_local_variables": "tf.local_variables_initializer", + "tf.batch_matrix_diag": "tf.matrix_diag", + "tf.batch_band_part": "tf.band_part", + "tf.batch_set_diag": "tf.set_diag", + "tf.batch_matrix_transpose": "tf.matrix_transpose", + "tf.batch_matrix_determinant": "tf.matrix_determinant", + "tf.batch_matrix_inverse": "tf.matrix_inverse", + "tf.batch_cholesky": "tf.cholesky", + "tf.batch_cholesky_solve": "tf.cholesky_solve", + "tf.batch_matrix_solve": "tf.matrix_solve", + "tf.batch_matrix_triangular_solve": "tf.matrix_triangular_solve", + "tf.batch_matrix_solve_ls": "tf.matrix_solve_ls", + "tf.batch_self_adjoint_eig": "tf.self_adjoint_eig", + "tf.batch_self_adjoint_eigvals": "tf.self_adjoint_eigvals", + "tf.batch_svd": "tf.svd", + "tf.batch_fft": "tf.fft", + "tf.batch_ifft": "tf.ifft", + "tf.batch_ifft2d": "tf.ifft2d", + "tf.batch_fft3d": "tf.fft3d", + "tf.batch_ifft3d": "tf.ifft3d", + "tf.select": "tf.where", + "tf.complex_abs": "tf.abs", + "tf.batch_matmul": "tf.matmul", + "tf.pack": "tf.stack", + "tf.unpack": "tf.unstack", + } + + self.change_to_function = { + "tf.ones_initializer", + "tf.zeros_initializer", + } + + # Functions that were reordered should be changed to the new keyword args + # for safety, if positional arguments are used. 
If you have reversed the
+    # positional arguments yourself, this could do the wrong thing.
+    self.function_reorders = {
+        "tf.split": ["axis", "num_or_size_splits", "value", "name"],
+        "tf.sparse_split": ["axis", "num_or_size_splits", "value", "name"],
+        "tf.concat": ["concat_dim", "values", "name"],
+        "tf.svd": ["tensor", "compute_uv", "full_matrices", "name"],
+        "tf.nn.softmax_cross_entropy_with_logits": [
+            "logits", "labels", "dim", "name"],
+        "tf.nn.sparse_softmax_cross_entropy_with_logits": [
+            "logits", "labels", "name"],
+        "tf.nn.sigmoid_cross_entropy_with_logits": [
+            "logits", "labels", "name"]
+    }
+
+    # Specially handled functions.
+    self.function_handle = {"tf.reverse": self._reverse_handler}
+
+  @staticmethod
+  def _reverse_handler(file_edit_recorder, node):
+    # TODO(aselle): Could check for a literal list of bools and try to convert
+    # them to indices.
+    comment = ("ERROR: tf.reverse has had its argument semantics changed\n"
+               "significantly. The converter cannot detect this reliably, so "
+               "you need to inspect this usage manually.\n")
+    file_edit_recorder.add(comment,
+                           node.lineno,
+                           node.col_offset,
+                           "tf.reverse",
+                           "tf.reverse",
+                           error="tf.reverse requires manual check.")
+
+
+class FileEditTuple(collections.namedtuple(
+    "FileEditTuple", ["comment", "line", "start", "old", "new"])):
+  """Each edit that is recorded by a FileEditRecorder.
+  Fields:
+    comment: A description of the edit and why it was made.
+    line: The line number in the file where the edit occurs (1-indexed).
+    start: The column offset on that line where the edit begins (0-indexed).
+    old: text string to remove (this must match what was in file).
+    new: text string to add in place of `old`.
+  """
+
+  __slots__ = ()
+
+
+class FileEditRecorder(object):
+  """Record changes that need to be done to the file."""
+
+  def __init__(self, filename):
+    # all edits are lists of chars
+    self._filename = filename
+
+    self._line_to_edit = collections.defaultdict(list)
+    self._errors = []
+
+  def process(self, text):
+    """Process a list of strings, each corresponding to the recorded changes.
+    Args:
+      text: A list of lines of text (assumed to contain newlines)
+    Returns:
+      A tuple of the modified text, a textual description of the edits, and
+      the accumulated errors.
+    Raises:
+      ValueError: if substitution source location does not have expected text.
+    """
+
+    change_report = ""
+
+    # Iterate over each line
+    for line, edits in self._line_to_edit.items():
+      offset = 0
+      # sort by column so that edits are processed in order, which makes the
+      # indexing adjustments cumulative for changes that change the string
+      # length
+      edits.sort(key=lambda x: x.start)
+
+      # Extract each line to a list of characters, because mutable lists
+      # are editable, unlike immutable strings.
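+      # Sketch of the offset bookkeeping below: if a single line contains
+      #     tf.mul(a, tf.sub(b, c))
+      # and both calls get renamed, applying "tf.mul" -> "tf.multiply" first
+      # lengthens the line by 5 characters, so the recorded start column of
+      # the "tf.sub" edit must be shifted by that same amount before use.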
+ char_array = list(text[line - 1]) + + # Record a description of the change + change_report += "%r Line %d\n" % (self._filename, line) + change_report += "-" * 80 + "\n\n" + for e in edits: + change_report += "%s\n" % e.comment + change_report += "\n Old: %s" % (text[line - 1]) + + # Make underscore buffers for underlining where in the line the edit was + change_list = [" "] * len(text[line - 1]) + change_list_new = [" "] * len(text[line - 1]) + + # Iterate for each edit + for e in edits: + # Create effective start, end by accounting for change in length due + # to previous edits + start_eff = e.start + offset + end_eff = start_eff + len(e.old) + + # Make sure the edit is changing what it should be changing + old_actual = "".join(char_array[start_eff:end_eff]) + if old_actual != e.old: + raise ValueError("Expected text %r but got %r" % + ("".join(e.old), "".join(old_actual))) + # Make the edit + char_array[start_eff:end_eff] = list(e.new) + + # Create the underline highlighting of the before and after + change_list[e.start:e.start + len(e.old)] = "~" * len(e.old) + change_list_new[start_eff:end_eff] = "~" * len(e.new) + + # Keep track of how to generate effective ranges + offset += len(e.new) - len(e.old) + + # Finish the report comment + change_report += " %s\n" % "".join(change_list) + text[line - 1] = "".join(char_array) + change_report += " New: %s" % (text[line - 1]) + change_report += " %s\n\n" % "".join(change_list_new) + return "".join(text), change_report, self._errors + + def add(self, comment, line, start, old, new, error=None): + """Add a new change that is needed. + Args: + comment: A description of what was changed + line: Line number (1 indexed) + start: Column offset (0 indexed) + old: old text + new: new text + error: this "edit" is something that cannot be fixed automatically + Returns: + None + """ + + self._line_to_edit[line].append( + FileEditTuple(comment, line, start, old, new)) + if error: + self._errors.append("%s:%d: %s" % (self._filename, line, error)) + + +class TensorFlowCallVisitor(ast.NodeVisitor): + """AST Visitor that finds TensorFlow Function calls. + Updates function calls from old API version to new API version. + """ + + def __init__(self, filename, lines): + self._filename = filename + self._file_edit = FileEditRecorder(filename) + self._lines = lines + self._api_change_spec = APIChangeSpec() + + def process(self, lines): + return self._file_edit.process(lines) + + def generic_visit(self, node): + ast.NodeVisitor.generic_visit(self, node) + + def _rename_functions(self, node, full_name): + function_renames = self._api_change_spec.function_renames + try: + new_name = function_renames[full_name] + self._file_edit.add("Renamed function %r to %r" % (full_name, + new_name), + node.lineno, node.col_offset, full_name, new_name) + except KeyError: + pass + + def _get_attribute_full_path(self, node): + """Traverse an attribute to generate a full name e.g. tf.foo.bar. + Args: + node: A Node of type Attribute. + Returns: + a '.'-delimited full-name or None if the tree was not a simple form. + i.e. `foo()+b).bar` returns None, while `a.b.c` would return "a.b.c". + """ + curr = node + items = [] + while not isinstance(curr, ast.Name): + if not isinstance(curr, ast.Attribute): + return None + items.append(curr.attr) + curr = curr.value + items.append(curr.id) + return ".".join(reversed(items)) + + def _find_true_position(self, node): + """Return correct line number and column offset for a given node. 
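+    For example, in the line
+        result = [f(x) for x in data]
+    the ListComp node's reported col_offset points at "f(x)", not at the
+    opening "[", so the real start has to be recovered by scanning backwards.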
+    This is necessary mainly because ListComp's location reporting reports
+    the next token after the list comprehension list opening.
+    Args:
+      node: Node for which we wish to know the lineno and col_offset
+    """
+    import re
+    find_open = re.compile("^\s*(\\[).*$")
+    find_string_chars = re.compile("['\"]")
+
+    if isinstance(node, ast.ListComp):
+      # Strangely, ast.ListComp returns the col_offset of the first token
+      # after the '[' token which appears to be a bug. Workaround by
+      # explicitly finding the real start of the list comprehension.
+      line = node.lineno
+      col = node.col_offset
+      # loop over lines
+      while 1:
+        # Reverse the text and use a regular expression to search for
+        # whitespace
+        text = self._lines[line - 1]
+        reversed_preceding_text = text[:col][::-1]
+        # First find if a [ can be found with only whitespace between it and
+        # col.
+        m = find_open.match(reversed_preceding_text)
+        if m:
+          new_col_offset = col - m.start(1) - 1
+          return line, new_col_offset
+        else:
+          if (reversed_preceding_text == "" or
+              reversed_preceding_text.isspace()):
+            line = line - 1
+            prev_line = self._lines[line - 1]
+            # TODO(aselle):
+            # this is poor comment detection, but it is good enough for
+            # cases where the comment does not contain string literal starting/
+            # ending characters. If ast gave us start and end locations of the
+            # ast nodes rather than just start, we could use string literal
+            # node ranges to filter out spurious #'s that appear in string
+            # literals.
+            comment_start = prev_line.find("#")
+            if comment_start == -1:
+              col = len(prev_line) - 1
+            elif find_string_chars.search(prev_line[comment_start:]) is None:
+              col = comment_start
+            else:
+              return None, None
+          else:
+            return None, None
+    # Most other nodes return proper locations ("with" statements notably do
+    # not), but it is not possible to use that in an argument.
+    return node.lineno, node.col_offset
+
+  def visit_Call(self, node):  # pylint: disable=invalid-name
+    """Handle visiting a call node in the AST.
+    Args:
+      node: Current Node
+    """
+
+    # Find a simple attribute name path e.g. "tf.foo.bar"
+    full_name = self._get_attribute_full_path(node.func)
+
+    # Make sure the func is marked as being part of a call
+    node.func.is_function_for_call = True
+
+    if full_name and full_name.startswith("tf."):
+      # Call special handlers
+      function_handles = self._api_change_spec.function_handle
+      if full_name in function_handles:
+        function_handles[full_name](self._file_edit, node)
+
+      # Examine any non-keyword argument and make it into a keyword argument
+      # if reordering required.
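+      # Illustration using the tables defined above: the pre-1.0 call
+      #     tf.concat(1, [a, b])
+      # comes out of this pass as
+      #     tf.concat(axis=1, values=[a, b])
+      # ("concat_dim" is taken from function_reorders for the first slot and
+      # then renamed to "axis" through function_keyword_renames).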
+      function_reorders = self._api_change_spec.function_reorders
+      function_keyword_renames = (
+          self._api_change_spec.function_keyword_renames)
+
+      if full_name in function_reorders:
+        reordered = function_reorders[full_name]
+        for idx, arg in enumerate(node.args):
+          lineno, col_offset = self._find_true_position(arg)
+          if lineno is None or col_offset is None:
+            self._file_edit.add(
+                "Failed to add keyword %r to reordered function %r"
+                % (reordered[idx], full_name), arg.lineno, arg.col_offset,
+                "", "",
+                error="A necessary keyword argument failed to be inserted.")
+          else:
+            keyword_arg = reordered[idx]
+            if (full_name in function_keyword_renames and
+                keyword_arg in function_keyword_renames[full_name]):
+              keyword_arg = function_keyword_renames[full_name][keyword_arg]
+            self._file_edit.add("Added keyword %r to reordered function %r"
+                                % (reordered[idx], full_name), lineno,
+                                col_offset, "", keyword_arg + "=")
+
+      # Examine each keyword argument and convert it to the final renamed form
+      renamed_keywords = ({} if full_name not in function_keyword_renames else
+                          function_keyword_renames[full_name])
+      for keyword in node.keywords:
+        argkey = keyword.arg
+        argval = keyword.value
+
+        if argkey in renamed_keywords:
+          argval_lineno, argval_col_offset = self._find_true_position(argval)
+          if (argval_lineno is not None and argval_col_offset is not None):
+            # TODO(aselle): We should scan backward to find the start of the
+            # keyword key. Unfortunately ast does not give you the location of
+            # keyword keys, so we are forced to infer it from the keyword arg
+            # value.
+            key_start = argval_col_offset - len(argkey) - 1
+            key_end = key_start + len(argkey) + 1
+            if self._lines[argval_lineno - 1][key_start:key_end] == argkey + "=":
+              self._file_edit.add("Renamed keyword argument from %r to %r" %
+                                  (argkey, renamed_keywords[argkey]),
+                                  argval_lineno,
+                                  argval_col_offset - len(argkey) - 1,
+                                  argkey + "=", renamed_keywords[argkey] + "=")
+              continue
+          self._file_edit.add(
+              "Failed to rename keyword argument from %r to %r" %
+              (argkey, renamed_keywords[argkey]),
+              argval.lineno,
+              argval.col_offset - len(argkey) - 1,
+              "", "",
+              error="Failed to find keyword lexicographically. Fix manually.")
+
+    ast.NodeVisitor.generic_visit(self, node)
+
+  def visit_Attribute(self, node):  # pylint: disable=invalid-name
+    """Handle bare Attributes i.e. [tf.foo, tf.bar].
+    Args:
+      node: Node that is of type ast.Attribute
+    """
+    full_name = self._get_attribute_full_path(node)
+    if full_name and full_name.startswith("tf."):
+      self._rename_functions(node, full_name)
+    if full_name in self._api_change_spec.change_to_function:
+      if not hasattr(node, "is_function_for_call"):
+        new_text = full_name + "()"
+        self._file_edit.add("Changed %r to %r" % (full_name, new_text),
+                            node.lineno, node.col_offset, full_name, new_text)
+
+    ast.NodeVisitor.generic_visit(self, node)
+
+
+class TensorFlowCodeUpgrader(object):
+  """Class that handles upgrading a set of Python files to TensorFlow 1.0."""
+
+  def __init__(self):
+    pass
+
+  def process_file(self, in_filename, out_filename):
+    """Process the given python file for incompatible changes.
+    Args:
+      in_filename: filename to parse
+      out_filename: output file to write to
+    Returns:
+      A tuple representing number of files processed, log of actions, errors
+    """
+
+    # Write to a temporary file, just in case we are doing an in-place modify.
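+    # Hypothetical programmatic use, for illustration (filenames made up):
+    #     upgrader = TensorFlowCodeUpgrader()
+    #     count, report, errors = upgrader.process_file("old_net.py", "new_net.py")
+    # Writing to a NamedTemporaryFile and then moving it over out_filename is
+    # what keeps in_filename == out_filename (a true in-place edit) safe.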
+    with open(in_filename, "r") as in_file, \
+        tempfile.NamedTemporaryFile("w", delete=False) as temp_file:
+      ret = self.process_opened_file(
+          in_filename, in_file, out_filename, temp_file)
+
+    shutil.move(temp_file.name, out_filename)
+    return ret
+
+  # Broad exceptions are required here because ast throws whatever it wants.
+  # pylint: disable=broad-except
+  def process_opened_file(self, in_filename, in_file, out_filename, out_file):
+    """Process the given python file for incompatible changes.
+    This function is split out to facilitate StringIO testing from
+    tf_upgrade_test.py.
+    Args:
+      in_filename: filename to parse
+      in_file: opened file (or StringIO)
+      out_filename: output file to write to
+      out_file: opened file (or StringIO)
+    Returns:
+      A tuple representing number of files processed, log of actions, errors
+    """
+    process_errors = []
+    text = "-" * 80 + "\n"
+    text += "Processing file %r\n outputting to %r\n" % (in_filename,
+                                                         out_filename)
+    text += "-" * 80 + "\n\n"
+
+    parsed_ast = None
+    lines = in_file.readlines()
+    try:
+      parsed_ast = ast.parse("".join(lines))
+    except Exception:
+      text += "Failed to parse %r\n\n" % in_filename
+      text += traceback.format_exc()
+    if parsed_ast:
+      visitor = TensorFlowCallVisitor(in_filename, lines)
+      visitor.visit(parsed_ast)
+      out_text, new_text, process_errors = visitor.process(lines)
+      text += new_text
+      if out_file:
+        out_file.write(out_text)
+    text += "\n"
+    return 1, text, process_errors
+  # pylint: enable=broad-except
+
+  def process_tree(self, root_directory, output_root_directory):
+    """Processes upgrades on an entire tree of python files in place.
+    Note that only Python files are processed. If you have custom code in
+    other languages, you will need to manually upgrade those.
+    Args:
+      root_directory: Directory to walk and process.
+      output_root_directory: Directory to use as base
+    Returns:
+      A tuple of files processed, the report string for all files, and errors
+    """
+
+    # make sure output directory doesn't exist
+    if output_root_directory and os.path.exists(output_root_directory):
+      print("Output directory %r must not already exist."
% ( + output_root_directory)) + sys.exit(1) + + # make sure output directory does not overlap with root_directory + norm_root = os.path.split(os.path.normpath(root_directory)) + norm_output = os.path.split(os.path.normpath(output_root_directory)) + if norm_root == norm_output: + print("Output directory %r same as input directory %r" % ( + root_directory, output_root_directory)) + sys.exit(1) + + # Collect list of files to process (we do this to correctly handle if the + # user puts the output directory in some sub directory of the input dir) + files_to_process = [] + for dir_name, _, file_list in os.walk(root_directory): + py_files = [f for f in file_list if f.endswith(".py")] + for filename in py_files: + fullpath = os.path.join(dir_name, filename) + fullpath_output = os.path.join( + output_root_directory, os.path.relpath(fullpath, root_directory)) + files_to_process.append((fullpath, fullpath_output)) + + file_count = 0 + tree_errors = [] + report = "" + report += ("=" * 80) + "\n" + report += "Input tree: %r\n" % root_directory + report += ("=" * 80) + "\n" + + for input_path, output_path in files_to_process: + output_directory = os.path.dirname(output_path) + if not os.path.isdir(output_directory): + os.makedirs(output_directory) + file_count += 1 + _, l_report, l_errors = self.process_file(input_path, output_path) + tree_errors += l_errors + report += l_report + return file_count, report, tree_errors + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description="""Convert a TensorFlow Python file to 1.0 +Simple usage: + tf_convert.py --infile foo.py --outfile bar.py + tf_convert.py --intree ~/code/old --outtree ~/code/new +""") + parser.add_argument( + "--infile", + dest="input_file", + help="If converting a single file, the name of the file " + "to convert") + parser.add_argument( + "--outfile", + dest="output_file", + help="If converting a single file, the output filename.") + parser.add_argument( + "--intree", + dest="input_tree", + help="If converting a whole tree of files, the directory " + "to read from (relative or absolute).") + parser.add_argument( + "--outtree", + dest="output_tree", + help="If converting a whole tree of files, the output " + "directory (relative or absolute).") + parser.add_argument( + "--reportfile", + dest="report_filename", + help=("The name of the file where the report log is " + "stored." 
+              "(default: %(default)s)"),
+      default="report.txt")
+  args = parser.parse_args()
+
+  upgrade = TensorFlowCodeUpgrader()
+  report_text = None
+  report_filename = args.report_filename
+  files_processed = 0
+  if args.input_file:
+    files_processed, report_text, errors = upgrade.process_file(
+        args.input_file, args.output_file)
+    files_processed = 1
+  elif args.input_tree:
+    files_processed, report_text, errors = upgrade.process_tree(
+        args.input_tree, args.output_tree)
+  else:
+    parser.print_help()
+  if report_text:
+    open(report_filename, "w").write(report_text)
+  print("TensorFlow 1.0 Upgrade Script")
+  print("-----------------------------")
+  print("Converted %d files\n" % files_processed)
+  print("Detected %d errors that require attention" % len(errors))
+  print("-" * 80)
+  print("\n".join(errors))
+  print("\nMake sure to read the detailed log %r\n" % report_filename)
diff --git a/misc/utils.py b/misc/utils.py
index 961954d..ef437a5 100644
--- a/misc/utils.py
+++ b/misc/utils.py
@@ -6,10 +6,13 @@
 from __future__ import print_function
 
 import numpy as np
-import scipy.misc
 import os
 import errno
 
+# > Python3
+from skimage.transform import resize
+import imageio
+
 
 def get_image(image_path, image_size, is_crop=False, bbox=None):
     global index
@@ -43,14 +46,17 @@ def transform(image, image_size, is_crop, bbox):
     image = colorize(image)
     if is_crop:
         image = custom_crop(image, bbox)
-    #
-    transformed_image =\
-        scipy.misc.imresize(image, [image_size, image_size], 'bicubic')
-    return np.array(transformed_image)
+    transformed_image = resize(image, [image_size, image_size], order=3)
+    # order=3 means bicubic interpolation, the closest equivalent of the
+    # removed scipy.misc.imresize(image, [image_size, image_size], 'bicubic'):
+    # https://scikit-image.org/docs/dev/api/skimage.transform.html#skimage.transform.resize
+    # resize() already returns an ndarray, so no np.array() conversion is needed
+    return transformed_image
 
 
 def imread(path):
-    img = scipy.misc.imread(path)
+    img = imageio.imread(path)
     if len(img.shape) == 0:
         raise ValueError(path + " got loaded as a dimensionless array!")
     return img.astype(np.float)
@@ -65,6 +71,11 @@ def colorize(img):
     return img
 
 
+def convert_to_uint8(img):
+    img = (img + 1.) * (255 / 2.)
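+    # (img + 1.) * (255 / 2.) rescales the generator's tanh output from
+    # [-1, 1] to [0, 255], so the uint8 cast below keeps the full intensity range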
+ img = img.astype(np.uint8) + return img + def mkdir_p(path): try: os.makedirs(path) diff --git a/stageI/model.py b/stageI/model.py index 15d8d9c..6fdcafc 100644 --- a/stageI/model.py +++ b/stageI/model.py @@ -1,11 +1,14 @@ from __future__ import division from __future__ import print_function -import prettytensor as pt +# import prettytensor as pt import tensorflow as tf -import misc.custom_ops -from misc.custom_ops import leaky_rectify -from misc.config import cfg +import sys + +sys.path.append('./misc') + +from custom_ops import fc, conv_batch_normalization, fc_batch_normalization, reshape, Conv2d, Deconv2d, UpSample, add +from config import cfg class CondGAN(object): @@ -17,208 +20,171 @@ def __init__(self, image_shape): self.df_dim = cfg.GAN.DF_DIM self.ef_dim = cfg.GAN.EMBEDDING_DIM - self.image_shape = image_shape self.s = image_shape[0] - self.s2, self.s4, self.s8, self.s16 =\ - int(self.s / 2), int(self.s / 4), int(self.s / 8), int(self.s / 16) - - # Since D is only used during training, we build a template - # for safe reuse the variables during computing loss for fake/real/wrong images - # We do not do this for G, - # because batch_norm needs different options for training and testing - if cfg.GAN.NETWORK_TYPE == "default": - with tf.variable_scope("d_net"): - self.d_encode_img_template = self.d_encode_image() - self.d_context_template = self.context_embedding() - self.discriminator_template = self.discriminator() - elif cfg.GAN.NETWORK_TYPE == "simple": - with tf.variable_scope("d_net"): - self.d_encode_img_template = self.d_encode_image_simple() - self.d_context_template = self.context_embedding() - self.discriminator_template = self.discriminator() - else: - raise NotImplementedError + self.s2, self.s4, self.s8, self.s16 = int(self.s / 2), int(self.s / 4), int(self.s / 8), int(self.s / 16) # g-net def generate_condition(self, c_var): - conditions =\ - (pt.wrap(c_var). - flatten(). - custom_fully_connected(self.ef_dim * 2). - apply(leaky_rectify, leakiness=0.2)) + conditions = fc(c_var, self.ef_dim * 2, 'gen_cond/fc', activation_fn=tf.nn.leaky_relu) mean = conditions[:, :self.ef_dim] log_sigma = conditions[:, self.ef_dim:] return [mean, log_sigma] - def generator(self, z_var): - node1_0 =\ - (pt.wrap(z_var). - flatten(). - custom_fully_connected(self.s16 * self.s16 * self.gf_dim * 8). - fc_batch_norm(). - reshape([-1, self.s16, self.s16, self.gf_dim * 8])) - node1_1 = \ - (node1_0. - custom_conv2d(self.gf_dim * 2, k_h=1, k_w=1, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_conv2d(self.gf_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_conv2d(self.gf_dim * 8, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm()) - node1 = \ - (node1_0. - apply(tf.add, node1_1). - apply(tf.nn.relu)) - - node2_0 = \ - (node1. - # custom_deconv2d([0, self.s8, self.s8, self.gf_dim * 4], k_h=4, k_w=4). - apply(tf.image.resize_nearest_neighbor, [self.s8, self.s8]). - custom_conv2d(self.gf_dim * 4, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm()) - node2_1 = \ - (node2_0. - custom_conv2d(self.gf_dim * 1, k_h=1, k_w=1, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_conv2d(self.gf_dim * 1, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_conv2d(self.gf_dim * 4, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm()) - node2 = \ - (node2_0. - apply(tf.add, node2_1). - apply(tf.nn.relu)) - - output_tensor = \ - (node2. - # custom_deconv2d([0, self.s4, self.s4, self.gf_dim * 2], k_h=4, k_w=4). 
-             apply(tf.image.resize_nearest_neighbor, [self.s4, self.s4]).
-             custom_conv2d(self.gf_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1).
-             conv_batch_norm().
-             apply(tf.nn.relu).
-             # custom_deconv2d([0, self.s2, self.s2, self.gf_dim], k_h=4, k_w=4).
-             apply(tf.image.resize_nearest_neighbor, [self.s2, self.s2]).
-             custom_conv2d(self.gf_dim, k_h=3, k_w=3, d_h=1, d_w=1).
-             conv_batch_norm().
-             apply(tf.nn.relu).
-             # custom_deconv2d([0] + list(self.image_shape), k_h=4, k_w=4).
-             apply(tf.image.resize_nearest_neighbor, [self.s, self.s]).
-             custom_conv2d(3, k_h=3, k_w=3, d_h=1, d_w=1).
-             apply(tf.nn.tanh))
+    def generator(self, z_var, training=True):
+        node1_0 = fc(z_var, self.s16 * self.s16 * self.gf_dim * 8, 'g_n1.0/fc')
+        node1_0 = fc_batch_normalization(node1_0, 'g_n1.0/batch_norm')
+        node1_0 = reshape(node1_0, [-1, self.s16, self.s16, self.gf_dim * 8], name='g_n1.0/reshape')
+
+        node1_1 = Conv2d(node1_0, 1, 1, self.gf_dim * 2, 1, 1, name='g_n1.1/conv2d')
+        node1_1 = conv_batch_normalization(node1_1, 'g_n1.1/batch_norm_1', activation_fn=tf.nn.relu,
+                                           is_training=training)
+        node1_1 = Conv2d(node1_1, 3, 3, self.gf_dim * 2, 1, 1, name='g_n1.1/conv2d2')
+        node1_1 = conv_batch_normalization(node1_1, 'g_n1.1/batch_norm_2', activation_fn=tf.nn.relu,
+                                           is_training=training)
+        node1_1 = Conv2d(node1_1, 3, 3, self.gf_dim * 8, 1, 1, name='g_n1.1/conv2d3')
+        # no activation on the residual branch's last batch norm;
+        # the ReLU is applied after the add below
+        node1_1 = conv_batch_normalization(node1_1, 'g_n1.1/batch_norm_3', is_training=training)
+
+        node1 = add([node1_0, node1_1], name='g_n1_res/add')
+        node1_output = tf.nn.relu(node1)
+
+        node2_0 = UpSample(node1_output, size=[self.s8, self.s8], method=1, align_corners=False, name='g_n2.0/upsample')
+        node2_0 = Conv2d(node2_0, 3, 3, self.gf_dim * 4, 1, 1, name='g_n2.0/conv2d')
+        node2_0 = conv_batch_normalization(node2_0, 'g_n2.0/batch_norm', is_training=training)
+
+        node2_1 = Conv2d(node2_0, 1, 1, self.gf_dim * 1, 1, 1, name='g_n2.1/conv2d')
+        node2_1 = conv_batch_normalization(node2_1, 'g_n2.1/batch_norm', activation_fn=tf.nn.relu, is_training=training)
+        node2_1 = Conv2d(node2_1, 3, 3, self.gf_dim * 1, 1, 1, name='g_n2.1/conv2d2')
+        node2_1 = conv_batch_normalization(node2_1, 'g_n2.1/batch_norm2', activation_fn=tf.nn.relu,
+                                           is_training=training)
+        node2_1 = Conv2d(node2_1, 3, 3, self.gf_dim * 4, 1, 1, name='g_n2.1/conv2d3')
+        node2_1 = conv_batch_normalization(node2_1, 'g_n2.1/batch_norm3', is_training=training)
+
+        node2 = add([node2_0, node2_1], name='g_n2_res/add')
+        node2_output = tf.nn.relu(node2)
+
+        output_tensor = UpSample(node2_output, size=[self.s4, self.s4], method=1, align_corners=False,
+                                 name='g_OT/upsample')
+        output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim * 2, 1, 1, name='g_OT/conv2d')
+        output_tensor = conv_batch_normalization(output_tensor, 'g_OT/batch_norm', activation_fn=tf.nn.relu,
+                                                 is_training=training)
+        output_tensor = UpSample(output_tensor, size=[self.s2, self.s2], method=1, align_corners=False,
+                                 name='g_OT/upsample2')
+        output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim, 1, 1, name='g_OT/conv2d2')
+        output_tensor = conv_batch_normalization(output_tensor, 'g_OT/batch_norm2', activation_fn=tf.nn.relu,
+                                                 is_training=training)
+        output_tensor = UpSample(output_tensor, size=[self.s, self.s], method=1, align_corners=False,
+                                 name='g_OT/upsample3')
+        output_tensor = Conv2d(output_tensor, 3, 3, 3, 1, 1, activation_fn=tf.nn.tanh, name='g_OT/conv2d3')
 
         return output_tensor
 
-    def generator_simple(self, z_var):
-        output_tensor =\
-            (pt.wrap(z_var).
-             flatten().
- custom_fully_connected(self.s16 * self.s16 * self.gf_dim * 8). - reshape([-1, self.s16, self.s16, self.gf_dim * 8]). - conv_batch_norm(). - apply(tf.nn.relu). - custom_deconv2d([0, self.s8, self.s8, self.gf_dim * 4], k_h=4, k_w=4). - # apply(tf.image.resize_nearest_neighbor, [self.s8, self.s8]). - # custom_conv2d(self.gf_dim * 4, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_deconv2d([0, self.s4, self.s4, self.gf_dim * 2], k_h=4, k_w=4). - # apply(tf.image.resize_nearest_neighbor, [self.s4, self.s4]). - # custom_conv2d(self.gf_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_deconv2d([0, self.s2, self.s2, self.gf_dim], k_h=4, k_w=4). - # apply(tf.image.resize_nearest_neighbor, [self.s2, self.s2]). - # custom_conv2d(self.gf_dim, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_deconv2d([0] + list(self.image_shape), k_h=4, k_w=4). - # apply(tf.image.resize_nearest_neighbor, [self.s, self.s]). - # custom_conv2d(3, k_h=3, k_w=3, d_h=1, d_w=1). - apply(tf.nn.tanh)) + def generator_simple(self, z_var, training=True): + output_tensor = fc(z_var, self.s16 * self.s16 * self.gf_dim * 8, 'g_simple_OT/fc') + output_tensor = reshape(output_tensor, [-1, self.s16, self.s16, self.gf_dim * 8], name='g_simple_OT/reshape') + output_tensor = conv_batch_normalization(output_tensor, 'g_simple_OT/batch_norm', activation_fn=tf.nn.relu, + is_training=training) + output_tensor = Deconv2d(output_tensor, [0, self.s8, self.s8, self.gf_dim * 4], name='g_simple_OT/deconv2d', + k_h=4, k_w=4) + output_tensor = conv_batch_normalization(output_tensor, 'g_simple_OT/batch_norm2', activation_fn=tf.nn.relu, + is_training=training) + output_tensor = Deconv2d(output_tensor, [0, self.s4, self.s4, self.gf_dim * 2], name='g_simple_OT/deconv2d2', + k_h=4, k_w=4) + output_tensor = conv_batch_normalization(output_tensor, 'g_simple_OT/batch_norm3', activation_fn=tf.nn.relu, + is_training=training) + output_tensor = Deconv2d(output_tensor, [0, self.s2, self.s2, self.gf_dim], name='g_simple_OT/deconv2d3', + k_h=4, k_w=4) + output_tensor = conv_batch_normalization(output_tensor, 'g_simple_OT/batch_norm4', activation_fn=tf.nn.relu, + is_training=training) + output_tensor = Deconv2d(output_tensor, [0] + list(self.image_shape), name='g_simple_OT/deconv2d4', + k_h=4, k_w=4, activation_fn=tf.nn.tanh) + return output_tensor - def get_generator(self, z_var): + + def get_generator(self, z_var, is_training): if cfg.GAN.NETWORK_TYPE == "default": - return self.generator(z_var) + return self.generator(z_var, training=is_training) elif cfg.GAN.NETWORK_TYPE == "simple": - return self.generator_simple(z_var) + return self.generator_simple(z_var, training=is_training) else: raise NotImplementedError # d-net - def context_embedding(self): - template = (pt.template("input"). - custom_fully_connected(self.ef_dim). - apply(leaky_rectify, leakiness=0.2)) + def context_embedding(self, inputs=None, if_reuse=None): + template = fc(inputs, self.ef_dim, 'd_embedd/fc', activation_fn=tf.nn.leaky_relu, reuse=if_reuse) return template - def d_encode_image(self): - node1_0 = \ - (pt.template("input"). - custom_conv2d(self.df_dim, k_h=4, k_w=4). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 2, k_h=4, k_w=4). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 4, k_h=4, k_w=4). - conv_batch_norm(). - custom_conv2d(self.df_dim * 8, k_h=4, k_w=4). - conv_batch_norm()) - node1_1 = \ - (node1_0. 
- custom_conv2d(self.df_dim * 2, k_h=1, k_w=1, d_h=1, d_w=1). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 8, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm()) - - node1 = \ - (node1_0. - apply(tf.add, node1_1). - apply(leaky_rectify, leakiness=0.2)) + def d_encode_image(self, training=True, inputs=None, if_reuse=None): + node1_0 = Conv2d(inputs, 4, 4, self.df_dim, 2, 2, name='d_n1.0/conv2d', activation_fn=tf.nn.leaky_relu, + reuse=if_reuse) + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 2, 2, 2, name='d_n1.0/conv2d2', reuse=if_reuse) + node1_0 = conv_batch_normalization(node1_0, 'd_n1.0/batch_norm', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 4, 2, 2, name='d_n1.0/conv2d3', reuse=if_reuse) + node1_0 = conv_batch_normalization(node1_0, 'd_n1.0/batch_norm2', is_training=training, reuse=if_reuse) + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 8, 2, 2, name='d_n1.0/conv2d4', reuse=if_reuse) + node1_0 = conv_batch_normalization(node1_0, 'd_n1.0/batch_norm3', is_training=training, reuse=if_reuse) + + node1_1 = Conv2d(node1_0, 1, 1, self.df_dim * 2, 1, 1, name='d_n1.1/conv2d', reuse=if_reuse) + node1_1 = conv_batch_normalization(node1_1, 'd_n1.1/batch_norm', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + node1_1 = Conv2d(node1_1, 3, 3, self.df_dim * 2, 1, 1, name='d_n1.1/conv2d2', reuse=if_reuse) + node1_1 = conv_batch_normalization(node1_1, 'd_n1.1/batch_norm2', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + node1_1 = Conv2d(node1_1, 3, 3, self.df_dim * 8, 1, 1, name='d_n1.1/conv2d3', reuse=if_reuse) + node1_1 = conv_batch_normalization(node1_1, 'd_n1.1/batch_norm3', is_training=training, reuse=if_reuse) + + node1 = add([node1_0, node1_1], name='d_n1_res/add') + node1 = tf.nn.leaky_relu(node1) return node1 - def d_encode_image_simple(self): - template = \ - (pt.template("input"). - custom_conv2d(self.df_dim, k_h=4, k_w=4). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 2, k_h=4, k_w=4). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 4, k_h=4, k_w=4). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 8, k_h=4, k_w=4). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2)) + def d_encode_image_simple(self, training=True, inputs=None, if_reuse=None): + template = Conv2d(inputs, 4, 4, self.df_dim, 2, 2, activation_fn=tf.nn.leaky_relu, name='d_template/conv2d', + reuse=if_reuse) + template = Conv2d(template, 4, 4, self.df_dim * 2, 2, 2, name='d_template/conv2d2', reuse=if_reuse) + template = conv_batch_normalization(template, 'd_template/batch_norm', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + template = Conv2d(template, 4, 4, self.df_dim * 4, 2, 2, name='d_template/conv2d3', reuse=if_reuse) + template = conv_batch_normalization(template, 'd_template/batch_norm2', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + template = Conv2d(template, 4, 4, self.df_dim * 8, 2, 2, name='d_template/conv2d4', reuse=if_reuse) + template = conv_batch_normalization(template, 'd_template/batch_norm3', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) return template - def discriminator(self): - template = \ - (pt.template("input"). 
# 128*9*4*4
-             custom_conv2d(self.df_dim * 8, k_h=1, k_w=1, d_h=1, d_w=1).  # 128*8*4*4
-             conv_batch_norm().
-             apply(leaky_rectify, leakiness=0.2).
-             # custom_fully_connected(1))
-             custom_conv2d(1, k_h=self.s16, k_w=self.s16, d_h=self.s16, d_w=self.s16))
+    def discriminator(self, training=True, inputs=None, if_reuse=None):
+        template = Conv2d(inputs, 1, 1, self.df_dim * 8, 1, 1, name='d_template/conv2d', reuse=if_reuse)
+        template = conv_batch_normalization(template, 'd_template/batch_norm', is_training=training,
+                                            activation_fn=tf.nn.leaky_relu, reuse=if_reuse)
+        template = Conv2d(template, self.s16, self.s16, 1, self.s16, self.s16, name='d_template/conv2d2',
+                          reuse=if_reuse)
 
         return template
 
-    def get_discriminator(self, x_var, c_var):
-        x_code = self.d_encode_img_template.construct(input=x_var)
-        c_code = self.d_context_template.construct(input=c_var)
-        c_code = tf.expand_dims(tf.expand_dims(c_code, 1), 1)
-        c_code = tf.tile(c_code, [1, self.s16, self.s16, 1])
+    # Since D is only used during training, we build it as a template
+    # to safely reuse the variables when computing the loss for fake/real/wrong images.
+    # We do not do this for G,
+    # because batch_norm needs different options for training and testing.
+    def get_discriminator(self, x_var, c_var, is_training, no_reuse=None):
+        if cfg.GAN.NETWORK_TYPE == "default":
+            x_code = self.d_encode_image(training=is_training, inputs=x_var, if_reuse=no_reuse)
+            c_code = self.context_embedding(inputs=c_var, if_reuse=no_reuse)
+            c_code = tf.expand_dims(tf.expand_dims(c_code, 1), 1)
+            c_code = tf.tile(c_code, [1, self.s16, self.s16, 1])
+            x_c_code = tf.concat([x_code, c_code], 3)
+
+            return self.discriminator(training=is_training, inputs=x_c_code, if_reuse=no_reuse)
+
+        elif cfg.GAN.NETWORK_TYPE == "simple":
+            x_code = self.d_encode_image_simple(training=is_training, inputs=x_var, if_reuse=no_reuse)
+            c_code = self.context_embedding(inputs=c_var, if_reuse=no_reuse)
+            c_code = tf.expand_dims(tf.expand_dims(c_code, 1), 1)
+            c_code = tf.tile(c_code, [1, self.s16, self.s16, 1])
+            x_c_code = tf.concat([x_code, c_code], 3)
 
-        x_c_code = tf.concat(3, [x_code, c_code])
-        return self.discriminator_template.construct(input=x_c_code)
+            return self.discriminator(training=is_training, inputs=x_c_code, if_reuse=no_reuse)
+        else:
+            raise NotImplementedError
diff --git a/stageI/run_exp.py b/stageI/run_exp.py
index 8535fea..4076f2f 100644
--- a/stageI/run_exp.py
+++ b/stageI/run_exp.py
@@ -7,21 +7,22 @@
 import argparse
 import pprint
 
-from misc.datasets import TextDataset
-from stageI.model import CondGAN
-from stageI.trainer import CondGANTrainer
-from misc.utils import mkdir_p
-from misc.config import cfg, cfg_from_file
+import sys
+sys.path.append('./misc')
+sys.path.append('stageI')
+
+from datasets import TextDataset
+from model import CondGAN
+from trainer import CondGANTrainer
+from utils import mkdir_p
+from config import cfg, cfg_from_file
+from tensorflow.contrib.framework.python.framework import checkpoint_utils
 
 
 def parse_args():
     parser = argparse.ArgumentParser(description='Train a GAN network')
-    parser.add_argument('--cfg', dest='cfg_file',
-                        help='optional config file',
-                        default=None, type=str)
-    parser.add_argument('--gpu', dest='gpu_id',
-                        help='GPU device id to use [0]',
-                        default=-1, type=int)
+    parser.add_argument('--cfg', dest='cfg_file', help='optional config file', default=None, type=str)
+    parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', default=-1, type=int)
     # if len(sys.argv) == 1:
     #     parser.print_help()
     #     sys.exit(1)
@@ 
-48,25 +49,23 @@ def parse_args(): filename_train = '%s/train' % (datadir) dataset.train = dataset.get_data(filename_train) - ckt_logs_dir = "ckt_logs/%s/%s_%s" % \ - (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp) + ckt_logs_dir = "ckt_logs/%s/%s_%s" % (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp) mkdir_p(ckt_logs_dir) else: s_tmp = cfg.TRAIN.PRETRAINED_MODEL ckt_logs_dir = s_tmp[:s_tmp.find('.ckpt')] - model = CondGAN( - image_shape=dataset.image_shape - ) + model = CondGAN(image_shape=dataset.image_shape) + + algo = CondGANTrainer(model=model, dataset=dataset, ckt_logs_dir=ckt_logs_dir) - algo = CondGANTrainer( - model=model, - dataset=dataset, - ckt_logs_dir=ckt_logs_dir - ) if cfg.TRAIN.FLAG: algo.train() else: + # var_list = checkpoint_utils.list_variables(cfg.TRAIN.PRETRAINED_MODEL) + # # var_list = checkpoint_utils.list_variables('models/birds_model_164000.ckpt') + # for v in var_list: + # print(v) ''' For every input text embedding/sentence in the training and test datasets, generate cfg.TRAIN.NUM_COPY images with randomness from noise z and conditioning augmentation.''' diff --git a/stageI/trainer.py b/stageI/trainer.py index 001666a..f2750c1 100644 --- a/stageI/trainer.py +++ b/stageI/trainer.py @@ -1,18 +1,20 @@ from __future__ import division from __future__ import print_function -import prettytensor as pt import tensorflow as tf import numpy as np -import scipy.misc import os -import sys from six.moves import range from progressbar import ETA, Bar, Percentage, ProgressBar +# > Python3 +import imageio + +import sys +sys.path.append('./misc') -from misc.config import cfg -from misc.utils import mkdir_p +from config import cfg +from utils import mkdir_p, convert_to_uint8 TINY = 1e-8 @@ -26,12 +28,7 @@ def KL_loss(mu, log_sigma): class CondGANTrainer(object): - def __init__(self, - model, - dataset=None, - exp_name="model", - ckt_logs_dir="ckt_logs", - ): + def __init__(self, model, dataset=None, exp_name="model", ckt_logs_dir="ckt_logs",): """ :type model: RegularizedGAN """ @@ -48,28 +45,18 @@ def __init__(self, self.log_vars = [] + tf.reset_default_graph() + def build_placeholder(self): - '''Helper function for init_opt''' - self.images = tf.placeholder( - tf.float32, [self.batch_size] + self.dataset.image_shape, - name='real_images') - self.wrong_images = tf.placeholder( - tf.float32, [self.batch_size] + self.dataset.image_shape, - name='wrong_images' - ) - self.embeddings = tf.placeholder( - tf.float32, [self.batch_size] + self.dataset.embedding_shape, - name='conditional_embeddings' - ) - - self.generator_lr = tf.placeholder( - tf.float32, [], - name='generator_learning_rate' - ) - self.discriminator_lr = tf.placeholder( - tf.float32, [], - name='discriminator_learning_rate' - ) + ''' Helper function for init_opt ''' + self.images = tf.placeholder(tf.float32, [self.batch_size] + self.dataset.image_shape, name='real_images') + self.wrong_images = tf.placeholder(tf.float32, [self.batch_size] + self.dataset.image_shape, + name='wrong_images') + self.embeddings = tf.placeholder(tf.float32, [self.batch_size] + self.dataset.embedding_shape, + name='conditional_embeddings') + + self.generator_lr = tf.placeholder(tf.float32, [], name='generator_learning_rate') + self.discriminator_lr = tf.placeholder(tf.float32, [], name='discriminator_learning_rate') def sample_encoded_context(self, embeddings): '''Helper function for init_opt''' @@ -91,21 +78,19 @@ def sample_encoded_context(self, embeddings): def init_opt(self): self.build_placeholder() - with 
pt.defaults_scope(phase=pt.Phase.train): - with tf.variable_scope("g_net"): - # ####get output from G network################################ - c, kl_loss = self.sample_encoded_context(self.embeddings) - z = tf.random_normal([self.batch_size, cfg.Z_DIM]) - self.log_vars.append(("hist_c", c)) - self.log_vars.append(("hist_z", z)) - fake_images = self.model.get_generator(tf.concat(1, [c, z])) + with tf.variable_scope("g_net"): + # ####get output from G network################################ + c, kl_loss = self.sample_encoded_context(self.embeddings) + z = tf.random_normal([self.batch_size, cfg.Z_DIM]) + self.log_vars.append(("hist_c", c)) + self.log_vars.append(("hist_z", z)) + + fake_images = self.model.get_generator(tf.concat([c, z], 1), True) # training = True + with tf.variable_scope("d_net"): #, reuse=tf.AUTO_REUSE): # For training # ####get discriminator_loss and generator_loss ################### - discriminator_loss, generator_loss =\ - self.compute_losses(self.images, - self.wrong_images, - fake_images, - self.embeddings) + discriminator_loss, generator_loss = self.compute_losses(self.images, self.wrong_images, fake_images, + self.embeddings) generator_loss += kl_loss self.log_vars.append(("g_loss_kl_loss", kl_loss)) self.log_vars.append(("g_loss", generator_loss)) @@ -116,11 +101,11 @@ def init_opt(self): # #######define self.g_sum, self.d_sum,....######################## self.define_summaries() - with pt.defaults_scope(phase=pt.Phase.test): - with tf.variable_scope("g_net", reuse=True): - self.sampler() - self.visualization(cfg.TRAIN.NUM_COPY) - print("success") + with tf.variable_scope("g_net", reuse=True): + self.sampler() + self.visualization(cfg.TRAIN.NUM_COPY) + print("success") + def sampler(self): c, _ = self.sample_encoded_context(self.embeddings) @@ -128,62 +113,45 @@ def sampler(self): z = tf.zeros([self.batch_size, cfg.Z_DIM]) # Expect similar BGs else: z = tf.random_normal([self.batch_size, cfg.Z_DIM]) - self.fake_images = self.model.get_generator(tf.concat(1, [c, z])) + self.fake_images = self.model.get_generator(tf.concat([c, z], 1), False) def compute_losses(self, images, wrong_images, fake_images, embeddings): - real_logit = self.model.get_discriminator(images, embeddings) - wrong_logit = self.model.get_discriminator(wrong_images, embeddings) - fake_logit = self.model.get_discriminator(fake_images, embeddings) + real_logit = self.model.get_discriminator(images, embeddings, True) + wrong_logit = self.model.get_discriminator(wrong_images, embeddings, True, no_reuse=tf.AUTO_REUSE) + fake_logit = self.model.get_discriminator(fake_images, embeddings, True, no_reuse=tf.AUTO_REUSE) - real_d_loss =\ - tf.nn.sigmoid_cross_entropy_with_logits(real_logit, - tf.ones_like(real_logit)) + real_d_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=real_logit, labels=tf.ones_like(real_logit)) real_d_loss = tf.reduce_mean(real_d_loss) - wrong_d_loss =\ - tf.nn.sigmoid_cross_entropy_with_logits(wrong_logit, - tf.zeros_like(wrong_logit)) + wrong_d_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=wrong_logit, labels=tf.zeros_like(wrong_logit)) wrong_d_loss = tf.reduce_mean(wrong_d_loss) - fake_d_loss =\ - tf.nn.sigmoid_cross_entropy_with_logits(fake_logit, - tf.zeros_like(fake_logit)) + fake_d_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_logit, labels=tf.zeros_like(fake_logit)) fake_d_loss = tf.reduce_mean(fake_d_loss) if cfg.TRAIN.B_WRONG: - discriminator_loss =\ - real_d_loss + (wrong_d_loss + fake_d_loss) / 2. 
+ discriminator_loss = real_d_loss + (wrong_d_loss + fake_d_loss) / 2. self.log_vars.append(("d_loss_wrong", wrong_d_loss)) else: discriminator_loss = real_d_loss + fake_d_loss self.log_vars.append(("d_loss_real", real_d_loss)) self.log_vars.append(("d_loss_fake", fake_d_loss)) - generator_loss = \ - tf.nn.sigmoid_cross_entropy_with_logits(fake_logit, - tf.ones_like(fake_logit)) + generator_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_logit, labels=tf.ones_like(fake_logit)) generator_loss = tf.reduce_mean(generator_loss) return discriminator_loss, generator_loss def prepare_trainer(self, generator_loss, discriminator_loss): - '''Helper function for init_opt''' + ''' Helper function for init_opt ''' all_vars = tf.trainable_variables() - g_vars = [var for var in all_vars if - var.name.startswith('g_')] - d_vars = [var for var in all_vars if - var.name.startswith('d_')] - - generator_opt = tf.train.AdamOptimizer(self.generator_lr, - beta1=0.5) - self.generator_trainer =\ - pt.apply_optimizer(generator_opt, - losses=[generator_loss], - var_list=g_vars) - discriminator_opt = tf.train.AdamOptimizer(self.discriminator_lr, - beta1=0.5) - self.discriminator_trainer =\ - pt.apply_optimizer(discriminator_opt, - losses=[discriminator_loss], - var_list=d_vars) + g_vars = [var for var in all_vars if var.name.startswith('g_')] + d_vars = [var for var in all_vars if var.name.startswith('d_')] + + generator_opt = tf.train.AdamOptimizer(self.generator_lr, beta1=0.5) + self.generator_trainer = generator_opt.minimize(generator_loss, var_list=g_vars) + + discriminator_opt = tf.train.AdamOptimizer(self.discriminator_lr, beta1=0.5) + + self.discriminator_trainer = discriminator_opt.minimize(discriminator_loss, var_list=d_vars) self.log_vars.append(("g_learning_rate", self.generator_lr)) self.log_vars.append(("d_learning_rate", self.discriminator_lr)) @@ -192,15 +160,15 @@ def define_summaries(self): all_sum = {'g': [], 'd': [], 'hist': []} for k, v in self.log_vars: if k.startswith('g'): - all_sum['g'].append(tf.scalar_summary(k, v)) + all_sum['g'].append(tf.summary.scalar(k, v)) elif k.startswith('d'): - all_sum['d'].append(tf.scalar_summary(k, v)) + all_sum['d'].append(tf.summary.scalar(k, v)) elif k.startswith('hist'): - all_sum['hist'].append(tf.histogram_summary(k, v)) + all_sum['hist'].append(tf.summary.histogram(k, v)) - self.g_sum = tf.merge_summary(all_sum['g']) - self.d_sum = tf.merge_summary(all_sum['d']) - self.hist_sum = tf.merge_summary(all_sum['hist']) + self.g_sum = tf.summary.merge(all_sum['g']) + self.d_sum = tf.summary.merge(all_sum['d']) + self.hist_sum = tf.summary.merge(all_sum['hist']) def visualize_one_superimage(self, img_var, images, rows, filename): stacked_img = [] @@ -210,22 +178,18 @@ def visualize_one_superimage(self, img_var, images, rows, filename): for col in range(rows): row_img.append(img_var[row * rows + col, :, :, :]) # each rows is 1realimage +10_fakeimage - stacked_img.append(tf.concat(1, row_img)) - imgs = tf.expand_dims(tf.concat(0, stacked_img), 0) - current_img_summary = tf.image_summary(filename, imgs) + stacked_img.append(tf.concat(row_img, 1)) + imgs = tf.expand_dims(tf.concat(stacked_img, 0), 0) + current_img_summary = tf.summary.image(filename, imgs) return current_img_summary, imgs def visualization(self, n): - fake_sum_train, superimage_train = \ - self.visualize_one_superimage(self.fake_images[:n * n], - self.images[:n * n], - n, "train") - fake_sum_test, superimage_test = \ - self.visualize_one_superimage(self.fake_images[n * n:2 * n * n], - 
self.images[n * n:2 * n * n], - n, "test") - self.superimages = tf.concat(0, [superimage_train, superimage_test]) - self.image_summary = tf.merge_summary([fake_sum_train, fake_sum_test]) + fake_sum_train, superimage_train = self.visualize_one_superimage(self.fake_images[:n * n], self.images[:n * n], + n, "train") + fake_sum_test, superimage_test = self.visualize_one_superimage(self.fake_images[n * n:2 * n * n], + self.images[n * n:2 * n * n], n, "test") + self.superimages = tf.concat([superimage_train, superimage_test], 0) + self.image_summary = tf.summary.merge([fake_sum_train, fake_sum_test]) def preprocess(self, x, n): # make sure every row with n column have the same embeddings @@ -235,33 +199,29 @@ def preprocess(self, x, n): return x def epoch_sum_images(self, sess, n): - images_train, _, embeddings_train, captions_train, _ =\ - self.dataset.train.next_batch(n * n, cfg.TRAIN.NUM_EMBEDDING) + images_train, _, embeddings_train, captions_train, _ = self.dataset.train.next_batch(n * n, + cfg.TRAIN.NUM_EMBEDDING) images_train = self.preprocess(images_train, n) embeddings_train = self.preprocess(embeddings_train, n) - images_test, _, embeddings_test, captions_test, _ = \ - self.dataset.test.next_batch(n * n, 1) + images_test, _, embeddings_test, captions_test, _ = self.dataset.test.next_batch(n * n, 1) images_test = self.preprocess(images_test, n) embeddings_test = self.preprocess(embeddings_test, n) images = np.concatenate([images_train, images_test], axis=0) - embeddings =\ - np.concatenate([embeddings_train, embeddings_test], axis=0) + embeddings = np.concatenate([embeddings_train, embeddings_test], axis=0) if self.batch_size > 2 * n * n: - images_pad, _, embeddings_pad, _, _ =\ - self.dataset.test.next_batch(self.batch_size - 2 * n * n, 1) + images_pad, _, embeddings_pad, _, _ = self.dataset.test.next_batch(self.batch_size - 2 * n * n, 1) images = np.concatenate([images, images_pad], axis=0) embeddings = np.concatenate([embeddings, embeddings_pad], axis=0) feed_dict = {self.images: images, self.embeddings: embeddings} - gen_samples, img_summary =\ - sess.run([self.superimages, self.image_summary], feed_dict) + gen_samples, img_summary = sess.run([self.superimages, self.image_summary], feed_dict) # save images generated for train and test captions - scipy.misc.imsave('%s/train.jpg' % (self.log_dir), gen_samples[0]) - scipy.misc.imsave('%s/test.jpg' % (self.log_dir), gen_samples[1]) + imageio.imwrite('%s/train.jpg' % (self.log_dir), convert_to_uint8(gen_samples[0])) + imageio.imwrite('%s/test.jpg' % (self.log_dir), convert_to_uint8(gen_samples[1])) # pfi_train = open(self.log_dir + "/train.txt", "w") pfi_test = open(self.log_dir + "/test.txt", "w") @@ -278,11 +238,12 @@ def epoch_sum_images(self, sess, n): def build_model(self, sess): self.init_opt() - sess.run(tf.initialize_all_variables()) + sess.run(tf.global_variables_initializer()) if len(self.model_path) > 0: print("Reading model parameters from %s" % self.model_path) - restore_vars = tf.all_variables() + restore_vars = tf.global_variables() + # restore_vars = tf.all_variables() # all_vars = tf.all_variables() # restore_vars = [var for var in all_vars if # var.name.startswith('g_') or @@ -301,15 +262,14 @@ def build_model(self, sess): def train(self): config = tf.ConfigProto(allow_soft_placement=True) + config.gpu_options.per_process_gpu_memory_fraction = 0.7 with tf.Session(config=config) as sess: with tf.device("/gpu:%d" % cfg.GPU_ID): counter = self.build_model(sess) - saver = tf.train.Saver(tf.all_variables(), - 
keep_checkpoint_every_n_hours=2) + saver = tf.train.Saver(tf.global_variables(), keep_checkpoint_every_n_hours=2) # summary_op = tf.merge_all_summaries() - summary_writer = tf.train.SummaryWriter(self.log_dir, - sess.graph) + summary_writer = tf.summary.FileWriter(self.log_dir, sess.graph) keys = ["d_loss", "g_loss"] log_vars = [] @@ -327,10 +287,8 @@ def train(self): updates_per_epoch = int(number_example / self.batch_size) epoch_start = int(counter / updates_per_epoch) for epoch in range(epoch_start, self.max_epoch): - widgets = ["epoch #%d|" % epoch, - Percentage(), Bar(), ETA()] - pbar = ProgressBar(maxval=updates_per_epoch, - widgets=widgets) + widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()] + pbar = ProgressBar(maxval=updates_per_epoch, widgets=widgets) pbar.start() if epoch % lr_decay_step == 0 and epoch != 0: @@ -341,8 +299,7 @@ def train(self): for i in range(updates_per_epoch): pbar.update(i) # training d - images, wrong_images, embeddings, _, _ =\ - self.dataset.train.next_batch(self.batch_size, + images, wrong_images, embeddings, _, _ = self.dataset.train.next_batch(self.batch_size, num_embedding) feed_dict = {self.images: images, self.wrong_images: wrong_images, @@ -351,28 +308,19 @@ def train(self): self.discriminator_lr: discriminator_lr } # train d - feed_out = [self.discriminator_trainer, - self.d_sum, - self.hist_sum, - log_vars] - _, d_sum, hist_sum, log_vals = sess.run(feed_out, - feed_dict) + feed_out = [self.discriminator_trainer, self.d_sum, self.hist_sum, log_vars] + _, d_sum, hist_sum, log_vals = sess.run(feed_out, feed_dict) summary_writer.add_summary(d_sum, counter) summary_writer.add_summary(hist_sum, counter) all_log_vals.append(log_vals) # train g - feed_out = [self.generator_trainer, - self.g_sum] - _, g_sum = sess.run(feed_out, - feed_dict) + feed_out = [self.generator_trainer, self.g_sum] + _, g_sum = sess.run(feed_out, feed_dict) summary_writer.add_summary(g_sum, counter) # save checkpoint counter += 1 if counter % self.snapshot_interval == 0: - snapshot_path = "%s/%s_%s.ckpt" %\ - (self.checkpoint_dir, - self.exp_name, - str(counter)) + snapshot_path = "%s/%s_%s.ckpt" % (self.checkpoint_dir, self.exp_name, str(counter)) fn = saver.save(sess, snapshot_path) print("Model saved in file: %s" % fn) @@ -385,21 +333,17 @@ def train(self): dic_logs[k] = v # print(k, v) - log_line = "; ".join("%s: %s" % - (str(k), str(dic_logs[k])) - for k in dic_logs) + log_line = "; ".join("%s: %s" % (str(k), str(dic_logs[k])) for k in dic_logs) print("Epoch %d | " % (epoch) + log_line) sys.stdout.flush() if np.any(np.isnan(avg_log_vals)): raise ValueError("NaN detected!") - def save_super_images(self, images, sample_batchs, filenames, - sentenceID, save_dir, subset): + def save_super_images(self, images, sample_batchs, filenames, sentenceID, save_dir, subset): # batch_size samples for each embedding numSamples = len(sample_batchs) for j in range(len(filenames)): - s_tmp = '%s-1real-%dsamples/%s/%s' %\ - (save_dir, numSamples, subset, filenames[j]) + s_tmp = '%s-1real-%dsamples/%s/%s' % (save_dir, numSamples, subset, filenames[j]) folder = s_tmp[:s_tmp.rfind('/')] if not os.path.isdir(folder): print('Make a new folder: ', folder) @@ -411,42 +355,40 @@ def save_super_images(self, images, sample_batchs, filenames, superimage = np.concatenate(superimage, axis=1) fullpath = '%s_sentence%d.jpg' % (s_tmp, sentenceID) - scipy.misc.imsave(fullpath, superimage) + imageio.imwrite(fullpath, convert_to_uint8(superimage)) def eval_one_dataset(self, sess, dataset, save_dir, 
subset='train'): count = 0 print('num_examples:', dataset._num_examples) while count < dataset._num_examples: start = count % dataset._num_examples - images, embeddings_batchs, filenames, _ =\ - dataset.next_batch_test(self.batch_size, start, 1) + images, embeddings_batchs, filenames, _ = dataset.next_batch_test(self.batch_size, start, 1) print('count = ', count, 'start = ', start) for i in range(len(embeddings_batchs)): samples_batchs = [] # Generate up to 16 images for each sentence, # with randomness from noise z and conditioning augmentation. for j in range(np.minimum(16, cfg.TRAIN.NUM_COPY)): - samples = sess.run(self.fake_images, - {self.embeddings: embeddings_batchs[i]}) + samples = sess.run(self.fake_images, {self.embeddings: embeddings_batchs[i]}) samples_batchs.append(samples) - self.save_super_images(images, samples_batchs, - filenames, i, save_dir, - subset) + self.save_super_images(images, samples_batchs, filenames, i, save_dir, subset) count += self.batch_size def evaluate(self): config = tf.ConfigProto(allow_soft_placement=True) + config.gpu_options.per_process_gpu_memory_fraction = 0.7 with tf.Session(config=config) as sess: with tf.device("/gpu:%d" % cfg.GPU_ID): if self.model_path.find('.ckpt') != -1: self.init_opt() print("Reading model parameters from %s" % self.model_path) - saver = tf.train.Saver(tf.all_variables()) + saver = tf.train.Saver(tf.global_variables()) + print(tf.global_variables()) saver.restore(sess, self.model_path) - # self.eval_one_dataset(sess, self.dataset.train, - # self.log_dir, subset='train') - self.eval_one_dataset(sess, self.dataset.test, - self.log_dir, subset='test') + + # self.eval_one_dataset(sess, self.dataset.train, self.log_dir, subset='train') + + self.eval_one_dataset(sess, self.dataset.test, self.log_dir, subset='test') else: print("Input a valid model path.") diff --git a/stageII/cfg/birds.yml b/stageII/cfg/birds.yml index 4e3ce8d..5d2db8a 100644 --- a/stageII/cfg/birds.yml +++ b/stageII/cfg/birds.yml @@ -7,11 +7,11 @@ Z_DIM: 100 TRAIN: FLAG: True - PRETRAINED_MODEL: './ckt_logs/birds/stageI/model_82000.ckpt' - PRETRAINED_EPOCH: 600 + PRETRAINED_MODEL: './ckt_logs/birds/stageI_2019_07_09_19_42_16/model_12000.ckpt' #'' + PRETRAINED_EPOCH: 100 # 600 BATCH_SIZE: 64 NUM_COPY: 4 - MAX_EPOCH: 1200 + MAX_EPOCH: 200 # 1200 SNAPSHOT_INTERVAL: 2000 DISCRIMINATOR_LR: 0.0002 GENERATOR_LR: 0.0002 diff --git a/stageII/model.py b/stageII/model.py index 28aee30..18f2bb0 100644 --- a/stageII/model.py +++ b/stageII/model.py @@ -1,11 +1,13 @@ from __future__ import division from __future__ import print_function -import prettytensor as pt import tensorflow as tf -import misc.custom_ops -from misc.custom_ops import leaky_rectify -from misc.config import cfg +import sys + +sys.path.append('./misc') + +from custom_ops import fc, conv_batch_normalization, fc_batch_normalization, reshape, Conv2d, Deconv2d, UpSample, add +from config import cfg # TODO: Does template.constrct() really shared the computation # when multipel times of construct are done @@ -22,300 +24,263 @@ def __init__(self, lr_imsize, hr_lr_ratio): self.s = lr_imsize print('lr_imsize: ', lr_imsize) - self.s2, self.s4, self.s8, self.s16 = \ - int(self.s / 2), int(self.s / 4), int(self.s / 8), int(self.s / 16) - if cfg.GAN.NETWORK_TYPE == "default": - with tf.variable_scope("d_net"): - self.d_context_template = self.context_embedding() - self.d_image_template = self.d_encode_image() - self.d_discriminator_template = self.discriminator() - - with tf.variable_scope("hr_d_net"): - 
self.hr_d_context_template = self.context_embedding() - self.hr_d_image_template = self.hr_d_encode_image() - self.hr_discriminator_template = self.discriminator() - else: - raise NotImplementedError + self.s2, self.s4, self.s8, self.s16 = int(self.s / 2), int(self.s / 4), int(self.s / 8), int(self.s / 16) # conditioning augmentation structure for text embedding # are shared by g and hr_g # g and hr_g build this structure separately and do not share parameters def generate_condition(self, c_var): - conditions =\ - (pt.wrap(c_var). - flatten(). - custom_fully_connected(self.ef_dim * 2). - apply(leaky_rectify, leakiness=0.2)) + conditions = fc(c_var, self.ef_dim * 2, 'gen_cond/fc', activation_fn=tf.nn.leaky_relu) mean = conditions[:, :self.ef_dim] log_sigma = conditions[:, self.ef_dim:] return [mean, log_sigma] # stage I generator (g) - def generator(self, z_var): - node1_0 =\ - (pt.wrap(z_var). - flatten(). - custom_fully_connected(self.s16 * self.s16 * self.gf_dim * 8). - fc_batch_norm(). - reshape([-1, self.s16, self.s16, self.gf_dim * 8])) - node1_1 = \ - (node1_0. - custom_conv2d(self.gf_dim * 2, k_h=1, k_w=1, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_conv2d(self.gf_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_conv2d(self.gf_dim * 8, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm()) - node1 = \ - (node1_0. - apply(tf.add, node1_1). - apply(tf.nn.relu)) - - node2_0 = \ - (node1. - # custom_deconv2d([0, self.s8, self.s8, self.gf_dim * 4], k_h=4, k_w=4). - apply(tf.image.resize_nearest_neighbor, [self.s8, self.s8]). - custom_conv2d(self.gf_dim * 4, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm()) - node2_1 = \ - (node2_0. - custom_conv2d(self.gf_dim * 1, k_h=1, k_w=1, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_conv2d(self.gf_dim * 1, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - custom_conv2d(self.gf_dim * 4, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm()) - node2 = \ - (node2_0. - apply(tf.add, node2_1). - apply(tf.nn.relu)) - - output_tensor = \ - (node2. - # custom_deconv2d([0, self.s4, self.s4, self.gf_dim * 2], k_h=4, k_w=4). - apply(tf.image.resize_nearest_neighbor, [self.s4, self.s4]). - custom_conv2d(self.gf_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - # custom_deconv2d([0, self.s2, self.s2, self.gf_dim], k_h=4, k_w=4). - apply(tf.image.resize_nearest_neighbor, [self.s2, self.s2]). - custom_conv2d(self.gf_dim, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - # custom_deconv2d([0] + list(self.image_shape), k_h=4, k_w=4). - apply(tf.image.resize_nearest_neighbor, [self.s, self.s]). - custom_conv2d(3, k_h=3, k_w=3, d_h=1, d_w=1). 
-             apply(tf.nn.tanh))
+    def generator(self, z_var, training=True):
+        node1_0 = fc(z_var, self.s16 * self.s16 * self.gf_dim * 8, 'g_n1.0/fc')
+        node1_0 = fc_batch_normalization(node1_0, 'g_n1.0/batch_norm')
+        node1_0 = reshape(node1_0, [-1, self.s16, self.s16, self.gf_dim * 8], name='g_n1.0/reshape')
+
+        node1_1 = Conv2d(node1_0, 1, 1, self.gf_dim * 2, 1, 1, name='g_n1.1/conv2d')
+        node1_1 = conv_batch_normalization(node1_1, 'g_n1.1/batch_norm_1', activation_fn=tf.nn.relu,
+                                           is_training=training)
+        node1_1 = Conv2d(node1_1, 3, 3, self.gf_dim * 2, 1, 1, name='g_n1.1/conv2d2')
+        node1_1 = conv_batch_normalization(node1_1, 'g_n1.1/batch_norm_2', activation_fn=tf.nn.relu,
+                                           is_training=training)
+        node1_1 = Conv2d(node1_1, 3, 3, self.gf_dim * 8, 1, 1, name='g_n1.1/conv2d3')
+        # no activation on the residual branch's last batch norm;
+        # the ReLU is applied after the add below
+        node1_1 = conv_batch_normalization(node1_1, 'g_n1.1/batch_norm_3', is_training=training)
+
+        node1 = add([node1_0, node1_1], name='g_n1_res/add')
+        node1_output = tf.nn.relu(node1)
+
+        node2_0 = UpSample(node1_output, size=[self.s8, self.s8], method=1, align_corners=False, name='g_n2.0/upsample')
+        node2_0 = Conv2d(node2_0, 3, 3, self.gf_dim * 4, 1, 1, name='g_n2.0/conv2d')
+        node2_0 = conv_batch_normalization(node2_0, 'g_n2.0/batch_norm', is_training=training)
+
+        node2_1 = Conv2d(node2_0, 1, 1, self.gf_dim * 1, 1, 1, name='g_n2.1/conv2d')
+        node2_1 = conv_batch_normalization(node2_1, 'g_n2.1/batch_norm', activation_fn=tf.nn.relu,
+                                           is_training=training)
+        node2_1 = Conv2d(node2_1, 3, 3, self.gf_dim * 1, 1, 1, name='g_n2.1/conv2d2')
+        node2_1 = conv_batch_normalization(node2_1, 'g_n2.1/batch_norm2', activation_fn=tf.nn.relu,
+                                           is_training=training)
+        node2_1 = Conv2d(node2_1, 3, 3, self.gf_dim * 4, 1, 1, name='g_n2.1/conv2d3')
+        node2_1 = conv_batch_normalization(node2_1, 'g_n2.1/batch_norm3', is_training=training)
+
+        node2 = add([node2_0, node2_1], name='g_n2_res/add')
+        node2_output = tf.nn.relu(node2)
+
+        output_tensor = UpSample(node2_output, size=[self.s4, self.s4], method=1, align_corners=False,
+                                 name='g_OT/upsample')
+        output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim * 2, 1, 1, name='g_OT/conv2d')
+        output_tensor = conv_batch_normalization(output_tensor, 'g_OT/batch_norm', activation_fn=tf.nn.relu,
+                                                 is_training=training)
+        output_tensor = UpSample(output_tensor, size=[self.s2, self.s2], method=1, align_corners=False,
+                                 name='g_OT/upsample2')
+        output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim, 1, 1, name='g_OT/conv2d2')
+        output_tensor = conv_batch_normalization(output_tensor, 'g_OT/batch_norm2', activation_fn=tf.nn.relu,
+                                                 is_training=training)
+        output_tensor = UpSample(output_tensor, size=[self.s, self.s], method=1, align_corners=False,
+                                 name='g_OT/upsample3')
+        output_tensor = Conv2d(output_tensor, 3, 3, 3, 1, 1, activation_fn=tf.nn.tanh, name='g_OT/conv2d3')
+
         return output_tensor
 
-    def get_generator(self, z_var):
+    def get_generator(self, z_var, is_training):
         if cfg.GAN.NETWORK_TYPE == "default":
-            return self.generator(z_var)
+            return self.generator(z_var, training=is_training)
         else:
             raise NotImplementedError
 
     # stage II generator (hr_g)
-    def residual_block(self, x_c_code):
-        node0_0 = pt.wrap(x_c_code)  # -->s4 * s4 * gf_dim * 4
-        node0_1 = \
-            (pt.wrap(x_c_code).  # -->s4 * s4 * gf_dim * 4
-             custom_conv2d(self.gf_dim * 4, k_h=3, k_w=3, d_h=1, d_w=1).
-             conv_batch_norm().
-             apply(tf.nn.relu).
-             custom_conv2d(self.gf_dim * 4, k_h=3, k_w=3, d_h=1, d_w=1).
-             conv_batch_norm())
-        output_tensor = \
-            (node0_0.
-             apply(tf.add, node0_1).
- apply(tf.nn.relu)) + def residual_block(self, x_c_code, name, training=True): + node0_0 = x_c_code # -->s4 * s4 * gf_dim * 4 + + node0_1 = Conv2d(x_c_code, 3, 3, self.gf_dim * 4, 1, 1, name=name+'/conv2d') + node0_1 = conv_batch_normalization(node0_1, name+'/batch_norm', is_training=training, + activation_fn=tf.nn.relu) + node0_1 = Conv2d(node0_1, 3, 3, self.gf_dim * 4, 1, 1, name=name+'/conv2d2') + node0_1 = conv_batch_normalization(node0_1, name+'/batch_norm2', is_training=training) + + output_tensor = add([node0_0, node0_1], name='resid_block/add') + output_tensor = tf.nn.relu(output_tensor) + return output_tensor - def hr_g_encode_image(self, x_var): - output_tensor = \ - (pt.wrap(x_var). # -->s * s * 3 - custom_conv2d(self.gf_dim, k_h=3, k_w=3, d_h=1, d_w=1). # s * s * gf_dim - apply(tf.nn.relu). - custom_conv2d(self.gf_dim * 2, k_h=4, k_w=4). # s2 * s2 * gf_dim * 2 - conv_batch_norm(). - apply(tf.nn.relu). - custom_conv2d(self.gf_dim * 4, k_h=4, k_w=4). # s4 * s4 * gf_dim * 4 - conv_batch_norm(). - apply(tf.nn.relu)) + def hr_g_encode_image(self, x_var, training=True): + # x_var: -->s * s * 3 + output_tensor = Conv2d(x_var, 3, 3, self.gf_dim, 1, 1, activation_fn=tf.nn.relu, name='hr_g_OT/conv2d') # s * s * gf_dim + output_tensor = Conv2d(output_tensor, 4, 4, self.gf_dim * 2, 2, 2, name='hr_g_OT/conv2d2') # s2 * s2 * gf_dim * 2 + output_tensor = conv_batch_normalization(output_tensor, 'hr_g_OT/batch_norm', is_training=training, + activation_fn=tf.nn.relu) + output_tensor = Conv2d(output_tensor, 4, 4, self.gf_dim * 4, 2, 2, name='hr_g_OT/conv2d3') # s4 * s4 * gf_dim * 4 + output_tensor = conv_batch_normalization(output_tensor, 'hr_g_OT/batch_norm2', is_training=training, + activation_fn=tf.nn.relu) return output_tensor - def hr_g_joint_img_text(self, x_c_code): - output_tensor = \ - (pt.wrap(x_c_code). # -->s4 * s4 * (ef_dim+gf_dim*4) - custom_conv2d(self.gf_dim * 4, k_h=3, k_w=3, d_h=1, d_w=1). # s4 * s4 * gf_dim * 4 - conv_batch_norm(). - apply(tf.nn.relu)) + def hr_g_joint_img_text(self, x_c_code, training=True): + # x_code: -->s4 * s4 * (ef_dim+gf_dim*4) + output_tensor = Conv2d(x_c_code, 3, 3, self.gf_dim * 4, 1, 1, name='hr_g_joint_OT/conv2d') # s4 * s4 * gf_dim * 4 + output_tensor = conv_batch_normalization(output_tensor, 'hr_g_joint_OT/batch_norm', is_training=training, + activation_fn=tf.nn.relu) return output_tensor - def hr_generator(self, x_c_code): - output_tensor = \ - (pt.wrap(x_c_code). # -->s4 * s4 * gf_dim*4 - # custom_deconv2d([0, self.s2, self.s2, self.gf_dim * 2], k_h=4, k_w=4). # -->s2 * s2 * gf_dim*2 - apply(tf.image.resize_nearest_neighbor, [self.s2, self.s2]). - custom_conv2d(self.gf_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - # custom_deconv2d([0, self.s, self.s, self.gf_dim], k_h=4, k_w=4). # -->s * s * gf_dim - apply(tf.image.resize_nearest_neighbor, [self.s, self.s]). - custom_conv2d(self.gf_dim, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - # custom_deconv2d([0, self.s * 2, self.s * 2, self.gf_dim // 2], k_h=4, k_w=4). # -->2s * 2s * gf_dim/2 - apply(tf.image.resize_nearest_neighbor, [self.s * 2, self.s * 2]). - custom_conv2d(self.gf_dim // 2, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). - # custom_deconv2d([0, self.s * 4, self.s * 4, self.gf_dim // 4], k_h=4, k_w=4). # -->4s * 4s * gf_dim//4 - apply(tf.image.resize_nearest_neighbor, [self.s * 4, self.s * 4]). - custom_conv2d(self.gf_dim // 4, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(tf.nn.relu). 
-             custom_conv2d(3, k_h=3, k_w=3, d_h=1, d_w=1).  # -->4s * 4s * 3
-             apply(tf.nn.tanh))
+    def hr_generator(self, x_c_code, training=True):
+        # Inputs: -->s4 * s4 * gf_dim*4
+        output_tensor = UpSample(x_c_code, size=[self.s2, self.s2], method=1, align_corners=False,
+                                 name='hr_gen/upsample')
+        output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim * 2, 1, 1, name='hr_gen/conv2d')
+        output_tensor = conv_batch_normalization(output_tensor, 'hr_gen/batch_norm', is_training=training,
+                                                 activation_fn=tf.nn.relu)
+        output_tensor = UpSample(output_tensor, size=[self.s, self.s], method=1, align_corners=False,
+                                 name='hr_gen/upsample2')
+        output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim, 1, 1, name='hr_gen/conv2d2')
+        output_tensor = conv_batch_normalization(output_tensor, 'hr_gen/batch_norm2', is_training=training,
+                                                 activation_fn=tf.nn.relu)
+        output_tensor = UpSample(output_tensor, size=[self.s * 2, self.s * 2], method=1, align_corners=False,
+                                 name='hr_gen/upsample3')
+        output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim//2, 1, 1, name='hr_gen/conv2d3')
+        output_tensor = conv_batch_normalization(output_tensor, 'hr_gen/batch_norm3', is_training=training,
+                                                 activation_fn=tf.nn.relu)
+        output_tensor = UpSample(output_tensor, size=[self.s * 4, self.s * 4], method=1, align_corners=False,
+                                 name='hr_gen/upsample4')
+        output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim//4, 1, 1, name='hr_gen/conv2d4')
+        output_tensor = conv_batch_normalization(output_tensor, 'hr_gen/batch_norm4', is_training=training,
+                                                 activation_fn=tf.nn.relu)
+        output_tensor = Conv2d(output_tensor, 3, 3, 3, 1, 1, name='hr_gen/conv2d5', activation_fn=tf.nn.tanh)  # -->4s * 4s * 3
+
         return output_tensor
 
-    def hr_get_generator(self, x_var, c_code):
+    def hr_get_generator(self, x_var, c_code, is_training):
         if cfg.GAN.NETWORK_TYPE == "default":
             # image x_var: self.s * self.s *3
-            x_code = self.hr_g_encode_image(x_var)  # -->s4 * s4 * gf_dim * 4
+            x_code = self.hr_g_encode_image(x_var, training=is_training)  # -->s4 * s4 * gf_dim * 4
             # text c_code: ef_dim
             c_code = tf.expand_dims(tf.expand_dims(c_code, 1), 1)
             c_code = tf.tile(c_code, [1, self.s4, self.s4, 1])
             # combine both --> s4 * s4 * (ef_dim+gf_dim*4)
-            x_c_code = tf.concat(3, [x_code, c_code])
+            x_c_code = tf.concat([x_code, c_code], 3)
             # Joint learning from text and image -->s4 * s4 * gf_dim * 4
             node0 = self.hr_g_joint_img_text(x_c_code)
-            node1 = self.residual_block(node0)
-            node2 = self.residual_block(node1)
-            node3 = self.residual_block(node2)
-            node4 = self.residual_block(node3)
+            node1 = self.residual_block(node0, 'node1_resid_block', training=is_training)
+            node2 = self.residual_block(node1, 'node2_resid_block', training=is_training)
+            node3 = self.residual_block(node2, 'node3_resid_block', training=is_training)
+            node4 = self.residual_block(node3, 'node4_resid_block', training=is_training)
 
             # Up-sampling
-            return self.hr_generator(node4)  # -->4s * 4s * 3
+            return self.hr_generator(node4, training=is_training)  # -->4s * 4s * 3
         else:
             raise NotImplementedError
 
     # structure shared by d and hr_d
     # d and hr_d build this structure separately and do not share parameters
-    def context_embedding(self):
-        template = (pt.template("input").
-                    custom_fully_connected(self.ef_dim).
-                    apply(leaky_rectify, leakiness=0.2))
+    def context_embedding(self, inputs=None, if_reuse=None):
+        template = fc(inputs, self.ef_dim, 'd_embedd/fc', activation_fn=tf.nn.leaky_relu, reuse=if_reuse)
+
         return template
 
-    def discriminator(self):
-        template = \
-            (pt.template("input").
# s16 * s16 * 128*9 - custom_conv2d(self.df_dim * 8, k_h=1, k_w=1, d_h=1, d_w=1). # s16 * s16 * 128*8 - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - # custom_fully_connected(1)) - custom_conv2d(1, k_h=self.s16, k_w=self.s16, d_h=self.s16, d_w=self.s16)) + def discriminator(self, training=True, inputs=None, if_reuse=None): + template = Conv2d(inputs, 1, 1, self.df_dim * 8, 1, 1, name='d_template/conv2d', reuse=if_reuse) + template = conv_batch_normalization(template, 'd_template/batch_norm', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + template = Conv2d(template, self.s16, self.s16, 1, self.s16, self.s16, name='d_template/conv2d2', + reuse=if_reuse) return template # d-net - def d_encode_image(self): - node1_0 = \ - (pt.template("input"). # s * s * 3 - custom_conv2d(self.df_dim, k_h=4, k_w=4). # s2 * s2 * df_dim - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 2, k_h=4, k_w=4). # s4 * s4 * df_dim*2 - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 4, k_h=4, k_w=4). # s8 * s8 * df_dim*4 - conv_batch_norm(). - custom_conv2d(self.df_dim * 8, k_h=4, k_w=4). # s16 * s16 * df_dim*8 - conv_batch_norm()) - node1_1 = \ - (node1_0. - custom_conv2d(self.df_dim * 2, k_h=1, k_w=1, d_h=1, d_w=1). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm(). - apply(leaky_rectify, leakiness=0.2). - custom_conv2d(self.df_dim * 8, k_h=3, k_w=3, d_h=1, d_w=1). - conv_batch_norm()) - - node1 = \ - (node1_0. - apply(tf.add, node1_1). - apply(leaky_rectify, leakiness=0.2)) + def d_encode_image(self, inputs=None, training=True, if_reuse=None): + # input: s * s * 3 + node1_0 = Conv2d(inputs, 4, 4, self.df_dim, 2, 2, activation_fn=tf.nn.leaky_relu, name='d_n1.0/conv2d', + reuse=if_reuse) # s2 * s2 * df_dim + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 2, 2, 2, name='d_n1.0/conv2d2', reuse=if_reuse) # s4 * s4 * df_dim*2 + node1_0 = conv_batch_normalization(node1_0, 'd_n1.0/batch_norm', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 4, 2, 2, name='d_n1.0/conv2d3', reuse=if_reuse) # s8 * s8 * df_dim*4 + node1_0 = conv_batch_normalization(node1_0, 'd_n1.0/batch_norm2', is_training=training, reuse=if_reuse) + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 8, 2, 2, name='d_n1.0/conv2d4', reuse=if_reuse) # s16 * s16 * df_dim*8 + node1_0 = conv_batch_normalization(node1_0, 'd_n1.0/batch_norm3', is_training=training, reuse=if_reuse) + + node1_1 = Conv2d(node1_0, 1, 1, self.df_dim * 2, 1, 1, name='d_n1.1/conv2d', reuse=if_reuse) + node1_1 = conv_batch_normalization(node1_1, 'd_n1.1/batch_norm', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + node1_1 = Conv2d(node1_1, 3, 3, self.df_dim * 2, 1, 1, name='d_n1.1/conv2d2', reuse=if_reuse) + node1_1 = conv_batch_normalization(node1_1, 'd_n1.1/batch_norm2', is_training=training, + activation_fn=tf.nn.leaky_relu, reuse=if_reuse) + node1_1 = Conv2d(node1_1, 3, 3, self.df_dim * 8, 1, 1, name='d_n1.1/conv2d3', reuse=if_reuse) + node1_1 = conv_batch_normalization(node1_1, 'd_n1.1/batch_norm3', is_training=training, reuse=if_reuse) + + node1 = add([node1_0, node1_1], name='d_n1/add') + node1 = tf.nn.leaky_relu(node1) return node1 - def get_discriminator(self, x_var, c_var): - x_code = self.d_image_template.construct(input=x_var) # s16 * s16 * df_dim*8 + def get_discriminator(self, x_var, c_var, is_training, 
+        if cfg.GAN.NETWORK_TYPE == "default":
+            x_code = self.d_encode_image(training=is_training, inputs=x_var, if_reuse=no_reuse)  # s16 * s16 * df_dim*8

-        c_code = self.d_context_template.construct(input=c_var)
-        c_code = tf.expand_dims(tf.expand_dims(c_code, 1), 1)
-        c_code = tf.tile(c_code, [1, self.s16, self.s16, 1])  # s16 * s16 * ef_dim
+            c_code = self.context_embedding(inputs=c_var, if_reuse=no_reuse)
+            c_code = tf.expand_dims(tf.expand_dims(c_code, 1), 1)
+            c_code = tf.tile(c_code, [1, self.s16, self.s16, 1])  # s16 * s16 * ef_dim

-        x_c_code = tf.concat(3, [x_code, c_code])
-        return self.d_discriminator_template.construct(input=x_c_code)
+            x_c_code = tf.concat([x_code, c_code], 3)
+            return self.discriminator(training=is_training, inputs=x_c_code, if_reuse=no_reuse)
+        else:
+            raise NotImplementedError

     # hr_d_net
-    def hr_d_encode_image(self):
-        node1_0 = \
-            (pt.template("input").  # 4s * 4s * 3
-             custom_conv2d(self.df_dim, k_h=4, k_w=4).  # 2s * 2s * df_dim
-             apply(leaky_rectify, leakiness=0.2).
-             custom_conv2d(self.df_dim * 2, k_h=4, k_w=4).  # s * s * df_dim*2
-             conv_batch_norm().
-             apply(leaky_rectify, leakiness=0.2).
-             custom_conv2d(self.df_dim * 4, k_h=4, k_w=4).  # s2 * s2 * df_dim*4
-             conv_batch_norm().
-             apply(leaky_rectify, leakiness=0.2).
-             custom_conv2d(self.df_dim * 8, k_h=4, k_w=4).  # s4 * s4 * df_dim*8
-             conv_batch_norm().
-             apply(leaky_rectify, leakiness=0.2).
-             custom_conv2d(self.df_dim * 16, k_h=4, k_w=4).  # s8 * s8 * df_dim*16
-             conv_batch_norm().
-             apply(leaky_rectify, leakiness=0.2).
-             custom_conv2d(self.df_dim * 32, k_h=4, k_w=4).  # s16 * s16 * df_dim*32
-             conv_batch_norm().
-             apply(leaky_rectify, leakiness=0.2).
-             custom_conv2d(self.df_dim * 16, k_h=1, k_w=1, d_h=1, d_w=1).  # s16 * s16 * df_dim*16
-             conv_batch_norm().
-             apply(leaky_rectify, leakiness=0.2).
-             custom_conv2d(self.df_dim * 8, k_h=1, k_w=1, d_h=1, d_w=1).  # s16 * s16 * df_dim*8
-             conv_batch_norm())
-        node1_1 = \
-            (node1_0.
-             custom_conv2d(self.df_dim * 2, k_h=1, k_w=1, d_h=1, d_w=1).
-             conv_batch_norm().
-             apply(leaky_rectify, leakiness=0.2).
-             custom_conv2d(self.df_dim * 2, k_h=3, k_w=3, d_h=1, d_w=1).
-             conv_batch_norm().
-             apply(leaky_rectify, leakiness=0.2).
-             custom_conv2d(self.df_dim * 8, k_h=3, k_w=3, d_h=1, d_w=1).
-             conv_batch_norm())
-
-        node1 = \
-            (node1_0.
-             apply(tf.add, node1_1).
-             apply(leaky_rectify, leakiness=0.2))
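+    # The high-res encoder below mirrors d_encode_image but starts from a 4s x 4s
+    # input, so it needs two extra stride-2 convolutions (plus 1x1 reductions) to
+    # reach the same s16 x s16 x df_dim*8 feature map.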
+    def hr_d_encode_image(self, inputs=None, training=True, if_reuse=None):
+        # input: 4s * 4s * 3
+        node1_0 = Conv2d(inputs, 4, 4, self.df_dim, 2, 2, activation_fn=tf.nn.leaky_relu,
+                         name='hr_d_encode_n1.0/conv2d1', reuse=if_reuse)  # 2s * 2s * df_dim
+        node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 2, 2, 2, name='hr_d_encode_n1.0/conv2d2', reuse=if_reuse)  # s * s * df_dim*2
+        node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm', is_training=training,
+                                           activation_fn=tf.nn.leaky_relu, reuse=if_reuse)
+        node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 4, 2, 2, name='hr_d_encode_n1.0/conv2d3', reuse=if_reuse)  # s2 * s2 * df_dim*4
+        node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm2', is_training=training,
+                                           activation_fn=tf.nn.leaky_relu, reuse=if_reuse)
+        node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 8, 2, 2, name='hr_d_encode_n1.0/conv2d4', reuse=if_reuse)  # s4 * s4 * df_dim*8
+        node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm3', is_training=training,
+                                           activation_fn=tf.nn.leaky_relu, reuse=if_reuse)
+        node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 16, 2, 2, name='hr_d_encode_n1.0/conv2d5', reuse=if_reuse)  # s8 * s8 * df_dim*16
+        node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm4', is_training=training,
+                                           activation_fn=tf.nn.leaky_relu, reuse=if_reuse)
+        node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 32, 2, 2, name='hr_d_encode_n1.0/conv2d6', reuse=if_reuse)  # s16 * s16 * df_dim*32
+        node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm5', is_training=training,
+                                           activation_fn=tf.nn.leaky_relu, reuse=if_reuse)
+        node1_0 = Conv2d(node1_0, 1, 1, self.df_dim * 16, 1, 1, name='hr_d_encode_n1.0/conv2d7', reuse=if_reuse)  # s16 * s16 * df_dim*16
+        node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm6', is_training=training,
+                                           activation_fn=tf.nn.leaky_relu, reuse=if_reuse)
+        node1_0 = Conv2d(node1_0, 1, 1, self.df_dim * 8, 1, 1, name='hr_d_encode_n1.0/conv2d8', reuse=if_reuse)  # s16 * s16 * df_dim*8
+        node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm7', is_training=training,
+                                           reuse=if_reuse)
+
+        node1_1 = Conv2d(node1_0, 1, 1, self.df_dim * 2, 1, 1, name='hr_d_encode_n1.1/conv2d', reuse=if_reuse)
+        node1_1 = conv_batch_normalization(node1_1, 'hr_d_encode_n1.1/batch_norm', is_training=training,
+                                           activation_fn=tf.nn.leaky_relu, reuse=if_reuse)
+        node1_1 = Conv2d(node1_1, 3, 3, self.df_dim * 2, 1, 1, name='hr_d_encode_n1.1/conv2d2', reuse=if_reuse)
+        node1_1 = conv_batch_normalization(node1_1, 'hr_d_encode_n1.1/batch_norm2', is_training=training,
+                                           activation_fn=tf.nn.leaky_relu, reuse=if_reuse)
+        node1_1 = Conv2d(node1_1, 3, 3, self.df_dim * 8, 1, 1, name='hr_d_encode_n1.1/conv2d3', reuse=if_reuse)
+        node1_1 = conv_batch_normalization(node1_1, 'hr_d_encode_n1.1/batch_norm3', is_training=training,
+                                           reuse=if_reuse)
+
+        node1 = add([node1_0, node1_1], name='hr_d_encode_n1/add')
+        node1 = tf.nn.leaky_relu(node1)
         return node1

-    def hr_get_discriminator(self, x_var, c_var):
-        x_code = self.hr_d_image_template.construct(input=x_var)  # s16 * s16 * df_dim*8
+    def hr_get_discriminator(self, x_var, c_var, is_training, no_reuse=None):
+        if cfg.GAN.NETWORK_TYPE == "default":
+            x_code = self.hr_d_encode_image(training=is_training, inputs=x_var, if_reuse=no_reuse)  # s16 * s16 * df_dim*8

-        c_code = self.hr_d_context_template.construct(input=c_var)
-        c_code = tf.expand_dims(tf.expand_dims(c_code, 1), 1)
-        c_code = tf.tile(c_code, [1, self.s16, self.s16, 1])  # s16 * s16 * ef_dim
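+            # The sentence embedding is broadcast (tiled) over every s16 x s16 spatial
+            # position so it can be concatenated with the image features channel-wise.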
+            c_code = self.context_embedding(inputs=c_var, if_reuse=no_reuse)
+            c_code = tf.expand_dims(tf.expand_dims(c_code, 1), 1)
+            c_code = tf.tile(c_code, [1, self.s16, self.s16, 1])  # s16 * s16 * ef_dim

-        x_c_code = tf.concat(3, [x_code, c_code])
-        return self.hr_discriminator_template.construct(input=x_c_code)
+            x_c_code = tf.concat([x_code, c_code], 3)
+            return self.discriminator(training=is_training, inputs=x_c_code, if_reuse=no_reuse)
+        else:
+            raise NotImplementedError
diff --git a/stageII/run_exp.py b/stageII/run_exp.py
index 1ab9a7d..0a23b50 100644
--- a/stageII/run_exp.py
+++ b/stageII/run_exp.py
@@ -8,21 +8,23 @@
 import argparse
 import pprint

-from misc.datasets import TextDataset
-from stageII.model import CondGAN
-from stageII.trainer import CondGANTrainer
-from misc.utils import mkdir_p
-from misc.config import cfg, cfg_from_file
+import sys
+sys.path.append('./misc')
+sys.path.append('stageII')
+
+from datasets import TextDataset
+from model import CondGAN
+from trainer import CondGANTrainer
+from utils import mkdir_p
+from config import cfg, cfg_from_file
+
+from tensorflow.contrib.framework.python.framework import checkpoint_utils


 def parse_args():
     parser = argparse.ArgumentParser(description='Train a GAN network')
-    parser.add_argument('--cfg', dest='cfg_file',
-                        help='optional config file',
-                        default=None, type=str)
-    parser.add_argument('--gpu', dest='gpu_id',
-                        help='GPU device id to use [0]',
-                        default=-1, type=int)
+    parser.add_argument('--cfg', dest='cfg_file', help='optional config file', default=None, type=str)
+    parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', default=-1, type=int)
     # if len(sys.argv) == 1:
     #     parser.print_help()
     #     sys.exit(1)
@@ -49,25 +51,20 @@ def parse_args():
     if cfg.TRAIN.FLAG:
         filename_train = '%s/train' % (datadir)
         dataset.train = dataset.get_data(filename_train)
-        ckt_logs_dir = "ckt_logs/%s/%s_%s" % \
-            (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)
+        ckt_logs_dir = "ckt_logs/%s/%s_%s" % (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)
         mkdir_p(ckt_logs_dir)
     else:
         s_tmp = cfg.TRAIN.PRETRAINED_MODEL
         ckt_logs_dir = s_tmp[:s_tmp.find('.ckpt')]
+    model = CondGAN(lr_imsize=int(dataset.image_shape[0] / dataset.hr_lr_ratio), hr_lr_ratio=dataset.hr_lr_ratio)

-    model = CondGAN(
-        lr_imsize=int(dataset.image_shape[0] / dataset.hr_lr_ratio),
-        hr_lr_ratio=dataset.hr_lr_ratio
-    )
-
-    algo = CondGANTrainer(
-        model=model,
-        dataset=dataset,
-        ckt_logs_dir=ckt_logs_dir
-    )
+    algo = CondGANTrainer(model=model, dataset=dataset, ckt_logs_dir=ckt_logs_dir)

     if cfg.TRAIN.FLAG:
+        var_list = checkpoint_utils.list_variables(cfg.TRAIN.PRETRAINED_MODEL)
+        # var_list = checkpoint_utils.list_variables('models/birds_model_164000.ckpt')
+        for v in var_list:
+            print(v)
         algo.train()
     else:
         ''' For every input text embedding/sentence in the
diff --git a/stageII/trainer.py b/stageII/trainer.py
index 457405c..8691796 100644
--- a/stageII/trainer.py
+++ b/stageII/trainer.py
@@ -1,19 +1,22 @@
 from __future__ import division
 from __future__ import print_function

-import prettytensor as pt
+# import prettytensor as pt
 import tensorflow as tf
 import numpy as np
-import scipy.misc
+import imageio
+# import scipy.misc
 import os
-import sys
 from six.moves import range
 from progressbar import ETA, Bar, Percentage, ProgressBar
 from PIL import Image, ImageDraw, ImageFont
+import sys
+sys.path.append('./misc')

-from misc.config import cfg
-from misc.utils import mkdir_p
+from config import cfg
+from utils import mkdir_p
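+# NB: unlike the old scipy.misc.imresize, skimage's resize returns a float image and
+# does not rescale values to [0, 255]; callers that mix its output with 0-255 arrays
+# (see save_super_images below) may still need an explicit rescale before saving.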
+from skimage.transform import resize

 TINY = 1e-8

@@ -27,12 +30,7 @@


 class CondGANTrainer(object):
-    def __init__(self,
-                 model,
-                 dataset=None,
-                 exp_name="model",
-                 ckt_logs_dir="ckt_logs",
-                 ):
+    def __init__(self, model, dataset=None, exp_name="model", ckt_logs_dir="ckt_logs"):
         """
         :type model: RegularizedGAN
         """
@@ -51,39 +49,24 @@ def __init__(self,
         self.hr_image_shape = self.dataset.image_shape
         ratio = self.dataset.hr_lr_ratio
-        self.lr_image_shape = [int(self.hr_image_shape[0] / ratio),
-                               int(self.hr_image_shape[1] / ratio),
+        self.lr_image_shape = [int(self.hr_image_shape[0] / ratio), int(self.hr_image_shape[1] / ratio),
                                self.hr_image_shape[2]]
         print('hr_image_shape', self.hr_image_shape)
         print('lr_image_shape', self.lr_image_shape)

     def build_placeholder(self):
         '''Helper function for init_opt'''
-        self.hr_images = tf.placeholder(
-            tf.float32, [self.batch_size] + self.hr_image_shape,
-            name='real_hr_images')
-        self.hr_wrong_images = tf.placeholder(
-            tf.float32, [self.batch_size] + self.hr_image_shape,
-            name='wrong_hr_images'
-        )
-        self.embeddings = tf.placeholder(
-            tf.float32, [self.batch_size] + self.dataset.embedding_shape,
-            name='conditional_embeddings'
-        )
-
-        self.generator_lr = tf.placeholder(
-            tf.float32, [],
-            name='generator_learning_rate'
-        )
-        self.discriminator_lr = tf.placeholder(
-            tf.float32, [],
-            name='discriminator_learning_rate'
-        )
+        self.hr_images = tf.placeholder(tf.float32, [self.batch_size] + self.hr_image_shape, name='real_hr_images')
+        self.hr_wrong_images = tf.placeholder(tf.float32, [self.batch_size] + self.hr_image_shape,
+                                              name='wrong_hr_images')
+        self.embeddings = tf.placeholder(tf.float32, [self.batch_size] + self.dataset.embedding_shape,
+                                         name='conditional_embeddings')
+
+        self.generator_lr = tf.placeholder(tf.float32, [], name='generator_learning_rate')
+        self.discriminator_lr = tf.placeholder(tf.float32, [], name='discriminator_learning_rate')
         #
-        self.images = tf.image.resize_bilinear(self.hr_images,
-                                               self.lr_image_shape[:2])
-        self.wrong_images = tf.image.resize_bilinear(self.hr_wrong_images,
-                                                     self.lr_image_shape[:2])
+        self.images = tf.image.resize_bilinear(self.hr_images, self.lr_image_shape[:2])
+        self.wrong_images = tf.image.resize_bilinear(self.hr_wrong_images, self.lr_image_shape[:2])

     def sample_encoded_context(self, embeddings):
         '''Helper function for init_opt'''
@@ -107,95 +90,71 @@ def sample_encoded_context(self, embeddings):

     def init_opt(self):
         self.build_placeholder()

-        with pt.defaults_scope(phase=pt.Phase.train):
-            # ####get output from G network####################################
-            with tf.variable_scope("g_net"):
-                c, kl_loss = self.sample_encoded_context(self.embeddings)
-                z = tf.random_normal([self.batch_size, cfg.Z_DIM])
-                self.log_vars.append(("hist_c", c))
-                self.log_vars.append(("hist_z", z))
-                fake_images = self.model.get_generator(tf.concat(1, [c, z]))
-
+        # ####get output from G network####################################
+        with tf.variable_scope("g_net"):
+            c, kl_loss = self.sample_encoded_context(self.embeddings)
+            z = tf.random_normal([self.batch_size, cfg.Z_DIM])
+            self.log_vars.append(("hist_c", c))
+            self.log_vars.append(("hist_z", z))
+            fake_images = self.model.get_generator(tf.concat([c, z], 1), True)
+
+        with tf.variable_scope("d_net"):
             # ####get discriminator_loss and generator_loss ###################
-            discriminator_loss, generator_loss =\
-                self.compute_losses(self.images,
-                                    self.wrong_images,
-                                    fake_images,
-                                    self.embeddings,
-                                    flag='lr')
+            discriminator_loss, generator_loss = self.compute_losses(self.images, self.wrong_images, fake_images,
+                                                                     self.embeddings, flag='lr')
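+            # kl_loss is the conditioning-augmentation regularizer (a KL term pulling
+            # the text-conditioned Gaussian toward N(0, I)), weighted via the
+            # cfg.TRAIN.COEFF.KL coefficient set in misc/config.py.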
             generator_loss += kl_loss
             self.log_vars.append(("g_loss_kl_loss", kl_loss))
             self.log_vars.append(("g_loss", generator_loss))
             self.log_vars.append(("d_loss", discriminator_loss))

-            # #### For hr_g and hr_d #########################################
-            with tf.variable_scope("hr_g_net"):
-                hr_c, hr_kl_loss = self.sample_encoded_context(self.embeddings)
-                self.log_vars.append(("hist_hr_c", hr_c))
-                hr_fake_images = self.model.hr_get_generator(fake_images, hr_c)
+        # #### For hr_g and hr_d #########################################
+        with tf.variable_scope("hr_g_net"):
+            hr_c, hr_kl_loss = self.sample_encoded_context(self.embeddings)
+            self.log_vars.append(("hist_hr_c", hr_c))
+            hr_fake_images = self.model.hr_get_generator(fake_images, hr_c, True)
+
+        with tf.variable_scope("hr_d_net"):
             # get losses
-            hr_discriminator_loss, hr_generator_loss =\
-                self.compute_losses(self.hr_images,
-                                    self.hr_wrong_images,
-                                    hr_fake_images,
-                                    self.embeddings,
-                                    flag='hr')
+            hr_discriminator_loss, hr_generator_loss = self.compute_losses(self.hr_images, self.hr_wrong_images,
+                                                                           hr_fake_images, self.embeddings, flag='hr')
             hr_generator_loss += hr_kl_loss
             self.log_vars.append(("hr_g_loss", hr_generator_loss))
             self.log_vars.append(("hr_d_loss", hr_discriminator_loss))

         # #######define self.g_sum, self.d_sum,....########################
-        self.prepare_trainer(discriminator_loss, generator_loss,
-                             hr_discriminator_loss, hr_generator_loss)
+        self.prepare_trainer(discriminator_loss, generator_loss, hr_discriminator_loss, hr_generator_loss)
         self.define_summaries()

-        with pt.defaults_scope(phase=pt.Phase.test):
-            self.sampler()
-            self.visualization(cfg.TRAIN.NUM_COPY)
-            print("success")
+        self.sampler()
+        self.visualization(cfg.TRAIN.NUM_COPY)
+        print("success")

     def sampler(self):
         with tf.variable_scope("g_net", reuse=True):
             c, _ = self.sample_encoded_context(self.embeddings)
             z = tf.random_normal([self.batch_size, cfg.Z_DIM])
-            self.fake_images = self.model.get_generator(tf.concat(1, [c, z]))
+            self.fake_images = self.model.get_generator(tf.concat([c, z], 1), False)
         with tf.variable_scope("hr_g_net", reuse=True):
             hr_c, _ = self.sample_encoded_context(self.embeddings)
-            self.hr_fake_images =\
-                self.model.hr_get_generator(self.fake_images, hr_c)
+            self.hr_fake_images = self.model.hr_get_generator(self.fake_images, hr_c, False)

-    def compute_losses(self, images, wrong_images,
-                       fake_images, embeddings, flag='lr'):
+    def compute_losses(self, images, wrong_images, fake_images, embeddings, flag='lr'):
         if flag == 'lr':
-            real_logit =\
-                self.model.get_discriminator(images, embeddings)
-            wrong_logit =\
-                self.model.get_discriminator(wrong_images, embeddings)
-            fake_logit =\
-                self.model.get_discriminator(fake_images, embeddings)
+            real_logit = self.model.get_discriminator(images, embeddings, True)
+            wrong_logit = self.model.get_discriminator(wrong_images, embeddings, True, no_reuse=tf.AUTO_REUSE)
+            fake_logit = self.model.get_discriminator(fake_images, embeddings, True, no_reuse=tf.AUTO_REUSE)
         else:
-            real_logit =\
-                self.model.hr_get_discriminator(images, embeddings)
-            wrong_logit =\
-                self.model.hr_get_discriminator(wrong_images, embeddings)
-            fake_logit =\
-                self.model.hr_get_discriminator(fake_images, embeddings)
-
-        real_d_loss =\
-            tf.nn.sigmoid_cross_entropy_with_logits(real_logit,
-                                                    tf.ones_like(real_logit))
+            real_logit = self.model.hr_get_discriminator(images, embeddings, True)
+            wrong_logit = self.model.hr_get_discriminator(wrong_images, embeddings, True, no_reuse=tf.AUTO_REUSE)
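+            # "wrong" logits score real images paired with mismatched text embeddings,
+            # the matching-aware negative pair used alongside the fake-image pair.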
+            fake_logit = self.model.hr_get_discriminator(fake_images, embeddings, True, no_reuse=tf.AUTO_REUSE)
+
+        real_d_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=real_logit, labels=tf.ones_like(real_logit))
         real_d_loss = tf.reduce_mean(real_d_loss)
-        wrong_d_loss =\
-            tf.nn.sigmoid_cross_entropy_with_logits(wrong_logit,
-                                                    tf.zeros_like(wrong_logit))
+        wrong_d_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=wrong_logit, labels=tf.zeros_like(wrong_logit))
         wrong_d_loss = tf.reduce_mean(wrong_d_loss)
-        fake_d_loss =\
-            tf.nn.sigmoid_cross_entropy_with_logits(fake_logit,
-                                                    tf.zeros_like(fake_logit))
+        fake_d_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_logit, labels=tf.zeros_like(fake_logit))
         fake_d_loss = tf.reduce_mean(fake_d_loss)
         if cfg.TRAIN.B_WRONG:
-            discriminator_loss =\
-                real_d_loss + (wrong_d_loss + fake_d_loss) / 2.
+            discriminator_loss = real_d_loss + (wrong_d_loss + fake_d_loss) / 2.
         else:
             discriminator_loss = real_d_loss + fake_d_loss
         if flag == 'lr':
@@ -209,9 +168,7 @@ def compute_losses(self, images, wrong_images,
             if cfg.TRAIN.B_WRONG:
                 self.log_vars.append(("hr_d_loss_wrong", wrong_d_loss))

-        generator_loss = \
-            tf.nn.sigmoid_cross_entropy_with_logits(fake_logit,
-                                                    tf.ones_like(fake_logit))
+        generator_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_logit, labels=tf.ones_like(fake_logit))
         generator_loss = tf.reduce_mean(generator_loss)
         if flag == 'lr':
             self.log_vars.append(("g_loss_fake", generator_loss))
@@ -223,37 +180,22 @@ def compute_losses(self, images, wrong_images,
     def define_one_trainer(self, loss, learning_rate, key_word):
         '''Helper function for init_opt'''
         all_vars = tf.trainable_variables()
-        tarin_vars = [var for var in all_vars if
-                      var.name.startswith(key_word)]
+        train_vars = [var for var in all_vars if var.name.startswith(key_word)]

         opt = tf.train.AdamOptimizer(learning_rate, beta1=0.5)
-        trainer = pt.apply_optimizer(opt, losses=[loss], var_list=tarin_vars)
+        trainer = opt.minimize(loss, var_list=train_vars)
         return trainer

-    def prepare_trainer(self, discriminator_loss, generator_loss,
-                        hr_discriminator_loss, hr_generator_loss):
+    def prepare_trainer(self, discriminator_loss, generator_loss, hr_discriminator_loss, hr_generator_loss):
         ft_lr_retio = cfg.TRAIN.FT_LR_RETIO
-        self.discriminator_trainer =\
-            self.define_one_trainer(discriminator_loss,
-                                    self.discriminator_lr * ft_lr_retio,
-                                    'd_')
-        self.generator_trainer =\
-            self.define_one_trainer(generator_loss,
-                                    self.generator_lr * ft_lr_retio,
-                                    'g_')
-        self.hr_discriminator_trainer =\
-            self.define_one_trainer(hr_discriminator_loss,
-                                    self.discriminator_lr,
-                                    'hr_d_')
-        self.hr_generator_trainer =\
-            self.define_one_trainer(hr_generator_loss,
-                                    self.generator_lr,
-                                    'hr_g_')
-
-        self.ft_generator_trainer = \
-            self.define_one_trainer(hr_generator_loss,
-                                    self.generator_lr * cfg.TRAIN.FT_LR_RETIO,
-                                    'g_')
+        self.discriminator_trainer = self.define_one_trainer(discriminator_loss, self.discriminator_lr * ft_lr_retio,
+                                                             'd_')
+        self.generator_trainer = self.define_one_trainer(generator_loss, self.generator_lr * ft_lr_retio, 'g_')
+        self.hr_discriminator_trainer = self.define_one_trainer(hr_discriminator_loss, self.discriminator_lr, 'hr_d_')
+        self.hr_generator_trainer = self.define_one_trainer(hr_generator_loss, self.generator_lr, 'hr_g_')
+
+        self.ft_generator_trainer = self.define_one_trainer(hr_generator_loss,
+                                                            self.generator_lr * cfg.TRAIN.FT_LR_RETIO, 'g_')
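+        # ft_generator_trainer applies the stage-II generator loss to the stage-I
+        # generator variables ('g_') at a reduced rate (FT_LR_RETIO), so the low-res
+        # generator keeps being fine-tuned while stage II trains.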
         self.log_vars.append(("hr_d_learning_rate", self.discriminator_lr))
         self.log_vars.append(("hr_g_learning_rate", self.generator_lr))

@@ -263,21 +205,21 @@ def define_summaries(self):
         all_sum = {'g': [], 'd': [], 'hr_g': [], 'hr_d': [], 'hist': []}
         for k, v in self.log_vars:
             if k.startswith('g'):
-                all_sum['g'].append(tf.scalar_summary(k, v))
+                all_sum['g'].append(tf.summary.scalar(k, v))
             elif k.startswith('d'):
-                all_sum['d'].append(tf.scalar_summary(k, v))
+                all_sum['d'].append(tf.summary.scalar(k, v))
             elif k.startswith('hr_g'):
-                all_sum['hr_g'].append(tf.scalar_summary(k, v))
+                all_sum['hr_g'].append(tf.summary.scalar(k, v))
             elif k.startswith('hr_d'):
-                all_sum['hr_d'].append(tf.scalar_summary(k, v))
+                all_sum['hr_d'].append(tf.summary.scalar(k, v))
             elif k.startswith('hist'):
-                all_sum['hist'].append(tf.histogram_summary(k, v))
+                all_sum['hist'].append(tf.summary.histogram(k, v))

-        self.g_sum = tf.merge_summary(all_sum['g'])
-        self.d_sum = tf.merge_summary(all_sum['d'])
-        self.hr_g_sum = tf.merge_summary(all_sum['hr_g'])
-        self.hr_d_sum = tf.merge_summary(all_sum['hr_d'])
-        self.hist_sum = tf.merge_summary(all_sum['hist'])
+        self.g_sum = tf.summary.merge(all_sum['g'])
+        self.d_sum = tf.summary.merge(all_sum['d'])
+        self.hr_g_sum = tf.summary.merge(all_sum['hr_g'])
+        self.hr_d_sum = tf.summary.merge(all_sum['hr_d'])
+        self.hist_sum = tf.summary.merge(all_sum['hist'])

     def visualize_one_superimage(self, img_var, images, rows, filename):
         stacked_img = []
@@ -287,35 +229,27 @@ def visualize_one_superimage(self, img_var, images, rows, filename):
             for col in range(rows):
                 row_img.append(img_var[row * rows + col, :, :, :])
             # each rows is 1realimage +10_fakeimage
-            stacked_img.append(tf.concat(1, row_img))
-        imgs = tf.expand_dims(tf.concat(0, stacked_img), 0)
-        current_img_summary = tf.image_summary(filename, imgs)
+            stacked_img.append(tf.concat(row_img, 1))
+        imgs = tf.expand_dims(tf.concat(stacked_img, 0), 0)
+        current_img_summary = tf.summary.image(filename, imgs)
         return current_img_summary, imgs

     def visualization(self, n):
-        fake_sum_train, superimage_train =\
-            self.visualize_one_superimage(self.fake_images[:n * n],
-                                          self.images[:n * n],
-                                          n, "train")
-        fake_sum_test, superimage_test =\
-            self.visualize_one_superimage(self.fake_images[n * n:2 * n * n],
-                                          self.images[n * n:2 * n * n],
-                                          n, "test")
-        self.superimages = tf.concat(0, [superimage_train, superimage_test])
-        self.image_summary = tf.merge_summary([fake_sum_train, fake_sum_test])
-
-        hr_fake_sum_train, hr_superimage_train =\
-            self.visualize_one_superimage(self.hr_fake_images[:n * n],
-                                          self.hr_images[:n * n, :, :, :],
-                                          n, "hr_train")
-        hr_fake_sum_test, hr_superimage_test =\
-            self.visualize_one_superimage(self.hr_fake_images[n * n:2 * n * n],
-                                          self.hr_images[n * n:2 * n * n],
-                                          n, "hr_test")
-        self.hr_superimages =\
-            tf.concat(0, [hr_superimage_train, hr_superimage_test])
-        self.hr_image_summary =\
-            tf.merge_summary([hr_fake_sum_train, hr_fake_sum_test])
+        fake_sum_train, superimage_train = self.visualize_one_superimage(self.fake_images[:n * n], self.images[:n * n],
+                                                                         n, "train")
+        fake_sum_test, superimage_test = self.visualize_one_superimage(self.fake_images[n * n:2 * n * n],
+                                                                       self.images[n * n:2 * n * n], n, "test")
+        self.superimages = tf.concat([superimage_train, superimage_test], 0)
+        self.image_summary = tf.summary.merge([fake_sum_train, fake_sum_test])
+
+        hr_fake_sum_train, hr_superimage_train = self.visualize_one_superimage(self.hr_fake_images[:n * n],
+                                                                               self.hr_images[:n * n, :, :, :], n,
+                                                                               "hr_train")
+        hr_fake_sum_test, hr_superimage_test = self.visualize_one_superimage(self.hr_fake_images[n * n:2 * n * n],
+                                                                             self.hr_images[n * n:2 * n * n], n,
+                                                                             "hr_test")
+        self.hr_superimages = tf.concat([hr_superimage_train, hr_superimage_test], 0)
+        self.hr_image_summary = tf.summary.merge([hr_fake_sum_train, hr_fake_sum_test])

     def preprocess(self, x, n):
         # make sure every row with n column have the same embeddings
@@ -325,43 +259,33 @@
         return x

     def epoch_sum_images(self, sess, n):
-        images_train, _, embeddings_train, captions_train, _ =\
-            self.dataset.train.next_batch(n * n, cfg.TRAIN.NUM_EMBEDDING)
+        images_train, _, embeddings_train, captions_train, _ = self.dataset.train.next_batch(n * n,
+                                                                                             cfg.TRAIN.NUM_EMBEDDING)
         images_train = self.preprocess(images_train, n)
         embeddings_train = self.preprocess(embeddings_train, n)

-        images_test, _, embeddings_test, captions_test, _ =\
-            self.dataset.test.next_batch(n * n, 1)
+        images_test, _, embeddings_test, captions_test, _ = self.dataset.test.next_batch(n * n, 1)
         images_test = self.preprocess(images_test, n)
         embeddings_test = self.preprocess(embeddings_test, n)

         images = np.concatenate([images_train, images_test], axis=0)
-        embeddings =\
-            np.concatenate([embeddings_train, embeddings_test], axis=0)
+        embeddings = np.concatenate([embeddings_train, embeddings_test], axis=0)

         if self.batch_size > 2 * n * n:
-            images_pad, _, embeddings_pad, _, _ =\
-                self.dataset.test.next_batch(self.batch_size - 2 * n * n, 1)
+            images_pad, _, embeddings_pad, _, _ = self.dataset.test.next_batch(self.batch_size - 2 * n * n, 1)
             images = np.concatenate([images, images_pad], axis=0)
             embeddings = np.concatenate([embeddings, embeddings_pad], axis=0)

-        feed_out = [self.superimages, self.image_summary,
-                    self.hr_superimages, self.hr_image_summary]
-        feed_dict = {self.hr_images: images,
-                     self.embeddings: embeddings}
-        gen_samples, img_summary, hr_gen_samples, hr_img_summary =\
-            sess.run(feed_out, feed_dict)
+        feed_out = [self.superimages, self.image_summary, self.hr_superimages, self.hr_image_summary]
+        feed_dict = {self.hr_images: images, self.embeddings: embeddings}
+        gen_samples, img_summary, hr_gen_samples, hr_img_summary = sess.run(feed_out, feed_dict)

         # save images generated for train and test captions
-        scipy.misc.imsave('%s/lr_fake_train.jpg' %
-                          (self.log_dir), gen_samples[0])
-        scipy.misc.imsave('%s/lr_fake_test.jpg' %
-                          (self.log_dir), gen_samples[1])
+        imageio.imwrite('%s/lr_fake_train.jpg' % (self.log_dir), gen_samples[0])
+        imageio.imwrite('%s/lr_fake_test.jpg' % (self.log_dir), gen_samples[1])
         #
-        scipy.misc.imsave('%s/hr_fake_train.jpg' %
-                          (self.log_dir), hr_gen_samples[0])
-        scipy.misc.imsave('%s/hr_fake_test.jpg' %
-                          (self.log_dir), hr_gen_samples[1])
+        imageio.imwrite('%s/hr_fake_train.jpg' % (self.log_dir), hr_gen_samples[0])
+        imageio.imwrite('%s/hr_fake_test.jpg' % (self.log_dir), hr_gen_samples[1])
         #
         pfi_train = open(self.log_dir + "/train.txt", "w")
         pfi_test = open(self.log_dir + "/test.txt", "w")
@@ -378,17 +302,15 @@
     def build_model(self, sess):
         self.init_opt()
-
-        sess.run(tf.initialize_all_variables())
+        sess.run(tf.global_variables_initializer())

         if len(self.model_path) > 0:
             print("Reading model parameters from %s" % self.model_path)
             all_vars = tf.trainable_variables()
-            # all_vars = tf.all_variables()
-            restore_vars = []
-            for var in all_vars:
-                if var.name.startswith('g_') or var.name.startswith('d_'):
-                    restore_vars.append(var)
-                    # print(var.name)
+            restore_vars = [var for var in all_vars
+                            if var.name.startswith('g_') or var.name.startswith('d_')]
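+            # Only stage-I weights ('g_'/'d_') are restored from the pretrained
+            # checkpoint; the hr_g_/hr_d_ networks start from fresh initialization.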
+            # restore_vars = []
+            # for var in all_vars:
+            #     if var.name.startswith('g_') or var.name.startswith('d_'):
+            #         restore_vars.append(var)
             saver = tf.train.Saver(restore_vars)
             saver.restore(sess, self.model_path)

@@ -401,33 +323,24 @@ def build_model(self, sess):
             counter = 0
         return counter

-    def train_one_step(self, generator_lr,
-                       discriminator_lr,
-                       counter, summary_writer, log_vars, sess):
+    def train_one_step(self, generator_lr, discriminator_lr, counter, summary_writer, log_vars, sess):
         # training d
-        hr_images, hr_wrong_images, embeddings, _, _ =\
-            self.dataset.train.next_batch(self.batch_size,
-                                          cfg.TRAIN.NUM_EMBEDDING)
+        hr_images, hr_wrong_images, embeddings, _, _ = self.dataset.train.next_batch(self.batch_size,
+                                                                                     cfg.TRAIN.NUM_EMBEDDING)
         feed_dict = {self.hr_images: hr_images,
                      self.hr_wrong_images: hr_wrong_images,
                      self.embeddings: embeddings,
                      self.generator_lr: generator_lr,
-                     self.discriminator_lr: discriminator_lr
-                     }
+                     self.discriminator_lr: discriminator_lr}
         if cfg.TRAIN.FINETUNE_LR:
             # train d1
-            feed_out_d = [self.hr_discriminator_trainer,
-                          self.hr_d_sum,
-                          log_vars,
-                          self.hist_sum]
+            feed_out_d = [self.hr_discriminator_trainer, self.hr_d_sum, log_vars, self.hist_sum]
             ret_list = sess.run(feed_out_d, feed_dict)
             summary_writer.add_summary(ret_list[1], counter)
             log_vals = ret_list[2]
             summary_writer.add_summary(ret_list[3], counter)
             # train g1 and finetune g0 with the loss of g1
-            feed_out_g = [self.hr_generator_trainer,
-                          self.ft_generator_trainer,
-                          self.hr_g_sum]
+            feed_out_g = [self.hr_generator_trainer, self.ft_generator_trainer, self.hr_g_sum]
             _, _, hr_g_sum = sess.run(feed_out_g, feed_dict)
             summary_writer.add_summary(hr_g_sum, counter)
             # finetune d0 with the loss of d0
@@ -440,17 +353,13 @@ def train_one_step(self, generator_lr,
             summary_writer.add_summary(g_sum, counter)
         else:
             # train d1
-            feed_out_d = [self.hr_discriminator_trainer,
-                          self.hr_d_sum,
-                          log_vars,
-                          self.hist_sum]
+            feed_out_d = [self.hr_discriminator_trainer, self.hr_d_sum, log_vars, self.hist_sum]
             ret_list = sess.run(feed_out_d, feed_dict)
             summary_writer.add_summary(ret_list[1], counter)
             log_vals = ret_list[2]
             summary_writer.add_summary(ret_list[3], counter)
             # train g1
-            feed_out_g = [self.hr_generator_trainer,
-                          self.hr_g_sum]
+            feed_out_g = [self.hr_generator_trainer, self.hr_g_sum]
             _, hr_g_sum = sess.run(feed_out_g, feed_dict)
             summary_writer.add_summary(hr_g_sum, counter)

@@ -458,15 +367,14 @@ def train_one_step(self, generator_lr,
     def train(self):
         config = tf.ConfigProto(allow_soft_placement=True)
+        config.gpu_options.per_process_gpu_memory_fraction = 0.7
         with tf.Session(config=config) as sess:
             with tf.device("/gpu:%d" % cfg.GPU_ID):
                 counter = self.build_model(sess)
-                saver = tf.train.Saver(tf.all_variables(),
-                                       keep_checkpoint_every_n_hours=5)
+                saver = tf.train.Saver(tf.global_variables(), keep_checkpoint_every_n_hours=5)

                 # summary_op = tf.merge_all_summaries()
-                summary_writer = tf.train.SummaryWriter(self.log_dir,
-                                                        sess.graph)
+                summary_writer = tf.summary.FileWriter(self.log_dir, sess.graph)

                 if cfg.TRAIN.FINETUNE_LR:
                     keys = ["hr_d_loss", "hr_g_loss", "d_loss", "g_loss"]
@@ -487,10 +395,8 @@ def train(self):
                 decay_start = cfg.TRAIN.PRETRAINED_EPOCH
                 epoch_start = int(counter / updates_per_epoch)
                 for epoch in range(epoch_start, self.max_epoch):
-                    widgets = ["epoch #%d|" % epoch,
-                               Percentage(), Bar(), ETA()]
-                    pbar = ProgressBar(maxval=updates_per_epoch,
-                                       widgets=widgets)
+                    widgets = ["epoch #%d|" % epoch, Percentage(), Bar(), ETA()]
+                    pbar = ProgressBar(maxval=updates_per_epoch, widgets=widgets)
                     pbar.start()

                     if epoch % lr_decay_step == 0 and epoch > decay_start:
@@ -500,23 +406,17 @@ def train(self):
                     all_log_vals = []
                     for i in range(updates_per_epoch):
                         pbar.update(i)
-                        log_vals = self.train_one_step(generator_lr,
-                                                       discriminator_lr,
-                                                       counter, summary_writer,
+                        log_vals = self.train_one_step(generator_lr, discriminator_lr, counter, summary_writer,
                                                        log_vars, sess)
                         all_log_vals.append(log_vals)
                         # save checkpoint
                         counter += 1
                         if counter % self.snapshot_interval == 0:
-                            snapshot_path = "%s/%s_%s.ckpt" %\
-                                             (self.checkpoint_dir,
-                                              self.exp_name,
-                                              str(counter))
+                            snapshot_path = "%s/%s_%s.ckpt" % (self.checkpoint_dir, self.exp_name, str(counter))
                             fn = saver.save(sess, snapshot_path)
                             print("Model saved in file: %s" % fn)

-                    img_summary, img_summary2 =\
-                        self.epoch_sum_images(sess, cfg.TRAIN.NUM_COPY)
+                    img_summary, img_summary2 = self.epoch_sum_images(sess, cfg.TRAIN.NUM_COPY)
                     summary_writer.add_summary(img_summary, counter)
                     summary_writer.add_summary(img_summary2, counter)

@@ -526,9 +426,7 @@ def train(self):
                         dic_logs[k] = v
                         # print(k, v)

-                    log_line = "; ".join("%s: %s" %
-                                         (str(k), str(dic_logs[k]))
-                                         for k in dic_logs)
+                    log_line = "; ".join("%s: %s" % (str(k), str(dic_logs[k])) for k in dic_logs)
                     print("Epoch %d | " % (epoch) + log_line)
                     sys.stdout.flush()
                     if np.any(np.isnan(avg_log_vals)):
@@ -559,15 +457,13 @@ def drawCaption(self, img, caption):

         return img_txt

-    def save_super_images(self, images, sample_batchs, hr_sample_batchs,
-                          savenames, captions_batchs,
-                          sentenceID, save_dir, subset):
+    def save_super_images(self, images, sample_batchs, hr_sample_batchs, savenames, captions_batchs, sentenceID,
+                          save_dir, subset):
         # batch_size samples for each embedding
         # Up to 16 samples for each text embedding/sentence
         numSamples = len(sample_batchs)
         for j in range(len(savenames)):
-            s_tmp = '%s-1real-%dsamples/%s/%s' %\
-                (save_dir, numSamples, subset, savenames[j])
+            s_tmp = '%s-1real-%dsamples/%s/%s' % (save_dir, numSamples, subset, savenames[j])
             folder = s_tmp[:s_tmp.rfind('/')]
             if not os.path.isdir(folder):
                 print('Make a new folder: ', folder)
@@ -585,7 +481,7 @@ def save_super_images(self, images, sample_batchs, hr_sample_batchs,
                 lr_img = sample_batchs[i][j]
                 hr_img = hr_sample_batchs[i][j]
                 hr_img = (hr_img + 1.0) * 127.5
-                re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
+                re_sample = resize(lr_img, hr_img.shape[:2])
                 row1.append(re_sample)
                 row2.append(hr_img)
             row1 = np.concatenate(row1, axis=1)
@@ -600,36 +496,31 @@ def save_super_images(self, images, sample_batchs, hr_sample_batchs,
                 lr_img = sample_batchs[i][j]
                 hr_img = hr_sample_batchs[i][j]
                 hr_img = (hr_img + 1.0) * 127.5
-                re_sample = scipy.misc.imresize(lr_img, hr_img.shape[:2])
+                re_sample = resize(lr_img, hr_img.shape[:2])
                 row1.append(re_sample)
                 row2.append(hr_img)
             row1 = np.concatenate(row1, axis=1)
             row2 = np.concatenate(row2, axis=1)
             super_row = np.concatenate([row1, row2], axis=0)
             superimage2 = np.zeros_like(superimage)
-            superimage2[:super_row.shape[0],
-                        :super_row.shape[1],
-                        :super_row.shape[2]] = super_row
+            superimage2[:super_row.shape[0], :super_row.shape[1], :super_row.shape[2]] = super_row
             mid_padding = np.zeros((64, superimage.shape[1], 3))
-            superimage = np.concatenate([superimage, mid_padding,
-                                         superimage2], axis=0)
+            superimage = np.concatenate([superimage, mid_padding, superimage2], axis=0)

             top_padding = np.zeros((128, superimage.shape[1], 3))
-            superimage =\
-                np.concatenate([top_padding, superimage], axis=0)
+            superimage = np.concatenate([top_padding, superimage], axis=0)
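+            # Layout: each super-image stacks a row of upsampled stage-I samples over
+            # a row of stage-II samples; the 128-px top padding leaves room for the
+            # caption drawn by drawCaption below.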
             captions = captions_batchs[j][sentenceID]
             fullpath = '%s_sentence%d.jpg' % (s_tmp, sentenceID)
             superimage = self.drawCaption(np.uint8(superimage), captions)
-            scipy.misc.imsave(fullpath, superimage)
+            imageio.imwrite(fullpath, superimage)

     def eval_one_dataset(self, sess, dataset, save_dir, subset='train'):
         count = 0
         print('num_examples:', dataset._num_examples)
         while count < dataset._num_examples:
             start = count % dataset._num_examples
-            images, embeddings_batchs, savenames, captions_batchs =\
-                dataset.next_batch_test(self.batch_size, start, 1)
+            images, embeddings_batchs, savenames, captions_batchs = dataset.next_batch_test(self.batch_size, start, 1)

             print('count = ', count, 'start = ', start)
             # the i-th sentence/caption
@@ -640,26 +531,24 @@ def eval_one_dataset(self, sess, dataset, save_dir, subset='train'):
                 # with randomness from noise z and conditioning augmentation.
                 numSamples = np.minimum(16, cfg.TRAIN.NUM_COPY)
                 for j in range(numSamples):
-                    hr_samples, samples =\
-                        sess.run([self.hr_fake_images, self.fake_images],
-                                 {self.embeddings: embeddings_batchs[i]})
+                    hr_samples, samples = sess.run([self.hr_fake_images, self.fake_images],
+                                                   {self.embeddings: embeddings_batchs[i]})
                     samples_batchs.append(samples)
                     hr_samples_batchs.append(hr_samples)
-                self.save_super_images(images, samples_batchs,
-                                       hr_samples_batchs,
-                                       savenames, captions_batchs,
-                                       i, save_dir, subset)
+                self.save_super_images(images, samples_batchs, hr_samples_batchs, savenames, captions_batchs, i,
+                                       save_dir, subset)

             count += self.batch_size

     def evaluate(self):
         config = tf.ConfigProto(allow_soft_placement=True)
+        config.gpu_options.per_process_gpu_memory_fraction = 0.7
         with tf.Session(config=config) as sess:
             with tf.device("/gpu:%d" % cfg.GPU_ID):
                 if self.model_path.find('.ckpt') != -1:
                     self.init_opt()
                     print("Reading model parameters from %s" % self.model_path)
-                    saver = tf.train.Saver(tf.all_variables())
+                    saver = tf.train.Saver(tf.global_variables())
                     saver.restore(sess, self.model_path)
                     # self.eval_one_dataset(sess, self.dataset.train,
                     #                       self.log_dir, subset='train')

From 6c2364b8826f1fd9083722ba9ca2dd14150dcd84 Mon Sep 17 00:00:00 2001
From: Akhil V
Date: Wed, 10 Jul 2019 00:37:02 -0400
Subject: [PATCH 02/13] Fixed stage1/model.py

---
 .idea/StackGAN.iml  |  8 +++++
 .idea/misc.xml      |  4 +++
 .idea/modules.xml   |  8 +++++
 .idea/vcs.xml       |  6 ++++
 .idea/workspace.xml | 79 +++++++++++++++++++++++++++++++++++++++++++++
 stageI/trainer.py   |  8 ++---
 6 files changed, 109 insertions(+), 4 deletions(-)
 create mode 100644 .idea/StackGAN.iml
 create mode 100644 .idea/misc.xml
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/vcs.xml
 create mode 100644 .idea/workspace.xml

diff --git a/.idea/StackGAN.iml b/.idea/StackGAN.iml
new file mode 100644
index 0000000..d0876a7
--- /dev/null
+++ b/.idea/StackGAN.iml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..65531ca
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..12cf37d
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
new file mode 100644
index 0000000..c24ecf8
--- /dev/null
+++ b/.idea/workspace.xml
@@ -0,0 +1,79 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+      1562733251523
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/stageI/trainer.py b/stageI/trainer.py
index f2750c1..9152657 100644
--- a/stageI/trainer.py
+++ b/stageI/trainer.py
@@ -14,7 +14,7 @@
 sys.path.append('./misc')

 from config import cfg
-from utils import mkdir_p, convert_to_uint8
+from utils import mkdir_p

 TINY = 1e-8

@@ -220,8 +220,8 @@ def epoch_sum_images(self, sess, n):
         gen_samples, img_summary = sess.run([self.superimages, self.image_summary], feed_dict)

         # save images generated for train and test captions
-        imageio.imwrite('%s/train.jpg' % (self.log_dir), convert_to_uint8(gen_samples[0]))
-        imageio.imwrite('%s/test.jpg' % (self.log_dir), convert_to_uint8(gen_samples[1]))
+        imageio.imwrite('%s/train.jpg' % (self.log_dir), gen_samples[0])
+        imageio.imwrite('%s/test.jpg' % (self.log_dir), gen_samples[1])
         #
         pfi_train = open(self.log_dir + "/train.txt", "w")
         pfi_test = open(self.log_dir + "/test.txt", "w")
@@ -355,7 +355,7 @@ def save_super_images(self, images, sample_batchs, filenames, sentenceID, save_d
             superimage = np.concatenate(superimage, axis=1)

             fullpath = '%s_sentence%d.jpg' % (s_tmp, sentenceID)
-            imageio.imwrite(fullpath, convert_to_uint8(superimage))
+            imageio.imwrite(fullpath, superimage)

     def eval_one_dataset(self, sess, dataset, save_dir, subset='train'):
         count = 0

From 6e5a2cf1c0ee87c0594159407364668645da8541 Mon Sep 17 00:00:00 2001
From: akhilvasvani <1943443+akhilvasvani@users.noreply.github.com>
Date: Wed, 10 Jul 2019 00:39:15 -0400
Subject: [PATCH 03/13] don't know what happened

---
 .idea/StackGAN.iml | 8 --------
 1 file changed, 8 deletions(-)
 delete mode 100644 .idea/StackGAN.iml

diff --git a/.idea/StackGAN.iml b/.idea/StackGAN.iml
deleted file mode 100644
index d0876a7..0000000
--- a/.idea/StackGAN.iml
+++ /dev/null
@@ -1,8 +0,0 @@
-
-
-
-
-
-
-
-
\ No newline at end of file

From 8fe1b451d554ffc92ce169c02b372c0f78d9fe46 Mon Sep 17 00:00:00 2001
From: akhilvasvani <1943443+akhilvasvani@users.noreply.github.com>
Date: Wed, 10 Jul 2019 00:42:11 -0400
Subject: [PATCH 04/13] Delete this file

---
 .idea/vcs.xml | 6 ------
 1 file changed, 6 deletions(-)
 delete mode 100644 .idea/vcs.xml

diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index 94a25f7..0000000
--- a/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-
-
-
\ No newline at end of file

From 788c50c6e6a6ad71a735ac600d4049d0619cfcc6 Mon Sep 17 00:00:00 2001
From: Akhil V
Date: Wed, 10 Jul 2019 14:37:01 -0400
Subject: [PATCH 05/13] Updated code to avoid the generator outputting strange
 pictures

---
 stageI/cfg/birds.yml  |  4 ++--
 stageI/run_exp.py     |  4 ----
 stageI/trainer.py     | 13 +++++++++----
 stageII/cfg/birds.yml |  8 ++++----
 stageII/run_exp.py    |  4 ----
 stageII/trainer.py    | 10 ++++++----
 6 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/stageI/cfg/birds.yml b/stageI/cfg/birds.yml
index af02386..7537341 100644
--- a/stageI/cfg/birds.yml
+++ b/stageI/cfg/birds.yml
@@ -6,8 +6,8 @@ GPU_ID: 0
 Z_DIM: 100

 TRAIN:
-  FLAG: True
-  PRETRAINED_MODEL: ''
+  FLAG: False # True
+  PRETRAINED_MODEL: './ckt_logs/birds/stageI_2019_07_10_09_33_08/model_82000.ckpt' # ''
   BATCH_SIZE: 64
   NUM_COPY: 4
   MAX_EPOCH: 600
diff --git a/stageI/run_exp.py b/stageI/run_exp.py
index 4076f2f..7121cb8 100644
--- a/stageI/run_exp.py
+++ b/stageI/run_exp.py
@@ -62,10 +62,6 @@ def parse_args():
     if cfg.TRAIN.FLAG:
         algo.train()
     else:
-        # var_list = checkpoint_utils.list_variables(cfg.TRAIN.PRETRAINED_MODEL)
-        # # var_list = checkpoint_utils.list_variables('models/birds_model_164000.ckpt')
-        # for v in var_list:
-        #     print(v)
         ''' For every input text embedding/sentence in the
         training and test datasets, generate cfg.TRAIN.NUM_COPY
         images with randomness from noise z and conditioning augmentation.'''
diff --git a/stageI/trainer.py b/stageI/trainer.py
index 9152657..4bc3a61 100644
--- a/stageI/trainer.py
+++ b/stageI/trainer.py
@@ -146,12 +146,17 @@
         g_vars = [var for var in all_vars if var.name.startswith('g_')]
         d_vars = [var for var in all_vars if var.name.startswith('d_')]

-        generator_opt = tf.train.AdamOptimizer(self.generator_lr, beta1=0.5)
-        self.generator_trainer = generator_opt.minimize(generator_loss, var_list=g_vars)
+        update_ops_D = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if var.name.startswith('d_')]
+        update_ops_G = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if var.name.startswith('g_')]

-        discriminator_opt = tf.train.AdamOptimizer(self.discriminator_lr, beta1=0.5)
+        with tf.control_dependencies(update_ops_G):
+            generator_opt = tf.train.AdamOptimizer(self.generator_lr, beta1=0.5)
+            self.generator_trainer = generator_opt.minimize(generator_loss, var_list=g_vars)
+
+        with tf.control_dependencies(update_ops_D):
+            discriminator_opt = tf.train.AdamOptimizer(self.discriminator_lr, beta1=0.5)
+            self.discriminator_trainer = discriminator_opt.minimize(discriminator_loss, var_list=d_vars)

-        self.discriminator_trainer = discriminator_opt.minimize(discriminator_loss, var_list=d_vars)
         self.log_vars.append(("g_learning_rate", self.generator_lr))
         self.log_vars.append(("d_learning_rate", self.discriminator_lr))
diff --git a/stageII/cfg/birds.yml b/stageII/cfg/birds.yml
index 5d2db8a..f8cddb5 100644
--- a/stageII/cfg/birds.yml
+++ b/stageII/cfg/birds.yml
@@ -7,11 +7,11 @@ GPU_ID: 0
 Z_DIM: 100

 TRAIN:
   FLAG: True
-  PRETRAINED_MODEL: './ckt_logs/birds/stageI_2019_07_09_19_42_16/model_12000.ckpt' #''
-  PRETRAINED_EPOCH: 100 # 600
-  BATCH_SIZE: 64
+  PRETRAINED_MODEL: './ckt_logs/birds/stageI/model_82000.ckpt'
+  PRETRAINED_EPOCH: 600
+  BATCH_SIZE: 32 # 64
   NUM_COPY: 4
-  MAX_EPOCH: 200 # 1200
+  MAX_EPOCH: 1200
   SNAPSHOT_INTERVAL: 2000
   DISCRIMINATOR_LR: 0.0002
   GENERATOR_LR: 0.0002
diff --git a/stageII/run_exp.py b/stageII/run_exp.py
index 0a23b50..dae8100 100644
--- a/stageII/run_exp.py
+++ b/stageII/run_exp.py
@@ -61,10 +61,6 @@ def parse_args():
     algo = CondGANTrainer(model=model, dataset=dataset, ckt_logs_dir=ckt_logs_dir)

     if cfg.TRAIN.FLAG:
-        var_list = checkpoint_utils.list_variables(cfg.TRAIN.PRETRAINED_MODEL)
-        # var_list = checkpoint_utils.list_variables('models/birds_model_164000.ckpt')
-        for v in var_list:
-            print(v)
         algo.train()
     else:
         ''' For every input text embedding/sentence in the
diff --git a/stageII/trainer.py b/stageII/trainer.py
index 8691796..b0f7321 100644
--- a/stageII/trainer.py
+++ b/stageII/trainer.py
@@ -182,8 +182,12 @@ def define_one_trainer(self, loss, learning_rate, key_word):
         all_vars = tf.trainable_variables()
         train_vars = [var for var in all_vars if var.name.startswith(key_word)]

-        opt = tf.train.AdamOptimizer(learning_rate, beta1=0.5)
-        trainer = opt.minimize(loss, var_list=train_vars)
+        update_ops_vars = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if var.name.startswith(key_word)]
+
+        with tf.control_dependencies(update_ops_vars):
+            opt = tf.train.AdamOptimizer(learning_rate, beta1=0.5)
+            trainer = opt.minimize(loss, var_list=train_vars)
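+            # (tf.GraphKeys.UPDATE_OPS holds the batch-norm moving-average update ops
+            # created by tf.contrib.layers.batch_norm; gating minimize() on them keeps
+            # inference-time statistics in sync with training -- the likely cause of
+            # the "strange pictures" this commit fixes.)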
+
         return trainer

     def prepare_trainer(self, discriminator_loss, generator_loss, hr_discriminator_loss, hr_generator_loss):
@@ -367,7 +371,6 @@
     def train(self):
         config = tf.ConfigProto(allow_soft_placement=True)
-        config.gpu_options.per_process_gpu_memory_fraction = 0.7
         with tf.Session(config=config) as sess:
             with tf.device("/gpu:%d" % cfg.GPU_ID):
                 counter = self.build_model(sess)
@@ -542,7 +545,6 @@
     def evaluate(self):
         config = tf.ConfigProto(allow_soft_placement=True)
-        config.gpu_options.per_process_gpu_memory_fraction = 0.7
         with tf.Session(config=config) as sess:
             with tf.device("/gpu:%d" % cfg.GPU_ID):
                 if self.model_path.find('.ckpt') != -1:

From f5446cdd524161f516f43ad276d05c7e2307dd47 Mon Sep 17 00:00:00 2001
From: Akhil V
Date: Sun, 14 Jul 2019 13:58:54 -0400
Subject: [PATCH 06/13] Updated files

---
 demo/birds_demo.sh              |   3 +-
 demo/birds_skip_thought_demo.py |  12 +-
 demo/cfg/birds-demo.yml         |   2 +-
 demo/demo.py                    |  15 +-
 demo/flowers_demo.sh            |   3 +-
 misc/config.py                  |   2 +-
 misc/custom_ops.py              | 179 +------
 misc/datasets.py                |   1 -
 misc/tf_upgrade.py              | 818 ++++++++------------------------
 misc/utils.py                   |   3 +
 stageI/model.py                 |   3 +-
 stageI/run_exp.py               |   8 +-
 stageI/trainer.py               |  25 +-
 stageII/__init__.py             |   1 +
 stageII/cfg/birds.yml           |   2 +-
 stageII/model.py                |  70 +--
 stageII/run_exp.py              |  10 +-
 stageII/trainer.py              |  37 +-
 18 files changed, 308 insertions(+), 886 deletions(-)
 mode change 100644 => 100755 demo/birds_demo.sh

diff --git a/demo/birds_demo.sh b/demo/birds_demo.sh
old mode 100644
new mode 100755
index 66ff9ab..162cec8
--- a/demo/birds_demo.sh
+++ b/demo/birds_demo.sh
@@ -1,3 +1,4 @@
+#!/usr/bin/env bash
 #
 # Extract text embeddings from the encoder
 #
@@ -15,7 +16,7 @@ th demo/get_embedding.lua
 #
 # Generate image from text embeddings
 #
-python demo/demo.py \
+python3 demo/demo.py \
     --cfg demo/cfg/birds-demo.yml \
     --gpu ${GPU} \
     --caption_path ${CAPTION_PATH}.t7
diff --git a/demo/birds_skip_thought_demo.py b/demo/birds_skip_thought_demo.py
index 3d3f475..c334702 100644
--- a/demo/birds_skip_thought_demo.py
+++ b/demo/birds_skip_thought_demo.py
@@ -1,22 +1,20 @@
 from __future__ import division
 from __future__ import print_function

-# import prettytensor as pt
 import tensorflow as tf
 import numpy as np
 import imageio
-# import scipy.misc
 import os
 import argparse
 from PIL import Image, ImageDraw, ImageFont

 import sys
-sys.path.append('./misc')
+sys.path.append('misc')
 sys.path.append('stageII')

-from config import cfg, cfg_from_file
-from utils import mkdir_p
-from model import CondGAN
+from misc.config import cfg, cfg_from_file
+from misc.utils import mkdir_p
+from stageII.model import CondGAN
 from skimage.transform import resize

@@ -52,7 +50,7 @@ def build_model(sess, embedding_dim, batch_size):
     model = CondGAN(lr_imsize=cfg.TEST.LR_IMSIZE, hr_lr_ratio=int(cfg.TEST.HR_IMSIZE/cfg.TEST.LR_IMSIZE))

     embeddings = tf.placeholder(tf.float32, [batch_size, embedding_dim], name='conditional_embeddings')
-    # with pt.defaults_scope(phase=pt.Phase.test):
+
     with tf.variable_scope("g_net"):
         c = sample_encoded_context(embeddings, model)
         z = tf.random_normal([batch_size, cfg.Z_DIM])
diff --git a/demo/cfg/birds-demo.yml b/demo/cfg/birds-demo.yml
index 8526652..946c306 100644
--- a/demo/cfg/birds-demo.yml
+++ b/demo/cfg/birds-demo.yml
@@ -5,7 +5,7 @@ GPU_ID: 0
 Z_DIM: 100

 TEST:
-  PRETRAINED_MODEL: './models/birds_model_164000.ckpt'
+  PRETRAINED_MODEL: './models/model_330000.ckpt'
   BATCH_SIZE: 64
   NUM_COPY: 8
diff --git a/demo/demo.py b/demo/demo.py
index c893aac..b733fa8 100644
--- a/demo/demo.py
+++ b/demo/demo.py
@@ -1,11 +1,9 @@
 from __future__ import division
 from __future__ import print_function

-# import prettytensor as pt
 import tensorflow as tf
 import numpy as np
 import imageio
-# import scipy.misc
 import os
 import argparse
 import torchfile
@@ -13,11 +11,11 @@
 import re

 import sys
-sys.path.append('./misc')
+sys.path.append('misc')
 sys.path.append('stageII')

 from config import cfg, cfg_from_file
-from utils import mkdir_p
+from utils import mkdir_p, caption_convert
 from model import CondGAN
 from skimage.transform import resize

@@ -54,7 +52,7 @@ def build_model(sess, embedding_dim, batch_size):
     model = CondGAN(lr_imsize=cfg.TEST.LR_IMSIZE, hr_lr_ratio=int(cfg.TEST.HR_IMSIZE/cfg.TEST.LR_IMSIZE))

     embeddings = tf.placeholder(tf.float32, [batch_size, embedding_dim], name='conditional_embeddings')
-    # with pt.defaults_scope(phase=pt.Phase.test):
+
     with tf.variable_scope("g_net"):
         c = sample_encoded_context(embeddings, model)
         z = tf.random_normal([batch_size, cfg.Z_DIM])
@@ -67,14 +65,13 @@ def build_model(sess, embedding_dim, batch_size):
     if ckt_path.find('.ckpt') != -1:
         print("Reading model parameters from %s" % ckt_path)
         saver = tf.train.Saver(tf.global_variables())
-        # saver = tf.train.Saver(tf.all_variables())
         saver.restore(sess, ckt_path)
     else:
         print("Input a valid model path.")
     return embeddings, fake_images, hr_fake_images

-def drawCaption(img, caption):
+def drawCaption(img, caption):
+    caption = caption_convert(caption)
     img_txt = Image.fromarray(img)
     # get a font
     fnt = ImageFont.truetype('Pillow/Tests/fonts/FreeMono.ttf', 50)
@@ -108,7 +105,7 @@ def save_super_images(sample_batchs, hr_sample_batchs, captions_batch, batch_siz
     # Save up to 16 samples for each text embedding/sentence
     img_shape = hr_sample_batchs[0][0].shape
     for j in range(batch_size):
-        if not re.search('[a-zA-Z]+', captions_batch[j]):
+        if not re.search(b'[a-zA-Z]+', captions_batch[j]):
             continue

         padding = np.zeros(img_shape)
@@ -207,4 +204,4 @@ def save_super_images(sample_batchs, hr_sample_batchs, captions_batch, batch_siz
     print('Finish generating samples for %d sentences:' % num_embeddings)
     print('Example sentences:')
     for i in range(np.minimum(10, num_embeddings)):
-        print('Sentence %d: %s' % (i, captions_list[i]))
+        print('Sentence %d: %s' % (i, caption_convert(captions_list[i])))
diff --git a/demo/flowers_demo.sh b/demo/flowers_demo.sh
index 287699b..28d854b 100644
--- a/demo/flowers_demo.sh
+++ b/demo/flowers_demo.sh
@@ -1,3 +1,4 @@
+#!/usr/bin/env bash
 #
 # Extract text embeddings from the encoder
 #
@@ -16,7 +17,7 @@ th demo/get_embedding.lua
 #
 # Generate image from text embeddings
 #
-python demo/demo.py \
+python3 demo/demo.py \
     --cfg demo/cfg/flowers-demo.yml \
     --gpu ${GPU} \
     --caption_path ${CAPTION_PATH}.t7
diff --git a/misc/config.py b/misc/config.py
index 707fabe..ac01f7c 100644
--- a/misc/config.py
+++ b/misc/config.py
@@ -48,7 +48,7 @@
 __C.TRAIN.COEFF = edict()
 __C.TRAIN.COEFF.KL = 2.0

-__C.TRAIN.FINETUNE_LR = False
+__C.TRAIN.FINETUNE_LR = True # False
 __C.TRAIN.FT_LR_RETIO = 0.1

 # Modal options
diff --git a/misc/custom_ops.py b/misc/custom_ops.py
index 6d3994a..458edd8 100644
--- a/misc/custom_ops.py
+++ b/misc/custom_ops.py
@@ -5,9 +5,6 @@
 from __future__ import division
 from __future__ import print_function

-# import prettytensor as pt
-# from prettytensor.pretty_tensor_class import Phase
-from tensorflow.python.training import moving_averages
 import tensorflow as tf
 import numpy as np
@@ -18,9 +15,8 @@ def fc(inputs, num_out, name, activation_fn=None, reuse=None):
         inputs = tf.reshape(inputs, tf.stack([tf.shape(inputs)[0], np.prod(shape[1:])]))
         inputs.set_shape([None, np.prod(shape[1:])])

-    w_init = tf.random_normal_initializer(stddev=0.02)  # tf.keras.layers.Dense
-    # return tf.layers.dense(inputs=inputs, units=num_out, activation=activation_fn, kernel_initializer=w_init,
-    #                        use_bias=biased, name=name)
+    w_init = tf.random_normal_initializer(stddev=0.02)
+
     return tf.contrib.layers.fully_connected(inputs, num_out, activation_fn=activation_fn, weights_initializer=w_init,
                                              reuse=reuse, scope=name)

@@ -29,45 +25,21 @@ def concat(inputs, axis):
     return tf.concat(values=inputs, axis=axis)

-def conv_batch_normalization(inputs, name, epsilon=1e-5, in_dim=None, is_training=True, activation_fn=None, reuse=None):
+def conv_batch_normalization(inputs, name, epsilon=1e-5, is_training=True, activation_fn=None, reuse=None):
     return tf.contrib.layers.batch_norm(inputs, decay=0.9, center=True, scale=True, epsilon=epsilon,
                                         activation_fn=activation_fn,
                                         param_initializers={'beta': tf.constant_initializer(0.),
                                                             'gamma': tf.random_normal_initializer(1., 0.02)},
                                         reuse=reuse, is_training=is_training, scope=name)

-    # ema = tf.train.ExponentialMovingAverage(decay=0.9)
-    # shape = inputs.get_shape()
-    # shp = in_dim or shape[-1]
-    # mean, variance = tf.nn.moments(inputs, [0, 1, 2])
-    # mean.set_shape((shp,))
-    # variance.set_shape((shp,))
-    # ema_apply_op = ema.apply([mean, variance])
-    #
-    # with tf.variable_scope(name, reuse=reuse):
-    #     gamma = tf.get_variable(name='gamma', shape=[shp], initializer=tf.random_normal_initializer(1., 0.02))
-    #     beta = tf.get_variable(name='beta', shape=[shp], initializer=tf.constant_initializer(0.))
-    #
-    # if is_training:
-    #     with tf.control_dependencies([ema_apply_op]):
-    #         normalized_x = tf.nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon)
-    # else:
-    #     normalized_x = tf.nn.batch_normalization(inputs, ema.average(mean), ema.average(variance),
-    #                                              beta, gamma, epsilon)
-    #
-    # if activation_fn is not None:
-    #     normalized_x = activation_fn(normalized_x)
-    #
-    # return normalized_x

-def fc_batch_normalization(inputs, name, epsilon=1e-5, in_dim=None, is_training=True, activation_fn=None, reuse=None):
+def fc_batch_normalization(inputs, name, epsilon=1e-5, is_training=True, activation_fn=None, reuse=None):
     ori_shape = inputs.get_shape()
     if ori_shape[0] is None:
         ori_shape = -1
     new_shape = [ori_shape[0], 1, 1, ori_shape[1]]
     x = tf.reshape(inputs, new_shape)
-    normalized_x = conv_batch_normalization(x, name, epsilon=epsilon, in_dim=in_dim, is_training=is_training,
+    normalized_x = conv_batch_normalization(x, name, epsilon=epsilon, is_training=is_training,
                                             activation_fn=activation_fn, reuse=reuse)
     return tf.reshape(normalized_x, ori_shape)

@@ -132,144 +104,3 @@ def flatten(inputs, name):
         inputs = tf.reshape(inputs, [-1, dim])

     return inputs
-
-
-# class conv_batch_norm(pt.VarStoreMethod):
-#     """Code modification of:
-#     http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
-#     and
-#     https://github.com/tensorflow/models/blob/master/inception/inception/slim/ops.py"""
-#
-#     def __call__(self, input_layer, epsilon=1e-5, decay=0.9, name="batch_norm", in_dim=None, phase=Phase.train):
-#         shape = input_layer.shape
-#         shp = in_dim or shape[-1]
-#
-#         self.gamma = self.variable("gamma", [shp], init=tf.random_normal_initializer(1., 0.02))
-#         self.beta = self.variable("beta", [shp], init=tf.constant_initializer(0.))
-#
-#         self.mean, self.variance = tf.nn.moments(input_layer, [0, 1, 2])
-#         # sigh...tf's shape system is so..
-#         self.mean.set_shape((shp,))
-#         self.variance.set_shape((shp,))
-#
-#         normalized_x = tf.nn.batch_normalization(input_layer, self.mean, self.variance, None, None, epsilon)
-#         return input_layer.with_tensor(normalized_x, parameters=self.vars)
-#
-#     # def __call__(self, input_layer, epsilon=1e-5, momentum=0.1, name="batch_norm",
-#     #              in_dim=None, phase=Phase.train):
-#     #     self.ema = tf.train.ExponentialMovingAverage(decay=0.9)
-#     #
-#     #     shape = input_layer.shape
-#     #     shp = in_dim or shape[-1]
-#     #     with tf.variable_scope (name) as scope:
-#     #         self.gamma = self.variable ("gamma", [shp], init=tf.random_normal_initializer (1., 0.02))
-#     #         self.beta = self.variable ("beta", [shp], init=tf.constant_initializer (0.))
-#     #
-#     #         self.mean, self.variance = tf.nn.moments(input_layer.tensor, [0, 1, 2])
-#     #         # sigh...tf's shape system is so..
-#     #         self.mean.set_shape ((shp,))
-#     #         self.variance.set_shape ((shp,))
-#     #         self.ema_apply_op = self.ema.apply ([self.mean, self.variance])
-#     #
-#     #         if phase == Phase.train:
-#     #             with tf.control_dependencies ([self.ema_apply_op]):
-#     #                 normalized_x = tf.nn.batch_norm_with_global_normalization (
-#     #                     input_layer.tensor, self.mean, self.variance, self.beta, self.gamma, epsilon,
-#     #                     scale_after_normalization=True)
-#     #         else:
-#     #             normalized_x = tf.nn.batch_norm_with_global_normalization (
-#     #                 x, self.ema.average (self.mean), self.ema.average (self.variance), self.beta,
-#     #                 self.gamma, epsilon,
-#     #                 scale_after_normalization=True)
-#     #
-#     #         return input_layer.with_tensor (normalized_x, parameters=self.vars)
-#
-# #
-# pt.Register(assign_defaults=('phase'))(conv_batch_norm)
-#
-#
-# @pt.Register(assign_defaults=('phase'))
-# class fc_batch_norm(conv_batch_norm):
-#     def __call__(self, input_layer, *args, **kwargs):
-#         ori_shape = input_layer.shape
-#         if ori_shape[0] is None:
-#             ori_shape[0] = -1
-#         new_shape = [ori_shape[0], 1, 1, ori_shape[1]]
-#         x = tf.reshape(input_layer.tensor, new_shape)
-#         normalized_x = super(self.__class__, self).__call__(input_layer.with_tensor(x), *args, **kwargs)  # input_layer)
-#         return normalized_x.reshape(ori_shape)
-#
-#
-# def leaky_rectify(x, leakiness=0.01):
-#     assert leakiness <= 1
-#     ret = tf.maximum(x, leakiness * x)
-#     # import ipdb; ipdb.set_trace()
-#     return ret
-#
-#
-# @pt.Register
-# class custom_conv2d(pt.VarStoreMethod):
-#     # class custom_conv2d(object):
-#     def __call__(self, input_layer, output_dim, k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, in_dim=None, padding='SAME',
-#                  name="conv2d"):
-#         with tf.variable_scope(name):
-#             w = self.variable('w', [k_h, k_w, in_dim or input_layer.shape[-1], output_dim],
-#                               init=tf.truncated_normal_initializer(stddev=stddev))
-#             # w = tf.get_variable('w', shape=[k_h, k_w, in_dim or input_layer.shape[-1], output_dim], dtype=tf.float32,
-#             #                     initializer=tf.truncated_normal_initializer(stddev=stddev))
-#             conv = tf.nn.conv2d(input_layer.tensor, w, strides=[1, d_h, d_w, 1], padding=padding)
-#
-#             # biases = self.variable('biases', [output_dim], init=tf.constant_initializer(0.0))
-#             # import ipdb; ipdb.set_trace()
-#             # return input_layer.with_tensor(tf.nn.bias_add(conv, biases), parameters=self.vars)
-#             return input_layer.with_tensor(conv, parameters=self.vars)
-#
-#
-# @pt.Register
-# class custom_deconv2d(pt.VarStoreMethod):
-#     # class custom_deconv2d(object):
-#     def __call__(self, input_layer, output_shape, k_h=5, k_w=5, d_h=2, d_w=2, stddev=0.02, name="deconv2d"):
-#         output_shape[0] = input_layer.shape[0]
input_layer.shape[0] -# ts_output_shape = tf.stack(output_shape) -# with tf.variable_scope(name): -# # filter : [height, width, output_channels, in_channels] -# w = self.variable('w', [k_h, k_w, output_shape[-1], input_layer.shape[-1]], -# init=tf.random_normal_initializer(stddev=stddev)) -# -# # w = tf.get_variable('w', shape=[k_h, k_w, output_shape[-1], input_layer.shape[-1]], dtype=tf.float32, -# # initializer=tf.random_normal_initializer(stddev=stddev)) -# -# try: -# deconv = tf.nn.conv2d_transpose(input_layer, w, output_shape=ts_output_shape, strides=[1, d_h, d_w, 1]) -# -# # Support for versions of TensorFlow before 0.7.0 -# except AttributeError: -# deconv = tf.nn.deconv2d(input_layer, w, output_shape=ts_output_shape, -# strides=[1, d_h, d_w, 1]) -# -# # biases = self.variable('biases', [output_shape[-1]], init=tf.constant_initializer(0.0)) -# # deconv = tf.reshape(tf.nn.bias_add(deconv, biases), [-1] + output_shape[1:]) -# deconv = tf.reshape(deconv, [-1] + output_shape[1:]) -# -# return deconv -# -# -# @pt.Register -# class custom_fully_connected(pt.VarStoreMethod): -# # class custom_fully_connected(object): -# def __call__(self, input_layer, output_size, scope=None, in_dim=None, stddev=0.02, bias_start=0.0): -# shape = input_layer.shape -# input_ = input_layer.tensor -# try: -# if len(shape) == 4: -# input_ = tf.reshape(input_, tf.stack([tf.shape(input_)[0], np.prod(shape[1:])])) -# input_.set_shape([None, np.prod(shape[1:])]) -# shape = input_.get_shape().as_list() -# -# with tf.variable_scope(scope or "Linear"): -# matrix = self.variable("Matrix", [in_dim or shape[1], output_size], dt=tf.float32, -# init=tf.random_normal_initializer(stddev=stddev)) -# bias = self.variable("bias", [output_size], init=tf.constant_initializer(bias_start)) -# return input_layer.with_tensor(tf.matmul(input_, matrix) + bias, parameters=self.vars) -# except Exception: -# import ipdb; ipdb.set_trace() diff --git a/misc/datasets.py b/misc/datasets.py index a72e865..cd7bcee 100644 --- a/misc/datasets.py +++ b/misc/datasets.py @@ -1,7 +1,6 @@ from __future__ import division from __future__ import print_function - import numpy as np import pickle import random diff --git a/misc/tf_upgrade.py b/misc/tf_upgrade.py index 11c8378..e0a8dcd 100644 --- a/misc/tf_upgrade.py +++ b/misc/tf_upgrade.py @@ -17,649 +17,239 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import argparse -import ast -import collections -import os -import shutil -import sys -import tempfile -import traceback - - -class APIChangeSpec(object): - """List of maps that describe what changed in the API.""" - - def __init__(self): - # Maps from a function name to a dictionary that describes how to - # map from an old argument keyword to the new argument keyword. 
- self.function_keyword_renames = { - "tf.count_nonzero": { - "reduction_indices": "axis" - }, - "tf.reduce_all": { - "reduction_indices": "axis" - }, - "tf.reduce_any": { - "reduction_indices": "axis" - }, - "tf.reduce_max": { - "reduction_indices": "axis" - }, - "tf.reduce_mean": { - "reduction_indices": "axis" - }, - "tf.reduce_min": { - "reduction_indices": "axis" - }, - "tf.reduce_prod": { - "reduction_indices": "axis" - }, - "tf.reduce_sum": { - "reduction_indices": "axis" - }, - "tf.reduce_logsumexp": { - "reduction_indices": "axis" - }, - "tf.expand_dims": { - "dim": "axis" - }, - "tf.argmax": { - "dimension": "axis" - }, - "tf.argmin": { - "dimension": "axis" - }, - "tf.reduce_join": { - "reduction_indices": "axis" - }, - "tf.sparse_concat": { - "concat_dim": "axis" - }, - "tf.sparse_split": { - "split_dim": "axis" - }, - "tf.sparse_reduce_sum": { - "reduction_axes": "axis" - }, - "tf.reverse_sequence": { - "seq_dim": "seq_axis", - "batch_dim": "batch_axis" - }, - "tf.sparse_reduce_sum_sparse": { - "reduction_axes": "axis" - }, - "tf.squeeze": { - "squeeze_dims": "axis" - }, - "tf.split": { - "split_dim": "axis", - "num_split": "num_or_size_splits" - }, - "tf.concat": { - "concat_dim": "axis" - }, - } - - # Mapping from function to the new name of the function - self.function_renames = { - "tf.inv": "tf.reciprocal", - "tf.contrib.deprecated.scalar_summary": "tf.summary.scalar", - "tf.contrib.deprecated.histogram_summary": "tf.summary.histogram", - "tf.listdiff": "tf.setdiff1d", - "tf.list_diff": "tf.setdiff1d", - "tf.mul": "tf.multiply", - "tf.neg": "tf.negative", - "tf.sub": "tf.subtract", - "tf.train.SummaryWriter": "tf.summary.FileWriter", - "tf.scalar_summary": "tf.summary.scalar", - "tf.histogram_summary": "tf.summary.histogram", - "tf.audio_summary": "tf.summary.audio", - "tf.image_summary": "tf.summary.image", - "tf.merge_summary": "tf.summary.merge", - "tf.merge_all_summaries": "tf.summary.merge_all", - "tf.image.per_image_whitening": "tf.image.per_image_standardization", - "tf.all_variables": "tf.global_variables", - "tf.VARIABLES": "tf.GLOBAL_VARIABLES", - "tf.initialize_all_variables": "tf.global_variables_initializer", - "tf.initialize_variables": "tf.variables_initializer", - "tf.initialize_local_variables": "tf.local_variables_initializer", - "tf.batch_matrix_diag": "tf.matrix_diag", - "tf.batch_band_part": "tf.band_part", - "tf.batch_set_diag": "tf.set_diag", - "tf.batch_matrix_transpose": "tf.matrix_transpose", - "tf.batch_matrix_determinant": "tf.matrix_determinant", - "tf.batch_matrix_inverse": "tf.matrix_inverse", - "tf.batch_cholesky": "tf.cholesky", - "tf.batch_cholesky_solve": "tf.cholesky_solve", - "tf.batch_matrix_solve": "tf.matrix_solve", - "tf.batch_matrix_triangular_solve": "tf.matrix_triangular_solve", - "tf.batch_matrix_solve_ls": "tf.matrix_solve_ls", - "tf.batch_self_adjoint_eig": "tf.self_adjoint_eig", - "tf.batch_self_adjoint_eigvals": "tf.self_adjoint_eigvals", - "tf.batch_svd": "tf.svd", - "tf.batch_fft": "tf.fft", - "tf.batch_ifft": "tf.ifft", - "tf.batch_ifft2d": "tf.ifft2d", - "tf.batch_fft3d": "tf.fft3d", - "tf.batch_ifft3d": "tf.ifft3d", - "tf.select": "tf.where", - "tf.complex_abs": "tf.abs", - "tf.batch_matmul": "tf.matmul", - "tf.pack": "tf.stack", - "tf.unpack": "tf.unstack", - } - - self.change_to_function = { - "tf.ones_initializer", - "tf.zeros_initializer", - } - - # Functions that were reordered should be changed to the new keyword args - # for safety, if positional arguments are used. 
If you have reversed the - # positional arguments yourself, this could do the wrong thing. - self.function_reorders = { - "tf.split": ["axis", "num_or_size_splits", "value", "name"], - "tf.sparse_split": ["axis", "num_or_size_splits", "value", "name"], - "tf.concat": ["concat_dim", "values", "name"], - "tf.svd": ["tensor", "compute_uv", "full_matrices", "name"], - "tf.nn.softmax_cross_entropy_with_logits": [ - "logits", "labels", "dim", "name"], - "tf.nn.sparse_softmax_cross_entropy_with_logits": [ - "logits", "labels", "name"], - "tf.nn.sigmoid_cross_entropy_with_logits": [ - "logits", "labels", "name"] - } - - # Specially handled functions. - self.function_handle = {"tf.reverse": self._reverse_handler} - - @staticmethod - def _reverse_handler(file_edit_recorder, node): - # TODO(aselle): Could check for a literal list of bools and try to convert - # them to indices. - comment = ("ERROR: tf.reverse has had its argument semantics changed\n" - "significantly the converter cannot detect this reliably, so you" - "need to inspect this usage manually.\n") - file_edit_recorder.add(comment, - node.lineno, - node.col_offset, - "tf.reverse", - "tf.reverse", - error="tf.reverse requires manual check.") - - -class FileEditTuple(collections.namedtuple( - "FileEditTuple", ["comment", "line", "start", "old", "new"])): - """Each edit that is recorded by a FileEditRecorder. - Fields: - comment: A description of the edit and why it was made. - line: The line number in the file where the edit occurs (1-indexed). - start: The line number in the file where the edit occurs (0-indexed). - old: text string to remove (this must match what was in file). - new: text string to add in place of `old`. - """ - - __slots__ = () - - -class FileEditRecorder(object): - """Record changes that need to be done to the file.""" - - def __init__(self, filename): - # all edits are lists of chars - self._filename = filename - - self._line_to_edit = collections.defaultdict(list) - self._errors = [] - - def process(self, text): - """Process a list of strings, each corresponding to the recorded changes. - Args: - text: A list of lines of text (assumed to contain newlines) - Returns: - A tuple of the modified text and a textual description of what is done. - Raises: - ValueError: if substitution source location does not have expected text. - """ - - change_report = "" - - # Iterate of each line - for line, edits in self._line_to_edit.items(): - offset = 0 - # sort by column so that edits are processed in order in order to make - # indexing adjustments cumulative for changes that change the string - # length - edits.sort(key=lambda x: x.start) - - # Extract each line to a list of characters, because mutable lists - # are editable, unlike immutable strings. 
- char_array = list(text[line - 1]) - - # Record a description of the change - change_report += "%r Line %d\n" % (self._filename, line) - change_report += "-" * 80 + "\n\n" - for e in edits: - change_report += "%s\n" % e.comment - change_report += "\n Old: %s" % (text[line - 1]) - - # Make underscore buffers for underlining where in the line the edit was - change_list = [" "] * len(text[line - 1]) - change_list_new = [" "] * len(text[line - 1]) - - # Iterate for each edit - for e in edits: - # Create effective start, end by accounting for change in length due - # to previous edits - start_eff = e.start + offset - end_eff = start_eff + len(e.old) - - # Make sure the edit is changing what it should be changing - old_actual = "".join(char_array[start_eff:end_eff]) - if old_actual != e.old: - raise ValueError("Expected text %r but got %r" % - ("".join(e.old), "".join(old_actual))) - # Make the edit - char_array[start_eff:end_eff] = list(e.new) - - # Create the underline highlighting of the before and after - change_list[e.start:e.start + len(e.old)] = "~" * len(e.old) - change_list_new[start_eff:end_eff] = "~" * len(e.new) - - # Keep track of how to generate effective ranges - offset += len(e.new) - len(e.old) - - # Finish the report comment - change_report += " %s\n" % "".join(change_list) - text[line - 1] = "".join(char_array) - change_report += " New: %s" % (text[line - 1]) - change_report += " %s\n\n" % "".join(change_list_new) - return "".join(text), change_report, self._errors - - def add(self, comment, line, start, old, new, error=None): - """Add a new change that is needed. - Args: - comment: A description of what was changed - line: Line number (1 indexed) - start: Column offset (0 indexed) - old: old text - new: new text - error: this "edit" is something that cannot be fixed automatically - Returns: - None - """ - - self._line_to_edit[line].append( - FileEditTuple(comment, line, start, old, new)) - if error: - self._errors.append("%s:%d: %s" % (self._filename, line, error)) - - -class TensorFlowCallVisitor(ast.NodeVisitor): - """AST Visitor that finds TensorFlow Function calls. - Updates function calls from old API version to new API version. - """ - - def __init__(self, filename, lines): - self._filename = filename - self._file_edit = FileEditRecorder(filename) - self._lines = lines - self._api_change_spec = APIChangeSpec() - - def process(self, lines): - return self._file_edit.process(lines) - - def generic_visit(self, node): - ast.NodeVisitor.generic_visit(self, node) - def _rename_functions(self, node, full_name): - function_renames = self._api_change_spec.function_renames - try: - new_name = function_renames[full_name] - self._file_edit.add("Renamed function %r to %r" % (full_name, - new_name), - node.lineno, node.col_offset, full_name, new_name) - except KeyError: - pass - - def _get_attribute_full_path(self, node): - """Traverse an attribute to generate a full name e.g. tf.foo.bar. - Args: - node: A Node of type Attribute. - Returns: - a '.'-delimited full-name or None if the tree was not a simple form. - i.e. `foo()+b).bar` returns None, while `a.b.c` would return "a.b.c". - """ - curr = node - items = [] - while not isinstance(curr, ast.Name): - if not isinstance(curr, ast.Attribute): - return None - items.append(curr.attr) - curr = curr.value - items.append(curr.id) - return ".".join(reversed(items)) - - def _find_true_position(self, node): - """Return correct line number and column offset for a given node. 
- This is necessary mainly because ListComp's location reporting reports - the next token after the list comprehension list opening. - Args: - node: Node for which we wish to know the lineno and col_offset - """ - import re - find_open = re.compile("^\s*(\\[).*$") - find_string_chars = re.compile("['\"]") - - if isinstance(node, ast.ListComp): - # Strangely, ast.ListComp returns the col_offset of the first token - # after the '[' token which appears to be a bug. Workaround by - # explicitly finding the real start of the list comprehension. - line = node.lineno - col = node.col_offset - # loop over lines - while 1: - # Reverse the text to and regular expression search for whitespace - text = self._lines[line-1] - reversed_preceding_text = text[:col][::-1] - # First find if a [ can be found with only whitespace between it and - # col. - m = find_open.match(reversed_preceding_text) - if m: - new_col_offset = col - m.start(1) - 1 - return line, new_col_offset - else: - if (reversed_preceding_text=="" or - reversed_preceding_text.isspace()): - line = line - 1 - prev_line = self._lines[line - 1] - # TODO(aselle): - # this is poor comment detection, but it is good enough for - # cases where the comment does not contain string literal starting/ - # ending characters. If ast gave us start and end locations of the - # ast nodes rather than just start, we could use string literal - # node ranges to filter out spurious #'s that appear in string - # literals. - comment_start = prev_line.find("#") - if comment_start == -1: - col = len(prev_line) -1 - elif find_string_chars.search(prev_line[comment_start:]) is None: - col = comment_start - else: - return None, None - else: - return None, None - # Most other nodes return proper locations (with notably does not), but - # it is not possible to use that in an argument. - return node.lineno, node.col_offset - - - def visit_Call(self, node): # pylint: disable=invalid-name - """Handle visiting a call node in the AST. - Args: - node: Current Node - """ - - - # Find a simple attribute name path e.g. "tf.foo.bar" - full_name = self._get_attribute_full_path(node.func) - - # Make sure the func is marked as being part of a call - node.func.is_function_for_call = True - - if full_name and full_name.startswith("tf."): - # Call special handlers - function_handles = self._api_change_spec.function_handle - if full_name in function_handles: - function_handles[full_name](self._file_edit, node) - - # Examine any non-keyword argument and make it into a keyword argument - # if reordering required. 
- function_reorders = self._api_change_spec.function_reorders - function_keyword_renames = ( - self._api_change_spec.function_keyword_renames) - - if full_name in function_reorders: - reordered = function_reorders[full_name] - for idx, arg in enumerate(node.args): - lineno, col_offset = self._find_true_position(arg) - if lineno is None or col_offset is None: - self._file_edit.add( - "Failed to add keyword %r to reordered function %r" - % (reordered[idx], full_name), arg.lineno, arg.col_offset, - "", "", - error="A necessary keyword argument failed to be inserted.") - else: - keyword_arg = reordered[idx] - if (full_name in function_keyword_renames and - keyword_arg in function_keyword_renames[full_name]): - keyword_arg = function_keyword_renames[full_name][keyword_arg] - self._file_edit.add("Added keyword %r to reordered function %r" - % (reordered[idx], full_name), lineno, - col_offset, "", keyword_arg + "=") - - # Examine each keyword argument and convert it to the final renamed form - renamed_keywords = ({} if full_name not in function_keyword_renames else - function_keyword_renames[full_name]) - for keyword in node.keywords: - argkey = keyword.arg - argval = keyword.value - - if argkey in renamed_keywords: - argval_lineno, argval_col_offset = self._find_true_position(argval) - if (argval_lineno is not None and argval_col_offset is not None): - # TODO(aselle): We should scan backward to find the start of the - # keyword key. Unfortunately ast does not give you the location of - # keyword keys, so we are forced to infer it from the keyword arg - # value. - key_start = argval_col_offset - len(argkey) - 1 - key_end = key_start + len(argkey) + 1 - if self._lines[argval_lineno - 1][key_start:key_end] == argkey + "=": - self._file_edit.add("Renamed keyword argument from %r to %r" % - (argkey, renamed_keywords[argkey]), - argval_lineno, - argval_col_offset - len(argkey) - 1, - argkey + "=", renamed_keywords[argkey] + "=") - continue - self._file_edit.add( - "Failed to rename keyword argument from %r to %r" % - (argkey, renamed_keywords[argkey]), - argval.lineno, - argval.col_offset - len(argkey) - 1, - "", "", - error="Failed to find keyword lexographically. Fix manually.") - - ast.NodeVisitor.generic_visit(self, node) - - def visit_Attribute(self, node): # pylint: disable=invalid-name - """Handle bare Attributes i.e. [tf.foo, tf.bar]. - Args: - node: Node that is of type ast.Attribute - """ - full_name = self._get_attribute_full_path(node) - if full_name and full_name.startswith("tf."): - self._rename_functions(node, full_name) - if full_name in self._api_change_spec.change_to_function: - if not hasattr(node, "is_function_for_call"): - new_text = full_name + "()" - self._file_edit.add("Changed %r to %r"%(full_name, new_text), - node.lineno, node.col_offset, full_name, new_text) - - ast.NodeVisitor.generic_visit(self, node) - - -class TensorFlowCodeUpgrader(object): - """Class that handles upgrading a set of Python files to TensorFlow 1.0.""" - - def __init__(self): - pass - - def process_file(self, in_filename, out_filename): - """Process the given python file for incompatible changes. - Args: - in_filename: filename to parse - out_filename: output file to write to - Returns: - A tuple representing number of files processed, log of actions, errors - """ - - # Write to a temporary file, just in case we are doing an implace modify. 
- with open(in_filename, "r") as in_file, \ - tempfile.NamedTemporaryFile("w", delete=False) as temp_file: - ret = self.process_opened_file( - in_filename, in_file, out_filename, temp_file) - - shutil.move(temp_file.name, out_filename) - return ret - - # Broad exceptions are required here because ast throws whatever it wants. - # pylint: disable=broad-except - def process_opened_file(self, in_filename, in_file, out_filename, out_file): - """Process the given python file for incompatible changes. - This function is split out to facilitate StringIO testing from - tf_upgrade_test.py. - Args: - in_filename: filename to parse - in_file: opened file (or StringIO) - out_filename: output file to write to - out_file: opened file (or StringIO) - Returns: - A tuple representing number of files processed, log of actions, errors - """ - process_errors = [] - text = "-" * 80 + "\n" - text += "Processing file %r\n outputting to %r\n" % (in_filename, - out_filename) - text += "-" * 80 + "\n\n" - - parsed_ast = None - lines = in_file.readlines() - try: - parsed_ast = ast.parse("".join(lines)) - except Exception: - text += "Failed to parse %r\n\n" % in_filename - text += traceback.format_exc() - if parsed_ast: - visitor = TensorFlowCallVisitor(in_filename, lines) - visitor.visit(parsed_ast) - out_text, new_text, process_errors = visitor.process(lines) - text += new_text - if out_file: - out_file.write(out_text) - text += "\n" - return 1, text, process_errors - # pylint: enable=broad-except - - def process_tree(self, root_directory, output_root_directory): - """Processes upgrades on an entire tree of python files in place. - Note that only Python files. If you have custom code in other languages, - you will need to manually upgrade those. - Args: - root_directory: Directory to walk and process. - output_root_directory: Directory to use as base - Returns: - A tuple of files processed, the report string ofr all files, and errors - """ - - # make sure output directory doesn't exist - if output_root_directory and os.path.exists(output_root_directory): - print("Output directory %r must not already exist." 
% ( - output_root_directory)) - sys.exit(1) - - # make sure output directory does not overlap with root_directory - norm_root = os.path.split(os.path.normpath(root_directory)) - norm_output = os.path.split(os.path.normpath(output_root_directory)) - if norm_root == norm_output: - print("Output directory %r same as input directory %r" % ( - root_directory, output_root_directory)) - sys.exit(1) - - # Collect list of files to process (we do this to correctly handle if the - # user puts the output directory in some sub directory of the input dir) - files_to_process = [] - for dir_name, _, file_list in os.walk(root_directory): - py_files = [f for f in file_list if f.endswith(".py")] - for filename in py_files: - fullpath = os.path.join(dir_name, filename) - fullpath_output = os.path.join( - output_root_directory, os.path.relpath(fullpath, root_directory)) - files_to_process.append((fullpath, fullpath_output)) - - file_count = 0 - tree_errors = [] - report = "" - report += ("=" * 80) + "\n" - report += "Input tree: %r\n" % root_directory - report += ("=" * 80) + "\n" +import argparse - for input_path, output_path in files_to_process: - output_directory = os.path.dirname(output_path) - if not os.path.isdir(output_directory): - os.makedirs(output_directory) - file_count += 1 - _, l_report, l_errors = self.process_file(input_path, output_path) - tree_errors += l_errors - report += l_report - return file_count, report, tree_errors +from tensorflow.tools.compatibility import ast_edits + + +class TFAPIChangeSpec(ast_edits.APIChangeSpec): + """List of maps that describe what changed in the API.""" + + def __init__(self): + # Maps from a function name to a dictionary that describes how to + # map from an old argument keyword to the new argument keyword. + self.function_keyword_renames = { + "tf.batch_matmul": { + "adj_x": "adjoint_a", + "adj_y": "adjoint_b", + }, + "tf.count_nonzero": { + "reduction_indices": "axis" + }, + "tf.reduce_all": { + "reduction_indices": "axis" + }, + "tf.reduce_any": { + "reduction_indices": "axis" + }, + "tf.reduce_max": { + "reduction_indices": "axis" + }, + "tf.reduce_mean": { + "reduction_indices": "axis" + }, + "tf.reduce_min": { + "reduction_indices": "axis" + }, + "tf.reduce_prod": { + "reduction_indices": "axis" + }, + "tf.reduce_sum": { + "reduction_indices": "axis" + }, + "tf.reduce_logsumexp": { + "reduction_indices": "axis" + }, + "tf.expand_dims": { + "dim": "axis" + }, + "tf.argmax": { + "dimension": "axis" + }, + "tf.argmin": { + "dimension": "axis" + }, + "tf.reduce_join": { + "reduction_indices": "axis" + }, + "tf.sparse_concat": { + "concat_dim": "axis" + }, + "tf.sparse_split": { + "split_dim": "axis" + }, + "tf.sparse_reduce_sum": { + "reduction_axes": "axis" + }, + "tf.reverse_sequence": { + "seq_dim": "seq_axis", + "batch_dim": "batch_axis" + }, + "tf.sparse_reduce_sum_sparse": { + "reduction_axes": "axis" + }, + "tf.squeeze": { + "squeeze_dims": "axis" + }, + "tf.split": { + "split_dim": "axis", + "num_split": "num_or_size_splits" + }, + "tf.concat": { + "concat_dim": "axis" + }, + } + + # Mapping from function to the new name of the function + self.symbol_renames = { + "tf.inv": "tf.reciprocal", + "tf.contrib.deprecated.scalar_summary": "tf.summary.scalar", + "tf.contrib.deprecated.histogram_summary": "tf.summary.histogram", + "tf.listdiff": "tf.setdiff1d", + "tf.list_diff": "tf.setdiff1d", + "tf.mul": "tf.multiply", + "tf.neg": "tf.negative", + "tf.sub": "tf.subtract", + "tf.train.SummaryWriter": "tf.summary.FileWriter", + "tf.scalar_summary": 
"tf.summary.scalar", + "tf.histogram_summary": "tf.summary.histogram", + "tf.audio_summary": "tf.summary.audio", + "tf.image_summary": "tf.summary.image", + "tf.merge_summary": "tf.summary.merge", + "tf.merge_all_summaries": "tf.summary.merge_all", + "tf.image.per_image_whitening": "tf.image.per_image_standardization", + "tf.all_variables": "tf.global_variables", + "tf.VARIABLES": "tf.GLOBAL_VARIABLES", + "tf.initialize_all_variables": "tf.global_variables_initializer", + "tf.initialize_variables": "tf.variables_initializer", + "tf.initialize_local_variables": "tf.local_variables_initializer", + "tf.batch_matrix_diag": "tf.matrix_diag", + "tf.batch_band_part": "tf.band_part", + "tf.batch_set_diag": "tf.set_diag", + "tf.batch_matrix_transpose": "tf.matrix_transpose", + "tf.batch_matrix_determinant": "tf.matrix_determinant", + "tf.batch_matrix_inverse": "tf.matrix_inverse", + "tf.batch_cholesky": "tf.cholesky", + "tf.batch_cholesky_solve": "tf.cholesky_solve", + "tf.batch_matrix_solve": "tf.matrix_solve", + "tf.batch_matrix_triangular_solve": "tf.matrix_triangular_solve", + "tf.batch_matrix_solve_ls": "tf.matrix_solve_ls", + "tf.batch_self_adjoint_eig": "tf.self_adjoint_eig", + "tf.batch_self_adjoint_eigvals": "tf.self_adjoint_eigvals", + "tf.batch_svd": "tf.svd", + "tf.batch_fft": "tf.fft", + "tf.batch_ifft": "tf.ifft", + "tf.batch_fft2d": "tf.fft2d", + "tf.batch_ifft2d": "tf.ifft2d", + "tf.batch_fft3d": "tf.fft3d", + "tf.batch_ifft3d": "tf.ifft3d", + "tf.select": "tf.where", + "tf.complex_abs": "tf.abs", + "tf.batch_matmul": "tf.matmul", + "tf.pack": "tf.stack", + "tf.unpack": "tf.unstack", + "tf.op_scope": "tf.name_scope", + } + + self.change_to_function = { + "tf.ones_initializer", + "tf.zeros_initializer", + } + + # Functions that were reordered should be changed to the new keyword args + # for safety, if positional arguments are used. If you have reversed the + # positional arguments yourself, this could do the wrong thing. + self.function_reorders = { + "tf.split": ["axis", "num_or_size_splits", "value", "name"], + "tf.sparse_split": ["axis", "num_or_size_splits", "value", "name"], + "tf.concat": ["concat_dim", "values", "name"], + "tf.svd": ["tensor", "compute_uv", "full_matrices", "name"], + "tf.nn.softmax_cross_entropy_with_logits": [ + "logits", "labels", "dim", "name" + ], + "tf.nn.sparse_softmax_cross_entropy_with_logits": [ + "logits", "labels", "name" + ], + "tf.nn.sigmoid_cross_entropy_with_logits": ["logits", "labels", "name"], + "tf.op_scope": ["values", "name", "default_name"], + } + + # Warnings that should be printed if corresponding functions are used. + self.function_warnings = { + "tf.reverse": ( + ast_edits.ERROR, + "tf.reverse has had its argument semantics changed " + "significantly. 
The converter cannot detect this reliably, so " + "you need to inspect this usage manually.\n"), + } + + self.module_deprecations = {} if __name__ == "__main__": - parser = argparse.ArgumentParser( + parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, description="""Convert a TensorFlow Python file to 1.0 + Simple usage: tf_convert.py --infile foo.py --outfile bar.py tf_convert.py --intree ~/code/old --outtree ~/code/new """) - parser.add_argument( + + parser.add_argument( "--infile", dest="input_file", help="If converting a single file, the name of the file " "to convert") - parser.add_argument( + parser.add_argument( "--outfile", dest="output_file", help="If converting a single file, the output filename.") - parser.add_argument( + parser.add_argument( "--intree", dest="input_tree", help="If converting a whole tree of files, the directory " "to read from (relative or absolute).") - parser.add_argument( + parser.add_argument( "--outtree", dest="output_tree", help="If converting a whole tree of files, the output " "directory (relative or absolute).") - parser.add_argument( + parser.add_argument( + "--copyotherfiles", + dest="copy_other_files", + help=("If converting a whole tree of files, whether to " + "copy the other files."), + type=bool, + default=False) + parser.add_argument( "--reportfile", dest="report_filename", help=("The name of the file where the report log is " "stored." "(default: %(default)s)"), default="report.txt") - args = parser.parse_args() - - upgrade = TensorFlowCodeUpgrader() - report_text = None - report_filename = args.report_filename - files_processed = 0 - if args.input_file: - files_processed, report_text, errors = upgrade.process_file( - args.input_file, args.output_file) - files_processed = 1 - elif args.input_tree: - files_processed, report_text, errors = upgrade.process_tree( - args.input_tree, args.output_tree) - else: - parser.print_help() - if report_text: - open(report_filename, "w").write(report_text) - print("TensorFlow 1.0 Upgrade Script") - print("-----------------------------") - print("Converted %d files\n" % files_processed) - print("Detected %d errors that require attention" % len(errors)) - print("-" * 80) - print("\n".join(errors)) -print("\nMake sure to read the detailed log %r\n" % report_filename) + args = parser.parse_args() + + upgrade = ast_edits.ASTCodeUpgrader(TFAPIChangeSpec()) + report_text = None + report_filename = args.report_filename + files_processed = 0 + if args.input_file: + files_processed, report_text, errors = upgrade.process_file(args.input_file, args.output_file) + files_processed = 1 + elif args.input_tree: + files_processed, report_text, errors = upgrade.process_tree(args.input_tree, args.output_tree, + args.copy_other_files) + else: + parser.print_help() + if report_text: + open(report_filename, "w").write(report_text) + print("TensorFlow 1.0 Upgrade Script") + print("-----------------------------") + print("Converted %d files\n" % files_processed) + print("Detected %d errors that require attention" % len(errors)) + print("-" * 80) + print("\n".join(errors)) + print("\nMake sure to read the detailed log %r\n" % report_filename) diff --git a/misc/utils.py b/misc/utils.py index ef437a5..06b0f6e 100644 --- a/misc/utils.py +++ b/misc/utils.py @@ -76,6 +76,9 @@ def convert_to_uint8(img): img = img.astype(np.uint8) return img +def caption_convert(caption): + return caption.decode("utf-8") + def mkdir_p(path): try: os.makedirs(path) diff --git a/stageI/model.py b/stageI/model.py index 
6fdcafc..20e2652 100644 --- a/stageI/model.py +++ b/stageI/model.py @@ -1,11 +1,10 @@ from __future__ import division from __future__ import print_function -# import prettytensor as pt import tensorflow as tf import sys -sys.path.append('./misc') +sys.path.append('misc') from custom_ops import fc, conv_batch_normalization, fc_batch_normalization, reshape, Conv2d, Deconv2d, UpSample, add from config import cfg diff --git a/stageI/run_exp.py b/stageI/run_exp.py index 7121cb8..53d0b8e 100644 --- a/stageI/run_exp.py +++ b/stageI/run_exp.py @@ -1,22 +1,20 @@ from __future__ import division from __future__ import print_function -import dateutil import dateutil.tz import datetime import argparse import pprint import sys -sys.path.append('./misc') +sys.path.append('misc') sys.path.append('stageI') from datasets import TextDataset -from model import CondGAN -from trainer import CondGANTrainer from utils import mkdir_p from config import cfg, cfg_from_file -from tensorflow.contrib.framework.python.framework import checkpoint_utils +from model import CondGAN +from trainer import CondGANTrainer def parse_args(): diff --git a/stageI/trainer.py b/stageI/trainer.py index 4bc3a61..981bc2e 100644 --- a/stageI/trainer.py +++ b/stageI/trainer.py @@ -1,17 +1,15 @@ from __future__ import division from __future__ import print_function +from six.moves import range +from progressbar import ETA, Bar, Percentage, ProgressBar import tensorflow as tf import numpy as np import os -from six.moves import range -from progressbar import ETA, Bar, Percentage, ProgressBar - -# > Python3 import imageio import sys -sys.path.append('./misc') +sys.path.append('misc') from config import cfg from utils import mkdir_p @@ -78,16 +76,16 @@ def sample_encoded_context(self, embeddings): def init_opt(self): self.build_placeholder() - with tf.variable_scope("g_net"): + with tf.variable_scope("g_net"): # For training # ####get output from G network################################ c, kl_loss = self.sample_encoded_context(self.embeddings) z = tf.random_normal([self.batch_size, cfg.Z_DIM]) self.log_vars.append(("hist_c", c)) self.log_vars.append(("hist_z", z)) - fake_images = self.model.get_generator(tf.concat([c, z], 1), True) # training = True + fake_images = self.model.get_generator(tf.concat([c, z], 1), True) # set training to be True - with tf.variable_scope("d_net"): #, reuse=tf.AUTO_REUSE): # For training + with tf.variable_scope("d_net"): # For training # ####get discriminator_loss and generator_loss ################### discriminator_loss, generator_loss = self.compute_losses(self.images, self.wrong_images, fake_images, self.embeddings) @@ -101,7 +99,7 @@ def init_opt(self): # #######define self.g_sum, self.d_sum,....######################## self.define_summaries() - with tf.variable_scope("g_net", reuse=True): + with tf.variable_scope("g_net", reuse=True): # For testing self.sampler() self.visualization(cfg.TRAIN.NUM_COPY) print("success") @@ -113,10 +111,11 @@ def sampler(self): z = tf.zeros([self.batch_size, cfg.Z_DIM]) # Expect similar BGs else: z = tf.random_normal([self.batch_size, cfg.Z_DIM]) - self.fake_images = self.model.get_generator(tf.concat([c, z], 1), False) + self.fake_images = self.model.get_generator(tf.concat([c, z], 1), False) # for testing def compute_losses(self, images, wrong_images, fake_images, embeddings): real_logit = self.model.get_discriminator(images, embeddings, True) + # Reuse the weights wrong_logit = self.model.get_discriminator(wrong_images, embeddings, True, no_reuse=tf.AUTO_REUSE) fake_logit = 
self.model.get_discriminator(fake_images, embeddings, True, no_reuse=tf.AUTO_REUSE) @@ -146,14 +145,15 @@ def prepare_trainer(self, generator_loss, discriminator_loss): g_vars = [var for var in all_vars if var.name.startswith('g_')] d_vars = [var for var in all_vars if var.name.startswith('d_')] + # Update the trainable variables update_ops_D = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if var.name.startswith('d_')] update_ops_G = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if var.name.startswith('g_')] - with tf.control_dependencies(update_ops_G): + with tf.control_dependencies(update_ops_G): # Update the moving mean and variance from the batch normalization generator_opt = tf.train.AdamOptimizer(self.generator_lr, beta1=0.5) self.generator_trainer = generator_opt.minimize(generator_loss, var_list=g_vars) - with tf.control_dependencies(update_ops_D): + with tf.control_dependencies(update_ops_D): # Update the moving mean and variance from the batch normalization discriminator_opt = tf.train.AdamOptimizer(self.discriminator_lr, beta1=0.5) self.discriminator_trainer = discriminator_opt.minimize(discriminator_loss, var_list=d_vars) @@ -248,7 +248,6 @@ def build_model(self, sess): if len(self.model_path) > 0: print("Reading model parameters from %s" % self.model_path) restore_vars = tf.global_variables() - # restore_vars = tf.all_variables() # all_vars = tf.all_variables() # restore_vars = [var for var in all_vars if # var.name.startswith('g_') or diff --git a/stageII/__init__.py b/stageII/__init__.py index f78a8b1..008827b 100644 --- a/stageII/__init__.py +++ b/stageII/__init__.py @@ -1,2 +1,3 @@ from __future__ import division from __future__ import print_function + diff --git a/stageII/cfg/birds.yml b/stageII/cfg/birds.yml index f8cddb5..4e3ce8d 100644 --- a/stageII/cfg/birds.yml +++ b/stageII/cfg/birds.yml @@ -9,7 +9,7 @@ TRAIN: FLAG: True PRETRAINED_MODEL: './ckt_logs/birds/stageI/model_82000.ckpt' PRETRAINED_EPOCH: 600 - BATCH_SIZE: 32 # 64 + BATCH_SIZE: 64 NUM_COPY: 4 MAX_EPOCH: 1200 SNAPSHOT_INTERVAL: 2000 diff --git a/stageII/model.py b/stageII/model.py index 18f2bb0..a4bddfa 100644 --- a/stageII/model.py +++ b/stageII/model.py @@ -3,15 +3,11 @@ import tensorflow as tf import sys +sys.path.append('misc') -sys.path.append('./misc') - -from custom_ops import fc, conv_batch_normalization, fc_batch_normalization, reshape, Conv2d, Deconv2d, UpSample, add +from custom_ops import fc, conv_batch_normalization, fc_batch_normalization, reshape, Conv2d, UpSample, add from config import cfg -# TODO: Does template.constrct() really shared the computation -# when multipel times of construct are done - class CondGAN(object): def __init__(self, lr_imsize, hr_lr_ratio): @@ -107,26 +103,28 @@ def residual_block(self, x_c_code, name, training=True): return output_tensor - def hr_g_encode_image(self, x_var, training=True): - # x_var: -->s * s * 3 - output_tensor = Conv2d(x_var, 3, 3, self.gf_dim, 1, 1, activation_fn=tf.nn.relu, name='hr_g_OT/conv2d') # s * s * gf_dim - output_tensor = Conv2d(output_tensor, 4, 4, self.gf_dim * 2, 2, 2, name='hr_g_OT/conv2d2') # s2 * s2 * gf_dim * 2 + def hr_g_encode_image(self, x_var, training=True): # input: x_var --> s * s * 3 + # s * s * gf_dim + output_tensor = Conv2d(x_var, 3, 3, self.gf_dim, 1, 1, activation_fn=tf.nn.relu, name='hr_g_OT/conv2d') + + # s2 * s2 * gf_dim * 2 + output_tensor = Conv2d(output_tensor, 4, 4, self.gf_dim * 2, 2, 2, name='hr_g_OT/conv2d2') output_tensor = conv_batch_normalization(output_tensor, 
'hr_g_OT/batch_norm', is_training=training, activation_fn=tf.nn.relu) - output_tensor = Conv2d(output_tensor, 4, 4, self.gf_dim * 4, 2, 2, name='hr_g_OT/conv2d3') # s4 * s4 * gf_dim * 4 + # s4 * s4 * gf_dim * 4 + output_tensor = Conv2d(output_tensor, 4, 4, self.gf_dim * 4, 2, 2, name='hr_g_OT/conv2d3') output_tensor = conv_batch_normalization(output_tensor, 'hr_g_OT/batch_norm2', is_training=training, activation_fn=tf.nn.relu) return output_tensor - def hr_g_joint_img_text(self, x_c_code, training=True): - # x_code: -->s4 * s4 * (ef_dim+gf_dim*4) - output_tensor = Conv2d(x_c_code, 3, 3, self.gf_dim * 4, 1, 1, name='hr_g_joint_OT/conv2d') # s4 * s4 * gf_dim * 4 + def hr_g_joint_img_text(self, x_c_code, training=True): # input: x_code: -->s4 * s4 * (ef_dim+gf_dim*4) + # s4 * s4 * gf_dim * 4 + output_tensor = Conv2d(x_c_code, 3, 3, self.gf_dim * 4, 1, 1, name='hr_g_joint_OT/conv2d') output_tensor = conv_batch_normalization(output_tensor, 'hr_g_joint_OT/batch_norm', is_training=training, activation_fn=tf.nn.relu) return output_tensor - def hr_generator(self, x_c_code, training=True): - # Inputs: -->s4 * s4 * gf_dim*4 + def hr_generator(self, x_c_code, training=True): # Input: x_c_code -->s4 * s4 * gf_dim*4 output_tensor = UpSample(x_c_code, size=[self.s2, self.s2], method=1, align_corners=False, name='hr_gen/upsample') output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim * 2, 1, 1, name='hr_gen/conv2d') @@ -147,7 +145,8 @@ def hr_generator(self, x_c_code, training=True): output_tensor = Conv2d(output_tensor, 3, 3, self.gf_dim//4, 1, 1, name='hr_gen/conv2d4') output_tensor = conv_batch_normalization(output_tensor, 'hr_gen/batch_norm4', is_training=training, activation_fn=tf.nn.relu) - output_tensor = Conv2d(output_tensor, 3, 3, 3, 1, 1, name='hr_gen/conv2d5', activation_fn=tf.nn.tanh) # -->4s * 4s * 3 + # -->4s * 4s * 3 + output_tensor = Conv2d(output_tensor, 3, 3, 3, 1, 1, name='hr_gen/conv2d5', activation_fn=tf.nn.tanh) return output_tensor def hr_get_generator(self, x_var, c_code, is_training): @@ -195,12 +194,16 @@ def d_encode_image(self, inputs=None, training=True, if_reuse=None): # input: s * s * 3 node1_0 = Conv2d(inputs, 4, 4, self.df_dim, 2, 2, activation_fn=tf.nn.leaky_relu, name='d_n1.0/conv2d', reuse=if_reuse) # s2 * s2 * df_dim - node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 2, 2, 2, name='d_n1.0/conv2d2', reuse=if_reuse) # s4 * s4 * df_dim*2 + + # s4 * s4 * df_dim*2 + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 2, 2, 2, name='d_n1.0/conv2d2', reuse=if_reuse) node1_0 = conv_batch_normalization(node1_0, 'd_n1.0/batch_norm', is_training=training, activation_fn=tf.nn.leaky_relu, reuse=if_reuse) - node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 4, 2, 2, name='d_n1.0/conv2d3', reuse=if_reuse) # s8 * s8 * df_dim*4 + # s8 * s8 * df_dim*4 + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 4, 2, 2, name='d_n1.0/conv2d3', reuse=if_reuse) node1_0 = conv_batch_normalization(node1_0, 'd_n1.0/batch_norm2', is_training=training, reuse=if_reuse) - node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 8, 2, 2, name='d_n1.0/conv2d4', reuse=if_reuse) # s16 * s16 * df_dim*8 + # s16 * s16 * df_dim*8 + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 8, 2, 2, name='d_n1.0/conv2d4', reuse=if_reuse) node1_0 = conv_batch_normalization(node1_0, 'd_n1.0/batch_norm3', is_training=training, reuse=if_reuse) node1_1 = Conv2d(node1_0, 1, 1, self.df_dim * 2, 1, 1, name='d_n1.1/conv2d', reuse=if_reuse) @@ -234,26 +237,34 @@ def get_discriminator(self, x_var, c_var, is_training, no_reuse=None): def hr_d_encode_image(self, 
inputs=None, training=True, if_reuse=None): # input: 4s * 4s * 3 node1_0 = Conv2d(inputs, 4, 4, self.df_dim, 2, 2, activation_fn=tf.nn.leaky_relu, - name='hr_d_encode_n1.0/conv2d1', reuse=if_reuse) # 2s * 2s * df_dim - node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 2, 2, 2, name='hr_d_encode_n1.0/conv2d2', reuse=if_reuse) # s * s * df_dim*2 + name='hr_d_encode_n1.0/conv2d1', reuse=if_reuse) # 2s * 2s * df_dim + + # s * s * df_dim*2 + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 2, 2, 2, name='hr_d_encode_n1.0/conv2d2', reuse=if_reuse) node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm', is_training=training, activation_fn=tf.nn.leaky_relu, reuse=if_reuse) - node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 4, 2, 2, name='hr_d_encode_n1.0/conv2d3', reuse=if_reuse) # s2 * s2 * df_dim*4 + # s2 * s2 * df_dim*4 + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 4, 2, 2, name='hr_d_encode_n1.0/conv2d3', reuse=if_reuse) node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm2', is_training=training, activation_fn=tf.nn.leaky_relu, reuse=if_reuse) - node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 8, 2, 2, name='hr_d_encode_n1.0/conv2d4', reuse=if_reuse) # s4 * s4 * df_dim*8 + # s4 * s4 * df_dim*8 + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 8, 2, 2, name='hr_d_encode_n1.0/conv2d4', reuse=if_reuse) node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm3', is_training=training, activation_fn=tf.nn.leaky_relu, reuse=if_reuse) - node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 16, 2, 2, name='hr_d_encode_n1.0/conv2d5', reuse=if_reuse) # s8 * s8 * df_dim*16 + # s8 * s8 * df_dim*16 + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 16, 2, 2, name='hr_d_encode_n1.0/conv2d5', reuse=if_reuse) node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm4', is_training=training, activation_fn=tf.nn.leaky_relu, reuse=if_reuse) - node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 32, 2, 2, name='hr_d_encode_n1.0/conv2d6', reuse=if_reuse) # s16 * s16 * df_dim*32 + # s16 * s16 * df_dim*32 + node1_0 = Conv2d(node1_0, 4, 4, self.df_dim * 32, 2, 2, name='hr_d_encode_n1.0/conv2d6', reuse=if_reuse) node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm5', is_training=training, activation_fn=tf.nn.leaky_relu, reuse=if_reuse) - node1_0 = Conv2d(node1_0, 1, 1, self.df_dim * 16, 1, 1, name='hr_d_encode_n1.0/conv2d7', reuse=if_reuse) # s16 * s16 * df_dim*16 + # s16 * s16 * df_dim*16 + node1_0 = Conv2d(node1_0, 1, 1, self.df_dim * 16, 1, 1, name='hr_d_encode_n1.0/conv2d7', reuse=if_reuse) node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm6', is_training=training, activation_fn=tf.nn.leaky_relu, reuse=if_reuse) - node1_0 = Conv2d(node1_0, 1, 1, self.df_dim * 8, 1, 1, name='hr_d_encode_n1.0/conv2d8', reuse=if_reuse) # s16 * s16 * df_dim*8 + # s16 * s16 * df_dim*8 + node1_0 = Conv2d(node1_0, 1, 1, self.df_dim * 8, 1, 1, name='hr_d_encode_n1.0/conv2d8', reuse=if_reuse) node1_0 = conv_batch_normalization(node1_0, 'hr_d_encode_n1.0/batch_norm7', is_training=training, reuse=if_reuse) @@ -274,7 +285,8 @@ def hr_d_encode_image(self, inputs=None, training=True, if_reuse=None): def hr_get_discriminator(self, x_var, c_var, is_training, no_reuse=None): if cfg.GAN.NETWORK_TYPE == "default": - x_code = self.hr_d_encode_image(training=is_training, inputs=x_var, if_reuse=no_reuse) # s16 * s16 * df_dim*8 + # s16 * s16 * df_dim*8 + x_code = self.hr_d_encode_image(training=is_training, inputs=x_var, if_reuse=no_reuse) c_code = 
self.context_embedding(inputs=c_var, if_reuse=no_reuse) c_code = tf.expand_dims(tf.expand_dims(c_code, 1), 1) diff --git a/stageII/run_exp.py b/stageII/run_exp.py index dae8100..1285112 100644 --- a/stageII/run_exp.py +++ b/stageII/run_exp.py @@ -1,24 +1,20 @@ from __future__ import division from __future__ import print_function -import tensorflow as tf -import dateutil import dateutil.tz import datetime import argparse import pprint import sys -sys.path.append('./misc') +sys.path.append('misc') sys.path.append('stageII') from datasets import TextDataset -from model import CondGAN -from trainer import CondGANTrainer from utils import mkdir_p from config import cfg, cfg_from_file - -from tensorflow.contrib.framework.python.framework import checkpoint_utils +from model import CondGAN +from trainer import CondGANTrainer def parse_args(): diff --git a/stageII/trainer.py b/stageII/trainer.py index b0f7321..1496ed4 100644 --- a/stageII/trainer.py +++ b/stageII/trainer.py @@ -1,18 +1,16 @@ from __future__ import division from __future__ import print_function -# import prettytensor as pt import tensorflow as tf import numpy as np import imageio -# import scipy.misc import os from six.moves import range from progressbar import ETA, Bar, Percentage, ProgressBar from PIL import Image, ImageDraw, ImageFont import sys -sys.path.append('./misc') +sys.path.append('misc') from config import cfg from utils import mkdir_p @@ -91,13 +89,13 @@ def init_opt(self): self.build_placeholder() # ####get output from G network#################################### - with tf.variable_scope("g_net"): + with tf.variable_scope("g_net"): # For training c, kl_loss = self.sample_encoded_context(self.embeddings) z = tf.random_normal([self.batch_size, cfg.Z_DIM]) self.log_vars.append(("hist_c", c)) self.log_vars.append(("hist_z", z)) fake_images = self.model.get_generator(tf.concat([c, z], 1), True) - with tf.variable_scope("d_net"): + with tf.variable_scope("d_net"): # For training # ####get discriminator_loss and generator_loss ################### discriminator_loss, generator_loss = self.compute_losses(self.images, self.wrong_images, fake_images, self.embeddings, flag='lr') @@ -107,12 +105,12 @@ def init_opt(self): self.log_vars.append(("d_loss", discriminator_loss)) # #### For hr_g and hr_d ######################################### - with tf.variable_scope("hr_g_net"): + with tf.variable_scope("hr_g_net"): # For training hr_c, hr_kl_loss = self.sample_encoded_context(self.embeddings) self.log_vars.append(("hist_hr_c", hr_c)) hr_fake_images = self.model.hr_get_generator(fake_images, hr_c, True) - with tf.variable_scope("hr_d_net"): + with tf.variable_scope("hr_d_net"): # For training # get losses hr_discriminator_loss, hr_generator_loss = self.compute_losses(self.hr_images, self.hr_wrong_images, hr_fake_images, self.embeddings, flag='hr') @@ -129,21 +127,23 @@ def init_opt(self): print("success") def sampler(self): - with tf.variable_scope("g_net", reuse=True): + with tf.variable_scope("g_net", reuse=True): # For testing c, _ = self.sample_encoded_context(self.embeddings) z = tf.random_normal([self.batch_size, cfg.Z_DIM]) self.fake_images = self.model.get_generator(tf.concat([c, z], 1), False) - with tf.variable_scope("hr_g_net", reuse=True): + with tf.variable_scope("hr_g_net", reuse=True): # For testing hr_c, _ = self.sample_encoded_context(self.embeddings) self.hr_fake_images = self.model.hr_get_generator(self.fake_images, hr_c, False) def compute_losses(self, images, wrong_images, fake_images, embeddings, flag='lr'): if 
flag == 'lr':
             real_logit = self.model.get_discriminator(images, embeddings, True)
+            # Reuse the discriminator weights for the wrong/fake image branches
             wrong_logit = self.model.get_discriminator(wrong_images, embeddings, True, no_reuse=tf.AUTO_REUSE)
             fake_logit = self.model.get_discriminator(fake_images, embeddings, True, no_reuse=tf.AUTO_REUSE)
         else:
             real_logit = self.model.hr_get_discriminator(images, embeddings, True)
+            # Reuse the discriminator weights for the wrong/fake image branches
             wrong_logit = self.model.hr_get_discriminator(wrong_images, embeddings, True, no_reuse=tf.AUTO_REUSE)
             fake_logit = self.model.hr_get_discriminator(fake_images, embeddings, True, no_reuse=tf.AUTO_REUSE)
@@ -182,8 +182,9 @@ def define_one_trainer(self, loss, learning_rate, key_word):
         all_vars = tf.trainable_variables()
         tarin_vars = [var for var in all_vars if var.name.startswith(key_word)]
+        # Collect the batch-norm update ops that belong to this sub-network
         update_ops_vars = [var for var in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if var.name.startswith(key_word)]
-
+        # Run the batch-norm moving mean/variance updates before each train step
         with tf.control_dependencies(update_ops_vars):
             opt = tf.train.AdamOptimizer(learning_rate, beta1=0.5)
             trainer = opt.minimize(loss, var_list=tarin_vars)
@@ -192,14 +193,12 @@ def prepare_trainer(self, discriminator_loss, generator_loss, hr_discriminator_loss, hr_generator_loss):
         ft_lr_retio = cfg.TRAIN.FT_LR_RETIO
-        self.discriminator_trainer = self.define_one_trainer(discriminator_loss, self.discriminator_lr * ft_lr_retio,
-                                                             'd_')
+        self.discriminator_trainer = self.define_one_trainer(discriminator_loss, self.discriminator_lr * ft_lr_retio, 'd_')
         self.generator_trainer = self.define_one_trainer(generator_loss, self.generator_lr * ft_lr_retio, 'g_')
         self.hr_discriminator_trainer = self.define_one_trainer(hr_discriminator_loss, self.discriminator_lr, 'hr_d_')
         self.hr_generator_trainer = self.define_one_trainer(hr_generator_loss, self.generator_lr, 'hr_g_')
-        self.ft_generator_trainer = self.define_one_trainer(hr_generator_loss,
-                                                            self.generator_lr * cfg.TRAIN.FT_LR_RETIO, 'g_')
+        self.ft_generator_trainer = self.define_one_trainer(hr_generator_loss, self.generator_lr * cfg.TRAIN.FT_LR_RETIO, 'g_')
         self.log_vars.append(("hr_d_learning_rate", self.discriminator_lr))
         self.log_vars.append(("hr_g_learning_rate", self.generator_lr))
@@ -311,10 +310,6 @@ def build_model(self, sess):
             print("Reading model parameters from %s" % self.model_path)
             all_vars = tf.trainable_variables()
             restore_vars = [var for var in all_vars if var.name.startswith('g_') or var.name.startswith('d_')]
-            # restore_vars = []
-            # for var in all_vars:
-            #     if var.name.startswith('g_') or var.name.startswith('d_'):
-            #         restore_vars.append(var)
             saver = tf.train.Saver(restore_vars)
             saver.restore(sess, self.model_path)
@@ -343,14 +338,17 @@ def train_one_step(self, generator_lr, discriminator_lr, counter, summary_writer
         summary_writer.add_summary(ret_list[1], counter)
         log_vals = ret_list[2]
         summary_writer.add_summary(ret_list[3], counter)
+        # Train G1 and fine-tune G0 with G1's loss
         feed_out_g = [self.hr_generator_trainer, self.ft_generator_trainer, self.hr_g_sum]
         _, _, hr_g_sum = sess.run(feed_out_g, feed_dict)
         summary_writer.add_summary(hr_g_sum, counter)
+        # Fine-tune D0 with D0's loss
         feed_out_d = [self.discriminator_trainer, self.d_sum]
         _, d_sum = sess.run(feed_out_d, feed_dict)
         summary_writer.add_summary(d_sum, counter)
+        # Fine-tune G0 with G0's loss
         feed_out_g = [self.generator_trainer, self.g_sum]
         _, g_sum = sess.run(feed_out_g, feed_dict)
@@ -554,7 +552,6 @@ def
evaluate(self): saver.restore(sess, self.model_path) # self.eval_one_dataset(sess, self.dataset.train, # self.log_dir, subset='train') - self.eval_one_dataset(sess, self.dataset.test, - self.log_dir, subset='test') + self.eval_one_dataset(sess, self.dataset.test, self.log_dir, subset='test') else: print("Input a valid model path.") From ea1f7573963483ed54a67abf58ff5846f565d876 Mon Sep 17 00:00:00 2001 From: Akhil V Date: Thu, 18 Jul 2019 17:12:02 -0400 Subject: [PATCH 07/13] Updated skipthoughts.py file and added in correct Pre-trained model links --- demo/birds_skip_thought_demo.py | 6 +- demo/cfg/birds-demo.yml | 2 +- demo/cfg/birds-eval.yml | 2 +- demo/cfg/birds-skip-thought-demo.yml | 2 +- demo/demo.py | 3 + misc/config.py | 5 +- misc/skipthoughts.py | 136 +++++++++++++-------------- misc/utils.py | 10 +- models/README.md | 8 +- stageII/cfg/birds.yml | 5 +- stageII/trainer.py | 2 + 11 files changed, 86 insertions(+), 95 deletions(-) diff --git a/demo/birds_skip_thought_demo.py b/demo/birds_skip_thought_demo.py index c334702..ee415a0 100644 --- a/demo/birds_skip_thought_demo.py +++ b/demo/birds_skip_thought_demo.py @@ -12,9 +12,9 @@ sys.path.append('misc') sys.path.append('stageII') -from misc.config import cfg, cfg_from_file -from misc.utils import mkdir_p -from stageII.model import CondGAN +from config import cfg, cfg_from_file +from utils import mkdir_p +from model import CondGAN from skimage.transform import resize diff --git a/demo/cfg/birds-demo.yml b/demo/cfg/birds-demo.yml index 946c306..96b5286 100644 --- a/demo/cfg/birds-demo.yml +++ b/demo/cfg/birds-demo.yml @@ -5,7 +5,7 @@ GPU_ID: 0 Z_DIM: 100 TEST: - PRETRAINED_MODEL: './models/model_330000.ckpt' + PRETRAINED_MODEL: './models/stageII/model_330000.ckpt' BATCH_SIZE: 64 NUM_COPY: 8 diff --git a/demo/cfg/birds-eval.yml b/demo/cfg/birds-eval.yml index 78ba936..1a44393 100644 --- a/demo/cfg/birds-eval.yml +++ b/demo/cfg/birds-eval.yml @@ -7,7 +7,7 @@ Z_DIM: 100 TRAIN: FLAG: False - PRETRAINED_MODEL: './models/birds_model_164000.ckpt' + PRETRAINED_MODEL: './models/stageII/model_330000.ckpt' BATCH_SIZE: 64 NUM_COPY: 8 diff --git a/demo/cfg/birds-skip-thought-demo.yml b/demo/cfg/birds-skip-thought-demo.yml index e346428..1c00129 100644 --- a/demo/cfg/birds-skip-thought-demo.yml +++ b/demo/cfg/birds-skip-thought-demo.yml @@ -6,7 +6,7 @@ Z_DIM: 100 TEST: CAPTION_PATH: './Data/birds/example_captions.txt' - PRETRAINED_MODEL: './models/birds_skip_thought_model_164000.ckpt' + PRETRAINED_MODEL: './models/stageII/model_330000.ckpt' BATCH_SIZE: 64 NUM_COPY: 8 diff --git a/demo/demo.py b/demo/demo.py index b733fa8..a4bc9d3 100644 --- a/demo/demo.py +++ b/demo/demo.py @@ -70,6 +70,7 @@ def build_model(sess, embedding_dim, batch_size): print("Input a valid model path.") return embeddings, fake_images, hr_fake_images + def drawCaption(img, caption): caption = caption_convert(caption) img_txt = Image.fromarray(img) @@ -114,6 +115,7 @@ def save_super_images(sample_batchs, hr_sample_batchs, captions_batch, batch_siz # First row with up to 8 samples for i in range(np.minimum(8, len(sample_batchs))): lr_img = sample_batchs[i][j] + lr_img = (lr_img + 1.0) * 127.5 hr_img = hr_sample_batchs[i][j] hr_img = (hr_img + 1.0) * 127.5 re_sample = resize(lr_img, hr_img.shape[:2]) @@ -129,6 +131,7 @@ def save_super_images(sample_batchs, hr_sample_batchs, captions_batch, batch_siz row2 = [padding] for i in range(8, len(sample_batchs)): lr_img = sample_batchs[i][j] + lr_img = (lr_img + 1.0) * 127.5 hr_img = hr_sample_batchs[i][j] hr_img = (hr_img + 1.0) * 127.5 
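The two lr_img = (lr_img + 1.0) * 127.5 lines added in this hunk matter because skimage.transform.resize, unlike the removed scipy.misc.imresize, does not rescale intensities on its own: the generator's tanh output stays in [-1, 1] unless it is mapped to [0, 255] explicitly. A hedged sketch of the conversion (the helper name and preserve_range usage are illustrative, not the repo's exact code):

import numpy as np
from skimage.transform import resize

def lr_to_display(lr_img, target_hw):
    # Map tanh-scaled output from [-1, 1] to [0, 255] before resizing.
    img = (lr_img + 1.0) * 127.5
    # preserve_range keeps the float values in [0, 255] instead of
    # normalizing them to [0, 1].
    return resize(img, target_hw, preserve_range=True).astype(np.uint8)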
re_sample = resize(lr_img, hr_img.shape[:2]) diff --git a/misc/config.py b/misc/config.py index ac01f7c..0653da3 100644 --- a/misc/config.py +++ b/misc/config.py @@ -1,7 +1,6 @@ from __future__ import division from __future__ import print_function -import os.path as osp import numpy as np from easydict import EasyDict as edict @@ -48,7 +47,8 @@ __C.TRAIN.COEFF = edict() __C.TRAIN.COEFF.KL = 2.0 -__C.TRAIN.FINETUNE_LR = True # False +# For Stage II training +__C.TRAIN.FINETUNE_LR = False __C.TRAIN.FT_LR_RETIO = 0.1 # Modal options @@ -69,7 +69,6 @@ def _merge_a_into_b(a, b): for k, v in a.items(): # a must specify keys that are in b if k not in b: - #if not b.has_key(k): raise KeyError('{} is not a valid config key'.format(k)) # the types must match, too diff --git a/misc/skipthoughts.py b/misc/skipthoughts.py index d8be946..6a00f45 100644 --- a/misc/skipthoughts.py +++ b/misc/skipthoughts.py @@ -2,14 +2,14 @@ Skip-thought vectors https://github.com/ryankiros/skip-thoughts ''' -import os +# import os +import warnings import theano import theano.tensor as tensor -import cPickle as pkl +import pickle as pkl import numpy -import copy import nltk from collections import OrderedDict, defaultdict @@ -34,10 +34,10 @@ def load_model(): Load the model with saved tables """ # Load model options - print 'Loading model parameters...' - with open('%s.pkl'%path_to_umodel, 'rb') as f: + print('Loading model parameters...') + with open('%s.pkl' % path_to_umodel, 'rb') as f: uoptions = pkl.load(f) - with open('%s.pkl'%path_to_bmodel, 'rb') as f: + with open('%s.pkl' % path_to_bmodel, 'rb') as f: boptions = pkl.load(f) # Load parameters @@ -49,19 +49,19 @@ def load_model(): btparams = init_tparams(bparams) # Extractor functions - print 'Compiling encoders...' + print('Compiling encoders...') embedding, x_mask, ctxw2v = build_encoder(utparams, uoptions) f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v') embedding, x_mask, ctxw2v = build_encoder_bi(btparams, boptions) f_w2v2 = theano.function([embedding, x_mask], ctxw2v, name='f_w2v2') # Tables - print 'Loading tables...' + print('Loading tables...') utable, btable = load_tables() # Store everything we need in a dictionary - print 'Packing up...' - model = {} + print('Packing up...') + model = dict() model['uoptions'] = uoptions model['boptions'] = boptions model['utable'] = utable @@ -96,7 +96,7 @@ def encode(model, X, use_norm=True, verbose=True, batch_size=128, use_eos=False) X = preprocess(X) # word dictionary and init - d = defaultdict(lambda : 0) + d = defaultdict(lambda: 0) for w in model['utable'].keys(): d[w] = 1 ufeatures = numpy.zeros((len(X), model['uoptions']['dim']), dtype='float32') @@ -105,14 +105,14 @@ def encode(model, X, use_norm=True, verbose=True, batch_size=128, use_eos=False) # length dictionary ds = defaultdict(list) captions = [s.split() for s in X] - for i,s in enumerate(captions): + for i, s in enumerate(captions): ds[len(s)].append(i) # Get features. 
    for k in ds.keys():
         if verbose:
-            print k
-        numbatches = len(ds[k]) / batch_size + 1
+            print(k)
+        numbatches = len(ds[k]) // batch_size + 1
         for minibatch in range(numbatches):
             caps = ds[k][minibatch::numbatches]
@@ -126,20 +126,20 @@
             caption = captions[c]
             for j in range(len(caption)):
                 if d[caption[j]] > 0:
-                    uembedding[j,ind] = model['utable'][caption[j]]
-                    bembedding[j,ind] = model['btable'][caption[j]]
+                    uembedding[j, ind] = model['utable'][caption[j]]
+                    bembedding[j, ind] = model['btable'][caption[j]]
                 else:
-                    uembedding[j,ind] = model['utable']['UNK']
-                    bembedding[j,ind] = model['btable']['UNK']
             if use_eos:
-                uembedding[-1,ind] = model['utable']['<eos>']
-                bembedding[-1,ind] = model['btable']['<eos>']
+                    uembedding[j, ind] = model['utable']['UNK']
+                    bembedding[j, ind] = model['btable']['UNK']
             if use_eos:
+                uembedding[-1, ind] = model['utable']['<eos>']
+                bembedding[-1, ind] = model['btable']['<eos>']
         if use_eos:
-            uff = model['f_w2v'](uembedding, numpy.ones((len(caption)+1,len(caps)), dtype='float32'))
-            bff = model['f_w2v2'](bembedding, numpy.ones((len(caption)+1,len(caps)), dtype='float32'))
+            uff = model['f_w2v'](uembedding, numpy.ones((len(caption)+1, len(caps)), dtype='float32'))
+            bff = model['f_w2v2'](bembedding, numpy.ones((len(caption)+1, len(caps)), dtype='float32'))
         else:
-            uff = model['f_w2v'](uembedding, numpy.ones((len(caption),len(caps)), dtype='float32'))
-            bff = model['f_w2v2'](bembedding, numpy.ones((len(caption),len(caps)), dtype='float32'))
+            uff = model['f_w2v'](uembedding, numpy.ones((len(caption), len(caps)), dtype='float32'))
+            bff = model['f_w2v2'](bembedding, numpy.ones((len(caption), len(caps)), dtype='float32'))
         if use_norm:
             for j in range(len(uff)):
                 uff[j] /= norm(uff[j])
@@ -180,10 +180,10 @@ def nn(model, text, vectors, query, k=5):
     scores = numpy.dot(qf, vectors.T).flatten()
     sorted_args = numpy.argsort(scores)[::-1]
     sentences = [text[a] for a in sorted_args[:k]]
-    print 'QUERY: ' + query
-    print 'NEAREST: '
+    print('QUERY: ' + query)
+    print('NEAREST: ')
     for i, s in enumerate(sentences):
-        print s, sorted_args[i]
+        print(s, sorted_args[i])
 
 
 def word_features(table):
@@ -207,17 +207,17 @@ def nn_words(table, wordvecs, query, k=10):
     scores = numpy.dot(qf, wordvecs.T).flatten()
     sorted_args = numpy.argsort(scores)[::-1]
     words = [keys[a] for a in sorted_args[:k]]
-    print 'QUERY: ' + query
-    print 'NEAREST: '
+    print('QUERY: ' + query)
+    print('NEAREST: ')
     for i, w in enumerate(words):
-        print w
+        print(w)
 
 
 def _p(pp, name):
     """
     make prefix-appended name
     """
-    return '%s_%s'%(pp, name)
+    return '%s_%s' % (pp, name)
 
 
 def init_tparams(params):
@@ -225,7 +225,7 @@ def init_tparams(params):
     """
     initialize Theano shared variables according to the initial parameters
     """
     tparams = OrderedDict()
-    for kk, pp in params.iteritems():
+    for kk, pp in params.items():
         tparams[kk] = theano.shared(params[kk], name=kk)
     return tparams
 
@@ -235,9 +235,9 @@ def load_params(path, params):
     """
     load parameters
     """
     pp = numpy.load(path)
-    for kk, vv in params.iteritems():
+    for kk, vv in params.items():
         if kk not in pp:
-            warnings.warn('%s is not in the archive'%kk)
+            warnings.warn('%s is not in the archive' % kk)
             continue
         params[kk] = pp[kk]
     return params
@@ -246,6 +246,7 @@ def load_params(path, params):
 # layers: 'name': ('parameter initializer', 'feedforward')
 layers = {'gru': ('param_init_gru', 'gru_layer')}
 
+
 def get_layer(name):
     fns = layers[name]
     return (eval(fns[0]), eval(fns[1]))
@@ -261,8 +262,8 @@
     params['Wemb'] = norm_weight(options['n_words_src'], options['dim_word'])
 
     # encoder: GRU
-    params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
-                                              nin=options['dim_word'], dim=options['dim'])
+    params = get_layer(options['encoder'])[0](options, params, prefix='encoder', nin=options['dim_word'],
+                                              dim=options['dim'])
     return params
 
@@ -276,10 +277,10 @@ def init_params_bi(options):
     params['Wemb'] = norm_weight(options['n_words_src'], options['dim_word'])
 
     # encoder: GRU
-    params = get_layer(options['encoder'])[0](options, params, prefix='encoder',
-                                              nin=options['dim_word'], dim=options['dim'])
-    params = get_layer(options['encoder'])[0](options, params, prefix='encoder_r',
-                                              nin=options['dim_word'], dim=options['dim'])
+    params = get_layer(options['encoder'])[0](options, params, prefix='encoder', nin=options['dim_word'],
+                                              dim=options['dim'])
+    params = get_layer(options['encoder'])[0](options, params, prefix='encoder_r', nin=options['dim_word'],
+                                              dim=options['dim'])
     return params
 
@@ -292,9 +293,7 @@ def build_encoder(tparams, options):
     x_mask = tensor.matrix('x_mask', dtype='float32')
 
     # encoder
-    proj = get_layer(options['encoder'])[1](tparams, embedding, options,
-                                            prefix='encoder',
-                                            mask=x_mask)
+    proj = get_layer(options['encoder'])[1](tparams, embedding, options, prefix='encoder', mask=x_mask)
     ctx = proj[0][-1]
 
     return embedding, x_mask, ctx
@@ -311,12 +310,8 @@ def build_encoder_bi(tparams, options):
     xr_mask = x_mask[::-1]
 
     # encoder
-    proj = get_layer(options['encoder'])[1](tparams, embedding, options,
-                                            prefix='encoder',
-                                            mask=x_mask)
-    projr = get_layer(options['encoder'])[1](tparams, embeddingr, options,
-                                             prefix='encoder_r',
-                                             mask=xr_mask)
+    proj = get_layer(options['encoder'])[1](tparams, embedding, options, prefix='encoder', mask=x_mask)
+    projr = get_layer(options['encoder'])[1](tparams, embeddingr, options, prefix='encoder_r', mask=xr_mask)
 
     ctx = tensor.concatenate([proj[0][-1], projr[0][-1]], axis=1)
 
@@ -330,10 +325,10 @@ def ortho_weight(ndim):
     return u.astype('float32')
 
 
-def norm_weight(nin,nout=None, scale=0.1, ortho=True):
-    if nout == None:
+def norm_weight(nin, nout=None, scale=0.1, ortho=True):
+    if nout is None:
         nout = nin
     if nout == nin and ortho:
         W = ortho_weight(nin)
     else:
         W = numpy.random.uniform(low=-scale, high=scale, size=(nin, nout))
@@ -344,23 +339,21 @@ def param_init_gru(options, params, prefix='gru', nin=None, dim=None):
     """
     parameter init for GRU
     """
-    if nin == None:
+    if nin is None:
         nin = options['dim_proj']
-    if dim == None:
+    if dim is None:
         dim = options['dim_proj']
-    W = numpy.concatenate([norm_weight(nin,dim),
-                           norm_weight(nin,dim)], axis=1)
-    params[_p(prefix,'W')] = W
-    params[_p(prefix,'b')] = numpy.zeros((2 * dim,)).astype('float32')
-    U = numpy.concatenate([ortho_weight(dim),
-                           ortho_weight(dim)], axis=1)
-    params[_p(prefix,'U')] = U
+    W = numpy.concatenate([norm_weight(nin, dim), norm_weight(nin, dim)], axis=1)
+    params[_p(prefix, 'W')] = W
+    params[_p(prefix, 'b')] = numpy.zeros((2 * dim,)).astype('float32')
+    U = numpy.concatenate([ortho_weight(dim), ortho_weight(dim)], axis=1)
+    params[_p(prefix, 'U')] = U
     Wx = norm_weight(nin, dim)
-    params[_p(prefix,'Wx')] = Wx
+    params[_p(prefix, 'Wx')] = Wx
     Ux = ortho_weight(dim)
-    params[_p(prefix,'Ux')] = Ux
-    params[_p(prefix,'bx')] = numpy.zeros((dim,)).astype('float32')
+    params[_p(prefix, 'Ux')] = Ux
+    params[_p(prefix, 'bx')] = numpy.zeros((dim,)).astype('float32')
 
     return params
 
@@ -375,9 +368,9 @@ def gru_layer(tparams, state_below, options, prefix='gru', mask=None, **kwargs):
     else:
         n_samples = 1
 
-    dim = tparams[_p(prefix,'Ux')].shape[1]
+    dim = tparams[_p(prefix, 'Ux')].shape[1]
 
-    if mask == None:
+    if mask is None:
         mask = tensor.alloc(1., state_below.shape[0], 1)
 
     def _slice(_x, n, dim):
@@ -404,21 +397,18 @@ def _step_slice(m_, x_, xx_, h_, U, Ux):
         h = tensor.tanh(preactx)
 
         h = u * h_ + (1. - u) * h
-        h = m_[:,None] * h + (1. - m_)[:,None] * h_
+        h = m_[:, None] * h + (1. - m_)[:, None] * h_
 
         return h
 
     seqs = [mask, state_below_, state_belowx]
     _step = _step_slice
 
-    rval, updates = theano.scan(_step,
-                                sequences=seqs,
-                                outputs_info = [tensor.alloc(0., n_samples, dim)],
-                                non_sequences = [tparams[_p(prefix, 'U')],
-                                                 tparams[_p(prefix, 'Ux')]],
+    rval, updates = theano.scan(_step, sequences=seqs, outputs_info=[tensor.alloc(0., n_samples, dim)],
+                                non_sequences=[tparams[_p(prefix, 'U')], tparams[_p(prefix, 'Ux')]],
                                 name=_p(prefix, '_layers'),
                                 n_steps=nsteps,
                                 profile=profile,
                                 strict=True)
     rval = [rval]
 
     return rval
diff --git a/misc/utils.py b/misc/utils.py
index 06b0f6e..540dc72 100644
--- a/misc/utils.py
+++ b/misc/utils.py
@@ -8,10 +8,9 @@
 import numpy as np
 import os
 import errno
+import imageio
 
-# > Python3
 from skimage.transform import resize
-import imageio
 
 
 def get_image(image_path, image_size, is_crop=False, bbox=None):
@@ -47,12 +46,7 @@ def transform(image, image_size, is_crop, bbox):
     if is_crop:
         image = custom_crop(image, bbox)
     transformed_image = resize(image, [image_size, image_size], order=3)
-    # order = 3 means bicubic
-    #https: // scikit - image.org / docs / dev / api / skimage.transform.html # skimage.transform.resize
-
-    #scipy.misc.imresize(image, [image_size, image_size], 'bicubic')
     return transformed_image
-    #return np.array(transformed_image)
 
 
 def imread(path):
@@ -76,9 +70,11 @@ def convert_to_uint8(img):
         img = img.astype(np.uint8)
     return img
 
+
 def caption_convert(caption):
     return caption.decode("utf-8")
 
+
 def mkdir_p(path):
     try:
         os.makedirs(path)
diff --git a/models/README.md b/models/README.md
index a38141b..71ad6cb 100644
--- a/models/README.md
+++ b/models/README.md
@@ -1,9 +1,9 @@
 **Pretrained StackGAN Models**
 
-- [StackGAN for birds]() trained from char-CNN-RNN text embeddings. Download and save it to `models/`.
-- [StackGAN for flowers](https://drive.google.com/open?id=0B3y_msrWZaXLX01FMC1JQW9vaFk) trained from char-CNN-RNN text embeddings. Download and save it to `models/`.
-- [StackGAN for birds](https://drive.google.com/open?id=0B3y_msrWZaXLZVNRNFg4d055Q1E) trained from skip-thought text embeddings. Download and save it to `models/` (Just use the same setting as the char-CNN-RNN, we assume better results can be achieved by playing with the hyper-parameters).
+- [StackGAN for birds](https://drive.google.com/open?id=1O1JHIoYO3h_qB5o27Td8KklvuLgTgpdV) trained from char-CNN-RNN text embeddings. Download and save it to `models/`.
+- [StackGAN for flowers]() trained from char-CNN-RNN text embeddings. Download and save it to `models/`.
+- [StackGAN for birds](https://drive.google.com/open?id=1O1JHIoYO3h_qB5o27Td8KklvuLgTgpdV) trained from skip-thought text embeddings. Download and save it to `models/` (use the same settings as for char-CNN-RNN; better results can likely be achieved by tuning the hyper-parameters).
 
 **char-CNN-RNN text encoders**
 - [Download](https://drive.google.com/file/d/0B0ywwgffWnLLZUt0UmQ1LU1oWlU/view) the char-CNN-RNN text encoder for flowers to `models/text_encoder/`.
-- [Download](https://drive.google.com/file/d/0B0ywwgffWnLLU0F3UHA3NzFTNEE/view) the char-CNN-RNN text encoder for birds to `models/text_encoder/`.
+- [Download](https://drive.google.com/open?id=1a11TUAQKrHxRWnzWBTLpK9FkZdZqhKlT) the char-CNN-RNN text encoder for birds to `models/text_encoder/`.
diff --git a/stageII/cfg/birds.yml b/stageII/cfg/birds.yml
index 4e3ce8d..e0c805f 100644
--- a/stageII/cfg/birds.yml
+++ b/stageII/cfg/birds.yml
@@ -7,9 +7,9 @@ Z_DIM: 100
 
 TRAIN:
     FLAG: True
-    PRETRAINED_MODEL: './ckt_logs/birds/stageI/model_82000.ckpt'
+    PRETRAINED_MODEL: './models/stageI/model_82000.ckpt'
     PRETRAINED_EPOCH: 600
-    BATCH_SIZE: 64
+    BATCH_SIZE: 64  # use 32 if you do not have enough GPU memory
     NUM_COPY: 4
     MAX_EPOCH: 1200
     SNAPSHOT_INTERVAL: 2000
@@ -19,6 +19,7 @@ TRAIN:
     NUM_EMBEDDING: 4
     COEFF:
         KL: 2.0
+    FINETUNE_LR: True
 
 GAN:
     EMBEDDING_DIM: 128
diff --git a/stageII/trainer.py b/stageII/trainer.py
index 1496ed4..3899f01 100644
--- a/stageII/trainer.py
+++ b/stageII/trainer.py
@@ -480,6 +480,7 @@ def save_super_images(self, images, sample_batchs, hr_sample_batchs, savenames,
             row2 = [padding0, real_img, padding]
             for i in range(np.minimum(8, numSamples)):
                 lr_img = sample_batchs[i][j]
+                lr_img = (lr_img + 1.0) * 127.5
                 hr_img = hr_sample_batchs[i][j]
                 hr_img = (hr_img + 1.0) * 127.5
                 re_sample = resize(lr_img, hr_img.shape[:2])
@@ -495,6 +496,7 @@ def save_super_images(self, images, sample_batchs, hr_sample_batchs, savenames,
             row2 = [padding0, real_img, padding]
             for i in range(8, len(sample_batchs)):
                 lr_img = sample_batchs[i][j]
+                lr_img = (lr_img + 1.0) * 127.5
                 hr_img = hr_sample_batchs[i][j]
                 hr_img = (hr_img + 1.0) * 127.5
                 re_sample = resize(lr_img, hr_img.shape[:2])

From fabf58063199e484a0328a29e9c1103ac1c938b3 Mon Sep 17 00:00:00 2001
From: Akhil V
Date: Thu, 18 Jul 2019 17:14:17 -0400
Subject: [PATCH 08/13] Updated README.md

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 227ed1b..bf08580 100755
--- a/README.md
+++ b/README.md
@@ -50,9 +50,9 @@ In addition, please add the project folder to PYTHONPATH and `pip install` the f
 
 **Pretrained Model**
-- [StackGAN for birds](https://drive.google.com/open?id=0B3y_msrWZaXLNUNKa3BaRjAyTzQ) trained from char-CNN-RNN text embeddings. Download and save it to `models/`.
-- [StackGAN for flowers](https://drive.google.com/open?id=0B3y_msrWZaXLX01FMC1JQW9vaFk) trained from char-CNN-RNN text embeddings. Download and save it to `models/`.
-- [StackGAN for birds](https://drive.google.com/open?id=0B3y_msrWZaXLZVNRNFg4d055Q1E) trained from skip-thought text embeddings. Download and save it to `models/` (Just used the same setting as the char-CNN-RNN. We assume better results can be achieved by playing with the hyper-parameters).
+- [StackGAN for birds](https://drive.google.com/open?id=1O1JHIoYO3h_qB5o27Td8KklvuLgTgpdV) trained from char-CNN-RNN text embeddings. Download and save it to `models/`.
+- [StackGAN for flowers]() trained from char-CNN-RNN text embeddings. Download and save it to `models/`.
+- [StackGAN for birds](https://drive.google.com/open?id=1O1JHIoYO3h_qB5o27Td8KklvuLgTgpdV) trained from skip-thought text embeddings. Download and save it to `models/` (we used the same settings as for char-CNN-RNN; better results can likely be achieved by tuning the hyper-parameters).
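[Editor's note] A recurring change in PATCH 07 above is the added `lr_img = (lr_img + 1.0) * 127.5` line: the sampled images arrive in the tanh range [-1, 1] and must be mapped to [0, 255] before `skimage.transform.resize` and `imageio.imsave`, which replace the removed `scipy.misc.imresize`/`scipy.misc.imsave`. Below is a minimal sketch of that convention; the names `to_uint8`, `fake_lr`, and `fake_hr` are illustrative placeholders, not identifiers from the repository.

```python
# Sketch (assumed convention, not part of the patch series): map tanh-range
# generator samples to [0, 255], upscale the low-res sample to the high-res
# size, and save a side-by-side comparison row.
import numpy as np
import imageio
from skimage.transform import resize


def to_uint8(img):
    """Map an image in [-1, 1] to uint8 in [0, 255]."""
    return np.uint8(np.clip((img + 1.0) * 127.5, 0, 255))


# Stand-ins for one low-res and one high-res generator sample.
fake_lr = np.random.uniform(-1.0, 1.0, size=(64, 64, 3))
fake_hr = np.random.uniform(-1.0, 1.0, size=(256, 256, 3))

# skimage rescales integer inputs to [0, 1] unless preserve_range=True is set;
# order=3 is the bicubic interpolation misc/utils.py also uses.
lr_up = resize(to_uint8(fake_lr), fake_hr.shape[:2], order=3, preserve_range=True)

row = np.concatenate([lr_up, to_uint8(fake_hr)], axis=1)
imageio.imsave('super_row.jpg', np.uint8(row))
```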
From f1b9a90a5b2a7ce9e3752344c0e7894e39bff4f3 Mon Sep 17 00:00:00 2001
From: akhilvasvani <1943443+akhilvasvani@users.noreply.github.com>
Date: Fri, 19 Jul 2019 10:48:41 -0400
Subject: [PATCH 09/13] Update demo/birds_skip_thought_demo.py

Co-Authored-By: kramarov-evg <38880539+kramarov-evg@users.noreply.github.com>
---
 demo/birds_skip_thought_demo.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/demo/birds_skip_thought_demo.py b/demo/birds_skip_thought_demo.py
index ee415a0..f25303a 100644
--- a/demo/birds_skip_thought_demo.py
+++ b/demo/birds_skip_thought_demo.py
@@ -12,6 +12,7 @@
 sys.path.append('misc')
 sys.path.append('stageII')
 
+import skipthoughts
 from config import cfg, cfg_from_file
 from utils import mkdir_p
 from model import CondGAN

From 3cf66e4f6c4a6eb942e0b401fd6674cd20f16e3c Mon Sep 17 00:00:00 2001
From: akhilv <1943443+akhilvasvani@users.noreply.github.com>
Date: Thu, 24 Oct 2019 14:28:56 -0400
Subject: [PATCH 10/13] Added Fashion datasets

---
 README.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/README.md b/README.md
index bf08580..5898905 100755
--- a/README.md
+++ b/README.md
@@ -95,6 +95,11 @@ booktitle = {{ICCV}},
 - [StackGAN++: Realistic Image Synthesis with Stacked Generative Adversarial Networks](https://arxiv.org/abs/1710.10916)
 - [AttnGAN: Fine-Grained Text to Image Generation with Attentional Generative Adversarial Networks](https://arxiv.org/abs/1711.10485) [[supplementary]](https://1drv.ms/b/s!Aj4exx_cRA4ghK5-kUG-EqH7hgknUA) [[code]](https://github.com/taoxugit/AttnGAN)
 
+**Future**
+
+[Fashion Expansion](https://github.com/arianhosseini/StackGAN)
+[Fashion Dataset](https://github.com/ayushidalmia/awesome-fashion-ai#datasets)
+
 **References**
 
 - Generative Adversarial Text-to-Image Synthesis [Paper](https://arxiv.org/abs/1605.05396) [Code](https://github.com/reedscot/icml2016)

From 7daa69f2d5004f44d7090a08a70d07465be6093f Mon Sep 17 00:00:00 2001
From: akhilv <1943443+akhilvasvani@users.noreply.github.com>
Date: Thu, 24 Oct 2019 14:29:14 -0400
Subject: [PATCH 11/13] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 5898905..a93412a 100755
--- a/README.md
+++ b/README.md
@@ -98,6 +98,7 @@ booktitle = {{ICCV}},
 **Future**
 
 [Fashion Expansion](https://github.com/arianhosseini/StackGAN)
+
 [Fashion Dataset](https://github.com/ayushidalmia/awesome-fashion-ai#datasets)
 
 **References**

From 824ecc0d9604e2d7e892318c90e1a88be5de0001 Mon Sep 17 00:00:00 2001
From: akhilv <1943443+akhilvasvani@users.noreply.github.com>
Date: Thu, 24 Oct 2019 14:29:50 -0400
Subject: [PATCH 12/13] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a93412a..5df7adb 100755
--- a/README.md
+++ b/README.md
@@ -97,7 +97,7 @@ booktitle = {{ICCV}},
 
 **Future**
 
-[Fashion Expansion](https://github.com/arianhosseini/StackGAN)
+[Fashion Expansion](https://github.com/1o0ko/StackGAN-v1-TensorFlow)
 
 [Fashion Dataset](https://github.com/ayushidalmia/awesome-fashion-ai#datasets)
 
 **References**

From 1e23cb2fd2237b1be4a1257e32c76ad0ca3abda5 Mon Sep 17 00:00:00 2001
From: akhilv <1943443+akhilvasvani@users.noreply.github.com>
Date: Fri, 15 May 2020 15:05:26 -0400
Subject: [PATCH 13/13] Added skip_thoughts zip file to train the StackGAN
 model

---
 README.md | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5df7adb..18e890d 100755
--- a/README.md
+++ b/README.md
@@ -31,7 +31,12 @@ In addition, please add the project folder to PYTHONPATH and `pip install` the f
 
 **Data**
 
 1. Download our preprocessed char-CNN-RNN text embeddings for [birds](https://drive.google.com/open?id=0B3y_msrWZaXLT1BZdVdycDY5TEE) and [flowers](https://drive.google.com/open?id=0B3y_msrWZaXLaUc0UXpmcnhaVmM) and save them to `Data/`.
+   - [Optional] Follow the instructions [reedscot/icml2016](https://github.com/reedscot/icml2016) to download the pretrained char-CNN-RNN text encoders and extract text embeddings.
+
+   - [Optional] Download our preprocessed skip-thoughts text embeddings for [birds](https://drive.google.com/open?id=10jlSsU3g2ywDFXgUmn2Dh_UJCkQectzy) and save them to `Data/`.
+
+
 2. Download the [birds](http://www.vision.caltech.edu/visipedia/CUB-200-2011.html) and [flowers](http://www.robots.ox.ac.uk/~vgg/data/flowers/102/) image data. Extract them to `Data/birds/` and `Data/flowers/`, respectively.
 3. Preprocess images.
   - For birds: `python misc/preprocess_birds.py`
@@ -52,7 +57,7 @@ In addition, please add the project folder to PYTHONPATH and `pip install` the f
 
 **Pretrained Model**
 - [StackGAN for birds](https://drive.google.com/open?id=1O1JHIoYO3h_qB5o27Td8KklvuLgTgpdV) trained from char-CNN-RNN text embeddings. Download and save it to `models/`.
 - [StackGAN for flowers]() trained from char-CNN-RNN text embeddings. Download and save it to `models/`.
-- [StackGAN for birds](https://drive.google.com/open?id=1O1JHIoYO3h_qB5o27Td8KklvuLgTgpdV) trained from skip-thought text embeddings. Download and save it to `models/` (Just used the same setting as the char-CNN-RNN. We assume better results can be achieved by playing with the hyper-parameters).
+- [StackGAN for birds]() trained from skip-thought text embeddings. Download and save it to `models/` (we used the same settings as for char-CNN-RNN; better results can likely be achieved by tuning the hyper-parameters).
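[Editor's note] PATCH 09 adds `import skipthoughts` to the demo, and PATCH 13 points at preprocessed skip-thoughts embeddings. For readers who want to encode their own captions with the Python 3 port of `misc/skipthoughts.py` shown above, usage looks roughly like the sketch below. It assumes the uni-skip/bi-skip model files and word tables referenced in that module have already been downloaded, and that `theano` and `nltk` (with its tokenizer data) are installed; the caption string is made up.

```python
# Rough usage sketch for the ported misc/skipthoughts.py (assumed setup:
# model files downloaded to the paths configured inside the module).
import skipthoughts

# Loads model options, parameters, and lookup tables, and compiles the
# uni-skip and bi-skip Theano encoders (slow on first call).
model = skipthoughts.load_model()

captions = ['this bird has a red head and a short pointed beak']

# encode() groups captions by length to avoid wasted computation; combine-skip
# vectors are typically 4800-d (2400-d uni-skip + 2400-d bi-skip).
embeddings = skipthoughts.encode(model, captions, verbose=False)
print(embeddings.shape)
```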