From d8f2b2ad4c152ed740e23c9d40a1543c4ab66296 Mon Sep 17 00:00:00 2001
From: zsdonghao
Date: Wed, 9 Nov 2016 16:12:36 +0000
Subject: [PATCH] TF 11

---
 .gitignore | 7 +-
 README.md | 2 +
 buildmodel.py | 31 +-
 data/README.md | 4 +
 data/build_mscoco_data.py | 481 +++
 data/download_and_preprocess_mscoco.sh | 84 +
 evaluate.py | 2 +-
 ...ed_bug).py => inception_v3(for TF 0.10).py | 0
 tensorlayer/__init__.py | 1 +
 tensorlayer/activation.py | 47 +-
 tensorlayer/cost.py | 65 +-
 tensorlayer/files.py | 5 +-
 tensorlayer/iterate.py | 34 +-
 tensorlayer/layers.py | 654 +++-
 tensorlayer/nlp.py | 2 +-
 tensorlayer/ops.py | 78 +-
 tensorlayer/prepro.py | 1013 ++++++
 tensorlayer/utils.py | 5 +-
 tensorlayer/visualize.py | 27 +-
 tensorlayer1.2.2/__init__.py | 27 +
 tensorlayer1.2.2/activation.py | 109 +
 tensorlayer1.2.2/cost.py | 376 +++
 tensorlayer1.2.2/files.py | 812 +++++
 tensorlayer1.2.2/iterate.py | 431 +++
 tensorlayer1.2.2/layers.py | 2787 +++++++++++++++++
 tensorlayer1.2.2/nlp.py | 908 ++++++
 tensorlayer1.2.2/ops.py | 174 +
 tensorlayer1.2.2/prepro.py | 168 +
 tensorlayer1.2.2/rein.py | 68 +
 tensorlayer1.2.2/utils.py | 425 +++
 tensorlayer1.2.2/visualize.py | 300 ++
 train.py | 209 +-
 32 files changed, 9024 insertions(+), 312 deletions(-)
 create mode 100755 data/README.md
 create mode 100755 data/build_mscoco_data.py
 create mode 100755 data/download_and_preprocess_mscoco.sh
 rename inception_v3(hao_fixed_bug).py => inception_v3(for TF 0.10).py (100%)
 create mode 100755 tensorlayer1.2.2/__init__.py
 create mode 100755 tensorlayer1.2.2/activation.py
 create mode 100755 tensorlayer1.2.2/cost.py
 create mode 100755 tensorlayer1.2.2/files.py
 create mode 100755 tensorlayer1.2.2/iterate.py
 create mode 100755 tensorlayer1.2.2/layers.py
 create mode 100755 tensorlayer1.2.2/nlp.py
 create mode 100755 tensorlayer1.2.2/ops.py
 create mode 100755 tensorlayer1.2.2/prepro.py
 create mode 100755 tensorlayer1.2.2/rein.py
 create mode 100755 tensorlayer1.2.2/utils.py
 create mode 100755 tensorlayer1.2.2/visualize.py

diff --git a/.gitignore b/.gitignore
index e38d721..4ae8ae2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,10 @@
+*.DS_Store
 .ckpt
 .ckpt-*
 .DS_Store
 ._.DS_Store
-
+data/mscoco/*
+data/*ckpt
+*ckpt
+*ckpt*
+model
diff --git a/README.md b/README.md
index 5d33763..3fa4f6a 100755
--- a/README.md
+++ b/README.md
@@ -3,6 +3,8 @@
 We reimplemented the complicated [Google's Image Captioning](https://github.com/tensorflow/models/tree/master/im2txt) model with simple TensorLayer code.

 ### 1. Prepare MSCOCO data and Inception model
+Before running the TensorLayer scripts, follow Google's [setup guide](https://github.com/tensorflow/models/tree/master/im2txt). Then set up the model, ckpt and data directories in *.py.
+
 - Create a ``data`` folder.
 - Download and preprocess the MSCOCO data: [click here](https://github.com/zsdonghao/models/tree/master/im2txt#prepare-the-training-data)
 - Download the Inception_V3 CKPT: [click here](https://github.com/zsdonghao/models/tree/master/im2txt#download-the-inception-v3-checkpoint)
diff --git a/buildmodel.py b/buildmodel.py
index bd06d41..f84e9d2 100755
--- a/buildmodel.py
+++ b/buildmodel.py
@@ -138,14 +138,15 @@ def image_summary(name, image):

   # Resize image.
assert (resize_height > 0) == (resize_width > 0) if resize_height: - # image = tf.image.resize_images(image, - # size=[resize_height, resize_width], - # method=tf.image.ResizeMethod.BILINEAR) - - image = tf.image.resize_images(image, # DH Modify - new_height=resize_height, - new_width=resize_width, - method=tf.image.ResizeMethod.BILINEAR) + try: + image = tf.image.resize_images(image, + size=[resize_height, resize_width], + method=tf.image.ResizeMethod.BILINEAR) + except: + image = tf.image.resize_images(image, # for TF 0.10 + new_height=resize_height, + new_width=resize_width, + method=tf.image.ResizeMethod.BILINEAR) # Crop to final dimensions. if is_training: @@ -581,7 +582,7 @@ def Build_Model(mode, net_image_embeddings, net_seq_embeddings, target_seqs, inp initial_state = None, sequence_length = tf.ones([1]), return_seq_2d = True, # stack denselayer after it - name = '', + name = 'embed', ) lstm_scope.reuse_variables() @@ -600,7 +601,7 @@ def Build_Model(mode, net_image_embeddings, net_seq_embeddings, target_seqs, inp initial_state = state_tuple, # different with training sequence_length = tf.ones([1]), return_seq_2d = True, # stack denselayer after it - name = '', + name = 'embed', ) network = net_seq_rnn network.all_layers = net_image_embeddings.all_layers + network.all_layers @@ -620,18 +621,20 @@ def Build_Model(mode, net_image_embeddings, net_seq_embeddings, target_seqs, inp initial_state = None, sequence_length = tf.ones([32]), return_seq_2d = True, # stack denselayer after it - name = '', + name = 'embed', ) # Then, uses the hidden state which contains image info as the initial_state when feeding the sentence. lstm_scope.reuse_variables() + tl.layers.set_name_reuse(True) network = tl.layers.DynamicRNNLayer(net_seq_embeddings, cell_fn = tf.nn.rnn_cell.BasicLSTMCell, n_hidden = num_lstm_units, initializer = initializer, dropout = dropout, initial_state = net_img_rnn.final_state, # feed in hidden state after feeding image + sequence_length = tf.reduce_sum(input_mask, 1), return_seq_2d = True, # stack denselayer after it - name = '', + name = 'embed', ) network.all_layers = net_image_embeddings.all_layers + network.all_layers network.all_params = net_image_embeddings.all_params + network.all_params @@ -640,7 +643,7 @@ def Build_Model(mode, net_image_embeddings, net_seq_embeddings, target_seqs, inp network = tl.layers.DenseLayer(network, n_units=vocab_size, act=tf.identity, W_init=initializer, name="logits") # TL logits = network.outputs - network.print_layers() + # network.print_layers() if mode == "inference": softmax = tf.nn.softmax(logits, name="softmax") @@ -660,7 +663,7 @@ def Build_Model(mode, net_image_embeddings, net_seq_embeddings, target_seqs, inp total_loss = total_loss target_cross_entropy_losses = losses # Used in evaluation. target_cross_entropy_loss_weights = weights # Used in evaluation. 
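The try/except in the image-processing hunk above exists because `tf.image.resize_images` changed its signature between TF 0.10 (separate `new_height`/`new_width` arguments) and TF 0.11 (a single `size=[height, width]` argument). A minimal standalone sketch of the same compatibility shim — the helper name `resize_bilinear_compat` is illustrative, not part of this patch:

```python
import tensorflow as tf

def resize_bilinear_compat(image, height, width):
    """Bilinear resize that works on both TF 0.10 and TF 0.11+."""
    try:
        # TF 0.11+ signature: one size=[height, width] argument.
        return tf.image.resize_images(image, size=[height, width],
                                      method=tf.image.ResizeMethod.BILINEAR)
    except TypeError:
        # TF 0.10 signature: separate new_height / new_width arguments.
        return tf.image.resize_images(image, new_height=height,
                                      new_width=width,
                                      method=tf.image.ResizeMethod.BILINEAR)
```

The patch itself uses a bare ``except:``, which also works but can mask unrelated errors; catching ``TypeError`` narrows the fallback to the signature mismatch.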
- return total_loss, target_cross_entropy_losses, target_cross_entropy_loss_weights + return total_loss, target_cross_entropy_losses, target_cross_entropy_loss_weights, network diff --git a/data/README.md b/data/README.md new file mode 100755 index 0000000..bd5b4f9 --- /dev/null +++ b/data/README.md @@ -0,0 +1,4 @@ +### Download and Preprocessing MSCOCO Data +[click here](https://github.com/zsdonghao/models/tree/master/im2txt#prepare-the-training-data) +#### Download the Inception_V3 CKPT +[click here](https://github.com/zsdonghao/models/tree/master/im2txt#download-the-inception-v3-checkpoint) diff --git a/data/build_mscoco_data.py b/data/build_mscoco_data.py new file mode 100755 index 0000000..c55ec91 --- /dev/null +++ b/data/build_mscoco_data.py @@ -0,0 +1,481 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Converts MSCOCO data to TFRecord file format with SequenceExample protos. + +The MSCOCO images are expected to reside in JPEG files located in the following +directory structure: + + train_image_dir/COCO_train2014_000000000151.jpg + train_image_dir/COCO_train2014_000000000260.jpg + ... + +and + + val_image_dir/COCO_val2014_000000000042.jpg + val_image_dir/COCO_val2014_000000000073.jpg + ... + +The MSCOCO annotations JSON files are expected to reside in train_captions_file +and val_captions_file respectively. + +This script converts the combined MSCOCO data into sharded data files consisting +of 256, 4 and 8 TFRecord files, respectively: + + output_dir/train-00000-of-00256 + output_dir/train-00001-of-00256 + ... + output_dir/train-00255-of-00256 + +and + + output_dir/val-00000-of-00004 + ... + output_dir/val-00003-of-00004 + +and + + output_dir/test-00000-of-00008 + ... + output_dir/test-00007-of-00008 + +Each TFRecord file contains ~2300 records. Each record within the TFRecord file +is a serialized SequenceExample proto consisting of precisely one image-caption +pair. Note that each image has multiple captions (usually 5) and therefore each +image is replicated multiple times in the TFRecord files. + +The SequenceExample proto contains the following fields: + + context: + image/image_id: integer MSCOCO image identifier + image/data: string containing JPEG encoded image in RGB colorspace + + feature_lists: + image/caption: list of strings containing the (tokenized) caption words + image/caption_ids: list of integer ids corresponding to the caption words + +The captions are tokenized using the NLTK (http://www.nltk.org/) word tokenizer. +The vocabulary of word identifiers is constructed from the sorted list (by +descending frequency) of word tokens in the training set. Only tokens appearing +at least 4 times are considered; all other words get the "unknown" word id. + +NOTE: This script will consume around 100GB of disk space because each image +in the MSCOCO dataset is replicated ~5 times (once per caption) in the output. 
+This is done for two reasons: + 1. In order to better shuffle the training data. + 2. It makes it easier to perform asynchronous preprocessing of each image in + TensorFlow. + +Running this script using 16 threads may take around 1 hour on a HP Z420. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import Counter +from collections import namedtuple +from datetime import datetime +import json +import os.path +import random +import sys +import threading + + + +import nltk.tokenize +import numpy as np +import tensorflow as tf + +tf.flags.DEFINE_string("train_image_dir", "/tmp/train2014/", + "Training image directory.") +tf.flags.DEFINE_string("val_image_dir", "/tmp/val2014", + "Validation image directory.") + +tf.flags.DEFINE_string("train_captions_file", "/tmp/captions_train2014.json", + "Training captions JSON file.") +tf.flags.DEFINE_string("val_captions_file", "/tmp/captions_train2014.json", + "Validation captions JSON file.") + +tf.flags.DEFINE_string("output_dir", "/tmp/", "Output data directory.") + +tf.flags.DEFINE_integer("train_shards", 256, + "Number of shards in training TFRecord files.") +tf.flags.DEFINE_integer("val_shards", 4, + "Number of shards in validation TFRecord files.") +tf.flags.DEFINE_integer("test_shards", 8, + "Number of shards in testing TFRecord files.") + +tf.flags.DEFINE_string("start_word", "", + "Special word added to the beginning of each sentence.") +tf.flags.DEFINE_string("end_word", "", + "Special word added to the end of each sentence.") +tf.flags.DEFINE_string("unknown_word", "", + "Special word meaning 'unknown'.") +tf.flags.DEFINE_integer("min_word_count", 4, + "The minimum number of occurrences of each word in the " + "training set for inclusion in the vocabulary.") +tf.flags.DEFINE_string("word_counts_output_file", "/tmp/word_counts.txt", + "Output vocabulary file of word counts.") + +tf.flags.DEFINE_integer("num_threads", 8, + "Number of threads to preprocess the images.") + +FLAGS = tf.flags.FLAGS + +ImageMetadata = namedtuple("ImageMetadata", + ["image_id", "filename", "captions"]) + + +class Vocabulary(object): + """Simple vocabulary wrapper.""" + + def __init__(self, vocab, unk_id): + """Initializes the vocabulary. + + Args: + vocab: A dictionary of word to word_id. + unk_id: Id of the special 'unknown' word. + """ + self._vocab = vocab + self._unk_id = unk_id + + def word_to_id(self, word): + """Returns the integer id of a word string.""" + if word in self._vocab: + return self._vocab[word] + else: + return self._unk_id + + +class ImageDecoder(object): + """Helper class for decoding images in TensorFlow.""" + + def __init__(self): + # Create a single TensorFlow Session for all image decoding calls. + self._sess = tf.Session() + + # TensorFlow ops for JPEG decoding. 
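+    # Note: the placeholder and decode op are built once here and reused for
+    # every image; decode_jpeg() below only feeds new data through them,
+    # rather than adding new ops to the graph per file.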
+ self._encoded_jpeg = tf.placeholder(dtype=tf.string) + self._decode_jpeg = tf.image.decode_jpeg(self._encoded_jpeg, channels=3) + + def decode_jpeg(self, encoded_jpeg): + image = self._sess.run(self._decode_jpeg, + feed_dict={self._encoded_jpeg: encoded_jpeg}) + assert len(image.shape) == 3 + assert image.shape[2] == 3 + return image + + +def _int64_feature(value): + """Wrapper for inserting an int64 Feature into a SequenceExample proto.""" + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + + +def _bytes_feature(value): + """Wrapper for inserting a bytes Feature into a SequenceExample proto.""" + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[str(value)])) + + +def _int64_feature_list(values): + """Wrapper for inserting an int64 FeatureList into a SequenceExample proto.""" + return tf.train.FeatureList(feature=[_int64_feature(v) for v in values]) + + +def _bytes_feature_list(values): + """Wrapper for inserting a bytes FeatureList into a SequenceExample proto.""" + return tf.train.FeatureList(feature=[_bytes_feature(v) for v in values]) + + +def _to_sequence_example(image, decoder, vocab): + """Builds a SequenceExample proto for an image-caption pair. + + Args: + image: An ImageMetadata object. + decoder: An ImageDecoder object. + vocab: A Vocabulary object. + + Returns: + A SequenceExample proto. + """ + with tf.gfile.FastGFile(image.filename, "r") as f: + encoded_image = f.read() + + try: + decoder.decode_jpeg(encoded_image) + except (tf.errors.InvalidArgumentError, AssertionError): + print("Skipping file with invalid JPEG data: %s" % image.filename) + return + + context = tf.train.Features(feature={ + "image/image_id": _int64_feature(image.image_id), + "image/data": _bytes_feature(encoded_image), + }) + + assert len(image.captions) == 1 + caption = image.captions[0] + caption_ids = [vocab.word_to_id(word) for word in caption] + feature_lists = tf.train.FeatureLists(feature_list={ + "image/caption": _bytes_feature_list(caption), + "image/caption_ids": _int64_feature_list(caption_ids) + }) + sequence_example = tf.train.SequenceExample( + context=context, feature_lists=feature_lists) + + return sequence_example + + +def _process_image_files(thread_index, ranges, name, images, decoder, vocab, + num_shards): + """Processes and saves a subset of images as TFRecord files in one thread. + + Args: + thread_index: Integer thread identifier within [0, len(ranges)]. + ranges: A list of pairs of integers specifying the ranges of the dataset to + process in parallel. + name: Unique identifier specifying the dataset. + images: List of ImageMetadata. + decoder: An ImageDecoder object. + vocab: A Vocabulary object. + num_shards: Integer number of shards for the output files. + """ + # Each thread produces N shards where N = num_shards / num_threads. For + # instance, if num_shards = 128, and num_threads = 2, then the first thread + # would produce shards [0, 64). + num_threads = len(ranges) + assert not num_shards % num_threads + num_shards_per_batch = int(num_shards / num_threads) + + shard_ranges = np.linspace(ranges[thread_index][0], ranges[thread_index][1], + num_shards_per_batch + 1).astype(int) + num_images_in_thread = ranges[thread_index][1] - ranges[thread_index][0] + + counter = 0 + for s in xrange(num_shards_per_batch): + # Generate a sharded version of the file name, e.g. 
'train-00002-of-00010' + shard = thread_index * num_shards_per_batch + s + output_filename = "%s-%.5d-of-%.5d" % (name, shard, num_shards) + output_file = os.path.join(FLAGS.output_dir, output_filename) + writer = tf.python_io.TFRecordWriter(output_file) + + shard_counter = 0 + images_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int) + for i in images_in_shard: + image = images[i] + + sequence_example = _to_sequence_example(image, decoder, vocab) + if sequence_example is not None: + writer.write(sequence_example.SerializeToString()) + shard_counter += 1 + counter += 1 + + if not counter % 1000: + print("%s [thread %d]: Processed %d of %d items in thread batch." % + (datetime.now(), thread_index, counter, num_images_in_thread)) + sys.stdout.flush() + + print("%s [thread %d]: Wrote %d image-caption pairs to %s" % + (datetime.now(), thread_index, shard_counter, output_file)) + sys.stdout.flush() + shard_counter = 0 + print("%s [thread %d]: Wrote %d image-caption pairs to %d shards." % + (datetime.now(), thread_index, counter, num_shards_per_batch)) + sys.stdout.flush() + + +def _process_dataset(name, images, vocab, num_shards): + """Processes a complete data set and saves it as a TFRecord. + + Args: + name: Unique identifier specifying the dataset. + images: List of ImageMetadata. + vocab: A Vocabulary object. + num_shards: Integer number of shards for the output files. + """ + # Break up each image into a separate entity for each caption. + images = [ImageMetadata(image.image_id, image.filename, [caption]) + for image in images for caption in image.captions] + + # Shuffle the ordering of images. Make the randomization repeatable. + random.seed(12345) + random.shuffle(images) + + # Break the images into num_threads batches. Batch i is defined as + # images[ranges[i][0]:ranges[i][1]]. + num_threads = min(num_shards, FLAGS.num_threads) + spacing = np.linspace(0, len(images), num_threads + 1).astype(np.int) + ranges = [] + threads = [] + for i in xrange(len(spacing) - 1): + ranges.append([spacing[i], spacing[i + 1]]) + + # Create a mechanism for monitoring when all threads are finished. + coord = tf.train.Coordinator() + + # Create a utility for decoding JPEG images to run sanity checks. + decoder = ImageDecoder() + + # Launch a thread for each batch. + print("Launching %d threads for spacings: %s" % (num_threads, ranges)) + for thread_index in xrange(len(ranges)): + args = (thread_index, ranges, name, images, decoder, vocab, num_shards) + t = threading.Thread(target=_process_image_files, args=args) + t.start() + threads.append(t) + + # Wait for all the threads to terminate. + coord.join(threads) + print("%s: Finished processing all %d image-caption pairs in data set '%s'." % + (datetime.now(), len(images), name)) + + +def _create_vocab(captions): + """Creates the vocabulary of word to word_id. + + The vocabulary is saved to disk in a text file of word counts. The id of each + word in the file is its corresponding 0-based line number. + + Args: + captions: A list of lists of strings. + + Returns: + A Vocabulary object. + """ + print("Creating vocabulary.") + counter = Counter() + for c in captions: + counter.update(c) + print("Total words:", len(counter)) + + # Filter uncommon words and sort by descending count. + word_counts = [x for x in counter.items() if x[1] >= FLAGS.min_word_count] + word_counts.sort(key=lambda x: x[1], reverse=True) + print("Words in vocabulary:", len(word_counts)) + + # Write out the word counts file. 
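+  # (One "word count" pair per line; a word's id elsewhere in the pipeline
+  # is simply its 0-based line number in this file.)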
+  with tf.gfile.FastGFile(FLAGS.word_counts_output_file, "w") as f:
+    f.write("\n".join(["%s %d" % (w, c) for w, c in word_counts]))
+  print("Wrote vocabulary file:", FLAGS.word_counts_output_file)
+
+  # Create the vocabulary dictionary.
+  reverse_vocab = [x[0] for x in word_counts]
+  unk_id = len(reverse_vocab)
+  vocab_dict = dict([(x, y) for (y, x) in enumerate(reverse_vocab)])
+  vocab = Vocabulary(vocab_dict, unk_id)
+
+  return vocab
+
+
+def _process_caption(caption):
+  """Processes a caption string into a list of tokenized words.
+
+  Args:
+    caption: A string caption.
+
+  Returns:
+    A list of strings; the tokenized caption.
+  """
+  tokenized_caption = [FLAGS.start_word]
+  tokenized_caption.extend(nltk.tokenize.word_tokenize(caption.lower()))
+  tokenized_caption.append(FLAGS.end_word)
+  return tokenized_caption
+
+
+def _load_and_process_metadata(captions_file, image_dir):
+  """Loads image metadata from a JSON file and processes the captions.
+
+  Args:
+    captions_file: JSON file containing caption annotations.
+    image_dir: Directory containing the image files.
+
+  Returns:
+    A list of ImageMetadata.
+  """
+  with tf.gfile.FastGFile(captions_file, "r") as f:
+    caption_data = json.load(f)
+
+  # Extract the filenames.
+  id_to_filename = [(x["id"], x["file_name"]) for x in caption_data["images"]]
+
+  # Extract the captions. Each image_id is associated with multiple captions.
+  id_to_captions = {}
+  for annotation in caption_data["annotations"]:
+    image_id = annotation["image_id"]
+    caption = annotation["caption"]
+    id_to_captions.setdefault(image_id, [])
+    id_to_captions[image_id].append(caption)
+
+  assert len(id_to_filename) == len(id_to_captions)
+  assert set([x[0] for x in id_to_filename]) == set(id_to_captions.keys())
+  print("Loaded caption metadata for %d images from %s" %
+        (len(id_to_filename), captions_file))
+
+  # Process the captions and combine the data into a list of ImageMetadata.
+  print("Processing captions.")
+  image_metadata = []
+  num_captions = 0
+  for image_id, base_filename in id_to_filename:
+    filename = os.path.join(image_dir, base_filename)
+    captions = [_process_caption(c) for c in id_to_captions[image_id]]
+    image_metadata.append(ImageMetadata(image_id, filename, captions))
+    num_captions += len(captions)
+  print("Finished processing %d captions for %d images in %s" %
+        (num_captions, len(id_to_filename), captions_file))
+
+  return image_metadata
+
+
+def main(unused_argv):
+  def _is_valid_num_shards(num_shards):
+    """Returns True if num_shards is compatible with FLAGS.num_threads."""
+    return num_shards < FLAGS.num_threads or not num_shards % FLAGS.num_threads
+
+  assert _is_valid_num_shards(FLAGS.train_shards), (
+      "Please make the FLAGS.num_threads commensurate with FLAGS.train_shards")
+  assert _is_valid_num_shards(FLAGS.val_shards), (
+      "Please make the FLAGS.num_threads commensurate with FLAGS.val_shards")
+  assert _is_valid_num_shards(FLAGS.test_shards), (
+      "Please make the FLAGS.num_threads commensurate with FLAGS.test_shards")
+
+  if not tf.gfile.IsDirectory(FLAGS.output_dir):
+    tf.gfile.MakeDirs(FLAGS.output_dir)
+
+  # Load image metadata from caption files.
+  mscoco_train_dataset = _load_and_process_metadata(FLAGS.train_captions_file,
+                                                    FLAGS.train_image_dir)
+  mscoco_val_dataset = _load_and_process_metadata(FLAGS.val_captions_file,
+                                                  FLAGS.val_image_dir)
+
+  # Redistribute the MSCOCO data as follows:
+  #   train_dataset = 100% of mscoco_train_dataset + 85% of mscoco_val_dataset.
+ # val_dataset = 5% of mscoco_val_dataset (for validation during training). + # test_dataset = 10% of mscoco_val_dataset (for final evaluation). + train_cutoff = int(0.85 * len(mscoco_val_dataset)) + val_cutoff = int(0.90 * len(mscoco_val_dataset)) + train_dataset = mscoco_train_dataset + mscoco_val_dataset[0:train_cutoff] + val_dataset = mscoco_val_dataset[train_cutoff:val_cutoff] + test_dataset = mscoco_val_dataset[val_cutoff:] + + # Create vocabulary from the training captions. + train_captions = [c for image in train_dataset for c in image.captions] + vocab = _create_vocab(train_captions) + + _process_dataset("train", train_dataset, vocab, FLAGS.train_shards) + _process_dataset("val", val_dataset, vocab, FLAGS.val_shards) + _process_dataset("test", test_dataset, vocab, FLAGS.test_shards) + + +if __name__ == "__main__": + tf.app.run() diff --git a/data/download_and_preprocess_mscoco.sh b/data/download_and_preprocess_mscoco.sh new file mode 100755 index 0000000..5ac8fa1 --- /dev/null +++ b/data/download_and_preprocess_mscoco.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# Script to download and preprocess the MSCOCO data set. +# +# The outputs of this script are sharded TFRecord files containing serialized +# SequenceExample protocol buffers. See build_mscoco_data.py for details of how +# the SequenceExample protocol buffers are constructed. +# +# usage: +# ./download_and_preprocess_mscoco.sh +set -e + +if [ -z "$1" ]; then + echo "usage download_and_preproces_mscoco.sh [data dir]" + exit +fi + +# Create the output directories. +OUTPUT_DIR="${1%/}" +SCRATCH_DIR="${OUTPUT_DIR}/raw-data" +mkdir -p "${OUTPUT_DIR}" +mkdir -p "${SCRATCH_DIR}" +CURRENT_DIR=$(pwd) +WORK_DIR="$0.runfiles/im2txt/im2txt" + +# Helper function to download and unpack a .zip file. +function download_and_unzip() { + local BASE_URL=${1} + local FILENAME=${2} + + if [ ! -f ${FILENAME} ]; then + echo "Downloading ${FILENAME} to $(pwd)" + wget -nd -c "${BASE_URL}/${FILENAME}" + else + echo "Skipping download of ${FILENAME}" + fi + echo "Unzipping ${FILENAME}" + unzip -nq ${FILENAME} +} + +cd ${SCRATCH_DIR} + +# Download the images. +BASE_IMAGE_URL="http://msvocds.blob.core.windows.net/coco2014" + +TRAIN_IMAGE_FILE="train2014.zip" +download_and_unzip ${BASE_IMAGE_URL} ${TRAIN_IMAGE_FILE} +TRAIN_IMAGE_DIR="${SCRATCH_DIR}/train2014" + +VAL_IMAGE_FILE="val2014.zip" +download_and_unzip ${BASE_IMAGE_URL} ${VAL_IMAGE_FILE} +VAL_IMAGE_DIR="${SCRATCH_DIR}/val2014" + +# Download the captions. +BASE_CAPTIONS_URL="http://msvocds.blob.core.windows.net/annotations-1-0-3" +CAPTIONS_FILE="captions_train-val2014.zip" +download_and_unzip ${BASE_CAPTIONS_URL} ${CAPTIONS_FILE} +TRAIN_CAPTIONS_FILE="${SCRATCH_DIR}/annotations/captions_train2014.json" +VAL_CAPTIONS_FILE="${SCRATCH_DIR}/annotations/captions_val2014.json" + +# Build TFRecords of the image data. 
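+# (Note: WORK_DIR above follows the Bazel runfiles layout, so this expects a
+# built build_mscoco_data binary; to run the Python script directly, point
+# BUILD_SCRIPT at build_mscoco_data.py instead.)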
+cd "${CURRENT_DIR}" +BUILD_SCRIPT="${WORK_DIR}/build_mscoco_data" +"${BUILD_SCRIPT}" \ + --train_image_dir="${TRAIN_IMAGE_DIR}" \ + --val_image_dir="${VAL_IMAGE_DIR}" \ + --train_captions_file="${TRAIN_CAPTIONS_FILE}" \ + --val_captions_file="${VAL_CAPTIONS_FILE}" \ + --output_dir="${OUTPUT_DIR}" \ + --word_counts_output_file="${OUTPUT_DIR}/word_counts.txt" \ diff --git a/evaluate.py b/evaluate.py index 1e83d76..c63479f 100755 --- a/evaluate.py +++ b/evaluate.py @@ -172,7 +172,7 @@ def run(): images, input_seqs, target_seqs, input_mask = Build_Inputs(mode, input_file_pattern) net_image_embeddings = Build_Image_Embeddings(mode, images, train_inception) net_seq_embeddings = Build_Seq_Embeddings(input_seqs) - _, target_cross_entropy_losses, target_cross_entropy_loss_weights = \ + _, target_cross_entropy_losses, target_cross_entropy_loss_weights, network = \ Build_Model(mode, net_image_embeddings, net_seq_embeddings, target_seqs, input_mask) global_step = tf.Variable( diff --git a/inception_v3(hao_fixed_bug).py b/inception_v3(for TF 0.10).py similarity index 100% rename from inception_v3(hao_fixed_bug).py rename to inception_v3(for TF 0.10).py diff --git a/tensorlayer/__init__.py b/tensorlayer/__init__.py index aaf3fa8..a4478ae 100755 --- a/tensorlayer/__init__.py +++ b/tensorlayer/__init__.py @@ -11,6 +11,7 @@ raise ImportError("__init__.py : Could not import TensorFlow." + install_instr) from . import activation +act = activation from . import cost from . import files # from . import init diff --git a/tensorlayer/activation.py b/tensorlayer/activation.py index 5dfc254..347523d 100755 --- a/tensorlayer/activation.py +++ b/tensorlayer/activation.py @@ -72,38 +72,31 @@ def leaky_relu(x=None, alpha=0.1, name="LeakyReLU"): #Shortcut lrelu = leaky_relu - -## Alternatively we can use tl.layers.PReluLayer() -def prelu(x, channel_shared=False, W_init=tf.constant_initializer(value=0.0), W_init_args={}, restore=True, name="PReLU"): - """ Parametric Rectified Linear Unit. +def pixel_wise_softmax(output, name='pixel_wise_softmax'): + """Return the softmax outputs of images, every pixels have multiple label, the sum of a pixel is 1. + Usually be used for image segmentation. Parameters - ---------- - x : A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`, - `int16`, or `int8`. - channel_shared : `bool`. Single weight is shared by all channels - W_init: weights initializer, default zero constant. - The initializer for initializing the alphas. - restore : `bool`. Restore or not alphas - name : A name for this activation op (optional). + ------------ + output : tensor + - For 2d image, 4D tensor [batch_size, height, weight, channel], channel >= 2. + - For 3d image, 5D tensor [batch_size, depth, height, weight, channel], channel >= 2. - Returns - ------- - A `Tensor` with the same type as `x`. 
+    Examples
+    ---------
+    >>> outputs = pixel_wise_softmax(network.outputs)
+    >>> dice_loss = 1 - dice_coe(outputs, y_, epsilon=1e-5)

     References
     -----------
-    - `Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification `_
+    - `tf.reverse `_
     """
-    print(' prelu: untested !!!')
-    if channel_shared:
-        w_shape = (1,)
-    else:
-        w_shape = int(x._shape[-1:])
-    with tf.name_scope(name) as scope:
-        W_init = initializations.get(weights_init)()
-        alphas = tf.get_variable(name='alphas', shape=w_shape, initializer=W_init, **W_init_args )
-        x = tf.nn.relu(x) + tf.mul(alphas, (x - tf.abs(x))) * 0.5
-
-    return x
+    exp_map = tf.exp(output)
+    if output.get_shape().ndims == 4:   # 2d image
+        evidence = tf.add(exp_map, tf.reverse(exp_map, [False, False, False, True]))
+    elif output.get_shape().ndims == 5: # 3d image
+        evidence = tf.add(exp_map, tf.reverse(exp_map, [False, False, False, False, True]))
+    else:
+        raise Exception("output parameters should be 2d or 3d image, not %s" % str(output._shape))
+    return tf.div(exp_map, evidence)
diff --git a/tensorlayer/cost.py b/tensorlayer/cost.py
index 6e40d7a..9c6d205 100755
--- a/tensorlayer/cost.py
+++ b/tensorlayer/cost.py
@@ -37,9 +37,9 @@ def cross_entropy(output, target, name="cross_entropy_loss"):
     # return -1 * tf.reduce_mean(tf.reduce_sum(cross_entropy, 1), name='cross_entropy_mean')
     return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(output, target))

-# Undocumented
-def binary_cross_entropy(preds, targets, name=None):
-    """Computes binary cross entropy given `preds`.
+
+def binary_cross_entropy(output, target, name=None):
+    """Computes binary cross entropy given `output`.

     For brevity, let `x = output`, `z = target`. The logistic loss is ``-(z * log(x + eps) + (1 - z) * log(1 - x + eps))``.

     Parameters
     ----------
-    preds : A `Tensor` of type `float32` or `float64`.
-    targets : A `Tensor` of the same type and shape as `preds`.
+    output : A `Tensor` of type `float32` or `float64`.
+    target : A `Tensor` of the same type and shape as `output`.
     """
-    print("Undocumented")
+    # print("Undocumented")
     from tensorflow.python.framework import ops
     eps = 1e-12
-    with ops.op_scope([preds, targets], name, "bce_loss") as name:
-        preds = ops.convert_to_tensor(preds, name="preds")
-        targets = ops.convert_to_tensor(targets, name="targets")
-        return tf.reduce_mean(-(targets * tf.log(preds + eps) +
-                              (1. - targets) * tf.log(1. - preds + eps)))
+    with ops.op_scope([output, target], name, "bce_loss") as name:
+        output = ops.convert_to_tensor(output, name="output")
+        target = ops.convert_to_tensor(target, name="target")
+        return tf.reduce_mean(-(target * tf.log(output + eps) +
+                              (1. - target) * tf.log(1. - output + eps)))


 def mean_squared_error(output, target):
@@ -74,6 +74,42 @@
     mse = tf.reduce_sum(tf.squared_difference(output, target), reduction_indices = 1)
     return tf.reduce_mean(mse)
+
+
+def dice_coe(output, target, epsilon=1e-10):
+    """Sørensen–Dice coefficient for comparing the similarity of two distributions,
+    usually used for binary image segmentation, i.e. labels are binary.
+    The coefficient is in [0, 1]; 1 means a total match.
+
+    Parameters
+    -----------
+    output : tensor
+        A distribution with shape: [batch_size, ....], (any dimensions).
+    target : tensor
+        A distribution with shape: [batch_size, ....], (any dimensions).
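+
+    The returned value is computed as
+    ``dice = 2 * sum(output * target) / (sum(output * output) + sum(target * target))``,
+    matching the implementation below.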
+ + Examples + --------- + >>> outputs = pixel_wise_softmax(network.outputs) + >>> dice_loss = 1 - dice_coe(outputs, y_, epsilon=1e-5) + + References + ----------- + - `wiki-dice `_ + """ + # inse = tf.reduce_sum( tf.mul(output, target) ) + # l = tf.reduce_sum( tf.mul(output, output) ) + # r = tf.reduce_sum( tf.mul(target, target) ) + inse = tf.reduce_sum( output * target ) + l = tf.reduce_sum( output * output ) + r = tf.reduce_sum( target * target ) + dice = 2 * (inse) / (l + r) + if epsilon == 0: + return dice + else: + return tf.clip_by_value(dice, 0, 1.0-epsilon) + + def cross_entropy_seq(logits, target_seqs, batch_size=1, num_steps=None): """Returns the expression of cross-entropy of two sequences, implement softmax internally. Normally be used for Fixed Length RNN outputs. @@ -117,9 +153,8 @@ def cross_entropy_seq_with_mask(logits, target_seqs, input_mask, return_details= input_mask : the mask to compute loss The same size with target_seqs, normally 0 and 1. return_details : boolean - If False (default), only returns the loss - - If True, returns the loss, losses, weights and targets (reshape to one vetcor) + - If False (default), only returns the loss. + - If True, returns the loss, losses, weights and targets (reshape to one vetcor). Examples -------- @@ -143,8 +178,6 @@ def li_regularizer(scale): Returns a function that can be used to apply group li regularization to weights.\n The implementation follows `TensorFlow contrib `_. - - Parameters ---------- scale : float diff --git a/tensorlayer/files.py b/tensorlayer/files.py index db2a230..a91f95f 100755 --- a/tensorlayer/files.py +++ b/tensorlayer/files.py @@ -160,7 +160,6 @@ def load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False, second=3): import pickle import numpy as np - # We first define a download function, supporting both Python 2 and 3. filename = 'cifar-10-python.tar.gz' if sys.version_info[0] == 2: @@ -187,12 +186,10 @@ def un_tar(file_name): tar.close() print("Extracted to %s" % names[0]) - if not os.path.exists('cifar-10-batches-py'): download(filename) un_tar(filename) - def unpickle(file): fp = open(file, 'rb') if sys.version_info.major == 2: @@ -669,7 +666,7 @@ def load_npz(path='', name='model.npz'): name : a string or None The name of the .npz file. - Return + Returns -------- params : list A list of parameters in order. diff --git a/tensorlayer/iterate.py b/tensorlayer/iterate.py index d5883d9..be03140 100755 --- a/tensorlayer/iterate.py +++ b/tensorlayer/iterate.py @@ -7,8 +7,7 @@ from six.moves import xrange def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False): - """ - Generate a generator that input a group of example in numpy.array and + """Generate a generator that input a group of example in numpy.array and their labels, return the examples and labels by the given batchsize. Parameters @@ -50,13 +49,12 @@ def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False): yield inputs[excerpt], targets[excerpt] def seq_minibatches(inputs, targets, batch_size, seq_length, stride=1): - """ - Generate a generator that return a batch of sequence inputs and targets. + """Generate a generator that return a batch of sequence inputs and targets. If ``batch_size = 100, seq_length = 5``, one return will have ``500`` rows (examples). Examples -------- - >>> Synced sequence input and output. + - Synced sequence input and output. 
>>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']]) >>> y = np.asarray([0, 1, 2, 3, 4, 5]) >>> for batch in tl.iterate.seq_minibatches(inputs=X, targets=y, batch_size=2, seq_length=2, stride=1): @@ -73,7 +71,8 @@ def seq_minibatches(inputs, targets, batch_size, seq_length, stride=1): ... dtype='>> Many to One + + - Many to One >>> return_last = True >>> num_steps = 2 >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']]) @@ -110,22 +109,19 @@ def seq_minibatches(inputs, targets, batch_size, seq_length, stride=1): yield flatten_inputs, flatten_targets def seq_minibatches2(inputs, targets, batch_size, num_steps): - """ - Generate a generator that iterates on two list of words. Yields (Returns) the source contexts and + """Generate a generator that iterates on two list of words. Yields (Returns) the source contexts and the target context by the given batch_size and num_steps (sequence_length), - see ``PTB tutorial``. - - Hint, if the input data are images, you can modify the code from - - data = np.zeros([batch_size, batch_len) - - to - - data = np.zeros([batch_size, batch_len, inputs.shape[1], inputs.shape[2], inputs.shape[3]]) + see ``PTB tutorial``. In TensorFlow's tutorial, this generates the batch_size pointers into the raw + PTB data, and allows minibatch iteration along these pointers. + - Hint, if the input data are images, you can modify the code as follow. - In TensorFlow's tutorial, this generates batch_size pointers into the raw - PTB data, and allows minibatch iteration along these pointers. + .. code-block:: python + + from + data = np.zeros([batch_size, batch_len) + to + data = np.zeros([batch_size, batch_len, inputs.shape[1], inputs.shape[2], inputs.shape[3]]) Parameters ---------- diff --git a/tensorlayer/layers.py b/tensorlayer/layers.py index 5380ae0..aa02bb0 100755 --- a/tensorlayer/layers.py +++ b/tensorlayer/layers.py @@ -123,7 +123,7 @@ def print_all_variables(train_only=False): Parameters ---------- - train_only : boolen + train_only : boolean If True, only print the trainable variables, otherwise, print all variables. 
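+
+    Examples
+    ---------
+    >>> tl.layers.print_all_variables()
+    >>> tl.layers.print_all_variables(train_only=True)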
""" tvar = tf.trainable_variables() if train_only else tf.all_variables() @@ -159,7 +159,6 @@ def __init__( name ='layer' ): self.inputs = inputs - # if name in globals(): if (name in set_keep['_layers_name_list']) and name_reuse == False: raise Exception("Layer '%s' already exists, please choice other 'name'.\ \nHint : Use different name for different 'Layer' (The name is used to control parameter sharing)" % name) @@ -171,25 +170,19 @@ def __init__( def print_params(self, details=True): ''' Print all info of parameters in the network''' - # try: for i, p in enumerate(self.all_params): if details: try: - # print(" param %d: %s (mean: %f, median: %f, std: %f) %s" % (i, str(p.eval().shape), p.eval().mean(), np.median(p.eval()), p.eval().std(), p.name)) print(" param {:3}: {:15} (mean: {:<18}, median: {:<18}, std: {:<18}) {}".format(i, str(p.eval().shape), p.eval().mean(), np.median(p.eval()), p.eval().std(), p.name)) except: raise Exception("Hint: print params details after sess.run(tf.initialize_all_variables()) or use network.print_params(False).") else: print(" param {:3}: {:15} {}".format(i, str(p.get_shape()), p.name)) print(" num of params: %d" % self.count_params()) - # except: - # raise Exception("Hint: print params after sess.run(tf.initialize_all_variables()) or use tl.layers.print_all_variables()") - def print_layers(self): ''' Print all info of layers in the network ''' for i, p in enumerate(self.all_layers): - # print(vars(p)) print(" layer %d: %s" % (i, str(p))) def count_params(self): @@ -610,7 +603,6 @@ def __init__( self.outputs = act(tf.matmul(self.inputs, W) + b) else: self.outputs = act(tf.matmul(self.inputs, W)) - # self.outputs = act(tf.matmul(self.inputs, W) + b) # Hint : list(), dict() is pass by value (shallow), without them, it is # pass by reference. @@ -622,8 +614,9 @@ def __init__( self.all_params.extend( [W, b] ) else: self.all_params.extend( [W] ) - # shallow cope allows the weights in network can be changed at the same - # time, when ReconLayer updates the weights of encoder. + ## when ReconLayer updates the weights of encoder, shallow cope allows + # the weights in network can be changed at the same time, as they point + # to the same weights. # # e.g. the encoder points to same physical memory address # network = InputLayer(x, name='input_layer') @@ -821,7 +814,7 @@ def pretrain(self, sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batc visualize.W(self.train_params[0].eval(), second=10, saveable=True, shape=[28,28], name=save_name+str(epoch+1), fig_idx=2012) files.save_npz([self.all_params[0]] , name=save_name+str(epoch+1)+'.npz') except: - raise Exception("You should change visualize.W(), if you want to save the feature images for different dataset") + raise Exception("You should change the visualize.W() in ReconLayer.pretrain(), if you want to save the feature images for different dataset") ## Noise layer @@ -841,9 +834,24 @@ class DropoutLayer(Layer): Examples -------- + - Define network >>> network = tl.layers.InputLayer(x, name='input_layer') >>> network = tl.layers.DropoutLayer(network, keep=0.8, name='drop1') >>> network = tl.layers.DenseLayer(network, n_units=800, act = tf.nn.relu, name='relu1') + >>> ... + + - For training + >>> feed_dict = {x: X_train_a, y_: y_train_a} + >>> feed_dict.update( network.all_drop ) # enable noise layers + >>> sess.run(train_op, feed_dict=feed_dict) + >>> ... 
+ + - For testing + >>> dp_dict = tl.utils.dict_to_one( network.all_drop ) # disable noise layers + >>> feed_dict = {x: X_val_a, y_: y_val_a} + >>> feed_dict.update(dp_dict) + >>> err, ac = sess.run([cost, acc], feed_dict=feed_dict) + >>> ... """ def __init__( self, @@ -1090,28 +1098,40 @@ class DeConv2dLayer(Layer): Examples --------- - A part of the generator in DCGAN example - >>> inputs = tf.placeholder(tf.float32, [64, 100], name='z_noise') + >>> batch_size = 64 + >>> inputs = tf.placeholder(tf.float32, [batch_size, 100], name='z_noise') >>> net_in = tl.layers.InputLayer(inputs, name='g/in') >>> net_h0 = tl.layers.DenseLayer(net_in, n_units = 8192, ... W_init = tf.random_normal_initializer(stddev=0.02), ... act = tf.identity, name='g/h0/lin') - >>> print(net_h0.outputs) + >>> print(net_h0.outputs._shape) ... (64, 8192) >>> net_h0 = tl.layers.ReshapeLayer(net_h0, shape = [-1, 4, 4, 512], name='g/h0/reshape') >>> net_h0 = tl.layers.BatchNormLayer(net_h0, is_train=is_train, name='g/h0/batch_norm') >>> net_h0.outputs = tf.nn.relu(net_h0.outputs, name='g/h0/relu') - >>> print(net_h0.outputs) + >>> print(net_h0.outputs._shape) ... (64, 4, 4, 512) >>> net_h1 = tl.layers.DeConv2dLayer(net_h0, ... shape = [5, 5, 256, 512], - ... output_shape = [64, 8, 8, 256], + ... output_shape = [batch_size, 8, 8, 256], ... strides=[1, 2, 2, 1], ... act=tf.identity, name='g/h1/decon2d') >>> net_h1 = tl.layers.BatchNormLayer(net_h1, is_train=is_train, name='g/h1/batch_norm') >>> net_h1.outputs = tf.nn.relu(net_h1.outputs, name='g/h1/relu') - >>> print(net_h1.outputs) + >>> print(net_h1.outputs._shape) ... (64, 8, 8, 256) + - U-Net + >>> .... + >>> conv10 = tl.layers.Conv2dLayer(conv9, act=tf.nn.relu, + ... shape=[3,3,1024,1024], strides=[1,1,1,1], padding='SAME', + ... W_init=w_init, b_init=b_init, name='conv10') + >>> print(conv10.outputs) + ... (batch_size, 32, 32, 1024) + >>> deconv1 = tl.layers.DeConv2dLayer(conv10, act=tf.nn.relu, + ... shape=[3,3,512,1024], strides=[1,2,2,1], output_shape=[batch_size,64,64,512], + ... 
padding='SAME', W_init=w_init, b_init=b_init, name='devcon1_1') + References ---------- - `tf.nn.conv2d_transpose `_ @@ -1133,7 +1153,7 @@ def __init__( Layer.__init__(self, name=name) self.inputs = layer.outputs print(" tensorlayer:Instantiate DeConv2dLayer %s: %s, %s, %s, %s, %s" % - (self.name, str(shape), str(output_shape), str(strides), padding, act)) + (self.name, str(shape), str(output_shape), str(strides), padding, act.__name__)) # print(" DeConv2dLayer: Untested") with tf.variable_scope(name) as vs: W = tf.get_variable(name='W_deconv2d', shape=shape, initializer=W_init, **W_init_args ) @@ -1264,14 +1284,14 @@ def __init__( ): Layer.__init__(self, name=name) self.inputs = layer.outputs - print(" tensorlayer:Instantiate DeConv2dLayer %s: %s, %s, %s, %s, %s" % - (self.name, str(shape), str(output_shape), str(strides), padding, act)) + print(" tensorlayer:Instantiate DeConv3dLayer %s: %s, %s, %s, %s, %s" % + (self.name, str(shape), str(output_shape), str(strides), padding, act.__name__)) with tf.variable_scope(name) as vs: W = tf.get_variable(name='W_deconv3d', shape=shape, initializer=W_init, **W_init_args ) b = tf.get_variable(name='b_deconv3d', shape=(shape[-2]), initializer=b_init, **b_init_args ) - self.outputs = act( tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b ) + self.outputs = act( tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b ) self.all_layers = list(layer.all_layers) self.all_params = list(layer.all_params) @@ -1279,10 +1299,10 @@ def __init__( self.all_layers.extend( [self.outputs] ) self.all_params.extend( [W, b] ) -## Normalization layer +# ## Normalization layer class BatchNormLayer(Layer): """ - The :class:`BatchNormLayer` class is a normalization layer, see ``tf.nn.batch_normalization``. + The :class:`BatchNormLayer` class is a normalization layer, see ``tf.nn.batch_normalization`` and ``tf.nn.moments``. Batch normalization on fully-connected or convolutional maps. @@ -1294,22 +1314,21 @@ class BatchNormLayer(Layer): A decay factor for ExponentialMovingAverage. epsilon : float A small float number to avoid dividing by 0. - is_train : boolen + is_train : boolean Whether train or inference. name : a string or None An optional name to attach to this layer. 
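+
+    Examples
+    ---------
+    >>> network = tl.layers.BatchNormLayer(network, is_train=True, name='batch_norm')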
References ---------- - - `tf.nn.batch_normalization `_ - - `stackoverflow `_ - - `tensorflow.contrib `_ + - `Source `_ + - `stackoverflow `_ """ def __init__( self, layer = None, decay = 0.999, - epsilon = 0.001, + epsilon = 0.00001, is_train = None, name ='batchnorm_layer', ): @@ -1317,60 +1336,190 @@ def __init__( self.inputs = layer.outputs print(" tensorlayer:Instantiate BatchNormLayer %s: decay: %f, epsilon: %f, is_train: %s" % (self.name, decay, epsilon, is_train)) - if is_train == None: - raise Exception("is_train must be True or False") - - # (name, input_var, decay, epsilon, is_train) - inputs_shape = self.inputs.get_shape() - axis = list(range(len(inputs_shape) - 1)) - params_shape = inputs_shape[-1:] + x_shape = self.inputs.get_shape() + params_shape = x_shape[-1:] + + def _get_variable(name, + shape, + initializer, + weight_decay=0.0, + dtype='float', + trainable=True): + "A little wrapper around tf.get_variable to do weight decay and add to" + "resnet collection" + if weight_decay > 0: + regularizer = tf.contrib.layers.l2_regularizer(weight_decay) + else: + regularizer = None + # collections = [tf.GraphKeys.VARIABLES, RESNET_VARIABLES] + return tf.get_variable(name, + shape=shape, + initializer=initializer, + dtype=dtype, + regularizer=regularizer, + # collections=collections, + trainable=trainable) + + from tensorflow.python.training import moving_averages + from tensorflow.python.ops import control_flow_ops with tf.variable_scope(name) as vs: - beta = tf.get_variable(name='beta', shape=params_shape, - initializer=tf.constant_initializer(0.0)) - gamma = tf.get_variable(name='gamma', shape=params_shape, - initializer=tf.constant_initializer(1.0)) - batch_mean, batch_var = tf.nn.moments(self.inputs, - axis, - name='moments') - ema = tf.train.ExponentialMovingAverage(decay=decay) + # if use_bias: + # bias = _get_variable('bias', params_shape, + # initializer=tf.zeros_initializer) + # return self.inputs + bias + + axis = list(range(len(x_shape) - 1)) + + beta = _get_variable('beta', + params_shape, + initializer=tf.zeros_initializer) + gamma = _get_variable('gamma', + params_shape, + initializer=tf.ones_initializer) + + # trainable=False means : it prevent TF from updating this variable + # from the gradient, we have to update this from the mean computed + # from each batch during training + moving_mean = _get_variable('moving_mean', + params_shape, + initializer=tf.zeros_initializer, + trainable=False) + moving_variance = _get_variable('moving_variance', + params_shape, + initializer=tf.ones_initializer, + trainable=False) + + # These ops will only be preformed when training. 
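+            # (assign_moving_average(var, value, decay) maintains
+            # var := decay * var + (1 - decay) * value.)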
+ mean, variance = tf.nn.moments(self.inputs, axis) + update_moving_mean = moving_averages.assign_moving_average(moving_mean, + mean, decay) + update_moving_variance = moving_averages.assign_moving_average( + moving_variance, variance, decay) + # tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_mean) + # tf.add_to_collection(UPDATE_OPS_COLLECTION, update_moving_variance) def mean_var_with_update(): - ema_apply_op = ema.apply([batch_mean, batch_var]) - with tf.control_dependencies([ema_apply_op]): - return tf.identity(batch_mean), tf.identity(batch_var) + with tf.control_dependencies([update_moving_mean, update_moving_variance]): + return tf.identity(mean), tf.identity(variance) if is_train: - is_train = tf.cast(tf.ones(1), tf.bool) + is_train = tf.cast(tf.ones([]), tf.bool) else: - is_train = tf.cast(tf.zeros(1), tf.bool) + is_train = tf.cast(tf.zeros([]), tf.bool) - is_train = tf.reshape(is_train, []) + mean, variance = control_flow_ops.cond( + # is_train, lambda: (mean, variance), # when training, (x-mean(x))/var(x) + is_train, mean_var_with_update, + lambda: (moving_mean, moving_variance)) # when inferencing, (x-0)/1 - # print(is_train) - # exit() + self.outputs = tf.nn.batch_normalization(self.inputs, mean, variance, beta, gamma, epsilon) + #x.set_shape(inputs.get_shape()) ?? + variables = tf.get_collection(tf.GraphKeys.VARIABLES, scope=vs.name) - mean, var = tf.cond( - is_train, - mean_var_with_update, - lambda: (ema.average(batch_mean), ema.average(batch_var)) - ) - normed = tf.nn.batch_normalization( - x=self.inputs, - mean=mean, - variance=var, - offset=beta, - scale=gamma, - variance_epsilon=epsilon, - name='tf_bn' - ) - self.outputs = normed + # print(len(variables)) + # for idx, v in enumerate(variables): + # print(" var {:3}: {:15} {}".format(idx, str(v.get_shape()), v.name)) + # exit() self.all_layers = list(layer.all_layers) self.all_params = list(layer.all_params) self.all_drop = dict(layer.all_drop) self.all_layers.extend( [self.outputs] ) - self.all_params.extend( [beta, gamma] ) + self.all_params.extend( variables ) + # self.all_params.extend( [beta, gamma] ) + +# class BatchNormLayer(Layer): +# """ +# The :class:`BatchNormLayer` class is a normalization layer, see ``tf.nn.batch_normalization``. +# +# Batch normalization on fully-connected or convolutional maps. +# +# Parameters +# ----------- +# layer : a :class:`Layer` instance +# The `Layer` class feeding into this layer. +# decay : float +# A decay factor for ExponentialMovingAverage. +# epsilon : float +# A small float number to avoid dividing by 0. +# is_train : boolean +# Whether train or inference. +# name : a string or None +# An optional name to attach to this layer. 
+# +# References +# ---------- +# - `tf.nn.batch_normalization `_ +# - `stackoverflow `_ +# - `tensorflow.contrib `_ +# """ +# def __init__( +# self, +# layer = None, +# decay = 0.999, +# epsilon = 0.001, +# is_train = None, +# name ='batchnorm_layer', +# ): +# Layer.__init__(self, name=name) +# self.inputs = layer.outputs +# print(" tensorlayer:Instantiate BatchNormLayer %s: decay: %f, epsilon: %f, is_train: %s" % +# (self.name, decay, epsilon, is_train)) +# if is_train == None: +# raise Exception("is_train must be True or False") +# +# # (name, input_var, decay, epsilon, is_train) +# inputs_shape = self.inputs.get_shape() +# axis = list(range(len(inputs_shape) - 1)) +# params_shape = inputs_shape[-1:] +# +# with tf.variable_scope(name) as vs: +# beta = tf.get_variable(name='beta', shape=params_shape, +# initializer=tf.constant_initializer(0.0)) +# gamma = tf.get_variable(name='gamma', shape=params_shape, +# initializer=tf.constant_initializer(1.0)) +# batch_mean, batch_var = tf.nn.moments(self.inputs, +# axis, +# name='moments') +# ema = tf.train.ExponentialMovingAverage(decay=decay) +# +# def mean_var_with_update(): +# ema_apply_op = ema.apply([batch_mean, batch_var]) +# with tf.control_dependencies([ema_apply_op]): +# return tf.identity(batch_mean), tf.identity(batch_var) +# +# if is_train: +# is_train = tf.cast(tf.ones(1), tf.bool) +# else: +# is_train = tf.cast(tf.zeros(1), tf.bool) +# +# is_train = tf.reshape(is_train, []) +# +# # print(is_train) +# # exit() +# +# mean, var = tf.cond( +# is_train, +# mean_var_with_update, +# lambda: (ema.average(batch_mean), ema.average(batch_var)) +# ) +# normed = tf.nn.batch_normalization( +# x=self.inputs, +# mean=mean, +# variance=var, +# offset=beta, +# scale=gamma, +# variance_epsilon=epsilon, +# name='tf_bn' +# ) +# self.outputs = normed +# +# self.all_layers = list(layer.all_layers) +# self.all_params = list(layer.all_params) +# self.all_drop = dict(layer.all_drop) +# self.all_layers.extend( [self.outputs] ) +# self.all_params.extend( [beta, gamma] ) ## Pooling layer @@ -1451,11 +1600,11 @@ class RNNLayer(Layer): The sequence length. initial_state : None or RNN State If None, initial_state is zero_state. - return_last : boolen + return_last : boolean - If True, return the last output, "Sequence input and single output" - If False, return all outputs, "Synced sequence input and output" - In other word, if you want to apply one or more RNN(s) on this layer, set to False. - return_seq_2d : boolen + return_seq_2d : boolean - When return_last = False - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. @@ -1481,6 +1630,9 @@ class RNNLayer(Layer): your state at the begining of each epoch or iteration according to your training procedure. + batch_size : int or tensor + Is int, if able to compute the batch_size, otherwise, tensor for ``?``. + Examples -------- - For words @@ -1565,7 +1717,7 @@ class RNNLayer(Layer): Notes ----- - Input dimension should be rank 3 : [batch_size, n_steps(max), n_features], if no, please see :class:`ReshapeLayer`. + Input dimension should be rank 3 : [batch_size, n_steps, n_features], if no, please see :class:`ReshapeLayer`. 
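+
+    >>> # If your input is [batch_size * n_steps, n_features], reshape it first:
+    >>> network = tl.layers.ReshapeLayer(network, shape=[-1, n_steps, n_features])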
References ---------- @@ -1602,10 +1754,10 @@ def __init__( try: self.inputs.get_shape().with_rank(3) except: - raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps(max), n_features]") + raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps, n_features]") - # is_reshape : boolen (deprecate) + # is_reshape : boolean (deprecate) # Reshape the inputs to 3 dimension tensor.\n # If input is[batch_size, n_steps, n_features], we do not need to reshape it.\n # If input is [batch_size * n_steps, n_features], we need to reshape it. @@ -1673,8 +1825,198 @@ def __init__( self.all_layers.extend( [self.outputs] ) self.all_params.extend( rnn_variables ) -# Dynamic RNN +class BiRNNLayer(Layer): + """ + The :class:`BiRNNLayer` class is a Bidirectional RNN layer. + + Parameters + ---------- + layer : a :class:`Layer` instance + The `Layer` class feeding into this layer. + cell_fn : a TensorFlow's core RNN cell as follow. + - see `RNN Cells in TensorFlow `_ + - class ``tf.nn.rnn_cell.BasicRNNCell`` + - class ``tf.nn.rnn_cell.BasicLSTMCell`` + - class ``tf.nn.rnn_cell.GRUCell`` + - class ``tf.nn.rnn_cell.LSTMCell`` + cell_init_args : a dictionary + The arguments for the cell initializer. + n_hidden : a int + The number of hidden units in the layer. + initializer : initializer + The initializer for initializing the parameters. + n_steps : a int + The sequence length. + fw_initial_state : None or forward RNN State + If None, initial_state is zero_state. + bw_initial_state : None or backward RNN State + If None, initial_state is zero_state. + dropout : `tuple` of `float`: (input_keep_prob, output_keep_prob). + The input and output keep probability. + n_layer : a int, default is 1. + The number of RNN layers. + return_last : boolean + - If True, return the last output, "Sequence input and single output" + - If False, return all outputs, "Synced sequence input and output" + - In other word, if you want to apply one or more RNN(s) on this layer, set to False. + return_seq_2d : boolean + - When return_last = False + - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. + - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. + name : a string or None + An optional name to attach to this layer. + + Variables + -------------- + outputs : a tensor + The output of this RNN. + return_last = False, outputs = all cell_output, which is the hidden state. + cell_output.get_shape() = (?, n_hidden) + + fw(bw)_final_state : a tensor or StateTuple + When state_is_tuple = False, + it is the final hidden and cell states, states.get_shape() = [?, 2 * n_hidden].\n + When state_is_tuple = True, it stores two elements: (c, h), in that order. + You can get the final state after each iteration during training, then + feed it to the initial state of next iteration. + + fw(bw)_initial_state : a tensor or StateTuple + It is the initial state of this RNN layer, you can use it to initialize + your state at the begining of each epoch or iteration according to your + training procedure. + + batch_size : int or tensor + Is int, if able to compute the batch_size, otherwise, tensor for ``?``. + + Notes + ----- + - Input dimension should be rank 3 : [batch_size, n_steps, n_features], if no, please see :class:`ReshapeLayer`. + - For predicting, the sequence length has to be the same with the sequence length of training, while, for normal + RNN, we can use sequence length of 1 for predicting. 
+ + References + ---------- + - `Source `_ + """ + def __init__( + self, + layer = None, + cell_fn = tf.nn.rnn_cell.LSTMCell, + cell_init_args = {'use_peepholes':True, 'state_is_tuple':True}, + n_hidden = 100, + initializer = tf.random_uniform_initializer(-0.1, 0.1), + n_steps = 5, + fw_initial_state = None, + bw_initial_state = None, + dropout = None, + n_layer = 1, + return_last = False, + return_seq_2d = False, + name = 'birnn_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + + print(" tensorlayer:Instantiate BiRNNLayer %s: n_hidden:%d, n_steps:%d, in_dim:%d %s, cell_fn:%s, dropout:%s, n_layer:%d " % (self.name, n_hidden, + n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, n_layer)) + + fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] + + if fixed_batch_size.value: + self.batch_size = fixed_batch_size.value + print(" RNN batch_size (concurrent processes): %d" % self.batch_size) + else: + from tensorflow.python.ops import array_ops + self.batch_size = array_ops.shape(self.inputs)[0] + print(" non specified batch_size, uses a tensor instead.") + + # Input dimension should be rank 3 [batch_size, n_steps(max), n_features] + try: + self.inputs.get_shape().with_rank(3) + except: + raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps, n_features]") + + with tf.variable_scope(name, initializer=initializer) as vs: + self.fw_cell = cell_fn(num_units=n_hidden, **cell_init_args) + self.bw_cell = cell_fn(num_units=n_hidden, **cell_init_args) + # Apply dropout + if dropout: + if type(dropout) in [tuple, list]: + in_keep_prob = dropout[0] + out_keep_prob = dropout[1] + elif isinstance(dropout, float): + in_keep_prob, out_keep_prob = dropout, dropout + else: + raise Exception("Invalid dropout type (must be a 2-D tuple of " + "float)") + self.fw_cell = tf.nn.rnn_cell.DropoutWrapper( + self.fw_cell, + input_keep_prob=in_keep_prob, + output_keep_prob=out_keep_prob) + self.bw_cell = tf.nn.rnn_cell.DropoutWrapper( + self.bw_cell, + input_keep_prob=in_keep_prob, + output_keep_prob=out_keep_prob) + # Apply multiple layers + if n_layer > 1: + print(" n_layer: %d" % n_layer) + try: + self.fw_cell = tf.nn.rnn_cell.MultiRNNCell([self.fw_cell] * n_layer, + state_is_tuple=True) + self.bw_cell = tf.nn.rnn_cell.MultiRNNCell([self.bw_cell] * n_layer, + state_is_tuple=True) + except: + self.fw_cell = tf.nn.rnn_cell.MultiRNNCell([self.fw_cell] * n_layer) + self.bw_cell = tf.nn.rnn_cell.MultiRNNCell([self.bw_cell] * n_layer) + + # Initial state of RNN + if fw_initial_state is None: + self.fw_initial_state = self.fw_cell.zero_state(self.batch_size, dtype=tf.float32) + else: + self.fw_initial_state = fw_initial_state + if bw_initial_state is None: + self.bw_initial_state = self.bw_cell.zero_state(self.batch_size, dtype=tf.float32) + else: + self.bw_initial_state = bw_initial_state + # exit() + # Feedforward to MultiRNNCell + list_rnn_inputs = tf.unpack(self.inputs, axis=1) + outputs, fw_state, bw_state = tf.nn.bidirectional_rnn( + cell_fw=self.fw_cell, + cell_bw=self.bw_cell, + inputs=list_rnn_inputs, + initial_state_fw=self.fw_initial_state, + initial_state_bw=self.bw_initial_state + ) + + if return_last: + self.outputs = outputs[-1] + else: + self.outputs = outputs + if return_seq_2d: + # 2D Tensor [n_example, n_hidden] + self.outputs = tf.reshape(tf.concat(1, self.outputs), [-1, n_hidden*2]) + else: + # : stack more RNN layer after that + # 3D Tensor [n_example/n_steps, n_steps, n_hidden] + self.outputs = 
tf.reshape(tf.concat(1, outputs), [-1, n_steps, n_hidden*2]) + self.fw_final_state = fw_state + self.bw_final_state = bw_state + + # Retrieve just the RNN variables. + rnn_variables = tf.get_collection(tf.GraphKeys.VARIABLES, scope=vs.name) + + print(" n_params : %d" % (len(rnn_variables))) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend( [self.outputs] ) + self.all_params.extend( rnn_variables ) + + +# Dynamic RNN def advanced_indexing_op(input, index): """ Advanced Indexing for Sequences. see TFlearn.""" batch_size = tf.shape(input)[0] @@ -1721,11 +2063,11 @@ class DynamicRNNLayer(Layer): The input and output keep probability. n_layer : a int, default is 1. The number of RNN layers. - return_last : boolen + return_last : boolean - If True, return the last output, "Sequence input and single output" - If False, return all outputs, "Synced sequence input and output" - In other word, if you want to apply one or more RNN(s) on this layer, set to False. - return_seq_2d : boolen + return_seq_2d : boolean - When return_last = False - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer or computing cost after it. - If False, return 3D Tensor [n_example/n_steps(max), n_steps(max), n_hidden], for stacking multiple RNN after it. @@ -1801,8 +2143,8 @@ def __init__( Layer.__init__(self, name=name) self.inputs = layer.outputs - print(" tensorlayer:Instantiate DynamicRNNLayer %s: n_hidden:%d, in_dim:%d %s, cell_fn:%s " % (self.name, n_hidden, - self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__)) + print(" tensorlayer:Instantiate DynamicRNNLayer %s: n_hidden:%d, in_dim:%d %s, cell_fn:%s, dropout:%s, n_layer:%d" % (self.name, n_hidden, + self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, n_layer)) # Input dimension should be rank 3 [batch_size, n_steps(max), n_features] try: @@ -1841,7 +2183,10 @@ def __init__( # Apply multiple layers if n_layer > 1: print(" n_layer: %d" % n_layer) - self.cell = tf.nn.rnn_cell.MultiRNNCell([self.cell] * n_layer, state_is_tuple=True) + try: + self.cell = tf.nn.rnn_cell.MultiRNNCell([self.cell] * n_layer, state_is_tuple=True) + except: + self.cell = tf.nn.rnn_cell.MultiRNNCell([self.cell] * n_layer) # Initialize initial_state if initial_state is None: @@ -1880,15 +2225,15 @@ def __init__( else: # [batch_size, n_step(max), n_hidden] # self.outputs = result[0]["outputs"] - self.outputs = outputs + # self.outputs = outputs # it is 3d, but it is a list if return_seq_2d: # PTB tutorial: # 2D Tensor [n_example, n_hidden] - self.outputs = tf.reshape(tf.concat(1, self.outputs), [-1, n_hidden]) - # else: + self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_hidden]) + else: # : # 3D Tensor [batch_size, n_steps, n_hidden] - # self.outputs = tf.reshape(tf.concat(1, self.outputs), [-1, n_steps, n_hidden]) + self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_steps, n_hidden]) # Final state @@ -1926,11 +2271,11 @@ class BiDynamicRNNLayer(Layer): The number of hidden units in the layer. n_steps : a int The sequence length. - return_last : boolen + return_last : boolean If True, return the last output, "Sequence input and single output"\n If False, return all outputs, "Synced sequence input and output"\n In other word, if you want to apply one or more RNN(s) on this layer, set to False. 
-    return_seq_2d : boolen
+    return_seq_2d : boolean
         When return_last = False\n
         if True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it.
         if False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it.
@@ -2190,6 +2535,105 @@ def __init__(
         self.all_drop = dict(layer.all_drop)
         self.all_layers.extend( [self.outputs] )
 
+
+
+class LambdaLayer(Layer):
+    """
+    The :class:`LambdaLayer` class is a layer that applies a given function to its input.
+
+    Parameters
+    ----------
+    layer : a :class:`Layer` instance
+        The `Layer` class feeding into this layer.
+    fn : a function
+        The function to apply to the outputs of the previous layer.
+    fn_args : a dictionary
+        The arguments for the function (optional).
+    name : a string or None
+        An optional name to attach to this layer.
+
+    Examples
+    ---------
+    >>> x = tf.placeholder(tf.float32, shape=[None, 1], name='x')
+    >>> network = tl.layers.InputLayer(x, name='input_layer')
+    >>> network = LambdaLayer(network, lambda x: 2*x, name='lambda_layer')
+    >>> y = network.outputs
+    >>> sess = tf.InteractiveSession()
+    >>> out = sess.run(y, feed_dict={x : [[1],[2]]})
+    ... [[2],[4]]
+    """
+    def __init__(
+        self,
+        layer = None,
+        fn = None,
+        fn_args = {},
+        name = 'lambda_layer',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+
+        print("  tensorlayer:Instantiate LambdaLayer %s" % self.name)
+        with tf.variable_scope(name) as vs:
+            self.outputs = fn(self.inputs, **fn_args)
+            variables = tf.get_collection(tf.GraphKeys.VARIABLES, scope=vs.name)
+
+        self.all_layers = list(layer.all_layers)
+        self.all_params = list(layer.all_params)
+        self.all_drop = dict(layer.all_drop)
+        self.all_layers.extend( [self.outputs] )
+        self.all_params.extend( variables )
+
+## Logic layer
+class ElementwiseLayer(Layer):
+    """
+    The :class:`ElementwiseLayer` class combines multiple :class:`Layer` instances that have the same output shape, using a given element-wise operation.
+
+    Parameters
+    ----------
+    layer : a list of :class:`Layer` instances
+        The `Layer` class feeding into this layer.
+    combine_fn : a TensorFlow element-wise merge function
+        e.g. AND is ``tf.minimum`` ;  OR is ``tf.maximum`` ; ADD is ``tf.add`` ; MUL is ``tf.mul`` and so on.
+        See `TensorFlow Math API `_ .
+    name : a string or None
+        An optional name to attach to this layer.
+
+    Examples
+    --------
+    - AND Logic
+    >>> net_0 = tl.layers.DenseLayer(net_0, n_units=500,
+    ...                        act = tf.nn.relu, name='net_0')
+    >>> net_1 = tl.layers.DenseLayer(net_1, n_units=500,
+    ...                        act = tf.nn.relu, name='net_1')
+    >>> net_com = tl.layers.ElementwiseLayer(layer = [net_0, net_1],
+    ...                         combine_fn = tf.minimum,
+    ...                         name = 'combine_layer')
+    """
+    def __init__(
+        self,
+        layer = [],
+        combine_fn = tf.minimum,
+        name ='elementwise_layer',
+    ):
+        Layer.__init__(self, name=name)
+
+        print("  tensorlayer:Instantiate ElementwiseLayer %s: %s, %s" % (self.name, layer[0].outputs._shape, combine_fn.__name__))
+
+        self.outputs = layer[0].outputs
+        # print(self.outputs._shape, type(self.outputs._shape))
+        for l in layer[1:]:
+            assert str(self.outputs._shape) == str(l.outputs._shape), "Hint: the input shapes should be the same. %s != %s" % (self.outputs._shape, str(l.outputs._shape))
%s != %s" % (self.outputs._shape , str(l.outputs._shape)) + self.outputs = combine_fn(self.outputs, l.outputs, name=name) + + self.all_layers = list(layer[0].all_layers) + self.all_params = list(layer[0].all_params) + self.all_drop = dict(layer[0].all_drop) + + for i in range(1, len(layer)): + self.all_layers.extend(list(layer[i].all_layers)) + self.all_params.extend(list(layer[i].all_params)) + self.all_drop.update(dict(layer[i].all_drop)) + ## TF-Slim layer class SlimNetsLayer(Layer): """ @@ -2203,9 +2647,15 @@ class SlimNetsLayer(Layer): The `Layer` class feeding into this layer. slim_layer : a slim network function The network you want to stack onto, end with ``return net, end_points``. + slim_args : dictionary + The arguments for the slim model. name : a string or None An optional name to attach to this layer. + Examples + -------- + - see Inception V3 example on `Github `_ + Notes ----- The due to TF-Slim stores the layers as dictionary, the ``all_layers`` in this @@ -2216,15 +2666,22 @@ def __init__( layer = None, slim_layer = None, slim_args = {}, - name ='slim_layer', + name ='InceptionV3', ): Layer.__init__(self, name=name) self.inputs = layer.outputs print(" tensorlayer:Instantiate SlimNetsLayer %s: %s" % (self.name, slim_layer.__name__)) - with tf.variable_scope(name) as vs: - net, end_points = slim_layer(self.inputs, **slim_args) - slim_variables = tf.get_collection(tf.GraphKeys.VARIABLES, scope=vs.name) + # with tf.variable_scope(name) as vs: + # net, end_points = slim_layer(self.inputs, **slim_args) + # slim_variables = tf.get_collection(tf.GraphKeys.VARIABLES, scope=vs.name) + + net, end_points = slim_layer(self.inputs, **slim_args) + + slim_variables = tf.get_collection(tf.GraphKeys.VARIABLES, scope=name) + if slim_variables == []: + print("No variables found under %s : the name of SlimNetsLayer should be matched with the begining of the ckpt file, see tutorial_inceptionV3_tfslim.py for more details" % name) + self.outputs = net @@ -2250,9 +2707,10 @@ class PReluLayer(Layer): x : A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`, `int16`, or `int8`. channel_shared : `bool`. Single weight is shared by all channels - W_init: weights initializer, default zero constant. + a_init : alpha initializer, default zero constant. The initializer for initializing the alphas. - restore : `bool`. Restore or not alphas + a_init_args : dictionary + The arguments for the weights initializer. name : A name for this activation op (optional). 
References @@ -2263,30 +2721,28 @@ def __init__( self, layer = None, channel_shared = False, - W_init = tf.constant_initializer(value=0.0), - W_init_args = {}, - restore = True, + a_init = tf.constant_initializer(value=0.0), + a_init_args = {}, + # restore = True, name="prelu_layer" ): Layer.__init__(self, name=name) self.inputs = layer.outputs - print(" tensorlayer:Instantiate PReluLayer %s: %s" % (self.name, channel_shared)) - print(' [Warning] prelu: untested !!!') + print(" tensorlayer:Instantiate PReluLayer %s: channel_shared:%s" % (self.name, channel_shared)) if channel_shared: w_shape = (1,) else: - w_shape = int(self.inputs._shape[-1:]) + w_shape = int(self.inputs._shape[-1]) with tf.name_scope(name) as scope: - # W_init = initializations.get(weights_init)() - alphas = tf.get_variable(name='alphas', shape=w_shape, initializer=W_init, **W_init_args ) + alphas = tf.get_variable(name='alphas', shape=w_shape, initializer=a_init, **a_init_args ) self.outputs = tf.nn.relu(self.inputs) + tf.mul(alphas, (self.inputs - tf.abs(self.inputs))) * 0.5 self.all_layers = list(layer.all_layers) self.all_params = list(layer.all_params) self.all_drop = dict(layer.all_drop) - self.all_layers.extend( self.outputs ) + self.all_layers.extend( [self.outputs] ) self.all_params.extend( [alphas] ) diff --git a/tensorlayer/nlp.py b/tensorlayer/nlp.py index d39121f..fd2d0bd 100755 --- a/tensorlayer/nlp.py +++ b/tensorlayer/nlp.py @@ -533,7 +533,7 @@ def build_words_dataset(words=[], vocabulary_size=50000, printable=True, unk_key vocabulary_size : an int The maximum vocabulary size, limiting the vocabulary size. Then the script replaces rare words with 'UNK' token. - printable : boolen + printable : boolean Whether to print the read vocabulary size of the given words. unk_key : a string Unknown words = unk_key diff --git a/tensorlayer/ops.py b/tensorlayer/ops.py index c884ded..1fe26dd 100755 --- a/tensorlayer/ops.py +++ b/tensorlayer/ops.py @@ -19,7 +19,7 @@ def exit_tf(sess=None): sess : a session instance of TensorFlow TensorFlow session """ - text = "Close tensorboard and nvidia-process if available" + text = "[tl] Close tensorboard and nvidia-process if available" sess.close() # import time # time.sleep(2) @@ -103,7 +103,7 @@ def set_gpu_fraction(sess=None, gpu_fraction=0.3): def disable_print(): - """Disable console output. + """Disable console output, ``suppress_stdout`` is recommended. Examples --------- @@ -118,7 +118,7 @@ def disable_print(): sys.stderr = os.devnull def enable_print(): - """Enable console output. + """Enable console output, ``suppress_stdout`` is recommended. Examples -------- @@ -128,27 +128,50 @@ def enable_print(): sys.stderr = sys.__stderr__ -class temporary_disable_print: +# class temporary_disable_print: +# """Temporarily disable console output. +# +# Examples +# --------- +# >>> print("You can see me") +# >>> with tl.ops.temporary_disable_print() as t: +# >>> print("You can't see me") +# >>> print("You can see me") +# """ +# def __init__(self): +# pass +# def __enter__(self): +# sys.stdout = None +# sys.stderr = os.devnull +# def __exit__(self, type, value, traceback): +# sys.stdout = sys.__stdout__ +# sys.stderr = sys.__stderr__ +# return isinstance(value, TypeError) + + +from contextlib import contextmanager +@contextmanager +def suppress_stdout(): """Temporarily disable console output. 
Examples --------- >>> print("You can see me") - >>> with tl.ops.temporary_disable_print() as t: + >>> with tl.ops.suppress_stdout(): >>> print("You can't see me") >>> print("You can see me") - """ - def __init__(self): - pass - def __enter__(self): - sys.stdout = None - sys.stderr = os.devnull - def __exit__(self, type, value, traceback): - sys.stdout = sys.__stdout__ - sys.stderr = sys.__stderr__ - return isinstance(value, TypeError) - + References + ----------- + - `stackoverflow `_ + """ + with open(os.devnull, "w") as devnull: + old_stdout = sys.stdout + sys.stdout = devnull + try: + yield + finally: + sys.stdout = old_stdout @@ -170,5 +193,28 @@ def get_site_packages_directory(): +def empty_trash(): + """Empty trash folder. + + """ + text = "[tl] Empty the trash" + if _platform == "linux" or _platform == "linux2": + print('linux: %s' % text) + os.system("rm -rf ~/.local/share/Trash/*") + elif _platform == "darwin": + print('OS X: %s' % text) + os.system("sudo rm -rf ~/.Trash/*") + elif _platform == "win32": + print('Windows: %s' % text) + try: + os.system("rd /s c:\$Recycle.Bin") # Windows 7 or Server 2008 + except: + pass + try: + os.system("rd /s c:\recycler") # Windows XP, Vista, or Server 2003 + except: + pass + else: + print(_platform) # diff --git a/tensorlayer/prepro.py b/tensorlayer/prepro.py index fb1bd7e..e34034a 100755 --- a/tensorlayer/prepro.py +++ b/tensorlayer/prepro.py @@ -5,10 +5,1023 @@ import tensorflow as tf import tensorlayer as tl import numpy as np + import time import numbers +import random +import os +import re +import sys + +import threading +# import Queue # <-- donot work for py3 +is_py2 = sys.version[0] == '2' +if is_py2: + import Queue as queue +else: + import queue as queue + +from six.moves import range +import scipy +from scipy import linalg +import scipy.ndimage as ndi +from skimage import transform +# import skimage +from skimage import exposure + +# linalg https://docs.scipy.org/doc/scipy/reference/linalg.html +# ndimage https://docs.scipy.org/doc/scipy/reference/ndimage.html + +## Threading +def threading_data(data=None, fn=None, **kwargs): + """Return a batch of result by given data. + Usually be used for data augmentation. + + Parameters + ----------- + data : numpy array or zip of numpy array, see Examples below. + fn : the function for data processing. + more args : the args for fn, see Examples below. + + Examples + -------- + - Single array + >>> X --> [batch_size, row, col, 1] greyscale + >>> results = threading_data(X, zoom, zoom_range=[0.5, 1], is_random=True) + ... results --> [batch_size, row, col, channel] + >>> tl.visualize.images2d(images=np.asarray(results), second=0.01, saveable=True, name='after', dtype=None) + >>> tl.visualize.images2d(images=np.asarray(X), second=0.01, saveable=True, name='before', dtype=None) + + - List of array (e.g. functions with ``multi``) + >>> X, Y --> [batch_size, row, col, 1] greyscale + >>> data = threading_data([_ for _ in zip(X, Y)], zoom_multi, zoom_range=[0.5, 1], is_random=True) + ... data --> [batch_size, 2, row, col, 1] + >>> X_, Y_ = data.transpose((1,0,2,3,4)) + ... X_, Y_ --> [batch_size, row, col, 1] + >>> tl.visualize.images2d(images=np.asarray(X_), second=0.01, saveable=True, name='after', dtype=None) + >>> tl.visualize.images2d(images=np.asarray(Y_), second=0.01, saveable=True, name='before', dtype=None) + + - Customized function for image segmentation + >>> def distort_img(data): + ... x, y = data + ... x, y = flip_axis_multi([x, y], axis=0, is_random=True) + ... 
x, y = flip_axis_multi([x, y], axis=1, is_random=True) + ... x, y = rotation_multi([x, y], rg=10, is_random=True) + ... x, y = shear_multi([x, y], 0.1, is_random=True) + ... x, y = zoom_multi([x, y], zoom_range=[0.9, 1.1], is_random=True) + ... return x, y + >>> X, Y --> [batch_size, row, col, channel] + >>> data = threading_data([_ for _ in zip(X, Y)], distort_img) + >>> X_, Y_ = data.transpose((1,0,2,3,4)) + + References + ---------- + - `python queue `_ + """ + ## plot function info + # for name, value in kwargs.items(): + # print('{0} = {1}'.format(name, value)) + # exit() + ## define function for threading + def function(q, data, kwargs): + result = fn(data, **kwargs) + q.put(result) + ## start threading + q = queue.Queue() + for i in range(len(data)): + d = threading.Thread( + name='threading_and_return', + target=function, + args=(q, data[i], kwargs) + ) + d.start() + ## get results + results = [] + for i in range(len(data)): + result = q.get() + results.append(result) + return np.asarray(results) + + +## Image +def rotation(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2, + fill_mode='nearest', cval=0.): + """Rotate an image randomly or non-randomly. + + Parameters + ----------- + x : numpy array + An image with dimension of [row, col, channel] (default). + rg : int or float + Degree to rotate, usually 0 ~ 180. + is_random : boolean, default False + If True, randomly rotate. + row_index, col_index, channel_index : int + Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). + fill_mode : string + Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’ + + - `scipy ndimage affine_transform `_ + cval : scalar, optional + Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0 + + - `scipy ndimage affine_transform `_ + + Examples + --------- + >>> x --> [row, col, 1] greyscale + >>> x = rotation(x, rg=40, is_random=False) + >>> tl.visualize.frame(x[:,:,0], second=0.01, saveable=True, name='temp',cmap='gray') + """ + if is_random: + theta = np.pi / 180 * np.random.uniform(-rg, rg) + else: + theta = np.pi /180 * rg + rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], + [np.sin(theta), np.cos(theta), 0], + [0, 0, 1]]) + + h, w = x.shape[row_index], x.shape[col_index] + transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w) + x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval) + return x + +def rotation_multi(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2, + fill_mode='nearest', cval=0.): + """Rotate multiple images with the same arguments, randomly or non-randomly. + Usually be used for image segmentation which x=[X, Y], X and Y should be matched. + + Parameters + ----------- + x : list of numpy array + List of images with dimension of [n_images, row, col, channel] (default). + others : see ``rotation``. 
+
+    Examples
+    --------
+    >>> x, y --> [row, col, 1] greyscale
+    >>> x, y = rotation_multi([x, y], rg=90, is_random=False)
+    >>> tl.visualize.frame(x[:,:,0], second=0.01, saveable=True, name='x', cmap='gray')
+    >>> tl.visualize.frame(y[:,:,0], second=0.01, saveable=True, name='y', cmap='gray')
+    """
+    if is_random:
+        theta = np.pi / 180 * np.random.uniform(-rg, rg)
+    else:
+        theta = np.pi / 180 * rg
+    rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0],
+                                [np.sin(theta), np.cos(theta), 0],
+                                [0, 0, 1]])
+
+    h, w = x[0].shape[row_index], x[0].shape[col_index]
+    transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w)
+    results = []
+    for data in x:
+        results.append( apply_transform(data, transform_matrix, channel_index, fill_mode, cval))
+    return np.asarray(results)
+
+# crop
+def crop(x, wrg, hrg, is_random=False, row_index=0, col_index=1, channel_index=2):
+    """Randomly or centrally crop an image.
+
+    Parameters
+    ----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    wrg : int
+        Size of width.
+    hrg : int
+        Size of height.
+    is_random : boolean, default False
+        If True, randomly crop, else central crop.
+    row_index, col_index, channel_index : int
+        Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
+    """
+    h, w = x.shape[row_index], x.shape[col_index]
+    assert (h > hrg) and (w > wrg), "The size of cropping should be smaller than the original image"
+    if is_random:
+        # cast the offsets to int so they are valid slice indices, and keep them non-negative
+        h_offset = int(np.random.uniform(0, h - hrg))
+        w_offset = int(np.random.uniform(0, w - wrg))
+        # print(h_offset, w_offset, x[h_offset: hrg+h_offset, w_offset: wrg+w_offset].shape)
+        return x[h_offset: hrg + h_offset, w_offset: wrg + w_offset]
+    else:
+        # central crop : int() keeps the offsets valid slice indices under Python 3
+        h_offset = int((h - hrg) / 2)
+        w_offset = int((w - wrg) / 2)
+        return x[h_offset: h_offset + hrg, w_offset: w_offset + wrg]
+
+def crop_multi(x, wrg, hrg, is_random=False, row_index=0, col_index=1, channel_index=2):
+    """Randomly or centrally crop multiple images.
+
+    Parameters
+    ----------
+    x : list of numpy array
+        List of images with dimension of [n_images, row, col, channel] (default).
+    others : see ``crop``.
+    """
+    h, w = x[0].shape[row_index], x[0].shape[col_index]
+    assert (h > hrg) and (w > wrg), "The size of cropping should be smaller than the original image"
+    if is_random:
+        h_offset = int(np.random.uniform(0, h - hrg))
+        w_offset = int(np.random.uniform(0, w - wrg))
+        results = []
+        for data in x:
+            results.append( data[h_offset: hrg + h_offset, w_offset: wrg + w_offset])
+        return np.asarray(results)
+    else:
+        # central crop
+        h_offset = int((h - hrg) / 2)
+        w_offset = int((w - wrg) / 2)
+        results = []
+        for data in x:
+            results.append( data[h_offset: h_offset + hrg, w_offset: w_offset + wrg] )
+        return np.asarray(results)
+
+# flip
+def flip_axis(x, axis, is_random=False):
+    """Flip the axis of an image, such as flip left and right, up and down, randomly or non-randomly.
+
+    Parameters
+    ----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    axis : int
+        - 0, flip up and down
+        - 1, flip left and right
+        - 2, flip channel
+    is_random : boolean, default False
+        If True, randomly flip.
+    """
+    if is_random:
+        factor = np.random.uniform(-1, 1)
+        if factor > 0:
+            x = np.asarray(x).swapaxes(axis, 0)
+            x = x[::-1, ...]
+            x = x.swapaxes(0, axis)
+            return x
+        else:
+            return x
+    else:
+        x = np.asarray(x).swapaxes(axis, 0)
+        x = x[::-1, ...]
+        x = x.swapaxes(0, axis)
+        return x
+
+def flip_axis_multi(x, axis, is_random=False):
+    """Flip the axes of multiple images together, such as flip left and right, up and down, randomly or non-randomly.
+
+    Parameters
+    -----------
+    x : list of numpy array
+        List of images with dimension of [n_images, row, col, channel] (default).
+    others : see ``flip_axis``.
+    """
+    if is_random:
+        factor = np.random.uniform(-1, 1)
+        if factor > 0:
+            results = []
+            for data in x:
+                data = np.asarray(data).swapaxes(axis, 0)
+                data = data[::-1, ...]
+                data = data.swapaxes(0, axis)
+                results.append( data )
+            return np.asarray(results)
+        else:
+            return np.asarray(x)
+    else:
+        results = []
+        for data in x:
+            data = np.asarray(data).swapaxes(axis, 0)
+            data = data[::-1, ...]
+            data = data.swapaxes(0, axis)
+            results.append( data )
+        return np.asarray(results)
+
+# shift
+def shift(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2,
+        fill_mode='nearest', cval=0.):
+    """Shift an image randomly or non-randomly.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    wrg : float
+        Percentage of shift in axis x, usually -0.25 ~ 0.25.
+    hrg : float
+        Percentage of shift in axis y, usually -0.25 ~ 0.25.
+    is_random : boolean, default False
+        If True, randomly shift.
+    row_index, col_index, channel_index : int
+        Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
+    fill_mode : string
+        Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’.
+
+        - `scipy ndimage affine_transform `_
+    cval : scalar, optional
+        Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0.
+
+        - `scipy ndimage affine_transform `_
+    """
+    h, w = x.shape[row_index], x.shape[col_index]
+    if is_random:
+        tx = np.random.uniform(-hrg, hrg) * h
+        ty = np.random.uniform(-wrg, wrg) * w
+    else:
+        tx, ty = hrg * h, wrg * w
+    translation_matrix = np.array([[1, 0, tx],
+                                   [0, 1, ty],
+                                   [0, 0, 1]])
+
+    transform_matrix = translation_matrix  # no need to do offset
+    x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
+    return x
+
+def shift_multi(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2,
+        fill_mode='nearest', cval=0.):
+    """Shift images with the same arguments, randomly or non-randomly.
+    Usually used for image segmentation, where x=[X, Y] and X and Y should be matched.
+
+    Parameters
+    -----------
+    x : list of numpy array
+        List of images with dimension of [n_images, row, col, channel] (default).
+    others : see ``shift``.
+    """
+    h, w = x[0].shape[row_index], x[0].shape[col_index]
+    if is_random:
+        tx = np.random.uniform(-hrg, hrg) * h
+        ty = np.random.uniform(-wrg, wrg) * w
+    else:
+        tx, ty = hrg * h, wrg * w
+    translation_matrix = np.array([[1, 0, tx],
+                                   [0, 1, ty],
+                                   [0, 0, 1]])
+
+    transform_matrix = translation_matrix  # no need to do offset
+    results = []
+    for data in x:
+        results.append( apply_transform(data, transform_matrix, channel_index, fill_mode, cval))
+    return np.asarray(results)
+
+# shear
+def shear(x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2,
+        fill_mode='nearest', cval=0.):
+    """Shear an image randomly or non-randomly.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    intensity : float
+        Percentage of shear, usually -0.5 ~ 0.5 (is_random==True), 0 ~ 0.5 (is_random==False),
+        you can have a quick try with shear(X, 1).
+    is_random : boolean, default False
+        If True, randomly shear.
+    row_index, col_index, channel_index : int
+        Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
+    fill_mode : string
+        Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’.
+
+        - `scipy ndimage affine_transform `_
+    cval : scalar, optional
+        Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0.
+
+        - `scipy ndimage affine_transform `_
+    """
+    if is_random:
+        shear = np.random.uniform(-intensity, intensity)
+    else:
+        shear = intensity
+    shear_matrix = np.array([[1, -np.sin(shear), 0],
+                             [0, np.cos(shear), 0],
+                             [0, 0, 1]])
+
+    h, w = x.shape[row_index], x.shape[col_index]
+    transform_matrix = transform_matrix_offset_center(shear_matrix, h, w)
+    x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
+    return x
+
+def shear_multi(x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2,
+        fill_mode='nearest', cval=0.):
+    """Shear images with the same arguments, randomly or non-randomly.
+    Usually used for image segmentation, where x=[X, Y] and X and Y should be matched.
+
+    Parameters
+    -----------
+    x : list of numpy array
+        List of images with dimension of [n_images, row, col, channel] (default).
+    others : see ``shear``.
+    """
+    if is_random:
+        shear = np.random.uniform(-intensity, intensity)
+    else:
+        shear = intensity
+    shear_matrix = np.array([[1, -np.sin(shear), 0],
+                             [0, np.cos(shear), 0],
+                             [0, 0, 1]])
+
+    h, w = x[0].shape[row_index], x[0].shape[col_index]
+    transform_matrix = transform_matrix_offset_center(shear_matrix, h, w)
+    results = []
+    for data in x:
+        results.append( apply_transform(data, transform_matrix, channel_index, fill_mode, cval))
+    return np.asarray(results)
+
+# zoom
+def zoom(x, zoom_range=(0.9, 1.1), is_random=False, row_index=0, col_index=1, channel_index=2,
+        fill_mode='nearest', cval=0.):
+    """Zoom in and out of a single image, randomly or non-randomly.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    zoom_range : list or tuple
+        - If is_random=False, (h, w) are the fixed zoom factors for the row and column axes; a factor smaller than one zooms in.
+        - If is_random=True, (min zoom out, max zoom out) for x and y, each sampled independently,
+        e.g. (0.5, 1) zooms in 1~2 times.
+    is_random : boolean, default False
+        If True, randomly zoom.
+    row_index, col_index, channel_index : int
+        Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
+    fill_mode : string
+        Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’.
+
+        - `scipy ndimage affine_transform `_
+    cval : scalar, optional
+        Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0.
+
+        - `scipy ndimage affine_transform `_
+    """
+    if len(zoom_range) != 2:
+        raise Exception('zoom_range should be a tuple or list of two floats. '
+                        'Received arg: ', zoom_range)
+    if is_random:
+        if zoom_range[0] == 1 and zoom_range[1] == 1:
+            zx, zy = 1, 1
+            print(" random_zoom : not zoom in/out")
+        else:
+            zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2)
+    else:
+        zx, zy = zoom_range
+    # print(zx, zy)
+    zoom_matrix = np.array([[zx, 0, 0],
+                            [0, zy, 0],
+                            [0, 0, 1]])
+
+    h, w = x.shape[row_index], x.shape[col_index]
+    transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w)
+    x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval)
+    return x
+
+def zoom_multi(x, zoom_range=(0.9, 1.1), is_random=False,
+        row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0.):
+    """Zoom in and out of images with the same arguments, randomly or non-randomly.
+    Usually used for image segmentation, where x=[X, Y] and X and Y should be matched.
+
+    Parameters
+    -----------
+    x : list of numpy array
+        List of images with dimension of [n_images, row, col, channel] (default).
+    others : see ``zoom``.
+    """
+    if len(zoom_range) != 2:
+        raise Exception('zoom_range should be a tuple or list of two floats. '
+                        'Received arg: ', zoom_range)
+
+    if is_random:
+        if zoom_range[0] == 1 and zoom_range[1] == 1:
+            zx, zy = 1, 1
+            print(" random_zoom : not zoom in/out")
+        else:
+            zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2)
+    else:
+        zx, zy = zoom_range
+
+    zoom_matrix = np.array([[zx, 0, 0],
+                            [0, zy, 0],
+                            [0, 0, 1]])
+
+    h, w = x[0].shape[row_index], x[0].shape[col_index]
+    transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w)
+    results = []
+    for data in x:
+        results.append( apply_transform(data, transform_matrix, channel_index, fill_mode, cval))
+    return np.asarray(results)
+
+# image = tf.image.random_brightness(image, max_delta=32. / 255.)
+# image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
+# image = tf.image.random_hue(image, max_delta=0.032)
+# image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
+
+# brightness
+def brightness(x, gamma=1, gain=1, is_random=False):
+    """Change the brightness of a single image, randomly or non-randomly.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    gamma : float, smaller than 1 means brighter.
+        Non-negative real number. Default value is 1.
+        - If is_random is True, gamma is sampled from the range (1-gamma, 1+gamma).
+    gain : float
+        The constant multiplier. Default value is 1.
+    is_random : boolean, default False
+        - If True, randomly change brightness.
+
+    References
+    -----------
+    - `skimage.exposure.adjust_gamma `_
+    - `chinese blog `_
+    """
+    if is_random:
+        gamma = np.random.uniform(1-gamma, 1+gamma)
+    x = exposure.adjust_gamma(x, gamma, gain)
+    return x
+
+def brightness_multi(x, gamma=1, gain=1, is_random=False):
+    """Change the brightness of multiple images, randomly or non-randomly.
+    Usually used for image segmentation, where x=[X, Y] and X and Y should be matched.
+
+    Parameters
+    -----------
+    x : list of numpy array
+        List of images with dimension of [n_images, row, col, channel] (default).
+    others : see ``brightness``.
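+
+    Examples
+    --------
+    - A minimal usage sketch (``x1``, ``x2`` are assumed to be matched images):
+    >>> x1, x2 = brightness_multi([x1, x2], gamma=0.5, is_random=True)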
+ """ + if is_random: + gamma = np.random.uniform(1-gamma, 1+gamma) + + results = [] + for data in x: + results.append( exposure.adjust_gamma(data, gamma, gain) ) + return np.asarray(results) + + +# contrast +def constant(x, cutoff=0.5, gain=10, inv=False, is_random=False): + # TODO + x = exposure.adjust_sigmoid(x, cutoff=cutoff, gain=gain, inv=inv) + return x + +def constant_multi(): + #TODO + pass + +# resize +def imresize(x, size=[100, 100], interp='bilinear', mode=None): + """Resize an image by given output size and method. + + Parameters + ----------- + x : numpy array + An image with dimension of [row, col, channel] (default). + size : int, float or tuple (h, w) + - int, Percentage of current size. + - float, Fraction of current size. + - tuple, Size of the output image. + interp : str, optional + Interpolation to use for re-sizing (‘nearest’, ‘lanczos’, ‘bilinear’, ‘bicubic’ or ‘cubic’). + mode : str, optional + The PIL image mode (‘P’, ‘L’, etc.) to convert arr before resizing. + + Returns + -------- + imresize : ndarray + The resized array of image. + + References + ------------ + - `scipy.misc.imresize `_ + """ + if x.shape[-1] == 1: + # greyscale + x = scipy.misc.imresize(x[:,:,0], size, interp=interp, mode=mode) + return x[:, :, np.newaxis] + elif x.shape[-1] == 3: + # rgb, bgr .. + return scipy.misc.imresize(x, size, interp=interp, mode=mode) + else: + raise Exception("Unsupported channel %d" % x.shape[-1]) + +# normailization +def samplewise_norm(x, rescale=None, samplewise_center=False, samplewise_std_normalization=False, + channel_index=2, epsilon=1e-7): + """Normalize an image by rescale, samplewise centering and samplewise centering in order. + + Parameters + ----------- + x : numpy array + An image with dimension of [row, col, channel] (default). + rescale : rescaling factor. + If None or 0, no rescaling is applied, otherwise we multiply the data by the value provided (before applying any other transformation) + samplewise_center : set each sample mean to 0. + samplewise_std_normalization : divide each input by its std. + epsilon : small position value for dividing standard deviation. + + Examples + -------- + >>> x = samplewise_norm(x, samplewise_center=True, samplewise_std_normalization=True) + >>> print(x.shape, np.mean(x), np.std(x)) + ... (160, 176, 1), 0.0, 1.0 + + Notes + ------ + When samplewise_center and samplewise_std_normalization are True. + + - For greyscale image, every pixels are subtracted and divided by the mean and std of whole image. + - For RGB image, every pixels are subtracted and divided by the mean and std of this pixel i.e. the mean and std of a pixel is 0 and 1. + """ + if rescale: + x *= rescale + + if x.shape[channel_index] == 1: + # greyscale + if samplewise_center: + x = x - np.mean(x) + if samplewise_std_normalization: + x = x / np.std(x) + return x + elif x.shape[channel_index] == 3: + # rgb + if samplewise_center: + x = x - np.mean(x, axis=channel_index, keepdims=True) + if samplewise_std_normalization: + x = x / (np.std(x, axis=channel_index, keepdims=True) + epsilon) + return x + else: + raise Exception("Unsupported channels %d" % x.shape[channel_index]) + +def featurewise_norm(x, mean=None, std=None, epsilon=1e-7): + """Normalize every pixels by the same given mean and std, which are usually + compute from all examples. + + Parameters + ----------- + x : numpy array + An image with dimension of [row, col, channel] (default). + mean : value for subtraction. + std : value for division. 
+    epsilon : small positive value added to the std to avoid division by zero.
+    """
+    if mean:
+        x = x - mean
+    if std:
+        x = x / (std + epsilon)
+    return x
+
+# whitening
+def get_zca_whitening_principal_components_img(X):
+    """Return the ZCA whitening principal components matrix.
+
+    Parameters
+    -----------
+    x : numpy array
+        Batch of images with dimension of [n_example, row, col, channel] (default).
+    """
+    flatX = np.reshape(X, (X.shape[0], X.shape[1] * X.shape[2] * X.shape[3]))
+    print("zca : computing sigma ..")
+    sigma = np.dot(flatX.T, flatX) / flatX.shape[0]
+    print("zca : computing U, S and V ..")
+    U, S, V = linalg.svd(sigma)
+    print("zca : computing principal components ..")
+    principal_components = np.dot(np.dot(U, np.diag(1. / np.sqrt(S + 10e-7))), U.T)
+    return principal_components
+
+def zca_whitening(x, principal_components):
+    """Apply ZCA whitening on an image by given principal components matrix.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    principal_components : matrix from ``get_zca_whitening_principal_components_img``.
+    """
+    # flatten the image before projecting it on the principal components
+    flatx = np.reshape(x, (x.size))
+    # print(principal_components.shape, x.shape)  # e.g. (28160, 28160), (160, 176, 1)
+    whitex = np.dot(flatx, principal_components)
+    x = np.reshape(whitex, (x.shape[0], x.shape[1], x.shape[2]))
+    return x
+
+# developing
+# def barrel_transform(x, intensity):
+#     # https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py
+#     # TODO
+#     pass
+#
+# def barrel_transform_multi(x, intensity):
+#     # https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py
+#     # TODO
+#     pass
+
+# channel shift
+def channel_shift(x, intensity, is_random=False, channel_index=2):
+    """Shift the channels of an image, randomly or non-randomly, see `numpy.rollaxis `_.
+
+    Parameters
+    -----------
+    x : numpy array
+        An image with dimension of [row, col, channel] (default).
+    intensity : float
+        Intensity of shifting.
+    is_random : boolean, default False
+        If True, randomly shift.
+    channel_index : int
+        Index of channel, default 2.
+    """
+    if is_random:
+        factor = np.random.uniform(-intensity, intensity)
+    else:
+        factor = intensity
+    x = np.rollaxis(x, channel_index, 0)
+    min_x, max_x = np.min(x), np.max(x)
+    channel_images = [np.clip(x_channel + factor, min_x, max_x)
+                      for x_channel in x]
+    x = np.stack(channel_images, axis=0)
+    x = np.rollaxis(x, 0, channel_index+1)
+    return x
+
+def channel_shift_multi(x, intensity, is_random=False, channel_index=2):
+    """Shift the channels of images with the same arguments, randomly or non-randomly, see `numpy.rollaxis `_ .
+    Usually used for image segmentation, where x=[X, Y] and X and Y should be matched.
+
+    Parameters
+    -----------
+    x : list of numpy array
+        List of images with dimension of [n_images, row, col, channel] (default).
+    others : see ``channel_shift``.
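+
+    Examples
+    --------
+    - A minimal usage sketch (``x1``, ``x2`` are assumed to be matched images):
+    >>> x1, x2 = channel_shift_multi([x1, x2], intensity=0.05, is_random=True)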
+ """ + if is_random: + factor = np.random.uniform(-intensity, intensity) + else: + factor = intensity + + results = [] + for data in x: + data = np.rollaxis(data, channel_index, 0) + min_x, max_x = np.min(data), np.max(data) + channel_images = [np.clip(x_channel + factor, min_x, max_x) + for x_channel in x] + data = np.stack(channel_images, axis=0) + data = np.rollaxis(x, 0, channel_index+1) + results.append( data ) + return np.asarray(results) + + +# manual transform +def transform_matrix_offset_center(matrix, x, y): + """Return transform matrix offset center. + + Parameters + ---------- + matrix : numpy array + Transform matrix + x, y : int + Size of image. + + Examples + -------- + - See ``rotation``, ``shear``, ``zoom``. + """ + o_x = float(x) / 2 + 0.5 + o_y = float(y) / 2 + 0.5 + offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]]) + reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]]) + transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix) + return transform_matrix + + +def apply_transform(x, transform_matrix, channel_index=2, fill_mode='nearest', cval=0.): + """Return transformed images by given transform_matrix from ``transform_matrix_offset_center``. + + Parameters + ---------- + x : numpy array + Batch of images with dimension of 3, [batch_size, row, col, channel]. + transform_matrix : numpy array + Transform matrix (offset center), can be generated by ``transform_matrix_offset_center`` + channel_index : int + Index of channel, default 2. + fill_mode : string + Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’ + + - `scipy ndimage affine_transform `_ + cval : scalar, optional + Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0 + + - `scipy ndimage affine_transform `_ + + Examples + -------- + - See ``rotation``, ``shift``, ``shear``, ``zoom``. + """ + x = np.rollaxis(x, channel_index, 0) + final_affine_matrix = transform_matrix[:2, :2] + final_offset = transform_matrix[:2, 2] + channel_images = [ndi.interpolation.affine_transform(x_channel, final_affine_matrix, + final_offset, order=0, mode=fill_mode, cval=cval) for x_channel in x] + x = np.stack(channel_images, axis=0) + x = np.rollaxis(x, 0, channel_index+1) + return x + + +def projective_transform_by_points(x, src, dst, map_args={}, output_shape=None, order=1, mode='constant', cval=0.0, clip=True, preserve_range=False): + """Projective transform by given coordinates, usually 4 coordinates. see `scikit-image `_. + + Parameters + ----------- + x : numpy array + An image with dimension of [row, col, channel] (default). + src : list or numpy + The original coordinates, usually 4 coordinates of (x, y). + dst : list or numpy + The coordinates after transformation, the number of coordinates is the same with src. + map_args : dict, optional + Keyword arguments passed to inverse_map. + output_shape : tuple (rows, cols), optional + Shape of the output image generated. By default the shape of the input image is preserved. Note that, even for multi-band images, only rows and columns need to be specified. + order : int, optional + The order of interpolation. The order has to be in the range 0-5: + - 0 Nearest-neighbor + - 1 Bi-linear (default) + - 2 Bi-quadratic + - 3 Bi-cubic + - 4 Bi-quartic + - 5 Bi-quintic + mode : {‘constant’, ‘edge’, ‘symmetric’, ‘reflect’, ‘wrap’}, optional + Points outside the boundaries of the input are filled according to the given mode. Modes match the behaviour of numpy.pad. 
+ cval : float, optional + Used in conjunction with mode ‘constant’, the value outside the image boundaries. + clip : bool, optional + Whether to clip the output to the range of values of the input image. This is enabled by default, since higher order interpolation may produce values outside the given input range. + preserve_range : bool, optional + Whether to keep the original range of values. Otherwise, the input image is converted according to the conventions of img_as_float. + + Examples + -------- + >>> Assume X is an image from CIFAR 10, i.e. shape == (32, 32, 3) + >>> src = [[0,0],[0,32],[32,0],[32,32]] + >>> dst = [[10,10],[0,32],[32,0],[32,32]] + >>> x = projective_transform_by_points(X, src, dst) + + References + ----------- + - `scikit-image : geometric transformations `_ + - `scikit-image : examples `_ + """ + if type(src) is list: # convert to numpy + src = np.array(src) + if type(dst) is list: + dst = np.array(dst) + if np.max(x)>1: # convert to [0, 1] + x = x/255 + + m = transform.ProjectiveTransform() + m.estimate(dst, src) + warped = transform.warp(x, m, map_args=map_args, output_shape=output_shape, order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range) + return warped + +# Numpy and PIL +def array_to_img(x, dim_ordering=(0,1,2), scale=True): + """Converts a numpy array to PIL image object (uint8 format). + + Parameters + ---------- + x : numpy array + A image with dimension of 3 and channels of 1 or 3. + dim_ordering : list or tuple of 3 int + Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). + scale : boolean, default is True + If True, converts image to [0, 255] from any range of value like [-1, 2]. + + References + ----------- + - `PIL Image.fromarray `_ + """ + from PIL import Image + # if dim_ordering == 'default': + # dim_ordering = K.image_dim_ordering() + # if dim_ordering == 'th': # theano + # x = x.transpose(1, 2, 0) + x = x.transpose(dim_ordering) + if scale: + x += max(-np.min(x), 0) + x_max = np.max(x) + if x_max != 0: + # print(x_max) + # x /= x_max + x = x / x_max + x *= 255 + if x.shape[2] == 3: + # RGB + return Image.fromarray(x.astype('uint8'), 'RGB') + elif x.shape[2] == 1: + # grayscale + return Image.fromarray(x[:, :, 0].astype('uint8'), 'L') + else: + raise Exception('Unsupported channel number: ', x.shape[2]) + + +## Sequence +def pad_sequences(sequences, maxlen=None, dtype='int32', + padding='pre', truncating='pre', value=0.): + """Pads each sequence to the same length: + the length of the longest sequence. + If maxlen is provided, any sequence longer + than maxlen is truncated to maxlen. + Truncation happens off either the beginning (default) or + the end of the sequence. + Supports post-padding and pre-padding (default). + + Parameters + ---------- + sequences : list of lists where each element is a sequence + maxlen : int, maximum length + dtype : type to cast the resulting sequence. + padding : 'pre' or 'post', pad either before or after each sequence. + truncating : 'pre' or 'post', remove values from sequences larger than + maxlen either in the beginning or in the end of the sequence + value : float, value to pad the sequences to the desired value. + + Returns + ---------- + x : numpy array with dimensions (number_of_sequences, maxlen) + + Examples + ---------- + >>> sequences = [[1,1,1,1,1],[2,2,2],[3,3]] + >>> sequences = pad_sequences(sequences, maxlen=None, dtype='int32', + ... padding='pre', truncating='pre', value=0.) + ... [[1 1 1 1 1] + ... [0 0 2 2 2] + ... 
[0 0 0 3 3]] + """ + lengths = [len(s) for s in sequences] + + nb_samples = len(sequences) + if maxlen is None: + maxlen = np.max(lengths) + + # take the sample shape from the first non empty sequence + # checking for consistency in the main loop below. + sample_shape = tuple() + for s in sequences: + if len(s) > 0: + sample_shape = np.asarray(s).shape[1:] + break + + x = (np.ones((nb_samples, maxlen) + sample_shape) * value).astype(dtype) + for idx, s in enumerate(sequences): + if len(s) == 0: + continue # empty list was found + if truncating == 'pre': + trunc = s[-maxlen:] + elif truncating == 'post': + trunc = s[:maxlen] + else: + raise ValueError('Truncating type "%s" not understood' % truncating) + + # check `trunc` has expected shape + trunc = np.asarray(trunc, dtype=dtype) + if trunc.shape[1:] != sample_shape: + raise ValueError('Shape of sample %s of sequence at position %s is different from expected shape %s' % + (trunc.shape[1:], idx, sample_shape)) + + if padding == 'post': + x[idx, :len(trunc)] = trunc + elif padding == 'pre': + x[idx, -len(trunc):] = trunc + else: + raise ValueError('Padding type "%s" not understood' % padding) + return x + +## Text +# see tensorlayer.nlp +## Tensor Opt def distorted_images(images=None, height=24, width=24): """Distort images for generating more training data. diff --git a/tensorlayer/utils.py b/tensorlayer/utils.py index cbe2091..c79e910 100755 --- a/tensorlayer/utils.py +++ b/tensorlayer/utils.py @@ -7,8 +7,7 @@ def fit(sess, network, train_op, cost, X_train, y_train, x, y_, acc=None, batch_size=100, n_epoch=100, print_freq=5, X_val=None, y_val=None, eval_train=True): - """ - Traing a given non time-series network by the given cost function, training data, batch_size, n_epoch etc. + """Traing a given non time-series network by the given cost function, training data, batch_size, n_epoch etc. Parameters ---------- @@ -38,7 +37,7 @@ def fit(sess, network, train_op, cost, X_train, y_train, x, y_, acc=None, batch_ the input of validation data y_val : numpy array or None the target of validation data - eval_train : boolen + eval_train : boolean if X_val and y_val are not None, it refects whether to evaluate the training data Examples diff --git a/tensorlayer/visualize.py b/tensorlayer/visualize.py index 25958f0..79b5f47 100755 --- a/tensorlayer/visualize.py +++ b/tensorlayer/visualize.py @@ -2,8 +2,10 @@ # -*- coding: utf8 -*- - +import matplotlib +matplotlib.use('Agg') import matplotlib.pyplot as plt +# import matplotlib.pyplot as plt import numpy as np import os @@ -18,7 +20,7 @@ def W(W=None, second=10, saveable=True, shape=[28,28], name='mnist', fig_idx=239 The weight matrix second : int The display second(s) for the image(s), if saveable is False. - saveable : boolen + saveable : boolean Save or plot the figure. shape : a list with 2 int The shape of feature image, MNIST is [28, 80]. 
@@ -58,6 +60,7 @@ def W(W=None, second=10, saveable=True, shape=[28,28], name='mnist', fig_idx=239 # feature = np.zeros_like(feature) plt.imshow(np.reshape(feature ,(shape[0],shape[1])), cmap='gray', interpolation="nearest")#, vmin=np.min(feature), vmax=np.max(feature)) + # plt.title(name) # ------------------------------------------------------------ # plt.imshow(np.reshape(W[:,count-1] ,(np.sqrt(size),np.sqrt(size))), cmap='gray', interpolation="nearest") plt.gca().xaxis.set_major_locator(plt.NullLocator()) # distable tick @@ -69,7 +72,7 @@ def W(W=None, second=10, saveable=True, shape=[28,28], name='mnist', fig_idx=239 plt.draw() plt.pause(second) -def frame(I=None, second=5, saveable=True, name='frame', fig_idx=12836): +def frame(I=None, second=5, saveable=True, name='frame', cmap=None, fig_idx=12836): """Display a frame(image). Make sure OpenAI Gym render() is disable before using it. Parameters @@ -78,10 +81,12 @@ def frame(I=None, second=5, saveable=True, name='frame', fig_idx=12836): The image second : int The display second(s) for the image(s), if saveable is False. - saveable : boolen + saveable : boolean Save or plot the figure. name : a string A name to save the image, if saveable is True. + cmap : None or string + 'gray' for greyscale, None for default, etc. fig_idx : int matplotlib figure index. @@ -95,7 +100,11 @@ def frame(I=None, second=5, saveable=True, name='frame', fig_idx=12836): plt.ion() fig = plt.figure(fig_idx) # show all feature images - plt.imshow(I) + if len(I.shape) and I.shape[-1]==1: # (10,10,1) --> (10,10) + I = I[:,:,0] + + plt.imshow(I, cmap) + plt.title(name) # plt.gca().xaxis.set_major_locator(plt.NullLocator()) # distable tick # plt.gca().yaxis.set_major_locator(plt.NullLocator()) @@ -114,7 +123,7 @@ def CNN2d(CNN=None, second=10, saveable=True, name='cnn', fig_idx=3119362): The image. e.g: 64 5x5 RGB images can be (5, 5, 3, 64). second : int The display second(s) for the image(s), if saveable is False. - saveable : boolen + saveable : boolean Save or plot the figure. name : a string A name to save the image, if saveable is True. @@ -176,7 +185,7 @@ def images2d(images=None, second=10, saveable=True, name='images', dtype=None, The images. second : int The display second(s) for the image(s), if saveable is False. - saveable : boolen + saveable : boolean Save or plot the figure. name : a string A name to save the image, if saveable is True. @@ -216,9 +225,11 @@ def images2d(images=None, second=10, saveable=True, name='images', dtype=None, plt.imshow( np.reshape(images[count-1,:,:], (n_row, n_col)), cmap='gray', interpolation="nearest") + # plt.title(name) elif n_color == 3: plt.imshow(images[count-1,:,:], cmap='gray', interpolation="nearest") + # plt.title(name) else: raise Exception("Unknown n_color") plt.gca().xaxis.set_major_locator(plt.NullLocator()) # distable tick @@ -244,7 +255,7 @@ def tsne_embedding(embeddings, reverse_dictionary, plot_only=500, The number of examples to plot, choice the most common words. second : int The display second(s) for the image(s), if saveable is False. - saveable : boolen + saveable : boolean Save or plot the figure. name : a string A name to save the image, if saveable is True. 
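Taken together, the new ``tensorlayer/prepro.py`` utilities above form a threaded augmentation pipeline. Below is a minimal sketch of how they are meant to compose; ``X`` is an assumed batch of greyscale images of shape [batch_size, row, col, 1], and the shapes and parameter values are made up for illustration:

    import numpy as np
    from tensorlayer.prepro import threading_data, rotation, flip_axis, zoom

    # X: an assumed batch of greyscale images, shape [batch_size, row, col, 1]
    X = np.random.uniform(size=(8, 64, 64, 1)).astype(np.float32)

    def distort(x):
        # each worker thread receives one image of shape [row, col, 1]
        x = flip_axis(x, axis=1, is_random=True)             # random left-right flip
        x = rotation(x, rg=10, is_random=True)               # random rotation in [-10, 10] degrees
        x = zoom(x, zoom_range=(0.9, 1.1), is_random=True)   # random zoom in/out
        return x

    # one thread per image; returns a stacked numpy array [batch_size, row, col, 1]
    X_aug = threading_data(X, distort)
    print(X_aug.shape)  # (8, 64, 64, 1)

Note that ``threading_data`` collects results from a queue, so the augmented images are not guaranteed to come back in the input order.
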
diff --git a/tensorlayer1.2.2/__init__.py b/tensorlayer1.2.2/__init__.py new file mode 100755 index 0000000..aaf3fa8 --- /dev/null +++ b/tensorlayer1.2.2/__init__.py @@ -0,0 +1,27 @@ +""" +Deep learning and Reinforcement learning library for Researchers and Engineers +""" +# from __future__ import absolute_import + + +try: + install_instr = "Please make sure you install a recent enough version of TensorFlow." + import tensorflow +except ImportError: + raise ImportError("__init__.py : Could not import TensorFlow." + install_instr) + +from . import activation +from . import cost +from . import files +# from . import init +from . import iterate +from . import layers +from . import ops +from . import utils +from . import visualize +from . import prepro # was preprocesse +from . import nlp +from . import rein + + +__version__ = "1.2.3" diff --git a/tensorlayer1.2.2/activation.py b/tensorlayer1.2.2/activation.py new file mode 100755 index 0000000..5dfc254 --- /dev/null +++ b/tensorlayer1.2.2/activation.py @@ -0,0 +1,109 @@ +#! /usr/bin/python +# -*- coding: utf8 -*- + + + +import tensorflow as tf + +def identity(x, name=None): + """The identity activation function + + Parameters + ---------- + x : a tensor input + input(s) + + + Returns + -------- + A `Tensor` with the same type as `x`. + """ + return x + +# Shortcut +linear = identity + +def ramp(x=None, v_min=0, v_max=1, name=None): + """The ramp activation function. + + Parameters + ---------- + x : a tensor input + input(s) + v_min : float + if input(s) smaller than v_min, change inputs to v_min + v_max : float + if input(s) greater than v_max, change inputs to v_max + name : a string or None + An optional name to attach to this activation function. + + + Returns + -------- + A `Tensor` with the same type as `x`. + """ + return tf.clip_by_value(x, clip_value_min=v_min, clip_value_max=v_max, name=name) + +def leaky_relu(x=None, alpha=0.1, name="LeakyReLU"): + """The LeakyReLU. + + Modified version of ReLU, introducing a nonzero gradient for negative + input. + + Parameters + ---------- + x : A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`, + `int16`, or `int8`. + alpha : `float`. slope. + name : a string or None + An optional name to attach to this activation function. + + References + ------------ + - `Rectifier Nonlinearities Improve Neural Network Acoustic Models, Maas et al. (2013) `_ + """ + with tf.name_scope(name) as scope: + # x = tf.nn.relu(x) + # m_x = tf.nn.relu(-x) + # x -= alpha * m_x + x = tf.maximum(x, alpha * x) + return x + +#Shortcut +lrelu = leaky_relu + + +## Alternatively we can use tl.layers.PReluLayer() +def prelu(x, channel_shared=False, W_init=tf.constant_initializer(value=0.0), W_init_args={}, restore=True, name="PReLU"): + """ Parametric Rectified Linear Unit. + + Parameters + ---------- + x : A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`, + `int16`, or `int8`. + channel_shared : `bool`. Single weight is shared by all channels + W_init: weights initializer, default zero constant. + The initializer for initializing the alphas. + restore : `bool`. Restore or not alphas + name : A name for this activation op (optional). + + Returns + ------- + A `Tensor` with the same type as `x`. 
+
+    References
+    -----------
+    - `Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification `_
+    """
+    print(' prelu: untested !!!')
+    if channel_shared:
+        w_shape = (1,)
+    else:
+        w_shape = int(x.get_shape()[-1])    # number of input channels
+
+    with tf.name_scope(name) as scope:
+        # the W_init argument initializes the per-channel alphas
+        alphas = tf.get_variable(name='alphas', shape=w_shape, initializer=W_init, **W_init_args)
+        x = tf.nn.relu(x) + tf.mul(alphas, (x - tf.abs(x))) * 0.5
+
+    return x
diff --git a/tensorlayer1.2.2/cost.py b/tensorlayer1.2.2/cost.py
new file mode 100755
index 0000000..6e40d7a
--- /dev/null
+++ b/tensorlayer1.2.2/cost.py
@@ -0,0 +1,376 @@
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+
+
+
+import tensorflow as tf
+import numbers
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import standard_ops
+
+## Cost Functions
+def cross_entropy(output, target, name="cross_entropy_loss"):
+    """Returns the TensorFlow expression of the cross-entropy of two distributions;
+    it applies softmax internally.
+
+    Parameters
+    ----------
+    output : Tensorflow variable
+        A distribution with shape: [batch_size, n_feature].
+    target : Tensorflow variable
+        A distribution with shape: [batch_size, n_feature].
+
+    Examples
+    --------
+    >>> ce = tl.cost.cross_entropy(y_logits, y_target_logits)
+
+    References
+    -----------
+    - About cross-entropy: `wiki `_.\n
+    - The code is borrowed from: `here `_.
+    """
+    with tf.name_scope(name):
+        # net_output_tf = output
+        # target_tf = target
+        # cross_entropy = tf.add(tf.mul(tf.log(net_output_tf, name=None), target_tf),
+        #                        tf.mul(tf.log(1 - net_output_tf), (1 - target_tf)))
+        # return -1 * tf.reduce_mean(tf.reduce_sum(cross_entropy, 1), name='cross_entropy_mean')
+        return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(output, target))
+
+# Undocumented
+def binary_cross_entropy(preds, targets, name=None):
+    """Computes binary cross entropy given `preds`.
+
+    For brevity, let `x = preds`, `z = targets`. The logistic loss is
+
+        loss(x, z) = - sum_i (z[i] * log(x[i]) + (1 - z[i]) * log(1 - x[i]))
+
+    Parameters
+    ----------
+    preds : A `Tensor` of type `float32` or `float64`.
+    targets : A `Tensor` of the same type and shape as `preds`.
+    """
+    print("Undocumented")
+    from tensorflow.python.framework import ops
+    eps = 1e-12
+    with ops.op_scope([preds, targets], name, "bce_loss") as name:
+        preds = ops.convert_to_tensor(preds, name="preds")
+        targets = ops.convert_to_tensor(targets, name="targets")
+        return tf.reduce_mean(-(targets * tf.log(preds + eps) +
+                              (1. - targets) * tf.log(1. - preds + eps)))
+
+
+def mean_squared_error(output, target):
+    """Return the TensorFlow expression of the mean-square-error of two distributions.
+
+    Parameters
+    ----------
+    output : tensorflow variable
+        A distribution with shape: [batch_size, n_feature].
+    target : tensorflow variable
+        A distribution with shape: [batch_size, n_feature].
+    """
+    with tf.name_scope("mean_squared_error_loss"):
+        mse = tf.reduce_sum(tf.squared_difference(output, target), reduction_indices = 1)
+        return tf.reduce_mean(mse)
+
+def cross_entropy_seq(logits, target_seqs, batch_size=1, num_steps=None):
+    """Returns the expression of the cross-entropy of two sequences; it applies
+    softmax internally. Normally used for fixed-length RNN outputs.
+
+    Parameters
+    ----------
+    logits : Tensorflow variable
+        2D tensor, ``network.outputs``, [batch_size*n_steps (n_examples), number of output units]
+    target_seqs : Tensorflow variable
+        target : 2D tensor [batch_size, n_steps], if the number of steps is dynamic, please use ``cross_entropy_seq_with_mask`` instead.
+    batch_size : an int, default is 1
+        RNN batch_size, number of concurrent processes, divide the loss by batch_size.
+    num_steps : an int
+        sequence length
+
+    Examples
+    --------
+    >>> see PTB tutorial for more details
+    >>> input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
+    >>> targets = tf.placeholder(tf.int32, [batch_size, num_steps])
+    >>> cost = tl.cost.cross_entropy_seq(network.outputs, targets, batch_size, num_steps)
+    """
+    loss = tf.nn.seq2seq.sequence_loss_by_example(
+        [logits],
+        [tf.reshape(target_seqs, [-1])],
+        [tf.ones([batch_size * num_steps])])
+    cost = tf.reduce_sum(loss) / batch_size
+    return cost
+
+
+def cross_entropy_seq_with_mask(logits, target_seqs, input_mask, return_details=False):
+    """Returns the expression of the cross-entropy of two sequences; it applies
+    softmax internally. Normally used for dynamic RNN outputs.
+
+    Parameters
+    -----------
+    logits : network identity outputs
+        2D tensor, ``network.outputs``, [batch_size, number of output units].
+    target_seqs : int of tensor, like word ID.
+        [batch_size, ?]
+    input_mask : the mask to compute loss
+        The same size as target_seqs, normally 0 and 1.
+    return_details : boolean
+        If False (default), only returns the loss.
+
+        If True, returns the loss, losses, weights and targets (reshaped to one vector).
+
+    Examples
+    --------
+    - see Image Captioning Example.
+    """
+    print("  cross_entropy_seq_with_mask : Undocumented")
+    targets = tf.reshape(target_seqs, [-1])                 # to one vector
+    weights = tf.to_float(tf.reshape(input_mask, [-1]))     # to one vector like targets
+    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, targets)
+    loss = tf.div(tf.reduce_sum(tf.mul(losses, weights)),   # loss from mask. reduce_sum before element-wise mul with mask !!
+                  tf.reduce_sum(weights),
+                  name="seq_loss_with_mask")
+    if return_details:
+        return loss, losses, weights, targets
+    else:
+        return loss
+
+## Regularization Functions
+def li_regularizer(scale):
+    """li regularization removes the neurons of the previous layer; `i` represents `inputs`.\n
+    Returns a function that can be used to apply group li regularization to weights.\n
+    The implementation follows `TensorFlow contrib `_.
+
+    Parameters
+    ----------
+    scale : float
+        A scalar multiplier `Tensor`. 0.0 disables the regularizer.
+
+    Returns
+    --------
+    A function with signature `li(weights, name=None)` that applies L1 regularization.
+
+    Raises
+    ------
+    ValueError : if scale is outside of the range [0.0, 1.0] or if scale is not a float.
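+
+    Examples
+    ---------
+    >>> # a small usage sketch (assumes an existing `network` and scalar `cost`):
+    >>> # group-li penalty on the rows of the first weight matrix
+    >>> li = tl.cost.li_regularizer(0.001)
+    >>> cost = cost + li(network.all_params[0])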
+ """ + import numbers + from tensorflow.python.framework import ops + from tensorflow.python.ops import standard_ops + # from tensorflow.python.platform import tf_logging as logging + + if isinstance(scale, numbers.Integral): + raise ValueError('scale cannot be an integer: %s' % scale) + if isinstance(scale, numbers.Real): + if scale < 0.: + raise ValueError('Setting a scale less than 0 on a regularizer: %g' % + scale) + if scale >= 1.: + raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % + scale) + if scale == 0.: + logging.info('Scale of 0 disables regularizer.') + return lambda _, name=None: None + + def li(weights, name=None): + """Applies li regularization to weights.""" + with ops.op_scope([weights], name, 'li_regularizer') as scope: + my_scale = ops.convert_to_tensor(scale, + dtype=weights.dtype.base_dtype, + name='scale') + return standard_ops.mul( + my_scale, + standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 1))), + name=scope) + return li + +def lo_regularizer(scale): + """lo regularization removes the neurons of current layer, `o` represents `outputs`\n + Returns a function that can be used to apply group lo regularization to weights.\n + The implementation follows `TensorFlow contrib `_. + + Parameters + ---------- + scale : float + A scalar multiplier `Tensor`. 0.0 disables the regularizer. + + Returns + ------- + A function with signature `lo(weights, name=None)` that apply Lo regularization. + + Raises + ------ + ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float. + """ + import numbers + from tensorflow.python.framework import ops + from tensorflow.python.ops import standard_ops + # from tensorflow.python.platform import tf_logging as logging + + if isinstance(scale, numbers.Integral): + raise ValueError('scale cannot be an integer: %s' % scale) + if isinstance(scale, numbers.Real): + if scale < 0.: + raise ValueError('Setting a scale less than 0 on a regularizer: %g' % + scale) + if scale >= 1.: + raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % + scale) + if scale == 0.: + logging.info('Scale of 0 disables regularizer.') + return lambda _, name=None: None + + def lo(weights, name=None): + """Applies group column regularization to weights.""" + with ops.op_scope([weights], name, 'lo_regularizer') as scope: + my_scale = ops.convert_to_tensor(scale, + dtype=weights.dtype.base_dtype, + name='scale') + return standard_ops.mul( + my_scale, + standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 0))), + name=scope) + return lo + +def maxnorm_regularizer(scale=1.0): + """Max-norm regularization returns a function that can be used + to apply max-norm regularization to weights. + About max-norm: `wiki `_.\n + The implementation follows `TensorFlow contrib `_. + + Parameters + ---------- + scale : float + A scalar multiplier `Tensor`. 0.0 disables the regularizer. + + Returns + --------- + A function with signature `mn(weights, name=None)` that apply Lo regularization. + + Raises + -------- + ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float. 
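+
+    Examples
+    ---------
+    >>> # a small usage sketch (assumes an existing `network` and scalar `cost`):
+    >>> # penalize the largest absolute weight in the matrix
+    >>> max_norm = tl.cost.maxnorm_regularizer(1.0)
+    >>> cost = cost + max_norm(network.all_params[0])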
+ """ + import numbers + from tensorflow.python.framework import ops + from tensorflow.python.ops import standard_ops + + if isinstance(scale, numbers.Integral): + raise ValueError('scale cannot be an integer: %s' % scale) + if isinstance(scale, numbers.Real): + if scale < 0.: + raise ValueError('Setting a scale less than 0 on a regularizer: %g' % + scale) + # if scale >= 1.: + # raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % + # scale) + if scale == 0.: + logging.info('Scale of 0 disables regularizer.') + return lambda _, name=None: None + + def mn(weights, name=None): + """Applies max-norm regularization to weights.""" + with ops.op_scope([weights], name, 'maxnorm_regularizer') as scope: + my_scale = ops.convert_to_tensor(scale, + dtype=weights.dtype.base_dtype, + name='scale') + return standard_ops.mul(my_scale, standard_ops.reduce_max(standard_ops.abs(weights)), name=scope) + return mn + +def maxnorm_o_regularizer(scale): + """Max-norm output regularization removes the neurons of current layer.\n + Returns a function that can be used to apply max-norm regularization to each column of weight matrix.\n + The implementation follows `TensorFlow contrib `_. + + Parameters + ---------- + scale : float + A scalar multiplier `Tensor`. 0.0 disables the regularizer. + + Returns + --------- + A function with signature `mn_o(weights, name=None)` that apply Lo regularization. + + Raises + --------- + ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float. + """ + import numbers + from tensorflow.python.framework import ops + from tensorflow.python.ops import standard_ops + + if isinstance(scale, numbers.Integral): + raise ValueError('scale cannot be an integer: %s' % scale) + if isinstance(scale, numbers.Real): + if scale < 0.: + raise ValueError('Setting a scale less than 0 on a regularizer: %g' % + scale) + # if scale >= 1.: + # raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % + # scale) + if scale == 0.: + logging.info('Scale of 0 disables regularizer.') + return lambda _, name=None: None + + def mn_o(weights, name=None): + """Applies max-norm regularization to weights.""" + with ops.op_scope([weights], name, 'maxnorm_o_regularizer') as scope: + my_scale = ops.convert_to_tensor(scale, + dtype=weights.dtype.base_dtype, + name='scale') + return standard_ops.mul(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 0)), name=scope) + return mn_o + +def maxnorm_i_regularizer(scale): + """Max-norm input regularization removes the neurons of previous layer.\n + Returns a function that can be used to apply max-norm regularization to each row of weight matrix.\n + The implementation follows `TensorFlow contrib `_. + + Parameters + ---------- + scale : float + A scalar multiplier `Tensor`. 0.0 disables the regularizer. + + Returns + --------- + A function with signature `mn_i(weights, name=None)` that apply Lo regularization. + + Raises + --------- + ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float. 
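+
+    Examples
+    ---------
+    >>> # a small usage sketch (assumes an existing `network` and scalar `cost`):
+    >>> # sum of per-row maxima, i.e. one penalty per input neuron
+    >>> mn_i = tl.cost.maxnorm_i_regularizer(1.0)
+    >>> cost = cost + mn_i(network.all_params[0])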
+    """
+    import numbers
+    from tensorflow.python.framework import ops
+    from tensorflow.python.ops import standard_ops
+    from tensorflow.python.platform import tf_logging as logging    # used when scale == 0.
+
+    if isinstance(scale, numbers.Integral):
+        raise ValueError('scale cannot be an integer: %s' % scale)
+    if isinstance(scale, numbers.Real):
+        if scale < 0.:
+            raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
+                             scale)
+        # if scale >= 1.:
+        #   raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
+        #                    scale)
+        if scale == 0.:
+            logging.info('Scale of 0 disables regularizer.')
+            return lambda _, name=None: None
+
+    def mn_i(weights, name=None):
+        """Applies max-norm regularization to weights."""
+        with ops.op_scope([weights], name, 'maxnorm_i_regularizer') as scope:
+            my_scale = ops.convert_to_tensor(scale,
+                                             dtype=weights.dtype.base_dtype,
+                                             name='scale')
+            return standard_ops.mul(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 1)), name=scope)
+    return mn_i
+
+
+
+
+
+#
diff --git a/tensorlayer1.2.2/files.py b/tensorlayer1.2.2/files.py
new file mode 100755
index 0000000..db2a230
--- /dev/null
+++ b/tensorlayer1.2.2/files.py
@@ -0,0 +1,812 @@
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+
+
+import tensorflow as tf
+import os
+import numpy as np
+import sys
+from . import visualize
+from . import nlp
+import collections
+from six.moves import xrange
+import six
+import re
+from six.moves import urllib
+from tensorflow.python.platform import gfile
+import tarfile
+import gzip
+
+## Load dataset functions
+def load_mnist_dataset(shape=(-1,784)):
+    """Automatically download the MNIST dataset
+    and return the training, validation and test sets with 50000, 10000 and 10000
+    digit images respectively.
+
+    Parameters
+    ----------
+    shape : tuple
+        The shape of digit images
+
+    Examples
+    --------
+    >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1,784))
+    >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 28, 28, 1))
+    """
+    # We first define a download function, supporting both Python 2 and 3.
+    if sys.version_info[0] == 2:
+        from urllib import urlretrieve
+    else:
+        from urllib.request import urlretrieve
+
+    def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
+        print("Downloading %s" % filename)
+        urlretrieve(source + filename, filename)
+
+    # We then define functions for loading MNIST images and labels.
+    # For convenience, they also download the requested files if needed.
+    import gzip
+
+    def load_mnist_images(filename):
+        if not os.path.exists(filename):
+            download(filename)
+        # Read the inputs in Yann LeCun's binary format.
+        with gzip.open(filename, 'rb') as f:
+            data = np.frombuffer(f.read(), np.uint8, offset=16)
+        # The inputs are vectors now, we reshape them to monochrome 2D images,
+        # following the shape convention: (examples, channels, rows, columns)
+        data = data.reshape(shape)
+        # data = data.reshape(-1, 1, 28, 28) # for lasagne
+        # data = data.reshape(-1, 28, 28, 1) # for tensorflow
+        # data = data.reshape(-1, 784)       # for tensorflow
+        # The inputs come as bytes, we convert them to float32 in range [0,1].
+        # (Actually to range [0, 255/256], for compatibility to the version
+        # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
+        return data / np.float32(256)
+
+    def load_mnist_labels(filename):
+        if not os.path.exists(filename):
+            download(filename)
+        # Read the labels in Yann LeCun's binary format.
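+        # (offset=8 skips the IDX label-file header: a 4-byte magic number
+        # and a 4-byte item count. The image loader above uses offset=16
+        # because its header also carries 4-byte row and column counts.)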
+        with gzip.open(filename, 'rb') as f:
+            data = np.frombuffer(f.read(), np.uint8, offset=8)
+        # The labels are vectors of integers now, that's exactly what we want.
+        return data
+
+    # We can now download and read the training and test set images and labels.
+    ## you may want to change the path
+    data_dir = ''   #os.getcwd() + '/lasagne_tutorial/'
+    # print('data_dir > %s' % data_dir)
+
+    X_train = load_mnist_images(data_dir+'train-images-idx3-ubyte.gz')
+    y_train = load_mnist_labels(data_dir+'train-labels-idx1-ubyte.gz')
+    X_test = load_mnist_images(data_dir+'t10k-images-idx3-ubyte.gz')
+    y_test = load_mnist_labels(data_dir+'t10k-labels-idx1-ubyte.gz')
+
+    # We reserve the last 10000 training examples for validation.
+    X_train, X_val = X_train[:-10000], X_train[-10000:]
+    y_train, y_val = y_train[:-10000], y_train[-10000:]
+
+    ## you may want to plot one example
+    # print('X_train[0][0] >', X_train[0][0].shape, type(X_train[0][0]))    # for lasagne
+    # print('X_train[0] >', X_train[0].shape, type(X_train[0]))             # for tensorflow
+    # # exit()
+    # # [[..],[..]] (28, 28) numpy.ndarray
+    # # plt.imshow only supports the (28, 28) format, not (1, 28, 28), hence the [0][0]
+    # fig = plt.figure()
+    # #plotwindow = fig.add_subplot(111)
+    # # plt.imshow(X_train[0][0], cmap='gray')              # for lasagne (-1, 1, 28, 28)
+    # plt.imshow(X_train[0].reshape(28,28), cmap='gray')    # for tensorflow (-1, 28, 28, 1)
+    # plt.title('A training image')
+    # plt.show()
+
+    # We just return all the arrays in order, as expected in main().
+    # (It doesn't matter how we do this as long as we can read them again.)
+    X_train = np.asarray(X_train, dtype=np.float32)
+    y_train = np.asarray(y_train, dtype=np.int32)
+    X_val = np.asarray(X_val, dtype=np.float32)
+    y_val = np.asarray(y_val, dtype=np.int32)
+    X_test = np.asarray(X_test, dtype=np.float32)
+    y_test = np.asarray(y_test, dtype=np.int32)
+    return X_train, y_train, X_val, y_val, X_test, y_test
+
+def load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False, second=3):
+    """The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with
+    6000 images per class. There are 50000 training images and 10000 test images.
+
+    The dataset is divided into five training batches and one test batch, each with
+    10000 images. The test batch contains exactly 1000 randomly-selected images from
+    each class. The training batches contain the remaining images in random order,
+    but some training batches may contain more images from one class than another.
+    Between them, the training batches contain exactly 5000 images from each class.
+
+    Parameters
+    ----------
+    shape : tuple
+        The shape of digit images: e.g. (-1, 3, 32, 32) , (-1, 32, 32, 3) , (-1, 32*32*3)
+    plotable : True, False
+        Whether to plot some image examples.
+    second : int
+        If ``plotable`` is True, ``second`` is the display time.
+
+    Examples
+    --------
+    >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=True)
+
+    Notes
+    ------
+    CIFAR-10 images can only be displayed without color change under uint8.
+ >>> X_train = np.asarray(X_train, dtype=np.uint8) + >>> plt.ion() + >>> fig = plt.figure(1232) + >>> count = 1 + >>> for row in range(10): + >>> for col in range(10): + >>> a = fig.add_subplot(10, 10, count) + >>> plt.imshow(X_train[count-1], interpolation='nearest') + >>> plt.gca().xaxis.set_major_locator(plt.NullLocator()) # 不显示刻度(tick) + >>> plt.gca().yaxis.set_major_locator(plt.NullLocator()) + >>> count = count + 1 + >>> plt.draw() + >>> plt.pause(3) + + References + ---------- + - `CIFAR website `_ + - `Data download link `_ + - `Code references `_ + """ + import sys + import pickle + import numpy as np + + + # We first define a download function, supporting both Python 2 and 3. + filename = 'cifar-10-python.tar.gz' + if sys.version_info[0] == 2: + from urllib import urlretrieve + else: + from urllib.request import urlretrieve + + def download(filename, source='https://www.cs.toronto.edu/~kriz/'): + print("Downloading %s" % filename) + urlretrieve(source + filename, filename) + + # After downloading the cifar-10-python.tar.gz, we need to unzip it. + import tarfile + def un_tar(file_name): + print("Extracting %s" % file_name) + tar = tarfile.open(file_name) + names = tar.getnames() + # if os.path.isdir(file_name + "_files"): + # pass + # else: + # os.mkdir(file_name + "_files") + for name in names: + tar.extract(name) #, file_name.split('.')[0]) + tar.close() + print("Extracted to %s" % names[0]) + + + if not os.path.exists('cifar-10-batches-py'): + download(filename) + un_tar(filename) + + + def unpickle(file): + fp = open(file, 'rb') + if sys.version_info.major == 2: + data = pickle.load(fp) + elif sys.version_info.major == 3: + data = pickle.load(fp, encoding='latin-1') + fp.close() + return data + + X_train = None + y_train = [] + + path = '' # you can set a dir to the data here. 
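+    # Each pickled batch stores a 10000x3072 uint8 array under the key 'data'
+    # (3072 = 3 channels x 32 x 32, channel-major: all red values first) and a
+    # list of 10000 labels under 'labels'; this is why the reshapes and
+    # transposes below differ per target shape.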
+ + for i in range(1,6): + data_dic = unpickle(path+"cifar-10-batches-py/data_batch_{}".format(i)) + if i == 1: + X_train = data_dic['data'] + else: + X_train = np.vstack((X_train, data_dic['data'])) + y_train += data_dic['labels'] + + test_data_dic = unpickle(path+"cifar-10-batches-py/test_batch") + X_test = test_data_dic['data'] + y_test = np.array(test_data_dic['labels']) + + if shape == (-1, 3, 32, 32): + X_test = X_test.reshape(shape) + X_train = X_train.reshape(shape) + # X_train = np.transpose(X_train, (0, 1, 3, 2)) + elif shape == (-1, 32, 32, 3): + X_test = X_test.reshape(shape, order='F') + X_train = X_train.reshape(shape, order='F') + X_test = np.transpose(X_test, (0, 2, 1, 3)) + X_train = np.transpose(X_train, (0, 2, 1, 3)) + else: + X_test = X_test.reshape(shape) + X_train = X_train.reshape(shape) + + y_train = np.array(y_train) + + if plotable == True: + print('\nCIFAR-10') + import matplotlib.pyplot as plt + fig = plt.figure(1) + + print('Shape of a training image: X_train[0]',X_train[0].shape) + + plt.ion() # interactive mode + count = 1 + for row in range(10): + for col in range(10): + a = fig.add_subplot(10, 10, count) + if shape == (-1, 3, 32, 32): + # plt.imshow(X_train[count-1], interpolation='nearest') + plt.imshow(np.transpose(X_train[count-1], (1, 2, 0)), interpolation='nearest') + # plt.imshow(np.transpose(X_train[count-1], (2, 1, 0)), interpolation='nearest') + elif shape == (-1, 32, 32, 3): + plt.imshow(X_train[count-1], interpolation='nearest') + # plt.imshow(np.transpose(X_train[count-1], (1, 0, 2)), interpolation='nearest') + else: + raise Exception("Do not support the given 'shape' to plot the image examples") + plt.gca().xaxis.set_major_locator(plt.NullLocator()) # 不显示刻度(tick) + plt.gca().yaxis.set_major_locator(plt.NullLocator()) + count = count + 1 + plt.draw() # interactive mode + plt.pause(3) # interactive mode + + print("X_train:",X_train.shape) + print("y_train:",y_train.shape) + print("X_test:",X_test.shape) + print("y_test:",y_test.shape) + + X_train = np.asarray(X_train, dtype=np.float32) + X_test = np.asarray(X_test, dtype=np.float32) + y_train = np.asarray(y_train, dtype=np.int32) + y_test = np.asarray(y_test, dtype=np.int32) + + return X_train, y_train, X_test, y_test + +def load_ptb_dataset(): + """Penn TreeBank (PTB) dataset is used in many LANGUAGE MODELING papers, + including "Empirical Evaluation and Combination of Advanced Language + Modeling Techniques", "Recurrent Neural Network Regularization". + + It consists of 929k training words, 73k validation words, and 82k test + words. It has 10k words in its vocabulary. + + In "Recurrent Neural Network Regularization", they trained regularized LSTMs + of two sizes; these are denoted the medium LSTM and large LSTM. Both LSTMs + have two layers and are unrolled for 35 steps. They initialize the hidden + states to zero. They then use the final hidden states of the current + minibatch as the initial hidden state of the subsequent minibatch + (successive minibatches sequentially traverse the training set). + The size of each minibatch is 20. + + The medium LSTM has 650 units per layer and its parameters are initialized + uniformly in [−0.05, 0.05]. They apply 50% dropout on the non-recurrent + connections. They train the LSTM for 39 epochs with a learning rate of 1, + and after 6 epochs they decrease it by a factor of 1.2 after each epoch. + They clip the norm of the gradients (normalized by minibatch size) at 5. 
+ + The large LSTM has 1500 units per layer and its parameters are initialized + uniformly in [−0.04, 0.04]. We apply 65% dropout on the non-recurrent + connections. They train the model for 55 epochs with a learning rate of 1; + after 14 epochs they start to reduce the learning rate by a factor of 1.15 + after each epoch. They clip the norm of the gradients (normalized by + minibatch size) at 10. + + Returns + -------- + train_data, valid_data, test_data, vocabulary size + + Examples + -------- + >>> train_data, valid_data, test_data, vocab_size = tl.files.load_ptb_dataset() + + Code References + --------------- + - ``tensorflow.models.rnn.ptb import reader`` + + Download Links + --------------- + - `Manual download `_ + """ + # We first define a download function, supporting both Python 2 and 3. + filename = 'simple-examples.tgz' + if sys.version_info[0] == 2: + from urllib import urlretrieve + else: + from urllib.request import urlretrieve + + def download(filename, source='http://www.fit.vutbr.cz/~imikolov/rnnlm/'): + print("Downloading %s" % filename) + urlretrieve(source + filename, filename) + + # After downloading, we need to unzip it. + import tarfile + def un_tar(file_name): + print("Extracting %s" % file_name) + tar = tarfile.open(file_name) + names = tar.getnames() + for name in names: + tar.extract(name) + tar.close() + print("Extracted to /simple-examples") + + if not os.path.exists('simple-examples'): + download(filename) + un_tar(filename) + + data_path = os.getcwd() + '/simple-examples/data' + train_path = os.path.join(data_path, "ptb.train.txt") + valid_path = os.path.join(data_path, "ptb.valid.txt") + test_path = os.path.join(data_path, "ptb.test.txt") + + word_to_id = nlp.build_vocab(nlp.read_words(train_path)) + + train_data = nlp.words_to_word_ids(nlp.read_words(train_path), word_to_id) + valid_data = nlp.words_to_word_ids(nlp.read_words(valid_path), word_to_id) + test_data = nlp.words_to_word_ids(nlp.read_words(test_path), word_to_id) + vocabulary = len(word_to_id) + + # print(nlp.read_words(train_path)) # ... 'according', 'to', 'mr.', '', ''] + # print(train_data) # ... 214, 5, 23, 1, 2] + # print(word_to_id) # ... 'beyond': 1295, 'anti-nuclear': 9599, 'trouble': 1520, '': 2 ... } + # print(vocabulary) # 10000 + # exit() + return train_data, valid_data, test_data, vocabulary + +def load_matt_mahoney_text8_dataset(): + """Download a text file from Matt Mahoney's website + if not present, and make sure it's the right size. + Extract the first file enclosed in a zip file as a list of words. + This dataset can be used for Word Embedding. + + Returns + -------- + word_list : a list + a list of string (word).\n + e.g. [.... 'their', 'families', 'who', 'were', 'expelled', 'from', 'jerusalem', ...] + + Examples + -------- + >>> words = tl.files.load_matt_mahoney_text8_dataset() + >>> print('Data size', len(words)) + """ + import zipfile + from six.moves import urllib + + url = 'http://mattmahoney.net/dc/' + + def download_matt_mahoney_text8(filename, expected_bytes): + """Download a text file from Matt Mahoney's website + if not present, and make sure it's the right size.""" + if not os.path.exists(filename): + print('Downloading ...') + filename, _ = urllib.request.urlretrieve(url + filename, filename) + statinfo = os.stat(filename) + if statinfo.st_size == expected_bytes: + print('Found and verified', filename) + else: + print(statinfo.st_size) + raise Exception( + 'Failed to verify ' + filename + '. 
Can you get to it with a browser?') + return filename + + filename = download_matt_mahoney_text8('text8.zip', 31344016) + + with zipfile.ZipFile(filename) as f: + word_list = f.read(f.namelist()[0]).split() + return word_list + +def load_imbd_dataset(path="imdb.pkl", nb_words=None, skip_top=0, + maxlen=None, test_split=0.2, seed=113, + start_char=1, oov_char=2, index_from=3): + """Load IMDB dataset + + Examples + -------- + >>> X_train, y_train, X_test, y_test = tl.files.load_imbd_dataset( + ... nb_words=20000, test_split=0.2) + >>> print('X_train.shape', X_train.shape) + ... (20000,) [[1, 62, 74, ... 1033, 507, 27],[1, 60, 33, ... 13, 1053, 7]..] + >>> print('y_train.shape', y_train.shape) + ... (20000,) [1 0 0 ..., 1 0 1] + + References + ----------- + - `Modify from keras. `_ + """ + from six.moves import cPickle + import gzip + # from ..utils.data_utils import get_file + from six.moves import zip + import numpy as np + from six.moves import urllib + + url = 'https://s3.amazonaws.com/text-datasets/' + def download_imbd(filename): + if not os.path.exists(filename): + print('Downloading ...') + filename, _ = urllib.request.urlretrieve(url + filename, filename) + return filename + + filename = download_imbd(path) + # path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/imdb.pkl") + + if filename.endswith(".gz"): + f = gzip.open(filename, 'rb') + else: + f = open(filename, 'rb') + + X, labels = cPickle.load(f) + f.close() + + np.random.seed(seed) + np.random.shuffle(X) + np.random.seed(seed) + np.random.shuffle(labels) + + if start_char is not None: + X = [[start_char] + [w + index_from for w in x] for x in X] + elif index_from: + X = [[w + index_from for w in x] for x in X] + + if maxlen: + new_X = [] + new_labels = [] + for x, y in zip(X, labels): + if len(x) < maxlen: + new_X.append(x) + new_labels.append(y) + X = new_X + labels = new_labels + if not X: + raise Exception('After filtering for sequences shorter than maxlen=' + + str(maxlen) + ', no sequence was kept. ' + 'Increase maxlen.') + if not nb_words: + nb_words = max([max(x) for x in X]) + + # by convention, use 2 as OOV word + # reserve 'index_from' (=3 by default) characters: 0 (padding), 1 (start), 2 (OOV) + if oov_char is not None: + X = [[oov_char if (w >= nb_words or w < skip_top) else w for w in x] for x in X] + else: + nX = [] + for x in X: + nx = [] + for w in x: + if (w >= nb_words or w < skip_top): + nx.append(w) + nX.append(nx) + X = nX + + X_train = np.array(X[:int(len(X) * (1 - test_split))]) + y_train = np.array(labels[:int(len(X) * (1 - test_split))]) + + X_test = np.array(X[int(len(X) * (1 - test_split)):]) + y_test = np.array(labels[int(len(X) * (1 - test_split)):]) + + return X_train, y_train, X_test, y_test + +def load_nietzsche_dataset(): + """Load Nietzsche dataset. + Returns a string. 
+ + Examples + -------- + >>> see tutorial_generate_text.py + >>> words = tl.files.load_nietzsche_dataset() + >>> words = basic_clean_str(words) + >>> words = words.split() + """ + if sys.version_info[0] == 2: + from urllib import urlretrieve + else: + from urllib.request import urlretrieve + + def download(filename, source='https://s3.amazonaws.com/text-datasets/'): + print("Downloading %s" % filename) + urlretrieve(source + filename, filename) + + if not os.path.exists("nietzsche.txt"): + download("nietzsche.txt") + + # return nlp.read_words("nietzsche.txt", replace = ['', '']) + # with tf.gfile.GFile("nietzsche.txt", "r") as f: + # return f.read() + with open("nietzsche.txt", "r") as f: + words = f.read() + return words + +def load_wmt_en_fr_dataset(data_dir="wmt"): + """It will download English-to-French translation data from the WMT'15 + Website (10^9-French-English corpus), and the 2013 news test from + the same site as development set. + Returns the directories of training data and test data. + + Parameters + ---------- + data_dir : a string + The directory to store the dataset. + + References + ---------- + - Code modified from /tensorflow/models/rnn/translation/data_utils.py + + Notes + ----- + Usually, it will take a long time to download this dataset. + """ + # URLs for WMT data. + _WMT_ENFR_TRAIN_URL = "http://www.statmt.org/wmt10/training-giga-fren.tar" + _WMT_ENFR_DEV_URL = "http://www.statmt.org/wmt15/dev-v2.tgz" + + def maybe_download(directory, filename, url): + """Download filename from url unless it's already in directory.""" + if not os.path.exists(directory): + print("Creating directory %s" % directory) + os.mkdir(directory) + filepath = os.path.join(directory, filename) + if not os.path.exists(filepath): + print("Downloading %s to %s" % (url, filepath)) + filepath, _ = urllib.request.urlretrieve(url, filepath) + statinfo = os.stat(filepath) + print("Succesfully downloaded", filename, statinfo.st_size, "bytes") + return filepath + + def gunzip_file(gz_path, new_path): + """Unzips from gz_path into new_path.""" + print("Unpacking %s to %s" % (gz_path, new_path)) + with gzip.open(gz_path, "rb") as gz_file: + with open(new_path, "wb") as new_file: + for line in gz_file: + new_file.write(line) + + def get_wmt_enfr_train_set(directory): + """Download the WMT en-fr training corpus to directory unless it's there.""" + train_path = os.path.join(directory, "giga-fren.release2") + if not (gfile.Exists(train_path +".fr") and gfile.Exists(train_path +".en")): + corpus_file = maybe_download(directory, "training-giga-fren.tar", + _WMT_ENFR_TRAIN_URL) + print("Extracting tar file %s" % corpus_file) + with tarfile.open(corpus_file, "r") as corpus_tar: + corpus_tar.extractall(directory) + gunzip_file(train_path + ".fr.gz", train_path + ".fr") + gunzip_file(train_path + ".en.gz", train_path + ".en") + return train_path + + def get_wmt_enfr_dev_set(directory): + """Download the WMT en-fr training corpus to directory unless it's there.""" + dev_name = "newstest2013" + dev_path = os.path.join(directory, dev_name) + if not (gfile.Exists(dev_path + ".fr") and gfile.Exists(dev_path + ".en")): + dev_file = maybe_download(directory, "dev-v2.tgz", _WMT_ENFR_DEV_URL) + print("Extracting tgz file %s" % dev_file) + with tarfile.open(dev_file, "r:gz") as dev_tar: + fr_dev_file = dev_tar.getmember("dev/" + dev_name + ".fr") + en_dev_file = dev_tar.getmember("dev/" + dev_name + ".en") + fr_dev_file.name = dev_name + ".fr" # Extract without "dev/" prefix. 
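+      # (Renaming the TarInfo members before extract() makes the two files
+      # land directly in `directory` rather than under a "dev/" subfolder.)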
+ en_dev_file.name = dev_name + ".en" + dev_tar.extract(fr_dev_file, directory) + dev_tar.extract(en_dev_file, directory) + return dev_path + ## ================== + if data_dir == "": + print("Load or Download WMT English-to-French translation > %s" + % os.getcwd()) + else: + print("Load or Download WMT English-to-French translation > %s" + % data_dir) + ## ======== Download if not exist + train_path = get_wmt_enfr_train_set(data_dir) + dev_path = get_wmt_enfr_dev_set(data_dir) + + return train_path, dev_path + + + +## Load and save network +def save_npz(save_list=[], name='model.npz'): + """Input parameters and the file name, save parameters into .npz file. Use tl.utils.load_npz() to restore. + + Parameters + ---------- + save_list : a list + Parameters want to be saved. + name : a string or None + The name of the .npz file. + + Examples + -------- + >>> tl.files.save_npz(network.all_params, name='model_test.npz') + ... File saved to: model_test.npz + >>> load_params = tl.files.load_npz(name='model_test.npz') + ... Loading param0, (784, 800) + ... Loading param1, (800,) + ... Loading param2, (800, 800) + ... Loading param3, (800,) + ... Loading param4, (800, 10) + ... Loading param5, (10,) + >>> put parameters into a TensorLayer network, please see assign_params() + + Notes + ----- + If you got session issues, you can change the value.eval() to value.eval(session=sess) + + References + ---------- + - `Saving dictionary using numpy `_ + """ + ## save params into a dictionary + # rename_dict = {} + # for k, value in enumerate(save_dict): + # rename_dict.update({'param'+str(k) : value.eval()}) + # np.savez(name, **rename_dict) + # print('Model is saved to: %s' % name) + ## save params into a list + save_list_var = [] + for k, value in enumerate(save_list): + save_list_var.append( value.eval() ) + np.savez(name, params=save_list_var) + print('Model is saved to: %s' % name) + +def load_npz(path='', name='model.npz'): + """Load the parameters of a Model saved by tl.files.save_npz(). + + Parameters + ---------- + path : a string + Folder path to .npz file. + name : a string or None + The name of the .npz file. + + Return + -------- + params : list + A list of parameters in order. + + Examples + -------- + - See save_npz and assign_params + + References + ---------- + - `Saving dictionary using numpy `_ + """ + ## if save_npz save params into a dictionary + # d = np.load( path+name ) + # params = [] + # print('Load Model') + # for key, val in sorted( d.items() ): + # params.append(val) + # print('Loading %s, %s' % (key, str(val.shape))) + # return params + ## if save_npz save params into a list + d = np.load( path+name ) + # for val in sorted( d.items() ): + # params = val + # return params + return d['params'] + # print(d.items()[0][1]['params']) + # exit() + # return d.items()[0][1]['params'] + +def assign_params(sess, params, network): + """Assign the given parameters to the TensorLayer network. + + Parameters + ---------- + sess : TensorFlow Session + params : a list + A list of parameters in order. + network : a :class:`Layer` class + The network to be assigned + + Examples + -------- + >>> Save your network as follow: + >>> tl.files.save_npz(network.all_params, name='model_test.npz') + >>> network.print_params() + ... + ... 
Next time, load and assign your network as follow: + >>> sess.run(tf.initialize_all_variables()) # re-initialize, then save and assign + >>> load_params = tl.files.load_npz(name='model_test.npz') + >>> tl.files.assign_params(sess, load_params, network) + >>> network.print_params() + + References + ---------- + - `Assign value to a TensorFlow variable `_ + """ + for idx, param in enumerate(params): + assign_op = network.all_params[idx].assign(param) + sess.run(assign_op) + + + +# Load and save variables +def save_any_to_npy(save_dict={}, name='any.npy'): + """Save variables to .npy file. + + Examples + --------- + >>> tl.files.save_any_to_npy(save_dict={'data': ['a','b']}, name='test.npy') + >>> data = tl.files.load_npy_to_any(name='test.npy') + >>> print(data) + ... {'data': ['a','b']} + """ + np.save(name, save_dict) + +def load_npy_to_any(path='', name='any.npy'): + """Load .npy file. + + Examples + --------- + - see save_any_to_npy() + """ + try: + npy = np.load(path+name).item() + except: + npy = np.load(path+name) + finally: + return npy + + +# Visualizing npz files +def npz_to_W_pdf(path=None, regx='w1pre_[0-9]+\.(npz)'): + """Convert the first weight matrix of .npz file to .pdf by using tl.visualize.W(). + + Parameters + ---------- + path : a string or None + A folder path to npz files. + regx : a string + Regx for the file name. + + Examples + -------- + >>> Convert the first weight matrix of w1_pre...npz file to w1_pre...pdf. + >>> tl.files.npz_to_W_pdf(path='/Users/.../npz_file/', regx='w1pre_[0-9]+\.(npz)') + """ + file_list = load_file_list(path=path, regx=regx) + for f in file_list: + W = load_npz(path, f)[0] + print("%s --> %s" % (f, f.split('.')[0]+'.pdf')) + visualize.W(W, second=10, saveable=True, name=f.split('.')[0], fig_idx=2012) + + +## Helper functions +def load_file_list(path=None, regx='\.npz'): + """Return a file list in a folder by given a path and regular expression. + + Parameters + ---------- + path : a string or None + A folder path. + regx : a string + The regx of file name. + + Examples + ---------- + >>> file_list = tl.files.load_file_list(path=None, regx='w1pre_[0-9]+\.(npz)') + """ + if path == False: + path = os.getcwd() + file_list = os.listdir(path) + return_list = [] + for idx, f in enumerate(file_list): + if re.search(regx, f): + return_list.append(f) + # return_list.sort() + print('Match file list = %s' % return_list) + print('Number of files = %d' % len(return_list)) + return return_list diff --git a/tensorlayer1.2.2/iterate.py b/tensorlayer1.2.2/iterate.py new file mode 100755 index 0000000..d5883d9 --- /dev/null +++ b/tensorlayer1.2.2/iterate.py @@ -0,0 +1,431 @@ +#! /usr/bin/python +# -*- coding: utf8 -*- + + + +import numpy as np +from six.moves import xrange + +def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False): + """ + Generate a generator that input a group of example in numpy.array and + their labels, return the examples and labels by the given batchsize. + + Parameters + ---------- + inputs : numpy.array + (X) The input features, every row is a example. + targets : numpy.array + (y) The labels of inputs, every row is a example. + batch_size : int + The batch size. + shuffle : boolean + Indicating whether to use a shuffling queue, shuffle the dataset before return. 
+
+    Examples
+    --------
+    >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']])
+    >>> y = np.asarray([0,1,2,3,4,5])
+    >>> for batch in tl.iterate.minibatches(inputs=X, targets=y, batch_size=2, shuffle=False):
+    >>>     print(batch)
+    ... (array([['a', 'a'],
+    ...        ['b', 'b']],
+    ...       dtype='<U1'), array([0, 1]))
+    ... (array([['c', 'c'],
+    ...        ['d', 'd']],
+    ...       dtype='<U1'), array([2, 3]))
+    ... (array([['e', 'e'],
+    ...        ['f', 'f']],
+    ...       dtype='<U1'), array([4, 5]))
+    """
+    assert len(inputs) == len(targets)
+    if shuffle:
+        indices = np.arange(len(inputs))
+        np.random.shuffle(indices)
+    for start_idx in range(0, len(inputs) - batch_size + 1, batch_size):
+        if shuffle:
+            excerpt = indices[start_idx:start_idx + batch_size]
+        else:
+            excerpt = slice(start_idx, start_idx + batch_size)
+        yield inputs[excerpt], targets[excerpt]
+
+def seq_minibatches(inputs, targets, batch_size, seq_length, stride=1):
+    """
+    Generate a generator that returns a batch of sequence inputs and targets;
+    it yields batch_size sequences of seq_length rows each, moving the window
+    by stride rows between sequences.
+
+    Parameters
+    ----------
+    inputs : numpy.array
+        (X) The input features, every row is a example.
+    targets : numpy.array
+        (y) The labels of inputs, every row is a example.
+    batch_size : int
+        The batch size.
+    seq_length : int
+        The sequence length.
+    stride : int
+        The stride step, default is 1.
+
+    Examples
+    --------
+    >>> Synced sequence input and output.
+    >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']])
+    >>> y = np.asarray([0, 1, 2, 3, 4, 5])
+    >>> for batch in tl.iterate.seq_minibatches(inputs=X, targets=y, batch_size=2, seq_length=2, stride=1):
+    >>>     print(batch)
+    ... (array([['a', 'a'],
+    ...        ['b', 'b'],
+    ...        ['b', 'b'],
+    ...        ['c', 'c']],
+    ...       dtype='<U1'), array([0, 1, 1, 2]))
+
+    >>> Many to One
+    >>> return_last = True
+    >>> num_steps = 2
+    >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']])
+    >>> Y = np.asarray([0,1,2,3,4,5])
+    >>> for batch in tl.iterate.seq_minibatches(inputs=X, targets=Y, batch_size=2, seq_length=num_steps, stride=1):
+    >>>     x, y = batch
+    >>>     if return_last:
+    >>>         tmp_y = y.reshape((-1, num_steps) + y.shape[1:])
+    >>>         y = tmp_y[:, -1]
+    >>>     print(x, y)
+    ... [['a' 'a']
+    ...  ['b' 'b']
+    ...  ['b' 'b']
+    ...  ['c' 'c']] [1 2]
+    ... [['c' 'c']
+    ...  ['d' 'd']
+    ...  ['d' 'd']
+    ...  ['e' 'e']] [3 4]
+    """
+    assert len(inputs) == len(targets)
+    n_loads = (batch_size * stride) + (seq_length - stride)
+    for start_idx in range(0, len(inputs) - n_loads + 1, (batch_size * stride)):
+        seq_inputs = np.zeros((batch_size, seq_length) + inputs.shape[1:],
+                              dtype=inputs.dtype)
+        seq_targets = np.zeros((batch_size, seq_length) + targets.shape[1:],
+                               dtype=targets.dtype)
+        for b_idx in xrange(batch_size):
+            start_seq_idx = start_idx + (b_idx * stride)
+            end_seq_idx = start_seq_idx + seq_length
+            seq_inputs[b_idx] = inputs[start_seq_idx:end_seq_idx]
+            seq_targets[b_idx] = targets[start_seq_idx:end_seq_idx]
+        flatten_inputs = seq_inputs.reshape((-1,) + inputs.shape[1:])
+        flatten_targets = seq_targets.reshape((-1,) + targets.shape[1:])
+        yield flatten_inputs, flatten_targets
+
+def seq_minibatches2(inputs, targets, batch_size, num_steps):
+    """
+    Generate a generator that iterates on two lists of words. Yields (returns) the source contexts and
+    the target context by the given batch_size and num_steps (sequence_length),
+    see ``PTB tutorial``.
+
+    Hint, if the input data are images, you can modify the code from
+
+    data = np.zeros([batch_size, batch_len])
+
+    to
+
+    data = np.zeros([batch_size, batch_len, inputs.shape[1], inputs.shape[2], inputs.shape[3]])
+
+
+    In TensorFlow's tutorial, this generates batch_size pointers into the raw
+    PTB data, and allows minibatch iteration along these pointers.
+
+    Parameters
+    ----------
+    inputs : a list
+        the context in list format; note that the context is usually
+        represented by splitting on spaces and then converting to unique
+        word IDs.
+    targets : a list
+        the context in list format; note that the context is usually
+        represented by splitting on spaces and then converting to unique
+        word IDs.
+    batch_size : int
+        the batch size.
+    num_steps : int
+        the number of unrolls. i.e. sequence_length
+
+    Yields
+    ------
+    Pairs of the batched data, each a matrix of shape [batch_size, num_steps].
+
+    Raises
+    ------
+    ValueError : if batch_size or num_steps are too high.
+
+    Examples
+    --------
+    >>> X = [i for i in range(20)]
+    >>> Y = [i for i in range(20,40)]
+    >>> for batch in tl.iterate.seq_minibatches2(X, Y, batch_size=2, num_steps=3):
+    ...     x, y = batch
+    ...     print(x, y)
+    ...
+    ... [[  0.   1.   2.]
+    ... [ 10.  11. 
12.]] + ... [[ 20. 21. 22.] + ... [ 30. 31. 32.]] + ... + ... [[ 3. 4. 5.] + ... [ 13. 14. 15.]] + ... [[ 23. 24. 25.] + ... [ 33. 34. 35.]] + ... + ... [[ 6. 7. 8.] + ... [ 16. 17. 18.]] + ... [[ 26. 27. 28.] + ... [ 36. 37. 38.]] + + Code References + --------------- + - ``tensorflow/models/rnn/ptb/reader.py`` + """ + assert len(inputs) == len(targets) + data_len = len(inputs) + batch_len = data_len // batch_size + # data = np.zeros([batch_size, batch_len]) + data = np.zeros((batch_size, batch_len) + inputs.shape[1:], + dtype=inputs.dtype) + data2 = np.zeros([batch_size, batch_len]) + + for i in range(batch_size): + data[i] = inputs[batch_len * i:batch_len * (i + 1)] + data2[i] = targets[batch_len * i:batch_len * (i + 1)] + + epoch_size = (batch_len - 1) // num_steps + + if epoch_size == 0: + raise ValueError("epoch_size == 0, decrease batch_size or num_steps") + + for i in range(epoch_size): + x = data[:, i*num_steps:(i+1)*num_steps] + x2 = data2[:, i*num_steps:(i+1)*num_steps] + yield (x, x2) + + +def ptb_iterator(raw_data, batch_size, num_steps): + """ + Generate a generator that iterates on a list of words, see PTB tutorial. Yields (Returns) the source contexts and + the target context by the given batch_size and num_steps (sequence_length).\n + see ``PTB tutorial``. + + e.g. x = [0, 1, 2] y = [1, 2, 3] , when batch_size = 1, num_steps = 3, + raw_data = [i for i in range(100)] + + In TensorFlow's tutorial, this generates batch_size pointers into the raw + PTB data, and allows minibatch iteration along these pointers. + + Parameters + ---------- + raw_data : a list + the context in list format; note that context usually be + represented by splitting by space, and then convert to unique + word IDs. + batch_size : int + the batch size. + num_steps : int + the number of unrolls. i.e. sequence_length + + Yields + ------ + Pairs of the batched data, each a matrix of shape [batch_size, num_steps]. + The second element of the tuple is the same data time-shifted to the + right by one. + + Raises + ------ + ValueError : if batch_size or num_steps are too high. + + Examples + -------- + >>> train_data = [i for i in range(20)] + >>> for batch in tl.iterate.ptb_iterator(train_data, batch_size=2, num_steps=3): + >>> x, y = batch + >>> print(x, y) + ... [[ 0 1 2] <---x 1st subset/ iteration + ... [10 11 12]] + ... [[ 1 2 3] <---y + ... [11 12 13]] + ... + ... [[ 3 4 5] <--- 1st batch input 2nd subset/ iteration + ... [13 14 15]] <--- 2nd batch input + ... [[ 4 5 6] <--- 1st batch target + ... [14 15 16]] <--- 2nd batch target + ... + ... [[ 6 7 8] 3rd subset/ iteration + ... [16 17 18]] + ... [[ 7 8 9] + ... [17 18 19]] + + Code References + ---------------- + - ``tensorflow/models/rnn/ptb/reader.py`` + """ + raw_data = np.array(raw_data, dtype=np.int32) + + data_len = len(raw_data) + batch_len = data_len // batch_size + data = np.zeros([batch_size, batch_len], dtype=np.int32) + for i in range(batch_size): + data[i] = raw_data[batch_len * i:batch_len * (i + 1)] + + epoch_size = (batch_len - 1) // num_steps + + if epoch_size == 0: + raise ValueError("epoch_size == 0, decrease batch_size or num_steps") + + for i in range(epoch_size): + x = data[:, i*num_steps:(i+1)*num_steps] + y = data[:, i*num_steps+1:(i+1)*num_steps+1] + yield (x, y) + + + +# def minibatches_for_sequence2D(inputs, targets, batch_size, sequence_length, stride=1): +# """ +# Input a group of example in 2D numpy.array and their labels. +# Return the examples and labels by the given batchsize, sequence_length. +# Use for RNN. 
+# +# Parameters +# ---------- +# inputs : numpy.array +# (X) The input features, every row is a example. +# targets : numpy.array +# (y) The labels of inputs, every row is a example. +# batchsize : int +# The batch size must be a multiple of sequence_length: int(batch_size % sequence_length) == 0 +# sequence_length : int +# The sequence length +# stride : int +# The stride step +# +# Examples +# -------- +# >>> sequence_length = 2 +# >>> batch_size = 4 +# >>> stride = 1 +# >>> X_train = np.asarray([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15],[16,17,18],[19,20,21],[22,23,24]]) +# >>> y_train = np.asarray(['0','1','2','3','4','5','6','7']) +# >>> print('X_train = %s' % X_train) +# >>> print('y_train = %s' % y_train) +# >>> for batch in minibatches_for_sequence2D(X_train, y_train, batch_size=batch_size, sequence_length=sequence_length, stride=stride): +# >>> inputs, targets = batch +# >>> print(inputs) +# >>> print(targets) +# ... [[ 1. 2. 3.] +# ... [ 4. 5. 6.] +# ... [ 4. 5. 6.] +# ... [ 7. 8. 9.]] +# ... [1 2] +# ... [[ 4. 5. 6.] +# ... [ 7. 8. 9.] +# ... [ 7. 8. 9.] +# ... [ 10. 11. 12.]] +# ... [2 3] +# ... ... +# ... [[ 16. 17. 18.] +# ... [ 19. 20. 21.] +# ... [ 19. 20. 21.] +# ... [ 22. 23. 24.]] +# ... [6 7] +# """ +# print('len(targets)=%d batch_size=%d sequence_length=%d stride=%d' % (len(targets), batch_size, sequence_length, stride)) +# assert len(inputs) == len(targets), '1 feature vector have 1 target vector/value' #* sequence_length +# # assert int(batch_size % sequence_length) == 0, 'batch_size % sequence_length must == 0\ +# # batch_size is number of examples rather than number of targets' +# +# # print(inputs.shape, len(inputs), len(inputs[0])) +# +# n_targets = int(batch_size/sequence_length) +# # n_targets = int(np.ceil(batch_size/sequence_length)) +# X = np.empty(shape=(0,len(inputs[0])), dtype=np.float32) +# y = np.zeros(shape=(1, n_targets), dtype=np.int32) +# +# for idx in range(sequence_length, len(inputs), stride): # go through all example during 1 epoch +# for n in range(n_targets): # for num of target +# X = np.concatenate((X, inputs[idx-sequence_length+n:idx+n])) +# y[0][n] = targets[idx-1+n] +# # y = np.vstack((y, targets[idx-1+n])) +# yield X, y[0] +# X = np.empty(shape=(0,len(inputs[0]))) +# # y = np.empty(shape=(1,0)) +# +# +# def minibatches_for_sequence4D(inputs, targets, batch_size, sequence_length, stride=1): # +# """ +# Input a group of example in 4D numpy.array and their labels. +# Return the examples and labels by the given batchsize, sequence_length. +# Use for RNN. +# +# Parameters +# ---------- +# inputs : numpy.array +# (X) The input features, every row is a example. +# targets : numpy.array +# (y) The labels of inputs, every row is a example. +# batchsize : int +# The batch size must be a multiple of sequence_length: int(batch_size % sequence_length) == 0 +# sequence_length : int +# The sequence length +# stride : int +# The stride step +# +# Examples +# -------- +# >>> sequence_length = 2 +# >>> batch_size = 2 +# >>> stride = 1 +# >>> X_train = np.asarray([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15],[16,17,18],[19,20,21],[22,23,24]]) +# >>> y_train = np.asarray(['0','1','2','3','4','5','6','7']) +# >>> X_train = np.expand_dims(X_train, axis=1) +# >>> X_train = np.expand_dims(X_train, axis=3) +# >>> for batch in minibatches_for_sequence4D(X_train, y_train, batch_size=batch_size, sequence_length=sequence_length, stride=stride): +# >>> inputs, targets = batch +# >>> print(inputs) +# >>> print(targets) +# ... [[[[ 1.] +# ... [ 2.] +# ... 
[ 3.]]] +# ... [[[ 4.] +# ... [ 5.] +# ... [ 6.]]]] +# ... [1] +# ... [[[[ 4.] +# ... [ 5.] +# ... [ 6.]]] +# ... [[[ 7.] +# ... [ 8.] +# ... [ 9.]]]] +# ... [2] +# ... ... +# ... [[[[ 19.] +# ... [ 20.] +# ... [ 21.]]] +# ... [[[ 22.] +# ... [ 23.] +# ... [ 24.]]]] +# ... [7] +# """ +# print('len(targets)=%d batch_size=%d sequence_length=%d stride=%d' % (len(targets), batch_size, sequence_length, stride)) +# assert len(inputs) == len(targets), '1 feature vector have 1 target vector/value' #* sequence_length +# # assert int(batch_size % sequence_length) == 0, 'in LSTM, batch_size % sequence_length must == 0\ +# # batch_size is number of X_train rather than number of targets' +# assert stride >= 1, 'stride must be >=1, at least move 1 step for each iternation' +# +# n_example, n_channels, width, height = inputs.shape +# print('n_example=%d n_channels=%d width=%d height=%d' % (n_example, n_channels, width, height)) +# +# n_targets = int(np.ceil(batch_size/sequence_length)) # 实际为 batchsize/sequence_length + 1 +# print(n_targets) +# X = np.zeros(shape=(batch_size, n_channels, width, height), dtype=np.float32) +# # X = np.zeros(shape=(n_targets, sequence_length, n_channels, width, height), dtype=np.float32) +# y = np.zeros(shape=(1,n_targets), dtype=np.int32) +# # y = np.empty(shape=(0,1), dtype=np.float32) +# # time.sleep(2) +# for idx in range(sequence_length, n_example-n_targets+2, stride): # go through all example during 1 epoch +# for n in range(n_targets): # for num of target +# # print(idx+n, inputs[idx-sequence_length+n : idx+n].shape) +# X[n*sequence_length : (n+1)*sequence_length] = inputs[idx+n-sequence_length : idx+n] +# # X[n] = inputs[idx-sequence_length+n:idx+n] +# y[0][n] = targets[idx+n-1] +# # y = np.vstack((y, targets[idx-1+n])) +# # y = targets[idx: idx+n_targets] +# yield X, y[0] diff --git a/tensorlayer1.2.2/layers.py b/tensorlayer1.2.2/layers.py new file mode 100755 index 0000000..5380ae0 --- /dev/null +++ b/tensorlayer1.2.2/layers.py @@ -0,0 +1,2787 @@ +#! /usr/bin/python +# -*- coding: utf8 -*- + + + +import tensorflow as tf +import time +from . import visualize +from . import utils +from . import files +from . import cost +from . import iterate +import numpy as np +from six.moves import xrange +import random +import warnings + +# __all__ = [ +# "Layer", +# "DenseLayer", +# ] + +## Dynamically creat variables for keep prob +# set_keep = locals() +set_keep = globals() +set_keep['_layers_name_list'] =[] +set_keep['name_reuse'] = False + +## Variable Operation +def flatten_reshape(variable, name=''): + """Reshapes high-dimension input to a vector. + [batch_size, mask_row, mask_col, n_mask] ---> [batch_size, mask_row * mask_col * n_mask] + + Parameters + ---------- + variable : a tensorflow variable + name : a string or None + An optional name to attach to this layer. + + Examples + -------- + >>> W_conv2 = weight_variable([5, 5, 100, 32]) # 64 features for each 5x5 patch + >>> b_conv2 = bias_variable([32]) + >>> W_fc1 = weight_variable([7 * 7 * 32, 256]) + + >>> h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) + >>> h_pool2 = max_pool_2x2(h_conv2) + >>> h_pool2.get_shape()[:].as_list() = [batch_size, 7, 7, 32] + ... [batch_size, mask_row, mask_col, n_mask] + >>> h_pool2_flat = tl.layers.flatten_reshape(h_pool2) + ... [batch_size, mask_row * mask_col * n_mask] + >>> h_pool2_flat_drop = tf.nn.dropout(h_pool2_flat, keep_prob) + ... 
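+    >>> # quick arithmetic check (illustrative): the flattened dimension is 7*7*32 = 1568
+    >>> h_pool2_flat.get_shape().as_list()
+    ... [batch_size, 1568]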
+ """ + dim = 1 + for d in variable.get_shape()[1:].as_list(): + dim *= d + return tf.reshape(variable, shape=[-1, dim], name=name) + +def clear_layers_name(): + """Clear all layer names in set_keep['_layers_name_list'], + enable layer name reuse. + + Examples + --------- + >>> network = tl.layers.InputLayer(x, name='input_layer') + >>> network = tl.layers.DenseLayer(network, n_units=800, name='relu1') + ... + >>> tl.layers.clear_layers_name() + >>> network2 = tl.layers.InputLayer(x, name='input_layer') + >>> network2 = tl.layers.DenseLayer(network2, n_units=800, name='relu1') + ... + """ + set_keep['_layers_name_list'] =[] + +def set_name_reuse(enable=True): + """Enable or disable reuse layer name. By default, each layer must has unique + name. When you want two or more input placeholder (inference) share the same + model parameters, you need to enable layer name reuse, then allow the + parameters have same name scope. + + Examples + ------------ + - see ``tutorial_ptb_lstm.py`` for example. + """ + set_keep['name_reuse'] = enable + +def initialize_rnn_state(state): + """Return the initialized RNN state. + The input is LSTMStateTuple or State of RNNCells. + """ + if isinstance(state, tf.nn.rnn_cell.LSTMStateTuple): + # when state_is_tuple=True for LSTM + # print(state) + # print(state.c) + # print(state.h) + # print(state.c.eval()) + # print(state.h.eval()) + # exit() + c = state.c.eval() + h = state.h.eval() + return (c, h) + # # print(state) + # # print(state[0]) + # new_state = state + # new_state[0].assign(state[0].eval()) + # new_state[1].assign(state[1].eval()) + # # state[0] = state[0].eval() + # # state[1] = state[1].eval() + # # state.c = state.c.eval() + # # state.h = state.h.eval() + # return new_state + else: + # when state_is_tuple=False for LSTM + # or other RNNs + new_state = state.eval() + return new_state + + +def print_all_variables(train_only=False): + """Print all trainable and non-trainable variables + without initialize_all_variables() + + Parameters + ---------- + train_only : boolen + If True, only print the trainable variables, otherwise, print all variables. + """ + tvar = tf.trainable_variables() if train_only else tf.all_variables() + for idx, v in enumerate(tvar): + print(" var {:3}: {:15} {}".format(idx, str(v.get_shape()), v.name)) + +# def print_all_variables(): +# """Print all trainable and non-trainable variables +# without initialize_all_variables()""" +# for idx, v in enumerate(tf.all_variables()): +# # print(" var %d: %s %s" % (idx, v.get_shape(), v.name)) +# print(" var {:3}: {:15} {}".format(idx, str(v.get_shape()), v.name)) + +## Basic layer +class Layer(object): + """ + The :class:`Layer` class represents a single layer of a neural network. It + should be subclassed when implementing new types of layers. + Because each layer can keep track of the layer(s) feeding into it, a + network's output :class:`Layer` instance can double as a handle to the full + network. + + Parameters + ---------- + inputs : a :class:`Layer` instance + The `Layer` class feeding into this layer. + name : a string or None + An optional name to attach to this layer. 
+    """
+    def __init__(
+        self,
+        inputs = None,
+        name ='layer'
+    ):
+        self.inputs = inputs
+        # if name in globals():
+        if (name in set_keep['_layers_name_list']) and name_reuse == False:
+            raise Exception("Layer '%s' already exists, please choose another 'name'.\
+            \nHint : Use a different name for each 'Layer' (the name is used to control parameter sharing)" % name)
+        else:
+            self.name = name
+            if name not in ['', None, False]:
+                set_keep['_layers_name_list'].append(name)
+
+
+    def print_params(self, details=True):
+        ''' Print all info of parameters in the network '''
+        for i, p in enumerate(self.all_params):
+            if details:
+                try:
+                    print("  param {:3}: {:15} (mean: {:<18}, median: {:<18}, std: {:<18})   {}".format(i, str(p.eval().shape), p.eval().mean(), np.median(p.eval()), p.eval().std(), p.name))
+                except:
+                    raise Exception("Hint: print params details after sess.run(tf.initialize_all_variables()) or use network.print_params(False).")
+            else:
+                print("  param {:3}: {:15}    {}".format(i, str(p.get_shape()), p.name))
+        print("  num of params: %d" % self.count_params())
+
+    def print_layers(self):
+        ''' Print all info of layers in the network '''
+        for i, p in enumerate(self.all_layers):
+            print("  layer %d: %s" % (i, str(p)))
+
+    def count_params(self):
+        ''' Return the number of parameters in the network '''
+        n_params = 0
+        for i, p in enumerate(self.all_params):
+            n = 1
+            # for s in p.eval().shape:
+            for s in p.get_shape():
+                try:
+                    s = int(s)
+                except:
+                    s = 1
+                if s:
+                    n = n * s
+            n_params = n_params + n
+        return n_params
+
+    def __str__(self):
+        print("\nIt is a Layer class")
+        self.print_params(False)
+        self.print_layers()
+        return "  Last layer is: %s" % self.__class__.__name__
+
+## Input layer
+class InputLayer(Layer):
+    """
+    The :class:`InputLayer` class is the starting layer of a neural network.
+
+    Parameters
+    ----------
+    inputs : a TensorFlow placeholder
+        The input tensor data.
+    name : a string or None
+        An optional name to attach to this layer.
+    n_features : an int
+        The number of features. If not specified, it will assume the input is
+        with the shape of [batch_size, n_features], then select the second
+        element as the n_features.
It is used to specify the matrix size of + next layer. If apply Convolutional layer after InputLayer, + n_features is not important. + """ + def __init__( + self, + inputs = None, + n_features = None, + name ='input_layer' + ): + Layer.__init__(self, inputs=inputs, name=name) + # super(InputLayer, self).__init__() # initialize all super classes + # if n_features: + # self.n_units = n_features + # else: + # self.n_units = int(inputs._shape[1]) + print(" tensorlayer:Instantiate InputLayer %s: %s" % (self.name, inputs._shape)) + + self.outputs = inputs + + self.all_layers = [] + self.all_params = [] + self.all_drop = {} + +## Word Embedding Input layer +class Word2vecEmbeddingInputlayer(Layer): + """ + The :class:`Word2vecEmbeddingInputlayer` class is a fully connected layer, + for Word Embedding. Words are input as integer index. + The output is the embedded word vector. + + Parameters + ---------- + inputs : placeholder + For word inputs. integer index format. + train_labels : placeholder + For word labels. integer index format. + vocabulary_size : int + The size of vocabulary, number of words. + embedding_size : int + The number of embedding dimensions. + num_sampled : int + The Number of negative examples for NCE loss. + nce_loss_args : a dictionary + The arguments for tf.nn.nce_loss() + E_init : embedding initializer + The initializer for initializing the embedding matrix. + E_init_args : a dictionary + The arguments for embedding initializer + nce_W_init : NCE decoder biases initializer + The initializer for initializing the nce decoder weight matrix. + nce_W_init_args : a dictionary + The arguments for initializing the nce decoder weight matrix. + nce_b_init : NCE decoder biases initializer + The initializer for tf.get_variable() of the nce decoder bias vector. + nce_b_init_args : a dictionary + The arguments for tf.get_variable() of the nce decoder bias vector. + name : a string or None + An optional name to attach to this layer. + + Variables + -------------- + nce_cost : a tensor + The NCE loss. + outputs : a tensor + The outputs of embedding layer. + normalized_embeddings : tensor + Normalized embedding matrix + + Examples + -------- + - Without TensorLayer : see tensorflow/examples/tutorials/word2vec/word2vec_basic.py + >>> train_inputs = tf.placeholder(tf.int32, shape=[batch_size]) + >>> train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1]) + >>> embeddings = tf.Variable( + ... tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)) + >>> embed = tf.nn.embedding_lookup(embeddings, train_inputs) + >>> nce_weights = tf.Variable( + ... tf.truncated_normal([vocabulary_size, embedding_size], + ... stddev=1.0 / math.sqrt(embedding_size))) + >>> nce_biases = tf.Variable(tf.zeros([vocabulary_size])) + >>> cost = tf.reduce_mean( + ... tf.nn.nce_loss(weights=nce_weights, biases=nce_biases, + ... inputs=embed, labels=train_labels, + ... num_sampled=num_sampled, num_classes=vocabulary_size, + ... num_true=1)) + + - With TensorLayer : see tutorial_word2vec_basic.py + >>> train_inputs = tf.placeholder(tf.int32, shape=[batch_size]) + >>> train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1]) + >>> emb_net = tl.layers.Word2vecEmbeddingInputlayer( + ... inputs = train_inputs, + ... train_labels = train_labels, + ... vocabulary_size = vocabulary_size, + ... embedding_size = embedding_size, + ... num_sampled = num_sampled, + ... nce_loss_args = {}, + ... E_init = tf.random_uniform, + ... E_init_args = {'minval':-1.0, 'maxval':1.0}, + ... 
nce_W_init = tf.truncated_normal, + ... nce_W_init_args = {'stddev': float(1.0/np.sqrt(embedding_size))}, + ... nce_b_init = tf.zeros, + ... nce_b_init_args = {}, + ... name ='word2vec_layer', + ... ) + >>> cost = emb_net.nce_cost + >>> train_params = emb_net.all_params + >>> train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize( + ... cost, var_list=train_params) + >>> normalized_embeddings = emb_net.normalized_embeddings + + References + ---------- + - `tensorflow/examples/tutorials/word2vec/word2vec_basic.py `_ + """ + def __init__( + self, + inputs = None, + train_labels = None, + vocabulary_size = 80000, + embedding_size = 200, + num_sampled = 64, + nce_loss_args = {}, + E_init = tf.random_uniform_initializer(minval=-1.0, maxval=1.0), + E_init_args = {}, + nce_W_init = tf.truncated_normal_initializer(stddev=0.03), + nce_W_init_args = {}, + nce_b_init = tf.constant_initializer(value=0.0), + nce_b_init_args = {}, + name ='word2vec_layer', + ): + Layer.__init__(self, name=name) + self.inputs = inputs + self.n_units = embedding_size + print(" tensorlayer:Instantiate Word2vecEmbeddingInputlayer %s: (%d, %d)" % (self.name, vocabulary_size, embedding_size)) + # Look up embeddings for inputs. + # Note: a row of 'embeddings' is the vector representation of a word. + # for the sake of speed, it is better to slice the embedding matrix + # instead of transfering a word id to one-hot-format vector and then + # multiply by the embedding matrix. + # embed is the outputs of the hidden layer (embedding layer), it is a + # row vector with 'embedding_size' values. + with tf.variable_scope(name) as vs: + embeddings = tf.get_variable(name='embeddings', + shape=(vocabulary_size, embedding_size), + initializer=E_init, + **E_init_args) + embed = tf.nn.embedding_lookup(embeddings, self.inputs) + # Construct the variables for the NCE loss (i.e. negative sampling) + nce_weights = tf.get_variable(name='nce_weights', + shape=(vocabulary_size, embedding_size), + initializer=nce_W_init, + **nce_W_init_args) + nce_biases = tf.get_variable(name='nce_biases', + shape=(vocabulary_size), + initializer=nce_b_init, + **nce_b_init_args) + + # Compute the average NCE loss for the batch. + # tf.nce_loss automatically draws a new sample of the negative labels + # each time we evaluate the loss. + self.nce_cost = tf.reduce_mean( + tf.nn.nce_loss(weights=nce_weights, biases=nce_biases, + inputs=embed, labels=train_labels, + num_sampled=num_sampled, num_classes=vocabulary_size, + **nce_loss_args)) + + self.outputs = embed + self.normalized_embeddings = tf.nn.l2_normalize(embeddings, 1) + + self.all_layers = [self.outputs] + self.all_params = [embeddings, nce_weights, nce_biases] + self.all_drop = {} + +class EmbeddingInputlayer(Layer): + """ + The :class:`EmbeddingInputlayer` class is a fully connected layer, + for Word Embedding. Words are input as integer index. + The output is the embedded word vector. + + This class can not be used to train a word embedding matrix, so you should + assign a trained matrix into it. To train a word embedding matrix, you can used + class:`Word2vecEmbeddingInputlayer`. + + Note that, do not update this embedding matrix. + + Parameters + ---------- + inputs : placeholder + For word inputs. integer index format. + a 2D tensor : [batch_size, num_steps(num_words)] + vocabulary_size : int + The size of vocabulary, number of words. + embedding_size : int + The number of embedding dimensions. + E_init : embedding initializer + The initializer for initializing the embedding matrix. 
+ E_init_args : a dictionary + The arguments for embedding initializer + name : a string or None + An optional name to attach to this layer. + + Variables + ------------ + outputs : a tensor + The outputs of embedding layer. + the outputs 3D tensor : [batch_size, num_steps(num_words), embedding_size] + + Examples + -------- + >>> vocabulary_size = 50000 + >>> embedding_size = 200 + >>> model_file_name = "model_word2vec_50k_200" + >>> batch_size = None + ... + >>> all_var = tl.files.load_npy_to_any(name=model_file_name+'.npy') + >>> data = all_var['data']; count = all_var['count'] + >>> dictionary = all_var['dictionary'] + >>> reverse_dictionary = all_var['reverse_dictionary'] + >>> tl.files.save_vocab(count, name='vocab_'+model_file_name+'.txt') + >>> del all_var, data, count + ... + >>> load_params = tl.files.load_npz(name=model_file_name+'.npz') + >>> x = tf.placeholder(tf.int32, shape=[batch_size]) + >>> y_ = tf.placeholder(tf.int32, shape=[batch_size, 1]) + >>> emb_net = tl.layers.EmbeddingInputlayer( + ... inputs = x, + ... vocabulary_size = vocabulary_size, + ... embedding_size = embedding_size, + ... name ='embedding_layer') + >>> sess.run(tf.initialize_all_variables()) + >>> tl.files.assign_params(sess, [load_params[0]], emb_net) + >>> word = b'hello' + >>> word_id = dictionary[word] + >>> print('word_id:', word_id) + ... 6428 + ... + >>> words = [b'i', b'am', b'hao', b'dong'] + >>> word_ids = tl.files.words_to_word_ids(words, dictionary) + >>> context = tl.files.word_ids_to_words(word_ids, reverse_dictionary) + >>> print('word_ids:', word_ids) + ... [72, 1226, 46744, 20048] + >>> print('context:', context) + ... [b'i', b'am', b'hao', b'dong'] + ... + >>> vector = sess.run(emb_net.outputs, feed_dict={x : [word_id]}) + >>> print('vector:', vector.shape) + ... (1, 200) + >>> vectors = sess.run(emb_net.outputs, feed_dict={x : word_ids}) + >>> print('vectors:', vectors.shape) + ... (4, 200) + + """ + def __init__( + self, + inputs = None, + vocabulary_size = 80000, + embedding_size = 200, + E_init = tf.random_uniform_initializer(-0.1, 0.1), + E_init_args = {}, + name ='embedding_layer', + ): + Layer.__init__(self, name=name) + self.inputs = inputs + self.n_units = embedding_size + print(" tensorlayer:Instantiate EmbeddingInputlayer %s: (%d, %d)" % (self.name, vocabulary_size, embedding_size)) + + with tf.variable_scope(name) as vs: + embeddings = tf.get_variable(name='embeddings', + shape=(vocabulary_size, embedding_size), + initializer=E_init, + **E_init_args) + embed = tf.nn.embedding_lookup(embeddings, self.inputs) + + self.outputs = embed + + self.all_layers = [self.outputs] + self.all_params = [embeddings] + self.all_drop = {} + + +## Dense layer +class DenseLayer(Layer): + """ + The :class:`DenseLayer` class is a fully connected layer. + + Parameters + ---------- + layer : a :class:`Layer` instance + The `Layer` class feeding into this layer. + n_units : int + The number of units of the layer. + act : activation function + The function that is applied to the layer activations. + W_init : weights initializer + The initializer for initializing the weight matrix. + b_init : biases initializer or None + The initializer for initializing the bias vector. If None, skip biases. + W_init_args : dictionary + The arguments for the weights tf.get_variable. + b_init_args : dictionary + The arguments for the biases tf.get_variable. + name : a string or None + An optional name to attach to this layer. 
+
+    Examples
+    --------
+    >>> network = tl.layers.InputLayer(x, name='input_layer')
+    >>> network = tl.layers.DenseLayer(
+    ...                 network,
+    ...                 n_units=800,
+    ...                 act = tf.nn.relu,
+    ...                 W_init=tf.truncated_normal_initializer(stddev=0.1),
+    ...                 name ='relu_layer'
+    ...                 )
+
+    - Without TensorLayer, you can do as follows.
+    >>> W = tf.Variable(
+    ...     tf.random_uniform([n_in, n_units], -1.0, 1.0), name='W')
+    >>> b = tf.Variable(tf.zeros(shape=[n_units]), name='b')
+    >>> y = tf.nn.relu(tf.matmul(inputs, W) + b)
+
+    Notes
+    -----
+    If the input to this layer has more than two axes, you need to flatten the
+    input by using :class:`FlattenLayer` first.
+    """
+    def __init__(
+        self,
+        layer = None,
+        n_units = 100,
+        act = tf.nn.relu,
+        W_init = tf.truncated_normal_initializer(stddev=0.1),
+        b_init = tf.constant_initializer(value=0.0),
+        W_init_args = {},
+        b_init_args = {},
+        name ='dense_layer',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+        if self.inputs.get_shape().ndims != 2:
+            raise Exception("The input dimension must be rank 2, please reshape or flatten it")
+
+        n_in = int(self.inputs._shape[-1])
+        self.n_units = n_units
+        print("  tensorlayer:Instantiate DenseLayer %s: %d, %s" % (self.name, self.n_units, act.__name__))
+        with tf.variable_scope(name) as vs:
+            W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, **W_init_args )
+            if b_init:
+                b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, **b_init_args )
+                self.outputs = act(tf.matmul(self.inputs, W) + b)
+            else:
+                self.outputs = act(tf.matmul(self.inputs, W))
+
+        # Hint : list() and dict() make shallow copies; without them, the
+        # attributes would be passed by reference.
+        self.all_layers = list(layer.all_layers)
+        self.all_params = list(layer.all_params)
+        self.all_drop = dict(layer.all_drop)
+        self.all_layers.extend( [self.outputs] )
+        if b_init:
+            self.all_params.extend( [W, b] )
+        else:
+            self.all_params.extend( [W] )
+        # The shallow copy allows the weights in the network to be updated at the
+        # same time, e.g. when a ReconLayer updates the weights of its encoder,
+        # because the encoder points to the same physical memory address:
+        # network = InputLayer(x, name='input_layer')
+        # network = DenseLayer(network, n_units=200, act = tf.nn.sigmoid, name='sigmoid')
+        # recon_layer = ReconLayer(network, n_units=784, act = tf.nn.sigmoid, name='recon_layer')
+        # print(network.all_params)
+        # ... [W of sigmoid, b of sigmoid]
+        # print(len(network.all_params))
+        # ... 2
+        # print(recon_layer.all_params)
+        # ... [W of sigmoid, b of sigmoid, W of recon_layer, b of recon_layer]
+        # print(len(recon_layer.all_params))
+        # ... 4
+
+class ReconLayer(DenseLayer):
+    """
+    The :class:`ReconLayer` class is a reconstruction layer based on `DenseLayer`,
+    which is used to pre-train a `DenseLayer`.
+
+    Parameters
+    ----------
+    layer : a :class:`Layer` instance
+        The `Layer` class feeding into this layer.
+    x_recon : tensorflow variable
+        The variables used for reconstruction.
+    name : a string or None
+        An optional name to attach to this layer.
+    n_units : int
+        The number of units of the layer; should be equal to the dimension of x_recon.
+    act : activation function
+        The activation function that is applied to the reconstruction layer.
+        Normally, for a sigmoid layer, the reconstruction activation is sigmoid;
+        for a rectifying layer, the reconstruction activation is softplus.
+
+    Examples
+    --------
+    >>> network = tl.layers.InputLayer(x, name='input_layer')
+    >>> network = tl.layers.DenseLayer(network, n_units=196,
+    ...
act=tf.nn.sigmoid, name='sigmoid1')
+    >>> recon_layer1 = tl.layers.ReconLayer(network, x_recon=x, n_units=784,
+    ...                 act=tf.nn.sigmoid, name='recon_layer1')
+    >>> recon_layer1.pretrain(sess, x=x, X_train=X_train, X_val=X_val,
+    ...                 denoise_name=None, n_epoch=1200, batch_size=128,
+    ...                 print_freq=10, save=True, save_name='w1pre_')
+
+    Methods
+    -------
+    pretrain(self, sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batch_size=128, print_freq=10, save=True, save_name='w1pre_')
+        Start to pre-train the parameters of the previous DenseLayer.
+
+    Notes
+    -----
+    The input layer should be a `DenseLayer` or a layer that has only one axis.
+    You may need to modify this part to define your own cost function.
+    By default, the cost is implemented as follows:
+    - For a sigmoid layer, the implementation can be `UFLDL `_
+    - For a rectifying layer, the implementation can be `Glorot (2011). Deep Sparse Rectifier Neural Networks `_
+    """
+    def __init__(
+        self,
+        layer = None,
+        x_recon = None,
+        name = 'recon_layer',
+        n_units = 784,
+        act = tf.nn.softplus,
+    ):
+        DenseLayer.__init__(self, layer=layer, n_units=n_units, act=act, name=name)
+        print("     tensorlayer:  %s is a ReconLayer" % self.name)
+
+        # y : reconstruction outputs; train_params : parameters to train
+        # Note that: train_params = [W_encoder, b_encoder, W_decoder, b_decoder]
+        y = self.outputs
+        self.train_params = self.all_params[-4:]
+
+        # =====================================================================
+        #
+        # You need to modify the below cost function and optimizer so as to
+        # implement your own pre-train method.
+        #
+        # =====================================================================
+        lambda_l2_w = 0.004
+        learning_rate = 0.0001
+        print("     lambda_l2_w: %f" % lambda_l2_w)
+        print("     learning_rate: %f" % learning_rate)
+
+        # Mean-squared-error, i.e. 
quadratic-cost + mse = tf.reduce_sum(tf.squared_difference(y, x_recon), reduction_indices = 1) + mse = tf.reduce_mean(mse) # in theano: mse = ((y - x) ** 2 ).sum(axis=1).mean() + # mse = tf.reduce_mean(tf.reduce_sum(tf.square(tf.sub(y, x_recon)), reduction_indices = 1)) + # mse = tf.reduce_mean(tf.squared_difference(y, x_recon)) # : Error + # mse = tf.sqrt(tf.reduce_mean(tf.square(y - x_recon))) # : Error + # Cross-entropy + # ce = cost.cross_entropy(y, x_recon) # : list , list , Error (only be used for softmax output) + # ce = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, x_recon)) # : list , list , Error (only be used for softmax output) + # ce = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y, x_recon)) # : list , index , Error (only be used for softmax output) + L2_w = tf.contrib.layers.l2_regularizer(lambda_l2_w)(self.train_params[0]) \ + + tf.contrib.layers.l2_regularizer(lambda_l2_w)(self.train_params[2]) # faster than the code below + # L2_w = lambda_l2_w * tf.reduce_mean(tf.square(self.train_params[0])) + lambda_l2_w * tf.reduce_mean( tf.square(self.train_params[2])) + # DropNeuro + P_o = cost.lo_regularizer(0.03)(self.train_params[0]) # + cost.lo_regularizer(0.5)(self.train_params[2]) # : if add lo on decoder, no neuron will be broken + P_i = cost.li_regularizer(0.03)(self.train_params[0]) # + cost.li_regularizer(0.001)(self.train_params[2]) + # L1 of activation outputs + activation_out = self.all_layers[-2] + L1_a = 0.001 * tf.reduce_mean(activation_out) # : theano: T.mean( self.a[i] ) # some neuron are broken, white and black + # L1_a = 0.001 * tf.reduce_mean( tf.reduce_sum(activation_out, reduction_indices=0) ) # : some neuron are broken, white and black + # L1_a = 0.001 * 100 * tf.reduce_mean( tf.reduce_sum(activation_out, reduction_indices=1) ) # : some neuron are broken, white and black + # KL Divergence + beta = 4 + rho = 0.15 + p_hat = tf.reduce_mean(activation_out, reduction_indices = 0) # theano: p_hat = T.mean( self.a[i], axis=0 ) + KLD = beta * tf.reduce_sum( rho * tf.log(tf.div(rho, p_hat)) + (1- rho) * tf.log((1- rho)/ (tf.sub(float(1), p_hat))) ) + # KLD = beta * tf.reduce_sum( rho * tf.log(rho/ p_hat) + (1- rho) * tf.log((1- rho)/(1- p_hat)) ) + # theano: L1_a = l1_a[i] * T.sum( rho[i] * T.log(rho[i]/ p_hat) + (1- rho[i]) * T.log((1- rho[i])/(1- p_hat)) ) + # Total cost + if act == tf.nn.softplus: + print(' use: mse, L2_w, L1_a') + self.cost = mse + L1_a + L2_w + elif act == tf.nn.sigmoid: + # ---------------------------------------------------- + # Cross-entropy was used in Denoising AE + # print(' use: ce, L2_w, KLD') + # self.cost = ce + L2_w + KLD + # ---------------------------------------------------- + # Mean-squared-error was used in Vanilla AE + print(' use: mse, L2_w, KLD') + self.cost = mse + L2_w + KLD + # ---------------------------------------------------- + # Add DropNeuro penalty (P_o) can remove neurons of AE + # print(' use: mse, L2_w, KLD, P_o') + # self.cost = mse + L2_w + KLD + P_o + # ---------------------------------------------------- + # Add DropNeuro penalty (P_i) can remove neurons of previous layer + # If previous layer is InputLayer, it means remove useless features + # print(' use: mse, L2_w, KLD, P_i') + # self.cost = mse + L2_w + KLD + P_i + else: + raise Exception("Don't support the given reconstruct activation function") + + self.train_op = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, + epsilon=1e-08, use_locking=False).minimize(self.cost, var_list=self.train_params) + # self.train_op = 
tf.train.GradientDescentOptimizer(1.0).minimize(self.cost, var_list=self.train_params)
+
+    def pretrain(self, sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batch_size=128, print_freq=10,
+                  save=True, save_name='w1pre_'):
+        # ====================================================
+        #
+        # You need to modify the cost function in __init__() so as to
+        # get your own pre-train method.
+        #
+        # ====================================================
+        print("     tensorlayer:  %s start pretrain" % self.name)
+        print("     batch_size: %d" % batch_size)
+        if denoise_name:
+            print("     denoising layer keep: %f" % self.all_drop[set_keep[denoise_name]])
+            dp_denoise = self.all_drop[set_keep[denoise_name]]
+        else:
+            print("     no denoising layer")
+
+        for epoch in range(n_epoch):
+            start_time = time.time()
+            for X_train_a, _ in iterate.minibatches(X_train, X_train, batch_size, shuffle=True):
+                dp_dict = utils.dict_to_one( self.all_drop )
+                if denoise_name:
+                    dp_dict[set_keep[denoise_name]] = dp_denoise
+                feed_dict = {x: X_train_a}
+                feed_dict.update(dp_dict)
+                sess.run(self.train_op, feed_dict=feed_dict)
+
+            if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
+                print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
+                train_loss, n_batch = 0, 0
+                for X_train_a, _ in iterate.minibatches(X_train, X_train, batch_size, shuffle=True):
+                    dp_dict = utils.dict_to_one( self.all_drop )
+                    feed_dict = {x: X_train_a}
+                    feed_dict.update(dp_dict)
+                    err = sess.run(self.cost, feed_dict=feed_dict)
+                    train_loss += err
+                    n_batch += 1
+                print("   train loss: %f" % (train_loss/ n_batch))
+                val_loss, n_batch = 0, 0
+                for X_val_a, _ in iterate.minibatches(X_val, X_val, batch_size, shuffle=True):
+                    dp_dict = utils.dict_to_one( self.all_drop )
+                    feed_dict = {x: X_val_a}
+                    feed_dict.update(dp_dict)
+                    err = sess.run(self.cost, feed_dict=feed_dict)
+                    val_loss += err
+                    n_batch += 1
+                print("   val loss: %f" % (val_loss/ n_batch))
+                if save:
+                    try:
+                        visualize.W(self.train_params[0].eval(), second=10, saveable=True, shape=[28,28], name=save_name+str(epoch+1), fig_idx=2012)
+                        files.save_npz([self.all_params[0]] , name=save_name+str(epoch+1)+'.npz')
+                    except:
+                        raise Exception("You should change visualize.W() if you want to save the feature images for a different dataset")
+
+
+## Noise layer
+class DropoutLayer(Layer):
+    """
+    The :class:`DropoutLayer` class is a noise layer which randomly sets some
+    values to zero with a given keeping probability.
+
+    Parameters
+    ----------
+    layer : a :class:`Layer` instance
+        The `Layer` class feeding into this layer.
+    keep : float
+        The keeping probability; the lower it is, the more values will be set to zero.
+    name : a string or None
+        An optional name to attach to this layer.
+
+    Examples
+    --------
+    >>> network = tl.layers.InputLayer(x, name='input_layer')
+    >>> network = tl.layers.DropoutLayer(network, keep=0.8, name='drop1')
+    >>> network = tl.layers.DenseLayer(network, n_units=800, act = tf.nn.relu, name='relu1')
+    """
+    def __init__(
+        self,
+        layer = None,
+        keep = 0.5,
+        name = 'dropout_layer',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+        print("  tensorlayer:Instantiate DropoutLayer %s: keep: %f" % (self.name, keep))
+
+        # The name of the placeholder for keep_prob is the same as the name
+        # of the Layer.
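+        # A sketch of how this placeholder is used at run time (assuming a session
+        # ``sess``, an input placeholder ``x`` and a batch ``X_batch``): training
+        # feeds the keep probabilities stored in ``all_drop``, while evaluation
+        # disables dropout by feeding 1.0 into every keep placeholder, e.g.
+        #     feed_dict = {x: X_batch}
+        #     feed_dict.update(network.all_drop)                      # training: keep=0.8
+        #     # ... or, for evaluation, disable dropout instead:
+        #     feed_dict.update(utils.dict_to_one(network.all_drop))   # testing: keep=1.0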
+ set_keep[name] = tf.placeholder(tf.float32) + self.outputs = tf.nn.dropout(self.inputs, set_keep[name], name=name) # 1.2 + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_drop.update( {set_keep[name]: keep} ) + self.all_layers.extend( [self.outputs] ) + + # print(set_keep[name]) + # Tensor("Placeholder_2:0", dtype=float32) + # print(denoising1) + # Tensor("Placeholder_2:0", dtype=float32) + # print(self.all_drop[denoising1]) + # 0.8 + # + # https://www.tensorflow.org/versions/r0.8/tutorials/mnist/tf/index.html + # The optional feed_dict argument allows the caller to override the + # value of tensors in the graph. Each key in feed_dict can be one of + # the following types: + # If the key is a Tensor, the value may be a Python scalar, string, + # list, or numpy ndarray that can be converted to the same dtype as that + # tensor. Additionally, if the key is a placeholder, the shape of the + # value will be checked for compatibility with the placeholder. + # If the key is a SparseTensor, the value should be a SparseTensorValue. + +class DropconnectDenseLayer(Layer): + """ + The :class:`DropconnectDenseLayer` class is ``DenseLayer`` with DropConnect + behaviour which randomly remove connection between this layer to previous + layer by a given keeping probability. + + Parameters + ---------- + layer : a :class:`Layer` instance + The `Layer` class feeding into this layer. + keep : float + The keeping probability, the lower more values will be set to zero. + n_units : int + The number of units of the layer. + act : activation function + The function that is applied to the layer activations. + W_init : weights initializer + The initializer for initializing the weight matrix. + b_init : biases initializer + The initializer for initializing the bias vector. + W_init_args : dictionary + The arguments for the weights tf.get_variable(). + b_init_args : dictionary + The arguments for the biases tf.get_variable(). + name : a string or None + An optional name to attach to this layer. + + Examples + -------- + >>> network = tl.layers.InputLayer(x, name='input_layer') + >>> network = tl.layers.DropconnectDenseLayer(network, keep = 0.8, + ... n_units=800, act = tf.nn.relu, name='dropconnect_relu1') + >>> network = tl.layers.DropconnectDenseLayer(network, keep = 0.5, + ... n_units=800, act = tf.nn.relu, name='dropconnect_relu2') + >>> network = tl.layers.DropconnectDenseLayer(network, keep = 0.5, + ... n_units=10, act = tl.activation.identity, name='output_layer') + + References + ---------- + - `Wan, L. (2013). 
Regularization of neural networks using dropconnect `_ + """ + def __init__( + self, + layer = None, + keep = 0.5, + n_units = 100, + act = tf.nn.relu, + W_init = tf.truncated_normal_initializer(stddev=0.1), + b_init = tf.constant_initializer(value=0.0), + W_init_args = {}, + b_init_args = {}, + name ='dropconnect_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + if self.inputs.get_shape().ndims != 2: + raise Exception("The input dimension must be rank 2") + n_in = int(self.inputs._shape[-1]) + self.n_units = n_units + print(" tensorlayer:Instantiate DropconnectDenseLayer %s: %d, %s" % (self.name, self.n_units, act.__name__)) + + with tf.variable_scope(name) as vs: + W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, **W_init_args ) + b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, **b_init_args ) + self.outputs = act(tf.matmul(self.inputs, W) + b)#, name=name) # 1.2 + + set_keep[name] = tf.placeholder(tf.float32) + W_dropcon = tf.nn.dropout(W, set_keep[name]) + self.outputs = act(tf.matmul(self.inputs, W_dropcon) + b) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_drop.update( {set_keep[name]: keep} ) + self.all_layers.extend( [self.outputs] ) + self.all_params.extend( [W, b] ) + + +## Convolutional layer +class Conv2dLayer(Layer): + """ + The :class:`Conv2dLayer` class is a 2D CNN layer, see ``tf.nn.conv2d``. + + Parameters + ---------- + layer : a :class:`Layer` instance + The `Layer` class feeding into this layer. + act : activation function + The function that is applied to the layer activations. + shape : list of shape + shape of the filters, [filter_height, filter_width, in_channels, out_channels]. + strides : a list of ints. + The stride of the sliding window for each dimension of input.\n + It Must be in the same order as the dimension specified with format. + padding : a string from: "SAME", "VALID". + The type of padding algorithm to use. + W_init : weights initializer + The initializer for initializing the weight matrix. + b_init : biases initializer or None + The initializer for initializing the bias vector. If None, skip biases. + W_init_args : dictionary + The arguments for the weights tf.get_variable(). + b_init_args : dictionary + The arguments for the biases tf.get_variable(). + name : a string or None + An optional name to attach to this layer. + + + Examples + -------- + >>> x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1]) + >>> network = tl.layers.InputLayer(x, name='input_layer') + >>> network = tl.layers.Conv2dLayer(network, + ... act = tf.nn.relu, + ... shape = [5, 5, 1, 32], # 32 features for each 5x5 patch + ... strides=[1, 1, 1, 1], + ... padding='SAME', + ... W_init=tf.truncated_normal_initializer(stddev=5e-2), + ... W_init_args={}, + ... b_init = tf.constant_initializer(value=0.0), + ... b_init_args = {}, + ... name ='cnn_layer1') # output: (?, 28, 28, 32) + >>> network = tl.layers.PoolLayer(network, + ... ksize=[1, 2, 2, 1], + ... strides=[1, 2, 2, 1], + ... padding='SAME', + ... pool = tf.nn.max_pool, + ... name ='pool_layer1',) # output: (?, 14, 14, 32) + + >>> Without TensorLayer, you can implement 2d convolution as follow. + >>> W = tf.Variable(W_init(shape=[5, 5, 1, 32], ), name='W_conv') + >>> b = tf.Variable(b_init(shape=[32], ), name='b_conv') + >>> outputs = tf.nn.relu( tf.nn.conv2d(inputs, W, + ... strides=[1, 1, 1, 1], + ... 
padding='SAME') + b ) + + References + ---------- + - `tf.nn.conv2d `_ + """ + def __init__( + self, + layer = None, + act = tf.nn.relu, + shape = [5, 5, 1, 100], + strides=[1, 1, 1, 1], + padding='SAME', + W_init = tf.truncated_normal_initializer(stddev=0.02), + b_init = tf.constant_initializer(value=0.0), + W_init_args = {}, + b_init_args = {}, + name ='cnn_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + print(" tensorlayer:Instantiate Conv2dLayer %s: %s, %s, %s, %s" % + (self.name, str(shape), str(strides), padding, act.__name__)) + + with tf.variable_scope(name) as vs: + W = tf.get_variable(name='W_conv2d', shape=shape, initializer=W_init, **W_init_args ) + if b_init: + b = tf.get_variable(name='b_conv2d', shape=(shape[-1]), initializer=b_init, **b_init_args ) + self.outputs = act( tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding) + b ) #1.2 + else: + self.outputs = act( tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding)) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend( [self.outputs] ) + if b_init: + self.all_params.extend( [W, b] ) + else: + self.all_params.extend( [W] ) + +class DeConv2dLayer(Layer): + """ + The :class:`DeConv2dLayer` class is deconvolutional 2D layer, see ``tf.nn.conv2d_transpose``. + + Parameters + ---------- + layer : a :class:`Layer` instance + The `Layer` class feeding into this layer. + act : activation function + The function that is applied to the layer activations. + shape : list of shape + shape of the filters, [height, width, output_channels, in_channels], filter's in_channels dimension must match that of value. + output_shape : list of output shape + representing the output shape of the deconvolution op. + strides : a list of ints. + The stride of the sliding window for each dimension of the input tensor. + padding : a string from: "SAME", "VALID". + The type of padding algorithm to use. + W_init : weights initializer + The initializer for initializing the weight matrix. + b_init : biases initializer + The initializer for initializing the bias vector. If None, skip biases. + W_init_args : dictionary + The arguments for the weights initializer. + b_init_args : dictionary + The arguments for the biases initializer. + name : a string or None + An optional name to attach to this layer. + + Examples + --------- + - A part of the generator in DCGAN example + >>> inputs = tf.placeholder(tf.float32, [64, 100], name='z_noise') + >>> net_in = tl.layers.InputLayer(inputs, name='g/in') + >>> net_h0 = tl.layers.DenseLayer(net_in, n_units = 8192, + ... W_init = tf.random_normal_initializer(stddev=0.02), + ... act = tf.identity, name='g/h0/lin') + >>> print(net_h0.outputs) + ... (64, 8192) + >>> net_h0 = tl.layers.ReshapeLayer(net_h0, shape = [-1, 4, 4, 512], name='g/h0/reshape') + >>> net_h0 = tl.layers.BatchNormLayer(net_h0, is_train=is_train, name='g/h0/batch_norm') + >>> net_h0.outputs = tf.nn.relu(net_h0.outputs, name='g/h0/relu') + >>> print(net_h0.outputs) + ... (64, 4, 4, 512) + >>> net_h1 = tl.layers.DeConv2dLayer(net_h0, + ... shape = [5, 5, 256, 512], + ... output_shape = [64, 8, 8, 256], + ... strides=[1, 2, 2, 1], + ... act=tf.identity, name='g/h1/decon2d') + >>> net_h1 = tl.layers.BatchNormLayer(net_h1, is_train=is_train, name='g/h1/batch_norm') + >>> net_h1.outputs = tf.nn.relu(net_h1.outputs, name='g/h1/relu') + >>> print(net_h1.outputs) + ... 
(64, 8, 8, 256) + + References + ---------- + - `tf.nn.conv2d_transpose `_ + """ + def __init__( + self, + layer = None, + act = tf.nn.relu, + shape = [3, 3, 128, 256], + output_shape = [1, 256, 256, 128], + strides = [1, 2, 2, 1], + padding = 'SAME', + W_init = tf.truncated_normal_initializer(stddev=0.02), + b_init = tf.constant_initializer(value=0.0), + W_init_args = {}, + b_init_args = {}, + name ='decnn2d_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + print(" tensorlayer:Instantiate DeConv2dLayer %s: %s, %s, %s, %s, %s" % + (self.name, str(shape), str(output_shape), str(strides), padding, act)) + # print(" DeConv2dLayer: Untested") + with tf.variable_scope(name) as vs: + W = tf.get_variable(name='W_deconv2d', shape=shape, initializer=W_init, **W_init_args ) + if b_init: + b = tf.get_variable(name='b_deconv2d', shape=(shape[-2]), initializer=b_init, **b_init_args ) + self.outputs = act( tf.nn.conv2d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b ) + else: + self.outputs = act( tf.nn.conv2d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding)) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend( [self.outputs] ) + if b_init: + self.all_params.extend( [W, b] ) + else: + self.all_params.extend( [W] ) + +class Conv3dLayer(Layer): + """ + The :class:`Conv3dLayer` class is a 3D CNN layer, see ``tf.nn.conv3d``. + + Parameters + ---------- + layer : a :class:`Layer` instance + The `Layer` class feeding into this layer. + act : activation function + The function that is applied to the layer activations. + shape : list of shape + shape of the filters, [filter_depth, filter_height, filter_width, in_channels, out_channels]. + strides : a list of ints. 1-D of length 4. + The stride of the sliding window for each dimension of input. Must be in the same order as the dimension specified with format. + padding : a string from: "SAME", "VALID". + The type of padding algorithm to use. + W_init : weights initializer + The initializer for initializing the weight matrix. + b_init : biases initializer + The initializer for initializing the bias vector. + W_init_args : dictionary + The arguments for the weights initializer. + b_init_args : dictionary + The arguments for the biases initializer. + name : a string or None + An optional name to attach to this layer. 
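+
+    Examples
+    --------
+    - A minimal sketch (the input shape is assumed); with stride 2 and 'SAME'
+      padding, every spatial dimension is halved:
+    >>> x = tf.placeholder(tf.float32, shape=[None, 16, 32, 32, 3])
+    >>> network = tl.layers.InputLayer(x, name='input_layer')
+    >>> network = tl.layers.Conv3dLayer(network,
+    ...                     act = tf.nn.relu,
+    ...                     shape = [2, 2, 2, 3, 32],     # 32 features for each 2x2x2 patch
+    ...                     strides = [1, 2, 2, 2, 1],
+    ...                     padding = 'SAME',
+    ...                     name = 'cnn3d_layer1')        # output: (?, 8, 16, 16, 32)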
+ + References + ---------- + - `tf.nn.conv3d `_ + """ + def __init__( + self, + layer = None, + act = tf.nn.relu, + shape = [2, 2, 2, 64, 128], + strides=[1, 2, 2, 2, 1], + padding='SAME', + W_init = tf.truncated_normal_initializer(stddev=0.02), + b_init = tf.constant_initializer(value=0.0), + W_init_args = {}, + b_init_args = {}, + name ='cnn3d_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + print(" tensorlayer:Instantiate Conv3dLayer %s: %s, %s, %s, %s" % (self.name, str(shape), str(strides), padding, act.__name__)) + + with tf.variable_scope(name) as vs: + # W = tf.Variable(W_init(shape=shape, **W_init_args), name='W_conv') + # b = tf.Variable(b_init(shape=[shape[-1]], **b_init_args), name='b_conv') + W = tf.get_variable(name='W_conv3d', shape=shape, initializer=W_init, **W_init_args ) + b = tf.get_variable(name='b_conv3d', shape=(shape[-1]), initializer=b_init, **b_init_args ) + self.outputs = act( tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b ) + + # self.outputs = act( tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b ) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend( [self.outputs] ) + self.all_params.extend( [W, b] ) + +class DeConv3dLayer(Layer): + """ + The :class:`DeConv3dLayer` class is deconvolutional 3D layer, see ``tf.nn.conv3d_transpose``. + + Parameters + ---------- + layer : a :class:`Layer` instance + The `Layer` class feeding into this layer. + act : activation function + The function that is applied to the layer activations. + shape : list of shape + shape of the filters, [depth, height, width, output_channels, in_channels], filter's in_channels dimension must match that of value. + output_shape : list of output shape + representing the output shape of the deconvolution op. + strides : a list of ints. + The stride of the sliding window for each dimension of the input tensor. + padding : a string from: "SAME", "VALID". + The type of padding algorithm to use. + W_init : weights initializer + The initializer for initializing the weight matrix. + b_init : biases initializer + The initializer for initializing the bias vector. + W_init_args : dictionary + The arguments for the weights initializer. + b_init_args : dictionary + The arguments for the biases initializer. + name : a string or None + An optional name to attach to this layer. 
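+
+    Examples
+    --------
+    - A minimal sketch using the default arguments below; ``net`` is an assumed
+      layer whose output shape is [1, 6, 16, 16, 256], upsampled by 2 in every
+      spatial dimension:
+    >>> net = tl.layers.DeConv3dLayer(net,
+    ...                     shape = [2, 2, 2, 128, 256],          # 256 channels in, 128 out
+    ...                     output_shape = [1, 12, 32, 32, 128],
+    ...                     strides = [1, 2, 2, 2, 1],
+    ...                     name = 'decnn3d_layer1')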
+
+    References
+    ----------
+    - `tf.nn.conv3d_transpose `_
+    """
+    def __init__(
+        self,
+        layer = None,
+        act = tf.nn.relu,
+        shape = [2, 2, 2, 128, 256],
+        output_shape = [1, 12, 32, 32, 128],
+        strides = [1, 2, 2, 2, 1],
+        padding = 'SAME',
+        W_init = tf.truncated_normal_initializer(stddev=0.02),
+        b_init = tf.constant_initializer(value=0.0),
+        W_init_args = {},
+        b_init_args = {},
+        name ='decnn3d_layer',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+        print("  tensorlayer:Instantiate DeConv3dLayer %s: %s, %s, %s, %s, %s" %
+                            (self.name, str(shape), str(output_shape), str(strides), padding, act))
+
+        with tf.variable_scope(name) as vs:
+            W = tf.get_variable(name='W_deconv3d', shape=shape, initializer=W_init, **W_init_args )
+            b = tf.get_variable(name='b_deconv3d', shape=(shape[-2]), initializer=b_init, **b_init_args )
+
+            self.outputs = act( tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b )
+
+        self.all_layers = list(layer.all_layers)
+        self.all_params = list(layer.all_params)
+        self.all_drop = dict(layer.all_drop)
+        self.all_layers.extend( [self.outputs] )
+        self.all_params.extend( [W, b] )
+
+## Normalization layer
+class BatchNormLayer(Layer):
+    """
+    The :class:`BatchNormLayer` class is a normalization layer, see ``tf.nn.batch_normalization``.
+
+    Batch normalization on fully-connected or convolutional maps.
+
+    Parameters
+    -----------
+    layer : a :class:`Layer` instance
+        The `Layer` class feeding into this layer.
+    decay : float
+        A decay factor for ExponentialMovingAverage.
+    epsilon : float
+        A small float number to avoid dividing by 0.
+    is_train : boolean
+        Whether training or inference.
+    name : a string or None
+        An optional name to attach to this layer.
+
+    References
+    ----------
+    - `tf.nn.batch_normalization `_
+    - `stackoverflow `_
+    - `tensorflow.contrib `_
+    """
+    def __init__(
+        self,
+        layer = None,
+        decay = 0.999,
+        epsilon = 0.001,
+        is_train = None,
+        name ='batchnorm_layer',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+        print("  tensorlayer:Instantiate BatchNormLayer %s: decay: %f, epsilon: %f, is_train: %s" %
+                            (self.name, decay, epsilon, is_train))
+        if is_train is None:
+            raise Exception("is_train must be True or False")
+
+        # (name, input_var, decay, epsilon, is_train)
+        inputs_shape = self.inputs.get_shape()
+        axis = list(range(len(inputs_shape) - 1))
+        params_shape = inputs_shape[-1:]
+
+        with tf.variable_scope(name) as vs:
+            beta = tf.get_variable(name='beta', shape=params_shape,
+                                   initializer=tf.constant_initializer(0.0))
+            gamma = tf.get_variable(name='gamma', shape=params_shape,
+                                    initializer=tf.constant_initializer(1.0))
+            batch_mean, batch_var = tf.nn.moments(self.inputs,
+                                                  axis,
+                                                  name='moments')
+            ema = tf.train.ExponentialMovingAverage(decay=decay)
+
+            def mean_var_with_update():
+                ema_apply_op = ema.apply([batch_mean, batch_var])
+                with tf.control_dependencies([ema_apply_op]):
+                    return tf.identity(batch_mean), tf.identity(batch_var)
+
+            if is_train:
+                is_train = tf.cast(tf.ones(1), tf.bool)
+            else:
+                is_train = tf.cast(tf.zeros(1), tf.bool)
+
+            is_train = tf.reshape(is_train, [])
+
+            mean, var = tf.cond(
+                is_train,
+                mean_var_with_update,
+                lambda: (ema.average(batch_mean), ema.average(batch_var))
+            )
+            normed = tf.nn.batch_normalization(
+                x=self.inputs,
+                mean=mean,
+                variance=var,
+                offset=beta,
+                scale=gamma,
+                variance_epsilon=epsilon,
+                name='tf_bn'
+            )
+            self.outputs = normed
+
+        self.all_layers = 
list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend( [self.outputs] ) + self.all_params.extend( [beta, gamma] ) + + +## Pooling layer +class PoolLayer(Layer): + """ + The :class:`PoolLayer` class is a Pooling layer, you can choose + ``tf.nn.max_pool`` and ``tf.nn.avg_pool`` for 2D or + ``tf.nn.max_pool3d()`` and ``tf.nn.avg_pool3d()`` for 3D. + + Parameters + ---------- + layer : a :class:`Layer` instance + The `Layer` class feeding into this layer. + ksize : a list of ints that has length >= 4. + The size of the window for each dimension of the input tensor. + strides : a list of ints that has length >= 4. + The stride of the sliding window for each dimension of the input tensor. + padding : a string from: "SAME", "VALID". + The type of padding algorithm to use. + pool : a pooling function + tf.nn.max_pool , tf.nn.avg_pool ... + name : a string or None + An optional name to attach to this layer. + + Examples + -------- + - see Conv2dLayer + + References + ---------- + - `TensorFlow Pooling `_ + """ + def __init__( + self, + layer = None, + ksize=[1, 2, 2, 1], + strides=[1, 2, 2, 1], + padding='SAME', + pool = tf.nn.max_pool, + name ='pool_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + print(" tensorlayer:Instantiate PoolLayer %s: %s, %s, %s, %s" % + (self.name, str(ksize), str(strides), padding, pool.__name__)) + + self.outputs = pool(self.inputs, ksize=ksize, strides=strides, padding=padding, name=name) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend( [self.outputs] ) + + +## Recurrent layer +class RNNLayer(Layer): + """ + The :class:`RNNLayer` class is a RNN layer, you can implement vanilla RNN, + LSTM and GRU with it. + + Parameters + ---------- + layer : a :class:`Layer` instance + The `Layer` class feeding into this layer. + cell_fn : a TensorFlow's core RNN cell as follow. + - see `RNN Cells in TensorFlow `_ + - class ``tf.nn.rnn_cell.BasicRNNCell`` + - class ``tf.nn.rnn_cell.BasicLSTMCell`` + - class ``tf.nn.rnn_cell.GRUCell`` + - class ``tf.nn.rnn_cell.LSTMCell`` + cell_init_args : a dictionary + The arguments for the cell initializer. + n_hidden : a int + The number of hidden units in the layer. + initializer : initializer + The initializer for initializing the parameters. + n_steps : a int + The sequence length. + initial_state : None or RNN State + If None, initial_state is zero_state. + return_last : boolen + - If True, return the last output, "Sequence input and single output" + - If False, return all outputs, "Synced sequence input and output" + - In other word, if you want to apply one or more RNN(s) on this layer, set to False. + return_seq_2d : boolen + - When return_last = False + - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. + - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. + name : a string or None + An optional name to attach to this layer. + + Variables + -------------- + outputs : a tensor + The output of this RNN. + return_last = False, outputs = all cell_output, which is the hidden state. + cell_output.get_shape() = (?, n_hidden) + + final_state : a tensor or StateTuple + When state_is_tuple = False, + it is the final hidden and cell states, states.get_shape() = [?, 2 * n_hidden].\n + When state_is_tuple = True, it stores two elements: (c, h), in that order. 
+ You can get the final state after each iteration during training, then + feed it to the initial state of next iteration. + + initial_state : a tensor or StateTuple + It is the initial state of this RNN layer, you can use it to initialize + your state at the begining of each epoch or iteration according to your + training procedure. + + Examples + -------- + - For words + >>> input_data = tf.placeholder(tf.int32, [batch_size, num_steps]) + >>> network = tl.layers.EmbeddingInputlayer( + ... inputs = input_data, + ... vocabulary_size = vocab_size, + ... embedding_size = hidden_size, + ... E_init = tf.random_uniform_initializer(-init_scale, init_scale), + ... name ='embedding_layer') + >>> if is_training: + >>> network = tl.layers.DropoutLayer(network, keep=keep_prob, name='drop1') + >>> network = tl.layers.RNNLayer(network, + ... cell_fn=tf.nn.rnn_cell.BasicLSTMCell, + ... cell_init_args={'forget_bias': 0.0},# 'state_is_tuple': True}, + ... n_hidden=hidden_size, + ... initializer=tf.random_uniform_initializer(-init_scale, init_scale), + ... n_steps=num_steps, + ... return_last=False, + ... name='basic_lstm_layer1') + >>> lstm1 = network + >>> if is_training: + >>> network = tl.layers.DropoutLayer(network, keep=keep_prob, name='drop2') + >>> network = tl.layers.RNNLayer(network, + ... cell_fn=tf.nn.rnn_cell.BasicLSTMCell, + ... cell_init_args={'forget_bias': 0.0}, # 'state_is_tuple': True}, + ... n_hidden=hidden_size, + ... initializer=tf.random_uniform_initializer(-init_scale, init_scale), + ... n_steps=num_steps, + ... return_last=False, + ... return_seq_2d=True, + ... name='basic_lstm_layer2') + >>> lstm2 = network + >>> if is_training: + >>> network = tl.layers.DropoutLayer(network, keep=keep_prob, name='drop3') + >>> network = tl.layers.DenseLayer(network, + ... n_units=vocab_size, + ... W_init=tf.random_uniform_initializer(-init_scale, init_scale), + ... b_init=tf.random_uniform_initializer(-init_scale, init_scale), + ... act = tl.activation.identity, name='output_layer') + + - For CNN+LSTM + >>> x = tf.placeholder(tf.float32, shape=[batch_size, image_size, image_size, 1]) + >>> network = tl.layers.InputLayer(x, name='input_layer') + >>> network = tl.layers.Conv2dLayer(network, + ... act = tf.nn.relu, + ... shape = [5, 5, 1, 32], # 32 features for each 5x5 patch + ... strides=[1, 2, 2, 1], + ... padding='SAME', + ... name ='cnn_layer1') + >>> network = tl.layers.PoolLayer(network, + ... ksize=[1, 2, 2, 1], + ... strides=[1, 2, 2, 1], + ... padding='SAME', + ... pool = tf.nn.max_pool, + ... name ='pool_layer1') + >>> network = tl.layers.Conv2dLayer(network, + ... act = tf.nn.relu, + ... shape = [5, 5, 32, 10], # 10 features for each 5x5 patch + ... strides=[1, 2, 2, 1], + ... padding='SAME', + ... name ='cnn_layer2') + >>> network = tl.layers.PoolLayer(network, + ... ksize=[1, 2, 2, 1], + ... strides=[1, 2, 2, 1], + ... padding='SAME', + ... pool = tf.nn.max_pool, + ... name ='pool_layer2') + >>> network = tl.layers.FlattenLayer(network, name='flatten_layer') + >>> network = tl.layers.ReshapeLayer(network, shape=[-1, num_steps, int(network.outputs._shape[-1])]) + >>> rnn1 = tl.layers.RNNLayer(network, + ... cell_fn=tf.nn.rnn_cell.LSTMCell, + ... cell_init_args={}, + ... n_hidden=200, + ... initializer=tf.random_uniform_initializer(-0.1, 0.1), + ... n_steps=num_steps, + ... return_last=False, + ... return_seq_2d=True, + ... name='rnn_layer') + >>> network = tl.layers.DenseLayer(rnn1, n_units=3, + ... 
act = tl.activation.identity, name='output_layer') + + Notes + ----- + Input dimension should be rank 3 : [batch_size, n_steps(max), n_features], if no, please see :class:`ReshapeLayer`. + + References + ---------- + - `Neural Network RNN Cells in TensorFlow `_ + - `tensorflow/python/ops/rnn.py `_ + - `tensorflow/python/ops/rnn_cell.py `_ + - see TensorFlow tutorial ``ptb_word_lm.py``, TensorLayer tutorials ``tutorial_ptb_lstm*.py`` and ``tutorial_generate_text.py`` + """ + def __init__( + self, + layer = None, + cell_fn = tf.nn.rnn_cell.BasicRNNCell, + cell_init_args = {}, + n_hidden = 100, + initializer = tf.random_uniform_initializer(-0.1, 0.1), + n_steps = 5, + initial_state = None, + return_last = False, + # is_reshape = True, + return_seq_2d = False, + name = 'rnn_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + + print(" tensorlayer:Instantiate RNNLayer %s: n_hidden:%d, n_steps:%d, in_dim:%d %s, cell_fn:%s " % (self.name, n_hidden, + n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__)) + # You can get the dimension by .get_shape() or ._shape, and check the + # dimension by .with_rank() as follow. + # self.inputs.get_shape().with_rank(2) + # self.inputs.get_shape().with_rank(3) + + # Input dimension should be rank 3 [batch_size, n_steps(max), n_features] + try: + self.inputs.get_shape().with_rank(3) + except: + raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps(max), n_features]") + + + # is_reshape : boolen (deprecate) + # Reshape the inputs to 3 dimension tensor.\n + # If input is[batch_size, n_steps, n_features], we do not need to reshape it.\n + # If input is [batch_size * n_steps, n_features], we need to reshape it. + # if is_reshape: + # self.inputs = tf.reshape(self.inputs, shape=[-1, n_steps, int(self.inputs._shape[-1])]) + + fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] + + if fixed_batch_size.value: + batch_size = fixed_batch_size.value + print(" RNN batch_size (concurrent processes): %d" % batch_size) + else: + from tensorflow.python.ops import array_ops + batch_size = array_ops.shape(self.inputs)[0] + print(" non specified batch_size, uses a tensor instead.") + self.batch_size = batch_size + + # Simplified version of tensorflow.models.rnn.rnn.py's rnn(). + # This builds an unrolled LSTM for tutorial purposes only. + # In general, use the rnn() or state_saving_rnn() from rnn.py. + # + # The alternative version of the code below is: + # + # from tensorflow.models.rnn import rnn + # inputs = [tf.squeeze(input_, [1]) + # for input_ in tf.split(1, num_steps, inputs)] + # outputs, state = rnn.rnn(cell, inputs, initial_state=self._initial_state) + outputs = [] + self.cell = cell = cell_fn(num_units=n_hidden, **cell_init_args) + if initial_state is None: + self.initial_state = cell.zero_state(batch_size, dtype=tf.float32) # 1.2.3 + state = self.initial_state + # with tf.variable_scope("model", reuse=None, initializer=initializer): + with tf.variable_scope(name, initializer=initializer) as vs: + for time_step in range(n_steps): + if time_step > 0: tf.get_variable_scope().reuse_variables() + (cell_output, state) = cell(self.inputs[:, time_step, :], state) + outputs.append(cell_output) + + # Retrieve just the RNN variables. 
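+            # Note (an illustration; exact variable names depend on the cell): for
+            # a BasicLSTMCell in this TF version, the collection gathered below
+            # typically holds the cell's concatenated weight matrix and bias, e.g.
+            # '<name>/BasicLSTMCell/Linear/Matrix' and '<name>/BasicLSTMCell/Linear/Bias',
+            # which are then extended into `all_params` so they are trained and
+            # saved with the rest of the network.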
+ # rnn_variables = [v for v in tf.all_variables() if v.name.startswith(vs.name)] + rnn_variables = tf.get_collection(tf.GraphKeys.VARIABLES, scope=vs.name) + + print(" n_params : %d" % (len(rnn_variables))) + + if return_last: + # 2D Tensor [batch_size, n_hidden] + self.outputs = outputs[-1] + else: + if return_seq_2d: + # PTB tutorial: stack dense layer after that, or compute the cost from the output + # 2D Tensor [n_example, n_hidden] + self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_hidden]) + else: + # : stack more RNN layer after that + # 3D Tensor [n_example/n_steps, n_steps, n_hidden] + self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_steps, n_hidden]) + + self.final_state = state + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + # print(type(self.outputs)) + self.all_layers.extend( [self.outputs] ) + self.all_params.extend( rnn_variables ) + +# Dynamic RNN + +def advanced_indexing_op(input, index): + """ Advanced Indexing for Sequences. see TFlearn.""" + batch_size = tf.shape(input)[0] + max_length = int(input.get_shape()[1]) + dim_size = int(input.get_shape()[2]) + index = tf.range(0, batch_size) * max_length + (index - 1) + flat = tf.reshape(input, [-1, dim_size]) + relevant = tf.gather(flat, index) + return relevant + +def retrieve_seq_length_op(data): + """ An op to compute the length of a sequence. 0 are masked. see TFlearn.""" + with tf.name_scope('GetLength'): + used = tf.sign(tf.reduce_max(tf.abs(data), reduction_indices=2)) + length = tf.reduce_sum(used, reduction_indices=1) + length = tf.cast(length, tf.int32) + return length + +class DynamicRNNLayer(Layer): + """ + The :class:`DynamicRNNLayer` class is a Dynamic RNN layer, see ``tf.nn.dynamic_rnn``. + + Parameters + ---------- + layer : a :class:`Layer` instance + The `Layer` class feeding into this layer. + cell_fn : a TensorFlow's core RNN cell as follow. + - see `RNN Cells in TensorFlow `_ + - class ``tf.nn.rnn_cell.BasicRNNCell`` + - class ``tf.nn.rnn_cell.BasicLSTMCell`` + - class ``tf.nn.rnn_cell.GRUCell`` + - class ``tf.nn.rnn_cell.LSTMCell`` + cell_init_args : a dictionary + The arguments for the cell initializer. + n_hidden : a int + The number of hidden units in the layer. + initializer : initializer + The initializer for initializing the parameters. + sequence_length : a tensor, array or None + The sequence length of each row of input data. If None, automatically calculate the sequence length for the data. + initial_state : None or RNN State + If None, initial_state is zero_state. + dropout : `tuple` of `float`: (input_keep_prob, output_keep_prob). + The input and output keep probability. + n_layer : a int, default is 1. + The number of RNN layers. + return_last : boolen + - If True, return the last output, "Sequence input and single output" + - If False, return all outputs, "Synced sequence input and output" + - In other word, if you want to apply one or more RNN(s) on this layer, set to False. + return_seq_2d : boolen + - When return_last = False + - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer or computing cost after it. + - If False, return 3D Tensor [n_example/n_steps(max), n_steps(max), n_hidden], for stacking multiple RNN after it. + name : a string or None + An optional name to attach to this layer. + + Variables + ------------ + outputs : a tensor + The output of this RNN. + return_last = False, outputs = all cell_output, which is the hidden state. 
+ cell_output.get_shape() = (?, n_hidden) + + final_state : a tensor or StateTuple + When state_is_tuple = False, + it is the final hidden and cell states, states.get_shape() = [?, 2 * n_hidden].\n + When state_is_tuple = True, it stores two elements: (c, h), in that order. + You can get the final state after each iteration during training, then + feed it to the initial state of next iteration. + + initial_state : a tensor or StateTuple + It is the initial state of this RNN layer, you can use it to initialize + your state at the begining of each epoch or iteration according to your + training procedure. + + Notes + ----- + Input dimension should be rank 3 : [batch_size, n_steps(max), n_features], if no, please see :class:`ReshapeLayer`. + + Examples + -------- + >>> input_feed = tf.placeholder(dtype=tf.int64, + ... shape=[None], # word id + ... name="input_feed") + >>> input_seqs = tf.expand_dims(input_feed, 1) + >>> network = tl.layers.EmbeddingInputlayer( + ... inputs = input_seqs, + ... vocabulary_size = vocab_size, + ... embedding_size = embedding_size, + ... name = 'seq_embedding') + >>> network = tl.layers.DynamicRNNLayer(network, + ... cell_fn = tf.nn.rnn_cell.BasicLSTMCell, + ... n_hidden = embedding_size, + ... dropout = 0.7, + ... return_seq_2d = True, # stack denselayer or compute cost after it + ... name = 'dynamic_rnn',) + ... network = tl.layers.DenseLayer(network, n_units=vocab_size, + ... act=tf.identity, name="output") + + References + ---------- + - `Wild-ML Blog `_ + - `dynamic_rnn.ipynb `_ + - `tf.nn.dynamic_rnn `_ + - `tflearn rnn `_ + - ``tutorial_dynamic_rnn.py`` + """ + def __init__( + self, + layer = None, + cell_fn = tf.nn.rnn_cell.LSTMCell, + cell_init_args = {'state_is_tuple' : True}, + n_hidden = 64, + initializer = tf.random_uniform_initializer(-0.1, 0.1), + sequence_length = None, + initial_state = None, + dropout = None, + n_layer = 1, + return_last = False, + return_seq_2d = False, + name = 'dyrnn_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + + print(" tensorlayer:Instantiate DynamicRNNLayer %s: n_hidden:%d, in_dim:%d %s, cell_fn:%s " % (self.name, n_hidden, + self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__)) + + # Input dimension should be rank 3 [batch_size, n_steps(max), n_features] + try: + self.inputs.get_shape().with_rank(3) + except: + raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps(max), n_features]") + + # Get the batch_size + fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] + if fixed_batch_size.value: + batch_size = fixed_batch_size.value + print(" batch_size (concurrent processes): %d" % batch_size) + else: + from tensorflow.python.ops import array_ops + batch_size = array_ops.shape(self.inputs)[0] + print(" non specified batch_size, uses a tensor instead.") + self.batch_size = batch_size + + # Creats the cell function + self.cell = cell_fn(num_units=n_hidden, **cell_init_args) + + # Apply dropout + if dropout: + if type(dropout) in [tuple, list]: + in_keep_prob = dropout[0] + out_keep_prob = dropout[1] + elif isinstance(dropout, float): + in_keep_prob, out_keep_prob = dropout, dropout + else: + raise Exception("Invalid dropout type (must be a 2-D tuple of " + "float)") + self.cell = tf.nn.rnn_cell.DropoutWrapper( + self.cell, + input_keep_prob=in_keep_prob, + output_keep_prob=out_keep_prob) + # Apply multiple layers + if n_layer > 1: + print(" n_layer: %d" % n_layer) + self.cell = tf.nn.rnn_cell.MultiRNNCell([self.cell] * n_layer, 
state_is_tuple=True) + + # Initialize initial_state + if initial_state is None: + self.initial_state = self.cell.zero_state(batch_size, dtype=tf.float32)#dtype="float") + else: + self.initial_state = initial_state + + # Computes sequence_length + if sequence_length is None: + sequence_length = retrieve_seq_length_op( + self.inputs if isinstance(self.inputs, tf.Tensor) else tf.pack(self.inputs)) + # print('sequence_length',sequence_length) + + # Main - Computes outputs and last_states + with tf.variable_scope(name, initializer=initializer) as vs: + outputs, last_states = tf.nn.dynamic_rnn( + cell=self.cell, + # inputs=X + inputs = self.inputs, + # dtype=tf.float64, + sequence_length=sequence_length, + initial_state = self.initial_state, + ) + # result = tf.contrib.learn.run_n( + # {"outputs": outputs, "last_states": last_states}, n=1, feed_dict=None) + rnn_variables = tf.get_collection(tf.GraphKeys.VARIABLES, scope=vs.name) + + print(" n_params : %d" % (len(rnn_variables))) + # exit() + # Manage the outputs + if return_last: + # [batch_size, n_hidden] + # outputs = tf.transpose(tf.pack(result[0]["outputs"]), [1, 0, 2]) + outputs = tf.transpose(tf.pack(outputs), [1, 0, 2]) + self.outputs = advanced_indexing_op(outputs, sequence_length) + else: + # [batch_size, n_step(max), n_hidden] + # self.outputs = result[0]["outputs"] + self.outputs = outputs + if return_seq_2d: + # PTB tutorial: + # 2D Tensor [n_example, n_hidden] + self.outputs = tf.reshape(tf.concat(1, self.outputs), [-1, n_hidden]) + # else: + # : + # 3D Tensor [batch_size, n_steps, n_hidden] + # self.outputs = tf.reshape(tf.concat(1, self.outputs), [-1, n_steps, n_hidden]) + + + # Final state + # self.final_state = result[0]["last_states"] + self.final_state = last_states + # print(self.final_state) + # exit() + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + + self.all_layers.extend( [self.outputs] ) + self.all_params.extend( rnn_variables ) + +# Bidirectional Dynamic RNN +class BiDynamicRNNLayer(Layer): + """ + The :class:`BiDynamicRNNLayer` class is a RNN layer, you can implement vanilla RNN, + LSTM and GRU with it. + + Parameters + ---------- + layer : a :class:`Layer` instance + The `Layer` class feeding into this layer. + cell_fn : a TensorFlow's core RNN cell as follow. + - see `RNN Cells in TensorFlow `_\n + - class ``tf.nn.rnn_cell.BasicRNNCell`` + - class ``tf.nn.rnn_cell.BasicLSTMCell`` + - class ``tf.nn.rnn_cell.GRUCell`` + - class ``tf.nn.rnn_cell.LSTMCell`` + cell_init_args : a dictionary + The arguments for the cell initializer. + n_hidden : a int + The number of hidden units in the layer. + n_steps : a int + The sequence length. + return_last : boolen + If True, return the last output, "Sequence input and single output"\n + If False, return all outputs, "Synced sequence input and output"\n + In other word, if you want to apply one or more RNN(s) on this layer, set to False. + return_seq_2d : boolen + When return_last = False\n + if True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. + if False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. + name : a string or None + An optional name to attach to this layer. + + Variables + ----------------------- + outputs : a tensor + The output of this RNN. + return_last = False, outputs = all cell_output, which is the hidden state. 
+        cell_output.get_shape() = (?, 2 * n_hidden), as the forward and
+        backward outputs are concatenated along the feature dimension.
+
+    final_state : a tuple of (states_fw, states_bw)
+        The final states of the forward and backward RNN.
+        When state_is_tuple = False, each is the final hidden and cell states,
+        states.get_shape() = [?, 2 * n_hidden].\n
+        When state_is_tuple = True, each stores two elements: (c, h), in that order.
+        You can get the final state after each iteration during training, then
+        feed it to the initial state of the next iteration.
+
+    Notes
+    -----
+    Input dimension should be rank 3 : [batch_size, n_steps(max), n_features], if not, please see :class:`ReshapeLayer`.
+
+
+    References
+    ----------
+    - `Wild-ML Blog `_
+    - `bidirectional_rnn.ipynb `_
+    """
+    def __init__(
+        self,
+        layer = None,
+        cell_fn = tf.nn.rnn_cell.LSTMCell,
+        cell_init_args = {'state_is_tuple' : True},
+        n_hidden = 64,
+        initializer = tf.random_uniform_initializer(-0.1, 0.1),
+        sequence_length = None,
+        return_last = False,
+        return_seq_2d = False,
+        name = 'birnn_layer',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+
+        print("  tensorlayer:Instantiate BiDynamicRNNLayer %s: n_hidden:%d, in_dim:%d %s, cell_fn:%s " % (self.name, n_hidden,
+            self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__))
+        print("     Untested !!!")
+
+        self.cell = cell = cell_fn(num_units=n_hidden, **cell_init_args)
+
+        # Automatically compute the sequence length if it is not given;
+        # all-zero feature vectors are treated as padding.
+        if sequence_length is None:
+            sequence_length = retrieve_seq_length_op(
+                self.inputs if isinstance(self.inputs, tf.Tensor) else tf.pack(self.inputs))
+
+        with tf.variable_scope(name, initializer=initializer) as vs:
+            outputs, states = tf.nn.bidirectional_dynamic_rnn(
+                cell_fw=cell,
+                cell_bw=cell,
+                dtype=tf.float32,
+                sequence_length=sequence_length,
+                inputs=self.inputs)
+
+            output_fw, output_bw = outputs
+            states_fw, states_bw = states
+
+            rnn_variables = tf.get_collection(tf.GraphKeys.VARIABLES, scope=vs.name)
+
+        print("     n_params : %d" % (len(rnn_variables)))
+
+        # Concatenate the forward and backward outputs :
+        # [batch_size, n_steps(max), 2 * n_hidden]
+        outputs = tf.concat(2, [output_fw, output_bw])
+
+        if return_last:
+            # 2D Tensor [batch_size, 2 * n_hidden]
+            self.outputs = advanced_indexing_op(outputs, sequence_length)
+        else:
+            if return_seq_2d:
+                # PTB tutorial: stack DenseLayer or compute the cost after it
+                # 2D Tensor [n_example, 2 * n_hidden]
+                self.outputs = tf.reshape(tf.concat(1, outputs), [-1, 2 * n_hidden])
+            else:
+                # stack more RNN layers after it
+                # 3D Tensor [batch_size, n_steps(max), 2 * n_hidden]
+                self.outputs = outputs
+
+        self.final_state = (states_fw, states_bw)
+
+        self.all_layers = list(layer.all_layers)
+        self.all_params = list(layer.all_params)
+        self.all_drop = dict(layer.all_drop)
+
+        self.all_layers.extend( [self.outputs] )
+        self.all_params.extend( rnn_variables )
+
+
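+# A minimal usage sketch for BiDynamicRNNLayer. The layer is still flagged
+# "Untested", so treat this as a guide under assumed shapes, not a guarantee;
+# all names below are illustrative.
+#
+# >>> x = tf.placeholder(tf.float32, shape=[None, None, 128])  # [batch, n_steps(max), n_features]
+# >>> net = tl.layers.InputLayer(x, name='input_layer')
+# >>> net = tl.layers.BiDynamicRNNLayer(net,
+# ...                     cell_fn = tf.nn.rnn_cell.LSTMCell,
+# ...                     n_hidden = 64,
+# ...                     return_seq_2d = True,  # 2D output to stack a DenseLayer
+# ...                     name = 'birnn')
+# >>> net = tl.layers.DenseLayer(net, n_units=10, act=tf.identity, name='output')
+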
+## Shape layer
+class FlattenLayer(Layer):
+    """
+    The :class:`FlattenLayer` class is a layer which reshapes a high-dimension
+    input to a vector, so that we can stack DenseLayer, RNNLayer, ConcatLayer
+    etc. on top of it.
+
+    [batch_size, mask_row, mask_col, n_mask] ---> [batch_size, mask_row * mask_col * n_mask]
+
+    Parameters
+    ----------
+    layer : a :class:`Layer` instance
+        The `Layer` class feeding into this layer.
+    name : a string or None
+        An optional name to attach to this layer.
+
+    Examples
+    --------
+    >>> x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
+    >>> network = tl.layers.InputLayer(x, name='input_layer')
+    >>> network = tl.layers.Conv2dLayer(network,
+    ...                    act = tf.nn.relu,
+    ...                    shape = [5, 5, 32, 64],
+    ...                    strides=[1, 1, 1, 1],
+    ...                    padding='SAME',
+    ...                    name ='cnn_layer')
+    >>> network = tl.layers.Pool2dLayer(network,
+    ...                    ksize=[1, 2, 2, 1],
+    ...                    strides=[1, 2, 2, 1],
+    ...                    padding='SAME',
+    ...                    pool = tf.nn.max_pool,
+    ...                    name ='pool_layer',)
+    >>> network = tl.layers.FlattenLayer(network, name='flatten_layer')
+    """
+    def __init__(
+        self,
+        layer = None,
+        name ='flatten_layer',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+        self.outputs = flatten_reshape(self.inputs, name=name)
+        self.n_units = int(self.outputs._shape[-1])
+        print("  tensorlayer:Instantiate FlattenLayer %s: %d" % (self.name, self.n_units))
+        self.all_layers = list(layer.all_layers)
+        self.all_params = list(layer.all_params)
+        self.all_drop = dict(layer.all_drop)
+        self.all_layers.extend( [self.outputs] )
+
+class ConcatLayer(Layer):
+    """
+    The :class:`ConcatLayer` class is a layer which concatenates (merges) two
+    or more :class:`Layer` instances into a single layer along the given dimension.
+
+    Parameters
+    ----------
+    layer : a list of :class:`Layer` instances
+        The `Layer` classes feeding into this layer.
+    concat_dim : int
+        Dimension along which to concatenate.
+    name : a string or None
+        An optional name to attach to this layer.
+
+    Examples
+    --------
+    >>> sess = tf.InteractiveSession()
+    >>> x = tf.placeholder(tf.float32, shape=[None, 784])
+    >>> inputs = tl.layers.InputLayer(x, name='input_layer')
+    >>> net1 = tl.layers.DenseLayer(inputs, n_units=800, act = tf.nn.relu, name='relu1_1')
+    >>> net2 = tl.layers.DenseLayer(inputs, n_units=300, act = tf.nn.relu, name='relu2_1')
+    >>> network = tl.layers.ConcatLayer(layer = [net1, net2], name ='concat_layer')
+    ...   tensorlayer:Instantiate InputLayer input_layer (?, 784)
+    ...   tensorlayer:Instantiate DenseLayer relu1_1: 800,
+    ...   tensorlayer:Instantiate DenseLayer relu2_1: 300,
+    ...   tensorlayer:Instantiate ConcatLayer concat_layer, 1100
+    ...
+    >>> sess.run(tf.initialize_all_variables())
+    >>> network.print_params()
+    ...   param 0: (784, 800) (mean: 0.000021, median: -0.000020 std: 0.035525)
+    ...   param 1: (800,) (mean: 0.000000, median: 0.000000 std: 0.000000)
+    ...   param 2: (784, 300) (mean: 0.000000, median: -0.000048 std: 0.042947)
+    ...   param 3: (300,) (mean: 0.000000, median: 0.000000 std: 0.000000)
+    ...   num of params: 863500
+    >>> network.print_layers()
+    ...   layer 0: Tensor("Relu:0", shape=(?, 800), dtype=float32)
+    ...   layer 1: Tensor("Relu_1:0", shape=(?, 300), dtype=float32)
+    ...
+    """
+    def __init__(
+        self,
+        layer = [],
+        concat_dim = 1,
+        name ='concat_layer',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = []
+        for l in layer:
+            self.inputs.append(l.outputs)
+        self.outputs = tf.concat(concat_dim, self.inputs, name=name) # 1.2
+        self.n_units = int(self.outputs._shape[-1])
+        print("  tensorlayer:Instantiate ConcatLayer %s, %d" % (self.name, self.n_units))
+
+        self.all_layers = list(layer[0].all_layers)
+        self.all_params = list(layer[0].all_params)
+        self.all_drop = dict(layer[0].all_drop)
+
+        for i in range(1, len(layer)):
+            self.all_layers.extend(list(layer[i].all_layers))
+            self.all_params.extend(list(layer[i].all_params))
+            self.all_drop.update(dict(layer[i].all_drop))
+
+class ReshapeLayer(Layer):
+    """
+    The :class:`ReshapeLayer` class is a layer which reshapes a given tensor.
+
+    Parameters
+    ----------
+    layer : a :class:`Layer` instance
+        The `Layer` class feeding into this layer.
+    shape : a list
+        The output shape.
+    name : a string or None
+        An optional name to attach to this layer.
+
+    Examples
+    --------
+    - The core of this layer is ``tf.reshape``.
+    - Use TensorFlow only :
+    >>> x = tf.placeholder(tf.float32, shape=[None, 3])
+    >>> y = tf.reshape(x, shape=[-1, 3, 3])
+    >>> sess = tf.InteractiveSession()
+    >>> print(sess.run(y, feed_dict={x:[[1,1,1],[2,2,2],[3,3,3],[4,4,4],[5,5,5],[6,6,6]]}))
+    ... [[[ 1.  1.  1.]
+    ...   [ 2.  2.  2.]
+    ...   [ 3.  3.  3.]]
+    ...  [[ 4.  4.  4.]
+    ...   [ 5.  5.  5.]
+    ...   [ 6.  6.  6.]]]
+    """
+    def __init__(
+        self,
+        layer = None,
+        shape = [],
+        name ='reshape_layer',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+        self.outputs = tf.reshape(self.inputs, shape=shape, name=name)
+        print("  tensorlayer:Instantiate ReshapeLayer %s: %s" % (self.name, self.outputs._shape))
+        self.all_layers = list(layer.all_layers)
+        self.all_params = list(layer.all_params)
+        self.all_drop = dict(layer.all_drop)
+        self.all_layers.extend( [self.outputs] )
+
+## TF-Slim layer
+class SlimNetsLayer(Layer):
+    """
+    The :class:`SlimNetsLayer` class can be used to merge all TF-Slim nets into
+    TensorLayer. Models can be found in `slim-model `_ , more about slim
+    see `slim-git `_ .
+
+    Parameters
+    ----------
+    layer : a :class:`Layer` instance
+        The `Layer` class feeding into this layer.
+    slim_layer : a slim network function
+        The network you want to stack onto, ending with ``return net, end_points``.
+    name : a string or None
+        An optional name to attach to this layer.
+
+    Notes
+    -----
+    Because TF-Slim stores its layers in a dictionary (``end_points``), the
+    ``all_layers`` of this network are not in order ! Fortunately, the
+    ``all_params`` are in order.
+    """
+    def __init__(
+        self,
+        layer = None,
+        slim_layer = None,
+        slim_args = {},
+        name ='slim_layer',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+        print("  tensorlayer:Instantiate SlimNetsLayer %s: %s" % (self.name, slim_layer.__name__))
+
+        with tf.variable_scope(name) as vs:
+            net, end_points = slim_layer(self.inputs, **slim_args)
+            slim_variables = tf.get_collection(tf.GraphKeys.VARIABLES, scope=vs.name)
+
+        self.outputs = net
+
+        slim_layers = []
+        for v in end_points.values():
+            # tf.contrib.layers.summaries.summarize_activation(v)
+            slim_layers.append(v)
+
+        self.all_layers = list(layer.all_layers)
+        self.all_params = list(layer.all_params)
+        self.all_drop = dict(layer.all_drop)
+
+        self.all_layers.extend( slim_layers )
+        self.all_params.extend( slim_variables )
+
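+# A minimal sketch of the ``slim_layer`` contract above: any callable mapping
+# the input tensor to ``(net, end_points)`` can be wrapped. The tiny net below
+# is hypothetical, only to show the expected signature:
+#
+# >>> def my_slim_net(inputs):
+# ...     net = tf.contrib.layers.fully_connected(inputs, 100)
+# ...     return net, {'fc1': net}
+# >>> x = tf.placeholder(tf.float32, shape=[None, 784])
+# >>> net_in = tl.layers.InputLayer(x, name='input_layer')
+# >>> network = tl.layers.SlimNetsLayer(layer=net_in, slim_layer=my_slim_net,
+# ...                                   name='my_slim_net')
+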
+## Special activation
+class PReluLayer(Layer):
+    """
+    The :class:`PReluLayer` class is a Parametric Rectified Linear layer.
+
+    Parameters
+    ----------
+    layer : a :class:`Layer` instance
+        The `Layer` class feeding into this layer, with output of type `float`,
+        `double`, `int32`, `int64`, `uint8`, `int16`, or `int8`.
+    channel_shared : `bool`. If True, a single alpha is shared by all channels.
+    W_init : weights initializer, default zero constant.
+        The initializer for initializing the alphas.
+    restore : `bool`. Restore or not the alphas.
+    name : A name for this activation op (optional).
+
+    References
+    -----------
+    - `Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification `_
+    """
+    def __init__(
+        self,
+        layer = None,
+        channel_shared = False,
+        W_init = tf.constant_initializer(value=0.0),
+        W_init_args = {},
+        restore = True,
+        name="prelu_layer"
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+        print("  tensorlayer:Instantiate PReluLayer %s: %s" % (self.name, channel_shared))
+        print('     [Warning] prelu: untested !!!')
+        if channel_shared:
+            w_shape = (1,)
+        else:
+            w_shape = int(self.inputs._shape[-1])
+
+        with tf.name_scope(name) as scope:
+            alphas = tf.get_variable(name='alphas', shape=w_shape, initializer=W_init, **W_init_args )
+            # f(x) = max(0, x) + alphas * min(0, x)
+            self.outputs = tf.nn.relu(self.inputs) + tf.mul(alphas, (self.inputs - tf.abs(self.inputs))) * 0.5
+
+        self.all_layers = list(layer.all_layers)
+        self.all_params = list(layer.all_params)
+        self.all_drop = dict(layer.all_drop)
+
+        self.all_layers.extend( [self.outputs] )
+        self.all_params.extend( [alphas] )
+
+
+## Flow control layer
+class MultiplexerLayer(Layer):
+    """
+    The :class:`MultiplexerLayer` selects one of several inputs and forwards
+    the selected input to the output, see `tutorial_mnist_multiplexer.py`.
+
+    Parameters
+    ----------
+    layer : a list of :class:`Layer` instances
+        The `Layer` classes feeding into this layer.
+    name : a string or None
+        An optional name to attach to this layer.
+
+
+    Variables
+    -----------------------
+    sel : a placeholder
+        Takes an int in [0, n_inputs - 1], selecting which input becomes the output.
+
+    Examples
+    --------
+    >>> x = tf.placeholder(tf.float32, shape=[None, 784], name='x')
+    >>> y_ = tf.placeholder(tf.int64, shape=[None, ], name='y_')
+    >>> # define the network
+    >>> net_in = tl.layers.InputLayer(x, name='input_layer')
+    >>> net_in = tl.layers.DropoutLayer(net_in, keep=0.8, name='drop1')
+    >>> # net 0
+    >>> net_0 = tl.layers.DenseLayer(net_in, n_units=800,
+    ...                                act = tf.nn.relu, name='net0/relu1')
+    >>> net_0 = tl.layers.DropoutLayer(net_0, keep=0.5, name='net0/drop2')
+    >>> net_0 = tl.layers.DenseLayer(net_0, n_units=800,
+    ...                                act = tf.nn.relu, name='net0/relu2')
+    >>> # net 1
+    >>> net_1 = tl.layers.DenseLayer(net_in, n_units=800,
+    ...                                act = tf.nn.relu, name='net1/relu1')
+    >>> net_1 = tl.layers.DropoutLayer(net_1, keep=0.8, name='net1/drop2')
+    >>> net_1 = tl.layers.DenseLayer(net_1, n_units=800,
+    ...                                act = tf.nn.relu, name='net1/relu2')
+    >>> net_1 = tl.layers.DropoutLayer(net_1, keep=0.8, name='net1/drop3')
+    >>> net_1 = tl.layers.DenseLayer(net_1, n_units=800,
+    ...                                act = tf.nn.relu, name='net1/relu3')
+    >>> # multiplexer
+    >>> net_mux = tl.layers.MultiplexerLayer(layer = [net_0, net_1], name='mux_layer')
+    >>> network = tl.layers.ReshapeLayer(net_mux, shape=[-1, 800], name='reshape_layer') #
+    >>> network = tl.layers.DropoutLayer(network, keep=0.5, name='drop3')
+    >>> # output layer
+    >>> network = tl.layers.DenseLayer(network, n_units=10,
+    ... 
act = tf.identity, name='output_layer') + + References + ------------ + - See ``tf.pack()`` and ``tf.gather()`` at `TensorFlow - Slicing and Joining `_ + """ + def __init__(self, + layer = [], + name='mux_layer'): + Layer.__init__(self, name=name) + self.n_inputs = len(layer) + + self.inputs = [] + for l in layer: + self.inputs.append(l.outputs) + all_inputs = tf.pack(self.inputs, name=name) # pack means concat a list of tensor in a new dim # 1.2 + + print(" tensorlayer:Instantiate MultiplexerLayer %s: n_inputs: %d" % (self.name, self.n_inputs)) + + self.sel = tf.placeholder(tf.int32) + self.outputs = tf.gather(all_inputs, self.sel, name=name) # [sel, :, : ...] # 1.2 + + # print(self.outputs, vars(self.outputs)) + # # tf.reshape(self.outputs, shape=) + # exit() + # the same with ConcatLayer + self.all_layers = list(layer[0].all_layers) + self.all_params = list(layer[0].all_params) + self.all_drop = dict(layer[0].all_drop) + + for i in range(1, len(layer)): + self.all_layers.extend(list(layer[i].all_layers)) + self.all_params.extend(list(layer[i].all_params)) + self.all_drop.update(dict(layer[i].all_drop)) + +## We can Duplicate the network instead of DemultiplexerLayer +# class DemultiplexerLayer(Layer): +# """ +# The :class:`DemultiplexerLayer` takes a single input and select one of many output lines, which is connected to the input. +# +# Parameters +# ---------- +# layer : a list of :class:`Layer` instances +# The `Layer` class feeding into this layer. +# n_outputs : a int +# The number of output +# name : a string or None +# An optional name to attach to this layer. +# +# Field (Class Variables) +# ----------------------- +# sel : a placeholder +# Input int [0, inf], the +# outputs : a list of Tensor +# A list of outputs +# +# Examples +# -------- +# >>> +# """ +# def __init__(self, +# layer = None, +# name='demux_layer'): +# Layer.__init__(self, name=name) +# self.outputs = [] + +## Wrapper +class EmbeddingAttentionSeq2seqWrapper(Layer): + """Sequence-to-sequence model with attention and for multiple buckets. + + This example implements a multi-layer recurrent neural network as encoder, + and an attention-based decoder. This is the same as the model described in + this paper: + - `Grammar as a Foreign Language `_ + please look there for details, + or into the seq2seq library for complete model implementation. + This example also allows to use GRU cells in addition to LSTM cells, and + sampled softmax to handle large output vocabulary size. A single-layer + version of this model, but with bi-directional encoder, was presented in + - `Neural Machine Translation by Jointly Learning to Align and Translate `_ + The sampled softmax is described in Section 3 of the following paper. + - `On Using Very Large Target Vocabulary for Neural Machine Translation `_ + + Parameters + ---------- + source_vocab_size : size of the source vocabulary. + target_vocab_size : size of the target vocabulary. + buckets : a list of pairs (I, O), where I specifies maximum input length + that will be processed in that bucket, and O specifies maximum output + length. Training instances that have inputs longer than I or outputs + longer than O will be pushed to the next bucket and padded accordingly. + We assume that the list is sorted, e.g., [(2, 4), (8, 16)]. + size : number of units in each layer of the model. + num_layers : number of layers in the model. + max_gradient_norm : gradients will be clipped to maximally this norm. 
+ batch_size : the size of the batches used during training; + the model construction is independent of batch_size, so it can be + changed after initialization if this is convenient, e.g., for decoding. + learning_rate : learning rate to start with. + learning_rate_decay_factor : decay learning rate by this much when needed. + use_lstm : if true, we use LSTM cells instead of GRU cells. + num_samples : number of samples for sampled softmax. + forward_only : if set, we do not construct the backward pass in the model. + name : a string or None + An optional name to attach to this layer. + """ + def __init__(self, + source_vocab_size, + target_vocab_size, + buckets, + size, + num_layers, + max_gradient_norm, + batch_size, + learning_rate, + learning_rate_decay_factor, + use_lstm=False, + num_samples=512, + forward_only=False, + name='wrapper'): + Layer.__init__(self)#, name=name) + + self.source_vocab_size = source_vocab_size + self.target_vocab_size = target_vocab_size + self.buckets = buckets + self.batch_size = batch_size + self.learning_rate = tf.Variable(float(learning_rate), trainable=False, name='learning_rate') + self.learning_rate_decay_op = self.learning_rate.assign( + self.learning_rate * learning_rate_decay_factor) + self.global_step = tf.Variable(0, trainable=False, name='global_step') + + # =========== Fake output Layer for compute cost ====== + # If we use sampled softmax, we need an output projection. + with tf.variable_scope(name) as vs: + output_projection = None + softmax_loss_function = None + # Sampled softmax only makes sense if we sample less than vocabulary size. + if num_samples > 0 and num_samples < self.target_vocab_size: + w = tf.get_variable("proj_w", [size, self.target_vocab_size]) + w_t = tf.transpose(w) + b = tf.get_variable("proj_b", [self.target_vocab_size]) + output_projection = (w, b) + + def sampled_loss(inputs, labels): + labels = tf.reshape(labels, [-1, 1]) + return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels, num_samples, + self.target_vocab_size) + softmax_loss_function = sampled_loss + + # ============ Seq Encode Layer ============= + # Create the internal multi-layer cell for our RNN. + single_cell = tf.nn.rnn_cell.GRUCell(size) + if use_lstm: + single_cell = tf.nn.rnn_cell.BasicLSTMCell(size) + cell = single_cell + if num_layers > 1: + cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers) + + # ============== Seq Decode Layer ============ + # The seq2seq function: we use embedding for the input and attention. + def seq2seq_f(encoder_inputs, decoder_inputs, do_decode): + return tf.nn.seq2seq.embedding_attention_seq2seq( + encoder_inputs, decoder_inputs, cell, + num_encoder_symbols=source_vocab_size, + num_decoder_symbols=target_vocab_size, + embedding_size=size, + output_projection=output_projection, + feed_previous=do_decode) + + #============================================================= + # Feeds for inputs. + self.encoder_inputs = [] + self.decoder_inputs = [] + self.target_weights = [] + for i in xrange(buckets[-1][0]): # Last bucket is the biggest one. + self.encoder_inputs.append(tf.placeholder(tf.int32, shape=[None], + name="encoder{0}".format(i))) + for i in xrange(buckets[-1][1] + 1): + self.decoder_inputs.append(tf.placeholder(tf.int32, shape=[None], + name="decoder{0}".format(i))) + self.target_weights.append(tf.placeholder(tf.float32, shape=[None], + name="weight{0}".format(i))) + + # Our targets are decoder inputs shifted by one. 
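+    # e.g. decoder_inputs = [GO, w1, w2, w3, EOS] gives targets = [w1, w2, w3, EOS] :
+    # at each time step the decoder is trained to predict the next symbol.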
+ targets = [self.decoder_inputs[i + 1] + for i in xrange(len(self.decoder_inputs) - 1)] + self.targets = targets # DH add for debug + + + # Training outputs and losses. + if forward_only: + self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets( + self.encoder_inputs, self.decoder_inputs, targets, + self.target_weights, buckets, lambda x, y: seq2seq_f(x, y, True), + softmax_loss_function=softmax_loss_function) + # If we use output projection, we need to project outputs for decoding. + if output_projection is not None: + for b in xrange(len(buckets)): + self.outputs[b] = [ + tf.matmul(output, output_projection[0]) + output_projection[1] + for output in self.outputs[b] + ] + else: + self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets( + self.encoder_inputs, self.decoder_inputs, targets, + self.target_weights, buckets, + lambda x, y: seq2seq_f(x, y, False), + softmax_loss_function=softmax_loss_function) + + # Gradients and SGD update operation for training the model. + params = tf.trainable_variables() + if not forward_only: + self.gradient_norms = [] + self.updates = [] + opt = tf.train.GradientDescentOptimizer(self.learning_rate) + for b in xrange(len(buckets)): + gradients = tf.gradients(self.losses[b], params) + clipped_gradients, norm = tf.clip_by_global_norm(gradients, + max_gradient_norm) + self.gradient_norms.append(norm) + self.updates.append(opt.apply_gradients( + zip(clipped_gradients, params), global_step=self.global_step)) + + # if save into npz + self.all_params = tf.get_collection(tf.GraphKeys.VARIABLES, scope=vs.name) + + # if save into ckpt + self.saver = tf.train.Saver(tf.all_variables()) + + def step(self, session, encoder_inputs, decoder_inputs, target_weights, + bucket_id, forward_only): + """Run a step of the model feeding the given inputs. + + Parameters + ---------- + session : tensorflow session to use. + encoder_inputs : list of numpy int vectors to feed as encoder inputs. + decoder_inputs : list of numpy int vectors to feed as decoder inputs. + target_weights : list of numpy float vectors to feed as target weights. + bucket_id : which bucket of the model to use. + forward_only : whether to do the backward step or only forward. + + Returns + -------- + A triple consisting of gradient norm (or None if we did not do backward), + average perplexity, and the outputs. + + Raises + -------- + ValueError : if length of encoder_inputs, decoder_inputs, or + target_weights disagrees with bucket size for the specified bucket_id. + """ + # Check if the sizes match. + encoder_size, decoder_size = self.buckets[bucket_id] + if len(encoder_inputs) != encoder_size: + raise ValueError("Encoder length must be equal to the one in bucket," + " %d != %d." % (len(encoder_inputs), encoder_size)) + if len(decoder_inputs) != decoder_size: + raise ValueError("Decoder length must be equal to the one in bucket," + " %d != %d." % (len(decoder_inputs), decoder_size)) + if len(target_weights) != decoder_size: + raise ValueError("Weights length must be equal to the one in bucket," + " %d != %d." % (len(target_weights), decoder_size)) + # print('in model.step()') + # print('a',bucket_id, encoder_size, decoder_size) + + # Input feed: encoder inputs, decoder inputs, target_weights, as provided. 
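+    # The feed maps each placeholder's name to one batch-sized vector per time
+    # step, e.g. (hypothetical) input_feed["encoder0:0"] = [id, id, ...].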
+ input_feed = {} + for l in xrange(encoder_size): + input_feed[self.encoder_inputs[l].name] = encoder_inputs[l] + for l in xrange(decoder_size): + input_feed[self.decoder_inputs[l].name] = decoder_inputs[l] + input_feed[self.target_weights[l].name] = target_weights[l] + # print(self.encoder_inputs[l].name) + # print(self.decoder_inputs[l].name) + # print(self.target_weights[l].name) + + # Since our targets are decoder inputs shifted by one, we need one more. + last_target = self.decoder_inputs[decoder_size].name + input_feed[last_target] = np.zeros([self.batch_size], dtype=np.int32) + # print('last_target', last_target) + + # Output feed: depends on whether we do a backward step or not. + if not forward_only: + output_feed = [self.updates[bucket_id], # Update Op that does SGD. + self.gradient_norms[bucket_id], # Gradient norm. + self.losses[bucket_id]] # Loss for this batch. + else: + output_feed = [self.losses[bucket_id]] # Loss for this batch. + for l in xrange(decoder_size): # Output logits. + output_feed.append(self.outputs[bucket_id][l]) + + outputs = session.run(output_feed, input_feed) + if not forward_only: + return outputs[1], outputs[2], None # Gradient norm, loss, no outputs. + else: + return None, outputs[0], outputs[1:] # No gradient norm, loss, outputs. + + def get_batch(self, data, bucket_id, PAD_ID=0, GO_ID=1, EOS_ID=2, UNK_ID=3): + """Get a random batch of data from the specified bucket, prepare for step. + + To feed data in step(..) it must be a list of batch-major vectors, while + data here contains single length-major cases. So the main logic of this + function is to re-index data cases to be in the proper format for feeding. + + Parameters + ---------- + data : a tuple of size len(self.buckets) in which each element contains + lists of pairs of input and output data that we use to create a batch. + bucket_id : integer, which bucket to get the batch for. + PAD_ID : int + Index of Padding in vocabulary + GO_ID : int + Index of GO in vocabulary + EOS_ID : int + Index of End of sentence in vocabulary + UNK_ID : int + Index of Unknown word in vocabulary + + Returns + ------- + The triple (encoder_inputs, decoder_inputs, target_weights) for + the constructed batch that has the proper format to call step(...) later. + """ + encoder_size, decoder_size = self.buckets[bucket_id] + encoder_inputs, decoder_inputs = [], [] + + # Get a random batch of encoder and decoder inputs from data, + # pad them if needed, reverse encoder inputs and add GO to decoder. + for _ in xrange(self.batch_size): + encoder_input, decoder_input = random.choice(data[bucket_id]) + + # Encoder inputs are padded and then reversed. + encoder_pad = [PAD_ID] * (encoder_size - len(encoder_input)) + encoder_inputs.append(list(reversed(encoder_input + encoder_pad))) + + # Decoder inputs get an extra "GO" symbol, and are padded then. + decoder_pad_size = decoder_size - len(decoder_input) - 1 + decoder_inputs.append([GO_ID] + decoder_input + + [PAD_ID] * decoder_pad_size) + + # Now we create batch-major vectors from the data selected above. + batch_encoder_inputs, batch_decoder_inputs, batch_weights = [], [], [] + + # Batch encoder inputs are just re-indexed encoder_inputs. + for length_idx in xrange(encoder_size): + batch_encoder_inputs.append( + np.array([encoder_inputs[batch_idx][length_idx] + for batch_idx in xrange(self.batch_size)], dtype=np.int32)) + + # Batch decoder inputs are re-indexed decoder_inputs, we create weights. 
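+        # e.g. (batch of 2) decoder_inputs [[GO, w1, PAD], [GO, w2, w3]] becomes
+        # batch-major [[GO, GO], [w1, w2], [PAD, w3]], and a position's weight is
+        # 0.0 whenever its target (the next symbol) is PAD or past the end.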
+        for length_idx in xrange(decoder_size):
+            batch_decoder_inputs.append(
+                np.array([decoder_inputs[batch_idx][length_idx]
+                        for batch_idx in xrange(self.batch_size)], dtype=np.int32))
+
+            # Create target_weights to be 0 for targets that are padding.
+            batch_weight = np.ones(self.batch_size, dtype=np.float32)
+            for batch_idx in xrange(self.batch_size):
+                # We set weight to 0 if the corresponding target is a PAD symbol.
+                # The corresponding target is decoder_input shifted by 1 forward.
+                if length_idx < decoder_size - 1:
+                    target = decoder_inputs[batch_idx][length_idx + 1]
+                if length_idx == decoder_size - 1 or target == PAD_ID:
+                    batch_weight[batch_idx] = 0.0
+            batch_weights.append(batch_weight)
+        return batch_encoder_inputs, batch_decoder_inputs, batch_weights
+
+
+
+## Developing or Untested
+class MaxoutLayer(Layer):
+    """
+    Waiting for contribution.
+
+    A single DenseLayer with Max-out behaviour; works well with Dropout.
+
+    References
+    -----------
+    `Goodfellow (2013) Maxout Networks `_
+    """
+    def __init__(
+        self,
+        layer = None,
+        n_units = 100,
+        name ='maxout_layer',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+
+        print("  tensorlayer:Instantiate MaxoutLayer %s: %d" % (self.name, n_units))
+        print("    Waiting for contribution : this layer is not implemented yet")
+        with tf.variable_scope(name) as vs:
+            pass
+            # W = tf.Variable(init.xavier_init(n_inputs=n_in, n_outputs=n_units, uniform=True), name='W')
+            # b = tf.Variable(tf.zeros([n_units]), name='b')
+
+            # self.outputs = act(tf.matmul(self.inputs, W) + b)
+            # https://www.tensorflow.org/versions/r0.9/api_docs/python/array_ops.html#pack
+            # http://stackoverflow.com/questions/34362193/how-to-explicitly-broadcast-a-tensor-to-match-anothers-shape-in-tensorflow
+            # tf.concat tf.pack tf.tile
+
+        self.all_layers = list(layer.all_layers)
+        self.all_params = list(layer.all_params)
+        self.all_drop = dict(layer.all_drop)
+        # Enable these once the layer body above is implemented; until then
+        # W, b and self.outputs do not exist yet :
+        # self.all_layers.extend( [self.outputs] )
+        # self.all_params.extend( [W, b] )
+
+# noise
+class GaussianNoiseLayer(Layer):
+    """
+    Waiting for contribution.
+    """
+    def __init__(
+        self,
+        layer = None,
+        # keep = 0.5,
+        name = 'gaussian_noise_layer',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+        print("  tensorlayer:Instantiate GaussianNoiseLayer %s" % (self.name))
+        print("    Waiting for contribution : this layer is not implemented yet")
+        with tf.variable_scope(name) as vs:
+            pass
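+# A possible body for GaussianNoiseLayer above, as a sketch only; ``mean`` and
+# ``stddev`` are hypothetical parameters, not part of the current signature:
+#
+#     noise = tf.random_normal(tf.shape(self.inputs), mean=0.0, stddev=0.1)
+#     self.outputs = self.inputs + noise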
+
+
+
+
+#
diff --git a/tensorlayer1.2.2/nlp.py b/tensorlayer1.2.2/nlp.py
new file mode 100755
index 0000000..d39121f
--- /dev/null
+++ b/tensorlayer1.2.2/nlp.py
@@ -0,0 +1,908 @@
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+
+
+
+
+import tensorflow as tf
+import os
+from sys import platform as _platform
+import collections
+import random
+import numpy as np
+import warnings
+from six.moves import xrange
+from tensorflow.python.platform import gfile
+import re
+
+## Iteration functions
+def generate_skip_gram_batch(data, batch_size, num_skips, skip_window, data_index=0):
+    """Generate a training batch for the Skip-Gram model.
+
+    Parameters
+    ----------
+    data : a list
+        The context to present.
+    batch_size : an int
+        Batch size to return.
+    num_skips : an int
+        How many times to reuse an input to generate a label.
+    skip_window : an int
+        How many words to consider left and right.
+    data_index : an int
+        Index of the context location.
+        Instead of using yield, this code uses data_index to track the position.
+
+    Returns
+    --------
+    batch : a list
+        Inputs
+    labels : a list
+        Labels
+    data_index : an int
+        Index of the context location.
+
+    Examples
+    --------
+    - Setting num_skips=2, skip_window=1 uses the words to the right and left.
+    - In the same way, num_skips=4, skip_window=2 means use the nearby 4 words.
+
+    >>> data = [1,2,3,4,5,6,7,8,9,10,11]
+    >>> batch, labels, data_index = tl.nlp.generate_skip_gram_batch(data=data, batch_size=8, num_skips=2, skip_window=1, data_index=0)
+    >>> print(batch)
+    ... [2 2 3 3 4 4 5 5]
+    >>> print(labels)
+    ... [[3]
+    ...  [1]
+    ...  [4]
+    ...  [2]
+    ...  [5]
+    ...  [3]
+    ...  [4]
+    ...  [6]]
+
+    References
+    -----------
+    - `TensorFlow word2vec tutorial `_
+    """
+    # global data_index   # you can put data_index outside the function, then
+    # modify the global data_index inside the function without returning it.
+    # note: instead of using yield, this code uses data_index.
+    assert batch_size % num_skips == 0
+    assert num_skips <= 2 * skip_window
+    batch = np.ndarray(shape=(batch_size), dtype=np.int32)
+    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
+    span = 2 * skip_window + 1  # [ skip_window target skip_window ]
+    buffer = collections.deque(maxlen=span)
+    for _ in range(span):
+        buffer.append(data[data_index])
+        data_index = (data_index + 1) % len(data)
+    for i in range(batch_size // num_skips):
+        target = skip_window  # target label at the center of the buffer
+        targets_to_avoid = [ skip_window ]
+        for j in range(num_skips):
+            while target in targets_to_avoid:
+                target = random.randint(0, span - 1)
+            targets_to_avoid.append(target)
+            batch[i * num_skips + j] = buffer[skip_window]
+            labels[i * num_skips + j, 0] = buffer[target]
+        buffer.append(data[data_index])
+        data_index = (data_index + 1) % len(data)
+    return batch, labels, data_index
+
+
+## Sampling functions
+def sample(a=[], temperature=1.0):
+    """Sample an index from a probability array.
+
+    Parameters
+    ----------
+    a : a list
+        List of probabilities.
+    temperature : float or None
+        The higher the more uniform.\n
+        When a = [0.1, 0.2, 0.7],\n
+        temperature = 0.7, the distribution will be sharpened [ 0.05048273  0.13588945  0.81362782]\n
+        temperature = 1.0, the distribution will be the same [0.1    0.2    0.7]\n
+        temperature = 1.5, the distribution will be flattened [ 0.16008435  0.25411807  0.58579758]\n
+        If None, it will be ``np.argmax(a)``
+
+    Notes
+    ------
+    No matter what the temperature and input list are, the sum of all probabilities will be one.
+    Even if input list = [1, 100, 200], the sum of all probabilities will still be one.
+
+    For large vocabulary_size, choose a higher temperature to avoid error.
+    """
+    b = np.copy(a)
+    try:
+        if temperature == 1:
+            return np.argmax(np.random.multinomial(1, a, 1))
+        if temperature is None:
+            return np.argmax(a)
+        else:
+            a = np.log(a) / temperature
+            a = np.exp(a) / np.sum(np.exp(a))
+            return np.argmax(np.random.multinomial(1, a, 1))
+    except:
+        message = "For large vocabulary_size, choose a higher temperature to avoid log error. Hint : use ``sample_top``. "
+        warnings.warn(message, Warning)
+        return np.argmax(np.random.multinomial(1, b, 1))
+
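+# Quick sanity check for ``sample`` above and ``sample_top`` below
+# (illustrative values only):
+#
+# >>> a = [0.1, 0.2, 0.7]
+# >>> tl.nlp.sample(a, temperature=1.0)    # returns 2 most of the time
+# >>> tl.nlp.sample_top(a, top_k=2)        # samples only from indices {2, 1}
+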
+ """ + a = np.array(a) + idx = np.argsort(a)[::-1] + idx = idx[:top_k] + # a = a[idx] + probs = a[idx] + probs = probs / np.sum(probs) + choice = np.random.choice(idx, p=probs) + return choice + + +## Vector representations of words (Advanced) UNDOCUMENT +class SimpleVocabulary(object): + """Simple vocabulary wrapper, see create_vocab(). + + Parameters + ------------ + vocab : A dictionary of word to word_id. + unk_id : Id of the special 'unknown' word. + """ + + def __init__(self, vocab, unk_id): + """Initializes the vocabulary.""" + + + self._vocab = vocab + self._unk_id = unk_id + + def word_to_id(self, word): + """Returns the integer id of a word string.""" + if word in self._vocab: + return self._vocab[word] + else: + return self._unk_id + +class Vocabulary(object): + """Create Vocabulary class from a given vocabulary and its id-word, word-id convert, + see create_vocab() and ``tutorial_tfrecord3.py``. + + Parameters + ----------- + vocab_file : File containing the vocabulary, where the words are the first + whitespace-separated token on each line (other tokens are ignored) and + the word ids are the corresponding line numbers. + start_word : Special word denoting sentence start. + end_word : Special word denoting sentence end. + unk_word : Special word denoting unknown words. + + Properties + ------------ + vocab : a dictionary from word to id. + reverse_vocab : a list from id to word. + start_id : int of start id + end_id : int of end id + unk_id : int of unk id + + Vocab_files + ------------- + >>> Look as follow, includes `start_word` , `end_word` but no `unk_word` . + >>> a 969108 + >>> 586368 + >>> 586368 + >>> . 440479 + >>> on 213612 + >>> of 202290 + >>> the 196219 + >>> in 182598 + >>> with 152984 + >>> and 139109 + >>> is 97322 + """ + + def __init__(self, + vocab_file, + start_word="", + end_word="", + unk_word=""): + if not tf.gfile.Exists(vocab_file): + tf.logging.fatal("Vocab file %s not found.", vocab_file) + tf.logging.info("Initializing vocabulary from file: %s", vocab_file) + + with tf.gfile.GFile(vocab_file, mode="r") as f: + reverse_vocab = list(f.readlines()) + reverse_vocab = [line.split()[0] for line in reverse_vocab] + assert start_word in reverse_vocab + assert end_word in reverse_vocab + if unk_word not in reverse_vocab: + reverse_vocab.append(unk_word) + vocab = dict([(x, y) for (y, x) in enumerate(reverse_vocab)]) + + print(" tensorlayer.nlp:Instantiate Vocabulary from %s : %s %s %s" % (vocab_file, start_word, end_word, unk_word)) + print(" vocabulary with %d words (includes start_word, end_word, unk_word)" % len(vocab)) + # tf.logging.info(" vocabulary with %d words" % len(vocab)) + + self.vocab = vocab # vocab[word] = id + self.reverse_vocab = reverse_vocab # reverse_vocab[id] = word + + # Save special word ids. 
+        self.start_id = vocab[start_word]
+        self.end_id = vocab[end_word]
+        self.unk_id = vocab[unk_word]
+        print("     start_id: %d" % self.start_id)
+        print("     end_id: %d" % self.end_id)
+        print("     unk_id: %d" % self.unk_id)
+
+    def word_to_id(self, word):
+        """Returns the integer word id of a word string."""
+        if word in self.vocab:
+            return self.vocab[word]
+        else:
+            return self.unk_id
+
+    def id_to_word(self, word_id):
+        """Returns the word string of an integer word id."""
+        if word_id >= len(self.reverse_vocab):
+            return self.reverse_vocab[self.unk_id]
+        else:
+            return self.reverse_vocab[word_id]
+
+def process_sentence(sentence, start_word="<S>", end_word="</S>"):
+    """Converts a sentence string into a list of string words, adds start_word and end_word,
+    see ``create_vocab()`` and ``tutorial_tfrecord3.py``.
+
+    Parameters
+    ----------
+    sentence : a string sentence.
+
+    Returns
+    ---------
+    A list of strings; the processed caption.
+
+    Examples
+    -----------
+    >>> c = "how are you?"
+    >>> c = tl.nlp.process_sentence(c)
+    >>> print(c)
+    ... ['<S>', 'how', 'are', 'you', '?', '</S>']
+    """
+    try:
+        import nltk
+    except:
+        raise Exception("Hint : NLTK is required.")
+    process_sentence = [start_word]
+    process_sentence.extend(nltk.tokenize.word_tokenize(sentence.lower()))
+    process_sentence.append(end_word)
+    return process_sentence
+
+def create_vocab(sentences, word_counts_output_file, min_word_count=1):
+    """Creates the vocabulary of word to word_id, see ``tutorial_tfrecord3.py``.
+
+    The vocabulary is saved to disk in a text file of word counts. The id of each
+    word in the file is its corresponding 0-based line number.
+
+    Parameters
+    ------------
+    sentences : a list of lists of strings.
+    word_counts_output_file : A string
+        The file name.
+    min_word_count : an int
+        Minimum number of occurrences for a word.
+
+    Returns
+    --------
+    tl.nlp.SimpleVocabulary object.
+
+    See also
+    --------
+    tl.nlp.build_vocab()
+
+    Examples
+    --------
+    >>> captions = ["one two , three", "four five five"]
+    >>> processed_capts = []
+    >>> for c in captions:
+    >>>     c = tl.nlp.process_sentence(c, start_word="<S>", end_word="</S>")
+    >>>     processed_capts.append(c)
+    >>> print(processed_capts)
+    ... [['<S>', 'one', 'two', ',', 'three', '</S>'], ['<S>', 'four', 'five', 'five', '</S>']]
+
+    >>> tl.nlp.create_vocab(processed_capts, word_counts_output_file='vocab.txt', min_word_count=1)
+    ... tensorlayer.nlp:Creating vocabulary.
+    ...   Total words: 8
+    ...   Words in vocabulary: 8
+    ...   Wrote vocabulary file: vocab.txt
+    >>> vocab = tl.nlp.Vocabulary('vocab.txt', start_word="<S>", end_word="</S>", unk_word="<UNK>")
+    ... tensorlayer.nlp:Instantiate Vocabulary from vocab.txt : <S> </S> <UNK>
+    ...   vocabulary with 9 words (includes unk_word)
+    """
+    from collections import Counter
+    print("  tensorlayer.nlp:Creating vocabulary.")
+    counter = Counter()
+    for c in sentences:
+        counter.update(c)
+    print("    Total words:", len(counter))
+
+    # Filter uncommon words and sort by descending count.
+    word_counts = [x for x in counter.items() if x[1] >= min_word_count]
+    word_counts.sort(key=lambda x: x[1], reverse=True)
+    print("    Words in vocabulary:", len(word_counts))
+
+    # Write out the word counts file.
+    with tf.gfile.FastGFile(word_counts_output_file, "w") as f:
+        f.write("\n".join(["%s %d" % (w, c) for w, c in word_counts]))
+    print("    Wrote vocabulary file:", word_counts_output_file)
+
+    # Create the vocabulary dictionary.
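+    # e.g. word_counts [('a', 3), ('b', 1)] gives reverse_vocab ['a', 'b'],
+    # vocab_dict {'a': 0, 'b': 1} and unk_id 2 (one past the last known id).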
+    reverse_vocab = [x[0] for x in word_counts]
+    unk_id = len(reverse_vocab)
+    vocab_dict = dict([(x, y) for (y, x) in enumerate(reverse_vocab)])
+    vocab = SimpleVocabulary(vocab_dict, unk_id)
+
+    return vocab
+
+
+## Vector representations of words
+def simple_read_words(filename="nietzsche.txt"):
+    """Read context from file without any preprocessing.
+
+    Parameters
+    ----------
+    filename : a string
+        A file path (like .txt file)
+
+    Returns
+    --------
+    The context in a string
+    """
+    with open(filename, "r") as f:
+        words = f.read()
+    return words
+
+def read_words(filename="nietzsche.txt", replace = ['\n', '<eos>']):
+    """Read a file into list-format context.
+    Note that this script cannot handle punctuation.
+    For a customized read_words method, see ``tutorial_generate_text.py``.
+
+    Parameters
+    ----------
+    filename : a string
+        A file path (like .txt file),
+    replace : a list
+        [original string, target string], to disable replace use ['', '']
+
+    Returns
+    --------
+    The context in a list, split by ' ' by default, with '<eos>' representing '\n'.
+    e.g. [... 'how', 'useful', 'it', "'s" ... ]
+
+    Code References
+    ---------------
+    - `tensorflow.models.rnn.ptb.reader `_
+    """
+    with tf.gfile.GFile(filename, "r") as f:
+        return f.read().replace(*replace).split()
+
+def read_analogies_file(eval_file='questions-words.txt', word2id={}):
+    """Reads through an analogy question file, return its id format.
+
+    Parameters
+    ----------
+    eval_file : a string
+        The file name.
+    word2id : a dictionary
+        Mapping words to unique IDs.
+
+    Returns
+    --------
+    analogy_questions : a [n, 4] numpy array containing the analogy question's
+        word ids.
+    questions_skipped : questions skipped due to unknown words.
+
+    Examples
+    ---------
+    >>> eval_file should be in this format :
+    >>> : capital-common-countries
+    >>> Athens Greece Baghdad Iraq
+    >>> Athens Greece Bangkok Thailand
+    >>> Athens Greece Beijing China
+    >>> Athens Greece Berlin Germany
+    >>> Athens Greece Bern Switzerland
+    >>> Athens Greece Cairo Egypt
+    >>> Athens Greece Canberra Australia
+    >>> Athens Greece Hanoi Vietnam
+    >>> Athens Greece Havana Cuba
+    ...
+
+    >>> words = tl.files.load_matt_mahoney_text8_dataset()
+    >>> data, count, dictionary, reverse_dictionary = \
+            tl.nlp.build_words_dataset(words, vocabulary_size, True)
+    >>> analogy_questions = tl.nlp.read_analogies_file( \
+            eval_file='questions-words.txt', word2id=dictionary)
+    >>> print(analogy_questions)
+    ... [[ 3068  1248  7161  1581]
+    ...  [ 3068  1248 28683  5642]
+    ...  [ 3068  1248  3878   486]
+    ...  ...,
+    ...  [ 1216  4309 19982 25506]
+    ...  [ 1216  4309  3194  8650]
+    ...  [ 1216  4309   140   312]]
+    """
+    questions = []
+    questions_skipped = 0
+    with open(eval_file, "rb") as analogy_f:
+        for line in analogy_f:
+            if line.startswith(b":"):  # Skip comments.
+                continue
+            words = line.strip().lower().split(b" ")  # lowercase
+            ids = [word2id.get(w.strip()) for w in words]
+            if None in ids or len(ids) != 4:
+                questions_skipped += 1
+            else:
+                questions.append(np.array(ids))
+    print("Eval analogy file: ", eval_file)
+    print("Questions: ", len(questions))
+    print("Skipped: ", questions_skipped)
+    analogy_questions = np.array(questions, dtype=np.int32)
+    return analogy_questions
+
+def build_vocab(data):
+    """Build vocabulary.
+    Given the context in list format,
+    return the vocabulary, which is a dictionary for word to id.
+    e.g. {'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 ....
} + + Parameters + ---------- + data : a list of string + the context in list format + + Returns + -------- + word_to_id : a dictionary + mapping words to unique IDs. e.g. {'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 .... } + + Code References + --------------- + - `tensorflow.models.rnn.ptb.reader `_ + + Examples + -------- + >>> data_path = os.getcwd() + '/simple-examples/data' + >>> train_path = os.path.join(data_path, "ptb.train.txt") + >>> word_to_id = build_vocab(read_txt_words(train_path)) + """ + # data = _read_words(filename) + counter = collections.Counter(data) + # print('counter', counter) # dictionary for the occurrence number of each word, e.g. 'banknote': 1, 'photography': 1, 'kia': 1 + count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0])) + # print('count_pairs',count_pairs) # convert dictionary to list of tuple, e.g. ('ssangyong', 1), ('swapo', 1), ('wachter', 1) + words, _ = list(zip(*count_pairs)) + word_to_id = dict(zip(words, range(len(words)))) + # print(words) # list of words + # print(word_to_id) # dictionary for word to id, e.g. 'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 + return word_to_id + +def build_reverse_dictionary(word_to_id): + """Given a dictionary for converting word to integer id. + Returns a reverse dictionary for converting a id to word. + + Parameters + ---------- + word_to_id : dictionary + mapping words to unique ids + + Returns + -------- + reverse_dictionary : a dictionary + mapping ids to words + """ + reverse_dictionary = dict(zip(word_to_id.values(), word_to_id.keys())) + return reverse_dictionary + +def build_words_dataset(words=[], vocabulary_size=50000, printable=True, unk_key = 'UNK'): + """Build the words dictionary and replace rare words with 'UNK' token. + The most common word has the smallest integer id. + + Parameters + ---------- + words : a list of string or byte + The context in list format. You may need to do preprocessing on the words, + such as lower case, remove marks etc. + vocabulary_size : an int + The maximum vocabulary size, limiting the vocabulary size. + Then the script replaces rare words with 'UNK' token. + printable : boolen + Whether to print the read vocabulary size of the given words. + unk_key : a string + Unknown words = unk_key + + Returns + -------- + data : a list of integer + The context in a list of ids + count : a list of tuple and list + count[0] is a list : the number of rare words\n + count[1:] are tuples : the number of occurrence of each word\n + e.g. [['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)] + dictionary : a dictionary + word_to_id, mapping words to unique IDs. + reverse_dictionary : a dictionary + id_to_word, mapping id to unique word. 
+ + + Examples + -------- + >>> words = tl.files.load_matt_mahoney_text8_dataset() + >>> vocabulary_size = 50000 + >>> data, count, dictionary, reverse_dictionary = tl.nlp.build_words_dataset(words, vocabulary_size) + + Code References + ----------------- + - `tensorflow/examples/tutorials/word2vec/word2vec_basic.py `_ + """ + import collections + count = [[unk_key, -1]] + count.extend(collections.Counter(words).most_common(vocabulary_size - 1)) + dictionary = dict() + for word, _ in count: + dictionary[word] = len(dictionary) + data = list() + unk_count = 0 + for word in words: + if word in dictionary: + index = dictionary[word] + else: + index = 0 # dictionary['UNK'] + unk_count += 1 + data.append(index) + count[0][1] = unk_count + reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys())) + if printable: + print('Real vocabulary size %d' % len(collections.Counter(words).keys())) + print('Limited vocabulary size {}'.format(vocabulary_size)) + assert len(collections.Counter(words).keys()) >= vocabulary_size , \ + "the limited vocabulary_size must be less than or equal to the read vocabulary_size" + return data, count, dictionary, reverse_dictionary + +def words_to_word_ids(data=[], word_to_id={}, unk_key = 'UNK'): + """Given a context (words) in list format and the vocabulary, + Returns a list of IDs to represent the context. + + Parameters + ---------- + data : a list of string or byte + the context in list format + word_to_id : a dictionary + mapping words to unique IDs. + unk_key : a string + Unknown words = unk_key + + Returns + -------- + A list of IDs to represent the context. + + Examples + -------- + >>> words = tl.files.load_matt_mahoney_text8_dataset() + >>> vocabulary_size = 50000 + >>> data, count, dictionary, reverse_dictionary = \ + ... tl.nlp.build_words_dataset(words, vocabulary_size, True) + >>> context = [b'hello', b'how', b'are', b'you'] + >>> ids = tl.nlp.words_to_word_ids(words, dictionary) + >>> context = tl.nlp.word_ids_to_words(ids, reverse_dictionary) + >>> print(ids) + ... [6434, 311, 26, 207] + >>> print(context) + ... [b'hello', b'how', b'are', b'you'] + + Code References + --------------- + - `tensorflow.models.rnn.ptb.reader `_ + """ + # if isinstance(data[0], six.string_types): + # print(type(data[0])) + # # exit() + # print(data[0]) + # print(word_to_id) + # return [word_to_id[str(word)] for word in data] + # else: + + word_ids = [] + for word in data: + if word_to_id.get(word) is not None: + word_ids.append(word_to_id[word]) + else: + word_ids.append(word_to_id[unk_key]) + return word_ids + # return [word_to_id[word] for word in data] # this one + + # if isinstance(data[0], str): + # # print('is a string object') + # return [word_to_id[word] for word in data] + # else:#if isinstance(s, bytes): + # # print('is a unicode object') + # # print(data[0]) + # return [word_to_id[str(word)] f + +def word_ids_to_words(data, id_to_word): + """Given a context (ids) in list format and the vocabulary, + Returns a list of words to represent the context. + + Parameters + ---------- + data : a list of integer + the context in list format + id_to_word : a dictionary + mapping id to unique word. + + Returns + -------- + A list of string or byte to represent the context. + + Examples + --------- + >>> see words_to_word_ids + """ + return [id_to_word[i] for i in data] + +def save_vocab(count=[], name='vocab.txt'): + """Save the vocabulary to a file so the model can be reloaded. 
+ + Parameters + ---------- + count : a list of tuple and list + count[0] is a list : the number of rare words\n + count[1:] are tuples : the number of occurrence of each word\n + e.g. [['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)] + + Examples + --------- + >>> words = tl.files.load_matt_mahoney_text8_dataset() + >>> vocabulary_size = 50000 + >>> data, count, dictionary, reverse_dictionary = \ + ... tl.nlp.build_words_dataset(words, vocabulary_size, True) + >>> tl.nlp.save_vocab(count, name='vocab_text8.txt') + >>> vocab_text8.txt + ... UNK 418391 + ... the 1061396 + ... of 593677 + ... and 416629 + ... one 411764 + ... in 372201 + ... a 325873 + ... to 316376 + """ + pwd = os.getcwd() + vocabulary_size = len(count) + with open(os.path.join(pwd, name), "w") as f: + for i in xrange(vocabulary_size): + f.write("%s %d\n" % (tf.compat.as_text(count[i][0]), count[i][1])) + print("%d vocab saved to %s in %s" % (vocabulary_size, name, pwd)) + +## Functions for translation +def basic_tokenizer(sentence, _WORD_SPLIT=re.compile(b"([.,!?\"':;)(])")): + """Very basic tokenizer: split the sentence into a list of tokens. + + Parameters + ----------- + sentence : tensorflow.python.platform.gfile.GFile Object + _WORD_SPLIT : regular expression for word spliting. + + + Examples + -------- + >>> see create_vocabulary + >>> from tensorflow.python.platform import gfile + >>> train_path = "wmt/giga-fren.release2" + >>> with gfile.GFile(train_path + ".en", mode="rb") as f: + >>> for line in f: + >>> tokens = tl.nlp.basic_tokenizer(line) + >>> print(tokens) + >>> exit() + ... [b'Changing', b'Lives', b'|', b'Changing', b'Society', b'|', b'How', + ... b'It', b'Works', b'|', b'Technology', b'Drives', b'Change', b'Home', + ... b'|', b'Concepts', b'|', b'Teachers', b'|', b'Search', b'|', b'Overview', + ... b'|', b'Credits', b'|', b'HHCC', b'Web', b'|', b'Reference', b'|', + ... b'Feedback', b'Virtual', b'Museum', b'of', b'Canada', b'Home', b'Page'] + + References + ---------- + - Code from ``/tensorflow/models/rnn/translation/data_utils.py`` + """ + words = [] + for space_separated_fragment in sentence.strip().split(): + words.extend(re.split(_WORD_SPLIT, space_separated_fragment)) + return [w for w in words if w] + +def create_vocabulary(vocabulary_path, data_path, max_vocabulary_size, + tokenizer=None, normalize_digits=True, + _DIGIT_RE=re.compile(br"\d"), + _START_VOCAB=[b"_PAD", b"_GO", b"_EOS", b"_UNK"]): + """Create vocabulary file (if it does not exist yet) from data file. + + Data file is assumed to contain one sentence per line. Each sentence is + tokenized and digits are normalized (if normalize_digits is set). + Vocabulary contains the most-frequent tokens up to max_vocabulary_size. + We write it to vocabulary_path in a one-token-per-line format, so that later + token in the first line gets id=0, second line gets id=1, and so on. + + Parameters + ----------- + vocabulary_path : path where the vocabulary will be created. + data_path : data file that will be used to create vocabulary. + max_vocabulary_size : limit on the size of the created vocabulary. + tokenizer : a function to use to tokenize each data sentence. + if None, basic_tokenizer will be used. + normalize_digits : Boolean + if true, all digits are replaced by 0s. 
+ + References + ---------- + - Code from ``/tensorflow/models/rnn/translation/data_utils.py`` + """ + if not gfile.Exists(vocabulary_path): + print("Creating vocabulary %s from data %s" % (vocabulary_path, data_path)) + vocab = {} + with gfile.GFile(data_path, mode="rb") as f: + counter = 0 + for line in f: + counter += 1 + if counter % 100000 == 0: + print(" processing line %d" % counter) + tokens = tokenizer(line) if tokenizer else basic_tokenizer(line) + for w in tokens: + word = re.sub(_DIGIT_RE, b"0", w) if normalize_digits else w + if word in vocab: + vocab[word] += 1 + else: + vocab[word] = 1 + vocab_list = _START_VOCAB + sorted(vocab, key=vocab.get, reverse=True) + if len(vocab_list) > max_vocabulary_size: + vocab_list = vocab_list[:max_vocabulary_size] + with gfile.GFile(vocabulary_path, mode="wb") as vocab_file: + for w in vocab_list: + vocab_file.write(w + b"\n") + else: + print("Vocabulary %s from data %s exists" % (vocabulary_path, data_path)) + +def initialize_vocabulary(vocabulary_path): + """Initialize vocabulary from file, return the word_to_id (dictionary) + and id_to_word (list). + + We assume the vocabulary is stored one-item-per-line, so a file:\n + dog\n + cat\n + will result in a vocabulary {"dog": 0, "cat": 1}, and this function will + also return the reversed-vocabulary ["dog", "cat"]. + + Parameters + ----------- + vocabulary_path : path to the file containing the vocabulary. + + Returns + -------- + vocab : a dictionary + Word to id. A dictionary mapping string to integers. + rev_vocab : a list + Id to word. The reversed vocabulary (a list, which reverses the vocabulary mapping). + + Examples + --------- + >>> Assume 'test' contains + ... dog + ... cat + ... bird + >>> vocab, rev_vocab = tl.nlp.initialize_vocabulary("test") + >>> print(vocab) + >>> {b'cat': 1, b'dog': 0, b'bird': 2} + >>> print(rev_vocab) + >>> [b'dog', b'cat', b'bird'] + + Raises + ------- + ValueError : if the provided vocabulary_path does not exist. + """ + if gfile.Exists(vocabulary_path): + rev_vocab = [] + with gfile.GFile(vocabulary_path, mode="rb") as f: + rev_vocab.extend(f.readlines()) + rev_vocab = [line.strip() for line in rev_vocab] + vocab = dict([(x, y) for (y, x) in enumerate(rev_vocab)]) + return vocab, rev_vocab + else: + raise ValueError("Vocabulary file %s not found.", vocabulary_path) + +def sentence_to_token_ids(sentence, vocabulary, + tokenizer=None, normalize_digits=True, + UNK_ID=3, _DIGIT_RE=re.compile(br"\d")): + """Convert a string to list of integers representing token-ids. + + For example, a sentence "I have a dog" may become tokenized into + ["I", "have", "a", "dog"] and with vocabulary {"I": 1, "have": 2, + "a": 4, "dog": 7"} this function will return [1, 2, 4, 7]. + + Parameters + ----------- + sentence : tensorflow.python.platform.gfile.GFile Object + The sentence in bytes format to convert to token-ids.\n + see basic_tokenizer(), data_to_token_ids() + vocabulary : a dictionary mapping tokens to integers. + tokenizer : a function to use to tokenize each sentence; + If None, basic_tokenizer will be used. + normalize_digits : Boolean + If true, all digits are replaced by 0s. + + Returns + -------- + A list of integers, the token-ids for the sentence. + """ + + if tokenizer: + words = tokenizer(sentence) + else: + words = basic_tokenizer(sentence) + if not normalize_digits: + return [vocabulary.get(w, UNK_ID) for w in words] + # Normalize digits by 0 before looking words up in the vocabulary. 
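+    # e.g. with normalize_digits, b"area 51" is looked up as b"area 00",
+    # because every digit is replaced by b"0" before the vocabulary lookup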
+ return [vocabulary.get(re.sub(_DIGIT_RE, b"0", w), UNK_ID) for w in words] + +def data_to_token_ids(data_path, target_path, vocabulary_path, + tokenizer=None, normalize_digits=True, + UNK_ID=3, _DIGIT_RE=re.compile(br"\d")): + """Tokenize data file and turn into token-ids using given vocabulary file. + + This function loads data line-by-line from data_path, calls the above + sentence_to_token_ids, and saves the result to target_path. See comment + for sentence_to_token_ids on the details of token-ids format. + + Parameters + ----------- + data_path : path to the data file in one-sentence-per-line format. + target_path : path where the file with token-ids will be created. + vocabulary_path : path to the vocabulary file. + tokenizer : a function to use to tokenize each sentence; + if None, basic_tokenizer will be used. + normalize_digits : Boolean; if true, all digits are replaced by 0s. + + References + ---------- + - Code from ``/tensorflow/models/rnn/translation/data_utils.py`` + """ + if not gfile.Exists(target_path): + print("Tokenizing data in %s" % data_path) + vocab, _ = initialize_vocabulary(vocabulary_path) + with gfile.GFile(data_path, mode="rb") as data_file: + with gfile.GFile(target_path, mode="w") as tokens_file: + counter = 0 + for line in data_file: + counter += 1 + if counter % 100000 == 0: + print(" tokenizing line %d" % counter) + token_ids = sentence_to_token_ids(line, vocab, tokenizer, + normalize_digits, UNK_ID=UNK_ID, + _DIGIT_RE=_DIGIT_RE) + tokens_file.write(" ".join([str(tok) for tok in token_ids]) + "\n") + else: + print("Target path %s exists" % target_path) diff --git a/tensorlayer1.2.2/ops.py b/tensorlayer1.2.2/ops.py new file mode 100755 index 0000000..c884ded --- /dev/null +++ b/tensorlayer1.2.2/ops.py @@ -0,0 +1,174 @@ +#! /usr/bin/python +# -*- coding: utf8 -*- + + + + +import tensorflow as tf +import os +import sys +from sys import platform as _platform +from .layers import set_keep + + +def exit_tf(sess=None): + """Close tensorboard and nvidia-process if available + + Parameters + ---------- + sess : a session instance of TensorFlow + TensorFlow session + """ + text = "Close tensorboard and nvidia-process if available" + sess.close() + # import time + # time.sleep(2) + if _platform == "linux" or _platform == "linux2": + print('linux: %s' % text) + os.system('nvidia-smi') + os.system('fuser 6006/tcp -k') # kill tensorboard 6006 + os.system("nvidia-smi | grep python |awk '{print $3}'|xargs kill") # kill all nvidia-smi python process + elif _platform == "darwin": + print('OS X: %s' % text) + os.system("lsof -i tcp:6006 | grep -v PID | awk '{print $2}' | xargs kill") # kill tensorboard 6006 + elif _platform == "win32": + print('Windows: %s' % text) + else: + print(_platform) + exit() + +def clear_all(printable=True): + """Clears all the placeholder variables of keep prob, + including keeping probabilities of all dropout, denoising, dropconnect etc. + + Parameters + ---------- + printable : boolean + If True, print all deleted variables. 
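+
+    Examples
+    ---------
+    >>> # remove the remaining module-level variables, printing each one
+    >>> tl.ops.clear_all(printable=True)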
+ """ + print('clear all .....................................') + gl = globals().copy() + for var in gl: + if var[0] == '_': continue + if 'func' in str(globals()[var]): continue + if 'module' in str(globals()[var]): continue + if 'class' in str(globals()[var]): continue + + if printable: + print(" clear_all ------- %s" % str(globals()[var])) + + del globals()[var] + +# def clear_all2(vars, printable=True): +# """ +# The :function:`clear_all()` Clears all the placeholder variables of keep prob, +# including keeping probabilities of all dropout, denoising, dropconnect +# Parameters +# ---------- +# printable : if True, print all deleted variables. +# """ +# print('clear all .....................................') +# for var in vars: +# if var[0] == '_': continue +# if 'func' in str(var): continue +# if 'module' in str(var): continue +# if 'class' in str(var): continue +# +# if printable: +# print(" clear_all ------- %s" % str(var)) +# +# del var + +def set_gpu_fraction(sess=None, gpu_fraction=0.3): + """Set the GPU memory fraction for the application. + + Parameters + ---------- + sess : a session instance of TensorFlow + TensorFlow session + gpu_fraction : a float + Fraction of GPU memory, (0 ~ 1] + + References + ---------- + - `TensorFlow using GPU `_ + """ + print(" tensorlayer: GPU MEM Fraction %f" % gpu_fraction) + gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction) + sess = tf.Session(config = tf.ConfigProto(gpu_options = gpu_options)) + return sess + + + + + +def disable_print(): + """Disable console output. + + Examples + --------- + >>> print("You can see me") + >>> tl.ops.disable_print() + >>> print(" You can't see me") + >>> tl.ops.enable_print() + >>> print("You can see me") + """ + # sys.stdout = os.devnull # this one kill the process + sys.stdout = None + sys.stderr = os.devnull + +def enable_print(): + """Enable console output. + + Examples + -------- + - see tl.ops.disable_print() + """ + sys.stdout = sys.__stdout__ + sys.stderr = sys.__stderr__ + + +class temporary_disable_print: + """Temporarily disable console output. + + Examples + --------- + >>> print("You can see me") + >>> with tl.ops.temporary_disable_print() as t: + >>> print("You can't see me") + >>> print("You can see me") + """ + def __init__(self): + pass + def __enter__(self): + sys.stdout = None + sys.stderr = os.devnull + def __exit__(self, type, value, traceback): + sys.stdout = sys.__stdout__ + sys.stderr = sys.__stderr__ + return isinstance(value, TypeError) + + + + + +def get_site_packages_directory(): + """Print and return the site-packages directory. + + Examples + --------- + >>> loc = tl.ops.get_site_packages_directory() + """ + import site + try: + loc = site.getsitepackages() + print(" tl.ops : site-packages in ", loc) + return loc + except: + print(" tl.ops : Cannot find package dir from virtual environment") + return False + + + + +# diff --git a/tensorlayer1.2.2/prepro.py b/tensorlayer1.2.2/prepro.py new file mode 100755 index 0000000..fb1bd7e --- /dev/null +++ b/tensorlayer1.2.2/prepro.py @@ -0,0 +1,168 @@ +#! /usr/bin/python +# -*- coding: utf8 -*- + + +import tensorflow as tf +import tensorlayer as tl +import numpy as np +import time +import numbers + + +def distorted_images(images=None, height=24, width=24): + """Distort images for generating more training data. + + Features + --------- + They are cropped to height * width pixels randomly. + + They are approximately whitened to make the model insensitive to dynamic range. 
+
+    Randomly flip the image from left to right.
+
+    Randomly distort the image brightness.
+
+    Randomly distort the image contrast.
+
+    Whiten (Normalize) the images.
+
+    Parameters
+    ----------
+    images : 4D Tensor
+        The tensor or placeholder of images
+    height : int
+        The height for random crop.
+    width : int
+        The width for random crop.
+
+    Returns
+    -------
+    result : tuple of Tensor
+        (Tensor for distorted images, Tensor for while loop index)
+
+    Examples
+    --------
+    >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)
+    >>> sess = tf.InteractiveSession()
+    >>> batch_size = 128
+    >>> x = tf.placeholder(tf.float32, shape=[batch_size, 32, 32, 3])
+    >>> distorted_images_op = tl.preprocess.distorted_images(images=x, height=24, width=24)
+    >>> sess.run(tf.initialize_all_variables())
+    >>> feed_dict={x: X_train[0:batch_size,:,:,:]}
+    >>> distorted_images, idx = sess.run(distorted_images_op, feed_dict=feed_dict)
+    >>> tl.visualize.images2d(X_train[0:9,:,:,:], second=2, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212)
+    >>> tl.visualize.images2d(distorted_images[1:10,:,:,:], second=10, saveable=False, name='distorted_images', dtype=None, fig_idx=23012)
+
+    Notes
+    ------
+    - The first image in 'distorted_images' should be removed.
+
+    References
+    -----------
+    - `tensorflow.models.image.cifar10.cifar10_input `_
+    """
+    print(" [Warning] distorted_images will be deprecated due to speed, see TFRecord tutorial for more info...")
+    try:
+        batch_size = int(images._shape[0])
+    except:
+        raise Exception('unknown batch_size of images')
+    distorted_x = tf.Variable(tf.constant(0.1, shape=[1, height, width, 3]))
+    i = tf.Variable(tf.constant(0))
+
+    c = lambda distorted_x, i: tf.less(i, batch_size)
+
+    def body(distorted_x, i):
+        # 1. Randomly crop a [height, width] section of the image.
+        image = tf.random_crop(tf.gather(images, i), [height, width, 3])
+        # 2. Randomly flip the image horizontally.
+        image = tf.image.random_flip_left_right(image)
+        # 3. Randomly change brightness.
+        image = tf.image.random_brightness(image, max_delta=63)
+        # 4. Randomly change contrast.
+        image = tf.image.random_contrast(image, lower=0.2, upper=1.8)
+        # 5. Subtract off the mean and divide by the variance of the pixels.
+        image = tf.image.per_image_whitening(image)
+        # 6. Append the image to a batch.
+        image = tf.expand_dims(image, 0)
+        return tf.concat(0, [distorted_x, image]), tf.add(i, 1)
+
+    result = tf.while_loop(cond=c, body=body, loop_vars=(distorted_x, i), parallel_iterations=16)
+    return result
+
+
+def crop_central_whiten_images(images=None, height=24, width=24):
+    """Crop the central region of the images, and normalize them for test data.
+
+    They are cropped to the central height * width pixels.
+
+    Whiten (Normalize) the images.
+
+    Parameters
+    ----------
+    images : 4D Tensor
+        The tensor or placeholder of images
+    height : int
+        The height for central crop.
+    width: int
+        The width for central crop.
+
+    Returns
+    -------
+    result : tuple of Tensor
+        (Tensor for central images, Tensor for while loop index)
+
+    Examples
+    --------
+    >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)
+    >>> sess = tf.InteractiveSession()
+    >>> batch_size = 128
+    >>> x = tf.placeholder(tf.float32, shape=[batch_size, 32, 32, 3])
+    >>> central_images_op = tl.preprocess.crop_central_whiten_images(images=x, height=24, width=24)
+    >>> sess.run(tf.initialize_all_variables())
+    >>> feed_dict={x: X_train[0:batch_size,:,:,:]}
+    >>> central_images, idx = sess.run(central_images_op, feed_dict=feed_dict)
+    >>> tl.visualize.images2d(X_train[0:9,:,:,:], second=2, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212)
+    >>> tl.visualize.images2d(central_images[1:10,:,:,:], second=10, saveable=False, name='central_images', dtype=None, fig_idx=23012)
+
+    Notes
+    ------
+    The first image in 'central_images' should be removed.
+
+    Code References
+    ----------------
+    - ``tensorflow.models.image.cifar10.cifar10_input``
+    """
+    print(" [Warning] crop_central_whiten_images will be deprecated due to speed, see TFRecord tutorial for more info...")
+    try:
+        batch_size = int(images._shape[0])
+    except:
+        raise Exception('unknown batch_size of images')
+    central_x = tf.Variable(tf.constant(0.1, shape=[1, height, width, 3]))
+    i = tf.Variable(tf.constant(0))
+
+    c = lambda central_x, i: tf.less(i, batch_size)
+
+    def body(central_x, i):
+        # 1. Crop the central [height, width] of the image.
+        image = tf.image.resize_image_with_crop_or_pad(tf.gather(images, i), height, width)
+        # 2. Subtract off the mean and divide by the variance of the pixels.
+        image = tf.image.per_image_whitening(image)
+        # 3. Append the image to a batch.
+        image = tf.expand_dims(image, 0)
+        return tf.concat(0, [central_x, image]), tf.add(i, 1)
+
+    result = tf.while_loop(cond=c, body=body, loop_vars=(central_x, i), parallel_iterations=16)
+    return result
+
+
+
+
+
+
+
+
+
+
+
+
+#
diff --git a/tensorlayer1.2.2/rein.py b/tensorlayer1.2.2/rein.py
new file mode 100755
index 0000000..b2e7be4
--- /dev/null
+++ b/tensorlayer1.2.2/rein.py
@@ -0,0 +1,68 @@
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+
+
+
+import tensorflow as tf
+import numpy as np
+from six.moves import xrange
+
+def discount_episode_rewards(rewards=[], gamma=0.99):
+    """ Take a 1D float array of rewards and compute the discounted rewards for an
+    episode. A non-zero reward is treated as the end of an episode.
+
+    Parameters
+    ----------
+    rewards : numpy list
+        a list of rewards
+    gamma : float
+        discount factor
+
+    Examples
+    ----------
+    >>> rewards = np.asarray([0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1])
+    >>> gamma = 0.9
+    >>> discount_rewards = tl.rein.discount_episode_rewards(rewards, gamma)
+    >>> print(discount_rewards)
+    ... [ 0.72899997  0.81        0.89999998  1.          0.72899997  0.81
+    ...   0.89999998  1.          0.72899997  0.81        0.89999998  1. ]
+    """
+    discounted_r = np.zeros_like(rewards, dtype=np.float32)
+    running_add = 0
+    for t in reversed(xrange(0, rewards.size)):
+        if rewards[t] != 0: running_add = 0    # reset the running sum at an episode boundary
+
+        running_add = running_add * gamma + rewards[t]
+        discounted_r[t] = running_add
+    return discounted_r
+
+
+def cross_entropy_reward_loss(logits, actions, rewards):
+    """ Calculate the loss for Policy Gradient Network.
+
+    Parameters
+    ----------
+    logits : tensor
+        The network outputs without softmax. This function implements softmax
+        inside.
+    actions : tensor/ placeholder
+        The agent actions.
+    rewards : tensor/ placeholder
+        The rewards.
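+
+    Returns
+    --------
+    loss : a scalar tensor
+        The sum over the batch of the per-action cross-entropy weighted by the
+        (discounted) rewards, i.e. ``reduce_sum(cross_entropy * rewards)``.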
+
+    Examples
+    ----------
+    >>> states_batch_pl = tf.placeholder(tf.float32, shape=[None, D])   # observation for training
+    >>> network = tl.layers.InputLayer(states_batch_pl, name='input_layer')
+    >>> network = tl.layers.DenseLayer(network, n_units=H, act = tf.nn.relu, name='relu1')
+    >>> network = tl.layers.DenseLayer(network, n_units=3, act = tl.activation.identity, name='output_layer')
+    >>> probs = network.outputs
+    >>> sampling_prob = tf.nn.softmax(probs)
+    >>> actions_batch_pl = tf.placeholder(tf.int32, shape=[None])
+    >>> discount_rewards_batch_pl = tf.placeholder(tf.float32, shape=[None])
+    >>> loss = cross_entropy_reward_loss(probs, actions_batch_pl, discount_rewards_batch_pl)
+    >>> train_op = tf.train.RMSPropOptimizer(learning_rate, decay_rate).minimize(loss)
+    """
+    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, actions)
+    loss = tf.reduce_sum(tf.mul(cross_entropy, rewards))   # element-wise mul
+    return loss
diff --git a/tensorlayer1.2.2/utils.py b/tensorlayer1.2.2/utils.py
new file mode 100755
index 0000000..cbe2091
--- /dev/null
+++ b/tensorlayer1.2.2/utils.py
@@ -0,0 +1,425 @@
+#! /usr/bin/python
+# -*- coding: utf8 -*-
+import tensorflow as tf
+from . import iterate
+import numpy as np
+import time
+
+
+def fit(sess, network, train_op, cost, X_train, y_train, x, y_, acc=None, batch_size=100, n_epoch=100, print_freq=5, X_val=None, y_val=None, eval_train=True):
+    """
+    Train a given non-time-series network with the given cost function, training data, batch_size, n_epoch etc.
+
+    Parameters
+    ----------
+    sess : TensorFlow session
+        sess = tf.InteractiveSession()
+    network : a TensorLayer layer
+        the network to be trained
+    train_op : a TensorFlow optimizer
+        like tf.train.AdamOptimizer
+    cost : the TensorFlow expression of cost
+        the cost to be minimized
+    X_train : numpy array
+        the input of training data
+    y_train : numpy array
+        the target of training data
+    x : placeholder
+        for inputs
+    y_ : placeholder
+        for targets
+    acc : the TensorFlow expression of accuracy (or other metric) or None
+        if None, would not display the metric
+    batch_size : int
+        batch size for training and evaluating
+    n_epoch : int
+        the number of training epochs
+    print_freq : int
+        display the training information every ``print_freq`` epochs
+    X_val : numpy array or None
+        the input of validation data
+    y_val : numpy array or None
+        the target of validation data
+    eval_train : boolean
+        if X_val and y_val are not None, it reflects whether to evaluate the training data
+
+    Examples
+    --------
+    >>> see tutorial_mnist_simple.py
+    >>> tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_,
+    ...            acc=acc, batch_size=500, n_epoch=200, print_freq=5,
+    ...            X_val=X_val, y_val=y_val, eval_train=False)
+    """
+    assert X_train.shape[0] >= batch_size, "Number of training examples should be bigger than the batch size"
+    print("Start training the network ...")
+    start_time_begin = time.time()
+    for epoch in range(n_epoch):
+        start_time = time.time()
+        loss_ep = 0; n_step = 0
+        for X_train_a, y_train_a in iterate.minibatches(X_train, y_train,
+                                                    batch_size, shuffle=True):
+            feed_dict = {x: X_train_a, y_: y_train_a}
+            feed_dict.update( network.all_drop )    # enable noise layers
+            loss, _ = sess.run([cost, train_op], feed_dict=feed_dict)
+            loss_ep += loss
+            n_step += 1
+        loss_ep = loss_ep / n_step
+
+        if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
+            if (X_val is not None) and (y_val is not None):
+                print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
+                if eval_train is True:
+                    train_loss, train_acc, n_batch = 0, 0, 0
+                    for X_train_a, y_train_a in iterate.minibatches(
+                                            X_train, y_train, batch_size, shuffle=True):
+                        dp_dict = dict_to_one( network.all_drop )    # disable noise layers
+                        feed_dict = {x: X_train_a, y_: y_train_a}
+                        feed_dict.update(dp_dict)
+                        if acc is not None:
+                            err, ac = sess.run([cost, acc], feed_dict=feed_dict)
+                            train_acc += ac
+                        else:
+                            err = sess.run(cost, feed_dict=feed_dict)
+                        train_loss += err;  n_batch += 1
+                    print("   train loss: %f" % (train_loss / n_batch))
+                    if acc is not None:
+                        print("   train acc: %f" % (train_acc / n_batch))
+                val_loss, val_acc, n_batch = 0, 0, 0
+                for X_val_a, y_val_a in iterate.minibatches(
+                                            X_val, y_val, batch_size, shuffle=True):
+                    dp_dict = dict_to_one( network.all_drop )    # disable noise layers
+                    feed_dict = {x: X_val_a, y_: y_val_a}
+                    feed_dict.update(dp_dict)
+                    if acc is not None:
+                        err, ac = sess.run([cost, acc], feed_dict=feed_dict)
+                        val_acc += ac
+                    else:
+                        err = sess.run(cost, feed_dict=feed_dict)
+                    val_loss += err;  n_batch += 1
+                print("   val loss: %f" % (val_loss / n_batch))
+                if acc is not None:
+                    print("   val acc: %f" % (val_acc / n_batch))
+            else:
+                print("Epoch %d of %d took %fs, loss %f" % (epoch + 1, n_epoch, time.time() - start_time, loss_ep))
+    print("Total training time: %fs" % (time.time() - start_time_begin))
+
+
+def test(sess, network, acc, X_test, y_test, x, y_, batch_size, cost=None):
+    """
+    Test a given non-time-series network with the given test data and metric.
+
+    Parameters
+    ----------
+    sess : TensorFlow session
+        sess = tf.InteractiveSession()
+    network : a TensorLayer layer
+        the network to be tested
+    acc : the TensorFlow expression of accuracy (or other metric) or None
+        if None, would not display the metric
+    X_test : numpy array
+        the input of test data
+    y_test : numpy array
+        the target of test data
+    x : placeholder
+        for inputs
+    y_ : placeholder
+        for targets
+    batch_size : int or None
+        batch size for testing; when the dataset is large, we should use minibatches for testing;
+        when the dataset is small, we can set it to None.
+
+    cost : the TensorFlow expression of cost or None
+        if None, would not display the cost
+
+    Examples
+    --------
+    >>> see tutorial_mnist_simple.py
+    >>> tl.utils.test(sess, network, acc, X_test, y_test, x, y_, batch_size=None, cost=cost)
+    """
+    print('Start testing the network ...')
+    if batch_size is None:
+        dp_dict = dict_to_one( network.all_drop )
+        feed_dict = {x: X_test, y_: y_test}
+        feed_dict.update(dp_dict)
+        if cost is not None:
+            print("   test loss: %f" % sess.run(cost, feed_dict=feed_dict))
+        print("   test acc: %f" % sess.run(acc, feed_dict=feed_dict))
+        # print("   test acc: %f" % np.mean(y_test == sess.run(y_op,
+        #                                   feed_dict=feed_dict)))
+    else:
+        test_loss, test_acc, n_batch = 0, 0, 0
+        for X_test_a, y_test_a in iterate.minibatches(
+                                    X_test, y_test, batch_size, shuffle=True):
+            dp_dict = dict_to_one( network.all_drop )    # disable noise layers
+            feed_dict = {x: X_test_a, y_: y_test_a}
+            feed_dict.update(dp_dict)
+            if cost is not None:
+                err, ac = sess.run([cost, acc], feed_dict=feed_dict)
+                test_loss += err
+            else:
+                ac = sess.run(acc, feed_dict=feed_dict)
+            test_acc += ac;  n_batch += 1
+        if cost is not None:
+            print("   test loss: %f" % (test_loss / n_batch))
+        print("   test acc: %f" % (test_acc / n_batch))
+
+
+def predict(sess, network, X, x, y_op):
+    """
+    Return the predicted results of the given non-time-series network.
+
+    Parameters
+    ----------
+    sess : TensorFlow session
+        sess = tf.InteractiveSession()
+    network : a TensorLayer layer
+        the network to be used for prediction
+    X : numpy array
+        the input
+    x : placeholder
+        for inputs
+    y_op : placeholder
+        the argmax expression of softmax outputs
+
+    Examples
+    --------
+    >>> see tutorial_mnist_simple.py
+    >>> y = network.outputs
+    >>> y_op = tf.argmax(tf.nn.softmax(y), 1)
+    >>> print(tl.utils.predict(sess, network, X_test, x, y_op))
+    """
+    dp_dict = dict_to_one( network.all_drop )    # disable noise layers
+    feed_dict = {x: X,}
+    feed_dict.update(dp_dict)
+    return sess.run(y_op, feed_dict=feed_dict)
+
+## Evaluation
+def evaluation(y_test=None, y_predict=None, n_classes=None):
+    """
+    Input the predicted results, the target results and the number of classes;
+    return the confusion matrix, the F1 score of each class, the accuracy and
+    the macro F1 score.
+
+    Parameters
+    ----------
+    y_test : numpy.array or list
+        target results
+    y_predict : numpy.array or list
+        predicted results
+    n_classes : int
+        number of classes
+
+    Examples
+    --------
+    >>> c_mat, f1, acc, f1_macro = evaluation(y_test, y_predict, n_classes)
+    """
+    from sklearn.metrics import confusion_matrix, f1_score, accuracy_score
+    c_mat = confusion_matrix(y_test, y_predict, labels = [x for x in range(n_classes)])
+    f1    = f1_score(y_test, y_predict, average = None, labels = [x for x in range(n_classes)])
+    f1_macro = f1_score(y_test, y_predict, average='macro')
+    acc   = accuracy_score(y_test, y_predict)
+    print('confusion matrix: \n',c_mat)
+    print('f1-score:',f1)
+    print('f1-score(macro):',f1_macro)   # same output with > f1_score(y_true, y_pred, average='macro')
+    print('accuracy-score:', acc)
+    return c_mat, f1, acc, f1_macro
+
+def dict_to_one(dp_dict={}):
+    """
+    Input a dictionary; return a dictionary with all values set to one.
+    Used to disable dropout, dropconnect layers and so on.
+ + Parameters + ---------- + dp_dict : dictionary + keeping probabilities + + Examples + -------- + >>> dp_dict = dict_to_one( network.all_drop ) + >>> dp_dict = dict_to_one( network.all_drop ) + >>> feed_dict.update(dp_dict) + """ + return {x: 1 for x in dp_dict} + +def flatten_list(list_of_list=[[],[]]): + """ + Input a list of list, return a list that all items are in a list + + Parameters + ---------- + list_of_list : a list of list + + Examples + -------- + >>> tl.utils.flatten_list([[1, 2, 3],[4, 5],[6]]) + ... [1, 2, 3, 4, 5, 6] + """ + return sum(list_of_list, []) + + +def class_balancing_oversample(X_train=None, y_train=None, printable=True): + """Input the features and labels, return the features and labels after oversampling. + + Parameters + ---------- + X_train : numpy.array + Features, each row is an example + y_train : numpy.array + Labels + + Examples + -------- + >>> X_train, y_train = class_balancing_oversample(X_train, y_train, printable=True) + """ + # ======== Classes balancing + if printable: + print("Classes balancing for training examples...") + from collections import Counter + c = Counter(y_train) + if printable: + print('the occurrence number of each stage: %s' % c.most_common()) + print('the least stage is Label %s have %s instances' % c.most_common()[-1]) + print('the most stage is Label %s have %s instances' % c.most_common(1)[0]) + most_num = c.most_common(1)[0][1] + if printable: + print('most num is %d, all classes tend to be this num' % most_num) + + locations = {} + number = {} + + for lab, num in c.most_common(): # find the index from y_train + number[lab] = num + locations[lab] = np.where(np.array(y_train)==lab)[0] + if printable: + print('convert list(np.array) to dict format') + X = {} # convert list to dict + for lab, num in number.items(): + X[lab] = X_train[locations[lab]] + + # oversampling + if printable: + print('start oversampling') + for key in X: + temp = X[key] + while True: + if len(X[key]) >= most_num: + break + X[key] = np.vstack((X[key], temp)) + if printable: + print('first features of label 0 >', len(X[0][0])) + print('the occurrence num of each stage after oversampling') + for key in X: + print(key, len(X[key])) + if printable: + print('make each stage have same num of instances') + for key in X: + X[key] = X[key][0:most_num,:] + print(key, len(X[key])) + + # convert dict to list + if printable: + print('convert from dict to list format') + y_train = [] + X_train = np.empty(shape=(0,len(X[0][0]))) + for key in X: + X_train = np.vstack( (X_train, X[key] ) ) + y_train.extend([key for i in range(len(X[key]))]) + # print(len(X_train), len(y_train)) + c = Counter(y_train) + if printable: + print('the occurrence number of each stage after oversampling: %s' % c.most_common()) + # ================ End of Classes balancing + return X_train, y_train + + + + +# +# def class_balancing_sequence_4D(X_train, y_train, sequence_length, model='downsampling' ,printable=True): +# ''' 输入、输出都是sequence format +# oversampling or downsampling +# ''' +# n_features = X_train.shape[2] +# # ======== Classes balancing for sequence +# if printable: +# print("Classes balancing for 4D sequence training examples...") +# from collections import Counter +# c = Counter(y_train) # Counter({2: 454, 4: 267, 3: 124, 1: 57, 0: 48}) +# if printable: +# print('the occurrence number of each stage: %s' % c.most_common()) +# print('the least Label %s have %s instances' % c.most_common()[-1]) +# print('the most Label %s have %s instances' % c.most_common(1)[0]) +# # 
print(c.most_common()) # [(2, 454), (4, 267), (3, 124), (1, 57), (0, 48)] +# most_num = c.most_common(1)[0][1] +# less_num = c.most_common()[-1][1] +# +# locations = {} +# number = {} +# for lab, num in c.most_common(): +# number[lab] = num +# locations[lab] = np.where(np.array(y_train)==lab)[0] +# # print(locations) +# # print(number) +# if printable: +# print(' convert list to dict') +# X = {} # convert list to dict +# ### a sequence +# for lab, _ in number.items(): +# X[lab] = np.empty(shape=(0,1,n_features,1)) # 4D +# for lab, _ in number.items(): +# #X[lab] = X_train[locations[lab] +# for l in locations[lab]: +# X[lab] = np.vstack((X[lab], X_train[l*sequence_length : (l+1)*(sequence_length)])) +# # X[lab] = X_train[locations[lab]*sequence_length : locations[lab]*(sequence_length+1)] # a sequence +# # print(X) +# +# if model=='oversampling': +# if printable: +# print(' oversampling -- most num is %d, all classes tend to be this num\nshuffle applied' % most_num) +# for key in X: +# temp = X[key] +# while True: +# if len(X[key]) >= most_num * sequence_length: # sequence +# break +# X[key] = np.vstack((X[key], temp)) +# # print(key, len(X[key])) +# if printable: +# print(' make each stage have same num of instances') +# for key in X: +# X[key] = X[key][0:most_num*sequence_length,:] # sequence +# if printable: +# print(key, len(X[key])) +# elif model=='downsampling': +# import random +# if printable: +# print(' downsampling -- less num is %d, all classes tend to be this num by randomly choice without replacement\nshuffle applied' % less_num) +# for key in X: +# # print(key, len(X[key]))#, len(X[key])/sequence_length) +# s_idx = [ i for i in range(int(len(X[key])/sequence_length))] +# s_idx = np.asarray(s_idx)*sequence_length # start index of sequnce in X[key] +# # print('s_idx',s_idx) +# r_idx = np.random.choice(s_idx, less_num, replace=False) # random choice less_num of s_idx +# # print('r_idx',r_idx) +# temp = X[key] +# X[key] = np.empty(shape=(0,1,n_features,1)) # 4D +# for idx in r_idx: +# X[key] = np.vstack((X[key], temp[idx:idx+sequence_length])) +# # print(key, X[key]) +# # np.random.choice(l, len(l), replace=False) +# else: +# raise Exception(' model should be oversampling or downsampling') +# +# # convert dict to list +# if printable: +# print(' convert dict to list') +# y_train = [] +# # X_train = np.empty(shape=(0,len(X[0][0]))) +# # X_train = np.empty(shape=(0,len(X[1][0]))) # 2D +# X_train = np.empty(shape=(0,1,n_features,1)) # 4D +# l_key = list(X.keys()) # shuffle +# random.shuffle(l_key) # shuffle +# # for key in X: # no shuffle +# for key in l_key: # shuffle +# X_train = np.vstack( (X_train, X[key] ) ) +# # print(len(X[key])) +# y_train.extend([key for i in range(int(len(X[key])/sequence_length))]) +# # print(X_train,y_train, type(X_train), type(y_train)) +# # ================ End of Classes balancing for sequence +# # print(X_train.shape, len(y_train)) +# return X_train, np.asarray(y_train) diff --git a/tensorlayer1.2.2/visualize.py b/tensorlayer1.2.2/visualize.py new file mode 100755 index 0000000..10de24b --- /dev/null +++ b/tensorlayer1.2.2/visualize.py @@ -0,0 +1,300 @@ +#! /usr/bin/python +# -*- coding: utf8 -*- + + +import matplotlib +matplotlib.use('Agg') +import matplotlib.pyplot as plt +# import matplotlib.pyplot as plt +import numpy as np +import os + + + +def W(W=None, second=10, saveable=True, shape=[28,28], name='mnist', fig_idx=2396512): + """Visualize every columns of the weight matrix to a group of Greyscale img. 
+
+    Parameters
+    ----------
+    W : numpy.array
+        The weight matrix
+    second : int
+        The display second(s) for the image(s), if saveable is False.
+    saveable : boolean
+        Save or plot the figure.
+    shape : a list with 2 int
+        The shape of feature image, MNIST is [28, 28].
+    name : a string
+        A name to save the image, if saveable is True.
+    fig_idx : int
+        matplotlib figure index.
+
+    Examples
+    --------
+    >>> tl.visualize.W(network.all_params[0].eval(), second=10, saveable=True, name='weight_of_1st_layer', fig_idx=2012)
+    """
+    if saveable is False:
+        plt.ion()
+    fig = plt.figure(fig_idx)      # show all feature images
+    size = W.shape[0]
+    n_units = W.shape[1]
+
+    num_r = int(np.sqrt(n_units))  # number of units shown per row, e.g. 25 hidden units -> 5 per row
+    num_c = int(np.ceil(n_units/num_r))
+    count = int(1)
+    for row in range(1, num_r+1):
+        for col in range(1, num_c+1):
+            if count > n_units:
+                break
+            a = fig.add_subplot(num_r, num_c, count)
+            # ------------------------------------------------------------
+            # plt.imshow(np.reshape(W[:,count-1],(28,28)), cmap='gray')
+            # ------------------------------------------------------------
+            feature = W[:,count-1] / np.sqrt( (W[:,count-1]**2).sum())
+            # feature[feature<0.0001] = 0   # value threshold
+            # if count == 1 or count == 2:
+            #     print(np.mean(feature))
+            # if np.std(feature) < 0.03:      # condition threshold
+            #     feature = np.zeros_like(feature)
+            # if np.mean(feature) < -0.015:      # condition threshold
+            #     feature = np.zeros_like(feature)
+            plt.imshow(np.reshape(feature ,(shape[0],shape[1])),
+                    cmap='gray', interpolation="nearest")#, vmin=np.min(feature), vmax=np.max(feature))
+            # ------------------------------------------------------------
+            # plt.imshow(np.reshape(W[:,count-1] ,(np.sqrt(size),np.sqrt(size))), cmap='gray', interpolation="nearest")
+            plt.gca().xaxis.set_major_locator(plt.NullLocator())    # disable ticks
+            plt.gca().yaxis.set_major_locator(plt.NullLocator())
+            count = count + 1
+    if saveable:
+        plt.savefig(name+'.pdf',format='pdf')
+    else:
+        plt.draw()
+        plt.pause(second)
+
+def frame(I=None, second=5, saveable=True, name='frame', fig_idx=12836):
+    """Display a frame (image). Make sure OpenAI Gym render() is disabled before using it.
+
+    Parameters
+    ----------
+    I : numpy.array
+        The image
+    second : int
+        The display second(s) for the image(s), if saveable is False.
+    saveable : boolean
+        Save or plot the figure.
+    name : a string
+        A name to save the image, if saveable is True.
+    fig_idx : int
+        matplotlib figure index.
+
+    Examples
+    --------
+    >>> env = gym.make("Pong-v0")
+    >>> observation = env.reset()
+    >>> tl.visualize.frame(observation)
+    """
+    if saveable is False:
+        plt.ion()
+    fig = plt.figure(fig_idx)      # show all feature images
+
+    plt.imshow(I)
+    # plt.gca().xaxis.set_major_locator(plt.NullLocator())    # disable ticks
+    # plt.gca().yaxis.set_major_locator(plt.NullLocator())
+
+    if saveable:
+        plt.savefig(name+'.pdf',format='pdf')
+    else:
+        plt.draw()
+        plt.pause(second)
+
+def CNN2d(CNN=None, second=10, saveable=True, name='cnn', fig_idx=3119362):
+    """Display a group of RGB or Greyscale CNN masks.
+
+    Parameters
+    ----------
+    CNN : numpy.array
+        The image. e.g: 64 5x5 RGB images can be (5, 5, 3, 64).
+    second : int
+        The display second(s) for the image(s), if saveable is False.
+    saveable : boolean
+        Save or plot the figure.
+    name : a string
+        A name to save the image, if saveable is True.
+    fig_idx : int
+        matplotlib figure index.
+
+    Examples
+    --------
+    >>> tl.visualize.CNN2d(network.all_params[0].eval(), second=10, saveable=True, name='cnn1_mnist', fig_idx=2012)
+    """
+    # print(CNN.shape)    # (5, 5, 3, 64)
+    # exit()
+    n_mask = CNN.shape[3]
+    n_row = CNN.shape[0]
+    n_col = CNN.shape[1]
+    n_color = CNN.shape[2]
+    row = int(np.sqrt(n_mask))
+    col = int(np.ceil(n_mask/row))
+    plt.ion()   # active mode
+    fig = plt.figure(fig_idx)
+    count = 1
+    for ir in range(1, row+1):
+        for ic in range(1, col+1):
+            if count > n_mask:
+                break
+            a = fig.add_subplot(col, row, count)
+            # print(CNN[:,:,:,count-1].shape, n_row, n_col)   # (5, 1, 32) 5 5
+            # exit()
+            # plt.imshow(
+            #         np.reshape(CNN[count-1,:,:,:], (n_row, n_col)),
+            #         cmap='gray', interpolation="nearest")     # theano
+            if n_color == 1:
+                plt.imshow(
+                        np.reshape(CNN[:,:,:,count-1], (n_row, n_col)),
+                        cmap='gray', interpolation="nearest")
+            elif n_color == 3:
+                plt.imshow(
+                        np.reshape(CNN[:,:,:,count-1], (n_row, n_col, n_color)),
+                        cmap='gray', interpolation="nearest")
+            else:
+                raise Exception("Unknown n_color")
+            plt.gca().xaxis.set_major_locator(plt.NullLocator())    # disable ticks
+            plt.gca().yaxis.set_major_locator(plt.NullLocator())
+            count = count + 1
+    if saveable:
+        plt.savefig(name+'.pdf',format='pdf')
+    else:
+        plt.draw()
+        plt.pause(second)
+
+
+def images2d(images=None, second=10, saveable=True, name='images', dtype=None,
+            fig_idx=3119362):
+    """Display a group of RGB or Greyscale images.
+
+    Parameters
+    ----------
+    images : numpy.array
+        The images.
+    second : int
+        The display second(s) for the image(s), if saveable is False.
+    saveable : boolean
+        Save or plot the figure.
+    name : a string
+        A name to save the image, if saveable is True.
+    dtype : None or numpy data type
+        The data type for displaying the images.
+    fig_idx : int
+        matplotlib figure index.
+
+    Examples
+    --------
+    >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False)
+    >>> tl.visualize.images2d(X_train[0:100,:,:,:], second=10, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212)
+    """
+    # print(images.shape)    # (50000, 32, 32, 3)
+    # exit()
+    if dtype:
+        images = np.asarray(images, dtype=dtype)
+    n_mask = images.shape[0]
+    n_row = images.shape[1]
+    n_col = images.shape[2]
+    n_color = images.shape[3]
+    row = int(np.sqrt(n_mask))
+    col = int(np.ceil(n_mask/row))
+    plt.ion()   # active mode
+    fig = plt.figure(fig_idx)
+    count = 1
+    for ir in range(1, row+1):
+        for ic in range(1, col+1):
+            if count > n_mask:
+                break
+            a = fig.add_subplot(col, row, count)
+            # print(images[:,:,:,count-1].shape, n_row, n_col)   # (5, 1, 32) 5 5
+            # plt.imshow(
+            #         np.reshape(images[count-1,:,:,:], (n_row, n_col)),
+            #         cmap='gray', interpolation="nearest")     # theano
+            if n_color == 1:
+                plt.imshow(
+                        np.reshape(images[count-1,:,:], (n_row, n_col)),
+                        cmap='gray', interpolation="nearest")
+            elif n_color == 3:
+                plt.imshow(images[count-1,:,:],
+                        cmap='gray', interpolation="nearest")
+            else:
+                raise Exception("Unknown n_color")
+            plt.gca().xaxis.set_major_locator(plt.NullLocator())    # disable ticks
+            plt.gca().yaxis.set_major_locator(plt.NullLocator())
+            count = count + 1
+    if saveable:
+        plt.savefig(name+'.pdf',format='pdf')
+    else:
+        plt.draw()
+        plt.pause(second)
+
+def tsne_embedding(embeddings, reverse_dictionary, plot_only=500,
+                    second=5, saveable=False, name='tsne', fig_idx=9862):
+    """Visualize the embeddings by using t-SNE.
+
+    Parameters
+    ----------
+    embeddings : a matrix
+        The embedding matrix.
+ reverse_dictionary : a dictionary + id_to_word, mapping id to unique word. + plot_only : int + The number of examples to plot, choice the most common words. + second : int + The display second(s) for the image(s), if saveable is False. + saveable : boolen + Save or plot the figure. + name : a string + A name to save the image, if saveable is True. + fig_idx : int + matplotlib figure index. + + Examples + -------- + >>> see 'tutorial_word2vec_basic.py' + >>> final_embeddings = normalized_embeddings.eval() + >>> tl.visualize.tsne_embedding(final_embeddings, labels, reverse_dictionary, + ... plot_only=500, second=5, saveable=False, name='tsne') + """ + def plot_with_labels(low_dim_embs, labels, figsize=(18, 18), second=5, + saveable=True, name='tsne', fig_idx=9862): + assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings" + if saveable is False: + plt.ion() + plt.figure(fig_idx) + plt.figure(figsize=figsize) #in inches + for i, label in enumerate(labels): + x, y = low_dim_embs[i,:] + plt.scatter(x, y) + plt.annotate(label, + xy=(x, y), + xytext=(5, 2), + textcoords='offset points', + ha='right', + va='bottom') + if saveable: + plt.savefig(name+'.pdf',format='pdf') + else: + plt.draw() + plt.pause(second) + + try: + from sklearn.manifold import TSNE + import matplotlib.pyplot as plt + from six.moves import xrange + + tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000) + # plot_only = 500 + low_dim_embs = tsne.fit_transform(embeddings[:plot_only,:]) + labels = [reverse_dictionary[i] for i in xrange(plot_only)] + plot_with_labels(low_dim_embs, labels, second=second, saveable=saveable, \ + name=name, fig_idx=fig_idx) + except ImportError: + print("Please install sklearn and matplotlib to visualize embeddings.") + + +# diff --git a/train.py b/train.py index 0c7fa5e..e12b867 100755 --- a/train.py +++ b/train.py @@ -18,12 +18,12 @@ import numpy as np from buildmodel import * -DIR = "/home/dsigpu4/Samba/image_captioning" +DIR = "/home/haodong/Workspace/image_captioning" ## DIR ========================================================================= # Directory containing preprocessed MSCOCO data. # MSCOCO_DIR = DIR + "/data/mscoco" -MSCOCO_DIR = "/home/dsigpu4/Samba/im2txt/im2txt/data/mscoco" +MSCOCO_DIR = "/home/haodong/Workspace/im2txt/im2txt/data/mscoco" # Inception v3 checkpoint file. INCEPTION_CHECKPOINT = DIR + "/data/inception_v3.ckpt" # Directory to save the model. @@ -84,107 +84,109 @@ # Build the TensorFlow graph. 
================================================== g = tf.Graph() with g.as_default(): - with tf.device('/cpu:0'): - sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) - print("tl : Build Show and Tell Model") - images, input_seqs, target_seqs, input_mask = Build_Inputs(mode, input_file_pattern) - # ## Example of read data - # from im2txt.inference_utils import vocabulary - # # vocab = vocabulary.Vocabulary(FLAGS.vocab_file) - # vocab = vocabulary.Vocabulary('/home/dsigpu4/Samba/im2txt/im2txt/data/mscoco/word_counts.txt') - # print('vocab:',[vocab.id_to_word(w) for w in range(100)]) - # sess = tf.Session()#tf.InteractiveSession() - # sess.run(tf.initialize_all_variables()) - # with tf.Session() as sess: - # sess.run(tf.initialize_all_variables()) - # coord = tf.train.Coordinator() - # threads = tf.train.start_queue_runners(sess=sess, coord=coord) - # for i in range(3): # number of mini-batch (step) - # print("Step %d" % i) - # img_val, caps, tar, mask = sess.run([images, input_seqs, target_seqs, input_mask]) - # print(img_val.shape, caps.shape, tar.shape, mask.shape) - # for i in range(len(caps)): # print all sentence in a batch, Note : the length is Dynamic ! - # sentence = [vocab.id_to_word(id) for id in caps[i]] - # print("input_seqs:"+ " ".join(sentence)) - # sentence = [vocab.id_to_word(id) for id in tar[i]] - # print("target_seqs:"+ " ".join(sentence)) - # print("input_mask: %s" % mask[i]) - # coord.request_stop() - # coord.join(threads) - # sess.close() - # # ((32, 299, 299, 3), (32, 18), (32, 18), (32, 18)) - # # input_seqs: a figurine with a plastic witches head is standing in front of a computer keyboard . a - # # target_seqs:a figurine with a plastic witches head is standing in front of a computer keyboard . a - # # input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0] - # exit() - # ## End of Example of read data - with tf.device('/gpu:0'): - net_image_embeddings = Build_Image_Embeddings(mode, images, train_inception) - net_seq_embeddings = Build_Seq_Embeddings(input_seqs) - total_loss, _, _ = Build_Model(mode, net_image_embeddings, net_seq_embeddings, target_seqs, input_mask) - - tvar = tf.all_variables() # or tf.trainable_variables() - for idx, v in enumerate(tvar): - print(" var {:3}: {:15} {}".format(idx, str(v.get_shape()), v.name)) - - # Sets up the function to restore inception variables from checkpoint. setup_inception_initializer() - inception_variables = tf.get_collection( - tf.GraphKeys.VARIABLES, scope="InceptionV3") - - # Sets up the global step Tensor. setup_global_step() - print("tl : Sets up the Global Step") - global_step = tf.Variable( - initial_value=0, - dtype=tf.int32, - name="global_step", - trainable=False, - collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.VARIABLES]) - - # Set up the learning rate. - learning_rate_decay_fn = None - if train_inception: - # when fine-tune - learning_rate = tf.constant(train_inception_learning_rate) - else: - # when don't update inception_v3 - learning_rate = tf.constant(initial_learning_rate) - if learning_rate_decay_factor > 0: - num_batches_per_epoch = (num_examples_per_epoch / batch_size) - decay_steps = int(num_batches_per_epoch * num_epochs_per_decay) - def _learning_rate_decay_fn(learning_rate, global_step): - return tf.train.exponential_decay( - learning_rate, - global_step, - decay_steps=decay_steps, - decay_rate=learning_rate_decay_factor, - staircase=True) - learning_rate_decay_fn = _learning_rate_decay_fn - - with tf.device('/gpu:0'): - # Set up the training ops. 
- train_op = tf.contrib.layers.optimize_loss( - loss=total_loss, - global_step=global_step, - learning_rate=learning_rate, - optimizer=optimizer, - clip_gradients=clip_gradients, - learning_rate_decay_fn=learning_rate_decay_fn) - - # sess = tf.InteractiveSession() - # sess.run(tf.initialize_all_variables()) - if mode != "inference": - print("tl : Restore InceptionV3 model from: %s" % inception_checkpoint_file) - saver = tf.train.Saver(inception_variables) - saver.restore(sess, inception_checkpoint_file) - print("tl : Restore the lastest ckpt model from: %s" % train_dir) - try: - saver = tf.train.Saver() - saver.restore(sess, tf.train.latest_checkpoint(train_dir)) - except Exception: - print(" Not ckpt found") - - # Set up the Saver for saving and restoring model checkpoints. - saver = tf.train.Saver(max_to_keep=max_checkpoints_to_keep) + # with tf.device('/cpu:0'): + sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) + print("tl : Build Show and Tell Model") + images, input_seqs, target_seqs, input_mask = Build_Inputs(mode, input_file_pattern) + # ## Example of read data + # from im2txt.inference_utils import vocabulary + # # vocab = vocabulary.Vocabulary(FLAGS.vocab_file) + # vocab = vocabulary.Vocabulary('/home/haodong/Workspace/im2txt/im2txt/data/mscoco/word_counts.txt') + # print('vocab:',[vocab.id_to_word(w) for w in range(100)]) + # sess = tf.Session()#tf.InteractiveSession() + # sess.run(tf.initialize_all_variables()) + # with tf.Session() as sess: + # sess.run(tf.initialize_all_variables()) + # coord = tf.train.Coordinator() + # threads = tf.train.start_queue_runners(sess=sess, coord=coord) + # for i in range(3): # number of mini-batch (step) + # print("Step %d" % i) + # img_val, caps, tar, mask = sess.run([images, input_seqs, target_seqs, input_mask]) + # print(img_val.shape, caps.shape, tar.shape, mask.shape) + # for i in range(len(caps)): # print all sentence in a batch, Note : the length is Dynamic ! + # sentence = [vocab.id_to_word(id) for id in caps[i]] + # print("input_seqs:"+ " ".join(sentence)) + # sentence = [vocab.id_to_word(id) for id in tar[i]] + # print("target_seqs:"+ " ".join(sentence)) + # print("input_mask: %s" % mask[i]) + # coord.request_stop() + # coord.join(threads) + # sess.close() + # # ((32, 299, 299, 3), (32, 18), (32, 18), (32, 18)) + # # input_seqs: a figurine with a plastic witches head is standing in front of a computer keyboard . a + # # target_seqs:a figurine with a plastic witches head is standing in front of a computer keyboard . a + # # input_mask: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0] + # exit() + # ## End of Example of read data + # with tf.device('/gpu:0'): + net_image_embeddings = Build_Image_Embeddings(mode, images, train_inception) + net_seq_embeddings = Build_Seq_Embeddings(input_seqs) + total_loss, _, _, network = Build_Model(mode, net_image_embeddings, net_seq_embeddings, target_seqs, input_mask) + + network.print_layers() + + tvar = tf.all_variables() # or tf.trainable_variables() + for idx, v in enumerate(tvar): + print(" var {:3}: {:15} {}".format(idx, str(v.get_shape()), v.name)) + + # Sets up the function to restore inception variables from checkpoint. setup_inception_initializer() + inception_variables = tf.get_collection( + tf.GraphKeys.VARIABLES, scope="InceptionV3") + + # Sets up the global step Tensor. 
setup_global_step()
+    print("tl : Sets up the Global Step")
+    global_step = tf.Variable(
+        initial_value=0,
+        dtype=tf.int32,
+        name="global_step",
+        trainable=False,
+        collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.VARIABLES])
+
+    # Set up the learning rate.
+    learning_rate_decay_fn = None
+    if train_inception:
+        # when fine-tune
+        learning_rate = tf.constant(train_inception_learning_rate)
+    else:
+        # when don't update inception_v3
+        learning_rate = tf.constant(initial_learning_rate)
+        if learning_rate_decay_factor > 0:
+            num_batches_per_epoch = (num_examples_per_epoch / batch_size)
+            decay_steps = int(num_batches_per_epoch * num_epochs_per_decay)
+            def _learning_rate_decay_fn(learning_rate, global_step):
+                return tf.train.exponential_decay(
+                    learning_rate,
+                    global_step,
+                    decay_steps=decay_steps,
+                    decay_rate=learning_rate_decay_factor,
+                    staircase=True)
+            learning_rate_decay_fn = _learning_rate_decay_fn
+
+    # with tf.device('/gpu:0'):
+    # Set up the training ops.
+    train_op = tf.contrib.layers.optimize_loss(
+        loss=total_loss,
+        global_step=global_step,
+        learning_rate=learning_rate,
+        optimizer=optimizer,
+        clip_gradients=clip_gradients,
+        learning_rate_decay_fn=learning_rate_decay_fn)
+
+    sess = tf.InteractiveSession()
+    sess.run(tf.initialize_all_variables())
+    if mode != "inference":
+        print("tl : Restore InceptionV3 model from: %s" % inception_checkpoint_file)
+        saver = tf.train.Saver(inception_variables)
+        saver.restore(sess, inception_checkpoint_file)
+        print("tl : Restore the latest ckpt model from: %s" % train_dir)
+        try:
+            saver = tf.train.Saver()
+            saver.restore(sess, tf.train.latest_checkpoint(train_dir))  # train_dir+"/model.ckpt-960000")
+        except Exception:
+            print("     No ckpt found")
+
+    # Set up the Saver for saving and restoring model checkpoints.
+    saver = tf.train.Saver(max_to_keep=max_checkpoints_to_keep)
 print('Start training') # the 1st epoch will take a while
 coord = tf.train.Coordinator()
@@ -196,5 +198,6 @@ def _learning_rate_decay_fn(learning_rate, global_step):
     if (step % 10000) == 0 and step != 0:
         # save_path = saver.save(sess, MODEL_DIR+"/train/model.ckpt-"+str(step))
         save_path = saver.save(sess, MODEL_DIR+"/train/model.ckpt", global_step=step)
+        tl.files.save_npz(network.all_params, name=MODEL_DIR+'/train/model_image_caption.npz')
 coord.request_stop()
 coord.join(threads)
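
--
Note: besides the TensorFlow checkpoint above, train.py now also dumps the network
parameters to model_image_caption.npz every 10000 steps. A minimal sketch of reloading
them in a later session (assuming the same graph has been rebuilt and ``sess``
initialized; ``load_npz`` and ``assign_params`` are the TensorLayer file helpers
shipped in this repo):

    params = tl.files.load_npz(name=MODEL_DIR + '/train/model_image_caption.npz')
    tl.files.assign_params(sess, params, network)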