diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 0e6a8459826..00000000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,6 +0,0 @@
-include LICENSE
-include README.md
-include CONTRIBUTING.md
-graft docs
-graft examples
-graft tests
diff --git a/docs/README.md b/docs/README.md
deleted file mode 100644
index 6e0f892ff21..00000000000
--- a/docs/README.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# Keras Documentation
-
-The source for Keras documentation is in this directory.
-Our documentation uses extended Markdown, as implemented by [MkDocs](http://mkdocs.org).
-
-## Building the documentation
-
-- Install MkDocs: `pip install mkdocs`
-- `pip install -e .` to make sure that Python will import your modified version of Keras.
-- From the root directory, `cd` into the `docs/` folder and run:
- - `KERAS_BACKEND=tensorflow python autogen.py`
- - `mkdocs serve` # Starts a local webserver: [localhost:8000](http://localhost:8000)
- - `mkdocs build` # Builds a static site in `site/` directory
diff --git a/docs/__init__.py b/docs/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/docs/autogen.py b/docs/autogen.py
deleted file mode 100644
index fa575bada86..00000000000
--- a/docs/autogen.py
+++ /dev/null
@@ -1,473 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import re
-import inspect
-import os
-import shutil
-import six
-
-try:
- import pathlib
-except ImportError:
- import pathlib2 as pathlib
-
-import keras
-from keras import backend as K
-from keras.backend import numpy_backend
-
-from docs.structure import EXCLUDE
-from docs.structure import PAGES
-from docs.structure import template_np_implementation
-from docs.structure import template_hidden_np_implementation
-
-import sys
-if sys.version[0] == '2':
- reload(sys)
- sys.setdefaultencoding('utf8')
-
-keras_dir = pathlib.Path(__file__).resolve().parents[1]
-
-
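-# Build the display signature string for `function`, e.g.
-# `keras.layers.Dense(units, activation=None, ...)`; when `method` is True,
-# the first positional argument (`self`) is dropped.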
-def get_function_signature(function, method=True):
- wrapped = getattr(function, '_original_function', None)
- if wrapped is None:
- signature = inspect.getfullargspec(function)
- else:
- signature = inspect.getfullargspec(wrapped)
- defaults = signature.defaults
- if method:
- args = signature.args[1:]
- else:
- args = signature.args
- if defaults:
- kwargs = zip(args[-len(defaults):], defaults)
- args = args[:-len(defaults)]
- else:
- kwargs = []
- st = '%s.%s(' % (clean_module_name(function.__module__), function.__name__)
-
- for a in args:
- st += str(a) + ', '
- for a, v in kwargs:
- if isinstance(v, str):
- v = '\'' + v + '\''
- st += str(a) + '=' + str(v) + ', '
- if kwargs or args:
- signature = st[:-2] + ')'
- else:
- signature = st + ')'
- return post_process_signature(signature)
-
-
-def get_class_signature(cls):
- try:
- class_signature = get_function_signature(cls.__init__)
- class_signature = class_signature.replace('__init__', cls.__name__)
- except (TypeError, AttributeError):
- # in case the class inherits from object and does not
- # define __init__
-        class_signature = "{clean_module_name}.{cls_name}()".format(
-            clean_module_name=clean_module_name(cls.__module__),
-            cls_name=cls.__name__
-        )
- return post_process_signature(class_signature)
-
-
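-# Collapse internal module paths (e.g. `keras.layers.core.Dense`) down to the
-# public two-level alias (`keras.layers.Dense`). The `(?!\d)` lookahead avoids
-# splitting on the decimal point of numeric default values such as `0.5`.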
-def post_process_signature(signature):
- parts = re.split(r'\.(?!\d)', signature)
- if len(parts) >= 4:
- if parts[1] == 'layers':
- signature = 'keras.layers.' + '.'.join(parts[3:])
- if parts[1] == 'utils':
- signature = 'keras.utils.' + '.'.join(parts[3:])
- if parts[1] == 'backend':
- signature = 'keras.backend.' + '.'.join(parts[3:])
- if parts[1] == 'callbacks':
- signature = 'keras.callbacks.' + '.'.join(parts[3:])
- return signature
-
-
-def clean_module_name(name):
- if name.startswith('keras_applications'):
- name = name.replace('keras_applications', 'keras.applications')
- if name.startswith('keras_preprocessing'):
- name = name.replace('keras_preprocessing', 'keras.preprocessing')
- return name
-
-
-def class_to_source_link(cls):
- module_name = clean_module_name(cls.__module__)
- path = module_name.replace('.', '/')
- path += '.py'
- line = inspect.getsourcelines(cls)[-1]
- link = ('https://github.com/keras-team/'
- 'keras/blob/master/' + path + '#L' + str(line))
- return '[[source]](' + link + ')'
-
-
-def code_snippet(snippet):
- result = '```python\n'
- result += snippet.encode('unicode_escape').decode('utf8') + '\n'
- result += '```\n'
- return result
-
-
-def count_leading_spaces(s):
- ws = re.search(r'\S', s)
- if ws:
- return ws.start()
- else:
- return 0
-
-
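-# Extract one "# Section"-style list block from `docstring`, swap it for
-# `marker` so it survives later whitespace stripping, and rewrite its
-# entries as Markdown bullets with `- __name__:` roots.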
-def process_list_block(docstring, starting_point, section_end,
- leading_spaces, marker):
- ending_point = docstring.find('\n\n', starting_point)
- block = docstring[starting_point:
- (ending_point - 1 if ending_point > -1
- else section_end)]
- # Place marker for later reinjection.
- docstring_slice = docstring[
- starting_point:section_end].replace(block, marker)
- docstring = (docstring[:starting_point] +
- docstring_slice +
- docstring[section_end:])
- lines = block.split('\n')
- # Remove the computed number of leading white spaces from each line.
- lines = [re.sub('^' + ' ' * leading_spaces, '', line) for line in lines]
- # Usually lines have at least 4 additional leading spaces.
- # These have to be removed, but first the list roots have to be detected.
- top_level_regex = r'^ ([^\s\\\(]+):(.*)'
- top_level_replacement = r'- __\1__:\2'
- lines = [re.sub(top_level_regex, top_level_replacement, line)
- for line in lines]
- # All the other lines get simply the 4 leading space (if present) removed
- lines = [re.sub(r'^ ', '', line) for line in lines]
- # Fix text lines after lists
- indent = 0
- text_block = False
- for i in range(len(lines)):
- line = lines[i]
- spaces = re.search(r'\S', line)
- if spaces:
- # If it is a list element
- if line[spaces.start()] == '-':
- indent = spaces.start() + 1
- if text_block:
- text_block = False
- lines[i] = '\n' + line
- elif spaces.start() < indent:
- text_block = True
- indent = spaces.start()
- lines[i] = '\n' + line
- else:
- text_block = False
- indent = 0
- block = '\n'.join(lines)
- return docstring, block
-
-
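-# Convert a Keras-style docstring to Markdown: fenced code blocks are pulled
-# out behind `$CODE_BLOCK_n` markers, `# Section` titles become `__Section__`
-# headers, list blocks are reformatted, and everything is reinjected at the end.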
-def process_docstring(docstring):
- # First, extract code blocks and process them.
- code_blocks = []
- if '```' in docstring:
- tmp = docstring[:]
- while '```' in tmp:
- tmp = tmp[tmp.find('```'):]
- index = tmp[3:].find('```') + 6
- snippet = tmp[:index]
- # Place marker in docstring for later reinjection.
- docstring = docstring.replace(
- snippet, '$CODE_BLOCK_%d' % len(code_blocks))
- snippet_lines = snippet.split('\n')
- # Remove leading spaces.
- num_leading_spaces = snippet_lines[-1].find('`')
- snippet_lines = ([snippet_lines[0]] +
- [line[num_leading_spaces:]
- for line in snippet_lines[1:]])
- # Most code snippets have 3 or 4 more leading spaces
- # on inner lines, but not all. Remove them.
- inner_lines = snippet_lines[1:-1]
- leading_spaces = None
- for line in inner_lines:
- if not line or line[0] == '\n':
- continue
- spaces = count_leading_spaces(line)
- if leading_spaces is None:
- leading_spaces = spaces
- if spaces < leading_spaces:
- leading_spaces = spaces
- if leading_spaces:
- snippet_lines = ([snippet_lines[0]] +
- [line[leading_spaces:]
- for line in snippet_lines[1:-1]] +
- [snippet_lines[-1]])
- snippet = '\n'.join(snippet_lines)
- code_blocks.append(snippet)
- tmp = tmp[index:]
-
- # Format docstring lists.
- section_regex = r'\n( +)# (.*)\n'
- section_idx = re.search(section_regex, docstring)
- shift = 0
- sections = {}
- while section_idx and section_idx.group(2):
- anchor = section_idx.group(2)
- leading_spaces = len(section_idx.group(1))
- shift += section_idx.end()
- next_section_idx = re.search(section_regex, docstring[shift:])
- if next_section_idx is None:
- section_end = -1
- else:
- section_end = shift + next_section_idx.start()
- marker = '$' + anchor.replace(' ', '_') + '$'
- docstring, content = process_list_block(docstring,
- shift,
- section_end,
- leading_spaces,
- marker)
- sections[marker] = content
- # `docstring` has changed, so we can't use `next_section_idx` anymore
- # we have to recompute it
- section_idx = re.search(section_regex, docstring[shift:])
-
- # Format docstring section titles.
- docstring = re.sub(r'\n(\s+)# (.*)\n',
- r'\n\1__\2__\n\n',
- docstring)
-
- # Strip all remaining leading spaces.
- lines = docstring.split('\n')
- docstring = '\n'.join([line.lstrip(' ') for line in lines])
-
- # Reinject list blocks.
- for marker, content in sections.items():
- docstring = docstring.replace(marker, content)
-
- # Reinject code blocks.
- for i, code_block in enumerate(code_blocks):
- docstring = docstring.replace(
- '$CODE_BLOCK_%d' % i, code_block)
- return docstring
-
-
-def add_np_implementation(function, docstring):
- np_implementation = getattr(numpy_backend, function.__name__)
- code = inspect.getsource(np_implementation)
- code_lines = code.split('\n')
- for i in range(len(code_lines)):
- if code_lines[i]:
- # if there is something on the line, add 8 spaces.
-            code_lines[i] = '        ' + code_lines[i]
- code = '\n'.join(code_lines[:-1])
-
- if len(code_lines) < 10:
- section = template_np_implementation.replace('{{code}}', code)
- else:
- section = template_hidden_np_implementation.replace('{{code}}', code)
- return docstring.replace('{{np_implementation}}', section)
-
-
-def read_file(path):
- with open(path, encoding='utf-8') as f:
- return f.read()
-
-
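-# Resolve the methods to document for `cls`: use the explicit list from PAGES
-# when given (strings are looked up on the class), otherwise collect every
-# public routine that is not in EXCLUDE.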
-def collect_class_methods(cls, methods):
- if isinstance(methods, (list, tuple)):
- return [getattr(cls, m) if isinstance(m, str) else m for m in methods]
- methods = []
- for _, method in inspect.getmembers(cls, predicate=inspect.isroutine):
- if method.__name__[0] == '_' or method.__name__ in EXCLUDE:
- continue
- methods.append(method)
- return methods
-
-
-def render_function(function, method=True):
- subblocks = []
- signature = get_function_signature(function, method=method)
- if method:
- signature = signature.replace(
- clean_module_name(function.__module__) + '.', '')
- subblocks.append('### ' + function.__name__ + '\n')
- subblocks.append(code_snippet(signature))
- docstring = function.__doc__
- if docstring:
- if ('backend' in signature and
- '{{np_implementation}}' in docstring):
- docstring = add_np_implementation(function, docstring)
- subblocks.append(process_docstring(docstring))
- return '\n\n'.join(subblocks)
-
-
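-# Collect the members of the given kind ('classes', 'functions' or 'methods')
-# for one PAGES entry, adding any members discovered through the
-# 'all_module_*' module listings.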
-def read_page_data(page_data, type):
- assert type in ['classes', 'functions', 'methods']
- data = page_data.get(type, [])
- for module in page_data.get('all_module_{}'.format(type), []):
- module_data = []
- for name in dir(module):
- if name[0] == '_' or name in EXCLUDE:
- continue
- module_member = getattr(module, name)
- if (inspect.isclass(module_member) and type == 'classes' or
- inspect.isfunction(module_member) and type == 'functions'):
- instance = module_member
- if module.__name__ in instance.__module__:
- if instance not in module_data:
- module_data.append(instance)
- module_data.sort(key=lambda x: id(x))
- data += module_data
- return data
-
-
-def get_module_docstring(filepath):
- """Extract the module docstring.
-
- Also finds the line at which the docstring ends.
- """
- co = compile(open(filepath, encoding='utf-8').read(), filepath, 'exec')
- if co.co_consts and isinstance(co.co_consts[0], six.string_types):
- docstring = co.co_consts[0]
- else:
- print('Could not get the docstring from ' + filepath)
- docstring = ''
- return docstring, co.co_firstlineno
-
-
-def copy_examples(examples_dir, destination_dir):
- """Copy the examples directory in the documentation.
-
- Prettify files by extracting the docstrings written in Markdown.
- """
- pathlib.Path(destination_dir).mkdir(exist_ok=True)
- for file in os.listdir(examples_dir):
- if not file.endswith('.py'):
- continue
- module_path = os.path.join(examples_dir, file)
- docstring, starting_line = get_module_docstring(module_path)
- destination_file = os.path.join(destination_dir, file[:-2] + 'md')
- with open(destination_file, 'w+', encoding='utf-8') as f_out, \
- open(os.path.join(examples_dir, file),
- 'r+', encoding='utf-8') as f_in:
-
- f_out.write(docstring + '\n\n')
-
- # skip docstring
- for _ in range(starting_line):
- next(f_in)
-
- f_out.write('```python\n')
- # next line might be empty.
- line = next(f_in)
- if line != '\n':
- f_out.write(line)
-
- # copy the rest of the file.
- for line in f_in:
- f_out.write(line)
- f_out.write('```')
-
-
-def generate(sources_dir):
- """Generates the markdown files for the documentation.
-
- # Arguments
- sources_dir: Where to put the markdown files.
- """
- template_dir = os.path.join(str(keras_dir), 'docs', 'templates')
-
- if K.backend() != 'tensorflow':
- raise RuntimeError('The documentation must be built '
- 'with the TensorFlow backend because this '
- 'is the only backend with docstrings.')
-
- print('Cleaning up existing sources directory.')
- if os.path.exists(sources_dir):
- shutil.rmtree(sources_dir)
-
- print('Populating sources directory with templates.')
- shutil.copytree(template_dir, sources_dir)
-
- readme = read_file(os.path.join(str(keras_dir), 'README.md'))
- index = read_file(os.path.join(template_dir, 'index.md'))
- index = index.replace('{{autogenerated}}', readme[readme.find('##'):])
- with open(os.path.join(sources_dir, 'index.md'), 'w', encoding='utf-8') as f:
- f.write(index)
-
- print('Generating docs for Keras %s.' % keras.__version__)
- for page_data in PAGES:
- classes = read_page_data(page_data, 'classes')
-
- blocks = []
- for element in classes:
- if not isinstance(element, (list, tuple)):
- element = (element, [])
- cls = element[0]
- subblocks = []
- signature = get_class_signature(cls)
-            subblocks.append('<span style="float:right;">' +
-                             class_to_source_link(cls) + '</span>')
- if element[1]:
- subblocks.append('## ' + cls.__name__ + ' class\n')
- else:
- subblocks.append('### ' + cls.__name__ + '\n')
- subblocks.append(code_snippet(signature))
- docstring = cls.__doc__
- if docstring:
- subblocks.append(process_docstring(docstring))
- methods = collect_class_methods(cls, element[1])
- if methods:
- subblocks.append('\n---')
- subblocks.append('## ' + cls.__name__ + ' methods\n')
- subblocks.append('\n---\n'.join(
- [render_function(method, method=True)
- for method in methods]))
- blocks.append('\n'.join(subblocks))
-
- methods = read_page_data(page_data, 'methods')
-
- for method in methods:
- blocks.append(render_function(method, method=True))
-
- functions = read_page_data(page_data, 'functions')
-
- for function in functions:
- blocks.append(render_function(function, method=False))
-
- if not blocks:
- raise RuntimeError('Found no content for page ' +
- page_data['page'])
-
- mkdown = '\n----\n\n'.join(blocks)
- # Save module page.
- # Either insert content into existing page,
- # or create page otherwise.
- page_name = page_data['page']
- path = os.path.join(sources_dir, page_name)
- if os.path.exists(path):
- template = read_file(path)
- if '{{autogenerated}}' not in template:
- raise RuntimeError('Template found for ' + path +
- ' but missing {{autogenerated}}'
- ' tag.')
- mkdown = template.replace('{{autogenerated}}', mkdown)
- print('...inserting autogenerated content into template:', path)
- else:
- print('...creating new page with autogenerated content:', path)
- subdir = os.path.dirname(path)
- if not os.path.exists(subdir):
- os.makedirs(subdir)
- with open(path, 'w', encoding='utf-8') as f:
- f.write(mkdown)
-
- shutil.copyfile(os.path.join(str(keras_dir), 'CONTRIBUTING.md'),
- os.path.join(str(sources_dir), 'contributing.md'))
- copy_examples(os.path.join(str(keras_dir), 'examples'),
- os.path.join(str(sources_dir), 'examples'))
-
-
-if __name__ == '__main__':
- generate(os.path.join(str(keras_dir), 'docs', 'sources'))
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
deleted file mode 100644
index 9a0cbb46636..00000000000
--- a/docs/mkdocs.yml
+++ /dev/null
@@ -1,90 +0,0 @@
-site_name: Keras Documentation
-theme:
- name: null
- custom_dir: theme
- static_templates:
- - 404.html
- include_search_page: true
- search_index_only: false
- highlightjs: true
- hljs_languages: []
- include_homepage_in_sidebar: true
- prev_next_buttons_location: bottom
- navigation_depth: 4
- titles_only: false
- sticky_navigation: true
- collapse_navigation: true
-
-docs_dir: sources
-repo_url: http://github.com/keras-team/keras
-site_url: http://keras.io/
-site_description: 'Documentation for Keras, the Python Deep Learning library.'
-
-dev_addr: '0.0.0.0:8000'
-google_analytics: ['UA-61785484-1', 'keras.io']
-
-nav:
-- Home: index.md
-- Why use Keras: why-use-keras.md
-- Getting started:
- - Guide to the Sequential model: getting-started/sequential-model-guide.md
- - Guide to the Functional API: getting-started/functional-api-guide.md
- - FAQ: getting-started/faq.md
-- Models:
- - About Keras models: models/about-keras-models.md
- - Sequential: models/sequential.md
- - Model (functional API): models/model.md
-- Layers:
- - About Keras layers: layers/about-keras-layers.md
- - Core Layers: layers/core.md
- - Convolutional Layers: layers/convolutional.md
- - Pooling Layers: layers/pooling.md
- - Locally-connected Layers: layers/local.md
- - Recurrent Layers: layers/recurrent.md
- - Embedding Layers: layers/embeddings.md
- - Merge Layers: layers/merge.md
- - Advanced Activations Layers: layers/advanced-activations.md
- - Normalization Layers: layers/normalization.md
- - Noise layers: layers/noise.md
- - Layer wrappers: layers/wrappers.md
- - Writing your own Keras layers: layers/writing-your-own-keras-layers.md
-- Preprocessing:
- - Sequence Preprocessing: preprocessing/sequence.md
- - Text Preprocessing: preprocessing/text.md
- - Image Preprocessing: preprocessing/image.md
-- Losses: losses.md
-- Metrics: metrics.md
-- Optimizers: optimizers.md
-- Activations: activations.md
-- Callbacks: callbacks.md
-- Datasets: datasets.md
-- Applications: applications.md
-- Backend: backend.md
-- Initializers: initializers.md
-- Regularizers: regularizers.md
-- Constraints: constraints.md
-- Visualization: visualization.md
-- Scikit-learn API: scikit-learn-api.md
-- Utils: utils.md
-- Contributing: contributing.md
-- Examples:
- - Addition RNN: examples/addition_rnn.md
- - Custom layer - antirectifier: examples/antirectifier.md
- - Baby RNN: examples/babi_rnn.md
- - Baby MemNN: examples/babi_memnn.md
- - CIFAR-10 CNN: examples/cifar10_cnn.md
- - CIFAR-10 ResNet: examples/cifar10_resnet.md
- - Convolution filter visualization: examples/conv_filter_visualization.md
- - Convolutional LSTM: examples/conv_lstm.md
- - Deep Dream: examples/deep_dream.md
- - Image OCR: examples/image_ocr.md
- - Bidirectional LSTM: examples/imdb_bidirectional_lstm.md
- - 1D CNN for text classification: examples/imdb_cnn.md
- - Sentiment classification CNN-LSTM: examples/imdb_cnn_lstm.md
- - Fasttext for text classification: examples/imdb_fasttext.md
- - Sentiment classification LSTM: examples/imdb_lstm.md
- - Sequence to sequence - training: examples/lstm_seq2seq.md
- - Sequence to sequence - prediction: examples/lstm_seq2seq_restore.md
- - Stateful LSTM: examples/lstm_stateful.md
- - LSTM for text generation: examples/lstm_text_generation.md
- - Auxiliary Classifier GAN: examples/mnist_acgan.md
diff --git a/docs/structure.py b/docs/structure.py
deleted file mode 100644
index f8d8c4aff84..00000000000
--- a/docs/structure.py
+++ /dev/null
@@ -1,358 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-General documentation architecture:
-
-Home
-Index
-
-- Getting started
- Getting started with the sequential model
- Getting started with the functional api
- FAQ
-
-- Models
- About Keras models
- explain when one should use Sequential or functional API
- explain compilation step
- explain weight saving, weight loading
- explain serialization, deserialization
- Sequential
- Model (functional API)
-
-- Layers
- About Keras layers
- explain common layer functions: get_weights, set_weights, get_config
- explain input_shape
- explain usage on non-Keras tensors
- Core Layers
- Convolutional Layers
- Pooling Layers
- Locally-connected Layers
- Recurrent Layers
- Embedding Layers
- Merge Layers
- Advanced Activations Layers
- Normalization Layers
- Noise Layers
- Layer Wrappers
- Writing your own Keras layers
-
-- Preprocessing
- Sequence Preprocessing
- Text Preprocessing
- Image Preprocessing
-
-Losses
-Metrics
-Optimizers
-Activations
-Callbacks
-Datasets
-Applications
-Backend
-Initializers
-Regularizers
-Constraints
-Visualization
-Scikit-learn API
-Utils
-Contributing
-
-'''
-from keras import utils
-from keras import layers
-from keras.layers import advanced_activations
-from keras.layers import noise
-from keras.layers import wrappers
-from keras import initializers
-from keras import optimizers
-from keras import callbacks
-from keras import models
-from keras import losses
-from keras import metrics
-from keras import backend
-from keras import constraints
-from keras import activations
-from keras import preprocessing
-
-
-EXCLUDE = {
- 'Optimizer',
- 'TFOptimizer',
- 'Wrapper',
- 'get_session',
- 'set_session',
- 'CallbackList',
- 'serialize',
- 'deserialize',
- 'get',
- 'set_image_dim_ordering',
- 'normalize_data_format',
- 'image_dim_ordering',
- 'get_variable_shape',
- 'Constraint'
-}
-
-# For each class to document, it is possible to:
-# 1) Document only the class: [classA, classB, ...]
-# 2) Document all its methods: [classA, (classB, "*")]
-# 3) Choose which methods to document (methods listed as strings):
-# [classA, (classB, ["method1", "method2", ...]), ...]
-# 4) Choose which methods to document (methods listed as qualified names):
-# [classA, (classB, [module.classB.method1, module.classB.method2, ...]), ...]
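-#
-# For example, a hypothetical page entry (not one of the real entries below)
-# combining these options could look like:
-#
-#     {
-#         'page': 'example.md',
-#         'classes': [models.Sequential,
-#                     (models.Model, ['fit', 'predict'])],
-#     }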
-PAGES = [
- {
- 'page': 'models/sequential.md',
- 'methods': [
- models.Sequential.compile,
- models.Sequential.fit,
- models.Sequential.evaluate,
- models.Sequential.predict,
- models.Sequential.train_on_batch,
- models.Sequential.test_on_batch,
- models.Sequential.predict_on_batch,
- models.Sequential.fit_generator,
- models.Sequential.evaluate_generator,
- models.Sequential.predict_generator,
- models.Sequential.get_layer,
- ],
- },
- {
- 'page': 'models/model.md',
- 'methods': [
- models.Model.compile,
- models.Model.fit,
- models.Model.evaluate,
- models.Model.predict,
- models.Model.train_on_batch,
- models.Model.test_on_batch,
- models.Model.predict_on_batch,
- models.Model.fit_generator,
- models.Model.evaluate_generator,
- models.Model.predict_generator,
- models.Model.get_layer,
- ]
- },
- {
- 'page': 'layers/core.md',
- 'classes': [
- layers.Dense,
- layers.Activation,
- layers.Dropout,
- layers.Flatten,
- layers.Input,
- layers.Reshape,
- layers.Permute,
- layers.RepeatVector,
- layers.Lambda,
- layers.ActivityRegularization,
- layers.Masking,
- layers.SpatialDropout1D,
- layers.SpatialDropout2D,
- layers.SpatialDropout3D,
- ],
- },
- {
- 'page': 'layers/convolutional.md',
- 'classes': [
- layers.Conv1D,
- layers.Conv2D,
- layers.SeparableConv1D,
- layers.SeparableConv2D,
- layers.DepthwiseConv2D,
- layers.Conv2DTranspose,
- layers.Conv3D,
- layers.Conv3DTranspose,
- layers.Cropping1D,
- layers.Cropping2D,
- layers.Cropping3D,
- layers.UpSampling1D,
- layers.UpSampling2D,
- layers.UpSampling3D,
- layers.ZeroPadding1D,
- layers.ZeroPadding2D,
- layers.ZeroPadding3D,
- ],
- },
- {
- 'page': 'layers/pooling.md',
- 'classes': [
- layers.MaxPooling1D,
- layers.MaxPooling2D,
- layers.MaxPooling3D,
- layers.AveragePooling1D,
- layers.AveragePooling2D,
- layers.AveragePooling3D,
- layers.GlobalMaxPooling1D,
- layers.GlobalAveragePooling1D,
- layers.GlobalMaxPooling2D,
- layers.GlobalAveragePooling2D,
- layers.GlobalMaxPooling3D,
- layers.GlobalAveragePooling3D,
- ],
- },
- {
- 'page': 'layers/local.md',
- 'classes': [
- layers.LocallyConnected1D,
- layers.LocallyConnected2D,
- ],
- },
- {
- 'page': 'layers/recurrent.md',
- 'classes': [
- layers.RNN,
- layers.SimpleRNN,
- layers.GRU,
- layers.LSTM,
- layers.ConvLSTM2D,
- layers.ConvLSTM2DCell,
- layers.SimpleRNNCell,
- layers.GRUCell,
- layers.LSTMCell,
- layers.CuDNNGRU,
- layers.CuDNNLSTM,
- ],
- },
- {
- 'page': 'layers/embeddings.md',
- 'classes': [
- layers.Embedding,
- ],
- },
- {
- 'page': 'layers/normalization.md',
- 'classes': [
- layers.BatchNormalization,
- ],
- },
- {
- 'page': 'layers/advanced-activations.md',
- 'all_module_classes': [advanced_activations],
- },
- {
- 'page': 'layers/noise.md',
- 'all_module_classes': [noise],
- },
- {
- 'page': 'layers/merge.md',
- 'classes': [
- layers.Add,
- layers.Subtract,
- layers.Multiply,
- layers.Average,
- layers.Maximum,
- layers.Minimum,
- layers.Concatenate,
- layers.Dot,
- ],
- 'functions': [
- layers.add,
- layers.subtract,
- layers.multiply,
- layers.average,
- layers.maximum,
- layers.minimum,
- layers.concatenate,
- layers.dot,
- ]
- },
- {
- 'page': 'preprocessing/sequence.md',
- 'functions': [
- preprocessing.sequence.pad_sequences,
- preprocessing.sequence.skipgrams,
- preprocessing.sequence.make_sampling_table,
- ],
- 'classes': [
- preprocessing.sequence.TimeseriesGenerator,
- ]
- },
- {
- 'page': 'preprocessing/image.md',
- 'classes': [
- (preprocessing.image.ImageDataGenerator, '*')
- ]
- },
- {
- 'page': 'preprocessing/text.md',
- 'functions': [
- preprocessing.text.hashing_trick,
- preprocessing.text.one_hot,
- preprocessing.text.text_to_word_sequence,
- ],
- 'classes': [
- preprocessing.text.Tokenizer,
- ]
- },
- {
- 'page': 'layers/wrappers.md',
- 'all_module_classes': [wrappers],
- },
- {
- 'page': 'metrics.md',
- 'all_module_functions': [metrics],
- },
- {
- 'page': 'losses.md',
- 'all_module_functions': [losses],
- },
- {
- 'page': 'initializers.md',
- 'all_module_functions': [initializers],
- 'all_module_classes': [initializers],
- },
- {
- 'page': 'optimizers.md',
- 'all_module_classes': [optimizers],
- },
- {
- 'page': 'callbacks.md',
- 'all_module_classes': [callbacks],
- },
- {
- 'page': 'activations.md',
- 'all_module_functions': [activations],
- },
- {
- 'page': 'backend.md',
- 'all_module_functions': [backend],
- },
- {
- 'page': 'constraints.md',
- 'all_module_classes': [constraints],
- },
- {
- 'page': 'utils.md',
- 'functions': [utils.to_categorical,
- utils.normalize,
- utils.get_file,
- utils.print_summary,
- utils.plot_model,
- utils.multi_gpu_model],
- 'classes': [utils.CustomObjectScope,
- utils.HDF5Matrix,
- utils.Sequence],
- },
-]
-
-ROOT = 'http://keras.io/'
-
-template_np_implementation = """# Numpy implementation
-
-    ```python
-{{code}}
-    ```
-"""
-
-template_hidden_np_implementation = """# Numpy implementation
-
-    <details>
-    <summary>Show the Numpy implementation</summary>
-
-    ```python
-{{code}}
-    ```
-
-    </details>
-"""
diff --git a/docs/templates/activations.md b/docs/templates/activations.md
deleted file mode 100644
index 7cca9fa477c..00000000000
--- a/docs/templates/activations.md
+++ /dev/null
@@ -1,33 +0,0 @@
-
-## Usage of activations
-
-Activations can either be used through an `Activation` layer, or through the `activation` argument supported by all forward layers:
-
-```python
-from keras.layers import Activation, Dense
-
-model.add(Dense(64))
-model.add(Activation('tanh'))
-```
-
-This is equivalent to:
-
-```python
-model.add(Dense(64, activation='tanh'))
-```
-
-You can also pass an element-wise TensorFlow/Theano/CNTK function as an activation:
-
-```python
-from keras import backend as K
-
-model.add(Dense(64, activation=K.tanh))
-```
-
-## Available activations
-
-{{autogenerated}}
-
-## On "Advanced Activations"
-
-Activations that are more complex than a simple TensorFlow/Theano/CNTK function (e.g. learnable activations, which maintain a state) are available as [Advanced Activation layers](layers/advanced-activations.md), and can be found in the module `keras.layers.advanced_activations`. These include `PReLU` and `LeakyReLU`.
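-
-For instance, using `LeakyReLU` as a layer (a minimal sketch; `model` is assumed to be an existing `Sequential` model):
-
-```python
-from keras.layers import Dense, LeakyReLU
-
-model.add(Dense(64))
-model.add(LeakyReLU(alpha=0.3))
-```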
diff --git a/docs/templates/applications.md b/docs/templates/applications.md
deleted file mode 100644
index 4a442296eec..00000000000
--- a/docs/templates/applications.md
+++ /dev/null
@@ -1,808 +0,0 @@
-# Applications
-
-Keras Applications are deep learning models that are made available alongside pre-trained weights.
-These models can be used for prediction, feature extraction, and fine-tuning.
-
-Weights are downloaded automatically when instantiating a model. They are stored at `~/.keras/models/`.
-
-## Available models
-
-### Models for image classification with weights trained on ImageNet:
-
-- [Xception](#xception)
-- [VGG16](#vgg16)
-- [VGG19](#vgg19)
-- [ResNet, ResNetV2](#resnet)
-- [InceptionV3](#inceptionv3)
-- [InceptionResNetV2](#inceptionresnetv2)
-- [MobileNet](#mobilenet)
-- [MobileNetV2](#mobilenetv2)
-- [DenseNet](#densenet)
-- [NASNet](#nasnet)
-
-All of these architectures are compatible with all the backends (TensorFlow, Theano, and CNTK), and upon instantiation the models will be built according to the image data format set in your Keras configuration file at `~/.keras/keras.json`. For instance, if you have set `image_data_format=channels_last`, then any model loaded from this repository will get built according to the TensorFlow data format convention, "Height-Width-Depth".
-
-Note that:
-- For `Keras < 2.2.0`, the Xception model is only available for TensorFlow, due to its reliance on `SeparableConvolution` layers.
-- For `Keras < 2.1.5`, the MobileNet model is only available for TensorFlow, due to its reliance on `DepthwiseConvolution` layers.
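-
-For instance, you can check the data format currently set in your configuration (a quick check using the standard backend API):
-
-```python
-from keras import backend as K
-
-print(K.image_data_format())  # 'channels_last' or 'channels_first'
-```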
-
------
-
-## Usage examples for image classification models
-
-### Classify ImageNet classes with ResNet50
-
-```python
-from keras.applications.resnet50 import ResNet50
-from keras.preprocessing import image
-from keras.applications.resnet50 import preprocess_input, decode_predictions
-import numpy as np
-
-model = ResNet50(weights='imagenet')
-
-img_path = 'elephant.jpg'
-img = image.load_img(img_path, target_size=(224, 224))
-x = image.img_to_array(img)
-x = np.expand_dims(x, axis=0)
-x = preprocess_input(x)
-
-preds = model.predict(x)
-# decode the results into a list of tuples (class, description, probability)
-# (one such list for each sample in the batch)
-print('Predicted:', decode_predictions(preds, top=3)[0])
-# Predicted: [(u'n02504013', u'Indian_elephant', 0.82658225), (u'n01871265', u'tusker', 0.1122357), (u'n02504458', u'African_elephant', 0.061040461)]
-```
-
-### Extract features with VGG16
-
-```python
-from keras.applications.vgg16 import VGG16
-from keras.preprocessing import image
-from keras.applications.vgg16 import preprocess_input
-import numpy as np
-
-model = VGG16(weights='imagenet', include_top=False)
-
-img_path = 'elephant.jpg'
-img = image.load_img(img_path, target_size=(224, 224))
-x = image.img_to_array(img)
-x = np.expand_dims(x, axis=0)
-x = preprocess_input(x)
-
-features = model.predict(x)
-```
-
-### Extract features from an arbitrary intermediate layer with VGG19
-
-```python
-from keras.applications.vgg19 import VGG19
-from keras.preprocessing import image
-from keras.applications.vgg19 import preprocess_input
-from keras.models import Model
-import numpy as np
-
-base_model = VGG19(weights='imagenet')
-model = Model(inputs=base_model.input, outputs=base_model.get_layer('block4_pool').output)
-
-img_path = 'elephant.jpg'
-img = image.load_img(img_path, target_size=(224, 224))
-x = image.img_to_array(img)
-x = np.expand_dims(x, axis=0)
-x = preprocess_input(x)
-
-block4_pool_features = model.predict(x)
-```
-
-### Fine-tune InceptionV3 on a new set of classes
-
-```python
-from keras.applications.inception_v3 import InceptionV3
-from keras.preprocessing import image
-from keras.models import Model
-from keras.layers import Dense, GlobalAveragePooling2D
-from keras import backend as K
-
-# create the base pre-trained model
-base_model = InceptionV3(weights='imagenet', include_top=False)
-
-# add a global spatial average pooling layer
-x = base_model.output
-x = GlobalAveragePooling2D()(x)
-# let's add a fully-connected layer
-x = Dense(1024, activation='relu')(x)
-# and a logistic layer -- let's say we have 200 classes
-predictions = Dense(200, activation='softmax')(x)
-
-# this is the model we will train
-model = Model(inputs=base_model.input, outputs=predictions)
-
-# first: train only the top layers (which were randomly initialized)
-# i.e. freeze all convolutional InceptionV3 layers
-for layer in base_model.layers:
- layer.trainable = False
-
-# compile the model (should be done *after* setting layers to non-trainable)
-model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
-
-# train the model on the new data for a few epochs
-model.fit_generator(...)
-
-# at this point, the top layers are well trained and we can start fine-tuning
-# convolutional layers from inception V3. We will freeze the bottom N layers
-# and train the remaining top layers.
-
-# let's visualize layer names and layer indices to see how many layers
-# we should freeze:
-for i, layer in enumerate(base_model.layers):
- print(i, layer.name)
-
-# we chose to train the top 2 inception blocks, i.e. we will freeze
-# the first 249 layers and unfreeze the rest:
-for layer in model.layers[:249]:
- layer.trainable = False
-for layer in model.layers[249:]:
- layer.trainable = True
-
-# we need to recompile the model for these modifications to take effect
-# we use SGD with a low learning rate
-from keras.optimizers import SGD
-model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy')
-
-# we train our model again (this time fine-tuning the top 2 inception blocks
-# alongside the top Dense layers)
-model.fit_generator(...)
-```
-
-
-### Build InceptionV3 over a custom input tensor
-
-```python
-from keras.applications.inception_v3 import InceptionV3
-from keras.layers import Input
-
-# this could also be the output of a different Keras model or layer
-input_tensor = Input(shape=(224, 224, 3)) # this assumes K.image_data_format() == 'channels_last'
-
-model = InceptionV3(input_tensor=input_tensor, weights='imagenet', include_top=True)
-```
-
------
-
-# Documentation for individual models
-
-| Model | Size | Top-1 Accuracy | Top-5 Accuracy | Parameters | Depth |
-| ----- | ----: | --------------: | --------------: | ----------: | -----: |
-| [Xception](#xception) | 88 MB | 0.790 | 0.945 | 22,910,480 | 126 |
-| [VGG16](#vgg16) | 528 MB | 0.713 | 0.901 | 138,357,544 | 23 |
-| [VGG19](#vgg19) | 549 MB | 0.713 | 0.900 | 143,667,240 | 26 |
-| [ResNet50](#resnet) | 98 MB | 0.749 | 0.921 | 25,636,712 | - |
-| [ResNet101](#resnet) | 171 MB | 0.764 | 0.928 | 44,707,176 | - |
-| [ResNet152](#resnet) | 232 MB | 0.766 | 0.931 | 60,419,944 | - |
-| [ResNet50V2](#resnet) | 98 MB | 0.760 | 0.930 | 25,613,800 | - |
-| [ResNet101V2](#resnet) | 171 MB | 0.772 | 0.938 | 44,675,560 | - |
-| [ResNet152V2](#resnet) | 232 MB | 0.780 | 0.942 | 60,380,648 | - |
-| [InceptionV3](#inceptionv3) | 92 MB | 0.779 | 0.937 | 23,851,784 | 159 |
-| [InceptionResNetV2](#inceptionresnetv2) | 215 MB | 0.803 | 0.953 | 55,873,736 | 572 |
-| [MobileNet](#mobilenet) | 16 MB | 0.704 | 0.895 | 4,253,864 | 88 |
-| [MobileNetV2](#mobilenetv2) | 14 MB | 0.713 | 0.901 | 3,538,984 | 88 |
-| [DenseNet121](#densenet) | 33 MB | 0.750 | 0.923 | 8,062,504 | 121 |
-| [DenseNet169](#densenet) | 57 MB | 0.762 | 0.932 | 14,307,880 | 169 |
-| [DenseNet201](#densenet) | 80 MB | 0.773 | 0.936 | 20,242,984 | 201 |
-| [NASNetMobile](#nasnet) | 23 MB | 0.744 | 0.919 | 5,326,716 | - |
-| [NASNetLarge](#nasnet) | 343 MB | 0.825 | 0.960 | 88,949,818 | - |
-
-The top-1 and top-5 accuracies refer to the model's performance on the ImageNet validation dataset.
-
-Depth refers to the topological depth of the network. This includes activation layers, batch normalization layers etc.
-
------
-
-
-## Xception
-
-
-```python
-keras.applications.xception.Xception(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
-```
-
-Xception V1 model, with weights pre-trained on ImageNet.
-
-On ImageNet, this model gets to a top-1 validation accuracy of 0.790
-and a top-5 validation accuracy of 0.945.
-
-This model can be built with either the `'channels_first'` data format (channels, height, width) or the `'channels_last'` data format (height, width, channels).
-
-The default input size for this model is 299x299.
-
-### Arguments
-
-- include_top: whether to include the fully-connected layer at the top of the network.
-- weights: one of `None` (random initialization) or `'imagenet'` (pre-training on ImageNet).
-- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
-- input_shape: optional shape tuple, only to be specified
-    if `include_top` is `False` (otherwise the input shape
-    has to be `(299, 299, 3)`).
-    It should have exactly 3 input channels,
-    and width and height should be no smaller than 71.
-    E.g. `(150, 150, 3)` would be one valid value.
-- pooling: Optional pooling mode for feature extraction
- when `include_top` is `False`.
- - `None` means that the output of the model will be
- the 4D tensor output of the
- last convolutional block.
- - `'avg'` means that global average pooling
- will be applied to the output of the
- last convolutional block, and thus
- the output of the model will be a 2D tensor.
- - `'max'` means that global max pooling will
- be applied.
-- classes: optional number of classes to classify images
- into, only to be specified if `include_top` is `True`, and
- if no `weights` argument is specified.
-
-### Returns
-
-A Keras `Model` instance.
-
-### References
-
-- [Xception: Deep Learning with Depthwise Separable Convolutions](https://arxiv.org/abs/1610.02357)
-
-### License
-
-These weights are trained by ourselves and are released under the MIT license.
-
-
------
-
-
-## VGG16
-
-```python
-keras.applications.vgg16.VGG16(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
-```
-
-VGG16 model, with weights pre-trained on ImageNet.
-
-This model can be built with either the `'channels_first'` data format (channels, height, width) or the `'channels_last'` data format (height, width, channels).
-
-The default input size for this model is 224x224.
-
-### Arguments
-
-- include_top: whether to include the 3 fully-connected layers at the top of the network.
-- weights: one of `None` (random initialization) or `'imagenet'` (pre-training on ImageNet).
-- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
-- input_shape: optional shape tuple, only to be specified
-    if `include_top` is `False` (otherwise the input shape
-    has to be `(224, 224, 3)` (with `'channels_last'` data format)
-    or `(3, 224, 224)` (with `'channels_first'` data format)).
-    It should have exactly 3 input channels,
-    and width and height should be no smaller than 32.
-    E.g. `(200, 200, 3)` would be one valid value.
-- pooling: Optional pooling mode for feature extraction
- when `include_top` is `False`.
- - `None` means that the output of the model will be
- the 4D tensor output of the
- last convolutional block.
- - `'avg'` means that global average pooling
- will be applied to the output of the
- last convolutional block, and thus
- the output of the model will be a 2D tensor.
- - `'max'` means that global max pooling will
- be applied.
-- classes: optional number of classes to classify images
- into, only to be specified if `include_top` is `True`, and
- if no `weights` argument is specified.
-
-### Returns
-
-A Keras `Model` instance.
-
-### References
-
-- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556): please cite this paper if you use the VGG models in your work.
-
-### License
-
-These weights are ported from the ones [released by VGG at Oxford](http://www.robots.ox.ac.uk/~vgg/research/very_deep/) under the [Creative Commons Attribution License](https://creativecommons.org/licenses/by/4.0/).
-
------
-
-## VGG19
-
-
-```python
-keras.applications.vgg19.VGG19(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
-```
-
-
-VGG19 model, with weights pre-trained on ImageNet.
-
-This model can be built with either the `'channels_first'` data format (channels, height, width) or the `'channels_last'` data format (height, width, channels).
-
-The default input size for this model is 224x224.
-
-### Arguments
-
-- include_top: whether to include the 3 fully-connected layers at the top of the network.
-- weights: one of `None` (random initialization) or `'imagenet'` (pre-training on ImageNet).
-- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
-- input_shape: optional shape tuple, only to be specified
-    if `include_top` is `False` (otherwise the input shape
-    has to be `(224, 224, 3)` (with `'channels_last'` data format)
-    or `(3, 224, 224)` (with `'channels_first'` data format)).
-    It should have exactly 3 input channels,
-    and width and height should be no smaller than 32.
-    E.g. `(200, 200, 3)` would be one valid value.
-- pooling: Optional pooling mode for feature extraction
- when `include_top` is `False`.
- - `None` means that the output of the model will be
- the 4D tensor output of the
- last convolutional block.
- - `'avg'` means that global average pooling
- will be applied to the output of the
- last convolutional block, and thus
- the output of the model will be a 2D tensor.
- - `'max'` means that global max pooling will
- be applied.
-- classes: optional number of classes to classify images
- into, only to be specified if `include_top` is `True`, and
- if no `weights` argument is specified.
-
-### Returns
-
-A Keras `Model` instance.
-
-
-### References
-
-- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)
-
-### License
-
-These weights are ported from the ones [released by VGG at Oxford](http://www.robots.ox.ac.uk/~vgg/research/very_deep/) under the [Creative Commons Attribution License](https://creativecommons.org/licenses/by/4.0/).
-
------
-
-## ResNet
-
-
-```python
-keras.applications.resnet.ResNet50(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
-keras.applications.resnet.ResNet101(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
-keras.applications.resnet.ResNet152(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
-keras.applications.resnet_v2.ResNet50V2(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
-keras.applications.resnet_v2.ResNet101V2(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
-keras.applications.resnet_v2.ResNet152V2(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
-```
-
-
-ResNet, ResNetV2 models, with weights pre-trained on ImageNet.
-
-This model can be built with either the `'channels_first'` data format (channels, height, width) or the `'channels_last'` data format (height, width, channels).
-
-The default input size for this model is 224x224.
-
-
-### Arguments
-
-- include_top: whether to include the fully-connected layer at the top of the network.
-- weights: one of `None` (random initialization) or `'imagenet'` (pre-training on ImageNet).
-- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
-- input_shape: optional shape tuple, only to be specified
-    if `include_top` is `False` (otherwise the input shape
-    has to be `(224, 224, 3)` (with `'channels_last'` data format)
-    or `(3, 224, 224)` (with `'channels_first'` data format)).
-    It should have exactly 3 input channels,
-    and width and height should be no smaller than 32.
-    E.g. `(200, 200, 3)` would be one valid value.
-- pooling: Optional pooling mode for feature extraction
- when `include_top` is `False`.
- - `None` means that the output of the model will be
- the 4D tensor output of the
- last convolutional block.
- - `'avg'` means that global average pooling
- will be applied to the output of the
- last convolutional block, and thus
- the output of the model will be a 2D tensor.
- - `'max'` means that global max pooling will
- be applied.
-- classes: optional number of classes to classify images
- into, only to be specified if `include_top` is `True`, and
- if no `weights` argument is specified.
-
-### Returns
-
-A Keras `Model` instance.
-
-### References
-
-- `ResNet`: [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
-- `ResNetV2`: [Identity Mappings in Deep Residual Networks](https://arxiv.org/abs/1603.05027)
-
-### License
-
-These weights are ported from the following:
-
-- `ResNet`: [The original repository of Kaiming He](https://github.com/KaimingHe/deep-residual-networks) under the [MIT license](https://github.com/KaimingHe/deep-residual-networks/blob/master/LICENSE).
-- `ResNetV2`: [Facebook](https://github.com/facebook/fb.resnet.torch) under the [BSD license](https://github.com/facebook/fb.resnet.torch/blob/master/LICENSE).
-
------
-
-## InceptionV3
-
-
-```python
-keras.applications.inception_v3.InceptionV3(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
-```
-
-Inception V3 model, with weights pre-trained on ImageNet.
-
-This model can be built with either the `'channels_first'` data format (channels, height, width) or the `'channels_last'` data format (height, width, channels).
-
-The default input size for this model is 299x299.
-
-
-### Arguments
-
-- include_top: whether to include the fully-connected layer at the top of the network.
-- weights: one of `None` (random initialization) or `'imagenet'` (pre-training on ImageNet).
-- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
-- input_shape: optional shape tuple, only to be specified
-    if `include_top` is `False` (otherwise the input shape
-    has to be `(299, 299, 3)` (with `'channels_last'` data format)
-    or `(3, 299, 299)` (with `'channels_first'` data format)).
-    It should have exactly 3 input channels,
-    and width and height should be no smaller than 75.
-    E.g. `(150, 150, 3)` would be one valid value.
-- pooling: Optional pooling mode for feature extraction
- when `include_top` is `False`.
- - `None` means that the output of the model will be
- the 4D tensor output of the
- last convolutional block.
- - `'avg'` means that global average pooling
- will be applied to the output of the
- last convolutional block, and thus
- the output of the model will be a 2D tensor.
- - `'max'` means that global max pooling will
- be applied.
-- classes: optional number of classes to classify images
- into, only to be specified if `include_top` is `True`, and
- if no `weights` argument is specified.
-
-### Returns
-
-A Keras `Model` instance.
-
-### References
-
-- [Rethinking the Inception Architecture for Computer Vision](http://arxiv.org/abs/1512.00567)
-
-### License
-
-These weights are released under [the Apache License](https://github.com/tensorflow/models/blob/master/LICENSE).
-
------
-
-## InceptionResNetV2
-
-
-```python
-keras.applications.inception_resnet_v2.InceptionResNetV2(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
-```
-
-Inception-ResNet V2 model, with weights pre-trained on ImageNet.
-
-This model can be built with either the `'channels_first'` data format (channels, height, width) or the `'channels_last'` data format (height, width, channels).
-
-The default input size for this model is 299x299.
-
-
-### Arguments
-
-- include_top: whether to include the fully-connected layer at the top of the network.
-- weights: one of `None` (random initialization) or `'imagenet'` (pre-training on ImageNet).
-- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model.
-- input_shape: optional shape tuple, only to be specified
-    if `include_top` is `False` (otherwise the input shape
-    has to be `(299, 299, 3)` (with `'channels_last'` data format)
-    or `(3, 299, 299)` (with `'channels_first'` data format)).
-    It should have exactly 3 input channels,
-    and width and height should be no smaller than 75.
-    E.g. `(150, 150, 3)` would be one valid value.
-- pooling: Optional pooling mode for feature extraction
- when `include_top` is `False`.
- - `None` means that the output of the model will be
- the 4D tensor output of the
- last convolutional block.
- - `'avg'` means that global average pooling
- will be applied to the output of the
- last convolutional block, and thus
- the output of the model will be a 2D tensor.
- - `'max'` means that global max pooling will
- be applied.
-- classes: optional number of classes to classify images
- into, only to be specified if `include_top` is `True`, and
- if no `weights` argument is specified.
-
-### Returns
-
-A Keras `Model` instance.
-
-### References
-
-- [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261)
-
-### License
-
-These weights are released under [the Apache License](https://github.com/tensorflow/models/blob/master/LICENSE).
-
------
-
-## MobileNet
-
-
-```python
-keras.applications.mobilenet.MobileNet(input_shape=None, alpha=1.0, depth_multiplier=1, dropout=1e-3, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000)
-```
-
-MobileNet model, with weights pre-trained on ImageNet.
-
-This model can be built with either the `'channels_first'` data format (channels, height, width) or the `'channels_last'` data format (height, width, channels).
-
-The default input size for this model is 224x224.
-
-### Arguments
-
-- input_shape: optional shape tuple, only to be specified
-    if `include_top` is `False` (otherwise the input shape
-    has to be `(224, 224, 3)` (with `'channels_last'` data format)
-    or `(3, 224, 224)` (with `'channels_first'` data format)).
-    It should have exactly 3 input channels,
-    and width and height should be no smaller than 32.
-    E.g. `(200, 200, 3)` would be one valid value.
-- alpha: controls the width of the network.
- - If `alpha` < 1.0, proportionally decreases the number
- of filters in each layer.
- - If `alpha` > 1.0, proportionally increases the number
- of filters in each layer.
-    - If `alpha` = 1, the default number of filters from the paper
-      is used at each layer.
-- depth_multiplier: depth multiplier for depthwise convolution
- (also called the resolution multiplier)
-- dropout: dropout rate
-- include_top: whether to include the fully-connected
- layer at the top of the network.
-- weights: `None` (random initialization) or
- `'imagenet'` (ImageNet weights)
-- input_tensor: optional Keras tensor (i.e. output of
- `layers.Input()`)
- to use as image input for the model.
-- pooling: Optional pooling mode for feature extraction
- when `include_top` is `False`.
- - `None` means that the output of the model
- will be the 4D tensor output of the
- last convolutional block.
- - `'avg'` means that global average pooling
- will be applied to the output of the
- last convolutional block, and thus
- the output of the model will be a
- 2D tensor.
- - `'max'` means that global max pooling will
- be applied.
-- classes: optional number of classes to classify images
- into, only to be specified if `include_top` is `True`, and
- if no `weights` argument is specified.
-
-### Returns
-
-A Keras `Model` instance.
-
-### References
-
-- [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/pdf/1704.04861.pdf)
-
-### License
-
-These weights are released under [the Apache License](https://github.com/tensorflow/models/blob/master/LICENSE).
-
------
-
-## DenseNet
-
-
-```python
-keras.applications.densenet.DenseNet121(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
-keras.applications.densenet.DenseNet169(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
-keras.applications.densenet.DenseNet201(include_top=True, weights='imagenet', input_tensor=None, input_shape=None, pooling=None, classes=1000)
-```
-
-DenseNet models, with weights pre-trained on ImageNet.
-
-This model can be built with either the `'channels_first'` data format (channels, height, width) or the `'channels_last'` data format (height, width, channels).
-
-The default input size for this model is 224x224.
-
-### Arguments
-
-- blocks: numbers of building blocks for the four dense layers.
-- include_top: whether to include the fully-connected
- layer at the top of the network.
-- weights: one of `None` (random initialization),
- 'imagenet' (pre-training on ImageNet),
- or the path to the weights file to be loaded.
-- input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
- to use as image input for the model.
-- input_shape: optional shape tuple, only to be specified
-    if `include_top` is `False` (otherwise the input shape
-    has to be `(224, 224, 3)` (with `'channels_last'` data format)
-    or `(3, 224, 224)` (with `'channels_first'` data format)).
-    It should have exactly 3 input channels,
-    and width and height should be no smaller than 32.
-    E.g. `(200, 200, 3)` would be one valid value.
-- pooling: optional pooling mode for feature extraction
- when `include_top` is `False`.
- - `None` means that the output of the model will be
- the 4D tensor output of the
- last convolutional block.
- - `avg` means that global average pooling
- will be applied to the output of the
- last convolutional block, and thus
- the output of the model will be a 2D tensor.
- - `max` means that global max pooling will
- be applied.
-- classes: optional number of classes to classify images
- into, only to be specified if `include_top` is True, and
- if no `weights` argument is specified.
-
-### Returns
-
-A Keras model instance.
-
-### References
-
-- [Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.06993) (CVPR 2017 Best Paper Award)
-
-### License
-
-These weights are released under [the BSD 3-clause License](https://github.com/liuzhuang13/DenseNet/blob/master/LICENSE).
-
------
-
-## NASNet
-
-
-```python
-keras.applications.nasnet.NASNetLarge(input_shape=None, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000)
-keras.applications.nasnet.NASNetMobile(input_shape=None, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000)
-```
-
-Neural Architecture Search Network (NASNet) models, with weights pre-trained on ImageNet.
-
-The default input size for the NASNetLarge model is 331x331 and for the
-NASNetMobile model is 224x224.
-
-### Arguments
-
-- input_shape: optional shape tuple, only to be specified
-    if `include_top` is `False` (otherwise the input shape
-    has to be `(224, 224, 3)` (with `'channels_last'` data format)
-    or `(3, 224, 224)` (with `'channels_first'` data format)
-    for NASNetMobile, or `(331, 331, 3)` (with `'channels_last'`
-    data format) or `(3, 331, 331)` (with `'channels_first'`
-    data format) for NASNetLarge).
-    It should have exactly 3 input channels,
-    and width and height should be no smaller than 32.
-    E.g. `(200, 200, 3)` would be one valid value.
-- include_top: whether to include the fully-connected
- layer at the top of the network.
-- weights: `None` (random initialization) or
- `'imagenet'` (ImageNet weights)
-- input_tensor: optional Keras tensor (i.e. output of
- `layers.Input()`)
- to use as image input for the model.
-- pooling: Optional pooling mode for feature extraction
- when `include_top` is `False`.
- - `None` means that the output of the model
- will be the 4D tensor output of the
- last convolutional block.
- - `'avg'` means that global average pooling
- will be applied to the output of the
- last convolutional block, and thus
- the output of the model will be a
- 2D tensor.
- - `'max'` means that global max pooling will
- be applied.
-- classes: optional number of classes to classify images
- into, only to be specified if `include_top` is `True`, and
- if no `weights` argument is specified.
-
-### Returns
-
-A Keras `Model` instance.
-
-### References
-
-- [Learning Transferable Architectures for Scalable Image Recognition](https://arxiv.org/abs/1707.07012)
-
-### License
-
-These weights are released under [the Apache License](https://github.com/tensorflow/models/blob/master/LICENSE).
-
------
-
-## MobileNetV2
-
-
-```python
-keras.applications.mobilenet_v2.MobileNetV2(input_shape=None, alpha=1.0, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000)
-```
-
-MobileNetV2 model, with weights pre-trained on ImageNet.
-
-This model can be built with either the `'channels_first'` data format (channels, height, width) or the `'channels_last'` data format (height, width, channels).
-
-The default input size for this model is 224x224.
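-
-For example, a minimal sketch of a width-reduced variant (assuming pre-trained weights are published for this `alpha`/resolution combination; otherwise pass `weights=None`):
-
-```python
-from keras.applications.mobilenet_v2 import MobileNetV2
-
-# alpha=0.5 halves the number of filters in each layer.
-model = MobileNetV2(input_shape=(160, 160, 3), alpha=0.5, weights='imagenet')
-```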
-
-### Arguments
-
-- input_shape: optional shape tuple, to be specified if you would
- like to use a model with an input image resolution that is not
- `(224, 224, 3)`.
- It should have exactly 3 input channels.
- You can also omit this option if you would like
- to infer `input_shape` from an `input_tensor`.
- If you include both `input_tensor` and `input_shape`, then
- `input_shape` will be used if they match; if the shapes
- do not match, an error will be raised.
- E.g. `(160, 160, 3)` would be one valid value.
-- alpha: controls the width of the network. This is known as the
- width multiplier in the MobileNetV2 paper.
- - If `alpha` < 1.0, proportionally decreases the number
- of filters in each layer.
- - If `alpha` > 1.0, proportionally increases the number
- of filters in each layer.
- - If `alpha` = 1, the default number of filters from the paper
- is used at each layer.
-- include_top: whether to include the fully-connected
- layer at the top of the network.
-- weights: one of `None` (random initialization),
- 'imagenet' (pre-training on ImageNet),
- or the path to the weights file to be loaded.
-- input_tensor: optional Keras tensor (i.e. output of
- `layers.Input()`)
- to use as image input for the model.
-- pooling: Optional pooling mode for feature extraction
- when `include_top` is `False`.
- - `None` means that the output of the model
- will be the 4D tensor output of the
- last convolutional block.
- - `'avg'` means that global average pooling
- will be applied to the output of the
- last convolutional block, and thus
- the output of the model will be a
- 2D tensor.
- - `'max'` means that global max pooling will
- be applied.
-- classes: optional number of classes to classify images
- into, only to be specified if `include_top` is `True`, and
- if no `weights` argument is specified.
-
-### Returns
-
-A Keras `Model` instance.
-
-### Raises
-
-ValueError: in case of an invalid argument for `weights`,
- or an invalid input shape, `alpha`, or input resolution
- when `weights='imagenet'`.
-
-### References
-
-- [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381)
-
-### License
-
-These weights are released under [the Apache License](https://github.com/tensorflow/models/blob/master/LICENSE).
diff --git a/docs/templates/backend.md b/docs/templates/backend.md
deleted file mode 100644
index 7b2fb65cd77..00000000000
--- a/docs/templates/backend.md
+++ /dev/null
@@ -1,148 +0,0 @@
-# Keras backends
-
-## What is a "backend"?
-
-Keras is a model-level library, providing high-level building blocks for developing deep learning models. It does not handle low-level operations such as tensor products, convolutions and so on itself. Instead, it relies on a specialized, well optimized tensor manipulation library to do so, serving as the "backend engine" of Keras. Rather than picking one single tensor library and making the implementation of Keras tied to that library, Keras handles the problem in a modular way, and several different backend engines can be plugged seamlessly into Keras.
-
-At this time, Keras has three backend implementations available: the **TensorFlow** backend, the **Theano** backend, and the **CNTK** backend.
-
-- [TensorFlow](http://www.tensorflow.org/) is an open-source symbolic tensor manipulation framework developed by Google.
-- [Theano](http://deeplearning.net/software/theano/) is an open-source symbolic tensor manipulation framework developed by LISA Lab at Université de Montréal.
-- [CNTK](https://www.microsoft.com/en-us/cognitive-toolkit/) is an open-source toolkit for deep learning developed by Microsoft.
-
-In the future, we are likely to add more backend options.
-
-----
-
-## Switching from one backend to another
-
-If you have run Keras at least once, you will find the Keras configuration file at:
-
-`$HOME/.keras/keras.json`
-
-If it isn't there, you can create it.
-
-**NOTE for Windows Users:** Please replace `$HOME` with `%USERPROFILE%`.
-
-The default configuration file looks like this:
-
-```
-{
- "image_data_format": "channels_last",
- "epsilon": 1e-07,
- "floatx": "float32",
- "backend": "tensorflow"
-}
-```
-
-Simply change the field `backend` to `"theano"`, `"tensorflow"`, or `"cntk"`, and Keras will use the new configuration next time you run any Keras code.
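-
-You can verify which backend is currently active from Python:
-
-```python
-from keras import backend as K
-print(K.backend())  # e.g. 'tensorflow'
-```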
-
-You can also define the environment variable `KERAS_BACKEND`, which will
-override what is defined in your config file:
-
-```bash
-KERAS_BACKEND=tensorflow python -c "from keras import backend"
-Using TensorFlow backend.
-```
-
-In Keras it is possible to load more backends than `"tensorflow"`, `"theano"`, and `"cntk"`. Keras can use external backends as well, and this can be performed by changing the `keras.json` configuration file, and the `"backend"` setting. Suppose you have a Python module called `my_module` that you wanted to use as your external backend. The `keras.json` configuration file would be changed as follows:
-
-```
-{
- "image_data_format": "channels_last",
- "epsilon": 1e-07,
- "floatx": "float32",
- "backend": "my_package.my_module"
-}
-```
-An external backend must be validated in order to be used; a valid backend must implement at least the following functions: `placeholder`, `variable` and `function`.
-
-If an external backend is not valid because a required entry is missing, an error will be logged listing the missing entry or entries.
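-
-A hypothetical `my_package/my_module.py` would therefore need to expose at least these entry points; the skeleton below only sketches the required signatures (modeled on the built-in backends), not a working implementation:
-
-```python
-# Minimal skeleton of an external backend module.
-
-def placeholder(shape=None, ndim=None, dtype=None, sparse=False, name=None):
-    """Instantiate an input placeholder tensor."""
-    raise NotImplementedError
-
-
-def variable(value, dtype=None, name=None, constraint=None):
-    """Instantiate a tensor variable from a value."""
-    raise NotImplementedError
-
-
-def function(inputs, outputs, updates=None, **kwargs):
-    """Compile a callable mapping input tensors to output values."""
-    raise NotImplementedError
-```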
-
-----
-
-## keras.json details
-
-
-The `keras.json` configuration file contains the following settings:
-
-```
-{
- "image_data_format": "channels_last",
- "epsilon": 1e-07,
- "floatx": "float32",
- "backend": "tensorflow"
-}
-```
-
-You can change these settings by editing `$HOME/.keras/keras.json`.
-
-* `image_data_format`: String, either `"channels_last"` or `"channels_first"`. It specifies which data format convention Keras will follow. (`keras.backend.image_data_format()` returns it.)
- - For 2D data (e.g. image), `"channels_last"` assumes `(rows, cols, channels)` while `"channels_first"` assumes `(channels, rows, cols)`.
- - For 3D data, `"channels_last"` assumes `(conv_dim1, conv_dim2, conv_dim3, channels)` while `"channels_first"` assumes `(channels, conv_dim1, conv_dim2, conv_dim3)`.
-* `epsilon`: Float, a numeric fuzzing constant used to avoid dividing by zero in some operations.
-* `floatx`: String, `"float16"`, `"float32"`, or `"float64"`. Default float precision.
-* `backend`: String, `"tensorflow"`, `"theano"`, or `"cntk"`.
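-
-These values can also be queried programmatically through the backend:
-
-```python
-from keras import backend as K
-
-print(K.image_data_format())  # e.g. 'channels_last'
-print(K.epsilon())            # e.g. 1e-07
-print(K.floatx())             # e.g. 'float32'
-print(K.backend())            # e.g. 'tensorflow'
-```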
-
-----
-
-## Using the abstract Keras backend to write new code
-
-If you want the Keras modules you write to be compatible with both Theano (`th`) and TensorFlow (`tf`), you have to write them via the abstract Keras backend API. Here's an intro.
-
-You can import the backend module via:
-```python
-from keras import backend as K
-```
-
-The code below instantiates an input placeholder. It's equivalent to `tf.placeholder()` or `th.tensor.matrix()`, `th.tensor.tensor3()`, etc.
-
-```python
-inputs = K.placeholder(shape=(2, 4, 5))
-# also works:
-inputs = K.placeholder(shape=(None, 4, 5))
-# also works:
-inputs = K.placeholder(ndim=3)
-```
-
-The code below instantiates a variable. It's equivalent to `tf.Variable()` or `th.shared()`.
-
-```python
-import numpy as np
-val = np.random.random((3, 4, 5))
-var = K.variable(value=val)
-
-# all-zeros variable:
-var = K.zeros(shape=(3, 4, 5))
-# all-ones:
-var = K.ones(shape=(3, 4, 5))
-```
-
-Most tensor operations you will need can be done as you would in TensorFlow or Theano:
-
-```python
-# Initializing Tensors with Random Numbers
-b = K.random_uniform_variable(shape=(3, 4), low=0, high=1) # Uniform distribution
-c = K.random_normal_variable(shape=(3, 4), mean=0, scale=1) # Gaussian distribution
-d = K.random_normal_variable(shape=(3, 4), mean=0, scale=1)
-
-# Tensor Arithmetic
-a = b + c * K.abs(d)
-c = K.dot(a, K.transpose(b))
-a = K.sum(b, axis=1)
-a = K.softmax(b)
-a = K.concatenate([b, c], axis=-1)
-# etc...
-```
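-
-To inspect the value of a variable or the result of an operation, fetch it as a Numpy array with `K.eval()`:
-
-```python
-a = K.ones(shape=(2, 2))
-print(K.eval(a + a))  # [[2. 2.], [2. 2.]]
-```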
-
-----
-
-## Backend functions
-
-
-{{autogenerated}}
-
-
-
-
-
diff --git a/docs/templates/callbacks.md b/docs/templates/callbacks.md
deleted file mode 100644
index 7dfa4063775..00000000000
--- a/docs/templates/callbacks.md
+++ /dev/null
@@ -1,70 +0,0 @@
-## Usage of callbacks
-
-A callback is a set of functions to be applied at given stages of the training procedure. You can use callbacks to get a view on internal states and statistics of the model during training. You can pass a list of callbacks (as the keyword argument `callbacks`) to the `.fit()` method of the `Sequential` or `Model` classes. The relevant methods of the callbacks will then be called at each stage of the training.
-
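-For example, a minimal sketch (assuming a compiled `model` and training arrays `x_train`/`y_train`):
-
-```python
-from keras.callbacks import EarlyStopping, ModelCheckpoint
-
-callbacks = [
-    EarlyStopping(monitor='val_loss', patience=2),
-    ModelCheckpoint(filepath='/tmp/weights.hdf5', save_best_only=True),
-]
-model.fit(x_train, y_train, validation_split=0.2, callbacks=callbacks)
-```
-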
----
-
-{{autogenerated}}
-
----
-
-
-# Create a callback
-
-You can create a custom callback by extending the base class `keras.callbacks.Callback`. A callback has access to its associated model through the class property `self.model`.
-
-Here's a simple example saving a list of losses over each batch during training:
-```python
-class LossHistory(keras.callbacks.Callback):
- def on_train_begin(self, logs={}):
- self.losses = []
-
- def on_batch_end(self, batch, logs={}):
- self.losses.append(logs.get('loss'))
-```
-
----
-
-### Example: recording loss history
-
-```python
-class LossHistory(keras.callbacks.Callback):
- def on_train_begin(self, logs={}):
- self.losses = []
-
- def on_batch_end(self, batch, logs={}):
- self.losses.append(logs.get('loss'))
-
-model = Sequential()
-model.add(Dense(10, input_dim=784, kernel_initializer='uniform'))
-model.add(Activation('softmax'))
-model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
-
-history = LossHistory()
-model.fit(x_train, y_train, batch_size=128, epochs=20, verbose=0, callbacks=[history])
-
-print(history.losses)
-# outputs
-'''
-[0.66047596406559383, 0.3547245744908703, ..., 0.25953155204159617, 0.25901699725311789]
-'''
-```
-
----
-
-### Example: model checkpoints
-
-```python
-from keras.callbacks import ModelCheckpoint
-
-model = Sequential()
-model.add(Dense(10, input_dim=784, kernel_initializer='uniform'))
-model.add(Activation('softmax'))
-model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
-
-'''
-saves the model weights after each epoch if the validation loss decreased
-'''
-checkpointer = ModelCheckpoint(filepath='/tmp/weights.hdf5', verbose=1, save_best_only=True)
-model.fit(x_train, y_train, batch_size=128, epochs=20, verbose=0, validation_data=(x_test, y_test), callbacks=[checkpointer])
-```
diff --git a/docs/templates/constraints.md b/docs/templates/constraints.md
deleted file mode 100644
index bacc47c78a7..00000000000
--- a/docs/templates/constraints.md
+++ /dev/null
@@ -1,26 +0,0 @@
-## Usage of constraints
-
-Functions from the `constraints` module allow setting constraints (e.g. non-negativity) on network parameters during optimization.
-
-The constraints are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `Conv1D`, `Conv2D` and `Conv3D` have a unified API.
-
-These layers expose 2 keyword arguments:
-
-- `kernel_constraint` for the main weights matrix
-- `bias_constraint` for the bias.
-
-
-```python
-from keras.models import Sequential
-from keras.layers import Dense
-from keras.constraints import max_norm
-
-model = Sequential()
-model.add(Dense(64, input_dim=32, kernel_constraint=max_norm(2.)))
-```
-
----
-
-## Available constraints
-
-
-{{autogenerated}}
-
----
-
diff --git a/docs/templates/datasets.md b/docs/templates/datasets.md
deleted file mode 100644
index 826a8c73183..00000000000
--- a/docs/templates/datasets.md
+++ /dev/null
@@ -1,209 +0,0 @@
-# Datasets
-
-## CIFAR10 small image classification
-
-Dataset of 50,000 32x32 color training images, labeled over 10 categories, and 10,000 test images.
-
-### Usage:
-
-```python
-from keras.datasets import cifar10
-
-(x_train, y_train), (x_test, y_test) = cifar10.load_data()
-```
-
-- __Returns:__
- - 2 tuples:
- - __x_train, x_test__: uint8 array of RGB image data with shape (num_samples, 3, 32, 32) or (num_samples, 32, 32, 3) based on the `image_data_format` backend setting of either `channels_first` or `channels_last` respectively.
- - __y_train, y_test__: uint8 array of category labels (integers in range 0-9) with shape (num_samples, 1).
-
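-For use with the `categorical_crossentropy` loss, the integer labels are typically one-hot encoded first, e.g.:
-
-```python
-from keras.utils import to_categorical
-
-y_train = to_categorical(y_train, num_classes=10)
-y_test = to_categorical(y_test, num_classes=10)
-```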
-
----
-
-## CIFAR100 small image classification
-
-Dataset of 50,000 32x32 color training images, labeled over 100 categories, and 10,000 test images.
-
-### Usage:
-
-```python
-from keras.datasets import cifar100
-
-(x_train, y_train), (x_test, y_test) = cifar100.load_data(label_mode='fine')
-```
-
-- __Returns:__
- - 2 tuples:
- - __x_train, x_test__: uint8 array of RGB image data with shape (num_samples, 3, 32, 32) or (num_samples, 32, 32, 3) based on the `image_data_format` backend setting of either `channels_first` or `channels_last` respectively.
- - __y_train, y_test__: uint8 array of category labels with shape (num_samples, 1).
-
-- __Arguments:__
-
- - __label_mode__: "fine" or "coarse".
-
-
----
-
-## IMDB Movie reviews sentiment classification
-
-Dataset of 25,000 movie reviews from IMDB, labeled by sentiment (positive/negative). Reviews have been preprocessed, and each review is encoded as a [sequence](preprocessing/sequence.md) of word indexes (integers). For convenience, words are indexed by overall frequency in the dataset, so that for instance the integer "3" encodes the 3rd most frequent word in the data. This allows for quick filtering operations such as: "only consider the top 10,000 most common words, but eliminate the top 20 most common words".
-
-As a convention, "0" does not stand for a specific word, but instead is used to encode any unknown word.
-
-### Usage:
-
-```python
-from keras.datasets import imdb
-
-(x_train, y_train), (x_test, y_test) = imdb.load_data(path="imdb.npz",
- num_words=None,
- skip_top=0,
- maxlen=None,
- seed=113,
- start_char=1,
- oov_char=2,
- index_from=3)
-```
-- __Returns:__
- - 2 tuples:
- - __x_train, x_test__: list of sequences, which are lists of indexes (integers). If the `num_words` argument was specified, the maximum possible index value is `num_words - 1`. If the `maxlen` argument was specified, the largest possible sequence length is `maxlen`.
- - __y_train, y_test__: list of integer labels (1 or 0).
-
-- __Arguments:__
-
- - __path__: if you do not have the data locally (at `'~/.keras/datasets/' + path`), it will be downloaded to this location.
- - __num_words__: integer or None. Top most frequent words to consider. Any less frequent word will appear as `oov_char` value in the sequence data.
- - __skip_top__: integer. Top most frequent words to ignore (they will appear as `oov_char` value in the sequence data).
- - __maxlen__: int. Maximum sequence length. Any longer sequence will be truncated.
- - __seed__: int. Seed for reproducible data shuffling.
- - __start_char__: int. The start of a sequence will be marked with this character.
- Set to 1 because 0 is usually the padding character.
- - __oov_char__: int. Words that were cut out because of the `num_words`
- or `skip_top` limit will be replaced with this character.
- - __index_from__: int. Index actual words with this index and higher.
-
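-To map a sequence back to words, the word index can be inverted. A sketch (note the offset: data indices start at `index_from`, which is 3 by default, with the lowest values reserved for padding, `start_char` and `oov_char`):
-
-```python
-word_index = imdb.get_word_index()
-# Shift by index_from=3 so that data indices line up with the raw word index.
-reverse_index = {value + 3: key for (key, value) in word_index.items()}
-decoded_review = ' '.join(reverse_index.get(i, '?') for i in x_train[0])
-```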
-
----
-
-## Reuters newswire topics classification
-
-Dataset of 11,228 newswires from Reuters, labeled over 46 topics. As with the IMDB dataset, each wire is encoded as a sequence of word indexes (same conventions).
-
-### Usage:
-
-```python
-from keras.datasets import reuters
-
-(x_train, y_train), (x_test, y_test) = reuters.load_data(path="reuters.npz",
- num_words=None,
- skip_top=0,
- maxlen=None,
- test_split=0.2,
- seed=113,
- start_char=1,
- oov_char=2,
- index_from=3)
-```
-
-The specifications are the same as those of the IMDB dataset, with the addition of:
-
-- __test_split__: float. Fraction of the dataset to be used as test data.
-
-This dataset also makes available the word index used for encoding the sequences:
-
-```python
-word_index = reuters.get_word_index(path="reuters_word_index.json")
-```
-
-- __Returns:__ A dictionary where keys are words (str) and values are indexes (integer). E.g. `word_index["giraffe"]` might return `1234`.
-
-- __Arguments:__
-
- - __path__: if you do not have the index file locally (at `'~/.keras/datasets/' + path`), it will be downloaded to this location.
-
-
----
-
-## MNIST database of handwritten digits
-
-Dataset of 60,000 28x28 grayscale images of the 10 digits, along with a test set of 10,000 images.
-
-### Usage:
-
-```python
-from keras.datasets import mnist
-
-(x_train, y_train), (x_test, y_test) = mnist.load_data()
-```
-
-- __Returns:__
- - 2 tuples:
- - __x_train, x_test__: uint8 array of grayscale image data with shape (num_samples, 28, 28).
- - __y_train, y_test__: uint8 array of digit labels (integers in range 0-9) with shape (num_samples,).
-
-- __Arguments:__
-
- - __path__: if you do not have the data file locally (at `'~/.keras/datasets/' + path`), it will be downloaded to this location.
-
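-A common preprocessing step is to flatten the images and rescale the pixel values to [0, 1], e.g.:
-
-```python
-x_train = x_train.reshape(60000, 784).astype('float32') / 255
-x_test = x_test.reshape(10000, 784).astype('float32') / 255
-```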
-
----
-
-## Fashion-MNIST database of fashion articles
-
-Dataset of 60,000 28x28 grayscale images of 10 fashion categories, along with a test set of 10,000 images. This dataset can be used as a drop-in replacement for MNIST. The class labels are:
-
-| Label | Description |
-| --- | --- |
-| 0 | T-shirt/top |
-| 1 | Trouser |
-| 2 | Pullover |
-| 3 | Dress |
-| 4 | Coat |
-| 5 | Sandal |
-| 6 | Shirt |
-| 7 | Sneaker |
-| 8 | Bag |
-| 9 | Ankle boot |
-
-### Usage:
-
-```python
-from keras.datasets import fashion_mnist
-
-(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
-```
-
-- __Returns:__
- - 2 tuples:
- - __x_train, x_test__: uint8 array of grayscale image data with shape (num_samples, 28, 28).
- - __y_train, y_test__: uint8 array of labels (integers in range 0-9) with shape (num_samples,).
-
-
----
-
-## Boston housing price regression dataset
-
-
-Dataset taken from the StatLib library which is maintained at Carnegie Mellon University.
-
-Samples contain 13 attributes of houses at different locations around the Boston suburbs in the late 1970s.
-Targets are the median values of the houses at a location (in k$).
-
-
-### Usage:
-
-```python
-from keras.datasets import boston_housing
-
-(x_train, y_train), (x_test, y_test) = boston_housing.load_data()
-```
-
-- __Arguments:__
- - __path__: path where to cache the dataset locally
- (relative to ~/.keras/datasets).
- - __seed__: Random seed for shuffling the data
- before computing the test split.
- - __test_split__: fraction of the data to reserve as test set.
-
-- __Returns:__
- Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
diff --git a/docs/templates/getting-started/faq.md b/docs/templates/getting-started/faq.md
deleted file mode 100644
index 3ce52a5ed38..00000000000
--- a/docs/templates/getting-started/faq.md
+++ /dev/null
@@ -1,658 +0,0 @@
-# Keras FAQ: Frequently Asked Keras Questions
-
-- [How should I cite Keras?](#how-should-i-cite-keras)
-- [How can I run Keras on GPU?](#how-can-i-run-keras-on-gpu)
-- [How can I run a Keras model on multiple GPUs?](#how-can-i-run-a-keras-model-on-multiple-gpus)
-- [What does "sample", "batch", "epoch" mean?](#what-does-sample-batch-epoch-mean)
-- [How can I save a Keras model?](#how-can-i-save-a-keras-model)
-- [Why is the training loss much higher than the testing loss?](#why-is-the-training-loss-much-higher-than-the-testing-loss)
-- [How can I obtain the output of an intermediate layer?](#how-can-i-obtain-the-output-of-an-intermediate-layer)
-- [How can I use Keras with datasets that don't fit in memory?](#how-can-i-use-keras-with-datasets-that-dont-fit-in-memory)
-- [How can I interrupt training when the validation loss isn't decreasing anymore?](#how-can-i-interrupt-training-when-the-validation-loss-isnt-decreasing-anymore)
-- [How is the validation split computed?](#how-is-the-validation-split-computed)
-- [Is the data shuffled during training?](#is-the-data-shuffled-during-training)
-- [How can I record the training / validation loss / accuracy at each epoch?](#how-can-i-record-the-training-validation-loss-accuracy-at-each-epoch)
-- [How can I "freeze" layers?](#how-can-i-freeze-keras-layers)
-- [How can I use stateful RNNs?](#how-can-i-use-stateful-rnns)
-- [How can I remove a layer from a Sequential model?](#how-can-i-remove-a-layer-from-a-sequential-model)
-- [How can I use pre-trained models in Keras?](#how-can-i-use-pre-trained-models-in-keras)
-- [How can I use HDF5 inputs with Keras?](#how-can-i-use-hdf5-inputs-with-keras)
-- [Where is the Keras configuration file stored?](#where-is-the-keras-configuration-file-stored)
-- [How can I obtain reproducible results using Keras during development?](#how-can-i-obtain-reproducible-results-using-keras-during-development)
-- [How can I install HDF5 or h5py to save my models in Keras?](#how-can-i-install-hdf5-or-h5py-to-save-my-models-in-keras)
-
----
-
-### How should I cite Keras?
-
-Please cite Keras in your publications if it helps your research. Here is an example BibTeX entry:
-
-```
-@misc{chollet2015keras,
- title={Keras},
- author={Chollet, Fran\c{c}ois and others},
- year={2015},
- howpublished={\url{https://keras.io}},
-}
-```
-
----
-
-### How can I run Keras on GPU?
-
-If you are running on the **TensorFlow** or **CNTK** backends, your code will automatically run on GPU if any available GPU is detected.
-
-If you are running on the **Theano** backend, you can use one of the following methods:
-
-**Method 1**: use Theano flags.
-```bash
-THEANO_FLAGS=device=gpu,floatX=float32 python my_keras_script.py
-```
-
-The name 'gpu' might have to be changed depending on your device's identifier (e.g. `gpu0`, `gpu1`, etc.).
-
-**Method 2**: set up your `.theanorc`: [Instructions](http://deeplearning.net/software/theano/library/config.html)
-
-**Method 3**: manually set `theano.config.device`, `theano.config.floatX` at the beginning of your code:
-```python
-import theano
-theano.config.device = 'gpu'
-theano.config.floatX = 'float32'
-```
-
----
-
-### How can I run a Keras model on multiple GPUs?
-
-We recommend doing so using the **TensorFlow** backend. There are two ways to run a single model on multiple GPUs: **data parallelism** and **device parallelism**.
-
-In most cases, what you need is most likely data parallelism.
-
-#### Data parallelism
-
-Data parallelism consists in replicating the target model once on each device, and using each replica to process a different fraction of the input data.
-Keras has a built-in utility, `keras.utils.multi_gpu_model`, which can produce a data-parallel version of any model, and achieves quasi-linear speedup on up to 8 GPUs.
-
-For more information, see the documentation for [multi_gpu_model](/utils/#multi_gpu_model). Here is a quick example:
-
-```python
-from keras.utils import multi_gpu_model
-
-# Replicates `model` on 8 GPUs.
-# This assumes that your machine has 8 available GPUs.
-parallel_model = multi_gpu_model(model, gpus=8)
-parallel_model.compile(loss='categorical_crossentropy',
- optimizer='rmsprop')
-
-# This `fit` call will be distributed on 8 GPUs.
-# Since the batch size is 256, each GPU will process 32 samples.
-parallel_model.fit(x, y, epochs=20, batch_size=256)
-```
-
-#### Device parallelism
-
-Device parallelism consists in running different parts of the same model on different devices. It works best for models that have a parallel architecture, e.g. a model with two branches.
-
-This can be achieved by using TensorFlow device scopes. Here is a quick example:
-
-```python
-import keras
-import tensorflow as tf
-
-# Model where a shared LSTM is used to encode two different sequences in parallel
-input_a = keras.Input(shape=(140, 256))
-input_b = keras.Input(shape=(140, 256))
-
-shared_lstm = keras.layers.LSTM(64)
-
-# Process the first sequence on one GPU
-with tf.device('/gpu:0'):
-    encoded_a = shared_lstm(input_a)
-# Process the next sequence on another GPU
-with tf.device('/gpu:1'):
-    encoded_b = shared_lstm(input_b)
-
-# Concatenate results on CPU
-with tf.device('/cpu:0'):
-    merged_vector = keras.layers.concatenate([encoded_a, encoded_b],
-                                             axis=-1)
-```
-
----
-
-### What does "sample", "batch", "epoch" mean?
-
-Below are some common definitions that are necessary to know and understand to correctly utilize Keras:
-
-- **Sample**: one element of a dataset.
- - *Example:* one image is a **sample** in a convolutional network
- - *Example:* one audio file is a **sample** for a speech recognition model
-- **Batch**: a set of *N* samples. The samples in a **batch** are processed independently, in parallel. If training, a batch results in only one update to the model.
- - A **batch** generally approximates the distribution of the input data better than a single input. The larger the batch, the better the approximation; however, it is also true that the batch will take longer to process and will still result in only one update. For inference (evaluate/predict), it is recommended to pick a batch size that is as large as you can afford without going out of memory (since larger batches will usually result in faster evaluation/prediction).
-- **Epoch**: an arbitrary cutoff, generally defined as "one pass over the entire dataset", used to separate training into distinct phases, which is useful for logging and periodic evaluation.
- - When using `validation_data` or `validation_split` with the `fit` method of Keras models, evaluation will be run at the end of every **epoch**.
- - Within Keras, there is the ability to add [callbacks](https://keras.io/callbacks/) specifically designed to be run at the end of an **epoch**. Examples of these are learning rate changes and model checkpointing (saving).
-
----
-
-### How can I save a Keras model?
-
-#### Saving/loading whole models (architecture + weights + optimizer state)
-
-*It is not recommended to use pickle or cPickle to save a Keras model.*
-
-You can use `model.save(filepath)` to save a Keras model into a single HDF5 file which will contain:
-
-- the architecture of the model, allowing you to re-create the model
-- the weights of the model
-- the training configuration (loss, optimizer)
-- the state of the optimizer, allowing you to resume training exactly where you left off.
-
-You can then use `keras.models.load_model(filepath)` to reinstantiate your model.
-`load_model` will also take care of compiling the model using the saved training configuration (unless the model was never compiled in the first place).
-
-Example:
-
-```python
-from keras.models import load_model
-
-model.save('my_model.h5')  # creates an HDF5 file 'my_model.h5'
-del model # deletes the existing model
-
-# returns a compiled model
-# identical to the previous one
-model = load_model('my_model.h5')
-```
-
-Please also see [How can I install HDF5 or h5py to save my models in Keras?](#how-can-i-install-hdf5-or-h5py-to-save-my-models-in-keras) for instructions on how to install `h5py`.
-
-#### Saving/loading only a model's architecture
-
-If you only need to save the **architecture of a model**, and not its weights or its training configuration, you can do:
-
-```python
-# save as JSON
-json_string = model.to_json()
-
-# save as YAML
-yaml_string = model.to_yaml()
-```
-
-The generated JSON / YAML files are human-readable and can be manually edited if needed.
-
-You can then build a fresh model from this data:
-
-```python
-# model reconstruction from JSON:
-from keras.models import model_from_json
-model = model_from_json(json_string)
-
-# model reconstruction from YAML:
-from keras.models import model_from_yaml
-model = model_from_yaml(yaml_string)
-```
-
-#### Saving/loading only a model's weights
-
-If you need to save the **weights of a model**, you can do so in HDF5 with the code below:
-
-```python
-model.save_weights('my_model_weights.h5')
-```
-
-Assuming you have code for instantiating your model, you can then load the weights you saved into a model with the *same* architecture:
-
-```python
-model.load_weights('my_model_weights.h5')
-```
-
-If you need to load the weights into a *different* architecture (with some layers in common), for instance for fine-tuning or transfer-learning, you can load them by *layer name*:
-
-```python
-model.load_weights('my_model_weights.h5', by_name=True)
-```
-
-Example:
-
-```python
-"""
-Assuming the original model looks like this:
- model = Sequential()
- model.add(Dense(2, input_dim=3, name='dense_1'))
- model.add(Dense(3, name='dense_2'))
- ...
- model.save_weights(fname)
-"""
-
-# new model
-model = Sequential()
-model.add(Dense(2, input_dim=3, name='dense_1')) # will be loaded
-model.add(Dense(10, name='new_dense')) # will not be loaded
-
-# load weights from first model; will only affect the first layer, dense_1.
-model.load_weights(fname, by_name=True)
-```
-
-Please also see [How can I install HDF5 or h5py to save my models in Keras?](#how-can-i-install-hdf5-or-h5py-to-save-my-models-in-keras) for instructions on how to install `h5py`.
-
-#### Handling custom layers (or other custom objects) in saved models
-
-If the model you want to load includes custom layers or other custom classes or functions,
-you can pass them to the loading mechanism via the `custom_objects` argument:
-
-```python
-from keras.models import load_model
-# Assuming your model includes instance of an "AttentionLayer" class
-model = load_model('my_model.h5', custom_objects={'AttentionLayer': AttentionLayer})
-```
-
-Alternatively, you can use a [custom object scope](https://keras.io/utils/#customobjectscope):
-
-```python
-from keras.utils import CustomObjectScope
-
-with CustomObjectScope({'AttentionLayer': AttentionLayer}):
- model = load_model('my_model.h5')
-```
-
-Custom objects handling works the same way for `load_model`, `model_from_json`, `model_from_yaml`:
-
-```python
-from keras.models import model_from_json
-model = model_from_json(json_string, custom_objects={'AttentionLayer': AttentionLayer})
-```
-
----
-
-### Why is the training loss much higher than the testing loss?
-
-A Keras model has two modes: training and testing. Regularization mechanisms, such as Dropout and L1/L2 weight regularization, are turned off at testing time.
-
-Besides, the training loss is the average of the losses over each batch of training data. Because your model is changing over time, the loss over the first batches of an epoch is generally higher than over the last batches. On the other hand, the testing loss for an epoch is computed using the model as it is at the end of the epoch, resulting in a lower loss.
-
----
-
-### How can I obtain the output of an intermediate layer?
-
-One simple way is to create a new `Model` that will output the layers that you are interested in:
-
-```python
-from keras.models import Model
-
-model = ... # create the original model
-
-layer_name = 'my_layer'
-intermediate_layer_model = Model(inputs=model.input,
- outputs=model.get_layer(layer_name).output)
-intermediate_output = intermediate_layer_model.predict(data)
-```
-
-Alternatively, you can build a Keras function that will return the output of a certain layer given a certain input, for example:
-
-```python
-from keras import backend as K
-
-# with a Sequential model
-get_3rd_layer_output = K.function([model.layers[0].input],
- [model.layers[3].output])
-layer_output = get_3rd_layer_output([x])[0]
-```
-
-Similarly, you could build a Theano or TensorFlow function directly.
-
-Note that if your model has a different behavior in training and testing phase (e.g. if it uses `Dropout`, `BatchNormalization`, etc.), you will need to pass the learning phase flag to your function:
-
-```python
-get_3rd_layer_output = K.function([model.layers[0].input, K.learning_phase()],
- [model.layers[3].output])
-
-# output in test mode = 0
-layer_output = get_3rd_layer_output([x, 0])[0]
-
-# output in train mode = 1
-layer_output = get_3rd_layer_output([x, 1])[0]
-```
-
----
-
-### How can I use Keras with datasets that don't fit in memory?
-
-You can do batch training using `model.train_on_batch(x, y)` and `model.test_on_batch(x, y)`. See the [models documentation](/models/sequential).
-
-Alternatively, you can write a generator that yields batches of training data and use the method `model.fit_generator(data_generator, steps_per_epoch, epochs)`.
-
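-A minimal sketch of such a generator (assuming a compiled `model`; batches are sliced from in-memory arrays here for brevity, whereas in practice each batch would be read from disk):
-
-```python
-def batch_generator(x, y, batch_size=32):
-    while True:  # loop indefinitely; `steps_per_epoch` delimits an epoch
-        for i in range(0, len(x), batch_size):
-            yield x[i:i + batch_size], y[i:i + batch_size]
-
-model.fit_generator(batch_generator(x_train, y_train),
-                    steps_per_epoch=len(x_train) // 32,
-                    epochs=10)
-```
-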
-You can see batch training in action in our [CIFAR10 example](https://github.com/keras-team/keras/blob/master/examples/cifar10_cnn.py).
-
----
-
-### How can I interrupt training when the validation loss isn't decreasing anymore?
-
-You can use an `EarlyStopping` callback:
-
-```python
-from keras.callbacks import EarlyStopping
-early_stopping = EarlyStopping(monitor='val_loss', patience=2)
-model.fit(x, y, validation_split=0.2, callbacks=[early_stopping])
-```
-
-Find out more in the [callbacks documentation](/callbacks).
-
----
-
-### How is the validation split computed?
-
-If you set the `validation_split` argument in `model.fit` to e.g. 0.1, then the validation data used will be the *last 10%* of the data. If you set it to 0.25, it will be the last 25% of the data, etc. Note that the data isn't shuffled before extracting the validation split, so the validation is literally just the *last* x% of samples in the input you passed.
-
-The same validation set is used for all epochs (within the same call to `fit`).
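-
-For instance, a sketch with 1,000 samples (assuming a compiled `model`):
-
-```python
-import numpy as np
-
-x = np.random.random((1000, 32))
-y = np.random.random((1000, 1))
-
-# The last 100 samples, x[900:] and y[900:], become the validation set.
-model.fit(x, y, validation_split=0.1)
-```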
-
----
-
-### Is the data shuffled during training?
-
-Yes, if the `shuffle` argument in `model.fit` is set to `True` (which is the default), the training data will be randomly shuffled at each epoch.
-
-Validation data is never shuffled.
-
----
-
-
-### How can I record the training / validation loss / accuracy at each epoch?
-
-The `model.fit` method returns a `History` callback, which has a `history` attribute containing the lists of successive losses and other metrics.
-
-```python
-hist = model.fit(x, y, validation_split=0.2)
-print(hist.history)
-```
-
----
-
-### How can I "freeze" Keras layers?
-
-To "freeze" a layer means to exclude it from training, i.e. its weights will never be updated. This is useful in the context of fine-tuning a model, or using fixed embeddings for a text input.
-
-You can pass a `trainable` argument (boolean) to a layer constructor to set a layer to be non-trainable:
-
-```python
-frozen_layer = Dense(32, trainable=False)
-```
-
-Additionally, you can set the `trainable` property of a layer to `True` or `False` after instantiation. For this to take effect, you will need to call `compile()` on your model after modifying the `trainable` property. Here's an example:
-
-```python
-x = Input(shape=(32,))
-layer = Dense(32)
-layer.trainable = False
-y = layer(x)
-
-frozen_model = Model(x, y)
-# in the model below, the weights of `layer` will not be updated during training
-frozen_model.compile(optimizer='rmsprop', loss='mse')
-
-layer.trainable = True
-trainable_model = Model(x, y)
-# with this model the weights of the layer will be updated during training
-# (which will also affect the above model since it uses the same layer instance)
-trainable_model.compile(optimizer='rmsprop', loss='mse')
-
-frozen_model.fit(data, labels) # this does NOT update the weights of `layer`
-trainable_model.fit(data, labels) # this updates the weights of `layer`
-```
-
----
-
-### How can I use stateful RNNs?
-
-Making an RNN stateful means that the states for the samples of each batch will be reused as initial states for the samples in the next batch.
-
-When using stateful RNNs, it is therefore assumed that:
-
-- all batches have the same number of samples
-- If `x1` and `x2` are successive batches of samples, then `x2[i]` is the follow-up sequence to `x1[i]`, for every `i`.
-
-To use statefulness in RNNs, you need to:
-
-- explicitly specify the batch size you are using, by passing a `batch_size` argument to the first layer in your model. E.g. `batch_size=32` for a batch of 32 samples, where each sample is a sequence of 10 timesteps with 16 features per timestep.
-- set `stateful=True` in your RNN layer(s).
-- specify `shuffle=False` when calling `fit()`.
-
-To reset the states accumulated:
-
-- use `model.reset_states()` to reset the states of all layers in the model
-- use `layer.reset_states()` to reset the states of a specific stateful RNN layer
-
-Example:
-
-```python
-import numpy as np
-from keras.models import Sequential
-from keras.layers import LSTM, Dense
-
-# Our input data: a batch of 32 sequences of 21 timesteps with 16 features each.
-# We will feed it to the model in sub-sequences of length 10.
-x = np.random.random((32, 21, 16))
-
-model = Sequential()
-model.add(LSTM(32, input_shape=(10, 16), batch_size=32, stateful=True))
-model.add(Dense(16, activation='softmax'))
-
-model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
-
-# we train the network to predict the 11th timestep given the first 10:
-model.train_on_batch(x[:, :10, :], np.reshape(x[:, 10, :], (32, 16)))
-
-# the state of the network has changed. We can feed the follow-up sequences:
-model.train_on_batch(x[:, 10:20, :], np.reshape(x[:, 20, :], (32, 16)))
-
-# let's reset the states of the LSTM layer:
-model.reset_states()
-
-# another way to do it in this case:
-model.layers[0].reset_states()
-```
-
-Note that the methods `predict`, `fit`, `train_on_batch`, `predict_classes`, etc. will *all* update the states of the stateful layers in a model. This allows you to do not only stateful training, but also stateful prediction.
-
----
-
-### How can I remove a layer from a Sequential model?
-
-You can remove the last added layer in a Sequential model by calling `.pop()`:
-
-```python
-model = Sequential()
-model.add(Dense(32, activation='relu', input_dim=784))
-model.add(Dense(32, activation='relu'))
-
-print(len(model.layers)) # "2"
-
-model.pop()
-print(len(model.layers)) # "1"
-```
-
----
-
-### How can I use pre-trained models in Keras?
-
-Code and pre-trained weights are available for the following image classification models:
-
-- Xception
-- VGG16
-- VGG19
-- ResNet
-- ResNet v2
-- ResNeXt
-- Inception v3
-- Inception-ResNet v2
-- MobileNet v1
-- MobileNet v2
-- DenseNet
-- NASNet
-
-They can be imported from the module `keras.applications`:
-
-```python
-from keras.applications.xception import Xception
-from keras.applications.vgg16 import VGG16
-from keras.applications.vgg19 import VGG19
-from keras.applications.resnet import ResNet50
-from keras.applications.resnet import ResNet101
-from keras.applications.resnet import ResNet152
-from keras.applications.resnet_v2 import ResNet50V2
-from keras.applications.resnet_v2 import ResNet101V2
-from keras.applications.resnet_v2 import ResNet152V2
-from keras.applications.resnext import ResNeXt50
-from keras.applications.resnext import ResNeXt101
-from keras.applications.inception_v3 import InceptionV3
-from keras.applications.inception_resnet_v2 import InceptionResNetV2
-from keras.applications.mobilenet import MobileNet
-from keras.applications.mobilenet_v2 import MobileNetV2
-from keras.applications.densenet import DenseNet121
-from keras.applications.densenet import DenseNet169
-from keras.applications.densenet import DenseNet201
-from keras.applications.nasnet import NASNetLarge
-from keras.applications.nasnet import NASNetMobile
-
-model = VGG16(weights='imagenet', include_top=True)
-```
-
-For a few simple usage examples, see [the documentation for the Applications module](/applications).
-
-For a detailed example of how to use such a pre-trained model for feature extraction or for fine-tuning, see [this blog post](http://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html).
-
-The VGG16 model is also the basis for several Keras example scripts:
-
-- [Style transfer](https://github.com/keras-team/keras/blob/master/examples/neural_style_transfer.py)
-- [Feature visualization](https://github.com/keras-team/keras/blob/master/examples/conv_filter_visualization.py)
-- [Deep dream](https://github.com/keras-team/keras/blob/master/examples/deep_dream.py)
-
----
-
-### How can I use HDF5 inputs with Keras?
-
-You can use the `HDF5Matrix` class from `keras.utils`. See [the HDF5Matrix documentation](/utils/#hdf5matrix) for details.
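-
-For example, a sketch (assuming a compiled `model`; the dataset names `'x_data'` and `'y_data'` are placeholders for whatever your file contains):
-
-```python
-from keras.utils import HDF5Matrix
-
-x_data = HDF5Matrix('input/file.hdf5', 'x_data')
-y_data = HDF5Matrix('input/file.hdf5', 'y_data')
-# 'batch' shuffles in batch-sized chunks, which HDF5 data requires.
-model.fit(x_data, y_data, shuffle='batch')
-```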
-
-You can also directly use an HDF5 dataset:
-
-```python
-import h5py
-with h5py.File('input/file.hdf5', 'r') as f:
- x_data = f['x_data']
- model.predict(x_data)
-```
-
-Please also see [How can I install HDF5 or h5py to save my models in Keras?](#how-can-i-install-hdf5-or-h5py-to-save-my-models-in-keras) for instructions on how to install `h5py`.
-
----
-
-### Where is the Keras configuration file stored?
-
-The default directory where all Keras data is stored is:
-
-```bash
-$HOME/.keras/
-```
-
-Note that Windows users should replace `$HOME` with `%USERPROFILE%`.
-In case Keras cannot create the above directory (e.g. due to permission issues), `/tmp/.keras/` is used as a backup.
-
-The Keras configuration file is a JSON file stored at `$HOME/.keras/keras.json`. The default configuration file looks like this:
-
-```
-{
- "image_data_format": "channels_last",
- "epsilon": 1e-07,
- "floatx": "float32",
- "backend": "tensorflow"
-}
-```
-
-It contains the following fields:
-
-- The image data format to be used as default by image processing layers and utilities (either `channels_last` or `channels_first`).
-- The `epsilon` numerical fuzz factor to be used to prevent division by zero in some operations.
-- The default float data type.
-- The default backend. See the [backend documentation](/backend).
-
-Likewise, cached dataset files, such as those downloaded with [`get_file()`](/utils/#get_file), are stored by default in `$HOME/.keras/datasets/`.
-
----
-
-### How can I obtain reproducible results using Keras during development?
-
-During development of a model, sometimes it is useful to be able to obtain reproducible results from run to run in order to determine if a change in performance is due to an actual model or data modification, or merely a result of a new random sample.
-
-First, you need to set the `PYTHONHASHSEED` environment variable to `0` before the program starts (not within the program itself). This is necessary in Python 3.2.3 onwards to have reproducible behavior for certain hash-based operations (e.g., the item order in a set or a dict, see [Python's documentation](https://docs.python.org/3.7/using/cmdline.html#envvar-PYTHONHASHSEED) or [issue #2280](https://github.com/keras-team/keras/issues/2280#issuecomment-306959926) for further details). One way to set the environment variable is when starting Python, like this:
-
-```
-$ cat test_hash.py
-print(hash("keras"))
-$ python3 test_hash.py # non-reproducible hash (Python 3.2.3+)
--8127205062320133199
-$ python3 test_hash.py # non-reproducible hash (Python 3.2.3+)
-3204480642156461591
-$ PYTHONHASHSEED=0 python3 test_hash.py # reproducible hash
-4883664951434749476
-$ PYTHONHASHSEED=0 python3 test_hash.py # reproducible hash
-4883664951434749476
-```
-
-Moreover, when using the TensorFlow backend and running on a GPU, some operations have non-deterministic outputs, in particular `tf.reduce_sum()`. This is due to the fact that GPUs run many operations in parallel, so the order of execution is not always guaranteed. Due to the limited precision of floats, even adding several numbers together may give slightly different results depending on the order in which you add them. You can try to avoid the non-deterministic operations, but some may be created automatically by TensorFlow to compute the gradients, so it is much simpler to just run the code on the CPU. For this, you can set the `CUDA_VISIBLE_DEVICES` environment variable to an empty string, for example:
-
-```
-$ CUDA_VISIBLE_DEVICES="" PYTHONHASHSEED=0 python your_program.py
-```
-
-The code snippet below provides an example of how to obtain reproducible results; it is geared towards a TensorFlow backend for a Python 3 environment:
-
-```python
-import numpy as np
-import tensorflow as tf
-import random as rn
-
-# The below is necessary for starting Numpy generated random numbers
-# in a well-defined initial state.
-
-np.random.seed(42)
-
-# The below is necessary for starting core Python generated random numbers
-# in a well-defined state.
-
-rn.seed(12345)
-
-# Force TensorFlow to use single thread.
-# Multiple threads are a potential source of non-reproducible results.
-# For further details, see: https://stackoverflow.com/questions/42022950/
-
-session_conf = tf.ConfigProto(intra_op_parallelism_threads=1,
- inter_op_parallelism_threads=1)
-
-from keras import backend as K
-
-# The below tf.set_random_seed() will make random number generation
-# in the TensorFlow backend have a well-defined initial state.
-# For further details, see:
-# https://www.tensorflow.org/api_docs/python/tf/set_random_seed
-
-tf.set_random_seed(1234)
-
-sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
-K.set_session(sess)
-
-# Rest of code follows ...
-```
-
----
-
-### How can I install HDF5 or h5py to save my models in Keras?
-
-In order to save your Keras models as HDF5 files, e.g. via
-`keras.callbacks.ModelCheckpoint`, Keras uses the h5py Python package. It is
-a dependency of Keras and should be installed by default. On Debian-based
-distributions, you will have to additionally install `libhdf5`:
-
-```
-sudo apt-get install libhdf5-serial-dev
-```
-
-If you are unsure if h5py is installed you can open a Python shell and load the
-module via
-
-```
-import h5py
-```
-
-If it imports without error it is installed; otherwise you can find detailed
-installation instructions here: http://docs.h5py.org/en/latest/build.html
diff --git a/docs/templates/getting-started/functional-api-guide.md b/docs/templates/getting-started/functional-api-guide.md
deleted file mode 100644
index b02d8de58f7..00000000000
--- a/docs/templates/getting-started/functional-api-guide.md
+++ /dev/null
@@ -1,437 +0,0 @@
-# Getting started with the Keras functional API
-
-The Keras functional API is the way to go for defining complex models, such as multi-output models, directed acyclic graphs, or models with shared layers.
-
-This guide assumes that you are already familiar with the `Sequential` model.
-
-Let's start with something simple.
-
------
-
-## First example: a densely-connected network
-
-The `Sequential` model is probably a better choice to implement such a network, but it helps to start with something really simple.
-
-- A layer instance is callable (on a tensor), and it returns a tensor
-- Input tensor(s) and output tensor(s) can then be used to define a `Model`
-- Such a model can be trained just like Keras `Sequential` models.
-
-```python
-from keras.layers import Input, Dense
-from keras.models import Model
-
-# This returns a tensor
-inputs = Input(shape=(784,))
-
-# a layer instance is callable on a tensor, and returns a tensor
-output_1 = Dense(64, activation='relu')(inputs)
-output_2 = Dense(64, activation='relu')(output_1)
-predictions = Dense(10, activation='softmax')(output_2)
-
-# This creates a model that includes
-# the Input layer and three Dense layers
-model = Model(inputs=inputs, outputs=predictions)
-model.compile(optimizer='rmsprop',
- loss='categorical_crossentropy',
- metrics=['accuracy'])
-# Dummy data for illustration: 1000 samples with one-hot labels over 10 classes.
-import numpy as np
-from keras.utils import to_categorical
-data = np.random.random((1000, 784))
-labels = to_categorical(np.random.randint(10, size=(1000, 1)), num_classes=10)
-
-model.fit(data, labels)  # starts training
-```
-
------
-
-## All models are callable, just like layers
-
-With the functional API, it is easy to reuse trained models: you can treat any model as if it were a layer, by calling it on a tensor. Note that by calling a model you aren't just reusing the *architecture* of the model, you are also reusing its weights.
-
-```python
-x = Input(shape=(784,))
-# This works, and returns the 10-way softmax we defined above.
-y = model(x)
-```
-
-This allows you, for instance, to quickly create models that can process *sequences* of inputs. You could turn an image classification model into a video classification model, in just one line.
-
-```python
-from keras.layers import TimeDistributed
-
-# Input tensor for sequences of 20 timesteps,
-# each containing a 784-dimensional vector
-input_sequences = Input(shape=(20, 784))
-
-# This applies our previous model to every timestep in the input sequences.
-# the output of the previous model was a 10-way softmax,
-# so the output of the layer below will be a sequence of 20 vectors of size 10.
-processed_sequences = TimeDistributed(model)(input_sequences)
-```
-
------
-
-## Multi-input and multi-output models
-
-Here's a good use case for the functional API: models with multiple inputs and outputs. The functional API makes it easy to manipulate a large number of intertwined datastreams.
-
-Let's consider the following model. We seek to predict how many retweets and likes a news headline will receive on Twitter. The main input to the model will be the headline itself, as a sequence of words, but to spice things up, our model will also have an auxiliary input, receiving extra data such as the time of day when the headline was posted, etc.
-The model will also be supervised via two loss functions. Using the main loss function earlier in a model is a good regularization mechanism for deep models.
-
-Here's what our model looks like:
-
-*[Diagram: the two-input, two-output model described above.]*
-
-Let's implement it with the functional API.
-
-The main input will receive the headline, as a sequence of integers (each integer encodes a word).
-The integers will be between 1 and 10,000 (a vocabulary of 10,000 words) and the sequences will be 100 words long.
-
-```python
-import keras
-from keras.layers import Input, Embedding, LSTM, Dense
-from keras.models import Model
-import numpy as np
-np.random.seed(0)  # Set a random seed for reproducibility
-
-# Headline input: meant to receive sequences of 100 integers, between 1 and 10000.
-# Note that we can name any layer by passing it a "name" argument.
-main_input = Input(shape=(100,), dtype='int32', name='main_input')
-
-# This embedding layer will encode the input sequence
-# into a sequence of dense 512-dimensional vectors.
-x = Embedding(output_dim=512, input_dim=10000, input_length=100)(main_input)
-
-# A LSTM will transform the vector sequence into a single vector,
-# containing information about the entire sequence
-lstm_out = LSTM(32)(x)
-```
-
-Here we insert the auxiliary loss, allowing the LSTM and Embedding layer to be trained smoothly even though the main loss will be much higher in the model.
-
-```python
-auxiliary_output = Dense(1, activation='sigmoid', name='aux_output')(lstm_out)
-```
-
-At this point, we feed into the model our auxiliary input data by concatenating it with the LSTM output:
-
-```python
-auxiliary_input = Input(shape=(5,), name='aux_input')
-x = keras.layers.concatenate([lstm_out, auxiliary_input])
-
-# We stack a deep densely-connected network on top
-x = Dense(64, activation='relu')(x)
-x = Dense(64, activation='relu')(x)
-x = Dense(64, activation='relu')(x)
-
-# And finally we add the main logistic regression layer
-main_output = Dense(1, activation='sigmoid', name='main_output')(x)
-```
-
-This defines a model with two inputs and two outputs:
-
-```python
-model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output, auxiliary_output])
-```
-
-We compile the model and assign a weight of 0.2 to the auxiliary loss.
-To specify different `loss_weights` or `loss` for each different output, you can use a list or a dictionary.
-Here we pass a single loss as the `loss` argument, so the same loss will be used on all outputs.
-
-```python
-model.compile(optimizer='rmsprop', loss='binary_crossentropy',
- loss_weights=[1., 0.2])
-```
-
-We can train the model by passing it lists of input arrays and target arrays:
-
-```python
-headline_data = np.round(np.abs(np.random.rand(12, 100) * 100))
-additional_data = np.random.randn(12, 5)
-headline_labels = np.random.randn(12, 1)
-additional_labels = np.random.randn(12, 1)
-model.fit([headline_data, additional_data], [headline_labels, additional_labels],
- epochs=50, batch_size=32)
-```
-
-Since our inputs and outputs are named (we passed them a "name" argument),
-we could also have compiled the model via:
-
-```python
-model.compile(optimizer='rmsprop',
- loss={'main_output': 'binary_crossentropy', 'aux_output': 'binary_crossentropy'},
- loss_weights={'main_output': 1., 'aux_output': 0.2})
-
-# And trained it via:
-model.fit({'main_input': headline_data, 'aux_input': additional_data},
- {'main_output': headline_labels, 'aux_output': additional_labels},
- epochs=50, batch_size=32)
-```
-
-To use the model for inference, use
-```python
-model.predict({'main_input': headline_data, 'aux_input': additional_data})
-```
-or alternatively,
-```python
-pred = model.predict([headline_data, additional_data])
-```
-
------
-
-## Shared layers
-
-Another good use for the functional API is models that use shared layers. Let's take a look at shared layers.
-
-Let's consider a dataset of tweets. We want to build a model that can tell whether two tweets are from the same person or not (this can allow us to compare users by the similarity of their tweets, for instance).
-
-One way to achieve this is to build a model that encodes two tweets into two vectors, concatenates the vectors and then adds a logistic regression; this outputs a probability that the two tweets share the same author. The model would then be trained on positive tweet pairs and negative tweet pairs.
-
-Because the problem is symmetric, the mechanism that encodes the first tweet should be reused (weights and all) to encode the second tweet. Here we use a shared LSTM layer to encode the tweets.
-
-Let's build this with the functional API. We will take as input for a tweet a binary matrix of shape `(280, 256)`, i.e. a sequence of 280 vectors of size 256, where each dimension in the 256-dimensional vector encodes the presence/absence of a character (out of an alphabet of 256 frequent characters).
-
-```python
-import keras
-from keras.layers import Input, LSTM, Dense
-from keras.models import Model
-
-tweet_a = Input(shape=(280, 256))
-tweet_b = Input(shape=(280, 256))
-```
-
-To share a layer across different inputs, simply instantiate the layer once, then call it on as many inputs as you want:
-
-```python
-# This layer can take as input a matrix
-# and will return a vector of size 64
-shared_lstm = LSTM(64)
-
-# When we reuse the same layer instance
-# multiple times, the weights of the layer
-# are also being reused
-# (it is effectively *the same* layer)
-encoded_a = shared_lstm(tweet_a)
-encoded_b = shared_lstm(tweet_b)
-
-# We can then concatenate the two vectors:
-merged_vector = keras.layers.concatenate([encoded_a, encoded_b], axis=-1)
-
-# And add a logistic regression on top
-predictions = Dense(1, activation='sigmoid')(merged_vector)
-
-# We define a trainable model linking the
-# tweet inputs to the predictions
-model = Model(inputs=[tweet_a, tweet_b], outputs=predictions)
-
-model.compile(optimizer='rmsprop',
- loss='binary_crossentropy',
- metrics=['accuracy'])
-model.fit([data_a, data_b], labels, epochs=10)
-```
-
-Let's pause to take a look at how to read the shared layer's output or output shape.
-
------
-
-## The concept of layer "node"
-
-Whenever you are calling a layer on some input, you are creating a new tensor (the output of the layer), and you are adding a "node" to the layer, linking the input tensor to the output tensor. When you are calling the same layer multiple times, that layer owns multiple nodes indexed as 0, 1, 2...
-
-In previous versions of Keras, you could obtain the output tensor of a layer instance via `layer.get_output()`, or its output shape via `layer.output_shape`. You still can (except `get_output()` has been replaced by the property `output`). But what if a layer is connected to multiple inputs?
-
-As long as a layer is only connected to one input, there is no confusion, and `.output` will return the one output of the layer:
-
-```python
-a = Input(shape=(280, 256))
-
-lstm = LSTM(32)
-encoded_a = lstm(a)
-
-assert lstm.output == encoded_a
-```
-
-Not so if the layer has multiple inputs:
-```python
-a = Input(shape=(280, 256))
-b = Input(shape=(280, 256))
-
-lstm = LSTM(32)
-encoded_a = lstm(a)
-encoded_b = lstm(b)
-
-lstm.output
-```
-```
->> AttributeError: Layer lstm_1 has multiple inbound nodes,
-hence the notion of "layer output" is ill-defined.
-Use `get_output_at(node_index)` instead.
-```
-
-Okay then. The following works:
-
-```python
-assert lstm.get_output_at(0) == encoded_a
-assert lstm.get_output_at(1) == encoded_b
-```
-
-Simple enough, right?
-
-The same is true for the properties `input_shape` and `output_shape`: as long as the layer has only one node, or as long as all nodes have the same input/output shape, then the notion of "layer output/input shape" is well defined, and that one shape will be returned by `layer.output_shape`/`layer.input_shape`. But if, for instance, you apply the same `Conv2D` layer to an input of shape `(32, 32, 3)`, and then to an input of shape `(64, 64, 3)`, the layer will have multiple input/output shapes, and you will have to fetch them by specifying the index of the node they belong to:
-
-```python
-a = Input(shape=(32, 32, 3))
-b = Input(shape=(64, 64, 3))
-
-conv = Conv2D(16, (3, 3), padding='same')
-conved_a = conv(a)
-
-# Only one input so far, the following will work:
-assert conv.input_shape == (None, 32, 32, 3)
-
-conved_b = conv(b)
-# Now the `.input_shape` property would raise an error, but this works:
-assert conv.get_input_shape_at(0) == (None, 32, 32, 3)
-assert conv.get_input_shape_at(1) == (None, 64, 64, 3)
-```
-
------
-
-## More examples
-
-Code examples are still the best way to get started, so here are a few more.
-
-### Inception module
-
-For more information about the Inception architecture, see [Going Deeper with Convolutions](http://arxiv.org/abs/1409.4842).
-
-```python
-import keras
-from keras.layers import Conv2D, MaxPooling2D, Input
-
-input_img = Input(shape=(256, 256, 3))
-
-tower_1 = Conv2D(64, (1, 1), padding='same', activation='relu')(input_img)
-tower_1 = Conv2D(64, (3, 3), padding='same', activation='relu')(tower_1)
-
-tower_2 = Conv2D(64, (1, 1), padding='same', activation='relu')(input_img)
-tower_2 = Conv2D(64, (5, 5), padding='same', activation='relu')(tower_2)
-
-tower_3 = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(input_img)
-tower_3 = Conv2D(64, (1, 1), padding='same', activation='relu')(tower_3)
-
-# Concatenate the towers along the channel axis (channels_last data format)
-output = keras.layers.concatenate([tower_1, tower_2, tower_3], axis=-1)
-```
-
-### Residual connection on a convolution layer
-
-For more information about residual networks, see [Deep Residual Learning for Image Recognition](http://arxiv.org/abs/1512.03385).
-
-```python
-import keras
-from keras.layers import Conv2D, Input
-
-# input tensor for a 3-channel 256x256 image
-x = Input(shape=(256, 256, 3))
-# 3x3 conv with 3 output channels (same as input channels)
-y = Conv2D(3, (3, 3), padding='same')(x)
-# this returns x + y.
-z = keras.layers.add([x, y])
-```
-
-### Shared vision model
-
-This model reuses the same image-processing module on two inputs, to classify whether two MNIST digits are the same digit or different digits.
-
-```python
-import keras
-from keras.layers import Conv2D, MaxPooling2D, Input, Dense, Flatten
-from keras.models import Model
-
-# First, define the vision modules
-digit_input = Input(shape=(28, 28, 1))  # MNIST digits are 28x28, single-channel
-x = Conv2D(64, (3, 3))(digit_input)
-x = Conv2D(64, (3, 3))(x)
-x = MaxPooling2D((2, 2))(x)
-out = Flatten()(x)
-
-vision_model = Model(digit_input, out)
-
-# Then define the tell-digits-apart model
-digit_a = Input(shape=(28, 28, 1))
-digit_b = Input(shape=(28, 28, 1))
-
-# The vision model will be shared, weights and all
-out_a = vision_model(digit_a)
-out_b = vision_model(digit_b)
-
-concatenated = keras.layers.concatenate([out_a, out_b])
-out = Dense(1, activation='sigmoid')(concatenated)
-
-classification_model = Model([digit_a, digit_b], out)
-```
-
-### Visual question answering model
-
-This model can select the correct one-word answer when asked a natural-language question about a picture.
-
-It works by encoding the question into a vector, encoding the image into a vector, concatenating the two, and training on top a logistic regression over some vocabulary of potential answers.
-
-```python
-import keras
-from keras.layers import Conv2D, MaxPooling2D, Flatten
-from keras.layers import Input, LSTM, Embedding, Dense
-from keras.models import Model, Sequential
-
-# First, let's define a vision model using a Sequential model.
-# This model will encode an image into a vector.
-vision_model = Sequential()
-vision_model.add(Conv2D(64, (3, 3), activation='relu', padding='same', input_shape=(224, 224, 3)))
-vision_model.add(Conv2D(64, (3, 3), activation='relu'))
-vision_model.add(MaxPooling2D((2, 2)))
-vision_model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
-vision_model.add(Conv2D(128, (3, 3), activation='relu'))
-vision_model.add(MaxPooling2D((2, 2)))
-vision_model.add(Conv2D(256, (3, 3), activation='relu', padding='same'))
-vision_model.add(Conv2D(256, (3, 3), activation='relu'))
-vision_model.add(Conv2D(256, (3, 3), activation='relu'))
-vision_model.add(MaxPooling2D((2, 2)))
-vision_model.add(Flatten())
-
-# Now let's get a tensor with the output of our vision model:
-image_input = Input(shape=(224, 224, 3))
-encoded_image = vision_model(image_input)
-
-# Next, let's define a language model to encode the question into a vector.
-# Each question will be at most 100 words long,
-# and we will index words as integers from 1 to 9999.
-question_input = Input(shape=(100,), dtype='int32')
-embedded_question = Embedding(input_dim=10000, output_dim=256, input_length=100)(question_input)
-encoded_question = LSTM(256)(embedded_question)
-
-# Let's concatenate the question vector and the image vector:
-merged = keras.layers.concatenate([encoded_question, encoded_image])
-
-# And let's train a logistic regression over 1000 words on top:
-output = Dense(1000, activation='softmax')(merged)
-
-# This is our final model:
-vqa_model = Model(inputs=[image_input, question_input], outputs=output)
-
-# The next stage would be training this model on actual data.
-```
-
-### Video question answering model
-
-Now that we have trained our image QA model, we can quickly turn it into a video QA model. With appropriate training, you will be able to show it a short video (e.g. a 100-frame clip of a human action) and ask a natural-language question about the video (e.g. "what sport is the boy playing?" -> "football").
-
-```python
-from keras.layers import TimeDistributed
-
-video_input = Input(shape=(100, 224, 224, 3))
-# This is our video encoded via the previously trained vision_model (weights are reused)
-encoded_frame_sequence = TimeDistributed(vision_model)(video_input) # the output will be a sequence of vectors
-encoded_video = LSTM(256)(encoded_frame_sequence) # the output will be a vector
-
-# This is a model-level representation of the question encoder, reusing the same weights as before:
-question_encoder = Model(inputs=question_input, outputs=encoded_question)
-
-# Let's use it to encode the question:
-video_question_input = Input(shape=(100,), dtype='int32')
-encoded_video_question = question_encoder(video_question_input)
-
-# And this is our video question answering model:
-merged = keras.layers.concatenate([encoded_video, encoded_video_question])
-output = Dense(1000, activation='softmax')(merged)
-video_qa_model = Model(inputs=[video_input, video_question_input], outputs=output)
-```
diff --git a/docs/templates/getting-started/sequential-model-guide.md b/docs/templates/getting-started/sequential-model-guide.md
deleted file mode 100644
index 853811f65ad..00000000000
--- a/docs/templates/getting-started/sequential-model-guide.md
+++ /dev/null
@@ -1,399 +0,0 @@
-# Getting started with the Keras Sequential model
-
-The `Sequential` model is a linear stack of layers.
-
-You can create a `Sequential` model by passing a list of layer instances to the constructor:
-
-```python
-from keras.models import Sequential
-from keras.layers import Dense, Activation
-
-model = Sequential([
- Dense(32, input_shape=(784,)),
- Activation('relu'),
- Dense(10),
- Activation('softmax'),
-])
-```
-
-You can also simply add layers via the `.add()` method:
-
-```python
-model = Sequential()
-model.add(Dense(32, input_dim=784))
-model.add(Activation('relu'))
-```
-
-----
-
-## Specifying the input shape
-
-The model needs to know what input shape it should expect. For this reason, the first layer in a `Sequential` model (and only the first, because following layers can do automatic shape inference) needs to receive information about its input shape. There are several possible ways to do this:
-
-- Pass an `input_shape` argument to the first layer. This is a shape tuple (a tuple of integers or `None` entries, where `None` indicates that any positive integer may be expected). In `input_shape`, the batch dimension is not included.
-- Some 2D layers, such as `Dense`, support the specification of their input shape via the argument `input_dim`, and some 3D temporal layers support the arguments `input_dim` and `input_length`.
-- If you ever need to specify a fixed batch size for your inputs (this is useful for stateful recurrent networks), you can pass a `batch_size` argument to a layer (see the sketch after the snippets below). If you pass both `batch_size=32` and `input_shape=(6, 8)` to a layer, it will then expect every batch of inputs to have the batch shape `(32, 6, 8)`.
-
-As such, the following snippets are strictly equivalent:
-```python
-model = Sequential()
-model.add(Dense(32, input_shape=(784,)))
-```
-```python
-model = Sequential()
-model.add(Dense(32, input_dim=784))
-```
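-
-And a minimal sketch of fixing the batch size, as mentioned above (the shapes here are illustrative):
-
-```python
-model = Sequential()
-model.add(Dense(32, batch_size=32, input_shape=(6, 8)))
-# Every batch of inputs must now have the batch shape (32, 6, 8).
-```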
-
-----
-
-## Compilation
-
-Before training a model, you need to configure the learning process, which is done via the `compile` method. It receives three arguments:
-
-- An optimizer. This could be the string identifier of an existing optimizer (such as `rmsprop` or `adagrad`), or an instance of the `Optimizer` class. See: [optimizers](/optimizers).
-- A loss function. This is the objective that the model will try to minimize. It can be the string identifier of an existing loss function (such as `categorical_crossentropy` or `mse`), or it can be an objective function. See: [losses](/losses).
-- A list of metrics. For any classification problem you will want to set this to `metrics=['accuracy']`. A metric could be the string identifier of an existing metric or a custom metric function. See: [metrics](/metrics).
-
-```python
-# For a multi-class classification problem
-model.compile(optimizer='rmsprop',
- loss='categorical_crossentropy',
- metrics=['accuracy'])
-
-# For a binary classification problem
-model.compile(optimizer='rmsprop',
- loss='binary_crossentropy',
- metrics=['accuracy'])
-
-# For a mean squared error regression problem
-model.compile(optimizer='rmsprop',
- loss='mse')
-
-# For custom metrics
-import keras.backend as K
-
-def mean_pred(y_true, y_pred):
- return K.mean(y_pred)
-
-model.compile(optimizer='rmsprop',
- loss='binary_crossentropy',
- metrics=['accuracy', mean_pred])
-```
-
-----
-
-## Training
-
-Keras models are trained on Numpy arrays of input data and labels. To train a model, you will typically use the `fit` function. [Read its documentation here](/models/sequential).
-
-```python
-# For a single-input model with 2 classes (binary classification):
-
-model = Sequential()
-model.add(Dense(32, activation='relu', input_dim=100))
-model.add(Dense(1, activation='sigmoid'))
-model.compile(optimizer='rmsprop',
- loss='binary_crossentropy',
- metrics=['accuracy'])
-
-# Generate dummy data
-import numpy as np
-data = np.random.random((1000, 100))
-labels = np.random.randint(2, size=(1000, 1))
-
-# Train the model, iterating on the data in batches of 32 samples
-model.fit(data, labels, epochs=10, batch_size=32)
-```
-
-```python
-# For a single-input model with 10 classes (categorical classification):
-
-model = Sequential()
-model.add(Dense(32, activation='relu', input_dim=100))
-model.add(Dense(10, activation='softmax'))
-model.compile(optimizer='rmsprop',
- loss='categorical_crossentropy',
- metrics=['accuracy'])
-
-# Generate dummy data
-import keras
-import numpy as np
-data = np.random.random((1000, 100))
-labels = np.random.randint(10, size=(1000, 1))
-
-# Convert labels to categorical one-hot encoding
-one_hot_labels = keras.utils.to_categorical(labels, num_classes=10)
-
-# Train the model, iterating on the data in batches of 32 samples
-model.fit(data, one_hot_labels, epochs=10, batch_size=32)
-```
-
-----
-
-
-## Examples
-
-Here are a few examples to get you started!
-
-In the [examples folder](https://github.com/keras-team/keras/tree/master/examples), you will also find example models for real datasets:
-
-- CIFAR10 small images classification: Convolutional Neural Network (CNN) with realtime data augmentation
-- IMDB movie review sentiment classification: LSTM over sequences of words
-- Reuters newswires topic classification: Multilayer Perceptron (MLP)
-- MNIST handwritten digits classification: MLP & CNN
-- Character-level text generation with LSTM
-
-...and more.
-
-
-### Multilayer Perceptron (MLP) for multi-class softmax classification:
-
-```python
-import keras
-from keras.models import Sequential
-from keras.layers import Dense, Dropout, Activation
-from keras.optimizers import SGD
-
-# Generate dummy data
-import numpy as np
-x_train = np.random.random((1000, 20))
-y_train = keras.utils.to_categorical(np.random.randint(10, size=(1000, 1)), num_classes=10)
-x_test = np.random.random((100, 20))
-y_test = keras.utils.to_categorical(np.random.randint(10, size=(100, 1)), num_classes=10)
-
-model = Sequential()
-# Dense(64) is a fully-connected layer with 64 hidden units.
-# in the first layer, you must specify the expected input data shape:
-# here, 20-dimensional vectors.
-model.add(Dense(64, activation='relu', input_dim=20))
-model.add(Dropout(0.5))
-model.add(Dense(64, activation='relu'))
-model.add(Dropout(0.5))
-model.add(Dense(10, activation='softmax'))
-
-sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
-model.compile(loss='categorical_crossentropy',
- optimizer=sgd,
- metrics=['accuracy'])
-
-model.fit(x_train, y_train,
- epochs=20,
- batch_size=128)
-score = model.evaluate(x_test, y_test, batch_size=128)
-```
-
-
-### MLP for binary classification:
-
-```python
-import numpy as np
-from keras.models import Sequential
-from keras.layers import Dense, Dropout
-
-# Generate dummy data
-x_train = np.random.random((1000, 20))
-y_train = np.random.randint(2, size=(1000, 1))
-x_test = np.random.random((100, 20))
-y_test = np.random.randint(2, size=(100, 1))
-
-model = Sequential()
-model.add(Dense(64, input_dim=20, activation='relu'))
-model.add(Dropout(0.5))
-model.add(Dense(64, activation='relu'))
-model.add(Dropout(0.5))
-model.add(Dense(1, activation='sigmoid'))
-
-model.compile(loss='binary_crossentropy',
- optimizer='rmsprop',
- metrics=['accuracy'])
-
-model.fit(x_train, y_train,
- epochs=20,
- batch_size=128)
-score = model.evaluate(x_test, y_test, batch_size=128)
-```
-
-
-### VGG-like convnet:
-
-```python
-import numpy as np
-import keras
-from keras.models import Sequential
-from keras.layers import Dense, Dropout, Flatten
-from keras.layers import Conv2D, MaxPooling2D
-from keras.optimizers import SGD
-
-# Generate dummy data
-x_train = np.random.random((100, 100, 100, 3))
-y_train = keras.utils.to_categorical(np.random.randint(10, size=(100, 1)), num_classes=10)
-x_test = np.random.random((20, 100, 100, 3))
-y_test = keras.utils.to_categorical(np.random.randint(10, size=(20, 1)), num_classes=10)
-
-model = Sequential()
-# input: 100x100 images with 3 channels -> (100, 100, 3) tensors.
-# this applies 32 convolution filters of size 3x3 each.
-model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 3)))
-model.add(Conv2D(32, (3, 3), activation='relu'))
-model.add(MaxPooling2D(pool_size=(2, 2)))
-model.add(Dropout(0.25))
-
-model.add(Conv2D(64, (3, 3), activation='relu'))
-model.add(Conv2D(64, (3, 3), activation='relu'))
-model.add(MaxPooling2D(pool_size=(2, 2)))
-model.add(Dropout(0.25))
-
-model.add(Flatten())
-model.add(Dense(256, activation='relu'))
-model.add(Dropout(0.5))
-model.add(Dense(10, activation='softmax'))
-
-sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
-model.compile(loss='categorical_crossentropy', optimizer=sgd)
-
-model.fit(x_train, y_train, batch_size=32, epochs=10)
-score = model.evaluate(x_test, y_test, batch_size=32)
-```
-
-
-### Sequence classification with LSTM:
-
-```python
-from keras.models import Sequential
-from keras.layers import Dense, Dropout
-from keras.layers import Embedding
-from keras.layers import LSTM
-
-max_features = 1024
-maxlen = 20
-
-# Generate dummy data: integer word indices and binary labels
-import numpy as np
-x_train = np.random.randint(max_features, size=(1000, maxlen))
-y_train = np.random.randint(2, size=(1000, 1))
-x_test = np.random.randint(max_features, size=(100, maxlen))
-y_test = np.random.randint(2, size=(100, 1))
-
-model = Sequential()
-model.add(Embedding(max_features, output_dim=256))
-model.add(LSTM(128))
-model.add(Dropout(0.5))
-model.add(Dense(1, activation='sigmoid'))
-
-model.compile(loss='binary_crossentropy',
- optimizer='rmsprop',
- metrics=['accuracy'])
-
-model.fit(x_train, y_train, batch_size=16, epochs=10)
-score = model.evaluate(x_test, y_test, batch_size=16)
-```
-
-### Sequence classification with 1D convolutions:
-
-```python
-from keras.models import Sequential
-from keras.layers import Dense, Dropout
-from keras.layers import Embedding
-from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D
-
-seq_length = 64
-
-# Generate dummy data
-import numpy as np
-x_train = np.random.random((1000, seq_length, 100))
-y_train = np.random.randint(2, size=(1000, 1))
-x_test = np.random.random((100, seq_length, 100))
-y_test = np.random.randint(2, size=(100, 1))
-
-model = Sequential()
-model.add(Conv1D(64, 3, activation='relu', input_shape=(seq_length, 100)))
-model.add(Conv1D(64, 3, activation='relu'))
-model.add(MaxPooling1D(3))
-model.add(Conv1D(128, 3, activation='relu'))
-model.add(Conv1D(128, 3, activation='relu'))
-model.add(GlobalAveragePooling1D())
-model.add(Dropout(0.5))
-model.add(Dense(1, activation='sigmoid'))
-
-model.compile(loss='binary_crossentropy',
- optimizer='rmsprop',
- metrics=['accuracy'])
-
-model.fit(x_train, y_train, batch_size=16, epochs=10)
-score = model.evaluate(x_test, y_test, batch_size=16)
-```
-
-### Stacked LSTM for sequence classification
-
-In this model, we stack 3 LSTM layers on top of each other,
-making the model capable of learning higher-level temporal representations.
-
-The first two LSTMs return their full output sequences, but the last one only returns
-the last step in its output sequence, thus dropping the temporal dimension
-(i.e. converting the input sequence into a single vector).
-
-```python
-from keras.models import Sequential
-from keras.layers import LSTM, Dense
-import numpy as np
-
-data_dim = 16
-timesteps = 8
-num_classes = 10
-
-# expected input data shape: (batch_size, timesteps, data_dim)
-model = Sequential()
-model.add(LSTM(32, return_sequences=True,
- input_shape=(timesteps, data_dim))) # returns a sequence of vectors of dimension 32
-model.add(LSTM(32, return_sequences=True)) # returns a sequence of vectors of dimension 32
-model.add(LSTM(32))  # returns a single vector of dimension 32
-model.add(Dense(10, activation='softmax'))
-
-model.compile(loss='categorical_crossentropy',
- optimizer='rmsprop',
- metrics=['accuracy'])
-
-# Generate dummy training data
-x_train = np.random.random((1000, timesteps, data_dim))
-y_train = np.random.random((1000, num_classes))
-
-# Generate dummy validation data
-x_val = np.random.random((100, timesteps, data_dim))
-y_val = np.random.random((100, num_classes))
-
-model.fit(x_train, y_train,
- batch_size=64, epochs=5,
- validation_data=(x_val, y_val))
-```
-
-
-### Same stacked LSTM model, rendered "stateful"
-
-A stateful recurrent model is one for which the internal states (memories) obtained after processing a batch
-of samples are reused as initial states for the samples of the next batch. This allows the model to process longer sequences
-while keeping computational complexity manageable.
-
-[You can read more about stateful RNNs in the FAQ.](/getting-started/faq/#how-can-i-use-stateful-rnns)
-
-```python
-from keras.models import Sequential
-from keras.layers import LSTM, Dense
-import numpy as np
-
-data_dim = 16
-timesteps = 8
-num_classes = 10
-batch_size = 32
-
-# Expected input batch shape: (batch_size, timesteps, data_dim)
-# Note that we have to provide the full batch_input_shape since the network is stateful.
-# The sample of index i in batch k is the follow-up of sample i in batch k-1.
-model = Sequential()
-model.add(LSTM(32, return_sequences=True, stateful=True,
- batch_input_shape=(batch_size, timesteps, data_dim)))
-model.add(LSTM(32, return_sequences=True, stateful=True))
-model.add(LSTM(32, stateful=True))
-model.add(Dense(10, activation='softmax'))
-
-model.compile(loss='categorical_crossentropy',
- optimizer='rmsprop',
- metrics=['accuracy'])
-
-# Generate dummy training data
-x_train = np.random.random((batch_size * 10, timesteps, data_dim))
-y_train = np.random.random((batch_size * 10, num_classes))
-
-# Generate dummy validation data
-x_val = np.random.random((batch_size * 3, timesteps, data_dim))
-y_val = np.random.random((batch_size * 3, num_classes))
-
-model.fit(x_train, y_train,
- batch_size=batch_size, epochs=5, shuffle=False,
- validation_data=(x_val, y_val))
-```
diff --git a/docs/templates/index.md b/docs/templates/index.md
deleted file mode 100644
index 2969073ec56..00000000000
--- a/docs/templates/index.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# Keras: The Python Deep Learning library
-
-
-
-{{autogenerated}}
\ No newline at end of file
diff --git a/docs/templates/initializers.md b/docs/templates/initializers.md
deleted file mode 100644
index c4e1a637b0a..00000000000
--- a/docs/templates/initializers.md
+++ /dev/null
@@ -1,43 +0,0 @@
-## Usage of initializers
-
-Initializers define the way to set the initial random weights of Keras layers.
-
-The keyword arguments used for passing initializers to layers will depend on the layer. Usually it is simply `kernel_initializer` and `bias_initializer`:
-
-```python
-model.add(Dense(64,
- kernel_initializer='random_uniform',
- bias_initializer='zeros'))
-```
-
-## Available initializers
-
-The following built-in initializers are available as part of the `keras.initializers` module:
-
-{{autogenerated}}
-
-
-An initializer may be passed as a string (must match one of the available initializers above), or as a callable:
-
-```python
-from keras import initializers
-
-model.add(Dense(64, kernel_initializer=initializers.random_normal(stddev=0.01)))
-
-# also works; will use the default parameters.
-model.add(Dense(64, kernel_initializer='random_normal'))
-```
-
-
-## Using custom initializers
-
-If you pass a custom callable, it must take the arguments `shape` (shape of the variable to initialize) and `dtype` (dtype of the generated values):
-
-```python
-from keras import backend as K
-
-def my_init(shape, dtype=None):
- return K.random_normal(shape, dtype=dtype)
-
-model.add(Dense(64, kernel_initializer=my_init))
-```
diff --git a/docs/templates/layers/about-keras-layers.md b/docs/templates/layers/about-keras-layers.md
deleted file mode 100644
index a12f56062cb..00000000000
--- a/docs/templates/layers/about-keras-layers.md
+++ /dev/null
@@ -1,37 +0,0 @@
-# About Keras layers
-
-All Keras layers have a number of methods in common:
-
-- `layer.get_weights()`: returns the weights of the layer as a list of Numpy arrays.
-- `layer.set_weights(weights)`: sets the weights of the layer from a list of Numpy arrays (with the same shapes as the output of `get_weights`).
-- `layer.get_config()`: returns a dictionary containing the configuration of the layer. The layer can be reinstantiated from its config via:
-
-```python
-layer = Dense(32)
-config = layer.get_config()
-reconstructed_layer = Dense.from_config(config)
-```
-
-Or:
-
-```python
-from keras import layers
-
-config = layer.get_config()
-layer = layers.deserialize({'class_name': layer.__class__.__name__,
- 'config': config})
-```
-
-If a layer has a single node (i.e. if it isn't a shared layer), you can get its input tensor, output tensor, input shape and output shape via:
-
-- `layer.input`
-- `layer.output`
-- `layer.input_shape`
-- `layer.output_shape`
-
-If the layer has multiple nodes (see: [the concept of layer node and shared layers](/getting-started/functional-api-guide/#the-concept-of-layer-node)), you can use the following methods:
-
-- `layer.get_input_at(node_index)`
-- `layer.get_output_at(node_index)`
-- `layer.get_input_shape_at(node_index)`
-- `layer.get_output_shape_at(node_index)`
\ No newline at end of file
diff --git a/docs/templates/layers/writing-your-own-keras-layers.md b/docs/templates/layers/writing-your-own-keras-layers.md
deleted file mode 100644
index eb163306df0..00000000000
--- a/docs/templates/layers/writing-your-own-keras-layers.md
+++ /dev/null
@@ -1,68 +0,0 @@
-# Writing your own Keras layers
-
-For simple, stateless custom operations, you are probably better off using `layers.core.Lambda` layers. But for any custom operation that has trainable weights, you should implement your own layer.
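-
-For instance, a minimal sketch of such a stateless op wrapped in a `Lambda` layer (assuming `model` is a `Sequential` model whose input shape is already known; the doubling op is illustrative):
-
-```python
-from keras.layers import Lambda
-
-model.add(Lambda(lambda x: x * 2))  # element-wise doubling, no trainable weights
-```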
-
-Here is the skeleton of a Keras layer, **as of Keras 2.0** (if you have an older version, please upgrade). There are only three methods you need to implement:
-
-- `build(input_shape)`: this is where you will define your weights. This method must set `self.built = True` at the end, which can be done by calling `super(MyLayer, self).build(input_shape)`.
-- `call(x)`: this is where the layer's logic lives. Unless you want your layer to support masking, you only have to care about the first argument passed to `call`: the input tensor.
-- `compute_output_shape(input_shape)`: in case your layer modifies the shape of its input, you should specify here the shape transformation logic. This allows Keras to do automatic shape inference.
-
-```python
-from keras import backend as K
-from keras.layers import Layer
-
-class MyLayer(Layer):
-
- def __init__(self, output_dim, **kwargs):
- self.output_dim = output_dim
- super(MyLayer, self).__init__(**kwargs)
-
- def build(self, input_shape):
- # Create a trainable weight variable for this layer.
- self.kernel = self.add_weight(name='kernel',
- shape=(input_shape[1], self.output_dim),
- initializer='uniform',
- trainable=True)
- super(MyLayer, self).build(input_shape) # Be sure to call this at the end
-
- def call(self, x):
- return K.dot(x, self.kernel)
-
- def compute_output_shape(self, input_shape):
- return (input_shape[0], self.output_dim)
-```
-
-It is also possible to define Keras layers which have multiple input tensors and multiple output tensors. To do this, you should assume that the inputs and outputs of the methods `build(input_shape)`, `call(x)` and `compute_output_shape(input_shape)` are lists. Here is an example, similar to the one above:
-
-```python
-from keras import backend as K
-from keras.layers import Layer
-
-class MyLayer(Layer):
-
- def __init__(self, output_dim, **kwargs):
- self.output_dim = output_dim
- super(MyLayer, self).__init__(**kwargs)
-
- def build(self, input_shape):
- assert isinstance(input_shape, list)
- # Create a trainable weight variable for this layer.
- self.kernel = self.add_weight(name='kernel',
- shape=(input_shape[0][1], self.output_dim),
- initializer='uniform',
- trainable=True)
- super(MyLayer, self).build(input_shape) # Be sure to call this at the end
-
- def call(self, x):
- assert isinstance(x, list)
- a, b = x
- return [K.dot(a, self.kernel) + b, K.mean(b, axis=-1)]
-
- def compute_output_shape(self, input_shape):
- assert isinstance(input_shape, list)
- shape_a, shape_b = input_shape
- return [(shape_a[0], self.output_dim), shape_b[:-1]]
-```
-
-The existing Keras layers provide examples of how to implement almost anything. Never hesitate to read the source code!
diff --git a/docs/templates/losses.md b/docs/templates/losses.md
deleted file mode 100644
index ab5bfbea07f..00000000000
--- a/docs/templates/losses.md
+++ /dev/null
@@ -1,42 +0,0 @@
-
-## Usage of loss functions
-
-A loss function (or objective function, or optimization score function) is one of the two parameters required to compile a model:
-
-```python
-model.compile(loss='mean_squared_error', optimizer='sgd')
-```
-
-```python
-from keras import losses
-
-model.compile(loss=losses.mean_squared_error, optimizer='sgd')
-```
-
-You can either pass the name of an existing loss function, or pass a TensorFlow/Theano symbolic function that returns a scalar for each data-point and takes the following two arguments:
-
-- __y_true__: True labels. TensorFlow/Theano tensor.
-- __y_pred__: Predictions. TensorFlow/Theano tensor of the same shape as y_true.
-
-The actual optimized objective is the mean of the output array across all datapoints.
-
-For a few examples of such functions, check out the [losses source](https://github.com/keras-team/keras/blob/master/keras/losses.py).
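-
-For instance, a minimal sketch of a custom loss written in this style (the name is illustrative; it mirrors the built-in `mean_absolute_error`):
-
-```python
-import keras.backend as K
-
-def my_mean_absolute_error(y_true, y_pred):
-    # Returns one scalar per data-point; Keras averages over the batch.
-    return K.mean(K.abs(y_pred - y_true), axis=-1)
-
-model.compile(loss=my_mean_absolute_error, optimizer='sgd')
-```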
-
-## Available loss functions
-
-{{autogenerated}}
-
-----
-
-**Note**: when using the `categorical_crossentropy` loss, your targets should be in categorical format (e.g. if you have 10 classes, the target for each sample should be a 10-dimensional vector that is all-zeros except for a 1 at the index corresponding to the class of the sample). In order to convert *integer targets* into *categorical targets*, you can use the Keras utility `to_categorical`:
-
-```python
-from keras.utils import to_categorical
-
-categorical_labels = to_categorical(int_labels, num_classes=None)
-```
-
-When using the `sparse_categorical_crossentropy` loss, your targets should be *integer targets*.
-If you have categorical targets, you should use `categorical_crossentropy`.
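-
-For instance, a sketch with integer targets passed directly (the model and dummy data are assumed):
-
-```python
-import numpy as np
-
-data = np.random.random((1000, 100))
-int_labels = np.random.randint(10, size=(1000, 1))
-
-model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop')
-model.fit(data, int_labels)  # no one-hot conversion needed
-```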
-
-`categorical_crossentropy` is another term for [multi-class log loss](http://wiki.fast.ai/index.php/Log_Loss).
diff --git a/docs/templates/metrics.md b/docs/templates/metrics.md
deleted file mode 100644
index 3bca29ad673..00000000000
--- a/docs/templates/metrics.md
+++ /dev/null
@@ -1,58 +0,0 @@
-
-## Usage of metrics
-
-A metric is a function that is used to judge the performance of your model. Metric functions are to be supplied in the `metrics` parameter when a model is compiled.
-
-```python
-model.compile(loss='mean_squared_error',
- optimizer='sgd',
- metrics=['mae', 'acc'])
-```
-
-```python
-from keras import metrics
-
-model.compile(loss='mean_squared_error',
- optimizer='sgd',
- metrics=[metrics.mae, metrics.categorical_accuracy])
-```
-
-A metric function is similar to a [loss function](/losses), except that the results from evaluating a metric are not used when training the model. You may use any of the loss functions as a metric function.
-
-You can either pass the name of an existing metric, or pass a Theano/TensorFlow symbolic function (see [Custom metrics](#custom-metrics)).
-
-#### Arguments
- - __y_true__: True labels. Theano/TensorFlow tensor.
- - __y_pred__: Predictions. Theano/TensorFlow tensor of the same shape as y_true.
-
-#### Returns
- Single tensor value representing the mean of the output array across all
- datapoints.
-
-----
-
-## Available metrics
-
-
-{{autogenerated}}
-
-In addition to the metrics above, you may use any of the loss functions described in the [loss function](/losses) page as metrics.
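-
-For instance, a sketch using `mse` as an extra metric:
-
-```python
-model.compile(loss='binary_crossentropy',
-              optimizer='rmsprop',
-              metrics=['accuracy', 'mse'])
-```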
-
-----
-
-## Custom metrics
-
-Custom metrics can be passed at the compilation step. The
-function would need to take `(y_true, y_pred)` as arguments and return
-a single tensor value.
-
-```python
-import keras.backend as K
-
-def mean_pred(y_true, y_pred):
- return K.mean(y_pred)
-
-model.compile(optimizer='rmsprop',
- loss='binary_crossentropy',
- metrics=['accuracy', mean_pred])
-```
diff --git a/docs/templates/models/about-keras-models.md b/docs/templates/models/about-keras-models.md
deleted file mode 100644
index 69bbdb84dcb..00000000000
--- a/docs/templates/models/about-keras-models.md
+++ /dev/null
@@ -1,94 +0,0 @@
-# About Keras models
-
-There are two main types of models available in Keras: [the Sequential model](/models/sequential), and [the Model class used with the functional API](/models/model).
-
-These models have a number of methods and attributes in common:
-
-- `model.layers` is a flattened list of the layers comprising the model.
-- `model.inputs` is the list of input tensors of the model.
-- `model.outputs` is the list of output tensors of the model.
-- `model.summary()` prints a summary representation of your model. For layers with multiple outputs, `multiple` is displayed instead of each individual output shape due to size limitations. Shortcut for [utils.print_summary](/utils/#print_summary).
-- `model.get_config()` returns a dictionary containing the configuration of the model. The model can be reinstantiated from its config via:
-
-```python
-config = model.get_config()
-model = Model.from_config(config)
-# or, for Sequential:
-model = Sequential.from_config(config)
-```
-
-- `model.get_weights()` returns a list of all weight tensors in the model, as Numpy arrays.
-- `model.set_weights(weights)` sets the values of the weights of the model, from a list of Numpy arrays. The arrays in the list should have the same shape as those returned by `get_weights()`.
-- `model.to_json()` returns a representation of the model as a JSON string. Note that the representation does not include the weights, only the architecture. You can reinstantiate the same model (with reinitialized weights) from the JSON string via:
-
-```python
-from keras.models import model_from_json
-
-json_string = model.to_json()
-model = model_from_json(json_string)
-```
-- `model.to_yaml()` returns a representation of the model as a YAML string. Note that the representation does not include the weights, only the architecture. You can reinstantiate the same model (with reinitialized weights) from the YAML string via:
-
-```python
-from keras.models import model_from_yaml
-
-yaml_string = model.to_yaml()
-model = model_from_yaml(yaml_string)
-```
-
-- `model.save_weights(filepath)` saves the weights of the model as an HDF5 file.
-- `model.load_weights(filepath, by_name=False)` loads the weights of the model from a HDF5 file (created by `save_weights`). By default, the architecture is expected to be unchanged. To load weights into a different architecture (with some layers in common), use `by_name=True` to load only those layers with the same name.
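-
-For example (the filepath and the second model are illustrative):
-
-```python
-model.save_weights('my_model_weights.h5')
-model.load_weights('my_model_weights.h5')
-
-# Load into a different architecture, matching layers by name:
-model2.load_weights('my_model_weights.h5', by_name=True)
-```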
-
-Note: Please also see [How can I install HDF5 or h5py to save my models in Keras?](/getting-started/faq/#how-can-i-install-HDF5-or-h5py-to-save-my-models-in-Keras) in the FAQ for instructions on how to install `h5py`.
-
-
-## Model subclassing
-
-In addition to these two types of models, you may create your own fully-customizable models by subclassing the `Model` class
-and implementing your own forward pass in the `call` method (the `Model` subclassing API was introduced in Keras 2.2.0).
-
-Here's an example of a simple multi-layer perceptron model written as a `Model` subclass:
-
-```python
-import keras
-
-class SimpleMLP(keras.Model):
-
- def __init__(self, use_bn=False, use_dp=False, num_classes=10):
- super(SimpleMLP, self).__init__(name='mlp')
- self.use_bn = use_bn
- self.use_dp = use_dp
- self.num_classes = num_classes
-
- self.dense1 = keras.layers.Dense(32, activation='relu')
- self.dense2 = keras.layers.Dense(num_classes, activation='softmax')
- if self.use_dp:
- self.dp = keras.layers.Dropout(0.5)
- if self.use_bn:
- self.bn = keras.layers.BatchNormalization(axis=-1)
-
- def call(self, inputs):
- x = self.dense1(inputs)
- if self.use_dp:
- x = self.dp(x)
- if self.use_bn:
- x = self.bn(x)
- return self.dense2(x)
-
-model = SimpleMLP()
-model.compile(...)
-model.fit(...)
-```
-
-Layers are defined in `__init__(self, ...)`, and the forward pass is specified in `call(self, inputs)`. In `call`, you may specify custom losses by calling `self.add_loss(loss_tensor)` (like you would in a custom layer).
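-
-For instance, a sketch of adding a custom activity penalty inside `call`, extending the `SimpleMLP` above (the penalty and its weight are illustrative):
-
-```python
-import keras.backend as K
-
-class PenalizedMLP(SimpleMLP):
-
-    def call(self, inputs):
-        x = self.dense1(inputs)
-        # Penalize large hidden activations.
-        self.add_loss(0.01 * K.mean(K.square(x)))
-        return self.dense2(x)
-```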
-
-In subclassed models, the model's topology is defined as Python code (rather than as a static graph of layers).
-That means the model's topology cannot be inspected or serialized. As a result, the following methods and attributes are **not available for subclassed models**:
-
-- `model.inputs` and `model.outputs`.
-- `model.to_yaml()` and `model.to_json()`.
-- `model.get_config()` and `model.save()`.
-
-**Key point:** use the right API for the job. The `Model` subclassing API can give you greater flexibility when implementing complex models,
-but it comes at a cost (in addition to these missing features):
-it is more verbose, more complex, and has more opportunities for user errors. If possible, prefer the functional API, which is more user-friendly.
diff --git a/docs/templates/models/model.md b/docs/templates/models/model.md
deleted file mode 100644
index 572ed71acb7..00000000000
--- a/docs/templates/models/model.md
+++ /dev/null
@@ -1,27 +0,0 @@
-# Model class API
-
-In the functional API, given some input tensor(s) and output tensor(s), you can instantiate a `Model` via:
-
-```python
-from keras.models import Model
-from keras.layers import Input, Dense
-
-a = Input(shape=(32,))
-b = Dense(32)(a)
-model = Model(inputs=a, outputs=b)
-```
-
-This model will include all layers required in the computation of `b` given `a`.
-
-In the case of multi-input or multi-output models, you can use lists as well:
-
-```python
-model = Model(inputs=[a1, a2], outputs=[b1, b2, b3])
-```
-
-For a detailed introduction to what `Model` can do, read [this guide to the Keras functional API](/getting-started/functional-api-guide).
-
-
-## Methods
-
-{{autogenerated}}
diff --git a/docs/templates/models/sequential.md b/docs/templates/models/sequential.md
deleted file mode 100644
index d085f9d5244..00000000000
--- a/docs/templates/models/sequential.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# The Sequential model API
-
-To get started, read [this guide to the Keras Sequential model](/getting-started/sequential-model-guide).
-
-----
-
-## Sequential model methods
-
-{{autogenerated}}
\ No newline at end of file
diff --git a/docs/templates/optimizers.md b/docs/templates/optimizers.md
deleted file mode 100644
index b19e49855f1..00000000000
--- a/docs/templates/optimizers.md
+++ /dev/null
@@ -1,49 +0,0 @@
-
-## Usage of optimizers
-
-An optimizer is one of the two arguments required for compiling a Keras model:
-
-```python
-from keras import optimizers
-
-model = Sequential()
-model.add(Dense(64, kernel_initializer='uniform', input_shape=(10,)))
-model.add(Activation('softmax'))
-
-sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
-model.compile(loss='mean_squared_error', optimizer=sgd)
-```
-
-You can either instantiate an optimizer before passing it to `model.compile()`, as in the above example, or you can refer to it by its name. In the latter case, the default parameters for the optimizer will be used.
-
-```python
-# pass optimizer by name: default parameters will be used
-model.compile(loss='mean_squared_error', optimizer='sgd')
-```
-
----
-
-## Parameters common to all Keras optimizers
-
-The parameters `clipnorm` and `clipvalue` can be used with all optimizers to control gradient clipping:
-
-```python
-from keras import optimizers
-
-# All parameter gradients will be clipped to
-# a maximum norm of 1.
-sgd = optimizers.SGD(lr=0.01, clipnorm=1.)
-```
-
-```python
-from keras import optimizers
-
-# All parameter gradients will be clipped to
-# a maximum value of 0.5 and
-# a minimum value of -0.5.
-sgd = optimizers.SGD(lr=0.01, clipvalue=0.5)
-```
-
----
-
-{{autogenerated}}
diff --git a/docs/templates/preprocessing/image.md b/docs/templates/preprocessing/image.md
deleted file mode 100644
index 5a6f6b00c99..00000000000
--- a/docs/templates/preprocessing/image.md
+++ /dev/null
@@ -1,4 +0,0 @@
-
-# Image Preprocessing
-
-{{autogenerated}}
diff --git a/docs/templates/preprocessing/text.md b/docs/templates/preprocessing/text.md
deleted file mode 100644
index 9daf0bfd602..00000000000
--- a/docs/templates/preprocessing/text.md
+++ /dev/null
@@ -1,4 +0,0 @@
-
-### Text Preprocessing
-
-{{autogenerated}}
diff --git a/docs/templates/regularizers.md b/docs/templates/regularizers.md
deleted file mode 100644
index 3cbf774f5d9..00000000000
--- a/docs/templates/regularizers.md
+++ /dev/null
@@ -1,46 +0,0 @@
-## Usage of regularizers
-
-Regularizers allow you to apply penalties on layer parameters or layer activity during optimization. These penalties are incorporated in the loss function that the network optimizes.
-
-The penalties are applied on a per-layer basis. The exact API will depend on the layer, but the layers `Dense`, `Conv1D`, `Conv2D` and `Conv3D` have a unified API.
-
-These layers expose 3 keyword arguments:
-
-- `kernel_regularizer`: instance of `keras.regularizers.Regularizer`
-- `bias_regularizer`: instance of `keras.regularizers.Regularizer`
-- `activity_regularizer`: instance of `keras.regularizers.Regularizer`
-
-
-## Example
-
-```python
-from keras import regularizers
-model.add(Dense(64, input_dim=64,
- kernel_regularizer=regularizers.l2(0.01),
- activity_regularizer=regularizers.l1(0.01)))
-```
-
-## Available penalties
-
-```python
-keras.regularizers.l1(0.)
-keras.regularizers.l2(0.)
-keras.regularizers.l1_l2(l1=0.01, l2=0.01)
-```
-
-## Developing new regularizers
-
-Any function that takes in a weight matrix and returns a loss contribution tensor can be used as a regularizer, e.g.:
-
-```python
-from keras import backend as K
-
-def l1_reg(weight_matrix):
- return 0.01 * K.sum(K.abs(weight_matrix))
-
-model.add(Dense(64, input_dim=64,
- kernel_regularizer=l1_reg))
-```
-
-Alternatively, you can write your regularizers in an object-oriented way;
-see the [keras/regularizers.py](https://github.com/keras-team/keras/blob/master/keras/regularizers.py) module for examples.
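-
-For instance, a minimal sketch of a class-based regularizer (the class is illustrative; the built-ins also implement `get_config` so they can be serialized with the model):
-
-```python
-from keras import backend as K
-
-class L1Regularizer(object):
-
-    def __init__(self, l1=0.01):
-        self.l1 = l1
-
-    def __call__(self, weight_matrix):
-        return self.l1 * K.sum(K.abs(weight_matrix))
-
-    def get_config(self):
-        return {'l1': self.l1}
-
-model.add(Dense(64, input_dim=64,
-                kernel_regularizer=L1Regularizer(0.01)))
-```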
diff --git a/docs/templates/scikit-learn-api.md b/docs/templates/scikit-learn-api.md
deleted file mode 100644
index a909046ccd3..00000000000
--- a/docs/templates/scikit-learn-api.md
+++ /dev/null
@@ -1,45 +0,0 @@
-# Wrappers for the Scikit-Learn API
-
-You can use `Sequential` Keras models (single-input only) as part of your Scikit-Learn workflow via the wrappers found in the `keras.wrappers.scikit_learn` module.
-
-There are two wrappers available:
-
-`keras.wrappers.scikit_learn.KerasClassifier(build_fn=None, **sk_params)`, which implements the Scikit-Learn classifier interface,
-
-`keras.wrappers.scikit_learn.KerasRegressor(build_fn=None, **sk_params)`, which implements the Scikit-Learn regressor interface.
-
-### Arguments
-
-- __build_fn__: callable function or class instance
-- __sk_params__: model parameters & fitting parameters
-
-`build_fn` should construct, compile and return a Keras model, which
-will then be used to fit/predict. One of the following
-three values can be passed to `build_fn`:
-
-1. A function
-2. An instance of a class that implements the `__call__` method
-3. None. This means you implement a class that inherits from either
-`KerasClassifier` or `KerasRegressor`. The `__call__` method of the
-present class will then be treated as the default `build_fn`.
-
-`sk_params` takes both model parameters and fitting parameters. Legal model
-parameters are the arguments of `build_fn`. Note that like all other
-estimators in scikit-learn, `build_fn` should provide default values for
-its arguments, so that you can create the estimator without passing any
-values to `sk_params`.
-
-`sk_params` also accepts parameters for calling the `fit`, `predict`,
-`predict_proba`, and `score` methods (e.g., `epochs`, `batch_size`).
-Fitting (predicting) parameters are selected in the following order:
-
-1. Values passed to the dictionary arguments of
-`fit`, `predict`, `predict_proba`, and `score` methods
-2. Values passed to `sk_params`
-3. The default values of the `keras.models.Sequential`
-`fit`, `predict`, `predict_proba` and `score` methods
-
-When using scikit-learn's `grid_search` API, legal tunable parameters are
-those you could pass to `sk_params`, including fitting parameters.
-In other words, you could use `grid_search` to search for the best
-`batch_size` or `epochs` as well as the model parameters.
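-
-For example, a minimal sketch of wrapping a model-building function and grid-searching over it (the builder function and hyperparameters are illustrative):
-
-```python
-from keras.models import Sequential
-from keras.layers import Dense
-from keras.wrappers.scikit_learn import KerasClassifier
-from sklearn.model_selection import GridSearchCV
-
-def build_model(hidden_units=32):
-    model = Sequential()
-    model.add(Dense(hidden_units, activation='relu', input_dim=20))
-    model.add(Dense(1, activation='sigmoid'))
-    model.compile(loss='binary_crossentropy', optimizer='rmsprop')
-    return model
-
-clf = KerasClassifier(build_fn=build_model, epochs=10, batch_size=32)
-grid = GridSearchCV(clf, param_grid={'hidden_units': [16, 32],
-                                     'batch_size': [16, 32]})
-# grid.fit(x_train, y_train) would then search over both model and fitting parameters.
-```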
diff --git a/docs/templates/visualization.md b/docs/templates/visualization.md
deleted file mode 100644
index cd296c60176..00000000000
--- a/docs/templates/visualization.md
+++ /dev/null
@@ -1,54 +0,0 @@
-
-## Model visualization
-
-Keras provides utility functions to plot a Keras model (using `graphviz`).
-
-This will plot a graph of the model and save it to a file:
-```python
-from keras.utils import plot_model
-plot_model(model, to_file='model.png')
-```
-
-`plot_model` takes four optional arguments:
-
-- `show_shapes` (defaults to False) controls whether output shapes are shown in the graph.
-- `show_layer_names` (defaults to True) controls whether layer names are shown in the graph.
-- `expand_nested` (defaults to False) controls whether to expand nested models into clusters in the graph.
-- `dpi` (defaults to 96) controls image dpi.
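-
-For instance, combining these options:
-
-```python
-from keras.utils import plot_model
-plot_model(model, to_file='model.png', show_shapes=True, dpi=96)
-```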
-
-You can also directly obtain the `pydot.Graph` object and render it yourself,
-for example to show it in an IPython notebook:
-```python
-from IPython.display import SVG
-from keras.utils import model_to_dot
-
-SVG(model_to_dot(model).create(prog='dot', format='svg'))
-```
-
-## Training history visualization
-
-The `fit()` method on a Keras `Model` returns a `History` object. The `History.history` attribute is a dictionary recording training loss values and metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable). Here is a simple example using `matplotlib` to generate loss & accuracy plots for training & validation:
-
-```python
-import matplotlib.pyplot as plt
-
-history = model.fit(x, y, validation_split=0.25, epochs=50, batch_size=16, verbose=1)
-
-# Plot training & validation accuracy values
-plt.plot(history.history['acc'])
-plt.plot(history.history['val_acc'])
-plt.title('Model accuracy')
-plt.ylabel('Accuracy')
-plt.xlabel('Epoch')
-plt.legend(['Train', 'Test'], loc='upper left')
-plt.show()
-
-# Plot training & validation loss values
-plt.plot(history.history['loss'])
-plt.plot(history.history['val_loss'])
-plt.title('Model loss')
-plt.ylabel('Loss')
-plt.xlabel('Epoch')
-plt.legend(['Train', 'Test'], loc='upper left')
-plt.show()
-```
diff --git a/docs/templates/why-use-keras.md b/docs/templates/why-use-keras.md
deleted file mode 100644
index 53d9a27d87a..00000000000
--- a/docs/templates/why-use-keras.md
+++ /dev/null
@@ -1,80 +0,0 @@
-# Why use Keras?
-
-There are countless deep learning frameworks available today. Why use Keras rather than any other? Here are some of the areas in which Keras compares favorably to existing alternatives.
-
----
-
-## Keras prioritizes developer experience
-
-- Keras is an API designed for human beings, not machines. [Keras follows best practices for reducing cognitive load](https://blog.keras.io/user-experience-design-for-apis.html): it offers consistent & simple APIs, it minimizes the number of user actions required for common use cases, and it provides clear and actionable feedback upon user error.
-- This makes Keras easy to learn and easy to use. As a Keras user, you are more productive, allowing you to try more ideas than your competition, faster -- which in turn [helps you win machine learning competitions](https://www.quora.com/Why-has-Keras-been-so-successful-lately-at-Kaggle-competitions).
-- This ease of use does not come at the cost of reduced flexibility: because Keras integrates with lower-level deep learning languages (in particular TensorFlow), it enables you to implement anything you could have built in the base language. In particular, as `tf.keras`, the Keras API integrates seamlessly with your TensorFlow workflows.
-
----
-
-## Keras has broad adoption in the industry and the research community
-
-
-*Figure: deep learning frameworks ranking computed by Jeff Hale, based on 11 data sources across 7 categories.*
-
-With over 250,000 individual users as of mid-2018, Keras has stronger adoption in both the industry and the research community than any other deep learning framework except TensorFlow itself (and the Keras API is the official frontend of TensorFlow, via the `tf.keras` module).
-
-You are already constantly interacting with features built with Keras -- it is in use at Netflix, Uber, Yelp, Instacart, Zocdoc, Square, and many others. It is especially popular among startups that place deep learning at the core of their products.
-
-Keras is also a favorite among deep learning researchers, coming in #2 in terms of mentions in scientific papers uploaded to the preprint server [arXiv.org](https://arxiv.org/archive/cs). Keras has also been adopted by researchers at large scientific organizations, in particular CERN and NASA.
-
----
-
-## Keras makes it easy to turn models into products
-
-Your Keras models can be easily deployed across a greater range of platforms than any other deep learning framework:
-
-- On iOS, via [Apple’s CoreML](https://developer.apple.com/documentation/coreml) (Keras support officially provided by Apple). Here's [a tutorial](https://www.pyimagesearch.com/2018/04/23/running-keras-models-on-ios-with-coreml/).
-- On Android, via the TensorFlow Android runtime. Example: [Not Hotdog app](https://medium.com/@timanglade/how-hbos-silicon-valley-built-not-hotdog-with-mobile-tensorflow-keras-react-native-ef03260747f3).
-- In the browser, via GPU-accelerated JavaScript runtimes such as [Keras.js](https://transcranial.github.io/keras-js/#/) and [WebDNN](https://mil-tokyo.github.io/webdnn/).
-- On Google Cloud, via [TensorFlow-Serving](https://www.tensorflow.org/serving/).
-- [In a Python webapp backend (such as a Flask app)](https://blog.keras.io/building-a-simple-keras-deep-learning-rest-api.html).
-- On the JVM, via [DL4J model import provided by SkyMind](https://deeplearning4j.org/model-import-keras).
-- On Raspberry Pi.
-
----
-
-## Keras supports multiple backend engines and does not lock you into one ecosystem
-
-Your Keras models can be developed with a range of different [deep learning backends](https://keras.io/backend/). Importantly, any Keras model that only leverages built-in layers will be portable across all these backends: you can train a model with one backend, and load it with another (e.g. for deployment). Available backends include:
-
-- The TensorFlow backend (from Google)
-- The CNTK backend (from Microsoft)
-- The Theano backend
-
-Amazon also has [a fork of Keras which uses MXNet as backend](https://github.com/awslabs/keras-apache-mxnet).
-
-As such, your Keras model can be trained on a number of different hardware platforms beyond CPUs:
-
-- [NVIDIA GPUs](https://developer.nvidia.com/deep-learning)
-- [Google TPUs](https://cloud.google.com/tpu/), via the TensorFlow backend and Google Cloud
-- OpenCL-enabled GPUs, such as those from AMD, via [the PlaidML Keras backend](https://github.com/plaidml/plaidml)
-
----
-
-## Keras has strong multi-GPU support and distributed training support
-
-- Keras has [built-in support for multi-GPU data parallelism](/utils/#multi_gpu_model)
-- [Horovod](https://github.com/uber/horovod), from Uber, has first-class support for Keras models
-- Keras models [can be turned into TensorFlow Estimators](https://www.tensorflow.org/versions/master/api_docs/python/tf/keras/estimator/model_to_estimator) and trained on [clusters of GPUs on Google Cloud](https://cloud.google.com/solutions/running-distributed-tensorflow-on-compute-engine)
-- Keras can be run on Spark via [Dist-Keras](https://github.com/cerndb/dist-keras) (from CERN) and [Elephas](https://github.com/maxpumperla/elephas)
-
----
-
-## Keras development is backed by key companies in the deep learning ecosystem
-
-Keras development is backed primarily by Google, and the Keras API comes packaged in TensorFlow as `tf.keras`. Additionally, Microsoft maintains the CNTK Keras backend. Amazon AWS is maintaining the Keras fork with MXNet support. Other contributing companies include NVIDIA, Uber, and Apple (with CoreML).
-
diff --git a/docs/theme/404.html b/docs/theme/404.html
deleted file mode 100644
index a13ad46759f..00000000000
--- a/docs/theme/404.html
+++ /dev/null
@@ -1,9 +0,0 @@
-{% extends "base.html" %}
-
-{% block content %}
-
-