From ee62c337e26a5e9f7622b5e6dc388d63955b011a Mon Sep 17 00:00:00 2001
From: Jinan Zhou
Date: Thu, 14 Feb 2019 18:23:48 -0800
Subject: [PATCH] Training Container with Model Constructor for cifar10 (#345)

* Training Container with Model Constructor for cifar10

* fix a small bug

* make num_epochs a parameter
---
Review notes: unsupported 'opt_type' / 'reduction_type' values now raise
ValueError, the unused all_dims list is dropped, and docstrings are added.

 .../cifar10/Dockerfile                        |  32 ++++++
 .../cifar10/ModelConstructor.py               |  74 +++++++++++++
 .../cifar10/RunTrial.py                       |  59 +++++++++++
 .../cifar10/op_library.py                     | 105 +++++++++++++++++++
 .../cifar10/requirements.txt                  |   2 +
 5 files changed, 272 insertions(+)
 create mode 100644 examples/NAS-training-containers/cifar10/Dockerfile
 create mode 100644 examples/NAS-training-containers/cifar10/ModelConstructor.py
 create mode 100644 examples/NAS-training-containers/cifar10/RunTrial.py
 create mode 100644 examples/NAS-training-containers/cifar10/op_library.py
 create mode 100644 examples/NAS-training-containers/cifar10/requirements.txt

diff --git a/examples/NAS-training-containers/cifar10/Dockerfile b/examples/NAS-training-containers/cifar10/Dockerfile
new file mode 100644
index 00000000000..212eb23bc02
--- /dev/null
+++ b/examples/NAS-training-containers/cifar10/Dockerfile
@@ -0,0 +1,32 @@
+ARG cuda_version=9.0
+ARG cudnn_version=7
+FROM nvidia/cuda:${cuda_version}-cudnn${cudnn_version}-devel
+
+# Install system packages
+RUN apt-get update && apt-get install -y software-properties-common && \
+    add-apt-repository ppa:deadsnakes/ppa && \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+      bzip2 \
+      g++ \
+      git \
+      graphviz \
+      libgl1-mesa-glx \
+      libhdf5-dev \
+      openmpi-bin \
+      python3.5 \
+      python3-pip \
+      python3-setuptools \
+      python3-dev \
+      wget && \
+    rm -rf /var/lib/apt/lists/*
+
+
+ADD . /app
+WORKDIR /app
+
+RUN pip3 install --upgrade pip
+RUN pip3 install --no-cache-dir -r requirements.txt
+ENV PYTHONPATH /app
+
+ENTRYPOINT ["python3.5", "-u", "RunTrial.py"]
diff --git a/examples/NAS-training-containers/cifar10/ModelConstructor.py b/examples/NAS-training-containers/cifar10/ModelConstructor.py
new file mode 100644
index 00000000000..cbc4bd7c247
--- /dev/null
+++ b/examples/NAS-training-containers/cifar10/ModelConstructor.py
@@ -0,0 +1,74 @@
+import numpy as np
+from keras.models import Model
+from keras import backend as K
+import json
+from keras.layers import Input, Conv2D, ZeroPadding2D, concatenate, MaxPooling2D, \
+    AveragePooling2D, Dense, Activation, BatchNormalization, GlobalAveragePooling2D, Dropout
+from op_library import concat, conv, reduction
+
+
+class ModelConstructor(object):
+    """Build a Keras model from a JSON architecture and a JSON network config."""
+
+    def __init__(self, arc_json, nn_json):
+        """Parse both JSON strings and keep the fields build_model() reads."""
+        self.arch = json.loads(arc_json)
+        nn_config = json.loads(nn_json)
+        self.num_layers = nn_config['num_layers']
+        self.input_size = nn_config['input_size']
+        self.output_size = nn_config['output_size'][-1]
+        self.embedding = nn_config['embedding']
+
+    def build_model(self):
+        """Stack the layers described by self.arch and return the Keras Model.
+
+        Raises:
+            ValueError: if an operation's 'opt_type' is neither
+                'convolution' nor 'reduction'.
+        """
+        # output tensor of every layer; index 0 is the input layer
+        all_layers = [0 for _ in range(self.num_layers + 1)]
+
+        # ================= Stacking layers =================
+        # Input Layer. Layer 0
+        input_layer = Input(shape=self.input_size)
+        all_layers[0] = input_layer
+
+        # Intermediate Layers. Starting from layer 1.
+        for l in range(1, self.num_layers + 1):
+            input_layers = list()
+            opt = self.arch[l - 1][0]
+            opt_config = self.embedding[str(opt)]
+            skip = self.arch[l - 1][1:l+1]
+
+            # set up the connection to the previous layer first
+            input_layers.append(all_layers[l - 1])
+
+            # then add skip connections
+            for i in range(l - 1):
+                if l > 1 and skip[i] == 1:
+                    input_layers.append(all_layers[i])
+
+            layer_input = concat(input_layers)
+            if opt_config['opt_type'] == 'convolution':
+                layer_output = conv(layer_input, opt_config)
+            elif opt_config['opt_type'] == 'reduction':
+                layer_output = reduction(layer_input, opt_config)
+            else:
+                # fail fast: otherwise layer_output would be unbound or stale
+                raise ValueError(
+                    "unsupported opt_type: {!r}".format(opt_config['opt_type']))
+
+            all_layers[l] = layer_output
+
+        # Final Layer
+        # Global Average Pooling, then Fully connected with softmax.
+        avgpooled = GlobalAveragePooling2D()(all_layers[self.num_layers])
+        dropped = Dropout(0.4)(avgpooled)
+        logits = Dense(units=self.output_size,
+                       activation='softmax')(dropped)
+
+        # Encapsulate the model
+        self.model = Model(inputs=input_layer, outputs=logits)
+
+        return self.model
diff --git a/examples/NAS-training-containers/cifar10/RunTrial.py b/examples/NAS-training-containers/cifar10/RunTrial.py
new file mode 100644
index 00000000000..4a111139c64
--- /dev/null
+++ b/examples/NAS-training-containers/cifar10/RunTrial.py
@@ -0,0 +1,59 @@
+import keras
+import numpy as np
+from keras.datasets import cifar10
+from ModelConstructor import ModelConstructor
+from keras.utils import to_categorical
+import argparse
+import time
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='TrainingContainer')
+    parser.add_argument('--architecture', type=str, default="", metavar='N',
+                        help='architecture of the neural network')
+    parser.add_argument('--nn_config', type=str, default="", metavar='N',
+                        help='configurations and search space embeddings')
+    parser.add_argument('--num_epochs', type=int, default=10, metavar='N',
+                        help='number of epoches that each child will be trained')
+    args = parser.parse_args()
+
+    arch = args.architecture.replace("\'", "\"")
+    print(">>> arch received by trial")
+    print(arch)
+
+    nn_config = args.nn_config.replace("\'", "\"")
+    print(">>> nn_config received by trial")
+    print(nn_config)
+
+    num_epochs = args.num_epochs
+    print(">>> num_epochs received by trial")
+    print(num_epochs)
+
+    print(">>> Constructing Model...")
+    constructor = ModelConstructor(arch, nn_config)
+    test_model = constructor.build_model()
+    print(">>> Model Constructed Successfully")
+
+    test_model.summary()
+    test_model.compile(loss=keras.losses.categorical_crossentropy,
+                       optimizer=keras.optimizers.Adam(lr=1e-3, decay=1e-4),
+                       metrics=['accuracy'])
+
+    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
+    x_train = x_train.astype('float32')
+    x_test = x_test.astype('float32')
+    x_train /= 255
+    x_test /= 255
+    y_train = to_categorical(y_train)
+    y_test = to_categorical(y_test)
+
+    print(">>> Data Loaded. Training start.")
+    for e in range(num_epochs):
+        print("\nTotal Epoch {}/{}".format(e+1, num_epochs))
+        history = test_model.fit(x=x_train, y=y_train,
+                                 shuffle=True, batch_size=128,
+                                 epochs=1, verbose=1,
+                                 validation_data=(x_test, y_test))
+        print("Training-Accuracy={}".format(history.history['acc'][-1]))
+        print("Training-Loss={}".format(history.history['loss'][-1]))
+        print("Validation-Accuracy={}".format(history.history['val_acc'][-1]))
+        print("Validation-Loss={}".format(history.history['val_loss'][-1]))
diff --git a/examples/NAS-training-containers/cifar10/op_library.py b/examples/NAS-training-containers/cifar10/op_library.py
new file mode 100644
index 00000000000..97ed0d28e89
--- /dev/null
+++ b/examples/NAS-training-containers/cifar10/op_library.py
@@ -0,0 +1,105 @@
+import numpy as np
+from keras import backend as K
+from keras.layers import Input, Conv2D, ZeroPadding2D, concatenate, MaxPooling2D, \
+    AveragePooling2D, Dense, Activation, BatchNormalization, GlobalAveragePooling2D
+
+
+def concat(inputs):
+    """Join tensors on the last axis, zero-padding smaller ones first.
+
+    Sizes are compared on dimension 1 only, and the padding computed from it
+    is applied to both rows and columns.
+    """
+    n = len(inputs)
+    if n == 1:
+        return inputs[0]
+
+    total_dim = list()
+    for x in inputs:
+        total_dim.append(K.int_shape(x))
+    total_dim = np.asarray(total_dim)
+    max_dim = max(total_dim[:, 1])
+
+    padded_input = [0 for _ in range(n)]
+
+    for i in range(n):
+        if total_dim[i][1] < max_dim:
+            diff = max_dim - total_dim[i][1]
+            half_diff = int(diff / 2)
+            if diff % 2 == 0:
+                padded_input[i] = ZeroPadding2D(padding=(half_diff, half_diff))(inputs[i])
+            else:
+                padded_input[i] = ZeroPadding2D(padding=((half_diff, half_diff + 1),
+                                                         (half_diff, half_diff + 1)))(inputs[i])
+        else:
+            padded_input[i] = inputs[i]
+
+    result = concatenate(inputs=padded_input, axis=-1)
+    return result
+
+
+def conv(x, config):
+    """Apply ReLU, then Conv2D with 'same' padding, then BatchNormalization.
+
+    'num_filter', 'filter_size' and 'stride' in config override the
+    defaults 64, 3 and 1; values are converted with int().
+    """
+    parameters = {
+        "num_filter": 64,
+        "filter_size": 3,
+        "stride": 1,
+    }
+    for k in parameters.keys():
+        if k in config:
+            parameters[k] = int(config[k])
+
+    activated = Activation('relu')(x)
+
+    conved = Conv2D(
+        filters=parameters['num_filter'],
+        kernel_size=parameters['filter_size'],
+        strides=parameters['stride'],
+        padding='same')(activated)
+
+    result = BatchNormalization()(conved)
+
+    return result
+
+
+def reduction(x, config):
+    """Apply max or average pooling to x.
+
+    config may override 'reduction_type' ("max_pooling" by default, or
+    "avg_pooling"), 'pool_size' (default 2) and 'stride' (default None).
+
+    Raises:
+        ValueError: if 'reduction_type' is any other value.
+    """
+    parameters = {
+        'reduction_type': "max_pooling",
+        'pool_size': 2,
+        'stride': None,
+    }
+    if 'reduction_type' in config:
+        parameters['reduction_type'] = config['reduction_type']
+    if 'pool_size' in config:
+        parameters['pool_size'] = int(config['pool_size'])
+    if 'stride' in config:
+        parameters['stride'] = int(config['stride'])
+
+    if parameters['reduction_type'] == 'max_pooling':
+        result = MaxPooling2D(
+            pool_size=parameters['pool_size'],
+            strides=parameters['stride']
+        )(x)
+    elif parameters['reduction_type'] == 'avg_pooling':
+        result = AveragePooling2D(
+            pool_size=parameters['pool_size'],
+            strides=parameters['stride']
+        )(x)
+    else:
+        # fail fast instead of hitting an unbound 'result' below
+        raise ValueError(
+            "unsupported reduction_type: {!r}".format(parameters['reduction_type']))
+
+    return result
diff --git a/examples/NAS-training-containers/cifar10/requirements.txt b/examples/NAS-training-containers/cifar10/requirements.txt
new file mode 100644
index 00000000000..5b44db65638
--- /dev/null
+++ b/examples/NAS-training-containers/cifar10/requirements.txt
@@ -0,0 +1,2 @@
+tensorflow-gpu
+keras
\ No newline at end of file