From eefb1e6ff31ffa512b65cf1d8ced88244d43f501 Mon Sep 17 00:00:00 2001 From: DeeperMind <1155077043@link.cuhk.edu.hk> Date: Fri, 25 Jan 2019 18:39:14 -0800 Subject: [PATCH 1/3] Training Container with Model Constructor for cifar10 --- .../cifar10/Dockerfile | 32 +++++++ .../cifar10/ModelConstructor.py | 67 +++++++++++++++ .../cifar10/RunTrial.py | 52 ++++++++++++ .../cifar10/op_library.py | 83 +++++++++++++++++++ .../cifar10/requirements.txt | 2 + 5 files changed, 236 insertions(+) create mode 100644 examples/NAS-training-containers/cifar10/Dockerfile create mode 100644 examples/NAS-training-containers/cifar10/ModelConstructor.py create mode 100644 examples/NAS-training-containers/cifar10/RunTrial.py create mode 100644 examples/NAS-training-containers/cifar10/op_library.py create mode 100644 examples/NAS-training-containers/cifar10/requirements.txt diff --git a/examples/NAS-training-containers/cifar10/Dockerfile b/examples/NAS-training-containers/cifar10/Dockerfile new file mode 100644 index 00000000000..212eb23bc02 --- /dev/null +++ b/examples/NAS-training-containers/cifar10/Dockerfile @@ -0,0 +1,32 @@ +ARG cuda_version=9.0 +ARG cudnn_version=7 +FROM nvidia/cuda:${cuda_version}-cudnn${cudnn_version}-devel + +# Install system packages +RUN apt-get update && apt-get install -y software-properties-common && \ + add-apt-repository ppa:deadsnakes/ppa && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + bzip2 \ + g++ \ + git \ + graphviz \ + libgl1-mesa-glx \ + libhdf5-dev \ + openmpi-bin \ + python3.5 \ + python3-pip \ + python3-setuptools \ + python3-dev \ + wget && \ + rm -rf /var/lib/apt/lists/* + + +ADD . /app +WORKDIR /app + +RUN pip3 install --upgrade pip +RUN pip3 install --no-cache-dir -r requirements.txt +ENV PYTHONPATH /app + +ENTRYPOINT ["python3.5", "-u", "RunTrial.py"] diff --git a/examples/NAS-training-containers/cifar10/ModelConstructor.py b/examples/NAS-training-containers/cifar10/ModelConstructor.py new file mode 100644 index 00000000000..2376b189b6c --- /dev/null +++ b/examples/NAS-training-containers/cifar10/ModelConstructor.py @@ -0,0 +1,67 @@ +import numpy as np +from keras.models import Model +from keras import backend as K +import json +from keras.layers import Input, Conv2D, ZeroPadding2D, concatenate, MaxPooling2D, \ + AveragePooling2D, Dense, Activation, BatchNormalization, GlobalAveragePooling2D, Dropout +from op_library import concat, conv, reduction + + +class ModelConstructor(object): + def __init__(self, arc_json, nn_json): + self.arch = json.loads(arc_json) + nn_config = json.loads(nn_json) + self.num_layers = nn_config['num_layers'] + self.input_size = nn_config['input_size'] + self.output_size = nn_config['output_size'][-1] + self.embedding = nn_config['embedding'] + print(">>> ModelConstructor initialized") + + def build_model(self): + # a list of the data all layers + all_layers = [0 for _ in range(self.num_layers + 1)] + # a list of all the dimensions of all layers + all_dims = [0 for _ in range(self.num_layers + 1)] + + # ================= Stacking layers ================= + # Input Layer. Layer 0 + print(">>> Input Layer") + input_layer = Input(shape=self.input_size) + all_layers[0] = input_layer + + # Intermediate Layers. Starting from layer 1. + for l in range(1, self.num_layers + 1): + print(">>> Layer {}".format(l)) + input_layers = list() + opt = self.arch[l - 1][0] + opt_config = self.embedding[str(opt)] + skip = self.arch[l - 1][1:l+1] + + # set up the connection to the previous layer first + input_layers.append(all_layers[l - 1]) + + # then add skip connections + for i in range(l - 1): + if l > 1 and skip[i] == 1: + input_layers.append(all_layers[i]) + + layer_input = concat(input_layers) + if opt_config['opt_type'] == 'convolution': + layer_output = conv(layer_input, opt_config) + elif opt_config['opt_type'] == 'reduction': + layer_output = reduction(layer_input, opt_config) + + all_layers[l] = layer_output + + # Final Layer + # Global Average Pooling, then Fully connected with softmax. + print(">>> Final Layer") + avgpooled = GlobalAveragePooling2D()(all_layers[self.num_layers]) + dropped = Dropout(0.4)(avgpooled) + logits = Dense(units=self.output_size, + activation='softmax')(dropped) + + # Encapsulate the model + self.model = Model(inputs=input_layer, outputs=logits) + + return self.model diff --git a/examples/NAS-training-containers/cifar10/RunTrial.py b/examples/NAS-training-containers/cifar10/RunTrial.py new file mode 100644 index 00000000000..3354fc7dfc9 --- /dev/null +++ b/examples/NAS-training-containers/cifar10/RunTrial.py @@ -0,0 +1,52 @@ +import keras +import numpy as np +from keras.datasets import cifar10 +from ModelConstructor import ModelConstructor +from keras.utils import to_categorical +import argparse +import time + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='TrainingContainer') + parser.add_argument('--architecture', type=str, default="", metavar='N', + help='architecture of the neural network') + parser.add_argument('--nn_config', type=str, default="", metavar='N', + help='configurations and search space embeddings') + args = parser.parse_args() + + arch = args.architecture.replace("\'", "\"") + print(">>> arch received by trial") + print(arch) + nn_config = args.nn_config.replace("\'", "\"") + print(">>> nn_config received by trial") + print(nn_config) + + constructor = ModelConstructor(arch, nn_config) + test_model = constructor.build_model() + + test_model.summary() + test_model.compile(loss=keras.losses.categorical_crossentropy, + optimizer=keras.optimizers.Adam(lr=1e-3, decay=1e-4), + metrics=['accuracy']) + + x_train, y_train, x_test, y_test = cifar10.load_data() + x_train = x_train.astype('float32') + x_test = x_test.astype('float32') + x_train /= 255 + x_test /= 255 + y_train = to_categorical(y_train) + y_test = to_categorical(y_test) + print(">>> Data Loaded.") + + print(">>> Start to train") + num_epochs = 10 + for e in range(num_epochs): + print("\nTotal Epoch {}/{}".format(e+1, num_epochs)) + history = test_model.fit(x=x_train, y=y_train, + shuffle=True, batch_size=128, + epochs=1, verbose=1, + validation_data=(x_test, y_test)) + print("Training-Accuracy={}".format(history.history['acc'][-1])) + print("Training-Loss={}".format(history.history['loss'][-1])) + print("Validation-Accuracy={}".format(history.history['val_acc'][-1])) + print("Validation-Loss={}".format(history.history['val_loss'][-1])) diff --git a/examples/NAS-training-containers/cifar10/op_library.py b/examples/NAS-training-containers/cifar10/op_library.py new file mode 100644 index 00000000000..97ed0d28e89 --- /dev/null +++ b/examples/NAS-training-containers/cifar10/op_library.py @@ -0,0 +1,83 @@ +import numpy as np +from keras import backend as K +from keras.layers import Input, Conv2D, ZeroPadding2D, concatenate, MaxPooling2D, \ + AveragePooling2D, Dense, Activation, BatchNormalization, GlobalAveragePooling2D + + +def concat(inputs): + n = len(inputs) + if n == 1: + return inputs[0] + + total_dim = list() + for x in inputs: + total_dim.append(K.int_shape(x)) + total_dim = np.asarray(total_dim) + max_dim = max(total_dim[:, 1]) + + padded_input = [0 for _ in range(n)] + + for i in range(n): + if total_dim[i][1] < max_dim: + diff = max_dim - total_dim[i][1] + half_diff = int(diff / 2) + if diff % 2 == 0: + padded_input[i] = ZeroPadding2D(padding=(half_diff, half_diff))(inputs[i]) + else: + padded_input[i] = ZeroPadding2D(padding=((half_diff, half_diff + 1), + (half_diff, half_diff + 1)))(inputs[i]) + else: + padded_input[i] = inputs[i] + + result = concatenate(inputs=padded_input, axis=-1) + return result + + +def conv(x, config): + parameters = { + "num_filter": 64, + "filter_size": 3, + "stride": 1, + } + for k in parameters.keys(): + if k in config: + parameters[k] = int(config[k]) + + activated = Activation('relu')(x) + + conved = Conv2D( + filters=parameters['num_filter'], + kernel_size=parameters['filter_size'], + strides=parameters['stride'], + padding='same')(activated) + + result = BatchNormalization()(conved) + + return result + + +def reduction(x, config): + parameters = { + 'reduction_type': "max_pooling", + 'pool_size': 2, + 'stride': None, + } + if 'reduction_type' in config: + parameters['reduction_type'] = config['reduction_type'] + if 'pool_size' in config: + parameters['pool_size'] = int(config['pool_size']) + if 'stride' in config: + parameters['stride'] = int(config['stride']) + + if parameters['reduction_type'] == 'max_pooling': + result = MaxPooling2D( + pool_size=parameters['pool_size'], + strides=parameters['stride'] + )(x) + elif parameters['reduction_type'] == 'avg_pooling': + result = AveragePooling2D( + pool_size=parameters['pool_size'], + strides=parameters['stride'] + )(x) + + return result diff --git a/examples/NAS-training-containers/cifar10/requirements.txt b/examples/NAS-training-containers/cifar10/requirements.txt new file mode 100644 index 00000000000..5b44db65638 --- /dev/null +++ b/examples/NAS-training-containers/cifar10/requirements.txt @@ -0,0 +1,2 @@ +tensorflow-gpu +keras \ No newline at end of file From 1a8e63251f981301db0564fde4566473d321bb28 Mon Sep 17 00:00:00 2001 From: DeeperMind <1155077043@link.cuhk.edu.hk> Date: Fri, 25 Jan 2019 19:08:58 -0800 Subject: [PATCH 2/3] fix a small bug --- examples/NAS-training-containers/cifar10/RunTrial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/NAS-training-containers/cifar10/RunTrial.py b/examples/NAS-training-containers/cifar10/RunTrial.py index 3354fc7dfc9..94952cbafca 100644 --- a/examples/NAS-training-containers/cifar10/RunTrial.py +++ b/examples/NAS-training-containers/cifar10/RunTrial.py @@ -29,7 +29,7 @@ optimizer=keras.optimizers.Adam(lr=1e-3, decay=1e-4), metrics=['accuracy']) - x_train, y_train, x_test, y_test = cifar10.load_data() + (x_train, y_train), (x_test, y_test) = cifar10.load_data() x_train = x_train.astype('float32') x_test = x_test.astype('float32') x_train /= 255 From c1b9eb245fe868d4f59b5130d59040853ea82b57 Mon Sep 17 00:00:00 2001 From: DeeperMind <1155077043@link.cuhk.edu.hk> Date: Tue, 29 Jan 2019 14:18:58 -0800 Subject: [PATCH 3/3] make num_epochs a parameter --- .../cifar10/ModelConstructor.py | 4 ---- .../NAS-training-containers/cifar10/RunTrial.py | 13 ++++++++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/examples/NAS-training-containers/cifar10/ModelConstructor.py b/examples/NAS-training-containers/cifar10/ModelConstructor.py index 2376b189b6c..cbc4bd7c247 100644 --- a/examples/NAS-training-containers/cifar10/ModelConstructor.py +++ b/examples/NAS-training-containers/cifar10/ModelConstructor.py @@ -15,7 +15,6 @@ def __init__(self, arc_json, nn_json): self.input_size = nn_config['input_size'] self.output_size = nn_config['output_size'][-1] self.embedding = nn_config['embedding'] - print(">>> ModelConstructor initialized") def build_model(self): # a list of the data all layers @@ -25,13 +24,11 @@ def build_model(self): # ================= Stacking layers ================= # Input Layer. Layer 0 - print(">>> Input Layer") input_layer = Input(shape=self.input_size) all_layers[0] = input_layer # Intermediate Layers. Starting from layer 1. for l in range(1, self.num_layers + 1): - print(">>> Layer {}".format(l)) input_layers = list() opt = self.arch[l - 1][0] opt_config = self.embedding[str(opt)] @@ -55,7 +52,6 @@ def build_model(self): # Final Layer # Global Average Pooling, then Fully connected with softmax. - print(">>> Final Layer") avgpooled = GlobalAveragePooling2D()(all_layers[self.num_layers]) dropped = Dropout(0.4)(avgpooled) logits = Dense(units=self.output_size, diff --git a/examples/NAS-training-containers/cifar10/RunTrial.py b/examples/NAS-training-containers/cifar10/RunTrial.py index 94952cbafca..4a111139c64 100644 --- a/examples/NAS-training-containers/cifar10/RunTrial.py +++ b/examples/NAS-training-containers/cifar10/RunTrial.py @@ -12,17 +12,26 @@ help='architecture of the neural network') parser.add_argument('--nn_config', type=str, default="", metavar='N', help='configurations and search space embeddings') + parser.add_argument('--num_epochs', type=int, default=10, metavar='N', + help='number of epoches that each child will be trained') args = parser.parse_args() arch = args.architecture.replace("\'", "\"") print(">>> arch received by trial") print(arch) + nn_config = args.nn_config.replace("\'", "\"") print(">>> nn_config received by trial") print(nn_config) + num_epochs = args.num_epochs + print(">>> num_epochs received by trial") + print(num_epochs) + + print(">>> Constructing Model...") constructor = ModelConstructor(arch, nn_config) test_model = constructor.build_model() + print(">>> Model Constructed Successfully") test_model.summary() test_model.compile(loss=keras.losses.categorical_crossentropy, @@ -36,10 +45,8 @@ x_test /= 255 y_train = to_categorical(y_train) y_test = to_categorical(y_test) - print(">>> Data Loaded.") - print(">>> Start to train") - num_epochs = 10 + print(">>> Data Loaded. Training start.") for e in range(num_epochs): print("\nTotal Epoch {}/{}".format(e+1, num_epochs)) history = test_model.fit(x=x_train, y=y_train,