From 594773f817e1dc306860be661358c335a44737f1 Mon Sep 17 00:00:00 2001 From: Bing Xu Date: Wed, 23 Sep 2015 23:35:17 -0600 Subject: [PATCH] [NDArray] add clip op --- example/notebooks/alexnet.ipynb | 579 --------------------------- example/notebooks/cifar-recipe.ipynb | 57 +-- python/mxnet/__init__.py | 2 + python/mxnet/ndarray.py | 13 + python/mxnet/optimizer.py | 13 +- src/ndarray/ndarray.cc | 3 +- src/ndarray/ndarray_function-inl.h | 2 + src/ndarray/ndarray_function.h | 10 + 8 files changed, 70 insertions(+), 609 deletions(-) delete mode 100644 example/notebooks/alexnet.ipynb diff --git a/example/notebooks/alexnet.ipynb b/example/notebooks/alexnet.ipynb deleted file mode 100644 index e6f2ad94e296..000000000000 --- a/example/notebooks/alexnet.ipynb +++ /dev/null @@ -1,579 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Basic AlexNet Example\n", - "--------\n", - "\n", - "This notebook shows how to use MXNet construct AlexNet. AlexNet is made by Alex Krizhevsky in 2012.\n", - "\n", - "We will show how to train AlexNet in Python with single/multi GPU. All you need is to write a piece of Python code to describe network, then MXNet will help you finish all work without any of your effort. \n", - "\n", - "Notice: This notebook is a basic demo to show MXNet flavor. To train a full state-of-art network, please refer our ```Inception``` example.\n", - "\n", - "Generally, we need \n", - "\n", - "- Declare symbol network\n", - "- Declare data iterator\n", - "- Bind symbol network to device to model\n", - "- Fit the model" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import mxnet as mx" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we have successully load MXNet. we will start declare a symbolic network. " - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "input_data = mx.symbol.Variable(name=\"data\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We use a special symbol ```Variable``` to represent input data." 
- ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# stage 1\n", - "conv1 = mx.symbol.Convolution(data=input_data, kernel=(11, 11), stride=(4, 4), num_filter=96)\n", - "relu1 = mx.symbol.Activation(data=conv1, act_type=\"relu\")\n", - "pool1 = mx.symbol.Pooling(data=relu1, pool_type=\"max\", kernel=(3, 3), stride=(2,2))\n", - "lrn1 = mx.symbol.LRN(data=pool1, alpha=0.0001, beta=0.75, knorm=1, nsize=5)\n", - "# stage 2\n", - "conv2 = mx.symbol.Convolution(data=lrn1, kernel=(5, 5), pad=(2, 2), num_filter=256)\n", - "relu2 = mx.symbol.Activation(data=conv2, act_type=\"relu\")\n", - "pool2 = mx.symbol.Pooling(data=relu2, kernel=(3, 3), stride=(2, 2))\n", - "lrn2 = mx.symbol.LRN(data=pool2, alpha=0.0001, beta=0.75, knorm=1, nsize=5)\n", - "# stage 3\n", - "conv3 = mx.symbol.Convolution(data=lrn2, kernel=(3, 3), pad=(1, 1), num_filter=384)\n", - "relu3 = mx.symbol.Activation(data=conv3, act_type=\"relu\")\n", - "conv4 = mx.symbol.Convolution(data=relu3, kernel=(3, 3), pad=(1, 1), num_filter=384)\n", - "relu4 = mx.symbol.Activation(data=conv4, act_type=\"relu\")\n", - "conv5 = mx.symbol.Convolution(data=relu4, kernel=(3, 3), pad=(1, 1), num_filter=256)\n", - "relu5 = mx.symbol.Activation(data=conv5, act_type=\"relu\")\n", - "pool3 = mx.symbol.Pooling(data=relu5, kernel=(3, 3), stride=(2, 2))\n", - "# stage 4\n", - "flatten = mx.symbol.Flatten(data=pool3)\n", - "fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=4096)\n", - "relu6 = mx.symbol.Activation(data=fc1, act_type=\"relu\")\n", - "dropout1 = mx.symbol.Dropout(data=relu6, p=0.5)\n", - "# stage 5\n", - "fc2 = mx.symbol.FullyConnected(data=dropout1, num_hidden=4096)\n", - "relu7 = mx.symbol.Activation(data=fc2, act_type=\"relu\")\n", - "dropout2 = mx.symbol.Dropout(data=relu7, p=0.5)\n", - "# stage 6\n", - "fc3 = mx.symbol.FullyConnected(data=dropout2, num_hidden=1000)\n", - "softmax = mx.symbol.Softmax(data=fc3)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we have a AlexNet in symbolic level. The ```softmax``` symbol contains all network structures. By indicate ```data``` for each symbol, the last symbol composite all info we need. We can visualize our network structure. 
(require ```graphviz``` package)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "[SVG rendering of the AlexNet graph omitted: data feeds the Convolution/Activation/Pooling/LRN stages, then Flatten, two FullyConnected(4096)+Dropout blocks, FullyConnected(1000) and the final Softmax]\n" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mx.viz.plot_network(softmax)" - ] - },
- { - "cell_type": "markdown", - "metadata": { - "collapsed": true - }, - "source": [ - "The next step is declare data iterator. We provide high perfomance RecordIO image iterator for ImageNet task. Please pack the images into record file before use. For how to pack image and more details about image data iterator and build-in io iterator, please read [io doc](https://github.com/dmlc/mxnet/blob/master/doc/python/io.md)" - ] - },
- { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# We set batch size for to 256\n", - "batch_size = 256\n", - "# We need to set correct path to image record file\n", - "# For ```mean_image```. if it doesn't exist, the iterator will generate one\n", - "# On HDD, single thread is able to process 800 images / sec\n", - "# the input shape is in format (channel, height, width)\n", - "# rand_crop option make source image randomly cropped to input_shape (3, 224, 224)\n", - "# rand_mirror option make source image randomly mirrored\n", - "# We use 2 threads to processing our data\n", - "train_dataiter = mx.io.ImageRecordIter(\n", - " shuffle=True,\n", - " path_imgrec=\"./Data/ImageNet/train.rec\",\n", - " mean_img=\"./Data/ImageNet/mean_224.bin\",\n", - " rand_crop=True,\n", - " rand_mirror=True,\n", - " data_shape=(3, 224, 224),\n", - " batch_size=batch_size,\n", - " prefetch_buffer=4,\n", - " preprocess_threads=2)\n", - "# similarly, we can declare our validation iterator\n", - "val_dataiter = mx.io.ImageRecordIter(\n", - " path_imgrec=\"./Data/ImageNet/val.rec\",\n", - " mean_img=\"./Data/ImageNet/mean_224.bin\",\n", - " rand_crop=False,\n", - " rand_mirror=False,\n", - " data_shape=(3, 224, 224),\n", - " batch_size=batch_size,\n", - " prefetch_buffer=4,\n", - " preprocess_threads=2)" - ] - },
- { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next step, we will initialize our model from symbol. 
To run on a single GPU, we need to declare:" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# For demo purpose, we just run 1 epoch\n", - "num_round = 1\n", - "# set context to GPU, if you want to use cpu, set it to mx.cpu()\n", - "ctx = mx.gpu() \n", - "# note: for input shape in model, we must contain batch size\n", - "data_shape = (batch_size, 3, 224, 224)\n", - "\n", - "model = mx.model.FeedForward(symbol=softmax, ctx=ctx, input_shape=data_shape, num_round=num_round,\n", - " learning_rate=0.01, momentum=0.9, wd=0.0001)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To run on multiply GPU, we need to declare" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "# For demo purpose, we just run 1 epoch\n", - "num_round = 1\n", - "# Assume we have 4 GPU, we can make a context list contains 4 device\n", - "num_devs = 4\n", - "ctx = [mx.gpu(i) for i in range(num_devs)]\n", - "# note: for input shape in model, we must contain batch size\n", - "data_shape = (batch_size, 3, 224, 224)\n", - "\n", - "model = mx.model.FeedForward(symbol=softmax, ctx=ctx, input_shape=data_shape, num_round=num_round,\n", - " learning_rate=0.01, momentum=0.9, wd=0.0001)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "ename": "MXNetError", - "evalue": "[12:00:28] src/ndarray/ndarray.cc:157: Check failed: from.shape() == to->shape() operands shape mismatch", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mMXNetError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;31m# In this case, eval_data is also a data iterator\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;31m# We will use accuracy to measure our model's performace\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtrain_dataiter\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0meval_data\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mval_dataiter\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0meval_metric\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'acc'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;32m/home/bing/wtf/mxnet/python/mxnet/model.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, X, y, eval_data, eval_metric, verbose)\u001b[0m\n\u001b[0;32m 304\u001b[0m \u001b[0mtrain_data\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0meval_data\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0meval_data\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 305\u001b[0m \u001b[0meval_metric\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0meval_metric\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 306\u001b[1;33m verbose=verbose)\n\u001b[0m", - "\u001b[1;32m/home/bing/wtf/mxnet/python/mxnet/model.py\u001b[0m in \u001b[0;36m_train\u001b[1;34m(symbol, ctx, input_shape, arg_params, aux_params, 
begin_round, end_round, optimizer, train_data, eval_data, eval_metric, iter_end_callback, verbose)\u001b[0m\n\u001b[0;32m 85\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mweight\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mzip\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marg_names\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0marg_arrays\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 86\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mkey\u001b[0m \u001b[1;32min\u001b[0m \u001b[0marg_params\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 87\u001b[1;33m \u001b[0marg_params\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcopyto\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mweight\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 88\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mweight\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mzip\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maux_names\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maux_arrays\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 89\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mkey\u001b[0m \u001b[1;32min\u001b[0m \u001b[0maux_params\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m/home/bing/wtf/mxnet/python/mxnet/ndarray.py\u001b[0m in \u001b[0;36mcopyto\u001b[1;34m(self, other)\u001b[0m\n\u001b[0;32m 306\u001b[0m RuntimeWarning)\n\u001b[0;32m 307\u001b[0m \u001b[1;32mreturn\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 308\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mNDArray\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_copyto\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mother\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 309\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mother\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mContext\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 310\u001b[0m \u001b[0mhret\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mNDArray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_new_alloc_handle\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mother\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m/home/bing/wtf/mxnet/python/mxnet/ndarray.py\u001b[0m in \u001b[0;36mgeneric_ndarray_function\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 618\u001b[0m \u001b[0mc_array\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mNDArrayHandle\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhandle\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0muse_vars_range\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 619\u001b[0m 
\u001b[0mc_array\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmx_float\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mscalar_range\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 620\u001b[1;33m c_array(NDArrayHandle, [v.handle for v in mutate_vars])))\n\u001b[0m\u001b[0;32m 621\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mn_mutate_vars\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 622\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mmutate_vars\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32m/home/bing/wtf/mxnet/python/mxnet/base.py\u001b[0m in \u001b[0;36mcheck_call\u001b[1;34m(ret)\u001b[0m\n\u001b[0;32m 95\u001b[0m \"\"\"\n\u001b[0;32m 96\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mret\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 97\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mMXNetError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpy_str\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_LIB\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mMXGetLastError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 98\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 99\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mc_str\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstring\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;31mMXNetError\u001b[0m: [12:00:28] src/ndarray/ndarray.cc:157: Check failed: from.shape() == to->shape() operands shape mismatch" - ] - } - ], - "source": [ - "# Now we can fit the model with data iterators\n", - "# When we use data iterator, we don't need to set y because label comes from data iterator directly\n", - "# In this case, eval_data is also a data iterator\n", - "# We will use accuracy to measure our model's performace\n", - "model.fit(X=train_dataiter, eval_data=val_dataiter, eval_metric='acc')\n", - "# You need to wait for a while to get the result" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "That's all!" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.4.2" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/example/notebooks/cifar-recipe.ipynb b/example/notebooks/cifar-recipe.ipynb index fccdfcb47e43..05097b026042 100644 --- a/example/notebooks/cifar-recipe.ipynb +++ b/example/notebooks/cifar-recipe.ipynb @@ -237,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": { "collapsed": false }, @@ -247,16 +247,16 @@ "output_type": "stream", "text": [ "INFO:root:Start training with [gpu(0)]\n", - "INFO:root:Batch [50]\tSpeed: 1091.84 samples/sec\n", - "INFO:root:Batch [100]\tSpeed: 1084.80 samples/sec\n", - "INFO:root:Batch [150]\tSpeed: 1084.55 samples/sec\n", - "INFO:root:Batch [200]\tSpeed: 1077.30 samples/sec\n", - "INFO:root:Batch [250]\tSpeed: 1074.73 samples/sec\n", - "INFO:root:Batch [300]\tSpeed: 1075.67 samples/sec\n", - "INFO:root:Batch [350]\tSpeed: 1067.09 samples/sec\n", - "INFO:root:Iteration[0] Train-accuracy=0.525695\n", - "INFO:root:Iteration[0] Time cost=47.012\n", - "INFO:root:Iteration[0] Validation-accuracy=0.660008\n" + "INFO:root:Batch [50]\tSpeed: 1003.50 samples/sec\n", + "INFO:root:Batch [100]\tSpeed: 976.31 samples/sec\n", + "INFO:root:Batch [150]\tSpeed: 975.57 samples/sec\n", + "INFO:root:Batch [200]\tSpeed: 964.21 samples/sec\n", + "INFO:root:Batch [250]\tSpeed: 963.53 samples/sec\n", + "INFO:root:Batch [300]\tSpeed: 963.95 samples/sec\n", + "INFO:root:Batch [350]\tSpeed: 963.71 samples/sec\n", + "INFO:root:Iteration[0] Train-accuracy=0.520520\n", + "INFO:root:Iteration[0] Time cost=52.424\n", + "INFO:root:Iteration[0] Validation-accuracy=0.652393\n" ] } ], @@ -272,14 +272,14 @@ "# eval_data=test_dataiter,\n", "# eval_metric=\"accuracy\",\n", "# epoch_end_callback=mx.helper.Speedometer(batch_size),\n", - "# iter_end_callback=mx.model.do_checkpoint(model_prefix))\n" + "# iter_end_callback=mx.callback.do_checkpoint(model_prefix))\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "After only 1 epoch, our model is able to acheive about 66% accuracy on testset.\n", + "After only 1 epoch, our model is able to acheive about 65% accuracy on testset.\n", "We can save our model by calling either ```save``` or using ```pickle```.\n" ] }, @@ -348,7 +348,7 @@ "output_type": "stream", "text": [ "INFO:root:Finish predict...\n", - "INFO:root:final accuracy = 0.651000\n" + "INFO:root:final accuracy = 0.652600\n" ] } ], @@ -385,33 +385,36 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { - "ename": "TypeError", - "evalue": "Symbol only support integer index to fetch i-th output", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[0minternals\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0msoftmax\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_internals\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mfea_symbol\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0minternals\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"global_avg_output\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m feature_extractor = mx.model.FeedForward(ctx=mx.gpu(), symbol=group, \n", - "\u001b[1;32m/home/bing/wtf/mxnet/python/mxnet/symbol.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, index)\u001b[0m\n\u001b[0;32m 156\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 157\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mint\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 158\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Symbol only support integer index to fetch i-th output'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 159\u001b[0m \u001b[0mhandle\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mSymbolHandle\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 160\u001b[0m check_call(_LIB.MXSymbolGetOutput(\n", - "\u001b[1;31mTypeError\u001b[0m: Symbol only support integer index to fetch i-th output" + "name": "stdout", + "output_type": "stream", + "text": [ + "(10000, 336, 1, 1)\n" ] } ], "source": [ - "# predict internal featuremaps\n", + "# Predict internal featuremaps\n", + "# From a symbol, we are able to get all internals. Note it is still a symbol\n", "internals = softmax.get_internals()\n", - "\n", + "# We get get an internal symbol for the feature.\n", + "# By default, the symbol is named as \"symbol_name + _output\"\n", + "# in this case we'd like to get global_avg\" layer's output as feature, so its \"global_avg_output\"\n", + "# You may call ```internals.list_outputs()``` to find the target\n", + "# but we strongly suggests set a special name for special symbol \n", "fea_symbol = internals[\"global_avg_output\"]\n", "\n", - "feature_extractor = mx.model.FeedForward(ctx=mx.gpu(), symbol=group, \n", + "# Make a new model by using an internal symbol. We can reuse all parameters from model we trained before\n", + "# In this case, we must set ```allow_extra_params``` to True\n", + "feature_extractor = mx.model.FeedForward(ctx=mx.gpu(), symbol=fea_symbol, \n", " arg_params=model.arg_params, aux_params=model.aux_params,\n", " allow_extra_params=True)\n", + "# Predict as normal\n", "global_pooling_feature = feature_extractor.predict(test_dataiter)\n", "print(global_pooling_feature.shape)" ] diff --git a/python/mxnet/__init__.py b/python/mxnet/__init__.py index 7bca6efbb46d..b87b9dad924c 100644 --- a/python/mxnet/__init__.py +++ b/python/mxnet/__init__.py @@ -22,6 +22,8 @@ from . import optimizer from . import model from . import initializer +# use mx.init as short for mx.initializer +from . import initializer as init from . import visualization # use viz as short for mx.ndarray from . 
import visualization as viz
diff --git a/python/mxnet/ndarray.py b/python/mxnet/ndarray.py
index 2e9659bfaf2c..150210fc09e3 100644
--- a/python/mxnet/ndarray.py
+++ b/python/mxnet/ndarray.py
@@ -243,6 +243,16 @@ def _slice(self, start, stop):
             self.handle, start, stop, ctypes.byref(handle)))
         return NDArray(handle=handle, writable=self.writable)
 
+    def clip(self, value):
+        """Clip the NDArray to the range [-value, value]; NaN entries become 0.
+
+        Parameters
+        ----------
+        value : float
+            clipping range; entries are limited to [-value, value]
+        """
+        return NDArray._clip_scalar(self, float(value))
+
     def wait_to_read(self):
         """Block until all pending writes operations on current NDArray are finished.
 
@@ -636,6 +646,9 @@ def generic_ndarray_function(*args, **kwargs):
     ret_function.__name__ = func_name
     ret_function.__doc__ = doc_str
     return ret_function
+
+
+
 # pylint: enable=too-many-locals, invalid-name
 
 def _init_ndarray_module():
diff --git a/python/mxnet/optimizer.py b/python/mxnet/optimizer.py
index 5dc444e21620..6c317f493d02 100644
--- a/python/mxnet/optimizer.py
+++ b/python/mxnet/optimizer.py
@@ -34,14 +34,19 @@ class SGD(Optimizer):
     rescale_grad : float, optional
         rescaling factor of gradient.
+
+    clip_gradient : float, optional
+        clip the gradient to the range [-clip_gradient, clip_gradient]
     """
     def __init__(self, learning_rate=0.01, momentum=0.0,
-                 wd=0.0001, rescale_grad=1, lr_scheduler=None):
+                 wd=0.0001, rescale_grad=1, clip_gradient=None,
+                 lr_scheduler=None):
         super(SGD, self).__init__()
         self.lr = learning_rate
         self.momentum = momentum
         self.wd = wd
         self.rescale_grad = rescale_grad
+        self.clip_gradient = clip_gradient
         self.lr_scheduler = lr_scheduler
         if lr_scheduler != None:
             self.lr_scheduler.base_lr = learning_rate
@@ -89,7 +94,11 @@ def update(self, index, weight, grad, state):
         if state:
             mom = state
             mom[:] *= self.momentum
-            mom[:] += -lr * (grad * self.rescale_grad + self.wd * weight)
+            if self.clip_gradient is None:
+                mom[:] += -lr * (grad * self.rescale_grad + self.wd * weight)
+            else:
+                mom[:] += -lr * (grad.clip(self.clip_gradient) * self.rescale_grad +
+                                 self.wd * weight)
             weight[:] += mom
         else:
             assert self.momentum == 0.0
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index 210d4b7926f3..335baef17198 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -315,6 +315,7 @@ NDArray operator*(const NDArray &lhs, const real_t &rhs) {
   return ScalarOpRet(lhs, rhs);
 }
 NDArray operator/(const NDArray &lhs, const real_t &rhs) {
   return ScalarOpRet(lhs, rhs);
 }
+
 // Binary
 NDArray &NDArray::operator=(real_t scalar) {
   SetValueOp(scalar, this);
@@ -510,7 +511,7 @@
 MXNET_REGISTER_NDARRAY_FUN(_plus_scalar).set_function(ScalarOp);
 MXNET_REGISTER_NDARRAY_FUN(_mul_scalar).set_function(ScalarOp);
 MXNET_REGISTER_NDARRAY_FUN(_div_scalar).set_function(ScalarOp);
-
+MXNET_REGISTER_NDARRAY_FUN(_clip_scalar).set_function(ScalarOp);
 // register API function
 // scalar, reverse scalar
 MXNET_REGISTER_NDARRAY_FUN(_rminus_scalar).set_function(ScalarOp);
diff --git a/src/ndarray/ndarray_function-inl.h b/src/ndarray/ndarray_function-inl.h
index 34a81af1bb39..604173c9e68a 100644
--- a/src/ndarray/ndarray_function-inl.h
+++ b/src/ndarray/ndarray_function-inl.h
@@ -100,11 +100,13 @@ DECL_SCALAR(DEVICE, Plus, EvalScalar_, true)
 DECL_SCALAR(DEVICE, Minus, EvalScalar_, true)
 DECL_SCALAR(DEVICE, Mul, EvalScalar_, true)
 DECL_SCALAR(DEVICE, Div, EvalScalar_, true)
+DECL_SCALAR(DEVICE, Clip, EvalScalar_, true)
 // for reverse seq
 DECL_SCALAR(DEVICE, Plus, EvalScalar_, false)
 DECL_SCALAR(DEVICE, Minus, EvalScalar_, false)
 DECL_SCALAR(DEVICE, Mul, EvalScalar_, false)
 DECL_SCALAR(DEVICE, Div, EvalScalar_, false)
+DECL_SCALAR(DEVICE, Clip, EvalScalar_, false)
 
 } // namespace ndarray
 } // namespace mxnet
diff --git a/src/ndarray/ndarray_function.h b/src/ndarray/ndarray_function.h
index a54766c75002..e7a405b2b115 100644
--- a/src/ndarray/ndarray_function.h
+++ b/src/ndarray/ndarray_function.h
@@ -37,6 +37,16 @@ struct Div : public BinaryBase {
   typedef mshadow::op::div mshadow_op;
 };
+struct Clip : public BinaryBase {
+  struct mshadow_op {
+    MSHADOW_XINLINE static real_t Map(real_t a, real_t b) {
+      if (isnan(a)) return 0.0f;
+      if (a < -b) return -b;
+      if (a > b) return b;
+      return a;
+    }
+  };
+};
 
 // type holder for random number generators
 struct UniformDistribution {};
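
A minimal usage sketch of the new clip op and the SGD clip_gradient option, assuming an mxnet Python package built with this patch; the sample values and the mx.ndarray.array / asnumpy helpers used to move data in and out are illustrative only, not part of this change.

    import numpy as np
    import mxnet as mx

    # Clip an NDArray to [-1, 1]; per ndarray::Clip above, NaN entries become 0.
    a = mx.ndarray.array(np.array([-3.0, -0.5, 0.5, 3.0, np.nan], dtype=np.float32))
    b = a.clip(1.0)        # dispatches to the registered _clip_scalar function
    print(b.asnumpy())     # expected: [-1.  -0.5  0.5  1.   0. ]

    # SGD with gradient clipping: each gradient entry is clipped to
    # [-clip_gradient, clip_gradient] before momentum and weight decay are applied.
    opt = mx.optimizer.SGD(learning_rate=0.01, momentum=0.9,
                           wd=0.0001, clip_gradient=1.0)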