diff --git a/code/chapter09_computer-vision/9.11_neural-style.ipynb b/code/chapter09_computer-vision/9.11_neural-style.ipynb new file mode 100644 index 000000000..341b21c0d --- /dev/null +++ b/code/chapter09_computer-vision/9.11_neural-style.ipynb @@ -0,0 +1,2177 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 9.11 样式迁移" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cuda 1.1.0\n" + ] + } + ], + "source": [ + "%matplotlib inline\n", + "import time\n", + "import torch\n", + "import torch.nn.functional as F\n", + "import torchvision\n", + "import numpy as np\n", + "from PIL import Image\n", + "\n", + "import sys\n", + "sys.path.append(\"..\") \n", + "import d2lzh_pytorch as d2l\n", + "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 均已测试\n", + "\n", + "print(device, torch.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 9.11.2 读取内容图像和样式图像" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "d2l.set_figsize()\n", + "content_img = Image.open('../../data/rainier.jpg')\n", + "d2l.plt.imshow(content_img);" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "d2l.set_figsize()\n", + "style_img = Image.open('../../data/autumn_oak.jpg')\n", + "d2l.plt.imshow(style_img);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 9.11.3. 预处理和后处理图像" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "rgb_mean = np.array([0.485, 0.456, 0.406])\n", + "rgb_std = np.array([0.229, 0.224, 0.225])\n", + "\n", + "def preprocess(PIL_img, image_shape):\n", + " process = torchvision.transforms.Compose([\n", + " torchvision.transforms.Resize(image_shape),\n", + " torchvision.transforms.ToTensor(),\n", + " torchvision.transforms.Normalize(mean=rgb_mean, std=rgb_std)])\n", + "\n", + " return process(PIL_img).unsqueeze(dim = 0) # (batch_size, 3, H, W)\n", + "\n", + "def postprocess(img_tensor):\n", + " inv_normalize = torchvision.transforms.Normalize(\n", + " mean= -rgb_mean / rgb_std,\n", + " std= 1/rgb_std)\n", + " to_PIL_image = torchvision.transforms.ToPILImage()\n", + " return to_PIL_image(inv_normalize(img_tensor[0].cpu()).clamp(0, 1))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 9.11.4 抽取特征" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/data1/tangss/PyTorch_pretrainedmodels\r\n" + ] + } + ], + "source": [ + "!echo $TORCH_HOME # 将会把预训练好的模型下载到此处(没有输出的话默认是.cache/torch)\n", + "pretrained_net = torchvision.models.vgg19(pretrained=True, progress=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "VGG(\n", + " (features): Sequential(\n", + " (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (1): ReLU(inplace)\n", + " (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (3): ReLU(inplace)\n", + " (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (6): ReLU(inplace)\n", + " (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (8): ReLU(inplace)\n", + " (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (11): ReLU(inplace)\n", + " (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (13): ReLU(inplace)\n", + " (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (15): ReLU(inplace)\n", + " (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (17): ReLU(inplace)\n", + " (18): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " (19): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (20): ReLU(inplace)\n", + " (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (22): ReLU(inplace)\n", + " (23): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (24): ReLU(inplace)\n", + " (25): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (26): ReLU(inplace)\n", + " (27): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (29): ReLU(inplace)\n", + " (30): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (31): ReLU(inplace)\n", + " (32): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (33): ReLU(inplace)\n", + " (34): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n", + " (35): ReLU(inplace)\n", + " (36): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)\n", + " )\n", + " (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))\n", + " (classifier): Sequential(\n", + " (0): Linear(in_features=25088, out_features=4096, bias=True)\n", + " (1): ReLU(inplace)\n", + " (2): Dropout(p=0.5)\n", + " (3): Linear(in_features=4096, out_features=4096, bias=True)\n", + " (4): ReLU(inplace)\n", + " (5): Dropout(p=0.5)\n", + " (6): Linear(in_features=4096, out_features=1000, bias=True)\n", + " )\n", + ")" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pretrained_net" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "style_layers, content_layers = [0, 5, 10, 19, 28], [25]" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "net_list = []\n", + "for i in range(max(content_layers + style_layers) + 1):\n", + " net_list.append(pretrained_net.features[i])\n", + "net = torch.nn.Sequential(*net_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def extract_features(X, content_layers, style_layers):\n", + " contents = []\n", + " styles = []\n", + " for i in range(len(net)):\n", + " X = net[i](X)\n", + " if i in style_layers:\n", + " styles.append(X)\n", + " if i in content_layers:\n", + " contents.append(X)\n", + " return contents, styles" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def get_contents(image_shape, device):\n", + " content_X = preprocess(content_img, image_shape).to(device)\n", + " contents_Y, _ = extract_features(content_X, content_layers, style_layers)\n", + " return content_X, contents_Y\n", + "\n", + "def get_styles(image_shape, device):\n", + " style_X = preprocess(style_img, image_shape).to(device)\n", + " _, styles_Y = extract_features(style_X, content_layers, style_layers)\n", + " return style_X, styles_Y" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 9.11.5 定义损失函数\n", + "### 9.11.5.1 内容损失" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def content_loss(Y_hat, Y):\n", + " return F.mse_loss(Y_hat, Y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 9.11.5.2 样式损失" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def gram(X):\n", + " num_channels, n = X.shape[1], X.shape[2] * X.shape[3]\n", + " X = X.view(num_channels, n)\n", + " return torch.matmul(X, X.t()) / (num_channels * n)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def style_loss(Y_hat, gram_Y):\n", + " return F.mse_loss(gram(Y_hat), gram_Y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 9.11.5.3 总变差损失" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def tv_loss(Y_hat):\n", + " return 0.5 * (F.l1_loss(Y_hat[:, :, 1:, :], Y_hat[:, :, :-1, :]) + \n", + " F.l1_loss(Y_hat[:, :, :, 1:], Y_hat[:, :, :, :-1]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 9.11.5.4 损失函数" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "content_weight, style_weight, tv_weight = 1, 1e3, 10\n", + "\n", + "def compute_loss(X, contents_Y_hat, styles_Y_hat, contents_Y, styles_Y_gram):\n", + " # 分别计算内容损失、样式损失和总变差损失\n", + " contents_l = [content_loss(Y_hat, Y) * content_weight for Y_hat, Y in zip(\n", + " contents_Y_hat, contents_Y)]\n", + " styles_l = [style_loss(Y_hat, Y) * style_weight for Y_hat, Y in zip(\n", + " styles_Y_hat, styles_Y_gram)]\n", + " tv_l = tv_loss(X) * tv_weight\n", + " # 对所有损失求和\n", + " l = sum(styles_l) + sum(contents_l) + tv_l\n", + " return contents_l, styles_l, tv_l, l" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 9.11.6 创建和初始化合成图像" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "class GeneratedImage(torch.nn.Module):\n", + " def __init__(self, img_shape):\n", + " super(GeneratedImage, self).__init__()\n", + " self.weight = torch.nn.Parameter(torch.rand(*img_shape))\n", + "\n", + " def forward(self):\n", + " return self.weight" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def get_inits(X, device, lr, styles_Y):\n", + " gen_img = GeneratedImage(X.shape).to(device)\n", + " gen_img.weight.data = X.data\n", + " optimizer = torch.optim.Adam(gen_img.parameters(), lr=lr)\n", + " styles_Y_gram = [gram(Y) for Y in styles_Y]\n", + " return gen_img(), styles_Y_gram, optimizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 9.11.7 训练" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def train(X, contents_Y, styles_Y, device, lr, max_epochs, lr_decay_epoch):\n", + " print(\"training on \", device)\n", + " X, styles_Y_gram, optimizer = get_inits(X, device, lr, styles_Y)\n", + " scheduler = torch.optim.lr_scheduler.StepLR(optimizer, lr_decay_epoch, gamma=0.1)\n", + " for i in range(max_epochs):\n", + " start = time.time()\n", + " \n", + " contents_Y_hat, styles_Y_hat = extract_features(\n", + " X, content_layers, style_layers)\n", + " contents_l, styles_l, tv_l, l = compute_loss(\n", + " X, contents_Y_hat, styles_Y_hat, contents_Y, styles_Y_gram)\n", + " \n", + " optimizer.zero_grad()\n", + " l.backward(retain_graph = True)\n", + " optimizer.step()\n", + " scheduler.step()\n", + " \n", + " if i % 50 == 0 and i != 0:\n", + " print('epoch %3d, content loss %.2f, style loss %.2f, '\n", + " 'TV loss %.2f, %.2f sec'\n", + " % (i, sum(contents_l).item(), sum(styles_l).item(), tv_l.item(),\n", + " time.time() - start))\n", + " return X.detach()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training on cuda\n", + "epoch 50, content loss 0.24, style loss 1.11, TV loss 1.33, 0.07 sec\n", + "epoch 100, content loss 0.24, style loss 0.81, TV loss 1.20, 0.07 sec\n", + "epoch 150, content loss 0.24, style loss 0.72, TV loss 1.12, 0.07 sec\n", + "epoch 200, content loss 0.24, style loss 0.68, TV loss 1.06, 0.07 sec\n", + "epoch 250, content loss 0.23, style loss 0.68, TV loss 1.05, 0.07 sec\n", + "epoch 300, content loss 0.23, style loss 0.67, TV loss 1.04, 0.07 sec\n", + "epoch 350, content loss 0.23, style loss 0.67, TV loss 1.04, 0.07 sec\n", + "epoch 400, content loss 0.23, style loss 0.67, TV loss 1.03, 0.07 sec\n", + "epoch 450, content loss 0.23, style loss 0.67, TV loss 1.03, 0.07 sec\n" + ] + } + ], + "source": [ + "image_shape = (150, 225)\n", + "net = net.to(device)\n", + "content_X, contents_Y = get_contents(image_shape, device)\n", + "style_X, styles_Y = get_styles(image_shape, device)\n", + "output = train(content_X, contents_Y, styles_Y, device, 0.01, 500, 200)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "d2l.plt.imshow(postprocess(output));" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training on cuda\n", + "epoch 50, content loss 0.34, style loss 0.63, TV loss 0.79, 0.18 sec\n", + "epoch 100, content loss 0.30, style loss 0.50, TV loss 0.74, 0.18 sec\n", + "epoch 150, content loss 0.29, style loss 0.46, TV loss 0.72, 0.18 sec\n", + "epoch 200, content loss 0.28, style loss 0.43, TV loss 0.70, 0.18 sec\n", + "epoch 250, content loss 0.28, style loss 0.43, TV loss 0.69, 0.18 sec\n", + "epoch 300, content loss 0.27, style loss 0.42, TV loss 0.69, 0.18 sec\n", + "epoch 350, content loss 0.27, style loss 0.42, TV loss 0.69, 0.18 sec\n", + "epoch 400, content loss 0.27, style loss 0.42, TV loss 0.69, 0.18 sec\n", + "epoch 450, content loss 0.27, style loss 0.42, TV loss 0.69, 0.18 sec\n" + ] + } + ], + "source": [ + "image_shape = (300, 450)\n", + "_, content_Y = get_contents(image_shape, device)\n", + "_, style_Y = get_styles(image_shape, device)\n", + "X = preprocess(postprocess(output), image_shape).to(device)\n", + "big_output = train(X, content_Y, style_Y, device, 0.01, 500, 200)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "d2l.set_figsize((7, 5))\n", + "d2l.plt.imshow(postprocess(big_output));" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python [default]", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}