Commit 4f969d01 authored by PRASHANT SAROJ

Added project files
import os
import sys
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow, imread
from PIL import Image
from nst_utils import load_vgg_model, reshape_and_normalize_image, save_image, generate_noise_image
import tensorflow as tf

# Load the pretrained VGG-19 model (reloaded again below, after the graph is reset)
model = load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")
def compute_content_cost(a_C, a_G):
    """
    Computes the content cost

    Arguments:
    a_C -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing content of the image C
    a_G -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing content of the image G

    Returns:
    J_content -- scalar tensor, the content cost
    """
    # Retrieve dimensions from a_G
    _, n_H, n_W, n_C = a_G.get_shape().as_list()

    # Unroll a_C and a_G from (1, n_H, n_W, n_C) to (n_C, n_H*n_W)
    a_C_unrolled = tf.transpose(tf.reshape(a_C, [n_H * n_W, n_C]))
    a_G_unrolled = tf.transpose(tf.reshape(a_G, [n_H * n_W, n_C]))

    # Compute the cost: sum of squared differences, scaled by 1/(4 * n_H * n_W * n_C)
    J_content = tf.reduce_sum(tf.square(tf.subtract(
        a_C_unrolled, a_G_unrolled))) / (4 * n_H * n_W * n_C)
    return J_content
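
# A quick sanity check for compute_content_cost (my addition, not part of the
# original files; meant to be run in its own throwaway session before the main
# graph is built). The shapes are illustrative.
def _test_compute_content_cost():
    tf.reset_default_graph()
    with tf.Session() as test:
        tf.set_random_seed(1)
        a_C = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
        a_G = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
        J_content = compute_content_cost(a_C, a_G)
        print("J_content = " + str(test.run(J_content)))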
def gram_matrix(A):
    """
    Argument:
    A -- matrix of shape (n_C, n_H*n_W)

    Returns:
    GA -- Gram matrix of A, of shape (n_C, n_C)
    """
    GA = tf.matmul(A, tf.transpose(A))
    return GA
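
# Worked example (my own numbers): for A = [[1, 2], [3, 4]], GA = A.A^T is
# [[5, 11], [11, 25]]. Entry (i, j) is the dot product of the unrolled
# activations of filters i and j, i.e. a measure of how strongly features
# co-activate, which is what captures "style".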
def compute_layer_style_cost(a_S, a_G):
    """
    Arguments:
    a_S -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing style of the image S
    a_G -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing style of the image G

    Returns:
    J_style_layer -- tensor representing a scalar value, the style cost for this layer
    """
    # Retrieve dimensions from a_G
    _, n_H, n_W, n_C = a_G.get_shape().as_list()

    # Reshape the activations to have shape (n_C, n_H*n_W)
    a_S = tf.transpose(tf.reshape(a_S, [n_H * n_W, n_C]))
    a_G = tf.transpose(tf.reshape(a_G, [n_H * n_W, n_C]))

    # Compute Gram matrices for both images S and G
    GS = gram_matrix(a_S)
    GG = gram_matrix(a_G)

    # Compute the loss; (2*n_H*n_W*n_C)**2 equals the usual 4 * n_C**2 * (n_H*n_W)**2
    J_style_layer = tf.reduce_sum(
        tf.square(tf.subtract(GS, GG))) / (2 * n_H * n_W * n_C) ** 2
    return J_style_layer
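
# Matching sanity check for compute_layer_style_cost (my addition; same caveats
# as the content-cost check above).
def _test_compute_layer_style_cost():
    tf.reset_default_graph()
    with tf.Session() as test:
        tf.set_random_seed(1)
        a_S = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
        a_G = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
        J_style_layer = compute_layer_style_cost(a_S, a_G)
        print("J_style_layer = " + str(test.run(J_style_layer)))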
STYLE_LAYERS = [
    ('conv1_1', 0.2),
    ('conv2_1', 0.2),
    ('conv3_1', 0.2),
    ('conv4_1', 0.2),
    ('conv5_1', 0.2)]
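
# The five coefficients above sum to 1, so each chosen layer contributes with
# equal weight to the overall style cost computed below.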
def compute_style_cost(model, STYLE_LAYERS):
    """
    Computes the overall style cost from several chosen layers

    Arguments:
    model -- our tensorflow model
    STYLE_LAYERS -- A python list containing:
                        - the names of the layers we would like to extract style from
                        - a coefficient for each of them

    Returns:
    J_style -- tensor representing a scalar value, the overall weighted style cost

    Note: relies on the global session `sess` created below, with the style image
    already assigned to model['input'].
    """
    # Initialize the overall style cost
    J_style = 0
    for layer_name, coeff in STYLE_LAYERS:
        # Select the output tensor of the currently selected layer
        out = model[layer_name]
        # Set a_S to be the hidden layer activation from the layer we have selected,
        # by running the session on out
        a_S = sess.run(out)
        # Set a_G to be the hidden layer activation from the same layer. Here, a_G
        # references model[layer_name] and isn't evaluated yet. Later in the code,
        # we'll assign the image G as the model input, so that when we run the
        # session, this will be the activations drawn from the appropriate layer,
        # with G as input.
        a_G = out
        # Compute the style cost for the current layer
        J_style_layer = compute_layer_style_cost(a_S, a_G)
        # Add coeff * J_style_layer of this layer to the overall style cost
        J_style += coeff * J_style_layer
    return J_style
def total_cost(J_content, J_style, alpha=10, beta=40):
    """
    Computes the total cost J = alpha * J_content + beta * J_style

    Arguments:
    J_content -- content cost computed above
    J_style -- style cost computed above
    alpha -- hyperparameter weighting the importance of the content cost
    beta -- hyperparameter weighting the importance of the style cost
    """
    J = alpha * J_content + beta * J_style
    return J
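
# Quick arithmetic check (my own example values, not from the assignment): with
# J_content = 0.2 and J_style = 0.1, total_cost returns 10 * 0.2 + 40 * 0.1 = 6.0,
# i.e. the style term dominates under the default alpha/beta.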
tf.reset_default_graph()
# Start interactive session
sess = tf.InteractiveSession()
content_image = imread("images/louvre_small.jpg")
content_image = reshape_and_normalize_image(content_image)
style_image = imread("images/monet.jpg")
style_image = reshape_and_normalize_image(style_image)
generated_image = generate_noise_image(content_image)
imshow(generated_image[0])
model = load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")
# Assign the content image to be the input of the VGG model.
sess.run(model['input'].assign(content_image))
# Select the output tensor of layer conv4_2
out = model['conv4_2']
# Set a_C to be the hidden layer activation from the layer we have selected
a_C = sess.run(out)
# Set a_G to be the hidden layer activation from same layer. Here, a_G references model['conv4_2']
# and isn't evaluated yet. Later in the code, we'll assign the image G as the model input, so that
# when we run the session, this will be the activations drawn from the appropriate layer, with G as input.
a_G = out
# Compute the content cost
J_content = compute_content_cost(a_C, a_G)
# Assign the input of the model to be the "style" image
sess.run(model['input'].assign(style_image))
# Compute the style cost
J_style = compute_style_cost(model, STYLE_LAYERS)
J = total_cost(J_content, J_style, alpha=10, beta=40)
optimizer = tf.train.AdamOptimizer(2.0)
train_step = optimizer.minimize(J)
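# Note: the only tf.Variable in the graph is model['input'] (the VGG weights are
# wrapped as tf.constant in nst_utils), so minimizing J updates the pixels of the
# generated image rather than the network parameters.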
def model_nn(sess, input_image, num_iterations=200):
    # Initialize global variables (you need to run the session on the initializer)
    sess.run(tf.global_variables_initializer())
    # Run the noisy input image (initial generated image) through the model. Use assign().
    sess.run(model['input'].assign(input_image))
    for i in range(num_iterations):
        # Run the session on the train_step to minimize the total cost
        sess.run(train_step)
        # Compute the generated image by running the session on the current model['input']
        generated_image = sess.run(model['input'])
        if i % 20 == 0:
            Jt, Jc, Js = sess.run([J, J_content, J_style])
            print("Iteration " + str(i) + " :")
            print("total cost = " + str(Jt))
            print("content cost = " + str(Jc))
            print("style cost = " + str(Js))
            # Save the current generated image in the "output" directory
            save_image("output/" + str(i) + ".png", generated_image)
    # Save the last generated image
    save_image('output/generated_image.jpg', generated_image)
    return generated_image
model_nn(sess, generated_image)
This programming assignment, developed by deeplearning.ai (Kian Katanforoosh, Younes Bensouda Mourri, Andrew Ng), uses a pretrained model that can be downloaded at the following link: http://www.vlfeat.org/matconvnet/pretrained/. The pretrained model's parameters are due to the MatConvNet Team. Their software comes with the license replicated below.
---------------------------------------------------------------------
Copyright (c) 2014-16 The MatConvNet Team.
All rights reserved.
Redistribution and use in source and binary forms are permitted
provided that the above copyright notice and this paragraph are
duplicated in all such forms and that any documentation, advertising
materials, and other materials related to such distribution and use
acknowledge that the software was developed by the MatConvNet
Team. The name of the MatConvNet Team may not be used to endorse or
promote products derived from this software without specific prior
written permission. THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE.
---------------------------------------------------------------------
nst_utils.py:
import os
import sys
import scipy.io
from matplotlib.pyplot import imsave
from PIL import Image
import numpy as np
import tensorflow as tf
class CONFIG:
    IMAGE_WIDTH = 400
    IMAGE_HEIGHT = 300
    COLOR_CHANNELS = 3
    NOISE_RATIO = 0.6
    # Per-channel means to subtract, in the shape expected by the model input
    MEANS = np.array([123.68, 116.779, 103.939]).reshape((1, 1, 1, 3))
    # Pick the VGG 19-layer model from the paper "Very Deep Convolutional Networks for Large-Scale Image Recognition".
    VGG_MODEL = 'pretrained-model/imagenet-vgg-verydeep-19.mat'
    STYLE_IMAGE = None
    CONTENT_IMAGE = None
    OUTPUT_DIR = 'output/'
def load_vgg_model(path):
    """
    Returns a model for the purpose of 'painting' the picture.
    Takes only the convolution layer weights and wraps them using the TensorFlow
    Conv2d, Relu and AveragePooling layers. VGG actually uses maxpool, but
    the paper indicates that using AveragePooling yields better results.
    The last few fully connected layers are not used.
    Here is the detailed configuration of the VGG model:
        0 is conv1_1 (3, 3, 3, 64)
        1 is relu
        2 is conv1_2 (3, 3, 64, 64)
        3 is relu
        4 is maxpool
        5 is conv2_1 (3, 3, 64, 128)
        6 is relu
        7 is conv2_2 (3, 3, 128, 128)
        8 is relu
        9 is maxpool
        10 is conv3_1 (3, 3, 128, 256)
        11 is relu
        12 is conv3_2 (3, 3, 256, 256)
        13 is relu
        14 is conv3_3 (3, 3, 256, 256)
        15 is relu
        16 is conv3_4 (3, 3, 256, 256)
        17 is relu
        18 is maxpool
        19 is conv4_1 (3, 3, 256, 512)
        20 is relu
        21 is conv4_2 (3, 3, 512, 512)
        22 is relu
        23 is conv4_3 (3, 3, 512, 512)
        24 is relu
        25 is conv4_4 (3, 3, 512, 512)
        26 is relu
        27 is maxpool
        28 is conv5_1 (3, 3, 512, 512)
        29 is relu
        30 is conv5_2 (3, 3, 512, 512)
        31 is relu
        32 is conv5_3 (3, 3, 512, 512)
        33 is relu
        34 is conv5_4 (3, 3, 512, 512)
        35 is relu
        36 is maxpool
        37 is fullyconnected (7, 7, 512, 4096)
        38 is relu
        39 is fullyconnected (1, 1, 4096, 4096)
        40 is relu
        41 is fullyconnected (1, 1, 4096, 1000)
        42 is softmax
    """
    vgg = scipy.io.loadmat(path)
    vgg_layers = vgg['layers']

    def _weights(layer, expected_layer_name):
        """
        Return the weights and bias from the VGG model for a given layer.
        """
        wb = vgg_layers[0][layer][0][0][2]
        W = wb[0][0]
        b = wb[0][1]
        layer_name = vgg_layers[0][layer][0][0][0][0]
        assert layer_name == expected_layer_name
        return W, b

    def _relu(conv2d_layer):
        """
        Return the RELU function wrapped over a TensorFlow layer. Expects a
        Conv2d layer input.
        """
        return tf.nn.relu(conv2d_layer)

    def _conv2d(prev_layer, layer, layer_name):
        """
        Return the Conv2D layer using the weights and biases from the VGG
        model at 'layer'.
        """
        W, b = _weights(layer, layer_name)
        W = tf.constant(W)
        b = tf.constant(np.reshape(b, (b.size)))
        return tf.nn.conv2d(prev_layer, filter=W, strides=[1, 1, 1, 1], padding='SAME') + b

    def _conv2d_relu(prev_layer, layer, layer_name):
        """
        Return the Conv2D + RELU layer using the weights and biases from the VGG
        model at 'layer'.
        """
        return _relu(_conv2d(prev_layer, layer, layer_name))

    def _avgpool(prev_layer):
        """
        Return the AveragePooling layer.
        """
        return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Construct the graph model.
    graph = {}
    graph['input'] = tf.Variable(np.zeros(
        (1, CONFIG.IMAGE_HEIGHT, CONFIG.IMAGE_WIDTH, CONFIG.COLOR_CHANNELS)), dtype='float32')
    graph['conv1_1'] = _conv2d_relu(graph['input'], 0, 'conv1_1')
    graph['conv1_2'] = _conv2d_relu(graph['conv1_1'], 2, 'conv1_2')
    graph['avgpool1'] = _avgpool(graph['conv1_2'])
    graph['conv2_1'] = _conv2d_relu(graph['avgpool1'], 5, 'conv2_1')
    graph['conv2_2'] = _conv2d_relu(graph['conv2_1'], 7, 'conv2_2')
    graph['avgpool2'] = _avgpool(graph['conv2_2'])
    graph['conv3_1'] = _conv2d_relu(graph['avgpool2'], 10, 'conv3_1')
    graph['conv3_2'] = _conv2d_relu(graph['conv3_1'], 12, 'conv3_2')
    graph['conv3_3'] = _conv2d_relu(graph['conv3_2'], 14, 'conv3_3')
    graph['conv3_4'] = _conv2d_relu(graph['conv3_3'], 16, 'conv3_4')
    graph['avgpool3'] = _avgpool(graph['conv3_4'])
    graph['conv4_1'] = _conv2d_relu(graph['avgpool3'], 19, 'conv4_1')
    graph['conv4_2'] = _conv2d_relu(graph['conv4_1'], 21, 'conv4_2')
    graph['conv4_3'] = _conv2d_relu(graph['conv4_2'], 23, 'conv4_3')
    graph['conv4_4'] = _conv2d_relu(graph['conv4_3'], 25, 'conv4_4')
    graph['avgpool4'] = _avgpool(graph['conv4_4'])
    graph['conv5_1'] = _conv2d_relu(graph['avgpool4'], 28, 'conv5_1')
    graph['conv5_2'] = _conv2d_relu(graph['conv5_1'], 30, 'conv5_2')
    graph['conv5_3'] = _conv2d_relu(graph['conv5_2'], 32, 'conv5_3')
    graph['conv5_4'] = _conv2d_relu(graph['conv5_3'], 34, 'conv5_4')
    graph['avgpool5'] = _avgpool(graph['conv5_4'])
    return graph
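
# Usage sketch (not part of the original file): the returned model is a plain
# dict of tensors keyed by layer name, so a layer's activations can be read out
# by assigning an image to the input variable and running the session on that
# entry, e.g.:
#     sess.run(model['input'].assign(some_image))  # some_image: (1, 300, 400, 3)
#     activations = sess.run(model['conv4_2'])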
def generate_noise_image(content_image, noise_ratio=CONFIG.NOISE_RATIO):
    """
    Generates a noisy image by adding random noise to the content_image
    """
    # Generate a random noise image
    noise_image = np.random.uniform(-20, 20, (1, CONFIG.IMAGE_HEIGHT,
                                              CONFIG.IMAGE_WIDTH, CONFIG.COLOR_CHANNELS)).astype('float32')
    # Set the input_image to be a weighted average of the content_image and a noise_image
    input_image = noise_image * noise_ratio + content_image * (1 - noise_ratio)
    return input_image
def reshape_and_normalize_image(image):
    """
    Reshape and normalize the input image (content or style)
    """
    # Reshape the image to match the expected input of VGG-19: add a batch dimension
    image = np.reshape(image, ((1,) + image.shape))
    # Subtract the channel means to match the expected input of VGG-19
    image = image - CONFIG.MEANS
    return image
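
# For example (illustrative shapes): a (300, 400, 3) uint8 image becomes a
# (1, 300, 400, 3) float array with the channel means subtracted, matching the
# (1, IMAGE_HEIGHT, IMAGE_WIDTH, COLOR_CHANNELS) input variable created in
# load_vgg_model above.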
def save_image(path, image):
    """
    Un-normalize the image and save it to disk
    """
    # Un-normalize the image so that it looks good
    image = image + CONFIG.MEANS
    # Clip to the valid pixel range and save the image
    image = np.clip(image[0], 0, 255).astype('uint8')
    imsave(path, image)