Commit 4f969d01 authored by PRASHANT SAROJ

Added project files
import os
import sys
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow, imread
from PIL import Image
from nst_utils import load_vgg_model, reshape_and_normalize_image, save_image, generate_noise_image
import tensorflow as tf

# Load the pretrained VGG-19 model (reloaded again below, after the graph is reset)
model = load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")
def compute_content_cost(a_C, a_G):
    """
    Computes the content cost

    Arguments:
    a_C -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing content of the image C
    a_G -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing content of the image G

    Returns:
    J_content -- scalar tensor, the content cost
    """
    # Retrieve dimensions from a_G
    _, n_H, n_W, n_C = a_G.get_shape().as_list()

    # Unroll a_C and a_G from (1, n_H, n_W, n_C) to (n_C, n_H*n_W)
    a_C_unrolled = tf.transpose(tf.reshape(a_C, [n_H * n_W, n_C]))
    a_G_unrolled = tf.transpose(tf.reshape(a_G, [n_H * n_W, n_C]))

    # Compute the cost: sum of squared differences, scaled by 1/(4 * n_H * n_W * n_C)
    J_content = tf.reduce_sum(tf.square(tf.subtract(
        a_C_unrolled, a_G_unrolled))) / (4 * n_H * n_W * n_C)
    return J_content
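
# A quick sanity check for compute_content_cost (my addition, not part of the
# original files; meant to be run in its own throwaway session before the main
# graph is built). The shapes are illustrative.
def _test_compute_content_cost():
    tf.reset_default_graph()
    with tf.Session() as test:
        tf.set_random_seed(1)
        a_C = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
        a_G = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
        J_content = compute_content_cost(a_C, a_G)
        print("J_content = " + str(test.run(J_content)))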
def gram_matrix(A):
    """
    Argument:
    A -- matrix of shape (n_C, n_H*n_W)

    Returns:
    GA -- Gram matrix of A, of shape (n_C, n_C)
    """
    GA = tf.matmul(A, tf.transpose(A))
    return GA
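
# Worked example (my own numbers): for A = [[1, 2], [3, 4]], GA = A.A^T is
# [[5, 11], [11, 25]]. Entry (i, j) is the dot product of the unrolled
# activations of filters i and j, i.e. a measure of how strongly features
# co-activate, which is what captures "style".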
def compute_layer_style_cost(a_S, a_G):
    """
    Arguments:
    a_S -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing style of the image S
    a_G -- tensor of dimension (1, n_H, n_W, n_C), hidden layer activations representing style of the image G

    Returns:
    J_style_layer -- tensor representing a scalar value, the style cost for this layer
    """
    # Retrieve dimensions from a_G
    _, n_H, n_W, n_C = a_G.get_shape().as_list()

    # Reshape the activations to have shape (n_C, n_H*n_W)
    a_S = tf.transpose(tf.reshape(a_S, [n_H * n_W, n_C]))
    a_G = tf.transpose(tf.reshape(a_G, [n_H * n_W, n_C]))

    # Compute Gram matrices for both images S and G
    GS = gram_matrix(a_S)
    GG = gram_matrix(a_G)

    # Compute the loss; (2*n_H*n_W*n_C)**2 equals the usual 4 * n_C**2 * (n_H*n_W)**2
    J_style_layer = tf.reduce_sum(
        tf.square(tf.subtract(GS, GG))) / (2 * n_H * n_W * n_C) ** 2
    return J_style_layer
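
# Matching sanity check for compute_layer_style_cost (my addition; same caveats
# as the content-cost check above).
def _test_compute_layer_style_cost():
    tf.reset_default_graph()
    with tf.Session() as test:
        tf.set_random_seed(1)
        a_S = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
        a_G = tf.random_normal([1, 4, 4, 3], mean=1, stddev=4)
        J_style_layer = compute_layer_style_cost(a_S, a_G)
        print("J_style_layer = " + str(test.run(J_style_layer)))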
STYLE_LAYERS = [
    ('conv1_1', 0.2),
    ('conv2_1', 0.2),
    ('conv3_1', 0.2),
    ('conv4_1', 0.2),
    ('conv5_1', 0.2)]
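
# The five coefficients above sum to 1, so each chosen layer contributes with
# equal weight to the overall style cost computed below.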
def compute_style_cost(model, STYLE_LAYERS):
    """
    Computes the overall style cost from several chosen layers

    Arguments:
    model -- our tensorflow model
    STYLE_LAYERS -- A python list containing:
                        - the names of the layers we would like to extract style from
                        - a coefficient for each of them

    Returns:
    J_style -- tensor representing a scalar value, the overall weighted style cost

    Note: relies on the global session `sess` created below, with the style image
    already assigned to model['input'].
    """
    # Initialize the overall style cost
    J_style = 0
    for layer_name, coeff in STYLE_LAYERS:
        # Select the output tensor of the currently selected layer
        out = model[layer_name]
        # Set a_S to be the hidden layer activation from the layer we have selected,
        # by running the session on out
        a_S = sess.run(out)
        # Set a_G to be the hidden layer activation from the same layer. Here, a_G
        # references model[layer_name] and isn't evaluated yet. Later in the code,
        # we'll assign the image G as the model input, so that when we run the
        # session, this will be the activations drawn from the appropriate layer,
        # with G as input.
        a_G = out
        # Compute the style cost for the current layer
        J_style_layer = compute_layer_style_cost(a_S, a_G)
        # Add coeff * J_style_layer of this layer to the overall style cost
        J_style += coeff * J_style_layer
    return J_style
def total_cost(J_content, J_style, alpha=10, beta=40):
    """
    Computes the total cost J = alpha * J_content + beta * J_style

    Arguments:
    J_content -- content cost computed above
    J_style -- style cost computed above
    alpha -- hyperparameter weighting the importance of the content cost
    beta -- hyperparameter weighting the importance of the style cost
    """
    J = alpha * J_content + beta * J_style
    return J
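
# Quick arithmetic check (my own example values, not from the assignment): with
# J_content = 0.2 and J_style = 0.1, total_cost returns 10 * 0.2 + 40 * 0.1 = 6.0,
# i.e. the style term dominates under the default alpha/beta.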
tf.reset_default_graph()
# Start interactive session
sess = tf.InteractiveSession()
content_image = imread("images/louvre_small.jpg")
content_image = reshape_and_normalize_image(content_image)
style_image = imread("images/monet.jpg")
style_image = reshape_and_normalize_image(style_image)
generated_image = generate_noise_image(content_image)
imshow(generated_image[0])
model = load_vgg_model("pretrained-model/imagenet-vgg-verydeep-19.mat")
# Assign the content image to be the input of the VGG model.
sess.run(model['input'].assign(content_image))
# Select the output tensor of layer conv4_2
out = model['conv4_2']
# Set a_C to be the hidden layer activation from the layer we have selected
a_C = sess.run(out)
# Set a_G to be the hidden layer activation from same layer. Here, a_G references model['conv4_2']
# and isn't evaluated yet. Later in the code, we'll assign the image G as the model input, so that
# when we run the session, this will be the activations drawn from the appropriate layer, with G as input.
a_G = out
# Compute the content cost
J_content = compute_content_cost(a_C, a_G)
# Assign the input of the model to be the "style" image
sess.run(model['input'].assign(style_image))
# Compute the style cost
J_style = compute_style_cost(model, STYLE_LAYERS)
J = total_cost(J_content, J_style, alpha=10, beta=40)
optimizer = tf.train.AdamOptimizer(2.0)
train_step = optimizer.minimize(J)
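# Note: the only tf.Variable in the graph is model['input'] (the VGG weights are
# wrapped as tf.constant in nst_utils), so minimizing J updates the pixels of the
# generated image rather than the network parameters.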
def model_nn(sess, input_image, num_iterations=200):
    # Initialize global variables (you need to run the session on the initializer)
    sess.run(tf.global_variables_initializer())
    # Run the noisy input image (initial generated image) through the model. Use assign().
    sess.run(model['input'].assign(input_image))
    for i in range(num_iterations):
        # Run the session on the train_step to minimize the total cost
        sess.run(train_step)
        # Compute the generated image by running the session on the current model['input']
        generated_image = sess.run(model['input'])
        if i % 20 == 0:
            Jt, Jc, Js = sess.run([J, J_content, J_style])
            print("Iteration " + str(i) + " :")
            print("total cost = " + str(Jt))
            print("content cost = " + str(Jc))
            print("style cost = " + str(Js))
            # Save the current generated image in the "output" directory
            save_image("output/" + str(i) + ".png", generated_image)
    # Save the last generated image
    save_image('output/generated_image.jpg', generated_image)
    return generated_image
model_nn(sess, generated_image)
This programming assignment, developed by deeplearning.ai (Kian Katanforoosh, Younes Bensouda Mourri, Andrew Ng), uses a pretrained model that can be downloaded at the following link: http://www.vlfeat.org/matconvnet/pretrained/. The pretrained model's parameters are due to the MatConvNet Team. Their software comes with the license replicated below.
---------------------------------------------------------------------
Copyright (c) 2014-16 The MatConvNet Team.
All rights reserved.
Redistribution and use in source and binary forms are permitted
provided that the above copyright notice and this paragraph are
duplicated in all such forms and that any documentation, advertising
materials, and other materials related to such distribution and use
acknowledge that the software was developed by the MatConvNet
Team. The name of the MatConvNet Team may not be used to endorse or
promote products derived from this software without specific prior
written permission. THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE.
---------------------------------------------------------------------
nst_utils.py:
import os
import sys
import scipy.io
from matplotlib.pyplot import imsave
from PIL import Image
import numpy as np
import tensorflow as tf
class CONFIG:
    IMAGE_WIDTH = 400
    IMAGE_HEIGHT = 300
    COLOR_CHANNELS = 3
    NOISE_RATIO = 0.6
    # Per-channel means to subtract, in the shape expected by the model input
    MEANS = np.array([123.68, 116.779, 103.939]).reshape((1, 1, 1, 3))
    # Pick the VGG 19-layer model from the paper "Very Deep Convolutional Networks for Large-Scale Image Recognition".
    VGG_MODEL = 'pretrained-model/imagenet-vgg-verydeep-19.mat'
    STYLE_IMAGE = None
    CONTENT_IMAGE = None
    OUTPUT_DIR = 'output/'
def load_vgg_model(path):
    """
    Returns a model for the purpose of 'painting' the picture.
    Takes only the convolution layer weights and wraps them using the TensorFlow
    Conv2d, Relu and AveragePooling layers. VGG actually uses maxpool, but
    the paper indicates that using AveragePooling yields better results.
    The last few fully connected layers are not used.
    Here is the detailed configuration of the VGG model:
        0 is conv1_1 (3, 3, 3, 64)
        1 is relu
        2 is conv1_2 (3, 3, 64, 64)
        3 is relu
        4 is maxpool
        5 is conv2_1 (3, 3, 64, 128)
        6 is relu
        7 is conv2_2 (3, 3, 128, 128)
        8 is relu
        9 is maxpool
        10 is conv3_1 (3, 3, 128, 256)
        11 is relu
        12 is conv3_2 (3, 3, 256, 256)
        13 is relu
        14 is conv3_3 (3, 3, 256, 256)
        15 is relu
        16 is conv3_4 (3, 3, 256, 256)
        17 is relu
        18 is maxpool
        19 is conv4_1 (3, 3, 256, 512)
        20 is relu
        21 is conv4_2 (3, 3, 512, 512)
        22 is relu
        23 is conv4_3 (3, 3, 512, 512)
        24 is relu
        25 is conv4_4 (3, 3, 512, 512)
        26 is relu
        27 is maxpool
        28 is conv5_1 (3, 3, 512, 512)
        29 is relu
        30 is conv5_2 (3, 3, 512, 512)
        31 is relu
        32 is conv5_3 (3, 3, 512, 512)
        33 is relu
        34 is conv5_4 (3, 3, 512, 512)
        35 is relu
        36 is maxpool
        37 is fullyconnected (7, 7, 512, 4096)
        38 is relu
        39 is fullyconnected (1, 1, 4096, 4096)
        40 is relu
        41 is fullyconnected (1, 1, 4096, 1000)
        42 is softmax
    """
    vgg = scipy.io.loadmat(path)
    vgg_layers = vgg['layers']

    def _weights(layer, expected_layer_name):
        """
        Return the weights and bias from the VGG model for a given layer.
        """
        wb = vgg_layers[0][layer][0][0][2]
        W = wb[0][0]
        b = wb[0][1]
        layer_name = vgg_layers[0][layer][0][0][0][0]
        assert layer_name == expected_layer_name
        return W, b

    def _relu(conv2d_layer):
        """
        Return the RELU function wrapped over a TensorFlow layer. Expects a
        Conv2d layer input.
        """
        return tf.nn.relu(conv2d_layer)

    def _conv2d(prev_layer, layer, layer_name):
        """
        Return the Conv2D layer using the weights and biases from the VGG
        model at 'layer'.
        """
        W, b = _weights(layer, layer_name)
        W = tf.constant(W)
        b = tf.constant(np.reshape(b, (b.size)))
        return tf.nn.conv2d(prev_layer, filter=W, strides=[1, 1, 1, 1], padding='SAME') + b

    def _conv2d_relu(prev_layer, layer, layer_name):
        """
        Return the Conv2D + RELU layer using the weights and biases from the VGG
        model at 'layer'.
        """
        return _relu(_conv2d(prev_layer, layer, layer_name))

    def _avgpool(prev_layer):
        """
        Return the AveragePooling layer.
        """
        return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Construct the graph model.
    graph = {}
    graph['input'] = tf.Variable(np.zeros(
        (1, CONFIG.IMAGE_HEIGHT, CONFIG.IMAGE_WIDTH, CONFIG.COLOR_CHANNELS)), dtype='float32')
    graph['conv1_1'] = _conv2d_relu(graph['input'], 0, 'conv1_1')
    graph['conv1_2'] = _conv2d_relu(graph['conv1_1'], 2, 'conv1_2')
    graph['avgpool1'] = _avgpool(graph['conv1_2'])
    graph['conv2_1'] = _conv2d_relu(graph['avgpool1'], 5, 'conv2_1')
    graph['conv2_2'] = _conv2d_relu(graph['conv2_1'], 7, 'conv2_2')
    graph['avgpool2'] = _avgpool(graph['conv2_2'])
    graph['conv3_1'] = _conv2d_relu(graph['avgpool2'], 10, 'conv3_1')
    graph['conv3_2'] = _conv2d_relu(graph['conv3_1'], 12, 'conv3_2')
    graph['conv3_3'] = _conv2d_relu(graph['conv3_2'], 14, 'conv3_3')
    graph['conv3_4'] = _conv2d_relu(graph['conv3_3'], 16, 'conv3_4')
    graph['avgpool3'] = _avgpool(graph['conv3_4'])
    graph['conv4_1'] = _conv2d_relu(graph['avgpool3'], 19, 'conv4_1')
    graph['conv4_2'] = _conv2d_relu(graph['conv4_1'], 21, 'conv4_2')
    graph['conv4_3'] = _conv2d_relu(graph['conv4_2'], 23, 'conv4_3')
    graph['conv4_4'] = _conv2d_relu(graph['conv4_3'], 25, 'conv4_4')
    graph['avgpool4'] = _avgpool(graph['conv4_4'])
    graph['conv5_1'] = _conv2d_relu(graph['avgpool4'], 28, 'conv5_1')
    graph['conv5_2'] = _conv2d_relu(graph['conv5_1'], 30, 'conv5_2')
    graph['conv5_3'] = _conv2d_relu(graph['conv5_2'], 32, 'conv5_3')
    graph['conv5_4'] = _conv2d_relu(graph['conv5_3'], 34, 'conv5_4')
    graph['avgpool5'] = _avgpool(graph['conv5_4'])
    return graph
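
# Usage sketch (not part of the original file): the returned model is a plain
# dict of tensors keyed by layer name, so a layer's activations can be read out
# by assigning an image to the input variable and running the session on that
# entry, e.g.:
#     sess.run(model['input'].assign(some_image))  # some_image: (1, 300, 400, 3)
#     activations = sess.run(model['conv4_2'])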
def generate_noise_image(content_image, noise_ratio=CONFIG.NOISE_RATIO):
    """
    Generates a noisy image by adding random noise to the content_image
    """
    # Generate a random noise image
    noise_image = np.random.uniform(-20, 20, (1, CONFIG.IMAGE_HEIGHT,
                                              CONFIG.IMAGE_WIDTH, CONFIG.COLOR_CHANNELS)).astype('float32')
    # Set the input_image to be a weighted average of the content_image and a noise_image
    input_image = noise_image * noise_ratio + content_image * (1 - noise_ratio)
    return input_image
def reshape_and_normalize_image(image):
    """
    Reshape and normalize the input image (content or style)
    """
    # Reshape the image to match the expected input of VGG-19: add a batch dimension
    image = np.reshape(image, ((1,) + image.shape))
    # Subtract the channel means to match the expected input of VGG-19
    image = image - CONFIG.MEANS
    return image
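
# For example (illustrative shapes): a (300, 400, 3) uint8 image becomes a
# (1, 300, 400, 3) float array with the channel means subtracted, matching the
# (1, IMAGE_HEIGHT, IMAGE_WIDTH, COLOR_CHANNELS) input variable created in
# load_vgg_model above.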
def save_image(path, image):
    """
    Un-normalize the image and save it to disk
    """
    # Un-normalize the image so that it looks good
    image = image + CONFIG.MEANS
    # Clip to the valid pixel range and save the image
    image = np.clip(image[0], 0, 255).astype('uint8')
    imsave(path, image)