add VGG19 (#504)

* add VGG19 demo * add pretrained model url (upload will be later) * fix vgg16 and fix linting in vgg19

add VGG19 (#504)
* add VGG19 demo * add pretrained model url (upload will be later) * fix vgg16 and fix linting in vgg19
d1a1895f · Patrick Wieschollek · Yuxin Wu · ef9fb4b8 · d1a1895f · d1a1895f
Commit d1a1895f authored Nov 22, 2017 by Patrick Wieschollek Committed by Yuxin Wu Nov 22, 2017
Hide whitespace changes
Inline Side-by-side

Showing with 110 additions and 1 deletion

examples/load-vgg16.py examples/load-vgg16.py +7 -1

examples/load-vgg19.py examples/load-vgg19.py +103 -0

No files found.
--- a/examples/load-vgg16.py
+++ b/examples/load-vgg16.py
@@ -78,7 +78,13 @@ def run_test(path, input):
    assert im is not None, input
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    im = cv2.resize(im, (224, 224)).reshape((1, 224, 224, 3)).astype('float32')
-    im = im - 110
+
+    # VGG16 requires channelwise mean subtraction
+    VGG_MEAN = [103.939, 116.779, 123.68]
+    im[:, :, 0] -= VGG_MEAN[2]
+    im[:, :, 1] -= VGG_MEAN[1]
+    im[:, :, 2] -= VGG_MEAN[0]
+
    outputs = predict_func(im)[0]
    prob = outputs[0]
    ret = prob.argsort()[-10:][::-1]

--- a/examples/load-vgg19.py
+++ b/examples/load-vgg19.py
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+# File: load-vgg19.py
+
+from __future__ import print_function
+import cv2
+import tensorflow as tf
+import numpy as np
+import os
+import argparse
+
+from tensorpack import *
+from tensorpack.tfutils.symbolic_functions import *
+from tensorpack.tfutils.summary import *
+from tensorpack.dataflow.dataset import ILSVRCMeta
+
+"""
+Usage:
+    python -m tensorpack.utils.loadcaffe \
+            PATH/TO/VGG/{VGG_ILSVRC_19_layers_deploy.prototxt,VGG_ILSVRC_19_layers.caffemodel} vgg19.npy
+    ./load-vgg19.py --load vgg19.npy --input cat.png
+
+    Or download a converted caffe model from http://models.tensorpack.com/caffe/
+    ./load-vgg19.py --load vgg19.npy --input cat.png
+"""
+
+
+def tower_func(image):
+    with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu):
+        logits = (LinearWrap(image)
+                  .Conv2D('conv1_1', 64)
+                  .Conv2D('conv1_2', 64)
+                  .MaxPooling('pool1', 2)
+                  # 112
+                  .Conv2D('conv2_1', 128)
+                  .Conv2D('conv2_2', 128)
+                  .MaxPooling('pool2', 2)
+                  # 56
+                  .Conv2D('conv3_1', 256)
+                  .Conv2D('conv3_2', 256)
+                  .Conv2D('conv3_3', 256)
+                  .Conv2D('conv3_4', 256)
+                  .MaxPooling('pool3', 2)
+                  # 28
+                  .Conv2D('conv4_1', 512)
+                  .Conv2D('conv4_2', 512)
+                  .Conv2D('conv4_3', 512)
+                  .Conv2D('conv4_4', 512)
+                  .MaxPooling('pool4', 2)
+                  # 14
+                  .Conv2D('conv5_1', 512)
+                  .Conv2D('conv5_2', 512)
+                  .Conv2D('conv5_3', 512)
+                  .Conv2D('conv5_4', 512)
+                  .MaxPooling('pool5', 2)
+                  # 7
+                  .FullyConnected('fc6', 4096, nl=tf.nn.relu)
+                  .Dropout('drop0', 0.5)
+                  .FullyConnected('fc7', 4096, nl=tf.nn.relu)
+                  .Dropout('drop1', 0.5)
+                  .FullyConnected('fc8', out_dim=1000, nl=tf.identity)())
+    tf.nn.softmax(logits, name='prob')
+
+
+def run_test(path, input):
+    param_dict = np.load(path, encoding='latin1').item()
+    predict_func = OfflinePredictor(PredictConfig(
+        inputs_desc=[InputDesc(tf.float32, (None, 224, 224, 3), 'input')],
+        tower_func=tower_func,
+        session_init=DictRestore(param_dict),
+        input_names=['input'],
+        output_names=['prob']   # prob:0 is the probability distribution
+    ))
+
+    im = cv2.imread(input)
+    assert im is not None, input
+    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+    im = cv2.resize(im, (224, 224)).reshape((1, 224, 224, 3)).astype('float32')
+
+    # VGG19 requires channelwise mean substraction
+    VGG_MEAN = [103.939, 116.779, 123.68]
+    im[:, :, 0] -= VGG_MEAN[2]
+    im[:, :, 1] -= VGG_MEAN[1]
+    im[:, :, 2] -= VGG_MEAN[0]
+    outputs = predict_func(im)[0]
+    prob = outputs[0]
+    ret = prob.argsort()[-10:][::-1]
+    print("Top10 predictions:", ret)
+
+    meta = ILSVRCMeta().get_synset_words_1000()
+    print("Top10 class names:", [meta[k] for k in ret])
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
+    parser.add_argument('--load', required=True,
+                        help='.npy model file generated by tensorpack.utils.loadcaffe')
+    parser.add_argument('--input', help='an input image', required=True)
+    args = parser.parse_args()
+    if args.gpu:
+        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
+    run_test(args.load, args.input)