resize shortest edge

734b64aa · Yuxin Wu · 972e298a · 734b64aa · 734b64aa · 734b64aa
Commit 734b64aa authored Nov 06, 2016 by Yuxin Wu
4 changed files
--- a/examples/OpenAIGym/run-atari.py
+++ b/examples/OpenAIGym/run-atari.py
@@ -64,6 +64,7 @@ class Model(ModelDesc):
 def run_submission(cfg, output, nr):
    player = get_player(dumpdir=output)
    predfunc = get_predict_func(cfg)
+    logger.info("Start evaluation: ")
    for k in range(nr):
        if k != 0:
            player.restart_episode()

--- a/examples/ResNet/imagenet-resnet.py
+++ b/examples/ResNet/imagenet-resnet.py
@@ -4,6 +4,7 @@
 # Author: Yuxin Wu <ppwwyyxx@gmail.com>

 import cv2
+import sys
 import argparse
 import numpy as np
 import os
@@ -121,7 +122,6 @@ class Model(ModelDesc):
                                          200000, 0.7, True)
        wd_cost = tf.mul(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='l2_regularize_loss')
        add_moving_summary(loss, wd_cost)
-
        self.cost = tf.add_n([loss, wd_cost], name='cost')

 def get_data(train_or_test):
@@ -172,14 +172,8 @@ def get_data(train_or_test):
            imgaug.MapImage(lambda x: (x * (1.0 / 255) - image_mean) / image_std),
        ]
    else:
-        def resize_func(im):
-            h, w = im.shape[:2]
-            scale = 256.0 / min(h, w)
-            desSize = map(int, [scale * w, scale * h])
-            im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC)
-            return im
        augmentors = [
-            imgaug.MapImage(resize_func),
+            imgaug.ResizeShortestEdge(256),
            imgaug.CenterCrop((224, 224)),
            imgaug.MapImage(lambda x: (x * (1.0 / 255) - image_mean) / image_std),
        ]
@@ -189,7 +183,6 @@ def get_data(train_or_test):
        ds = PrefetchDataZMQ(ds, min(12, multiprocessing.cpu_count()))
    return ds

-
 def get_config():
    # prepare dataset
    dataset_train = get_data('train')
@@ -209,7 +202,7 @@ def get_config():
                ClassificationError('wrong-top1', 'val-error-top1'),
                ClassificationError('wrong-top5', 'val-error-top5')]),
            ScheduledHyperParamSetter('learning_rate',
-                                      [(30, 1e-2), (60, 1e-3), (85, 2e-4)]),
+                              [(30, 1e-2), (60, 1e-3), (85, 1e-4), (95, 1e-5)]),
            HumanHyperParamSetter('learning_rate'),
        ]),
        session_config=sess_config,
@@ -218,17 +211,39 @@ def get_config():
        max_epoch=110,
    )

+def eval_on_ILSVRC12(model_file, data_dir):
+    """ Print running and final top-1/top-5 error of `model_file` on
+        get_data('val'). NOTE(review): `data_dir` is never read here. """
+    val_ds = get_data('val')
+    config = PredictConfig(
+        model=Model(), input_var_names=['input', 'label'],
+        session_init=get_model_loader(model_file),
+        output_var_names=['wrong-top1', 'wrong-top5'])
+    top1, top5 = RatioCounter(), RatioCounter()
+    def report():
+        print("Top1 Error: {}".format(top1.ratio))
+        print("Top5 Error: {}".format(top5.ratio))
+    for outputs in SimpleDatasetPredictor(config, val_ds).get_result():
+        n = outputs[0].shape[0]  # number of samples in this batch
+        top1.feed(outputs[0].sum(), n)
+        top5.feed(outputs[1].sum(), n)
+        report()  # running error after every batch
+    report()
+
 if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') # nargs='*' in multi mode
    parser.add_argument('--data', help='ILSVRC dataset dir')
    parser.add_argument('--load', help='load model')
+    parser.add_argument('--eval', action='store_true')
    args = parser.parse_args()
-
-    logger.auto_set_dir()
-
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
+    if args.eval:
+        eval_on_ILSVRC12(args.load, args.data)
+        sys.exit()
+
+    logger.auto_set_dir()

    config = get_config()
    if args.load:

--- a/examples/ResNet/load-resnet.py
+++ b/examples/ResNet/load-resnet.py
@@ -100,14 +100,8 @@ def get_inference_augmentor():
    pp_mean = meta.get_per_pixel_mean()
    pp_mean_224 = pp_mean[16:-16,16:-16,:]

-    def resize_func(im):
-        h, w = im.shape[:2]
-        scale = 256.0 / min(h, w)
-        desSize = map(int, [scale * w, scale * h])
-        im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC)
-        return im
    transformers = imgaug.AugmentorList([
-        imgaug.MapImage(resize_func),
+        imgaug.ResizeShortestEdge(256),
        imgaug.CenterCrop((224, 224)),
        imgaug.MapImage(lambda x: x - pp_mean_224),
    ])
@@ -159,10 +153,8 @@ def name_conversion(caffe_layer_name):
            'bn_conv1/gamma': 'conv0/bn/gamma',
            'bn_conv1/mean/EMA': 'conv0/bn/mean/EMA',
            'bn_conv1/variance/EMA': 'conv0/bn/variance/EMA',
-            'conv1/W': 'conv0/W',
-            'conv1/b': 'conv0/b',
-            'fc1000/W': 'fc1000/W',
-            'fc1000/b': 'fc1000/b'}
+            'conv1/W': 'conv0/W', 'conv1/b': 'conv0/b',
+            'fc1000/W': 'fc1000/W', 'fc1000/b': 'fc1000/b'}
    if caffe_layer_name in NAME_MAP:
        return NAME_MAP[caffe_layer_name]

@@ -196,9 +188,8 @@ def name_conversion(caffe_layer_name):
 if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') # nargs='*' in multi mode
-    parser.add_argument('--load',
-                        help='.npy model file generated by tensorpack.utils.loadcaffe',
-                        required=True)
+    parser.add_argument('--load', required=True,
+                        help='.npy model file generated by tensorpack.utils.loadcaffe')
    parser.add_argument('--depth', help='resnet depth', required=True, type=int, choices=[50, 101, 152])
    parser.add_argument('--input', help='an input image')
    parser.add_argument('--eval', help='ILSVRC dir to run validation on')

--- a/tensorpack/dataflow/imgaug/noname.py
+++ b/tensorpack/dataflow/imgaug/noname.py
@@ -7,7 +7,7 @@ from ...utils import logger
 import numpy as np
 import cv2

-__all__ = ['Flip', 'Resize', 'RandomResize']
+__all__ = ['Flip', 'Resize', 'RandomResize', 'ResizeShortestEdge']

 class Flip(ImageAugmentor):
    """
@@ -57,6 +57,21 @@ class Resize(ImageAugmentor):
            img, self.shape[::-1],
            interpolation=self.interp)

+class ResizeShortestEdge(ImageAugmentor):
+    """ Resize the shortest edge to `size` while keeping the aspect ratio
+        (bicubic); both output edges are rounded to the nearest integer.
+    """
+    def __init__(self, size):
+        size = size * 1.0  # float, so `/` in _augment never floor-divides
+        self._init(locals())  # presumably stores `size` as self.size
+
+    def _augment(self, img, _):
+        h, w = img.shape[:2]
+        scale = self.size / min(h, w)
+        # round instead of truncating: 256.0 / 49 * 49 is 255.99999999999997
+        new_w, new_h = int(round(scale * w)), int(round(scale * h))
+        return cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
+
 class RandomResize(ImageAugmentor):
    """ randomly rescale w and h of the image"""
    def __init__(self, xrange, yrange, minimum=(0,0), aspect_ratio_thres=0.15,