Commit 734b64aa authored by Yuxin Wu

resize shortest edge

parent 972e298a
......@@ -64,6 +64,7 @@ class Model(ModelDesc):
def run_submission(cfg, output, nr):
player = get_player(dumpdir=output)
predfunc = get_predict_func(cfg)
logger.info("Start evaluation: ")
for k in range(nr):
if k != 0:
player.restart_episode()
......
......@@ -4,6 +4,7 @@
# Author: Yuxin Wu <ppwwyyxx@gmail.com>
import cv2
import sys
import argparse
import numpy as np
import os
......@@ -121,7 +122,6 @@ class Model(ModelDesc):
200000, 0.7, True)
wd_cost = tf.mul(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='l2_regularize_loss')
add_moving_summary(loss, wd_cost)
self.cost = tf.add_n([loss, wd_cost], name='cost')
def get_data(train_or_test):
......@@ -172,14 +172,8 @@ def get_data(train_or_test):
imgaug.MapImage(lambda x: (x * (1.0 / 255) - image_mean) / image_std),
]
else:
def resize_func(im):
h, w = im.shape[:2]
scale = 256.0 / min(h, w)
desSize = map(int, [scale * w, scale * h])
im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC)
return im
augmentors = [
imgaug.MapImage(resize_func),
imgaug.ResizeShortestEdge(256),
imgaug.CenterCrop((224, 224)),
imgaug.MapImage(lambda x: (x * (1.0 / 255) - image_mean) / image_std),
]
......@@ -189,7 +183,6 @@ def get_data(train_or_test):
ds = PrefetchDataZMQ(ds, min(12, multiprocessing.cpu_count()))
return ds
def get_config():
# prepare dataset
dataset_train = get_data('train')
......@@ -209,7 +202,7 @@ def get_config():
ClassificationError('wrong-top1', 'val-error-top1'),
ClassificationError('wrong-top5', 'val-error-top5')]),
ScheduledHyperParamSetter('learning_rate',
[(30, 1e-2), (60, 1e-3), (85, 2e-4)]),
[(30, 1e-2), (60, 1e-3), (85, 1e-4), (95, 1e-5)]),
HumanHyperParamSetter('learning_rate'),
]),
session_config=sess_config,
......@@ -218,17 +211,39 @@ def get_config():
max_epoch=110,
)
def eval_on_ILSVRC12(model_file, data_dir):
    """Run a saved model over the 'val' data and print top-1 / top-5 error.

    :param model_file: model to restore; handed to ``get_model_loader``.
    :param data_dir: accepted for the command-line interface but not read
        anywhere in this function.
    """
    val_ds = get_data('val')
    predictor = SimpleDatasetPredictor(
        PredictConfig(
            model=Model(),
            input_var_names=['input', 'label'],
            session_init=get_model_loader(model_file),
            output_var_names=['wrong-top1', 'wrong-top5']
        ),
        val_ds)
    top1, top5 = RatioCounter(), RatioCounter()

    def report():
        # Print the error ratios accumulated so far.
        print("Top1 Error: {}".format(top1.ratio))
        print("Top5 Error: {}".format(top5.ratio))

    for outputs in predictor.get_result():
        # outputs[0] / outputs[1] follow the order of output_var_names;
        # the leading dimension of the first output is used as the batch size.
        n_samples = outputs[0].shape[0]
        top1.feed(outputs[0].sum(), n_samples)
        top5.feed(outputs[1].sum(), n_samples)
        # NOTE(review): the flattened source does not show whether the first
        # pair of prints sits inside this loop; it is assumed to be a
        # per-batch running report -- confirm against the real file.
        report()
    report()
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') # nargs='*' in multi mode
parser.add_argument('--data', help='ILSVRC dataset dir')
parser.add_argument('--load', help='load model')
parser.add_argument('--eval', action='store_true')
args = parser.parse_args()
logger.auto_set_dir()
if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
if args.eval:
eval_on_ILSVRC12(args.load, args.data)
sys.exit()
logger.auto_set_dir()
config = get_config()
if args.load:
......
......@@ -100,14 +100,8 @@ def get_inference_augmentor():
pp_mean = meta.get_per_pixel_mean()
pp_mean_224 = pp_mean[16:-16,16:-16,:]
def resize_func(im):
h, w = im.shape[:2]
scale = 256.0 / min(h, w)
desSize = map(int, [scale * w, scale * h])
im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC)
return im
transformers = imgaug.AugmentorList([
imgaug.MapImage(resize_func),
imgaug.ResizeShortestEdge(256),
imgaug.CenterCrop((224, 224)),
imgaug.MapImage(lambda x: x - pp_mean_224),
])
......@@ -159,10 +153,8 @@ def name_conversion(caffe_layer_name):
'bn_conv1/gamma': 'conv0/bn/gamma',
'bn_conv1/mean/EMA': 'conv0/bn/mean/EMA',
'bn_conv1/variance/EMA': 'conv0/bn/variance/EMA',
'conv1/W': 'conv0/W',
'conv1/b': 'conv0/b',
'fc1000/W': 'fc1000/W',
'fc1000/b': 'fc1000/b'}
'conv1/W': 'conv0/W', 'conv1/b': 'conv0/b',
'fc1000/W': 'fc1000/W', 'fc1000/b': 'fc1000/b'}
if caffe_layer_name in NAME_MAP:
return NAME_MAP[caffe_layer_name]
......@@ -196,9 +188,8 @@ def name_conversion(caffe_layer_name):
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') # nargs='*' in multi mode
parser.add_argument('--load',
help='.npy model file generated by tensorpack.utils.loadcaffe',
required=True)
parser.add_argument('--load', required=True,
help='.npy model file generated by tensorpack.utils.loadcaffe')
parser.add_argument('--depth', help='resnet depth', required=True, type=int, choices=[50, 101, 152])
parser.add_argument('--input', help='an input image')
parser.add_argument('--eval', help='ILSVRC dir to run validation on')
......
......@@ -7,7 +7,7 @@ from ...utils import logger
import numpy as np
import cv2
__all__ = ['Flip', 'Resize', 'RandomResize']
__all__ = ['Flip', 'Resize', 'RandomResize', 'ResizeShortestEdge']
class Flip(ImageAugmentor):
"""
......@@ -57,6 +57,21 @@ class Resize(ImageAugmentor):
img, self.shape[::-1],
interpolation=self.interp)
class ResizeShortestEdge(ImageAugmentor):
    """Resize an image so that its shortest edge has a given length.

    Both edges are scaled by the same factor, so the aspect ratio is kept.
    Resampling uses ``cv2.INTER_CUBIC``.
    """

    def __init__(self, size):
        """
        :param size: target length of the shortest edge, in pixels.
        """
        # Convert to float so the division in _augment is a true division
        # even when an int is passed under Python 2.
        size = size * 1.0
        # NOTE(review): _init lives on the base class, outside this view.
        # It presumably copies these locals onto the instance, since
        # _augment reads self.size -- confirm.
        self._init(locals())

    def _augment(self, img, _):
        """Return ``img`` rescaled so that min(height, width) matches ``self.size``.

        :param img: array whose first two dimensions are (height, width).
        :param _: augmentation parameters; unused.
        """
        h, w = img.shape[:2]
        scale = self.size / min(h, w)
        # Round to nearest instead of truncating: floating-point round-off
        # can give e.g. 256.0 / 49 * 49 == 255.99999999999997, which int()
        # alone would turn into 255 and leave the shortest edge a pixel short.
        new_w = int(round(scale * w))
        new_h = int(round(scale * h))
        # cv2.resize expects the destination size as (width, height).
        return cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
class RandomResize(ImageAugmentor):
""" randomly rescale w and h of the image"""
def __init__(self, xrange, yrange, minimum=(0,0), aspect_ratio_thres=0.15,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment