Commit 16f8f752 authored by Yuxin Wu

imagenet_resnet_utils

parent 2afe02ba
......@@ -16,6 +16,10 @@ from tensorpack.utils.stats import RatioCounter
from tensorpack.tfutils.symbolic_functions import *
from tensorpack.tfutils.summary import *
from imagenet_resnet_utils import (
fbresnet_augmentor, resnet_basicblock, resnet_bottleneck, resnet_backbone,
eval_on_ILSVRC12, image_preprocess, compute_loss_and_error)
TOTAL_BATCH_SIZE = 256
INPUT_SHAPE = 224
DEPTH = None
......@@ -36,91 +40,25 @@ class Model(ModelDesc):
def _build_graph(self, inputs):
image, label = inputs
image = tf.cast(image, tf.float32) * (1.0 / 255)
# It should actually use bgr=True here, but for compatibility with
# pretrained models, we keep the wrong version.
image = image_preprocess(image, bgr=False)
# Wrong mean/std are used for compatibility with pre-trained models.
# Should actually add a RGB-BGR conversion here.
image_mean = tf.constant([0.485, 0.456, 0.406], dtype=tf.float32)
image_std = tf.constant([0.229, 0.224, 0.225], dtype=tf.float32)
image = (image - image_mean) / image_std
if self.data_format == 'NCHW':
image = tf.transpose(image, [0, 3, 1, 2])
def shortcut(l, n_in, n_out, stride):
if n_in != n_out:
return Conv2D('convshortcut', l, n_out, 1, stride=stride)
else:
return l
def basicblock(l, ch_out, stride, preact):
ch_in = l.get_shape().as_list()[1]
if preact == 'both_preact':
l = BNReLU('preact', l)
input = l
elif preact != 'no_preact':
input = l
l = BNReLU('preact', l)
else:
input = l
l = Conv2D('conv1', l, ch_out, 3, stride=stride, nl=BNReLU)
l = Conv2D('conv2', l, ch_out, 3)
return l + shortcut(input, ch_in, ch_out, stride)
def bottleneck(l, ch_out, stride, preact):
ch_in = l.get_shape().as_list()[1]
if preact == 'both_preact':
l = BNReLU('preact', l)
input = l
elif preact != 'no_preact':
input = l
l = BNReLU('preact', l)
else:
input = l
l = Conv2D('conv1', l, ch_out, 1, nl=BNReLU)
l = Conv2D('conv2', l, ch_out, 3, stride=stride, nl=BNReLU)
l = Conv2D('conv3', l, ch_out * 4, 1)
return l + shortcut(input, ch_in, ch_out * 4, stride)
def layer(l, layername, block_func, features, count, stride, first=False):
with tf.variable_scope(layername):
with tf.variable_scope('block0'):
l = block_func(l, features, stride,
'no_preact' if first else 'both_preact')
for i in range(1, count):
with tf.variable_scope('block{}'.format(i)):
l = block_func(l, features, 1, 'default')
return l
cfg = {
18: ([2, 2, 2, 2], basicblock),
34: ([3, 4, 6, 3], basicblock),
50: ([3, 4, 6, 3], bottleneck),
101: ([3, 4, 23, 3], bottleneck)
18: ([2, 2, 2, 2], resnet_basicblock),
34: ([3, 4, 6, 3], resnet_basicblock),
50: ([3, 4, 6, 3], resnet_bottleneck),
101: ([3, 4, 23, 3], resnet_bottleneck)
}
defs, block_func = cfg[DEPTH]
with argscope(Conv2D, nl=tf.identity, use_bias=False,
W_init=variance_scaling_initializer(mode='FAN_OUT')), \
argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm], data_format=self.data_format):
logits = (LinearWrap(image)
.Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
.MaxPooling('pool0', shape=3, stride=2, padding='SAME')
.apply(layer, 'group0', block_func, 64, defs[0], 1, first=True)
.apply(layer, 'group1', block_func, 128, defs[1], 2)
.apply(layer, 'group2', block_func, 256, defs[2], 2)
.apply(layer, 'group3', block_func, 512, defs[3], 2)
.BNReLU('bnlast')
.GlobalAvgPooling('gap')
.FullyConnected('linear', 1000, nl=tf.identity)())
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
loss = tf.reduce_mean(loss, name='xentropy-loss')
with argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm], data_format=self.data_format):
logits = resnet_backbone(image, defs, block_func)
wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1'))
wrong = prediction_incorrect(logits, label, 5, name='wrong-top5')
add_moving_summary(tf.reduce_mean(wrong, name='train-error-top5'))
loss = compute_loss_and_error(logits, label)
wd_cost = regularize_cost('.*/W', l2_regularizer(1e-4), name='l2_regularize_loss')
add_moving_summary(loss, wd_cost)
......@@ -137,56 +75,9 @@ def get_data(train_or_test):
datadir = args.data
ds = dataset.ILSVRC12(datadir, train_or_test,
shuffle=True if isTrain else False, dir_structure='original')
if isTrain:
class Resize(imgaug.ImageAugmentor):
"""
crop 8%~100% of the original image
See `Going Deeper with Convolutions` by Google.
"""
def _augment(self, img, _):
h, w = img.shape[:2]
area = h * w
for _ in range(10):
targetArea = self.rng.uniform(0.08, 1.0) * area
aspectR = self.rng.uniform(0.75, 1.333)
ww = int(np.sqrt(targetArea * aspectR))
hh = int(np.sqrt(targetArea / aspectR))
if self.rng.uniform() < 0.5:
ww, hh = hh, ww
if hh <= h and ww <= w:
x1 = 0 if w == ww else self.rng.randint(0, w - ww)
y1 = 0 if h == hh else self.rng.randint(0, h - hh)
out = img[y1:y1 + hh, x1:x1 + ww]
out = cv2.resize(out, (224, 224), interpolation=cv2.INTER_CUBIC)
return out
out = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
return out
augmentors = fbresnet_augmentor(isTrain)
augmentors.append(imgaug.ToUint8())
augmentors = [
Resize(),
imgaug.RandomOrderAug(
[imgaug.Brightness(30, clip=False),
imgaug.Contrast((0.8, 1.2), clip=False),
imgaug.Saturation(0.4, rgb=False),
# rgb-bgr conversion
imgaug.Lighting(0.1,
eigval=[0.2175, 0.0188, 0.0045][::-1],
eigvec=np.array(
[[-0.5675, 0.7192, 0.4009],
[-0.5808, -0.0045, -0.8140],
[-0.5836, -0.6948, 0.4203]],
dtype='float32')[::-1, ::-1]
)]),
imgaug.Clip(),
imgaug.Flip(horiz=True),
imgaug.ToUint8()
]
else:
augmentors = [
imgaug.ResizeShortestEdge(256),
imgaug.CenterCrop((224, 224)),
imgaug.ToUint8()
]
ds = AugmentImageComponent(ds, augmentors, copy=False)
if isTrain:
ds = PrefetchDataZMQ(ds, min(20, multiprocessing.cpu_count()))
......@@ -219,24 +110,6 @@ def get_config(fake=False, data_format='NCHW'):
)
def eval_on_ILSVRC12(model_file, data_dir):
ds = get_data('val')
pred_config = PredictConfig(
model=Model(),
session_init=get_model_loader(model_file),
input_names=['input', 'label'],
output_names=['wrong-top1', 'wrong-top5']
)
pred = SimpleDatasetPredictor(pred_config, ds)
acc1, acc5 = RatioCounter(), RatioCounter()
for o in pred.get_result():
batch_size = o[0].shape[0]
acc1.feed(o[0].sum(), batch_size)
acc5.feed(o[1].sum(), batch_size)
print("Top1 Error: {}".format(acc1.ratio))
print("Top5 Error: {}".format(acc5.ratio))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.', required=True)
......@@ -256,7 +129,8 @@ if __name__ == '__main__':
if args.eval:
BATCH_SIZE = 128 # something that can run on one gpu
eval_on_ILSVRC12(args.load, args.data)
# Evaluate on the validation split. The checkpoint path comes from the
# command-line flag `args.load`; no `model_file` name exists in this scope.
ds = get_data('val')
eval_on_ILSVRC12(Model(), args.load, ds)
sys.exit()
NR_GPU = get_nr_gpu()
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# File: imagenet_resnet_utils.py
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.contrib.layers import variance_scaling_initializer
from tensorpack import imgaug
from tensorpack.tfutils import argscope
from tensorpack.models import *
class GoogleNetResize(imgaug.ImageAugmentor):
    """
    Random-area / random-aspect-ratio crop, resized to 224x224.

    Each attempt samples a target area of 8%~100% of the original image and
    an aspect ratio in [0.75, 1.333]; the first candidate that fits inside
    the image is cropped at a random offset and resized. If none of the 10
    attempts fits, the whole image is resized instead.
    See `Going Deeper with Convolutions` by Google.
    """

    def _augment(self, img, _):
        # `self.rng` is not defined in this class; presumably the
        # ImageAugmentor base class provides it -- TODO confirm.
        height, width = img.shape[:2]
        full_area = height * width
        for _attempt in range(10):
            target_area = self.rng.uniform(0.08, 1.0) * full_area
            aspect = self.rng.uniform(0.75, 1.333)
            crop_w = int(np.sqrt(target_area * aspect))
            crop_h = int(np.sqrt(target_area / aspect))
            # Swap the sides half of the time so both orientations occur.
            if self.rng.uniform() < 0.5:
                crop_w, crop_h = crop_h, crop_w
            # Candidate does not fit inside the image: draw again.
            if crop_h > height or crop_w > width:
                continue
            x1 = self.rng.randint(0, width - crop_w) if width != crop_w else 0
            y1 = self.rng.randint(0, height - crop_h) if height != crop_h else 0
            patch = img[y1:y1 + crop_h, x1:x1 + crop_w]
            return cv2.resize(patch, (224, 224), interpolation=cv2.INTER_CUBIC)
        # Fallback after 10 failed attempts: resize the full image.
        return cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
def fbresnet_augmentor(isTrain):
    """
    Build the augmentor list used in fb.resnet.torch, for BGR images.

    Args:
        isTrain (bool): if True, return the training pipeline (random crop,
            color jitter in random order, clip, horizontal flip); otherwise
            return the evaluation pipeline (resize shortest edge to 256,
            center crop 224x224).

    Returns:
        list: a new list of augmentors; callers may append to it.
    """
    if not isTrain:
        return [
            imgaug.ResizeShortestEdge(256),
            imgaug.CenterCrop((224, 224)),
        ]

    # rgb-bgr conversion: the constants are reversed before use.
    lighting_eigval = [0.2175, 0.0188, 0.0045][::-1]
    lighting_eigvec = np.array(
        [[-0.5675, 0.7192, 0.4009],
         [-0.5808, -0.0045, -0.8140],
         [-0.5836, -0.6948, 0.4203]],
        dtype='float32')[::-1, ::-1]

    random_crop = GoogleNetResize()
    color_jitter = imgaug.RandomOrderAug([
        imgaug.Brightness(30, clip=False),
        imgaug.Contrast((0.8, 1.2), clip=False),
        imgaug.Saturation(0.4, rgb=False),
        imgaug.Lighting(0.1, eigval=lighting_eigval, eigvec=lighting_eigvec),
    ])
    return [
        random_crop,
        color_jitter,
        imgaug.Clip(),
        imgaug.Flip(horiz=True),
    ]
def resnet_shortcut(l, n_in, n_out, stride):
    """
    Return the shortcut branch of a residual block.

    Args:
        l: input tensor of the shortcut branch.
        n_in (int): number of input channels.
        n_out (int): number of output channels.
        stride (int): stride used by the projection convolution.

    Returns:
        `l` unchanged when the channel counts match (regardless of
        `stride`); otherwise a 1x1 'convshortcut' convolution of `l`.
    """
    if n_in == n_out:
        return l
    return Conv2D('convshortcut', l, n_out, 1, stride=stride)
def resnet_basicblock(l, ch_out, stride, preact):
    """
    Pre-activation basic residual block: two 3x3 convolutions plus shortcut.

    Args:
        l: input tensor.
        ch_out (int): number of output channels of both convolutions.
        stride (int): stride of the first convolution and of the shortcut.
        preact (str): 'both_preact' applies BN+ReLU before both the residual
            branch and the shortcut; 'default' applies it to the residual
            branch only; any other value (e.g. 'no_preact') skips it.

    Returns:
        The sum of the residual branch and the shortcut.
    """
    # Channel count is read from axis 1 of the static shape; this assumes a
    # channels-first (NCHW) layout -- TODO confirm for other data formats.
    ch_in = l.get_shape().as_list()[1]
    if preact == 'both_preact':
        l = BNReLU('preact', l)
        shortcut_in = l
    else:
        # The shortcut sees the un-activated input.
        shortcut_in = l
        if preact == 'default':
            l = BNReLU('preact', l)
    l = Conv2D('conv1', l, ch_out, 3, stride=stride, nl=BNReLU)
    l = Conv2D('conv2', l, ch_out, 3)
    return l + resnet_shortcut(shortcut_in, ch_in, ch_out, stride)
def resnet_bottleneck(l, ch_out, stride, preact):
    """
    Pre-activation bottleneck residual block: 1x1 -> 3x3 -> 1x1 plus shortcut.

    Args:
        l: input tensor.
        ch_out (int): channels of the first two convolutions; the block
            outputs ``ch_out * 4`` channels.
        stride (int): stride of the 3x3 convolution and of the shortcut.
        preact (str): 'both_preact' applies BN+ReLU before both the residual
            branch and the shortcut; 'default' applies it to the residual
            branch only; any other value (e.g. 'no_preact') skips it.

    Returns:
        The sum of the residual branch and the shortcut.
    """
    # Channel count is read from axis 1 of the static shape; this assumes a
    # channels-first (NCHW) layout -- TODO confirm for other data formats.
    ch_in = l.get_shape().as_list()[1]
    if preact == 'both_preact':
        l = BNReLU('preact', l)
        shortcut_in = l
    else:
        # The shortcut sees the un-activated input.
        shortcut_in = l
        if preact == 'default':
            l = BNReLU('preact', l)
    l = Conv2D('conv1', l, ch_out, 1, nl=BNReLU)
    l = Conv2D('conv2', l, ch_out, 3, stride=stride, nl=BNReLU)
    l = Conv2D('conv3', l, ch_out * 4, 1)
    return l + resnet_shortcut(shortcut_in, ch_in, ch_out * 4, stride)
def resnet_group(l, name, block_func, features, count, stride, first=False):
    """
    Stack `count` residual blocks under the variable scope `name`.

    Args:
        l: input tensor.
        name (str): variable scope of the group.
        block_func: callable ``(l, features, stride, preact) -> tensor``.
        features (int): channel argument forwarded to every block.
        count (int): number of blocks; 'block0' is always built.
        stride (int): stride of the first block; later blocks use 1.
        first (bool): if True, the first block is built with 'no_preact'
            instead of 'both_preact'.

    Returns:
        Output tensor of the last block.
    """
    head_preact = 'no_preact' if first else 'both_preact'
    with tf.variable_scope(name):
        with tf.variable_scope('block0'):
            l = block_func(l, features, stride, head_preact)
        for scope_name in ('block{}'.format(k) for k in range(1, count)):
            with tf.variable_scope(scope_name):
                l = block_func(l, features, 1, 'default')
    return l
def resnet_backbone(image, num_blocks, block_func):
    """
    Build the ResNet trunk and 1000-way classifier on top of `image`.

    Args:
        image: preprocessed input tensor.
        num_blocks: indexable with 0..3; number of blocks in each group.
        block_func: residual block builder, e.g. `resnet_basicblock` or
            `resnet_bottleneck`.

    Returns:
        Output of the final 'linear' layer (1000 units, identity
        nonlinearity).
    """
    # (scope name, features) of the groups after 'group0'; all use stride 2.
    later_groups = (('group1', 128), ('group2', 256), ('group3', 512))

    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=variance_scaling_initializer(mode='FAN_OUT')):
        net = LinearWrap(image)
        net = net.Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
        net = net.MaxPooling('pool0', shape=3, stride=2, padding='SAME')
        net = net.apply(resnet_group, 'group0', block_func, 64, num_blocks[0], 1, first=True)
        for idx, (scope, features) in enumerate(later_groups, 1):
            net = net.apply(resnet_group, scope, block_func, features, num_blocks[idx], 2)
        net = net.BNReLU('bnlast')
        net = net.GlobalAvgPooling('gap')
        net = net.FullyConnected('linear', 1000, nl=tf.identity)
        logits = net()
    return logits
def eval_on_ILSVRC12(model, model_file, dataflow):
    """
    Run `model` over `dataflow` and print top-1 / top-5 error.

    Args:
        model: model description passed to `PredictConfig`; its graph must
            expose inputs named 'input' and 'label' and outputs named
            'wrong-top1' and 'wrong-top5'.
        model_file: path of the saved model given to `get_model_loader`.
        dataflow: dataflow yielding evaluation batches.
    """
    # These names are not provided by this module's top-level imports, so
    # import them here to keep the function usable on its own.
    from tensorpack.predict import PredictConfig, SimpleDatasetPredictor
    from tensorpack.tfutils import get_model_loader
    from tensorpack.utils.stats import RatioCounter

    pred_config = PredictConfig(
        model=model,
        session_init=get_model_loader(model_file),
        input_names=['input', 'label'],
        output_names=['wrong-top1', 'wrong-top5']
    )
    pred = SimpleDatasetPredictor(pred_config, dataflow)
    acc1, acc5 = RatioCounter(), RatioCounter()
    for o in pred.get_result():
        # o[0] / o[1] are the per-sample top-1 / top-5 "wrong" vectors.
        batch_size = o[0].shape[0]
        acc1.feed(o[0].sum(), batch_size)
        acc5.feed(o[1].sum(), batch_size)
    print("Top1 Error: {}".format(acc1.ratio))
    print("Top5 Error: {}".format(acc5.ratio))
def image_preprocess(image, bgr=True):
    """
    Scale an image tensor to [0, 1] and normalize it per channel.

    Args:
        image: image tensor with values in [0, 255]; it is cast to float32
            if it has another dtype. The 3-element mean/std constants
            broadcast against the last axis, so channels are presumably the
            last dimension -- TODO confirm against callers.
        bgr (bool): if True, the RGB mean/std constants are reversed to
            match a BGR channel order.

    Returns:
        float32 tensor ``(image / 255 - mean) / std``.
    """
    if image.dtype.base_dtype != tf.float32:
        # `tf.cast` converts the dtype; the previous `tf.case` is a
        # control-flow op and fails on these arguments.
        image = tf.cast(image, tf.float32)
    image = image * (1.0 / 255)

    mean = [0.485, 0.456, 0.406]    # rgb
    std = [0.229, 0.224, 0.225]
    if bgr:
        mean = mean[::-1]
        std = std[::-1]
    image_mean = tf.constant(mean, dtype=tf.float32)
    image_std = tf.constant(std, dtype=tf.float32)
    image = (image - image_mean) / image_std
    return image
def compute_loss_and_error(logits, label):
    """
    Build the cross-entropy loss and the top-1 / top-5 error summaries.

    Creates the tensors 'xentropy-loss', 'wrong-top1', 'wrong-top5',
    'train-error-top1' and 'train-error-top5', and registers the two
    train-error means as moving summaries.

    Args:
        logits: classifier outputs.
        label: sparse integer class labels.

    Returns:
        The mean cross-entropy loss tensor, named 'xentropy-loss'.
    """
    # Not provided by this module's top-level imports; import locally so
    # the function does not raise NameError.
    from tensorpack.tfutils.summary import add_moving_summary

    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    loss = tf.reduce_mean(loss, name='xentropy-loss')

    def prediction_incorrect(logits, label, topk=1, name='incorrect_vector'):
        # 1.0 where the label is NOT among the top-k predictions, else 0.0.
        return tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, topk)),
                       tf.float32, name=name)

    wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
    add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1'))

    wrong = prediction_incorrect(logits, label, 5, name='wrong-top5')
    add_moving_summary(tf.reduce_mean(wrong, name='train-error-top5'))
    return loss
......@@ -15,6 +15,11 @@ from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *
from tensorpack.tfutils.summary import *
from imagenet_resnet_utils import (
fbresnet_augmentor, resnet_basicblock, resnet_bottleneck, resnet_group,
image_preprocess, compute_loss_and_error)
TOTAL_BATCH_SIZE = 256
INPUT_SHAPE = 224
DEPTH = None
......@@ -27,63 +32,14 @@ class Model(ModelDesc):
def _build_graph(self, inputs):
image, label = inputs
image = tf.cast(image, tf.float32) * (1.0 / 255)
image_mean = tf.constant([0.485, 0.456, 0.406], dtype=tf.float32)
image_std = tf.constant([0.229, 0.224, 0.225], dtype=tf.float32)
image = (image - image_mean) / image_std
image = image_preprocess(image, bgr=False)
image = tf.transpose(image, [0, 3, 1, 2])
def shortcut(l, n_in, n_out, stride):
if n_in != n_out:
return Conv2D('convshortcut', l, n_out, 1, stride=stride)
else:
return l
def basicblock(l, ch_out, stride, preact):
ch_in = l.get_shape().as_list()[1]
if preact == 'both_preact':
l = BNReLU('preact', l)
input = l
elif preact != 'no_preact':
input = l
l = BNReLU('preact', l)
else:
input = l
l = Conv2D('conv1', l, ch_out, 3, stride=stride, nl=BNReLU)
l = Conv2D('conv2', l, ch_out, 3)
return l + shortcut(input, ch_in, ch_out, stride)
def bottleneck(l, ch_out, stride, preact):
ch_in = l.get_shape().as_list()[1]
if preact == 'both_preact':
l = BNReLU('preact', l)
input = l
elif preact != 'no_preact':
input = l
l = BNReLU('preact', l)
else:
input = l
l = Conv2D('conv1', l, ch_out, 1, nl=BNReLU)
l = Conv2D('conv2', l, ch_out, 3, stride=stride, nl=BNReLU)
l = Conv2D('conv3', l, ch_out * 4, 1)
return l + shortcut(input, ch_in, ch_out * 4, stride)
def layer(l, layername, block_func, features, count, stride, first=False):
with tf.variable_scope(layername):
with tf.variable_scope('block0'):
l = block_func(l, features, stride,
'no_preact' if first else 'both_preact')
for i in range(1, count):
with tf.variable_scope('block{}'.format(i)):
l = block_func(l, features, 1, 'default')
return l
cfg = {
18: ([2, 2, 2, 2], basicblock),
34: ([3, 4, 6, 3], basicblock),
50: ([3, 4, 6, 3], bottleneck),
101: ([3, 4, 23, 3], bottleneck)
18: ([2, 2, 2, 2], resnet_basicblock),
34: ([3, 4, 6, 3], resnet_basicblock),
50: ([3, 4, 6, 3], resnet_bottleneck),
101: ([3, 4, 23, 3], resnet_bottleneck)
}
defs, block_func = cfg[DEPTH]
......@@ -93,25 +49,17 @@ class Model(ModelDesc):
convmaps = (LinearWrap(image)
.Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
.MaxPooling('pool0', shape=3, stride=2, padding='SAME')
.apply(layer, 'group0', block_func, 64, defs[0], 1, first=True)
.apply(layer, 'group1', block_func, 128, defs[1], 2)
.apply(layer, 'group2', block_func, 256, defs[2], 2)
.apply(layer, 'group3new', block_func, 512, defs[3], 1)
.apply(resnet_group, 'group0', block_func, 64, defs[0], 1, first=True)
.apply(resnet_group, 'group1', block_func, 128, defs[1], 2)
.apply(resnet_group, 'group2', block_func, 256, defs[2], 2)
.apply(resnet_group, 'group3new', block_func, 512, defs[3], 1)
.BNReLU('bnlast')())
print(convmaps)
logits = (LinearWrap(convmaps)
.GlobalAvgPooling('gap')
.FullyConnected('linearnew', 1000, nl=tf.identity)())
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
loss = tf.reduce_mean(loss, name='xentropy-loss')
wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1'))
wrong = prediction_incorrect(logits, label, 5, name='wrong-top5')
add_moving_summary(tf.reduce_mean(wrong, name='train-error-top5'))
loss = compute_loss_and_error(logits, label)
wd_cost = regularize_cost('.*/W', l2_regularizer(1e-4), name='l2_regularize_loss')
add_moving_summary(loss, wd_cost)
self.cost = tf.add_n([loss, wd_cost], name='cost')
......@@ -131,52 +79,10 @@ def get_data(train_or_test):
datadir = args.data
ds = dataset.ILSVRC12(datadir, train_or_test,
shuffle=True if isTrain else False,
dir_structure='train')
if isTrain:
class Resize(imgaug.ImageAugmentor):
def _augment(self, img, _):
h, w = img.shape[:2]
area = h * w
for _ in range(10):
targetArea = self.rng.uniform(0.08, 1.0) * area
aspectR = self.rng.uniform(0.75, 1.333)
ww = int(np.sqrt(targetArea * aspectR))
hh = int(np.sqrt(targetArea / aspectR))
if self.rng.uniform() < 0.5:
ww, hh = hh, ww
if hh <= h and ww <= w:
x1 = 0 if w == ww else self.rng.randint(0, w - ww)
y1 = 0 if h == hh else self.rng.randint(0, h - hh)
out = img[y1:y1 + hh, x1:x1 + ww]
out = cv2.resize(out, (224, 224), interpolation=cv2.INTER_CUBIC)
return out
out = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
return out
dir_structure='original')
augmentors = fbresnet_augmentor(isTrain)
augmentors.append(imgaug.ToUint8())
augmentors = [
Resize(),
imgaug.RandomOrderAug(
[imgaug.Brightness(30, clip=False),
imgaug.Contrast((0.8, 1.2), clip=False),
imgaug.Saturation(0.4, rgb=False),
imgaug.Lighting(0.1,
eigval=[0.2175, 0.0188, 0.0045][::-1],
eigvec=np.array(
[[-0.5675, 0.7192, 0.4009],
[-0.5808, -0.0045, -0.8140],
[-0.5836, -0.6948, 0.4203]],
dtype='float32')[::-1, ::-1]
)]),
imgaug.Clip(),
imgaug.Flip(horiz=True),
imgaug.ToUint8()
]
else:
augmentors = [
imgaug.ResizeShortestEdge(256),
imgaug.CenterCrop((224, 224)),
imgaug.ToUint8()
]
ds = AugmentImageComponent(ds, augmentors, copy=False)
if isTrain:
ds = PrefetchDataZMQ(ds, min(20, multiprocessing.cpu_count()))
......
../ResNet/imagenet_resnet_utils.py
\ No newline at end of file
......@@ -71,10 +71,12 @@ class RunUpdateOps(RunOp):
_chief_only = False
def __init__(self, collection=tf.GraphKeys.UPDATE_OPS):
name = 'UPDATE_OPS' if collection == tf.GraphKeys.UPDATE_OPS else collection
def f():
ops = tf.get_collection(collection)
if ops:
logger.info("Applying UPDATE_OPS collection of {} ops.".format(len(ops)))
logger.info("Applying collection {} of {} ops.".format(name, len(ops)))
return tf.group(*ops, name='update_ops')
else:
return tf.no_op(name='empty_update_ops')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment