Commit 7a0b15d5 authored by Yuxin Wu

add symbolic imagenet

parent 121a5d32
......@@ -3,6 +3,7 @@
import cv2
import os
import numpy as np
import tqdm
import multiprocessing
......@@ -21,6 +22,11 @@ from tensorpack.tfutils.summary import add_moving_summary
from tensorpack.utils import logger
"""
====== DataFlow =======
"""
class GoogleNetResize(imgaug.ImageAugmentor):
"""
crop 8%~100% of the original image
......@@ -88,17 +94,25 @@ def fbresnet_augmentor(isTrain):
def get_imagenet_dataflow(
datadir, name, batch_size,
augmentors, parallel=None):
augmentors=None, parallel=None):
"""
Args:
augmentors (list[imgaug.Augmentor]): Defaults to `fbresnet_augmentor(isTrain)`
Returns: A DataFlow which produces BGR images and labels.
See explanations in the tutorial:
http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html
"""
assert name in ['train', 'val', 'test']
isTrain = name == 'train'
assert datadir is not None
if augmentors is None:
augmentors = fbresnet_augmentor(isTrain)
assert isinstance(augmentors, list)
isTrain = name == 'train'
if parallel is None:
parallel = min(40, multiprocessing.cpu_count() // 2) # assuming hyperthreading
if isTrain:
ds = dataset.ILSVRC12(datadir, name, shuffle=True)
ds = AugmentImageComponent(ds, augmentors, copy=False)
......@@ -121,6 +135,165 @@ def get_imagenet_dataflow(
return ds
"""
====== tf.data =======
"""
def get_imagenet_tfdata(datadir, name, batch_size, mapper=None, parallel=None):
    """
    Build a symbolic (`tf.data`) input pipeline that reads ILSVRC12 image files,
    maps each file's raw bytes through `mapper` and batches the result.

    Args:
        datadir (str): dataset root. Images of a split are looked up under
            ``os.path.join(datadir, name)``.
        name (str): one of 'train', 'val', 'test'.
        batch_size (int): number of (image, label) pairs per batch.
        mapper: a symbolic function that takes a tf.string (the raw bytes read from file)
            and produces a BGR image. Defaults to `fbresnet_mapper(isTrain)`.
        parallel (int): passed to `map_and_batch` as ``num_parallel_batches``.
            Defaults to ``min(40, cpu_count // 2)``, but never less than 1.

    Returns:
        A `tf.data.Dataset`. If training, the dataset is shuffled and infinite;
        otherwise it makes a single pass in list order.
        The dataset contains BGR images and labels.
    """

    def get_imglist(root, split):
        """
        Returns:
            [(full filename, label)]
        """
        split_dir = os.path.join(root, split)
        meta = dataset.ILSVRCMeta()
        imglist = meta.get_image_list(
            split,
            dataset.ILSVRCMeta.guess_dir_structure(split_dir))

        def _filter(fname):
            # The original comment only says "png": presumably this file is a PNG
            # despite its .JPEG extension, which the JPEG-only decoding in the
            # mappers could not handle -- TODO confirm.
            return 'n02105855_2933.JPEG' in fname

        ret = []
        for fname, label in imglist:
            if _filter(fname):
                logger.info("Image {} was filtered out.".format(fname))
                continue
            ret.append((os.path.join(split_dir, fname), label))
        return ret

    assert name in ['train', 'val', 'test']
    assert datadir is not None
    isTrain = name == 'train'
    if mapper is None:
        mapper = fbresnet_mapper(isTrain)
    if parallel is None:
        # Half of the logical cores, assuming hyperthreading. Clamp to >= 1:
        # on a single-core machine `cpu_count() // 2` is 0, which is not a
        # usable parallelism for map_and_batch.
        parallel = max(1, min(40, multiprocessing.cpu_count() // 2))

    imglist = get_imglist(datadir, name)
    N = len(imglist)
    filenames = tf.constant([k[0] for k in imglist], name='filenames')
    labels = tf.constant([k[1] for k in imglist], dtype=tf.int32, name='labels')

    ds = tf.data.Dataset.from_tensor_slices((filenames, labels))
    if isTrain:
        # Shuffle buffer covers the whole file list, reshuffled on every repeat.
        ds = ds.shuffle(N, reshuffle_each_iteration=True).repeat()

    # Read the file and decode/augment it inside the fused map+batch op.
    ds = ds.apply(
        tf.data.experimental.map_and_batch(
            lambda fname, label: (mapper(tf.read_file(fname)), label),
            batch_size=batch_size,
            num_parallel_batches=parallel))
    ds = ds.prefetch(100)
    return ds
def fbresnet_mapper(isTrain):
    """
    Build the symbolic per-image preprocessing function used by `get_imagenet_tfdata`.

    Note: compared to fbresnet_augmentor, it
        lacks some photometric augmentation that may have a small effect on accuracy.

    Args:
        isTrain (bool): if True, return the training mapper (random-area crop,
            lighting noise, random horizontal flip); otherwise return the
            validation mapper (shortest edge resized to 256, center crop of 224).

    Returns:
        A function that takes the raw JPEG bytes (tf.string) of one image and
        returns a 224x224 uint8 image tensor in BGR channel order.
    """
    JPEG_OPT = {'fancy_upscaling': True, 'dct_method': 'INTEGER_ACCURATE'}

    def uint8_resize_bicubic(image, shape):
        # resize_bicubic operates on a batch, so wrap the image in a list and
        # take element 0 afterwards. Clip before casting back to uint8.
        ret = tf.image.resize_bicubic([image], shape)
        return tf.cast(tf.clip_by_value(ret, 0, 255), tf.uint8)[0]

    def resize_shortest_edge(image, image_shape, size):
        # image_shape is (height, width, ...): scale so that the shorter of the
        # two becomes `size` while keeping the aspect ratio.
        shape = tf.cast(image_shape, tf.float32)
        h_greater = tf.greater(image_shape[0], image_shape[1])
        shape = tf.cond(h_greater,
                        lambda: tf.cast([shape[0] / shape[1] * size, size], tf.int32),
                        lambda: tf.cast([size, shape[1] / shape[0] * size], tf.int32))
        return uint8_resize_bicubic(image, shape)

    def center_crop(image, size):
        image_height = tf.shape(image)[0]
        image_width = tf.shape(image)[1]

        offset_height = (image_height - size) // 2
        offset_width = (image_width - size) // 2
        image = tf.slice(image, [offset_height, offset_width, 0], [size, size, -1])
        return image

    def lighting(image, std, eigval, eigvec):
        # PCA lighting noise: one zero-mean Gaussian coefficient per
        # eigen-direction, scaled by the eigenvalue. Uniform noise in [0, 1)
        # would never be negative, so it could only shift the image one way
        # along each eigenvector.
        v = tf.random_normal(shape=[3], stddev=std) * eigval
        inc = tf.matmul(eigvec, tf.reshape(v, [3, 1]))
        shifted = tf.cast(image, tf.float32) + tf.reshape(inc, [3])
        if image.dtype == tf.uint8:
            # Keep values in range so the cast back to uint8 cannot wrap around.
            shifted = tf.clip_by_value(shifted, 0, 255)
        return tf.cast(shifted, image.dtype)

    def validation_mapper(byte):
        image = tf.image.decode_jpeg(
            tf.reshape(byte, shape=[]), 3, **JPEG_OPT)
        image = resize_shortest_edge(image, tf.shape(image), 256)
        image = center_crop(image, 224)
        image = tf.reverse(image, axis=[2])  # to BGR
        return image

    def training_mapper(byte):
        jpeg_shape = tf.image.extract_jpeg_shape(byte)  # hwc
        # Sample a crop covering 8%~100% of the image area with aspect ratio
        # in [0.75, 1.33]; no real bounding boxes are supplied.
        bbox_begin, bbox_size, distort_bbox = tf.image.sample_distorted_bounding_box(
            jpeg_shape,
            bounding_boxes=tf.zeros(shape=[0, 0, 4]),
            min_object_covered=0,
            aspect_ratio_range=[0.75, 1.33],
            area_range=[0.08, 1.0],
            max_attempts=10,
            use_image_if_no_bounding_boxes=True)

        # At least two matching entries between the sampled size and the image
        # shape: presumably height and width both match (the third entry of
        # bbox_size is not expected to equal the channel count), i.e. the
        # sampler returned the whole image -- treated as a failed sampling.
        is_bad = tf.reduce_sum(tf.cast(tf.equal(bbox_size, jpeg_shape), tf.int32)) >= 2

        def good():
            offset_y, offset_x, _ = tf.unstack(bbox_begin)
            target_height, target_width, _ = tf.unstack(bbox_size)
            crop_window = tf.stack([offset_y, offset_x, target_height, target_width])

            image = tf.image.decode_and_crop_jpeg(
                byte, crop_window, channels=3, **JPEG_OPT)
            image = uint8_resize_bicubic(image, [224, 224])
            return image

        def bad():
            # Fallback: plain shortest-edge resize followed by a center crop.
            image = tf.image.decode_jpeg(
                tf.reshape(byte, shape=[]), 3, **JPEG_OPT)
            image = resize_shortest_edge(image, jpeg_shape, 224)
            image = center_crop(image, 224)
            return image

        image = tf.cond(is_bad, bad, good)
        # TODO other imgproc
        # Lighting is applied while the image is still RGB; channels are
        # reversed to BGR only at the end.
        image = lighting(image, 0.1,
                         eigval=np.array([0.2175, 0.0188, 0.0045], dtype='float32') * 255.0,
                         eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                          [-0.5808, -0.0045, -0.8140],
                                          [-0.5836, -0.6948, 0.4203]], dtype='float32'))
        image = tf.image.random_flip_left_right(image)
        image = tf.reverse(image, axis=[2])  # to BGR
        return image

    return training_mapper if isTrain else validation_mapper
"""
====== Model & Evaluation =======
"""
def eval_on_ILSVRC12(model, sessinit, dataflow):
pred_config = PredictConfig(
model=model,
......@@ -266,17 +439,30 @@ class ImageNetModel(ModelDesc):
# Script entry point: measures the speed of the ImageNet input pipeline,
# either the Python DataFlow (default) or the tf.data pipeline (--symbolic).
if __name__ == '__main__':
import argparse
from tensorpack.dataflow import TestDataSpeed
from tensorpack.tfutils import get_default_sess_config
parser = argparse.ArgumentParser()
parser.add_argument('--data', required=True)
parser.add_argument('--batch', type=int, default=32)
parser.add_argument('--aug', choices=['train', 'val'], default='val')
parser.add_argument('--symbolic', action='store_true')
args = parser.parse_args()
# NOTE(review): the next eight lines duplicate the non-symbolic branch further
# down; they look like the pre-change side of this diff hunk -- confirm.
if args.aug == 'val':
augs = fbresnet_augmentor(False)
elif args.aug == 'train':
augs = fbresnet_augmentor(True)
df = get_imagenet_dataflow(
args.data, 'train', args.batch, augs)
# For val augmentor, Should get >100 it/s (i.e. 3k im/s) here on a decent E5 server.
TestDataSpeed(df).start()
# DataFlow path: always reads the 'train' split; --aug only selects which
# augmentor list is applied to it.
if not args.symbolic:
augs = fbresnet_augmentor(args.aug == 'train')
df = get_imagenet_dataflow(
args.data, 'train', args.batch, augs)
# For val augmentor, Should get >100 it/s (i.e. 3k im/s) here on a decent E5 server.
TestDataSpeed(df).start()
else:
# NOTE: --aug defaults to 'val', so --symbolic must be combined with
# `--aug train`, otherwise this assert fails.
assert args.aug == 'train'
# tf.data path: 'train' split with the default mapper of get_imagenet_tfdata.
data = get_imagenet_tfdata(args.data, 'train', args.batch)
itr = data.make_initializable_iterator()
dp = itr.get_next()
# Group the components of one batch into a single op to run per step.
dpop = tf.group(*dp)
with tf.Session(config=get_default_sess_config()) as sess:
sess.run(itr.initializer)
# 200 iterations first, then 5000 with a smoothed tqdm rate -- presumably a
# warm-up followed by the actual measurement.
for _ in tqdm.trange(200):
sess.run(dpop)
for _ in tqdm.trange(5000, smoothing=0.1):
sess.run(dpop)
......@@ -9,7 +9,7 @@ __Training__ code of three variants of ResNet on ImageNet:
The training follows the __exact__ recipe used by the [Training ImageNet in 1 Hour paper](https://arxiv.org/abs/1706.02677)
and gets the same performance.
Models trained with 8 GPUs and a total batch size of 256 are listed in the table below.
Distributed training code & results can be found at [tensorpack/benchmarks](https://github.com/tensorpack/benchmarks/tree/master/ResNet-Horovod).
This recipe has better performance than most open source implementations.
In fact, many papers that claim to "improve" ResNet by .5% only compete with a lower
......@@ -24,9 +24,10 @@ baseline and they actually cannot beat this ResNet recipe.
| ResNet101 | 6.04% | 21.95% | [:arrow_down:](http://models.tensorpack.com/ResNet/ImageNet-ResNet101.npz) |
| ResNet152 | 5.78% | 21.51% | [:arrow_down:](http://models.tensorpack.com/ResNet/ImageNet-ResNet152.npz) |
To train, first decompress ImageNet data into [this structure](http://tensorpack.readthedocs.io/en/latest/modules/dataflow.dataset.html#tensorpack.dataflow.dataset.ILSVRC12), then:
To reproduce the above results,
first decompress ImageNet data into [this structure](http://tensorpack.readthedocs.io/en/latest/modules/dataflow.dataset.html#tensorpack.dataflow.dataset.ILSVRC12), then:
```bash
./imagenet-resnet.py --data /path/to/original/ILSVRC -d 50 [--mode resnet/preact/se]
./imagenet-resnet.py --data /path/to/original/ILSVRC -d 50 [--mode resnet/preact/se] --batch 256
# See ./imagenet-resnet.py -h for other options.
```
......
......@@ -5,7 +5,7 @@
import argparse
import os
from tensorpack import logger, QueueInput
from tensorpack import logger, QueueInput, TFDatasetInput
from tensorpack.models import *
from tensorpack.callbacks import *
from tensorpack.train import (
......@@ -15,8 +15,8 @@ from tensorpack.tfutils import argscope, get_model_loader
from tensorpack.utils.gpu import get_num_gpu
from imagenet_utils import (
fbresnet_augmentor, get_imagenet_dataflow, ImageNetModel,
eval_on_ILSVRC12)
get_imagenet_dataflow, get_imagenet_tfdata,
ImageNetModel, eval_on_ILSVRC12)
from resnet_model import (
preresnet_group, preresnet_basicblock, preresnet_bottleneck,
resnet_group, resnet_basicblock, resnet_bottleneck, se_resnet_bottleneck,
......@@ -49,14 +49,7 @@ class Model(ImageNetModel):
preresnet_group if self.mode == 'preact' else resnet_group, self.block_func)
def get_data(name, batch):
    """
    Return the ImageNet dataflow for split `name` with batch size `batch`,
    using the fbresnet augmentors (training variant only when `name` is 'train').
    The dataset directory is taken from the module-level `args.data`.
    """
    augs = fbresnet_augmentor(name == 'train')
    return get_imagenet_dataflow(args.data, name, batch, augs)
def get_config(model, fake=False):
def get_config(model):
nr_tower = max(get_num_gpu(), 1)
assert args.batch % nr_tower == 0
batch = args.batch // nr_tower
......@@ -64,12 +57,15 @@ def get_config(model, fake=False):
logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch))
if batch < 32 or batch > 64:
logger.warn("Batch size per tower not in [32, 64]. This probably will lead to worse accuracy than reported.")
if fake:
if args.fake:
data = QueueInput(FakeData(
[[batch, 224, 224, 3], [batch]], 1000, random=False, dtype='uint8'))
callbacks = []
else:
data = QueueInput(get_data('train', batch))
if args.symbolic:
data = TFDatasetInput(get_imagenet_tfdata(args.data, 'train', batch))
else:
data = QueueInput(get_imagenet_dataflow(args.data, 'train', batch))
START_LR = 0.1
BASE_LR = START_LR * (args.batch / 256.0)
......@@ -88,7 +84,7 @@ def get_config(model, fake=False):
infs = [ClassificationError('wrong-top1', 'val-error-top1'),
ClassificationError('wrong-top5', 'val-error-top5')]
dataset_val = get_data('val', batch)
dataset_val = get_imagenet_dataflow(args.data, 'val', batch)
if nr_tower == 1:
# single-GPU inference with queue prefetch
callbacks.append(InferenceRunner(QueueInput(dataset_val), infs))
......@@ -97,6 +93,9 @@ def get_config(model, fake=False):
callbacks.append(DataParallelInferenceRunner(
dataset_val, infs, list(range(nr_tower))))
if get_num_gpu() > 0:
callbacks.append(GPUUtilizationTracker())
return TrainConfig(
model=model,
data=data,
......@@ -112,6 +111,7 @@ if __name__ == '__main__':
parser.add_argument('--data', help='ILSVRC dataset dir')
parser.add_argument('--load', help='load a model for training or evaluation')
parser.add_argument('--fake', help='use FakeData to debug or benchmark this model', action='store_true')
parser.add_argument('--symbolic', help='use symbolic data loader', action='store_true')
parser.add_argument('--data-format', help='image data format',
default='NCHW', choices=['NCHW', 'NHWC'])
parser.add_argument('-d', '--depth', help='ResNet depth',
......@@ -139,9 +139,11 @@ if __name__ == '__main__':
logger.set_logger_dir(os.path.join('train_log', 'tmp'), 'd')
else:
logger.set_logger_dir(
os.path.join('train_log', 'imagenet-{}-d{}-batch{}'.format(args.mode, args.depth, args.batch)))
os.path.join('train_log',
'imagenet-{}-d{}-batch{}'.format(
args.mode, args.depth, args.batch)))
config = get_config(model, fake=args.fake)
config = get_config(model)
if args.load:
config.session_init = get_model_loader(args.load)
trainer = SyncMultiGPUTrainerReplicated(max(get_num_gpu(), 1))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment