Commit 7a0b15d5 authored by Yuxin Wu

add symbolic imagenet

parent 121a5d32
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
import cv2 import cv2
import os
import numpy as np import numpy as np
import tqdm import tqdm
import multiprocessing import multiprocessing
...@@ -21,6 +22,11 @@ from tensorpack.tfutils.summary import add_moving_summary ...@@ -21,6 +22,11 @@ from tensorpack.tfutils.summary import add_moving_summary
from tensorpack.utils import logger from tensorpack.utils import logger
"""
====== DataFlow =======
"""
class GoogleNetResize(imgaug.ImageAugmentor): class GoogleNetResize(imgaug.ImageAugmentor):
""" """
crop 8%~100% of the original image crop 8%~100% of the original image
...@@ -88,17 +94,25 @@ def fbresnet_augmentor(isTrain): ...@@ -88,17 +94,25 @@ def fbresnet_augmentor(isTrain):
def get_imagenet_dataflow( def get_imagenet_dataflow(
datadir, name, batch_size, datadir, name, batch_size,
augmentors, parallel=None): augmentors=None, parallel=None):
""" """
Args:
augmentors (list[imgaug.Augmentor]): Defaults to `fbresnet_augmentor(isTrain)`
Returns: A DataFlow which produces BGR images and labels.
See explanations in the tutorial: See explanations in the tutorial:
http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html
""" """
assert name in ['train', 'val', 'test'] assert name in ['train', 'val', 'test']
isTrain = name == 'train'
assert datadir is not None assert datadir is not None
if augmentors is None:
augmentors = fbresnet_augmentor(isTrain)
assert isinstance(augmentors, list) assert isinstance(augmentors, list)
isTrain = name == 'train'
if parallel is None: if parallel is None:
parallel = min(40, multiprocessing.cpu_count() // 2) # assuming hyperthreading parallel = min(40, multiprocessing.cpu_count() // 2) # assuming hyperthreading
if isTrain: if isTrain:
ds = dataset.ILSVRC12(datadir, name, shuffle=True) ds = dataset.ILSVRC12(datadir, name, shuffle=True)
ds = AugmentImageComponent(ds, augmentors, copy=False) ds = AugmentImageComponent(ds, augmentors, copy=False)
...@@ -121,6 +135,165 @@ def get_imagenet_dataflow( ...@@ -121,6 +135,165 @@ def get_imagenet_dataflow(
return ds return ds
"""
====== tf.data =======
"""
def get_imagenet_tfdata(datadir, name, batch_size, mapper=None, parallel=None):
    """
    Build a symbolic ImageNet input pipeline on top of `tf.data`.

    Args:
        datadir: root directory of the dataset; the split `name` is joined onto it.
        name: one of 'train', 'val', 'test'.
        batch_size: number of samples per produced batch.
        mapper: a symbolic function that takes a tf.string (the raw bytes read from file)
            and produces a BGR image. Defaults to `fbresnet_mapper(isTrain)`.
        parallel: forwarded as `num_parallel_batches` to `map_and_batch`.
            Defaults to `min(40, cpu_count // 2)`.

    Returns:
        A `tf.data.Dataset`. If training, the dataset is infinite.
        The dataset contains BGR images and labels.
    """
    def list_images(root, split):
        """
        Returns:
            [(full filename, label)]
        """
        split_dir = os.path.join(root, split)
        meta = dataset.ILSVRCMeta()
        dir_structure = dataset.ILSVRCMeta.guess_dir_structure(split_dir)
        entries = meta.get_image_list(split, dir_structure)

        kept = []
        for relname, label in entries:
            # Marked "png" by the original author: this entry is excluded
            # from the JPEG decoding pipeline.
            if 'n02105855_2933.JPEG' in relname:
                logger.info("Image {} was filtered out.".format(relname))
            else:
                kept.append((os.path.join(split_dir, relname), label))
        return kept

    assert name in ['train', 'val', 'test']
    assert datadir is not None
    isTrain = name == 'train'
    if mapper is None:
        mapper = fbresnet_mapper(isTrain)
    if parallel is None:
        # assuming hyperthreading
        parallel = min(40, multiprocessing.cpu_count() // 2)

    samples = list_images(datadir, name)
    num_samples = len(samples)
    filenames = tf.constant([path for path, _ in samples], name='filenames')
    labels = tf.constant([lab for _, lab in samples], dtype=tf.int32, name='labels')

    ds = tf.data.Dataset.from_tensor_slices((filenames, labels))
    if isTrain:
        # Shuffle over the whole file list, then loop forever.
        ds = ds.shuffle(num_samples, reshuffle_each_iteration=True)
        ds = ds.repeat()

    def load_sample(fname, label):
        # Read the raw file bytes and let the mapper decode/augment them.
        return mapper(tf.read_file(fname)), label

    batched = tf.data.experimental.map_and_batch(
        load_sample,
        batch_size=batch_size,
        num_parallel_batches=parallel)
    ds = ds.apply(batched)
    return ds.prefetch(100)
def fbresnet_mapper(isTrain):
    """
    Build a symbolic (TensorFlow graph) preprocessing function for ImageNet.

    Note: compared to fbresnet_augmentor, it
    lacks some photometric augmentation that may have a small effect on accuracy.

    Args:
        isTrain (bool): select the training mapper (random crop, lighting
            noise, random horizontal flip) or the validation mapper
            (resize shortest edge to 256, center crop).

    Returns:
        A function taking a scalar tf.string tensor (raw JPEG bytes) and
        returning a 224x224x3 uint8 image tensor in BGR channel order.
    """
    JPEG_OPT = {'fancy_upscaling': True, 'dct_method': 'INTEGER_ACCURATE'}

    def uint8_resize_bicubic(image, shape):
        # resize_bicubic works on a batch and returns floats that may
        # overshoot [0, 255]; clip before converting back to uint8.
        ret = tf.image.resize_bicubic([image], shape)
        return tf.cast(tf.clip_by_value(ret, 0, 255), tf.uint8)[0]

    def resize_shortest_edge(image, image_shape, size):
        # image_shape is (height, width, ...); scale so that the shorter
        # of the two becomes `size` while keeping the aspect ratio.
        shape = tf.cast(image_shape, tf.float32)
        h_greater = tf.greater(image_shape[0], image_shape[1])
        shape = tf.cond(h_greater,
                        lambda: tf.cast([shape[0] / shape[1] * size, size], tf.int32),
                        lambda: tf.cast([size, shape[1] / shape[0] * size], tf.int32))
        return uint8_resize_bicubic(image, shape)

    def center_crop(image, size):
        image_height = tf.shape(image)[0]
        image_width = tf.shape(image)[1]
        offset_height = (image_height - size) // 2
        offset_width = (image_width - size) // 2
        image = tf.slice(image, [offset_height, offset_width, 0], [size, size, -1])
        return image

    def lighting(image, std, eigval, eigvec):
        # PCA lighting noise: the per-component coefficients must be
        # zero-mean Gaussian with stddev `std`. Uniform [0, 1) samples
        # would always shift the image in the same direction.
        v = tf.random_normal(shape=[3], stddev=std) * eigval
        inc = tf.matmul(eigvec, tf.reshape(v, [3, 1]))
        shifted = tf.cast(image, tf.float32) + tf.reshape(inc, [3])
        # Every caller passes a uint8 image; clip so that the cast back
        # cannot wrap around on pixels near 0 or 255.
        return tf.cast(tf.clip_by_value(shifted, 0, 255), image.dtype)

    def validation_mapper(byte):
        image = tf.image.decode_jpeg(
            tf.reshape(byte, shape=[]), 3, **JPEG_OPT)
        image = resize_shortest_edge(image, tf.shape(image), 256)
        image = center_crop(image, 224)
        image = tf.reverse(image, axis=[2])  # to BGR
        return image

    def training_mapper(byte):
        jpeg_shape = tf.image.extract_jpeg_shape(byte)  # hwc
        bbox_begin, bbox_size, distort_bbox = tf.image.sample_distorted_bounding_box(
            jpeg_shape,
            bounding_boxes=tf.zeros(shape=[0, 0, 4]),
            min_object_covered=0,
            aspect_ratio_range=[0.75, 1.33],
            area_range=[0.08, 1.0],
            max_attempts=10,
            use_image_if_no_bounding_boxes=True)

        # If at least two entries of the sampled size equal the image shape
        # (i.e. both height and width), the crop is the whole image --
        # presumably the sampler gave up after max_attempts. Fall back to
        # resize + center crop in that case.
        is_bad = tf.reduce_sum(tf.cast(tf.equal(bbox_size, jpeg_shape), tf.int32)) >= 2

        def good():
            offset_y, offset_x, _ = tf.unstack(bbox_begin)
            target_height, target_width, _ = tf.unstack(bbox_size)
            crop_window = tf.stack([offset_y, offset_x, target_height, target_width])

            # Decode only the cropped window instead of the full image.
            image = tf.image.decode_and_crop_jpeg(
                byte, crop_window, channels=3, **JPEG_OPT)
            image = uint8_resize_bicubic(image, [224, 224])
            return image

        def bad():
            image = tf.image.decode_jpeg(
                tf.reshape(byte, shape=[]), 3, **JPEG_OPT)
            image = resize_shortest_edge(image, jpeg_shape, 224)
            image = center_crop(image, 224)
            return image

        image = tf.cond(is_bad, bad, good)
        # TODO other imgproc
        # Lighting is applied while the image is still RGB, so the
        # eigenvalues/eigenvectors are given in RGB order.
        image = lighting(image, 0.1,
                         eigval=np.array([0.2175, 0.0188, 0.0045], dtype='float32') * 255.0,
                         eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                          [-0.5808, -0.0045, -0.8140],
                                          [-0.5836, -0.6948, 0.4203]], dtype='float32'))
        image = tf.image.random_flip_left_right(image)
        image = tf.reverse(image, axis=[2])  # to BGR
        return image

    return training_mapper if isTrain else validation_mapper
"""
====== Model & Evaluation =======
"""
def eval_on_ILSVRC12(model, sessinit, dataflow): def eval_on_ILSVRC12(model, sessinit, dataflow):
pred_config = PredictConfig( pred_config = PredictConfig(
model=model, model=model,
...@@ -266,17 +439,30 @@ class ImageNetModel(ModelDesc): ...@@ -266,17 +439,30 @@ class ImageNetModel(ModelDesc):
if __name__ == '__main__': if __name__ == '__main__':
import argparse import argparse
from tensorpack.dataflow import TestDataSpeed from tensorpack.dataflow import TestDataSpeed
from tensorpack.tfutils import get_default_sess_config
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--data', required=True) parser.add_argument('--data', required=True)
parser.add_argument('--batch', type=int, default=32) parser.add_argument('--batch', type=int, default=32)
parser.add_argument('--aug', choices=['train', 'val'], default='val') parser.add_argument('--aug', choices=['train', 'val'], default='val')
parser.add_argument('--symbolic', action='store_true')
args = parser.parse_args() args = parser.parse_args()
if args.aug == 'val': if not args.symbolic:
augs = fbresnet_augmentor(False) augs = fbresnet_augmentor(args.aug == 'train')
elif args.aug == 'train':
augs = fbresnet_augmentor(True)
df = get_imagenet_dataflow( df = get_imagenet_dataflow(
args.data, 'train', args.batch, augs) args.data, 'train', args.batch, augs)
# For val augmentor, Should get >100 it/s (i.e. 3k im/s) here on a decent E5 server. # For val augmentor, Should get >100 it/s (i.e. 3k im/s) here on a decent E5 server.
TestDataSpeed(df).start() TestDataSpeed(df).start()
else:
assert args.aug == 'train'
data = get_imagenet_tfdata(args.data, 'train', args.batch)
itr = data.make_initializable_iterator()
dp = itr.get_next()
dpop = tf.group(*dp)
with tf.Session(config=get_default_sess_config()) as sess:
sess.run(itr.initializer)
for _ in tqdm.trange(200):
sess.run(dpop)
for _ in tqdm.trange(5000, smoothing=0.1):
sess.run(dpop)
...@@ -9,7 +9,7 @@ __Training__ code of three variants of ResNet on ImageNet: ...@@ -9,7 +9,7 @@ __Training__ code of three variants of ResNet on ImageNet:
The training follows the __exact__ recipe used by the [Training ImageNet in 1 Hour paper](https://arxiv.org/abs/1706.02677) The training follows the __exact__ recipe used by the [Training ImageNet in 1 Hour paper](https://arxiv.org/abs/1706.02677)
and gets the same performance. and gets the same performance.
Models trained with 8 GPUs and a total batch size of 256 are listed in the table below. Distributed training code & results can be found at [tensorpack/benchmarks](https://github.com/tensorpack/benchmarks/tree/master/ResNet-Horovod).
This recipe has better performance than most open source implementations. This recipe has better performance than most open source implementations.
In fact, many papers that claim to "improve" ResNet by .5% only compete with a lower In fact, many papers that claim to "improve" ResNet by .5% only compete with a lower
...@@ -24,9 +24,10 @@ baseline and they actually cannot beat this ResNet recipe. ...@@ -24,9 +24,10 @@ baseline and they actually cannot beat this ResNet recipe.
| ResNet101 | 6.04% | 21.95% | [:arrow_down:](http://models.tensorpack.com/ResNet/ImageNet-ResNet101.npz) | | ResNet101 | 6.04% | 21.95% | [:arrow_down:](http://models.tensorpack.com/ResNet/ImageNet-ResNet101.npz) |
| ResNet152 | 5.78% | 21.51% | [:arrow_down:](http://models.tensorpack.com/ResNet/ImageNet-ResNet152.npz) | | ResNet152 | 5.78% | 21.51% | [:arrow_down:](http://models.tensorpack.com/ResNet/ImageNet-ResNet152.npz) |
To train, first decompress ImageNet data into [this structure](http://tensorpack.readthedocs.io/en/latest/modules/dataflow.dataset.html#tensorpack.dataflow.dataset.ILSVRC12), then: To reproduce the above results,
first decompress ImageNet data into [this structure](http://tensorpack.readthedocs.io/en/latest/modules/dataflow.dataset.html#tensorpack.dataflow.dataset.ILSVRC12), then:
```bash ```bash
./imagenet-resnet.py --data /path/to/original/ILSVRC -d 50 [--mode resnet/preact/se] ./imagenet-resnet.py --data /path/to/original/ILSVRC -d 50 [--mode resnet/preact/se] --batch 256
# See ./imagenet-resnet.py -h for other options. # See ./imagenet-resnet.py -h for other options.
``` ```
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
import argparse import argparse
import os import os
from tensorpack import logger, QueueInput from tensorpack import logger, QueueInput, TFDatasetInput
from tensorpack.models import * from tensorpack.models import *
from tensorpack.callbacks import * from tensorpack.callbacks import *
from tensorpack.train import ( from tensorpack.train import (
...@@ -15,8 +15,8 @@ from tensorpack.tfutils import argscope, get_model_loader ...@@ -15,8 +15,8 @@ from tensorpack.tfutils import argscope, get_model_loader
from tensorpack.utils.gpu import get_num_gpu from tensorpack.utils.gpu import get_num_gpu
from imagenet_utils import ( from imagenet_utils import (
fbresnet_augmentor, get_imagenet_dataflow, ImageNetModel, get_imagenet_dataflow, get_imagenet_tfdata,
eval_on_ILSVRC12) ImageNetModel, eval_on_ILSVRC12)
from resnet_model import ( from resnet_model import (
preresnet_group, preresnet_basicblock, preresnet_bottleneck, preresnet_group, preresnet_basicblock, preresnet_bottleneck,
resnet_group, resnet_basicblock, resnet_bottleneck, se_resnet_bottleneck, resnet_group, resnet_basicblock, resnet_bottleneck, se_resnet_bottleneck,
...@@ -49,14 +49,7 @@ class Model(ImageNetModel): ...@@ -49,14 +49,7 @@ class Model(ImageNetModel):
preresnet_group if self.mode == 'preact' else resnet_group, self.block_func) preresnet_group if self.mode == 'preact' else resnet_group, self.block_func)
def get_data(name, batch): def get_config(model):
isTrain = name == 'train'
augmentors = fbresnet_augmentor(isTrain)
return get_imagenet_dataflow(
args.data, name, batch, augmentors)
def get_config(model, fake=False):
nr_tower = max(get_num_gpu(), 1) nr_tower = max(get_num_gpu(), 1)
assert args.batch % nr_tower == 0 assert args.batch % nr_tower == 0
batch = args.batch // nr_tower batch = args.batch // nr_tower
...@@ -64,12 +57,15 @@ def get_config(model, fake=False): ...@@ -64,12 +57,15 @@ def get_config(model, fake=False):
logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch)) logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch))
if batch < 32 or batch > 64: if batch < 32 or batch > 64:
logger.warn("Batch size per tower not in [32, 64]. This probably will lead to worse accuracy than reported.") logger.warn("Batch size per tower not in [32, 64]. This probably will lead to worse accuracy than reported.")
if fake: if args.fake:
data = QueueInput(FakeData( data = QueueInput(FakeData(
[[batch, 224, 224, 3], [batch]], 1000, random=False, dtype='uint8')) [[batch, 224, 224, 3], [batch]], 1000, random=False, dtype='uint8'))
callbacks = [] callbacks = []
else: else:
data = QueueInput(get_data('train', batch)) if args.symbolic:
data = TFDatasetInput(get_imagenet_tfdata(args.data, 'train', batch))
else:
data = QueueInput(get_imagenet_dataflow(args.data, 'train', batch))
START_LR = 0.1 START_LR = 0.1
BASE_LR = START_LR * (args.batch / 256.0) BASE_LR = START_LR * (args.batch / 256.0)
...@@ -88,7 +84,7 @@ def get_config(model, fake=False): ...@@ -88,7 +84,7 @@ def get_config(model, fake=False):
infs = [ClassificationError('wrong-top1', 'val-error-top1'), infs = [ClassificationError('wrong-top1', 'val-error-top1'),
ClassificationError('wrong-top5', 'val-error-top5')] ClassificationError('wrong-top5', 'val-error-top5')]
dataset_val = get_data('val', batch) dataset_val = get_imagenet_dataflow(args.data, 'val', batch)
if nr_tower == 1: if nr_tower == 1:
# single-GPU inference with queue prefetch # single-GPU inference with queue prefetch
callbacks.append(InferenceRunner(QueueInput(dataset_val), infs)) callbacks.append(InferenceRunner(QueueInput(dataset_val), infs))
...@@ -97,6 +93,9 @@ def get_config(model, fake=False): ...@@ -97,6 +93,9 @@ def get_config(model, fake=False):
callbacks.append(DataParallelInferenceRunner( callbacks.append(DataParallelInferenceRunner(
dataset_val, infs, list(range(nr_tower)))) dataset_val, infs, list(range(nr_tower))))
if get_num_gpu() > 0:
callbacks.append(GPUUtilizationTracker())
return TrainConfig( return TrainConfig(
model=model, model=model,
data=data, data=data,
...@@ -112,6 +111,7 @@ if __name__ == '__main__': ...@@ -112,6 +111,7 @@ if __name__ == '__main__':
parser.add_argument('--data', help='ILSVRC dataset dir') parser.add_argument('--data', help='ILSVRC dataset dir')
parser.add_argument('--load', help='load a model for training or evaluation') parser.add_argument('--load', help='load a model for training or evaluation')
parser.add_argument('--fake', help='use FakeData to debug or benchmark this model', action='store_true') parser.add_argument('--fake', help='use FakeData to debug or benchmark this model', action='store_true')
parser.add_argument('--symbolic', help='use symbolic data loader', action='store_true')
parser.add_argument('--data-format', help='image data format', parser.add_argument('--data-format', help='image data format',
default='NCHW', choices=['NCHW', 'NHWC']) default='NCHW', choices=['NCHW', 'NHWC'])
parser.add_argument('-d', '--depth', help='ResNet depth', parser.add_argument('-d', '--depth', help='ResNet depth',
...@@ -139,9 +139,11 @@ if __name__ == '__main__': ...@@ -139,9 +139,11 @@ if __name__ == '__main__':
logger.set_logger_dir(os.path.join('train_log', 'tmp'), 'd') logger.set_logger_dir(os.path.join('train_log', 'tmp'), 'd')
else: else:
logger.set_logger_dir( logger.set_logger_dir(
os.path.join('train_log', 'imagenet-{}-d{}-batch{}'.format(args.mode, args.depth, args.batch))) os.path.join('train_log',
'imagenet-{}-d{}-batch{}'.format(
args.mode, args.depth, args.batch)))
config = get_config(model, fake=args.fake) config = get_config(model)
if args.load: if args.load:
config.session_init = get_model_loader(args.load) config.session_init = get_model_loader(args.load)
trainer = SyncMultiGPUTrainerReplicated(max(get_num_gpu(), 1)) trainer = SyncMultiGPUTrainerReplicated(max(get_num_gpu(), 1))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment