add symbolic imagenet

7a0b15d5 · Yuxin Wu · 121a5d32 · 7a0b15d5 · 7a0b15d5 · 7a0b15d5
Commit 7a0b15d5 authored Nov 30, 2018 by Yuxin Wu
3 changed files
--- a/examples/ImageNetModels/imagenet_utils.py
+++ b/examples/ImageNetModels/imagenet_utils.py
@@ -3,6 +3,7 @@


 import cv2
+import os
 import numpy as np
 import tqdm
 import multiprocessing
@@ -21,6 +22,11 @@ from tensorpack.tfutils.summary import add_moving_summary
 from tensorpack.utils import logger


+"""
+====== DataFlow =======
+"""
+
+
 class GoogleNetResize(imgaug.ImageAugmentor):
    """
    crop 8%~100% of the original image
@@ -88,17 +94,25 @@ def fbresnet_augmentor(isTrain):

 def get_imagenet_dataflow(
        datadir, name, batch_size,
-        augmentors, parallel=None):
+        augmentors=None, parallel=None):
    """
+    Args:
+        augmentors (list[imgaug.Augmentor]): Defaults to `fbresnet_augmentor(isTrain)`
+
+    Returns: A DataFlow which produces BGR images and labels.
+
    See explanations in the tutorial:
    http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html
    """
    assert name in ['train', 'val', 'test']
+    isTrain = name == 'train'
    assert datadir is not None
+    if augmentors is None:
+        augmentors = fbresnet_augmentor(isTrain)
    assert isinstance(augmentors, list)
-    isTrain = name == 'train'
    if parallel is None:
        parallel = min(40, multiprocessing.cpu_count() // 2)  # assuming hyperthreading
+
    if isTrain:
        ds = dataset.ILSVRC12(datadir, name, shuffle=True)
        ds = AugmentImageComponent(ds, augmentors, copy=False)
@@ -121,6 +135,165 @@ def get_imagenet_dataflow(
    return ds


+"""
+====== tf.data =======
+"""
+
+
+def get_imagenet_tfdata(datadir, name, batch_size, mapper=None, parallel=None):
+    """
+    Args:
+        mapper: a symbolic function that takes a tf.string (the raw bytes read from file) and produces a BGR image.
+            Defaults to `fbresnet_mapper(isTrain)`.
+
+    Returns:
+        A `tf.data.Dataset`. If training, the dataset is infinite.
+        The dataset contains BGR images and labels.
+    """
+
+    def get_imglist(dir, name):
+        """
+        Returns:
+            [(full filename, label)]
+        """
+        dir = os.path.join(dir, name)
+        meta = dataset.ILSVRCMeta()
+        imglist = meta.get_image_list(
+            name,
+            dataset.ILSVRCMeta.guess_dir_structure(dir))
+
+        def _filter(fname):
+            # png
+            return 'n02105855_2933.JPEG' in fname
+
+        ret = []
+        for fname, label in imglist:
+            if _filter(fname):
+                logger.info("Image {} was filtered out.".format(fname))
+                continue
+            fname = os.path.join(dir, fname)
+            ret.append((fname, label))
+        return ret
+
+    assert name in ['train', 'val', 'test']
+    assert datadir is not None
+    isTrain = name == 'train'
+    if mapper is None:
+        mapper = fbresnet_mapper(isTrain)
+    if parallel is None:
+        parallel = min(40, multiprocessing.cpu_count() // 2)  # assuming hyperthreading
+    imglist = get_imglist(datadir, name)
+
+    N = len(imglist)
+    filenames = tf.constant([k[0] for k in imglist], name='filenames')
+    labels = tf.constant([k[1] for k in imglist], dtype=tf.int32, name='labels')
+
+    ds = tf.data.Dataset.from_tensor_slices((filenames, labels))
+
+    if isTrain:
+        ds = ds.shuffle(N, reshuffle_each_iteration=True).repeat()
+
+    ds = ds.apply(
+        tf.data.experimental.map_and_batch(
+            lambda fname, label: (mapper(tf.read_file(fname)), label),
+            batch_size=batch_size,
+            num_parallel_batches=parallel))
+    ds = ds.prefetch(100)
+    return ds
+
+
+def fbresnet_mapper(isTrain):
+    """
+    Note: compared to fbresnet_augmentor, it
+    lacks some photometric augmentation that may have a small effect on accuracy.
+    """
+    JPEG_OPT = {'fancy_upscaling': True, 'dct_method': 'INTEGER_ACCURATE'}
+
+    def uint8_resize_bicubic(image, shape):
+        ret = tf.image.resize_bicubic([image], shape)
+        return tf.cast(tf.clip_by_value(ret, 0, 255), tf.uint8)[0]
+
+    def resize_shortest_edge(image, image_shape, size):
+        shape = tf.cast(image_shape, tf.float32)
+        w_greater = tf.greater(image_shape[0], image_shape[1])
+        shape = tf.cond(w_greater,
+                        lambda: tf.cast([shape[0] / shape[1] * size, size], tf.int32),
+                        lambda: tf.cast([size, shape[1] / shape[0] * size], tf.int32))
+
+        return uint8_resize_bicubic(image, shape)
+
+    def center_crop(image, size):
+        image_height = tf.shape(image)[0]
+        image_width = tf.shape(image)[1]
+
+        offset_height = (image_height - size) // 2
+        offset_width = (image_width - size) // 2
+        image = tf.slice(image, [offset_height, offset_width, 0], [size, size, -1])
+        return image
+
+    def lighting(image, std, eigval, eigvec):
+        v = tf.random_uniform(shape=[3]) * std * eigval
+        inc = tf.matmul(eigvec, tf.reshape(v, [3, 1]))
+        image = tf.cast(tf.cast(image, tf.float32) + tf.reshape(inc, [3]), image.dtype)
+        return image
+
+    def validation_mapper(byte):
+        image = tf.image.decode_jpeg(
+            tf.reshape(byte, shape=[]), 3, **JPEG_OPT)
+        image = resize_shortest_edge(image, tf.shape(image), 256)
+        image = center_crop(image, 224)
+        image = tf.reverse(image, axis=[2])  # to BGR
+        return image
+
+    def training_mapper(byte):
+        jpeg_shape = tf.image.extract_jpeg_shape(byte)  # hwc
+        bbox_begin, bbox_size, distort_bbox = tf.image.sample_distorted_bounding_box(
+            jpeg_shape,
+            bounding_boxes=tf.zeros(shape=[0, 0, 4]),
+            min_object_covered=0,
+            aspect_ratio_range=[0.75, 1.33],
+            area_range=[0.08, 1.0],
+            max_attempts=10,
+            use_image_if_no_bounding_boxes=True)
+
+        is_bad = tf.reduce_sum(tf.cast(tf.equal(bbox_size, jpeg_shape), tf.int32)) >= 2
+
+        def good():
+            offset_y, offset_x, _ = tf.unstack(bbox_begin)
+            target_height, target_width, _ = tf.unstack(bbox_size)
+            crop_window = tf.stack([offset_y, offset_x, target_height, target_width])
+
+            image = tf.image.decode_and_crop_jpeg(
+                byte, crop_window, channels=3, **JPEG_OPT)
+            image = uint8_resize_bicubic(image, [224, 224])
+            return image
+
+        def bad():
+            image = tf.image.decode_jpeg(
+                tf.reshape(byte, shape=[]), 3, **JPEG_OPT)
+            image = resize_shortest_edge(image, jpeg_shape, 224)
+            image = center_crop(image, 224)
+            return image
+
+        image = tf.cond(is_bad, bad, good)
+        # TODO other imgproc
+        image = lighting(image, 0.1,
+                         eigval=np.array([0.2175, 0.0188, 0.0045], dtype='float32') * 255.0,
+                         eigvec=np.array([[-0.5675, 0.7192, 0.4009],
+                                          [-0.5808, -0.0045, -0.8140],
+                                          [-0.5836, -0.6948, 0.4203]], dtype='float32'))
+        image = tf.image.random_flip_left_right(image)
+        image = tf.reverse(image, axis=[2])  # to BGR
+        return image
+
+    return training_mapper if isTrain else validation_mapper
+
+
+"""
+====== Model & Evaluation =======
+"""
+
+
 def eval_on_ILSVRC12(model, sessinit, dataflow):
    pred_config = PredictConfig(
        model=model,
@@ -266,17 +439,30 @@ class ImageNetModel(ModelDesc):
 if __name__ == '__main__':
    import argparse
    from tensorpack.dataflow import TestDataSpeed
+    from tensorpack.tfutils import get_default_sess_config
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', required=True)
    parser.add_argument('--batch', type=int, default=32)
    parser.add_argument('--aug', choices=['train', 'val'], default='val')
+    parser.add_argument('--symbolic', action='store_true')
    args = parser.parse_args()

-    if args.aug == 'val':
-        augs = fbresnet_augmentor(False)
-    elif args.aug == 'train':
-        augs = fbresnet_augmentor(True)
-    df = get_imagenet_dataflow(
-        args.data, 'train', args.batch, augs)
-    # For val augmentor, Should get >100 it/s (i.e. 3k im/s) here on a decent E5 server.
-    TestDataSpeed(df).start()
+    if not args.symbolic:
+        augs = fbresnet_augmentor(args.aug == 'train')
+        df = get_imagenet_dataflow(
+            args.data, 'train', args.batch, augs)
+        # For the val augmentor, this should reach >100 it/s (i.e. ~3k im/s) on a decent E5 server.
+        TestDataSpeed(df).start()
+    else:
+        assert args.aug == 'train'
+        data = get_imagenet_tfdata(args.data, 'train', args.batch)
+
+        itr = data.make_initializable_iterator()
+        dp = itr.get_next()
+        dpop = tf.group(*dp)
+        with tf.Session(config=get_default_sess_config()) as sess:
+            sess.run(itr.initializer)
+            for _ in tqdm.trange(200):
+                sess.run(dpop)
+            for _ in tqdm.trange(5000, smoothing=0.1):
+                sess.run(dpop)
--- a/examples/ResNet/README.md
+++ b/examples/ResNet/README.md
@@ -9,7 +9,7 @@ __Training__ code of three variants of ResNet on ImageNet:

 The training follows the __exact__ recipe used by the [Training ImageNet in 1 Hour paper](https://arxiv.org/abs/1706.02677)
 and gets the same performance.
-Models trained with 8 GPUs and a total batch size of 256 are listed in the table below.
+Distributed training code & results can be found at [tensorpack/benchmarks](https://github.com/tensorpack/benchmarks/tree/master/ResNet-Horovod).

 This recipe has better performance than most open source implementations.
 In fact, many papers that claim to "improve" ResNet by .5% only compete with a lower
@@ -24,9 +24,10 @@ baseline and they actually cannot beat this ResNet recipe.
 | ResNet101   | 6.04%       | 21.95%      | [:arrow_down:](http://models.tensorpack.com/ResNet/ImageNet-ResNet101.npz)   |
 | ResNet152   | 5.78%       | 21.51%      | [:arrow_down:](http://models.tensorpack.com/ResNet/ImageNet-ResNet152.npz)   |

-To train, first decompress ImageNet data into [this structure](http://tensorpack.readthedocs.io/en/latest/modules/dataflow.dataset.html#tensorpack.dataflow.dataset.ILSVRC12), then:
+To reproduce the above results,
+first decompress ImageNet data into [this structure](http://tensorpack.readthedocs.io/en/latest/modules/dataflow.dataset.html#tensorpack.dataflow.dataset.ILSVRC12), then:
 ```bash
-./imagenet-resnet.py --data /path/to/original/ILSVRC -d 50 [--mode resnet/preact/se]
+./imagenet-resnet.py --data /path/to/original/ILSVRC -d 50 [--mode resnet/preact/se] --batch 256
 # See ./imagenet-resnet.py -h for other options.
 ```


--- a/examples/ResNet/imagenet-resnet.py
+++ b/examples/ResNet/imagenet-resnet.py
@@ -5,7 +5,7 @@
 import argparse
 import os

-from tensorpack import logger, QueueInput
+from tensorpack import logger, QueueInput, TFDatasetInput
 from tensorpack.models import *
 from tensorpack.callbacks import *
 from tensorpack.train import (
@@ -15,8 +15,8 @@ from tensorpack.tfutils import argscope, get_model_loader
 from tensorpack.utils.gpu import get_num_gpu

 from imagenet_utils import (
-    fbresnet_augmentor, get_imagenet_dataflow, ImageNetModel,
-    eval_on_ILSVRC12)
+    get_imagenet_dataflow, get_imagenet_tfdata,
+    ImageNetModel, eval_on_ILSVRC12)
 from resnet_model import (
    preresnet_group, preresnet_basicblock, preresnet_bottleneck,
    resnet_group, resnet_basicblock, resnet_bottleneck, se_resnet_bottleneck,
@@ -49,14 +49,7 @@ class Model(ImageNetModel):
                preresnet_group if self.mode == 'preact' else resnet_group, self.block_func)


-def get_data(name, batch):
-    isTrain = name == 'train'
-    augmentors = fbresnet_augmentor(isTrain)
-    return get_imagenet_dataflow(
-        args.data, name, batch, augmentors)
-
-
-def get_config(model, fake=False):
+def get_config(model):
    nr_tower = max(get_num_gpu(), 1)
    assert args.batch % nr_tower == 0
    batch = args.batch // nr_tower
@@ -64,12 +57,15 @@ def get_config(model, fake=False):
    logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch))
    if batch < 32 or batch > 64:
        logger.warn("Batch size per tower not in [32, 64]. This probably will lead to worse accuracy than reported.")
-    if fake:
+    if args.fake:
        data = QueueInput(FakeData(
            [[batch, 224, 224, 3], [batch]], 1000, random=False, dtype='uint8'))
        callbacks = []
    else:
-        data = QueueInput(get_data('train', batch))
+        if args.symbolic:
+            data = TFDatasetInput(get_imagenet_tfdata(args.data, 'train', batch))
+        else:
+            data = QueueInput(get_imagenet_dataflow(args.data, 'train', batch))

        START_LR = 0.1
        BASE_LR = START_LR * (args.batch / 256.0)
@@ -88,7 +84,7 @@ def get_config(model, fake=False):

        infs = [ClassificationError('wrong-top1', 'val-error-top1'),
                ClassificationError('wrong-top5', 'val-error-top5')]
-        dataset_val = get_data('val', batch)
+        dataset_val = get_imagenet_dataflow(args.data, 'val', batch)
        if nr_tower == 1:
            # single-GPU inference with queue prefetch
            callbacks.append(InferenceRunner(QueueInput(dataset_val), infs))
@@ -97,6 +93,9 @@ def get_config(model, fake=False):
            callbacks.append(DataParallelInferenceRunner(
                dataset_val, infs, list(range(nr_tower))))

+    if get_num_gpu() > 0:
+        callbacks.append(GPUUtilizationTracker())
+
    return TrainConfig(
        model=model,
        data=data,
@@ -112,6 +111,7 @@ if __name__ == '__main__':
    parser.add_argument('--data', help='ILSVRC dataset dir')
    parser.add_argument('--load', help='load a model for training or evaluation')
    parser.add_argument('--fake', help='use FakeData to debug or benchmark this model', action='store_true')
+    parser.add_argument('--symbolic', help='use symbolic data loader', action='store_true')
    parser.add_argument('--data-format', help='image data format',
                        default='NCHW', choices=['NCHW', 'NHWC'])
    parser.add_argument('-d', '--depth', help='ResNet depth',
@@ -139,9 +139,11 @@ if __name__ == '__main__':
            logger.set_logger_dir(os.path.join('train_log', 'tmp'), 'd')
        else:
            logger.set_logger_dir(
-                os.path.join('train_log', 'imagenet-{}-d{}-batch{}'.format(args.mode, args.depth, args.batch)))
+                os.path.join('train_log',
+                             'imagenet-{}-d{}-batch{}'.format(
+                                 args.mode, args.depth, args.batch)))

-        config = get_config(model, fake=args.fake)
+        config = get_config(model)
        if args.load:
            config.session_init = get_model_loader(args.load)
        trainer = SyncMultiGPUTrainerReplicated(max(get_num_gpu(), 1))