more augmentor

2a444073 · Yuxin Wu · 0266827f · 2a444073 · 2a444073 · 2a444073
Commit 2a444073 authored Oct 06, 2016 by Yuxin Wu
8 changed files
--- a/examples/Inception/inception-bn.py
+++ b/examples/Inception/inception-bn.py
@@ -57,7 +57,7 @@ class Model(ModelDesc):
                outs.append(x4)
                return tf.concat(3, outs, name='concat')

-        with argscope(Conv2D, nl=BNReLU(), use_bias=False):
+        with argscope(Conv2D, nl=BNReLU, use_bias=False):
            l = Conv2D('conv0', image, 64, 7, stride=2)
            l = MaxPooling('pool0', l, 3, 2, padding='SAME')
            l = Conv2D('conv1', l, 64, 1)

--- a/examples/Inception/inceptionv3.py
+++ b/examples/Inception/inceptionv3.py
@@ -36,7 +36,7 @@ class Model(ModelDesc):

    def _build_graph(self, input_vars):
        image, label = input_vars
-        image = image / 128.0 - 1   # ?
+        image = image / 255.0   # ?

        def proj_kk(l, k, ch_r, ch, stride=1):
            l = Conv2D('conv{0}{0}r'.format(k), l, ch_r, 1)
@@ -70,8 +70,8 @@ class Model(ModelDesc):
                .Conv2D('conv277ba', ch_r, [7,1])
                .Conv2D('conv277bb', ch, [1,7])())

-        nl = BNReLU(decay=0.9997, epsilon=1e-3)
-        with argscope(Conv2D, nl=nl, use_bias=False):
+        with argscope(Conv2D, nl=BNReLU, use_bias=False),\
+                argscope(BatchNorm, decay=0.9997, epsilon=1e-3):
            l = (LinearWrap(image)
                .Conv2D('conv0', 32, 3, stride=2, padding='VALID') #299
                .Conv2D('conv1', 32, 3, padding='VALID') #149
@@ -269,8 +269,8 @@ def get_config():
        callbacks=Callbacks([
            StatPrinter(), ModelSaver(),
            InferenceRunner(dataset_val, [
-                ClassificationError('wrong-top1', 'val-top1-error'),
-                ClassificationError('wrong-top5', 'val-top5-error')]),
+                ClassificationError('wrong-top1', 'val-error-top1'),
+                ClassificationError('wrong-top5', 'val-error-top5')]),
            ScheduledHyperParamSetter('learning_rate',
                                      [(5, 0.03), (9, 0.01), (12, 0.006),
                                       (17, 0.003), (22, 1e-3), (36, 2e-4),

--- a/examples/cifar-convnet.py
+++ b/examples/cifar-convnet.py
@@ -41,7 +41,7 @@ class Model(ModelDesc):
            tf.image_summary("train_image", image, 10)

        image = image / 4.0     # just to make range smaller
-        with argscope(Conv2D, nl=BNReLU(), use_bias=False, kernel_shape=3):
+        with argscope(Conv2D, nl=BNReLU, use_bias=False, kernel_shape=3):
            logits = LinearWrap(image) \
                    .Conv2D('conv1.1', out_channel=64) \
                    .Conv2D('conv1.2', out_channel=64) \

--- a/tensorpack/dataflow/dataset/ilsvrc.py
+++ b/tensorpack/dataflow/dataset/ilsvrc.py
@@ -156,7 +156,7 @@ class ILSVRC12(RNGDataFlow):

    def get_data(self):
        """
-        Produce original images of shape [h, w, 3], and label,
+        Produce original images of shape [h, w, 3(BGR)], and label,
        and optionally a bbox of [xmin, ymin, xmax, ymax]
        """
        idxs = np.arange(len(self.imglist))

--- a/tensorpack/dataflow/imgaug/base.py
+++ b/tensorpack/dataflow/imgaug/base.py
@@ -6,10 +6,10 @@ from abc import abstractmethod, ABCMeta
 from ...utils import get_rng
 from six.moves import zip

-__all__ = ['ImageAugmentor', 'AugmentorList']
+__all__ = ['Augmentor', 'ImageAugmentor', 'AugmentorList']

-class ImageAugmentor(object):
-    """ Base class for an image augmentor"""
+class Augmentor(object):
+    """ Base class for an augmentor"""
    __metaclass__ = ABCMeta

    def __init__(self):
@@ -24,37 +24,32 @@ class ImageAugmentor(object):
    def reset_state(self):
        self.rng = get_rng(self)

-    def augment(self, img):
+    def augment(self, d):
        """
-        Perform augmentation on the image in-place.
-        :param img: an [h,w] or [h,w,c] image
-        :returns: the augmented image, always of type 'float32'
+        Perform augmentation on the data.
        """
-        img, params = self._augment_return_params(img)
-        return img
+        d, params = self._augment_return_params(d)
+        return d

-    def _augment_return_params(self, img):
+    def _augment_return_params(self, d):
        """
        Augment the image and return both image and params
        """
-        prms = self._get_augment_params(img)
-        return (self._augment(img, prms), prms)
+        prms = self._get_augment_params(d)
+        return (self._augment(d, prms), prms)

    @abstractmethod
-    def _augment(self, img, param):
+    def _augment(self, d, param):
        """
        augment with the given param and return the new image
        """

-    def _get_augment_params(self, img):
+    def _get_augment_params(self, d):
        """
        get the augmentor parameters
        """
        return None

-    def _fprop_coord(self, coord, param):
-        return coord
-
    def _rand_range(self, low=1.0, high=None, size=None):
        if high is None:
            low, high = 0, low
@@ -62,6 +57,19 @@ class ImageAugmentor(object):
            size = []
        return self.rng.uniform(low, high, size)

+class ImageAugmentor(Augmentor):
+    def augment(self, img):
+        """
+        Perform augmentation on the image in-place.
+        :param img: an [h,w] or [h,w,c] image
+        :returns: the augmented image, always of type 'float32'
+        """
+        img, params = self._augment_return_params(img)
+        return img
+
+    def _fprop_coord(self, coord, param):
+        return coord
+
 class AugmentorList(ImageAugmentor):
    """
    Augment by a list of augmentors

--- a/tensorpack/dataflow/imgaug/imgproc.py
+++ b/tensorpack/dataflow/imgaug/imgproc.py
@@ -7,7 +7,7 @@ import numpy as np
 import cv2

 __all__ = ['Brightness', 'Contrast', 'MeanVarianceNormalize', 'GaussianBlur',
-        'Gamma', 'Clip']
+        'Gamma', 'Clip', 'Saturation']

 class Brightness(ImageAugmentor):
    """
@@ -111,9 +111,22 @@ class Gamma(ImageAugmentor):

 class Clip(ImageAugmentor):
    def __init__(self, min=0, max=255):
-        assert delta > 0
        self._init(locals())

    def _augment(self, img, _):
        img = np.clip(img, self.min, self.max)
        return img
+
+class Saturation(ImageAugmentor):
+    """ Saturation, see 'fb.resnet.torch' https://github.com/facebook/fb.resnet.torch/blob/master/datasets/transforms.lua#L218"""
+    def __init__(self, alpha=0.4):
+        super(Saturation, self).__init__()
+        assert alpha < 1
+        self._init(locals())
+
+    def _get_augment_params(self, _):
+        return 1 + self._rand_range(-self.alpha, self.alpha)
+
+    def _augment(self, img, v):
+        grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        return img * v + (grey * (1 - v))[:,:,np.newaxis]
--- a/tensorpack/dataflow/imgaug/meta.py
+++ b/tensorpack/dataflow/imgaug/meta.py
@@ -6,7 +6,8 @@

 from .base import ImageAugmentor

-__all__ = ['RandomChooseAug', 'MapImage', 'Identity', 'RandomApplyAug']
+__all__ = ['RandomChooseAug', 'MapImage', 'Identity', 'RandomApplyAug',
+        'RandomOrderAug']

 class Identity(ImageAugmentor):
    def _augment(self, img, _):
@@ -15,8 +16,8 @@ class Identity(ImageAugmentor):
 class RandomApplyAug(ImageAugmentor):
    """ Randomly apply the augmentor with a prob. Otherwise do nothing"""
    def __init__(self, aug, prob):
-        super(RandomApplyAug, self).__init__()
        self._init(locals())
+        super(RandomApplyAug, self).__init__()

    def _get_augment_params(self, img):
        p = self.rng.rand()
@@ -41,7 +42,6 @@ class RandomChooseAug(ImageAugmentor):
        """
        :param aug_lists: list of augmentor, or list of (augmentor, probability) tuple
        """
-        super(RandomChooseAug, self).__init__()
        if isinstance(aug_lists[0], (tuple, list)):
            prob = [k[1] for k in aug_lists]
            aug_lists = [k[0] for k in aug_lists]
@@ -49,6 +49,7 @@ class RandomChooseAug(ImageAugmentor):
        else:
            prob = 1.0 / len(aug_lists)
            self._init(locals())
+        super(RandomChooseAug, self).__init__()

    def reset_state(self):
        super(RandomChooseAug, self).reset_state()
@@ -64,6 +65,34 @@ class RandomChooseAug(ImageAugmentor):
        idx, prm = prm
        return self.aug_lists[idx]._augment(img, prm)

+class RandomOrderAug(ImageAugmentor):
+    def __init__(self, aug_lists):
+        """
+        Shuffle the augmentors into random order.
+        :param aug_lists: list of augmentors, all of which are applied in a random order
+        """
+        self._init(locals())
+        super(RandomOrderAug, self).__init__()
+
+    def reset_state(self):
+        super(RandomOrderAug, self).reset_state()
+        for a in self.aug_lists:
+            a.reset_state()
+
+    def _get_augment_params(self, img):
+        # Note: all params are computed from the input image before any augmentor runs, so this
+        # may not work if an augmentor changes the image shape; params should rely only on the shape.
+        idxs = self.rng.permutation(len(self.aug_lists))
+        prms = [self.aug_lists[k]._get_augment_params(img)
+                for k in range(len(self.aug_lists))]
+        return idxs, prms
+
+    def _augment(self, img, prm):
+        idxs, prms = prm
+        for k in idxs:
+            img = self.aug_lists[k]._augment(img, prms[k])
+        return img
+
 class MapImage(ImageAugmentor):
    """
    Map the image array by a function.

--- a/tensorpack/models/nonlin.py
+++ b/tensorpack/models/nonlin.py
@@ -63,15 +63,7 @@ def LeakyReLU(x, alpha, name=None):
        name = 'output'
    return tf.mul(x, 0.5, name=name)

-# I'm not a layer, but I return a nonlinearity.
-def BNReLU(is_training=None, **kwargs):
-    """
-    :param is_traning: boolean
-    :param kwargs: args for BatchNorm
-    :returns: an activation function that performs BN + ReLU (a too common combination)
-    """
-    def BNReLU(x, name=None):
-        x = BatchNorm('bn', x, use_local_stat=is_training, **kwargs)
+def BNReLU(x, name=None):
+    x = BatchNorm('bn', x, use_local_stat=None)
    x = tf.nn.relu(x, name=name)
    return x
-    return BNReLU