make augmentors type-safe

49b61fc0 · Yuxin Wu · 9b62b218 · 49b61fc0 · 49b61fc0 · 49b61fc0
Commit 49b61fc0 authored Feb 09, 2017 by Yuxin Wu
8 changed files
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -38,7 +38,6 @@ MOCK_MODULES = ['scipy',
                'sklearn', 'functools32']
 for mod_name in MOCK_MODULES:
    sys.modules[mod_name] = mock.Mock(name=mod_name)
-sys.modules['tensorflow'].__version__ = '0.12.0'

 import tensorpack

@@ -55,7 +54,7 @@ extensions = [
    'sphinx.ext.todo',
    'sphinx.ext.napoleon',
    #'sphinx.ext.autosectionlabel',
-    # 'sphinx.ext.coverage',
+    #'sphinx.ext.coverage',
    'sphinx.ext.mathjax',
    'sphinx.ext.intersphinx',
    'sphinx.ext.viewcode',
@@ -104,9 +103,9 @@ author = u'Yuxin Wu'
 # built documents.
 #
 # The short X.Y version.
-version = u'0.2'
+version = tensorpack.__version__
 # The full version, including alpha/beta/rc tags.
-release = u'0.2'
+release = version

 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

--- a/docs/tutorial/efficient-data.md
+++ b/docs/tutorial/efficient-data.md
@@ -10,7 +10,7 @@ not necessarily the best for different scenarios.

 ### Use TensorFlow queues

-In general, ``feed_dict`` is slow and should never appear in your critical loop.
+In general, `feed_dict` is slow and should never appear in your critical loop.
 i.e., you should avoid loops like this:
 ```python
 while True:
@@ -31,10 +31,44 @@ while True:
  minimize_op.run()	 # minimize_op was built from dequeued tensors
 ```

-This is automatically handled by tensorpack trainers already (unless you used the demo ``SimpleTrainer``),
+This is now automatically handled by tensorpack trainers already (unless you used the demo ``SimpleTrainer``),
 see [Trainer](trainer.md) for details.
 TensorFlow is providing staging interface which may further improve the speed. This is
 [issue#140](https://github.com/ppwwyyxx/tensorpack/issues/140).

+You can also avoid `feed_dict` by using TensorFlow native operators to read data, which is also
+supported here.
+It probably allows you to reach the best performance, but at the cost of implementing the
+reading / preprocessing ops in C++ if there isn't one for your task. We won't talk about it here.
+
 ### Figure out your bottleneck

+For training we will only worry about the throughput but not the latency.
+Thread 1 & 2 runs in parallel, and the faster one will block to wait for the slower one.
+So the overall throughput will appear to be the slower one.
+
+There isn't a way to accurately benchmark the two threads while they are running, without introducing overhead. But
+there are ways to understand which one is the bottleneck:
+
+1. Use the average occupancy (size) of the queue. This information is summarized after every epoch (TODO depend on #125).
+	If the queue is nearly empty, then the data thread is the bottleneck.
+
+2. Benchmark them separately. You can use `TestDataSpeed` to benchmark a DataFlow, and
+	 use `FakeData` as a fast replacement in a dry run to benchmark the training
+	 iterations.
+
+### Load ImageNet efficiently
+
+We take ImageNet dataset as an example of how to optimize a DataFlow for speed.
+We use ILSVRC12 training set, which contains 1.28 million images.
+Following the [ResNet example](../examples/ResNet), our pre-processing need images in their original resolution, so we don't resize them.
+The average resolution is about 400x350 <sup>[[1]]</sup>.
+The original images (JPEG compressed) are 140G in total.
+
+
+
+[1]: #ref
+
+<div id=ref> </div>
+
+[[1]]. [ImageNet: A Large-Scale Hierarchical Image Database](http://www.image-net.org/papers/imagenet_cvpr09.pdf), CVPR09
--- a/setup.py
+++ b/setup.py
@@ -5,9 +5,7 @@ import shutil
 # setup metainfo
 CURRENT_DIR = os.path.dirname(__file__)
 libinfo_py = os.path.join(CURRENT_DIR, 'tensorpack/libinfo.py')
-libinfo = {'__file__': libinfo_py}
-exec(compile(open(libinfo_py, "rb").read(), libinfo_py, 'exec'), libinfo, libinfo)
-__version__ = libinfo['__version__']
+exec(open(libinfo_py, "rb").read())

 # produce rst readme for pypi
 try:

--- a/tensorpack/dataflow/common.py
+++ b/tensorpack/dataflow/common.py
@@ -18,7 +18,7 @@ __all__ = ['TestDataSpeed', 'PrintData', 'BatchData', 'BatchDataByShape', 'Fixed

 class TestDataSpeed(ProxyDataFlow):
    """ Test the speed of some DataFlow """
-    def __init__(self, ds, size=1000):
+    def __init__(self, ds, size=5000):
        """
        Args:
            ds (DataFlow): the DataFlow to test.
@@ -117,7 +117,7 @@ class BatchData(ProxyDataFlow):
                    tp = dt.dtype
                try:
                    result.append(
-                        np.array([x[k] for x in data_holder], dtype=tp))
+                        np.asarray([x[k] for x in data_holder], dtype=tp))
                except KeyboardInterrupt:
                    raise
                except:

--- a/tensorpack/dataflow/imgaug/base.py
+++ b/tensorpack/dataflow/imgaug/base.py
@@ -99,7 +99,6 @@ class AugmentorList(ImageAugmentor):

    def _augment_return_params(self, img):
        assert img.ndim in [2, 3], img.ndim
-        img = img.astype('float32')

        prms = []
        for a in self.augs:
@@ -109,7 +108,6 @@ class AugmentorList(ImageAugmentor):

    def _augment(self, img, param):
        assert img.ndim in [2, 3], img.ndim
-        img = img.astype('float32')
        for aug, prm in zip(self.augs, param):
            img = aug._augment(img, prm)
        return img

--- a/tensorpack/dataflow/imgaug/convert.py
+++ b/tensorpack/dataflow/imgaug/convert.py
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# File: convert.py
+
+from .base import ImageAugmentor
+from .meta import MapImage
+import numpy as np
+import cv2
+
+__all__ = ['ColorSpace', 'Grayscale', 'ToUint8', 'ToFloat32']
+
+
+class ColorSpace(ImageAugmentor):
+    """ Convert into another colorspace.  """
+    def __init__(self, mode=cv2.COLOR_BGR2GRAY, keepdims=True):
+        """
+        Args:
+            mode: opencv colorspace conversion code (e.g., `cv2.COLOR_BGR2HSV`)
+            keepdims (bool): keep the dimension of image unchanged if opencv
+                changes it.
+        """
+        self._init(locals())
+
+    def _augment(self, img, _):
+        transf = cv2.cvtColor(img, self.mode)
+        if self.keepdims:
+            if len(transf.shape) is not len(img.shape):
+                transf = transf[..., None]
+        return transf
+
+
+class Grayscale(ColorSpace):
+    """ Convert image to grayscale.  """
+
+    def __init__(self, keepdims=True, rgb=False):
+        """
+        Args:
+            keepdims (bool): return image of shape [H, W, 1] instead of [H, W]
+            rgb (bool): interpret input as RGB instead of the default BGR
+        """
+        mode = cv2.COLOR_RGB2GRAY if rgb else cv2.COLOR_BGR2GRAY
+        super(Grayscale, self).__init__(mode, keepdims)
+
+
+class ToUint8(MapImage):
+    """ Convert image to uint8. Useful to reduce communication overhead. """
+    def __init__(self):
+        super(ToUint8, self).__init__(lambda x: np.clip(x, 0, 255).astype(np.uint8))
+
+
+class ToFloat32(MapImage):
+    """ Convert image to float32, may increase quality of the augmentor. """
+    def __init__(self):
+        super(ToFloat32, self).__init__(lambda x: x.astype(np.float32))
--- a/tensorpack/dataflow/imgaug/imgproc.py
+++ b/tensorpack/dataflow/imgaug/imgproc.py
@@ -6,46 +6,10 @@ from .base import ImageAugmentor
 import numpy as np
 import cv2

-__all__ = ['ColorSpace', 'Hue', 'Grayscale', 'Brightness', 'Contrast', 'MeanVarianceNormalize',
+__all__ = ['Hue', 'Brightness', 'Contrast', 'MeanVarianceNormalize',
           'GaussianBlur', 'Gamma', 'Clip', 'Saturation', 'Lighting']


-class ColorSpace(ImageAugmentor):
-    """
-    Convert into another colorspace.
-    """
-    def __init__(self, mode=cv2.COLOR_BGR2GRAY, keepdims=True):
-        """
-        Args:
-            mode: opencv colorspace conversion code (e.g., `cv2.COLOR_BGR2HSV`)
-            keepdims (bool): keep the dimension of image unchanged if opencv
-                changes it.
-        """
-        self._init(locals())
-
-    def _augment(self, img, _):
-        transf = cv2.cvtColor(img, self.mode)
-        if self.keepdims:
-            if len(transf.shape) is not len(img.shape):
-                transf = transf[..., None]
-        return transf
-
-
-class Grayscale(ColorSpace):
-    """
-    Convert image to grayscale.
-    """
-
-    def __init__(self, keepdims=True, rgb=False):
-        """
-        Args:
-            keepdims (bool): return image of shape [H, W, 1] instead of [H, W]
-            rgb (bool): interpret input as RGB instead of the default BGR
-        """
-        mode = cv2.COLOR_RGB2GRAY if rgb else cv2.COLOR_BGR2GRAY
-        super(Grayscale, self).__init__(mode, keepdims)
-
-
 class Hue(ImageAugmentor):
    """ Randomly change color hue of a BGR input.
    """
@@ -85,10 +49,12 @@ class Brightness(ImageAugmentor):
        return v

    def _augment(self, img, v):
+        old_dtype = img.dtype
+        img = img.astype('float32')
        img += v
-        if self.clip:
+        if self.clip or old_dtype == np.uint8:
            img = np.clip(img, 0, 255)
-        return img
+        return img.asypte(old_dtype)


 class Contrast(ImageAugmentor):
@@ -109,18 +75,23 @@ class Contrast(ImageAugmentor):
        return self._rand_range(*self.factor_range)

    def _augment(self, img, r):
+        old_dtype = img.dtype
+        img = img.astype('float32')
        mean = np.mean(img, axis=(0, 1), keepdims=True)
        img = (img - mean) * r + mean
-        if self.clip:
+        if self.clip or old_dtype == np.uint8:
            img = np.clip(img, 0, 255)
-        return img
+        return img.astype(old_dtype)


 class MeanVarianceNormalize(ImageAugmentor):
    """
    Linearly scales the image to have zero mean and unit norm.
    ``x = (x - mean) / adjusted_stddev``
-    where ``adjusted_stddev = max(stddev, 1.0/sqrt(num_pixels * channels))``
+    where ``adjusted_stddev = max(stddev, 1.0/sqrt(num_pixels * channels))
+
+    This augmentor always returns float32 images.
+    ``
    """

    def __init__(self, all_channel=True):
@@ -131,6 +102,7 @@ class MeanVarianceNormalize(ImageAugmentor):
        self.all_channel = all_channel

    def _augment(self, img, _):
+        img = img.astype('float32')
        if self.all_channel:
            mean = np.mean(img)
            std = np.std(img)
@@ -178,9 +150,10 @@ class Gamma(ImageAugmentor):
        return self._rand_range(*self.range)

    def _augment(self, img, gamma):
+        old_dtype = img.dtype
        lut = ((np.arange(256, dtype='float32') / 255) ** (1. / (1. + gamma)) * 255).astype('uint8')
        img = np.clip(img, 0, 255).astype('uint8')
-        img = cv2.LUT(img, lut).astype('float32')
+        img = cv2.LUT(img, lut).astype(old_dtype)
        return img


@@ -218,8 +191,10 @@ class Saturation(ImageAugmentor):
        return 1 + self._rand_range(-self.alpha, self.alpha)

    def _augment(self, img, v):
+        old_dtype = img.dtype
        grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        return img * v + (grey * (1 - v))[:, :, np.newaxis]
+        ret = img * v + (grey * (1 - v))[:, :, np.newaxis]
+        return ret.astype(old_dtype)


 class Lighting(ImageAugmentor):
@@ -248,8 +223,9 @@ class Lighting(ImageAugmentor):
        return self.rng.randn(3) * self.std

    def _augment(self, img, v):
+        old_dtype = img.dtype
        v = v * self.eigval
        v = v.reshape((3, 1))
        inc = np.dot(self.eigvec, v).reshape((3,))
        img += inc
-        return img
+        return img.astype(old_dtype)
--- a/tensorpack/dataflow/imgaug/noise.py
+++ b/tensorpack/dataflow/imgaug/noise.py
@@ -26,7 +26,7 @@ class JpegNoise(ImageAugmentor):

    def _augment(self, img, q):
        enc = cv2.imencode('.jpg', img, [cv2.IMWRITE_JPEG_QUALITY, q])[1]
-        return cv2.imdecode(enc, 1)
+        return cv2.imdecode(enc, 1).astype(img.dtype)


 class GaussianNoise(ImageAugmentor):
@@ -46,10 +46,11 @@ class GaussianNoise(ImageAugmentor):
        return self.rng.randn(*img.shape)

    def _augment(self, img, noise):
+        old_dtype = img.dtype
        ret = img + noise * self.sigma
-        if self.clip:
+        if self.clip or old_dtype == np.uint8:
            ret = np.clip(ret, 0, 255)
-        return ret
+        return ret.astype(old_dtype)


 class SaltPepperNoise(ImageAugmentor):