Move ImageSample to examples.

b96cb78c · Yuxin Wu · 3d2c2f6e · b96cb78c · b96cb78c · b96cb78c
Commit b96cb78c authored Jun 14, 2018 by Yuxin Wu
6 changed files
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -376,6 +376,7 @@ def autodoc_skip_member(app, what, name, obj, skip, options):
        'get_nr_gpu',

        # deprecated or renamed symbolic code
+        'ImageSample',
        'Deconv2D',
        'get_scalar_var', 'psnr',
        'prediction_incorrect', 'huber_loss',

--- a/docs/tutorial/trainer.md
+++ b/docs/tutorial/trainer.md
@@ -27,6 +27,8 @@ In particular, when working with the `ModelDesc` interface, the `build_graph` me
 The tower function needs to follow some conventions:

 1. __It might get called multiple times__ for data-parallel training or inference.
+   * Therefore, to use a tensorflow-hub module, you need to initialize the
+     module outside the tower function, and call the module inside the tower function.
 2. It has to respect variable collections:
   * (Required) Only put variables __trainable by gradient descent__ into `TRAINABLE_VARIABLES`.
   * (Recommended) Put non-trainable variables that need to be used in inference into `MODEL_VARIABLES`.
@@ -35,7 +37,8 @@ The tower function needs to follow some conventions:
     Don't depend on name_scope's name. Don't use variable_scope's name twice.
   * The creation of any trainable variables must respect __reuse__ variable scope.
     To respect variable reuse, use `tf.get_variable` instead of `tf.Variable` in the function.
-     On the other hand, for non-trainable variables, it's OK to use `tf.Variable` to force creation of new variables in each tower.
+     On the other hand, for non-trainable variables, it's OK to use
+     `tf.Variable` to ensure creation of new variables in each tower even when `reuse=True`.
 4. It will always be called under a `TowerContext`, which can be accessed by `get_current_tower_context()`.
   The context contains information about training/inference mode, reuse, etc.
     

--- a/examples/DeepQNetwork/expreplay.py
+++ b/examples/DeepQNetwork/expreplay.py
@@ -224,6 +224,14 @@ class ExpReplay(DataFlow, Callback):
        if sample[1] or sample[3]:
            view_state(sample[0])

+    def _process_batch(self, batch_exp):
+        state = np.asarray([e[0] for e in batch_exp], dtype='uint8')
+        reward = np.asarray([e[1] for e in batch_exp], dtype='float32')
+        action = np.asarray([e[2] for e in batch_exp], dtype='int8')
+        isOver = np.asarray([e[3] for e in batch_exp], dtype='bool')
+        return [state, action, reward, isOver]
+
+    # DataFlow method:
    def get_data(self):
        # wait for memory to be initialized
        self._init_memory_flag.wait()
@@ -238,13 +246,7 @@ class ExpReplay(DataFlow, Callback):
            yield self._process_batch(batch_exp)
            self._populate_job_queue.put(1)

-    def _process_batch(self, batch_exp):
-        state = np.asarray([e[0] for e in batch_exp], dtype='uint8')
-        reward = np.asarray([e[1] for e in batch_exp], dtype='float32')
-        action = np.asarray([e[2] for e in batch_exp], dtype='int8')
-        isOver = np.asarray([e[3] for e in batch_exp], dtype='bool')
-        return [state, action, reward, isOver]
-
+    # Callback methods:
    def _setup_graph(self):
        self.predictor = self.trainer.get_predictor(*self.predictor_io_names)

@@ -282,6 +284,3 @@ if __name__ == '__main__':
        import IPython as IP
        IP.embed(config=IP.terminal.ipapp.load_default_config())
        pass
-        # import IPython;
-        # IPython.embed(config=IPython.terminal.ipapp.load_default_config())
-        # break
--- a/examples/ImageNetModels/README.md
+++ b/examples/ImageNetModels/README.md
@@ -28,7 +28,7 @@ Evaluate the [pretrained model](http://models.tensorpack.com/ShuffleNet/):

 ### AlexNet

-This AlexNet script is quite close to the setting in its [original
+This AlexNet script is quite close to the settings in its [original
 paper](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
 Trained with 64x2 batch size, the script reaches 58% single-crop validation
 accuracy after 100 epochs (21 hours on 2 V100s).
@@ -50,6 +50,9 @@ See `./vgg16.py --help` for usage.
 |:------------------------------------------|---------------------|--------------------:|
 | 29~30% (large variation with random seed) | 28%                 |               27.6% |
 
+ Note that the purpose of this experiment in the paper is not to claim GroupNorm is better
+ than BatchNorm, therefore the training settings and hyperparameters have not been individually tuned for best accuracy.
+
 ### ResNet

 See [ResNet examples](../ResNet). It includes variants like pre-activation

--- a/examples/SpatialTransformer/mnist-addition.py
+++ b/examples/SpatialTransformer/mnist-addition.py
@@ -12,13 +12,95 @@ import argparse

 from tensorpack import *
 from tensorpack.dataflow import dataset
-from tensorpack.tfutils import sesscreate, optimizer, summary, gradproc
+from tensorpack.tfutils import optimizer, summary, gradproc

 IMAGE_SIZE = 42
 WARP_TARGET_SIZE = 28
 HALF_DIFF = (IMAGE_SIZE - WARP_TARGET_SIZE) // 2


+def sample(img, coords):
+    """
+    Args:
+        img: bxhxwxc
+        coords: bxh2xw2x2. each coordinate is (y, x) integer.
+            Out of boundary coordinates will be clipped.
+    Return:
+        bxh2xw2xc image
+    """
+    shape = img.get_shape().as_list()[1:]   # h, w, c
+    batch = tf.shape(img)[0]
+    shape2 = coords.get_shape().as_list()[1:3]  # h2, w2
+    assert None not in shape2, coords.get_shape()
+    max_coor = tf.constant([shape[0] - 1, shape[1] - 1], dtype=tf.float32)
+
+    coords = tf.clip_by_value(coords, 0., max_coor)  # borderMode==repeat
+    coords = tf.to_int32(coords)
+
+    batch_index = tf.range(batch, dtype=tf.int32)
+    batch_index = tf.reshape(batch_index, [-1, 1, 1, 1])
+    batch_index = tf.tile(batch_index, [1, shape2[0], shape2[1], 1])    # bxh2xw2x1
+    indices = tf.concat([batch_index, coords], axis=3)  # bxh2xw2x3
+    sampled = tf.gather_nd(img, indices)
+    return sampled
+
+
+@layer_register(log_shape=True)
+def BilinearSample(inputs, borderMode='repeat'):
+    """
+    Sample the images using the given coordinates, by bilinear interpolation.
+    This was described in the paper:
+    `Spatial Transformer Networks <http://arxiv.org/abs/1506.02025>`_.
+
+    Args:
+        inputs (list): [images, coords]. images has shape NHWC.
+            coords has shape (N, H', W', 2), where each pair in the last dimension is a (y, x) real-value
+            coordinate.
+        borderMode: either "repeat" or "constant" (zero-filled)
+
+    Returns:
+        tf.Tensor: a tensor named ``output`` of shape (N, H', W', C).
+    """
+    image, mapping = inputs
+    assert image.get_shape().ndims == 4 and mapping.get_shape().ndims == 4
+    assert mapping.dtype.is_floating, mapping
+    input_shape = image.get_shape().as_list()[1:]
+    assert None not in input_shape, \
+        "Images must have fully-defined shape"
+    assert borderMode in ['repeat', 'constant']
+
+    orig_mapping = mapping
+    mapping = tf.maximum(mapping, 0.0)
+    lcoor = tf.floor(mapping)
+    ucoor = lcoor + 1
+
+    diff = mapping - lcoor
+    neg_diff = 1.0 - diff  # bxh2xw2x2
+
+    lcoory, lcoorx = tf.split(lcoor, 2, 3)
+    ucoory, ucoorx = tf.split(ucoor, 2, 3)
+
+    lyux = tf.concat([lcoory, ucoorx], 3)
+    uylx = tf.concat([ucoory, lcoorx], 3)
+
+    diffy, diffx = tf.split(diff, 2, 3)
+    neg_diffy, neg_diffx = tf.split(neg_diff, 2, 3)
+
+    ret = tf.add_n([sample(image, lcoor) * neg_diffx * neg_diffy,
+                    sample(image, ucoor) * diffx * diffy,
+                    sample(image, lyux) * neg_diffy * diffx,
+                    sample(image, uylx) * diffy * neg_diffx], name='sampled')
+    if borderMode == 'constant':
+        max_coor = tf.constant([input_shape[0] - 1, input_shape[1] - 1], dtype=tf.float32)
+        mask = tf.greater_equal(orig_mapping, 0.0)
+        mask2 = tf.less_equal(orig_mapping, max_coor)
+        mask = tf.logical_and(mask, mask2)  # bxh2xw2x2
+        mask = tf.reduce_all(mask, [3])  # bxh2xw2 boolean
+        mask = tf.expand_dims(mask, 3)
+        ret = ret * tf.cast(mask, tf.float32)
+    return tf.identity(ret, name='output')
+
+
 class Model(ModelDesc):
    def inputs(self):
        return [tf.placeholder(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE, 2), 'input'),
@@ -47,7 +129,7 @@ class Model(ModelDesc):
            coor = tf.reshape(tf.matmul(xys, stn),
                              [WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2])
            coor = tf.transpose(coor, [2, 0, 1, 3], 'sampled_coords')  # b h w 2
-            sampled = ImageSample('warp', [image, coor], borderMode='constant')
+            sampled = BilinearSample('warp', [image, coor], borderMode='constant')
            return sampled

        with argscope([Conv2D, FullyConnected], activation=tf.nn.relu):
@@ -162,8 +244,6 @@ def get_config():
                            [ScalarStats('cost'), ClassificationError()]),
            ScheduledHyperParamSetter('learning_rate', [(200, 1e-4)])
        ],
-        session_creator=sesscreate.NewSessionCreator(
-            config=get_default_sess_config(0.5)),
        steps_per_epoch=steps_per_epoch,
        max_epoch=500,
    )

--- a/tensorpack/models/image_sample.py
+++ b/tensorpack/models/image_sample.py
@@ -4,6 +4,7 @@

 import tensorflow as tf

+from ..utils.develop import log_deprecated
 from .common import layer_register
 from ._test import TestModel

@@ -52,6 +53,7 @@ def ImageSample(inputs, borderMode='repeat'):
    Returns:
        tf.Tensor: a tensor named ``output`` of shape (N, H', W', C).
    """
+    log_deprecated("ImageSample", "Please implement it in your own code instead!", "2018-09-01")
    image, mapping = inputs
    assert image.get_shape().ndims == 4 and mapping.get_shape().ndims == 4
    input_shape = image.get_shape().as_list()[1:]