Deprecate "BilinearUpSample" and move it to examples.

01245d68 · Yuxin Wu · 843d44e9 · 01245d68 · 01245d68 · 01245d68
Commit 01245d68 authored Aug 24, 2018 by Yuxin Wu
5 changed files
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -374,6 +374,7 @@ _DEPRECATED_NAMES = set([
    # deprecated or renamed symbolic code
    'ImageSample',
+    'BilinearUpSample',
    'Deconv2D',
    'get_scalar_var', 'psnr',
    'prediction_incorrect', 'huber_loss',

--- a/examples/HED/hed.py
+++ b/examples/HED/hed.py
@@ -5,6 +5,7 @@
 import cv2
 import tensorflow as tf
+import numpy as np
 import argparse
 from six.moves import zip
 import os
@@ -43,6 +44,60 @@ def class_balanced_sigmoid_cross_entropy(logits, label, name='cross_entropy_loss
    return tf.where(zero, 0.0, cost, name=name)
+@layer_register(log_shape=True)
+def CaffeBilinearUpSample(x, shape):
+    """
+    Deterministically upsample the input images with bilinear interpolation.
+    It is implemented by deconvolution with "BilinearFiller" in Caffe.
+    It is aimed to mimic caffe behavior.
+
+    Args:
+        x (tf.Tensor): a NHWC tensor. Its channel dimension must be
+            statically known.
+        shape (int): the upsample factor
+
+    Returns:
+        tf.Tensor: a NHWC tensor whose height and width are ``shape`` times
+        those of the input.
+    """
+    inp_shape = x.shape.as_list()
+    ch = inp_shape[3]
+    assert ch is not None
+
+    shape = int(shape)
+    filter_shape = 2 * shape
+
+    def bilinear_conv_filler(s):
+        """
+        s: width, height of the conv filter
+        https://github.com/BVLC/caffe/blob/99bd99795dcdf0b1d3086a8d67ab1782a8a08383/include/caffe/filler.hpp#L219-L268
+        """
+        f = np.ceil(float(s) / 2)
+        c = float(2 * f - 1 - f % 2) / (2 * f)
+        ret = np.zeros((s, s), dtype='float32')
+        # Separable kernel: product of a 1-D weight along each axis.
+        for row in range(s):
+            for col in range(s):
+                ret[row, col] = (1 - abs(row / f - c)) * (1 - abs(col / f - c))
+        return ret
+
+    w = bilinear_conv_filler(filter_shape)
+    # The same 2-D kernel is copied to every (channel, channel) pair.
+    # NOTE(review): for ch > 1 this looks like it makes every output channel
+    # the sum over all upsampled input channels -- confirm this is intended.
+    w = np.repeat(w, ch * ch).reshape((filter_shape, filter_shape, ch, ch))
+    weight_var = tf.constant(w, tf.float32,
+                             shape=(filter_shape, filter_shape, ch, ch),
+                             name='bilinear_upsample_filter')
+
+    # Pad (shape - 1) pixels on each spatial side; the corresponding
+    # shape * (shape - 1) output pixels are cropped away after the deconv.
+    x = tf.pad(x, [[0, 0], [shape - 1, shape - 1], [shape - 1, shape - 1], [0, 0]], mode='SYMMETRIC')
+    out_shape = tf.shape(x) * tf.constant([1, shape, shape, 1], tf.int32)
+    deconv = tf.nn.conv2d_transpose(x, weight_var, out_shape,
+                                    [1, shape, shape, 1], 'SAME')
+
+    edge = shape * (shape - 1)
+    # With shape == 1 there is no padding to remove, and ``0:-0`` would be
+    # an empty slice, so only crop when there is something to crop.
+    if edge > 0:
+        deconv = deconv[:, edge:-edge, edge:-edge, :]
+
+    # Restore the static shape where the input height/width were known.
+    if inp_shape[1]:
+        inp_shape[1] *= shape
+    if inp_shape[2]:
+        inp_shape[2] *= shape
+    deconv.set_shape(inp_shape)
+    return deconv
 class Model(ModelDesc):
    def inputs(self):
        return [tf.placeholder(tf.float32, [None, None, None, 3], 'image'),
@@ -58,7 +113,7 @@ class Model(ModelDesc):
                           use_bias=True,
                           kernel_initializer=tf.constant_initializer())
                while up != 1:
-                    l = BilinearUpSample('upsample{}'.format(up), l, 2)
+                    l = CaffeBilinearUpSample('upsample{}'.format(up), l, 2)
                    up = up / 2
                return l

--- a/examples/SpatialTransformer/mnist-addition.py
+++ b/examples/SpatialTransformer/mnist-addition.py
@@ -46,15 +46,21 @@ def sample(img, coords):
 @layer_register(log_shape=True)
-def BilinearSample(inputs, borderMode='repeat'):
+def GridSample(inputs, borderMode='repeat'):
    """
    Sample the images using the given coordinates, by bilinear interpolation.
    This was described in the paper:
    `Spatial Transformer Networks <http://arxiv.org/abs/1506.02025>`_.
+    This is equivalent to `torch.nn.functional.grid_sample`,
+    up to some non-trivial coordinate transformation.
+    This implementation returns pixel value at pixel (1, 1) for a floating point coordinate (1.0, 1.0).
+    Note that this may not be what you need.
    Args:
        inputs (list): [images, coords]. images has shape NHWC.
-            coords has shape (N, H', W', 2), where each pair in the last dimension is a (y, x) real-value
+            coords has shape (N, H', W', 2), where each pair of the last dimension is a (y, x) real-value
            coordinate.
        borderMode: either "repeat" or "constant" (zero-filled)
@@ -63,10 +69,9 @@ def BilinearSample(inputs, borderMode='repeat'):
    """
    image, mapping = inputs
    assert image.get_shape().ndims == 4 and mapping.get_shape().ndims == 4
-    assert mapping.dtype.is_floating, mapping
    input_shape = image.get_shape().as_list()[1:]
    assert None not in input_shape, \
-        "Images must have fully-defined shape"
+        "Images in GridSample layer must have fully-defined shape"
    assert borderMode in ['repeat', 'constant']
    orig_mapping = mapping
@@ -129,7 +134,7 @@ class Model(ModelDesc):
            coor = tf.reshape(tf.matmul(xys, stn),
                              [WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2])
            coor = tf.transpose(coor, [2, 0, 1, 3], 'sampled_coords')  # b h w 2
-            sampled = BilinearSample('warp', [image, coor], borderMode='constant')
+            sampled = GridSample('warp', [image, coor], borderMode='constant')
            return sampled
        with argscope([Conv2D, FullyConnected], activation=tf.nn.relu):

--- a/tensorpack/models/image_sample.py
+++ b/tensorpack/models/image_sample.py
@@ -44,6 +44,12 @@ def ImageSample(inputs, borderMode='repeat'):
    This was described in the paper:
    `Spatial Transformer Networks <http://arxiv.org/abs/1506.02025>`_.
+    This is equivalent to `torch.nn.functional.grid_sample`,
+    up to some non-trivial coordinate transformation.
+    This implementation returns pixel value at pixel (1, 1) for a floating point coordinate (1.0, 1.0).
+    Note that this may not be what you need.
    Args:
        inputs (list): [images, coords]. images has shape NHWC.
            coords has shape (N, H', W', 2), where each pair of the last dimension is a (y, x) real-value
@@ -53,7 +59,7 @@ def ImageSample(inputs, borderMode='repeat'):
    Returns:
        tf.Tensor: a tensor named ``output`` of shape (N, H', W', C).
    """
-    log_deprecated("ImageSample", "Please implement it in your own code instead!", "2018-09-01")
+    log_deprecated("ImageSample", "Please implement it in your own code instead!", "2018-12-01")
    image, mapping = inputs
    assert image.get_shape().ndims == 4 and mapping.get_shape().ndims == 4
    input_shape = image.get_shape().as_list()[1:]

--- a/tensorpack/models/pool.py
+++ b/tensorpack/models/pool.py
@@ -7,6 +7,7 @@ import numpy as np
 from .shape_utils import StaticDynamicShape
 from .common import layer_register
 from ..utils.argtools import shape2d, get_data_format
+from ..utils.develop import log_deprecated
 from ._test import TestModel
 from .tflayer import convert_to_tflayer_args
@@ -145,6 +146,8 @@ def FixedUnPooling(x, shape, unpool_mat=None, data_format='channels_last'):
 def BilinearUpSample(x, shape):
    """
    Deterministic bilinearly-upsample the input images.
+    It is implemented by deconvolution with "BilinearFiller" in Caffe.
+    It is aimed to mimic caffe behavior.
    Args:
        x (tf.Tensor): a NHWC tensor
@@ -153,6 +156,7 @@ def BilinearUpSample(x, shape):
    Returns:
        tf.Tensor: a NHWC tensor.
    """
+    log_deprecated("BilinearUpSample", "Please implement it in your own code instead!", "2019-03-01")
    inp_shape = x.shape.as_list()
    ch = inp_shape[3]
    assert ch is not None
@@ -163,7 +167,7 @@ def BilinearUpSample(x, shape):
    def bilinear_conv_filler(s):
        """
        s: width, height of the conv filter
-        See https://github.com/BVLC/caffe/blob/master/include%2Fcaffe%2Ffiller.hpp#L244
+        https://github.com/BVLC/caffe/blob/99bd99795dcdf0b1d3086a8d67ab1782a8a08383/include/caffe/filler.hpp#L219-L268
        """
        f = np.ceil(float(s) / 2)
        c = float(2 * f - 1 - f % 2) / (2 * f)