Commit b96cb78c authored by Yuxin Wu's avatar Yuxin Wu

Move ImageSample to examples.

parent 3d2c2f6e
......@@ -376,6 +376,7 @@ def autodoc_skip_member(app, what, name, obj, skip, options):
'get_nr_gpu',
# deprecated or renamed symbolic code
'ImageSample',
'Deconv2D',
'get_scalar_var', 'psnr',
'prediction_incorrect', 'huber_loss',
......
......@@ -27,6 +27,8 @@ In particular, when working with the `ModelDesc` interface, the `build_graph` me
The tower function needs to follow some conventions:
1. __It might get called multiple times__ for data-parallel training or inference.
* Therefore, to use a tensorflow-hub module, you need to initialize the
module outside the tower function, and call the module inside the tower function.
2. It has to respect variable collections:
* (Required) Only put variables __trainable by gradient descent__ into `TRAINABLE_VARIABLES`.
* (Recommended) Put non-trainable variables that need to be used in inference into `MODEL_VARIABLES`.
......@@ -35,7 +37,8 @@ The tower function needs to follow some conventions:
Don't depend on name_scope's name. Don't use variable_scope's name twice.
* The creation of any trainable variables must respect __reuse__ variable scope.
To respect variable reuse, use `tf.get_variable` instead of `tf.Variable` in the function.
On the other hand, for non-trainable variables, it's OK to use `tf.Variable` to force creation of new variables in each tower.
On the other hand, for non-trainable variables, it's OK to use
`tf.Variable` to ensure creation of new variables in each tower even when `reuse=True`.
4. It will always be called under a `TowerContext`, which can be accessed by `get_current_tower_context()`.
The context contains information about training/inference mode, reuse, etc.
......
......@@ -224,6 +224,14 @@ class ExpReplay(DataFlow, Callback):
if sample[1] or sample[3]:
view_state(sample[0])
def _process_batch(self, batch_exp):
state = np.asarray([e[0] for e in batch_exp], dtype='uint8')
reward = np.asarray([e[1] for e in batch_exp], dtype='float32')
action = np.asarray([e[2] for e in batch_exp], dtype='int8')
isOver = np.asarray([e[3] for e in batch_exp], dtype='bool')
return [state, action, reward, isOver]
# DataFlow method:
def get_data(self):
# wait for memory to be initialized
self._init_memory_flag.wait()
......@@ -238,13 +246,7 @@ class ExpReplay(DataFlow, Callback):
yield self._process_batch(batch_exp)
self._populate_job_queue.put(1)
def _process_batch(self, batch_exp):
state = np.asarray([e[0] for e in batch_exp], dtype='uint8')
reward = np.asarray([e[1] for e in batch_exp], dtype='float32')
action = np.asarray([e[2] for e in batch_exp], dtype='int8')
isOver = np.asarray([e[3] for e in batch_exp], dtype='bool')
return [state, action, reward, isOver]
# Callback methods:
def _setup_graph(self):
self.predictor = self.trainer.get_predictor(*self.predictor_io_names)
......@@ -282,6 +284,3 @@ if __name__ == '__main__':
import IPython as IP
IP.embed(config=IP.terminal.ipapp.load_default_config())
pass
# import IPython;
# IPython.embed(config=IPython.terminal.ipapp.load_default_config())
# break
......@@ -28,7 +28,7 @@ Evaluate the [pretrained model](http://models.tensorpack.com/ShuffleNet/):
### AlexNet
This AlexNet script is quite close to the setting in its [original
This AlexNet script is quite close to the settings in its [original
paper](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
Trained with 64x2 batch size, the script reaches 58% single-crop validation
accuracy after 100 epochs (21 hours on 2 V100s).
......@@ -50,6 +50,9 @@ See `./vgg16.py --help` for usage.
|:------------------------------------------|---------------------|--------------------:|
| 29~30% (large variation with random seed) | 28% | 27.6% |
Note that the purpose of this experiment in the paper is not to claim GroupNorm is better
than BatchNorm, so the training settings and hyperparameters have not been individually tuned for best accuracy.
### ResNet
See [ResNet examples](../ResNet). It includes variants like pre-activation
......
......@@ -12,13 +12,95 @@ import argparse
from tensorpack import *
from tensorpack.dataflow import dataset
from tensorpack.tfutils import sesscreate, optimizer, summary, gradproc
from tensorpack.tfutils import optimizer, summary, gradproc
IMAGE_SIZE = 42
WARP_TARGET_SIZE = 28
HALF_DIFF = (IMAGE_SIZE - WARP_TARGET_SIZE) // 2
def sample(img, coords):
    """
    Gather pixels from a batch of images at the given coordinates.
    No interpolation is done here: each coordinate selects exactly one pixel.

    Args:
        img: bxhxwxc. h and w must be statically known.
        coords: bxh2xw2x2. each coordinate is (y, x), given as a float
            that is clipped to the image and then cast to int32.
            Out of boundary coordinates will be clipped.
    Return:
        bxh2xw2xc image
    """
    shape = img.get_shape().as_list()[1:]   # h, w, c
    # h and w are needed as Python ints below; fail early with the
    # offending shape instead of a TypeError on `None - 1`.
    assert None not in shape[:2], img.get_shape()
    batch = tf.shape(img)[0]
    shape2 = coords.get_shape().as_list()[1:3]  # h2, w2
    assert None not in shape2, coords.get_shape()
    max_coor = tf.constant([shape[0] - 1, shape[1] - 1], dtype=tf.float32)

    coords = tf.clip_by_value(coords, 0., max_coor)  # borderMode==repeat
    # tf.to_int32 is deprecated; tf.cast is its documented replacement.
    coords = tf.cast(coords, tf.int32)

    # Prepend the batch index to every (y, x) pair so that gather_nd
    # can address img with full (b, y, x) indices.
    batch_index = tf.range(batch, dtype=tf.int32)
    batch_index = tf.reshape(batch_index, [-1, 1, 1, 1])
    batch_index = tf.tile(batch_index, [1, shape2[0], shape2[1], 1])    # bxh2xw2x1
    indices = tf.concat([batch_index, coords], axis=3)  # bxh2xw2x3
    sampled = tf.gather_nd(img, indices)
    return sampled
@layer_register(log_shape=True)
def BilinearSample(inputs, borderMode='repeat'):
    """
    Bilinearly interpolate images at real-valued coordinates, as described in
    `Spatial Transformer Networks <http://arxiv.org/abs/1506.02025>`_.

    Args:
        inputs (list): [images, coords]. images has shape NHWC.
            coords has shape (N, H', W', 2), where each pair in the last
            dimension is a (y, x) real-value coordinate.
        borderMode: either "repeat" or "constant" (zero-filled)

    Returns:
        tf.Tensor: a tensor named ``output`` of shape (N, H', W', C).
    """
    image, mapping = inputs
    assert image.get_shape().ndims == 4 and mapping.get_shape().ndims == 4
    assert mapping.dtype.is_floating, mapping
    input_shape = image.get_shape().as_list()[1:]
    assert None not in input_shape, \
        "Images must have fully-defined shape"
    assert borderMode in ('repeat', 'constant')

    # Keep the unclamped coordinates: the 'constant' mode needs them to
    # decide which output pixels fall outside the image.
    raw_mapping = mapping
    mapping = tf.maximum(mapping, 0.0)

    # Lower / upper integer neighbours and the fractional offsets between them.
    lower = tf.floor(mapping)
    upper = lower + 1
    frac = mapping - lower
    inv_frac = 1.0 - frac  # bxh2xw2x2

    lower_y, lower_x = tf.split(lower, 2, axis=3)
    upper_y, upper_x = tf.split(upper, 2, axis=3)

    # The two mixed corners: (lower y, upper x) and (upper y, lower x).
    ly_ux = tf.concat([lower_y, upper_x], axis=3)
    uy_lx = tf.concat([upper_y, lower_x], axis=3)

    frac_y, frac_x = tf.split(frac, 2, axis=3)
    inv_frac_y, inv_frac_x = tf.split(inv_frac, 2, axis=3)

    # Each corner is weighted by the offsets along both axes taken from
    # the opposite corner.
    corners = [
        sample(image, lower) * inv_frac_x * inv_frac_y,
        sample(image, upper) * frac_x * frac_y,
        sample(image, ly_ux) * inv_frac_y * frac_x,
        sample(image, uy_lx) * frac_y * inv_frac_x,
    ]
    ret = tf.add_n(corners, name='sampled')

    if borderMode == 'constant':
        # Zero out every output pixel whose original (y, x) lies outside
        # [0, h-1] x [0, w-1].
        max_coor = tf.constant([input_shape[0] - 1, input_shape[1] - 1], dtype=tf.float32)
        inside = tf.logical_and(
            tf.greater_equal(raw_mapping, 0.0),
            tf.less_equal(raw_mapping, max_coor))   # bxh2xw2x2
        inside = tf.reduce_all(inside, [3])  # bxh2xw2 boolean
        inside = tf.expand_dims(inside, 3)
        ret = ret * tf.cast(inside, tf.float32)
    return tf.identity(ret, name='output')
class Model(ModelDesc):
def inputs(self):
return [tf.placeholder(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE, 2), 'input'),
......@@ -47,7 +129,7 @@ class Model(ModelDesc):
coor = tf.reshape(tf.matmul(xys, stn),
[WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2])
coor = tf.transpose(coor, [2, 0, 1, 3], 'sampled_coords') # b h w 2
sampled = ImageSample('warp', [image, coor], borderMode='constant')
sampled = BilinearSample('warp', [image, coor], borderMode='constant')
return sampled
with argscope([Conv2D, FullyConnected], activation=tf.nn.relu):
......@@ -162,8 +244,6 @@ def get_config():
[ScalarStats('cost'), ClassificationError()]),
ScheduledHyperParamSetter('learning_rate', [(200, 1e-4)])
],
session_creator=sesscreate.NewSessionCreator(
config=get_default_sess_config(0.5)),
steps_per_epoch=steps_per_epoch,
max_epoch=500,
)
......
......@@ -4,6 +4,7 @@
import tensorflow as tf
from ..utils.develop import log_deprecated
from .common import layer_register
from ._test import TestModel
......@@ -52,6 +53,7 @@ def ImageSample(inputs, borderMode='repeat'):
Returns:
tf.Tensor: a tensor named ``output`` of shape (N, H', W', C).
"""
log_deprecated("ImageSample", "Please implement it in your own code instead!", "2018-09-01")
image, mapping = inputs
assert image.get_shape().ndims == 4 and mapping.get_shape().ndims == 4
input_shape = image.get_shape().as_list()[1:]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment