Commit d1cc5a4a authored by Yuxin Wu

tower_func option in InferenceRunner

parent 8f8fe80d
queries:
- exclude: py/unguarded-next-in-generator
- exclude: py/explicit-call-to-delete
- exclude: py/polluting-import
- exclude: py/import-and-import-from
- exclude: py/similar-function
- exclude: py/unused-local-variable
extraction:
  python:
    prepare:
......
@@ -81,14 +81,28 @@ with TowerContext('some_name_or_empty_string', is_training=False):
   # build the graph again
 ```

-### Use Other Symbolic Libraries within Tensorpack
+### Use Other Symbolic Libraries

-When defining the model you can construct the graph using whatever library you feel comfortable with.
+Tensorpack & `tf.layers` only provide a subset of the most common models.
+However you can construct the graph using whatever library you feel comfortable with.

-Usually, slim/tflearn/tensorlayer are just symbolic function wrappers, calling them is nothing different
+Functions in slim/tflearn/tensorlayer are just symbolic function wrappers, calling them is nothing different
 from calling `tf.add`. You may need to be careful how regularizations/BN updates are supposed
 to be handled in those libraries, though.

 It is a bit different to use sonnet/Keras.
 sonnet/Keras manages the variable scope by their own model classes, and calling their symbolic functions
 always creates new variable scope. See the [Keras example](../examples/keras) for how to use it within tensorpack.
+
+```eval_rst
+.. note:: **It's best to not trust others' layers!**
+
+   For non-standard layers that are not included in TensorFlow or Tensorpack, it's best to implement them yourself.
+   Non-standard layers often do not have a mathematical definition that people
+   all agree on, and different people can implement them differently.
+   Also, deep learning models on github often have bugs, especially when there are
+   no reproduced experiments with the code.
+
+   For your own good, it's best to implement the layers yourself.
+   This is also why Tensorpack does not contain non-standard layers.
+```
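To make the point above concrete, here is a minimal sketch (not part of this commit) of building the graph with plain `tf.layers` inside a tensorpack model. The input names, shapes, and layer sizes are invented for illustration, and the `ModelDesc` interface shown is the one from roughly this version of tensorpack.

```python
import tensorflow as tf
from tensorpack import ModelDesc, InputDesc


class MNISTModel(ModelDesc):
    def inputs(self):
        return [InputDesc(tf.float32, [None, 28, 28, 1], 'image'),
                InputDesc(tf.int32, [None], 'label')]

    def build_graph(self, image, label):
        # Nothing tensorpack-specific below: the graph is built with plain tf.layers,
        # and slim/tflearn/tensorlayer calls could be mixed in the same way.
        x = tf.layers.conv2d(image, 32, 3, activation=tf.nn.relu, name='conv0')
        x = tf.layers.flatten(x)
        logits = tf.layers.dense(x, 10, name='fc')
        cost = tf.losses.sparse_softmax_cross_entropy(labels=label, logits=logits)
        return tf.identity(cost, name='cost')

    def optimizer(self):
        return tf.train.AdamOptimizer(1e-3)
```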
@@ -102,6 +102,7 @@ class ResNetC4Model(DetectionModel):
         return ret

     def build_graph(self, *inputs):
+        # TODO need to make tensorpack handle dict better
         inputs = dict(zip(self.input_names, inputs))
         is_training = get_current_tower_context().is_training
         image = self.preprocess(inputs['image'])     # 1CHW
......
@@ -85,9 +85,7 @@ def resample(img, flow):
     xf = xf + dx
     yf = yf + dy
-    alpha = tf.expand_dims(xf - tf.floor(xf), axis=0)
     alpha = tf.expand_dims(xf - tf.floor(xf), axis=-1)
-    beta = tf.expand_dims(yf - tf.floor(yf), axis=0)
     beta = tf.expand_dims(yf - tf.floor(yf), axis=-1)
     xL = tf.clip_by_value(tf.cast(tf.floor(xf), dtype=tf.int32), 0, w - 1)
@@ -406,7 +404,6 @@ class FlowNet2C(FlowNetBase):
         corr = tf.nn.leaky_relu(corr, 0.1)
         conv_redir = tf.layers.conv2d(conv3a, 32, kernel_size=1, strides=1, name='conv_redir')
-        x = tf.concat([conv_redir, corr], axis=1, name='concat_redir')
         in_conv3_1 = tf.concat([conv_redir, corr], axis=1, name='in_conv3_1')
         conv3_1 = tf.layers.conv2d(pad(in_conv3_1, 1), 256, name='conv3_1', strides=1)
......
@@ -111,7 +111,7 @@ class InferenceRunner(InferenceRunnerBase):
    A callback that runs a list of :class:`Inferencer` on some :class:`InputSource`.
    """

-    def __init__(self, input, infs, tower_name='InferenceTower', device=0):
+    def __init__(self, input, infs, tower_name='InferenceTower', tower_func=None, device=0):
        """
        Args:
            input (InputSource or DataFlow): The :class:`InputSource` to run
@@ -119,6 +119,10 @@ class InferenceRunner(InferenceRunnerBase):
            infs (list): a list of :class:`Inferencer` instances.
            tower_name (str): the name scope of the tower to build. Need to set a
                different one if multiple InferenceRunner are used.
+            tower_func (tfutils.TowerFuncWrapper or None): the tower function to be used to build the graph.
+                By default it calls `trainer.tower_func` under a `training=False` TowerContext,
+                but you can change it to a different tower function
+                if you need to run inference with several different graphs.
            device (int): the device to use
        """
        if isinstance(input, DataFlow):
@@ -128,6 +132,7 @@ class InferenceRunner(InferenceRunnerBase):
        self._tower_name = tower_name
        self._device_id = device
        self._device = _device_from_int(device)
+        self._tower_func = tower_func
        super(InferenceRunner, self).__init__(input, infs)

    def _build_hook(self, inf):
@@ -136,9 +141,10 @@ class InferenceRunner(InferenceRunnerBase):
        return InferencerToHook(inf, fetches)

    def _setup_graph(self):
-        assert self.trainer.tower_func is not None, "You must set tower_func of the trainer to use InferenceRunner!"
-        tower_func = self.trainer.tower_func
-        input_callbacks = self._input_source.setup(tower_func.inputs_desc)
+        if self._tower_func is None:
+            assert self.trainer.tower_func is not None, "You must set tower_func of the trainer to use InferenceRunner!"
+            self._tower_func = self.trainer.tower_func
+        input_callbacks = self._input_source.setup(self._tower_func.inputs_desc)

        vs_name = self.trainer._vs_name_for_predictor(self._device_id)
        logger.info("[InferenceRunner] Building tower '{}' on device {} {}...".format(
@@ -147,8 +153,8 @@ class InferenceRunner(InferenceRunnerBase):
        with tf.variable_scope(tf.get_variable_scope(), reuse=True), \
                tf.device(self._device), \
                PredictTowerContext(self._tower_name, vs_name=vs_name):
-            tower_func(*self._input_source.get_input_tensors())
-            self._tower_handle = tower_func.towers[-1]
+            self._tower_func(*self._input_source.get_input_tensors())
+            self._tower_handle = self._tower_func.towers[-1]

        for h in [self._build_hook(inf) for inf in self.infs]:
            self.register_hook(h)
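A hypothetical usage sketch of the new `tower_func` argument (not taken from the commit; `my_eval_graph_fn`, `my_inputs_desc`, and `my_val_dataflow` are placeholder names):

```python
from tensorpack import InferenceRunner, ScalarStats
from tensorpack.tfutils.tower import TowerFuncWrapper

# A tower function built independently of trainer.tower_func, e.g. a graph
# specialized for evaluation. It declares its own inputs_desc.
eval_tower_func = TowerFuncWrapper(my_eval_graph_fn, my_inputs_desc)

callbacks = [
    InferenceRunner(my_val_dataflow, [ScalarStats('cost')],
                    tower_func=eval_tower_func),
]
```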
@@ -186,11 +192,17 @@ class DataParallelInferenceRunner(InferenceRunnerBase):
    It will run the remainder (when the total size of input is not a multiple of #GPU)
    sequentially.
    """

-    def __init__(self, input, infs, gpus, tower_name='InferenceTower'):
+    def __init__(self, input, infs, gpus, tower_name='InferenceTower', tower_func=None):
        """
        Args:
            input (DataFlow or QueueInput)
            gpus (int or list[int]): #gpus, or list of GPU id
+            tower_name (str): the name scope of the tower to build. Need to set a
+                different one if multiple InferenceRunner are used.
+            tower_func (tfutils.TowerFuncWrapper or None): the tower function to be used to build the graph.
+                By default it calls `trainer.tower_func` under a `training=False` TowerContext,
+                but you can change it to a different tower function
+                if you need to run inference with several different graphs.
        """
        if isinstance(gpus, int):
            gpus = list(range(gpus))
@@ -205,13 +217,15 @@ class DataParallelInferenceRunner(InferenceRunnerBase):
        self._hooks = []
        self._hooks_parallel = []
+        self._tower_func = tower_func

    def _setup_graph(self):
        self._handles = []
-        assert self.trainer.tower_func is not None, "You must set tower_func of the trainer to use InferenceRunner!"
-        tower_func = self.trainer.tower_func
-        input_callbacks = self._input_source.setup(tower_func.inputs_desc)
+        if self._tower_func is None:
+            assert self.trainer.tower_func is not None, "You must set tower_func of the trainer to use InferenceRunner!"
+            self._tower_func = self.trainer.tower_func
+        input_callbacks = self._input_source.setup(self._tower_func.inputs_desc)

        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            for idx, dev in enumerate(self._devices):
                vs_name = self.trainer._vs_name_for_predictor(idx)
@@ -221,8 +235,8 @@ class DataParallelInferenceRunner(InferenceRunnerBase):
                    self._tower_names[idx], dev,
                    "with variable scope '{}'".format(vs_name) if vs_name else ''))
                # TODO log for tower creation, here or in tower.py?
-                tower_func(*self._input_source.get_input_tensors())
-                self._handles.append(tower_func.towers[-1])
+                self._tower_func(*self._input_source.get_input_tensors())
+                self._handles.append(self._tower_func.towers[-1])

        # setup callbacks and hooks
        self._input_callbacks = Callbacks(input_callbacks)
......
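The data-parallel variant accepts the same option; a sketch with the same placeholder names as above:

```python
from tensorpack import DataParallelInferenceRunner, ScalarStats

runner = DataParallelInferenceRunner(
    my_val_dataflow, [ScalarStats('cost')], gpus=[0, 1],
    tower_func=eval_tower_func)
```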
@@ -209,7 +209,7 @@ class KerasModel(object):
                 input, trainer=None):
        """
        Args:
-            get_model (input1, input2, ... -> keras.model.Model):
+            get_model (input1, input2, ... -> keras.Model):
                Takes tensors and returns a Keras model. Will be part of the tower function.
            inputs_desc ([InputDesc]):
            targets_desc ([InputDesc]):
......
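For reference, a hypothetical `get_model` matching the documented signature (the layer sizes and the use of `keras.layers.Input(tensor=...)` are illustrative assumptions, not part of the commit):

```python
from tensorflow import keras

def get_model(image):
    # Wrap the tensor handed in by tensorpack as a Keras input, then build normally.
    inp = keras.layers.Input(tensor=image)
    x = keras.layers.Conv2D(32, 3, activation='relu')(inp)
    x = keras.layers.GlobalAveragePooling2D()(x)
    logits = keras.layers.Dense(10)(x)
    return keras.Model(inputs=inp, outputs=logits)
```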
@@ -52,7 +52,7 @@ def read_cifar(filenames, cifar_classnum):
        if cifar_classnum == 10:
            label = dic[b'labels']
            IMG_NUM = 10000  # cifar10 data are split into blocks of 10000
-        elif cifar_classnum == 100:
+        else:
            label = dic[b'fine_labels']
            IMG_NUM = 50000 if 'train' in fname else 10000
        fo.close()
......
@@ -245,9 +245,10 @@ if __name__ == '__main__':
        print("Numpy Finished, ", idx)
    print(time.time())

-    HDF5Serializer.save(ds, 'out.h5')
+    paths = ['p1', 'p2']
+    HDF5Serializer.save(ds, 'out.h5', paths)
    print(time.time())
-    df = HDF5Serializer.load('out.h5')
+    df = HDF5Serializer.load('out.h5', paths)
    df.reset_state()
    for idx, dp in enumerate(df):
        pass
......
@@ -21,10 +21,10 @@ def get_tensors_inputs(placeholders, tensors, names):
    Args:
        placeholders (list[Tensor]):
        tensors (list[Tensor]): list of tf.Tensor
-        names (list[str]): names matching the tensors
+        names (list[str]): names matching the given tensors

    Returns:
-        list[Tensor]: inputs to be used with build_graph(),
+        list[Tensor]: inputs to be used for the tower function,
            with the corresponding placeholders replaced by tensors.
    """
    assert len(tensors) == len(names), \
@@ -74,9 +74,10 @@ class InputSource(object):
    def get_input_tensors(self):
        """
        Returns:
-            list: A list of tensors corresponding to the inputs of the model,
-                used as input of :func:`build_graph`.
-                For non-placeholder tensors, should always create and return new tensors when called.
+            list[Tensor]: A list of tensors corresponding to the inputs of the model.
+                Will be used as input for the tower function.
+                This method should always create and return new tensors when called,
+                unless it returns placeholders.
        """
        return self._get_input_tensors()
@@ -204,8 +205,8 @@ class ProxyInputSource(InputSource):
 def remap_input_source(input, names):
    """
-    When you have some :class:`InputSource` which doesn't match the inputs in
-    your :class:`ModelDesc`, use `RemapInputSource`.
+    When you have some :class:`InputSource` which doesn't match the inputs of
+    your tower function, use `RemapInputSource`.
    It produces placeholders for all the inputs in your model,
    except that the corresponding ones are replaced with the tensor produced
    by the given :class:`InputSource`.
......
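A rough usage sketch of `remap_input_source` (the names, shapes, and `my_dataflow` below are invented): the wrapped `InputSource` only yields 'image' and 'label', while the tower function also declares a 'score' input, which is therefore given a fresh placeholder.

```python
import tensorflow as tf
from tensorpack import InputDesc, QueueInput
from tensorpack.input_source import remap_input_source

inputs_desc = [InputDesc(tf.float32, (None, 10), 'score'),
               InputDesc(tf.float32, (None, 20, 20, 3), 'image'),
               InputDesc(tf.int32, (None,), 'label')]

input1 = QueueInput(my_dataflow)                        # yields 'image' and 'label'
input2 = remap_input_source(input1, ['image', 'label'])
input2.setup(inputs_desc)
# input2.get_input_tensors() -> [score placeholder, image from the queue, label from the queue]
```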
@@ -141,12 +141,10 @@ def BatchNorm(inputs, axis=None, training=None, momentum=0.9, epsilon=1e-5,
    if axis is None:
        if ndims == 2:
-            data_format = 'NHWC'
            axis = 1
        else:
            axis = 1 if data_format == 'NCHW' else 3
-    else:
-        data_format = 'NCHW' if axis == 1 else 'NHWC'
+    assert axis in [1, 3], axis
    num_chan = shape[axis]

    # parse training/ctx
......
@@ -3,6 +3,7 @@

 import tensorflow as tf
+import numpy as np

 from ..utils.develop import log_deprecated
 from .common import layer_register
@@ -102,7 +103,6 @@ def ImageSample(inputs, borderMode='repeat'):
 class TestSample(TestModel):
    def test_ImageSample(self):
-        import numpy as np
        h, w = 3, 4

        def np_sample(img, coords):
@@ -139,7 +139,6 @@ class TestSample(TestModel):
 if __name__ == '__main__':
    import cv2
-    import numpy as np
    im = cv2.imread('cat.jpg')
    im = im.reshape((1,) + im.shape).astype('float32')
    imv = tf.Variable(im)
......
@@ -29,6 +29,8 @@ def regularize_cost(regex, func, name='regularize_cost'):
    the matched variables (only print once in multi-tower training).
    In replicated mode, it will only regularize variables within the current tower.

+    If called under a TowerContext with `is_training==False`, this function returns a zero constant tensor.
+
    Args:
        regex (str): a regex to match variable names, e.g. "conv.*/W"
        func: the regularization function, which takes a tensor and returns a scalar tensor.
......
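A short usage sketch of `regularize_cost` (not from this commit; `cost` is assumed to be a loss tensor already built in the tower function). With the behavior documented above, the same code also works at inference time, where the regularization term collapses to a zero constant:

```python
import tensorflow as tf
from tensorpack import regularize_cost

wd_cost = regularize_cost('.*/W', tf.contrib.layers.l2_regularizer(1e-4), name='wd_cost')
total_cost = tf.add_n([cost, wd_cost], name='total_cost')
```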
@@ -51,7 +51,7 @@ def apply_default_prefetch(input_source_or_dataflow, trainer):
 def launch_train_with_config(config, trainer):
    """
    Train with a :class:`TrainConfig` and a :class:`Trainer`, to
-    present a simple training interface. It basically does the following
+    present the simple and old training interface. It basically does the following
    3 things (and you can easily do them by yourself if you need more control):

    1. Setup the input with automatic prefetching heuristics,
@@ -76,12 +76,14 @@ def launch_train_with_config(config, trainer):
    assert config.dataflow is not None or config.data is not None

    model = config.model
-    inputs_desc = model.get_inputs_desc()
    input = config.data or config.dataflow
    input = apply_default_prefetch(input, trainer)

+    # This is the only place where the `ModelDesc` abstraction is useful.
+    # We should gradually move away from this not-so-useful abstraction.
+    # TowerFuncWrapper is a better abstraction (similar to tf.defun in the future).
    trainer.setup_graph(
-        inputs_desc, input,
+        model.get_inputs_desc(), input,
        model._build_graph_get_cost, model.get_optimizer)
    _check_unused_regularization()
    trainer.train_with_defaults(
......
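For context, the typical call pattern of `launch_train_with_config` looks like this (a sketch; `MNISTModel` and `my_dataflow` are the placeholders used in the earlier sketches):

```python
from tensorpack import TrainConfig, SimpleTrainer, launch_train_with_config

config = TrainConfig(
    model=MNISTModel(),      # any ModelDesc, e.g. the sketch shown earlier
    dataflow=my_dataflow,
    callbacks=[],
    max_epoch=10,
)
launch_train_with_config(config, SimpleTrainer())
```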
@@ -33,7 +33,7 @@ def create_dummy_class(klass, dependency):
    class _DummyMetaClass(type):
        # throw error on class attribute access
        def __getattr__(_, __):
-            raise ImportError("Cannot import '{}', therefore '{}' is not available".format(dependency, klass))
+            raise AttributeError("Cannot import '{}', therefore '{}' is not available".format(dependency, klass))

    @six.add_metaclass(_DummyMetaClass)
    class _Dummy(object):
......
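A hedged sketch of how the dummy-class fallback behaves after this change (`SomeReader` and `some_optional_dep` are invented names):

```python
from tensorpack.utils.develop import create_dummy_class

# Pretend 'some_optional_dep' is missing; the real class gets replaced by a dummy.
SomeReader = create_dummy_class('SomeReader', 'some_optional_dep')

# Touching any attribute of the dummy now fails loudly, naming the missing dependency:
# SomeReader.anything
#   -> AttributeError: Cannot import 'some_optional_dep', therefore 'SomeReader' is not available
```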
-#!/bin/bash -e
+#!/bin/bash -ev
 # File: run-tests.sh

 DIR=$(dirname $0)
......