Commit d1cc5a4a authored by Yuxin Wu

tower_func option in InferenceRunner

parent 8f8fe80d
queries:
- exclude: py/unguarded-next-in-generator
- exclude: py/explicit-call-to-delete
- exclude: py/polluting-import
- exclude: py/import-and-import-from
- exclude: py/similar-function
- exclude: py/unused-local-variable
extraction:
  python:
    prepare:
......
@@ -81,14 +81,28 @@ with TowerContext('some_name_or_empty_string', is_training=False):
   # build the graph again
 ```

-### Use Other Symbolic Libraries within Tensorpack
+### Use Other Symbolic Libraries

-When defining the model you can construct the graph using whatever library you feel comfortable with.
+Tensorpack & `tf.layers` only provide a subset of the most common models.
+However you can construct the graph using whatever library you feel comfortable with.

-Usually, slim/tflearn/tensorlayer are just symbolic function wrappers, calling them is nothing different
+Functions in slim/tflearn/tensorlayer are just symbolic function wrappers, calling them is nothing different
 from calling `tf.add`. You may need to be careful how regularizations/BN updates are supposed
 to be handled in those libraries, though.

 It is a bit different to use sonnet/Keras.
 sonnet/Keras manages the variable scope by their own model classes, and calling their symbolic functions
 always creates new variable scope. See the [Keras example](../examples/keras) for how to use it within tensorpack.
+
+```eval_rst
+.. note:: **It's best to not trust others' layers!**
+
+   For non-standard layers that are not included in TensorFlow or Tensorpack, it's best to implement them yourself.
+   Non-standard layers often do not have a mathematical definition that people
+   all agree on, and different people can implement them differently.
+   Also, deep learning models on github often have bugs, especially when there are
+   no reproduced experiments with the code.
+
+   For your own good, it's best to implement the layers yourself.
+   This is also why Tensorpack does not contain non-standard layers.
+```
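To make the point above concrete, here is a minimal sketch (not part of this commit) of building the graph with plain `tf.layers` inside a tensorpack model. The input names, shapes, and layer sizes are invented for illustration, and the `ModelDesc` interface shown is the one from roughly this version of tensorpack.

```python
import tensorflow as tf
from tensorpack import ModelDesc, InputDesc


class MNISTModel(ModelDesc):
    def inputs(self):
        return [InputDesc(tf.float32, [None, 28, 28, 1], 'image'),
                InputDesc(tf.int32, [None], 'label')]

    def build_graph(self, image, label):
        # Nothing tensorpack-specific below: the graph is built with plain tf.layers,
        # and slim/tflearn/tensorlayer calls could be mixed in the same way.
        x = tf.layers.conv2d(image, 32, 3, activation=tf.nn.relu, name='conv0')
        x = tf.layers.flatten(x)
        logits = tf.layers.dense(x, 10, name='fc')
        cost = tf.losses.sparse_softmax_cross_entropy(labels=label, logits=logits)
        return tf.identity(cost, name='cost')

    def optimizer(self):
        return tf.train.AdamOptimizer(1e-3)
```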
@@ -102,6 +102,7 @@ class ResNetC4Model(DetectionModel):
         return ret

     def build_graph(self, *inputs):
+        # TODO need to make tensorpack handle dict better
         inputs = dict(zip(self.input_names, inputs))
         is_training = get_current_tower_context().is_training
         image = self.preprocess(inputs['image'])     # 1CHW
......
@@ -85,9 +85,7 @@ def resample(img, flow):
     xf = xf + dx
     yf = yf + dy
-    alpha = tf.expand_dims(xf - tf.floor(xf), axis=0)
     alpha = tf.expand_dims(xf - tf.floor(xf), axis=-1)
-    beta = tf.expand_dims(yf - tf.floor(yf), axis=0)
     beta = tf.expand_dims(yf - tf.floor(yf), axis=-1)
     xL = tf.clip_by_value(tf.cast(tf.floor(xf), dtype=tf.int32), 0, w - 1)
@@ -406,7 +404,6 @@ class FlowNet2C(FlowNetBase):
         corr = tf.nn.leaky_relu(corr, 0.1)
         conv_redir = tf.layers.conv2d(conv3a, 32, kernel_size=1, strides=1, name='conv_redir')
-        x = tf.concat([conv_redir, corr], axis=1, name='concat_redir')
         in_conv3_1 = tf.concat([conv_redir, corr], axis=1, name='in_conv3_1')
         conv3_1 = tf.layers.conv2d(pad(in_conv3_1, 1), 256, name='conv3_1', strides=1)
......
@@ -111,7 +111,7 @@ class InferenceRunner(InferenceRunnerBase):
    A callback that runs a list of :class:`Inferencer` on some :class:`InputSource`.
    """

-    def __init__(self, input, infs, tower_name='InferenceTower', device=0):
+    def __init__(self, input, infs, tower_name='InferenceTower', tower_func=None, device=0):
        """
        Args:
            input (InputSource or DataFlow): The :class:`InputSource` to run
@@ -119,6 +119,10 @@ class InferenceRunner(InferenceRunnerBase):
            infs (list): a list of :class:`Inferencer` instances.
            tower_name (str): the name scope of the tower to build. Need to set a
                different one if multiple InferenceRunner are used.
+            tower_func (tfutils.TowerFuncWrapper or None): the tower function to be used to build the graph.
+                By default it calls `trainer.tower_func` under a `training=False` TowerContext,
+                but you can change it to a different tower function
+                if you need to run inference with several different graphs.
            device (int): the device to use
        """
        if isinstance(input, DataFlow):
@@ -128,6 +132,7 @@ class InferenceRunner(InferenceRunnerBase):
        self._tower_name = tower_name
        self._device_id = device
        self._device = _device_from_int(device)
+        self._tower_func = tower_func
        super(InferenceRunner, self).__init__(input, infs)

    def _build_hook(self, inf):
@@ -136,9 +141,10 @@ class InferenceRunner(InferenceRunnerBase):
        return InferencerToHook(inf, fetches)

    def _setup_graph(self):
-        assert self.trainer.tower_func is not None, "You must set tower_func of the trainer to use InferenceRunner!"
-        tower_func = self.trainer.tower_func
-        input_callbacks = self._input_source.setup(tower_func.inputs_desc)
+        if self._tower_func is None:
+            assert self.trainer.tower_func is not None, "You must set tower_func of the trainer to use InferenceRunner!"
+            self._tower_func = self.trainer.tower_func
+        input_callbacks = self._input_source.setup(self._tower_func.inputs_desc)

        vs_name = self.trainer._vs_name_for_predictor(self._device_id)
        logger.info("[InferenceRunner] Building tower '{}' on device {} {}...".format(
@@ -147,8 +153,8 @@ class InferenceRunner(InferenceRunnerBase):
        with tf.variable_scope(tf.get_variable_scope(), reuse=True), \
                tf.device(self._device), \
                PredictTowerContext(self._tower_name, vs_name=vs_name):
-            tower_func(*self._input_source.get_input_tensors())
-            self._tower_handle = tower_func.towers[-1]
+            self._tower_func(*self._input_source.get_input_tensors())
+            self._tower_handle = self._tower_func.towers[-1]

        for h in [self._build_hook(inf) for inf in self.infs]:
            self.register_hook(h)
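A hypothetical usage sketch of the new `tower_func` argument (not taken from the commit; `my_eval_graph_fn`, `my_inputs_desc`, and `my_val_dataflow` are placeholder names):

```python
from tensorpack import InferenceRunner, ScalarStats
from tensorpack.tfutils.tower import TowerFuncWrapper

# A tower function built independently of trainer.tower_func, e.g. a graph
# specialized for evaluation. It declares its own inputs_desc.
eval_tower_func = TowerFuncWrapper(my_eval_graph_fn, my_inputs_desc)

callbacks = [
    InferenceRunner(my_val_dataflow, [ScalarStats('cost')],
                    tower_func=eval_tower_func),
]
```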
@@ -186,11 +192,17 @@ class DataParallelInferenceRunner(InferenceRunnerBase):
    It will run the remainder (when the total size of input is not a multiple of #GPU)
    sequentially.
    """

-    def __init__(self, input, infs, gpus, tower_name='InferenceTower'):
+    def __init__(self, input, infs, gpus, tower_name='InferenceTower', tower_func=None):
        """
        Args:
            input (DataFlow or QueueInput)
            gpus (int or list[int]): #gpus, or list of GPU id
+            tower_name (str): the name scope of the tower to build. Need to set a
+                different one if multiple InferenceRunner are used.
+            tower_func (tfutils.TowerFuncWrapper or None): the tower function to be used to build the graph.
+                By default it calls `trainer.tower_func` under a `training=False` TowerContext,
+                but you can change it to a different tower function
+                if you need to run inference with several different graphs.
        """
        if isinstance(gpus, int):
            gpus = list(range(gpus))
@@ -205,13 +217,15 @@ class DataParallelInferenceRunner(InferenceRunnerBase):
        self._hooks = []
        self._hooks_parallel = []
+        self._tower_func = tower_func

    def _setup_graph(self):
        self._handles = []
-        assert self.trainer.tower_func is not None, "You must set tower_func of the trainer to use InferenceRunner!"
-        tower_func = self.trainer.tower_func
-        input_callbacks = self._input_source.setup(tower_func.inputs_desc)
+        if self._tower_func is None:
+            assert self.trainer.tower_func is not None, "You must set tower_func of the trainer to use InferenceRunner!"
+            self._tower_func = self.trainer.tower_func
+        input_callbacks = self._input_source.setup(self._tower_func.inputs_desc)

        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            for idx, dev in enumerate(self._devices):
                vs_name = self.trainer._vs_name_for_predictor(idx)
@@ -221,8 +235,8 @@ class DataParallelInferenceRunner(InferenceRunnerBase):
                    self._tower_names[idx], dev,
                    "with variable scope '{}'".format(vs_name) if vs_name else ''))
                # TODO log for tower creation, here or in tower.py?
-                tower_func(*self._input_source.get_input_tensors())
-                self._handles.append(tower_func.towers[-1])
+                self._tower_func(*self._input_source.get_input_tensors())
+                self._handles.append(self._tower_func.towers[-1])

        # setup callbacks and hooks
        self._input_callbacks = Callbacks(input_callbacks)
......
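The data-parallel variant accepts the same option; a sketch with the same placeholder names as above:

```python
from tensorpack import DataParallelInferenceRunner, ScalarStats

runner = DataParallelInferenceRunner(
    my_val_dataflow, [ScalarStats('cost')], gpus=[0, 1],
    tower_func=eval_tower_func)
```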
@@ -209,7 +209,7 @@ class KerasModel(object):
                 input, trainer=None):
        """
        Args:
-            get_model (input1, input2, ... -> keras.model.Model):
+            get_model (input1, input2, ... -> keras.Model):
                Takes tensors and returns a Keras model. Will be part of the tower function.
            inputs_desc ([InputDesc]):
            targets_desc ([InputDesc]):
......
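For reference, a hypothetical `get_model` matching the documented signature (the layer sizes and the use of `keras.layers.Input(tensor=...)` are illustrative assumptions, not part of the commit):

```python
from tensorflow import keras

def get_model(image):
    # Wrap the tensor handed in by tensorpack as a Keras input, then build normally.
    inp = keras.layers.Input(tensor=image)
    x = keras.layers.Conv2D(32, 3, activation='relu')(inp)
    x = keras.layers.GlobalAveragePooling2D()(x)
    logits = keras.layers.Dense(10)(x)
    return keras.Model(inputs=inp, outputs=logits)
```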
@@ -52,7 +52,7 @@ def read_cifar(filenames, cifar_classnum):
        if cifar_classnum == 10:
            label = dic[b'labels']
            IMG_NUM = 10000  # cifar10 data are split into blocks of 10000
-        elif cifar_classnum == 100:
+        else:
            label = dic[b'fine_labels']
            IMG_NUM = 50000 if 'train' in fname else 10000
        fo.close()
......
@@ -245,9 +245,10 @@ if __name__ == '__main__':
        print("Numpy Finished, ", idx)
    print(time.time())

-    HDF5Serializer.save(ds, 'out.h5')
+    paths = ['p1', 'p2']
+    HDF5Serializer.save(ds, 'out.h5', paths)
    print(time.time())
-    df = HDF5Serializer.load('out.h5')
+    df = HDF5Serializer.load('out.h5', paths)
    df.reset_state()
    for idx, dp in enumerate(df):
        pass
......
@@ -21,10 +21,10 @@ def get_tensors_inputs(placeholders, tensors, names):
    Args:
        placeholders (list[Tensor]):
        tensors (list[Tensor]): list of tf.Tensor
-        names (list[str]): names matching the tensors
+        names (list[str]): names matching the given tensors

    Returns:
-        list[Tensor]: inputs to be used with build_graph(),
+        list[Tensor]: inputs to be used for the tower function,
            with the corresponding placeholders replaced by tensors.
    """
    assert len(tensors) == len(names), \
@@ -74,9 +74,10 @@ class InputSource(object):
    def get_input_tensors(self):
        """
        Returns:
-            list: A list of tensors corresponding to the inputs of the model,
-                used as input of :func:`build_graph`.
-                For non-placeholder tensors, should always create and return new tensors when called.
+            list[Tensor]: A list of tensors corresponding to the inputs of the model.
+                Will be used as input for the tower function.
+                This method should always create and return new tensors when called,
+                unless it returns placeholders.
        """
        return self._get_input_tensors()
@@ -204,8 +205,8 @@ class ProxyInputSource(InputSource):
 def remap_input_source(input, names):
    """
-    When you have some :class:`InputSource` which doesn't match the inputs in
-    your :class:`ModelDesc`, use `RemapInputSource`.
+    When you have some :class:`InputSource` which doesn't match the inputs of
+    your tower function, use `RemapInputSource`.
    It produces placeholders for all the inputs in your model,
    except that the corresponding ones are replaced with the tensor produced
    by the given :class:`InputSource`.
......
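A rough usage sketch of `remap_input_source` (the names, shapes, and `my_dataflow` below are invented): the wrapped `InputSource` only yields 'image' and 'label', while the tower function also declares a 'score' input, which is therefore given a fresh placeholder.

```python
import tensorflow as tf
from tensorpack import InputDesc, QueueInput
from tensorpack.input_source import remap_input_source

inputs_desc = [InputDesc(tf.float32, (None, 10), 'score'),
               InputDesc(tf.float32, (None, 20, 20, 3), 'image'),
               InputDesc(tf.int32, (None,), 'label')]

input1 = QueueInput(my_dataflow)                        # yields 'image' and 'label'
input2 = remap_input_source(input1, ['image', 'label'])
input2.setup(inputs_desc)
# input2.get_input_tensors() -> [score placeholder, image from the queue, label from the queue]
```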
@@ -141,12 +141,10 @@ def BatchNorm(inputs, axis=None, training=None, momentum=0.9, epsilon=1e-5,
    if axis is None:
        if ndims == 2:
-            data_format = 'NHWC'
            axis = 1
        else:
            axis = 1 if data_format == 'NCHW' else 3
-    else:
-        data_format = 'NCHW' if axis == 1 else 'NHWC'
+    assert axis in [1, 3], axis
    num_chan = shape[axis]

    # parse training/ctx
......
@@ -3,6 +3,7 @@

 import tensorflow as tf
+import numpy as np

 from ..utils.develop import log_deprecated
 from .common import layer_register
@@ -102,7 +103,6 @@ def ImageSample(inputs, borderMode='repeat'):
 class TestSample(TestModel):
    def test_ImageSample(self):
-        import numpy as np
        h, w = 3, 4

        def np_sample(img, coords):
@@ -139,7 +139,6 @@ class TestSample(TestModel):
 if __name__ == '__main__':
    import cv2
-    import numpy as np
    im = cv2.imread('cat.jpg')
    im = im.reshape((1,) + im.shape).astype('float32')
    imv = tf.Variable(im)
......
@@ -29,6 +29,8 @@ def regularize_cost(regex, func, name='regularize_cost'):
    the matched variables (only print once in multi-tower training).
    In replicated mode, it will only regularize variables within the current tower.

+    If called under a TowerContext with `is_training==False`, this function returns a zero constant tensor.
+
    Args:
        regex (str): a regex to match variable names, e.g. "conv.*/W"
        func: the regularization function, which takes a tensor and returns a scalar tensor.
......
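A short usage sketch of `regularize_cost` (not from this commit; `cost` is assumed to be a loss tensor already built in the tower function). With the behavior documented above, the same code also works at inference time, where the regularization term collapses to a zero constant:

```python
import tensorflow as tf
from tensorpack import regularize_cost

wd_cost = regularize_cost('.*/W', tf.contrib.layers.l2_regularizer(1e-4), name='wd_cost')
total_cost = tf.add_n([cost, wd_cost], name='total_cost')
```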
@@ -51,7 +51,7 @@ def apply_default_prefetch(input_source_or_dataflow, trainer):
 def launch_train_with_config(config, trainer):
    """
    Train with a :class:`TrainConfig` and a :class:`Trainer`, to
-    present a simple training interface. It basically does the following
+    present the simple and old training interface. It basically does the following
    3 things (and you can easily do them by yourself if you need more control):

    1. Setup the input with automatic prefetching heuristics,
@@ -76,12 +76,14 @@ def launch_train_with_config(config, trainer):
    assert config.dataflow is not None or config.data is not None

    model = config.model
-    inputs_desc = model.get_inputs_desc()
    input = config.data or config.dataflow
    input = apply_default_prefetch(input, trainer)

+    # This is the only place where the `ModelDesc` abstraction is useful.
+    # We should gradually move away from this not-so-useful abstraction.
+    # TowerFuncWrapper is a better abstraction (similar to tf.defun in the future).
    trainer.setup_graph(
-        inputs_desc, input,
+        model.get_inputs_desc(), input,
        model._build_graph_get_cost, model.get_optimizer)
    _check_unused_regularization()
    trainer.train_with_defaults(
......
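For context, the typical call pattern of `launch_train_with_config` looks like this (a sketch; `MNISTModel` and `my_dataflow` are the placeholders used in the earlier sketches):

```python
from tensorpack import TrainConfig, SimpleTrainer, launch_train_with_config

config = TrainConfig(
    model=MNISTModel(),      # any ModelDesc, e.g. the sketch shown earlier
    dataflow=my_dataflow,
    callbacks=[],
    max_epoch=10,
)
launch_train_with_config(config, SimpleTrainer())
```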
@@ -33,7 +33,7 @@ def create_dummy_class(klass, dependency):
    class _DummyMetaClass(type):
        # throw error on class attribute access
        def __getattr__(_, __):
-            raise ImportError("Cannot import '{}', therefore '{}' is not available".format(dependency, klass))
+            raise AttributeError("Cannot import '{}', therefore '{}' is not available".format(dependency, klass))

    @six.add_metaclass(_DummyMetaClass)
    class _Dummy(object):
......
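A hedged sketch of how the dummy-class fallback behaves after this change (`SomeReader` and `some_optional_dep` are invented names):

```python
from tensorpack.utils.develop import create_dummy_class

# Pretend 'some_optional_dep' is missing; the real class gets replaced by a dummy.
SomeReader = create_dummy_class('SomeReader', 'some_optional_dep')

# Touching any attribute of the dummy now fails loudly, naming the missing dependency:
# SomeReader.anything
#   -> AttributeError: Cannot import 'some_optional_dep', therefore 'SomeReader' is not available
```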
-#!/bin/bash -e
+#!/bin/bash -ev
 # File: run-tests.sh

 DIR=$(dirname $0)
......