Commit 8b7b3f3c authored by Yuxin Wu

Do not apply REGULARIZATION_LOSSES when a cost is returned directly by build_graph. (#318)

parent ae9627cf
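
Editor's note on the change below: before this commit, a `ModelDesc` assigned `self.cost` inside `_build_graph`, and `get_cost()` silently added everything in `tf.GraphKeys.REGULARIZATION_LOSSES` on top. After this commit, a cost returned directly by `build_graph` is used as-is, so the model author decides whether and how to include regularization. A minimal sketch of the new style (illustrative only, not part of the commit; the model, shapes, and hyperparameters are made up, assuming TF 1.x and the `regularize_cost_from_collection` export added in this commit):

```python
import tensorflow as tf
from tensorpack import ModelDesc
from tensorpack.models import regularize_cost_from_collection


class ExampleModel(ModelDesc):   # hypothetical model, for illustration
    def inputs(self):
        return [tf.placeholder(tf.float32, (None, 28, 28), 'input'),
                tf.placeholder(tf.int32, (None,), 'label')]

    def build_graph(self, image, label):
        # Any regularizer passed here ends up in REGULARIZATION_LOSSES:
        logits = tf.layers.dense(
            tf.layers.flatten(image), 10,
            kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-4))
        cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=label), name='cross_entropy_loss')
        # New style: return the final scalar cost explicitly. The trainer
        # will NOT add REGULARIZATION_LOSSES on top of the returned tensor,
        # so include them yourself if you want weight decay:
        return cost + regularize_cost_from_collection()

    def _get_optimizer(self):
        return tf.train.AdamOptimizer(1e-3)
```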
@@ -8,8 +8,8 @@ so you won't need to look at here very often.
 Here are a list of things that were changed, starting from an early version.
 TensorFlow itself also changed APIs before 1.0 and those are not listed here.
 
-+ [2018/03/12] `JSONWriter` used a different file name, and will not automatically restore epoch number.
-  `AutoResumeTrainConfig` was added to support resuming better.
++ [2018/03/12] `JSONWriter` use a different file name, and will not automatically restore epoch number.
+  `AutoResumeTrainConfig` was added to support resuming.
 + [2017/10/21]
   tensorpack is gradually switching to a new Trainer API.
   The old API will keep working for a while. See [issue](https://github.com/ppwwyyxx/tensorpack/issues/458)
...
@@ -26,8 +26,7 @@ class Model(ModelDesc):
         return [tf.placeholder(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE), 'input'),
                 tf.placeholder(tf.int32, (None,), 'label')]
 
-    def _build_graph(self, inputs):
-        image, label = inputs
+    def build_graph(self, image, label):
         image = tf.expand_dims(image, 3)
         image = image * 2 - 1
@@ -56,9 +55,9 @@ class Model(ModelDesc):
         acc = tf.reduce_mean(acc, name='accuracy')
         summary.add_moving_summary(acc)
 
-        self.cost = cost
         summary.add_moving_summary(cost)
         summary.add_param_summary(('.*/weights', ['histogram', 'rms']))   # slim uses different variable names
+        return cost + regularize_cost_from_collection()
 
     def _get_optimizer(self):
         lr = tf.train.exponential_decay(
...
@@ -44,8 +44,7 @@ class Model(ModelDesc):
         return [tf.placeholder(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE), 'input'),
                 tf.placeholder(tf.int32, (None,), 'label')]
 
-    def _build_graph(self, inputs):
-        image, label = inputs
+    def build_graph(self, image, label):
         image = tf.expand_dims(image, 3) * 2 - 1
 
         M = get_keras_model()
@@ -60,8 +59,9 @@ class Model(ModelDesc):
         summary.add_moving_summary(acc)
 
         wd_cost = tf.add_n(M.losses, name='regularize_loss')   # this is how Keras manage regularizers
-        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
-        summary.add_moving_summary(self.cost, wd_cost)
+        cost = tf.add_n([wd_cost, cost], name='total_cost')
+        summary.add_moving_summary(cost, wd_cost)
+        return cost
 
     def _get_optimizer(self):
         lr = tf.train.exponential_decay(
...
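
Side note on the Keras hunk above: Keras tracks regularization terms on the model object itself (`M.losses`) rather than relying only on the TF collection, which is why that example sums them into the returned cost explicitly. A small sketch of where `M.losses` comes from (a hypothetical two-layer model, assuming `tf.keras` under TF 1.x):

```python
import tensorflow as tf
from tensorflow import keras

inp = keras.layers.Input(shape=(784,))
out = keras.layers.Dense(
    10, kernel_regularizer=keras.regularizers.l2(1e-4))(inp)
M = keras.models.Model(inp, out)

# The L2 term is stored on the model; summing M.losses mirrors the
# `wd_cost = tf.add_n(M.losses, ...)` line in the diff above.
print(len(M.losses))          # 1
wd_cost = tf.add_n(M.losses, name='regularize_loss')
```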
@@ -118,12 +118,11 @@ class ModelDescBase(object):
     def inputs(self):
         """
         __Create__ and returns a list of placeholders.
-        To be implemented by subclass.
+        A subclass is expected to implement this method.
 
         The placeholders __have to__ be created inside this method.
         Don't return placeholders created in other methods.
-
-        You should not call this method by yourself.
+        Also, you should not call this method by yourself.
 
         Returns:
             a list of `tf.placeholder`, to be converted to :class:`InputDesc`.
@@ -133,9 +132,11 @@ class ModelDescBase(object):
     def build_graph(self, *args):
         """
         Build the whole symbolic graph.
-        This is supposed to be the "tower function" when used with :class:`TowerTrainer`.
+        This is supposed to be part of the "tower function" when used with :class:`TowerTrainer`.
         By default it will call :meth:`_build_graph` with a list of input tensors.
+        A subclass is expected to overwrite this method or the :meth:`_build_graph` method.
 
         Args:
             args ([tf.Tensor]): tensors that matches the list of inputs defined by ``inputs()``.
@@ -161,7 +162,7 @@ class ModelDescBase(object):
         assert len(inputs) == len(self.get_inputs_desc()), \
             "Number of inputs passed to the graph != number of inputs defined " \
             "in ModelDesc! ({} != {})".format(len(inputs), len(self.get_inputs_desc()))
-        self._build_graph(inputs)
+        return self._build_graph(inputs)
 
     def _build_graph(self, inputs):
         """
@@ -174,20 +175,28 @@ class ModelDescBase(object):
 class ModelDesc(ModelDescBase):
     """
     A ModelDesc with **single cost** and **single optimizer**.
-    It contains information about InputDesc, how to get cost, and how to get optimizer.
+    It has the following constraints in addition to :class:`ModelDescBase`:
+
+    1. :meth:`build_graph(...)` method should return a cost.
+       The cost will be the final cost to be optimized by the optimizer.
+       Therefore it should include necessary regularization.
+
+    2. Subclass is expected to implement :meth:`optimizer()` method.
     """
 
     def get_cost(self):
         """
-        Return the cost tensor to optimize on.
+        Being deprecated.
+        You're recommended to return a cost tensor in :meth:`build_graph` method directly.
 
-        This function takes the cost tensor defined by :meth:`build_graph`,
+        This function takes the `self.cost` tensor defined by :meth:`build_graph`,
         and applies the collection
         ``tf.GraphKeys.REGULARIZATION_LOSSES`` to the cost automatically.
         """
         cost = self._get_cost()
         reg_cost = regularize_cost_from_collection()
-        if reg_cost is not None:
+        if reg_cost.op.type != 'Const':
+            logger.warn("Regularization losses found in collection, and a 'cost' tensor was "
+                        "not returned by `build_graph`. Therefore applying regularization automatically!")
             return tf.add(cost, reg_cost, name='cost_with_regularizer')
         else:
             return cost
@@ -215,8 +224,13 @@ class ModelDesc(ModelDescBase):
         """
         Used by trainers to get the final cost for optimization.
         """
-        self.build_graph(*inputs)
-        return self.get_cost()
+        ret = self.build_graph(*inputs)
+        if isinstance(ret, tf.Tensor):  # the preferred way
+            assert ret.shape.ndims == 0, "Cost must be a scalar, but found a tensor of shape {}!".format(ret.shape)
+            _check_unused_regularization()
+            return ret
+        else:   # the old way
+            return self.get_cost()
 
     # TODO this is deprecated and only used for v1 trainers
     def _build_graph_get_grads(self, *inputs):
@@ -239,3 +253,15 @@ class ModelDesc(ModelDescBase):
             gate_gradients=False, colocate_gradients_with_ops=True)
         grads = FilterNoneGrad().process(grads)
         return grads
+
+
+def _check_unused_regularization():
+    coll = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
+    unconsumed_reg = []
+    for c in coll:
+        if len(c.consumers()) == 0:
+            unconsumed_reg.append(c)
+    if unconsumed_reg:
+        logger.warn("The following tensors appear in REGULARIZATION_LOSSES collection but has no "
+                    "consumers! You may have forgotten to add regularization to total cost.")
+        logger.warn("Unconsumed regularization: {}".format(', '.join([x.name for x in unconsumed_reg])))
@@ -10,7 +10,8 @@ from ..utils.argtools import graph_memoized
 from ..tfutils.tower import get_current_tower_context
 from .common import layer_register
 
-__all__ = ['regularize_cost', 'l2_regularizer', 'l1_regularizer', 'Dropout']
+__all__ = ['regularize_cost', 'regularize_cost_from_collection',
+           'l2_regularizer', 'l1_regularizer', 'Dropout']
 
 
 @graph_memoized
@@ -34,7 +35,7 @@ def regularize_cost(regex, func, name='regularize_cost'):
             E.g., ``tf.contrib.layers.l2_regularizer``.
 
     Returns:
-        tf.Tensor: the total regularization cost.
+        tf.Tensor: a scalar, the total regularization cost.
 
     Example:
         .. code-block:: python
...@@ -78,7 +79,7 @@ def regularize_cost(regex, func, name='regularize_cost'): ...@@ -78,7 +79,7 @@ def regularize_cost(regex, func, name='regularize_cost'):
return name[prefixlen:] return name[prefixlen:]
return name return name
names = list(map(f, names)) names = list(map(f, names))
logger.info("regularize_cost() applying regularizers on {} tensors.".format(len(names))) logger.info("regularize_cost() found {} variables to regularize.".format(len(names)))
_log_once("The following tensors will be regularized: {}".format(', '.join(names))) _log_once("The following tensors will be regularized: {}".format(', '.join(names)))
return tf.add_n(costs, name=name) return tf.add_n(costs, name=name)
@@ -87,31 +88,34 @@ def regularize_cost(regex, func, name='regularize_cost'):
 def regularize_cost_from_collection(name='regularize_cost'):
     """
     Get the cost from the regularizers in ``tf.GraphKeys.REGULARIZATION_LOSSES``.
-    In replicated mode, will only regularize variables within the current tower.
+    If in replicated mode, will only regularize variables created within the current tower.
+
+    Args:
+        name (str): the name of the returned tensor
 
     Returns:
-        a scalar tensor, the regularization loss, or None
+        tf.Tensor: a scalar, the total regularization cost.
     """
     ctx = get_current_tower_context()
     if not ctx.is_training:
         # TODO Currently cannot build the wd_cost correctly at inference,
         # because ths vs_name used in inference can be '', therefore the
         # variable filter will fail
-        return None
+        return tf.constant(0, dtype=tf.float32, name='empty_' + name)
 
     # NOTE: this collection doesn't always grow with towers.
-    # It is only added with variables that are newly created.
+    # It only grows with actual variable creation, but not get_variable call.
     if ctx.has_own_variables:   # be careful of the first tower (name='')
         losses = ctx.get_collection_in_tower(tf.GraphKeys.REGULARIZATION_LOSSES)
     else:
         losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
     if len(losses) > 0:
-        logger.info("regularize_cost_from_collection() applying regularizers on "
-                    "{} tensors in REGULARIZATION_LOSSES.".format(len(losses)))
+        logger.info("regularize_cost_from_collection() found {} regularizers "
+                    "in REGULARIZATION_LOSSES collection.".format(len(losses)))
         reg_loss = tf.add_n(losses, name=name)
         return reg_loss
     else:
-        return None
+        return tf.constant(0, dtype=tf.float32, name='empty_' + name)
 
 
 @layer_register(use_scope=None)
...
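
Why return `tf.constant(0, ...)` instead of `None` in the hunk above? Callers can now always add the result into a cost without a `None` check, and the deprecated `get_cost()` path can still detect the "no regularization" case by inspecting the op type: the sentinel is a plain `Const` op, while a real result is an `AddN`. A quick illustration (names are hypothetical):

```python
import tensorflow as tf

# Sentinel returned when there is nothing to regularize:
empty = tf.constant(0, dtype=tf.float32, name='empty_regularize_cost')
print(empty.op.type)    # 'Const'

# Result when real regularization losses exist:
real = tf.add_n([tf.constant(0.5), tf.constant(0.1)], name='regularize_cost')
print(real.op.type)     # 'AddN' -- this is what `reg_cost.op.type != 'Const'` detects
```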