Commit 95bd4af5 authored by Yuxin Wu

self.cost -> return cost (#318)

parent a1e107d9
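In short: `ModelDesc` subclasses used to publish their loss by assigning `self.cost` inside `_build_graph(self, inputs)`; after this commit, `build_graph(...)` takes the unpacked input tensors and returns the cost tensor instead. A minimal before/after sketch of the migration (the model body and the `_make_logits` helper are illustrative placeholders, not code from any one example; assumes the usual `import tensorflow as tf` and `from tensorpack import ModelDesc`):

    # Before this commit: the trainer read the loss from the self.cost attribute.
    class OldStyleModel(ModelDesc):
        def _build_graph(self, inputs):
            image, label = inputs
            logits = self._make_logits(image)  # hypothetical helper
            self.cost = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label),
                name='cost')

    # After this commit: build_graph receives the inputs directly and returns the cost.
    class NewStyleModel(ModelDesc):
        def build_graph(self, image, label):
            logits = self._make_logits(image)  # hypothetical helper
            return tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label),
                name='cost')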
@@ -119,13 +119,12 @@ class Model(ModelDesc):
         advantage = tf.sqrt(tf.reduce_mean(tf.square(advantage)), name='rms_advantage')
         entropy_beta = tf.get_variable('entropy_beta', shape=[],
                                        initializer=tf.constant_initializer(0.01), trainable=False)
-        self.cost = tf.add_n([policy_loss, xentropy_loss * entropy_beta, value_loss])
-        self.cost = tf.truediv(self.cost,
-                               tf.cast(tf.shape(futurereward)[0], tf.float32),
-                               name='cost')
+        cost = tf.add_n([policy_loss, xentropy_loss * entropy_beta, value_loss])
+        cost = tf.truediv(cost, tf.cast(tf.shape(futurereward)[0], tf.float32), name='cost')
         summary.add_moving_summary(policy_loss, xentropy_loss,
                                    value_loss, pred_reward, advantage,
-                                   self.cost, tf.reduce_mean(importance, name='importance'))
+                                   cost, tf.reduce_mean(importance, name='importance'))
+        return cost

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.001, trainable=False)
......
@@ -53,7 +53,7 @@ class Model(ModelDesc):
         loss = tf.nn.ctc_loss(label, logits, seqlen, time_major=False)
-        self.cost = tf.reduce_mean(loss, name='cost')
+        cost = tf.reduce_mean(loss, name='cost')

         logits = tf.transpose(logits, [1, 0, 2])
@@ -68,7 +68,8 @@ class Model(ModelDesc):
         err = tf.edit_distance(predictions, label, normalize=True)
         err.set_shape([None])
         err = tf.reduce_mean(err, name='error')
-        summary.add_moving_summary(err, self.cost)
+        summary.add_moving_summary(err, cost)
+        return cost

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=5e-3, trainable=False)
......
@@ -104,9 +104,10 @@ class Model(ModelDesc):
         xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
             logits=logits, labels=tf.reshape(nextinput, [-1]))
-        self.cost = tf.reduce_mean(xent_loss, name='cost')
+        cost = tf.reduce_mean(xent_loss, name='cost')
         summary.add_param_summary(('.*/W', ['histogram']))   # monitor histogram of all W
-        summary.add_moving_summary(self.cost)
+        summary.add_moving_summary(cost)
+        return cost

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=2e-3, trainable=False)
......
@@ -75,11 +75,12 @@ class Model(ModelDesc):
         target = reward + (1.0 - tf.cast(isOver, tf.float32)) * self.gamma * tf.stop_gradient(best_v)

-        self.cost = tf.losses.huber_loss(
+        cost = tf.losses.huber_loss(
             target, pred_action_value, reduction=tf.losses.Reduction.MEAN)
         summary.add_param_summary(('conv.*/W', ['histogram', 'rms']),
                                   ('fc.*/W', ['histogram', 'rms']))   # monitor all W
-        summary.add_moving_summary(self.cost)
+        summary.add_moving_summary(cost)
+        return cost

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=self.learning_rate, trainable=False)
......
@@ -50,8 +50,7 @@ class Model(mnist_example.Model):
         wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                               name='regularize_loss')
-        self.cost = tf.add_n([wd_cost, cost], name='cost')
-        add_moving_summary(cost, wd_cost, self.cost)
+        return tf.add_n([wd_cost, cost], name='cost')


 if __name__ == '__main__':
......
@@ -159,8 +159,9 @@ class Model(ModelDesc):
         wd_cost = regularize_cost('fc.*/W', l2_regularizer(5e-6), name='regularize_cost')
         add_param_summary(('.*/W', ['histogram', 'rms']))
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
-        add_moving_summary(cost, wd_cost, self.cost)
+        total_cost = tf.add_n([cost, wd_cost], name='cost')
+        add_moving_summary(cost, wd_cost, total_cost)
+        return total_cost

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=1e-4, trainable=False)
......
@@ -120,8 +120,9 @@ class Model(ModelDesc):
         wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7))
         add_param_summary(('.*/W', ['histogram', 'rms']))
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
-        add_moving_summary(cost, wd_cost, self.cost)
+        total_cost = tf.add_n([cost, wd_cost], name='cost')
+        add_moving_summary(cost, wd_cost, total_cost)
+        return total_cost

     def _get_optimizer(self):
         lr = tf.train.exponential_decay(
......
@@ -143,8 +143,9 @@ class Model(ModelDesc):
             tf.summary.image('pred_gt_filters', filters, max_outputs=20)
             tf.summary.image('pred_gt_images', images, max_outputs=20)

-        self.cost = tf.reduce_mean(tf.squared_difference(pred_image, gt_image), name="cost")
-        summary.add_moving_summary(self.cost)
+        cost = tf.reduce_mean(tf.squared_difference(pred_image, gt_image), name="cost")
+        summary.add_moving_summary(cost)
+        return cost

     def _get_optimizer(self):
         return tf.train.AdamOptimizer(1e-3)
......
@@ -180,13 +180,14 @@ class Model(ModelDesc):
                 '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
                 l2_regularizer(1e-4), name='wd_cost')

-            self.cost = tf.add_n([
+            total_cost = tf.add_n([
                 rpn_label_loss, rpn_box_loss,
                 fastrcnn_label_loss, fastrcnn_box_loss,
                 mrcnn_loss,
                 wd_cost], 'total_cost')
-            add_moving_summary(self.cost, wd_cost)
+            add_moving_summary(total_cost, wd_cost)
+            return total_cost
         else:
             label_probs = tf.nn.softmax(fastrcnn_label_logits, name='fastrcnn_all_probs')  # #proposal x #Class
             anchors = tf.tile(tf.expand_dims(proposal_boxes, 1), [1, config.NUM_CLASS - 1, 1])   # #proposal x #Cat x 4
......
@@ -115,8 +115,9 @@ class Model(ModelDesc):
             costs.append(wd_cost)

             add_param_summary(('.*/W', ['histogram']))   # monitor W
-            self.cost = tf.add_n(costs, name='cost')
-            add_moving_summary(costs + [wrong, self.cost])
+            total_cost = tf.add_n(costs, name='cost')
+            add_moving_summary(costs + [wrong, total_cost])
+            return total_cost

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=3e-5, trainable=False)
......
@@ -165,10 +165,11 @@ class ImageNetModel(ModelDesc):
             wd_loss = regularize_cost('.*/W', tf.contrib.layers.l2_regularizer(self.weight_decay),
                                       name='l2_regularize_loss')
             add_moving_summary(loss, wd_loss)
-            self.cost = tf.add_n([loss, wd_loss], name='cost')
+            total_cost = tf.add_n([loss, wd_loss], name='cost')
         else:
-            self.cost = tf.identity(loss, name='cost')
-            add_moving_summary(self.cost)
+            total_cost = tf.identity(loss, name='cost')
+            add_moving_summary(total_cost)
+        return total_cost

     @abstractmethod
     def get_logits(self, image):
......
@@ -111,8 +111,9 @@ class Model(ModelDesc):
                                80000, 0.7, True)
         wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='l2_regularize_loss')
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
-        add_moving_summary(wd_cost, self.cost)
+        total_cost = tf.add_n([cost, wd_cost], name='cost')
+        add_moving_summary(wd_cost, total_cost)
+        return total_cost

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.045, trainable=False)
......
@@ -96,11 +96,12 @@ class Model(ModelDesc):
             logits=logits, labels=tf.reshape(nextinput, [-1]))

         with tf.control_dependencies(update_state_ops):
-            self.cost = tf.truediv(tf.reduce_sum(xent_loss),
-                                   tf.cast(BATCH, tf.float32), name='cost')  # log-perplexity
-        perpl = tf.exp(self.cost / SEQ_LEN, name='perplexity')
-        summary.add_moving_summary(perpl, self.cost)
+            cost = tf.truediv(tf.reduce_sum(xent_loss),
+                              tf.cast(BATCH, tf.float32), name='cost')  # log-perplexity
+        perpl = tf.exp(cost / SEQ_LEN, name='perplexity')
+        summary.add_moving_summary(perpl, cost)
+        return cost

     def reset_lstm_state(self):
         s = self.state
......
@@ -77,7 +77,7 @@ class ResNet_Cifar(ModelDesc):

         # weight decay on all W matrixes. including convolutional layers
         wd_cost = tf.multiply(WEIGHT_DECAY, regularize_cost('.*', tf.nn.l2_loss), name='wd_cost')
-        self.cost = tf.add_n([ce_cost, wd_cost], name='cost')
+        return tf.add_n([ce_cost, wd_cost], name='cost')

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
......
@@ -110,7 +110,7 @@ class Model(ModelDesc):
         add_moving_summary(cost, wd_cost)
         add_param_summary(('.*/W', ['histogram']))   # monitor W
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
+        return tf.add_n([cost, wd_cost], name='cost')

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.01, trainable=False)
......
@@ -64,7 +64,7 @@ class Model(ModelDesc):
         loss = compute_loss_and_error(logits, label)
         wd_cost = regularize_cost('.*/W', l2_regularizer(1e-4), name='l2_regularize_loss')
         add_moving_summary(loss, wd_cost)
-        self.cost = tf.add_n([loss, wd_cost], name='cost')
+        return tf.add_n([loss, wd_cost], name='cost')

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
......
@@ -53,7 +53,7 @@ def saliency_map(output, input, name="saliency_map"):
     return tf.identity(saliency_op, name=name)


-class Model(tp.ModelDesc):
+class Model(tp.ModelDescBase):
     def inputs(self):
         return [tf.placeholder(tf.float32, (IMAGE_SIZE, IMAGE_SIZE, 3), 'image')]
......
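The saliency example above only runs inference, so it does not need the single-cost contract at all; switching its base class to `tp.ModelDescBase` (rather than returning a dummy cost) makes that explicit. A sketch of what an inference-only model looks like under this split, assuming `ModelDescBase` imposes no return-value requirement on `build_graph` (the `_forward` helper is a hypothetical placeholder):

    class InferenceOnlyModel(tp.ModelDescBase):
        def inputs(self):
            return [tf.placeholder(tf.float32, (IMAGE_SIZE, IMAGE_SIZE, 3), 'image')]

        def build_graph(self, image):
            # Build the forward graph and name its outputs; since this model
            # is never trained, no cost tensor is defined or returned.
            logits = self._forward(image)  # hypothetical network body
            tf.nn.softmax(logits, name='prob')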
@@ -253,10 +253,11 @@ class SiameseModel(EmbeddingModel):

         # compute the actual loss
         cost, pos_dist, neg_dist = contrastive_loss(x, y, label, 5., extra=True, scope="loss")
-        self.cost = tf.identity(cost, name="cost")
+        cost = tf.identity(cost, name="cost")
         # track these values during training
-        add_moving_summary(pos_dist, neg_dist, self.cost)
+        add_moving_summary(pos_dist, neg_dist, cost)
+        return cost


 class CosineModel(SiameseModel):
@@ -268,8 +269,9 @@ class CosineModel(SiameseModel):
         tf.identity(self.embed(inputs[0]), name="emb")

         cost = siamese_cosine_loss(x, y, label, scope="loss")
-        self.cost = tf.identity(cost, name="cost")
-        add_moving_summary(self.cost)
+        cost = tf.identity(cost, name="cost")
+        add_moving_summary(cost)
+        return cost


 class TripletModel(EmbeddingModel):
@@ -296,8 +298,9 @@ class TripletModel(EmbeddingModel):
         cost, pos_dist, neg_dist = self.loss(a, p, n)

-        self.cost = tf.identity(cost, name="cost")
-        add_moving_summary(pos_dist, neg_dist, self.cost)
+        cost = tf.identity(cost, name="cost")
+        add_moving_summary(pos_dist, neg_dist, cost)
+        return cost


 class SoftTripletModel(TripletModel):
@@ -333,10 +336,11 @@ class CenterModel(EmbeddingModel):
         cls_cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label),
                                   name='classification_costs')
-        self.cost = tf.add(emb_cost, 100 * cls_cost, name="cost")
+        total_cost = tf.add(emb_cost, 100 * cls_cost, name="cost")

         # track these values during training
-        add_moving_summary(self.cost, cls_cost, emb_cost)
+        add_moving_summary(total_cost, cls_cost, emb_cost)
+        return total_cost


 def get_config(model, algorithm_name):
......
@@ -85,7 +85,7 @@ class Model(ModelDesc):
         wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                               name='regularize_loss')
         summary.add_moving_summary(cost, wd_cost)
-        self.cost = tf.add_n([wd_cost, cost], name='cost')
+        return tf.add_n([wd_cost, cost], name='cost')

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=5e-4, trainable=False)
......
@@ -72,7 +72,7 @@ class Model(ModelDesc):
         add_moving_summary(cost, wd_cost)
         add_param_summary(('.*/W', ['histogram']))   # monitor W
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
+        return tf.add_n([cost, wd_cost], name='cost')

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=1e-2, trainable=False)
......
@@ -27,12 +27,9 @@ class Model(ModelDesc):
         return [tf.placeholder(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE), 'input'),
                 tf.placeholder(tf.int32, (None,), 'label')]

-    def _build_graph(self, inputs):
+    def build_graph(self, image, label):
         """This function should build the model which takes the input variables
-        and define self.cost at the end"""
-
-        # inputs contains a list of input variables defined above
-        image, label = inputs
+        and return cost at the end"""

         # In tensorflow, inputs to convolution function are assumed to be
         # NHWC. Add a single channel here.
@@ -74,11 +71,12 @@ class Model(ModelDesc):
         wd_cost = tf.multiply(1e-5,
                               regularize_cost('fc.*/W', tf.nn.l2_loss),
                               name='regularize_loss')
-        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
-        summary.add_moving_summary(cost, wd_cost, self.cost)
+        total_cost = tf.add_n([wd_cost, cost], name='total_cost')
+        summary.add_moving_summary(cost, wd_cost, total_cost)

         # monitor histogram of all weight (of conv and fc layers) in tensorboard
         summary.add_param_summary(('.*/W', ['histogram', 'rms']))
+        return total_cost

     def _get_optimizer(self):
         lr = tf.train.exponential_decay(
......
@@ -32,9 +32,6 @@ class Model(ModelDesc):
                 tf.placeholder(tf.int32, (None,), 'label')]

     def _build_graph(self, inputs):
-        """This function should build the model which takes the input variables
-        and define self.cost at the end"""
-        # inputs contains a list of input variables defined above
         image, label = inputs
@@ -77,11 +74,12 @@ class Model(ModelDesc):
         wd_cost = tf.multiply(1e-5,
                               regularize_cost('fc.*/kernel', tf.nn.l2_loss),
                               name='regularize_loss')
-        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
-        summary.add_moving_summary(cost, wd_cost, self.cost)
+        total_cost = tf.add_n([wd_cost, cost], name='total_cost')
+        summary.add_moving_summary(cost, wd_cost, total_cost)

         # monitor histogram of all weight (of conv and fc layers) in tensorboard
         summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
+        return total_cost

     def _get_optimizer(self):
         lr = tf.train.exponential_decay(
lr = tf.train.exponential_decay(
......
@@ -108,10 +108,7 @@ class Model(ModelDesc):
         wd_cost = tf.multiply(1e-5,
                               regularize_cost('fc.*/W', tf.nn.l2_loss),
                               name='regularize_loss')
-        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
-        summary.add_moving_summary(cost, wd_cost, self.cost, accuracy)
-        summary.add_param_summary(('.*/W', ['histogram', 'rms']))
+        return tf.add_n([wd_cost, cost], name='total_cost')

     def _get_optimizer(self):
         lr = tf.train.exponential_decay(
......
@@ -56,7 +56,7 @@ class Model(ModelDesc):
         add_moving_summary(cost, wd_cost)
         add_param_summary(('.*/W', ['histogram', 'rms']))   # monitor W
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
+        return tf.add_n([cost, wd_cost], name='cost')

     def _get_optimizer(self):
         lr = tf.train.exponential_decay(
......
@@ -177,7 +177,7 @@ class ModelDesc(ModelDescBase):
     A ModelDesc with **single cost** and **single optimizer**.
     It has the following constraints in addition to :class:`ModelDescBase`:

-    1. :meth:`build_graph(...)` method should return a cost.
+    1. :meth:`build_graph(...)` method should return a cost when called under a training context.
        The cost will be the final cost to be optimized by the optimizer.
        Therefore it should include necessary regularization.

     2. Subclass is expected to implement :meth:`optimizer()` method.
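Taken together, the two constraints say that a conforming subclass returns exactly one cost tensor from `build_graph` and supplies one optimizer (via `_get_optimizer`, as the examples in this commit do). A minimal sketch, with an illustrative linear model standing in for a real network (assumes `import tensorflow as tf` and `from tensorpack import ModelDesc`):

    class LinearModel(ModelDesc):
        def inputs(self):
            return [tf.placeholder(tf.float32, (None, 10), 'feature'),
                    tf.placeholder(tf.float32, (None,), 'target')]

        def build_graph(self, feature, target):
            pred = tf.squeeze(tf.layers.dense(feature, 1), 1)
            # The returned tensor is the final cost the optimizer minimizes,
            # so any regularization terms belong in this sum as well.
            return tf.losses.mean_squared_error(target, pred)

        def _get_optimizer(self):
            return tf.train.AdamOptimizer(1e-3)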
......