Commit 81f4b575 authored by Yuxin Wu

Do not set tensor by `self.xxx=` in models. (#318)

parent 9a156417
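The pattern this commit enforces, shown as a minimal standalone sketch (not taken from the repository): give a tensor an explicit graph name via `name=` (or `tf.identity(..., name=...)`) and look it up by that name later, instead of stashing a Python reference on `self`. The placeholder shape and the single dense layer below are illustrative assumptions only.

import tensorflow as tf

def build_graph(images):
    # Instead of `self.policy = ...`, give the tensor a name inside the graph.
    logits = tf.layers.dense(images, 10)
    tf.nn.softmax(logits, name='policy')

images = tf.placeholder(tf.float32, [None, 784], name='images')
build_graph(images)

# Later (e.g. at inference time) the tensor is recovered from the graph by name,
# so the model object never needs to carry it around as an attribute.
policy = tf.get_default_graph().get_tensor_by_name('policy:0')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    probs = sess.run(policy, feed_dict={images: [[0.0] * 784]})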
@@ -102,27 +102,26 @@ class Model(ModelDesc):
     def _build_graph(self, inputs):
         state, action, futurereward, action_prob = inputs
-        logits, self.value = self._get_NN_prediction(state)
-        self.value = tf.squeeze(self.value, [1], name='pred_value')  # (B,)
-        self.policy = tf.nn.softmax(logits, name='policy')
+        logits, value = self._get_NN_prediction(state)
+        value = tf.squeeze(value, [1], name='pred_value')  # (B,)
+        policy = tf.nn.softmax(logits, name='policy')
         is_training = get_current_tower_context().is_training
         if not is_training:
             return
-        log_probs = tf.log(self.policy + 1e-6)
+        log_probs = tf.log(policy + 1e-6)
         log_pi_a_given_s = tf.reduce_sum(
             log_probs * tf.one_hot(action, NUM_ACTIONS), 1)
-        advantage = tf.subtract(tf.stop_gradient(self.value), futurereward, name='advantage')
-        pi_a_given_s = tf.reduce_sum(self.policy * tf.one_hot(action, NUM_ACTIONS), 1)  # (B,)
+        advantage = tf.subtract(tf.stop_gradient(value), futurereward, name='advantage')
+        pi_a_given_s = tf.reduce_sum(policy * tf.one_hot(action, NUM_ACTIONS), 1)  # (B,)
         importance = tf.stop_gradient(tf.clip_by_value(pi_a_given_s / (action_prob + 1e-8), 0, 10))
         policy_loss = tf.reduce_sum(log_pi_a_given_s * advantage * importance, name='policy_loss')
-        xentropy_loss = tf.reduce_sum(
-            self.policy * log_probs, name='xentropy_loss')
-        value_loss = tf.nn.l2_loss(self.value - futurereward, name='value_loss')
-        pred_reward = tf.reduce_mean(self.value, name='predict_reward')
+        xentropy_loss = tf.reduce_sum(policy * log_probs, name='xentropy_loss')
+        value_loss = tf.nn.l2_loss(value - futurereward, name='value_loss')
+        pred_reward = tf.reduce_mean(value, name='predict_reward')
         advantage = symbf.rms(advantage, name='rms_advantage')
         entropy_beta = tf.get_variable('entropy_beta', shape=[],
                                        initializer=tf.constant_initializer(0.01), trainable=False)
...
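After this change, inference-time code reaches the A3C tensors through the names given in `_build_graph` ('policy', 'pred_value') rather than through `self.*`. A hedged sketch of how that could look with tensorpack's predictor API; the checkpoint path and the exact `PredictConfig` arguments below are assumptions for illustration, not part of this commit.

from tensorpack import PredictConfig, OfflinePredictor, SaverRestore

pred_config = PredictConfig(
    model=Model(),                                       # the Model shown in the hunk above
    session_init=SaverRestore('train_log/checkpoint'),   # hypothetical checkpoint path
    input_names=['state'],
    output_names=['policy', 'pred_value'])               # names assigned inside _build_graph
predictor = OfflinePredictor(pred_config)
# policy_probs, values = predictor(batch_of_states)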
@@ -81,9 +81,8 @@ class Model(ModelDesc):
                                  initializer=tf.constant_initializer())
             ret = symbolic_functions.shapeless_placeholder(ret, 0, name=n)
             return ret
-        self.initial = initial = \
-            (rnn.LSTMStateTuple(get_v('c0'), get_v('h0')),
-             rnn.LSTMStateTuple(get_v('c1'), get_v('h1')))
+        initial = (rnn.LSTMStateTuple(get_v('c0'), get_v('h0')),
+                   rnn.LSTMStateTuple(get_v('c1'), get_v('h1')))
         embeddingW = tf.get_variable('embedding', [param.vocab_size, param.rnn_size])
         input_feature = tf.nn.embedding_lookup(embeddingW, input)  # B x seqlen x rnnsize
@@ -91,12 +90,12 @@ class Model(ModelDesc):
         input_list = tf.unstack(input_feature, axis=1)  # seqlen x (Bxrnnsize)
         outputs, last_state = rnn.static_rnn(cell, input_list, initial, scope='rnnlm')
-        self.last_state = tf.identity(last_state, 'last_state')
+        last_state = tf.identity(last_state, 'last_state')
         # seqlen x (Bxrnnsize)
         output = tf.reshape(tf.concat(outputs, 1), [-1, param.rnn_size])  # (Bxseqlen) x rnnsize
         logits = FullyConnected('fc', output, param.vocab_size, nl=tf.identity)
-        self.prob = tf.nn.softmax(logits / param.softmax_temprature, name='prob')
+        prob = tf.nn.softmax(logits / param.softmax_temprature, name='prob')
         xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
             logits=logits, labels=tf.reshape(nextinput, [-1]))
...
@@ -66,6 +66,10 @@ class GANModelDesc(ModelDescBase):
 class GANTrainer(Trainer):
     def __init__(self, config):
+        """
+        GANTrainer expects a ModelDesc in config which sets the following attributes
+        after :meth:`_build_graph`: g_loss, d_loss, g_vars, d_vars.
+        """
         input = QueueInput(config.dataflow)
         model = config.model
...
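For illustration, a hedged sketch of a ModelDesc that satisfies the contract the new docstring describes: `_build_graph` leaves `g_loss`, `d_loss`, `g_vars`, `d_vars` as attributes for GANTrainer to pick up. The network, the variable-scope names, and the input shape below are assumptions made for the example, not code from this commit.

import tensorflow as tf
from tensorpack import ModelDesc, InputDesc

class MyGAN(ModelDesc):
    def _get_inputs(self):
        return [InputDesc(tf.float32, (None, 784), 'input')]

    def _build_graph(self, inputs):
        real = inputs[0]
        noise = tf.random_normal([tf.shape(real)[0], 100])

        with tf.variable_scope('gen'):
            fake = tf.layers.dense(noise, 784, name='fc')

        def discriminate(x, reuse):
            with tf.variable_scope('discrim', reuse=reuse):
                return tf.layers.dense(x, 1, name='fc')

        score_real = discriminate(real, False)
        score_fake = discriminate(fake, True)

        def bce(logits, labels):
            return tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))

        # The four attributes GANTrainer reads after _build_graph:
        self.d_loss = tf.add(bce(score_real, tf.ones_like(score_real)),
                             bce(score_fake, tf.zeros_like(score_fake)), name='d_loss')
        self.g_loss = tf.identity(bce(score_fake, tf.ones_like(score_fake)), name='g_loss')
        self.g_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'gen')
        self.d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'discrim')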
@@ -15,7 +15,7 @@ from tensorpack.dataflow import dataset
 """
 A small convnet model for Cifar10 or Cifar100 dataset.
-Cifar10:
+Cifar10 trained on 1 GPU:
     91% accuracy after 50k step.
     41 step/s on TitanX
...