Commit 81f4b575 authored by Yuxin Wu

Do not set tensor by `self.xxx=` in models. (#318)

parent 9a156417
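The pattern this commit enforces, shown as a minimal standalone sketch (not taken from the repository): give a tensor an explicit graph name via `name=` (or `tf.identity(..., name=...)`) and look it up by that name later, instead of stashing a Python reference on `self`. The placeholder shape and the single dense layer below are illustrative assumptions only.

import tensorflow as tf

def build_graph(images):
    # Instead of `self.policy = ...`, give the tensor a name inside the graph.
    logits = tf.layers.dense(images, 10)
    tf.nn.softmax(logits, name='policy')

images = tf.placeholder(tf.float32, [None, 784], name='images')
build_graph(images)

# Later (e.g. at inference time) the tensor is recovered from the graph by name,
# so the model object never needs to carry it around as an attribute.
policy = tf.get_default_graph().get_tensor_by_name('policy:0')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    probs = sess.run(policy, feed_dict={images: [[0.0] * 784]})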
@@ -102,27 +102,26 @@ class Model(ModelDesc):
     def _build_graph(self, inputs):
         state, action, futurereward, action_prob = inputs
-        logits, self.value = self._get_NN_prediction(state)
-        self.value = tf.squeeze(self.value, [1], name='pred_value')  # (B,)
-        self.policy = tf.nn.softmax(logits, name='policy')
+        logits, value = self._get_NN_prediction(state)
+        value = tf.squeeze(value, [1], name='pred_value')  # (B,)
+        policy = tf.nn.softmax(logits, name='policy')
         is_training = get_current_tower_context().is_training
         if not is_training:
             return
-        log_probs = tf.log(self.policy + 1e-6)
+        log_probs = tf.log(policy + 1e-6)
         log_pi_a_given_s = tf.reduce_sum(
             log_probs * tf.one_hot(action, NUM_ACTIONS), 1)
-        advantage = tf.subtract(tf.stop_gradient(self.value), futurereward, name='advantage')
-        pi_a_given_s = tf.reduce_sum(self.policy * tf.one_hot(action, NUM_ACTIONS), 1)  # (B,)
+        advantage = tf.subtract(tf.stop_gradient(value), futurereward, name='advantage')
+        pi_a_given_s = tf.reduce_sum(policy * tf.one_hot(action, NUM_ACTIONS), 1)  # (B,)
         importance = tf.stop_gradient(tf.clip_by_value(pi_a_given_s / (action_prob + 1e-8), 0, 10))
         policy_loss = tf.reduce_sum(log_pi_a_given_s * advantage * importance, name='policy_loss')
-        xentropy_loss = tf.reduce_sum(
-            self.policy * log_probs, name='xentropy_loss')
-        value_loss = tf.nn.l2_loss(self.value - futurereward, name='value_loss')
-        pred_reward = tf.reduce_mean(self.value, name='predict_reward')
+        xentropy_loss = tf.reduce_sum(policy * log_probs, name='xentropy_loss')
+        value_loss = tf.nn.l2_loss(value - futurereward, name='value_loss')
+        pred_reward = tf.reduce_mean(value, name='predict_reward')
         advantage = symbf.rms(advantage, name='rms_advantage')
         entropy_beta = tf.get_variable('entropy_beta', shape=[],
                                        initializer=tf.constant_initializer(0.01), trainable=False)
...
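After this change, inference-time code reaches the A3C tensors through the names given in `_build_graph` ('policy', 'pred_value') rather than through `self.*`. A hedged sketch of how that could look with tensorpack's predictor API; the checkpoint path and the exact `PredictConfig` arguments below are assumptions for illustration, not part of this commit.

from tensorpack import PredictConfig, OfflinePredictor, SaverRestore

pred_config = PredictConfig(
    model=Model(),                                       # the Model shown in the hunk above
    session_init=SaverRestore('train_log/checkpoint'),   # hypothetical checkpoint path
    input_names=['state'],
    output_names=['policy', 'pred_value'])               # names assigned inside _build_graph
predictor = OfflinePredictor(pred_config)
# policy_probs, values = predictor(batch_of_states)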
@@ -81,9 +81,8 @@ class Model(ModelDesc):
                                  initializer=tf.constant_initializer())
             ret = symbolic_functions.shapeless_placeholder(ret, 0, name=n)
             return ret
-        self.initial = initial = \
-            (rnn.LSTMStateTuple(get_v('c0'), get_v('h0')),
-             rnn.LSTMStateTuple(get_v('c1'), get_v('h1')))
+        initial = (rnn.LSTMStateTuple(get_v('c0'), get_v('h0')),
+                   rnn.LSTMStateTuple(get_v('c1'), get_v('h1')))
         embeddingW = tf.get_variable('embedding', [param.vocab_size, param.rnn_size])
         input_feature = tf.nn.embedding_lookup(embeddingW, input)  # B x seqlen x rnnsize
@@ -91,12 +90,12 @@ class Model(ModelDesc):
         input_list = tf.unstack(input_feature, axis=1)  # seqlen x (Bxrnnsize)
         outputs, last_state = rnn.static_rnn(cell, input_list, initial, scope='rnnlm')
-        self.last_state = tf.identity(last_state, 'last_state')
+        last_state = tf.identity(last_state, 'last_state')
         # seqlen x (Bxrnnsize)
         output = tf.reshape(tf.concat(outputs, 1), [-1, param.rnn_size])  # (Bxseqlen) x rnnsize
         logits = FullyConnected('fc', output, param.vocab_size, nl=tf.identity)
-        self.prob = tf.nn.softmax(logits / param.softmax_temprature, name='prob')
+        prob = tf.nn.softmax(logits / param.softmax_temprature, name='prob')
         xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
             logits=logits, labels=tf.reshape(nextinput, [-1]))
...
@@ -66,6 +66,10 @@ class GANModelDesc(ModelDescBase):
 class GANTrainer(Trainer):
     def __init__(self, config):
+        """
+        GANTrainer expects a ModelDesc in config which sets the following attributes
+        after :meth:`_build_graph`: g_loss, d_loss, g_vars, d_vars.
+        """
         input = QueueInput(config.dataflow)
         model = config.model
...
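For illustration, a hedged sketch of a ModelDesc that satisfies the contract the new docstring describes: `_build_graph` leaves `g_loss`, `d_loss`, `g_vars`, `d_vars` as attributes for GANTrainer to pick up. The network, the variable-scope names, and the input shape below are assumptions made for the example, not code from this commit.

import tensorflow as tf
from tensorpack import ModelDesc, InputDesc

class MyGAN(ModelDesc):
    def _get_inputs(self):
        return [InputDesc(tf.float32, (None, 784), 'input')]

    def _build_graph(self, inputs):
        real = inputs[0]
        noise = tf.random_normal([tf.shape(real)[0], 100])

        with tf.variable_scope('gen'):
            fake = tf.layers.dense(noise, 784, name='fc')

        def discriminate(x, reuse):
            with tf.variable_scope('discrim', reuse=reuse):
                return tf.layers.dense(x, 1, name='fc')

        score_real = discriminate(real, False)
        score_fake = discriminate(fake, True)

        def bce(logits, labels):
            return tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))

        # The four attributes GANTrainer reads after _build_graph:
        self.d_loss = tf.add(bce(score_real, tf.ones_like(score_real)),
                             bce(score_fake, tf.zeros_like(score_fake)), name='d_loss')
        self.g_loss = tf.identity(bce(score_fake, tf.ones_like(score_fake)), name='g_loss')
        self.g_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'gen')
        self.d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'discrim')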
@@ -15,7 +15,7 @@ from tensorpack.dataflow import dataset
 """
 A small convnet model for Cifar10 or Cifar100 dataset.
-Cifar10:
+Cifar10 trained on 1 GPU:
     91% accuracy after 50k step.
     41 step/s on TitanX
...