Commit 81f4b575 authored by Yuxin Wu

Do not set tensor by `self.xxx=` in models. (#318)

parent 9a156417
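The rationale, sketched below with a minimal standalone example (not part of the commit; TF 1.x API assumed): tensorpack runs _build_graph once per training tower, so a tensor stashed on self is silently rebound by whichever tower is built last, while a name= kwarg gives each tower's copy a distinct, predictable graph name.

import tensorflow as tf

class Model(object):
    def _build_graph(self):
        # anti-pattern this commit removes: the attribute is rebound on every call
        self.prob = tf.nn.softmax(tf.constant([0.0, 1.0]), name='prob')

m = Model()
for tower in ['tower0', 'tower1']:   # tensorpack-style per-tower graph construction
    with tf.name_scope(tower):
        m._build_graph()

print(m.prob.name)  # 'tower1/prob:0' -- tower0's tensor is no longer reachable via m.prob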
@@ -102,27 +102,26 @@ class Model(ModelDesc):
     def _build_graph(self, inputs):
         state, action, futurereward, action_prob = inputs
-        logits, self.value = self._get_NN_prediction(state)
-        self.value = tf.squeeze(self.value, [1], name='pred_value')  # (B,)
-        self.policy = tf.nn.softmax(logits, name='policy')
+        logits, value = self._get_NN_prediction(state)
+        value = tf.squeeze(value, [1], name='pred_value')  # (B,)
+        policy = tf.nn.softmax(logits, name='policy')
         is_training = get_current_tower_context().is_training
         if not is_training:
             return
-        log_probs = tf.log(self.policy + 1e-6)
+        log_probs = tf.log(policy + 1e-6)
         log_pi_a_given_s = tf.reduce_sum(
             log_probs * tf.one_hot(action, NUM_ACTIONS), 1)
-        advantage = tf.subtract(tf.stop_gradient(self.value), futurereward, name='advantage')
+        advantage = tf.subtract(tf.stop_gradient(value), futurereward, name='advantage')
-        pi_a_given_s = tf.reduce_sum(self.policy * tf.one_hot(action, NUM_ACTIONS), 1)  # (B,)
+        pi_a_given_s = tf.reduce_sum(policy * tf.one_hot(action, NUM_ACTIONS), 1)  # (B,)
         importance = tf.stop_gradient(tf.clip_by_value(pi_a_given_s / (action_prob + 1e-8), 0, 10))
         policy_loss = tf.reduce_sum(log_pi_a_given_s * advantage * importance, name='policy_loss')
-        xentropy_loss = tf.reduce_sum(
-            self.policy * log_probs, name='xentropy_loss')
-        value_loss = tf.nn.l2_loss(self.value - futurereward, name='value_loss')
+        xentropy_loss = tf.reduce_sum(policy * log_probs, name='xentropy_loss')
+        value_loss = tf.nn.l2_loss(value - futurereward, name='value_loss')
-        pred_reward = tf.reduce_mean(self.value, name='predict_reward')
+        pred_reward = tf.reduce_mean(value, name='predict_reward')
         advantage = symbf.rms(advantage, name='rms_advantage')
         entropy_beta = tf.get_variable('entropy_beta', shape=[],
                                        initializer=tf.constant_initializer(0.01), trainable=False)
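With policy and pred_value now plain locals, downstream code is expected to fetch them by graph name. A hedged sketch of what inference setup could look like with tensorpack's predictor API (the input name 'state' and the checkpoint path are assumptions, not taken from this diff):

from tensorpack import PredictConfig, OfflinePredictor, SaverRestore

pred_config = PredictConfig(
    model=Model(),                                      # the A3C Model from this hunk
    session_init=SaverRestore('train_log/checkpoint'),  # hypothetical checkpoint path
    input_names=['state'],                              # assumed name of the state input
    output_names=['policy', 'pred_value'])              # names assigned via name= above
predictor = OfflinePredictor(pred_config)
policy, value = predictor(state_batch)                  # state_batch: hypothetical numpy input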
@@ -81,9 +81,8 @@ class Model(ModelDesc):
                 initializer=tf.constant_initializer())
             ret = symbolic_functions.shapeless_placeholder(ret, 0, name=n)
             return ret
-        self.initial = initial = \
-            (rnn.LSTMStateTuple(get_v('c0'), get_v('h0')),
-             rnn.LSTMStateTuple(get_v('c1'), get_v('h1')))
+        initial = (rnn.LSTMStateTuple(get_v('c0'), get_v('h0')),
+                   rnn.LSTMStateTuple(get_v('c1'), get_v('h1')))
         embeddingW = tf.get_variable('embedding', [param.vocab_size, param.rnn_size])
         input_feature = tf.nn.embedding_lookup(embeddingW, input)  # B x seqlen x rnnsize
@@ -91,12 +90,12 @@ class Model(ModelDesc):
         input_list = tf.unstack(input_feature, axis=1)  # seqlen x (Bxrnnsize)
         outputs, last_state = rnn.static_rnn(cell, input_list, initial, scope='rnnlm')
-        self.last_state = tf.identity(last_state, 'last_state')
+        last_state = tf.identity(last_state, 'last_state')
         # seqlen x (Bxrnnsize)
         output = tf.reshape(tf.concat(outputs, 1), [-1, param.rnn_size])  # (Bxseqlen) x rnnsize
         logits = FullyConnected('fc', output, param.vocab_size, nl=tf.identity)
-        self.prob = tf.nn.softmax(logits / param.softmax_temprature, name='prob')
+        prob = tf.nn.softmax(logits / param.softmax_temprature, name='prob')
         xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
             logits=logits, labels=tf.reshape(nextinput, [-1]))
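Dropping self.prob and self.last_state loses nothing, because a name= kwarg keeps the tensor reachable through the graph itself. A minimal runnable check (TF 1.x), not from the commit, illustrating that:

import tensorflow as tf

def build():
    logits = tf.constant([1.0, 2.0, 3.0])
    tf.nn.softmax(logits, name='prob')   # named, but deliberately not stored anywhere

build()
prob = tf.get_default_graph().get_tensor_by_name('prob:0')   # recovered purely by name
with tf.Session() as sess:
    print(sess.run(prob))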
@@ -66,6 +66,10 @@ class GANModelDesc(ModelDescBase):
 class GANTrainer(Trainer):
     def __init__(self, config):
+        """
+        GANTrainer expects a ModelDesc in config which sets the following attributes
+        after :meth:`_build_graph`: g_loss, d_loss, g_vars, d_vars.
+        """
         input = QueueInput(config.dataflow)
         model = config.model
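Note the asymmetry the new docstring documents: GANTrainer itself consumes attributes set on the model, so g_loss, d_loss, g_vars and d_vars are a deliberate exception to the "no self.xxx =" rule. A hedged sketch of a model satisfying that contract (the 'gen'/'discrim' scope names and the toy losses are assumptions, not mandated by GANTrainer):

import tensorflow as tf

class ToyGANModel(object):  # stand-in for a GANModelDesc subclass
    def _build_graph(self, inputs):
        z, real = inputs
        with tf.variable_scope('gen'):
            fake = tf.layers.dense(z, 784, name='g')
        with tf.variable_scope('discrim') as scope:
            d_real = tf.layers.dense(real, 1, name='d')
            scope.reuse_variables()
            d_fake = tf.layers.dense(fake, 1, name='d')
        # the four attributes GANTrainer reads after _build_graph:
        self.g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.ones_like(d_fake), logits=d_fake), name='g_loss')
        self.d_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(d_real), logits=d_real) +
            tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(d_fake), logits=d_fake),
            name='d_loss')
        trainable = tf.trainable_variables()
        self.g_vars = [v for v in trainable if v.name.startswith('gen/')]
        self.d_vars = [v for v in trainable if v.name.startswith('discrim/')]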
@@ -15,7 +15,7 @@ from tensorpack.dataflow import dataset
 """
 A small convnet model for Cifar10 or Cifar100 dataset.
-Cifar10:
+Cifar10 trained on 1 GPU:
     91% accuracy after 50k step.
     41 step/s on TitanX