Commit 3fcd3b57 authored by Yuxin Wu's avatar Yuxin Wu

fix bug in temperature & async

parent adb684c0
...@@ -15,7 +15,7 @@ def SoftMax(x, use_temperature=False, temperature_init=1.0): ...@@ -15,7 +15,7 @@ def SoftMax(x, use_temperature=False, temperature_init=1.0):
:param x: a 2D tensor :param x: a 2D tensor
""" """
if use_temperature: if use_temperature:
t = tf.get_variable('temp', [1], t = tf.get_variable('invtemp', [],
initializer=tf.constant_initializer(1.0 / float(temperature_init))) initializer=tf.constant_initializer(1.0 / float(temperature_init)))
x = x * t x = x * t
return tf.nn.softmax(x, name='output') return tf.nn.softmax(x, name='output')
...@@ -92,7 +92,8 @@ class AsyncMultiGPUTrainer(MultiGPUTrainer): ...@@ -92,7 +92,8 @@ class AsyncMultiGPUTrainer(MultiGPUTrainer):
# sync have consistent effective learning rate # sync have consistent effective learning rate
def scale(grads): def scale(grads):
with tf.name_scope('async_scale_grad'): with tf.name_scope('async_scale_grad'):
return [(grad / self.config.nr_tower, var) for grad, var in grads] return [(grad / self.config.nr_tower if grad is not None else None, var)
for grad, var in grads]
grad_list = map(scale, grad_list) grad_list = map(scale, grad_list)
grad_list = [self.process_grads(g) for g in grad_list] grad_list = [self.process_grads(g) for g in grad_list]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment