Commit 461d20cd authored by Yuxin Wu

average gradient in replicated trainer

parent 890df78f
...@@ -235,6 +235,7 @@ class SyncMultiGPUTrainerReplicated(MultiGPUTrainerBase, SingleCostFeedfreeTrain ...@@ -235,6 +235,7 @@ class SyncMultiGPUTrainerReplicated(MultiGPUTrainerBase, SingleCostFeedfreeTrain
grads_for_a_var = [] grads_for_a_var = []
for (_, v), g in zip(grad_and_vars, summed): for (_, v), g in zip(grad_and_vars, summed):
g = tf.multiply(g, 1.0 / nr_tower)
grads_for_a_var.append((g, v)) grads_for_a_var.append((g, v))
new_tower_grads.append(grads_for_a_var) new_tower_grads.append(grads_for_a_var)
# NVar * NGPU * 2 # NVar * NGPU * 2
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment