Commit 9c2e2226 authored by Yuxin Wu

fix horovod trainer

parent 69e17d85
......@@ -229,9 +229,10 @@ class HorovodTrainer(SingleCostTrainer):
opt = get_opt_fn()
opt = hvd.DistributedOptimizer(opt)
self.train_op = opt.apply_gradients(grads, name='min_op')
with tf.name_scope('horovod_broadcast'):
op = hvd.broadcast_global_variables(0)
cb = RunOp(
tf.identity(hvd.broadcast_global_variables(0), name='horovod_broadcast_global_variables'),
run_before=True,
op, run_before=True,
run_as_trigger=False, verbose=True)
cb.chief_only = False
return [cb]
......
......@@ -114,6 +114,7 @@ def get_tqdm_kwargs(**kwargs):
f = kwargs.get('file', sys.stderr)
isatty = f.isatty()
# TODO when run under mpirun, isatty is always False
# Jupyter notebook should be recognized as tty.
# Wait for https://github.com/ipython/ipykernel/issues/268
try:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment