Commit 9c2e2226 authored by Yuxin Wu

fix horovod trainer

parent 69e17d85
...@@ -229,9 +229,10 @@ class HorovodTrainer(SingleCostTrainer): ...@@ -229,9 +229,10 @@ class HorovodTrainer(SingleCostTrainer):
opt = get_opt_fn() opt = get_opt_fn()
opt = hvd.DistributedOptimizer(opt) opt = hvd.DistributedOptimizer(opt)
self.train_op = opt.apply_gradients(grads, name='min_op') self.train_op = opt.apply_gradients(grads, name='min_op')
with tf.name_scope('horovod_broadcast'):
op = hvd.broadcast_global_variables(0)
cb = RunOp( cb = RunOp(
tf.identity(hvd.broadcast_global_variables(0), name='horovod_broadcast_global_variables'), op, run_before=True,
run_before=True,
run_as_trigger=False, verbose=True) run_as_trigger=False, verbose=True)
cb.chief_only = False cb.chief_only = False
return [cb] return [cb]
......
...@@ -114,6 +114,7 @@ def get_tqdm_kwargs(**kwargs): ...@@ -114,6 +114,7 @@ def get_tqdm_kwargs(**kwargs):
f = kwargs.get('file', sys.stderr) f = kwargs.get('file', sys.stderr)
isatty = f.isatty() isatty = f.isatty()
# TODO when run under mpirun, isatty is always False
# Jupyter notebook should be recognized as tty. # Jupyter notebook should be recognized as tty.
# Wait for https://github.com/ipython/ipykernel/issues/268 # Wait for https://github.com/ipython/ipykernel/issues/268
try: try:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment