Commit 99565998 authored by Yen-Chen Liu, committed by Yuxin Wu

Add the compression parameter to Horovod trainer (#1050)

* Add the compression parameter to Horovod trainer

* fixed typo

* Change compression default value of HorovodTrainer to None

* Check horovod version

* change import

* fix version check

* fix code for PEP8 requirements

* Update trainers.py
parent a9dce5b2
......@@ -370,24 +370,29 @@ class HorovodTrainer(SingleCostTrainer):
for a full example which has handled these common issues.
This example can train ImageNet in roughly an hour following the paper's setup.
"""
def __init__(self, average=True):
def __init__(self, average=True, compression=None):
    """
    Args:
        average (bool): whether to average or sum the gradients across processes.
        compression: `hvd.Compression.fp16` or `hvd.Compression.none`.
            If None (default), allreduce is called without a compression argument.
            Only takes effect with horovod >= 0.15.0; a warning is logged
            and the value is ignored on older versions.
    """
    if 'pyarrow' in sys.modules:
        logger.warn("Horovod and pyarrow may conflict due to pyarrow bugs. "
                    "Uninstall pyarrow and use msgpack instead.")
    # lazy import
    import re
    import horovod.tensorflow as _hvd
    import horovod
    global hvd
    hvd = _hvd

    # Parse only the leading numeric "major[.minor[.patch]]" part of the
    # version string, so that suffixes such as "rc1" or ".dev0" do not make
    # int() raise, and pad missing components with 0 so that e.g. "0.15"
    # compares equal to (0, 15, 0).
    matched = re.match(r'(\d+)(?:\.(\d+))?(?:\.(\d+))?', horovod.__version__)
    if matched is None:
        # Unrecognized version string: conservatively treat it as too old.
        hvd_version = (0, 0, 0)
    else:
        hvd_version = tuple(int(part or 0) for part in matched.groups())

    hvd.init()
    self.is_chief = hvd.rank() == 0
    self._local_rank = hvd.local_rank()
    self._rank = hvd.rank()
    self._average = average
    self._compression = compression
    # The `compression` argument is only passed to allreduce for horovod >= 0.15.0.
    self._has_compression = hvd_version >= (0, 15, 0)
    if compression is not None and not self._has_compression:
        # Make the fallback visible instead of silently dropping the option.
        logger.warn("[HorovodTrainer] compression requires horovod>=0.15.0 "
                    "(found {}). The compression argument is ignored.".format(
                        horovod.__version__))
    logger.info("[HorovodTrainer] local rank={}".format(self._local_rank))
    super(HorovodTrainer, self).__init__()
......@@ -399,6 +404,9 @@ class HorovodTrainer(SingleCostTrainer):
with tf.name_scope("HVDAllReduce"):
for grad, var in grads:
if grad is not None:
if self._compression is not None and self._has_compression:
avg_grad = hvd.allreduce(grad, average=self._average, compression=self._compression)
else:
avg_grad = hvd.allreduce(grad, average=self._average)
averaged_gradients.append((avg_grad, var))
else:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment