Commit e6edb710 authored by Yuxin Wu

Lazy import and test for horovod-pyarrow conflicts (#936)

parent e7f3a882
......@@ -18,12 +18,11 @@ Some typical questions that we DO NOT answer:
+ "Could you improve/implement an example/paper ?" --
We have no plans to do so. We don't consider feature
requests for examples or implement a paper for you, unless it demonstrates
some Tensorpack features not yet demonstrated in the existing examples.
requests for examples or implement a paper for you.
If you don't know how to do something yourself, you may ask a usage question.
+ "The examples do not perform well after I change the models/dataset/parameters/etc."
Tensorpack maintainers make sure the examples perform well without modification.
But it's your job to make sure the model and parameters are suitable in your own situation.
But it's your job to pick the model and parameters that are suitable for your own situation.
We do not help with such questions unless they appear to be a bug in tensorpack.
+ "Why my model doesn't work?", "I don't understand this paper you implement."
We do not answer machine learning questions.
......
# -*- coding: utf-8 -*-
# File: trainers.py
import sys
import os
import tensorflow as tf
import multiprocessing as mp
......@@ -364,6 +365,14 @@ class HorovodTrainer(SingleCostTrainer):
Args:
average (bool): whether to average or sum the gradients across processes.
"""
if 'pyarrow' in sys.modules:
logger.warn("Horovod and pyarrow may conflict due to pyarrow bugs. "
"Uninstall pyarrow and use msgpack instead.")
# lazy import
import horovod.tensorflow as _hvd
global hvd
hvd = _hvd
hvd.init()
self.is_chief = hvd.rank() == 0
self._local_rank = hvd.local_rank()
......@@ -431,11 +440,5 @@ class HorovodTrainer(SingleCostTrainer):
self.sess.run(self._broadcast_op)
from ..utils.develop import create_dummy_class # noqa
try:
import horovod.tensorflow as hvd
except ImportError:
HorovodTrainer = create_dummy_class('HovorodTrainer', 'horovod') # noqa
except Exception: # could be other than ImportError, e.g. NCCL not found
print("Horovod is installed but cannot be imported. Check `python -c 'import horovod.tensorflow'`.")
HorovodTrainer = create_dummy_class('HovorodTrainer', 'horovod') # noqa
# for lazy import
hvd = None
# -*- coding: utf-8 -*-
# File: serialize.py
import sys
import os
from .develop import create_dummy_func
from . import logger
__all__ = ['loads', 'dumps']
......@@ -48,6 +50,9 @@ try:
# import pyarrow has a lot of side effect: https://github.com/apache/arrow/pull/2329
# So we need an option to disable it.
if os.environ.get('TENSORPACK_SERIALIZE', 'pyarrow') == 'pyarrow':
if 'horovod' in sys.modules:
logger.warn("Horovod and pyarrow may conflict due to pyarrow bugs. "
"Uninstall pyarrow and use msgpack instead.")
import pyarrow as pa
else:
pa = None
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment