Commit e6edb710 authored by Yuxin Wu

Lazy import and test for horovod-pyarrow conflicts (#936)

parent e7f3a882
...@@ -18,12 +18,11 @@ Some typical questions that we DO NOT answer: ...@@ -18,12 +18,11 @@ Some typical questions that we DO NOT answer:
+ "Could you improve/implement an example/paper ?" -- + "Could you improve/implement an example/paper ?" --
We have no plans to do so. We don't consider feature We have no plans to do so. We don't consider feature
requests for examples or implement a paper for you, unless it demonstrates requests for examples or implement a paper for you.
some Tensorpack features not yet demonstrated in the existing examples.
If you don't know how to do something yourself, you may ask a usage question. If you don't know how to do something yourself, you may ask a usage question.
+ "The examples do not perform well after I change the models/dataset/parameters/etc." + "The examples do not perform well after I change the models/dataset/parameters/etc."
Tensorpack maintainers make sure the examples perform well without modification. Tensorpack maintainers make sure the examples perform well without modification.
But it's your job to make sure the model and parameters is suitable in your own situation. But it's your job to pick the model and parameters that are suitable for your own situation.
We do not help with such questions unless they appear to be a bug in tensorpack. We do not help with such questions unless they appear to be a bug in tensorpack.
+ "Why my model doesn't work?", "I don't understand this paper you implement." + "Why my model doesn't work?", "I don't understand this paper you implement."
We do not answer machine learning questions. We do not answer machine learning questions.
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# File: trainers.py # File: trainers.py
import sys
import os import os
import tensorflow as tf import tensorflow as tf
import multiprocessing as mp import multiprocessing as mp
...@@ -364,6 +365,14 @@ class HorovodTrainer(SingleCostTrainer): ...@@ -364,6 +365,14 @@ class HorovodTrainer(SingleCostTrainer):
Args: Args:
average (bool): whether to average or sum the gradients across processes. average (bool): whether to average or sum the gradients across processes.
""" """
if 'pyarrow' in sys.modules:
logger.warn("Horovod and pyarrow may conflict due to pyarrow bugs. "
"Uninstall pyarrow and use msgpack instead.")
# lazy import
import horovod.tensorflow as _hvd
global hvd
hvd = _hvd
hvd.init() hvd.init()
self.is_chief = hvd.rank() == 0 self.is_chief = hvd.rank() == 0
self._local_rank = hvd.local_rank() self._local_rank = hvd.local_rank()
...@@ -431,11 +440,5 @@ class HorovodTrainer(SingleCostTrainer): ...@@ -431,11 +440,5 @@ class HorovodTrainer(SingleCostTrainer):
self.sess.run(self._broadcast_op) self.sess.run(self._broadcast_op)
from ..utils.develop import create_dummy_class # noqa # for lazy import
try: hvd = None
import horovod.tensorflow as hvd
except ImportError:
HorovodTrainer = create_dummy_class('HovorodTrainer', 'horovod') # noqa
except Exception: # could be other than ImportError, e.g. NCCL not found
print("Horovod is installed but cannot be imported. Check `python -c 'import horovod.tensorflow'`.")
HorovodTrainer = create_dummy_class('HovorodTrainer', 'horovod') # noqa
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# File: serialize.py # File: serialize.py
import sys
import os import os
from .develop import create_dummy_func from .develop import create_dummy_func
from . import logger
__all__ = ['loads', 'dumps'] __all__ = ['loads', 'dumps']
...@@ -48,6 +50,9 @@ try: ...@@ -48,6 +50,9 @@ try:
# import pyarrow has a lot of side effect: https://github.com/apache/arrow/pull/2329 # import pyarrow has a lot of side effect: https://github.com/apache/arrow/pull/2329
# So we need an option to disable it. # So we need an option to disable it.
if os.environ.get('TENSORPACK_SERIALIZE', 'pyarrow') == 'pyarrow': if os.environ.get('TENSORPACK_SERIALIZE', 'pyarrow') == 'pyarrow':
if 'horovod' in sys.modules:
logger.warn("Horovod and pyarrow may conflict due to pyarrow bugs. "
"Uninstall pyarrow and use msgpack instead.")
import pyarrow as pa import pyarrow as pa
else: else:
pa = None pa = None
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment