Shashank Suhas / seminar-breakout

Commit 23239bd7, authored Jul 03, 2019 by Yuxin Wu (parent d8e2929b)

    update docs
Showing 2 changed files with 14 additions and 3 deletions (+14 / -3):

  tensorpack/train/trainers.py   +13 / -2
  tensorpack/utils/logger.py     +1  / -1
tensorpack/train/trainers.py (@ 23239bd7)

@@ -462,9 +462,19 @@ class HorovodTrainer(SingleCostTrainer):
 class BytePSTrainer(HorovodTrainer):
     """
     BytePS trainer. Supports both multi-GPU and distributed training.
-    To use it, switch the trainer, and fefer to BytePS documentation on how to
+    It achieves better scalability than horovod in distributed training, if the model is communication
+    intensive and you have properly set up the machines following its
+    `best practices <https://github.com/bytedance/byteps/blob/master/docs/best-practice.md>`_
+    which requires a few extra bandwidth servers than horovod.
+
+    To use it, switch the trainer, and refer to BytePS documentation on how to
     launch server/scheduler/workers.
+
+    Attributes:
+        hvd (module): the byteps module that contains horovod-compatible APIs
+            like `rank(),size()`.
+            This attribute exists so that downstream code that uses these APIs
+            does not need to worry about which library is being used under the hood.
     """

     def __init__(self, average=True):
         """
@@ -474,7 +484,8 @@ class BytePSTrainer(HorovodTrainer):
         import byteps.tensorflow as bps
         self.hvd = bps  # BytePS has the same interface as Horovod
         self.hvd.allreduce = bps.push_pull  # https://github.com/bytedance/byteps/issues/8
-        # TODO bootstrap env vars
+        assert os.environ.get("DMLC_ROLE", None) == "worker"
+        assert "DMLC_WORKER_ID" in os.environ and "DMLC_NUM_WORKER" in os.environ
         bps.init()
         self.is_chief = bps.rank() == 0
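The trainers.py change documents BytePS usage and checks the BytePS environment in the constructor. A minimal sketch of what "switch the trainer" means in a tensorpack script is below; MyModel and df are hypothetical placeholders, launch_train_with_config is tensorpack's usual entry point, and the DMLC_* variables are the ones asserted above, normally set for each worker by the BytePS launcher.

    # Sketch, not from this repo: swap HorovodTrainer for BytePSTrainer and launch
    # the script through BytePS so that DMLC_ROLE / DMLC_WORKER_ID / DMLC_NUM_WORKER
    # (the variables asserted in __init__ above) are set for every worker process.
    from tensorpack import TrainConfig, launch_train_with_config
    from tensorpack.train import BytePSTrainer

    config = TrainConfig(model=MyModel(), dataflow=df, max_epoch=10)  # MyModel, df: hypothetical
    launch_train_with_config(config, BytePSTrainer(average=True))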
tensorpack/utils/logger.py (@ 23239bd7)

@@ -56,7 +56,7 @@ def _getlogger():
 _logger = _getlogger()
-_LOGGING_METHOD = ['info', 'warning', 'error', 'critical', 'exception', 'debug', 'setLevel']
+_LOGGING_METHOD = ['info', 'warning', 'error', 'critical', 'exception', 'debug', 'setLevel', 'addFilter']
 # export logger functions
 for func in _LOGGING_METHOD:
     locals()[func] = getattr(_logger, func)
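The logger.py change extends the list of logging.Logger methods that the module re-exports, so addFilter becomes available on tensorpack's logger module just like info or warning. A small sketch of what that enables follows; the DropNoise filter is hypothetical and only for illustration.

    import logging

    from tensorpack.utils import logger  # module-level logger wrapper

    class DropNoise(logging.Filter):
        """Hypothetical filter: discard records whose message contains 'noisy'."""
        def filter(self, record):
            return "noisy" not in record.getMessage()

    # With 'addFilter' in _LOGGING_METHOD, this call is forwarded to the underlying
    # logging.Logger, exactly like logger.info / logger.warning already were.
    logger.addFilter(DropNoise())
    logger.info("kept")
    logger.info("noisy message, filtered out")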