Shashank Suhas / seminar-breakout / Commits

Commit 5a868442, authored Feb 16, 2019 by Yuxin Wu

some docs improvement about multi-gpu (fix #1084)

parent 24c1ec26
Showing 2 changed files with 5 additions and 11 deletions:

docs/tutorial/trainer.md      +4 -2
tensorpack/train/config.py    +1 -9
docs/tutorial/trainer.md

@@ -83,7 +83,9 @@ Note some __common problems__ when using these trainers:

 1. In each iteration, instead of taking one input tensor for all GPUs and split,
    all GPUs take tensors from the `InputSource`.
-   So the total batch size across all GPUs would become ``(batch size of InputSource) * #GPU``.
+   So the total batch size across all GPUs is ``(batch size of InputSource) * #GPU``.
+   You may want to change `steps_per_epoch` or learning rate appropriately according
+   to the total batch size.

    ```eval_rst
    .. note::
 ...

@@ -96,7 +98,7 @@ Note some __common problems__ when using these trainers:
    ```
 2. The tower function (your model code) will get called once on each GPU.
-   You must follow the abovementieond rules of tower function.
+   You must follow the abovementioned rules of tower function.

 ### Distributed Trainers
 ...
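The added note about adjusting `steps_per_epoch` and the learning rate can be made concrete with a bit of arithmetic. A minimal sketch, assuming a per-GPU batch size set by the `InputSource` and the common linear learning-rate scaling heuristic; the constants (`NUM_GPUS`, `BATCH`, `DATASET_SIZE`, `BASE_LR`) are illustrative and not part of the commit:

```python
# Illustrative numbers; only the arithmetic mirrors the tutorial text.
NUM_GPUS = 4
BATCH = 32            # batch size produced by the InputSource, i.e. per GPU
DATASET_SIZE = 50000  # number of training samples
BASE_LR = 0.1         # learning rate tuned for single-GPU training

# Total batch size across all GPUs, as the tutorial states:
total_batch = BATCH * NUM_GPUS                 # 32 * 4 = 128

# Each step now consumes NUM_GPUS input batches, so covering the
# dataset once takes proportionally fewer steps:
steps_per_epoch = DATASET_SIZE // total_batch  # 50000 // 128 = 390

# A common heuristic (not mandated by tensorpack): scale the
# learning rate linearly with the number of GPUs.
learning_rate = BASE_LR * NUM_GPUS             # 0.1 * 4 = 0.4

print(total_batch, steps_per_epoch, learning_rate)
```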
tensorpack/train/config.py

@@ -93,7 +93,7 @@ class TrainConfig(object):
         starting_epoch (int): The index of the first epoch.
         steps_per_epoch (int): the number of steps (defined by :meth:`Trainer.run_step`) to run in each epoch.
-            Defaults to the input data size.
+            Defaults to the input data size. You may want to divide it by the #GPUs in multi-GPU training.
         max_epoch (int): maximum number of epoch to run training.
     """
 ...

@@ -156,14 +156,6 @@ class TrainConfig(object):
         self.starting_epoch = int(starting_epoch)
         self.max_epoch = int(max_epoch)

-        if 'nr_tower' in kwargs:
-            self.nr_tower = kwargs.pop('nr_tower')
-        if 'tower' in kwargs:
-            self.tower = kwargs.pop('tower')
-        else:
-            self.tower = [0]
-
         assert len(kwargs) == 0, "Unknown arguments: {}".format(kwargs.keys())


 class AutoResumeTrainConfig(TrainConfig):
     """
 ...
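For context, after this change the set of GPUs is chosen by the trainer rather than by `nr_tower`/`tower` keyword arguments on `TrainConfig`. A minimal launch sketch, assuming tensorpack's documented `launch_train_with_config` and `SyncMultiGPUTrainerReplicated` APIs; `MyModel` and `my_dataflow` are hypothetical placeholders for a `ModelDesc` subclass and a `DataFlow`:

```python
from tensorpack import (TrainConfig, SyncMultiGPUTrainerReplicated,
                        launch_train_with_config)

NUM_GPUS = 2

config = TrainConfig(
    model=MyModel(),        # hypothetical ModelDesc subclass
    dataflow=my_dataflow,   # hypothetical DataFlow yielding one batch per GPU per step
    # Per the updated docstring: steps_per_epoch defaults to the input data
    # size, so divide by #GPUs if an "epoch" should still cover the data once.
    steps_per_epoch=50000 // NUM_GPUS,  # 50000 is an illustrative data size
    max_epoch=100,
)

# GPUs are specified on the trainer; TrainConfig no longer accepts
# 'nr_tower'/'tower' kwargs after this commit.
launch_train_with_config(config, SyncMultiGPUTrainerReplicated(NUM_GPUS))
```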