update docs

f0e049a5 · Yuxin Wu · 192de99a · f0e049a5 · f0e049a5 · f0e049a5
Commit f0e049a5 authored Nov 28, 2018 by Yuxin Wu
4 changed files
--- a/examples/FasterRCNN/README.md
+++ b/examples/FasterRCNN/README.md
@@ -48,15 +48,13 @@ On a single machine:
 ./train.py --config \
    MODE_MASK=True MODE_FPN=True \
    DATA.BASEDIR=/path/to/COCO/DIR \
-    BACKBONE.WEIGHTS=/path/to/ImageNet-R50-AlignPadding.npz \
+    BACKBONE.WEIGHTS=/path/to/ImageNet-R50-AlignPadding.npz
 ```

 To run distributed training, set `TRAINER=horovod` and refer to [HorovodTrainer docs](http://tensorpack.readthedocs.io/modules/train.html#tensorpack.train.HorovodTrainer).

-Options can be changed by either the command line or the `config.py` file.
-Recommended configurations are listed in the table below.
-
-The code is only valid for training with 1, 2, 4 or >=8 GPUs.
+Options can be changed by either the command line or the `config.py` file (recommended).
+Some reasonable configurations are listed in the table below.

 ### Inference:

@@ -72,7 +70,7 @@ To evaluate the performance of a model on COCO:
 ```

 Several trained models can be downloaded in the table below. Evaluation and
-prediction will need to be run with the corresponding training configs.
+prediction will need to be run with the corresponding configs used in training.

 ## Results

@@ -81,7 +79,6 @@ All models are fine-tuned from ImageNet pre-trained R50/R101 models in
 [tensorpack model zoo](http://models.tensorpack.com/FasterRCNN/), unless otherwise noted.
 All models are trained with 8 NVIDIA V100s, unless otherwise noted.
 Performance in [Detectron](https://github.com/facebookresearch/Detectron/) can be roughly reproduced.
-Mask R-CNN results contain both box and mask mAP.

 | Backbone                    | mAP<br/>(box;mask)                                                                                                            | Detectron mAP <sup>[1](#ft1)</sup><br/> (box;mask) | Time (on 8 V100s) | Configurations <br/> (click to expand)                                                                                                                                                                                                                                                                                                                     |
 | -                           | -                                                                                                                             | -                                                  | -                 | -                                                                                                                                                                                                                                                                                                                                                          |

--- a/examples/FasterRCNN/config.py
+++ b/examples/FasterRCNN/config.py
@@ -246,7 +246,7 @@ def finalize_configs(is_training):
            assert 'OMPI_COMM_WORLD_SIZE' not in os.environ
            ngpu = get_num_gpu()
        assert ngpu > 0, "Has to run with GPU!"
-        assert ngpu % 8 == 0 or 8 % ngpu == 0, ngpu
+        assert ngpu % 8 == 0 or 8 % ngpu == 0, "Can only run with 1,2,4 or >=8 GPUs, but found {} GPUs".format(ngpu)
        if _C.TRAIN.NUM_GPUS is None:
            _C.TRAIN.NUM_GPUS = ngpu
        else:

--- a/tensorpack/train/base.py
+++ b/tensorpack/train/base.py
@@ -213,8 +213,11 @@ class Trainer(object):
    @call_only_once
    def initialize(self, session_creator, session_init):
        """
-        Initialize self.sess and self.hooked_sess.
-        Must be called after callbacks are setup.
+        Create the session and set `self.sess`.
+        Call `self.initialize_hooks()`.
+        Finalize the graph.
+
+        It must be called after callbacks are setup.

        Args:
            session_creator (tf.train.SessionCreator):
@@ -242,7 +245,7 @@ class Trainer(object):
    @call_only_once
    def initialize_hooks(self):
        """
-        Create SessionRunHooks for all callbacks, and hook it onto self.sess.
+        Create SessionRunHooks for all callbacks, and hook them onto `self.sess` to create `self.hooked_sess`.

        A new trainer may override this method to create multiple groups of hooks,
        which can be useful when the training is not done by a single `train_op`.

--- a/tensorpack/train/config.py
+++ b/tensorpack/train/config.py
@@ -52,6 +52,9 @@ def DEFAULT_MONITORS():
 class TrainConfig(object):
    """
    A collection of options to be used for single-cost trainers.
+
+    Note that you do not have to use :class:`TrainConfig`.
+    You can use the API of :class:`Trainer` directly, to have more fine-grained control of the training.
    """

    def __init__(self,
@@ -67,16 +70,23 @@ class TrainConfig(object):
            data (InputSource):
            model (ModelDesc):

-            callbacks (list): a list of :class:`Callback` to perform during training.
-            extra_callbacks (list): the same as ``callbacks``. This argument
+            callbacks (list[Callback]): a list of :class:`Callback` to use during training.
+            extra_callbacks (list[Callback]): This argument
                is only used to provide the defaults in addition to ``callbacks``.
-                The list of callbacks that will be used in the end is ``callbacks + extra_callbacks``.
+                The list of callbacks that will be used in the end is simply ``callbacks + extra_callbacks``.

-                It is usually left as None and the default value for this
-                option will be the return value of :meth:`train.DEFAULT_CALLBACKS()`.
+                It is usually left as None, and the default value for this argument is :func:`DEFAULT_CALLBACKS()`.
                You can override it when you don't like any of the default callbacks.
-            monitors (list): a list of :class:`TrainingMonitor`.
-                Defaults to the return value of :meth:`train.DEFAULT_MONITORS()`.
+                For example, if you'd like to let the progress bar print tensors, you can use
+
+                .. code-block:: none
+
+                    extra_callbacks=[ProgressBar(names=['name']),
+                                     MovingAverageSummary(),
+                                     MergeAllSummaries(),
+                                     RunUpdateOps()]
+
+            monitors (list[TrainingMonitor]): Defaults to :func:`DEFAULT_MONITORS()`.

            session_creator (tf.train.SessionCreator): Defaults to :class:`sesscreate.NewSessionCreator()`
                with the config returned by :func:`tfutils.get_default_sess_config()`.