update docs

631f011f · Yuxin Wu · 1567c2dc · 631f011f · 631f011f · 631f011f
Commit 631f011f authored Sep 16, 2018 by Yuxin Wu
10 changed files
--- a/docs/tutorial/extend/callback.md
+++ b/docs/tutorial/extend/callback.md
@@ -4,6 +4,7 @@
 __Everything__ other than the training iterations happens in the callbacks.
 Most of the fancy things you want to do will probably end up here.

+Callbacks are called during training.
 The time where each callback method gets called is demonstrated in this snippet.
 ```python
 def train(self):
@@ -28,7 +29,7 @@ Note that at each place, each callback will be called in the order they are give
 ### Explain the Callback Methods

 To write a callback, subclass `Callback` and implement the corresponding underscore-prefixed methods.
-You can overwrite any of the following methods to define a new callback:
+You can override any of the following methods in the new callback:

 * `_setup_graph(self)`

@@ -61,7 +62,7 @@ You can overwrite any of the following methods to define a new callback:
 * `_before_epoch(self)`, `_after_epoch(self)`

  `_trigger_epoch` should be enough for most cases, as can be seen from the scheduling snippet above.
-  Use these two methods __only__ when you really need something to happen __immediately__ before/after an epoch.
+  These two methods should be used __only__ when you really need something to happen __immediately__ before/after an epoch.
 	And when you do need to use them, make sure they are very very fast to avoid affecting other callbacks which use them.

 * `_before_run(self, ctx)`, `_after_run(self, ctx, values)`
@@ -79,8 +80,9 @@ You can overwrite any of the following methods to define a new callback:
  ```

  The training loop would become `sess.run([training_op, my_op])`.
-  This is different from `sess.run(training_op); sess.run(my_op);`,
-  which is what you would get if you write `self.trainer.sess.run(my_op)` in `_trigger_step`.
+  
+  However, if you write `my_op.run()` in `_trigger_step`, the training loop would become
+  `sess.run(training_op); sess.run(my_op);`.
  Usually the difference matters, so please choose carefully.

 * `_trigger_step(self)`
@@ -90,7 +92,7 @@ You can overwrite any of the following methods to define a new callback:

 * `_trigger_epoch(self)`

-  Do something after each epoch has finished. Will call `self.trigger()` by default.
+  Do something after each epoch has finished. This method calls `self.trigger()` by default.

 * `_trigger(self)`


--- a/examples/DeepQNetwork/expreplay.py
+++ b/examples/DeepQNetwork/expreplay.py
@@ -289,7 +289,6 @@ if __name__ == '__main__':
                  history_len=4)
    E._init_memory()

-    for k in E.get_data():
+    for _ in E.get_data():
        import IPython as IP
        IP.embed(config=IP.terminal.ipapp.load_default_config())
-        pass
--- a/examples/DynamicFilterNetwork/steering-filter.py
+++ b/examples/DynamicFilterNetwork/steering-filter.py
@@ -123,9 +123,6 @@ class Model(ModelDesc):
    def build_graph(self, theta, image, gt_image, gt_filter):
        kernel_size = 9

-        image = image
-        gt_image = gt_image
-
        theta = tf.reshape(theta, [BATCH, 1, 1, 1]) - np.pi
        image = tf.reshape(image, [BATCH, SHAPE, SHAPE, 1])
        gt_image = tf.reshape(gt_image, [BATCH, SHAPE, SHAPE, 1])

--- a/examples/FasterRCNN/README.md
+++ b/examples/FasterRCNN/README.md
@@ -71,7 +71,7 @@ prediction will need to be run with the corresponding training configs.

 ## Results

-These models are trained with different configurations on trainval35k and evaluated on minival using mAP@IoU=0.50:0.95.
+These models are trained on trainval35k and evaluated on minival using mAP@IoU=0.50:0.95.
 Performance in [Detectron](https://github.com/facebookresearch/Detectron/) can be roughly reproduced.
 Mask R-CNN results contain both box and mask mAP.


--- a/examples/GAN/Image2Image.py
+++ b/examples/GAN/Image2Image.py
@@ -204,7 +204,6 @@ if __name__ == '__main__':
    parser.add_argument('--data', help='Image directory', required=True)
    parser.add_argument('--mode', choices=['AtoB', 'BtoA'], default='AtoB')
    parser.add_argument('-b', '--batch', type=int, default=1)
-    global args
    args = parser.parse_args()
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

--- a/examples/ImageNetModels/README.md
+++ b/examples/ImageNetModels/README.md
@@ -2,7 +2,7 @@
 ImageNet training code of ResNet, ShuffleNet, DoReFa-Net, AlexNet, Inception, VGG with tensorpack.

 To train any of the models, just do `./{model}.py --data /path/to/ilsvrc`.
-More options are available in `./{model}.py -h`.
+More options are available in `./{model}.py --help`.
 Expected format of data directory is described in [docs](http://tensorpack.readthedocs.io/en/latest/modules/dataflow.dataset.html#tensorpack.dataflow.dataset.ILSVRC12).
 Some pretrained models can be downloaded at [tensorpack model zoo](http://models.tensorpack.com/).

@@ -12,12 +12,12 @@ Reproduce ImageNet results of the following two papers:
 + [ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices](https://arxiv.org/abs/1707.01083)
 + [ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164)

-| Model                                                                                                    | Flops | Top 1 Error | Flags         |
-|:---------------------------------------------------------------------------------------------------------|:------|:-----------:|:-------------:|
-| ShuffleNetV1 0.5x  [:arrow_down:](http://models.tensorpack.com/ImageNetModels/ShuffleNetV1-0.5x-g=8.npz) | 40M   | 40.8%       | `-r=0.5`      |
-| ShuffleNetV1 1x    [:arrow_down:](http://models.tensorpack.com/ImageNetModels/ShuffleNetV1-1x-g=8.npz)   | 140M  | 32.6%       | `-r=1`        |
-| ShuffleNetV2 0.5x  [:arrow_down:](http://models.tensorpack.com/ImageNetModels/ShuffleNetV2-0.5x.npz)     | 41M   | 39.5%       | `-r=0.5 --v2` |
-| ShuffleNetV2 1x    [:arrow_down:](http://models.tensorpack.com/ImageNetModels/ShuffleNetV2-1x.npz)       | 146M  | 30.6%       | `-r=1 --v2`   |
+| Model                                                                                                    | Flops | Top-1 Error | Claimed Error | Flags         |
+|:---------------------------------------------------------------------------------------------------------|:------|:-----------:|:-------------:|:-------------:|
+| ShuffleNetV1 0.5x  [:arrow_down:](http://models.tensorpack.com/ImageNetModels/ShuffleNetV1-0.5x-g=8.npz) | 40M   | 40.8%       | 42.3%         | `-r=0.5`      |
+| ShuffleNetV1 1x    [:arrow_down:](http://models.tensorpack.com/ImageNetModels/ShuffleNetV1-1x-g=8.npz)   | 140M  | 32.6%       | 32.4%         | `-r=1`        |
+| ShuffleNetV2 0.5x  [:arrow_down:](http://models.tensorpack.com/ImageNetModels/ShuffleNetV2-0.5x.npz)     | 41M   | 39.5%       | 39.7%         | `-r=0.5 --v2` |
+| ShuffleNetV2 1x    [:arrow_down:](http://models.tensorpack.com/ImageNetModels/ShuffleNetV2-1x.npz)       | 146M  | 30.6%       | 30.6%         | `-r=1 --v2`   |

 To print flops:
 ```bash
@@ -34,14 +34,14 @@ wget http://models.tensorpack.com/ImageNetModels/ShuffleNetV2-0.5x.npz

 This AlexNet script is quite close to the settings in its [original
 paper](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
-Trained with 64x2 batch size, the script reaches 58% single-crop validation
-accuracy after 100 epochs (21 hours on 2 V100s).
+Trained with 2 GPUs and a batch size of 64 per GPU, the script reaches 58% single-crop validation
+accuracy after 100 epochs (21h on 2 V100s).
 It also puts in tensorboard the first-layer filter visualizations similar to the paper.
 See `./alexnet.py --help` for usage.

 ### VGG16

-This VGG16 script, when trained with 32x8 batch size, reaches the following
+This VGG16 script, when trained with 8 GPUs and a batch size of 32 per GPU, reaches the following
 validation error after 100 epochs (30h with 8 P100s). This is the code for the VGG
 experiments in the paper [Group Normalization](https://arxiv.org/abs/1803.08494).
 See `./vgg16.py --help` for usage.
@@ -51,7 +51,7 @@ See `./vgg16.py --help` for usage.
 | 29~30% (large variation with random seed) | 28%                 |               27.6% |

 Note that the purpose of this experiment in the paper is not to claim GroupNorm is better
-than BatchNorm, therefore the training settings and hyperpameters have not been individually tuned for best accuracy.
+than BatchNorm, therefore the training settings and hyperparameters may not be the best for BatchNorm.

 ### Inception-BN


--- a/examples/ImageNetModels/shufflenet.py
+++ b/examples/ImageNetModels/shufflenet.py
@@ -22,8 +22,6 @@ from imagenet_utils import (
    get_imagenet_dataflow,
    ImageNetModel, GoogleNetResize, eval_on_ILSVRC12)

-TOTAL_BATCH_SIZE = 1024
-

 @layer_register(log_shape=True)
 def DepthConv(x, out_channel, kernel_shape, padding='SAME', stride=1,
@@ -235,6 +233,7 @@ if __name__ == '__main__':
    parser.add_argument('--group', type=int, default=8, choices=[3, 4, 8],
                        help="Number of groups for ShuffleNetV1")
    parser.add_argument('--v2', action='store_true', help='Use ShuffleNetV2')
+    parser.add_argument('--batch', type=int, default=1024, help='total batch size')
    parser.add_argument('--load', help='path to load a model from')
    parser.add_argument('--eval', action='store_true')
    parser.add_argument('--flops', action='store_true', help='print flops and exit')
@@ -246,6 +245,10 @@ if __name__ == '__main__':
    if args.v2 and args.group != parser.get_default('group'):
        logger.error("group= is not used in ShuffleNetV2!")

+    if args.batch != 1024:
+        logger.warn("Total batch size != 1024, you need to change other hyperparameters to get the same results.")
+    TOTAL_BATCH_SIZE = args.batch
+
    model = Model()

    if args.eval:

--- a/tensorpack/dataflow/common.py
+++ b/tensorpack/dataflow/common.py
@@ -49,7 +49,7 @@ class TestDataSpeed(ProxyDataFlow):
        self.ds.reset_state()
        itr = self.ds.__iter__()
        if self.warmup:
-            for d in tqdm.trange(self.warmup, **get_tqdm_kwargs()):
+            for _ in tqdm.trange(self.warmup, **get_tqdm_kwargs()):
                next(itr)
        # add smoothing for speed benchmark
        with get_tqdm(total=self.test_size,

--- a/tensorpack/train/tower.py
+++ b/tensorpack/train/tower.py
@@ -84,7 +84,8 @@ class TowerTrainer(Trainer):

    def get_predictor(self, input_names, output_names, device=0):
        """
-        Returns a callable predictor built under ``TowerContext(is_training=False)``.
+        This method will build the tower under ``TowerContext(is_training=False)``,
+        and return a callable predictor with input placeholders & output tensors in this tower.

        Args:
            input_names (list): list of input names, matching the inputs declared for the trainer.

--- a/tensorpack/utils/logger.py
+++ b/tensorpack/utils/logger.py
@@ -53,8 +53,7 @@ def _get_time_str():
    return datetime.now().strftime('%m%d-%H%M%S')


-# logger file and directory:
-global LOG_DIR, _FILE_HANDLER
+# globals: logger file and directory:
 LOG_DIR = None
 _FILE_HANDLER = None