Commit f91a30ca authored by Yuxin Wu

misc small updates

parent 768b835e
# tensorpack-specific stuff
train_log
tensorpack/user_ops/obj
train_log_*
logs
*.npy
*.npz
*.caffemodel
@@ -20,7 +21,6 @@ checkpoint
# my personal stuff
snippet
examples/private
examples-old
TODO.md
.gitignore
.vimrc.local
......
@@ -3,9 +3,13 @@ Tensorpack Documentation
.. image:: ../.github/tensorpack.png
Tensorpack is a **training interface** based on TensorFlow.
Tensorpack is a **training interface** based on TensorFlow, with a focus on speed + flexibility.
TensorFlow is powerful, but has its own drawbacks:
Its low-level APIs are too hard and complicated for many users,
and its existing high-level APIs sacrifice a lot in either speed or flexibility.
The Tensorpack API brings speed and flexibility together.
It's Yet Another TF wrapper, but different in:
Tensorpack is Yet Another TF high-level API, but different in:
- Focus on **training speed**.
......
@@ -8,11 +8,21 @@ you'll use mostly tensorpack high-level APIs to do training, rather than TensorF
Why tensorpack?
~~~~~~~~~~~~~~~~~~~
TensorFlow is powerful, but at the same time too complicated for a lot of people.
Users will have to worry a lot about things unrelated to the model, especially when **speed** is a concern.
TensorFlow is powerful, but has its own drawbacks:
Its low-level APIs are too hard and complicated for many users,
and its existing high-level APIs sacrifice a lot in either speed or flexibility.
The Tensorpack API brings speed and flexibility together.
Is TensorFlow Slow?
~~~~~~~~~~~~~~~~~~~~~
No it's not, but it's not easy to write efficient code with it.
When **speed** is a concern, users will have to worry a lot about things unrelated to the model.
Code written with low-level APIs or other existing high-level wrappers is often suboptimal in speed.
Even a lot of official TensorFlow examples are written for simplicity rather than efficiency,
which as a result makes people think TensorFlow is slow.
Even most of the official TensorFlow examples are written for simplicity rather than efficiency,
which as a result makes people think TensorFlow is __slow__.
The `official TensorFlow benchmark <https://github.com/tensorflow/benchmarks/tree/master/scripts/tf_cnn_benchmarks>`_ said this in their README:
......
@@ -9,10 +9,12 @@ following object detection / instance segmentation papers:
with the support of:
+ Multi-GPU / distributed training
+ Cross-GPU BatchNorm (from [MegDet: A Large Mini-Batch Object Detector](https://arxiv.org/abs/1711.07240))
+ Cross-GPU BatchNorm (aka Sync-BN, from [MegDet: A Large Mini-Batch Object Detector](https://arxiv.org/abs/1711.07240))
+ [Group Normalization](https://arxiv.org/abs/1803.08494)
+ Training from scratch (from [Rethinking ImageNet Pre-training](https://arxiv.org/abs/1811.08883))
This is likely the __best-performing__ open source TensorFlow reimplementation of the above papers.
## Dependencies
+ Python 3.3+; OpenCV
+ TensorFlow ≥ 1.6
......
@@ -109,6 +109,7 @@ _C.TRAIN.WEIGHT_DECAY = 1e-4
_C.TRAIN.BASE_LR = 1e-2 # defined for a total batch size of 8. Otherwise it will be adjusted automatically
_C.TRAIN.WARMUP = 1000 # in terms of iterations. This is not affected by #GPUs
_C.TRAIN.STEPS_PER_EPOCH = 500
_C.TRAIN.STARTING_EPOCH = 1 # the first epoch to start with, useful to continue a training
# LR_SCHEDULE means equivalent steps when the total batch size is 8.
# When the total bs!=8, the actual iterations to decrease learning rate, and
......
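To make the batch-size scaling described in these comments concrete, here is a rough sketch of the linear scaling rule; the function name and the example schedule values are illustrative, not the actual code in `config.py`.

```python
# Hypothetical sketch of the linear scaling rule described in the comments above.
# BASE_LR and LR_SCHEDULE are defined for a total batch size of 8; with a
# different total batch size, the LR is scaled up and the schedule steps down.
def scale_lr_and_schedule(base_lr, lr_schedule, total_batch_size, reference_bs=8):
    factor = total_batch_size / reference_bs
    scaled_lr = base_lr * factor                            # linear LR scaling
    scaled_steps = [int(s / factor) for s in lr_schedule]   # fewer real iterations per "equivalent step"
    return scaled_lr, scaled_steps

# e.g. 16 GPUs x 1 image each (total batch size 16), with an illustrative schedule:
lr, steps = scale_lr_and_schedule(1e-2, [120000, 160000, 180000], total_batch_size=16)
# lr == 0.02, steps == [60000, 80000, 90000]
```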
@@ -18,7 +18,7 @@ def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks):
Args:
mask_logits: #fg x #category xhxw
fg_labels: #fg, in 1~#class, int64
fg_target_masks: #fgxhxw, int
fg_target_masks: #fgxhxw, float32
"""
num_fg = tf.size(fg_labels, out_type=tf.int64)
indices = tf.stack([tf.range(num_fg), fg_labels - 1], axis=1) # #fgx2
......
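For context on what these `indices` are used for, here is a hedged sketch of a per-category mask loss; it is a plausible continuation of the excerpt above, not necessarily the exact lines that follow in `model_mrcnn.py`.

```python
# Sketch (assumption): select, for each foreground RoI, the mask predicted for
# its ground-truth category, then apply a per-pixel sigmoid cross-entropy loss.
import tensorflow as tf

def mask_loss_sketch(mask_logits, fg_labels, fg_target_masks):
    num_fg = tf.size(fg_labels, out_type=tf.int64)
    indices = tf.stack([tf.range(num_fg), fg_labels - 1], axis=1)  # #fg x 2
    per_fg_logits = tf.gather_nd(mask_logits, indices)             # #fg x h x w
    loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=fg_target_masks,   # float32 targets, matching the docstring fix above
        logits=per_fg_logits)
    return tf.reduce_mean(loss, name='maskrcnn_loss')
```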
@@ -604,6 +604,7 @@ if __name__ == '__main__':
steps_per_epoch=stepnum,
max_epoch=cfg.TRAIN.LR_SCHEDULE[-1] * factor // stepnum,
session_init=session_init,
starting_epoch=cfg.TRAIN.STARTING_EPOCH
)
if is_horovod:
trainer = HorovodTrainer(average=False)
......
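With the new `TRAIN.STARTING_EPOCH` option, resuming an interrupted run could look roughly like the sketch below; the checkpoint path and epoch number are placeholders, and the `config` import is assumed to be the example's usual config module.

```python
# Hypothetical sketch: continue a previously interrupted training run.
# The checkpoint path and the epoch number below are illustrative only.
from config import config as cfg                         # the example's config object
from tensorpack.tfutils.sessinit import get_model_loader

cfg.TRAIN.STARTING_EPOCH = 21                             # first epoch of the resumed run
session_init = get_model_loader('train_log/maskrcnn/model-10000')  # last saved checkpoint

# Both values are then passed to TrainConfig as in the diff above:
#   session_init=session_init, starting_epoch=cfg.TRAIN.STARTING_EPOCH
```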
These scripts are some helpful utilities for working with the library.
They are meant to be __examples__ of how to do basic model manipulation
with tensorpack. The scripts themselves are not part of the library and
are therefore not subject to any compatibility guarantee.
@@ -156,6 +156,7 @@ class AugmentorList(ImageAugmentor):
Args:
augmentors (list): list of :class:`ImageAugmentor` instance to be applied.
"""
assert isinstance(augmentors, (list, tuple)), augmentors
self.augmentors = augmentors
super(AugmentorList, self).__init__()
......
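The new assert guards against accidentally passing a single augmentor where a list is expected; a small usage sketch (the specific augmentors and parameters are arbitrary examples):

```python
# Sketch: AugmentorList expects a list (or tuple) of augmentors.
from tensorpack.dataflow import imgaug

augs = imgaug.AugmentorList([
    imgaug.Hue((-18, 18), rgb=True),
    imgaug.Contrast((0.6, 1.4)),
])

# Passing a bare augmentor now fails fast on the assert above:
# imgaug.AugmentorList(imgaug.Contrast((0.6, 1.4)))   # AssertionError
```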
@@ -3,7 +3,6 @@
from .base import ImageAugmentor
from ...utils import logger
import numpy as np
import cv2
@@ -15,16 +14,13 @@ class Hue(ImageAugmentor):
""" Randomly change color hue.
"""
def __init__(self, range=(0, 180), rgb=None):
def __init__(self, range=(0, 180), rgb=True):
"""
Args:
range(list or tuple): range from which the applied hue offset is selected (maximum [-90,90] or [0,180])
rgb (bool): whether input is RGB or BGR.
"""
super(Hue, self).__init__()
if rgb is None:
logger.warn("Hue() now assumes rgb=False, but will by default use rgb=True in the future!")
rgb = False
rgb = bool(rgb)
self._init(locals())
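For background, a hue augmentation of this kind shifts the H channel in HSV space; the snippet below is a rough standalone illustration of that idea (assuming 8-bit images, where OpenCV stores hue in [0, 180)), not the class's exact `_augment` code.

```python
# Rough, illustrative hue shift on a uint8 image (not tensorpack's exact code).
import cv2
import numpy as np

def shift_hue(img, offset, rgb=True):
    code = cv2.COLOR_RGB2HSV if rgb else cv2.COLOR_BGR2HSV   # honor the channel order
    hsv = cv2.cvtColor(img, code)
    # OpenCV hue lives in [0, 180) for uint8 images, so wrap the offset with mod 180
    hsv[..., 0] = (hsv[..., 0].astype(int) + offset) % 180
    back = cv2.COLOR_HSV2RGB if rgb else cv2.COLOR_HSV2BGR
    return cv2.cvtColor(hsv, back)

img = np.random.randint(0, 256, size=(32, 32, 3), dtype=np.uint8)
out = shift_hue(img, offset=30)
```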
@@ -104,7 +100,7 @@ class Contrast(ImageAugmentor):
Apply ``x = (x - mean) * contrast_factor + mean`` to each channel.
"""
def __init__(self, factor_range, clip=True):
def __init__(self, factor_range, rgb=True, clip=True):
"""
Args:
factor_range (list or tuple): an interval to randomly sample the `contrast_factor`.
@@ -118,8 +114,14 @@ class Contrast(ImageAugmentor):
def _augment(self, img, r):
old_dtype = img.dtype
img = img.astype('float32')
mean = np.mean(img, axis=(0, 1), keepdims=True)
if img.ndim == 3:
m = cv2.COLOR_RGB2GRAY if self.rgb else cv2.COLOR_BGR2GRAY
grey = cv2.cvtColor(img, m)
mean = np.mean(grey)
else:
mean = np.mean(img)
img = (img - mean) * r + mean
if self.clip or old_dtype == np.uint8:
img = np.clip(img, 0, 255)
......
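The change above switches the reference value from a per-channel mean to the mean of the greyscale image, which is how PIL-style contrast enhancement is usually defined. A hedged usage sketch (factor range and image size are arbitrary):

```python
# Sketch: apply the Contrast augmentor to a random uint8 RGB image.
import numpy as np
from tensorpack.dataflow import imgaug

aug = imgaug.Contrast((0.6, 1.4), rgb=True, clip=True)
img = np.random.randint(0, 256, size=(64, 64, 3), dtype=np.uint8)
out = aug.augment(img)   # contrast is now scaled around the grey-level mean
```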
@@ -197,9 +197,11 @@ class AutoResumeTrainConfig(TrainConfig):
beginning, but a "resume" model loader when the job was
interrupted and restarted.
"""
found_sessinit = False
if always_resume or 'session_init' not in kwargs:
sessinit = self._get_sessinit_resume()
if sessinit is not None:
found_sessinit = True
path = sessinit.path
if 'session_init' in kwargs:
logger.info("Found checkpoint at {}. "
@@ -208,13 +210,17 @@
logger.info("Will load checkpoint at {}.".format(path))
kwargs['session_init'] = sessinit
found_last_epoch = False
if always_resume or 'starting_epoch' not in kwargs:
last_epoch = self._get_last_epoch()
if last_epoch is not None:
found_last_epoch = True
now_epoch = last_epoch + 1
logger.info("Found history statistics from JSON. "
"Setting starting_epoch to {}.".format(now_epoch))
kwargs['starting_epoch'] = now_epoch
assert found_sessinit == found_last_epoch, \
"Found SessionInit={}, Found Last Epoch={}".format(found_sessinit, found_last_epoch)
super(AutoResumeTrainConfig, self).__init__(**kwargs)
......
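The two flags make the auto-resume logic fail loudly if the log directory contains a checkpoint without matching JSON statistics (or vice versa). A minimal, hedged usage sketch; the model and dataflow names are placeholders:

```python
# Hypothetical sketch: AutoResumeTrainConfig resumes from the log directory,
# picking up the latest checkpoint and the last finished epoch together.
from tensorpack import AutoResumeTrainConfig, QueueInput
from tensorpack.utils import logger

logger.set_logger_dir('train_log/my_experiment')   # directory scanned for checkpoint + stats JSON

config = AutoResumeTrainConfig(
    model=MyModel(),               # placeholder ModelDesc subclass
    data=QueueInput(my_dataflow),  # placeholder DataFlow
    steps_per_epoch=1000,
    max_epoch=100,
)
# If only one of {checkpoint, stats JSON} exists, the assert above raises
# instead of silently resuming from an inconsistent state.
```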