update docs

883ef79f · Yuxin Wu · 87059de5 · 883ef79f · 883ef79f · 883ef79f
Commit 883ef79f authored May 23, 2018 by Yuxin Wu
7 changed files
--- a/examples/FasterRCNN/NOTES.md
+++ b/examples/FasterRCNN/NOTES.md
@@ -50,3 +50,14 @@ Speed:
 3. This implementation is about 14% slower than detectron,
   probably due to the lack of specialized ops (e.g. AffineChannel, ROIAlign) in TensorFlow.
   It's certainly faster than other TF implementations.
+
+Possible Future Enhancements:
+
+1. Data-parallel evaluation during training.
+
+2. Define an interface to load custom datasets.
+
+3. Support batch>1 per GPU.
+
+4. Use dedicated ops to improve speed (e.g. a TF implementation of the ROIAlign op
+   can be found in [light-head RCNN](https://github.com/zengarden/light_head_rcnn/tree/master/lib/lib_kernel)).
--- a/examples/FasterRCNN/README.md
+++ b/examples/FasterRCNN/README.md
@@ -70,7 +70,7 @@ MaskRCNN results contain both bbox and segm mAP.
 The two R50-C4 360k models have the same configuration __and mAP__
 as the `R50-C4-2x` entries in
 [Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines).
-So far this is the only TensorFlow implementation that can reproduce mAP in Detectron.
+So far this is the only public TensorFlow implementation that can reproduce mAP in Detectron.
 The other models listed here do not correspond to any configurations in Detectron.

 ## Notes

--- a/examples/FasterRCNN/basemodel.py
+++ b/examples/FasterRCNN/basemodel.py
@@ -121,13 +121,11 @@ def resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
    mult = config.FPN_RESOLUTION_REQUIREMENT * 1.
    new_shape2d = tf.to_int32(tf.ceil(tf.to_float(shape2d) / mult) * mult)
    pad_shape2d = new_shape2d - shape2d
-    assert len(num_blocks) == 4
-    # TODO pad 1 at each stage
+    assert len(num_blocks) == 4, num_blocks
    with resnet_argscope():
        chan = image.shape[1]
-        l = tf.pad(image,
-                   tf.stack([[0, 0], [0, 0],
-                            [2, 3 + pad_shape2d[0]], [2, 3 + pad_shape2d[1]]]))
+        l = tf.pad(image, tf.stack(
+            [[0, 0], [0, 0], [2, 3 + pad_shape2d[0]], [2, 3 + pad_shape2d[1]]]))
        l.set_shape([None, chan, None, None])
        l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID')
        l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])

--- a/examples/FasterRCNN/config.py
+++ b/examples/FasterRCNN/config.py
@@ -9,10 +9,10 @@ MODE_FPN = False

 # dataset -----------------------
 BASEDIR = '/path/to/your/COCO/DIR'
-TRAIN_DATASET = ['train2014', 'valminusminival2014']
-VAL_DATASET = 'minival2014'   # only support evaluation on single dataset
-NUM_CLASS = 81
-CLASS_NAMES = []  # NUM_CLASS strings. Will be populated later by coco loader
+TRAIN_DATASET = ['train2014', 'valminusminival2014']   # i.e., trainval35k
+VAL_DATASET = 'minival2014'   # For now, only support evaluation on single dataset
+NUM_CLASS = 81    # 1 background + 80 categories
+CLASS_NAMES = []  # NUM_CLASS strings. Needs to be populated later by data loader

 # basemodel ----------------------
 RESNET_NUM_BLOCK = [3, 4, 6, 3]     # for resnet50

--- a/examples/FasterRCNN/data.py
+++ b/examples/FasterRCNN/data.py
@@ -243,7 +243,7 @@ def get_multilevel_rpn_anchor_input(im, boxes, is_crowd):
    return multilevel_inputs


-def get_train_dataflow(add_mask=False):
+def get_train_dataflow():
    """
    Return a training dataflow. Each datapoint consists of the following:

@@ -260,7 +260,7 @@ def get_train_dataflow(add_mask=False):
    """

    imgs = COCODetection.load_many(
-        config.BASEDIR, config.TRAIN_DATASET, add_gt=True, add_mask=add_mask)
+        config.BASEDIR, config.TRAIN_DATASET, add_gt=True, add_mask=config.MODE_MASK)
    """
    To train on your own data, change this to your loader.
    Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
@@ -291,7 +291,7 @@ def get_train_dataflow(add_mask=False):
        assert im is not None, fname
        im = im.astype('float32')
        # assume floatbox as input
-        assert boxes.dtype == np.float32
+        assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"

        # augmentation:
        im, params = aug.augment_return_params(im)
@@ -319,9 +319,8 @@ def get_train_dataflow(add_mask=False):
            return None

        ret = [im] + list(anchor_inputs) + [boxes, klass]
-        # TODO pad im when FPN

-        if add_mask:
+        if config.MODE_MASK:
            # augmentation will modify the polys in-place
            segmentation = copy.deepcopy(img['segmentation'])
            segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
@@ -365,7 +364,7 @@ if __name__ == '__main__':
    import os
    from tensorpack.dataflow import PrintData
    config.BASEDIR = os.path.expanduser('~/data/coco')
-    ds = get_train_dataflow(add_mask=config.MODE_MASK)
+    ds = get_train_dataflow()
    ds = PrintData(ds, 100)
    TestDataSpeed(ds, 50000).start()
    ds.reset_state()

--- a/examples/FasterRCNN/model.py
+++ b/examples/FasterRCNN/model.py
@@ -639,7 +639,7 @@ def fpn_map_rois_to_levels(boxes):
    Assign boxes to level 2~5.

    Args:
-        boxes (nx4)
+        boxes (nx4):

    Returns:
        [tf.Tensor]: 4 tensors for level 2-5. Each tensor is a vector of indices of boxes in its level.

--- a/examples/FasterRCNN/train.py
+++ b/examples/FasterRCNN/train.py
@@ -519,7 +519,7 @@ class EvalCallback(Callback):
    def _before_train(self):
        EVAL_TIMES = 5  # eval 5 times during training
        interval = self.trainer.max_epoch // (EVAL_TIMES + 1)
-        self.epochs_to_eval = set([interval * k for k in range(1, EVAL_TIMES)])
+        self.epochs_to_eval = set([interval * k for k in range(1, EVAL_TIMES + 1)])
        self.epochs_to_eval.add(self.trainer.max_epoch)

    def _eval(self):
@@ -600,19 +600,20 @@ if __name__ == '__main__':

        cfg = TrainConfig(
            model=get_model(),
-            data=QueueInput(get_train_dataflow(add_mask=config.MODE_MASK)),
+            data=QueueInput(get_train_dataflow()),
            callbacks=[
                PeriodicCallback(
                    ModelSaver(max_to_keep=10, keep_checkpoint_every_n_hours=1),
                    every_k_epochs=20),
-                SessionRunTimeout(60000),   # 1 minute timeout
                # linear warmup
                ScheduledHyperParamSetter(
                    'learning_rate', warmup_schedule, interp='linear', step_based=True),
                ScheduledHyperParamSetter('learning_rate', lr_schedule),
                EvalCallback(),
                GPUUtilizationTracker(),
+                PeakMemoryTracker(),
                EstimatedTimeLeft(),
+                SessionRunTimeout(60000),   # 1 minute timeout
            ],
            steps_per_epoch=stepnum,
            max_epoch=config.LR_SCHEDULE[-1] * factor // stepnum,