[MaskRCNN] Support empty training images (fix #1256, #1230)

999846b2 · Yuxin Wu · c9e03b73
Commit 999846b2 authored Jul 16, 2019 by Yuxin Wu
6 changed files
--- a/examples/FasterRCNN/data.py
+++ b/examples/FasterRCNN/data.py
@@ -132,6 +132,7 @@ class TrainingDataPreprocessor:

    def __call__(self, roidb):
        fname, boxes, klass, is_crowd = roidb["file_name"], roidb["boxes"], roidb["class"], roidb["is_crowd"]
+        assert boxes.ndim == 2 and boxes.shape[1] == 4, boxes.shape
        boxes = np.copy(boxes)
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
@@ -149,7 +150,8 @@ class TrainingDataPreprocessor:
        points = box_to_point8(boxes)
        points = self.aug.augment_coords(points, params)
        boxes = point8_to_box(points)
-        assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
+        if len(boxes):
+            assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"

        ret = {"image": im}
        # Add rpn data to dataflow:
@@ -166,8 +168,6 @@ class TrainingDataPreprocessor:
            klass = klass[is_crowd == 0]
            ret["gt_boxes"] = boxes
            ret["gt_labels"] = klass
-            if not len(boxes):
-                raise MalformedData("No valid gt_boxes!")
        except MalformedData as e:
            log_once("Input {} is filtered for training: {}".format(fname, str(e)), "warn")
            return None
@@ -183,13 +183,19 @@ class TrainingDataPreprocessor:
            masks = []
            width_height = np.asarray([width, height], dtype=np.float32)
            gt_mask_width = int(np.ceil(im.shape[1] / 8.0) * 8)   # pad to 8 in order to pack mask into bits
+
            for polys in segmentation:
                if not self.cfg.DATA.ABSOLUTE_COORD:
                    polys = [p * width_height for p in polys]
                polys = [self.aug.augment_coords(p, params) for p in polys]
                masks.append(segmentation_to_mask(polys, im.shape[0], gt_mask_width))
-            masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
-            masks = np.packbits(masks, axis=-1)
+
+            if len(masks):
+                masks = np.asarray(masks, dtype='uint8')    # values in {0, 1}
+                masks = np.packbits(masks, axis=-1)
+            else:  # no gt on the image
+                masks = np.zeros((0, im.shape[0], gt_mask_width // 8), dtype='uint8')
+
            ret['gt_masks_packed'] = masks

            # from viz import draw_annotation, draw_mask
@@ -314,7 +320,12 @@ class TrainingDataPreprocessor:
            return curr_inds

        NA, NB = len(anchors), len(gt_boxes)
-        assert NB > 0  # empty images should have been filtered already
+        if NB == 0:
+            # No groundtruth. All anchors are either background or ignored.
+            anchor_labels = np.zeros((NA,), dtype="int32")
+            filter_box_label(anchor_labels, 0, self.cfg.RPN.BATCH_PER_IM)
+            return anchor_labels, np.zeros((NA, 4), dtype="float32")
+
        box_ious = np_iou(anchors, gt_boxes)  # NA x NB
        ious_argmax_per_anchor = box_ious.argmax(axis=1)  # NA,
        ious_max_per_anchor = box_ious.max(axis=1)
@@ -380,8 +391,8 @@ def get_train_dataflow():
    roidbs = list(itertools.chain.from_iterable(DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    print_class_histogram(roidbs)

-    # Valid training images should have at least one fg box.
-    # But this filter shall not be applied for testing.
+    # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
+    # The model does support training with empty images, but it is not useful for COCO.
    num = len(roidbs)
    roidbs = list(filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0, roidbs))
    logger.info(

--- a/examples/FasterRCNN/dataset/coco.py
+++ b/examples/FasterRCNN/dataset/coco.py
@@ -183,7 +183,10 @@ class COCODetection(DatasetSplit):
                        all_segm.append(valid_segs)

        # all geometrically-valid boxes are returned
-        img['boxes'] = np.asarray(all_boxes, dtype='float32')  # (n, 4)
+        if len(all_boxes):
+            img['boxes'] = np.asarray(all_boxes, dtype='float32')  # (n, 4)
+        else:
+            img['boxes'] = np.zeros((0, 4), dtype='float32')
        cls = np.asarray(all_cls, dtype='int32')  # (n,)
        if len(cls):
            assert cls.min() > 0, "Category id in COCO format must > 0!"

--- a/examples/FasterRCNN/modeling/model_frcnn.py
+++ b/examples/FasterRCNN/modeling/model_frcnn.py
@@ -51,11 +51,11 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
        gt_labels: m, int32

    Returns:
-        A BoxProposals instance.
-        sampled_boxes: tx4 floatbox, the rois
-        sampled_labels: t int64 labels, in [0, #class). Positive means foreground.
-        fg_inds_wrt_gt: #fg indices, each in range [0, m-1].
-            It contains the matching GT of each foreground roi.
+        A BoxProposals instance, with:
+            sampled_boxes: tx4 floatbox, the rois
+            sampled_labels: t int64 labels, in [0, #class). Positive means foreground.
+            fg_inds_wrt_gt: #fg indices, each in range [0, m-1].
+                It contains the matching GT of each foreground roi.
    """
    iou = pairwise_iou(boxes, gt_boxes)     # nxm
    proposal_metrics(iou)
@@ -66,7 +66,9 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
    # #proposal=n+m from now on

    def sample_fg_bg(iou):
-        fg_mask = tf.reduce_max(iou, axis=1) >= cfg.FRCNN.FG_THRESH
+        fg_mask = tf.cond(tf.shape(iou)[1] > 0,
+                          lambda: tf.reduce_max(iou, axis=1) >= cfg.FRCNN.FG_THRESH,
+                          lambda: tf.zeros([tf.shape(iou)[0]], dtype=tf.bool))

        fg_inds = tf.reshape(tf.where(fg_mask), [-1])
        num_fg = tf.minimum(int(
@@ -86,7 +88,9 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
    fg_inds, bg_inds = sample_fg_bg(iou)
    # fg,bg indices w.r.t proposals

-    best_iou_ind = tf.argmax(iou, axis=1)   # #proposal, each in 0~m-1
+    best_iou_ind = tf.cond(tf.shape(iou)[1] > 0,
+                           lambda: tf.argmax(iou, axis=1),   # #proposal, each in 0~m-1
+                           lambda: tf.zeros([tf.shape(iou)[0]], dtype=tf.int64))
    fg_inds_wrt_gt = tf.gather(best_iou_ind, fg_inds)   # num_fg

    all_indices = tf.concat([fg_inds, bg_inds], axis=0)   # indices w.r.t all n+m proposal boxes

--- a/examples/FasterRCNN/modeling/model_mrcnn.py
+++ b/examples/FasterRCNN/modeling/model_mrcnn.py
@@ -102,5 +102,5 @@ def unpackbits_masks(masks):
    unpacked = tf.bitwise.bitwise_and(tf.expand_dims(masks, -1), bits) > 0
    unpacked = tf.reshape(
        unpacked,
-        tf.concat([tf.shape(masks)[:-1], [-1]], axis=0))
+        tf.concat([tf.shape(masks)[:-1], [8 * tf.shape(masks)[-1]]], axis=0))
    return unpacked
--- a/tensorpack/callbacks/prof.py
+++ b/tensorpack/callbacks/prof.py
@@ -45,8 +45,9 @@ class GPUUtilizationTracker(Callback):
            env = os.environ.get('CUDA_VISIBLE_DEVICES')
            if env is None:
                self._devices = list(range(get_num_gpu()))
-                logger.warn("[GPUUtilizationTracker] Both devices and CUDA_VISIBLE_DEVICES are None! "
-                            "Will monitor all {} visible GPUs!".format(len(self._devices)))
+                if len(self._devices) > 1:
+                    logger.warn("[GPUUtilizationTracker] Both devices and CUDA_VISIBLE_DEVICES are None! "
+                                "Will monitor all {} visible GPUs!".format(len(self._devices)))
            else:
                if len(env):
                    self._devices = list(map(int, env.split(',')))

--- a/tensorpack/graph_builder/training.py
+++ b/tensorpack/graph_builder/training.py
@@ -300,8 +300,11 @@ class SyncMultiGPUReplicatedBuilder(DataParallelBuilder):
                            grad_and_vars, name='apply_grad_{}'.format(idx)))
        train_op = tf.group(*train_ops, name='train_op')

-        with tf.name_scope('sync_variables'):
-            post_init_op = SyncMultiGPUReplicatedBuilder.get_post_init_ops()
+        if len(self.towers) > 1:
+            with tf.name_scope('sync_variables'):
+                post_init_op = SyncMultiGPUReplicatedBuilder.get_post_init_ops()
+        else:
+            post_init_op = tf.no_op(name='empty_sync_variables')
        return train_op, post_init_op

 # Adopt from https://github.com/tensorflow/benchmarks/blob/master/scripts/tf_cnn_benchmarks/variable_mgr.py