Commit 999846b2 authored by Yuxin Wu

[MaskRCNN] Support empty training images (fix #1256,#1230)

parent c9e03b73
...@@ -132,6 +132,7 @@ class TrainingDataPreprocessor: ...@@ -132,6 +132,7 @@ class TrainingDataPreprocessor:
def __call__(self, roidb): def __call__(self, roidb):
fname, boxes, klass, is_crowd = roidb["file_name"], roidb["boxes"], roidb["class"], roidb["is_crowd"] fname, boxes, klass, is_crowd = roidb["file_name"], roidb["boxes"], roidb["class"], roidb["is_crowd"]
assert boxes.ndim == 2 and boxes.shape[1] == 4, boxes.shape
boxes = np.copy(boxes) boxes = np.copy(boxes)
im = cv2.imread(fname, cv2.IMREAD_COLOR) im = cv2.imread(fname, cv2.IMREAD_COLOR)
assert im is not None, fname assert im is not None, fname
...@@ -149,7 +150,8 @@ class TrainingDataPreprocessor: ...@@ -149,7 +150,8 @@ class TrainingDataPreprocessor:
points = box_to_point8(boxes) points = box_to_point8(boxes)
points = self.aug.augment_coords(points, params) points = self.aug.augment_coords(points, params)
boxes = point8_to_box(points) boxes = point8_to_box(points)
assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!" if len(boxes):
assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
ret = {"image": im} ret = {"image": im}
# Add rpn data to dataflow: # Add rpn data to dataflow:
...@@ -166,8 +168,6 @@ class TrainingDataPreprocessor: ...@@ -166,8 +168,6 @@ class TrainingDataPreprocessor:
klass = klass[is_crowd == 0] klass = klass[is_crowd == 0]
ret["gt_boxes"] = boxes ret["gt_boxes"] = boxes
ret["gt_labels"] = klass ret["gt_labels"] = klass
if not len(boxes):
raise MalformedData("No valid gt_boxes!")
except MalformedData as e: except MalformedData as e:
log_once("Input {} is filtered for training: {}".format(fname, str(e)), "warn") log_once("Input {} is filtered for training: {}".format(fname, str(e)), "warn")
return None return None
...@@ -183,13 +183,19 @@ class TrainingDataPreprocessor: ...@@ -183,13 +183,19 @@ class TrainingDataPreprocessor:
masks = [] masks = []
width_height = np.asarray([width, height], dtype=np.float32) width_height = np.asarray([width, height], dtype=np.float32)
gt_mask_width = int(np.ceil(im.shape[1] / 8.0) * 8) # pad to 8 in order to pack mask into bits gt_mask_width = int(np.ceil(im.shape[1] / 8.0) * 8) # pad to 8 in order to pack mask into bits
for polys in segmentation: for polys in segmentation:
if not self.cfg.DATA.ABSOLUTE_COORD: if not self.cfg.DATA.ABSOLUTE_COORD:
polys = [p * width_height for p in polys] polys = [p * width_height for p in polys]
polys = [self.aug.augment_coords(p, params) for p in polys] polys = [self.aug.augment_coords(p, params) for p in polys]
masks.append(segmentation_to_mask(polys, im.shape[0], gt_mask_width)) masks.append(segmentation_to_mask(polys, im.shape[0], gt_mask_width))
masks = np.asarray(masks, dtype='uint8') # values in {0, 1}
masks = np.packbits(masks, axis=-1) if len(masks):
masks = np.asarray(masks, dtype='uint8') # values in {0, 1}
masks = np.packbits(masks, axis=-1)
else: # no gt on the image
masks = np.zeros((0, im.shape[0], gt_mask_width // 8), dtype='uint8')
ret['gt_masks_packed'] = masks ret['gt_masks_packed'] = masks
# from viz import draw_annotation, draw_mask # from viz import draw_annotation, draw_mask
...@@ -314,7 +320,12 @@ class TrainingDataPreprocessor: ...@@ -314,7 +320,12 @@ class TrainingDataPreprocessor:
return curr_inds return curr_inds
NA, NB = len(anchors), len(gt_boxes) NA, NB = len(anchors), len(gt_boxes)
assert NB > 0 # empty images should have been filtered already if NB == 0:
# No groundtruth. All anchors are either background or ignored.
anchor_labels = np.zeros((NA,), dtype="int32")
filter_box_label(anchor_labels, 0, self.cfg.RPN.BATCH_PER_IM)
return anchor_labels, np.zeros((NA, 4), dtype="float32")
box_ious = np_iou(anchors, gt_boxes) # NA x NB box_ious = np_iou(anchors, gt_boxes) # NA x NB
ious_argmax_per_anchor = box_ious.argmax(axis=1) # NA, ious_argmax_per_anchor = box_ious.argmax(axis=1) # NA,
ious_max_per_anchor = box_ious.max(axis=1) ious_max_per_anchor = box_ious.max(axis=1)
...@@ -380,8 +391,8 @@ def get_train_dataflow(): ...@@ -380,8 +391,8 @@ def get_train_dataflow():
roidbs = list(itertools.chain.from_iterable(DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN)) roidbs = list(itertools.chain.from_iterable(DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
print_class_histogram(roidbs) print_class_histogram(roidbs)
# Valid training images should have at least one fg box. # Filter out images that have no gt boxes, but this filter shall not be applied for testing.
# But this filter shall not be applied for testing. # The model does support training with empty images, but it is not useful for COCO.
num = len(roidbs) num = len(roidbs)
roidbs = list(filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0, roidbs)) roidbs = list(filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0, roidbs))
logger.info( logger.info(
......
...@@ -183,7 +183,10 @@ class COCODetection(DatasetSplit): ...@@ -183,7 +183,10 @@ class COCODetection(DatasetSplit):
all_segm.append(valid_segs) all_segm.append(valid_segs)
# all geometrically-valid boxes are returned # all geometrically-valid boxes are returned
img['boxes'] = np.asarray(all_boxes, dtype='float32') # (n, 4) if len(all_boxes):
img['boxes'] = np.asarray(all_boxes, dtype='float32') # (n, 4)
else:
img['boxes'] = np.zeros((0, 4), dtype='float32')
cls = np.asarray(all_cls, dtype='int32') # (n,) cls = np.asarray(all_cls, dtype='int32') # (n,)
if len(cls): if len(cls):
assert cls.min() > 0, "Category id in COCO format must > 0!" assert cls.min() > 0, "Category id in COCO format must > 0!"
......
...@@ -51,11 +51,11 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels): ...@@ -51,11 +51,11 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
gt_labels: m, int32 gt_labels: m, int32
Returns: Returns:
A BoxProposals instance. A BoxProposals instance, with:
sampled_boxes: tx4 floatbox, the rois sampled_boxes: tx4 floatbox, the rois
sampled_labels: t int64 labels, in [0, #class). Positive means foreground. sampled_labels: t int64 labels, in [0, #class). Positive means foreground.
fg_inds_wrt_gt: #fg indices, each in range [0, m-1]. fg_inds_wrt_gt: #fg indices, each in range [0, m-1].
It contains the matching GT of each foreground roi. It contains the matching GT of each foreground roi.
""" """
iou = pairwise_iou(boxes, gt_boxes) # nxm iou = pairwise_iou(boxes, gt_boxes) # nxm
proposal_metrics(iou) proposal_metrics(iou)
...@@ -66,7 +66,9 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels): ...@@ -66,7 +66,9 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
# #proposal=n+m from now on # #proposal=n+m from now on
def sample_fg_bg(iou): def sample_fg_bg(iou):
fg_mask = tf.reduce_max(iou, axis=1) >= cfg.FRCNN.FG_THRESH fg_mask = tf.cond(tf.shape(iou)[1] > 0,
lambda: tf.reduce_max(iou, axis=1) >= cfg.FRCNN.FG_THRESH,
lambda: tf.zeros([tf.shape(iou)[0]], dtype=tf.bool))
fg_inds = tf.reshape(tf.where(fg_mask), [-1]) fg_inds = tf.reshape(tf.where(fg_mask), [-1])
num_fg = tf.minimum(int( num_fg = tf.minimum(int(
...@@ -86,7 +88,9 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels): ...@@ -86,7 +88,9 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
fg_inds, bg_inds = sample_fg_bg(iou) fg_inds, bg_inds = sample_fg_bg(iou)
# fg,bg indices w.r.t proposals # fg,bg indices w.r.t proposals
best_iou_ind = tf.argmax(iou, axis=1) # #proposal, each in 0~m-1 best_iou_ind = tf.cond(tf.shape(iou)[1] > 0,
lambda: tf.argmax(iou, axis=1), # #proposal, each in 0~m-1
lambda: tf.zeros([tf.shape(iou)[0]], dtype=tf.int64))
fg_inds_wrt_gt = tf.gather(best_iou_ind, fg_inds) # num_fg fg_inds_wrt_gt = tf.gather(best_iou_ind, fg_inds) # num_fg
all_indices = tf.concat([fg_inds, bg_inds], axis=0) # indices w.r.t all n+m proposal boxes all_indices = tf.concat([fg_inds, bg_inds], axis=0) # indices w.r.t all n+m proposal boxes
......
...@@ -102,5 +102,5 @@ def unpackbits_masks(masks): ...@@ -102,5 +102,5 @@ def unpackbits_masks(masks):
unpacked = tf.bitwise.bitwise_and(tf.expand_dims(masks, -1), bits) > 0 unpacked = tf.bitwise.bitwise_and(tf.expand_dims(masks, -1), bits) > 0
unpacked = tf.reshape( unpacked = tf.reshape(
unpacked, unpacked,
tf.concat([tf.shape(masks)[:-1], [-1]], axis=0)) tf.concat([tf.shape(masks)[:-1], [8 * tf.shape(masks)[-1]]], axis=0))
return unpacked return unpacked
...@@ -45,8 +45,9 @@ class GPUUtilizationTracker(Callback): ...@@ -45,8 +45,9 @@ class GPUUtilizationTracker(Callback):
env = os.environ.get('CUDA_VISIBLE_DEVICES') env = os.environ.get('CUDA_VISIBLE_DEVICES')
if env is None: if env is None:
self._devices = list(range(get_num_gpu())) self._devices = list(range(get_num_gpu()))
logger.warn("[GPUUtilizationTracker] Both devices and CUDA_VISIBLE_DEVICES are None! " if len(self._devices) > 1:
"Will monitor all {} visible GPUs!".format(len(self._devices))) logger.warn("[GPUUtilizationTracker] Both devices and CUDA_VISIBLE_DEVICES are None! "
"Will monitor all {} visible GPUs!".format(len(self._devices)))
else: else:
if len(env): if len(env):
self._devices = list(map(int, env.split(','))) self._devices = list(map(int, env.split(',')))
......
...@@ -300,8 +300,11 @@ class SyncMultiGPUReplicatedBuilder(DataParallelBuilder): ...@@ -300,8 +300,11 @@ class SyncMultiGPUReplicatedBuilder(DataParallelBuilder):
grad_and_vars, name='apply_grad_{}'.format(idx))) grad_and_vars, name='apply_grad_{}'.format(idx)))
train_op = tf.group(*train_ops, name='train_op') train_op = tf.group(*train_ops, name='train_op')
with tf.name_scope('sync_variables'): if len(self.towers) > 1:
post_init_op = SyncMultiGPUReplicatedBuilder.get_post_init_ops() with tf.name_scope('sync_variables'):
post_init_op = SyncMultiGPUReplicatedBuilder.get_post_init_ops()
else:
post_init_op = tf.no_op(name='empty_sync_variables')
return train_op, post_init_op return train_op, post_init_op
# Adopt from https://github.com/tensorflow/benchmarks/blob/master/scripts/tf_cnn_benchmarks/variable_mgr.py # Adopt from https://github.com/tensorflow/benchmarks/blob/master/scripts/tf_cnn_benchmarks/variable_mgr.py
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment