[FastRCNN] decode_bbox_target preserve shape

e37e91e8 · Yuxin Wu · df869711 · e37e91e8 · e37e91e8 · e37e91e8
Commit e37e91e8 authored Nov 13, 2017 by Yuxin Wu
4 changed files
--- a/examples/FasterRCNN/config.py
+++ b/examples/FasterRCNN/config.py
@@ -41,7 +41,7 @@ TRAIN_POST_NMS_TOPK = 2000
 CROWD_OVERLAP_THRES = 0.7

 # fastrcnn training ---------------------
-FASTRCNN_BATCH_PER_IM = 64
+FASTRCNN_BATCH_PER_IM = 256
 FASTRCNN_BBOX_REG_WEIGHTS = np.array([10, 10, 5, 5], dtype='float32')
 FASTRCNN_FG_THRESH = 0.5
 # keep fg ratio in a batch in this range

--- a/examples/FasterRCNN/eval.py
+++ b/examples/FasterRCNN/eval.py
@@ -53,7 +53,6 @@ def nms_fastrcnn_results(boxes, probs):
    C = probs.shape[1]
    boxes = boxes.copy()

-    boxes_per_class = {}
    nms_func = get_tf_nms(config.RESULTS_PER_IM, config.FASTRCNN_NMS_THRESH)
    ret = []
    for klass in range(1, C):

--- a/examples/FasterRCNN/model.py
+++ b/examples/FasterRCNN/model.py
@@ -100,15 +100,16 @@ def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
 def decode_bbox_target(box_predictions, anchors):
    """
    Args:
-        box_predictions: fHxfWxNAx4, logits
-        anchors: fHxfWxNAx4, floatbox
+        box_predictions: (..., 4), logits
+        anchors: (..., 4), floatbox. Must have the same shape

    Returns:
-        box_decoded: (fHxfWxNA)x4, float32
+        box_decoded: (..., 4), float32. With the same shape.
    """
+    orig_shape = tf.shape(anchors)
    box_pred_txtytwth = tf.reshape(box_predictions, (-1, 2, 2))
    box_pred_txty, box_pred_twth = tf.split(box_pred_txtytwth, 2, axis=1)
-    # each is (fHxfWxNA)x1x2
+    # each is (...)x1x2
    anchors_x1y1x2y2 = tf.reshape(anchors, (-1, 2, 2))
    anchors_x1y1, anchors_x2y2 = tf.split(anchors_x1y1x2y2, 2, axis=1)

@@ -119,9 +120,9 @@ def decode_bbox_target(box_predictions, anchors):
        box_pred_twth, config.BBOX_DECODE_CLIP)) * waha
    xbyb = box_pred_txty * waha + xaya
    x1y1 = xbyb - wbhb * 0.5
-    x2y2 = xbyb + wbhb * 0.5
-    out = tf.squeeze(tf.concat([x1y1, x2y2], axis=2), axis=1, name='output')
-    return out
+    x2y2 = xbyb + wbhb * 0.5    # (...)x1x2
+    out = tf.concat([x1y1, x2y2], axis=1)
+    return tf.reshape(out, orig_shape)


 @under_name_scope()

--- a/examples/FasterRCNN/train.py
+++ b/examples/FasterRCNN/train.py
@@ -39,7 +39,7 @@ from viz import (
    draw_predictions, draw_final_outputs)
 from common import clip_boxes, CustomResize, print_config
 from eval import (
-    eval_on_dataflow, detect_one_image, print_evaluation_scores, get_tf_nms,
+    eval_on_dataflow, detect_one_image, print_evaluation_scores,
    nms_fastrcnn_results)
 import config

@@ -92,9 +92,9 @@ class Model(ModelDesc):
        rpn_label_loss, rpn_box_loss = rpn_losses(
            anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)

-        decoded_boxes = decode_bbox_target(rpn_box_logits, fm_anchors)  # (fHxfWxNA)x4, floatbox
+        decoded_boxes = decode_bbox_target(rpn_box_logits, fm_anchors)  # fHxfWxNAx4, floatbox
        proposal_boxes, proposal_scores = generate_rpn_proposals(
-            decoded_boxes,
+            tf.reshape(decoded_boxes, [-1, 4]),
            tf.reshape(rpn_label_logits, [-1]),
            tf.shape(image)[2:])

@@ -127,14 +127,14 @@ class Model(ModelDesc):
            for k in self.cost, wd_cost:
                add_moving_summary(k)
        else:
-            label_probs = tf.nn.softmax(fastrcnn_label_logits, name='fastrcnn_all_probs')  # NP,
+            label_probs = tf.nn.softmax(fastrcnn_label_logits, name='fastrcnn_all_probs')  # #proposal x #Class
            labels = tf.argmax(fastrcnn_label_logits, axis=1)
            fg_ind, fg_box_logits = fastrcnn_predict_boxes(labels, fastrcnn_box_logits)
            fg_label_probs = tf.gather(label_probs, fg_ind, name='fastrcnn_fg_probs')
            fg_boxes = tf.gather(proposal_boxes, fg_ind)

            fg_box_logits = fg_box_logits / tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS)
-            decoded_boxes = decode_bbox_target(fg_box_logits, fg_boxes)  # Nfx4, floatbox
+            decoded_boxes = decode_bbox_target(fg_box_logits, fg_boxes)  # #fgx4, floatbox
            decoded_boxes = tf.identity(decoded_boxes, name='fastrcnn_fg_boxes')

    def _get_optimizer(self):