[FasterRCNN] postprocess return indices & use simpler results format

dafdabf8 · Yuxin Wu · d3d368ff
Commit dafdabf8 authored Nov 14, 2017 by Yuxin Wu
4 changed files
--- a/examples/FasterRCNN/eval.py
+++ b/examples/FasterRCNN/eval.py
@@ -23,7 +23,12 @@ import config
 DetectionResult = namedtuple(
    'DetectionResult',
-    ['class_id', 'boxes', 'scores'])
+    ['class_id', 'box', 'score'])
+"""
+class_id: int, 1~NUM_CLASS
+box: 4 float
+score: float
+"""
 def detect_one_image(img, model_func):
@@ -39,25 +44,15 @@ def detect_one_image(img, model_func):
        [DetectionResult]
    """
-    def group_results_by_class(boxes, probs, labels):
-        dic = defaultdict(list)
-        for box, prob, lab in zip(boxes, probs, labels):
-            dic[lab].append((box, prob))
-        def mapf(lab, values):
-            boxes = np.asarray([k[0] for k in values])
-            probs = np.asarray([k[1] for k in values])
-            return DetectionResult(lab, boxes, probs)
-        return [mapf(k, v) for k, v in six.iteritems(dic)]
    resizer = CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE)
    resized_img = resizer.augment(img)
    scale = (resized_img.shape[0] * 1.0 / img.shape[0] + resized_img.shape[1] * 1.0 / img.shape[1]) / 2
    boxes, probs, labels = model_func(resized_img)
    boxes = boxes / scale
    boxes = clip_boxes(boxes, img.shape[:2])
-    return group_results_by_class(boxes, probs, labels)
+    results = [DetectionResult(*args) for args in zip(labels, boxes, probs)]
+    return results
 def eval_on_dataflow(df, detect_func):
@@ -74,17 +69,16 @@ def eval_on_dataflow(df, detect_func):
    with tqdm.tqdm(total=df.size(), **get_tqdm_kwargs()) as pbar:
        for img, img_id in df.get_data():
            results = detect_func(img)
-            for classid, boxes, scores in results:
+            for classid, box, score in results:
                cat_id = COCOMeta.class_id_to_category_id[classid]
-                boxes[:, 2] -= boxes[:, 0]
+                box[2] -= box[0]
-                boxes[:, 3] -= boxes[:, 1]
+                box[3] -= box[1]
-                for box, score in zip(boxes, scores):
+                all_results.append({
-                    all_results.append({
+                    'image_id': img_id,
-                        'image_id': img_id,
+                    'category_id': cat_id,
-                        'category_id': cat_id,
+                    'bbox': list(map(lambda x: float(round(x, 1)), box)),
-                        'bbox': list(map(lambda x: float(round(x, 1)), box)),
+                    'score': float(round(score, 2)),
-                        'score': float(round(score, 2)),
+                })
-                    })
            pbar.update(1)
    return all_results

--- a/examples/FasterRCNN/model.py
+++ b/examples/FasterRCNN/model.py
@@ -468,16 +468,13 @@ def fastrcnn_predictions(boxes, probs):
    masks = tf.map_fn(f, (probs, boxes), dtype=tf.bool,
                      parallel_iterations=10)     # #cat x N
    selected_indices = tf.where(masks)  # #selection x 2, each is (cat_id, box_id)
-    boxes = tf.boolean_mask(boxes, masks)   # #selection x 4
    probs = tf.boolean_mask(probs, masks)
-    labels = selected_indices[:, 0] + 1
    # filter again by sorting scores
    topk_probs, topk_indices = tf.nn.top_k(
        probs,
        tf.minimum(config.RESULTS_PER_IM, tf.size(probs)),
        sorted=False)
-    topk_probs = tf.identity(topk_probs, name='probs')
+    filtered_selection = tf.gather(selected_indices, topk_indices)
-    topk_boxes = tf.gather(boxes, topk_indices, name='boxes')
+    filtered_selection = tf.reverse(filtered_selection, axis=[1], name='filtered_indices')
-    topk_labels = tf.gather(labels, topk_indices, name='labels')
+    return filtered_selection, topk_probs
-    return topk_boxes, topk_probs, topk_labels
--- a/examples/FasterRCNN/train.py
+++ b/examples/FasterRCNN/train.py
@@ -142,7 +142,11 @@ class Model(ModelDesc):
                tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS), anchors)
            decoded_boxes = tf.identity(decoded_boxes, name='fastrcnn_all_boxes')
-            pred_boxes, pred_probs, pred_labels = fastrcnn_predictions(decoded_boxes, label_probs)
+            # Nx2. Each index into (#proposal, #category)
+            pred_indices, final_probs = fastrcnn_predictions(decoded_boxes, label_probs)
+            final_probs = tf.identity(final_probs, 'final_probs')
+            final_boxes = tf.gather_nd(decoded_boxes, pred_indices, name='final_boxes')
+            final_labels = tf.add(pred_indices[:, 1], 1, name='final_labels')
    def _get_optimizer(self):
        lr = tf.get_variable('learning_rate', initializer=0.003, trainable=False)
@@ -167,9 +171,9 @@ def visualize(model_path, nr_visualize=50, output_dir='output'):
            'generate_rpn_proposals/boxes',
            'generate_rpn_proposals/probs',
            'fastrcnn_all_probs',
-            'fastrcnn_predictions/boxes',
+            'final_boxes',
-            'fastrcnn_predictions/probs',
+            'final_probs',
-            'fastrcnn_predictions/labels',
+            'final_labels',
        ]))
    df = get_train_dataflow()
    df.reset_state()
@@ -191,7 +195,8 @@ def visualize(model_path, nr_visualize=50, output_dir='output'):
            # draw the scores for the above proposals
            score_viz = draw_predictions(img, rpn_boxes[good_proposals_ind], all_probs[good_proposals_ind])
-            final_viz = draw_final_outputs(img, final_boxes, final_probs, final_labels)
+            results = [DetectionResult(*args) for args in zip(final_labels, final_boxes, final_probs)]
+            final_viz = draw_final_outputs(img, results)
            viz = tpviz.stack_patches([
                gt_viz, proposal_viz,
@@ -209,9 +214,9 @@ def offline_evaluate(model_path, output_file):
        session_init=get_model_loader(model_path),
        input_names=['image'],
        output_names=[
-            'fastrcnn_predictions/boxes',
+            'final_boxes',
-            'fastrcnn_predictions/probs',
+            'final_probs',
-            'fastrcnn_predictions/labels',
+            'final_labels',
        ]))
    df = get_eval_dataflow()
    df = PrefetchDataZMQ(df, 1)
@@ -227,9 +232,9 @@ def predict(model_path, input_file):
        session_init=get_model_loader(model_path),
        input_names=['image'],
        output_names=[
-            'fastrcnn_predictions/boxes',
+            'final_boxes',
-            'fastrcnn_predictions/probs',
+            'final_probs',
-            'fastrcnn_predictions/labels',
+            'final_labels',
        ]))
    img = cv2.imread(input_file, cv2.IMREAD_COLOR)
    results = detect_one_image(img, pred)
@@ -242,9 +247,9 @@ class EvalCallback(Callback):
    def _setup_graph(self):
        self.pred = self.trainer.get_predictor(
            ['image'],
-            ['fastrcnn_predictions/boxes',
+            ['final_boxes',
-             'fastrcnn_predictions/probs',
+             'final_probs',
-             'fastrcnn_predictions/labels'])
+             'final_labels'])
        self.df = PrefetchDataZMQ(get_eval_dataflow(), 1)
    def _before_train(self):

--- a/examples/FasterRCNN/viz.py
+++ b/examples/FasterRCNN/viz.py
@@ -63,19 +63,20 @@ def draw_predictions(img, boxes, scores):
    return viz.draw_boxes(img, boxes, tags)
-def draw_final_outputs(img, final_boxes, final_probs, final_labels):
+def draw_final_outputs(img, results):
    """
    Args:
        results: [DetectionResult]
    """
-    if final_boxes.shape[0] == 0:
+    if len(results) == 0:
        return img
    tags = []
-    for prob, label in zip(final_probs, final_labels):
+    for label, _, score in results:
        tags.append(
-            "{},{:.2f}".format(COCOMeta.class_names[label], prob))
+            "{},{:.2f}".format(COCOMeta.class_names[label], score))
-    return viz.draw_boxes(img, final_boxes, tags)
+    boxes = np.asarray([x.box for x in results])
+    return viz.draw_boxes(img, boxes, tags)
 def draw_mask(im, mask, alpha=0.5, color=None):