Commit dafdabf8 authored by Yuxin Wu

[FasterRCNN] postprocess return indices & use simpler results format

parent d3d368ff
...@@ -23,7 +23,12 @@ import config ...@@ -23,7 +23,12 @@ import config
DetectionResult = namedtuple( DetectionResult = namedtuple(
'DetectionResult', 'DetectionResult',
['class_id', 'boxes', 'scores']) ['class_id', 'box', 'score'])
"""
class_id: int, 1~NUM_CLASS
box: 4 float
score: float
"""
def detect_one_image(img, model_func): def detect_one_image(img, model_func):
...@@ -39,25 +44,15 @@ def detect_one_image(img, model_func): ...@@ -39,25 +44,15 @@ def detect_one_image(img, model_func):
[DetectionResult] [DetectionResult]
""" """
def group_results_by_class(boxes, probs, labels):
dic = defaultdict(list)
for box, prob, lab in zip(boxes, probs, labels):
dic[lab].append((box, prob))
def mapf(lab, values):
boxes = np.asarray([k[0] for k in values])
probs = np.asarray([k[1] for k in values])
return DetectionResult(lab, boxes, probs)
return [mapf(k, v) for k, v in six.iteritems(dic)]
resizer = CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE) resizer = CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE)
resized_img = resizer.augment(img) resized_img = resizer.augment(img)
scale = (resized_img.shape[0] * 1.0 / img.shape[0] + resized_img.shape[1] * 1.0 / img.shape[1]) / 2 scale = (resized_img.shape[0] * 1.0 / img.shape[0] + resized_img.shape[1] * 1.0 / img.shape[1]) / 2
boxes, probs, labels = model_func(resized_img) boxes, probs, labels = model_func(resized_img)
boxes = boxes / scale boxes = boxes / scale
boxes = clip_boxes(boxes, img.shape[:2]) boxes = clip_boxes(boxes, img.shape[:2])
return group_results_by_class(boxes, probs, labels)
results = [DetectionResult(*args) for args in zip(labels, boxes, probs)]
return results
def eval_on_dataflow(df, detect_func): def eval_on_dataflow(df, detect_func):
...@@ -74,17 +69,16 @@ def eval_on_dataflow(df, detect_func): ...@@ -74,17 +69,16 @@ def eval_on_dataflow(df, detect_func):
with tqdm.tqdm(total=df.size(), **get_tqdm_kwargs()) as pbar: with tqdm.tqdm(total=df.size(), **get_tqdm_kwargs()) as pbar:
for img, img_id in df.get_data(): for img, img_id in df.get_data():
results = detect_func(img) results = detect_func(img)
for classid, boxes, scores in results: for classid, box, score in results:
cat_id = COCOMeta.class_id_to_category_id[classid] cat_id = COCOMeta.class_id_to_category_id[classid]
boxes[:, 2] -= boxes[:, 0] box[2] -= box[0]
boxes[:, 3] -= boxes[:, 1] box[3] -= box[1]
for box, score in zip(boxes, scores): all_results.append({
all_results.append({ 'image_id': img_id,
'image_id': img_id, 'category_id': cat_id,
'category_id': cat_id, 'bbox': list(map(lambda x: float(round(x, 1)), box)),
'bbox': list(map(lambda x: float(round(x, 1)), box)), 'score': float(round(score, 2)),
'score': float(round(score, 2)), })
})
pbar.update(1) pbar.update(1)
return all_results return all_results
......
...@@ -468,16 +468,13 @@ def fastrcnn_predictions(boxes, probs): ...@@ -468,16 +468,13 @@ def fastrcnn_predictions(boxes, probs):
masks = tf.map_fn(f, (probs, boxes), dtype=tf.bool, masks = tf.map_fn(f, (probs, boxes), dtype=tf.bool,
parallel_iterations=10) # #cat x N parallel_iterations=10) # #cat x N
selected_indices = tf.where(masks) # #selection x 2, each is (cat_id, box_id) selected_indices = tf.where(masks) # #selection x 2, each is (cat_id, box_id)
boxes = tf.boolean_mask(boxes, masks) # #selection x 4
probs = tf.boolean_mask(probs, masks) probs = tf.boolean_mask(probs, masks)
labels = selected_indices[:, 0] + 1
# filter again by sorting scores # filter again by sorting scores
topk_probs, topk_indices = tf.nn.top_k( topk_probs, topk_indices = tf.nn.top_k(
probs, probs,
tf.minimum(config.RESULTS_PER_IM, tf.size(probs)), tf.minimum(config.RESULTS_PER_IM, tf.size(probs)),
sorted=False) sorted=False)
topk_probs = tf.identity(topk_probs, name='probs') filtered_selection = tf.gather(selected_indices, topk_indices)
topk_boxes = tf.gather(boxes, topk_indices, name='boxes') filtered_selection = tf.reverse(filtered_selection, axis=[1], name='filtered_indices')
topk_labels = tf.gather(labels, topk_indices, name='labels') return filtered_selection, topk_probs
return topk_boxes, topk_probs, topk_labels
...@@ -142,7 +142,11 @@ class Model(ModelDesc): ...@@ -142,7 +142,11 @@ class Model(ModelDesc):
tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS), anchors) tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS), anchors)
decoded_boxes = tf.identity(decoded_boxes, name='fastrcnn_all_boxes') decoded_boxes = tf.identity(decoded_boxes, name='fastrcnn_all_boxes')
pred_boxes, pred_probs, pred_labels = fastrcnn_predictions(decoded_boxes, label_probs) # Nx2. Each index into (#proposal, #category)
pred_indices, final_probs = fastrcnn_predictions(decoded_boxes, label_probs)
final_probs = tf.identity(final_probs, 'final_probs')
final_boxes = tf.gather_nd(decoded_boxes, pred_indices, name='final_boxes')
final_labels = tf.add(pred_indices[:, 1], 1, name='final_labels')
def _get_optimizer(self): def _get_optimizer(self):
lr = tf.get_variable('learning_rate', initializer=0.003, trainable=False) lr = tf.get_variable('learning_rate', initializer=0.003, trainable=False)
...@@ -167,9 +171,9 @@ def visualize(model_path, nr_visualize=50, output_dir='output'): ...@@ -167,9 +171,9 @@ def visualize(model_path, nr_visualize=50, output_dir='output'):
'generate_rpn_proposals/boxes', 'generate_rpn_proposals/boxes',
'generate_rpn_proposals/probs', 'generate_rpn_proposals/probs',
'fastrcnn_all_probs', 'fastrcnn_all_probs',
'fastrcnn_predictions/boxes', 'final_boxes',
'fastrcnn_predictions/probs', 'final_probs',
'fastrcnn_predictions/labels', 'final_labels',
])) ]))
df = get_train_dataflow() df = get_train_dataflow()
df.reset_state() df.reset_state()
...@@ -191,7 +195,8 @@ def visualize(model_path, nr_visualize=50, output_dir='output'): ...@@ -191,7 +195,8 @@ def visualize(model_path, nr_visualize=50, output_dir='output'):
# draw the scores for the above proposals # draw the scores for the above proposals
score_viz = draw_predictions(img, rpn_boxes[good_proposals_ind], all_probs[good_proposals_ind]) score_viz = draw_predictions(img, rpn_boxes[good_proposals_ind], all_probs[good_proposals_ind])
final_viz = draw_final_outputs(img, final_boxes, final_probs, final_labels) results = [DetectionResult(*args) for args in zip(final_labels, final_boxes, final_probs)]
final_viz = draw_final_outputs(img, results)
viz = tpviz.stack_patches([ viz = tpviz.stack_patches([
gt_viz, proposal_viz, gt_viz, proposal_viz,
...@@ -209,9 +214,9 @@ def offline_evaluate(model_path, output_file): ...@@ -209,9 +214,9 @@ def offline_evaluate(model_path, output_file):
session_init=get_model_loader(model_path), session_init=get_model_loader(model_path),
input_names=['image'], input_names=['image'],
output_names=[ output_names=[
'fastrcnn_predictions/boxes', 'final_boxes',
'fastrcnn_predictions/probs', 'final_probs',
'fastrcnn_predictions/labels', 'final_labels',
])) ]))
df = get_eval_dataflow() df = get_eval_dataflow()
df = PrefetchDataZMQ(df, 1) df = PrefetchDataZMQ(df, 1)
...@@ -227,9 +232,9 @@ def predict(model_path, input_file): ...@@ -227,9 +232,9 @@ def predict(model_path, input_file):
session_init=get_model_loader(model_path), session_init=get_model_loader(model_path),
input_names=['image'], input_names=['image'],
output_names=[ output_names=[
'fastrcnn_predictions/boxes', 'final_boxes',
'fastrcnn_predictions/probs', 'final_probs',
'fastrcnn_predictions/labels', 'final_labels',
])) ]))
img = cv2.imread(input_file, cv2.IMREAD_COLOR) img = cv2.imread(input_file, cv2.IMREAD_COLOR)
results = detect_one_image(img, pred) results = detect_one_image(img, pred)
...@@ -242,9 +247,9 @@ class EvalCallback(Callback): ...@@ -242,9 +247,9 @@ class EvalCallback(Callback):
def _setup_graph(self): def _setup_graph(self):
self.pred = self.trainer.get_predictor( self.pred = self.trainer.get_predictor(
['image'], ['image'],
['fastrcnn_predictions/boxes', ['final_boxes',
'fastrcnn_predictions/probs', 'final_probs',
'fastrcnn_predictions/labels']) 'final_labels'])
self.df = PrefetchDataZMQ(get_eval_dataflow(), 1) self.df = PrefetchDataZMQ(get_eval_dataflow(), 1)
def _before_train(self): def _before_train(self):
......
...@@ -63,19 +63,20 @@ def draw_predictions(img, boxes, scores): ...@@ -63,19 +63,20 @@ def draw_predictions(img, boxes, scores):
return viz.draw_boxes(img, boxes, tags) return viz.draw_boxes(img, boxes, tags)
def draw_final_outputs(img, final_boxes, final_probs, final_labels): def draw_final_outputs(img, results):
""" """
Args: Args:
results: [DetectionResult] results: [DetectionResult]
""" """
if final_boxes.shape[0] == 0: if len(results) == 0:
return img return img
tags = [] tags = []
for prob, label in zip(final_probs, final_labels): for label, _, score in results:
tags.append( tags.append(
"{},{:.2f}".format(COCOMeta.class_names[label], prob)) "{},{:.2f}".format(COCOMeta.class_names[label], score))
return viz.draw_boxes(img, final_boxes, tags) boxes = np.asarray([x.box for x in results])
return viz.draw_boxes(img, boxes, tags)
def draw_mask(im, mask, alpha=0.5, color=None): def draw_mask(im, mask, alpha=0.5, color=None):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment