Commit 2766bb5a authored by Yuxin Wu

[FasterRCNN] code clean-up and docs

parent 191d4691
......@@ -11,7 +11,7 @@ from tensorpack.utils import logger
from tensorpack.utils.argtools import memoized, log_once
from tensorpack.dataflow import (
MapData, imgaug, TestDataSpeed,
MapDataComponent, DataFromList)
MapDataComponent, DataFromList, PrefetchDataZMQ)
import tensorpack.utils.viz as tpviz
from tensorpack.utils.viz import interactive_imshow
......@@ -258,6 +258,7 @@ def get_train_dataflow(add_mask=False):
return ret
ds = MapData(ds, preprocess)
ds = PrefetchDataZMQ(ds, 1)
return ds
......@@ -271,6 +272,7 @@ def get_eval_dataflow():
assert im is not None, fname
return im
ds = MapDataComponent(ds, f, 0)
ds = PrefetchDataZMQ(ds, 1)
return ds
......
......@@ -91,8 +91,7 @@ def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
box_loss,
tf.cast(nr_valid, tf.float32), name='box_loss')
for k in [label_loss, box_loss, nr_valid, nr_pos] + summaries:
add_moving_summary(k)
add_moving_summary(*([label_loss, box_loss, nr_valid, nr_pos] + summaries))
return label_loss, box_loss
......@@ -165,6 +164,7 @@ def generate_rpn_proposals(boxes, scores, img_shape):
boxes: kx4 float
scores: k logits
"""
assert boxes.shape.ndims == 2, boxes.shape
if get_current_tower_context().is_training:
PRE_NMS_TOPK = config.TRAIN_PRE_NMS_TOPK
POST_NMS_TOPK = config.TRAIN_POST_NMS_TOPK
......@@ -213,6 +213,8 @@ def generate_rpn_proposals(boxes, scores, img_shape):
@under_name_scope()
def proposal_metrics(iou):
"""
Add summaries for RPN proposals.
Args:
iou: nxm, #proposal x #gt
"""
......@@ -233,6 +235,8 @@ def proposal_metrics(iou):
@under_name_scope()
def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
"""
Sample some ROIs from all proposals for training.
Args:
boxes: nx4 region proposals, floatbox
gt_boxes: mx4, floatbox
......@@ -240,8 +244,9 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
Returns:
sampled_boxes: tx4 floatbox, the rois
target_boxes: tx4 encoded box, the regression target
labels: t labels
sampled_labels: t labels, in [0, #class-1]. Positive means foreground.
fg_inds_wrt_gt: #fg indices, each in range [0, m-1].
It contains the matching GT of each foreground roi.
"""
iou = pairwise_iou(boxes, gt_boxes) # nxm
proposal_metrics(iou)
......@@ -287,8 +292,8 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
@under_name_scope()
def crop_and_resize(image, boxes, box_ind, crop_size):
"""
Better-aligned version of tf.image.crop_and_resize,
following our definition of floating point boxes.
Better-aligned version of tf.image.crop_and_resize, following our definition of floating point boxes.
Args:
image: NCHW
boxes: nx4, x1y1x2y2
......@@ -349,9 +354,6 @@ def roi_align(featuremap, boxes, output_shape):
Returns:
NxCxoHxoW
"""
image_shape = tf.shape(featuremap)[2:]
boxes = tf.stop_gradient(boxes) # TODO
# sample 4 locations per roi bin
ret = crop_and_resize(
......@@ -407,6 +409,7 @@ def fastrcnn_losses(labels, label_logits, fg_boxes, fg_box_logits):
tf.to_int32(fg_labels) - 1], axis=1) # #fgx2
fg_box_logits = tf.gather_nd(fg_box_logits, indices)
# some metrics to summarize
fg_label_pred = tf.argmax(tf.gather(label_logits, fg_inds), axis=1)
num_zero = tf.reduce_sum(tf.to_int32(tf.equal(fg_label_pred, 0)), name='num_zero')
false_negative = tf.truediv(num_zero, num_fg, name='false_negative')
......@@ -418,8 +421,7 @@ def fastrcnn_losses(labels, label_logits, fg_boxes, fg_box_logits):
box_loss = tf.truediv(
box_loss, tf.to_float(tf.shape(labels)[0]), name='box_loss')
for k in [label_loss, box_loss, accuracy, fg_accuracy, false_negative]:
add_moving_summary(k)
add_moving_summary(label_loss, box_loss, accuracy, fg_accuracy, false_negative)
return label_loss, box_loss
......@@ -456,10 +458,10 @@ def fastrcnn_predictions(boxes, probs):
box, prob, config.RESULTS_PER_IM, config.FASTRCNN_NMS_THRESH)
selection = tf.to_int32(tf.gather(ids, selection))
# sort available in TF>1.4.0
# selection = tf.contrib.framework.sort(selection, direction='ASCENDING')
sorted_selection, _ = tf.nn.top_k(-selection, k=tf.size(selection))
# sorted_selection = tf.contrib.framework.sort(selection, direction='ASCENDING')
sorted_selection = -tf.nn.top_k(-selection, k=tf.size(selection))[0]
mask = tf.sparse_to_dense(
sparse_indices=-sorted_selection,
sparse_indices=sorted_selection,
output_shape=output_shape,
sparse_values=True,
default_value=False)
......
......@@ -15,11 +15,9 @@ import tensorflow as tf
os.environ['TENSORPACK_TRAIN_API'] = 'v2' # will become default soon
from tensorpack import *
import tensorpack.tfutils.symbolic_functions as symbf
from tensorpack.tfutils.summary import add_moving_summary
from tensorpack.tfutils import optimizer, gradproc
import tensorpack.utils.viz as tpviz
from tensorpack.utils.concurrency import subproc_call
from tensorpack.utils.gpu import get_nr_gpu
......@@ -88,8 +86,6 @@ class Model(ModelDesc):
anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)
featuremap = pretrained_resnet_conv4(image, config.RESNET_NUM_BLOCK[:3])
rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, 1024, config.NUM_ANCHOR)
rpn_label_loss, rpn_box_loss = rpn_losses(
anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)
decoded_boxes = decode_bbox_target(rpn_box_logits, fm_anchors) # fHxfWxNAx4, floatbox
proposal_boxes, proposal_scores = generate_rpn_proposals(
......@@ -111,6 +107,11 @@ class Model(ModelDesc):
fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_head('fastrcnn', feature_fastrcnn, config.NUM_CLASS)
if is_training:
# rpn loss
rpn_label_loss, rpn_box_loss = rpn_losses(
anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)
# fastrcnn loss
fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0), [-1]) # fg inds w.r.t all samples
fg_sampled_boxes = tf.gather(rcnn_sampled_boxes, fg_inds_wrt_sample)
......@@ -142,7 +143,7 @@ class Model(ModelDesc):
tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS), anchors)
decoded_boxes = tf.identity(decoded_boxes, name='fastrcnn_all_boxes')
# Nx2. Each index into (#proposal, #category)
# indices: Nx2. Each index into (#proposal, #category)
pred_indices, final_probs = fastrcnn_predictions(decoded_boxes, label_probs)
final_probs = tf.identity(final_probs, 'final_probs')
final_boxes = tf.gather_nd(decoded_boxes, pred_indices, name='final_boxes')
......@@ -208,36 +209,18 @@ def visualize(model_path, nr_visualize=50, output_dir='output'):
pbar.update()
def offline_evaluate(model_path, output_file):
pred = OfflinePredictor(PredictConfig(
model=Model(),
session_init=get_model_loader(model_path),
input_names=['image'],
output_names=[
'final_boxes',
'final_probs',
'final_labels',
]))
def offline_evaluate(pred_func, output_file):
df = get_eval_dataflow()
df = PrefetchDataZMQ(df, 1)
all_results = eval_on_dataflow(df, lambda img: detect_one_image(img, pred))
all_results = eval_on_dataflow(
df, lambda img: detect_one_image(img, pred_func))
with open(output_file, 'w') as f:
json.dump(all_results, f)
print_evaluation_scores(output_file)
def predict(model_path, input_file):
pred = OfflinePredictor(PredictConfig(
model=Model(),
session_init=get_model_loader(model_path),
input_names=['image'],
output_names=[
'final_boxes',
'final_probs',
'final_labels',
]))
def predict(pred_func, input_file):
img = cv2.imread(input_file, cv2.IMREAD_COLOR)
results = detect_one_image(img, pred)
results = detect_one_image(img, pred_func)
final = draw_final_outputs(img, results)
viz = np.concatenate((img, final), axis=1)
tpviz.interactive_imshow(viz)
......@@ -247,10 +230,8 @@ class EvalCallback(Callback):
def _setup_graph(self):
self.pred = self.trainer.get_predictor(
['image'],
['final_boxes',
'final_probs',
'final_labels'])
self.df = PrefetchDataZMQ(get_eval_dataflow(), 1)
['final_boxes', 'final_probs', 'final_labels'])
self.df = get_eval_dataflow()
def _before_train(self):
EVAL_TIMES = 5 # eval 5 times during training
......@@ -288,18 +269,29 @@ if __name__ == '__main__':
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
if args.visualize or args.evaluate or args.predict:
# autotune is too slow for inference
os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
assert args.load
print_config()
if args.visualize:
visualize(args.load)
elif args.evaluate:
assert args.evaluate.endswith('.json')
# autotune is too slow for inference
os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
offline_evaluate(args.load, args.evaluate)
elif args.predict:
COCODetection(config.BASEDIR, 'train2014') # to load the class names into caches
predict(args.load, args.predict)
else:
pred = OfflinePredictor(PredictConfig(
model=Model(),
session_init=get_model_loader(args.load),
input_names=['image'],
output_names=[
'final_boxes',
'final_probs',
'final_labels',
]))
if args.evaluate:
assert args.evaluate.endswith('.json')
offline_evaluate(pred, args.evaluate)
elif args.predict:
COCODetection(config.BASEDIR, 'train2014') # to load the class names into caches
predict(pred, args.predict)
else:
logger.set_logger_dir(args.logdir)
print_config()
......@@ -322,7 +314,6 @@ if __name__ == '__main__':
[(warmup_epoch * factor, 1e-2),
(150000 * factor // stepnum, 1e-3),
(210000 * factor // stepnum, 1e-4)]),
HumanHyperParamSetter('learning_rate'),
EvalCallback(),
GPUUtilizationTracker(),
],
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment