Commit 0ef50616 authored by Yuxin Wu

fix fasterrcnn linting

parent 23446308
...@@ -27,18 +27,20 @@ def image_preprocess(image, bgr=True): ...@@ -27,18 +27,20 @@ def image_preprocess(image, bgr=True):
image = (image - image_mean) / image_std image = (image - image_mean) / image_std
return image return image
def get_bn(zero_init=False): def get_bn(zero_init=False):
if zero_init: if zero_init:
return lambda x, name: BatchNorm('bn', x, gamma_init=tf.zeros_initializer()) return lambda x, name: BatchNorm('bn', x, gamma_init=tf.zeros_initializer())
else: else:
return lambda x, name: BatchNorm('bn', x) return lambda x, name: BatchNorm('bn', x)
def resnet_shortcut(l, n_out, stride, nl=tf.identity): def resnet_shortcut(l, n_out, stride, nl=tf.identity):
data_format = get_arg_scope()['Conv2D']['data_format'] data_format = get_arg_scope()['Conv2D']['data_format']
n_in = l.get_shape().as_list()[1 if data_format == 'NCHW' else 3] n_in = l.get_shape().as_list()[1 if data_format == 'NCHW' else 3]
if n_in != n_out: # change dimension when channel is not the same if n_in != n_out: # change dimension when channel is not the same
if stride == 2 and 'group3' not in tf.get_variable_scope().name: if stride == 2 and 'group3' not in tf.get_variable_scope().name:
l = l[:,:,:-1,:-1] l = l[:, :, :-1, :-1]
return Conv2D('convshortcut', l, n_out, 1, return Conv2D('convshortcut', l, n_out, 1,
stride=stride, padding='VALID', nl=nl) stride=stride, padding='VALID', nl=nl)
else: else:
...@@ -52,7 +54,7 @@ def resnet_bottleneck(l, ch_out, stride): ...@@ -52,7 +54,7 @@ def resnet_bottleneck(l, ch_out, stride):
l, shortcut = l, l l, shortcut = l, l
l = Conv2D('conv1', l, ch_out, 1, nl=BNReLU) l = Conv2D('conv1', l, ch_out, 1, nl=BNReLU)
if stride == 2 and 'group3' not in tf.get_variable_scope().name: if stride == 2 and 'group3' not in tf.get_variable_scope().name:
l = tf.pad(l, [[0,0],[0,0],[0,1],[0,1]]) l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
l = Conv2D('conv2', l, ch_out, 3, stride=2, nl=BNReLU, padding='VALID') l = Conv2D('conv2', l, ch_out, 3, stride=2, nl=BNReLU, padding='VALID')
else: else:
l = Conv2D('conv2', l, ch_out, 3, stride=stride, nl=BNReLU) l = Conv2D('conv2', l, ch_out, 3, stride=stride, nl=BNReLU)
...@@ -70,14 +72,15 @@ def resnet_group(l, name, block_func, features, count, stride): ...@@ -70,14 +72,15 @@ def resnet_group(l, name, block_func, features, count, stride):
l = tf.nn.relu(l) l = tf.nn.relu(l)
return l return l
def pretrained_resnet_conv4(image, num_blocks): def pretrained_resnet_conv4(image, num_blocks):
assert len(num_blocks) == 3 assert len(num_blocks) == 3
with argscope([Conv2D, MaxPooling, BatchNorm], data_format='NCHW'), \ with argscope([Conv2D, MaxPooling, BatchNorm], data_format='NCHW'), \
argscope(Conv2D, nl=tf.identity, use_bias=False), \ argscope(Conv2D, nl=tf.identity, use_bias=False), \
argscope(BatchNorm, use_local_stat=False): argscope(BatchNorm, use_local_stat=False):
l = tf.pad(image, [[0,0],[0,0],[2,3],[2,3]]) l = tf.pad(image, [[0, 0], [0, 0], [2, 3], [2, 3]])
l = Conv2D('conv0', l, 64, 7, stride=2, nl=BNReLU, padding='VALID') l = Conv2D('conv0', l, 64, 7, stride=2, nl=BNReLU, padding='VALID')
l = tf.pad(l, [[0,0],[0,0],[0,1],[0,1]]) l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
l = MaxPooling('pool0', l, shape=3, stride=2, padding='VALID') l = MaxPooling('pool0', l, shape=3, stride=2, padding='VALID')
l = resnet_group(l, 'group0', resnet_bottleneck, 64, num_blocks[0], 1) l = resnet_group(l, 'group0', resnet_bottleneck, 64, num_blocks[0], 1)
# TODO replace var by const to enable folding # TODO replace var by const to enable folding
......
...@@ -18,6 +18,7 @@ __all__ = ['COCODetection', 'COCOMeta'] ...@@ -18,6 +18,7 @@ __all__ = ['COCODetection', 'COCOMeta']
COCO_NUM_CATEGORY = 80 COCO_NUM_CATEGORY = 80
class _COCOMeta(object): class _COCOMeta(object):
INSTANCE_TO_BASEDIR = { INSTANCE_TO_BASEDIR = {
'train2014': 'train2014', 'train2014': 'train2014',
...@@ -46,8 +47,10 @@ class _COCOMeta(object): ...@@ -46,8 +47,10 @@ class _COCOMeta(object):
self.class_id_to_category_id = { self.class_id_to_category_id = {
v: k for k, v in self.category_id_to_class_id.items()} v: k for k, v in self.category_id_to_class_id.items()}
COCOMeta = _COCOMeta() COCOMeta = _COCOMeta()
class COCODetection(object): class COCODetection(object):
def __init__(self, basedir, name): def __init__(self, basedir, name):
assert name in COCOMeta.INSTANCE_TO_BASEDIR.keys(), name assert name in COCOMeta.INSTANCE_TO_BASEDIR.keys(), name
...@@ -126,7 +129,7 @@ class COCODetection(object): ...@@ -126,7 +129,7 @@ class COCODetection(object):
valid_objs.append(obj) valid_objs.append(obj)
# all geometrically-valid boxes are returned # all geometrically-valid boxes are returned
boxes = np.asarray([obj['bbox'] for obj in valid_objs], dtype='float32') # (n, 4) boxes = np.asarray([obj['bbox'] for obj in valid_objs], dtype='float32') # (n, 4)
cls = np.asarray([ cls = np.asarray([
COCOMeta.category_id_to_class_id[obj['category_id']] COCOMeta.category_id_to_class_id[obj['category_id']]
for obj in valid_objs], dtype='int32') # (n,) for obj in valid_objs], dtype='int32') # (n,)
...@@ -172,4 +175,4 @@ if __name__ == '__main__': ...@@ -172,4 +175,4 @@ if __name__ == '__main__':
c = COCODetection('train') c = COCODetection('train')
gt_boxes = c.load() gt_boxes = c.load()
print("#Images:", len(gt_boxes)) print("#Images:", len(gt_boxes))
c.print_class_histogram(bb) c.print_class_histogram(gt_boxes)
...@@ -10,6 +10,7 @@ from tensorpack.utils import logger ...@@ -10,6 +10,7 @@ from tensorpack.utils import logger
import config import config
class DataFromListOfDict(RNGDataFlow): class DataFromListOfDict(RNGDataFlow):
def __init__(self, lst, keys, shuffle=False): def __init__(self, lst, keys, shuffle=False):
self._lst = lst self._lst = lst
...@@ -66,10 +67,11 @@ def box_to_point8(boxes): ...@@ -66,10 +67,11 @@ def box_to_point8(boxes):
Returns: Returns:
(nx4)x2 (nx4)x2
""" """
b = boxes[:,[0,1,2,3,0,3,2,1]] b = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]]
b = b.reshape((-1, 2)) b = b.reshape((-1, 2))
return b return b
def point8_to_box(points): def point8_to_box(points):
""" """
Args: Args:
...@@ -78,8 +80,8 @@ def point8_to_box(points): ...@@ -78,8 +80,8 @@ def point8_to_box(points):
nx4 boxes (x1y1x2y2) nx4 boxes (x1y1x2y2)
""" """
p = points.reshape((-1, 4, 2)) p = points.reshape((-1, 4, 2))
minxy = p.min(axis=1) #nx2 minxy = p.min(axis=1) # nx2
maxxy = p.max(axis=1) #nx2 maxxy = p.max(axis=1) # nx2
return np.concatenate((minxy, maxxy), axis=1) return np.concatenate((minxy, maxxy), axis=1)
...@@ -90,9 +92,9 @@ def clip_boxes(boxes, shape): ...@@ -90,9 +92,9 @@ def clip_boxes(boxes, shape):
shape: h, w shape: h, w
""" """
h, w = shape h, w = shape
boxes[:,[0,1]] = np.maximum(boxes[:,[0,1]], 0) boxes[:, [0, 1]] = np.maximum(boxes[:, [0, 1]], 0)
boxes[:,2] = np.minimum(boxes[:,2], w) boxes[:, 2] = np.minimum(boxes[:, 2], w)
boxes[:,3] = np.minimum(boxes[:,3], h) boxes[:, 3] = np.minimum(boxes[:, 3], h)
return boxes return boxes
......
...@@ -43,8 +43,8 @@ FASTRCNN_FG_THRESH = 0.5 ...@@ -43,8 +43,8 @@ FASTRCNN_FG_THRESH = 0.5
FASTRCNN_FG_RATIO = (0.1, 0.25) FASTRCNN_FG_RATIO = (0.1, 0.25)
# testing ----------------------- # testing -----------------------
TEST_PRE_NMS_TOPK= 6000 TEST_PRE_NMS_TOPK = 6000
TEST_POST_NMS_TOPK= 1000 TEST_POST_NMS_TOPK = 1000
FASTRCNN_NMS_THRESH = 0.5 FASTRCNN_NMS_THRESH = 0.5
RESULT_SCORE_THRESH = 0.05 RESULT_SCORE_THRESH = 0.05
RESULTS_PER_IM = 100 RESULTS_PER_IM = 100
...@@ -23,9 +23,11 @@ from common import ( ...@@ -23,9 +23,11 @@ from common import (
box_to_point8, point8_to_box) box_to_point8, point8_to_box)
import config import config
class MalformedData(BaseException): class MalformedData(BaseException):
pass pass
@memoized @memoized
def get_all_anchors(): def get_all_anchors():
""" """
...@@ -61,7 +63,7 @@ def get_all_anchors(): ...@@ -61,7 +63,7 @@ def get_all_anchors():
# FSxFSxAx4 # FSxFSxAx4
assert np.all(field_of_anchors == field_of_anchors.astype('int32')) assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
field_of_anchors = field_of_anchors.astype('float32') field_of_anchors = field_of_anchors.astype('float32')
field_of_anchors[:,:,:,[2,3]] += 1 field_of_anchors[:, :, :, [2, 3]] += 1
return field_of_anchors return field_of_anchors
...@@ -91,10 +93,10 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes): ...@@ -91,10 +93,10 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
bbox_iou_float = get_iou_callable() bbox_iou_float = get_iou_callable()
NA, NB = len(anchors), len(gt_boxes) NA, NB = len(anchors), len(gt_boxes)
assert NB > 0 # empty images should have been filtered already assert NB > 0 # empty images should have been filtered already
box_ious = bbox_iou_float(anchors, gt_boxes) # NA x NB box_ious = bbox_iou_float(anchors, gt_boxes) # NA x NB
ious_argmax_per_anchor = box_ious.argmax(axis=1) # NA, ious_argmax_per_anchor = box_ious.argmax(axis=1) # NA,
ious_max_per_anchor = box_ious.max(axis=1) ious_max_per_anchor = box_ious.max(axis=1)
ious_max_per_gt = np.amax(box_ious, axis=0, keepdims=True) # 1xNB ious_max_per_gt = np.amax(box_ious, axis=0, keepdims=True) # 1xNB
# for each gt, find all those anchors (including ties) that has the max ious with it # for each gt, find all those anchors (including ties) that has the max ious with it
anchors_with_max_iou_per_gt = np.where(box_ious == ious_max_per_gt)[0] anchors_with_max_iou_per_gt = np.where(box_ious == ious_max_per_gt)[0]
...@@ -131,10 +133,11 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes): ...@@ -131,10 +133,11 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
# Set anchor boxes: the best gt_box for each fg anchor # Set anchor boxes: the best gt_box for each fg anchor
anchor_boxes = np.zeros((NA, 4), dtype='float32') anchor_boxes = np.zeros((NA, 4), dtype='float32')
fg_boxes = gt_boxes[ious_argmax_per_anchor[fg_inds],:] fg_boxes = gt_boxes[ious_argmax_per_anchor[fg_inds], :]
anchor_boxes[fg_inds, :] = fg_boxes anchor_boxes[fg_inds, :] = fg_boxes
return anchor_labels, anchor_boxes return anchor_labels, anchor_boxes
def get_rpn_anchor_input(im, boxes, klass, is_crowd): def get_rpn_anchor_input(im, boxes, klass, is_crowd):
""" """
Args: Args:
...@@ -157,21 +160,21 @@ def get_rpn_anchor_input(im, boxes, klass, is_crowd): ...@@ -157,21 +160,21 @@ def get_rpn_anchor_input(im, boxes, klass, is_crowd):
def filter_box_inside(im, boxes): def filter_box_inside(im, boxes):
h, w = im.shape[:2] h, w = im.shape[:2]
indices = np.where( indices = np.where(
(boxes[:,0] >= 0) & (boxes[:, 0] >= 0) &
(boxes[:,1] >= 0) & (boxes[:, 1] >= 0) &
(boxes[:,2] <= w) & (boxes[:, 2] <= w) &
(boxes[:,3] <= h))[0] (boxes[:, 3] <= h))[0]
return indices return indices
crowd_boxes = boxes[is_crowd == 1] crowd_boxes = boxes[is_crowd == 1]
non_crowd_boxes = boxes[is_crowd == 0] non_crowd_boxes = boxes[is_crowd == 0]
# fHxfWxAx4 # fHxfWxAx4
featuremap_anchors = ALL_ANCHORS[:featureH,:featureW,:,:] featuremap_anchors = ALL_ANCHORS[:featureH, :featureW, :, :]
featuremap_anchors_flatten = featuremap_anchors.reshape((-1, 4)) featuremap_anchors_flatten = featuremap_anchors.reshape((-1, 4))
# only use anchors inside the image # only use anchors inside the image
inside_ind = filter_box_inside(im, featuremap_anchors_flatten) inside_ind = filter_box_inside(im, featuremap_anchors_flatten)
inside_anchors = featuremap_anchors_flatten[inside_ind,:] inside_anchors = featuremap_anchors_flatten[inside_ind, :]
anchor_labels, anchor_boxes = get_anchor_labels(inside_anchors, non_crowd_boxes, crowd_boxes) anchor_labels, anchor_boxes = get_anchor_labels(inside_anchors, non_crowd_boxes, crowd_boxes)
...@@ -203,6 +206,7 @@ def read_and_augment_images(ds): ...@@ -203,6 +206,7 @@ def read_and_augment_images(ds):
augs = [CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE), augs = [CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
imgaug.Flip(horiz=True)] imgaug.Flip(horiz=True)]
ds = AugmentImageComponents(ds, augs, index=(0,), coords_index=(1,)) ds = AugmentImageComponents(ds, augs, index=(0,), coords_index=(1,))
def unmapf(points): def unmapf(points):
boxes = point8_to_box(points) boxes = point8_to_box(points)
return boxes return boxes
...@@ -241,10 +245,12 @@ def get_train_dataflow(): ...@@ -241,10 +245,12 @@ def get_train_dataflow():
ds = MapData(ds, add_anchor_to_dp) ds = MapData(ds, add_anchor_to_dp)
return ds return ds
def get_eval_dataflow(): def get_eval_dataflow():
imgs = COCODetection.load_many(config.BASEDIR, config.VAL_DATASET, add_gt=False) imgs = COCODetection.load_many(config.BASEDIR, config.VAL_DATASET, add_gt=False)
# no filter for training # no filter for training
ds = DataFromListOfDict(imgs, ['file_name', 'id']) ds = DataFromListOfDict(imgs, ['file_name', 'id'])
def f(fname): def f(fname):
im = cv2.imread(fname, cv2.IMREAD_COLOR) im = cv2.imread(fname, cv2.IMREAD_COLOR)
assert im is not None, fname assert im is not None, fname
...@@ -252,8 +258,8 @@ def get_eval_dataflow(): ...@@ -252,8 +258,8 @@ def get_eval_dataflow():
ds = MapDataComponent(ds, f, 0) ds = MapDataComponent(ds, f, 0)
return ds return ds
if __name__ == '__main__': if __name__ == '__main__':
#logger.setLevel(logging.DEBUG)
from tensorpack.dataflow import PrintData from tensorpack.dataflow import PrintData
ds = get_train_dataflow('/datasets01/COCO/060817') ds = get_train_dataflow('/datasets01/COCO/060817')
ds = PrintData(ds, 100) ds = PrintData(ds, 100)
...@@ -261,6 +267,3 @@ if __name__ == '__main__': ...@@ -261,6 +267,3 @@ if __name__ == '__main__':
ds.reset_state() ds.reset_state()
for k in ds.get_data(): for k in ds.get_data():
pass pass
#import IPython as IP; IP.embed()
...@@ -25,6 +25,7 @@ DetectionResult = namedtuple( ...@@ -25,6 +25,7 @@ DetectionResult = namedtuple(
'DetectionResult', 'DetectionResult',
['class_id', 'boxes', 'scores']) ['class_id', 'boxes', 'scores'])
@memoized @memoized
def get_tf_nms(): def get_tf_nms():
""" """
...@@ -59,8 +60,8 @@ def nms_fastrcnn_results(boxes, probs): ...@@ -59,8 +60,8 @@ def nms_fastrcnn_results(boxes, probs):
if ids.size == 0: if ids.size == 0:
continue continue
probs_k = probs[ids, klass].flatten() probs_k = probs[ids, klass].flatten()
boxes_k = boxes[ids,:] boxes_k = boxes[ids, :]
selected_ids = nms_func(boxes_k[:,[1,0,3,2]], probs_k) selected_ids = nms_func(boxes_k[:, [1, 0, 3, 2]], probs_k)
selected_boxes = boxes_k[selected_ids, :].copy() selected_boxes = boxes_k[selected_ids, :].copy()
ret.append(DetectionResult(klass, selected_boxes, probs_k[selected_ids])) ret.append(DetectionResult(klass, selected_boxes, probs_k[selected_ids]))
...@@ -73,7 +74,7 @@ def nms_fastrcnn_results(boxes, probs): ...@@ -73,7 +74,7 @@ def nms_fastrcnn_results(boxes, probs):
keep_ids = np.where(scores >= score_thresh)[0] keep_ids = np.where(scores >= score_thresh)[0]
if len(keep_ids): if len(keep_ids):
newret.append(DetectionResult( newret.append(DetectionResult(
klass, boxes[keep_ids,:], scores[keep_ids])) klass, boxes[keep_ids, :], scores[keep_ids]))
ret = newret ret = newret
return ret return ret
...@@ -115,8 +116,8 @@ def eval_on_dataflow(df, detect_func): ...@@ -115,8 +116,8 @@ def eval_on_dataflow(df, detect_func):
results = detect_func(img) results = detect_func(img)
for classid, boxes, scores in results: for classid, boxes, scores in results:
cat_id = COCOMeta.class_id_to_category_id[classid] cat_id = COCOMeta.class_id_to_category_id[classid]
boxes[:,2] -= boxes[:,0] boxes[:, 2] -= boxes[:, 0]
boxes[:,3] -= boxes[:,1] boxes[:, 3] -= boxes[:, 1]
for box, score in zip(boxes, scores): for box, score in zip(boxes, scores):
all_results.append({ all_results.append({
'image_id': img_id, 'image_id': img_id,
...@@ -138,13 +139,7 @@ def print_evaluation_scores(json_file): ...@@ -138,13 +139,7 @@ def print_evaluation_scores(json_file):
cocoDt = coco.loadRes(json_file) cocoDt = coco.loadRes(json_file)
imgIds = sorted(coco.getImgIds()) imgIds = sorted(coco.getImgIds())
cocoEval = COCOeval(coco, cocoDt, 'bbox') cocoEval = COCOeval(coco, cocoDt, 'bbox')
cocoEval.params.imgIds = imgIds cocoEval.params.imgIds = imgIds
cocoEval.evaluate() cocoEval.evaluate()
cocoEval.accumulate() cocoEval.accumulate()
cocoEval.summarize() cocoEval.summarize()
if __name__ == '__main__':
ds = get_eval_dataflow('/home/yuxinwu/data/COCO/')
print("Size: ", ds.size())
TestDataSpeed(ds, 1000).start()
...@@ -13,6 +13,7 @@ from tensorpack.models import Conv2D, FullyConnected ...@@ -13,6 +13,7 @@ from tensorpack.models import Conv2D, FullyConnected
from utils.box_ops import pairwise_iou from utils.box_ops import pairwise_iou
import config import config
def rpn_head(featuremap): def rpn_head(featuremap):
with tf.variable_scope('rpn'), \ with tf.variable_scope('rpn'), \
argscope(Conv2D, data_format='NCHW', argscope(Conv2D, data_format='NCHW',
...@@ -23,12 +24,12 @@ def rpn_head(featuremap): ...@@ -23,12 +24,12 @@ def rpn_head(featuremap):
box_logits = Conv2D('box', hidden, 4 * config.NR_ANCHOR, 1) box_logits = Conv2D('box', hidden, 4 * config.NR_ANCHOR, 1)
# 1, NA(*4), im/16, im/16 (NCHW) # 1, NA(*4), im/16, im/16 (NCHW)
label_logits = tf.transpose(label_logits, [0, 2, 3, 1]) # 1xfHxfWxNA label_logits = tf.transpose(label_logits, [0, 2, 3, 1]) # 1xfHxfWxNA
label_logits = tf.squeeze(label_logits, 0) # fHxfWxNA label_logits = tf.squeeze(label_logits, 0) # fHxfWxNA
shp = tf.shape(box_logits) # 1x(NAx4)xfHxfW shp = tf.shape(box_logits) # 1x(NAx4)xfHxfW
box_logits = tf.transpose(box_logits, [0, 2, 3, 1]) # 1xfHxfWx(NAx4) box_logits = tf.transpose(box_logits, [0, 2, 3, 1]) # 1xfHxfWx(NAx4)
box_logits = tf.reshape(box_logits, tf.stack([shp[2], shp[3], config.NR_ANCHOR, 4])) # fHxfWxNAx4 box_logits = tf.reshape(box_logits, tf.stack([shp[2], shp[3], config.NR_ANCHOR, 4])) # fHxfWxNAx4
return label_logits, box_logits return label_logits, box_logits
...@@ -61,14 +62,14 @@ def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits): ...@@ -61,14 +62,14 @@ def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
valid_prediction = tf.cast(valid_label_prob > th, tf.int32) valid_prediction = tf.cast(valid_label_prob > th, tf.int32)
prediction_corr = tf.count_nonzero(tf.equal(valid_prediction, valid_anchor_labels)) prediction_corr = tf.count_nonzero(tf.equal(valid_prediction, valid_anchor_labels))
pos_prediction_corr = tf.count_nonzero(tf.logical_and( pos_prediction_corr = tf.count_nonzero(tf.logical_and(
valid_label_prob > th, valid_label_prob > th,
tf.equal(valid_prediction, valid_anchor_labels))) tf.equal(valid_prediction, valid_anchor_labels)))
summaries.append(tf.truediv( summaries.append(tf.truediv(
pos_prediction_corr, pos_prediction_corr,
nr_pos, name='recall_th{}'.format(th))) nr_pos, name='recall_th{}'.format(th)))
summaries.append(tf.truediv( summaries.append(tf.truediv(
prediction_corr, prediction_corr,
nr_valid, name='accuracy_th{}'.format(th))) nr_valid, name='accuracy_th{}'.format(th)))
label_loss = tf.nn.sigmoid_cross_entropy_with_logits( label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits) labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits)
...@@ -116,6 +117,7 @@ def decode_bbox_target(box_predictions, anchors): ...@@ -116,6 +117,7 @@ def decode_bbox_target(box_predictions, anchors):
out = tf.squeeze(tf.concat([x1y1, x2y2], axis=2), axis=1, name='output') out = tf.squeeze(tf.concat([x1y1, x2y2], axis=2), axis=1, name='output')
return out return out
@under_name_scope() @under_name_scope()
def encode_bbox_target(boxes, anchors): def encode_bbox_target(boxes, anchors):
""" """
...@@ -179,11 +181,10 @@ def generate_rpn_proposals(boxes, scores, img_shape): ...@@ -179,11 +181,10 @@ def generate_rpn_proposals(boxes, scores, img_shape):
topk_boxes_x1y1, topk_boxes_x2y2 = tf.split(topk_boxes_x1y1x2y2, 2, axis=1) topk_boxes_x1y1, topk_boxes_x2y2 = tf.split(topk_boxes_x1y1x2y2, 2, axis=1)
# nx1x2 each # nx1x2 each
wbhb = tf.squeeze(topk_boxes_x2y2 - topk_boxes_x1y1, axis=1) wbhb = tf.squeeze(topk_boxes_x2y2 - topk_boxes_x1y1, axis=1)
valid = tf.reduce_all(wbhb > config.RPN_MIN_SIZE, axis=1) #n, valid = tf.reduce_all(wbhb > config.RPN_MIN_SIZE, axis=1) # n,
topk_valid_boxes_x1y1x2y2 = tf.boolean_mask(topk_boxes_x1y1x2y2, valid) topk_valid_boxes_x1y1x2y2 = tf.boolean_mask(topk_boxes_x1y1x2y2, valid)
topk_valid_scores = tf.boolean_mask(topk_scores, valid) topk_valid_scores = tf.boolean_mask(topk_scores, valid)
topk_valid_boxes_y1x1y2x2 = tf.reshape( topk_valid_boxes_y1x1y2x2 = tf.reshape(
tf.reverse(topk_valid_boxes_x1y1x2y2, axis=[2]), tf.reverse(topk_valid_boxes_x1y1x2y2, axis=[2]),
(-1, 4), name='nms_input_boxes') (-1, 4), name='nms_input_boxes')
...@@ -228,7 +229,7 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels): ...@@ -228,7 +229,7 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
# find best gt box for each roi box # find best gt box for each roi box
best_iou_ind = tf.argmax(iou, axis=1) # n, each in 1~m best_iou_ind = tf.argmax(iou, axis=1) # n, each in 1~m
best_iou = tf.reduce_max(iou, axis=1) # n, best_iou = tf.reduce_max(iou, axis=1) # n,
best_gt_boxes = tf.gather(gt_boxes, best_iou_ind) #nx4 best_gt_boxes = tf.gather(gt_boxes, best_iou_ind) # nx4
best_gt_labels = tf.gather(gt_labels, best_iou_ind) # n, each in 1~C best_gt_labels = tf.gather(gt_labels, best_iou_ind) # n, each in 1~C
fg_mask = best_iou >= config.FASTRCNN_FG_THRESH fg_mask = best_iou >= config.FASTRCNN_FG_THRESH
...@@ -255,17 +256,16 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels): ...@@ -255,17 +256,16 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
# don't have to add gt for training, but add it anyway # don't have to add gt for training, but add it anyway
fg_inds = tf.reshape(tf.where(fg_mask), [-1]) fg_inds = tf.reshape(tf.where(fg_mask), [-1])
fg_inds = tf.concat([fg_inds, fg_inds = tf.concat([fg_inds, tf.cast(
tf.cast( tf.range(tf.size(gt_labels)) + tf.shape(boxes)[0],
tf.range(tf.size(gt_labels)) + tf.shape(boxes)[0], tf.int64)], 0)
tf.int64)], 0)
num_fg = tf.size(fg_inds) num_fg = tf.size(fg_inds)
num_fg = tf.minimum(int( num_fg = tf.minimum(int(
config.FASTRCNN_BATCH_PER_IM * config.FASTRCNN_FG_RATIO[1]), config.FASTRCNN_BATCH_PER_IM * config.FASTRCNN_FG_RATIO[1]),
num_fg, name='num_fg') num_fg, name='num_fg')
fg_inds = tf.slice(tf.random_shuffle(fg_inds), [0], [num_fg]) fg_inds = tf.slice(tf.random_shuffle(fg_inds), [0], [num_fg])
bg_inds = tf.where(tf.logical_not(fg_mask))[:,0] bg_inds = tf.where(tf.logical_not(fg_mask))[:, 0]
num_bg = tf.size(bg_inds) num_bg = tf.size(bg_inds)
num_bg = tf.minimum(config.FASTRCNN_BATCH_PER_IM - num_fg, num_bg) num_bg = tf.minimum(config.FASTRCNN_BATCH_PER_IM - num_fg, num_bg)
num_bg = tf.minimum( num_bg = tf.minimum(
...@@ -335,10 +335,10 @@ def roi_align(featuremap, boxes, output_shape): ...@@ -335,10 +335,10 @@ def roi_align(featuremap, boxes, output_shape):
return tf.concat([ny0, nx0, ny0 + nh, nx0 + nw], axis=1) return tf.concat([ny0, nx0, ny0 + nh, nx0 + nw], axis=1)
image_shape = tf.shape(featuremap)[2:] image_shape = tf.shape(featuremap)[2:]
featuremap = tf.transpose(featuremap, [0, 2, 3, 1]) # to nhwc featuremap = tf.transpose(featuremap, [0, 2, 3, 1]) # to nhwc
# sample 4 locations per roi bin # sample 4 locations per roi bin
boxes = transform_fpcoor_for_tf(boxes, image_shape, [output_shape * 2, output_shape * 2]) boxes = transform_fpcoor_for_tf(boxes, image_shape, [output_shape * 2, output_shape * 2])
boxes = tf.stop_gradient(boxes) # TODO boxes = tf.stop_gradient(boxes) # TODO
ret = tf.image.crop_and_resize( ret = tf.image.crop_and_resize(
featuremap, boxes, tf.zeros([tf.shape(boxes)[0]], dtype=tf.int32), featuremap, boxes, tf.zeros([tf.shape(boxes)[0]], dtype=tf.int32),
crop_size=[output_shape * 2, output_shape * 2]) crop_size=[output_shape * 2, output_shape * 2])
...@@ -387,6 +387,7 @@ def fastrcnn_predict_boxes(labels, box_logits): ...@@ -387,6 +387,7 @@ def fastrcnn_predict_boxes(labels, box_logits):
fg_box_logits = tf.gather_nd(box_logits, tf.stop_gradient(ind_2d)) fg_box_logits = tf.gather_nd(box_logits, tf.stop_gradient(ind_2d))
return fg_ind, fg_box_logits return fg_ind, fg_box_logits
@under_name_scope() @under_name_scope()
def fastrcnn_losses(labels, boxes, label_logits, box_logits): def fastrcnn_losses(labels, boxes, label_logits, box_logits):
""" """
...@@ -405,7 +406,7 @@ def fastrcnn_losses(labels, boxes, label_logits, box_logits): ...@@ -405,7 +406,7 @@ def fastrcnn_losses(labels, boxes, label_logits, box_logits):
# n x c-1 x 4 -> nfg x 4 # n x c-1 x 4 -> nfg x 4
fg_ind, fg_box_logits = fastrcnn_predict_boxes(labels, box_logits) fg_ind, fg_box_logits = fastrcnn_predict_boxes(labels, box_logits)
fg_boxes = tf.gather(boxes, fg_ind) # nfgx4 fg_boxes = tf.gather(boxes, fg_ind) # nfgx4
fg_label_pred = tf.argmax(tf.gather(label_logits, fg_ind), axis=1) fg_label_pred = tf.argmax(tf.gather(label_logits, fg_ind), axis=1)
num_zero = tf.reduce_sum(tf.cast(tf.equal(fg_label_pred, 0), tf.int32), name='num_zero') num_zero = tf.reduce_sum(tf.cast(tf.equal(fg_label_pred, 0), tf.int32), name='num_zero')
......
...@@ -77,7 +77,7 @@ class Model(ModelDesc): ...@@ -77,7 +77,7 @@ class Model(ModelDesc):
rpn_label_loss, rpn_box_loss = rpn_losses( rpn_label_loss, rpn_box_loss = rpn_losses(
anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits) anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)
decoded_boxes = decode_bbox_target(rpn_box_logits, fm_anchors) # (fHxfWxNA)x4, floatbox decoded_boxes = decode_bbox_target(rpn_box_logits, fm_anchors) # (fHxfWxNA)x4, floatbox
proposal_boxes, proposal_scores = generate_rpn_proposals( proposal_boxes, proposal_scores = generate_rpn_proposals(
decoded_boxes, decoded_boxes,
tf.reshape(rpn_label_logits, [-1]), tf.reshape(rpn_label_logits, [-1]),
...@@ -88,15 +88,15 @@ class Model(ModelDesc): ...@@ -88,15 +88,15 @@ class Model(ModelDesc):
proposal_boxes, gt_boxes, gt_labels) proposal_boxes, gt_boxes, gt_labels)
boxes_on_featuremap = rcnn_sampled_boxes * (1.0 / config.ANCHOR_STRIDE) boxes_on_featuremap = rcnn_sampled_boxes * (1.0 / config.ANCHOR_STRIDE)
roi_resized = roi_align(featuremap, boxes_on_featuremap, 14) roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)
feature_fastrcnn = resnet_conv5(roi_resized) #nxc feature_fastrcnn = resnet_conv5(roi_resized) # nxc
fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_head(feature_fastrcnn, config.NUM_CLASS) fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_head(feature_fastrcnn, config.NUM_CLASS)
fastrcnn_label_loss, fastrcnn_box_loss = fastrcnn_losses( fastrcnn_label_loss, fastrcnn_box_loss = fastrcnn_losses(
rcnn_labels, rcnn_encoded_boxes, fastrcnn_label_logits, fastrcnn_box_logits) rcnn_labels, rcnn_encoded_boxes, fastrcnn_label_logits, fastrcnn_box_logits)
wd_cost = regularize_cost( wd_cost = regularize_cost(
'(?:group1|group2|group3|rpn|fastrcnn)/.*W', '(?:group1|group2|group3|rpn|fastrcnn)/.*W',
l2_regularizer(1e-4), name='wd_cost') l2_regularizer(1e-4), name='wd_cost')
self.cost = tf.add_n([ self.cost = tf.add_n([
rpn_label_loss, rpn_box_loss, rpn_label_loss, rpn_box_loss,
...@@ -107,19 +107,18 @@ class Model(ModelDesc): ...@@ -107,19 +107,18 @@ class Model(ModelDesc):
add_moving_summary(k) add_moving_summary(k)
else: else:
roi_resized = roi_align(featuremap, proposal_boxes * (1.0 / config.ANCHOR_STRIDE), 14) roi_resized = roi_align(featuremap, proposal_boxes * (1.0 / config.ANCHOR_STRIDE), 14)
feature_fastrcnn = resnet_conv5(roi_resized) #nxc feature_fastrcnn = resnet_conv5(roi_resized) # nxc
label_logits, fastrcnn_box_logits = fastrcnn_head(feature_fastrcnn, config.NUM_CLASS) label_logits, fastrcnn_box_logits = fastrcnn_head(feature_fastrcnn, config.NUM_CLASS)
label_probs = tf.nn.softmax(label_logits, name='fastrcnn_all_probs') # NP, label_probs = tf.nn.softmax(label_logits, name='fastrcnn_all_probs') # NP,
labels = tf.argmax(label_logits, axis=1) labels = tf.argmax(label_logits, axis=1)
fg_ind, fg_box_logits = fastrcnn_predict_boxes(labels, fastrcnn_box_logits) fg_ind, fg_box_logits = fastrcnn_predict_boxes(labels, fastrcnn_box_logits)
fg_label_probs = tf.gather(label_probs, fg_ind, name='fastrcnn_fg_probs') fg_label_probs = tf.gather(label_probs, fg_ind, name='fastrcnn_fg_probs')
fg_boxes = tf.gather(proposal_boxes, fg_ind) fg_boxes = tf.gather(proposal_boxes, fg_ind)
fg_box_logits = fg_box_logits / tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS) fg_box_logits = fg_box_logits / tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS)
decoded_boxes = decode_bbox_target(fg_box_logits, fg_boxes) # Nfx4, floatbox decoded_boxes = decode_bbox_target(fg_box_logits, fg_boxes) # Nfx4, floatbox
decoded_boxes = tf.identity(decoded_boxes, name='fastrcnn_fg_boxes') decoded_boxes = tf.identity(decoded_boxes, name='fastrcnn_fg_boxes')
def _get_optimizer(self): def _get_optimizer(self):
lr = symbf.get_scalar_var('learning_rate', 0.003, summary=True) lr = symbf.get_scalar_var('learning_rate', 0.003, summary=True)
opt = tf.train.MomentumOptimizer(lr, 0.9) opt = tf.train.MomentumOptimizer(lr, 0.9)
...@@ -261,7 +260,6 @@ if __name__ == '__main__': ...@@ -261,7 +260,6 @@ if __name__ == '__main__':
predict(args.load, args.predict) predict(args.load, args.predict)
sys.exit() sys.exit()
logger.set_logger_dir(args.logdir, 'd') logger.set_logger_dir(args.logdir, 'd')
stepnum = 300 stepnum = 300
warmup_epoch = max(math.ceil(500.0 / stepnum), 5) warmup_epoch = max(math.ceil(500.0 / stepnum), 5)
...@@ -271,17 +269,19 @@ if __name__ == '__main__': ...@@ -271,17 +269,19 @@ if __name__ == '__main__':
callbacks=[ callbacks=[
PeriodicTrigger(ModelSaver(), every_k_epochs=5), PeriodicTrigger(ModelSaver(), every_k_epochs=5),
# linear warmup # linear warmup
ScheduledHyperParamSetter('learning_rate', ScheduledHyperParamSetter(
'learning_rate',
[(0, 0.003), (warmup_epoch, 0.01)], interp='linear'), [(0, 0.003), (warmup_epoch, 0.01)], interp='linear'),
# step decay # step decay
ScheduledHyperParamSetter('learning_rate', ScheduledHyperParamSetter(
[(warmup_epoch, 0.01), (120000//stepnum, 1e-3), (180000//stepnum, 1e-4)]), 'learning_rate',
[(warmup_epoch, 0.01), (120000 // stepnum, 1e-3), (180000 // stepnum, 1e-4)]),
HumanHyperParamSetter('learning_rate'), HumanHyperParamSetter('learning_rate'),
EvalCallback(), EvalCallback(),
GPUUtilizationTracker(), GPUUtilizationTracker(),
], ],
steps_per_epoch=stepnum, steps_per_epoch=stepnum,
max_epoch=205000//stepnum, max_epoch=205000 // stepnum,
session_init=get_model_loader(args.load), session_init=get_model_loader(args.load),
nr_tower=nr_gpu nr_tower=nr_gpu
) )
......
...@@ -10,6 +10,7 @@ from tensorpack.utils import viz ...@@ -10,6 +10,7 @@ from tensorpack.utils import viz
from coco import COCOMeta from coco import COCOMeta
from utils.box_ops import get_iou_callable from utils.box_ops import get_iou_callable
def draw_annotation(img, boxes, klass, is_crowd=None): def draw_annotation(img, boxes, klass, is_crowd=None):
labels = [] labels = []
assert len(boxes) == len(klass) assert len(boxes) == len(klass)
...@@ -36,12 +37,12 @@ def draw_proposal_recall(img, proposals, proposal_scores, gt_boxes): ...@@ -36,12 +37,12 @@ def draw_proposal_recall(img, proposals, proposal_scores, gt_boxes):
gt_boxes: NG gt_boxes: NG
""" """
bbox_iou_float = get_iou_callable() bbox_iou_float = get_iou_callable()
box_ious = bbox_iou_float(gt_boxes, proposals) #ng x np box_ious = bbox_iou_float(gt_boxes, proposals) # ng x np
box_ious_argsort = np.argsort(-box_ious, axis=1) box_ious_argsort = np.argsort(-box_ious, axis=1)
good_proposals_ind = box_ious_argsort[:,:3] # for each gt, find 3 best proposals good_proposals_ind = box_ious_argsort[:, :3] # for each gt, find 3 best proposals
good_proposals_ind = np.unique(good_proposals_ind.ravel()) good_proposals_ind = np.unique(good_proposals_ind.ravel())
proposals = proposals[good_proposals_ind,:] proposals = proposals[good_proposals_ind, :]
tags = list(map(str, proposal_scores[good_proposals_ind])) tags = list(map(str, proposal_scores[good_proposals_ind]))
img = viz.draw_boxes(img, proposals, tags) img = viz.draw_boxes(img, proposals, tags)
return img, good_proposals_ind return img, good_proposals_ind
......
[flake8] [flake8]
max-line-length = 120 max-line-length = 120
ignore = F403,F401,F405,F841,E401 ignore = F403,F401,F405,F841,E401
exclude = private exclude = private,
FasterRCNN/utils
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment