Commit 4f13f971 authored by Yuxin Wu

reproduce box-1x

parent b5e751b6
......@@ -25,7 +25,7 @@ WARMUP = 1000 # in steps
STEPS_PER_EPOCH = 500
LR_SCHEDULE = [150000, 230000, 280000]
LR_SCHEDULE = [120000, 160000, 180000] # "1x" schedule in detectron
#LR_SCHEDULE = [240000, 320000, 360000] # "2x" schedule in detectron
LR_SCHEDULE = [240000, 320000, 360000] # "2x" schedule in detectron
# image resolution --------------------
SHORT_EDGE_SIZE = 800
......
......@@ -144,14 +144,16 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
# Subsample fg labels: ignore some fg if there are too many
target_num_fg = int(config.RPN_BATCH_PER_IM * config.RPN_FG_RATIO)
fg_inds = filter_box_label(anchor_labels, 1, target_num_fg)
if len(fg_inds) == 0:
raise MalformedData("No valid foreground for RPN!")
# Note that fg could be fewer than the target ratio
# Subsample bg labels. num_bg is not allowed to be too many
old_num_bg = np.sum(anchor_labels == 0)
if old_num_bg == 0 or len(fg_inds) == 0:
if old_num_bg == 0:
# No valid bg/fg in this image, skip.
# This can happen if, e.g., the image has a large crowd.
raise MalformedData("No valid foreground/background for RPN!")
raise MalformedData("No valid background for RPN!")
target_num_bg = config.RPN_BATCH_PER_IM - len(fg_inds)
filter_box_label(anchor_labels, 0, target_num_bg) # ignore return values
......@@ -341,7 +343,7 @@ def get_train_dataflow(add_mask=False):
# tpviz.interactive_imshow(viz)
return ret
ds = MultiProcessMapDataZMQ(ds, 5, preprocess)
ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
#ds = PrefetchDataZMQ(ds, 3)
return ds
......
......@@ -10,6 +10,7 @@ from tensorpack.models import (
Conv2D, FullyConnected, MaxPooling,
layer_register, Conv2DTranspose, FixedUnPooling)
from tensorpack.utils import logger
from utils.box_ops import pairwise_iou
from utils.box_ops import area as tf_area
import config
......@@ -578,7 +579,13 @@ def fpn_model(features):
def upsample2x(name, x):
# TODO may not be optimal in speed or math
logger.info("Unpool 1111 ...")
return FixedUnPooling(
name, x, 2, unpool_mat=np.ones((2, 2), dtype='float32'),
data_format='channels_first')
with tf.name_scope(name):
logger.info("Nearest neighbor")
shape2d = tf.shape(x)[2:]
x = tf.transpose(x, [0, 2, 3, 1])
x = tf.image.resize_nearest_neighbor(x, shape2d * 2, align_corners=True)
......@@ -616,7 +623,8 @@ def fpn_map_rois_to_levels(boxes):
Be careful that the returned tensor could be empty.
"""
sqrtarea = tf.sqrt(tf_area(boxes))
level = tf.floor(4 + tf.log(sqrtarea * (1. / 224) + 1e-6) * (1.0 / np.log(2)))
level = tf.to_int32(tf.floor(
4 + tf.log(sqrtarea * (1. / 224) + 1e-6) * (1.0 / np.log(2))))
# RoI levels range from 2~5 (not 6)
level_ids = [
......@@ -645,6 +653,7 @@ def fastrcnn_2fc_head(feature, dim, num_classes):
Returns:
cls_logits (Nxnum_class), reg_logits (Nx num_class-1 x 4)
"""
logger.info("fc-head-stddev=0.01")
init = tf.random_normal_initializer(stddev=0.01)
hidden = FullyConnected('fc6', feature, dim, kernel_initializer=init, nl=tf.nn.relu)
hidden = FullyConnected('fc7', hidden, dim, kernel_initializer=init, nl=tf.nn.relu)
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment