Commit 1fc18a6e authored by Yuxin Wu

FPN initial commit

parent 04a64849
@@ -117,9 +117,18 @@ def resnet_conv5(image, num_block):
 def pretrained_resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
+    shape2d = tf.shape(image)[2:]
+    mult = config.FPN_RESOLUTION_REQUIREMENT * 1.
+    new_shape2d = tf.to_int32(tf.ceil(tf.to_float(shape2d) / mult) * mult)
+    pad_shape2d = new_shape2d - shape2d
     assert len(num_blocks) == 4
+    # TODO pad 1 at each stage
     with resnet_argscope():
-        l = tf.pad(image, [[0, 0], [0, 0], [2, 3], [2, 3]])
+        chan = image.shape[1]
+        l = tf.pad(image,
+                   tf.stack([[0, 0], [0, 0],
+                             [2, 3 + pad_shape2d[0]], [2, 3 + pad_shape2d[1]]]))
+        l.set_shape([None, chan, None, None])
         l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID')
         l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
         l = MaxPooling('pool0', l, 3, strides=2, padding='VALID')
...
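The added lines at the top of pretrained_resnet_fpn_backbone round the input resolution up to the next multiple of FPN_RESOLUTION_REQUIREMENT and fold the difference into the bottom/right side of the existing [2, 3] padding before the VALID 7x7 stride-2 conv, apparently so the feature maps at every FPN stride come out with consistent integer sizes. A minimal NumPy sketch of the same arithmetic (the 800x1066 input is just an illustrative value, not from the commit):

import numpy as np

mult = 32.0                        # config.FPN_RESOLUTION_REQUIREMENT
shape2d = np.array([800, 1066])    # hypothetical H, W after resizing
new_shape2d = (np.ceil(shape2d / mult) * mult).astype(np.int32)   # -> [800, 1088]
pad_shape2d = new_shape2d - shape2d                               # -> [0, 22]
# conv0 then sees padding [2, 3 + 0] rows and [2, 3 + 22] columns,
# instead of the fixed [2, 3] used before this commit.
print(new_shape2d, pad_shape2d)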
@@ -4,11 +4,12 @@
 import numpy as np
 # mode flags ---------------------
-MODE_MASK = True
+MODE_MASK = False
 # dataset -----------------------
 BASEDIR = '/path/to/your/COCO/DIR'
 TRAIN_DATASET = ['train2014', 'valminusminival2014']
+# TRAIN_DATASET = ['valminusminival2014']
 VAL_DATASET = 'minival2014'   # only support evaluation on single dataset
 NUM_CLASS = 81
 CLASS_NAMES = []  # NUM_CLASS strings. Will be populated later by coco loader
@@ -23,12 +24,12 @@ BASE_LR = 1e-2
 WARMUP = 1000   # in steps
 STEPS_PER_EPOCH = 500
 LR_SCHEDULE = [150000, 230000, 280000]
-# LR_SCHEDULE = [120000, 160000, 180000]   # "1x" schedule in detectron
+LR_SCHEDULE = [120000, 160000, 180000]   # "1x" schedule in detectron
 # LR_SCHEDULE = [240000, 320000, 360000]   # "2x" schedule in detectron
 # image resolution --------------------
 SHORT_EDGE_SIZE = 800
-MAX_SIZE = 1333
+MAX_SIZE = 1333   # TODO use 1344
 # alternative (worse & faster) setting: 600, 1024
 # anchors -------------------------
@@ -55,7 +56,7 @@ TRAIN_POST_NMS_TOPK = 2000
 CROWD_OVERLAP_THRES = 0.7
 # fastrcnn training ---------------------
-FASTRCNN_BATCH_PER_IM = 256
+FASTRCNN_BATCH_PER_IM = 512
 FASTRCNN_BBOX_REG_WEIGHTS = np.array([10, 10, 5, 5], dtype='float32')
 FASTRCNN_FG_THRESH = 0.5
 # fg ratio in a ROI batch
@@ -70,6 +71,9 @@ RESULT_SCORE_THRESH_VIS = 0.3   # only visualize confident results
 RESULTS_PER_IM = 100
 # TODO Not Functioning. Don't USE
-MODE_FPN = False
+MODE_FPN = True
 FPN_NUM_CHANNEL = 256
-FPN_SIZE_REQUIREMENT = 32
+FASTRCNN_FC_HEAD_DIM = 1024
+FPN_RESOLUTION_REQUIREMENT = 32
+TRAIN_FPN_NMS_TOPK = 2048
+TEST_FPN_NMS_TOPK = 1024
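The new FPN options tie into the "TODO use 1344" note on MAX_SIZE above: with FPN_RESOLUTION_REQUIREMENT = 32, a 1333-pixel long edge is padded up to the next multiple of 32, which is presumably where 1344 comes from. A quick check in plain Python, using only values from this config:

import math

MAX_SIZE = 1333
FPN_RESOLUTION_REQUIREMENT = 32
padded = int(math.ceil(MAX_SIZE / float(FPN_RESOLUTION_REQUIREMENT))) * FPN_RESOLUTION_REQUIREMENT
print(padded)   # 1344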
@@ -51,7 +51,12 @@ def get_all_anchors(
     # anchors are intbox here.
     # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)
-    field_size = int(np.ceil(config.MAX_SIZE / stride))
+    max_size = config.MAX_SIZE
+    if config.MODE_FPN:
+        # TODO setting this in config is perhaps better
+        size_mult = config.FPN_RESOLUTION_REQUIREMENT * 1.
+        max_size = np.ceil(max_size / size_mult) * size_mult
+    field_size = int(np.ceil(max_size / stride))
     shifts = np.arange(0, field_size) * stride
     shift_x, shift_y = np.meshgrid(shifts, shifts)
     shift_x = shift_x.flatten()
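Rounding max_size up before computing field_size makes the anchor grid cover the padded input rather than the raw MAX_SIZE; the difference only shows up at the finer strides. A small sketch with assumed per-level anchor strides (not taken from this diff):

import numpy as np

MAX_SIZE = 1333
FPN_RESOLUTION_REQUIREMENT = 32
for stride in (4, 8, 16, 32):   # assumed anchor strides, one per FPN level
    plain = int(np.ceil(MAX_SIZE / float(stride)))
    padded = np.ceil(MAX_SIZE / (FPN_RESOLUTION_REQUIREMENT * 1.)) * FPN_RESOLUTION_REQUIREMENT
    with_fpn = int(np.ceil(padded / float(stride)))
    print(stride, plain, with_fpn)
# stride 4: 334 -> 336, stride 8: 167 -> 168, stride 16: 84 -> 84, stride 32: 42 -> 42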
@@ -337,7 +342,7 @@ def get_train_dataflow(add_mask=False):
         return ret
     ds = MapData(ds, preprocess)
-    ds = PrefetchDataZMQ(ds, 1)
+    ds = PrefetchDataZMQ(ds, 3)
     return ds
@@ -359,7 +364,6 @@ if __name__ == '__main__':
     import os
     from tensorpack.dataflow import PrintData
     config.BASEDIR = os.path.expanduser('~/data/coco')
-    config.TRAIN_DATASET = ['train2014']
     ds = get_train_dataflow(add_mask=config.MODE_MASK)
     ds = PrintData(ds, 100)
     TestDataSpeed(ds, 50000).start()
...