Commit c4a68c6c authored by Yuxin Wu

[MaskRCNN] misc small updates

parent 847fae12
@@ -24,7 +24,8 @@ class AttrDict():
def __setattr__(self, name, value):
if self._freezed and name not in self.__dict__:
raise AttributeError("Cannot create new attribute!")
raise AttributeError(
"Config was freezed! Unknown config: {}".format(name))
super().__setattr__(name, value)
def __str__(self):
@@ -54,11 +55,11 @@ class AttrDict():
v = eval(v)
setattr(dic, key, v)
def freeze(self):
self._freezed = True
def freeze(self, freezed=True):
self._freezed = freezed
for v in self.__dict__.values():
if isinstance(v, AttrDict):
v.freeze()
v.freeze(freezed)
# avoid silent bugs
def __eq__(self, _):
@@ -95,7 +96,6 @@ _C.BACKBONE.FREEZE_AT = 2 # options: 0, 1, 2
# Use a base model with TF-preferred padding mode,
# which may pad more pixels on right/bottom than top/left.
# See https://github.com/tensorflow/tensorflow/issues/18213
# In tensorpack model zoo, ResNet models with TF_PAD_MODE=False are marked with "-AlignPadding".
# All other models under `ResNet/` in the model zoo are using TF_PAD_MODE=True.
# Using either one should probably give the same performance.
@@ -110,11 +110,16 @@ _C.TRAIN.BASE_LR = 1e-2 # defined for a total batch size of 8. Otherwise it will be adjusted automatically
_C.TRAIN.WARMUP = 1000 # in terms of iterations. This is not affected by #GPUs
_C.TRAIN.STEPS_PER_EPOCH = 500
# LR_SCHEDULE means "steps" only when total batch size is 8.
# Otherwise the actual steps to decrease learning rate are computed from the schedule.
# LR_SCHEDULE means equivalent steps when the total batch size is 8.
# When the total batch size != 8, the actual iterations at which the learning rate decreases,
# as well as the base learning rate itself, are computed from BASE_LR and LR_SCHEDULE.
# Therefore, there is *no need* to modify the config if you only change the number of GPUs.
# LR_SCHEDULE = [120000, 160000, 180000] # "1x" schedule in detectron
_C.TRAIN.LR_SCHEDULE = [240000, 320000, 360000] # "2x" schedule in detectron
# _C.TRAIN.LR_SCHEDULE = [120000, 160000, 180000] # "1x" schedule in detectron
_C.TRAIN.LR_SCHEDULE = [240000, 320000, 360000] # "2x" schedule in detectron
# Longer schedules for from-scratch training (https://arxiv.org/abs/1811.08883):
# _C.TRAIN.LR_SCHEDULE = [960000, 1040000, 1080000] # "6x" schedule in detectron
# _C.TRAIN.LR_SCHEDULE = [1500000, 1580000, 1620000] # "9x" schedule in detectron
_C.TRAIN.EVAL_PERIOD = 25 # period (epochs) to run evaluation
# preprocessing --------------------
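To make the schedule scaling described above concrete, here is a minimal, generic sketch of the linear scaling rule; it is not the exact adjustment code in this example, and scale_lr_schedule and its arguments are illustrative names only:

def scale_lr_schedule(base_lr, lr_schedule, num_gpus, reference_batch=8):
    # Linear scaling rule (hypothetical helper): a larger total batch size gets a
    # proportionally larger learning rate and proportionally fewer iterations.
    factor = num_gpus / float(reference_batch)
    scaled_lr = base_lr * factor
    scaled_steps = [int(step / factor) for step in lr_schedule]
    return scaled_lr, scaled_steps

# Example: the "2x" schedule above on 16 single-image GPUs
# scale_lr_schedule(1e-2, [240000, 320000, 360000], num_gpus=16)
# -> (0.02, [120000, 160000, 180000])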
@@ -167,8 +172,7 @@ _C.FPN.ANCHOR_STRIDES = (4, 8, 16, 32, 64) # strides for each FPN level. Must be the same length as ANCHOR_SIZES
_C.FPN.PROPOSAL_MODE = 'Level' # 'Level', 'Joint'
_C.FPN.NUM_CHANNEL = 256
_C.FPN.NORM = 'None' # 'None', 'GN'
# conv head and fc head are only used in FPN.
# For C4 models, the head is C5
# The head option is only used in FPN. For C4 models, the head is C5
_C.FPN.FRCNN_HEAD_FUNC = 'fastrcnn_2fc_head'
# choices: fastrcnn_2fc_head, fastrcnn_4conv1fc_{,gn_}head
_C.FPN.FRCNN_CONV_HEAD_DIM = 256
@@ -192,11 +196,14 @@ _C.TEST.RESULT_SCORE_THRESH = 0.05
_C.TEST.RESULT_SCORE_THRESH_VIS = 0.3 # only visualize confident results
_C.TEST.RESULTS_PER_IM = 100
_C.freeze() # avoid typo / wrong config keys
def finalize_configs(is_training):
"""
Run some sanity checks, and populate some configs from others
"""
_C.freeze(False) # populate new keys now
_C.DATA.NUM_CLASS = _C.DATA.NUM_CATEGORY + 1 # +1 background
_C.DATA.BASEDIR = os.path.expanduser(_C.DATA.BASEDIR)
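The freeze(freezed=True) signature introduced above lets finalize_configs temporarily unfreeze the config to populate derived keys (such as DATA.NUM_CLASS), while a frozen config still rejects unknown keys and therefore catches typos. A self-contained toy illustrating the pattern; the real AttrDict in the config module is richer than this sketch:

class ToyConfig:
    _freezed = False

    def __setattr__(self, name, value):
        # Reject new keys while frozen, so a misspelled config name fails loudly.
        if self._freezed and name not in self.__dict__:
            raise AttributeError(
                "Config was freezed! Unknown config: {}".format(name))
        super().__setattr__(name, value)

    def freeze(self, freezed=True):
        self._freezed = freezed

cfg = ToyConfig()
cfg.BASE_LR = 1e-2
cfg.freeze()
cfg.BASE_LR = 1e-3        # fine: the key already exists
# cfg.BASE_LRR = 1e-3     # would raise AttributeError (typo caught)
cfg.freeze(False)         # temporarily unfreeze, as finalize_configs() does
cfg.NUM_CLASS = 81        # derived key can now be populated
cfg.freeze()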
@@ -232,18 +232,9 @@ class ResNetFPNModel(DetectionModel):
) # NR_GT x height x width
return ret
def slice_feature_and_anchors(self, image_shape2d, p23456, anchors):
def slice_feature_and_anchors(self, p23456, anchors):
for i, stride in enumerate(cfg.FPN.ANCHOR_STRIDES):
with tf.name_scope('FPN_slice_lvl{}'.format(i)):
if i < 3:
# Images are padded for p5, which are too large for p2-p4.
# This seems to have no effect on mAP.
pi = p23456[i]
target_shape = tf.to_int32(tf.ceil(tf.to_float(image_shape2d) * (1.0 / stride)))
p23456[i] = tf.slice(pi, [0, 0, 0, 0],
tf.concat([[-1, -1], target_shape], axis=0))
p23456[i].set_shape([1, pi.shape[1], None, None])
anchors[i] = anchors[i].narrow_to(p23456[i])
def backbone(self, image):
@@ -260,7 +251,7 @@ class ResNetFPNModel(DetectionModel):
all_anchors_fpn[i],
inputs['anchor_labels_lvl{}'.format(i + 2)],
inputs['anchor_boxes_lvl{}'.format(i + 2)]) for i in range(len(all_anchors_fpn))]
self.slice_feature_and_anchors(image_shape2d, features, multilevel_anchors)
self.slice_feature_and_anchors(features, multilevel_anchors)
# Multi-Level RPN Proposals
rpn_outputs = [rpn_head('rpn', pi, cfg.FPN.NUM_CHANNEL, len(cfg.RPN.ANCHOR_RATIOS))
@@ -472,23 +463,24 @@ class EvalCallback(Callback):
futures.append(executor.submit(eval_coco, dataflow, pred, pbar))
all_results = list(itertools.chain(*[fut.result() for fut in futures]))
else:
filenames = [os.path.join(
logdir, 'outputs{}-part{}.json'.format(self.global_step, rank)
) for rank in range(hvd.local_size())]
if self._horovod_run_eval:
local_results = eval_coco(self.dataflow, self.predictor)
output_partial = os.path.join(
logdir, 'outputs{}-part{}.json'.format(self.global_step, hvd.local_rank()))
with open(output_partial, 'w') as f:
fname = filenames[hvd.local_rank()]
with open(fname, 'w') as f:
json.dump(local_results, f)
self.barrier.eval()
if hvd.rank() > 0:
return
all_results = []
for k in range(hvd.local_size()):
output_partial = os.path.join(
logdir, 'outputs{}-part{}.json'.format(self.global_step, k))
with open(output_partial, 'r') as f:
for fname in filenames:
with open(fname, 'r') as f:
obj = json.load(f)
all_results.extend(obj)
os.unlink(output_partial)
os.unlink(fname)
output_file = os.path.join(
logdir, 'outputs{}.json'.format(self.global_step))
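The refactor above computes the per-rank partial filenames once and reuses them both when each rank writes its results and when rank 0 merges them. A standalone sketch of that merge step with a hypothetical merge_partial_results helper, assuming every rank has already written its partial JSON file:

import json
import os

def merge_partial_results(logdir, global_step, num_parts):
    # Read every rank's partial JSON, concatenate the detections,
    # and delete the partial files afterwards.
    filenames = [os.path.join(
        logdir, 'outputs{}-part{}.json'.format(global_step, rank)
    ) for rank in range(num_parts)]
    all_results = []
    for fname in filenames:
        with open(fname, 'r') as f:
            all_results.extend(json.load(f))
        os.unlink(fname)
    return all_results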
@@ -615,6 +607,6 @@ if __name__ == '__main__':
if is_horovod:
trainer = HorovodTrainer(average=False)
else:
# nccl mode has better speed than cpu mode
# nccl mode appears faster than cpu mode
trainer = SyncMultiGPUTrainerReplicated(cfg.TRAIN.NUM_GPUS, average=False, mode='nccl')
launch_train_with_config(traincfg, trainer)