Commit b097e7d5 authored by Yuxin Wu

[MaskRCNN] use same actual warmup init LR regardless of total bs

parent 1f498ed6
...@@ -108,8 +108,9 @@ _C.BACKBONE.STRIDE_1X1 = False # True for MSRA models ...@@ -108,8 +108,9 @@ _C.BACKBONE.STRIDE_1X1 = False # True for MSRA models
# schedule ----------------------- # schedule -----------------------
_C.TRAIN.NUM_GPUS = None # by default, will be set from code _C.TRAIN.NUM_GPUS = None # by default, will be set from code
_C.TRAIN.WEIGHT_DECAY = 1e-4 _C.TRAIN.WEIGHT_DECAY = 1e-4
_C.TRAIN.BASE_LR = 1e-2 # defined for a total batch size of 8. Otherwise it will be adjusted automatically _C.TRAIN.BASE_LR = 1e-2 # defined for total batch size=8. Otherwise it will be adjusted automatically
_C.TRAIN.WARMUP = 1000 # in terms of iterations. This is not affected by #GPUs _C.TRAIN.WARMUP = 1000 # in terms of iterations. This is not affected by #GPUs
_C.TRAIN.WARMUP_INIT_LR = 1e-2 * 0.33 # defined for total batch size=8. Otherwise it will be adjusted automatically
_C.TRAIN.STEPS_PER_EPOCH = 500 _C.TRAIN.STEPS_PER_EPOCH = 500
_C.TRAIN.STARTING_EPOCH = 1 # the first epoch to start with, useful to continue a training _C.TRAIN.STARTING_EPOCH = 1 # the first epoch to start with, useful to continue a training
...@@ -235,9 +236,7 @@ def finalize_configs(is_training): ...@@ -235,9 +236,7 @@ def finalize_configs(is_training):
if is_training: if is_training:
train_scales = _C.PREPROC.TRAIN_SHORT_EDGE_SIZE train_scales = _C.PREPROC.TRAIN_SHORT_EDGE_SIZE
if not isinstance(train_scales, (list, tuple)): if isinstance(train_scales, (list, tuple)) and train_scales[1] - train_scales[0] > 100:
train_scales = [train_scales, train_scales]
if train_scales[1] - train_scales[0] > 100:
# don't warmup if augmentation is on # don't warmup if augmentation is on
os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0' os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
os.environ['TF_AUTOTUNE_THRESHOLD'] = '1' os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'
......
...@@ -548,7 +548,7 @@ if __name__ == '__main__': ...@@ -548,7 +548,7 @@ if __name__ == '__main__':
stepnum = cfg.TRAIN.STEPS_PER_EPOCH stepnum = cfg.TRAIN.STEPS_PER_EPOCH
# warmup is step based, lr is epoch based # warmup is step based, lr is epoch based
init_lr = cfg.TRAIN.BASE_LR * 0.33 * min(8. / cfg.TRAIN.NUM_GPUS, 1.) init_lr = cfg.TRAIN.WARMUP_INIT_LR * min(8. / cfg.TRAIN.NUM_GPUS, 1.)
warmup_schedule = [(0, init_lr), (cfg.TRAIN.WARMUP, cfg.TRAIN.BASE_LR)] warmup_schedule = [(0, init_lr), (cfg.TRAIN.WARMUP, cfg.TRAIN.BASE_LR)]
warmup_end_epoch = cfg.TRAIN.WARMUP * 1. / stepnum warmup_end_epoch = cfg.TRAIN.WARMUP * 1. / stepnum
lr_schedule = [(int(warmup_end_epoch + 0.5), cfg.TRAIN.BASE_LR)] lr_schedule = [(int(warmup_end_epoch + 0.5), cfg.TRAIN.BASE_LR)]
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment