[MaskRCNN] use same actual warmup init LR regardless of total bs

b097e7d5 · Yuxin Wu · 1f498ed6 · b097e7d5 · b097e7d5
Commit b097e7d5 authored Dec 25, 2018 by Yuxin Wu
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 4 additions and 5 deletions

examples/FasterRCNN/config.py +3 -4

examples/FasterRCNN/train.py +1 -1

No files found.
--- a/examples/FasterRCNN/config.py
+++ b/examples/FasterRCNN/config.py
@@ -108,8 +108,9 @@ _C.BACKBONE.STRIDE_1X1 = False  # True for MSRA models
 # schedule -----------------------
 _C.TRAIN.NUM_GPUS = None         # by default, will be set from code
 _C.TRAIN.WEIGHT_DECAY = 1e-4
-_C.TRAIN.BASE_LR = 1e-2  # defined for a total batch size of 8. Otherwise it will be adjusted automatically
+_C.TRAIN.BASE_LR = 1e-2  # defined for total batch size=8. Otherwise it will be adjusted automatically
 _C.TRAIN.WARMUP = 1000   # in terms of iterations. This is not affected by #GPUs
+_C.TRAIN.WARMUP_INIT_LR = 1e-2 * 0.33  # defined for total batch size=8. Otherwise it will be adjusted automatically
 _C.TRAIN.STEPS_PER_EPOCH = 500
 _C.TRAIN.STARTING_EPOCH = 1  # the first epoch to start with, useful to continue a training
@@ -235,9 +236,7 @@ def finalize_configs(is_training):
    if is_training:
        train_scales = _C.PREPROC.TRAIN_SHORT_EDGE_SIZE
-        if not isinstance(train_scales, (list, tuple)):
+        if isinstance(train_scales, (list, tuple)) and train_scales[1] - train_scales[0] > 100:
-            train_scales = [train_scales, train_scales]
-        if train_scales[1] - train_scales[0] > 100:
            # don't warmup if augmentation is on
            os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
        os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'

--- a/examples/FasterRCNN/train.py
+++ b/examples/FasterRCNN/train.py
@@ -548,7 +548,7 @@ if __name__ == '__main__':
        stepnum = cfg.TRAIN.STEPS_PER_EPOCH
        # warmup is step based, lr is epoch based
-        init_lr = cfg.TRAIN.BASE_LR * 0.33 * min(8. / cfg.TRAIN.NUM_GPUS, 1.)
+        init_lr = cfg.TRAIN.WARMUP_INIT_LR * min(8. / cfg.TRAIN.NUM_GPUS, 1.)
        warmup_schedule = [(0, init_lr), (cfg.TRAIN.WARMUP, cfg.TRAIN.BASE_LR)]
        warmup_end_epoch = cfg.TRAIN.WARMUP * 1. / stepnum
        lr_schedule = [(int(warmup_end_epoch + 0.5), cfg.TRAIN.BASE_LR)]