Commit cfe88a9a authored by Yuxin Wu

lint and other fixes

parent 0018fc13
......@@ -374,6 +374,7 @@ def autodoc_skip_member(app, what, name, obj, skip, options):
'TryResumeTraining',
'LeakyReLU',
'PrefetchOnGPUs',
'PeriodicRunHooks',
'guided_relu', 'saliency_map', 'get_scalar_var', 'psnr',
'prediction_incorrect', 'huber_loss', 'SoftMax'
......
......@@ -20,7 +20,7 @@ RESNET_NUM_BLOCK = [3, 4, 6, 3] # for resnet50
# schedule -----------------------
BASE_LR = 1e-2
WARMUP = 500
WARMUP = 500 # in steps
STEPS_PER_EPOCH = 500
LR_SCHEDULE = [150000, 230000, 280000]
# LR_SCHEDULE = [120000, 160000, 180000] # "1x" schedule in detectron
......
......@@ -368,10 +368,11 @@ if __name__ == '__main__':
print_config()
factor = get_batch_factor()
stepnum = config.STEPS_PER_EPOCH
warmup_epoch = max(1, config.WARMUP / stepnum)
warmup_schedule = [(0, config.BASE_LR / 3), (warmup_epoch * factor, config.BASE_LR)]
lr_schedule = [warmup_schedule[-1]]
# warmup is step based, lr is epoch based
warmup_schedule = [(0, config.BASE_LR / 3), (config.WARMUP * factor, config.BASE_LR)]
warmup_end_epoch = config.WARMUP * factor * 1. / stepnum
lr_schedule = [(int(np.ceil(warmup_end_epoch)), warmup_schedule[-1][1])]
for idx, steps in enumerate(config.LR_SCHEDULE[:-1]):
mult = 0.1 ** (idx + 1)
lr_schedule.append(
......@@ -382,9 +383,9 @@ if __name__ == '__main__':
data=QueueInput(get_train_dataflow(add_mask=config.MODE_MASK)),
callbacks=[
ModelSaver(max_to_keep=10, keep_checkpoint_every_n_hours=1),
# linear warmup # TODO step-wise linear warmup
# linear warmup
ScheduledHyperParamSetter(
'learning_rate', warmup_schedule, interp='linear'),
'learning_rate', warmup_schedule, interp='linear', step_based=True),
ScheduledHyperParamSetter('learning_rate', lr_schedule),
EvalCallback(),
GPUUtilizationTracker(),
......
......@@ -277,7 +277,8 @@ class JSONWriter(TrainingMonitor):
pass
else:
# TODO is this a good idea?
logger.info("Found history statistics from JSON. Rename the first epoch of this training to epoch #{}.".format(epoch))
logger.info("Found history statistics from JSON. "
"Rename the first epoch of this training to epoch #{}.".format(epoch))
self.trainer.loop.starting_epoch = epoch
self.trainer.loop._epoch_num = epoch - 1
else:
......
......@@ -174,7 +174,6 @@ class PeriodicCallback(EnableCallbackIf):
self._epoch_k = every_k_epochs
super(PeriodicCallback, self).__init__(callback, PeriodicCallback.predicate)
def predicate(self):
if self._step_k is not None and self.global_step % self._step_k == 0:
return True
......
......@@ -496,7 +496,8 @@ class StagingInput(FeedfreeInput):
logger.info("Pre-filling StagingArea ...")
for k in range(self.nr_stage):
self.stage_op.run()
logger.info("Successfully put {} element(s) to StagingArea.".format(self.nr_stage))
logger.info("Successfully put {} element{} to StagingArea.".format(
self.nr_stage, "s" if self.nr_stage > 1 else ""))
def _before_run(self, ctx):
# This has to happen once, right before the first iteration.
......
......@@ -80,7 +80,7 @@ def set_logger_dir(dirname, action=None):
Args:
dirname(str): log directory
action(str): an action of ("k","b","d","n","q") to be performed
action(str): an action of ("k","d","q") to be performed
when the directory exists. Will ask user by default.
"""
global LOG_DIR, _FILE_HANDLER
......@@ -88,13 +88,13 @@ def set_logger_dir(dirname, action=None):
# unload and close the old file handler, so that we may safely delete the logger directory
_logger.removeHandler(_FILE_HANDLER)
del _FILE_HANDLER
if os.path.isdir(dirname):
if os.path.isdir(dirname) and len(os.listdir(dirname)):
if not action:
_logger.warn("""\
Log directory {} exists! Please either backup/delete it, or use a new directory.""".format(dirname))
_logger.warn("""\
If you're resuming from a previous run you can choose to keep it.""")
_logger.info("Select Action: k (keep) / b (backup) / d (delete) / n (new) / q (quit):")
_logger.info("Select Action: k (keep) / d (delete) / q (quit):")
while not action:
action = input().lower().strip()
act = action
......@@ -110,7 +110,7 @@ If you're resuming from a previous run you can choose to keep it.""")
elif act == 'k':
pass
elif act == 'q':
sys.exit()
raise OSError("Directory {} exits!".format(dirname))
else:
raise ValueError("Unknown action: {}".format(act))
LOG_DIR = dirname
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment