Commit bfad96d7 authored by Yuxin Wu

Merge branch 'master' into model-redesign

parents 8f8ae315 d38d22bf
@@ -2,7 +2,11 @@ Bug Reports/Feature Requests/Usage Questions Only:
 Bug Reports (including performance bug):
 Some part of code (either the library or examples) doesn't work as expected.
-Always include what you did, what you observed, what you expected.
+Always include the following:
+1. What you did. (the command you ran if using examples; post or describe your code if not)
+2. What you observed. (training logs)
+3. What you expected, if not obvious.
+4. Your environment (TF version, GPUs), if it matters.
 Feature Requests:
 1. Improve an existing feature.
...
@@ -39,8 +39,8 @@ To predict on an image (and show output in a window):
 ## Results
-+ trainval35k/minival, FASTRCNN_BATCH=256: 32.9
-+ trainval35k/minival, FASTRCNN_BATCH=64: 31.6. Takes less than one day on 8 Maxwell TitanX.
++ trainval35k/minival, FASTRCNN_BATCH=256: 33.4. Takes 49h on 8 TitanX.
++ trainval35k/minival, FASTRCNN_BATCH=64: 32.2. Takes 31h on 8 TitanX.
 The hyperparameters are not carefully tuned. You can probably get better performance by e.g. training longer.
...
@@ -191,9 +191,9 @@ def get_rpn_anchor_input(im, boxes, klass, is_crowd):
 def read_and_augment_images(ds):
     def mapf(dp):
         fname = dp[0]
-        im = cv2.imread(fname, cv2.IMREAD_COLOR).astype('float32')
-        assert im is not None, dp[0]
-        dp[0] = im
+        im = cv2.imread(fname, cv2.IMREAD_COLOR)
+        assert im is not None, fname
+        dp[0] = im.astype('float32')
         # assume floatbox as input
         assert dp[1].dtype == np.float32
...
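The reordering above fixes a real bug: cv2.imread returns None on a failed read instead of raising, so calling .astype('float32') before the None check produced an AttributeError rather than the intended assertion message. A minimal sketch of the failure mode (the filename is hypothetical):

    import cv2

    im = cv2.imread('does_not_exist.jpg', cv2.IMREAD_COLOR)  # returns None, no exception
    # Old order: im.astype('float32') raises AttributeError on None before the assert fires.
    # New order: the assert fails first and reports the offending filename.
    assert im is not None, 'does_not_exist.jpg'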
@@ -288,19 +288,19 @@ if __name__ == '__main__':
             # linear warmup
             ScheduledHyperParamSetter(
                 'learning_rate',
-                [(0, 0.003), (warmup_epoch * factor, 0.01)], interp='linear'),
+                [(0, 3e-3), (warmup_epoch * factor, 1e-2)], interp='linear'),
             # step decay
             ScheduledHyperParamSetter(
                 'learning_rate',
-                [(warmup_epoch * factor, 0.01),
-                 (120000 * factor // stepnum, 1e-3),
-                 (180000 * factor // stepnum, 1e-4)]),
+                [(warmup_epoch * factor, 1e-2),
+                 (150000 * factor // stepnum, 1e-3),
+                 (210000 * factor // stepnum, 1e-4)]),
             HumanHyperParamSetter('learning_rate'),
             EvalCallback(),
             GPUUtilizationTracker(),
         ],
         steps_per_epoch=stepnum,
-        max_epoch=205000 * factor // stepnum,
+        max_epoch=230000 * factor // stepnum,
         session_init=get_model_loader(args.load) if args.load else None,
     )
     trainer = SyncMultiGPUTrainerReplicated(get_nr_gpu())
...
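Besides rewriting the literals in scientific notation, this hunk lengthens the schedule: the decay boundaries move from 120k/180k steps (max 205k) to 150k/210k steps (max 230k), matching the longer training times quoted in the README hunk above. For orientation, a hedged sketch of how these global step counts map to epoch boundaries; the stepnum, factor, and warmup_epoch values below are placeholders, not the script's actual settings:

    # Hypothetical values, for illustration only.
    stepnum, factor, warmup_epoch = 300, 1, 3

    schedule = [(warmup_epoch * factor, 1e-2),
                (150000 * factor // stepnum, 1e-3),   # decay to 1e-3 at epoch 500
                (210000 * factor // stepnum, 1e-4)]   # decay to 1e-4 at epoch 700
    max_epoch = 230000 * factor // stepnum            # stop at epoch 766
    print(schedule, max_epoch)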
@@ -130,7 +130,8 @@ class Monitors(Callback):
             if val.WhichOneof('value') == 'simple_value':
                 val.tag = re.sub('tower[0-9]+/', '', val.tag)  # TODO move to subclasses
-                # TODO This hack not needed any more, can remove this in the future
+                # TODO This hack is still needed; the issue seems to disappear only
+                # when TF is compiled from source.
                 suffix = '-summary'  # tensorflow#6150, tensorboard#59
                 if val.tag.endswith(suffix):
                     val.tag = val.tag[:-len(suffix)]
...
@@ -41,6 +41,7 @@ def regularize_cost(regex, func, name='regularize_cost'):
         cost = cost + regularize_cost("fc.*/W", l2_regularizer(1e-5))
     """
+    assert len(regex)
     ctx = get_current_tower_context()
     if not ctx.is_training:
         # Currently cannot build the wd_cost correctly at inference,
...
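The new assert guards against passing an empty pattern: with Python's re matching (whether search- or match-style), an empty pattern matches every variable name, which would silently regularize everything. A quick illustration with made-up variable names:

    import re

    print(bool(re.search('', 'conv1/W')))        # True: empty pattern matches anything
    print(bool(re.search('fc.*/W', 'fc6/W')))    # True: intended match
    print(bool(re.search('fc.*/W', 'conv1/W')))  # False: correctly excluded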
@@ -163,7 +163,8 @@ class OfflinePredictor(OnlinePredictor):
         input_tensors = get_tensors_by_names(config.input_names)
         output_tensors = get_tensors_by_names(config.output_names)
+        config.session_init._setup_graph()
         sess = config.session_creator.create_session()
-        config.session_init.init(sess)
+        config.session_init._run_init(sess)
         super(OfflinePredictor, self).__init__(
             input_tensors, output_tensors, config.return_input, sess)
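This splits session initialization into two phases: _setup_graph() runs before create_session() so that any initialization ops are added while the graph is still being built, and _run_init(sess) executes them once the session exists. A toy mock of the ordering; these are not tensorpack's actual classes, only an illustration of the protocol implied by the diff:

    class MockSessionInit:
        """Hypothetical stand-in for the two-phase init seen above."""
        def _setup_graph(self):
            self.init_op = 'init_op'   # ops must exist before the session is created

        def _run_init(self, sess):
            print('running', self.init_op, 'in', sess)

    init = MockSessionInit()
    init._setup_graph()   # phase 1: before session_creator.create_session()
    sess = 'session'      # stand-in for the created session
    init._run_init(sess)  # phase 2: run the init ops in the new session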
@@ -119,8 +119,8 @@ def add_tensor_summary(x, types, name=None, collections=None,
         return
     SUMMARY_TYPES_DIC = {
-        'scalar': lambda: tf.summary.scalar(name, x, collections=collections),
-        'histogram': lambda: tf.summary.histogram(name, x, collections=collections),
+        'scalar': lambda: tf.summary.scalar(name + '-summary', x, collections=collections),
+        'histogram': lambda: tf.summary.histogram(name + '-histogram', x, collections=collections),
         'sparsity': lambda: tf.summary.scalar(
             name + '-sparsity', tf.nn.zero_fraction(x),
             collections=collections),
@@ -246,7 +246,7 @@ def add_moving_summary(*args, **kwargs):
         ema_ops.append(ema_op)
     with tf.name_scope(None):
         # cannot add it into colocate group -- will force everything to cpus
-        tf.summary.scalar(name, ema_op)  # write the EMA value as a summary
+        tf.summary.scalar(name + '-summary', ema_op)  # write the EMA value as a summary
     if coll is not None:
         for op in ema_ops:
             # TODO a new collection to summary every step?
...
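These two hunks append a '-summary' suffix so the summary op's name no longer collides with the name of the tensor it summarizes (the tensorflow#6150 / tensorboard#59 issue referenced above), and the Monitors hunk strips the suffix again before logging. A runnable sketch of that tag cleanup, using a made-up tag:

    import re

    tag = 'tower0/train-error-summary'
    tag = re.sub('tower[0-9]+/', '', tag)  # drop the tower prefix, as in Monitors
    suffix = '-summary'
    if tag.endswith(suffix):
        tag = tag[:-len(suffix)]
    print(tag)  # 'train-error'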