Commit bfad96d7 authored by Yuxin Wu

Merge branch 'master' into model-redesign

parents 8f8ae315 d38d22bf
......@@ -2,7 +2,11 @@ Bug Reports/Feature Requests/Usage Questions Only:
Bug Reports (including performance bug):
Some part of code (either the library or examples) doesn't work as expected.
-Always include what you did, what you observed, what you expected.
+Always include the following:
+1. What you did. (command you run if using examples; post or describe your code if not)
+2. What you observed. (training logs)
+3. What you expected, if not obvious
+4. Your environment (TF version, GPUs), if it matters.
Feature Requests:
1. Improve an existing feature.
......
......@@ -39,8 +39,8 @@ To predict on an image (and show output in a window):
## Results
-+ trainval35k/minival, FASTRCNN_BATCH=256: 32.9
-+ trainval35k/minival, FASTRCNN_BATCH=64: 31.6. Takes less than one day on 8 Maxwell TitanX.
++ trainval35k/minival, FASTRCNN_BATCH=256: 33.4. Takes 49h on 8 TitanX.
++ trainval35k/minival, FASTRCNN_BATCH=64: 32.2. Takes 31h on 8 TitanX.
The hyperparameters are not carefully tuned. You can probably get better performance by e.g. training longer.
......
......@@ -191,9 +191,9 @@ def get_rpn_anchor_input(im, boxes, klass, is_crowd):
def read_and_augment_images(ds):
    def mapf(dp):
        fname = dp[0]
-       im = cv2.imread(fname, cv2.IMREAD_COLOR).astype('float32')
-       assert im is not None, dp[0]
-       dp[0] = im
+       im = cv2.imread(fname, cv2.IMREAD_COLOR)
+       assert im is not None, fname
+       dp[0] = im.astype('float32')
        # assume floatbox as input
        assert dp[1].dtype == np.float32
......
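The reordering above checks the result of cv2.imread before any cast, so a missing or unreadable file fails on the assert with the offending filename instead of raising AttributeError on None. A minimal sketch of the same pattern (the helper name below is illustrative, not from the repo):

```python
import cv2
import numpy as np

def load_image_float32(fname):
    # cv2.imread returns None rather than raising when the file cannot be read.
    im = cv2.imread(fname, cv2.IMREAD_COLOR)
    # Check before touching the array, and report which file failed.
    assert im is not None, fname
    # Cast to float32 only once the read is known to have succeeded.
    return im.astype(np.float32)
```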
......@@ -288,19 +288,19 @@ if __name__ == '__main__':
            # linear warmup
            ScheduledHyperParamSetter(
                'learning_rate',
-               [(0, 0.003), (warmup_epoch * factor, 0.01)], interp='linear'),
+               [(0, 3e-3), (warmup_epoch * factor, 1e-2)], interp='linear'),
            # step decay
            ScheduledHyperParamSetter(
                'learning_rate',
-               [(warmup_epoch * factor, 0.01),
-                (120000 * factor // stepnum, 1e-3),
-                (180000 * factor // stepnum, 1e-4)]),
+               [(warmup_epoch * factor, 1e-2),
+                (150000 * factor // stepnum, 1e-3),
+                (210000 * factor // stepnum, 1e-4)]),
            HumanHyperParamSetter('learning_rate'),
            EvalCallback(),
            GPUUtilizationTracker(),
        ],
        steps_per_epoch=stepnum,
-       max_epoch=205000 * factor // stepnum,
+       max_epoch=230000 * factor // stepnum,
        session_init=get_model_loader(args.load) if args.load else None,
    )
    trainer = SyncMultiGPUTrainerReplicated(get_nr_gpu())
......
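For reference, the new schedule warms up linearly from 3e-3 to 1e-2 and then decays tenfold at the 150k- and 210k-iteration marks (converted to epochs via factor and stepnum), with training ending at 230k. A rough sketch of the per-epoch value the two ScheduledHyperParamSetter callbacks produce, assuming the warmup_epoch, factor and stepnum variables defined earlier in the script:

```python
def lr_at_epoch(epoch, warmup_epoch, factor, stepnum):
    # Sketch of the schedule configured above; not tensorpack code.
    warmup_end = warmup_epoch * factor
    if epoch <= warmup_end:
        # linear warmup from 3e-3 to 1e-2
        return 3e-3 + (1e-2 - 3e-3) * epoch / float(warmup_end)
    if epoch < 150000 * factor // stepnum:
        return 1e-2
    if epoch < 210000 * factor // stepnum:
        return 1e-3
    return 1e-4  # held until max_epoch = 230000 * factor // stepnum
```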
......@@ -130,7 +130,8 @@ class Monitors(Callback):
if val.WhichOneof('value') == 'simple_value':
    val.tag = re.sub('tower[0-9]+/', '', val.tag)  # TODO move to subclasses
-   # TODO This hack not needed any more, can remove this in the future
+   # TODO This hack is still needed; it seems to disappear only when
+   # compiled from source.
    suffix = '-summary'  # tensorflow#6150, tensorboard#59
    if val.tag.endswith(suffix):
        val.tag = val.tag[:-len(suffix)]
......
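The updated comment records that the '-summary' suffix workaround (tensorflow#6150, tensorboard#59) is still needed with stock TF builds. In isolation, the tag cleanup performed by the surrounding code looks roughly like this (the helper name is illustrative):

```python
import re

def clean_tag(tag, suffix='-summary'):
    # Drop per-tower prefixes such as 'tower0/' so replicated towers log under one name.
    tag = re.sub('tower[0-9]+/', '', tag)
    # Strip the '-summary' suffix that the summary-creation code appends
    # (see the summary.py hunks below), restoring the plain tensor name for display.
    if tag.endswith(suffix):
        tag = tag[:-len(suffix)]
    return tag

# clean_tag('tower0/total_cost-summary') -> 'total_cost'
```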
......@@ -41,6 +41,7 @@ def regularize_cost(regex, func, name='regularize_cost'):
        cost = cost + regularize_cost("fc.*/W", l2_regularizer(1e-5))
    """
+   assert len(regex)
    ctx = get_current_tower_context()
    if not ctx.is_training:
        # Currently cannot build the wd_cost correctly at inference,
......
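The two added lines reject an empty regex early and turn the call into a no-op when the current tower context is not training. A rough stand-in for that guarded behaviour in plain TF (the function below is illustrative and does not share regularize_cost's signature):

```python
import re
import tensorflow as tf

def l2_cost_matching(regex, weight, is_training):
    # An empty pattern would silently match every variable, so fail fast.
    assert len(regex)
    if not is_training:
        # Skip building the weight-decay cost at inference time.
        return tf.constant(0.0, name='empty_regularize_cost')
    losses = [tf.nn.l2_loss(v) for v in tf.trainable_variables()
              if re.search(regex, v.name)]
    if not losses:
        return tf.constant(0.0, name='empty_regularize_cost')
    return tf.multiply(weight, tf.add_n(losses), name='regularize_cost')
```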
......@@ -163,7 +163,8 @@ class OfflinePredictor(OnlinePredictor):
    input_tensors = get_tensors_by_names(config.input_names)
    output_tensors = get_tensors_by_names(config.output_names)
+   config.session_init._setup_graph()
    sess = config.session_creator.create_session()
-   config.session_init.init(sess)
+   config.session_init._run_init(sess)
    super(OfflinePredictor, self).__init__(
        input_tensors, output_tensors, config.return_input, sess)
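The change above splits initialization into a step that runs before the session exists (_setup_graph) and a step that runs inside it (_run_init), replacing the single init(sess) call. A self-contained TF1-style sketch of why that ordering matters (none of the names below are tensorpack's):

```python
import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
    x = tf.Variable(3.0, name='x')
    # '_setup_graph' equivalent: create the ops that initialization will need
    # while the graph can still be modified.
    init_op = tf.global_variables_initializer()

# 'create_session' equivalent: the graph is complete, so build the session.
sess = tf.Session(graph=graph)
# '_run_init' equivalent: actually run the initialization inside the session.
sess.run(init_op)
print(sess.run(x))  # -> 3.0
```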
......@@ -119,8 +119,8 @@ def add_tensor_summary(x, types, name=None, collections=None,
        return
    SUMMARY_TYPES_DIC = {
-       'scalar': lambda: tf.summary.scalar(name, x, collections=collections),
-       'histogram': lambda: tf.summary.histogram(name, x, collections=collections),
+       'scalar': lambda: tf.summary.scalar(name + '-summary', x, collections=collections),
+       'histogram': lambda: tf.summary.histogram(name + '-histogram', x, collections=collections),
        'sparsity': lambda: tf.summary.scalar(
            name + '-sparsity', tf.nn.zero_fraction(x),
            collections=collections),
......@@ -246,7 +246,7 @@ def add_moving_summary(*args, **kwargs):
            ema_ops.append(ema_op)
        with tf.name_scope(None):
            # cannot add it into colocate group -- will force everything to cpus
-           tf.summary.scalar(name, ema_op)  # write the EMA value as a summary
+           tf.summary.scalar(name + '-summary', ema_op)  # write the EMA value as a summary
    if coll is not None:
        for op in ema_ops:
            # TODO a new collection to summary every step?
......
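Both summary.py hunks re-attach a suffix to the summary name, and the Monitors hunk above strips it again before logging; presumably the suffix keeps the summary op's name from colliding with the tensor it summarizes (which TF would otherwise uniquify), per tensorflow#6150 / tensorboard#59. A minimal TF1-style sketch of the naming round trip:

```python
import tensorflow as tf

loss = tf.reduce_mean(tf.constant([1.0, 2.0, 3.0]), name='loss')
# Name the summary 'loss-summary' so it does not reuse the existing 'loss' op name;
# the monitor later strips '-summary', so the logged tag is simply 'loss'.
summ = tf.summary.scalar(loss.op.name + '-summary', loss)
```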