Commit bfad96d7 authored by Yuxin Wu

Merge branch 'master' into model-redesign

parents 8f8ae315 d38d22bf
@@ -2,7 +2,11 @@ Bug Reports/Feature Requests/Usage Questions Only:
 Bug Reports (including performance bug):
 Some part of code (either the library or examples) doesn't work as expected.
-Always include what you did, what you observed, what you expected.
+Always include the following:
+1. What you did. (the command you ran if using examples; post or describe your code if not)
+2. What you observed. (training logs)
+3. What you expected, if not obvious.
+4. Your environment (TF version, GPUs), if it matters.
 Feature Requests:
 1. Improve an existing feature.
...
@@ -39,8 +39,8 @@ To predict on an image (and show output in a window):
 ## Results
-+ trainval35k/minival, FASTRCNN_BATCH=256: 32.9
-+ trainval35k/minival, FASTRCNN_BATCH=64: 31.6. Takes less than one day on 8 Maxwell TitanX.
++ trainval35k/minival, FASTRCNN_BATCH=256: 33.4. Takes 49h on 8 TitanX.
++ trainval35k/minival, FASTRCNN_BATCH=64: 32.2. Takes 31h on 8 TitanX.
 The hyperparameters are not carefully tuned. You can probably get better performance by e.g. training longer.
...
@@ -191,9 +191,9 @@ def get_rpn_anchor_input(im, boxes, klass, is_crowd):
 def read_and_augment_images(ds):
     def mapf(dp):
         fname = dp[0]
-        im = cv2.imread(fname, cv2.IMREAD_COLOR).astype('float32')
-        assert im is not None, dp[0]
-        dp[0] = im
+        im = cv2.imread(fname, cv2.IMREAD_COLOR)
+        assert im is not None, fname
+        dp[0] = im.astype('float32')
         # assume floatbox as input
         assert dp[1].dtype == np.float32
...
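The reordering above fixes a real bug: cv2.imread returns None on a failed read instead of raising, so calling .astype('float32') before the None check produced an AttributeError rather than the intended assertion message. A minimal sketch of the failure mode (the filename is hypothetical):

    import cv2

    im = cv2.imread('does_not_exist.jpg', cv2.IMREAD_COLOR)  # returns None, no exception
    # Old order: im.astype('float32') raises AttributeError on None before the assert fires.
    # New order: the assert fails first and reports the offending filename.
    assert im is not None, 'does_not_exist.jpg'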
@@ -288,19 +288,19 @@ if __name__ == '__main__':
             # linear warmup
             ScheduledHyperParamSetter(
                 'learning_rate',
-                [(0, 0.003), (warmup_epoch * factor, 0.01)], interp='linear'),
+                [(0, 3e-3), (warmup_epoch * factor, 1e-2)], interp='linear'),
             # step decay
             ScheduledHyperParamSetter(
                 'learning_rate',
-                [(warmup_epoch * factor, 0.01),
-                 (120000 * factor // stepnum, 1e-3),
-                 (180000 * factor // stepnum, 1e-4)]),
+                [(warmup_epoch * factor, 1e-2),
+                 (150000 * factor // stepnum, 1e-3),
+                 (210000 * factor // stepnum, 1e-4)]),
             HumanHyperParamSetter('learning_rate'),
             EvalCallback(),
             GPUUtilizationTracker(),
         ],
         steps_per_epoch=stepnum,
-        max_epoch=205000 * factor // stepnum,
+        max_epoch=230000 * factor // stepnum,
         session_init=get_model_loader(args.load) if args.load else None,
     )
     trainer = SyncMultiGPUTrainerReplicated(get_nr_gpu())
...
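Besides rewriting the literals in scientific notation, this hunk lengthens the schedule: the decay boundaries move from 120k/180k steps (max 205k) to 150k/210k steps (max 230k), matching the longer training times quoted in the README hunk above. For orientation, a hedged sketch of how these global step counts map to epoch boundaries; the stepnum, factor, and warmup_epoch values below are placeholders, not the script's actual settings:

    # Hypothetical values, for illustration only.
    stepnum, factor, warmup_epoch = 300, 1, 3

    schedule = [(warmup_epoch * factor, 1e-2),
                (150000 * factor // stepnum, 1e-3),   # decay to 1e-3 at epoch 500
                (210000 * factor // stepnum, 1e-4)]   # decay to 1e-4 at epoch 700
    max_epoch = 230000 * factor // stepnum            # stop at epoch 766
    print(schedule, max_epoch)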
@@ -130,7 +130,8 @@ class Monitors(Callback):
             if val.WhichOneof('value') == 'simple_value':
                 val.tag = re.sub('tower[0-9]+/', '', val.tag)  # TODO move to subclasses
-                # TODO This hack not needed any more, can remove this in the future
+                # TODO This hack is still needed; the issue seems to disappear only
+                # when TF is compiled from source.
                 suffix = '-summary'  # tensorflow#6150, tensorboard#59
                 if val.tag.endswith(suffix):
                     val.tag = val.tag[:-len(suffix)]
...
@@ -41,6 +41,7 @@ def regularize_cost(regex, func, name='regularize_cost'):
         cost = cost + regularize_cost("fc.*/W", l2_regularizer(1e-5))
     """
+    assert len(regex)
     ctx = get_current_tower_context()
     if not ctx.is_training:
         # Currently cannot build the wd_cost correctly at inference,
...
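The new assert guards against passing an empty pattern: with Python's re matching (whether search- or match-style), an empty pattern matches every variable name, which would silently regularize everything. A quick illustration with made-up variable names:

    import re

    print(bool(re.search('', 'conv1/W')))        # True: empty pattern matches anything
    print(bool(re.search('fc.*/W', 'fc6/W')))    # True: intended match
    print(bool(re.search('fc.*/W', 'conv1/W')))  # False: correctly excluded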
@@ -163,7 +163,8 @@ class OfflinePredictor(OnlinePredictor):
         input_tensors = get_tensors_by_names(config.input_names)
         output_tensors = get_tensors_by_names(config.output_names)
+        config.session_init._setup_graph()
         sess = config.session_creator.create_session()
-        config.session_init.init(sess)
+        config.session_init._run_init(sess)
         super(OfflinePredictor, self).__init__(
             input_tensors, output_tensors, config.return_input, sess)
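This splits session initialization into two phases: _setup_graph() runs before create_session() so that any initialization ops are added while the graph is still being built, and _run_init(sess) executes them once the session exists. A toy mock of the ordering; these are not tensorpack's actual classes, only an illustration of the protocol implied by the diff:

    class MockSessionInit:
        """Hypothetical stand-in for the two-phase init seen above."""
        def _setup_graph(self):
            self.init_op = 'init_op'   # ops must exist before the session is created

        def _run_init(self, sess):
            print('running', self.init_op, 'in', sess)

    init = MockSessionInit()
    init._setup_graph()   # phase 1: before session_creator.create_session()
    sess = 'session'      # stand-in for the created session
    init._run_init(sess)  # phase 2: run the init ops in the new session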
@@ -119,8 +119,8 @@ def add_tensor_summary(x, types, name=None, collections=None,
         return
     SUMMARY_TYPES_DIC = {
-        'scalar': lambda: tf.summary.scalar(name, x, collections=collections),
-        'histogram': lambda: tf.summary.histogram(name, x, collections=collections),
+        'scalar': lambda: tf.summary.scalar(name + '-summary', x, collections=collections),
+        'histogram': lambda: tf.summary.histogram(name + '-histogram', x, collections=collections),
         'sparsity': lambda: tf.summary.scalar(
             name + '-sparsity', tf.nn.zero_fraction(x),
             collections=collections),
@@ -246,7 +246,7 @@ def add_moving_summary(*args, **kwargs):
         ema_ops.append(ema_op)
     with tf.name_scope(None):
         # cannot add it into colocate group -- will force everything to cpus
-        tf.summary.scalar(name, ema_op)  # write the EMA value as a summary
+        tf.summary.scalar(name + '-summary', ema_op)  # write the EMA value as a summary
     if coll is not None:
         for op in ema_ops:
             # TODO a new collection to summary every step?
...
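These two hunks append a '-summary' suffix so the summary op's name no longer collides with the name of the tensor it summarizes (the tensorflow#6150 / tensorboard#59 issue referenced above), and the Monitors hunk strips the suffix again before logging. A runnable sketch of that tag cleanup, using a made-up tag:

    import re

    tag = 'tower0/train-error-summary'
    tag = re.sub('tower[0-9]+/', '', tag)  # drop the tower prefix, as in Monitors
    suffix = '-summary'
    if tag.endswith(suffix):
        tag = tag[:-len(suffix)]
    print(tag)  # 'train-error'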