Commit 3e30bda4 authored by Yuxin Wu

Use fewer `prediction_incorrect`.

parent bf9da6d5
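For context: this commit replaces per-script calls to the `prediction_incorrect` helper with inline TensorFlow ops or with `ImageNetModel.compute_loss_and_error`. Judging from the inline replacements below, the retired helper is roughly equivalent to this sketch (the exact signature in tensorpack may differ):

    import tensorflow as tf

    def prediction_incorrect(logits, label, topk=1, name='incorrect_vector'):
        # 1.0 for each sample whose true label is NOT among the top-k logits
        return tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, topk)),
                       tf.float32, name=name)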
@@ -14,6 +14,8 @@ from tensorpack.dataflow import dataset
 from tensorpack.tfutils.symbolic_functions import *
 from tensorpack.utils.stats import RatioCounter
 from tensorpack.tfutils.varreplace import remap_variables
+from imagenet_utils import ImageNetModel, eval_on_ILSVRC12, fbresnet_augmentor
 from dorefa import get_dorefa
 """
@@ -110,15 +112,11 @@ class Model(ModelDesc):
                   .tf.multiply(49)  # this is due to a bug in our model design
                   .FullyConnected('fct', 1000)())
         prob = tf.nn.softmax(logits, name='output')
-        wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
-        wrong = prediction_incorrect(logits, label, 5, name='wrong-top5')
+        ImageNetModel.compute_loss_and_error(logits, label)

 def get_inference_augmentor():
-    return imgaug.AugmentorList([
-        imgaug.ResizeShortestEdge(256),
-        imgaug.CenterCrop(224),
-    ])
+    return fbresnet_augmentor(False)

 def run_image(model, sess_init, inputs):
@@ -148,26 +146,6 @@ def run_image(model, sess_init, inputs):
     print(list(zip(names, prob[ret])))

-def eval_on_ILSVRC12(model_path, data_dir):
-    ds = dataset.ILSVRC12(data_dir, 'val', shuffle=False)
-    ds = AugmentImageComponent(ds, get_inference_augmentor())
-    ds = BatchData(ds, 192, remainder=True)
-    pred_config = PredictConfig(
-        model=Model(),
-        session_init=get_model_loader(model_path),
-        input_names=['input', 'label'],
-        output_names=['wrong-top1', 'wrong-top5']
-    )
-    pred = SimpleDatasetPredictor(pred_config, ds)
-    acc1, acc5 = RatioCounter(), RatioCounter()
-    for o in pred.get_result():
-        batch_size = o[0].shape[0]
-        acc1.feed(o[0].sum(), batch_size)
-        acc5.feed(o[1].sum(), batch_size)
-    print("Top1 Error: {}".format(acc1.ratio))
-    print("Top5 Error: {}".format(acc5.ratio))
-
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--gpu', help='the physical ids of GPUs to use')
@@ -187,7 +165,10 @@ if __name__ == '__main__':
         os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
     if args.eval:
-        eval_on_ILSVRC12(args.load, args.data)
+        ds = dataset.ILSVRC12(args.data, 'val', shuffle=False)
+        ds = AugmentImageComponent(ds, get_inference_augmentor())
+        ds = BatchData(ds, 192, remainder=True)
+        eval_on_ILSVRC12(Model(), get_model_loader(args.load), ds)
     elif args.run:
         assert args.load.endswith('.npy')
         run_image(Model(), DictRestore(
...
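The local `eval_on_ILSVRC12` above was deleted in favor of the shared one in `imagenet_utils`, which now takes a model, a session initializer, and a prebuilt dataflow. Based on the deleted body, the shared version presumably looks something like this sketch (the `wrong-top1` / `wrong-top5` tensors are now produced by `ImageNetModel.compute_loss_and_error`):

    def eval_on_ILSVRC12(model, sessinit, dataflow):
        pred_config = PredictConfig(
            model=model,
            session_init=sessinit,
            input_names=['input', 'label'],
            output_names=['wrong-top1', 'wrong-top5'])
        pred = SimpleDatasetPredictor(pred_config, dataflow)
        acc1, acc5 = RatioCounter(), RatioCounter()
        for top1, top5 in pred.get_result():
            batch_size = top1.shape[0]
            acc1.feed(top1.sum(), batch_size)   # number of top-1 mistakes in the batch
            acc5.feed(top5.sum(), batch_size)   # number of top-5 mistakes in the batch
        print("Top1 Error: {}".format(acc1.ratio))
        print("Top5 Error: {}".format(acc5.ratio))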
@@ -102,7 +102,7 @@ class Model(ModelDesc):
         cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
         cost = tf.reduce_mean(cost, name='cross_entropy_loss')
-        wrong = prediction_incorrect(logits, label)
+        wrong = tf.to_float(tf.logical_not(tf.nn.in_top_k(logits, label, 1)), name='wrong_vector')
         # monitor training error
         add_moving_summary(tf.reduce_mean(wrong, name='train_error'))
@@ -167,7 +167,7 @@ if __name__ == '__main__':
         callbacks=[
             ModelSaver(),
             InferenceRunner(dataset_test,
-                            [ScalarStats('cost'), ClassificationError()]),
+                            [ScalarStats('cost'), ClassificationError('wrong_vector')]),
             ScheduledHyperParamSetter('learning_rate',
                                       [(1, 0.1), (82, 0.01), (123, 0.001), (300, 0.0002)])
         ],
...
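Note that `ClassificationError` locates the wrong-vector by tensor name, so the string passed to it must match the `name=` used when the vector is created ('wrong_vector' here, while the constructor's default remains 'incorrect_vector', as the `inference.py` hunk further below shows).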
@@ -157,7 +157,7 @@ class ImageNetModel(ModelDesc):
             image = tf.transpose(image, [0, 3, 1, 2])
         logits = self.get_logits(image)
-        loss = self.compute_loss_and_error(logits, label)
+        loss = ImageNetModel.compute_loss_and_error(logits, label)
         wd_loss = regularize_cost('.*/W', tf.contrib.layers.l2_regularizer(self.weight_decay),
                                   name='l2_regularize_loss')
         add_moving_summary(loss, wd_loss)
@@ -194,7 +194,8 @@ class ImageNetModel(ModelDesc):
         image = (image - image_mean) / image_std
         return image

-    def compute_loss_and_error(self, logits, label):
+    @staticmethod
+    def compute_loss_and_error(logits, label):
         loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
         loss = tf.reduce_mean(loss, name='xentropy-loss')
...
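Making `compute_loss_and_error` a `@staticmethod` is what lets the example scripts in this commit call `ImageNetModel.compute_loss_and_error(logits, label)` without constructing an `ImageNetModel` instance, replacing their per-script `prediction_incorrect` calls.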
@@ -18,11 +18,10 @@ from tensorflow.contrib.layers import variance_scaling_initializer
 from tensorpack import *
 from tensorpack.utils import logger
 from tensorpack.utils.stats import RatioCounter
-from tensorpack.tfutils.symbolic_functions import *
 from tensorpack.tfutils.summary import *
 from tensorpack.dataflow.dataset import ILSVRCMeta, ILSVRC12
-from imagenet_utils import eval_on_ILSVRC12, get_imagenet_dataflow
+from imagenet_utils import eval_on_ILSVRC12, get_imagenet_dataflow, ImageNetModel
 from resnet_model import resnet_group, resnet_bottleneck

 DEPTH = None
@@ -62,8 +61,7 @@ class Model(ModelDesc):
             .GlobalAvgPooling('gap')
             .FullyConnected('linear', 1000, nl=tf.identity)())
         prob = tf.nn.softmax(logits, name='prob')
-        prediction_incorrect(logits, label, name='wrong-top1')
-        prediction_incorrect(logits, label, 5, name='wrong-top5')
+        ImageNetModel.compute_loss_and_error(logits, label)

 def get_inference_augmentor():
...
@@ -14,7 +14,6 @@ os.environ['TENSORPACK_TRAIN_API'] = 'v2'   # will become default soon
 from tensorpack import *
 from tensorpack.dataflow import dataset
 from tensorpack.tfutils import sesscreate, optimizer, summary
-import tensorpack.tfutils.symbolic_functions as symbf

 IMAGE_SIZE = 42
 WARP_TARGET_SIZE = 28
@@ -81,7 +80,7 @@ class Model(ModelDesc):
         cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
         cost = tf.reduce_mean(cost, name='cross_entropy_loss')
-        wrong = symbf.prediction_incorrect(logits, label)
+        wrong = tf.to_float(tf.logical_not(tf.nn.in_top_k(logits, label, 1)), name='incorrect_vector')
         summary.add_moving_summary(tf.reduce_mean(wrong, name='train_error'))
         wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
...
@@ -47,10 +47,8 @@ class Model(ModelDesc):
             .FullyConnected('linear', out_dim=10, nl=tf.identity)())
         prob = tf.nn.softmax(logits, name='output')
-        # compute the number of failed samples, for ClassificationError to use at test time
-        wrong = prediction_incorrect(logits, label)
-        # monitor training error
-        add_moving_summary(tf.reduce_mean(wrong, name='train_error'))
+        acc = tf.to_float(tf.nn.in_top_k(logits, label, 1))
+        add_moving_summary(tf.reduce_mean(acc, name='accuracy'))
         cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
         cost = tf.reduce_mean(cost, name='cross_entropy_loss')
@@ -95,21 +93,6 @@ def get_data():
     return data_train, data_test

-def get_config():
-    data_train, data_test = get_data()
-    return TrainConfig(
-        model=Model(),
-        data=QueueInput(data_train),
-        callbacks=[
-            ModelSaver(),
-            InferenceRunner(data_test,
-                            [ScalarStats('cost'), ClassificationError()])
-        ],
-        max_epoch=350,
-    )
-
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
@@ -118,12 +101,19 @@ if __name__ == '__main__':
     if args.gpu:
         os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
-    else:
-        os.environ['CUDA_VISIBLE_DEVICES'] = '0'

     logger.auto_set_dir()
-    with tf.Graph().as_default():
-        config = get_config()
-        if args.load:
-            config.session_init = SaverRestore(args.load)
-        launch_train_with_config(config, SimpleTrainer())
+    data_train, data_test = get_data()
+    config = TrainConfig(
+        model=Model(),
+        data=QueueInput(data_train),
+        callbacks=[
+            ModelSaver(),
+            InferenceRunner(data_test,
+                            ScalarStats(['cost', 'accuracy']))
+        ],
+        max_epoch=350,
+        session_init=SaverRestore(args.load) if args.load else None
+    )
+    launch_train_with_config(config, SimpleTrainer())
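The `get_config()` helper is gone: the `TrainConfig` is now built inline, `session_init` is passed at construction time instead of mutating `config.session_init` afterwards, and `ScalarStats(['cost', 'accuracy'])` replaces `ClassificationError()` since the model now exports an `accuracy` tensor directly.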
@@ -133,7 +133,7 @@ class ScalarStats(Inferencer):
 class ClassificationError(Inferencer):
     """
-    Compute classification error in batch mode, from a ``wrong`` tensor.
+    Compute __true__ classification error in batch mode, from a ``wrong`` tensor.
     The ``wrong`` tensor is supposed to be a binary vector containing
     whether each sample in the batch is *incorrectly* classified.
@@ -145,14 +145,14 @@ class ClassificationError(Inferencer):
     testing (because the size of test set might not be a multiple of batch size).
     Therefore the result can be different from averaging the error rate of each batch.

-    You can also use the "correct prediction" tensor, so this inferencer will
+    You can also use the "correct prediction" tensor, then this inferencer will
     give you "classification accuracy" instead of error.
     """

     def __init__(self, wrong_tensor_name='incorrect_vector', summary_name='validation_error'):
         """
         Args:
-            wrong_tensor_name(str): name of the ``wrong`` tensor.
+            wrong_tensor_name(str): name of the ``wrong`` binary vector tensor.
             summary_name(str): the name to log the error with.
         """
         self.wrong_tensor_name = wrong_tensor_name
...
@@ -36,6 +36,9 @@ def get_distributed_session_creator(server):
             if is_chief:
                 return sm.prepare_session(master=server.target, init_op=init_op)
             else:
-                return sm.wait_for_session(master=server.target)
+                tf.logging.set_verbosity(tf.logging.INFO)   # print message about uninitialized vars
+                ret = sm.wait_for_session(master=server.target)
+                tf.logging.set_verbosity(tf.logging.WARN)
+                return ret

     return _Creator()
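Raising the verbosity to INFO only around `wait_for_session` makes `tf.train.SessionManager`'s messages about not-yet-initialized variables visible to non-chief workers while they block, without leaving INFO-level logging enabled afterwards.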
@@ -238,11 +238,19 @@ class TowerTensorHandles(object):
     def training(self):
         """
         Returns:
-            Still a :class:`TowerTensorHandles`, containing only the training towers.
+            A :class:`TowerTensorHandles`, containing only the training towers.
         """
         handles = [h for h in self._handles if h.is_training]
         return TowerTensorHandles(handles)

+    def inference(self):
+        """
+        Returns:
+            A :class:`TowerTensorHandles`, containing only the inference towers.
+        """
+        handles = [h for h in self._handles if not h.is_training]
+        return TowerTensorHandles(handles)
+
 class TowerTensorHandle(object):
     """
...
@@ -50,6 +50,15 @@ class TowerTrainer(Trainer):
         """
         return self.tower_func.inputs_desc

+    @property
+    def towers(self):
+        """
+        Returns:
+            a :class:`TowerTensorHandles` object, to
+            access the tower handles by either indices or names.
+        """
+        return self.tower_func.towers
+
     def get_predictor(self, input_names, output_names, device=0):
         """
         Returns a callable predictor built under ``TowerContext(is_training=False)``.
...
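A hypothetical usage sketch of the two additions above, combining the new `towers` property with the `TowerTensorHandles` accessors (names follow the docstrings; this is illustrative, not code from the commit):

    # inside a callback, after the trainer has built its towers
    handles = trainer.towers            # a TowerTensorHandles
    train_towers = handles.training()   # only towers built with is_training=True
    infer_towers = handles.inference()  # the new accessor: non-training towers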