Commit c68686e6 authored by Yuxin Wu

use uint8 in resnet example

parent 1a5d3f4f
@@ -13,9 +13,11 @@ Most of them are the best reproducible results on gym.
 It should run at a speed of 6~10 iteration/s on 1 GPU plus 12+ CPU cores.
 Training at a significantly slower speed (e.g. on CPU) will result in a very bad score,
 probably because of async issues.
 The pre-trained models are all trained with 4 GPUs for about 2 days.
 But note that multi-GPU doesn't give you an obvious speedup here,
-because the bottleneck is not computation but data.
+because the bottleneck is not computation but data. On machines without huge memory, you may also need to
+enable tcmalloc to keep training throughput more stable.
 
 Occasionally, processes may not get terminated completely; it is therefore suggested to use `systemd-run` to run any
 multiprocess Python program, to get a cgroup dedicated to the task.
@@ -22,10 +22,9 @@ See "Rethinking the Inception Architecture for Computer Vision", arxiv:1512.00567
 This config follows the official inceptionv3 setup
 (https://github.com/tensorflow/models/tree/master/inception/inception)
 with much fewer lines of code.
-It reaches 74% single-crop validation accuracy,
-and has the same running speed as the official code.
+It reaches 74% single-crop validation accuracy, similar to the official code.
 The hyperparameters here are for 8 GPUs, so the effective batch size is 8*64 = 512.
-With 8 TitanX it runs about 0.45 it/s.
 """
 TOTAL_BATCH_SIZE = 512
@@ -29,11 +29,16 @@ DEPTH = None
 class Model(ModelDesc):
     def _get_inputs(self):
-        return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
+        return [InputVar(tf.uint8, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
                 InputVar(tf.int32, [None], 'label')]
 
     def _build_graph(self, inputs):
         image, label = inputs
+        image = tf.cast(image, tf.float32) * (1.0 / 255)
+        image_mean = tf.constant([0.485, 0.456, 0.406], dtype=tf.float32)
+        image_std = tf.constant([0.229, 0.224, 0.225], dtype=tf.float32)
+        image = (image - image_mean) / image_std
+
         def shortcut(l, n_in, n_out, stride):
             if n_in != n_out:
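The point of the change above: images now stay uint8 through the entire input pipeline, a quarter of the bytes of float32 for every copy between processes and onto the GPU, while the cast to float and the per-channel ImageNet mean/std normalization happen once inside the TensorFlow graph. Below is a minimal standalone sketch of the same in-graph normalization, assuming the TF 1.x session API of this era; shapes and constants are copied from the diff.

    import numpy as np
    import tensorflow as tf

    # The placeholder receives raw uint8 images, as in the diff above.
    image_u8 = tf.placeholder(tf.uint8, [None, 224, 224, 3], name='input')

    # Cast to float and normalize with the standard ImageNet
    # per-channel mean/std; broadcasting applies them channel-wise.
    image = tf.cast(image_u8, tf.float32) * (1.0 / 255)
    mean = tf.constant([0.485, 0.456, 0.406], dtype=tf.float32)
    std = tf.constant([0.229, 0.224, 0.225], dtype=tf.float32)
    image = (image - mean) / std

    with tf.Session() as sess:
        batch = np.random.randint(0, 256, size=(4, 224, 224, 3), dtype=np.uint8)
        out = sess.run(image, feed_dict={image_u8: batch})
        print(out.dtype, out.shape)  # float32 (4, 224, 224, 3)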
@@ -121,9 +126,6 @@ def get_data(train_or_test):
     datadir = args.data
     ds = dataset.ILSVRC12(datadir, train_or_test,
                           shuffle=True if isTrain else False, dir_structure='original')
-    image_mean = np.array([0.485, 0.456, 0.406], dtype='float32')
-    image_std = np.array([0.229, 0.224, 0.225], dtype='float32')
-
     if isTrain:
         class Resize(imgaug.ImageAugmentor):
             """
@@ -164,18 +166,18 @@ def get_data(train_or_test):
             )]),
             imgaug.Clip(),
             imgaug.Flip(horiz=True),
-            imgaug.MapImage(lambda x: (x * (1.0 / 255) - image_mean) / image_std),
+            imgaug.ToUint8()
         ]
     else:
         augmentors = [
             imgaug.ResizeShortestEdge(256),
             imgaug.CenterCrop((224, 224)),
-            imgaug.MapImage(lambda x: (x * (1.0 / 255) - image_mean) / image_std),
+            imgaug.ToUint8()
         ]
     ds = AugmentImageComponent(ds, augmentors)
-    ds = BatchData(ds, BATCH_SIZE, remainder=not isTrain)
     if isTrain:
         ds = PrefetchDataZMQ(ds, min(20, multiprocessing.cpu_count()))
+    ds = BatchData(ds, BATCH_SIZE, remainder=not isTrain)
     return ds
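With `imgaug.ToUint8()` ending the augmentor list and `BatchData` moved after `PrefetchDataZMQ`, each ZMQ message carries a single uint8 image rather than part of a float32 batch. A quick back-of-envelope in plain numpy of what keeping uint8 saves, using 512 as an illustrative total batch size (the figure from the inception config earlier in this commit):

    import numpy as np

    # One augmented 224x224x3 training image, in both dtypes.
    img_uint8 = np.zeros((224, 224, 3), dtype=np.uint8)
    img_float32 = img_uint8.astype(np.float32)

    print(img_uint8.nbytes)    # 150528 bytes  (~147 KB)
    print(img_float32.nbytes)  # 602112 bytes  (~588 KB)

    # Per 512-image batch, the uint8 pipeline moves ~220 MiB less
    # through the ZMQ pipe and into the training process.
    saving = (img_float32.nbytes - img_uint8.nbytes) * 512
    print(saving / 2**20)  # ~220.5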
@@ -38,7 +38,9 @@ class TestDataSpeed(ProxyDataFlow):
         Start testing with a progress bar.
         """
         self.ds.reset_state()
-        with get_tqdm(total=self.test_size, leave=True) as pbar:
+        # add smoothing for speed benchmark
+        with get_tqdm(total=self.test_size,
+                      leave=True, smoothing=0.2) as pbar:
             for idx, dp in enumerate(self.ds.get_data()):
                 pbar.update()
                 if idx == self.test_size - 1:
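tqdm's `smoothing` argument sets the weight of the exponential moving average behind the displayed rate: 0 averages over the whole run, 1 shows only the most recent item's speed (the library default is 0.3). Lowering it to 0.2 steadies the it/s readout when per-item latency is uneven, which suits a benchmark. A small self-contained illustration; the sleep pattern is made up:

    import time
    from tqdm import tqdm

    # smoothing=0.2 damps the rate estimate, so occasional slow
    # items (every 10th here) wobble the displayed it/s less.
    with tqdm(total=100, leave=True, smoothing=0.2) as pbar:
        for i in range(100):
            time.sleep(0.2 if i % 10 == 0 else 0.05)  # simulated uneven latency
            pbar.update()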