update and fix grad bug

a41f9e83 · Yuxin Wu · a4371695 · a41f9e83 · a41f9e83 · a41f9e83
Commit a41f9e83 authored Apr 08, 2016 by Yuxin Wu
12 changed files
--- a/examples/cifar10_convnet.py
+++ b/examples/cifar10_convnet.py
@@ -67,8 +67,7 @@ class Model(ModelDesc):
        l = FullyConnected('fc1', l, out_dim=512,
                           b_init=tf.constant_initializer(0.1))
        # fc will have activation summary by default. disable for the output layer
-        logits = FullyConnected('linear', l, out_dim=10, summary_activation=False,
-                                nl=tf.identity)
+        logits = FullyConnected('linear', l, out_dim=10, nl=tf.identity)
        prob = tf.nn.softmax(logits, name='output')

        y = one_hot(label, 10)

--- a/examples/cifar10_resnet.py
+++ b/examples/cifar10_resnet.py
@@ -102,8 +102,7 @@ class Model(ModelDesc):
        l = tf.nn.relu(l)
        # 8,c=64
        l = GlobalAvgPooling('gap', l)
-        logits = FullyConnected('linear', l, out_dim=10, summary_activation=False,
-                                nl=tf.identity)
+        logits = FullyConnected('linear', l, out_dim=10, nl=tf.identity)
        prob = tf.nn.softmax(logits, name='output')

        y = one_hot(label, 10)

--- a/examples/load_alexnet.py
+++ b/examples/load_alexnet.py
@@ -56,7 +56,7 @@ class Model(ModelDesc):
        l = FullyConnected('fc6', l, 4096)
        l = FullyConnected('fc7', l, out_dim=4096)
        # fc will have activation summary by default. disable this for the output layer
-        logits = FullyConnected('fc8', l, out_dim=1000, summary_activation=False, nl=tf.identity)
+        logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
        prob = tf.nn.softmax(logits, name='output')

        y = one_hot(label, 1000)

--- a/examples/load_vgg16.py
+++ b/examples/load_vgg16.py
@@ -64,7 +64,7 @@ class Model(ModelDesc):
        l = tf.nn.dropout(l, keep_prob)
        l = FullyConnected('fc7', l, 4096)
        l = tf.nn.dropout(l, keep_prob)
-        logits = FullyConnected('fc8', l, out_dim=1000, summary_activation=False, nl=tf.identity)
+        logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
        prob = tf.nn.softmax(logits, name='output')

        y = one_hot(label, 1000)

--- a/examples/mnist_convnet.py
+++ b/examples/mnist_convnet.py
@@ -54,8 +54,7 @@ class Model(ModelDesc):
        l = tf.nn.dropout(l, keep_prob)

        # fc will have activation summary by default. disable this for the output layer
-        logits = FullyConnected('fc1', l, out_dim=10,
-                             summary_activation=False, nl=tf.identity)
+        logits = FullyConnected('fc1', l, out_dim=10, nl=tf.identity)
        prob = tf.nn.softmax(logits, name='prob')

        y = one_hot(label, 10)

--- a/examples/svhn_digit_convnet.py
+++ b/examples/svhn_digit_convnet.py
@@ -45,8 +45,7 @@ class Model(ModelDesc):
        l = FullyConnected('fc0', l, 512,
                           b_init=tf.constant_initializer(0.1))
        # fc will have activation summary by default. disable for the output layer
-        logits = FullyConnected('linear', l, out_dim=10, summary_activation=False,
-                                nl=tf.identity)
+        logits = FullyConnected('linear', l, out_dim=10, nl=tf.identity)
        prob = tf.nn.softmax(logits, name='output')

        y = one_hot(label, 10)

--- a/tensorpack/dataflow/dataset/ilsvrc.py
+++ b/tensorpack/dataflow/dataset/ilsvrc.py
@@ -11,12 +11,15 @@ from ...utils import logger, get_rng
 from ..base import DataFlow
 from ...utils.fs import mkdir_p, download

-__all__ = ['ILSVRCMeta']
+__all__ = ['ILSVRCMeta', 'ILSVRC12']

 CAFFE_ILSVRC12_URL = "http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz"
 CAFFE_PROTO_URL = "https://github.com/BVLC/caffe/raw/master/src/caffe/proto/caffe.proto"

 class ILSVRCMeta(object):
+    """
+    Provide metadata for ILSVRC dataset.
+    """
    def __init__(self, dir=None):
        if dir is None:
            dir = os.path.join(os.path.dirname(__file__), 'ilsvrc_metadata')
@@ -24,15 +27,18 @@ class ILSVRCMeta(object):
        mkdir_p(self.dir)
        self.caffe_pb_file = os.path.join(self.dir, 'caffe_pb2.py')
        if not os.path.isfile(self.caffe_pb_file):
-            self.download_caffe_meta()
+            self._download_caffe_meta()

    def get_synset_words_1000(self):
+        """
+        :returns: a dict of {cls_number: cls_name}
+        """
        fname = os.path.join(self.dir, 'synset_words.txt')
        assert os.path.isfile(fname)
        lines = [x.strip() for x in open(fname).readlines()]
        return dict(enumerate(lines))

-    def download_caffe_meta(self):
+    def _download_caffe_meta(self):
        fpath = download(CAFFE_ILSVRC12_URL, self.dir)
        tarfile.open(fpath, 'r:gz').extractall(self.dir)

@@ -41,6 +47,10 @@ class ILSVRCMeta(object):
        assert ret == 0, "caffe proto compilation failed!"

    def get_image_list(self, name):
+        """
+        :param name: 'train' or 'val' or 'test'
+        :returns: list of (image filename, class label) tuples
+        """
        assert name in ['train', 'val', 'test']
        fname = os.path.join(self.dir, name + '.txt')
        assert os.path.isfile(fname)
@@ -51,10 +61,9 @@ class ILSVRCMeta(object):
                ret.append((name, int(cls)))
            return ret

-    def load_mean(self):
+    def get_per_pixel_mean(self):
        """
-        return per-pixel mean as an array of shape
-         (3, 256, 256) in range [0, 255]
+        :returns: per-pixel mean as an array of shape (3, 256, 256) in range [0, 255]
        """
        import imp
        caffepb = imp.load_source('caffepb', self.caffe_pb_file)
@@ -83,9 +92,15 @@ class ILSVRC12(DataFlow):
        return len(self.imglist)

    def reset_state(self):
+        """
+        reset rng for shuffle
+        """
        self.rng = get_rng(self)

    def get_data(self):
+        """
+        Produce original images of shape [h, w, 3], and label
+        """
        idxs = np.arange(len(self.imglist))
        if self.shuffle:
            self.rng.shuffle(idxs)
@@ -99,7 +114,7 @@ class ILSVRC12(DataFlow):

 if __name__ == '__main__':
    meta = ILSVRCMeta()
-    print meta.load_mean()
+    print meta.get_per_pixel_mean()
    #print(meta.get_synset_words_1000())

    #ds = ILSVRC12('/home/wyx/data/imagenet', 'val')

--- a/tensorpack/dataflow/imgaug/deform.py
+++ b/tensorpack/dataflow/imgaug/deform.py
@@ -3,6 +3,7 @@
 # Author: Yuxin Wu <ppwwyyxx@gmail.com>

 from .base import ImageAugmentor
+from ...utils import logger
 import numpy as np

 __all__ = ['GaussianDeform', 'GaussianMap']
@@ -63,6 +64,7 @@ class GaussianDeform(ImageAugmentor):
        :param sigma: sigma for Gaussian weight
        :param randrange: default to shape[0] / 8
        """
+        logger.warn("GaussianDeform is slow. Consider using it with 4 or more prefetching processes.")
        super(GaussianDeform, self).__init__()
        self.anchors = anchors
        self.K = len(self.anchors)

--- a/tensorpack/models/conv2d.py
+++ b/tensorpack/models/conv2d.py
@@ -10,7 +10,7 @@ from ._common import *

 __all__ = ['Conv2D']

-@layer_register(summary_activation=True)
+@layer_register()
 def Conv2D(x, out_channel, kernel_shape,
           padding='SAME', stride=1,
           W_init=None, b_init=None,

--- a/tensorpack/models/fc.py
+++ b/tensorpack/models/fc.py
@@ -11,7 +11,7 @@ from ..tfutils.symbolic_functions import *

 __all__ = ['FullyConnected']

-@layer_register(summary_activation=True)
+@layer_register()
 def FullyConnected(x, out_dim,
                   W_init=None, b_init=None,
                   nl=tf.nn.relu, use_bias=True):

--- a/tensorpack/tfutils/gradproc.py
+++ b/tensorpack/tfutils/gradproc.py
@@ -21,7 +21,7 @@ class GradientProcessor(object):
        :param grads: list of (grad, var)
        :returns: symbolic gradients with the same type as input
        """
-        self._process(grads)
+        return self._process(grads)

    @abstractmethod
    def _process(self, grads):

--- a/tensorpack/train/base.py
+++ b/tensorpack/train/base.py
@@ -57,7 +57,7 @@ class Trainer(object):
            logger.LOG_DIR, graph=self.sess.graph)
        self.summary_op = tf.merge_all_summaries()
        # create an empty StatHolder
-        self.stat_holder = StatHolder(logger.LOG_DIR, [])
+        self.stat_holder = StatHolder(logger.LOG_DIR)

    def _process_summary(self, summary_str):
        summary = tf.Summary.FromString(summary_str)