Commit 174a62c0 authored by Yuxin Wu

HED readme

parent 3c27064c
@@ -67,3 +67,9 @@ target/
*.log*
model-*
.gitignore
*.caffemodel
*.png
*.jpg
checkpoint
*.json
*.prototxt
## Holistically-Nested Edge Detection
Reproduce the HED paper by Saining Xie and Zhuowen Tu. See [https://arxiv.org/abs/1504.06375](https://arxiv.org/abs/1504.06375).
![HED](demo.jpg)
(Bottom left: the raw fused heatmap; middle and right columns: raw heatmaps at different stages.)
HED is a fully-convolutional architecture. This code would generally also work
for other FCN tasks such as semantic segmentation and detection.
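For background, HED attaches a supervised side output to each backbone conv stage and fuses them with a learned 1x1 convolution. Below is a minimal TF1-style sketch of that structure; the helper `hed_heads`, its arguments, and the layer names are illustrative assumptions, not this repo's tensorpack code:
```
import tensorflow as tf  # TF1-style graph code, to match the era of this example

def hed_heads(stage_feats, img_hw):
    # One single-channel 1x1-conv side output per backbone stage,
    # bilinearly upsampled back to the input resolution.
    side_logits = []
    for i, feat in enumerate(stage_feats):
        s = tf.layers.conv2d(feat, 1, 1, name='side{}'.format(i + 1))
        side_logits.append(tf.image.resize_bilinear(s, img_hw))
    # Fuse with a 1x1 conv over the concatenated side outputs; the constant
    # init of 0.2 (roughly 1/5) mirrors the fusion weight used in hed.py.
    fused = tf.layers.conv2d(
        tf.concat(side_logits, axis=3), 1, 1, use_bias=False,
        kernel_initializer=tf.constant_initializer(0.2), name='fuse')
    # Each of the six maps gets its own sigmoid cross-entropy loss.
    return side_logits + [fused]
```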
## Usage
This script only needs the original BSDS dataset and applies augmentation on the fly.
It will automatically download the dataset to `$TENSORPACK_DATASET/` if it is not already there.
It requires a pretrained vgg16 model. See the docs in [examples/load-vgg16.py](../load-vgg16.py)
for instructions on converting from the vgg16 caffe model.
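The converted file is expected to be a pickled dict mapping parameter names to numpy arrays; that is what tensorpack's `get_model_loader`/`ParamRestore` load for `.npy` files (see the diff below). A quick sanity check before training; the exact parameter names depend on the conversion script:
```
import numpy as np

# Load the parameters the same way ParamRestore does for .npy files
# and list what is inside.
params = np.load('vgg16.npy', encoding='latin1').item()
for name, arr in sorted(params.items()):
    print(name, arr.shape)
```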
To view augmented training images:
```
./hed.py --view
```
To start training:
```
./hed.py --load vgg16.npy
```
To run inference (this writes one heatmap per level to out*.png):
```
./hed.py --load pretrained.model --run a.jpg
```
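The last of these maps is the fused output. Below is a small sketch of binarizing it with the same 0.5 threshold the training code uses for its error metric; the filename `out6.png` is an assumption based on the six-level loop in hed.py:
```
import cv2

# 127 is 0.5 on the [0, 255] scale of an 8-bit grayscale heatmap.
fused = cv2.imread('out6.png', cv2.IMREAD_GRAYSCALE)
edges = (fused > 127).astype('uint8') * 255
cv2.imwrite('edges.png', edges)
```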
To view the loss curve:
```
cat train_log/hed/stat.json | jq '.[] |
[.xentropy1,.xentropy2,.xentropy3,.xentropy4,.xentropy5,.xentropy6] |
map(tostring) | join("\t") | .' -r | \
../../scripts/plot-point.py --legend 1,2,3,4,5,final --decay 0.8
```
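If jq or plot-point.py is not available, the same curves can be drawn directly in Python. This assumes, as the jq filter above does, that stat.json is a JSON array of per-epoch dicts containing the six xentropy keys:
```
import json
import matplotlib.pyplot as plt

with open('train_log/hed/stat.json') as f:
    stats = json.load(f)
# One curve per side output, plus the fused ('final') output.
for i, label in zip(range(1, 7), ['1', '2', '3', '4', '5', 'final']):
    plt.plot([s['xentropy{}'.format(i)] for s in stats], label=label)
plt.xlabel('epoch')
plt.ylabel('cross-entropy')
plt.legend()
plt.savefig('loss-curve.png')
```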
@@ -14,35 +14,6 @@ from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *
from tensorpack.tfutils.summary import *
"""
Script to reproduce 'Holistically-Nested Edge Detection' by Saining, et al. See https://arxiv.org/abs/1504.06375.
HED is a fully-convolutional architecture. This code generally would also work
for other FCN tasks such as semantic segmentation and detection.
Usage:
This script only needs the original BSDS dataset and applies augmentation on the fly.
It will automatically download the dataset to $TENSORPACK_DATASET/ if not there.
It requires pretrained vgg16 model. See the docs in `examples/load-vgg16.py`
for instructions to convert from vgg16 caffe model.
To view augmented images:
./hed.py --view
To start training:
./hed.py --load vgg16.npy
To inference (produce heatmap at each level):
./hed.py --load pretrained.model --run a.jpg
To view the loss curve:
cat train_log/hed/stat.json | jq '.[] |
[.xentropy1,.xentropy2,.xentropy3,.xentropy4,.xentropy5,.xentropy6] |
map(tostring) | join("\t") | .' -r | \
../../scripts/plot-point.py --legend 1,2,3,4,5,final --decay 0.8
"""
class Model(ModelDesc):
    def _get_input_vars(self):
        return [InputVar(tf.float32, [None, None, None] + [3], 'image'),
@@ -58,8 +29,6 @@ class Model(ModelDesc):
                       use_bias=True,
                       W_init=tf.zeros_initializer,
                       b_init=tf.zeros_initializer)
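            # upsample the side output back to input resolution,
            # one 2x bilinear step at a time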
            while up != 1:
                l = BilinearUpSample('upsample{}'.format(up), l, 2)
                up = up / 2
@@ -98,8 +67,6 @@ class Model(ModelDesc):
                            W_init=tf.constant_initializer(0.2),
                            use_bias=False, nl=tf.identity)
        final_map = tf.squeeze(final_map, [3], name='predmap')
        costs = []
        for idx, b in enumerate([b1, b2, b3, b4, b5, final_map]):
            output = tf.nn.sigmoid(b, name='output{}'.format(idx+1))
@@ -108,6 +75,7 @@ class Model(ModelDesc):
                                        name='xentropy{}'.format(idx+1))
            costs.append(xentropy)
            # binarize at a magic threshold of 0.5
            pred = tf.cast(tf.greater(output, 0.5), tf.int32, name='prediction')
            wrong = tf.cast(tf.not_equal(pred, edgemap), tf.float32)
            wrong = tf.reduce_mean(wrong, name='train_error')
@@ -122,8 +90,7 @@ class Model(ModelDesc):
        self.cost = tf.add_n(costs, name='cost')
    def get_gradient_processor(self):
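        # fine-tuning heuristic: scale down gradients of the weights converted
        # from vgg's fc layers (0.1x) and scale up conv5 (5x), matched by regex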
        return [ScaleGradient([('convfcweight.*', 0.1), ('conv5_.*', 5)])]
def get_data(name):
    isTrain = name == 'train'
@@ -201,18 +168,17 @@ def get_config():
    return TrainConfig(
        dataset=dataset_train,
        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
        callbacks=Callbacks([
            StatPrinter(),
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(30, 6e-6), (45, 1e-6), (60, 8e-7)]),
            HumanHyperParamSetter('learning_rate'),
            InferenceRunner(dataset_val,
                BinaryClassificationStats('prediction', 'edgemap'))
        ]),
        model=Model(),
        step_per_epoch=step_per_epoch,
        max_epoch=100,
    )
def run(model_path, image_path):
@@ -224,7 +190,7 @@ def run(model_path, image_path):
    predict_func = get_predict_func(pred_config)
    im = cv2.imread(image_path)
    assert im is not None
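    # cv2.resize takes (width, height); round each side down to a
    # multiple of 16 to match the network's total downsampling stride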
    im = cv2.resize(im, (im.shape[1] // 16 * 16, im.shape[0] // 16 * 16))
    outputs = predict_func([[im.astype('float32')]])
    for k in range(6):
        pred = outputs[k][0]
@@ -238,15 +204,14 @@ if __name__ == '__main__':
    parser.add_argument('--view', help='view dataset', action='store_true')
    parser.add_argument('--run', help='run model on images')
    args = parser.parse_args()
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if args.view:
        view_data()
    elif args.run:
        run(args.load, args.run)
    else:
        config = get_config()
        if args.load:
            config.session_init = get_model_loader(args.load)
...
@@ -41,11 +41,6 @@ class Model(ModelDesc):
        image, label = input_vars
        image = image / 128.0 - 1
        def residual(name, l, increase_dim=False, first=False):
            shape = l.get_shape().as_list()
            in_channel = shape[3]
@@ -63,10 +58,10 @@ class Model(ModelDesc):
                b1 = tf.nn.relu(b1)
            else:
                b1 = l
            c1 = Conv2D('conv1', b1, out_channel, stride=stride1)
            b2 = BatchNorm('bn2', c1)
            b2 = tf.nn.relu(b2)
            c2 = Conv2D('conv2', b2, out_channel)
            if increase_dim:
                l = AvgPooling('pool', l, 2)
@@ -75,26 +70,29 @@ class Model(ModelDesc):
            l = c2 + l
            return l
        with argscope(Conv2D, nl=tf.identity, use_bias=False, kernel_shape=3,
                      W_init=variance_scaling_initializer(mode='FAN_OUT')):
            l = Conv2D('conv0', image, 16)
            l = BatchNorm('bn0', l)
            l = tf.nn.relu(l)
            l = residual('res1.0', l, first=True)
            for k in range(1, self.n):
                l = residual('res1.{}'.format(k), l)
            # 32,c=16

            l = residual('res2.0', l, increase_dim=True)
            for k in range(1, self.n):
                l = residual('res2.{}'.format(k), l)
            # 16,c=32

            l = residual('res3.0', l, increase_dim=True)
            for k in range(1, self.n):
                l = residual('res3.' + str(k), l)
            l = BatchNorm('bnlast', l)
            l = tf.nn.relu(l)
            # 8,c=64
            l = GlobalAvgPooling('gap', l)
        logits = FullyConnected('linear', l, out_dim=10, nl=tf.identity)
        prob = tf.nn.softmax(logits, name='output')
...
@@ -27,7 +27,7 @@ class StatHolder(object):
        self.log_dir = log_dir
        self.filename = os.path.join(log_dir, 'stat.json')
        if os.path.isfile(self.filename):
logger.info("Loading stats from {}...".format(self.filename)) logger.info("Found stats at {}, will append to it.".format(self.filename))
            with open(self.filename) as f:
                self.stat_history = json.load(f)
        else:
...
@@ -183,6 +183,10 @@ class ChainInit(SessionInit):
def get_model_loader(filename):
"""
Get a corresponding model loader by looking at the file name
:return: either a ParamRestore or SaverRestore
"""
    if filename.endswith('.npy'):
        return ParamRestore(np.load(filename, encoding='latin1').item())
    else:
...
@@ -31,6 +31,9 @@ class MultiGPUTrainer(QueueInputTrainer):
        with tf.name_scope('AvgGrad'):
            for grad_and_vars in zip(*tower_grads):
                v = grad_and_vars[0][1]
                for x in grad_and_vars:
                    assert x[0] is not None, \
                        "Gradient w.r.t {} is None!".format(v.name)
                try:
                    grad = tf.add_n([x[0] for x in grad_and_vars]) / float(len(tower_grads))
                except:
...