Commit 174a62c0 authored by Yuxin Wu

HED readme

parent 3c27064c
@@ -67,3 +67,9 @@ target/
*.log*
model-*
.gitignore
*.caffemodel
*.png
*.jpg
checkpoint
*.json
*.prototxt
## Holistically-Nested Edge Detection
Reproduce the HED paper by Saining Xie and Zhuowen Tu. See [https://arxiv.org/abs/1504.06375](https://arxiv.org/abs/1504.06375).
![HED](demo.jpg)
(Bottom left: the raw fused heatmap; middle and right columns: raw heatmaps at different stages.)
HED is a fully-convolutional architecture. This code would generally also work
for other FCN tasks such as semantic segmentation and detection.
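For background, HED attaches a supervised side output to each backbone conv stage and fuses them with a learned 1x1 convolution. Below is a minimal TF1-style sketch of that structure; the helper `hed_heads`, its arguments, and the layer names are illustrative assumptions, not this repo's tensorpack code:
```
import tensorflow as tf  # TF1-style graph code, to match the era of this example

def hed_heads(stage_feats, img_hw):
    # One single-channel 1x1-conv side output per backbone stage,
    # bilinearly upsampled back to the input resolution.
    side_logits = []
    for i, feat in enumerate(stage_feats):
        s = tf.layers.conv2d(feat, 1, 1, name='side{}'.format(i + 1))
        side_logits.append(tf.image.resize_bilinear(s, img_hw))
    # Fuse with a 1x1 conv over the concatenated side outputs; the constant
    # init of 0.2 (roughly 1/5) mirrors the fusion weight used in hed.py.
    fused = tf.layers.conv2d(
        tf.concat(side_logits, axis=3), 1, 1, use_bias=False,
        kernel_initializer=tf.constant_initializer(0.2), name='fuse')
    # Each of the six maps gets its own sigmoid cross-entropy loss.
    return side_logits + [fused]
```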
## Usage
This script only needs the original BSDS dataset and applies augmentation on the fly.
It will automatically download the dataset to `$TENSORPACK_DATASET/` if it is not already there.
It requires a pretrained vgg16 model. See the docs in [examples/load-vgg16.py](../load-vgg16.py)
for instructions on converting from the vgg16 caffe model.
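The converted file is expected to be a pickled dict mapping parameter names to numpy arrays; that is what tensorpack's `get_model_loader`/`ParamRestore` load for `.npy` files (see the diff below). A quick sanity check before training; the exact parameter names depend on the conversion script:
```
import numpy as np

# Load the parameters the same way ParamRestore does for .npy files
# and list what is inside.
params = np.load('vgg16.npy', encoding='latin1').item()
for name, arr in sorted(params.items()):
    print(name, arr.shape)
```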
To view augmented training images:
```
./hed.py --view
```
To start training:
```
./hed.py --load vgg16.npy
```
To run inference (this writes one heatmap per level to out*.png):
```
./hed.py --load pretrained.model --run a.jpg
```
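The last of these maps is the fused output. Below is a small sketch of binarizing it with the same 0.5 threshold the training code uses for its error metric; the filename `out6.png` is an assumption based on the six-level loop in hed.py:
```
import cv2

# 127 is 0.5 on the [0, 255] scale of an 8-bit grayscale heatmap.
fused = cv2.imread('out6.png', cv2.IMREAD_GRAYSCALE)
edges = (fused > 127).astype('uint8') * 255
cv2.imwrite('edges.png', edges)
```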
To view the loss curve:
```
cat train_log/hed/stat.json | jq '.[] |
[.xentropy1,.xentropy2,.xentropy3,.xentropy4,.xentropy5,.xentropy6] |
map(tostring) | join("\t") | .' -r | \
../../scripts/plot-point.py --legend 1,2,3,4,5,final --decay 0.8
```
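If jq or plot-point.py is not available, the same curves can be drawn directly in Python. This assumes, as the jq filter above does, that stat.json is a JSON array of per-epoch dicts containing the six xentropy keys:
```
import json
import matplotlib.pyplot as plt

with open('train_log/hed/stat.json') as f:
    stats = json.load(f)
# One curve per side output, plus the fused ('final') output.
for i, label in zip(range(1, 7), ['1', '2', '3', '4', '5', 'final']):
    plt.plot([s['xentropy{}'.format(i)] for s in stats], label=label)
plt.xlabel('epoch')
plt.ylabel('cross-entropy')
plt.legend()
plt.savefig('loss-curve.png')
```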
@@ -14,35 +14,6 @@ from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *
from tensorpack.tfutils.summary import *
"""
Script to reproduce 'Holistically-Nested Edge Detection' by Saining, et al. See https://arxiv.org/abs/1504.06375.
HED is a fully-convolutional architecture. This code generally would also work
for other FCN tasks such as semantic segmentation and detection.
Usage:
This script only needs the original BSDS dataset and applies augmentation on the fly.
It will automatically download the dataset to $TENSORPACK_DATASET/ if not there.
It requires pretrained vgg16 model. See the docs in `examples/load-vgg16.py`
for instructions to convert from vgg16 caffe model.
To view augmented images:
./hed.py --view
To start training:
./hed.py --load vgg16.npy
To inference (produce heatmap at each level):
./hed.py --load pretrained.model --run a.jpg
To view the loss curve:
cat train_log/hed/stat.json | jq '.[] |
[.xentropy1,.xentropy2,.xentropy3,.xentropy4,.xentropy5,.xentropy6] |
map(tostring) | join("\t") | .' -r | \
../../scripts/plot-point.py --legend 1,2,3,4,5,final --decay 0.8
"""
class Model(ModelDesc):
    def _get_input_vars(self):
        return [InputVar(tf.float32, [None, None, None] + [3], 'image'),
@@ -58,8 +29,6 @@ class Model(ModelDesc):
                       use_bias=True,
                       W_init=tf.zeros_initializer,
                       b_init=tf.zeros_initializer)
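            # upsample the side output back to input resolution,
            # one 2x bilinear step at a time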
            while up != 1:
                l = BilinearUpSample('upsample{}'.format(up), l, 2)
                up = up / 2
@@ -98,8 +67,6 @@ class Model(ModelDesc):
                            W_init=tf.constant_initializer(0.2),
                            use_bias=False, nl=tf.identity)
        final_map = tf.squeeze(final_map, [3], name='predmap')
        costs = []
        for idx, b in enumerate([b1, b2, b3, b4, b5, final_map]):
            output = tf.nn.sigmoid(b, name='output{}'.format(idx+1))
@@ -108,6 +75,7 @@ class Model(ModelDesc):
                                        name='xentropy{}'.format(idx+1))
            costs.append(xentropy)
            # binarize at a magic threshold of 0.5
            pred = tf.cast(tf.greater(output, 0.5), tf.int32, name='prediction')
            wrong = tf.cast(tf.not_equal(pred, edgemap), tf.float32)
            wrong = tf.reduce_mean(wrong, name='train_error')
@@ -122,8 +90,7 @@ class Model(ModelDesc):
        self.cost = tf.add_n(costs, name='cost')
    def get_gradient_processor(self):
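        # fine-tuning heuristic: scale down gradients of the weights converted
        # from vgg's fc layers (0.1x) and scale up conv5 (5x), matched by regex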
        return [ScaleGradient([('convfcweight.*', 0.1), ('conv5_.*', 5)])]
def get_data(name):
    isTrain = name == 'train'
@@ -201,18 +168,17 @@ def get_config():
    return TrainConfig(
        dataset=dataset_train,
        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
        callbacks=Callbacks([
            StatPrinter(),
            ModelSaver(),
            ScheduledHyperParamSetter('learning_rate', [(30, 6e-6), (45, 1e-6), (60, 8e-7)]),
            HumanHyperParamSetter('learning_rate'),
            InferenceRunner(dataset_val,
                BinaryClassificationStats('prediction', 'edgemap'))
        ]),
        model=Model(),
        step_per_epoch=step_per_epoch,
        max_epoch=100,
    )
def run(model_path, image_path):
@@ -224,7 +190,7 @@ def run(model_path, image_path):
    predict_func = get_predict_func(pred_config)
    im = cv2.imread(image_path)
    assert im is not None
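    # cv2.resize takes (width, height); round each side down to a
    # multiple of 16 to match the network's total downsampling stride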
    im = cv2.resize(im, (im.shape[1] // 16 * 16, im.shape[0] // 16 * 16))
    outputs = predict_func([[im.astype('float32')]])
    for k in range(6):
        pred = outputs[k][0]
@@ -238,15 +204,14 @@ if __name__ == '__main__':
    parser.add_argument('--view', help='view dataset', action='store_true')
    parser.add_argument('--run', help='run model on images')
    args = parser.parse_args()
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    if args.view:
        view_data()
    elif args.run:
        run(args.load, args.run)
    else:
        config = get_config()
        if args.load:
            config.session_init = get_model_loader(args.load)
...
@@ -41,11 +41,6 @@ class Model(ModelDesc):
        image, label = input_vars
        image = image / 128.0 - 1
        def residual(name, l, increase_dim=False, first=False):
            shape = l.get_shape().as_list()
            in_channel = shape[3]
@@ -63,10 +58,10 @@ class Model(ModelDesc):
                b1 = tf.nn.relu(b1)
            else:
                b1 = l
            c1 = Conv2D('conv1', b1, out_channel, stride=stride1)
            b2 = BatchNorm('bn2', c1)
            b2 = tf.nn.relu(b2)
            c2 = Conv2D('conv2', b2, out_channel)
            if increase_dim:
                l = AvgPooling('pool', l, 2)
@@ -75,26 +70,29 @@ class Model(ModelDesc):
            l = c2 + l
            return l
        with argscope(Conv2D, nl=tf.identity, use_bias=False, kernel_shape=3,
                      W_init=variance_scaling_initializer(mode='FAN_OUT')):
            l = Conv2D('conv0', image, 16)
            l = BatchNorm('bn0', l)
            l = tf.nn.relu(l)
            l = residual('res1.0', l, first=True)
            for k in range(1, self.n):
                l = residual('res1.{}'.format(k), l)
            # 32,c=16

            l = residual('res2.0', l, increase_dim=True)
            for k in range(1, self.n):
                l = residual('res2.{}'.format(k), l)
            # 16,c=32

            l = residual('res3.0', l, increase_dim=True)
            for k in range(1, self.n):
                l = residual('res3.' + str(k), l)
            l = BatchNorm('bnlast', l)
            l = tf.nn.relu(l)
            # 8,c=64
            l = GlobalAvgPooling('gap', l)
        logits = FullyConnected('linear', l, out_dim=10, nl=tf.identity)
        prob = tf.nn.softmax(logits, name='output')
...
@@ -27,7 +27,7 @@ class StatHolder(object):
        self.log_dir = log_dir
        self.filename = os.path.join(log_dir, 'stat.json')
        if os.path.isfile(self.filename):
logger.info("Loading stats from {}...".format(self.filename)) logger.info("Found stats at {}, will append to it.".format(self.filename))
            with open(self.filename) as f:
                self.stat_history = json.load(f)
        else:
...
@@ -183,6 +183,10 @@ class ChainInit(SessionInit):
def get_model_loader(filename):
"""
Get a corresponding model loader by looking at the file name
:return: either a ParamRestore or SaverRestore
"""
    if filename.endswith('.npy'):
        return ParamRestore(np.load(filename, encoding='latin1').item())
    else:
...
@@ -31,6 +31,9 @@ class MultiGPUTrainer(QueueInputTrainer):
        with tf.name_scope('AvgGrad'):
            for grad_and_vars in zip(*tower_grads):
                v = grad_and_vars[0][1]
                for x in grad_and_vars:
                    assert x[0] is not None, \
                        "Gradient w.r.t {} is None!".format(v.name)
                try:
                    grad = tf.add_n([x[0] for x in grad_and_vars]) / float(len(tower_grads))
                except:
...