Commit a41f9e83 authored by Yuxin Wu

update and fix grad bug

parent a4371695
...@@ -67,8 +67,7 @@ class Model(ModelDesc): ...@@ -67,8 +67,7 @@ class Model(ModelDesc):
l = FullyConnected('fc1', l, out_dim=512, l = FullyConnected('fc1', l, out_dim=512,
b_init=tf.constant_initializer(0.1)) b_init=tf.constant_initializer(0.1))
# fc will have activation summary by default. disable for the output layer # fc will have activation summary by default. disable for the output layer
logits = FullyConnected('linear', l, out_dim=10, summary_activation=False, logits = FullyConnected('linear', l, out_dim=10, nl=tf.identity)
nl=tf.identity)
prob = tf.nn.softmax(logits, name='output') prob = tf.nn.softmax(logits, name='output')
y = one_hot(label, 10) y = one_hot(label, 10)
......
...@@ -102,8 +102,7 @@ class Model(ModelDesc): ...@@ -102,8 +102,7 @@ class Model(ModelDesc):
l = tf.nn.relu(l) l = tf.nn.relu(l)
# 8,c=64 # 8,c=64
l = GlobalAvgPooling('gap', l) l = GlobalAvgPooling('gap', l)
logits = FullyConnected('linear', l, out_dim=10, summary_activation=False, logits = FullyConnected('linear', l, out_dim=10, nl=tf.identity)
nl=tf.identity)
prob = tf.nn.softmax(logits, name='output') prob = tf.nn.softmax(logits, name='output')
y = one_hot(label, 10) y = one_hot(label, 10)
......
...@@ -56,7 +56,7 @@ class Model(ModelDesc): ...@@ -56,7 +56,7 @@ class Model(ModelDesc):
l = FullyConnected('fc6', l, 4096) l = FullyConnected('fc6', l, 4096)
l = FullyConnected('fc7', l, out_dim=4096) l = FullyConnected('fc7', l, out_dim=4096)
# fc will have activation summary by default. disable this for the output layer # fc will have activation summary by default. disable this for the output layer
logits = FullyConnected('fc8', l, out_dim=1000, summary_activation=False, nl=tf.identity) logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
prob = tf.nn.softmax(logits, name='output') prob = tf.nn.softmax(logits, name='output')
y = one_hot(label, 1000) y = one_hot(label, 1000)
......
...@@ -64,7 +64,7 @@ class Model(ModelDesc): ...@@ -64,7 +64,7 @@ class Model(ModelDesc):
l = tf.nn.dropout(l, keep_prob) l = tf.nn.dropout(l, keep_prob)
l = FullyConnected('fc7', l, 4096) l = FullyConnected('fc7', l, 4096)
l = tf.nn.dropout(l, keep_prob) l = tf.nn.dropout(l, keep_prob)
logits = FullyConnected('fc8', l, out_dim=1000, summary_activation=False, nl=tf.identity) logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
prob = tf.nn.softmax(logits, name='output') prob = tf.nn.softmax(logits, name='output')
y = one_hot(label, 1000) y = one_hot(label, 1000)
......
...@@ -54,8 +54,7 @@ class Model(ModelDesc): ...@@ -54,8 +54,7 @@ class Model(ModelDesc):
l = tf.nn.dropout(l, keep_prob) l = tf.nn.dropout(l, keep_prob)
# fc will have activation summary by default. disable this for the output layer # fc will have activation summary by default. disable this for the output layer
logits = FullyConnected('fc1', l, out_dim=10, logits = FullyConnected('fc1', l, out_dim=10, nl=tf.identity)
summary_activation=False, nl=tf.identity)
prob = tf.nn.softmax(logits, name='prob') prob = tf.nn.softmax(logits, name='prob')
y = one_hot(label, 10) y = one_hot(label, 10)
......
...@@ -45,8 +45,7 @@ class Model(ModelDesc): ...@@ -45,8 +45,7 @@ class Model(ModelDesc):
l = FullyConnected('fc0', l, 512, l = FullyConnected('fc0', l, 512,
b_init=tf.constant_initializer(0.1)) b_init=tf.constant_initializer(0.1))
# fc will have activation summary by default. disable for the output layer # fc will have activation summary by default. disable for the output layer
logits = FullyConnected('linear', l, out_dim=10, summary_activation=False, logits = FullyConnected('linear', l, out_dim=10, nl=tf.identity)
nl=tf.identity)
prob = tf.nn.softmax(logits, name='output') prob = tf.nn.softmax(logits, name='output')
y = one_hot(label, 10) y = one_hot(label, 10)
......
...@@ -11,12 +11,15 @@ from ...utils import logger, get_rng ...@@ -11,12 +11,15 @@ from ...utils import logger, get_rng
from ..base import DataFlow from ..base import DataFlow
from ...utils.fs import mkdir_p, download from ...utils.fs import mkdir_p, download
__all__ = ['ILSVRCMeta'] __all__ = ['ILSVRCMeta', 'ILSVRC12']
CAFFE_ILSVRC12_URL = "http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz" CAFFE_ILSVRC12_URL = "http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz"
CAFFE_PROTO_URL = "https://github.com/BVLC/caffe/raw/master/src/caffe/proto/caffe.proto" CAFFE_PROTO_URL = "https://github.com/BVLC/caffe/raw/master/src/caffe/proto/caffe.proto"
class ILSVRCMeta(object): class ILSVRCMeta(object):
"""
Provide metadata for ILSVRC dataset.
"""
def __init__(self, dir=None): def __init__(self, dir=None):
if dir is None: if dir is None:
dir = os.path.join(os.path.dirname(__file__), 'ilsvrc_metadata') dir = os.path.join(os.path.dirname(__file__), 'ilsvrc_metadata')
...@@ -24,15 +27,18 @@ class ILSVRCMeta(object): ...@@ -24,15 +27,18 @@ class ILSVRCMeta(object):
mkdir_p(self.dir) mkdir_p(self.dir)
self.caffe_pb_file = os.path.join(self.dir, 'caffe_pb2.py') self.caffe_pb_file = os.path.join(self.dir, 'caffe_pb2.py')
if not os.path.isfile(self.caffe_pb_file): if not os.path.isfile(self.caffe_pb_file):
self.download_caffe_meta() self._download_caffe_meta()
def get_synset_words_1000(self): def get_synset_words_1000(self):
"""
:returns a dict of {cls_number: cls_name}
"""
fname = os.path.join(self.dir, 'synset_words.txt') fname = os.path.join(self.dir, 'synset_words.txt')
assert os.path.isfile(fname) assert os.path.isfile(fname)
lines = [x.strip() for x in open(fname).readlines()] lines = [x.strip() for x in open(fname).readlines()]
return dict(enumerate(lines)) return dict(enumerate(lines))
def download_caffe_meta(self): def _download_caffe_meta(self):
fpath = download(CAFFE_ILSVRC12_URL, self.dir) fpath = download(CAFFE_ILSVRC12_URL, self.dir)
tarfile.open(fpath, 'r:gz').extractall(self.dir) tarfile.open(fpath, 'r:gz').extractall(self.dir)
...@@ -41,6 +47,10 @@ class ILSVRCMeta(object): ...@@ -41,6 +47,10 @@ class ILSVRCMeta(object):
assert ret == 0, "caffe proto compilation failed!" assert ret == 0, "caffe proto compilation failed!"
def get_image_list(self, name): def get_image_list(self, name):
"""
:param name: 'train' or 'val' or 'test'
:returns list of image filenames
"""
assert name in ['train', 'val', 'test'] assert name in ['train', 'val', 'test']
fname = os.path.join(self.dir, name + '.txt') fname = os.path.join(self.dir, name + '.txt')
assert os.path.isfile(fname) assert os.path.isfile(fname)
...@@ -51,10 +61,9 @@ class ILSVRCMeta(object): ...@@ -51,10 +61,9 @@ class ILSVRCMeta(object):
ret.append((name, int(cls))) ret.append((name, int(cls)))
return ret return ret
def load_mean(self): def get_per_pixel_mean(self):
""" """
return per-pixel mean as an array of shape :returns per-pixel mean as an array of shape (3, 256, 256) in range [0, 255]
(3, 256, 256) in range [0, 255]
""" """
import imp import imp
caffepb = imp.load_source('caffepb', self.caffe_pb_file) caffepb = imp.load_source('caffepb', self.caffe_pb_file)
...@@ -83,9 +92,15 @@ class ILSVRC12(DataFlow): ...@@ -83,9 +92,15 @@ class ILSVRC12(DataFlow):
return len(self.imglist) return len(self.imglist)
def reset_state(self): def reset_state(self):
"""
reset rng for shuffle
"""
self.rng = get_rng(self) self.rng = get_rng(self)
def get_data(self): def get_data(self):
"""
Produce original images of shape [h, w, 3], and label
"""
idxs = np.arange(len(self.imglist)) idxs = np.arange(len(self.imglist))
if self.shuffle: if self.shuffle:
self.rng.shuffle(idxs) self.rng.shuffle(idxs)
...@@ -99,7 +114,7 @@ class ILSVRC12(DataFlow): ...@@ -99,7 +114,7 @@ class ILSVRC12(DataFlow):
if __name__ == '__main__': if __name__ == '__main__':
meta = ILSVRCMeta() meta = ILSVRCMeta()
print meta.load_mean() print meta.get_per_pixel_mean()
#print(meta.get_synset_words_1000()) #print(meta.get_synset_words_1000())
#ds = ILSVRC12('/home/wyx/data/imagenet', 'val') #ds = ILSVRC12('/home/wyx/data/imagenet', 'val')
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
# Author: Yuxin Wu <ppwwyyxx@gmail.com> # Author: Yuxin Wu <ppwwyyxx@gmail.com>
from .base import ImageAugmentor from .base import ImageAugmentor
from ...utils import logger
import numpy as np import numpy as np
__all__ = ['GaussianDeform', 'GaussianMap'] __all__ = ['GaussianDeform', 'GaussianMap']
...@@ -63,6 +64,7 @@ class GaussianDeform(ImageAugmentor): ...@@ -63,6 +64,7 @@ class GaussianDeform(ImageAugmentor):
:param sigma: sigma for Gaussian weight :param sigma: sigma for Gaussian weight
:param randrange: default to shape[0] / 8 :param randrange: default to shape[0] / 8
""" """
logger.warn("GaussianDeform is slow. Consider using it with 4 or more prefetching processes.")
super(GaussianDeform, self).__init__() super(GaussianDeform, self).__init__()
self.anchors = anchors self.anchors = anchors
self.K = len(self.anchors) self.K = len(self.anchors)
......
...@@ -10,7 +10,7 @@ from ._common import * ...@@ -10,7 +10,7 @@ from ._common import *
__all__ = ['Conv2D'] __all__ = ['Conv2D']
@layer_register(summary_activation=True) @layer_register()
def Conv2D(x, out_channel, kernel_shape, def Conv2D(x, out_channel, kernel_shape,
padding='SAME', stride=1, padding='SAME', stride=1,
W_init=None, b_init=None, W_init=None, b_init=None,
......
...@@ -11,7 +11,7 @@ from ..tfutils.symbolic_functions import * ...@@ -11,7 +11,7 @@ from ..tfutils.symbolic_functions import *
__all__ = ['FullyConnected'] __all__ = ['FullyConnected']
@layer_register(summary_activation=True) @layer_register()
def FullyConnected(x, out_dim, def FullyConnected(x, out_dim,
W_init=None, b_init=None, W_init=None, b_init=None,
nl=tf.nn.relu, use_bias=True): nl=tf.nn.relu, use_bias=True):
......
...@@ -21,7 +21,7 @@ class GradientProcessor(object): ...@@ -21,7 +21,7 @@ class GradientProcessor(object):
:param grads: list of (grad, var) :param grads: list of (grad, var)
:returns: symbolic gradients with the same type as input :returns: symbolic gradients with the same type as input
""" """
self._process(grads) return self._process(grads)
@abstractmethod @abstractmethod
def _process(self, grads): def _process(self, grads):
......
...@@ -57,7 +57,7 @@ class Trainer(object): ...@@ -57,7 +57,7 @@ class Trainer(object):
logger.LOG_DIR, graph=self.sess.graph) logger.LOG_DIR, graph=self.sess.graph)
self.summary_op = tf.merge_all_summaries() self.summary_op = tf.merge_all_summaries()
# create an empty StatHolder # create an empty StatHolder
self.stat_holder = StatHolder(logger.LOG_DIR, []) self.stat_holder = StatHolder(logger.LOG_DIR)
def _process_summary(self, summary_str): def _process_summary(self, summary_str):
summary = tf.Summary.FromString(summary_str) summary = tf.Summary.FromString(summary_str)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment