Commit a41f9e83 authored by Yuxin Wu

update and fix grad bug

parent a4371695
......@@ -67,8 +67,7 @@ class Model(ModelDesc):
l = FullyConnected('fc1', l, out_dim=512,
b_init=tf.constant_initializer(0.1))
# fc will have activation summary by default. disable for the output layer
logits = FullyConnected('linear', l, out_dim=10, summary_activation=False,
nl=tf.identity)
logits = FullyConnected('linear', l, out_dim=10, nl=tf.identity)
prob = tf.nn.softmax(logits, name='output')
y = one_hot(label, 10)
......
......@@ -102,8 +102,7 @@ class Model(ModelDesc):
l = tf.nn.relu(l)
# 8,c=64
l = GlobalAvgPooling('gap', l)
logits = FullyConnected('linear', l, out_dim=10, summary_activation=False,
nl=tf.identity)
logits = FullyConnected('linear', l, out_dim=10, nl=tf.identity)
prob = tf.nn.softmax(logits, name='output')
y = one_hot(label, 10)
......
......@@ -56,7 +56,7 @@ class Model(ModelDesc):
l = FullyConnected('fc6', l, 4096)
l = FullyConnected('fc7', l, out_dim=4096)
# fc will have activation summary by default. disable this for the output layer
logits = FullyConnected('fc8', l, out_dim=1000, summary_activation=False, nl=tf.identity)
logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
prob = tf.nn.softmax(logits, name='output')
y = one_hot(label, 1000)
......
......@@ -64,7 +64,7 @@ class Model(ModelDesc):
l = tf.nn.dropout(l, keep_prob)
l = FullyConnected('fc7', l, 4096)
l = tf.nn.dropout(l, keep_prob)
logits = FullyConnected('fc8', l, out_dim=1000, summary_activation=False, nl=tf.identity)
logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
prob = tf.nn.softmax(logits, name='output')
y = one_hot(label, 1000)
......
......@@ -54,8 +54,7 @@ class Model(ModelDesc):
l = tf.nn.dropout(l, keep_prob)
# fc will have activation summary by default. disable this for the output layer
logits = FullyConnected('fc1', l, out_dim=10,
summary_activation=False, nl=tf.identity)
logits = FullyConnected('fc1', l, out_dim=10, nl=tf.identity)
prob = tf.nn.softmax(logits, name='prob')
y = one_hot(label, 10)
......
......@@ -45,8 +45,7 @@ class Model(ModelDesc):
l = FullyConnected('fc0', l, 512,
b_init=tf.constant_initializer(0.1))
# fc will have activation summary by default. disable for the output layer
logits = FullyConnected('linear', l, out_dim=10, summary_activation=False,
nl=tf.identity)
logits = FullyConnected('linear', l, out_dim=10, nl=tf.identity)
prob = tf.nn.softmax(logits, name='output')
y = one_hot(label, 10)
......
......@@ -11,12 +11,15 @@ from ...utils import logger, get_rng
from ..base import DataFlow
from ...utils.fs import mkdir_p, download
__all__ = ['ILSVRCMeta']
__all__ = ['ILSVRCMeta', 'ILSVRC12']
CAFFE_ILSVRC12_URL = "http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz"
CAFFE_PROTO_URL = "https://github.com/BVLC/caffe/raw/master/src/caffe/proto/caffe.proto"
class ILSVRCMeta(object):
"""
Provide metadata for ILSVRC dataset.
"""
def __init__(self, dir=None):
if dir is None:
dir = os.path.join(os.path.dirname(__file__), 'ilsvrc_metadata')
......@@ -24,15 +27,18 @@ class ILSVRCMeta(object):
mkdir_p(self.dir)
self.caffe_pb_file = os.path.join(self.dir, 'caffe_pb2.py')
if not os.path.isfile(self.caffe_pb_file):
self.download_caffe_meta()
self._download_caffe_meta()
def get_synset_words_1000(self):
    """
    Read 'synset_words.txt' from the metadata directory (self.dir).

    :returns a dict of {cls_number: cls_name}, where cls_number is the
        0-based line index in the file and cls_name is that line with
        surrounding whitespace stripped.
    :raises AssertionError: if 'synset_words.txt' is not present in self.dir
    """
    fname = os.path.join(self.dir, 'synset_words.txt')
    assert os.path.isfile(fname)
    # Read inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(fname) as f:
        lines = [x.strip() for x in f]
    return dict(enumerate(lines))
def download_caffe_meta(self):
def _download_caffe_meta(self):
fpath = download(CAFFE_ILSVRC12_URL, self.dir)
tarfile.open(fpath, 'r:gz').extractall(self.dir)
......@@ -41,6 +47,10 @@ class ILSVRCMeta(object):
assert ret == 0, "caffe proto compilation failed!"
def get_image_list(self, name):
"""
:param name: 'train' or 'val' or 'test'
:returns list of image filenames
"""
assert name in ['train', 'val', 'test']
fname = os.path.join(self.dir, name + '.txt')
assert os.path.isfile(fname)
......@@ -51,10 +61,9 @@ class ILSVRCMeta(object):
ret.append((name, int(cls)))
return ret
def load_mean(self):
def get_per_pixel_mean(self):
"""
return per-pixel mean as an array of shape
(3, 256, 256) in range [0, 255]
:returns per-pixel mean as an array of shape (3, 256, 256) in range [0, 255]
"""
import imp
caffepb = imp.load_source('caffepb', self.caffe_pb_file)
......@@ -83,9 +92,15 @@ class ILSVRC12(DataFlow):
return len(self.imglist)
def reset_state(self):
"""
reset rng for shuffle
"""
self.rng = get_rng(self)
def get_data(self):
"""
Produce original images of shape [h, w, 3], and label
"""
idxs = np.arange(len(self.imglist))
if self.shuffle:
self.rng.shuffle(idxs)
......@@ -99,7 +114,7 @@ class ILSVRC12(DataFlow):
if __name__ == '__main__':
meta = ILSVRCMeta()
print meta.load_mean()
print meta.get_per_pixel_mean()
#print(meta.get_synset_words_1000())
#ds = ILSVRC12('/home/wyx/data/imagenet', 'val')
......
......@@ -3,6 +3,7 @@
# Author: Yuxin Wu <ppwwyyxx@gmail.com>
from .base import ImageAugmentor
from ...utils import logger
import numpy as np
__all__ = ['GaussianDeform', 'GaussianMap']
......@@ -63,6 +64,7 @@ class GaussianDeform(ImageAugmentor):
:param sigma: sigma for Gaussian weight
:param randrange: default to shape[0] / 8
"""
logger.warn("GaussianDeform is slow. Consider using it with 4 or more prefetching processes.")
super(GaussianDeform, self).__init__()
self.anchors = anchors
self.K = len(self.anchors)
......
......@@ -10,7 +10,7 @@ from ._common import *
__all__ = ['Conv2D']
@layer_register(summary_activation=True)
@layer_register()
def Conv2D(x, out_channel, kernel_shape,
padding='SAME', stride=1,
W_init=None, b_init=None,
......
......@@ -11,7 +11,7 @@ from ..tfutils.symbolic_functions import *
__all__ = ['FullyConnected']
@layer_register(summary_activation=True)
@layer_register()
def FullyConnected(x, out_dim,
W_init=None, b_init=None,
nl=tf.nn.relu, use_bias=True):
......
......@@ -21,7 +21,7 @@ class GradientProcessor(object):
:param grads: list of (grad, var)
:returns: symbolic gradients with the same type as input
"""
self._process(grads)
return self._process(grads)
@abstractmethod
def _process(self, grads):
......
......@@ -57,7 +57,7 @@ class Trainer(object):
logger.LOG_DIR, graph=self.sess.graph)
self.summary_op = tf.merge_all_summaries()
# create an empty StatHolder
self.stat_holder = StatHolder(logger.LOG_DIR, [])
self.stat_holder = StatHolder(logger.LOG_DIR)
def _process_summary(self, summary_str):
summary = tf.Summary.FromString(summary_str)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment