Commit c08297ff authored by Yuxin Wu's avatar Yuxin Wu

ilsvrc12 dataflow

parent 3af5f874
...@@ -24,8 +24,10 @@ Deep Residual Learning for Image Recognition, arxiv:1512.03385 ...@@ -24,8 +24,10 @@ Deep Residual Learning for Image Recognition, arxiv:1512.03385
using the variants proposed in: using the variants proposed in:
Identity Mappings in Deep Residual Networks, arxiv::1603.05027 Identity Mappings in Deep Residual Networks, arxiv::1603.05027
I can reproduce the results for
n=5 (about 7.6% val error)
n=18 (about 6.4% val error)
n=30: a 182-layer network (about 5.7% val error)
This model uses the whole training set instead of a 95:5 train-val split.
""" """
...@@ -116,8 +118,9 @@ class Model(ModelDesc): ...@@ -116,8 +118,9 @@ class Model(ModelDesc):
MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error')) MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error'))
# weight decay on all W of fc layers # weight decay on all W of fc layers
#wd_cost = regularize_cost('.*/W', l2_regularizer(0.0002), name='regularize_loss') wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
wd_cost = 0.0001 * regularize_cost('.*/W', tf.nn.l2_loss) 480000, 0.2, True)
wd_cost = wd_w * regularize_cost('.*/W', tf.nn.l2_loss)
tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost) tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
add_param_summary([('.*/W', ['histogram', 'sparsity'])]) # monitor W add_param_summary([('.*/W', ['histogram', 'sparsity'])]) # monitor W
...@@ -132,7 +135,7 @@ def get_data(train_or_test): ...@@ -132,7 +135,7 @@ def get_data(train_or_test):
imgaug.CenterPaste((40, 40)), imgaug.CenterPaste((40, 40)),
imgaug.RandomCrop((32, 32)), imgaug.RandomCrop((32, 32)),
imgaug.Flip(horiz=True), imgaug.Flip(horiz=True),
imgaug.BrightnessAdd(20), #imgaug.BrightnessAdd(20),
#imgaug.Contrast((0.6,1.4)), #imgaug.Contrast((0.6,1.4)),
imgaug.MapImage(lambda x: x - pp_mean), imgaug.MapImage(lambda x: x - pp_mean),
] ]
......
...@@ -121,9 +121,6 @@ class FakeData(DataFlow): ...@@ -121,9 +121,6 @@ class FakeData(DataFlow):
def size(self): def size(self):
return self._size return self._size
def reset_state(self):
self.rng = get_rng(self)
def get_data(self): def get_data(self):
for _ in range(self._size): for _ in range(self._size):
yield [self.rng.random_sample(k) for k in self.shapes] yield [self.rng.random_sample(k) for k in self.shapes]
......
...@@ -4,11 +4,17 @@ ...@@ -4,11 +4,17 @@
# Author: Yuxin Wu <ppwwyyxxc@gmail.com> # Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import os import os
import tarfile import tarfile
import numpy as np
import scipy.ndimage as scimg
from ...utils import logger, get_rng
from ..base import DataFlow
from ...utils.fs import mkdir_p, download from ...utils.fs import mkdir_p, download
__all__ = ['ILSVRCMeta'] __all__ = ['ILSVRCMeta']
CAFFE_ILSVRC12_URL = "http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz" CAFFE_ILSVRC12_URL = "http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz"
CAFFE_PROTO_URL = "https://github.com/BVLC/caffe/raw/master/src/caffe/proto/caffe.proto"
class ILSVRCMeta(object): class ILSVRCMeta(object):
def __init__(self, dir=None): def __init__(self, dir=None):
...@@ -16,11 +22,12 @@ class ILSVRCMeta(object): ...@@ -16,11 +22,12 @@ class ILSVRCMeta(object):
dir = os.path.join(os.path.dirname(__file__), 'ilsvrc_metadata') dir = os.path.join(os.path.dirname(__file__), 'ilsvrc_metadata')
self.dir = dir self.dir = dir
mkdir_p(self.dir) mkdir_p(self.dir)
self.caffe_pb_file = os.path.join(self.dir, 'caffe_pb2.py')
if not os.path.isfile(self.caffe_pb_file):
self.download_caffe_meta()
def get_synset_words_1000(self): def get_synset_words_1000(self):
fname = os.path.join(self.dir, 'synset_words.txt') fname = os.path.join(self.dir, 'synset_words.txt')
if not os.path.isfile(fname):
self.download_caffe_meta()
assert os.path.isfile(fname) assert os.path.isfile(fname)
lines = [x.strip() for x in open(fname).readlines()] lines = [x.strip() for x in open(fname).readlines()]
return dict(enumerate(lines)) return dict(enumerate(lines))
...@@ -29,6 +36,72 @@ class ILSVRCMeta(object): ...@@ -29,6 +36,72 @@ class ILSVRCMeta(object):
fpath = download(CAFFE_ILSVRC12_URL, self.dir) fpath = download(CAFFE_ILSVRC12_URL, self.dir)
tarfile.open(fpath, 'r:gz').extractall(self.dir) tarfile.open(fpath, 'r:gz').extractall(self.dir)
proto_path = download(CAFFE_PROTO_URL, self.dir)
ret = os.system('cd {} && protoc caffe.proto --python_out .'.format(self.dir))
assert ret == 0, "caffe proto compilation failed!"
def get_image_list(self, name):
assert name in ['train', 'val', 'test']
fname = os.path.join(self.dir, name + '.txt')
assert os.path.isfile(fname)
with open(fname) as f:
ret = []
for line in f.readlines():
name, cls = line.strip().split()
ret.append((name, int(cls)))
return ret
def load_mean(self):
    """Return the ILSVRC12 per-pixel mean.

    :returns: a numpy array of shape (3, 256, 256), values in [0, 255],
        read from the caffe ``imagenet_mean.binaryproto`` file.
    """
    import imp
    # caffe_pb2.py is generated by download_caffe_meta() via protoc
    caffepb = imp.load_source('caffepb', self.caffe_pb_file)
    obj = caffepb.BlobProto()
    mean_file = os.path.join(self.dir, 'imagenet_mean.binaryproto')
    # binaryproto is binary protobuf data: must be opened in 'rb' mode,
    # otherwise reads are corrupted on Python 3 and on Windows
    with open(mean_file, 'rb') as f:
        obj.ParseFromString(f.read())
    arr = np.array(obj.data).reshape((3, 256, 256))
    return arr
class ILSVRC12(DataFlow):
    """DataFlow producing ILSVRC12 images (HWC uint8 arrays) with their labels."""

    def __init__(self, dir, name, meta_dir=None, shuffle=True):
        """
        name: 'train' or 'val' or 'test'
        """
        assert name in ['train', 'test', 'val']
        self.dir = dir
        self.name = name
        self.shuffle = shuffle
        self.meta = ILSVRCMeta(meta_dir)
        self.imglist = self.meta.get_image_list(name)
        self.rng = get_rng(self)

    def size(self):
        return len(self.imglist)

    def reset_state(self):
        # re-seed so forked copies don't share a random stream
        self.rng = get_rng(self)

    def get_data(self):
        indices = np.arange(len(self.imglist))
        if self.shuffle:
            self.rng.shuffle(indices)
        for idx in indices:
            fname, label = self.imglist[idx]
            fullpath = os.path.join(self.dir, self.name, fname)
            img = scimg.imread(fullpath)
            if len(img.shape) == 2:
                # grayscale image: replicate to 3 channels
                img = np.expand_dims(img, 2).repeat(3, 2)
            yield [img, label]
if __name__ == '__main__':
    # manual smoke test for the metadata loader
    meta = ILSVRCMeta()
    # use the print() call form for Python 3 compatibility,
    # consistent with the rest of this script
    print(meta.load_mean())
    #print(meta.get_synset_words_1000())
    #ds = ILSVRC12('/home/wyx/data/imagenet', 'val')
    #for k in ds.get_data():
    #    from IPython import embed; embed()
...@@ -22,7 +22,7 @@ class SVHNDigit(DataFlow): ...@@ -22,7 +22,7 @@ class SVHNDigit(DataFlow):
""" """
Cache = {} Cache = {}
def __init__(self, name, data_dir=None): def __init__(self, name, data_dir=None, shuffle=True):
""" """
name: 'train', 'test', or 'extra' name: 'train', 'test', or 'extra'
""" """
...@@ -45,12 +45,21 @@ http://ufldl.stanford.edu/housenumbers/".format(filename) ...@@ -45,12 +45,21 @@ http://ufldl.stanford.edu/housenumbers/".format(filename)
self.Y[self.Y==10] = 0 self.Y[self.Y==10] = 0
SVHNDigit.Cache[name] = (self.X, self.Y) SVHNDigit.Cache[name] = (self.X, self.Y)
self.shuffle = shuffle
self.rng = get_rng(self)
def size(self): def size(self):
return self.X.shape[0] return self.X.shape[0]
def reset_state(self):
self.rng = get_rng(self)
def get_data(self): def get_data(self):
n = self.X.shape[0] n = self.X.shape[0]
for k in range(n): idxs = np.arange(n)
if self.shuffle:
self.rng.shuffle(idxs)
for k in idxs:
yield [self.X[k], self.Y[k]] yield [self.X[k], self.Y[k]]
@staticmethod @staticmethod
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment