Commit b2f8fec3 authored by Yuxin Wu

prefetch keep dataflow size. conv2d use_bias

parent 224b0da7
...@@ -18,7 +18,8 @@ from tensorpack.dataflow import * ...@@ -18,7 +18,8 @@ from tensorpack.dataflow import *
from tensorpack.dataflow import imgaug from tensorpack.dataflow import imgaug
""" """
CIFAR10 90% validation accuracy after 100k step CIFAR10 90% validation accuracy after 100k step.
91% after 160k step
""" """
BATCH_SIZE = 128 BATCH_SIZE = 128
...@@ -128,7 +129,7 @@ def get_config(): ...@@ -128,7 +129,7 @@ def get_config():
learning_rate=1e-2, learning_rate=1e-2,
global_step=get_global_step_var(), global_step=get_global_step_var(),
decay_steps=dataset_train.size() * 30, decay_steps=dataset_train.size() * 30,
decay_rate=0.7, staircase=True, name='learning_rate') decay_rate=0.5, staircase=True, name='learning_rate')
tf.scalar_summary('learning_rate', lr) tf.scalar_summary('learning_rate', lr)
return TrainConfig( return TrainConfig(
......
...@@ -17,6 +17,10 @@ from tensorpack.utils.summary import * ...@@ -17,6 +17,10 @@ from tensorpack.utils.summary import *
from tensorpack.dataflow import * from tensorpack.dataflow import *
from tensorpack.dataflow import imgaug from tensorpack.dataflow import imgaug
"""
SVHN convnet.
About 2.9% validation error after 70 epoch.
"""
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
...@@ -27,7 +31,7 @@ class Model(ModelDesc): ...@@ -27,7 +31,7 @@ class Model(ModelDesc):
image, label = input_vars image, label = input_vars
keep_prob = tf.constant(0.5 if is_training else 1.0) keep_prob = tf.constant(0.5 if is_training else 1.0)
image = image / 255.0 image = image / 128.0 - 1
nl = lambda x, name: tf.abs(tf.tanh(x), name=name) nl = lambda x, name: tf.abs(tf.tanh(x), name=name)
l = Conv2D('conv1', image, 24, 5, padding='VALID', nl=nl) l = Conv2D('conv1', image, 24, 5, padding='VALID', nl=nl)
...@@ -76,14 +80,17 @@ def get_config(): ...@@ -76,14 +80,17 @@ def get_config():
augmentors = [ augmentors = [
imgaug.Resize((40, 40)), imgaug.Resize((40, 40)),
imgaug.BrightnessAdd(63), imgaug.BrightnessAdd(30),
imgaug.Contrast((0.2,1.8)), imgaug.Contrast((0.5,1.5)),
imgaug.GaussianDeform(
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
(40,40), 0.2, 3),
] ]
train = AugmentImageComponent(train, augmentors) train = AugmentImageComponent(train, augmentors)
train = BatchData(train, 128) train = BatchData(train, 128)
nr_proc = 2 nr_proc = 5
train = PrefetchData(train, 3, nr_proc) train = PrefetchData(train, 5, nr_proc)
step_per_epoch = train.size() / nr_proc step_per_epoch = train.size()
augmentors = [ augmentors = [
imgaug.Resize((40, 40)), imgaug.Resize((40, 40)),
...@@ -91,14 +98,13 @@ def get_config(): ...@@ -91,14 +98,13 @@ def get_config():
test = AugmentImageComponent(test, augmentors) test = AugmentImageComponent(test, augmentors)
test = BatchData(test, 128, remainder=True) test = BatchData(test, 128, remainder=True)
sess_config = get_default_sess_config() sess_config = get_default_sess_config(0.8)
sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5
lr = tf.train.exponential_decay( lr = tf.train.exponential_decay(
learning_rate=1e-4, learning_rate=1e-3,
global_step=get_global_step_var(), global_step=get_global_step_var(),
decay_steps=train.size() * 50, decay_steps=train.size() * 30,
decay_rate=0.7, staircase=True, name='learning_rate') decay_rate=0.5, staircase=True, name='learning_rate')
tf.scalar_summary('learning_rate', lr) tf.scalar_summary('learning_rate', lr)
return TrainConfig( return TrainConfig(
...@@ -112,7 +118,7 @@ def get_config(): ...@@ -112,7 +118,7 @@ def get_config():
session_config=sess_config, session_config=sess_config,
model=Model(), model=Model(),
step_per_epoch=step_per_epoch, step_per_epoch=step_per_epoch,
max_epoch=100, max_epoch=350,
) )
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -7,7 +7,8 @@ from .base import ImageAugmentor ...@@ -7,7 +7,8 @@ from .base import ImageAugmentor
import numpy as np import numpy as np
from abc import abstractmethod from abc import abstractmethod
__all__ = ['RandomCrop', 'CenterCrop', 'FixedCrop', 'CenterPaste'] __all__ = ['RandomCrop', 'CenterCrop', 'FixedCrop', 'CenterPaste',
'ConstantBackgroundFiller']
class RandomCrop(ImageAugmentor): class RandomCrop(ImageAugmentor):
""" Randomly crop the image into a smaller one """ """ Randomly crop the image into a smaller one """
......
...@@ -34,14 +34,15 @@ class PrefetchProcess(multiprocessing.Process): ...@@ -34,14 +34,15 @@ class PrefetchProcess(multiprocessing.Process):
class PrefetchData(DataFlow): class PrefetchData(DataFlow):
def __init__(self, ds, nr_prefetch, nr_proc=1): def __init__(self, ds, nr_prefetch, nr_proc=1):
""" """
use multiprocess, will duplicate ds by nr_proc times use multiprocess
""" """
self.ds = ds self.ds = ds
self._size = self.ds.size()
self.nr_proc = nr_proc self.nr_proc = nr_proc
self.nr_prefetch = nr_prefetch self.nr_prefetch = nr_prefetch
def size(self): def size(self):
return self.ds.size() * self.nr_proc return self._size
def get_data(self): def get_data(self):
queue = multiprocessing.Queue(self.nr_prefetch) queue = multiprocessing.Queue(self.nr_prefetch)
...@@ -50,6 +51,7 @@ class PrefetchData(DataFlow): ...@@ -50,6 +51,7 @@ class PrefetchData(DataFlow):
[x.start() for x in procs] [x.start() for x in procs]
end_cnt = 0 end_cnt = 0
tot_cnt = 0
try: try:
while True: while True:
dp = queue.get() dp = queue.get()
...@@ -58,7 +60,10 @@ class PrefetchData(DataFlow): ...@@ -58,7 +60,10 @@ class PrefetchData(DataFlow):
if end_cnt == self.nr_proc: if end_cnt == self.nr_proc:
break break
continue continue
tot_cnt += 1
yield dp yield dp
if tot_cnt == self._size:
break
finally: finally:
queue.close() queue.close()
[x.terminate() for x in procs] [x.terminate() for x in procs]
......
...@@ -14,7 +14,7 @@ __all__ = ['BatchNorm'] ...@@ -14,7 +14,7 @@ __all__ = ['BatchNorm']
# http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow # http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
# TF batch_norm only works for 4D tensor right now: #804 # TF batch_norm only works for 4D tensor right now: #804
@layer_register() @layer_register()
def BatchNorm(x, is_training, gamma_init=1.0): def BatchNorm(x, is_training=True, gamma_init=1.0):
""" """
Batch normalization layer as described in: Batch normalization layer as described in:
Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift
...@@ -54,12 +54,10 @@ def BatchNorm(x, is_training, gamma_init=1.0): ...@@ -54,12 +54,10 @@ def BatchNorm(x, is_training, gamma_init=1.0):
if is_training: if is_training:
with tf.control_dependencies([ema_apply_op]): with tf.control_dependencies([ema_apply_op]):
mean, var = tf.identity(batch_mean), tf.identity(batch_var) return tf.nn.batch_norm_with_global_normalization(
x, batch_mean, batch_var, beta, gamma, EPS, True)
else: else:
batch = tf.cast(tf.shape(x)[0], tf.float32) batch = tf.cast(tf.shape(x)[0], tf.float32)
mean, var = ema_mean, ema_var * batch / (batch - 1) # unbiased variance estimator mean, var = ema_mean, ema_var * batch / (batch - 1) # unbiased variance estimator
return tf.nn.batch_norm_with_global_normalization(
normed = tf.nn.batch_norm_with_global_normalization( x, mean, var, beta, gamma, EPS, True)
x, mean, var, beta, gamma, EPS, True)
return normed
...@@ -14,12 +14,13 @@ __all__ = ['Conv2D'] ...@@ -14,12 +14,13 @@ __all__ = ['Conv2D']
def Conv2D(x, out_channel, kernel_shape, def Conv2D(x, out_channel, kernel_shape,
padding='SAME', stride=1, padding='SAME', stride=1,
W_init=None, b_init=None, W_init=None, b_init=None,
nl=tf.nn.relu, split=1): nl=tf.nn.relu, split=1, use_bias=True):
""" """
kernel_shape: (h, w) or a int kernel_shape: (h, w) or a int
stride: (h, w) or a int stride: (h, w) or a int
padding: 'valid' or 'same' padding: 'valid' or 'same'
split: split channels. used in Alexnet split: split channels. used in Alexnet
use_bias: whether to use bias
""" """
in_shape = x.get_shape().as_list() in_shape = x.get_shape().as_list()
num_in = np.prod(in_shape[1:]) num_in = np.prod(in_shape[1:])
...@@ -39,7 +40,8 @@ def Conv2D(x, out_channel, kernel_shape, ...@@ -39,7 +40,8 @@ def Conv2D(x, out_channel, kernel_shape,
b_init = tf.constant_initializer() b_init = tf.constant_initializer()
W = tf.get_variable('W', filter_shape, initializer=W_init) W = tf.get_variable('W', filter_shape, initializer=W_init)
b = tf.get_variable('b', [out_channel], initializer=b_init) if use_bias:
b = tf.get_variable('b', [out_channel], initializer=b_init)
if split == 1: if split == 1:
conv = tf.nn.conv2d(x, W, stride, padding) conv = tf.nn.conv2d(x, W, stride, padding)
...@@ -49,6 +51,6 @@ def Conv2D(x, out_channel, kernel_shape, ...@@ -49,6 +51,6 @@ def Conv2D(x, out_channel, kernel_shape,
outputs = [tf.nn.conv2d(i, k, stride, padding) outputs = [tf.nn.conv2d(i, k, stride, padding)
for i, k in zip(inputs, kernels)] for i, k in zip(inputs, kernels)]
conv = tf.concat(3, outputs) conv = tf.concat(3, outputs)
return nl(tf.nn.bias_add(conv, b), name='output') return nl(tf.nn.bias_add(conv, b) if use_bias else conv, name='output')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment