Commit eea48e2e authored by Yuxin Wu

should be the same cifar now

parent 8efd12b1
@@ -28,7 +28,7 @@ def get_model(inputs, is_training):
     image, label = inputs
-    if is_training: # slow?
+    if is_training:
         image, label = tf.train.shuffle_batch(
             [image, label], BATCH_SIZE, CAPACITY, MIN_AFTER_DEQUEUE,
             num_threads=6, enqueue_many=False)
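For context, tf.train.shuffle_batch keeps an internal queue of up to CAPACITY examples and only dequeues while at least MIN_AFTER_DEQUEUE remain, which is what makes the batches well shuffled. A rough pure-Python sketch of that buffering idea (illustrative only, not the TF queue implementation):

    import random

    def shuffle_batch(stream, batch_size, capacity, min_after_dequeue):
        # Buffer incoming examples; only emit a batch once the pool is
        # large enough that sampling from it is reasonably random.
        assert capacity >= min_after_dequeue + batch_size
        buf = []
        for item in stream:
            buf.append(item)
            if len(buf) >= min_after_dequeue + batch_size:
                random.shuffle(buf)
                yield buf[:batch_size]
                del buf[:batch_size]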
@@ -44,14 +44,19 @@ def get_model(inputs, is_training):
     l = MaxPooling('pool0', l, 3, stride=2, padding='SAME')
     l = tf.nn.lrn(l, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm0')
-    l = Conv2D('conv1', l, out_channel=64, kernel_shape=5, padding='SAME')
+    l = Conv2D('conv1', l, out_channel=64, kernel_shape=5, padding='SAME',
+               b_init=tf.constant_initializer(0.1))
     l = tf.nn.lrn(l, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')
     l = MaxPooling('pool1', l, 3, stride=2, padding='SAME')
-    l = FullyConnected('fc0', l, 384)
-    l = FullyConnected('fc1', l, out_dim=192)
+    l = FullyConnected('fc0', l, 384,
+                       b_init=tf.constant_initializer(0.1))
+    l = FullyConnected('fc1', l, out_dim=192,
+                       b_init=tf.constant_initializer(0.1))
     # fc will have activation summary by default. disable this for the output layer
-    logits = FullyConnected('fc2', l, out_dim=10, summary_activation=False, nl=tf.identity)
+    logits = FullyConnected('linear', l, out_dim=10, summary_activation=False,
+                            nl=tf.identity,
+                            W_init=tf.truncated_normal_initializer(1/192.0))
     prob = tf.nn.softmax(logits, name='output')

     y = one_hot(label, 10)
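The new b_init=tf.constant_initializer(0.1) presumably matches the TensorFlow CIFAR-10 tutorial, which the commit message alludes to: a small positive bias keeps ReLU units active at the start of training instead of beginning dead. A quick numeric check of that effect (assuming roughly zero-mean pre-activations):

    import numpy as np

    rng = np.random.default_rng(0)
    x = rng.standard_normal((1000, 192)) * 0.04   # small zero-mean pre-activations
    for b in (0.0, 0.1):
        alive = (np.maximum(x + b, 0) > 0).mean()
        print("bias=%.1f -> %.0f%% of ReLU units active" % (b, 100 * alive))
        # bias=0.0 -> ~50%, bias=0.1 -> ~99%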
@@ -69,7 +74,7 @@ def get_model(inputs, is_training):
         SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error'))

     # weight decay on all W of fc layers
-    wd_cost = tf.mul(1e-4,
+    wd_cost = tf.mul(4e-3,
                      regularize_cost('fc.*/W', tf.nn.l2_loss),
                      name='regularize_loss')
     tf.add_to_collection(COST_VARS_KEY, wd_cost)
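tf.nn.l2_loss(t) computes sum(t ** 2) / 2, so the regularizer is 4e-3 times half the squared norm of every variable whose name matches 'fc.*/W'. A NumPy sketch of the same computation (made-up weight shapes, for illustration):

    import numpy as np

    def l2_loss(t):
        return np.sum(t ** 2) / 2          # same definition as tf.nn.l2_loss

    weights = {'fc0/W': np.ones((5, 3)), 'fc1/W': np.ones((3, 2))}
    wd_cost = 4e-3 * sum(l2_loss(W) for name, W in weights.items()
                         if name.startswith('fc') and name.endswith('/W'))
    print(wd_cost)                         # 4e-3 * (7.5 + 3.0) = 0.042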
@@ -122,7 +127,7 @@ def get_config():
     lr = tf.train.exponential_decay(
         learning_rate=1e-1,
         global_step=get_global_step_var(),
-        decay_steps=dataset_train.size() * 200,
+        decay_steps=dataset_train.size() * 350,
         decay_rate=0.1, staircase=True, name='learning_rate')
     tf.scalar_summary('learning_rate', lr)
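With staircase=True the schedule is a step function: lr = 1e-1 * 0.1 ** floor(global_step / decay_steps), so the rate now drops tenfold every 350 epochs instead of every 200. A sketch of the resulting values (assuming dataset_train.size() is the number of steps per epoch, e.g. about 390 for CIFAR-10 at batch 128):

    def staircase_lr(step, steps_per_epoch=390, base_lr=1e-1,
                     decay_rate=0.1, decay_epochs=350):
        # tf.train.exponential_decay with staircase=True
        decay_steps = steps_per_epoch * decay_epochs
        return base_lr * decay_rate ** (step // decay_steps)

    for epoch in (0, 349, 350, 700):
        print(epoch, staircase_lr(epoch * 390))
    # epoch 0 and 349 -> 0.1; epoch 350 -> ~0.01; epoch 700 -> ~0.001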
@@ -31,7 +31,7 @@ def Conv2D(x, out_channel, kernel_shape,
     stride = shape4d(stride)
     if W_init is None:
-        W_init = tf.truncated_normal_initializer(stddev=0.04)
+        W_init = tf.truncated_normal_initializer(stddev=1e-4)
     if b_init is None:
         b_init = tf.constant_initializer()
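tf.truncated_normal_initializer draws from a normal distribution and re-draws any sample farther than two standard deviations from the mean, so dropping stddev from 0.04 to 1e-4 confines the initial conv weights to roughly [-2e-4, 2e-4]. A NumPy sketch of the truncation:

    import numpy as np

    def truncated_normal(shape, stddev):
        # redraw samples that fall outside two standard deviations
        rng = np.random.default_rng(0)
        out = rng.standard_normal(shape) * stddev
        bad = np.abs(out) > 2 * stddev
        while bad.any():
            out[bad] = rng.standard_normal(int(bad.sum())) * stddev
            bad = np.abs(out) > 2 * stddev
        return out

    W = truncated_normal((5, 5, 3, 64), stddev=1e-4)
    print(np.abs(W).max() <= 2e-4)         # True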
@@ -17,10 +17,10 @@ def FullyConnected(x, out_dim, W_init=None, b_init=None, nl=tf.nn.relu):
     in_dim = x.get_shape().as_list()[1]
     if W_init is None:
-        W_init = tf.truncated_normal_initializer(stddev=1.0 / math.sqrt(float(in_dim)))
+        W_init = tf.truncated_normal_initializer(stddev=0.04)
     if b_init is None:
         b_init = tf.constant_initializer()
     W = tf.get_variable('W', [in_dim, out_dim], initializer=W_init)
     b = tf.get_variable('b', [out_dim], initializer=b_init)
-    return nl(tf.matmul(x, W) + b, name=tf.get_variable_scope().name + '_output')
+    return nl(tf.nn.xw_plus_b(x, W, b), name=tf.get_variable_scope().name + '_output')
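The switch to tf.nn.xw_plus_b is behavior-preserving: it computes exactly matmul(x, W) + b and just gives the op a conventional name. Functionally the layer remains relu(x @ W + b), now with W initialized at stddev 0.04. A minimal NumPy rendering of the forward pass (shapes are illustrative):

    import numpy as np

    def fully_connected(x, W, b, nl=lambda t: np.maximum(t, 0)):
        # nl defaults to ReLU, mirroring FullyConnected's nl=tf.nn.relu
        return nl(x @ W + b)                # same as tf.nn.xw_plus_b(x, W, b)

    rng = np.random.default_rng(0)
    x = rng.standard_normal((8, 1024))
    W = rng.standard_normal((1024, 384)) * 0.04
    b = np.full(384, 0.1)
    print(fully_connected(x, W, b).shape)   # (8, 384)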
@@ -7,6 +7,7 @@ import tensorflow as tf
 from itertools import count
 import argparse
 import numpy as np
+from tqdm import tqdm
 from utils import *
 from utils.modelutils import describe_model
@@ -115,8 +116,10 @@ class DatasetPredictor(object):
     def get_result(self):
         """ a generator to return prediction for each data"""
-        for dp in self.ds.get_data():
-            yield self.predict_func(dp)
+        with tqdm(total=self.ds.size()) as pbar:
+            for dp in self.ds.get_data():
+                yield self.predict_func(dp)
+                pbar.update()

     def get_all_result(self):
         return list(self.get_result())
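The progress bar wraps the prediction generator without changing what it yields; pbar.update() advances by one after each datapoint. The same pattern in isolation:

    from tqdm import tqdm

    def get_result(predict_func, dataset):
        # yield one prediction per datapoint, ticking the bar as we go
        with tqdm(total=len(dataset)) as pbar:
            for dp in dataset:
                yield predict_func(dp)
                pbar.update()

    results = list(get_result(lambda dp: dp * 2, range(100)))

Note the bar only advances when the consumer actually pulls a value, since the update happens after each yield.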
@@ -61,7 +61,7 @@ class PeriodicCallback(Callback):
         pass

 class PeriodicSaver(PeriodicCallback):
-    def __init__(self, period=1, keep_recent=50, keep_freq=0.5):
+    def __init__(self, period=1, keep_recent=10, keep_freq=0.5):
         super(PeriodicSaver, self).__init__(period)
         self.path = os.path.join(logger.LOG_DIR, 'model')
         self.keep_recent = keep_recent
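keep_recent and keep_freq presumably map onto tf.train.Saver's retention options; if so, the new default keeps the 10 most recent checkpoints plus one permanent checkpoint every half hour. The parameter mapping is an assumption, not shown in this hunk:

    import tensorflow as tf

    # assumed mapping: keep_recent -> max_to_keep,
    #                  keep_freq   -> keep_checkpoint_every_n_hours
    saver = tf.train.Saver(max_to_keep=10,
                           keep_checkpoint_every_n_hours=0.5)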
@@ -51,7 +51,7 @@ def summary_moving_average(cost_var):
     """
     global_step_var = tf.get_default_graph().get_tensor_by_name(GLOBAL_STEP_VAR_NAME)
     averager = tf.train.ExponentialMovingAverage(
-        0.9, num_updates=global_step_var, name='moving_averages')
+        0.99, num_updates=global_step_var, name='moving_averages')
     vars_to_summary = [cost_var] + \
         tf.get_collection(SUMMARY_VARS_KEY) + \
         tf.get_collection(COST_VARS_KEY)
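With num_updates set, ExponentialMovingAverage uses min(decay, (1 + num_updates) / (10 + num_updates)), so raising the base decay from 0.9 to 0.99 only takes effect once training has run for a while; early summaries still adapt quickly. The effective decay, sketched:

    def ema_decay(base_decay, num_updates):
        # tf.train.ExponentialMovingAverage with num_updates:
        # shadow = decay * shadow + (1 - decay) * value, where
        # decay = min(base_decay, (1 + n) / (10 + n))
        return min(base_decay, (1.0 + num_updates) / (10.0 + num_updates))

    for step in (0, 100, 10000):
        print(step, ema_decay(0.99, step))   # 0.1, ~0.918, 0.99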