Commit eee59532 authored by Yuxin Wu's avatar Yuxin Wu

performance tuning

parent 3a90a5c9
......@@ -10,8 +10,8 @@ from .base import DataFlow, RNGDataFlow
__all__ = ['FakeData', 'DataFromQueue', 'DataFromList']
class FakeData(RNGDataFlow):
""" Generate fake random data of given shapes"""
def __init__(self, shapes, size):
""" Generate fake fixed data of given shapes"""
def __init__(self, shapes, size, random=True):
"""
:param shapes: a list of lists/tuples
:param size: size of this DataFlow
......@@ -19,14 +19,19 @@ class FakeData(RNGDataFlow):
super(FakeData, self).__init__()
self.shapes = shapes
self._size = int(size)
self.random = random
def size(self):
return self._size
def get_data(self):
for _ in range(self._size):
yield [self.rng.random_sample(k).astype('float32') for k in self.shapes]
#yield [self.rng.random_sample(k) for k in self.shapes]
if self.random:
for _ in range(self._size):
yield [self.rng.rand(*k).astype('float32') for k in self.shapes]
else:
v = [self.rng.rand(*k).astype('float32') for k in self.shapes]
for _ in range(self._size):
yield v
class DataFromQueue(DataFlow):
""" Produce data from a queue """
......
......@@ -35,9 +35,8 @@ def FullyConnected(x, out_dim,
if b_init is None:
b_init = tf.constant_initializer()
with tf.device('/cpu:0'):
W = tf.get_variable('W', [in_dim, out_dim], initializer=W_init)
if use_bias:
b = tf.get_variable('b', [out_dim], initializer=b_init)
W = tf.get_variable('W', [in_dim, out_dim], initializer=W_init)
if use_bias:
b = tf.get_variable('b', [out_dim], initializer=b_init)
prod = tf.nn.xw_plus_b(x, W, b) if use_bias else tf.matmul(x, W)
return nl(prod, name='output')
......@@ -86,7 +86,7 @@ class EnqueueThread(threading.Thread):
if self.coord.should_stop():
return
feed = dict(zip(self.input_vars, dp))
#print self.sess.run([self.op, self.size_op], feed_dict=feed)[1]
#print 'TFQ:', self.sess.run([self.op, self.size_op], feed_dict=feed)[1]
self.op.run(feed_dict=feed)
except tf.errors.CancelledError as e:
pass
......@@ -149,9 +149,16 @@ class QueueInputTrainer(Trainer):
def _single_tower_grad(self):
""" Get grad and cost for single-tower"""
self.dequed_inputs = model_inputs = self._get_model_inputs()
# test the overhead of queue
#with tf.device('/gpu:0'):
#self.dequed_inputs = [tf.Variable(tf.random_normal([128,224,224,3],
#dtype=tf.float32), trainable=False),
#tf.Variable(tf.ones([128], dtype=tf.int32), trainable=False)]
self.model.build_graph(self.dequed_inputs, True)
cost_var = self.model.get_cost()
grads = self.config.optimizer.compute_gradients(cost_var)
grads = self.config.optimizer.compute_gradients(
cost_var, gate_gradients=0) # GATE_NONE
tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost_var)
return grads
......@@ -181,7 +188,7 @@ class QueueInputTrainer(Trainer):
def run_step(self):
""" just run self.train_op"""
self.sess.run([self.train_op])
self.sess.run(self.train_op)
#run_metadata = tf.RunMetadata()
#self.sess.run([self.train_op],
#options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
......@@ -193,6 +200,8 @@ class QueueInputTrainer(Trainer):
#trace_file.write(trace.generate_chrome_trace_format())
#import sys; sys.exit()
#self.sess.run([self.dequed_inputs[1]])
def _trigger_epoch(self):
# need to run summary_op every epoch
# note that summary_op will take a data from the queue
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment