Commit eee59532 authored by Yuxin Wu

performance tuning

parent 3a90a5c9
...@@ -10,8 +10,8 @@ from .base import DataFlow, RNGDataFlow ...@@ -10,8 +10,8 @@ from .base import DataFlow, RNGDataFlow
__all__ = ['FakeData', 'DataFromQueue', 'DataFromList'] __all__ = ['FakeData', 'DataFromQueue', 'DataFromList']
class FakeData(RNGDataFlow): class FakeData(RNGDataFlow):
""" Generate fake random data of given shapes""" """ Generate fake fixed data of given shapes"""
def __init__(self, shapes, size): def __init__(self, shapes, size, random=True):
""" """
:param shapes: a list of lists/tuples :param shapes: a list of lists/tuples
:param size: size of this DataFlow :param size: size of this DataFlow
...@@ -19,14 +19,19 @@ class FakeData(RNGDataFlow): ...@@ -19,14 +19,19 @@ class FakeData(RNGDataFlow):
super(FakeData, self).__init__() super(FakeData, self).__init__()
self.shapes = shapes self.shapes = shapes
self._size = int(size) self._size = int(size)
self.random = random
def size(self): def size(self):
return self._size return self._size
def get_data(self): def get_data(self):
for _ in range(self._size): if self.random:
yield [self.rng.random_sample(k).astype('float32') for k in self.shapes] for _ in range(self._size):
#yield [self.rng.random_sample(k) for k in self.shapes] yield [self.rng.rand(*k).astype('float32') for k in self.shapes]
else:
v = [self.rng.rand(*k).astype('float32') for k in self.shapes]
for _ in range(self._size):
yield v
class DataFromQueue(DataFlow): class DataFromQueue(DataFlow):
""" Produce data from a queue """ """ Produce data from a queue """
......
...@@ -35,9 +35,8 @@ def FullyConnected(x, out_dim, ...@@ -35,9 +35,8 @@ def FullyConnected(x, out_dim,
if b_init is None: if b_init is None:
b_init = tf.constant_initializer() b_init = tf.constant_initializer()
with tf.device('/cpu:0'): W = tf.get_variable('W', [in_dim, out_dim], initializer=W_init)
W = tf.get_variable('W', [in_dim, out_dim], initializer=W_init) if use_bias:
if use_bias: b = tf.get_variable('b', [out_dim], initializer=b_init)
b = tf.get_variable('b', [out_dim], initializer=b_init)
prod = tf.nn.xw_plus_b(x, W, b) if use_bias else tf.matmul(x, W) prod = tf.nn.xw_plus_b(x, W, b) if use_bias else tf.matmul(x, W)
return nl(prod, name='output') return nl(prod, name='output')
...@@ -86,7 +86,7 @@ class EnqueueThread(threading.Thread): ...@@ -86,7 +86,7 @@ class EnqueueThread(threading.Thread):
if self.coord.should_stop(): if self.coord.should_stop():
return return
feed = dict(zip(self.input_vars, dp)) feed = dict(zip(self.input_vars, dp))
#print self.sess.run([self.op, self.size_op], feed_dict=feed)[1] #print 'TFQ:', self.sess.run([self.op, self.size_op], feed_dict=feed)[1]
self.op.run(feed_dict=feed) self.op.run(feed_dict=feed)
except tf.errors.CancelledError as e: except tf.errors.CancelledError as e:
pass pass
...@@ -149,9 +149,16 @@ class QueueInputTrainer(Trainer): ...@@ -149,9 +149,16 @@ class QueueInputTrainer(Trainer):
def _single_tower_grad(self): def _single_tower_grad(self):
""" Get grad and cost for single-tower""" """ Get grad and cost for single-tower"""
self.dequed_inputs = model_inputs = self._get_model_inputs() self.dequed_inputs = model_inputs = self._get_model_inputs()
# test the overhead of queue
#with tf.device('/gpu:0'):
#self.dequed_inputs = [tf.Variable(tf.random_normal([128,224,224,3],
#dtype=tf.float32), trainable=False),
#tf.Variable(tf.ones([128], dtype=tf.int32), trainable=False)]
self.model.build_graph(self.dequed_inputs, True) self.model.build_graph(self.dequed_inputs, True)
cost_var = self.model.get_cost() cost_var = self.model.get_cost()
grads = self.config.optimizer.compute_gradients(cost_var) grads = self.config.optimizer.compute_gradients(
cost_var, gate_gradients=0) # GATE_NONE
tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost_var) tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost_var)
return grads return grads
...@@ -181,7 +188,7 @@ class QueueInputTrainer(Trainer): ...@@ -181,7 +188,7 @@ class QueueInputTrainer(Trainer):
def run_step(self): def run_step(self):
""" just run self.train_op""" """ just run self.train_op"""
self.sess.run([self.train_op]) self.sess.run(self.train_op)
#run_metadata = tf.RunMetadata() #run_metadata = tf.RunMetadata()
#self.sess.run([self.train_op], #self.sess.run([self.train_op],
#options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE), #options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
...@@ -193,6 +200,8 @@ class QueueInputTrainer(Trainer): ...@@ -193,6 +200,8 @@ class QueueInputTrainer(Trainer):
#trace_file.write(trace.generate_chrome_trace_format()) #trace_file.write(trace.generate_chrome_trace_format())
#import sys; sys.exit() #import sys; sys.exit()
#self.sess.run([self.dequed_inputs[1]])
def _trigger_epoch(self): def _trigger_epoch(self):
# need to run summary_op every epoch # need to run summary_op every epoch
# note that summary_op will take a data from the queue # note that summary_op will take a data from the queue
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment