Commit 881c4ee6 authored by Yuxin Wu

add shuffle_interval in locallyshuffledata

parent 9c30c49d
@@ -100,7 +100,7 @@ class Model(ModelDesc):
         self.prob = tf.nn.softmax(logits / param.softmax_temprature, name='prob')
         xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
-            logits=logits, labels=symbolic_functions.flatten(nextinput))
+            logits=logits, labels=tf.reshape(nextinput, [-1]))
         self.cost = tf.reduce_mean(xent_loss, name='cost')
         summary.add_param_summary(('.*/W', ['histogram']))   # monitor histogram of all W
         summary.add_moving_summary(self.cost)
...
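Note: throughout this commit, the tensorpack helper symbolic_functions.flatten is replaced by plain tf.reshape(x, [-1]), which collapses a tensor of any shape into rank 1. A minimal sketch of the equivalence (the tensor name and shape below are illustrative, not from the commit):

    import tensorflow as tf

    # Labels arrive as [batch, seq_len]; the sparse cross-entropy op wants rank 1.
    # tf.reshape with [-1] infers the total element count, i.e. it flattens.
    nextinput = tf.placeholder(tf.int32, [None, 35], name='nextinput')
    flat_labels = tf.reshape(nextinput, [-1])   # shape: [batch * 35]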
@@ -78,7 +78,7 @@ class Model(ModelDesc):
         output = tf.reshape(tf.concat(outputs, 1), [-1, HIDDEN_SIZE])  # (Bxseqlen) x hidden
         logits = FullyConnected('fc', output, VOCAB_SIZE, nl=tf.identity, W_init=initializer, b_init=initializer)
         xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
-            logits=logits, labels=symbolic_functions.flatten(nextinput))
+            logits=logits, labels=tf.reshape(nextinput, [-1]))

         update_state_op = tf.group(
             tf.assign(state_var[0].c, last_state[0].c),
...
@@ -68,7 +68,6 @@ class Model(ModelDesc):
         sampled = tf.concat([sampled1, sampled2], 3, 'sampled_concat')

         logits = (LinearWrap(sampled)
-                  .apply(symbf.batch_flatten)
                   .FullyConnected('fc1', out_dim=256, nl=tf.nn.relu)
                   .FullyConnected('fc2', out_dim=128, nl=tf.nn.relu)
                   .FullyConnected('fct', out_dim=19, nl=tf.identity)())
...
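Dropping the explicit batch_flatten is presumably safe because tensorpack's FullyConnected flattens a higher-rank input to 2D itself. What that flatten does, as a rough sketch (not tensorpack's actual code):

    import numpy as np
    import tensorflow as tf

    def batch_flatten_sketch(x):
        # Collapse every axis except the batch axis; use the static shape
        # when fully known, otherwise compute the reshape at run time.
        shape = x.get_shape().as_list()[1:]
        if None not in shape:
            return tf.reshape(x, [-1, int(np.prod(shape))])
        return tf.reshape(x, tf.stack([tf.shape(x)[0], -1]))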
@@ -22,7 +22,6 @@ Speed is about 43 it/s on TitanX.
 class Model(ModelDesc):
     def _get_inputs(self):
         return [InputVar(tf.float32, [None, 40, 40, 3], 'input'),
                 InputVar(tf.int32, [None], 'label')]
...
@@ -438,27 +438,34 @@ def SelectComponent(ds, idxs):
 class LocallyShuffleData(ProxyDataFlow, RNGDataFlow):
-    """ Maintain a pool to cache datapoints, and shuffle before producing them.
-    This can be used as an alternative when a complete random read is too expensive for the
-    data source.
+    """ Maintain a pool to buffer datapoints, and shuffle before producing them.
+    This can be used as an alternative when a complete random read is too expensive
+    or impossible for the data source.
     """

-    def __init__(self, ds, cache_size, nr_reuse=1):
+    def __init__(self, ds, buffer_size, nr_reuse=1, shuffle_interval=None):
         """
         Args:
             ds (DataFlow): input DataFlow.
-            cache_size (int): size of the cache.
+            buffer_size (int): size of the buffer.
             nr_reuse (int): reuse each datapoint several times to improve
                 speed, but may hurt your model.
+            shuffle_interval (int): shuffle the buffer after this many
+                datapoints have gone through it. Frequent shuffles on a large
+                buffer may affect speed, while infrequent shuffles may reduce
+                randomness. Defaults to buffer_size / 3.
         """
         ProxyDataFlow.__init__(self, ds)
-        self.q = deque(maxlen=cache_size)
+        self.q = deque(maxlen=buffer_size)
+        if shuffle_interval is None:
+            shuffle_interval = int(buffer_size // 3)
+        self.shuffle_interval = shuffle_interval
         self.nr_reuse = nr_reuse

     def reset_state(self):
         ProxyDataFlow.reset_state(self)
         RNGDataFlow.reset_state(self)
-        self.ds_itr = RepeatedData(self.ds).get_data()
+        self.ds_itr = RepeatedData(self.ds, -1).get_data()
         self.current_cnt = 0

     def _add_data(self):
@@ -475,7 +482,7 @@ class LocallyShuffleData(ProxyDataFlow, RNGDataFlow):
         cnt = 0
         while True:
             self.rng.shuffle(self.q)
-            for _ in range(self.q.maxlen):
+            for _ in range(self.shuffle_interval):
                 # the inner loop maintains the queue size (almost) unchanged
                 for _ in range(self.nr_reuse):
                     yield self.q.popleft()
...
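For context, a minimal usage sketch of the new parameter (the LMDB path and the sizes below are made up for illustration):

    from tensorpack.dataflow import LMDBData, LocallyShuffleData

    # Sequential reads from an LMDB file are fast but ordered; a local
    # shuffle buffer restores some randomness without random disk access.
    ds = LMDBData('/path/to/train.lmdb', shuffle=False)   # hypothetical path
    ds = LocallyShuffleData(ds, buffer_size=10000, shuffle_interval=3000)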
@@ -7,7 +7,6 @@ import numpy as np

 from .common import layer_register
 from ..utils.argtools import shape2d, shape4d
-from ..tfutils import symbolic_functions as symbf
 from ._test import TestModel
@@ -127,9 +126,9 @@ def FixedUnPooling(x, shape, unpool_mat=None):
     assert unpool_mat.get_shape().as_list() == list(shape)

     # perform a tensor-matrix kronecker product
-    fx = symbf.flatten(tf.transpose(x, [0, 3, 1, 2]))
+    fx = tf.reshape(tf.transpose(x, [0, 3, 1, 2]), [-1])
     fx = tf.expand_dims(fx, -1)       # (bchw)x1
-    mat = tf.expand_dims(symbf.flatten(unpool_mat), 0)  # 1x(shxsw)
+    mat = tf.expand_dims(tf.reshape(unpool_mat, [-1]), 0)  # 1x(shxsw)
     prod = tf.matmul(fx, mat)  # (bchw) x(shxsw)
     prod = tf.reshape(prod, tf.stack(
         [-1, input_shape[3], input_shape[1], input_shape[2], shape[0], shape[1]]))
...
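The kronecker product here upsamples each value of x into an sh x sw block weighted by unpool_mat: flatten both operands, take an outer product, and reshape back. The same idea in NumPy, with small made-up shapes:

    import numpy as np

    x = np.arange(4.0).reshape(2, 2)        # a 2x2 feature map
    unpool_mat = np.array([[1., 0.],        # keep each value in the top-left
                           [0., 0.]])       # corner of its 2x2 block

    # outer product of the flattened operands, then reshape/transpose back
    prod = x.reshape(-1, 1) @ unpool_mat.reshape(1, -1)   # (4,1)@(1,4) -> (4,4)
    out = prod.reshape(2, 2, 2, 2).transpose(0, 2, 1, 3).reshape(4, 4)
    assert np.allclose(out, np.kron(x, unpool_mat))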
@@ -369,8 +369,8 @@ def shapeless_placeholder(x, axis, name):
     If you want to feed to a tensor, the shape of the feed value must match
     the tensor's static shape. This function creates a placeholder which
-    defaults to x if not fed, but has a less specific static shape.
-    See `tensorflow#5680
+    defaults to x if not fed, but has a less specific static shape than x.
+    See also `tensorflow#5680
     <https://github.com/tensorflow/tensorflow/issues/5680>`_.

     Args:
...
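One way to get such a placeholder (a sketch of the idea, not necessarily how shapeless_placeholder is implemented) is tf.placeholder_with_default with the static size erased along the given axis:

    import tensorflow as tf

    def shapeless_placeholder_sketch(x, axis, name):
        # Erase the static size of one axis so that feed values of any length
        # along it are accepted; defaults to the value of x when not fed.
        shape = x.get_shape().as_list()
        shape[axis] = None
        return tf.placeholder_with_default(x, shape=shape, name=name)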