Commit 881c4ee6 authored by Yuxin Wu

add shuffle_interval in locallyshuffledata

parent 9c30c49d
@@ -100,7 +100,7 @@ class Model(ModelDesc):
self.prob = tf.nn.softmax(logits / param.softmax_temprature, name='prob')
xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=logits, labels=symbolic_functions.flatten(nextinput))
logits=logits, labels=tf.reshape(nextinput, [-1]))
self.cost = tf.reduce_mean(xent_loss, name='cost')
summary.add_param_summary(('.*/W', ['histogram'])) # monitor histogram of all W
summary.add_moving_summary(self.cost)
......
@@ -78,7 +78,7 @@ class Model(ModelDesc):
output = tf.reshape(tf.concat(outputs, 1), [-1, HIDDEN_SIZE]) # (Bxseqlen) x hidden
logits = FullyConnected('fc', output, VOCAB_SIZE, nl=tf.identity, W_init=initializer, b_init=initializer)
xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits=logits, labels=symbolic_functions.flatten(nextinput))
logits=logits, labels=tf.reshape(nextinput, [-1]))
update_state_op = tf.group(
tf.assign(state_var[0].c, last_state[0].c),
......
@@ -68,7 +68,6 @@ class Model(ModelDesc):
sampled = tf.concat([sampled1, sampled2], 3, 'sampled_concat')
logits = (LinearWrap(sampled)
.apply(symbf.batch_flatten)
.FullyConnected('fc1', out_dim=256, nl=tf.nn.relu)
.FullyConnected('fc2', out_dim=128, nl=tf.nn.relu)
.FullyConnected('fct', out_dim=19, nl=tf.identity)())
......
@@ -22,7 +22,6 @@ Speed is about 43 it/s on TitanX.
class Model(ModelDesc):
def _get_inputs(self):
return [InputVar(tf.float32, [None, 40, 40, 3], 'input'),
InputVar(tf.int32, [None], 'label')]
......
@@ -438,27 +438,34 @@ def SelectComponent(ds, idxs):
class LocallyShuffleData(ProxyDataFlow, RNGDataFlow):
""" Maintain a pool to cache datapoints, and shuffle before producing them.
This can be used as an alternative when a complete random read is too expensive for the
data source.
""" Maintain a pool to buffer datapoints, and shuffle before producing them.
This can be used as an alternative when a complete random read is too expensive
or impossible for the data source.
"""
def __init__(self, ds, cache_size, nr_reuse=1):
def __init__(self, ds, buffer_size, nr_reuse=1, shuffle_interval=None):
"""
Args:
ds (DataFlow): input DataFlow.
cache_size (int): size of the cache.
buffer_size (int): size of the buffer.
nr_reuse (int): reuse each datapoint this many times to improve
speed, but it may hurt your model.
shuffle_interval (int): shuffle the buffer after this many
datapoints have gone through it. Frequent shuffling of a large buffer
may hurt speed, and infrequent shuffling may hurt
randomness. Defaults to buffer_size // 3.
"""
ProxyDataFlow.__init__(self, ds)
self.q = deque(maxlen=cache_size)
self.q = deque(maxlen=buffer_size)
if shuffle_interval is None:
shuffle_interval = int(buffer_size // 3)
self.shuffle_interval = shuffle_interval
self.nr_reuse = nr_reuse
def reset_state(self):
ProxyDataFlow.reset_state(self)
RNGDataFlow.reset_state(self)
self.ds_itr = RepeatedData(self.ds).get_data()
self.ds_itr = RepeatedData(self.ds, -1).get_data()
self.current_cnt = 0
def _add_data(self):
@@ -475,7 +482,7 @@ class LocallyShuffleData(ProxyDataFlow, RNGDataFlow):
cnt = 0
while True:
self.rng.shuffle(self.q)
for _ in range(self.q.maxlen):
for _ in range(self.shuffle_interval):
# the inner loop maintains the queue size (almost) unchanged
for _ in range(self.nr_reuse):
yield self.q.popleft()
......
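For illustration only, a minimal usage sketch of the new LocallyShuffleData signature; the tensorpack.dataflow import path and the FakeData source below are assumptions about this version of the library, not part of the diff:

from tensorpack.dataflow import FakeData, LocallyShuffleData

# FakeData is only a stand-in; any sequential-read DataFlow works as the source
ds = FakeData([[40, 40, 3]], size=1000)
# keep a pool of 300 datapoints, reshuffle it after every 100 datapoints are produced
ds = LocallyShuffleData(ds, buffer_size=300, shuffle_interval=100)
ds.reset_state()
for dp in ds.get_data():
    pass  # datapoints now come out in locally-shuffled order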
@@ -7,7 +7,6 @@ import numpy as np
from .common import layer_register
from ..utils.argtools import shape2d, shape4d
from ..tfutils import symbolic_functions as symbf
from ._test import TestModel
@@ -127,9 +126,9 @@ def FixedUnPooling(x, shape, unpool_mat=None):
assert unpool_mat.get_shape().as_list() == list(shape)
# perform a tensor-matrix kronecker product
fx = symbf.flatten(tf.transpose(x, [0, 3, 1, 2]))
fx = tf.reshape(tf.transpose(x, [0, 3, 1, 2]), [-1])
fx = tf.expand_dims(fx, -1) # (bchw)x1
mat = tf.expand_dims(symbf.flatten(unpool_mat), 0) # 1x(shxsw)
mat = tf.expand_dims(tf.reshape(unpool_mat, [-1]), 0) # 1x(shxsw)
prod = tf.matmul(fx, mat) # (bchw) x(shxsw)
prod = tf.reshape(prod, tf.stack(
[-1, input_shape[3], input_shape[1], input_shape[2], shape[0], shape[1]]))
......
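As a side note, the matmul above implements the flattened tensor-matrix Kronecker product named in the comment; a minimal NumPy sketch of the same trick for one 2x2 channel (NumPy and the toy shapes are assumptions made for illustration, not part of the commit):

import numpy as np

x = np.array([[1., 2.],
              [3., 4.]])            # one channel of the input feature map
unpool_mat = np.array([[1., 0.],    # place each input value at the top-left
                       [0., 0.]])   # corner of its 2x2 output block

fx = x.reshape(-1, 1)               # (h*w) x 1 column vector
mat = unpool_mat.reshape(1, -1)     # 1 x (sh*sw) row vector
prod = fx @ mat                     # outer product, (h*w) x (sh*sw)
# interleave the blocks back into a 4x4 map; equals the Kronecker product
out = prod.reshape(2, 2, 2, 2).transpose(0, 2, 1, 3).reshape(4, 4)
assert np.allclose(out, np.kron(x, unpool_mat))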
@@ -369,8 +369,8 @@ def shapeless_placeholder(x, axis, name):
If you want to feed to a tensor, the shape of the feed value must match
the tensor's static shape. This function creates a placeholder which
defaults to x if not fed, but has a less specific static shape.
See `tensorflow#5680
defaults to x if not fed, but has a less specific static shape than x.
See also `tensorflow#5680
<https://github.com/tensorflow/tensorflow/issues/5680>`_.
Args:
......
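A hedged usage sketch of shapeless_placeholder, inferred from the docstring above rather than shown in this diff; the tensor and the name 'z_feed' are made up for illustration, and it is assumed that the given axis of x's static shape becomes unknown in the returned placeholder:

z = tf.random_uniform([50, 100], name='z')       # static shape (50, 100)
# same value when nothing is fed, but (assumed) axis 0 of the static shape
# becomes None, so 'z_feed' can later be fed with any batch size
z = shapeless_placeholder(z, 0, name='z_feed')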