Commit 95bd4af5 authored by Yuxin Wu

self.cost -> return cost (#318)

parent a1e107d9
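In short: `ModelDesc` subclasses used to publish their loss by assigning `self.cost` inside `_build_graph(self, inputs)`; after this commit, `build_graph(...)` takes the unpacked input tensors and returns the cost tensor instead. A minimal before/after sketch of the migration (the model body and the `_make_logits` helper are illustrative placeholders, not code from any one example; assumes the usual `import tensorflow as tf` and `from tensorpack import ModelDesc`):

    # Before this commit: the trainer read the loss from the self.cost attribute.
    class OldStyleModel(ModelDesc):
        def _build_graph(self, inputs):
            image, label = inputs
            logits = self._make_logits(image)  # hypothetical helper
            self.cost = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label),
                name='cost')

    # After this commit: build_graph receives the inputs directly and returns the cost.
    class NewStyleModel(ModelDesc):
        def build_graph(self, image, label):
            logits = self._make_logits(image)  # hypothetical helper
            return tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label),
                name='cost')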
@@ -119,13 +119,12 @@ class Model(ModelDesc):
         advantage = tf.sqrt(tf.reduce_mean(tf.square(advantage)), name='rms_advantage')
         entropy_beta = tf.get_variable('entropy_beta', shape=[],
                                        initializer=tf.constant_initializer(0.01), trainable=False)
-        self.cost = tf.add_n([policy_loss, xentropy_loss * entropy_beta, value_loss])
-        self.cost = tf.truediv(self.cost,
-                               tf.cast(tf.shape(futurereward)[0], tf.float32),
-                               name='cost')
+        cost = tf.add_n([policy_loss, xentropy_loss * entropy_beta, value_loss])
+        cost = tf.truediv(cost, tf.cast(tf.shape(futurereward)[0], tf.float32), name='cost')
         summary.add_moving_summary(policy_loss, xentropy_loss,
                                    value_loss, pred_reward, advantage,
-                                   self.cost, tf.reduce_mean(importance, name='importance'))
+                                   cost, tf.reduce_mean(importance, name='importance'))
+        return cost

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.001, trainable=False)
......
@@ -53,7 +53,7 @@ class Model(ModelDesc):
         loss = tf.nn.ctc_loss(label, logits, seqlen, time_major=False)
-        self.cost = tf.reduce_mean(loss, name='cost')
+        cost = tf.reduce_mean(loss, name='cost')

         logits = tf.transpose(logits, [1, 0, 2])
@@ -68,7 +68,8 @@ class Model(ModelDesc):
         err = tf.edit_distance(predictions, label, normalize=True)
         err.set_shape([None])
         err = tf.reduce_mean(err, name='error')
-        summary.add_moving_summary(err, self.cost)
+        summary.add_moving_summary(err, cost)
+        return cost

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=5e-3, trainable=False)
......
@@ -104,9 +104,10 @@ class Model(ModelDesc):
         xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
             logits=logits, labels=tf.reshape(nextinput, [-1]))
-        self.cost = tf.reduce_mean(xent_loss, name='cost')
+        cost = tf.reduce_mean(xent_loss, name='cost')
         summary.add_param_summary(('.*/W', ['histogram']))   # monitor histogram of all W
-        summary.add_moving_summary(self.cost)
+        summary.add_moving_summary(cost)
+        return cost

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=2e-3, trainable=False)
......
@@ -75,11 +75,12 @@ class Model(ModelDesc):
         target = reward + (1.0 - tf.cast(isOver, tf.float32)) * self.gamma * tf.stop_gradient(best_v)

-        self.cost = tf.losses.huber_loss(
+        cost = tf.losses.huber_loss(
             target, pred_action_value, reduction=tf.losses.Reduction.MEAN)
         summary.add_param_summary(('conv.*/W', ['histogram', 'rms']),
                                   ('fc.*/W', ['histogram', 'rms']))   # monitor all W
-        summary.add_moving_summary(self.cost)
+        summary.add_moving_summary(cost)
+        return cost

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=self.learning_rate, trainable=False)
......
@@ -50,8 +50,7 @@ class Model(mnist_example.Model):
         wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                               name='regularize_loss')
-        self.cost = tf.add_n([wd_cost, cost], name='cost')
-        add_moving_summary(cost, wd_cost, self.cost)
+        return tf.add_n([wd_cost, cost], name='cost')


 if __name__ == '__main__':
......
@@ -159,8 +159,9 @@ class Model(ModelDesc):
         wd_cost = regularize_cost('fc.*/W', l2_regularizer(5e-6), name='regularize_cost')
         add_param_summary(('.*/W', ['histogram', 'rms']))
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
-        add_moving_summary(cost, wd_cost, self.cost)
+        total_cost = tf.add_n([cost, wd_cost], name='cost')
+        add_moving_summary(cost, wd_cost, total_cost)
+        return total_cost

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=1e-4, trainable=False)
......
@@ -120,8 +120,9 @@ class Model(ModelDesc):
         wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7))
         add_param_summary(('.*/W', ['histogram', 'rms']))
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
-        add_moving_summary(cost, wd_cost, self.cost)
+        total_cost = tf.add_n([cost, wd_cost], name='cost')
+        add_moving_summary(cost, wd_cost, total_cost)
+        return total_cost

     def _get_optimizer(self):
         lr = tf.train.exponential_decay(
......
@@ -143,8 +143,9 @@ class Model(ModelDesc):
             tf.summary.image('pred_gt_filters', filters, max_outputs=20)
             tf.summary.image('pred_gt_images', images, max_outputs=20)

-        self.cost = tf.reduce_mean(tf.squared_difference(pred_image, gt_image), name="cost")
-        summary.add_moving_summary(self.cost)
+        cost = tf.reduce_mean(tf.squared_difference(pred_image, gt_image), name="cost")
+        summary.add_moving_summary(cost)
+        return cost

     def _get_optimizer(self):
         return tf.train.AdamOptimizer(1e-3)
......
@@ -180,13 +180,14 @@ class Model(ModelDesc):
                 '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
                 l2_regularizer(1e-4), name='wd_cost')

-            self.cost = tf.add_n([
+            total_cost = tf.add_n([
                 rpn_label_loss, rpn_box_loss,
                 fastrcnn_label_loss, fastrcnn_box_loss,
                 mrcnn_loss,
                 wd_cost], 'total_cost')
-            add_moving_summary(self.cost, wd_cost)
+            add_moving_summary(total_cost, wd_cost)
+            return total_cost
         else:
             label_probs = tf.nn.softmax(fastrcnn_label_logits, name='fastrcnn_all_probs')  # #proposal x #Class
             anchors = tf.tile(tf.expand_dims(proposal_boxes, 1), [1, config.NUM_CLASS - 1, 1])   # #proposal x #Cat x 4
......
@@ -115,8 +115,9 @@ class Model(ModelDesc):
             costs.append(wd_cost)

             add_param_summary(('.*/W', ['histogram']))   # monitor W
-            self.cost = tf.add_n(costs, name='cost')
-            add_moving_summary(costs + [wrong, self.cost])
+            total_cost = tf.add_n(costs, name='cost')
+            add_moving_summary(costs + [wrong, total_cost])
+            return total_cost

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=3e-5, trainable=False)
......
@@ -165,10 +165,11 @@ class ImageNetModel(ModelDesc):
             wd_loss = regularize_cost('.*/W', tf.contrib.layers.l2_regularizer(self.weight_decay),
                                       name='l2_regularize_loss')
             add_moving_summary(loss, wd_loss)
-            self.cost = tf.add_n([loss, wd_loss], name='cost')
+            total_cost = tf.add_n([loss, wd_loss], name='cost')
         else:
-            self.cost = tf.identity(loss, name='cost')
-            add_moving_summary(self.cost)
+            total_cost = tf.identity(loss, name='cost')
+            add_moving_summary(total_cost)
+        return total_cost

     @abstractmethod
     def get_logits(self, image):
......
@@ -111,8 +111,9 @@ class Model(ModelDesc):
                                80000, 0.7, True)
         wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='l2_regularize_loss')
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
-        add_moving_summary(wd_cost, self.cost)
+        total_cost = tf.add_n([cost, wd_cost], name='cost')
+        add_moving_summary(wd_cost, total_cost)
+        return total_cost

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.045, trainable=False)
......
@@ -96,11 +96,12 @@ class Model(ModelDesc):
             logits=logits, labels=tf.reshape(nextinput, [-1]))

         with tf.control_dependencies(update_state_ops):
-            self.cost = tf.truediv(tf.reduce_sum(xent_loss),
-                                   tf.cast(BATCH, tf.float32), name='cost')  # log-perplexity
-        perpl = tf.exp(self.cost / SEQ_LEN, name='perplexity')
-        summary.add_moving_summary(perpl, self.cost)
+            cost = tf.truediv(tf.reduce_sum(xent_loss),
+                              tf.cast(BATCH, tf.float32), name='cost')  # log-perplexity
+        perpl = tf.exp(cost / SEQ_LEN, name='perplexity')
+        summary.add_moving_summary(perpl, cost)
+        return cost

     def reset_lstm_state(self):
         s = self.state
......
@@ -77,7 +77,7 @@ class ResNet_Cifar(ModelDesc):

         # weight decay on all W matrixes. including convolutional layers
         wd_cost = tf.multiply(WEIGHT_DECAY, regularize_cost('.*', tf.nn.l2_loss), name='wd_cost')
-        self.cost = tf.add_n([ce_cost, wd_cost], name='cost')
+        return tf.add_n([ce_cost, wd_cost], name='cost')

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
......
@@ -110,7 +110,7 @@ class Model(ModelDesc):
         add_moving_summary(cost, wd_cost)
         add_param_summary(('.*/W', ['histogram']))   # monitor W
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
+        return tf.add_n([cost, wd_cost], name='cost')

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.01, trainable=False)
......
@@ -64,7 +64,7 @@ class Model(ModelDesc):
         loss = compute_loss_and_error(logits, label)
         wd_cost = regularize_cost('.*/W', l2_regularizer(1e-4), name='l2_regularize_loss')
         add_moving_summary(loss, wd_cost)
-        self.cost = tf.add_n([loss, wd_cost], name='cost')
+        return tf.add_n([loss, wd_cost], name='cost')

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
......
@@ -53,7 +53,7 @@ def saliency_map(output, input, name="saliency_map"):
     return tf.identity(saliency_op, name=name)


-class Model(tp.ModelDesc):
+class Model(tp.ModelDescBase):
     def inputs(self):
         return [tf.placeholder(tf.float32, (IMAGE_SIZE, IMAGE_SIZE, 3), 'image')]
......
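The saliency example above only runs inference, so it does not need the single-cost contract at all; switching its base class to `tp.ModelDescBase` (rather than returning a dummy cost) makes that explicit. A sketch of what an inference-only model looks like under this split, assuming `ModelDescBase` imposes no return-value requirement on `build_graph` (the `_forward` helper is a hypothetical placeholder):

    class InferenceOnlyModel(tp.ModelDescBase):
        def inputs(self):
            return [tf.placeholder(tf.float32, (IMAGE_SIZE, IMAGE_SIZE, 3), 'image')]

        def build_graph(self, image):
            # Build the forward graph and name its outputs; since this model
            # is never trained, no cost tensor is defined or returned.
            logits = self._forward(image)  # hypothetical network body
            tf.nn.softmax(logits, name='prob')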
@@ -253,10 +253,11 @@ class SiameseModel(EmbeddingModel):

         # compute the actual loss
         cost, pos_dist, neg_dist = contrastive_loss(x, y, label, 5., extra=True, scope="loss")
-        self.cost = tf.identity(cost, name="cost")
+        cost = tf.identity(cost, name="cost")
         # track these values during training
-        add_moving_summary(pos_dist, neg_dist, self.cost)
+        add_moving_summary(pos_dist, neg_dist, cost)
+        return cost


 class CosineModel(SiameseModel):
@@ -268,8 +269,9 @@ class CosineModel(SiameseModel):
         tf.identity(self.embed(inputs[0]), name="emb")

         cost = siamese_cosine_loss(x, y, label, scope="loss")
-        self.cost = tf.identity(cost, name="cost")
-        add_moving_summary(self.cost)
+        cost = tf.identity(cost, name="cost")
+        add_moving_summary(cost)
+        return cost


 class TripletModel(EmbeddingModel):
@@ -296,8 +298,9 @@ class TripletModel(EmbeddingModel):
         cost, pos_dist, neg_dist = self.loss(a, p, n)

-        self.cost = tf.identity(cost, name="cost")
-        add_moving_summary(pos_dist, neg_dist, self.cost)
+        cost = tf.identity(cost, name="cost")
+        add_moving_summary(pos_dist, neg_dist, cost)
+        return cost


 class SoftTripletModel(TripletModel):
@@ -333,10 +336,11 @@ class CenterModel(EmbeddingModel):
         cls_cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label),
                                   name='classification_costs')
-        self.cost = tf.add(emb_cost, 100 * cls_cost, name="cost")
+        total_cost = tf.add(emb_cost, 100 * cls_cost, name="cost")

         # track these values during training
-        add_moving_summary(self.cost, cls_cost, emb_cost)
+        add_moving_summary(total_cost, cls_cost, emb_cost)
+        return total_cost


 def get_config(model, algorithm_name):
......
@@ -85,7 +85,7 @@ class Model(ModelDesc):
         wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                               name='regularize_loss')
         summary.add_moving_summary(cost, wd_cost)
-        self.cost = tf.add_n([wd_cost, cost], name='cost')
+        return tf.add_n([wd_cost, cost], name='cost')

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=5e-4, trainable=False)
......
@@ -72,7 +72,7 @@ class Model(ModelDesc):
         add_moving_summary(cost, wd_cost)
         add_param_summary(('.*/W', ['histogram']))   # monitor W
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
+        return tf.add_n([cost, wd_cost], name='cost')

     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=1e-2, trainable=False)
......
@@ -27,12 +27,9 @@ class Model(ModelDesc):
         return [tf.placeholder(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE), 'input'),
                 tf.placeholder(tf.int32, (None,), 'label')]

-    def _build_graph(self, inputs):
+    def build_graph(self, image, label):
         """This function should build the model which takes the input variables
-        and define self.cost at the end"""
-
-        # inputs contains a list of input variables defined above
-        image, label = inputs
+        and return cost at the end"""

         # In tensorflow, inputs to convolution function are assumed to be
         # NHWC. Add a single channel here.
@@ -74,11 +71,12 @@ class Model(ModelDesc):
         wd_cost = tf.multiply(1e-5,
                               regularize_cost('fc.*/W', tf.nn.l2_loss),
                               name='regularize_loss')
-        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
-        summary.add_moving_summary(cost, wd_cost, self.cost)
+        total_cost = tf.add_n([wd_cost, cost], name='total_cost')
+        summary.add_moving_summary(cost, wd_cost, total_cost)

         # monitor histogram of all weight (of conv and fc layers) in tensorboard
         summary.add_param_summary(('.*/W', ['histogram', 'rms']))
+        return total_cost

     def _get_optimizer(self):
         lr = tf.train.exponential_decay(
......
@@ -32,9 +32,6 @@ class Model(ModelDesc):
                 tf.placeholder(tf.int32, (None,), 'label')]

     def _build_graph(self, inputs):
-        """This function should build the model which takes the input variables
-        and define self.cost at the end"""
-        # inputs contains a list of input variables defined above
         image, label = inputs
@@ -77,11 +74,12 @@ class Model(ModelDesc):
         wd_cost = tf.multiply(1e-5,
                               regularize_cost('fc.*/kernel', tf.nn.l2_loss),
                               name='regularize_loss')
-        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
-        summary.add_moving_summary(cost, wd_cost, self.cost)
+        total_cost = tf.add_n([wd_cost, cost], name='total_cost')
+        summary.add_moving_summary(cost, wd_cost, total_cost)

         # monitor histogram of all weight (of conv and fc layers) in tensorboard
         summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
+        return total_cost

     def _get_optimizer(self):
         lr = tf.train.exponential_decay(
lr = tf.train.exponential_decay(
......
@@ -108,10 +108,7 @@ class Model(ModelDesc):
         wd_cost = tf.multiply(1e-5,
                               regularize_cost('fc.*/W', tf.nn.l2_loss),
                               name='regularize_loss')
-        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
-        summary.add_moving_summary(cost, wd_cost, self.cost, accuracy)
-        summary.add_param_summary(('.*/W', ['histogram', 'rms']))
+        return tf.add_n([wd_cost, cost], name='total_cost')

     def _get_optimizer(self):
         lr = tf.train.exponential_decay(
......
@@ -56,7 +56,7 @@ class Model(ModelDesc):
         add_moving_summary(cost, wd_cost)
         add_param_summary(('.*/W', ['histogram', 'rms']))   # monitor W
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
+        return tf.add_n([cost, wd_cost], name='cost')

     def _get_optimizer(self):
         lr = tf.train.exponential_decay(
......
@@ -177,7 +177,7 @@ class ModelDesc(ModelDescBase):
     A ModelDesc with **single cost** and **single optimizer**.
     It has the following constraints in addition to :class:`ModelDescBase`:

-    1. :meth:`build_graph(...)` method should return a cost.
+    1. :meth:`build_graph(...)` method should return a cost when called under a training context.
        The cost will be the final cost to be optimized by the optimizer.
        Therefore it should include necessary regularization.

     2. Subclass is expected to implement :meth:`optimizer()` method.
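Taken together, the two constraints say that a conforming subclass returns exactly one cost tensor from `build_graph` and supplies one optimizer (via `_get_optimizer`, as the examples in this commit do). A minimal sketch, with an illustrative linear model standing in for a real network (assumes `import tensorflow as tf` and `from tensorpack import ModelDesc`):

    class LinearModel(ModelDesc):
        def inputs(self):
            return [tf.placeholder(tf.float32, (None, 10), 'feature'),
                    tf.placeholder(tf.float32, (None,), 'target')]

        def build_graph(self, feature, target):
            pred = tf.squeeze(tf.layers.dense(feature, 1), 1)
            # The returned tensor is the final cost the optimizer minimizes,
            # so any regularization terms belong in this sum as well.
            return tf.losses.mean_squared_error(target, pred)

        def _get_optimizer(self):
            return tf.train.AdamOptimizer(1e-3)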
......