Commit 95bd4af5 authored by Yuxin Wu

self.cost -> return cost (#318)

parent a1e107d9
@@ -119,13 +119,12 @@ class Model(ModelDesc):
         advantage = tf.sqrt(tf.reduce_mean(tf.square(advantage)), name='rms_advantage')
         entropy_beta = tf.get_variable('entropy_beta', shape=[],
                                        initializer=tf.constant_initializer(0.01), trainable=False)
-        self.cost = tf.add_n([policy_loss, xentropy_loss * entropy_beta, value_loss])
-        self.cost = tf.truediv(self.cost,
-                               tf.cast(tf.shape(futurereward)[0], tf.float32),
-                               name='cost')
+        cost = tf.add_n([policy_loss, xentropy_loss * entropy_beta, value_loss])
+        cost = tf.truediv(cost, tf.cast(tf.shape(futurereward)[0], tf.float32), name='cost')
         summary.add_moving_summary(policy_loss, xentropy_loss,
                                    value_loss, pred_reward, advantage,
-                                   self.cost, tf.reduce_mean(importance, name='importance'))
+                                   cost, tf.reduce_mean(importance, name='importance'))
+        return cost
 
     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.001, trainable=False)
...
@@ -53,7 +53,7 @@ class Model(ModelDesc):
         loss = tf.nn.ctc_loss(label, logits, seqlen, time_major=False)
-        self.cost = tf.reduce_mean(loss, name='cost')
+        cost = tf.reduce_mean(loss, name='cost')
 
         logits = tf.transpose(logits, [1, 0, 2])
@@ -68,7 +68,8 @@ class Model(ModelDesc):
         err = tf.edit_distance(predictions, label, normalize=True)
         err.set_shape([None])
         err = tf.reduce_mean(err, name='error')
-        summary.add_moving_summary(err, self.cost)
+        summary.add_moving_summary(err, cost)
+        return cost
 
     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=5e-3, trainable=False)
...
@@ -104,9 +104,10 @@ class Model(ModelDesc):
         xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
             logits=logits, labels=tf.reshape(nextinput, [-1]))
-        self.cost = tf.reduce_mean(xent_loss, name='cost')
+        cost = tf.reduce_mean(xent_loss, name='cost')
         summary.add_param_summary(('.*/W', ['histogram']))   # monitor histogram of all W
-        summary.add_moving_summary(self.cost)
+        summary.add_moving_summary(cost)
+        return cost
 
     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=2e-3, trainable=False)
...
@@ -75,11 +75,12 @@ class Model(ModelDesc):
         target = reward + (1.0 - tf.cast(isOver, tf.float32)) * self.gamma * tf.stop_gradient(best_v)
 
-        self.cost = tf.losses.huber_loss(
-            target, pred_action_value, reduction=tf.losses.Reduction.MEAN)
+        cost = tf.losses.huber_loss(
+            target, pred_action_value, reduction=tf.losses.Reduction.MEAN)
         summary.add_param_summary(('conv.*/W', ['histogram', 'rms']),
                                   ('fc.*/W', ['histogram', 'rms']))   # monitor all W
-        summary.add_moving_summary(self.cost)
+        summary.add_moving_summary(cost)
+        return cost
 
     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=self.learning_rate, trainable=False)
...
@@ -50,8 +50,7 @@ class Model(mnist_example.Model):
         wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                               name='regularize_loss')
-        self.cost = tf.add_n([wd_cost, cost], name='cost')
-        add_moving_summary(cost, wd_cost, self.cost)
+        return tf.add_n([wd_cost, cost], name='cost')
 
 
 if __name__ == '__main__':
...
@@ -159,8 +159,9 @@ class Model(ModelDesc):
         wd_cost = regularize_cost('fc.*/W', l2_regularizer(5e-6), name='regularize_cost')
         add_param_summary(('.*/W', ['histogram', 'rms']))
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
-        add_moving_summary(cost, wd_cost, self.cost)
+        total_cost = tf.add_n([cost, wd_cost], name='cost')
+        add_moving_summary(cost, wd_cost, total_cost)
+        return total_cost
 
     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=1e-4, trainable=False)
...
@@ -120,8 +120,9 @@ class Model(ModelDesc):
         wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7))
         add_param_summary(('.*/W', ['histogram', 'rms']))
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
-        add_moving_summary(cost, wd_cost, self.cost)
+        total_cost = tf.add_n([cost, wd_cost], name='cost')
+        add_moving_summary(cost, wd_cost, total_cost)
+        return total_cost
 
     def _get_optimizer(self):
         lr = tf.train.exponential_decay(
...
@@ -143,8 +143,9 @@ class Model(ModelDesc):
         tf.summary.image('pred_gt_filters', filters, max_outputs=20)
         tf.summary.image('pred_gt_images', images, max_outputs=20)
 
-        self.cost = tf.reduce_mean(tf.squared_difference(pred_image, gt_image), name="cost")
-        summary.add_moving_summary(self.cost)
+        cost = tf.reduce_mean(tf.squared_difference(pred_image, gt_image), name="cost")
+        summary.add_moving_summary(cost)
+        return cost
 
     def _get_optimizer(self):
         return tf.train.AdamOptimizer(1e-3)
...
@@ -180,13 +180,14 @@ class Model(ModelDesc):
                 '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
                 l2_regularizer(1e-4), name='wd_cost')
 
-            self.cost = tf.add_n([
+            total_cost = tf.add_n([
                 rpn_label_loss, rpn_box_loss,
                 fastrcnn_label_loss, fastrcnn_box_loss,
                 mrcnn_loss,
                 wd_cost], 'total_cost')
 
-            add_moving_summary(self.cost, wd_cost)
+            add_moving_summary(total_cost, wd_cost)
+            return total_cost
         else:
             label_probs = tf.nn.softmax(fastrcnn_label_logits, name='fastrcnn_all_probs')   # #proposal x #Class
             anchors = tf.tile(tf.expand_dims(proposal_boxes, 1), [1, config.NUM_CLASS - 1, 1])   # #proposal x #Cat x 4
...
@@ -115,8 +115,9 @@ class Model(ModelDesc):
         costs.append(wd_cost)
 
         add_param_summary(('.*/W', ['histogram']))   # monitor W
-        self.cost = tf.add_n(costs, name='cost')
-        add_moving_summary(costs + [wrong, self.cost])
+        total_cost = tf.add_n(costs, name='cost')
+        add_moving_summary(costs + [wrong, total_cost])
+        return total_cost
 
     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=3e-5, trainable=False)
...
@@ -165,10 +165,11 @@ class ImageNetModel(ModelDesc):
             wd_loss = regularize_cost('.*/W', tf.contrib.layers.l2_regularizer(self.weight_decay),
                                       name='l2_regularize_loss')
             add_moving_summary(loss, wd_loss)
-            self.cost = tf.add_n([loss, wd_loss], name='cost')
+            total_cost = tf.add_n([loss, wd_loss], name='cost')
         else:
-            self.cost = tf.identity(loss, name='cost')
-            add_moving_summary(self.cost)
+            total_cost = tf.identity(loss, name='cost')
+            add_moving_summary(total_cost)
+        return total_cost
 
     @abstractmethod
     def get_logits(self, image):
...
@@ -111,8 +111,9 @@ class Model(ModelDesc):
                                   80000, 0.7, True)
         wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='l2_regularize_loss')
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
-        add_moving_summary(wd_cost, self.cost)
+        total_cost = tf.add_n([cost, wd_cost], name='cost')
+        add_moving_summary(wd_cost, total_cost)
+        return total_cost
 
     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.045, trainable=False)
...
@@ -96,11 +96,12 @@ class Model(ModelDesc):
             logits=logits, labels=tf.reshape(nextinput, [-1]))
         with tf.control_dependencies(update_state_ops):
-            self.cost = tf.truediv(tf.reduce_sum(xent_loss),
-                                   tf.cast(BATCH, tf.float32), name='cost')   # log-perplexity
+            cost = tf.truediv(tf.reduce_sum(xent_loss),
+                              tf.cast(BATCH, tf.float32), name='cost')   # log-perplexity
 
-        perpl = tf.exp(self.cost / SEQ_LEN, name='perplexity')
-        summary.add_moving_summary(perpl, self.cost)
+        perpl = tf.exp(cost / SEQ_LEN, name='perplexity')
+        summary.add_moving_summary(perpl, cost)
+        return cost
 
     def reset_lstm_state(self):
         s = self.state
...
@@ -77,7 +77,7 @@ class ResNet_Cifar(ModelDesc):
 
         # weight decay on all W matrixes. including convolutional layers
         wd_cost = tf.multiply(WEIGHT_DECAY, regularize_cost('.*', tf.nn.l2_loss), name='wd_cost')
-        self.cost = tf.add_n([ce_cost, wd_cost], name='cost')
+        return tf.add_n([ce_cost, wd_cost], name='cost')
 
     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
...
@@ -110,7 +110,7 @@ class Model(ModelDesc):
         add_moving_summary(cost, wd_cost)
         add_param_summary(('.*/W', ['histogram']))   # monitor W
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
+        return tf.add_n([cost, wd_cost], name='cost')
 
     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.01, trainable=False)
...
@@ -64,7 +64,7 @@ class Model(ModelDesc):
         loss = compute_loss_and_error(logits, label)
         wd_cost = regularize_cost('.*/W', l2_regularizer(1e-4), name='l2_regularize_loss')
         add_moving_summary(loss, wd_cost)
-        self.cost = tf.add_n([loss, wd_cost], name='cost')
+        return tf.add_n([loss, wd_cost], name='cost')
 
     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
...
@@ -53,7 +53,7 @@ def saliency_map(output, input, name="saliency_map"):
     return tf.identity(saliency_op, name=name)
 
 
-class Model(tp.ModelDesc):
+class Model(tp.ModelDescBase):
     def inputs(self):
         return [tf.placeholder(tf.float32, (IMAGE_SIZE, IMAGE_SIZE, 3), 'image')]
...
@@ -253,10 +253,11 @@ class SiameseModel(EmbeddingModel):
 
         # compute the actual loss
         cost, pos_dist, neg_dist = contrastive_loss(x, y, label, 5., extra=True, scope="loss")
-        self.cost = tf.identity(cost, name="cost")
+        cost = tf.identity(cost, name="cost")
         # track these values during training
-        add_moving_summary(pos_dist, neg_dist, self.cost)
+        add_moving_summary(pos_dist, neg_dist, cost)
+        return cost
 
 
 class CosineModel(SiameseModel):
@@ -268,8 +269,9 @@ class CosineModel(SiameseModel):
         tf.identity(self.embed(inputs[0]), name="emb")
 
         cost = siamese_cosine_loss(x, y, label, scope="loss")
-        self.cost = tf.identity(cost, name="cost")
-        add_moving_summary(self.cost)
+        cost = tf.identity(cost, name="cost")
+        add_moving_summary(cost)
+        return cost
 
 
 class TripletModel(EmbeddingModel):
@@ -296,8 +298,9 @@ class TripletModel(EmbeddingModel):
 
         cost, pos_dist, neg_dist = self.loss(a, p, n)
-        self.cost = tf.identity(cost, name="cost")
-        add_moving_summary(pos_dist, neg_dist, self.cost)
+        cost = tf.identity(cost, name="cost")
+        add_moving_summary(pos_dist, neg_dist, cost)
+        return cost
 
 
 class SoftTripletModel(TripletModel):
@@ -333,10 +336,11 @@ class CenterModel(EmbeddingModel):
         cls_cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label),
                                   name='classification_costs')
-        self.cost = tf.add(emb_cost, 100 * cls_cost, name="cost")
+        total_cost = tf.add(emb_cost, 100 * cls_cost, name="cost")
 
         # track these values during training
-        add_moving_summary(self.cost, cls_cost, emb_cost)
+        add_moving_summary(total_cost, cls_cost, emb_cost)
+        return total_cost
 
 
 def get_config(model, algorithm_name):
...
@@ -85,7 +85,7 @@ class Model(ModelDesc):
         wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                               name='regularize_loss')
         summary.add_moving_summary(cost, wd_cost)
-        self.cost = tf.add_n([wd_cost, cost], name='cost')
+        return tf.add_n([wd_cost, cost], name='cost')
 
     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=5e-4, trainable=False)
...
@@ -72,7 +72,7 @@ class Model(ModelDesc):
         add_moving_summary(cost, wd_cost)
         add_param_summary(('.*/W', ['histogram']))   # monitor W
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
+        return tf.add_n([cost, wd_cost], name='cost')
 
     def _get_optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=1e-2, trainable=False)
...
@@ -27,12 +27,9 @@ class Model(ModelDesc):
         return [tf.placeholder(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE), 'input'),
                 tf.placeholder(tf.int32, (None,), 'label')]
 
-    def _build_graph(self, inputs):
+    def build_graph(self, image, label):
         """This function should build the model which takes the input variables
-        and define self.cost at the end"""
-
-        # inputs contains a list of input variables defined above
-        image, label = inputs
+        and return cost at the end"""
 
         # In tensorflow, inputs to convolution function are assumed to be
         # NHWC. Add a single channel here.
@@ -74,11 +71,12 @@ class Model(ModelDesc):
         wd_cost = tf.multiply(1e-5,
                               regularize_cost('fc.*/W', tf.nn.l2_loss),
                               name='regularize_loss')
-        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
-        summary.add_moving_summary(cost, wd_cost, self.cost)
+        total_cost = tf.add_n([wd_cost, cost], name='total_cost')
+        summary.add_moving_summary(cost, wd_cost, total_cost)
 
         # monitor histogram of all weight (of conv and fc layers) in tensorboard
         summary.add_param_summary(('.*/W', ['histogram', 'rms']))
+        return total_cost
 
     def _get_optimizer(self):
         lr = tf.train.exponential_decay(
...
@@ -32,9 +32,6 @@ class Model(ModelDesc):
                 tf.placeholder(tf.int32, (None,), 'label')]
 
     def _build_graph(self, inputs):
-        """This function should build the model which takes the input variables
-        and define self.cost at the end"""
-
         # inputs contains a list of input variables defined above
         image, label = inputs
@@ -77,11 +74,12 @@ class Model(ModelDesc):
         wd_cost = tf.multiply(1e-5,
                               regularize_cost('fc.*/kernel', tf.nn.l2_loss),
                               name='regularize_loss')
-        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
-        summary.add_moving_summary(cost, wd_cost, self.cost)
+        total_cost = tf.add_n([wd_cost, cost], name='total_cost')
+        summary.add_moving_summary(cost, wd_cost, total_cost)
 
         # monitor histogram of all weight (of conv and fc layers) in tensorboard
         summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
+        return total_cost
 
     def _get_optimizer(self):
         lr = tf.train.exponential_decay(
...
@@ -108,10 +108,7 @@ class Model(ModelDesc):
         wd_cost = tf.multiply(1e-5,
                               regularize_cost('fc.*/W', tf.nn.l2_loss),
                               name='regularize_loss')
-        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
-        summary.add_moving_summary(cost, wd_cost, self.cost, accuracy)
-        summary.add_param_summary(('.*/W', ['histogram', 'rms']))
+        return tf.add_n([wd_cost, cost], name='total_cost')
 
     def _get_optimizer(self):
         lr = tf.train.exponential_decay(
...
@@ -56,7 +56,7 @@ class Model(ModelDesc):
         add_moving_summary(cost, wd_cost)
         add_param_summary(('.*/W', ['histogram', 'rms']))   # monitor W
-        self.cost = tf.add_n([cost, wd_cost], name='cost')
+        return tf.add_n([cost, wd_cost], name='cost')
 
     def _get_optimizer(self):
         lr = tf.train.exponential_decay(
...
@@ -177,7 +177,7 @@ class ModelDesc(ModelDescBase):
     A ModelDesc with **single cost** and **single optimizer**.
 
     It has the following constraints in addition to :class:`ModelDescBase`:
 
-    1. :meth:`build_graph(...)` method should return a cost.
+    1. :meth:`build_graph(...)` method should return a cost when called under a training context.
        The cost will be the final cost to be optimized by the optimizer.
        Therefore it should include necessary regularization.
 
     2. Subclass is expected to implement :meth:`optimizer()` method.
...
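
After this change, a ModelDesc subclass returns its training cost from build_graph instead of assigning self.cost as a side effect, which makes the cost an explicit output that the trainer can consume. A minimal sketch of the new contract (the two-layer network, layer names, and hyper-parameters below are illustrative, not taken from this commit):

import tensorflow as tf
from tensorpack import ModelDesc, regularize_cost
from tensorpack.tfutils.summary import add_moving_summary


class Model(ModelDesc):
    def inputs(self):
        return [tf.placeholder(tf.float32, (None, 28, 28), 'input'),
                tf.placeholder(tf.int32, (None,), 'label')]

    def build_graph(self, image, label):
        # Illustrative network body; any graph-building code works here.
        feat = tf.reshape(image, [-1, 28 * 28])
        feat = tf.layers.dense(feat, 256, activation=tf.nn.relu, name='fc0')
        logits = tf.layers.dense(feat, 10, name='fc1')

        cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label),
            name='cross_entropy_loss')
        # tf.layers stores weights under '<name>/kernel'.
        wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/kernel', tf.nn.l2_loss),
                              name='regularize_loss')
        total_cost = tf.add_n([cost, wd_cost], name='total_cost')
        add_moving_summary(cost, wd_cost, total_cost)
        # The returned tensor, not a self.cost attribute, is what the trainer minimizes.
        return total_cost

    def _get_optimizer(self):
        lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
        return tf.train.AdamOptimizer(lr)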