Commit a560e667 authored by Yuxin Wu

_get_optimizer() -> optimizer() (#318)

parent ae80945e
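In user code the change amounts to renaming the overridden hook: models implement optimizer() instead of _get_optimizer(), while trainers keep calling the memoized get_optimizer() wrapper. A minimal sketch of a model written against the new name (the class name MyModel and the top-level import path are assumptions; the learning-rate pattern is copied from the hunks below):

import tensorflow as tf
from tensorpack import ModelDesc  # assumed top-level import path


class MyModel(ModelDesc):
    # inputs() / build_graph() omitted; only the renamed optimizer hook is shown.
    def optimizer(self):  # previously spelled _get_optimizer(self)
        # Keep the LR in a non-trainable variable so it can be adjusted at runtime.
        lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
        return tf.train.AdamOptimizer(lr, epsilon=1e-3)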
......@@ -125,7 +125,7 @@ class Model(ModelDesc):
cost, tf.reduce_mean(importance, name='importance'))
return cost
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=0.001, trainable=False)
opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
......
......@@ -70,7 +70,7 @@ class Model(ModelDesc):
summary.add_moving_summary(err, cost)
return cost
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=5e-3, trainable=False)
opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
return optimizer.apply_grad_processors(
......
......@@ -107,7 +107,7 @@ class Model(ModelDesc):
summary.add_moving_summary(cost)
return cost
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=2e-3, trainable=False)
opt = tf.train.AdamOptimizer(lr)
return optimizer.apply_grad_processors(opt, [GlobalNormClip(5)])
......
......@@ -81,7 +81,7 @@ class Model(ModelDesc):
summary.add_moving_summary(cost)
return cost
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=self.learning_rate, trainable=False)
opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
return optimizer.apply_grad_processors(
......
......@@ -162,7 +162,7 @@ class Model(ModelDesc):
add_moving_summary(cost, wd_cost, total_cost)
return total_cost
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=1e-4, trainable=False)
return tf.train.AdamOptimizer(lr, epsilon=1e-5)
......
......@@ -123,7 +123,7 @@ class Model(ModelDesc):
add_moving_summary(cost, wd_cost, total_cost)
return total_cost
def _get_optimizer(self):
def optimizer(self):
lr = tf.train.exponential_decay(
learning_rate=1e-3,
global_step=get_global_step_var(),
......
......@@ -146,7 +146,7 @@ class Model(ModelDesc):
summary.add_moving_summary(cost)
return cost
def _get_optimizer(self):
def optimizer(self):
return tf.train.AdamOptimizer(1e-3)
......
......@@ -216,7 +216,7 @@ class Model(ModelDesc):
final_masks = tf.cond(tf.size(final_probs) > 0, f1, lambda: tf.zeros([0, 14, 14]))
tf.identity(final_masks, name='final_masks')
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=0.003, trainable=False)
tf.summary.scalar('learning_rate', lr)
......
......@@ -122,7 +122,7 @@ class Model(GANModelDesc):
self.collect_variables()
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=1e-4, trainable=False)
opt = tf.train.AdamOptimizer(lr, beta1=0.5, beta2=0.9)
return opt
......
......@@ -103,7 +103,7 @@ class Model(GANModelDesc):
self.build_losses(vecpos, vecneg)
self.collect_variables()
def _get_optimizer(self):
def optimizer(self):
return tf.train.AdamOptimizer(2e-4, beta1=0.5, epsilon=1e-3)
......
......@@ -152,7 +152,7 @@ class Model(GANModelDesc):
add_moving_summary(recon_loss_A, recon_loss_B, self.g_loss, self.d_loss)
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=2e-4, trainable=False)
return tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)
......
......@@ -94,7 +94,7 @@ class Model(GANModelDesc):
self.build_losses(vecpos, vecneg)
self.collect_variables()
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=2e-4, trainable=False)
return tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)
......
......@@ -147,7 +147,7 @@ class Model(GANModelDesc):
add_moving_summary(recon_loss_A, recon_loss_B, rate, g_loss, d_loss, wd_g, wd_d)
def _get_optimizer(self):
def optimizer(self):
return tf.train.AdamOptimizer(2e-4, beta1=0.5, epsilon=1e-3)
......
......@@ -70,7 +70,7 @@ class GANModelDesc(ModelDescBase):
@memoized
def get_optimizer(self):
return self._get_optimizer()
return self.optimizer()
class GANTrainer(TowerTrainer):
......
......@@ -143,7 +143,7 @@ class Model(GANModelDesc):
self.collect_variables()
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=2e-4, trainable=False)
return tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)
......
......@@ -77,7 +77,7 @@ class Model(DCGAN.Model):
self.collect_variables()
def _get_optimizer(self):
def optimizer(self):
opt = tf.train.AdamOptimizer(1e-4, beta1=0.5, beta2=0.9)
return opt
......
......@@ -201,7 +201,7 @@ class Model(GANModelDesc):
# distinguish between variables of generator and discriminator updates
self.collect_variables()
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=2e-4, dtype=tf.float32, trainable=False)
opt = tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6)
# generator learns 5 times faster
......
......@@ -28,7 +28,7 @@ class Model(DCGAN.Model):
self.g_loss = tf.negative(tf.reduce_mean(vecneg), name='g_loss')
add_moving_summary(self.d_loss, self.g_loss)
def _get_optimizer(self):
def optimizer(self):
opt = tf.train.RMSPropOptimizer(1e-4)
return opt
......
......@@ -118,7 +118,7 @@ class Model(ModelDesc):
add_moving_summary(costs + [wrong, total_cost])
return total_cost
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=3e-5, trainable=False)
opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
return optimizer.apply_grad_processors(
......
......@@ -180,7 +180,7 @@ class ImageNetModel(ModelDesc):
Nx1000 logits
"""
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
tf.summary.scalar('learning_rate-summary', lr)
return tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
......
......@@ -114,7 +114,7 @@ class Model(ModelDesc):
add_moving_summary(wd_cost, total_cost)
return total_cost
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=0.045, trainable=False)
return tf.train.MomentumOptimizer(lr, 0.9)
......
......@@ -111,7 +111,7 @@ class Model(ModelDesc):
ops.append(s[k].h.assign(z))
return tf.group(*ops, name='reset_lstm_state')
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=1.0, trainable=False)
opt = tf.train.GradientDescentOptimizer(lr)
return optimizer.apply_grad_processors(
......
......@@ -78,7 +78,7 @@ class ResNet_Cifar(ModelDesc):
return tf.add_n([ce_cost, wd_cost], name='cost')
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
opt = tf.train.MomentumOptimizer(lr, 0.9)
return opt
......
......@@ -111,7 +111,7 @@ class Model(ModelDesc):
add_param_summary(('.*/W', ['histogram'])) # monitor W
return tf.add_n([cost, wd_cost], name='cost')
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=0.01, trainable=False)
opt = tf.train.MomentumOptimizer(lr, 0.9)
return opt
......
......@@ -65,7 +65,7 @@ class Model(ModelDesc):
add_moving_summary(loss, wd_cost)
return tf.add_n([loss, wd_cost], name='cost')
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
opt = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
gradprocs = [gradproc.ScaleGradient(
......
......@@ -224,7 +224,7 @@ class EmbeddingModel(ModelDesc):
return embeddings
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=1e-4, trainable=False)
return tf.train.GradientDescentOptimizer(lr)
......
......@@ -85,7 +85,7 @@ class Model(ModelDesc):
summary.add_moving_summary(cost, wd_cost)
return tf.add_n([wd_cost, cost], name='cost')
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=5e-4, trainable=False)
opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
return optimizer.apply_grad_processors(
......
......@@ -211,7 +211,7 @@ class Model(GANModelDesc):
self.collect_variables()
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable(
'learning_rate', initializer=1e-4, trainable=False)
opt = tf.train.AdamOptimizer(lr)
......
......@@ -73,7 +73,7 @@ class Model(ModelDesc):
add_param_summary(('.*/W', ['histogram'])) # monitor W
return tf.add_n([cost, wd_cost], name='cost')
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=1e-2, trainable=False)
tf.summary.scalar('lr', lr)
return tf.train.AdamOptimizer(lr, epsilon=1e-3)
......
......@@ -78,7 +78,7 @@ class Model(ModelDesc):
summary.add_param_summary(('.*/W', ['histogram', 'rms']))
return total_cost
def _get_optimizer(self):
def optimizer(self):
lr = tf.train.exponential_decay(
learning_rate=1e-3,
global_step=get_global_step_var(),
......
......@@ -78,7 +78,7 @@ class Model(ModelDesc):
summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
return total_cost
def _get_optimizer(self):
def optimizer(self):
lr = tf.train.exponential_decay(
learning_rate=1e-3,
global_step=get_global_step_var(),
......
......@@ -59,7 +59,7 @@ class Model(ModelDesc):
summary.add_param_summary(('.*/weights', ['histogram', 'rms'])) # slim uses different variable names
return cost + regularize_cost_from_collection()
def _get_optimizer(self):
def optimizer(self):
lr = tf.train.exponential_decay(
learning_rate=1e-3,
global_step=get_global_step_var(),
......
......@@ -108,7 +108,7 @@ class Model(ModelDesc):
name='regularize_loss')
return tf.add_n([wd_cost, cost], name='total_cost')
def _get_optimizer(self):
def optimizer(self):
lr = tf.train.exponential_decay(
learning_rate=1e-3,
global_step=get_global_step_var(),
......
......@@ -56,7 +56,7 @@ class Model(ModelDesc):
add_param_summary(('.*/W', ['histogram', 'rms'])) # monitor W
return tf.add_n([cost, wd_cost], name='cost')
def _get_optimizer(self):
def optimizer(self):
lr = tf.train.exponential_decay(
learning_rate=1e-3,
global_step=get_global_step_var(),
......
......@@ -29,7 +29,7 @@ class Model(ModelDesc):
summary.add_moving_summary(cost)
return cost
def _get_optimizer(self):
def optimizer(self):
lr = tf.get_variable('learning_rate', initializer=5e-3, trainable=False)
return tf.train.AdamOptimizer(lr)
......
......@@ -63,7 +63,7 @@ class Model(ModelDesc):
summary.add_moving_summary(cost, wd_cost)
return cost
def _get_optimizer(self):
def optimizer(self):
lr = tf.train.exponential_decay(
learning_rate=1e-3,
global_step=get_global_step_var(),
......
......@@ -109,10 +109,6 @@ class ModelDescBase(object):
return [InputDesc.from_placeholder(p) for p in inputs]
def _get_inputs(self):
"""
Returns:
a list of :class:`InputDesc`.
"""
raise NotImplementedError()
def inputs(self):
......@@ -207,22 +203,33 @@ class ModelDesc(ModelDescBase):
@memoized
def get_optimizer(self):
"""
Return the memoized optimizer returned by `_get_optimizer`.
Return the memoized optimizer returned by `optimizer()`.
Users of :class:`ModelDesc` will need to implement `_get_optimizer()`,
Users of :class:`ModelDesc` will need to implement `optimizer()`,
which will only be called once per each model.
Returns:
a :class:`tf.train.Optimizer` instance.
"""
try:
return self._get_optimizer()
except NotImplementedError:
pass
return self.optimizer()
def _get_optimizer(self):
raise NotImplementedError()
def optimizer(self):
"""
Returns a `tf.train.Optimizer` instance.
A subclass is expected to implement this method.
"""
raise NotImplementedError()
def _build_graph_get_cost(self, *inputs):
"""
Used by trainers to get the final cost for optimization.
Used internally by trainers to get the final cost for optimization.
"""
ret = self.build_graph(*inputs)
if not get_current_tower_context().is_training:
......
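The hunk above keeps backward compatibility: get_optimizer() is @memoized and first tries the legacy _get_optimizer(), whose default stub raises NotImplementedError, before falling back to the new optimizer() hook. A rough sketch of the resulting behavior, using the hypothetical MyModel from the sketch near the top of this page (not an official tensorpack example):

model = MyModel()
opt_a = model.get_optimizer()  # _get_optimizer() is not overridden, so optimizer() is used
opt_b = model.get_optimizer()  # @memoized: the same tf.train.Optimizer instance is returned
assert opt_a is opt_b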