Commit a560e667 authored by Yuxin Wu

_get_optimizer() -> optimizer() (#318)

parent ae80945e
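For context: this commit renames the `ModelDesc` hook `_get_optimizer()` to `optimizer()` (#318) across the examples and the library. A minimal sketch of a user-defined model after the rename; the class name and learning-rate value are illustrative only, assuming the usual top-level tensorpack import:

import tensorflow as tf
from tensorpack import ModelDesc

class MyModel(ModelDesc):
    # inputs() and build_graph() omitted; only the renamed hook is shown.

    def optimizer(self):
        # Formerly named _get_optimizer(); the body itself is unchanged by this commit.
        lr = tf.get_variable('learning_rate', initializer=1e-3, trainable=False)
        return tf.train.AdamOptimizer(lr, epsilon=1e-3)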
@@ -125,7 +125,7 @@ class Model(ModelDesc):
             cost, tf.reduce_mean(importance, name='importance'))
         return cost

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.001, trainable=False)
         opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
...
@@ -70,7 +70,7 @@ class Model(ModelDesc):
         summary.add_moving_summary(err, cost)
         return cost

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=5e-3, trainable=False)
         opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
         return optimizer.apply_grad_processors(
...
@@ -107,7 +107,7 @@ class Model(ModelDesc):
         summary.add_moving_summary(cost)
         return cost

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=2e-3, trainable=False)
         opt = tf.train.AdamOptimizer(lr)
         return optimizer.apply_grad_processors(opt, [GlobalNormClip(5)])
...
@@ -81,7 +81,7 @@ class Model(ModelDesc):
         summary.add_moving_summary(cost)
         return cost

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=self.learning_rate, trainable=False)
         opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
         return optimizer.apply_grad_processors(
...
@@ -162,7 +162,7 @@ class Model(ModelDesc):
         add_moving_summary(cost, wd_cost, total_cost)
         return total_cost

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=1e-4, trainable=False)
         return tf.train.AdamOptimizer(lr, epsilon=1e-5)
...
@@ -123,7 +123,7 @@ class Model(ModelDesc):
         add_moving_summary(cost, wd_cost, total_cost)
         return total_cost

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.train.exponential_decay(
             learning_rate=1e-3,
             global_step=get_global_step_var(),
...
@@ -146,7 +146,7 @@ class Model(ModelDesc):
         summary.add_moving_summary(cost)
         return cost

-    def _get_optimizer(self):
+    def optimizer(self):
         return tf.train.AdamOptimizer(1e-3)
...
@@ -216,7 +216,7 @@ class Model(ModelDesc):
         final_masks = tf.cond(tf.size(final_probs) > 0, f1, lambda: tf.zeros([0, 14, 14]))
         tf.identity(final_masks, name='final_masks')

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.003, trainable=False)
         tf.summary.scalar('learning_rate', lr)
...
@@ -122,7 +122,7 @@ class Model(GANModelDesc):
         self.collect_variables()

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=1e-4, trainable=False)
         opt = tf.train.AdamOptimizer(lr, beta1=0.5, beta2=0.9)
         return opt
...
@@ -103,7 +103,7 @@ class Model(GANModelDesc):
         self.build_losses(vecpos, vecneg)
         self.collect_variables()

-    def _get_optimizer(self):
+    def optimizer(self):
         return tf.train.AdamOptimizer(2e-4, beta1=0.5, epsilon=1e-3)
...
@@ -152,7 +152,7 @@ class Model(GANModelDesc):
         add_moving_summary(recon_loss_A, recon_loss_B, self.g_loss, self.d_loss)

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=2e-4, trainable=False)
         return tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)
...
@@ -94,7 +94,7 @@ class Model(GANModelDesc):
         self.build_losses(vecpos, vecneg)
         self.collect_variables()

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=2e-4, trainable=False)
         return tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)
...
@@ -147,7 +147,7 @@ class Model(GANModelDesc):
         add_moving_summary(recon_loss_A, recon_loss_B, rate, g_loss, d_loss, wd_g, wd_d)

-    def _get_optimizer(self):
+    def optimizer(self):
         return tf.train.AdamOptimizer(2e-4, beta1=0.5, epsilon=1e-3)
...
@@ -70,7 +70,7 @@ class GANModelDesc(ModelDescBase):
     @memoized
     def get_optimizer(self):
-        return self._get_optimizer()
+        return self.optimizer()


 class GANTrainer(TowerTrainer):
...
@@ -143,7 +143,7 @@ class Model(GANModelDesc):
         self.collect_variables()

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=2e-4, trainable=False)
         return tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)
...
@@ -77,7 +77,7 @@ class Model(DCGAN.Model):
         self.collect_variables()

-    def _get_optimizer(self):
+    def optimizer(self):
         opt = tf.train.AdamOptimizer(1e-4, beta1=0.5, beta2=0.9)
         return opt
...
@@ -201,7 +201,7 @@ class Model(GANModelDesc):
         # distinguish between variables of generator and discriminator updates
         self.collect_variables()

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=2e-4, dtype=tf.float32, trainable=False)
         opt = tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6)
         # generator learns 5 times faster
...
@@ -28,7 +28,7 @@ class Model(DCGAN.Model):
         self.g_loss = tf.negative(tf.reduce_mean(vecneg), name='g_loss')
         add_moving_summary(self.d_loss, self.g_loss)

-    def _get_optimizer(self):
+    def optimizer(self):
         opt = tf.train.RMSPropOptimizer(1e-4)
         return opt
...
@@ -118,7 +118,7 @@ class Model(ModelDesc):
         add_moving_summary(costs + [wrong, total_cost])
         return total_cost

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=3e-5, trainable=False)
         opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
         return optimizer.apply_grad_processors(
...
@@ -180,7 +180,7 @@ class ImageNetModel(ModelDesc):
         Nx1000 logits
         """

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
         tf.summary.scalar('learning_rate-summary', lr)
         return tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
...
@@ -114,7 +114,7 @@ class Model(ModelDesc):
         add_moving_summary(wd_cost, total_cost)
         return total_cost

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.045, trainable=False)
         return tf.train.MomentumOptimizer(lr, 0.9)
...
@@ -111,7 +111,7 @@ class Model(ModelDesc):
             ops.append(s[k].h.assign(z))
         return tf.group(*ops, name='reset_lstm_state')

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=1.0, trainable=False)
         opt = tf.train.GradientDescentOptimizer(lr)
         return optimizer.apply_grad_processors(
...
@@ -78,7 +78,7 @@ class ResNet_Cifar(ModelDesc):
         return tf.add_n([ce_cost, wd_cost], name='cost')

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
         opt = tf.train.MomentumOptimizer(lr, 0.9)
         return opt
...
@@ -111,7 +111,7 @@ class Model(ModelDesc):
         add_param_summary(('.*/W', ['histogram']))  # monitor W
         return tf.add_n([cost, wd_cost], name='cost')

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.01, trainable=False)
         opt = tf.train.MomentumOptimizer(lr, 0.9)
         return opt
...
@@ -65,7 +65,7 @@ class Model(ModelDesc):
         add_moving_summary(loss, wd_cost)
         return tf.add_n([loss, wd_cost], name='cost')

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
         opt = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
         gradprocs = [gradproc.ScaleGradient(
...
@@ -224,7 +224,7 @@ class EmbeddingModel(ModelDesc):
         return embeddings

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=1e-4, trainable=False)
         return tf.train.GradientDescentOptimizer(lr)
...
@@ -85,7 +85,7 @@ class Model(ModelDesc):
         summary.add_moving_summary(cost, wd_cost)
         return tf.add_n([wd_cost, cost], name='cost')

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=5e-4, trainable=False)
         opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
         return optimizer.apply_grad_processors(
...
@@ -211,7 +211,7 @@ class Model(GANModelDesc):
         self.collect_variables()

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable(
             'learning_rate', initializer=1e-4, trainable=False)
         opt = tf.train.AdamOptimizer(lr)
...
@@ -73,7 +73,7 @@ class Model(ModelDesc):
         add_param_summary(('.*/W', ['histogram']))  # monitor W
         return tf.add_n([cost, wd_cost], name='cost')

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=1e-2, trainable=False)
         tf.summary.scalar('lr', lr)
         return tf.train.AdamOptimizer(lr, epsilon=1e-3)
...
@@ -78,7 +78,7 @@ class Model(ModelDesc):
         summary.add_param_summary(('.*/W', ['histogram', 'rms']))
         return total_cost

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.train.exponential_decay(
             learning_rate=1e-3,
             global_step=get_global_step_var(),
...
@@ -78,7 +78,7 @@ class Model(ModelDesc):
         summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
         return total_cost

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.train.exponential_decay(
             learning_rate=1e-3,
             global_step=get_global_step_var(),
...
@@ -59,7 +59,7 @@ class Model(ModelDesc):
         summary.add_param_summary(('.*/weights', ['histogram', 'rms']))  # slim uses different variable names
         return cost + regularize_cost_from_collection()

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.train.exponential_decay(
             learning_rate=1e-3,
             global_step=get_global_step_var(),
...
@@ -108,7 +108,7 @@ class Model(ModelDesc):
             name='regularize_loss')
         return tf.add_n([wd_cost, cost], name='total_cost')

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.train.exponential_decay(
             learning_rate=1e-3,
             global_step=get_global_step_var(),
...
@@ -56,7 +56,7 @@ class Model(ModelDesc):
         add_param_summary(('.*/W', ['histogram', 'rms']))  # monitor W
         return tf.add_n([cost, wd_cost], name='cost')

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.train.exponential_decay(
             learning_rate=1e-3,
             global_step=get_global_step_var(),
...
@@ -29,7 +29,7 @@ class Model(ModelDesc):
         summary.add_moving_summary(cost)
         return cost

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.get_variable('learning_rate', initializer=5e-3, trainable=False)
         return tf.train.AdamOptimizer(lr)
...
@@ -63,7 +63,7 @@ class Model(ModelDesc):
         summary.add_moving_summary(cost, wd_cost)
         return cost

-    def _get_optimizer(self):
+    def optimizer(self):
         lr = tf.train.exponential_decay(
             learning_rate=1e-3,
             global_step=get_global_step_var(),
...
@@ -109,10 +109,6 @@ class ModelDescBase(object):
         return [InputDesc.from_placeholder(p) for p in inputs]

     def _get_inputs(self):
-        """
-        Returns:
-            a list of :class:`InputDesc`.
-        """
         raise NotImplementedError()

     def inputs(self):
...
@@ -207,22 +203,33 @@ class ModelDesc(ModelDescBase):
     @memoized
     def get_optimizer(self):
         """
-        Return the memoized optimizer returned by `_get_optimizer`.
+        Return the memoized optimizer returned by `optimizer()`.

-        Users of :class:`ModelDesc` will need to implement `_get_optimizer()`,
+        Users of :class:`ModelDesc` will need to implement `optimizer()`,
         which will only be called once per each model.

         Returns:
             a :class:`tf.train.Optimizer` instance.
         """
-        return self._get_optimizer()
+        try:
+            return self._get_optimizer()
+        except NotImplementedError:
+            pass
+        return self.optimizer()

     def _get_optimizer(self):
         raise NotImplementedError()

+    def optimizer(self):
+        """
+        Returns a `tf.train.Optimizer` instance.
+        A subclass is expected to implement this method.
+        """
+        raise NotImplementedError()
+
     def _build_graph_get_cost(self, *inputs):
         """
-        Used by trainers to get the final cost for optimization.
+        Used internally by trainers to get the final cost for optimization.
         """
         ret = self.build_graph(*inputs)
         if not get_current_tower_context().is_training:
...
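As the last hunk shows, `get_optimizer()` stays backward compatible: it first tries the legacy `_get_optimizer()` hook and falls back to the new `optimizer()` only when the old one is not overridden. A simplified sketch of that dispatch, with the `@memoized` decorator and docstrings stripped for brevity:

def get_optimizer(self):
    # Prefer a legacy _get_optimizer() override if a subclass still defines one...
    try:
        return self._get_optimizer()
    except NotImplementedError:
        pass
    # ...otherwise call the new-style hook introduced by this commit.
    return self.optimizer()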