Commit 17126868 authored by Yuxin Wu

remove some deprecation in trainers

parent ca0f0bd0
@@ -5,7 +5,6 @@
import tensorflow as tf
from ..utils.develop import log_deprecated
from ..tfutils.tower import TowerContext, get_current_tower_context
from .input_data import QueueInput, FeedfreeInput
@@ -21,7 +20,7 @@ class FeedfreeTrainerBase(Trainer):
"""
def build_train_tower(self):
"""
Get input tensors from `self.input_method` and build the graph.
Get input tensors from `self.input_method` and build the forward graph.
"""
def f():
self._input_tensors = self._input_method.get_input_tensors()
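For context, the pattern here is to build the input tensors inside a tower context so that variables and summaries get the right scope. A minimal sketch of that pattern, assuming the ``TowerContext(name, is_training=...)`` form of the constructor (``build_inputs_under_tower`` is a hypothetical helper, not part of this commit):

from tensorpack.tfutils.tower import TowerContext

# Hypothetical helper mirroring build_train_tower(): enter a training
# tower context, then ask the input method for its input tensors.
def build_inputs_under_tower(input_method, tower_name=''):
    with TowerContext(tower_name, is_training=True):
        return input_method.get_input_tensors()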
@@ -64,7 +63,7 @@ class SingleCostFeedfreeTrainer(FeedfreeTrainerBase):
def _get_cost_and_grad(self):
""" get the cost and gradient"""
self.build_train_tower()
cost = self.model.get_cost()
cost = self.model.get_cost() # assume single cost
opt = self.config.optimizer
# GATE_NONE faster?
grads = opt.compute_gradients(
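To clarify the ``GATE_NONE`` comment: this option tells TensorFlow to compute all gradients without gating (synchronization) between them, which is usually the fastest choice. A self-contained sketch of the same call, with a toy cost standing in for ``self.model.get_cost()``:

import tensorflow as tf

# Toy single-variable cost, only to illustrate the call above.
x = tf.Variable(1.0)
cost = tf.square(x)
opt = tf.train.GradientDescentOptimizer(0.1)

# GATE_NONE: no gating between gradient ops, typically the fastest option.
# colocate_gradients_with_ops keeps each gradient op on the same device
# as its forward op, which matters once towers live on different GPUs.
grads = opt.compute_gradients(
    cost,
    gate_gradients=tf.train.Optimizer.GATE_NONE,
    colocate_gradients_with_ops=True)
train_op = opt.apply_gradients(grads)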
@@ -90,7 +89,8 @@ class SimpleFeedfreeTrainer(SingleCostFeedfreeTrainer):
assert isinstance(self._input_method, FeedfreeInput), self._input_method
super(SimpleFeedfreeTrainer, self).__init__(config)
assert len(self.config.tower) == 1, \
"SimpleFeedfreeTrainer doesn't support multigpu!"
"Got nr_tower={}, but doesn't support multigpu!" \
" Use Sync/AsyncMultiGPUTrainer instead.".format(len(self.config.tower))
def _setup(self):
super(SimpleFeedfreeTrainer, self)._setup()
@@ -101,7 +101,7 @@ class SimpleFeedfreeTrainer(SingleCostFeedfreeTrainer):
# self.train_op = tf.group(*self._input_tensors)
def QueueInputTrainer(config, input_queue=None, predict_tower=None):
def QueueInputTrainer(config, input_queue=None):
"""
A wrapper trainer which automatically wraps ``config.dataflow`` in a
:class:`QueueInput`.
@@ -117,14 +117,8 @@ def QueueInputTrainer(config, input_queue=None, predict_tower=None):
else:
assert isinstance(config.data, QueueInput), config.data
# debug
# from tensorpack.train.input_data import StagingInputWrapper, DummyConstantInput
# config.data = StagingInputWrapper(config.data, ['/gpu:0'])
# config.data = DummyConstantInput([[128,224,224,3], [128]])
if predict_tower is not None:
log_deprecated("Argument `predict_tower` in trainer", "Use TrainConfig(predict_tower=...) instead!")
config.predict_tower = predict_tower
assert len(config.tower) == 1, \
"Got nr_tower={}, but QueueInputTrainer doesn't support multigpu!" \
" Use Sync/AsyncMultiGPUTrainer instead.".format(len(config.tower))
return SimpleFeedfreeTrainer(config)
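For reference, a hypothetical usage sketch of the resulting API; ``my_dataflow`` and ``MyModel`` are placeholders, and the import paths may differ by version:

import tensorflow as tf
from tensorpack import TrainConfig
from tensorpack.train import QueueInputTrainer

# `my_dataflow` and `MyModel` stand in for a real DataFlow / ModelDesc.
config = TrainConfig(
    dataflow=my_dataflow,                    # gets wrapped in a QueueInput
    model=MyModel(),
    optimizer=tf.train.AdamOptimizer(1e-3))
QueueInputTrainer(config).train()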
@@ -293,7 +293,7 @@ class TensorInput(FeedfreeInput):
"""
Args:
get_tensor_fn: a function which returns a list of input tensors
when called.
when called. It will be called under a TowerContext.
size(int): size of this input. Use None to leave it undefined.
"""
self.get_tensor_fn = get_tensor_fn
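A minimal usage sketch for ``TensorInput``, with made-up constant tensors standing in for a real TF reader pipeline (the function is called under a TowerContext, as documented above):

import tensorflow as tf
from tensorpack.train.input_data import TensorInput  # module path per this diff

def get_tensors():
    # Must return the list of input tensors; illustrative stand-ins only.
    images = tf.random_uniform([128, 224, 224, 3])
    labels = tf.zeros([128], dtype=tf.int32)
    return [images, labels]

# size=None leaves the epoch size undefined, as documented above.
inp = TensorInput(get_tensors, size=None)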
@@ -63,20 +63,15 @@ class SyncMultiGPUTrainer(MultiGPUTrainer,
from each tower and averages them.
"""
def __init__(self, config, input_queue=None,
average_cost=False):
def __init__(self, config):
"""
Args:
config, input_queue: same as in :class:`QueueInputTrainer`.
average_cost (bool): average the cost (instead of gradients) from
each tower and do backprop only once. This option should make no
difference mathematically, but may affect speed.
config: same as in :class:`QueueInputTrainer`.
"""
if config.dataflow is not None:
# use QueueInput by default. May need to avoid this in the future (when more input types are available)
self._input_method = QueueInput(config.dataflow, input_queue)
self._input_method = QueueInput(config.dataflow)
else:
assert input_queue is None, input_queue
self._input_method = config.data
assert len(config.tower) >= 1, "MultiGPUTrainer must be used with at least one tower."
@@ -89,7 +84,6 @@ class SyncMultiGPUTrainer(MultiGPUTrainer,
self._input_method = StagingInputWrapper(self._input_method, devices)
super(SyncMultiGPUTrainer, self).__init__(config)
self.average_cost = average_cost
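``StagingInputWrapper`` prefetches the next batch into a per-GPU staging area, overlapping the host-to-device copy for step i+1 with the compute of step i. A rough sketch of the underlying TF 1.x mechanism (simplified; not the wrapper's actual code):

import tensorflow as tf
from tensorflow.python.ops.data_flow_ops import StagingArea

# Illustrative batch; a real pipeline would come from the input method.
images = tf.random_uniform([128, 224, 224, 3])
labels = tf.zeros([128], dtype=tf.int32)

# One staging area per GPU: put() copies the next batch onto the device
# while the current step trains on the output of get().
with tf.device('/gpu:0'):
    area = StagingArea(dtypes=[tf.float32, tf.int32])
    stage_op = area.put([images, labels])   # run together with the train op
    staged_images, staged_labels = area.get()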
@staticmethod
def _average_grads(tower_grads):
@@ -117,7 +111,7 @@ class SyncMultiGPUTrainer(MultiGPUTrainer,
def _setup(self):
super(SyncMultiGPUTrainer, self)._setup()
if not self.average_cost:
grad_list = MultiGPUTrainer.multi_tower_grads(
self.config.tower, lambda: self._get_cost_and_grad()[1])
@@ -128,21 +122,7 @@ class SyncMultiGPUTrainer(MultiGPUTrainer,
grads = SyncMultiGPUTrainer._average_grads(grad_list)
# grads = grad_list[0]
else:
def get_cost():
self.build_train_tower()
return self.model.get_cost()
cost_list = MultiGPUTrainer.multi_tower_costs(
self.config.tower, get_cost)
cost = tf.multiply(tf.add_n(cost_list), 1.0 / len(cost_list),
name='averaged_cost')
opt = self.config.optimizer
grads = opt.compute_gradients(
cost,
gate_gradients=tf.train.Optimizer.GATE_NONE,
colocate_gradients_with_ops=True)
self.train_op = self.config.optimizer.apply_gradients(grads, name='min_op')
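The ``average_cost`` option removed here relied on linearity of differentiation: the gradient of the averaged cost equals the average of the per-tower gradients, so the two paths agree mathematically. A toy sketch of the equivalence (illustrative only; not the trainer's actual code):

import tensorflow as tf

x = tf.Variable(2.0)
costs = [tf.square(x), 3.0 * x]     # stand-ins for per-tower costs
opt = tf.train.GradientDescentOptimizer(0.1)

# Removed path: average the costs, then differentiate once.
avg_cost = tf.add_n(costs) / len(costs)
grads_a = opt.compute_gradients(avg_cost)

# Kept path: differentiate per tower, then average the gradients.
per_tower = [opt.compute_gradients(c) for c in costs]
grads_b = [(tf.add_n([g for g, _ in pairs]) / len(costs), pairs[0][1])
           for pairs in zip(*per_tower)]
# By linearity of the gradient, grads_a and grads_b evaluate identically.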
@@ -154,19 +134,17 @@ class AsyncMultiGPUTrainer(MultiGPUTrainer,
"""
def __init__(self, config,
input_queue=None,
scale_gradient=True):
"""
Args:
config, input_queue: same as in :class:`QueueInputTrainer`.
config: same as in :class:`QueueInputTrainer`.
scale_gradient (bool): if True, will scale each gradient by
``1.0/nr_tower``, to make Async and Sync Trainer have the same
effective learning rate.
"""
if config.dataflow is not None:
self._input_method = QueueInput(config.dataflow, input_queue)
self._input_method = QueueInput(config.dataflow)
else:
assert input_queue is None, input_queue
self._input_method = config.data
super(AsyncMultiGPUTrainer, self).__init__(config)
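The ``scale_gradient`` behavior documented above amounts to multiplying every gradient by ``1.0 / nr_tower`` before applying it, so N async towers have roughly the same effective learning rate as one synchronous update. A minimal sketch of the idea (tensorpack implements this through a gradient processor; the code below is only illustrative):

import tensorflow as tf

x = tf.Variable(1.0)
cost = tf.square(x)                      # stand-in for the per-tower cost
opt = tf.train.GradientDescentOptimizer(0.1)
nr_tower = 4                             # hypothetical tower count

# Scale each gradient by 1/nr_tower before applying it.
grads_and_vars = opt.compute_gradients(cost)
scaled = [(g / float(nr_tower), v)
          for g, v in grads_and_vars if g is not None]
train_op = opt.apply_gradients(scaled)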