Commit c33a3ccb authored by Yuxin Wu

update docs

parent caf9ee8d
@@ -8,7 +8,7 @@ SPHINXPROJ = tensorpack
 SOURCEDIR = .
 BUILDDIR = build
-.PHONY: help Makefile docset
+.PHONY: help Makefile docset clean
 all: html
@@ -24,3 +24,6 @@ docset: html
 # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
 html: Makefile
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+clean:
+	rm -rf build
@@ -169,7 +169,7 @@ class GraphProfiler(Callback):
 class PeakMemoryTracker(Callback):
     """
     Track peak memory in each session run, by
-    :module:`tf.contrib.memory_stats`.
+    :mod:`tf.contrib.memory_stats`.
     It can only be used for GPUs.
     """
     def __init__(self, devices=['/gpu:0']):
...
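Note: a usage sketch (not part of this commit) of how the callback above is typically registered. The import path and the surrounding callbacks list are assumptions; only the PeakMemoryTracker(devices=...) signature comes from the diff:

    from tensorpack.callbacks import PeakMemoryTracker

    callbacks = [
        # logs peak GPU memory after each hooked session run; GPU devices only
        PeakMemoryTracker(devices=['/gpu:0']),
        # ... other callbacks for the trainer ...
    ]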
@@ -173,10 +173,14 @@ class DistributedReplicatedBuilder(DataParallelBuilder):
             get_opt_fn (-> tf.train.Optimizer): callable which returns an optimizer
 
         Returns:
-            tf.Operation: the training op
-            tf.Operation: the op which sync all the local variables from PS.
+            (tf.Operation, tf.Operation, tf.Operation):
+
+            1. the training op.
+
+            2. the op which sync all the local variables from PS.
                This op sholud be run before training.
-            tf.Operation: the op which sync all the local `MODEL_VARIABLES` from PS.
+            3. the op which sync all the local `MODEL_VARIABLES` from PS.
                You can choose how often to run it by yourself.
         """
         with override_to_local_variable():
...
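Note: a sketch (not in the commit) of consuming the three ops described by the docstring above. The method name build() and the objects builder, sess, get_grad_fn and get_opt_fn are assumptions standing in for the surrounding training setup; only the meaning of the three returned ops comes from the diff:

    # train_op: run every step; initial_sync_op: run once before training starts;
    # model_sync_op: re-sync MODEL_VARIABLES from the PS as often as you like.
    train_op, initial_sync_op, model_sync_op = builder.build(get_grad_fn, get_opt_fn)

    sess.run(initial_sync_op)
    for step in range(1000):
        sess.run(train_op)
        if step % 100 == 0:
            sess.run(model_sync_op)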
@@ -272,8 +272,11 @@ class SyncMultiGPUReplicatedBuilder(DataParallelBuilder):
             get_opt_fn (-> tf.train.Optimizer): callable which returns an optimizer
 
         Returns:
-            tf.Operation: the training op.
-            tf.Operation: the op which sync variables from GPU 0 to other GPUs.
+            (tf.Operation, tf.Operation)
+
+            1. the training op.
+
+            2. the op which sync variables from GPU 0 to other GPUs.
                It has to be run before the training has started.
                And you can optionally run it later to sync non-trainable variables.
         """
...
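Note: the analogous sketch for the two-element return above, under the same assumptions about build() and the surrounding objects:

    train_op, post_init_sync_op = builder.build(get_grad_fn, get_opt_fn)

    sess.run(post_init_sync_op)   # copy variables from GPU 0 to the other GPUs before training
    for _ in range(1000):
        sess.run(train_op)
    # optionally run post_init_sync_op again later to re-sync non-trainable variables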
@@ -25,7 +25,7 @@ from ..callbacks.graph import RunOp
 __all__ = ['PlaceholderInput', 'FeedInput', 'DataParallelFeedInput',
            'FeedfreeInput',
            'QueueInput', 'BatchQueueInput',
-           'ZMQInput', 'DummyConstantInput', 'TensorInput',
+           'DummyConstantInput', 'TensorInput',
            'TFDatasetInput',
            'StagingInputWrapper']
...
@@ -101,6 +101,10 @@ class InputSource(object):
         pass
 
     def setup_done(self):
+        """
+        Returns:
+            bool: whether :meth:`setup()` has been called.
+        """
         return self._setup_done
 
     @memoized
@@ -108,11 +112,12 @@ class InputSource(object):
         """
         An InputSource might need some extra maintainance during training,
         which is done also through the Callback interface.
-        This method returns the Callbacks and the return value will be memoized.
+        This method returns the callbacks and the return value will be memoized.
 
         Returns:
             list[Callback]: extra callbacks needed by this InputSource.
         """
+        assert self.setup_done()
         return [CallbackFactory(
             before_train=lambda _: self.reset_state())] + self._get_callbacks()
...
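Note: a sketch (not in the commit) of the contract the new assert enforces, namely that setup() must have been called before get_callbacks(). QueueInput is taken from the __all__ list above; the setup(inputs_desc) signature and the dataflow/inputs_desc objects are assumptions:

    src = QueueInput(dataflow)     # dataflow: a DataFlow built elsewhere
    src.setup(inputs_desc)         # after this, setup_done() returns True
    assert src.setup_done()
    cbs = src.get_callbacks()      # the new assert would fire here if setup() had been skipped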
@@ -34,6 +34,7 @@ class TrainLoop(object):
     """
     Manage the double for loop.
     """
+
     def __init__(self):
         self._epoch_num = 0
         self._global_step = 0
@@ -82,7 +83,7 @@ class TrainLoop(object):
     @property
     def local_step(self):
         """
-        The number of (tensorpack) steps that have finished in the current epoch.
+        The number of steps that have finished in the current epoch.
         """
         return self._local_step
@@ -97,9 +98,12 @@ class Trainer(object):
         hooked_sess (tf.train.MonitoredSession): the session with hooks.
         monitors (Monitors): the monitors. Other callbacks can use it for logging.
     """
-    # step attr only available after before_train?
     is_chief = True
+    """
+    Whether this process is the chief worker in distributed training.
+    Only chief worker will run some callbacks.
+    """
 
     def __init__(self, config):
         """
@@ -283,17 +287,22 @@ class Trainer(object):
         return ""
 
-def _delegate_attr(name):
+def _get_property(name):
     """
     Delegate property to self.loop
     """
-    setattr(Trainer, name, property(
-        lambda self: getattr(self.loop, name)))
+    ret = property(
+        lambda self: getattr(self.loop, name))
+    try:
+        ret.__doc__ = getattr(TrainLoop, name).__doc__
+    except AttributeError:
+        pass
+    return ret
 
 for name in ['global_step', 'local_step', 'steps_per_epoch',
              'epoch_num', 'starting_epoch', 'max_epoch']:
-    _delegate_attr(name)
+    setattr(Trainer, name, _get_property(name))
 
 def launch_train(
...
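Note: the _get_property refactor above exists so each delegated Trainer property keeps the docstring of the corresponding TrainLoop property. A self-contained sketch of the same pattern (class and attribute names here are illustrative, not from tensorpack):

    class MyLoop(object):
        @property
        def step(self):
            """Number of steps finished so far."""
            return 42

    class MyTrainer(object):
        def __init__(self):
            self.loop = MyLoop()

    def _get_property(name):
        # Build a read-only property that forwards to self.loop.<name> and copy
        # the docstring over, so help()/Sphinx see it on the delegating class too.
        ret = property(lambda self: getattr(self.loop, name))
        try:
            ret.__doc__ = getattr(MyLoop, name).__doc__
        except AttributeError:
            pass
        return ret

    for name in ['step']:
        setattr(MyTrainer, name, _get_property(name))

    print(MyTrainer().step)        # 42, read through self.loop
    print(MyTrainer.step.__doc__)  # docstring copied from MyLoop.step (where Python allows setting it)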