Commit c33a3ccb authored by Yuxin Wu

update docs

parent caf9ee8d
......@@ -8,7 +8,7 @@ SPHINXPROJ = tensorpack
SOURCEDIR = .
BUILDDIR = build
.PHONY: help Makefile docset
.PHONY: help Makefile docset clean
all: html
......@@ -24,3 +24,6 @@ docset: html
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
html: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
clean:
rm -rf build
......@@ -169,7 +169,7 @@ class GraphProfiler(Callback):
class PeakMemoryTracker(Callback):
"""
Track peak memory in each session run, by
:module:`tf.contrib.memory_stats`.
:mod:`tf.contrib.memory_stats`.
It can only be used for GPUs.
"""
def __init__(self, devices=['/gpu:0']):
......
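As a usage note (not part of this commit): the callback above would typically be enabled by adding it to the trainer's callback list. A minimal sketch, assuming `PeakMemoryTracker` is exported from `tensorpack.callbacks` as in typical tensorpack usage:

```python
# Minimal sketch; the import path and the two-GPU device list are assumptions,
# not taken from this diff. The callback only works on GPU devices.
from tensorpack.callbacks import PeakMemoryTracker

callbacks = [
    PeakMemoryTracker(devices=['/gpu:0', '/gpu:1']),  # track peak memory on two GPUs
]
# ... pass `callbacks` to the trainer along with the other callbacks.
```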
......@@ -173,10 +173,14 @@ class DistributedReplicatedBuilder(DataParallelBuilder):
get_opt_fn (-> tf.train.Optimizer): callable which returns an optimizer
Returns:
tf.Operation: the training op
tf.Operation: the op which sync all the local variables from PS.
(tf.Operation, tf.Operation, tf.Operation):
1. the training op.
2. the op which syncs all the local variables from PS.
This op should be run before training.
tf.Operation: the op which sync all the local `MODEL_VARIABLES` from PS.
3. the op which syncs all the local `MODEL_VARIABLES` from PS.
You can choose how often to run it.
"""
with override_to_local_variable():
......
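To make the three-op contract above concrete, here is a hedged sketch of how a caller might run the returned ops; only the ordering constraints come from the docstring, and the function and argument names here are illustrative:

```python
# Hedged sketch of consuming DistributedReplicatedBuilder.build();
# run_with_parameter_servers and its arguments are illustrative names.
def run_with_parameter_servers(sess, train_op, initial_sync_op, model_sync_op,
                               num_steps, model_sync_every=1000):
    sess.run(initial_sync_op)              # sync local variables from PS before training
    for step in range(num_steps):
        sess.run(train_op)                 # one training step
        if (step + 1) % model_sync_every == 0:
            sess.run(model_sync_op)        # refresh local MODEL_VARIABLES from PS
```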
......@@ -272,8 +272,11 @@ class SyncMultiGPUReplicatedBuilder(DataParallelBuilder):
get_opt_fn (-> tf.train.Optimizer): callable which returns an optimizer
Returns:
tf.Operation: the training op.
tf.Operation: the op which sync variables from GPU 0 to other GPUs.
(tf.Operation, tf.Operation):
1. the training op.
2. the op which syncs variables from GPU 0 to other GPUs.
It has to be run before training starts.
You can optionally run it later to sync non-trainable variables.
"""
......
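Similarly, a hedged sketch of consuming the two-op return value above; the names are illustrative and only the ordering comes from the docstring:

```python
# Hedged sketch of consuming SyncMultiGPUReplicatedBuilder.build().
def run_replicated(sess, train_op, sync_op, num_steps, resync_every=None):
    sess.run(sync_op)                      # copy variables from GPU 0 to the other GPUs first
    for step in range(num_steps):
        sess.run(train_op)
        if resync_every and (step + 1) % resync_every == 0:
            sess.run(sync_op)              # optionally re-sync non-trainable variables later
```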
......@@ -25,7 +25,7 @@ from ..callbacks.graph import RunOp
__all__ = ['PlaceholderInput', 'FeedInput', 'DataParallelFeedInput',
'FeedfreeInput',
'QueueInput', 'BatchQueueInput',
'ZMQInput', 'DummyConstantInput', 'TensorInput',
'DummyConstantInput', 'TensorInput',
'TFDatasetInput',
'StagingInputWrapper']
......
......@@ -101,6 +101,10 @@ class InputSource(object):
pass
def setup_done(self):
"""
Returns:
bool: whether :meth:`setup()` has been called.
"""
return self._setup_done
@memoized
......@@ -108,11 +112,12 @@ class InputSource(object):
"""
An InputSource might need some extra maintenance during training,
which is also done through the Callback interface.
This method returns the Callbacks and the return value will be memoized.
This method returns the callbacks and the return value will be memoized.
Returns:
list[Callback]: extra callbacks needed by this InputSource.
"""
assert self.setup_done()
return [CallbackFactory(
before_train=lambda _: self.reset_state())] + self._get_callbacks()
......
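The two hunks above describe a small contract: `setup_done()` reports whether `setup()` has run, and `get_callbacks()` must only be called afterwards, with its result memoized. A toy stand-in (not tensorpack's actual implementation) that follows the same contract:

```python
# Toy stand-in illustrating the setup_done()/get_callbacks() contract above;
# ToyInputSource and its attributes are illustrative, not tensorpack code.
class ToyInputSource(object):
    def __init__(self):
        self._setup_done = False

    def setup(self, input_signature=None):
        # Real code would create placeholders / queues here.
        self._setup_done = True

    def setup_done(self):
        return self._setup_done

    def get_callbacks(self):
        assert self.setup_done(), "setup() must be called before get_callbacks()"
        if not hasattr(self, '_callbacks_cache'):
            # Computed once and reused, mirroring the @memoized decorator above.
            self._callbacks_cache = []   # real code: [CallbackFactory(...)] + extras
        return self._callbacks_cache
```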
......@@ -34,6 +34,7 @@ class TrainLoop(object):
"""
Manage the double for loop.
"""
def __init__(self):
self._epoch_num = 0
self._global_step = 0
......@@ -82,7 +83,7 @@ class TrainLoop(object):
@property
def local_step(self):
"""
The number of (tensorpack) steps that have finished in the current epoch.
The number of steps that have finished in the current epoch.
"""
return self._local_step
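For orientation, a hedged sketch of reading these counters from a callback; `Callback`, `_trigger_step`, and `self.trainer` follow common tensorpack conventions and are not part of this hunk:

```python
# Hedged sketch: only the counter names come from this commit; the Callback
# hook and import path are assumptions based on typical tensorpack usage.
from tensorpack.callbacks import Callback

class LoopCounterLogger(Callback):
    def _trigger_step(self):
        # global_step / local_step / epoch_num are forwarded to the trainer
        # from its TrainLoop (see the property delegation further down).
        if self.trainer.global_step % 100 == 0:
            print('global step {}, local step {} of epoch {}'.format(
                self.trainer.global_step, self.trainer.local_step,
                self.trainer.epoch_num))
```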
......@@ -97,9 +98,12 @@ class Trainer(object):
hooked_sess (tf.train.MonitoredSession): the session with hooks.
monitors (Monitors): the monitors. Other callbacks can use it for logging.
"""
# step attr only available after before_train?
is_chief = True
"""
Whether this process is the chief worker in distributed training.
Only the chief worker will run some callbacks.
"""
def __init__(self, config):
"""
......@@ -283,17 +287,22 @@ class Trainer(object):
return ""
def _delegate_attr(name):
def _get_property(name):
"""
Delegate property to self.loop
"""
setattr(Trainer, name, property(
lambda self: getattr(self.loop, name)))
ret = property(
lambda self: getattr(self.loop, name))
try:
ret.__doc__ = getattr(TrainLoop, name).__doc__
except AttributeError:
pass
return ret
for name in ['global_step', 'local_step', 'steps_per_epoch',
'epoch_num', 'starting_epoch', 'max_epoch']:
_delegate_attr(name)
setattr(Trainer, name, _get_property(name))
def launch_train(
......
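The refactor above replaces `_delegate_attr` with `_get_property` so that each delegated property also inherits the docstring from `TrainLoop`. A self-contained toy version of the same pattern (ToyLoop/ToyTrainer are stand-ins, not tensorpack classes):

```python
# Toy version of the delegation pattern above: read-only properties are
# forwarded to a sub-object and the original docstrings are copied over
# so help() still shows them.
class ToyLoop(object):
    @property
    def global_step(self):
        """Total number of finished steps."""
        return 42

class ToyTrainer(object):
    def __init__(self):
        self.loop = ToyLoop()

def _toy_get_property(name):
    ret = property(lambda self: getattr(self.loop, name))
    try:
        # property.__doc__ is writable on Python 3; the except keeps Python 2 happy.
        ret.__doc__ = getattr(ToyLoop, name).__doc__
    except AttributeError:
        pass
    return ret

for name in ['global_step']:
    setattr(ToyTrainer, name, _toy_get_property(name))

t = ToyTrainer()
assert t.global_step == 42
# On Python 3, help(ToyTrainer.global_step) now shows ToyLoop's docstring as well.
```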