Commit 65a9fcc7 authored by Yuxin Wu

misc docs change

parent cfe88a9a
@@ -364,7 +364,6 @@ def autodoc_skip_member(app, what, name, obj, skip, options):
return False
# hide deprecated stuff
if name in [
'MultiGPUTrainerBase',
'get_predictors',
'GaussianDeform',
'dump_chkpt_vars',
@@ -375,6 +374,7 @@ def autodoc_skip_member(app, what, name, obj, skip, options):
'LeakyReLU',
'PrefetchOnGPUs',
'PeriodicRunHooks',
'apply_default_prefetch',
'guided_relu', 'saliency_map', 'get_scalar_var', 'psnr',
'prediction_incorrect', 'huber_loss', 'SoftMax'
......
@@ -29,12 +29,12 @@ You can overwrite any of the following methods to define a new callback:
* `_setup_graph(self)`
-  Create any ops / tensors in the graph which you might need to use in the callback.
-  This method is to separate between "define" and "run", and also to
+  Create any tensors/ops in the graph which you might need to use in the callback.
+  This method exists to fully separate between "define" and "run", and also to
avoid the common mistake to create ops inside
loops. All changes to the graph should be made in this method.
-  To access ops which are already defined,
+  To access tensors/ops which are already defined,
you can use TF methods such as
[`graph.get_tensor_by_name`](https://www.tensorflow.org/api_docs/python/tf/Graph#get_tensor_by_name).
If you're using a `TowerTrainer` instance, more tools are available:
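For illustration, a minimal sketch of this "define in `_setup_graph`, run later" rule; the callback class name and the tensor name `'tower0/cost:0'` are assumptions, not part of the diffed docs:

```python
import tensorflow as tf
from tensorpack.callbacks import Callback


class DumpScaledCost(Callback):
    """Hypothetical callback: everything is defined in _setup_graph, only run later."""

    def _setup_graph(self):
        # look up an existing tensor by name ('tower0/cost:0' is an assumed name)
        cost = tf.get_default_graph().get_tensor_by_name('tower0/cost:0')
        # create the extra tensor here, never inside the training loop
        self._scaled = tf.multiply(cost, 2.0, name='scaled_cost')

    def _trigger_step(self):
        # only run things that were already defined above
        val = self.trainer.sess.run(self._scaled)
        self.trainer.monitors.put_scalar('scaled_cost', val)
```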
@@ -66,7 +66,7 @@ You can overwrite any of the following methods to define a new callback:
Please refer to TensorFlow documentation for detailed API.
They are used to run extra ops / eval extra tensors / feed extra values __along with__ the actual training iterations.
-  Note the difference between running __along with__ an iteration and running after an iteration.
+  __IMPORTANT__ Note the difference between running __along with__ an iteration and running __after__ an iteration.
When you write
```python
@@ -76,7 +76,8 @@ You can overwrite any of the following methods to define a new callback:
The training loops would become `sess.run([training_op, my_op])`.
This is different from `sess.run(training_op); sess.run(my_op);`,
-  which is what you would get if you run the op in `_trigger_step`.
+  which is what you would get if you run `my_op` in `_trigger_step`.
+  Sometimes the difference matters, please choose carefully.
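A hedged sketch of the two choices discussed above, using a made-up counter op created in `_setup_graph`:

```python
import tensorflow as tf
from tensorpack.callbacks import Callback


class RunAlongWithIteration(Callback):
    """Hypothetical: my_op is fused into the same sess.run() as the training op."""

    def _setup_graph(self):
        counter = tf.get_variable('my_counter', initializer=0, trainable=False)
        self._my_op = tf.assign_add(counter, 1)

    def _before_run(self, ctx):
        # effectively sess.run([training_op, my_op])
        return tf.train.SessionRunArgs(fetches=self._my_op)


class RunAfterIteration(Callback):
    """Hypothetical: my_op runs in a separate call after the iteration."""

    def _setup_graph(self):
        counter = tf.get_variable('my_counter2', initializer=0, trainable=False)
        self._my_op = tf.assign_add(counter, 1)

    def _trigger_step(self):
        # sess.run(training_op); then sess.run(my_op)
        self.trainer.sess.run(self._my_op)
```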
* `_trigger_step(self)`
@@ -93,15 +94,17 @@ You can overwrite any of the following methods to define a new callback:
By default it will get called by `_trigger_epoch`,
but you can customize the scheduling of this method by
[`PeriodicTrigger`](../../modules/callbacks.html#tensorpack.callbacks.PeriodicTrigger),
-  to let this method run every k steps or every k epochs.
+  to let this method run every k steps (potentially more frequently) or every k epochs.
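A usage sketch of such scheduling; `MyCallback` is a made-up callback that only implements `_trigger()`:

```python
from tensorpack.callbacks import Callback, PeriodicTrigger


class MyCallback(Callback):
    """Hypothetical callback that only defines what to do."""

    def _trigger(self):
        self.trainer.monitors.put_scalar('some-stat', 1.0)


callbacks = [
    PeriodicTrigger(MyCallback(), every_k_steps=5000),  # run roughly every 5000 steps
    PeriodicTrigger(MyCallback(), every_k_epochs=2),    # or every 2 epochs
]
```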
### What you can do in the callback
-* Access tensors / ops in either training / inference mode (need to create them in `_setup_graph`).
+* Access tensors / ops (details mentioned above):
+  * For existing tensors/ops created in the tower, access them through [self.trainer.towers](../../modules/train.html#tensorpack.train.TowerTrainer.towers).
+  * Extra tensors/ops have to be created in `_setup_graph` callback method.
* Write stuff to the monitor backend, by `self.trainer.monitors.put_xxx`.
The monitors might direct your events to TensorFlow events file, JSON file, stdout, etc.
-  You can get history monitor data as well. See the docs for [Monitors](../../modules/callbacks.html#tensorpack.callbacks.Monitors)
-* Access the current status of training, such as `epoch_num`, `global_step`. See [here](../../modules/callbacks.html#tensorpack.callbacks.Callback)
+  You can access history monitor data as well. See the docs for [Monitors](../../modules/callbacks.html#tensorpack.callbacks.Monitors)
+* Access the current status of training, such as `self.epoch_num`, `self.global_step`. See [here](../../modules/callbacks.html#tensorpack.callbacks.Callback)
* Stop training by `raise StopTraining()` (with `from tensorpack.train import StopTraining`).
* Anything else that can be done with plain python.
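A minimal sketch combining several of the points in this list; the callback name, the monitored stat `'val-error'`, and the thresholds are all made up:

```python
from tensorpack.callbacks import Callback
from tensorpack.train import StopTraining


class EarlyStopSketch(Callback):
    """Hypothetical early-stopping callback."""

    def _trigger(self):
        # read back history monitor data ('val-error' is an assumed stat name)
        err = self.trainer.monitors.get_latest('val-error')
        # write to the monitor backend and read the current training status
        self.trainer.monitors.put_scalar('epochs-seen', self.epoch_num)
        if self.epoch_num > 10 and err > 0.5:
            raise StopTraining()
```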
@@ -114,6 +117,6 @@ You can overwrite any of the following methods to define a new callback:
* You can choose to only implement "what to do", and leave "when to do" to
other wrappers such as
[PeriodicTrigger](../../modules/callbacks.html#tensorpack.callbacks.PeriodicTrigger),
-  [PeriodicRunHooks](../../modules/callbacks.html#tensorpack.callbacks.PeriodicRunHooks),
+  [PeriodicCallback](../../modules/callbacks.html#tensorpack.callbacks.PeriodicCallback),
or [EnableCallbackIf](../../modules/callbacks.html#tensorpack.callbacks.EnableCallbackIf).
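A hedged usage sketch of these "when to do" wrappers, reusing the made-up `MyCallback` from the sketch above:

```python
from tensorpack.callbacks import PeriodicCallback, EnableCallbackIf

callbacks = [
    # run the whole callback only every 3 epochs
    PeriodicCallback(MyCallback(), every_k_epochs=3),
    # enable the callback only after epoch 10
    EnableCallbackIf(MyCallback(), lambda cb: cb.epoch_num > 10),
]
```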
@@ -228,8 +228,9 @@ class Callback(object):
return get_op_or_tensor_by_name(name)
except KeyError:
pass
- assert isinstance(self.trainer, TowerTrainer), msg
- towers = self.trainer.tower_func.towers
+ if not isinstance(self.trainer, TowerTrainer):
+     raise KeyError(msg)
+ towers = self.trainer.towers
try:
return towers.training()[0][name]
except KeyError:
......
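Read together with the doc change above (`self.trainer.towers`), a hedged sketch of how a callback might use this tower lookup directly; the tensor name `'accuracy'` is an assumption:

```python
from tensorpack.callbacks import Callback


class TowerTensorSketch(Callback):
    """Hypothetical callback reading a tensor from the first training tower."""

    def _setup_graph(self):
        # requires a TowerTrainer; otherwise the lookup above raises KeyError
        towers = self.trainer.towers
        self._acc = towers.training()[0]['accuracy']  # assumed tensor name

    def _trigger_step(self):
        self.trainer.monitors.put_scalar('train-accuracy',
                                         self.trainer.sess.run(self._acc))
```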
@@ -515,6 +515,7 @@ class StagingInput(FeedfreeInput):
element should be sufficient.
towers: deprecated
device (str or None): if not None, place the StagingArea on a specific device. e.g., '/cpu:0'.
+  Otherwise, they are placed under where `get_input_tensors` gets called.
"""
assert isinstance(input, FeedfreeInput), input
self._input = input
......
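A minimal usage sketch of the `device` argument documented above; the dataflow is a dummy `FakeData` just for illustration:

```python
from tensorpack.dataflow import FakeData
from tensorpack.input_source import QueueInput, StagingInput

# dummy 32x28x28x3 images and 32 labels, only to have something to stage
df = FakeData([[32, 28, 28, 3], [32]], size=1000)
# place the StagingArea on CPU; if device is None it is placed where the
# input tensors get requested
input = StagingInput(QueueInput(df), device='/cpu:0')
```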
@@ -19,8 +19,8 @@ __all__ = ['get_default_sess_config',
def get_default_sess_config(mem_fraction=0.99):
"""
-    Return a better session config to use as default.
-    Tensorflow default session config consume too much resources.
+    Return a tf.ConfigProto to use as default session config.
+    You can modify the returned config to fit your needs.
Args:
mem_fraction(float): fraction of memory to use.
@@ -37,12 +37,17 @@ def get_default_sess_config(mem_fraction=0.99):
# TF benchmark use cpu_count() - gpu_thread_count(), e.g. 80 - 8 * 2
# Didn't see much difference.
-    conf.gpu_options.per_process_gpu_memory_fraction = mem_fraction
+    conf.gpu_options.per_process_gpu_memory_fraction = 0.99
if get_tf_version_number() >= 1.2:
conf.gpu_options.force_gpu_compatible = True
conf.gpu_options.allow_growth = True
# from tensorflow.core.protobuf import rewriter_config_pb2 as rwc
# conf.graph_options.rewrite_options.memory_optimization = \
# rwc.RewriterConfig.HEURISTICS
# May hurt performance
# conf.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
# conf.graph_options.place_pruned_graph = True
......
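A usage sketch of the updated docstring's suggestion, assuming `get_default_sess_config` is importable from `tensorpack.tfutils`:

```python
from tensorpack.tfutils import get_default_sess_config

conf = get_default_sess_config()
# the return value is a plain tf.ConfigProto, so it can be tweaked freely
conf.gpu_options.allow_growth = False   # e.g. reserve GPU memory up front instead
conf.intra_op_parallelism_threads = 4
```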
@@ -8,6 +8,7 @@ import os
import pprint
import tensorflow as tf
import numpy as np
+from ..utils.develop import deprecated
from ..utils import logger
from .common import get_op_tensor_name
@@ -186,8 +187,8 @@ def load_chkpt_vars(model_path):
return result
+@deprecated("Renamed to 'load_chkpt_vars!'", "2018-04-20")
def dump_chkpt_vars(model_path):
-    logger.warn("dump_chkpt_vars was renamed to load_chkpt_vars!")
return load_chkpt_vars(model_path)
......
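For context, a rough and simplified sketch of what a decorator like the one above typically does; this is not the actual implementation in tensorpack's `utils.develop`, and the message format is made up:

```python
import functools
from tensorpack.utils import logger


def deprecated(text='', eos=''):
    """Simplified stand-in: log a warning on every call, then call through."""
    def deco(func):
        @functools.wraps(func)
        def new_func(*args, **kwargs):
            msg = "{} is deprecated. {}".format(func.__name__, text)
            if eos:
                msg += " It will be removed after {}.".format(eos)
            logger.warn(msg)
            return func(*args, **kwargs)
        return new_func
    return deco
```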
@@ -40,7 +40,6 @@ def apply_default_prefetch(input_source_or_dataflow, trainer):
if len(towers) > 1:
# seem to only improve on >1 GPUs
assert not isinstance(trainer, SimpleTrainer)
- assert tf.test.is_gpu_available()
if not isinstance(input, (StagingInput, DummyConstantInput)):
logger.info("Automatically applying StagingInput on the DataFlow.")
......