Commit 65a9fcc7 authored by Yuxin Wu

misc docs change

parent cfe88a9a
@@ -364,7 +364,6 @@ def autodoc_skip_member(app, what, name, obj, skip, options):
return False
# hide deprecated stuff
if name in [
'MultiGPUTrainerBase',
'get_predictors',
'GaussianDeform',
'dump_chkpt_vars',
@@ -375,6 +374,7 @@ def autodoc_skip_member(app, what, name, obj, skip, options):
'LeakyReLU',
'PrefetchOnGPUs',
'PeriodicRunHooks',
'apply_default_prefetch',
'guided_relu', 'saliency_map', 'get_scalar_var', 'psnr',
'prediction_incorrect', 'huber_loss', 'SoftMax'
......
@@ -29,12 +29,12 @@ You can overwrite any of the following methods to define a new callback:
* `_setup_graph(self)`
-  Create any ops / tensors in the graph which you might need to use in the callback.
-  This method is to separate between "define" and "run", and also to
+  Create any tensors/ops in the graph which you might need to use in the callback.
+  This method exists to fully separate between "define" and "run", and also to
avoid the common mistake to create ops inside
loops. All changes to the graph should be made in this method.
-  To access ops which are already defined,
+  To access tensors/ops which are already defined,
you can use TF methods such as
[`graph.get_tensor_by_name`](https://www.tensorflow.org/api_docs/python/tf/Graph#get_tensor_by_name).
If you're using a `TowerTrainer` instance, more tools are available:
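For illustration, a minimal sketch of this "define in `_setup_graph`, run later" rule; the callback class name and the tensor name `'tower0/cost:0'` are assumptions, not part of the diffed docs:

```python
import tensorflow as tf
from tensorpack.callbacks import Callback


class DumpScaledCost(Callback):
    """Hypothetical callback: everything is defined in _setup_graph, only run later."""

    def _setup_graph(self):
        # look up an existing tensor by name ('tower0/cost:0' is an assumed name)
        cost = tf.get_default_graph().get_tensor_by_name('tower0/cost:0')
        # create the extra tensor here, never inside the training loop
        self._scaled = tf.multiply(cost, 2.0, name='scaled_cost')

    def _trigger_step(self):
        # only run things that were already defined above
        val = self.trainer.sess.run(self._scaled)
        self.trainer.monitors.put_scalar('scaled_cost', val)
```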
@@ -66,7 +66,7 @@ You can overwrite any of the following methods to define a new callback:
Please refer to TensorFlow documentation for detailed API.
They are used to run extra ops / eval extra tensors / feed extra values __along with__ the actual training iterations.
-  Note the difference between running __along with__ an iteration and running after an iteration.
+  __IMPORTANT__ Note the difference between running __along with__ an iteration and running __after__ an iteration.
When you write
```python
@@ -76,7 +76,8 @@ You can overwrite any of the following methods to define a new callback:
The training loops would become `sess.run([training_op, my_op])`.
This is different from `sess.run(training_op); sess.run(my_op);`,
-  which is what you would get if you run the op in `_trigger_step`.
+  which is what you would get if you run `my_op` in `_trigger_step`.
+  Sometimes the difference matters, please choose carefully.
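A hedged sketch of the two choices discussed above, using a made-up counter op created in `_setup_graph`:

```python
import tensorflow as tf
from tensorpack.callbacks import Callback


class RunAlongWithIteration(Callback):
    """Hypothetical: my_op is fused into the same sess.run() as the training op."""

    def _setup_graph(self):
        counter = tf.get_variable('my_counter', initializer=0, trainable=False)
        self._my_op = tf.assign_add(counter, 1)

    def _before_run(self, ctx):
        # effectively sess.run([training_op, my_op])
        return tf.train.SessionRunArgs(fetches=self._my_op)


class RunAfterIteration(Callback):
    """Hypothetical: my_op runs in a separate call after the iteration."""

    def _setup_graph(self):
        counter = tf.get_variable('my_counter2', initializer=0, trainable=False)
        self._my_op = tf.assign_add(counter, 1)

    def _trigger_step(self):
        # sess.run(training_op); then sess.run(my_op)
        self.trainer.sess.run(self._my_op)
```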
* `_trigger_step(self)`
@@ -93,15 +94,17 @@ You can overwrite any of the following methods to define a new callback:
By default it will get called by `_trigger_epoch`,
but you can customize the scheduling of this method by
[`PeriodicTrigger`](../../modules/callbacks.html#tensorpack.callbacks.PeriodicTrigger),
-  to let this method run every k steps or every k epochs.
+  to let this method run every k steps (potentially more frequently) or every k epochs.
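A usage sketch of such scheduling; `MyCallback` is a made-up callback that only implements `_trigger()`:

```python
from tensorpack.callbacks import Callback, PeriodicTrigger


class MyCallback(Callback):
    """Hypothetical callback that only defines what to do."""

    def _trigger(self):
        self.trainer.monitors.put_scalar('some-stat', 1.0)


callbacks = [
    PeriodicTrigger(MyCallback(), every_k_steps=5000),  # run roughly every 5000 steps
    PeriodicTrigger(MyCallback(), every_k_epochs=2),    # or every 2 epochs
]
```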
### What you can do in the callback
-* Access tensors / ops in either training / inference mode (need to create them in `_setup_graph`).
+* Access tensors / ops (details mentioned above):
+  * For existing tensors/ops created in the tower, access them through [self.trainer.towers](../../modules/train.html#tensorpack.train.TowerTrainer.towers).
+  * Extra tensors/ops have to be created in `_setup_graph` callback method.
* Write stuff to the monitor backend, by `self.trainer.monitors.put_xxx`.
The monitors might direct your events to TensorFlow events file, JSON file, stdout, etc.
-  You can get history monitor data as well. See the docs for [Monitors](../../modules/callbacks.html#tensorpack.callbacks.Monitors)
-* Access the current status of training, such as `epoch_num`, `global_step`. See [here](../../modules/callbacks.html#tensorpack.callbacks.Callback)
+  You can access history monitor data as well. See the docs for [Monitors](../../modules/callbacks.html#tensorpack.callbacks.Monitors)
+* Access the current status of training, such as `self.epoch_num`, `self.global_step`. See [here](../../modules/callbacks.html#tensorpack.callbacks.Callback)
* Stop training by `raise StopTraining()` (with `from tensorpack.train import StopTraining`).
* Anything else that can be done with plain python.
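A minimal sketch combining several of the points in this list; the callback name, the monitored stat `'val-error'`, and the thresholds are all made up:

```python
from tensorpack.callbacks import Callback
from tensorpack.train import StopTraining


class EarlyStopSketch(Callback):
    """Hypothetical early-stopping callback."""

    def _trigger(self):
        # read back history monitor data ('val-error' is an assumed stat name)
        err = self.trainer.monitors.get_latest('val-error')
        # write to the monitor backend and read the current training status
        self.trainer.monitors.put_scalar('epochs-seen', self.epoch_num)
        if self.epoch_num > 10 and err > 0.5:
            raise StopTraining()
```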
@@ -114,6 +117,6 @@ You can overwrite any of the following methods to define a new callback:
* You can choose to only implement "what to do", and leave "when to do" to
other wrappers such as
[PeriodicTrigger](../../modules/callbacks.html#tensorpack.callbacks.PeriodicTrigger),
-  [PeriodicRunHooks](../../modules/callbacks.html#tensorpack.callbacks.PeriodicRunHooks),
+  [PeriodicCallback](../../modules/callbacks.html#tensorpack.callbacks.PeriodicCallback),
or [EnableCallbackIf](../../modules/callbacks.html#tensorpack.callbacks.EnableCallbackIf).
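A hedged usage sketch of these "when to do" wrappers, reusing the made-up `MyCallback` from the sketch above:

```python
from tensorpack.callbacks import PeriodicCallback, EnableCallbackIf

callbacks = [
    # run the whole callback only every 3 epochs
    PeriodicCallback(MyCallback(), every_k_epochs=3),
    # enable the callback only after epoch 10
    EnableCallbackIf(MyCallback(), lambda cb: cb.epoch_num > 10),
]
```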
@@ -228,8 +228,9 @@ class Callback(object):
return get_op_or_tensor_by_name(name)
except KeyError:
pass
- assert isinstance(self.trainer, TowerTrainer), msg
- towers = self.trainer.tower_func.towers
+ if not isinstance(self.trainer, TowerTrainer):
+     raise KeyError(msg)
+ towers = self.trainer.towers
try:
return towers.training()[0][name]
except KeyError:
......
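Read together with the doc change above (`self.trainer.towers`), a hedged sketch of how a callback might use this tower lookup directly; the tensor name `'accuracy'` is an assumption:

```python
from tensorpack.callbacks import Callback


class TowerTensorSketch(Callback):
    """Hypothetical callback reading a tensor from the first training tower."""

    def _setup_graph(self):
        # requires a TowerTrainer; otherwise the lookup above raises KeyError
        towers = self.trainer.towers
        self._acc = towers.training()[0]['accuracy']  # assumed tensor name

    def _trigger_step(self):
        self.trainer.monitors.put_scalar('train-accuracy',
                                         self.trainer.sess.run(self._acc))
```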
@@ -515,6 +515,7 @@ class StagingInput(FeedfreeInput):
element should be sufficient.
towers: deprecated
device (str or None): if not None, place the StagingArea on a specific device. e.g., '/cpu:0'.
+  Otherwise, they are placed under where `get_input_tensors` gets called.
"""
assert isinstance(input, FeedfreeInput), input
self._input = input
......
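A minimal usage sketch of the `device` argument documented above; the dataflow is a dummy `FakeData` just for illustration:

```python
from tensorpack.dataflow import FakeData
from tensorpack.input_source import QueueInput, StagingInput

# dummy 32x28x28x3 images and 32 labels, only to have something to stage
df = FakeData([[32, 28, 28, 3], [32]], size=1000)
# place the StagingArea on CPU; if device is None it is placed where the
# input tensors get requested
input = StagingInput(QueueInput(df), device='/cpu:0')
```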
@@ -19,8 +19,8 @@ __all__ = ['get_default_sess_config',
def get_default_sess_config(mem_fraction=0.99):
"""
-    Return a better session config to use as default.
-    Tensorflow default session config consume too much resources.
+    Return a tf.ConfigProto to use as default session config.
+    You can modify the returned config to fit your needs.
Args:
mem_fraction(float): fraction of memory to use.
@@ -37,12 +37,17 @@ def get_default_sess_config(mem_fraction=0.99):
# TF benchmark use cpu_count() - gpu_thread_count(), e.g. 80 - 8 * 2
# Didn't see much difference.
-    conf.gpu_options.per_process_gpu_memory_fraction = mem_fraction
+    conf.gpu_options.per_process_gpu_memory_fraction = 0.99
if get_tf_version_number() >= 1.2:
conf.gpu_options.force_gpu_compatible = True
conf.gpu_options.allow_growth = True
# from tensorflow.core.protobuf import rewriter_config_pb2 as rwc
# conf.graph_options.rewrite_options.memory_optimization = \
# rwc.RewriterConfig.HEURISTICS
# May hurt performance
# conf.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
# conf.graph_options.place_pruned_graph = True
......
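A usage sketch of the updated docstring's suggestion, assuming `get_default_sess_config` is importable from `tensorpack.tfutils`:

```python
from tensorpack.tfutils import get_default_sess_config

conf = get_default_sess_config()
# the return value is a plain tf.ConfigProto, so it can be tweaked freely
conf.gpu_options.allow_growth = False   # e.g. reserve GPU memory up front instead
conf.intra_op_parallelism_threads = 4
```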
@@ -8,6 +8,7 @@ import os
import pprint
import tensorflow as tf
import numpy as np
+from ..utils.develop import deprecated
from ..utils import logger
from .common import get_op_tensor_name
@@ -186,8 +187,8 @@ def load_chkpt_vars(model_path):
return result
+@deprecated("Renamed to 'load_chkpt_vars!'", "2018-04-20")
def dump_chkpt_vars(model_path):
-    logger.warn("dump_chkpt_vars was renamed to load_chkpt_vars!")
return load_chkpt_vars(model_path)
......
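For context, a rough and simplified sketch of what a decorator like the one above typically does; this is not the actual implementation in tensorpack's `utils.develop`, and the message format is made up:

```python
import functools
from tensorpack.utils import logger


def deprecated(text='', eos=''):
    """Simplified stand-in: log a warning on every call, then call through."""
    def deco(func):
        @functools.wraps(func)
        def new_func(*args, **kwargs):
            msg = "{} is deprecated. {}".format(func.__name__, text)
            if eos:
                msg += " It will be removed after {}.".format(eos)
            logger.warn(msg)
            return func(*args, **kwargs)
        return new_func
    return deco
```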
@@ -40,7 +40,6 @@ def apply_default_prefetch(input_source_or_dataflow, trainer):
if len(towers) > 1:
# seem to only improve on >1 GPUs
assert not isinstance(trainer, SimpleTrainer)
- assert tf.test.is_gpu_available()
if not isinstance(input, (StagingInput, DummyConstantInput)):
logger.info("Automatically applying StagingInput on the DataFlow.")
......