You'll start to observe a slowdown after adding more pre-processing (such as those in the [ResNet example](../../examples/ImageNetModels/imagenet_utils.py)).
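For instance, such a pipeline might look like the sketch below. The augmentor list is illustrative and much shorter than the one in the ResNet example; `ds0`/`ds1` are named to match the code later in this tutorial.

```python
from tensorpack.dataflow import dataset, imgaug, AugmentImageComponent

# Raw ImageNet reader: yields [image, label] datapoints.
ds0 = dataset.ILSVRC12('/path/to/ILSVRC12', 'train', shuffle=True)

# Illustrative augmentors only; the real ResNet example uses a longer,
# more expensive list (random-sized crop, color jittering, etc.).
augmentors = [
    imgaug.ResizeShortestEdge(256),
    imgaug.RandomCrop(224),
    imgaug.Flip(horiz=True),
]
ds1 = AugmentImageComponent(ds0, augmentors)
```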
Now it's time to add threads or processes:
```eval_rst
.. code-block:: python

    ...
    ...
    ds = MultiProcessRunnerZMQ(ds1, num_proc=25)
    ds = BatchData(ds, 256)
```
Here we fork 25 processes to run `ds1` and collect their output through the ZMQ IPC protocol.
You can also apply the parallel runner after batching, of course.
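For example, a minimal sketch of the latter ordering (assuming `ds1` is the augmented DataFlow from above): batching happens inside each worker, and the main process only collects ready-made batches over ZMQ.

```python
from tensorpack.dataflow import BatchData, MultiProcessRunnerZMQ

# Each of the 25 worker processes runs the whole `BatchData(ds1, 256)` chain,
# so the copying work of batching is also moved into the workers.
ds = BatchData(ds1, 256)
ds = MultiProcessRunnerZMQ(ds, num_proc=25)
```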
### Parallel Map
The above DataFlow might be fast, but since it forks the ImageNet reader (`ds0`),
it's **not a good idea to use it for validation** (for reasons mentioned at the top;
more details in the [Parallel DataFlow Tutorial](./parallel-dataflow.md) and the [documentation](../modules/dataflow.html#tensorpack.dataflow.MultiProcessRunnerZMQ)).
Alternatively, you can use a parallel mapper like this: