Commit dda6fd53 authored by Yuxin Wu

remove some use of contrib for tf1.13

parent ab81a75d
Thanks for your contribution!
Unless you want to send a simple PR of a few lines that can be easily merged, please note the following:
* If you want to add a new feature, please open an issue first and indicate that you want to contribute.
  There are features that we prefer not to add to tensorpack, e.g. symbolic models
  (see details at https://tensorpack.readthedocs.io/tutorial/symbolic.html).
  Therefore it's good to have a discussion first.
* If you want to add a new example, please note that:
  1. We prefer not to have an example that is too similar to existing ones in terms of the task.
  2. Examples have to be able to reproduce (preferably with some measurable metrics) published or well-known experiments and results.
* Please run `flake8 .` under the root of this repo to lint your code, and make sure the command produces no output.
@@ -41,7 +41,7 @@ demonstrating its __flexibility__ for actual research.
 ### Vision:
 + [Train ResNet](examples/ResNet) and [other models](examples/ImageNetModels) on ImageNet.
-+ [Train Faster-RCNN / Mask-RCNN on COCO object detection](examples/FasterRCNN)
++ [Train Mask/Faster R-CNN on COCO object detection](examples/FasterRCNN)
 + [Generative Adversarial Network(GAN) variants](examples/GAN), including DCGAN, InfoGAN, Conditional GAN, WGAN, BEGAN, DiscoGAN, Image to Image, CycleGAN.
 + [DoReFa-Net: train binary / low-bitwidth CNN on ImageNet](examples/DoReFa-Net)
 + [Fully-convolutional Network for Holistically-Nested Edge Detection(HED)](examples/HED)
...
@@ -22,7 +22,7 @@ No it's not, but it's not easy to write it in an efficient way.
 When **speed** is a concern, users will have to worry a lot about things unrelated to the model.
 Code written with low-level APIs or other existing high-level wrappers is often suboptimal in speed.
 Even most of the official TensorFlow examples are written for simplicity rather than efficiency,
-which as a result makes people think TensorFlow is __slow__.
+which as a result makes people think TensorFlow is *slow*.
 The `official TensorFlow benchmark <https://github.com/tensorflow/benchmarks/tree/master/scripts/tf_cnn_benchmarks>`_ said this in their README:
...
@@ -87,11 +87,11 @@ FPN models are sometimes slightly worse, which is mainly due to batch size.
 | Backbone | mAP<br/>(box;mask) | Detectron mAP <sup>[1](#ft1)</sup><br/> (box;mask) | Time (on 8 V100s) | Configurations <br/> (click to expand) |
 | - | - | - | - | - |
-| R50-C4 | 33.1 | | 18h | <details><summary>super quick</summary>`MODE_MASK=False FRCNN.BATCH_PER_IM=64`<br/>`PREPROC.SHORT_EDGE_SIZE=600 PREPROC.MAX_SIZE=1024`<br/>`TRAIN.LR_SCHEDULE=[150000,230000,280000]` </details> |
+| R50-C4 | 33.1 | | 18h | <details><summary>super quick</summary>`MODE_MASK=False FRCNN.BATCH_PER_IM=64`<br/>`PREPROC.TRAIN_SHORT_EDGE_SIZE=600 PREPROC.MAX_SIZE=1024`<br/>`TRAIN.LR_SCHEDULE=[150000,230000,280000]` </details> |
 | R50-C4 | 36.6 | 36.5 | 44h | <details><summary>standard</summary>`MODE_MASK=False` </details> |
-| R50-FPN | 37.4 | 37.9 | 27h | <details><summary>standard</summary>`MODE_MASK=False MODE_FPN=True` </details> |
-| R50-C4 | 38.2;33.3 [:arrow_down:](http://models.tensorpack.com/FasterRCNN/COCO-R50C4-MaskRCNN-Standard.npz) | 37.8;32.8 | 48h | <details><summary>standard</summary>this is the default </details> |
-| R50-FPN | 38.4;35.1 [:arrow_down:](http://models.tensorpack.com/FasterRCNN/COCO-R50FPN-MaskRCNN-Standard.npz) | 38.6;34.5 | 28h | <details><summary>standard</summary>`MODE_FPN=True` </details> |
+| R50-FPN | 37.4 | 37.9 | 23h | <details><summary>standard</summary>`MODE_MASK=False MODE_FPN=True` </details> |
+| R50-C4 | 38.2;33.3 [:arrow_down:](http://models.tensorpack.com/FasterRCNN/COCO-R50C4-MaskRCNN-Standard.npz) | 37.8;32.8 | 49h | <details><summary>standard</summary>this is the default </details> |
+| R50-FPN | 38.4;35.1 [:arrow_down:](http://models.tensorpack.com/FasterRCNN/COCO-R50FPN-MaskRCNN-Standard.npz) | 38.6;34.5 | 27h | <details><summary>standard</summary>`MODE_FPN=True` </details> |
 | R50-FPN | 42.0;36.3 | | 41h | <details><summary>+Cascade</summary>`MODE_FPN=True FPN.CASCADE=True` </details> |
 | R50-FPN | 39.5;35.2 | 39.5;34.4<sup>[2](#ft2)</sup> | 33h | <details><summary>+ConvGNHead</summary>`MODE_FPN=True`<br/>`FPN.FRCNN_HEAD_FUNC=fastrcnn_4conv1fc_gn_head` </details> |
 | R50-FPN | 40.0;36.2 [:arrow_down:](http://models.tensorpack.com/FasterRCNN/COCO-R50FPN-MaskRCNN-StandardGN.npz) | 40.3;35.7 | 40h | <details><summary>+GN</summary>`MODE_FPN=True`<br/>`FPN.NORM=GN BACKBONE.NORM=GN`<br/>`FPN.FRCNN_HEAD_FUNC=fastrcnn_4conv1fc_gn_head`<br/>`FPN.MRCNN_HEAD_FUNC=maskrcnn_up4conv_gn_head` </details> |
...
@@ -234,6 +234,8 @@ def finalize_configs(is_training):
     if is_training:
         train_scales = _C.PREPROC.TRAIN_SHORT_EDGE_SIZE
+        if not isinstance(train_scales, (list, tuple)):
+            train_scales = [train_scales, train_scales]
         if train_scales[1] - train_scales[0] > 100:
             # don't warmup if augmentation is on
             os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'
...
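The added isinstance check lets `PREPROC.TRAIN_SHORT_EDGE_SIZE` be given either as a single int (fixed size) or as a `(min, max)` range for scale augmentation. A minimal standalone sketch of that normalization pattern (names are illustrative, not tensorpack's code):

    def normalize_scales(value):
        """Accept a single size or a (min, max) range; always return [min, max]."""
        if not isinstance(value, (list, tuple)):
            value = [value, value]  # a fixed size means min == max, i.e. no scale jitter
        return list(value)

    assert normalize_scales(800) == [800, 800]         # scalar: fixed short edge
    assert normalize_scales((640, 800)) == [640, 800]  # range: random scale augmentation

With this in place, downstream code like the `train_scales[1] - train_scales[0] > 100` test can assume a two-element sequence.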
@@ -209,17 +209,16 @@ def fastrcnn_predictions(boxes, scores):
     selection = tf.image.non_max_suppression(
         box, prob, cfg.TEST.RESULTS_PER_IM, cfg.TEST.FRCNN_NMS_THRESH)
     selection = tf.gather(ids, selection)
-    # sort available in TF>1.4.0
-    # sorted_selection = tf.contrib.framework.sort(selection, direction='ASCENDING')
-    sorted_selection = -tf.nn.top_k(-selection, k=tf.size(selection))[0]
     if get_tf_version_tuple() >= (1, 13):
+        sorted_selection = tf.sort(selection, direction='ASCENDING')
         mask = tf.sparse.SparseTensor(indices=tf.expand_dims(sorted_selection, 1),
                                       values=tf.ones_like(sorted_selection, dtype=tf.bool),
                                       dense_shape=output_shape)
         mask = tf.sparse.to_dense(mask, default_value=False)
     else:
         # this function is deprecated by TF
+        sorted_selection = -tf.nn.top_k(-selection, k=tf.size(selection))[0]
         mask = tf.sparse_to_dense(
             sparse_indices=sorted_selection,
             output_shape=output_shape,
...
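Both branches of the hunk above produce the same ascending sort: `tf.sort` exists only in newer TF, while `-tf.nn.top_k(-x, k=tf.size(x))[0]` is the older idiom (the top-k of the negated values are the k smallest originals, and negating back yields ascending order). A quick NumPy sketch of the equivalence, so it runs without a TF session:

    import numpy as np

    def topk_values(v, k):
        # stand-in for tf.nn.top_k(v, k)[0]: the k largest values, in descending order
        return np.sort(v)[::-1][:k]

    x = np.array([5, 1, 9, 3])
    # old idiom: negate, take top-k, negate back -> ascending sort
    assert (-topk_values(-x, x.size) == np.sort(x)).all()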
@@ -27,7 +27,7 @@ These are all the toy examples in tensorpack. They are supposed to be just demos
 | Name | Performance |
 | --- | --- |
 | Train [ResNet](ResNet), [ShuffleNet and other models](ImageNetModels) on ImageNet | reproduce paper |
-| [Train Faster-RCNN / Mask-RCNN on COCO](FasterRCNN) | reproduce paper |
+| [Train Mask/Faster R-CNN on COCO](FasterRCNN) | reproduce paper |
 | [Generative Adversarial Network(GAN) variants](GAN), including DCGAN, InfoGAN, <br/> Conditional GAN, WGAN, BEGAN, DiscoGAN, Image to Image, CycleGAN | visually reproduce |
 | [DoReFa-Net: training binary / low-bitwidth CNN on ImageNet](DoReFa-Net) | reproduce paper |
 | [Fully-convolutional Network for Holistically-Nested Edge Detection(HED)](HED) | visually reproduce |
...
@@ -314,12 +314,13 @@ class SyncMultiGPUReplicatedBuilder(DataParallelBuilder):
         post_init_ops = []

         def log_failure(name, reason):
-            if name in trainable_names:
-                msg = "This variable is trainable, so this is probably a fatal error."
-            else:
-                msg = "This variable is non-trainable. Ignore this warning if you know it's OK to leave it out-of-sync."
             logger.warn("[ReplicatedTrainer] Do not know how to sync variable '{}' across GPUs. "
-                        "Reason: {} ".format(name, reason) + msg)
+                        "Reason: {} ".format(name, reason))
+            assert name not in trainable_names, \
+                "The aforementioned variable is trainable, so this is probably a fatal error."
+            logger.warn(
+                "[ReplicatedTrainer] This variable is non-trainable. "
+                "Ignore this warning if you know it's OK to leave it out-of-sync.")

         for v in all_vars:
             if not v.name.startswith('tower'):
...
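The refactored `log_failure` changes behavior slightly: the generic warning is always printed first, and an out-of-sync *trainable* variable now raises via `assert` instead of merely logging a stronger message. A toy sketch of the same warn-then-assert pattern (logger setup and names are stand-ins):

    import logging

    logging.basicConfig()
    logger = logging.getLogger("replicated")
    trainable_names = {"tower0/conv/W"}

    def log_failure(name, reason):
        logger.warning("Do not know how to sync variable '%s'. Reason: %s", name, reason)
        # a trainable variable left out-of-sync would corrupt training, so fail hard:
        assert name not in trainable_names, \
            "The aforementioned variable is trainable, so this is probably a fatal error."
        logger.warning("This variable is non-trainable; leaving it out-of-sync is usually OK.")

    log_failure("tower0/step_counter", "unsupported dtype")  # warns twice and returns
    # log_failure("tower0/conv/W", "unsupported dtype")      # would raise AssertionError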
@@ -148,7 +148,11 @@ def allreduce_grads(all_grads, average):
     Returns:
         K x N: same as input, but each grad is replaced by the average over K devices.
     """
-    from tensorflow.contrib import nccl
+    if get_tf_version_tuple() <= (1, 12):
+        from tensorflow.contrib import nccl
+    else:
+        from tensorflow.python.ops import nccl_ops as nccl
     nr_tower = len(all_grads)
     if nr_tower == 1:
         return all_grads
...
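The gate works because `get_tf_version_tuple()` compares integer tuples, which (unlike comparing version strings) orders 1.2 before 1.13. A sketch of what such a helper presumably does (a hypothetical reimplementation, not tensorpack's exact code):

    def version_tuple(version_string):
        """Parse 'MAJOR.MINOR[.rest]' into (int, int) for sane ordering."""
        major, minor = version_string.split('.')[:2]
        return int(major), int(minor)

    assert version_tuple("1.13.1") > version_tuple("1.2.0")  # tuple order: correct
    assert "1.13.1" < "1.2.0"                                # string order: wrong!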
@@ -54,6 +54,7 @@ try:
     assert int(_version[0]) >= 1 and int(_version[1]) >= 3, "TF>=1.3 is required!"
     _HAS_TF = True
 except ImportError:
+    print("Failed to import tensorflow.")
     _HAS_TF = False
...
@@ -230,13 +230,16 @@ def BatchNorm(inputs, axis=None, training=None, momentum=0.9, epsilon=1e-5,
             "Cross-GPU BatchNorm is only supported in TF>=1.10 ." \
             "Upgrade TF or apply this patch manually: https://github.com/tensorflow/tensorflow/pull/20360"

-            try:
-                from tensorflow.contrib.nccl.python.ops.nccl_ops import _validate_and_load_nccl_so
-            except Exception:
-                pass
-            else:
-                _validate_and_load_nccl_so()
-            from tensorflow.contrib.nccl.ops import gen_nccl_ops
+            if TF_version <= (1, 12):
+                try:
+                    from tensorflow.contrib.nccl.python.ops.nccl_ops import _validate_and_load_nccl_so
+                except Exception:
+                    pass
+                else:
+                    _validate_and_load_nccl_so()
+                from tensorflow.contrib.nccl.ops import gen_nccl_ops
+            else:
+                from tensorflow.python.ops import gen_nccl_ops

             shared_name = re.sub('tower[0-9]+/', '', tf.get_variable_scope().name)
             batch_mean = gen_nccl_ops.nccl_all_reduce(
                 input=batch_mean,
...
@@ -29,7 +29,7 @@ def Conv2D(
         dilation_rate=(1, 1),
         activation=None,
         use_bias=True,
-        kernel_initializer=tf.contrib.layers.variance_scaling_initializer(2.0),
+        kernel_initializer=None,
         bias_initializer=tf.zeros_initializer(),
         kernel_regularizer=None,
         bias_regularizer=None,
@@ -48,6 +48,11 @@ def Conv2D(
     * ``W``: weights
     * ``b``: bias
     """
+    if kernel_initializer is None:
+        if get_tf_version_tuple() <= (1, 12):
+            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(2.0)
+        else:
+            kernel_initializer = tf.keras.initializers.VarianceScaling(2.0)
     if split == 1:
         with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
             layer = tf.layers.Conv2D(
@@ -134,7 +139,7 @@ def Conv2DTranspose(
         data_format='channels_last',
         activation=None,
         use_bias=True,
-        kernel_initializer=tf.contrib.layers.variance_scaling_initializer(2.0),
+        kernel_initializer=None,
         bias_initializer=tf.zeros_initializer(),
         kernel_regularizer=None,
         bias_regularizer=None,
@@ -151,6 +156,11 @@ def Conv2DTranspose(
     * ``W``: weights
     * ``b``: bias
     """
+    if kernel_initializer is None:
+        if get_tf_version_tuple() <= (1, 12):
+            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(2.0)
+        else:
+            kernel_initializer = tf.keras.initializers.VarianceScaling(2.0)
     with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
         layer = tf.layers.Conv2DTranspose(
...
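Both initializer spellings aim at He initialization: weight variance scaled as 2/fan_in for ReLU networks. A hedged sketch of the TF>=1.13 spelling with all arguments written out (the two APIs' defaults are close, but worth verifying before relying on bit-identical initialization):

    import numpy as np
    import tensorflow as tf

    fan_in = 3 * 3 * 64  # kernel_h * kernel_w * in_channels
    init = tf.keras.initializers.VarianceScaling(
        scale=2.0, mode='fan_in', distribution='truncated_normal')
    w = init(shape=(3, 3, 64, 64))

    with tf.Session() as sess:
        sample = sess.run(w)
    # empirical std is close to sqrt(2 / fan_in), slightly below it due to truncation
    print(sample.std(), np.sqrt(2.0 / fan_in))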
@@ -5,6 +5,7 @@
 import tensorflow as tf
 import numpy as np

+from ..tfutils.common import get_tf_version_tuple
 from .common import layer_register, VariableHolder
 from .tflayer import convert_to_tflayer_args, rename_get_variable
@@ -30,7 +31,7 @@ def FullyConnected(
         units,
         activation=None,
         use_bias=True,
-        kernel_initializer=tf.contrib.layers.variance_scaling_initializer(2.0),
+        kernel_initializer=None,
         bias_initializer=tf.zeros_initializer(),
         kernel_regularizer=None,
         bias_regularizer=None,
@@ -45,6 +46,11 @@ def FullyConnected(
     * ``W``: weights of shape [in_dim, out_dim]
     * ``b``: bias
     """
+    if kernel_initializer is None:
+        if get_tf_version_tuple() <= (1, 12):
+            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(2.0)
+        else:
+            kernel_initializer = tf.keras.initializers.VarianceScaling(2.0)
     inputs = batch_flatten(inputs)
     with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
...
@@ -7,6 +7,7 @@ import re
 from ..utils import logger
 from ..utils.argtools import graph_memoized
+from ..tfutils.common import get_tf_version_tuple
 from ..tfutils.tower import get_current_tower_context
 from .common import layer_register
@@ -19,8 +20,12 @@ def _log_once(msg):
     logger.info(msg)

-l2_regularizer = tf.contrib.layers.l2_regularizer
-l1_regularizer = tf.contrib.layers.l1_regularizer
+if get_tf_version_tuple() <= (1, 12):
+    l2_regularizer = tf.contrib.layers.l2_regularizer
+    l1_regularizer = tf.contrib.layers.l1_regularizer
+else:
+    l2_regularizer = tf.keras.regularizers.l2
+    l1_regularizer = tf.keras.regularizers.l1

 def regularize_cost(regex, func, name='regularize_cost'):
...
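One caveat with this particular swap: the two l2 functions likely do not compute the same penalty for the same argument. `tf.contrib.layers.l2_regularizer(scale)` returns `scale * tf.nn.l2_loss(w)`, i.e. `scale * sum(w**2) / 2`, while `tf.keras.regularizers.l2(l)` computes `l * sum(w**2)`, twice as large. A quick numeric check, assuming a TF 1.x build where contrib is still importable:

    import numpy as np
    import tensorflow as tf

    w = tf.constant(np.array([1.0, 2.0], dtype=np.float32))
    keras_l2 = tf.keras.regularizers.l2(1e-4)(w)             # 1e-4 * sum(w**2)
    contrib_l2 = tf.contrib.layers.l2_regularizer(1e-4)(w)   # 1e-4 * sum(w**2) / 2

    with tf.Session() as sess:
        print(sess.run([keras_l2, contrib_l2]))  # expect [5e-4, 2.5e-4]: a factor-of-2 gap

If the gap is real, models whose weight decay was tuned against the old behavior would see twice the regularization after upgrading.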