Commit 847fae12 authored by Yuxin Wu's avatar Yuxin Wu

clean-up deprecation

parent 7b33a43c
@@ -369,37 +369,32 @@ def process_signature(app, what, name, obj, options, signature,
_DEPRECATED_NAMES = set([
# deprecated stuff:
'TryResumeTraining',
'QueueInputTrainer',
'SimplePredictBuilder',
'LMDBDataPoint',
'TFRecordData',
'dump_dataflow_to_lmdb',
'dump_dataflow_to_tfrecord',
'pyplot2img',
'IntBox', 'FloatBox',
'PrefetchOnGPUs',
# renamed stuff:
'DumpTensor',
'DumpParamAsImage',
'StagingInputWrapper',
'PeriodicRunHooks',
'get_nr_gpu',
'start_test', # TestDataSpeed
# deprecated or renamed symbolic code
'ImageSample',
'BilinearUpSample',
'Deconv2D',
'get_scalar_var', 'psnr',
'prediction_incorrect', 'huber_loss',
# internal only
'SessionUpdate',
'apply_default_prefetch',
'average_grads',
'aggregate_grads',
'allreduce_grads',
])
def autodoc_skip_member(app, what, name, obj, skip, options):
@@ -414,7 +409,7 @@ def autodoc_skip_member(app, what, name, obj, skip, options):
# Hide some names that are deprecated or not intended to be used
if name in _DEPRECATED_NAMES:
return True
if name in ['get_data', 'size', 'reset_state']:
if name in ['__iter__', '__len__', 'reset_state']:
# skip these methods with empty docstring
if not obj.__doc__ and inspect.isfunction(obj):
# https://stackoverflow.com/questions/3589311/get-defining-class-of-unbound-method-object-in-python-3
......
@@ -6,6 +6,8 @@ a __Python generator__ which yields preprocessed ImageNet images and labels as f
Since it is simply a generator interface, you can use the DataFlow in any Python-based framework (e.g. PyTorch, Keras)
or your own code as well.
**What we are going to do**: We'll use the ILSVRC12 dataset, which contains 1.28 million images.
The original images (JPEG compressed) are 140G in total.
The average resolution is about 400x350 <sup>[[1]]</sup>.
@@ -29,6 +31,10 @@ Some things to know before reading:
4. The actual performance depends not only on the disk, but also on memory (for caching) and CPU (for data processing).
You may need to tune the parameters (#processes, #threads, size of buffer, etc.)
or change the pipeline for new tasks and new machines to achieve the best performance.
The solutions in this tutorial may not help you.
To improve your own DataFlow, read the
[performance tuning tutorial](performance-tuning.html#investigate-dataflow)
before doing any optimizations.
The benchmark code for this tutorial can be found in [tensorpack/benchmarks](https://github.com/tensorpack/benchmarks/tree/master/ImageNet),
including comparison with a similar (but simpler) pipeline built with `tf.data`.
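To measure where your own pipeline stands, the built-in `TestDataSpeed` wrapper can benchmark any DataFlow; a minimal sketch, using `FakeData` so that it runs without the real dataset:

```python
from tensorpack.dataflow import FakeData, TestDataSpeed

# FakeData yields random arrays of the given shapes, so this measures
# only the pipeline overhead rather than disk speed.
df = FakeData([[224, 224, 3]], size=1000)
TestDataSpeed(df, size=1000).start()
```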
......
@@ -12,11 +12,11 @@ and then compose it with existing modules (e.g. mapping, batching, prefetching,
The easiest way to create a DataFlow to load custom data is to wrap a custom generator, e.g.:
```python
def my_data_loader():
    while True:
        # load data from somewhere with Python
        # load data from somewhere with Python, and yield them
        for k in range(100):
            yield [my_array, my_label]
dataflow = DataFromGenerator(my_data_loader)
df = DataFromGenerator(my_data_loader)
```
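Note that, per the `reset_state` contract of the base `DataFlow` class, the process that consumes the dataflow should call `reset_state()` once before iterating:

```python
df = DataFromGenerator(my_data_loader)
df.reset_state()         # required once, in the process that uses the dataflow
for datapoint in df:     # each datapoint is a [my_array, my_label] list
    pass                 # consume it in your own code
```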
To write a more complicated DataFlow, you need to inherit from the base `DataFlow` class.
@@ -24,6 +24,7 @@ Usually, you just need to implement the `__iter__()` method which yields a datap
```python
class MyDataFlow(DataFlow):
    def __iter__(self):
        # load data from somewhere with Python, and yield them
        for k in range(100):
            digit = np.random.rand(28, 28)
            label = np.random.randint(10)
@@ -38,6 +39,8 @@ for datapoint in df:
Optionally, you can implement the `__len__` and `reset_state` methods.
The detailed semantics of these three methods are explained
in the [API documentation](../../modules/dataflow.html#tensorpack.dataflow.DataFlow).
If you're writing a complicated DataFlow, make sure to read the API documentation
for the semantics.
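For reference, a complete version of the toy example above with all three methods implemented (a sketch; the `reset_state` body shown is one reasonable choice, not the only one):

```python
import numpy as np
from tensorpack.dataflow import DataFlow

class MyDataFlow(DataFlow):
    def reset_state(self):
        # called once in the process that consumes the dataflow;
        # a good place to create a per-process RNG
        self.rng = np.random.RandomState()

    def __len__(self):
        return 100   # optional: the number of datapoints per epoch

    def __iter__(self):
        for _ in range(100):
            digit = self.rng.rand(28, 28)
            label = self.rng.randint(10)
            yield [digit, label]
```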
DataFlow implementations for several well-known datasets are provided in the
[dataflow.dataset](../../modules/dataflow.dataset.html)
@@ -53,8 +56,11 @@ class ProcessingDataFlow(DataFlow):
    def __init__(self, ds):
        self.ds = ds

    def reset_state(self):
        self.ds.reset_state()

    def __iter__(self):
        for datapoint in self.ds.get_data():
        for datapoint in self.ds:
            # do something
            yield new_datapoint
```
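For a simple per-datapoint transformation like this, composing with the existing `MapData` module achieves the same thing without a new class; a sketch, where `transform` stands for your own function:

```python
from tensorpack.dataflow import MapData

# Apply `transform` (a placeholder for your own function) to the first
# component of every datapoint, keeping the label unchanged.
df = MapData(ds, lambda dp: [transform(dp[0]), dp[1]])
```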
......
@@ -3,6 +3,7 @@
from .base import ProxyCallback, Callback
from ..utils.develop import log_deprecated
__all__ = ['PeriodicTrigger', 'PeriodicCallback', 'EnableCallbackIf']
@@ -77,6 +78,7 @@ class PeriodicRunHooks(ProxyCallback):
"""
self._every_k_steps = int(every_k_steps)
super(PeriodicRunHooks, self).__init__(callback)
log_deprecated("PeriodicRunHooks", "Use PeriodicCallback instead!", "2019-02-28")
def _before_run(self, ctx):
if self.global_step % self._every_k_steps == 0:
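A sketch of the `PeriodicCallback` replacement named in the deprecation message; `my_callback` stands for any callback to be rate-limited:

```python
from tensorpack.callbacks import PeriodicCallback

# run `my_callback` only once every 100 steps
cb = PeriodicCallback(my_callback, every_k_steps=100)
```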
......
@@ -65,7 +65,7 @@ class DataFlow(object):
"""
* A dataflow is an iterable. The :meth:`__iter__` method should yield a list each time.
Each element in the list should be either a number or a numpy array.
For now, tensorpack also partially supports dict instead of list.
For now, tensorpack also **partially** supports dict instead of list.
* The :meth:`__iter__` method can be either finite (will stop iteration) or infinite
(will not stop iteration). For a finite dataflow, :meth:`__iter__` can be called
@@ -107,7 +107,7 @@ class DataFlow(object):
it yourself, especially when using a data-parallel trainer.
+ The length of progress bar when processing a dataflow.
+ Used by :class:`InferenceRunner` to get the number of iterations in inference.
In this case users are responsible for making sure that :meth:`__len__` is accurate.
In this case users are **responsible** for making sure that :meth:`__len__` is accurate.
This is to guarantee that inference is run on a fixed set of images.
Returns:
@@ -127,11 +127,11 @@ class DataFlow(object):
by the **process that uses the dataflow** before :meth:`__iter__` is called.
The caller thread of this method should stay alive to keep this dataflow alive.
* It is meant for initialization works that involve processes,
e.g., initialize random number generator (RNG), create worker processes.
* It is meant for certain initialization that involves processes,
e.g., initialize random number generators (RNG), create worker processes.
Because it's very common to use RNG in data processing,
developers of dataflow can also subclass :class:`RNGDataFlow` to simplify the work.
developers of dataflow can also subclass :class:`RNGDataFlow` to have easier access to an RNG.
* A dataflow is not fork-safe after :meth:`reset_state` is called (because this will violate the guarantee).
A few dataflows are never fork-safe, which will be mentioned in the docs.
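A sketch of the `RNGDataFlow` pattern mentioned above; `RandomIndexFlow` is a made-up example, not a library class:

```python
from tensorpack.dataflow import RNGDataFlow

class RandomIndexFlow(RNGDataFlow):
    def __init__(self, size):
        self._size = size

    def __iter__(self):
        # self.rng is a per-process numpy RandomState,
        # (re-)created when reset_state() is called
        for _ in range(self._size):
            yield [self.rng.randint(1000)]
```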
@@ -158,7 +158,7 @@ class RNGDataFlow(DataFlow):
class ProxyDataFlow(DataFlow):
""" Base class for DataFlow that proxies another.
Every method is proxied to ``self.ds`` unless override by subclass.
Every method is proxied to ``self.ds`` unless overridden by a subclass.
"""
def __init__(self, ds):
......
@@ -43,6 +43,10 @@ class TestDataSpeed(ProxyDataFlow):
yield dp
def start_test(self):
log_deprecated("TestDataSpeed.start_test() was renamed to start()", "2019-03-30")
self.start()
def start(self):
"""
Start testing with a progress bar.
"""
@@ -59,12 +63,6 @@ class TestDataSpeed(ProxyDataFlow):
if idx == self.test_size - 1:
break
def start(self):
"""
Alias of start_test.
"""
self.start_test()
class BatchData(ProxyDataFlow):
"""
......
@@ -21,6 +21,7 @@ from ..utils.concurrency import (ensure_proc_terminate,
from ..utils.serialize import loads, dumps
from ..utils import logger
from ..utils.gpu import change_gpu
from ..utils.develop import log_deprecated
__all__ = ['PrefetchData', 'MultiProcessPrefetchData',
'PrefetchDataZMQ', 'PrefetchOnGPUs', 'MultiThreadPrefetchData']
@@ -339,6 +340,7 @@ class PrefetchOnGPUs(PrefetchDataZMQ):
ds (DataFlow): input DataFlow.
gpus (list[int]): list of GPUs to use. Will also start this number of processes.
"""
log_deprecated("PrefetchOnGPUs", "It does not seem useful, and please implement it yourself.", "2019-02-28")
self.gpus = gpus
super(PrefetchOnGPUs, self).__init__(ds, len(gpus))
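For most use cases, plain multi-process prefetching is the drop-in alternative; a sketch, where `ds` stands for any input DataFlow:

```python
from tensorpack.dataflow import PrefetchDataZMQ

# prefetch `ds` with 4 worker processes over ZMQ pipes,
# without pinning each worker to a GPU
df = PrefetchDataZMQ(ds, nr_proc=4)
```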
......
@@ -2,20 +2,18 @@
# File: sessinit.py
import os
import numpy as np
import tensorflow as tf
import six
from ..utils import logger
from ..utils.develop import deprecated
from .common import get_op_tensor_name
from .varmanip import (SessionUpdate, get_savename_from_varname,
is_training_name, get_checkpoint_path)
__all__ = ['SessionInit', 'ChainInit',
'SaverRestore', 'SaverRestoreRelaxed', 'DictRestore',
'JustCurrentSession', 'get_model_loader', 'TryResumeTraining']
'JustCurrentSession', 'get_model_loader']
class SessionInit(object):
@@ -260,21 +258,3 @@ def get_model_loader(filename):
return DictRestore(dict(obj))
else:
return SaverRestore(filename)
@deprecated("It's better to write the logic yourself or use AutoResumeTrainConfig!", "2018-07-01")
def TryResumeTraining():
"""
Try loading latest checkpoint from ``logger.get_logger_dir()``, only if there is one.
Actually not very useful... better to write your own.
Returns:
SessInit: either a :class:`JustCurrentSession`, or a :class:`SaverRestore`.
"""
if not logger.get_logger_dir():
return JustCurrentSession()
path = os.path.join(logger.get_logger_dir(), 'checkpoint')
if not tf.gfile.Exists(path):
return JustCurrentSession()
logger.info("Found checkpoint at {}.".format(path))
return SaverRestore(path)
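A sketch of the `AutoResumeTrainConfig` alternative named in the decorator; `MyModel` and `my_dataflow` are placeholders:

```python
from tensorpack import AutoResumeTrainConfig

# picks up the latest checkpoint under logger.get_logger_dir(),
# if one exists; otherwise starts training from scratch
config = AutoResumeTrainConfig(
    model=MyModel(),        # placeholder ModelDesc
    dataflow=my_dataflow,   # placeholder input DataFlow
    max_epoch=100,
)
```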
@@ -3,35 +3,10 @@
import tensorflow as tf
import numpy as np
from ..utils.develop import deprecated
__all__ = ['get_scalar_var', 'prediction_incorrect', 'flatten', 'batch_flatten', 'print_stat', 'rms', 'huber_loss']
# this function exists for backwards-compatibility
def prediction_incorrect(logits, label, topk=1, name='incorrect_vector'):
return tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, topk)), tf.float32, name=name)
@deprecated("Please implement it yourself!", "2018-08-01")
def flatten(x):
"""
Flatten the tensor.
"""
return tf.reshape(x, [-1])
@deprecated("Please implement it yourself!", "2018-08-01")
def batch_flatten(x):
"""
Flatten the tensor except the first dimension.
"""
shape = x.get_shape().as_list()[1:]
if None not in shape:
return tf.reshape(x, [-1, int(np.prod(shape))])
return tf.reshape(x, tf.stack([tf.shape(x)[0], -1]))
__all__ = ['print_stat', 'rms']
def print_stat(x, message=None):
@@ -47,8 +22,7 @@ def print_stat(x, message=None):
message=message, name='print_' + x.op.name)
# after deprecated, keep it for internal use only
# @deprecated("Please implement it yourself!", "2018-08-01")
# for internal use only
def rms(x, name=None):
"""
Returns:
@@ -61,58 +35,7 @@ def rms(x, name=None):
return tf.sqrt(tf.reduce_mean(tf.square(x)), name=name)
@deprecated("Please use tf.losses.huber_loss instead!", "2018-08-01")
def huber_loss(x, delta=1, name='huber_loss'):
r"""
Huber loss of x.
.. math::
y = \begin{cases} \frac{x^2}{2}, & |x| < \delta \\
\delta |x| - \frac{\delta^2}{2}, & |x| \ge \delta
\end{cases}
Args:
x: the difference vector.
delta (float):
Returns:
a tensor of the same shape as x.
"""
with tf.name_scope('huber_loss'):
sqrcost = tf.square(x)
abscost = tf.abs(x)
cond = abscost < delta
l2 = sqrcost * 0.5
l1 = abscost * delta - 0.5 * delta ** 2
return tf.where(cond, l2, l1, name=name)
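The replacement named in the decorator is TensorFlow's built-in implementation; a one-line sketch, where `labels` and `predictions` are placeholders:

```python
import tensorflow as tf

# TF 1.x built-in Huber loss; note it takes labels and predictions
# rather than a precomputed difference vector x
loss = tf.losses.huber_loss(labels, predictions, delta=1.0)
```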
# TODO deprecate this in the future
# doesn't hurt to keep it here for now
@deprecated("Simply use tf.get_variable instead!", "2018-08-01")
def get_scalar_var(name, init_value, summary=False, trainable=False):
"""
Get a scalar float variable with a certain initial value.
You can just call `tf.get_variable(name, initializer=init_value, trainable=False)` instead.
Args:
name (str): name of the variable.
init_value (float): initial value.
summary (bool): whether to summary this variable.
trainable (bool): trainable or not.
Returns:
tf.Variable: the variable
"""
ret = tf.get_variable(name, initializer=float(init_value),
trainable=trainable)
if summary:
# this is recognized in callbacks.StatHolder
tf.summary.scalar(name + '-summary', ret)
return ret
# doesn't hurt to leave it here
@deprecated("Please implement it by yourself.", "2018-04-28")
def psnr(prediction, ground_truth, maxp=None, name='psnr'):
"""`Peek Signal to Noise Ratio <https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio>`_.
......
@@ -12,7 +12,7 @@ from .config import TrainConfig
from .tower import SingleCostTrainer
from .trainers import SimpleTrainer
__all__ = ['launch_train_with_config', 'apply_default_prefetch']
__all__ = ['launch_train_with_config']
def apply_default_prefetch(input_source_or_dataflow, trainer):
......
@@ -3,6 +3,7 @@
import numpy as np
from .develop import log_deprecated
__all__ = ['IntBox', 'FloatBox']
@@ -11,6 +12,7 @@ class BoxBase(object):
__slots__ = ['x1', 'y1', 'x2', 'y2']
def __init__(self, x1, y1, x2, y2):
log_deprecated("IntBox and FloatBox", "Please implement them by your own.", "2019-02-28")
self.x1 = x1
self.y1 = y1
self.x2 = x2
......
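Since `IntBox` and `FloatBox` are deprecated in favor of user code, a tiny stand-in is easy to roll; purely illustrative:

```python
from collections import namedtuple

# a minimal replacement for the deprecated box classes; add whatever
# helpers (area, clipping, ...) your project actually needs
Box = namedtuple('Box', ['x1', 'y1', 'x2', 'y2'])

b = Box(10, 20, 50, 80)
w, h = b.x2 - b.x1, b.y2 - b.y1
```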
@@ -5,7 +5,6 @@
import numpy as np
import os
import sys
import io
from .fs import mkdir_p
from .argtools import shape2d
from .palette import PALETTE_RGB
@@ -16,24 +15,12 @@ except ImportError:
pass
__all__ = ['pyplot2img', 'interactive_imshow',
__all__ = ['interactive_imshow',
'stack_patches', 'gen_stack_patches',
'dump_dataflow_images', 'intensity_to_rgb',
'draw_boxes']
def pyplot2img(plt):
""" Convert a pyplot instance to image """
buf = io.BytesIO()
plt.axis('off')
plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
buf.seek(0)
rawbuf = np.frombuffer(buf.getvalue(), dtype='uint8')
im = cv2.imdecode(rawbuf, cv2.IMREAD_COLOR)
buf.close()
return im
def interactive_imshow(img, lclick_cb=None, rclick_cb=None, **kwargs):
"""
Args:
@@ -428,7 +415,6 @@ from ..utils.develop import create_dummy_func # noqa
try:
import matplotlib.pyplot as plt
except (ImportError, RuntimeError):
pyplot2img = create_dummy_func('pyplot2img', 'matplotlib') # noqa
intensity_to_rgb = create_dummy_func('intensity_to_rgb', 'matplotlib') # noqa
if __name__ == '__main__':
......