Commit 847fae12 authored by Yuxin Wu's avatar Yuxin Wu

clean-up deprecation

parent 7b33a43c
@@ -369,37 +369,32 @@ def process_signature(app, what, name, obj, options, signature,
_DEPRECATED_NAMES = set([
# deprecated stuff:
'TryResumeTraining',
'QueueInputTrainer',
'SimplePredictBuilder',
'LMDBDataPoint',
'TFRecordData',
'dump_dataflow_to_lmdb',
'dump_dataflow_to_tfrecord',
'pyplot2img',
'IntBox', 'FloatBox',
'PrefetchOnGPUs',
# renamed stuff:
'DumpTensor',
'DumpParamAsImage',
'StagingInputWrapper',
'PeriodicRunHooks',
'get_nr_gpu',
'start_test', # TestDataSpeed
# deprecated or renamed symbolic code
'ImageSample',
'BilinearUpSample',
'Deconv2D',
'get_scalar_var', 'psnr',
'prediction_incorrect', 'huber_loss',
# internal only
'SessionUpdate',
'apply_default_prefetch',
'average_grads',
'aggregate_grads',
'allreduce_grads',
])
def autodoc_skip_member(app, what, name, obj, skip, options):
@@ -414,7 +409,7 @@ def autodoc_skip_member(app, what, name, obj, skip, options):
# Hide some names that are deprecated or not intended to be used
if name in _DEPRECATED_NAMES:
return True
if name in ['get_data', 'size', 'reset_state']:
if name in ['__iter__', '__len__', 'reset_state']:
# skip these methods with empty docstring
if not obj.__doc__ and inspect.isfunction(obj):
# https://stackoverflow.com/questions/3589311/get-defining-class-of-unbound-method-object-in-python-3
......
@@ -6,6 +6,8 @@ a __Python generator__ which yields preprocessed ImageNet images and labels as f
Since it is simply a generator interface, you can use the DataFlow in any Python-based framework (e.g. PyTorch, Keras)
or your own code as well.
**What we are going to do**: We'll use the ILSVRC12 dataset, which contains 1.28 million images.
The original images (JPEG compressed) are 140G in total.
The average resolution is about 400x350 <sup>[[1]]</sup>.
@@ -29,6 +31,10 @@ Some things to know before reading:
4. The actual performance depends not only on the disk, but also on memory (for caching) and CPU (for data processing).
You may need to tune the parameters (#processes, #threads, size of buffer, etc.)
or change the pipeline for new tasks and new machines to achieve the best performance.
The solutions in this tutorial may not help you.
To improve your own DataFlow, read the
[performance tuning tutorial](performance-tuning.html#investigate-dataflow)
before doing any optimizations.
The benchmark code for this tutorial can be found in [tensorpack/benchmarks](https://github.com/tensorpack/benchmarks/tree/master/ImageNet),
including comparison with a similar (but simpler) pipeline built with `tf.data`.
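To measure where your own pipeline stands, the built-in `TestDataSpeed` wrapper can benchmark any DataFlow; a minimal sketch, using `FakeData` so that it runs without the real dataset:

```python
from tensorpack.dataflow import FakeData, TestDataSpeed

# FakeData yields random arrays of the given shapes, so this measures
# only the pipeline overhead rather than disk speed.
df = FakeData([[224, 224, 3]], size=1000)
TestDataSpeed(df, size=1000).start()
```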
......
@@ -12,11 +12,11 @@ and then compose it with existing modules (e.g. mapping, batching, prefetching,
The easiest way to create a DataFlow to load custom data is to wrap a custom generator, e.g.:
```python
def my_data_loader():
    while True:
        # load data from somewhere with Python
        # load data from somewhere with Python, and yield them
        for k in range(100):
            yield [my_array, my_label]
dataflow = DataFromGenerator(my_data_loader)
df = DataFromGenerator(my_data_loader)
```
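Note that, per the `reset_state` contract of the base `DataFlow` class, the process that consumes the dataflow should call `reset_state()` once before iterating:

```python
df = DataFromGenerator(my_data_loader)
df.reset_state()         # required once, in the process that uses the dataflow
for datapoint in df:     # each datapoint is a [my_array, my_label] list
    pass                 # consume it in your own code
```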
To write a more complicated DataFlow, you need to inherit from the base `DataFlow` class.
@@ -24,6 +24,7 @@ Usually, you just need to implement the `__iter__()` method which yields a datap
```python
class MyDataFlow(DataFlow):
    def __iter__(self):
        # load data from somewhere with Python, and yield them
        for k in range(100):
            digit = np.random.rand(28, 28)
            label = np.random.randint(10)
@@ -38,6 +39,8 @@ for datapoint in df:
Optionally, you can implement the `__len__` and `reset_state` methods.
The detailed semantics of these three methods are explained
in the [API documentation](../../modules/dataflow.html#tensorpack.dataflow.DataFlow).
If you're writing a complicated DataFlow, make sure to read the API documentation
for the semantics.
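For reference, a complete version of the toy example above with all three methods implemented (a sketch; the `reset_state` body shown is one reasonable choice, not the only one):

```python
import numpy as np
from tensorpack.dataflow import DataFlow

class MyDataFlow(DataFlow):
    def reset_state(self):
        # called once in the process that consumes the dataflow;
        # a good place to create a per-process RNG
        self.rng = np.random.RandomState()

    def __len__(self):
        return 100   # optional: the number of datapoints per epoch

    def __iter__(self):
        for _ in range(100):
            digit = self.rng.rand(28, 28)
            label = self.rng.randint(10)
            yield [digit, label]
```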
DataFlow implementations for several well-known datasets are provided in the
[dataflow.dataset](../../modules/dataflow.dataset.html)
@@ -53,8 +56,11 @@ class ProcessingDataFlow(DataFlow):
    def __init__(self, ds):
        self.ds = ds

    def reset_state(self):
        self.ds.reset_state()

    def __iter__(self):
        for datapoint in self.ds.get_data():
        for datapoint in self.ds:
            # do something
            yield new_datapoint
```
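For a simple per-datapoint transformation like this, composing with the existing `MapData` module achieves the same thing without a new class; a sketch, where `transform` stands for your own function:

```python
from tensorpack.dataflow import MapData

# Apply `transform` (a placeholder for your own function) to the first
# component of every datapoint, keeping the label unchanged.
df = MapData(ds, lambda dp: [transform(dp[0]), dp[1]])
```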
......
@@ -3,6 +3,7 @@
from .base import ProxyCallback, Callback
from ..utils.develop import log_deprecated
__all__ = ['PeriodicTrigger', 'PeriodicCallback', 'EnableCallbackIf']
@@ -77,6 +78,7 @@ class PeriodicRunHooks(ProxyCallback):
"""
self._every_k_steps = int(every_k_steps)
super(PeriodicRunHooks, self).__init__(callback)
log_deprecated("PeriodicRunHooks", "Use PeriodicCallback instead!", "2019-02-28")
def _before_run(self, ctx):
if self.global_step % self._every_k_steps == 0:
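A sketch of the `PeriodicCallback` replacement named in the deprecation message; `my_callback` stands for any callback to be rate-limited:

```python
from tensorpack.callbacks import PeriodicCallback

# run `my_callback` only once every 100 steps
cb = PeriodicCallback(my_callback, every_k_steps=100)
```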
......
@@ -65,7 +65,7 @@ class DataFlow(object):
"""
* A dataflow is an iterable. The :meth:`__iter__` method should yield a list each time.
Each element in the list should be either a number or a numpy array.
For now, tensorpack also partially supports dict instead of list.
For now, tensorpack also **partially** supports dict instead of list.
* The :meth:`__iter__` method can be either finite (will stop iteration) or infinite
(will not stop iteration). For a finite dataflow, :meth:`__iter__` can be called
@@ -107,7 +107,7 @@ class DataFlow(object):
it yourself, especially when using a data-parallel trainer.
+ The length of progress bar when processing a dataflow.
+ Used by :class:`InferenceRunner` to get the number of iterations in inference.
In this case users are responsible for making sure that :meth:`__len__` is accurate.
In this case users are **responsible** for making sure that :meth:`__len__` is accurate.
This is to guarantee that inference is run on a fixed set of images.
Returns:
@@ -127,11 +127,11 @@ class DataFlow(object):
by the **process that uses the dataflow** before :meth:`__iter__` is called.
The caller thread of this method should stay alive to keep this dataflow alive.
* It is meant for initialization works that involve processes,
e.g., initialize random number generator (RNG), create worker processes.
* It is meant for certain initialization that involves processes,
e.g., initialize random number generators (RNG), create worker processes.
Because it's very common to use RNG in data processing,
developers of dataflow can also subclass :class:`RNGDataFlow` to simplify the work.
developers of dataflow can also subclass :class:`RNGDataFlow` to have easier access to an RNG.
* A dataflow is not fork-safe after :meth:`reset_state` is called (because this will violate the guarantee).
A few dataflows are never fork-safe, which will be mentioned in the docs.
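A sketch of the `RNGDataFlow` pattern mentioned above; `RandomIndexFlow` is a made-up example, not a library class:

```python
from tensorpack.dataflow import RNGDataFlow

class RandomIndexFlow(RNGDataFlow):
    def __init__(self, size):
        self._size = size

    def __iter__(self):
        # self.rng is a per-process numpy RandomState,
        # (re-)created when reset_state() is called
        for _ in range(self._size):
            yield [self.rng.randint(1000)]
```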
@@ -158,7 +158,7 @@ class RNGDataFlow(DataFlow):
class ProxyDataFlow(DataFlow):
""" Base class for DataFlow that proxies another.
Every method is proxied to ``self.ds`` unless override by subclass.
Every method is proxied to ``self.ds`` unless overridden by a subclass.
"""
def __init__(self, ds):
......
@@ -43,6 +43,10 @@ class TestDataSpeed(ProxyDataFlow):
yield dp
def start_test(self):
log_deprecated("TestDataSpeed.start_test() was renamed to start()", "2019-03-30")
self.start()
def start(self):
"""
Start testing with a progress bar.
"""
@@ -59,12 +63,6 @@ class TestDataSpeed(ProxyDataFlow):
if idx == self.test_size - 1:
break
def start(self):
"""
Alias of start_test.
"""
self.start_test()
class BatchData(ProxyDataFlow):
"""
......
@@ -21,6 +21,7 @@ from ..utils.concurrency import (ensure_proc_terminate,
from ..utils.serialize import loads, dumps
from ..utils import logger
from ..utils.gpu import change_gpu
from ..utils.develop import log_deprecated
__all__ = ['PrefetchData', 'MultiProcessPrefetchData',
'PrefetchDataZMQ', 'PrefetchOnGPUs', 'MultiThreadPrefetchData']
@@ -339,6 +340,7 @@ class PrefetchOnGPUs(PrefetchDataZMQ):
ds (DataFlow): input DataFlow.
gpus (list[int]): list of GPUs to use. Will also start this number of processes.
"""
log_deprecated("PrefetchOnGPUs", "It does not seem useful, and please implement it yourself.", "2019-02-28")
self.gpus = gpus
super(PrefetchOnGPUs, self).__init__(ds, len(gpus))
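For most use cases, plain multi-process prefetching is the drop-in alternative; a sketch, where `ds` stands for any input DataFlow:

```python
from tensorpack.dataflow import PrefetchDataZMQ

# prefetch `ds` with 4 worker processes over ZMQ pipes,
# without pinning each worker to a GPU
df = PrefetchDataZMQ(ds, nr_proc=4)
```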
......
@@ -2,20 +2,18 @@
# File: sessinit.py
import os
import numpy as np
import tensorflow as tf
import six
from ..utils import logger
from ..utils.develop import deprecated
from .common import get_op_tensor_name
from .varmanip import (SessionUpdate, get_savename_from_varname,
is_training_name, get_checkpoint_path)
__all__ = ['SessionInit', 'ChainInit',
'SaverRestore', 'SaverRestoreRelaxed', 'DictRestore',
'JustCurrentSession', 'get_model_loader', 'TryResumeTraining']
'JustCurrentSession', 'get_model_loader']
class SessionInit(object):
@@ -260,21 +258,3 @@ def get_model_loader(filename):
return DictRestore(dict(obj))
else:
return SaverRestore(filename)
@deprecated("It's better to write the logic yourself or use AutoResumeTrainConfig!", "2018-07-01")
def TryResumeTraining():
"""
Try loading latest checkpoint from ``logger.get_logger_dir()``, only if there is one.
Actually not very useful... better to write your own.
Returns:
SessInit: either a :class:`JustCurrentSession`, or a :class:`SaverRestore`.
"""
if not logger.get_logger_dir():
return JustCurrentSession()
path = os.path.join(logger.get_logger_dir(), 'checkpoint')
if not tf.gfile.Exists(path):
return JustCurrentSession()
logger.info("Found checkpoint at {}.".format(path))
return SaverRestore(path)
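A sketch of the `AutoResumeTrainConfig` alternative named in the decorator; `MyModel` and `my_dataflow` are placeholders:

```python
from tensorpack import AutoResumeTrainConfig

# picks up the latest checkpoint under logger.get_logger_dir(),
# if one exists; otherwise starts training from scratch
config = AutoResumeTrainConfig(
    model=MyModel(),        # placeholder ModelDesc
    dataflow=my_dataflow,   # placeholder input DataFlow
    max_epoch=100,
)
```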
@@ -3,35 +3,10 @@
import tensorflow as tf
import numpy as np
from ..utils.develop import deprecated
__all__ = ['get_scalar_var', 'prediction_incorrect', 'flatten', 'batch_flatten', 'print_stat', 'rms', 'huber_loss']
# this function exists for backwards-compatibility
def prediction_incorrect(logits, label, topk=1, name='incorrect_vector'):
return tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, topk)), tf.float32, name=name)
@deprecated("Please implement it yourself!", "2018-08-01")
def flatten(x):
"""
Flatten the tensor.
"""
return tf.reshape(x, [-1])
@deprecated("Please implement it yourself!", "2018-08-01")
def batch_flatten(x):
"""
Flatten the tensor except the first dimension.
"""
shape = x.get_shape().as_list()[1:]
if None not in shape:
return tf.reshape(x, [-1, int(np.prod(shape))])
return tf.reshape(x, tf.stack([tf.shape(x)[0], -1]))
__all__ = ['print_stat', 'rms']
def print_stat(x, message=None):
@@ -47,8 +22,7 @@ def print_stat(x, message=None):
message=message, name='print_' + x.op.name)
# after deprecated, keep it for internal use only
# @deprecated("Please implement it yourself!", "2018-08-01")
# for internal use only
def rms(x, name=None):
"""
Returns:
@@ -61,58 +35,7 @@ def rms(x, name=None):
return tf.sqrt(tf.reduce_mean(tf.square(x)), name=name)
@deprecated("Please use tf.losses.huber_loss instead!", "2018-08-01")
def huber_loss(x, delta=1, name='huber_loss'):
r"""
Huber loss of x.
.. math::
y = \begin{cases} \frac{x^2}{2}, & |x| < \delta \\
\delta |x| - \frac{\delta^2}{2}, & |x| \ge \delta
\end{cases}
Args:
x: the difference vector.
delta (float):
Returns:
a tensor of the same shape as x.
"""
with tf.name_scope('huber_loss'):
sqrcost = tf.square(x)
abscost = tf.abs(x)
cond = abscost < delta
l2 = sqrcost * 0.5
l1 = abscost * delta - 0.5 * delta ** 2
return tf.where(cond, l2, l1, name=name)
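The replacement named in the decorator is TensorFlow's built-in implementation; a one-line sketch, where `labels` and `predictions` are placeholders:

```python
import tensorflow as tf

# TF 1.x built-in Huber loss; note it takes labels and predictions
# rather than a precomputed difference vector x
loss = tf.losses.huber_loss(labels, predictions, delta=1.0)
```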
# TODO deprecate this in the future
# doesn't hurt to keep it here for now
@deprecated("Simply use tf.get_variable instead!", "2018-08-01")
def get_scalar_var(name, init_value, summary=False, trainable=False):
"""
Get a scalar float variable with a certain initial value.
You can just call `tf.get_variable(name, initializer=init_value, trainable=False)` instead.
Args:
name (str): name of the variable.
init_value (float): initial value.
summary (bool): whether to summary this variable.
trainable (bool): trainable or not.
Returns:
tf.Variable: the variable
"""
ret = tf.get_variable(name, initializer=float(init_value),
trainable=trainable)
if summary:
# this is recognized in callbacks.StatHolder
tf.summary.scalar(name + '-summary', ret)
return ret
# doesn't hurt to leave it here
@deprecated("Please implement it by yourself.", "2018-04-28")
def psnr(prediction, ground_truth, maxp=None, name='psnr'):
"""`Peek Signal to Noise Ratio <https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio>`_.
......
@@ -12,7 +12,7 @@ from .config import TrainConfig
from .tower import SingleCostTrainer
from .trainers import SimpleTrainer
__all__ = ['launch_train_with_config', 'apply_default_prefetch']
__all__ = ['launch_train_with_config']
def apply_default_prefetch(input_source_or_dataflow, trainer):
......
@@ -3,6 +3,7 @@
import numpy as np
from .develop import log_deprecated
__all__ = ['IntBox', 'FloatBox']
@@ -11,6 +12,7 @@ class BoxBase(object):
__slots__ = ['x1', 'y1', 'x2', 'y2']
def __init__(self, x1, y1, x2, y2):
log_deprecated("IntBox and FloatBox", "Please implement them by your own.", "2019-02-28")
self.x1 = x1
self.y1 = y1
self.x2 = x2
......
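Since `IntBox` and `FloatBox` are deprecated in favor of user code, a tiny stand-in is easy to roll; purely illustrative:

```python
from collections import namedtuple

# a minimal replacement for the deprecated box classes; add whatever
# helpers (area, clipping, ...) your project actually needs
Box = namedtuple('Box', ['x1', 'y1', 'x2', 'y2'])

b = Box(10, 20, 50, 80)
w, h = b.x2 - b.x1, b.y2 - b.y1
```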
@@ -5,7 +5,6 @@
import numpy as np
import os
import sys
import io
from .fs import mkdir_p
from .argtools import shape2d
from .palette import PALETTE_RGB
@@ -16,24 +15,12 @@ except ImportError:
pass
__all__ = ['pyplot2img', 'interactive_imshow',
__all__ = ['interactive_imshow',
'stack_patches', 'gen_stack_patches',
'dump_dataflow_images', 'intensity_to_rgb',
'draw_boxes']
def pyplot2img(plt):
""" Convert a pyplot instance to image """
buf = io.BytesIO()
plt.axis('off')
plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
buf.seek(0)
rawbuf = np.frombuffer(buf.getvalue(), dtype='uint8')
im = cv2.imdecode(rawbuf, cv2.IMREAD_COLOR)
buf.close()
return im
def interactive_imshow(img, lclick_cb=None, rclick_cb=None, **kwargs):
"""
Args:
@@ -428,7 +415,6 @@ from ..utils.develop import create_dummy_func # noqa
try:
import matplotlib.pyplot as plt
except (ImportError, RuntimeError):
pyplot2img = create_dummy_func('pyplot2img', 'matplotlib') # noqa
intensity_to_rgb = create_dummy_func('intensity_to_rgb', 'matplotlib') # noqa
if __name__ == '__main__':
......