Commit dbc50068 authored by Yuxin Wu

update docs & deprecation

parent 69d4e940
......@@ -371,26 +371,20 @@ def process_signature(app, what, name, obj, options, signature,
_DEPRECATED_NAMES = set([
# deprecated stuff:
'QueueInputTrainer',
'SimplePredictBuilder',
'LMDBDataPoint',
'TFRecordData',
'dump_dataflow_to_lmdb',
'dump_dataflow_to_tfrecord',
'IntBox', 'FloatBox',
'dump_dataflow_to_process_queue',
'PrefetchOnGPUs',
'DistributedTrainerReplicated',
'DistributedTrainerParameterServer',
# renamed items that should not appear in docs
'DumpTensor',
'DumpParamAsImage',
'PeriodicRunHooks',
'get_nr_gpu',
'start_test', # TestDataSpeed
'ThreadedMapData',
'TrainingMonitor',
# deprecated or renamed symbolic code
'BilinearUpSample',
'Deconv2D', 'psnr',
# shouldn't appear in doc:
......@@ -416,7 +410,7 @@ def autodoc_skip_member(app, what, name, obj, skip, options):
# Hide some names that are deprecated or not intended to be used
if name in _DEPRECATED_NAMES:
return True
if name in ['__iter__', '__len__', 'reset_state']:
if name in ['__iter__', '__len__', 'reset_state', 'get_data', 'size']:
# skip these methods with empty docstring
if not obj.__doc__ and inspect.isfunction(obj):
# https://stackoverflow.com/questions/3589311/get-defining-class-of-unbound-method-object-in-python-3
......
......@@ -14,11 +14,3 @@ Relevant tutorials: :doc:`../tutorial/dataflow`, :doc:`../tutorial/input-source`
:members:
:undoc-members:
:show-inheritance:
tensorpack.dataflow.dftools module
----------------------------------
.. automodule:: tensorpack.dataflow.dftools
:members:
:undoc-members:
:show-inheritance:
......@@ -11,7 +11,6 @@ from tensorpack.tfutils.summary import add_moving_summary
from tensorpack.tfutils.tower import TowerContext, TowerFuncWrapper
from tensorpack.utils import logger
from tensorpack.utils.argtools import memoized_method
from tensorpack.utils.develop import deprecated
class GANModelDesc(ModelDescBase):
......@@ -150,16 +149,6 @@ class GANTrainer(TowerTrainer):
self.train_op = d_min
class MultiGPUGANTrainer(GANTrainer):
"""
A replacement of GANTrainer (which optimizes d and g one by one) with multi-GPU support.
"""
@deprecated("Please use GANTrainer and set num_gpu", "2019-01-31")
def __init__(self, num_gpu, input, model):
super(MultiGPUGANTrainer, self).__init__(input, model, 1)
class SeparateGANTrainer(TowerTrainer):
""" A GAN trainer which runs two optimization ops with a certain ratio."""
def __init__(self, input, model, d_period=1, g_period=1):
......
......@@ -61,6 +61,8 @@ def CaffeBilinearUpSample(x, shape):
inp_shape = x.shape.as_list()
ch = inp_shape[1]
assert ch == 1, "This layer only works for channel=1"
# for a version that supports >1 channels, see:
# https://github.com/tensorpack/tensorpack/issues/1040#issuecomment-452798180
shape = int(shape)
filter_shape = 2 * shape
......@@ -77,6 +79,7 @@ def CaffeBilinearUpSample(x, shape):
for y in range(s):
ret[x, y] = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
return ret
w = bilinear_conv_filler(filter_shape)
w = np.repeat(w, ch * ch).reshape((filter_shape, filter_shape, ch, ch))
......
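For reference, a minimal standalone sketch of the filler used above, assuming the standard Caffe definitions of ``f`` and ``c`` (``f = ceil(s / 2)``, ``c = (2f - 1 - f % 2) / (2f)``) which the truncated hunk elides:

import numpy as np

def bilinear_conv_filler(s):
    # standard Caffe bilinear filler; f and c per the assumption above
    f = np.ceil(float(s) / 2)
    c = float(2 * f - 1 - f % 2) / (2 * f)
    ret = np.zeros((s, s), dtype='float32')
    for x in range(s):
        for y in range(s):
            ret[x, y] = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
    return ret

print(bilinear_conv_filler(4))  # e.g. shape=2 -> filter_shape = 2 * shape = 4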
......@@ -26,7 +26,7 @@ class StartProcOrThread(Callback):
stop_at_last (bool): whether to stop the processes or threads
after training. It will use :meth:`Process.terminate()` or
:meth:`StoppableThread.stop()`, but will do nothing on normal
`threading.Thread` or other startable objects.
``threading.Thread`` or other startable objects.
"""
if not isinstance(startable, list):
startable = [startable]
......
......@@ -28,7 +28,7 @@ class RunOp(Callback):
"""
Args:
op (tf.Operation or function): an Op, or a function that returns the Op in the graph.
The function will be called after the main graph has been created (in the `setup_graph` callback).
The function will be called after the main graph has been created (in the :meth:`setup_graph` callback).
run_before (bool): run the Op before training
run_as_trigger (bool): run the Op on every :meth:`trigger()` call.
run_step (bool): run the Op every step (along with training)
......@@ -76,8 +76,11 @@ class RunOp(Callback):
class RunUpdateOps(RunOp):
"""
Run ops from the collection UPDATE_OPS every step.
The ops will be hooked to `trainer.hooked_sess` and run along with
each `sess.run` call.
The ops will be hooked to ``trainer.hooked_sess`` and run along with
each ``hooked_sess.run`` call.
Be careful when using ``UPDATE_OPS`` if your model contains more than one sub-network.
Perhaps not all updates are supposed to be executed in every iteration.
"""
def __init__(self, collection=None):
......@@ -105,7 +108,7 @@ class ProcessTensors(Callback):
"""
Fetch extra tensors **along with** each training step,
and call some function over the values.
It uses `_{before,after}_run` method to inject `tf.train.SessionRunHooks`
It uses ``_{before,after}_run`` method to inject ``tf.train.SessionRunHooks``
to the session.
You can use it to print tensors, save tensors to file, etc.
......
......@@ -7,6 +7,8 @@
import tensorflow as tf
from ..tfutils.common import tfv1
from ..utils.develop import HIDE_DOC
from .base import Callback
__all__ = ['CallbackToHook', 'HookToCallback']
......@@ -15,19 +17,21 @@ __all__ = ['CallbackToHook', 'HookToCallback']
class CallbackToHook(tfv1.train.SessionRunHook):
"""
Hooks are less powerful than callbacks, so the conversion is incomplete.
It only converts the `before_run/after_run` calls.
It only converts the ``before_run/after_run`` calls.
This is only for internal implementation of
before_run/after_run callbacks.
``before_run/after_run`` callbacks.
You shouldn't need to use this.
"""
def __init__(self, cb):
self._cb = cb
@HIDE_DOC
def before_run(self, ctx):
return self._cb.before_run(ctx)
@HIDE_DOC
def after_run(self, ctx, vals):
self._cb.after_run(ctx, vals)
......@@ -35,7 +39,7 @@ class CallbackToHook(tfv1.train.SessionRunHook):
class HookToCallback(Callback):
"""
Make a ``tf.train.SessionRunHook`` into a callback.
Note that when `SessionRunHook.after_create_session` is called, the `coord` argument will be None.
Note that when ``SessionRunHook.after_create_session`` is called, the ``coord`` argument will be None.
"""
_chief_only = False
......
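For example, a sketch wrapping a stock TF hook (``ProfilerHook`` is just one ``SessionRunHook``; the output directory is a placeholder):

import tensorflow as tf
from tensorpack.callbacks import HookToCallback

# any tf.train.SessionRunHook can now be used where a Callback is expected
cb = HookToCallback(
    tf.train.ProfilerHook(save_steps=1000, output_dir='/tmp/profile'))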
......@@ -20,7 +20,7 @@ __all__ = ['ScalarStats', 'Inferencer',
class Inferencer(Callback):
""" Base class of Inferencer.
Inferencer is a special kind of callback that should be called by :class:`InferenceRunner`.
It has the methods `_get_fetches` and `_on_fetches` which are like
It has the methods ``_get_fetches`` and ``_on_fetches`` which are like
:class:`SessionRunHooks`, except that they will be used only by :class:`InferenceRunner`.
.. document private functions
......@@ -142,7 +142,7 @@ class ClassificationError(Inferencer):
You can use ``tf.nn.in_top_k`` to produce this vector.
This Inferencer produces the "true" error, which could be different from
`ScalarStats('error_rate')`.
``ScalarStats('error_rate')``.
It takes into account the fact that batches might not have the same size in
testing (because the size of the test set might not be a multiple of the batch size).
Therefore the result can be different from averaging the error rate of each batch.
......
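A sketch of the intended pattern (``logits`` and ``labels`` stand for tensors in your model; the tensor and summary names are illustrative, not fixed by the API):

import tensorflow as tf
from tensorpack.callbacks import ClassificationError

logits = tf.placeholder(tf.float32, [None, 10])
labels = tf.placeholder(tf.int32, [None])

# a 0/1 vector marking wrong predictions per sample, via tf.nn.in_top_k
correct = tf.cast(tf.nn.in_top_k(logits, labels, 1), tf.float32)
wrong = tf.subtract(1.0, correct, name='wrong-top1')

# averages 'wrong-top1' over the whole validation set, weighting the
# (possibly smaller) last batch by its true size
inf = ClassificationError('wrong-top1', summary_name='val-error-top1')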
......@@ -257,13 +257,13 @@ class DataParallelInferenceRunner(InferenceRunnerBase):
self._hooks.append(h)
self._hooks_parallel.append(h)
class InferencerToHookDataParallel(InferencerToHook):
class _InferencerToHookDataParallel(InferencerToHook):
def __init__(self, inf, fetches, size):
"""
Args:
size(int): number of tensors to fetch per tower
"""
super(DataParallelInferenceRunner.InferencerToHookDataParallel, self).__init__(inf, fetches)
super(DataParallelInferenceRunner._InferencerToHookDataParallel, self).__init__(inf, fetches)
assert len(self._fetches) % size == 0
self._sz = size
......@@ -277,7 +277,7 @@ class DataParallelInferenceRunner(InferenceRunnerBase):
out_names = inf.get_fetches()
sz = len(out_names)
fetches = list(itertools.chain(*[t.get_tensors(out_names) for t in self._handles]))
return self.InferencerToHookDataParallel(inf, fetches, sz)
return self._InferencerToHookDataParallel(inf, fetches, sz)
def _build_hook(self, inf):
out_names = inf.get_fetches()
......
......@@ -37,7 +37,7 @@ class InjectShell(Callback):
and iteratively debug the training.
Once the :meth:`trigger` method is called, it detects whether the file exists, and opens an
IPython/pdb shell if it does.
In the shell, `self` is this callback, `self.trainer` is the trainer, and
In the shell, ``self`` is this callback, ``self.trainer`` is the trainer, and
from that you can access everything else.
Example:
......
......@@ -118,8 +118,8 @@ class Monitors(Callback):
Merge monitors together for the trainer to use.
In training, each trainer will create a :class:`Monitors` instance,
and you can access it through `trainer.monitors`.
You should use `trainer.monitors` for logging and it will dispatch your
and you can access it through ``trainer.monitors``.
You should use ``trainer.monitors`` for logging and it will dispatch your
logs to each sub-monitor.
"""
......@@ -575,13 +575,14 @@ class CometMLMonitor(MonitorBase):
@property
def experiment(self):
"""
Returns: the :class:`comet_ml.Experiment` instance.
The :class:`comet_ml.Experiment` instance.
"""
return self._exp
def _before_train(self):
self._exp.set_model_graph(tf.get_default_graph())
@HIDE_DOC
def process_scalar(self, name, val):
self._exp.log_metric(name, val, step=self.global_step)
......
......@@ -23,7 +23,7 @@ __all__ = ['GPUUtilizationTracker', 'GraphProfiler', 'PeakMemoryTracker']
class GPUUtilizationTracker(Callback):
""" Summarize the average GPU utilization within an epoch.
It will start a process to run `nvidia-smi` every second
It will start a process to run ``nvidia-smi`` every second
within the epoch (time spent in ``trigger_epoch`` is not included),
and write average utilization to monitors.
......
......@@ -19,7 +19,7 @@ class MovingAverageSummary(Callback):
This callback is enabled by default.
Maintain the moving average of summarized tensors in every step,
by ops added to the collection.
Note that it only __maintains__ the moving averages by updating
Note that it only **maintains** the moving averages by updating
the relevant variables in the graph,
the actual summary should be done in other callbacks.
"""
......@@ -119,7 +119,7 @@ class MergeAllSummaries_RunWithOp(Callback):
def MergeAllSummaries(period=0, run_alone=False, key=None):
"""
This callback is enabled by default.
Evaluate all summaries by `tf.summary.merge_all`, and write them to logs.
Evaluate all summaries by ``tf.summary.merge_all``, and write them to logs.
Args:
period (int): by default the callback summarizes once every epoch.
......@@ -130,7 +130,7 @@ def MergeAllSummaries(period=0, run_alone=False, key=None):
`sess.run` calls, in the last step of each epoch.
For :class:`SimpleTrainer`, it needs to be False because summary may
depend on inputs.
key (str): the collection of summary tensors. Same as in `tf.summary.merge_all`.
key (str): the collection of summary tensors. Same as in ``tf.summary.merge_all``.
Default is ``tf.GraphKeys.SUMMARIES``.
"""
if key is None:
......
......@@ -2,7 +2,6 @@
# File: trigger.py
from ..utils.develop import log_deprecated
from .base import Callback, ProxyCallback
__all__ = ['PeriodicTrigger', 'PeriodicCallback', 'EnableCallbackIf']
......@@ -63,38 +62,6 @@ class PeriodicTrigger(ProxyCallback):
return "PeriodicTrigger-" + str(self.cb)
class PeriodicRunHooks(ProxyCallback):
"""
Enable the ``{before,after}_run`` methods of a callback every k global steps.
All other methods are untouched.
"""
def __init__(self, callback, every_k_steps):
"""
Args:
callback (Callback):
every_k_steps(int): call ``{before,after}_run`` when
``global_step % k == 0``.
"""
self._every_k_steps = int(every_k_steps)
super(PeriodicRunHooks, self).__init__(callback)
log_deprecated("PeriodicRunHooks", "Use PeriodicCallback instead!", "2019-02-28")
def _before_run(self, ctx):
if self.global_step % self._every_k_steps == 0:
self._enabled = True
return self.cb._before_run(ctx)
else:
self._enabled = False
def _after_run(self, ctx, rv):
if self._enabled:
self.cb._after_run(ctx, rv)
def __str__(self):
return "PeriodicRunHooks-" + str(self.cb)
class EnableCallbackIf(ProxyCallback):
"""
Disable the ``{before,after}_epoch``, ``{before,after}_run``,
......
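The migration the deprecation message points to, as a sketch (``ModelSaver`` is just an example callback to wrap):

from tensorpack.callbacks import ModelSaver, PeriodicCallback

# old: PeriodicRunHooks(cb, every_k_steps=100)
# new: enable the wrapped callback once every 100 global steps
cb = PeriodicCallback(ModelSaver(), every_k_steps=100)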
......@@ -15,7 +15,6 @@ if STATICA_HACK:
from .remote import *
from . import imgaug
from . import dataset
from . import dftools
from pkgutil import iter_modules
......@@ -37,7 +36,7 @@ def _global_import(name):
__all__.append(k)
__SKIP = set(['dftools', 'dataset', 'imgaug'])
__SKIP = set(['dataset', 'imgaug'])
_CURR_DIR = os.path.dirname(__file__)
for _, module_name, __ in iter_modules(
[os.path.dirname(__file__)]):
......@@ -54,4 +53,4 @@ globals()['imgaug'] = LazyLoader('imgaug', globals(), 'tensorpack.dataflow.imgau
del LazyLoader
__all__.extend(['imgaug', 'dftools', 'dataset'])
__all__.extend(['imgaug', 'dataset'])
......@@ -75,9 +75,10 @@ class DataFlow(object):
* For many dataflows, the :meth:`__iter__` method is non-reentrant, which means that for a dataflow
instance ``df``, :meth:`df.__iter__` cannot be called before the previous
:meth:`df.__iter__` call has finished (iteration has stopped).
If a dataflow is non-reentrant, :meth:`df.__iter__` should throw an exception if
When it is non-reentrant, :meth:`df.__iter__` should throw an exception if
called before the previous call has finished.
If you need to use the same dataflow in two places, you can simply create two dataflow instances.
For such non-reentrant dataflows, if you need to use the same dataflow in two places,
you need to create two dataflow instances.
Yields:
list: The datapoint, i.e. list of components.
......@@ -93,10 +94,11 @@ class DataFlow(object):
* It returns an integer representing the size of the dataflow.
The return value **may not be accurate or meaningful** at all.
When it's accurate, it means that :meth:`__iter__` will always yield this many datapoints.
When saying the length is "accurate", it means that
:meth:`__iter__` will always yield this many datapoints.
* There could be many reasons why :meth:`__len__` is inaccurate.
For example, some dataflow has dynamic size.
For example, some dataflows have a dynamic size, e.g. when they throw away datapoints on the fly.
Some dataflows mix the datapoints between consecutive passes over
the dataset, due to parallelism and buffering.
In this case it does not make sense to stop the iteration anywhere.
......@@ -108,7 +110,7 @@ class DataFlow(object):
it yourself, especially when using a data-parallel trainer.
+ The length of progress bar when processing a dataflow.
+ Used by :class:`InferenceRunner` to get the number of iterations in inference.
In this case users are **responsible** for making sure that :meth:`__len__` is accurate.
In this case users are **responsible** for making sure that :meth:`__len__` is "accurate".
This is to guarantee that inference is run on a fixed set of images.
Returns:
......
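To make the contract concrete, a minimal sketch of a dataflow that obeys both methods (an in-memory list of datapoints):

from tensorpack.dataflow import DataFlow

class MyDataFlow(DataFlow):
    def __init__(self, datapoints):
        self._data = list(datapoints)  # e.g. [(img, label), ...]

    def __iter__(self):
        # reentrant and finite: each pass yields exactly len(self) datapoints
        for img, label in self._data:
            yield [img, label]  # a datapoint is a list of components

    def __len__(self):
        return len(self._data)  # "accurate" in the sense above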
# -*- coding: utf-8 -*-
# File: dftools.py
from ..utils.develop import deprecated
from .remote import dump_dataflow_to_process_queue
from .serialize import LMDBSerializer, TFRecordSerializer
__all__ = ['dump_dataflow_to_process_queue',
'dump_dataflow_to_lmdb', 'dump_dataflow_to_tfrecord']
@deprecated("Use LMDBSerializer.save instead!", "2019-01-31")
def dump_dataflow_to_lmdb(df, lmdb_path, write_frequency=5000):
LMDBSerializer.save(df, lmdb_path, write_frequency)
@deprecated("Use TFRecordSerializer.save instead!", "2019-01-31")
def dump_dataflow_to_tfrecord(df, path):
TFRecordSerializer.save(df, path)
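The replacements that the two deprecation messages point to, as a sketch (``df`` and the paths are placeholders):

from tensorpack.dataflow.serialize import LMDBSerializer, TFRecordSerializer

# dump_dataflow_to_lmdb(df, path)  ->  LMDBSerializer.save(df, path)
LMDBSerializer.save(df, '/tmp/data.lmdb', write_frequency=5000)
df2 = LMDBSerializer.load('/tmp/data.lmdb', shuffle=False)

# dump_dataflow_to_tfrecord(df, path)  ->  TFRecordSerializer.save(df, path)
TFRecordSerializer.save(df, '/tmp/data.tfrecord')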
......@@ -11,15 +11,14 @@ from ..utils import logger
from ..utils.argtools import log_once
from ..utils.compatible_serialize import loads
from ..utils.develop import create_dummy_class # noqa
from ..utils.develop import log_deprecated
from ..utils.loadcaffe import get_caffe_pb
from ..utils.timer import timed_operation
from ..utils.utils import get_tqdm
from .base import DataFlow, DataFlowReentrantGuard, RNGDataFlow
from .base import DataFlowReentrantGuard, RNGDataFlow
from .common import MapData
__all__ = ['HDF5Data', 'LMDBData', 'LMDBDataDecoder', 'LMDBDataPoint',
'CaffeLMDB', 'SVMLightData', 'TFRecordData']
__all__ = ['HDF5Data', 'LMDBData', 'LMDBDataDecoder',
'CaffeLMDB', 'SVMLightData']
"""
Adapters for different data format.
......@@ -165,21 +164,6 @@ class LMDBDataDecoder(MapData):
super(LMDBDataDecoder, self).__init__(lmdb_data, f)
class LMDBDataPoint(MapData):
def __init__(self, *args, **kwargs):
log_deprecated("LMDBDataPoint", "Use LMDBSerializer.load() instead!", "2019-01-31")
if isinstance(args[0], DataFlow):
ds = args[0]
assert len(args) == 1 and len(kwargs) == 0, \
"No more arguments are allowed if LMDBDataPoint is called with a LMDBData instance!"
else:
ds = LMDBData(*args, **kwargs)
def f(dp):
return loads(dp[1])
super(LMDBDataPoint, self).__init__(ds, f)
def CaffeLMDB(lmdb_path, shuffle=True, keys=None):
"""
Read a Caffe LMDB file where each value contains a ``caffe.Datum`` protobuf.
......@@ -243,23 +227,6 @@ class SVMLightData(RNGDataFlow):
yield [self.X[id, :], self.y[id]]
class TFRecordData(DataFlow):
def __init__(self, path, size=None):
log_deprecated("TFRecordData", "Use TFRecordSerializer.load instead!", "2019-01-31")
self._path = path
self._size = int(size)
def __len__(self):
if self._size:
return self._size
return len(super(TFRecordData, self))
def __iter__(self):
gen = tf.python_io.tf_record_iterator(self._path)
for dp in gen:
yield loads(dp)
try:
import h5py
except ImportError:
......@@ -268,10 +235,5 @@ except ImportError:
try:
import lmdb
except ImportError:
for klass in ['LMDBData', 'LMDBDataDecoder', 'LMDBDataPoint', 'CaffeLMDB']:
for klass in ['LMDBData', 'LMDBDataDecoder', 'CaffeLMDB']:
globals()[klass] = create_dummy_class(klass, 'lmdb')
try:
import tensorflow as tf
except ImportError:
TFRecordData = create_dummy_class('TFRecordData', 'tensorflow') # noqa
......@@ -16,7 +16,7 @@ class ColorSpace(ImageAugmentor):
def __init__(self, mode, keepdims=True):
"""
Args:
mode: OpenCV color space conversion code (e.g., `cv2.COLOR_BGR2HSV`)
mode: OpenCV color space conversion code (e.g., ``cv2.COLOR_BGR2HSV``)
keepdims (bool): keep the dimension of image unchanged if OpenCV
changes it.
"""
......
......@@ -89,8 +89,8 @@ class RandomCropRandomShape(TransformAugmentorBase):
class GoogleNetRandomCropAndResize(ImageAugmentor):
"""
The random crop and resize augmentation proposed in
Sec. 6 of `Going Deeper with Convolutions` by Google.
This implementation follows the details in `fb.resnet.torch`.
Sec. 6 of "Going Deeper with Convolutions" by Google.
This implementation follows the details in ``fb.resnet.torch``.
It attempts to crop a random rectangle covering 8%~100% of the area of the original image,
and keeps the aspect ratio between 3/4 and 4/3. Then it resizes this crop to the target shape.
......
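A sketch of the sampling rule just described (a single attempt; the real augmentor retries several times and falls back to a center crop, following fb.resnet.torch):

import numpy as np

def sample_googlenet_crop(h, w, rng=np.random):
    # sample a target area (8%~100%) and aspect ratio (3/4~4/3), then
    # check that the resulting rectangle fits inside the image
    target_area = rng.uniform(0.08, 1.0) * h * w
    aspect_ratio = rng.uniform(3. / 4, 4. / 3)
    ch = int(round(np.sqrt(target_area / aspect_ratio)))
    cw = int(round(np.sqrt(target_area * aspect_ratio)))
    if ch <= h and cw <= w:
        y0 = rng.randint(0, h - ch + 1)
        x0 = rng.randint(0, w - cw + 1)
        return y0, x0, ch, cw
    return None  # caller retries, then falls back to a center crop

print(sample_googlenet_crop(480, 640))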
......@@ -11,18 +11,16 @@ import uuid
import weakref
from contextlib import contextmanager
import zmq
from six.moves import queue, range, zip
from six.moves import queue, range
from ..utils import logger
from ..utils.concurrency import (
StoppableThread, enable_death_signal, ensure_proc_terminate, mask_sigint, start_proc_mask_signal)
from ..utils.develop import log_deprecated
from ..utils.gpu import change_gpu
StoppableThread, enable_death_signal, ensure_proc_terminate, start_proc_mask_signal)
from ..utils.serialize import dumps, loads
from .base import DataFlow, DataFlowReentrantGuard, DataFlowTerminated, ProxyDataFlow
__all__ = ['PrefetchData', 'MultiProcessPrefetchData',
'PrefetchDataZMQ', 'PrefetchOnGPUs', 'MultiThreadPrefetchData']
'PrefetchDataZMQ', 'MultiThreadPrefetchData']
def _repeat_iter(get_itr):
......@@ -341,30 +339,6 @@ class PrefetchDataZMQ(_MultiProcessZMQDataFlow):
self._start_processes()
class PrefetchOnGPUs(PrefetchDataZMQ):
"""
Similar to :class:`PrefetchDataZMQ`,
but prefetches with each process having its own ``CUDA_VISIBLE_DEVICES`` variable
mapped to one GPU.
"""
def __init__(self, ds, gpus):
"""
Args:
ds (DataFlow): input DataFlow.
gpus (list[int]): list of GPUs to use. Will also start this number of processes.
"""
log_deprecated("PrefetchOnGPUs", "It does not seem useful, and please implement it yourself.", "2019-02-28")
self.gpus = gpus
super(PrefetchOnGPUs, self).__init__(ds, len(gpus))
def _start_processes(self):
with mask_sigint():
for gpu, proc in zip(self.gpus, self._procs):
with change_gpu(gpu):
proc.start()
# TODO renamed to MultiThreadDataFlow if separated to a new project
class MultiThreadPrefetchData(DataFlow):
"""
......
......@@ -96,19 +96,19 @@ class MultiThreadMapData(_ParallelMapData):
This is useful when the mapping function is the bottleneck, but you don't
want to start processes for the entire dataflow pipeline.
The semantics of this class is __identical__ to :class:`MapData` except for the ordering.
The semantics of this class is **identical** to :class:`MapData` except for the ordering.
Threads run in parallel and can take different amounts of time to run the
mapping function. Therefore the order of datapoints won't be preserved.
When `strict=True`, `MultiThreadMapData(df, ...)`
is guaranteed to produce the exact set of data as `MapData(df, ...)`,
if both are iterated until `StopIteration`. But the produced data will have different ordering.
The behavior of strict mode is undefined if the given dataflow `df` is infinite.
When ``strict=True``, ``MultiThreadMapData(df, ...)``
is guaranteed to produce the exact set of data as ``MapData(df, ...)``,
if both are iterated until ``StopIteration``. But the produced data will have different ordering.
The behavior of strict mode is undefined if the given dataflow ``df`` is infinite.
When `strict=False`, the data that's produced by `MultiThreadMapData(df, ...)`
is a reordering of the data produced by `RepeatedData(MapData(df, ...), -1)`.
In other words, first pass of `MultiThreadMapData.__iter__` may contain
datapoints from the second pass of `df.__iter__`.
When ``strict=False``, the data that's produced by ``MultiThreadMapData(df, ...)``
is a reordering of the data produced by ``RepeatedData(MapData(df, ...), -1)``.
In other words, first pass of ``MultiThreadMapData.__iter__`` may contain
datapoints from the second pass of ``df.__iter__``.
Note:
......@@ -212,19 +212,19 @@ class MultiProcessMapDataZMQ(_ParallelMapData, _MultiProcessZMQDataFlow):
Same as :class:`MapData`, but start processes to run the mapping function,
and communicate with ZeroMQ pipe.
The semantics of this class is __identical__ to :class:`MapData` except for the ordering.
The semantics of this class is **identical** to :class:`MapData` except for the ordering.
Processes run in parallel and can take different amounts of time to run the
mapping function. Therefore the order of datapoints won't be preserved.
When `strict=True`, `MultiProcessMapData(df, ...)`
is guaranteed to produce the exact set of data as `MapData(df, ...)`,
if both are iterated until `StopIteration`. But the produced data will have different ordering.
The behavior of strict mode is undefined if the given dataflow `df` is infinite.
When ``strict=True``, ``MultiProcessMapData(df, ...)``
is guaranteed to produce the exact set of data as ``MapData(df, ...)``,
if both are iterated until ``StopIteration``. But the produced data will have different ordering.
The behavior of strict mode is undefined if the given dataflow ``df`` is infinite.
When `strict=False`, the data that's produced by `MultiProcessMapData(df, ...)`
is a reordering of the data produced by `RepeatedData(MapData(df, ...), -1)`.
In other words, first pass of `MultiProcessMapData.__iter__` may contain
datapoints from the second pass of `df.__iter__`.
When ``strict=False``, the data that's produced by ``MultiProcessMapData(df, ...)``
is a reordering of the data produced by ``RepeatedData(MapData(df, ...), -1)``.
In other words, first pass of ``MultiProcessMapData.__iter__`` may contain
datapoints from the second pass of ``df.__iter__``.
"""
class _Worker(mp.Process):
def __init__(self, identity, map_func, pipename, hwm):
......
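A usage sketch of the strict mode described above (positional arguments follow the ``(ds, nr_proc, map_func)`` order of this version; ``ds`` and ``f`` are placeholders):

from tensorpack.dataflow import MultiProcessMapDataZMQ

def f(dp):
    return dp  # placeholder per-datapoint mapping

# strict=True: same set of datapoints as MapData(ds, f), different order
# strict=False: may mix datapoints across passes of ds.__iter__
ds = MultiProcessMapDataZMQ(ds, 4, f, strict=True)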
......@@ -31,7 +31,7 @@ class LMDBSerializer():
Serialize a Dataflow to an lmdb database, where the keys are indices and values
are serialized datapoints.
You will need to `pip install lmdb` to use it.
You will need to ``pip install lmdb`` to use it.
"""
@staticmethod
def save(df, path, write_frequency=5000):
......
......@@ -8,7 +8,6 @@ if STATICA_HACK:
from .model_desc import *
from .training import *
from .distributed import *
from .predict import *
from .utils import *
from pkgutil import iter_modules
......
# -*- coding: utf-8 -*-
# File: predict.py
import tensorflow as tf
from ..tfutils.tower import PredictTowerContext
from ..utils import logger
from ..utils.develop import deprecated
from .training import GraphBuilder
__all__ = ['SimplePredictBuilder']
class SimplePredictBuilder(GraphBuilder):
"""
Single-tower predictor.
"""
@deprecated("Please use TowerContext to build it by yourself!", "2018-12-31")
def __init__(self, ns_name='', vs_name='', device=0):
"""
Args:
ns_name (str):
vs_name (str):
device (int):
"""
self._ns_name = ns_name
self._vs_name = vs_name
device = '/gpu:{}'.format(device) if device >= 0 else '/cpu:0'
self._device = device
def build(self, input, tower_fn):
"""
Args:
input (InputSource): must have been setup
tower_fn ( [tf.Tensors] ->): callable that takes input tensors.
Returns:
The return value of tower_fn called under the proper context.
"""
assert input.setup_done()
logger.info("Building predictor tower '{}' on device {} ...".format(
self._ns_name, self._device))
with tf.device(self._device), \
PredictTowerContext(
self._ns_name, vs_name=self._vs_name):
inputs = input.get_input_tensors()
assert isinstance(inputs, (list, tuple)), inputs
return tower_fn(*inputs)
......@@ -11,8 +11,7 @@ from .common import layer_register
from .shape_utils import StaticDynamicShape
from .tflayer import convert_to_tflayer_args
__all__ = ['MaxPooling', 'FixedUnPooling', 'AvgPooling', 'GlobalAvgPooling',
'BilinearUpSample']
__all__ = ['MaxPooling', 'FixedUnPooling', 'AvgPooling', 'GlobalAvgPooling']
@layer_register(log_shape=True)
......@@ -141,7 +140,7 @@ def FixedUnPooling(x, shape, unpool_mat=None, data_format='channels_last'):
return ret
@layer_register(log_shape=True)
# Already removed (not importable); kept here just for testing purposes.
def BilinearUpSample(x, shape):
"""
Deterministic bilinear upsampling of the input images.
......@@ -158,7 +157,7 @@ def BilinearUpSample(x, shape):
log_deprecated("BilinearUpsample", "Please implement it in your own code instead!", "2019-03-01")
inp_shape = x.shape.as_list()
ch = inp_shape[3]
assert ch is not None
assert ch is not None and ch == 1
shape = int(shape)
filter_shape = 2 * shape
......@@ -222,7 +221,7 @@ class TestPool(TestModel):
inp = self.make_variable(mat)
inp = tf.reshape(inp, [1, h, w, 1])
output = BilinearUpSample('upsample', inp, scale)
output = BilinearUpSample(inp, scale)
res = self.run_variable(output)[0, :, :, 0]
from skimage.transform import rescale
......
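Per the deprecation message, a sketch of a self-written replacement; note that ``tf.image.resize_bilinear`` is not numerically identical to the deterministic transposed-convolution kernel above:

import tensorflow as tf

def bilinear_upsample(x, scale):
    # x: NHWC tensor; scale: integer upsampling factor
    shape = tf.shape(x)
    return tf.image.resize_bilinear(
        x, [shape[1] * scale, shape[2] * scale], align_corners=True)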
......@@ -85,7 +85,7 @@ def enable_argscope_for_function(func, log_shape=True):
shape should be printed once.
Remarks:
If the function `func` returns multiple input or output tensors,
If the function ``func`` returns multiple input or output tensors,
only the first input/output tensor shape is displayed during logging.
Returns:
......@@ -126,7 +126,7 @@ def enable_argscope_for_module(module, log_shape=True):
Overwrite all functions of a given module to support argscope.
Note that this function monkey-patches the module and therefore could
have unexpected consequences.
It has been only tested to work well with `tf.layers` module.
It has been only tested to work well with ``tf.layers`` module.
Example:
......
......@@ -6,7 +6,6 @@ import tensorflow as tf
from six.moves import map
from ..utils.argtools import graph_memoized
from ..utils.develop import deprecated
__all__ = ['get_default_sess_config',
'get_global_step_value',
......@@ -25,8 +24,8 @@ def get_default_sess_config(mem_fraction=0.99):
Args:
mem_fraction(float): see the `per_process_gpu_memory_fraction` option
in TensorFlow's GPUOptions protobuf:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto
in TensorFlow's GPUOptions protobuf:
https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto
Returns:
tf.ConfigProto: the config to use.
......@@ -63,8 +62,7 @@ def get_default_sess_config(mem_fraction=0.99):
def get_global_step_var():
"""
Returns:
tf.Tensor: the global_step variable in the current graph. Create if
doesn't exist.
tf.Tensor: the global_step variable in the current graph, created if it doesn't exist.
"""
scope = tf.VariableScope(reuse=False, name='') # the root vs
with tf.variable_scope(scope):
......@@ -149,11 +147,6 @@ def gpu_available_in_session():
return False
@deprecated("Use get_tf_version_tuple instead.", "2019-01-31")
def get_tf_version_number():
return float('.'.join(tf.__version__.split('.')[:2]))
def get_tf_version_tuple():
"""
Return TensorFlow version as a 2-element tuple (for comparison).
......
......@@ -18,7 +18,7 @@ from ..tfutils.sesscreate import NewSessionCreator
from ..tfutils.tower import TrainTowerContext
from ..utils import logger
from ..utils.argtools import map_arg
from ..utils.develop import HIDE_DOC, log_deprecated
from ..utils.develop import HIDE_DOC
from .tower import SingleCostTrainer
__all__ = ['NoOpTrainer', 'SimpleTrainer',
......@@ -162,7 +162,7 @@ class SyncMultiGPUTrainerReplicated(SingleCostTrainer):
"""
@map_arg(gpus=_int_to_range)
def __init__(self, gpus, average=True, mode=None, use_nccl=None):
def __init__(self, gpus, average=True, mode=None):
"""
Args:
gpus (int or [int]): list of GPU ids.
......@@ -172,13 +172,9 @@ class SyncMultiGPUTrainerReplicated(SingleCostTrainer):
Defaults to picking automatically by heuristics.
These modes may have slight (within 5%) differences in speed.
"hierarchical" mode was designed for DGX-like 8GPU machines.
use_nccl: deprecated option
"""
self.devices = gpus
if use_nccl is not None:
mode = 'nccl' if use_nccl else None
log_deprecated("use_nccl option", "Use the `mode` option instead!", "2019-01-31")
if mode is None:
mode = 'hierarchical' if len(gpus) == 8 else 'nccl'
mode = mode.lower()
......
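The migration for the removed option, as a sketch:

from tensorpack.train import SyncMultiGPUTrainerReplicated

# old: SyncMultiGPUTrainerReplicated(8, use_nccl=True)
# new: pass the equivalent `mode` instead (None picks by heuristics)
trainer = SyncMultiGPUTrainerReplicated(8, mode='nccl')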
# -*- coding: utf-8 -*-
# File: rect.py
import numpy as np
from .develop import log_deprecated
__all__ = ['IntBox', 'FloatBox']
class BoxBase(object):
__slots__ = ['x1', 'y1', 'x2', 'y2']
def __init__(self, x1, y1, x2, y2):
log_deprecated("IntBox and FloatBox", "Please implement them by your own.", "2019-02-28")
self.x1 = x1
self.y1 = y1
self.x2 = x2
self.y2 = y2
def copy(self):
new = type(self)()
for i in self.__slots__:
setattr(new, i, getattr(self, i))
return new
def __str__(self):
return '{}(x1={}, y1={}, x2={}, y2={})'.format(
type(self).__name__, self.x1, self.y1, self.x2, self.y2)
__repr__ = __str__
def area(self):
return self.w * self.h
def is_box(self):
return self.w > 0 and self.h > 0
def to_list(self):
return [self.x1, self.y1, self.x2, self.y2]
class IntBox(BoxBase):
def __init__(self, x1, y1, x2, y2):
for k in [x1, y1, x2, y2]:
assert isinstance(k, int)
super(IntBox, self).__init__(x1, y1, x2, y2)
@property
def w(self):
return self.x2 - self.x1 + 1
@property
def h(self):
return self.y2 - self.y1 + 1
def is_valid_box(self, shape):
"""
Check that this rect is a valid bounding box within this shape.
Args:
shape: int [h, w] or None.
Returns:
bool
"""
if min(self.x1, self.y1) < 0:
return False
if min(self.w, self.h) <= 0:
return False
if self.x2 >= shape[1]:
return False
if self.y2 >= shape[0]:
return False
return True
def clip_by_shape(self, shape):
"""
Clip xs and ys to be valid coordinates inside shape
Args:
shape: int [h, w] or None.
"""
self.x1 = np.clip(self.x1, 0, shape[1] - 1)
self.x2 = np.clip(self.x2, 0, shape[1] - 1)
self.y1 = np.clip(self.y1, 0, shape[0] - 1)
self.y2 = np.clip(self.y2, 0, shape[0] - 1)
def roi(self, img):
assert self.is_valid_box(img.shape[:2]), "{} vs {}".format(self, img.shape[:2])
return img[self.y1:self.y2 + 1, self.x1:self.x2 + 1]
class FloatBox(BoxBase):
def __init__(self, x1, y1, x2, y2):
for k in [x1, y1, x2, y2]:
assert isinstance(k, float), "type={},value={}".format(type(k), k)
super(FloatBox, self).__init__(x1, y1, x2, y2)
@property
def w(self):
return self.x2 - self.x1
@property
def h(self):
return self.y2 - self.y1
@staticmethod
def from_intbox(intbox):
return FloatBox(intbox.x1, intbox.y1,
intbox.x2 + 1, intbox.y2 + 1)
def clip_by_shape(self, shape):
self.x1 = np.clip(self.x1, 0, shape[1])
self.x2 = np.clip(self.x2, 0, shape[1])
self.y1 = np.clip(self.y1, 0, shape[0])
self.y2 = np.clip(self.y2, 0, shape[0])
if __name__ == '__main__':
x = IntBox(2, 1, 3, 3)
img = np.random.rand(3, 3)
print(img)