Commit 399a74df authored by Yuxin Wu

update sphinx==3.0

parent 031e698d
@@ -15,6 +15,33 @@
 import sys, os, re
 import mock
 import inspect
+from sphinx.domains import Domain
+
+
+class GithubURLDomain(Domain):
+    """
+    Resolve certain links in markdown files to github source.
+    """
+
+    name = "githuburl"
+    ROOT = "https://github.com/tensorpack/tensorpack/blob/master/"
+
+    def resolve_any_xref(self, env, fromdocname, builder, target, node, contnode):
+        github_url = None
+        if ".html" not in target:
+            if target.startswith("../../") and not target.startswith("../../modules"):
+                url = target.replace("../", "")
+                github_url = url
+
+        if github_url is not None:
+            if github_url.endswith("README"):
+                # bug of recommonmark.
+                # https://github.com/readthedocs/recommonmark/blob/ddd56e7717e9745f11300059e4268e204138a6b1/recommonmark/parser.py#L152-L155
+                github_url += ".md"
+            print(f"Ref {target} resolved to github:{github_url}")
+            contnode["refuri"] = self.ROOT + github_url
+            return [("githuburl:any", contnode)]
+        else:
+            return []
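The resolver above only rewrites relative link targets that escape the doc tree. A minimal standalone illustration of that mapping (the example target is hypothetical, not part of the commit):

```python
# e.g. a markdown link target "../../examples/ResNet" contains no ".html"
# and is not under "../../modules", so it is rewritten to a GitHub URL
target = "../../examples/ResNet"
url = target.replace("../", "")  # -> "examples/ResNet"
print("https://github.com/tensorpack/tensorpack/blob/master/" + url)
```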
 
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
@@ -43,6 +70,9 @@ except ImportError:
 MOCK_MODULES.extend(['tensorflow.python.training.monitored_session'])
 MOCK_MODULES.extend(['tensorflow.python.training'])
 MOCK_MODULES.extend(['tensorflow.python.client'])
+MOCK_MODULES.extend(['tensorflow.python.framework'])
+MOCK_MODULES.extend(['tensorflow.python.platform'])
+MOCK_MODULES.extend(['tensorflow.python.tools'])
 MOCK_MODULES.extend(['tensorflow.contrib.graph_editor'])
 
 for mod_name in MOCK_MODULES:
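The body of this loop is elided in the diff. In a typical Sphinx `conf.py`, each mocked name is installed into `sys.modules` so autodoc can import the package without a real TensorFlow present — a sketch of that common pattern, not shown by the commit itself:

```python
import sys
import mock

for mod_name in MOCK_MODULES:
    # replace the unimportable module with a stub so `import tensorpack` succeeds
    sys.modules[mod_name] = mock.Mock(name=mod_name)
```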
@@ -55,12 +85,13 @@ import tensorpack
 # -- General configuration ------------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
-needs_sphinx = '1.4'
+needs_sphinx = '3.0'
 
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
+    'recommonmark',
     'sphinx.ext.autodoc',
     'sphinx.ext.todo',
     'sphinx.ext.napoleon',
@@ -92,11 +123,6 @@ intersphinx_mapping = {
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 
-# to support markdown
-from recommonmark.parser import CommonMarkParser
-source_parsers = {
-    '.md': CommonMarkParser,
-}
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 source_suffix = ['.rst', '.md']
@@ -109,7 +135,7 @@ master_doc = 'index'
 # General information about the project.
 project = u'tensorpack'
-copyright = u'2015 - 2019, Yuxin Wu, et al.'
+copyright = u'2015 - 2020, Yuxin Wu, et al.'
 author = u'Yuxin Wu, et al.'
 
 # The version info for the project you're documenting, acts as replacement for
@@ -430,23 +456,14 @@ def autodoc_skip_member(app, what, name, obj, skip, options):
         return True
     return None
 
-def url_resolver(url):
-    if '.html' not in url:
-        return "https://github.com/tensorpack/tensorpack/blob/master/" + url
-    else:
-        if ON_RTD:
-            return "http://tensorpack.readthedocs.io/" + url
-        else:
-            return '/' + url
-
 def setup(app):
     from recommonmark.transform import AutoStructify
+    app.add_domain(GithubURLDomain)
     app.connect('autodoc-process-signature', process_signature)
     app.connect('autodoc-skip-member', autodoc_skip_member)
     app.add_config_value(
         'recommonmark_config',
-        {'url_resolver': url_resolver,
-         'auto_toc_tree_section': 'Contents',
+        {'auto_toc_tree_section': 'Contents',
          'enable_math': True,
          'enable_inline_math': True,
          'enable_eval_rst': True
...
 termcolor
 numpy
 tqdm
-docutils>=0.14
-Sphinx>=1.6
-recommonmark==0.4.0
+docutils==0.16
+Sphinx==3.0.0
+recommonmark==0.6.0
 sphinx_rtd_theme
 mock
 matplotlib
...
@@ -80,7 +80,7 @@ These features are not always necessary, but think about how messy the main loop
 were to write these logic together with the loops, and how easy your life will be if you could enable
 these features with just one line when you need them.
 
-See [list of callbacks](../modules/callbacks.html)
+See [list of callbacks](../modules/callbacks)
 for a long list of tensorpack builtin callbacks.
-See [Write a callback](extend/callback.html)
+See [Write a callback](extend/callback.md)
 for details on how callbacks work, what they can do, and how to write them.
@@ -21,14 +21,14 @@ You can simply use DataFlow as a data processing pipeline and plug it into your
 ### Load Raw Data
 We do not make any assumptions about your data format.
 You would usually want to write the source DataFlow (`MyDataFlow` in the example below) for your own data format.
-See [another tutorial](extend/dataflow.html) for simple instructions on writing a DataFlow.
+See [another tutorial](extend/dataflow.md) for simple instructions on writing a DataFlow.
 
 ### Assemble the Pipeline
 There are a lot of existing DataFlow utilities in tensorpack, which you can use to assemble
 the source DataFlow with complex data pipeline.
 A common pipeline usually would
 __read from disk (or other sources),
 apply transformations,
 group into batches, prefetch data__, etc, and all __run in parallel__.
 A simple DataFlow pipeline is like the following:
@@ -43,8 +43,8 @@ df = BatchData(df, 128)
 df = MultiProcessRunnerZMQ(df, 3)
 ````
 
-A list of built-in DataFlow to use can be found at [API docs](../modules/dataflow.html).
-You can also find complicated real-life DataFlow pipelines in the [ImageNet training script](../examples/ImageNetModels/imagenet_utils.py)
+A list of built-in DataFlow to use can be found at [API docs](../modules/dataflow).
+You can also find complicated real-life DataFlow pipelines in the [ImageNet training script](../../examples/ImageNetModels/imagenet_utils.py)
 or other tensorpack examples.
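For reference, a complete version of the pipeline partially visible in the hunk above might look like the following — a hedged sketch; `MyDataFlow` is the tutorial's placeholder for your own source DataFlow:

```python
from tensorpack.dataflow import AugmentImageComponent, BatchData, MultiProcessRunnerZMQ, imgaug

df = MyDataFlow(dir='/my/data', shuffle=True)                 # read from disk (custom source)
df = AugmentImageComponent(df, [imgaug.Resize((225, 225))])   # apply transformations
df = BatchData(df, 128)                                       # group into batches of 128
df = MultiProcessRunnerZMQ(df, 3)                             # run 3 copies in parallel over ZMQ
```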
 ### Parallelize the Pipeline
@@ -52,10 +52,10 @@ or other tensorpack examples.
 DataFlow includes **carefully optimized** parallel runners and parallel mappers: `Multi{Thread,Process}{Runner,MapData}`.
 Runners execute multiple clones of a dataflow in parallel.
 Mappers execute a mapping function in parallel on top of an existing dataflow.
-You can find details in the [API docs](../modules/dataflow.html) under the
+You can find details in the [API docs](../modules/dataflow) under the
 "parallel" and "parallel_map" section.
 
-[Parallel DataFlow tutorial](parallel-dataflow.html) gives a deeper dive
+[Parallel DataFlow tutorial](parallel-dataflow.md) gives a deeper dive
 on how to use them to optimize your data pipeline.
 
 ### Run the DataFlow
@@ -77,6 +77,6 @@ for dp in df:
 ### Why DataFlow?
 It's **easy and fast**.
-For more discussions, see [Why DataFlow?](/tutorial/philosophy/dataflow.html)
+For more discussions, see [Why DataFlow?](./philosophy/dataflow.md)
 Nevertheless, using DataFlow is not required in tensorpack.
 Tensorpack supports data loading with native TF operators / TF datasets as well.
@@ -10,31 +10,31 @@ or your own code as well.
 **What we are going to do**: We'll use ILSVRC12 dataset, which contains 1.28 million images.
 The original images (JPEG compressed) are 140G in total.
 The average resolution is about 400x350 <sup>[[1]]</sup>.
-Following the [ResNet example](../examples/ResNet), we need images in their original resolution,
+Following the [ResNet example](../../examples/ResNet), we need images in their original resolution,
 so we will read the original dataset (instead of a down-sampled version), and
 then apply complicated preprocessing to it.
 We hope to reach a speed of **1k~5k images per second**, to keep GPUs busy.
 
 Some things to know before reading:
 
-1. You are recommended to read the [Parallel DataFlow Tutorial](parallel-dataflow.html) first.
+1. You are recommended to read the [Parallel DataFlow Tutorial](./parallel-dataflow.md) first.
 1. You only need the data loader to be **fast enough, but not faster**.
    See [How Fast Do You Actually Need](philosophy/dataflow.html#how-fast-do-you-actually-need) for details.
    For smaller datasets (e.g. several GBs of images with lightweight preprocessing),
    a simple reader plus some multiprocess runner is usually fast enough.
 1. Having a fast Python generator **alone** may or may not improve your overall training speed.
    You need mechanisms to hide the latency of **all** preprocessing stages, as mentioned in the
-   [InputSource tutorial](extend/input-source.html).
+   [InputSource tutorial](./extend/input-source.md).
 1. Reading the training set and the validation set are different.
    In training it's OK to reorder, regroup, or even duplicate some datapoints, as long as the
    data distribution stays the same.
    But in validation we often need the exact set of data, to be able to compute a correct and comparable score.
    This will affect how we build the DataFlow.
 1. The actual performance would depend on not only the disk, but also memory (for caching) and CPU (for data processing).
    You may need to tune the parameters (#processes, #threads, size of buffer, etc.)
    or change the pipeline for new tasks and new machines to achieve the best performance.
    The solutions in this tutorial may not help you.
    To improve your own DataFlow, read the
    [performance tuning tutorial](performance-tuning.html#investigate-dataflow)
    before performing or asking about any actual optimizations.
@@ -62,7 +62,7 @@ for filename, label in np.random.shuffle(filelist):
 ```
 
 And `ds1` batches the datapoints from `ds0`, so that we can measure the speed of this DataFlow in terms of "batches per second".
 By default, `BatchData` should stack the datapoints into a `numpy.ndarray`,
 but since the original ImageNet images are of different shapes, we use
 `use_list=True` so that it produces lists for now.
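As an aside, the `use_list` switch is the difference between getting stacked arrays and plain lists out of `BatchData` — a small sketch (`ds0` stands in for the source dataflow from the tutorial):

```python
from tensorpack.dataflow import BatchData

# components are stacked into numpy arrays; every image in a batch must have the same shape
ds_arrays = BatchData(ds0, 256)

# components stay as python lists of 256 items each; works with variable-sized images
ds_lists = BatchData(ds0, 256, use_list=True)
```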
@@ -75,45 +75,50 @@ Image decoding in `cv2.imread` may also be a bottleneck at this early stage, sin
 We will now add the cheapest pre-processing now to get an ndarray in the end instead of a list
 (because training will need ndarray eventually):
 ```eval_rst
 .. code-block:: python
     :emphasize-lines: 2,3
 
-    ds = dataset.ILSVRC12('/path/to/ILSVRC12', 'train', shuffle=True)
-    ds = AugmentImageComponent(ds, [imgaug.Resize(224)])
-    ds = BatchData(ds, 256)
+    ds = dataset.ILSVRC12('/path/to/ILSVRC12', 'train', shuffle=True)
+    ds = AugmentImageComponent(ds, [imgaug.Resize(224)])
+    ds = BatchData(ds, 256)
 ```
 
-You'll start to observe slowdown after adding more pre-processing (such as those in the [ResNet example](../examples/ImageNetModels/imagenet_utils.py)).
+You'll start to observe slowdown after adding more pre-processing (such as those in the [ResNet example](../../examples/ImageNetModels/imagenet_utils.py)).
 Now it's time to add threads or processes:
 ```eval_rst
 .. code-block:: python
     :emphasize-lines: 3
 
     ds0 = dataset.ILSVRC12('/path/to/ILSVRC12', 'train', shuffle=True)
     ds1 = AugmentImageComponent(ds0, lots_of_augmentors)
     ds = MultiProcessRunnerZMQ(ds1, num_proc=25)
     ds = BatchData(ds, 256)
 ```
 
 Here we fork 25 processes to run `ds1`, and collect their output through ZMQ IPC protocol.
 You can also apply parallel runner after batching, of course.
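Applying the runner after `BatchData` means each worker emits full batches rather than single datapoints — a hedged variant of the pipeline above, reusing the tutorial's placeholder names:

```python
ds = dataset.ILSVRC12('/path/to/ILSVRC12', 'train', shuffle=True)
ds = AugmentImageComponent(ds, lots_of_augmentors)
ds = BatchData(ds, 256)
ds = MultiProcessRunnerZMQ(ds, num_proc=25)  # workers now produce whole 256-image batches
```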
 ### Parallel Map
 The above DataFlow might be fast, but since it forks the ImageNet reader (`ds0`),
 it's **not a good idea to use it for validation** (for reasons mentioned at top.
-More details at the [Parallel DataFlow Tutorial](parallel-dataflow) and the [documentation](../modules/dataflow.html#tensorpack.dataflow.MultiProcessRunnerZMQ)).
+More details at the [Parallel DataFlow Tutorial](./parallel-dataflow.md) and the [documentation](../modules/dataflow.html#tensorpack.dataflow.MultiProcessRunnerZMQ)).
 
 Alternatively, you can use parallel mapper like this:
 ```eval_rst
 .. code-block:: python
     :emphasize-lines: 3-6
 
     ds0 = dataset.ILSVRC12('/path/to/ILSVRC12', 'train', shuffle=True)
     augmentor = AugmentorList(lots_of_augmentors)
     ds1 = MultiThreadMapData(
         ds0, num_thread=25,
         map_func=lambda dp: [augmentor.augment(dp[0]), dp[1]], buffer_size=1000)
     # ds1 = MultiProcessRunnerZMQ(ds1, num_proc=1)
     ds = BatchData(ds1, 256)
 ```
 
 `MultiThreadMapData` launches a thread pool to fetch data and apply the mapping function on **a single
 instance of** `ds0`. This is done by an intermediate buffer of size 1000 to hide the mapping latency.
@@ -130,17 +135,17 @@ If you identify this as a bottleneck, you can also use:
 ```eval_rst
 .. code-block:: python
     :emphasize-lines: 5-6
 
     ds0 = dataset.ILSVRC12Files('/path/to/ILSVRC12', 'train', shuffle=True)
     augmentor = AugmentorList(lots_of_augmentors)
     ds1 = MultiThreadMapData(
         ds0, num_thread=25,
         map_func=lambda dp:
             [augmentor.augment(cv2.imread(dp[0], cv2.IMREAD_COLOR)), dp[1]],
         buffer_size=1000)
     ds1 = MultiProcessRunnerZMQ(ds1, num_proc=1)
     ds = BatchData(ds1, 256)
 ```
 
 Let's summarize what the above dataflow does:
@@ -190,11 +195,11 @@ As a reference, on Samsung SSD 850, the uncached speed is about 16it/s.
 ```eval_rst
 .. code-block:: python
     :emphasize-lines: 2
 
     ds = LMDBSerializer.load('/path/to/ILSVRC-train.lmdb', shuffle=False)
     ds = LocallyShuffleData(ds, 50000)
     ds = BatchData(ds, 256, use_list=True)
 ```
 
 Instead of shuffling all the training data in every epoch (which would require random read),
 the added line above maintains a buffer of datapoints and shuffles them once in a while.
@@ -237,7 +242,7 @@ Let me summarize what this DataFlow does:
 1. One process reads the LMDB file, shuffles datapoints in a buffer and puts them into a ZMQ pipe (used by `MultiProcessMapDataZMQ`).
 2. 25 processes take items from the queue, decode and process them into [image, label] pairs, and
    send them through ZMQ IPC pipe to the main process.
 3. The main process takes data from the pipe, makes batches.
 
 The two DataFlow mentioned in this tutorial (both random read and sequential read) can run at a speed of 1k ~ 5k images per second,
@@ -275,7 +280,7 @@ TestDataSpeed(df).start()
 ## Common Issues on Windows:
 
 1. Windows does not support IPC protocol of ZMQ. You can only use `MultiProcessRunner`,
    `MultiThreadRunner`, and `MultiThreadMapData`. But you cannot use
    `MultiProcessRunnerZMQ` or `MultiProcessMapData` (which is an alias of `MultiProcessMapDataZMQ`).
 2. Windows needs to pickle your dataflow to run it in multiple processes.
    As a result you cannot use lambda functions for mappings, like the examples above.
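A hedged sketch of the Windows-friendly alternative: replace the lambda with a module-level function (which is picklable) and use the non-ZMQ runner. The function name `decode_and_augment` is hypothetical, and `augmentor`/`ds0` are the objects from the tutorial code above:

```python
import cv2
from tensorpack.dataflow import MapData, MultiProcessRunner

def decode_and_augment(dp):
    # a top-level function: picklable on Windows, unlike a lambda
    img = cv2.imread(dp[0], cv2.IMREAD_COLOR)
    return [augmentor.augment(img), dp[1]]

ds = MapData(ds0, decode_and_augment)
ds = MultiProcessRunner(ds, num_prefetch=500, num_proc=4)
```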
...
@@ -4,7 +4,7 @@
 The first thing to note: __you never have to write an augmentor__.
 An augmentor is a part of the DataFlow, so you can always
-[write a DataFlow](dataflow.html)
+[write a DataFlow](./dataflow.md)
 to do whatever operations to your data, rather than writing an augmentor.
 Augmentor makes things easier when what you want fits its design.
@@ -28,7 +28,7 @@ To do custom augmentation, you can implement one yourself.
 #### The Design of imgaug Module
 
-The [imgaug module](../../modules/dataflow.imgaug.html) is designed to allow the following usage:
+The [imgaug module](../../modules/dataflow.imgaug) is designed to allow the following usage:
 
 * Factor out randomness and determinism.
   An augmentor often contains randomized policy, e.g., it randomly perturbs each image differently.
@@ -43,13 +43,13 @@ new_img2 = tfm.apply_image(img2)
 new_coords = tfm.apply_coords(coords)
 ```
 
 Due to this design, it can augment images together with its annotations
 (e.g., segmentation masks, bounding boxes, keypoints).
 Our coordinate augmentation enforces floating-point coordinates
 to avoid quantization error.
 
 When you don't need to re-apply the same transformation, you can also just call
 ```python
 new_img = augmentor.augment(img)
 ```
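Putting the two-step design together: the elided line above presumably obtains the transform via `get_transform`, so a full round trip would look roughly like this (a sketch under that assumption):

```python
# sample the random parameters once, then reuse the resulting deterministic transform
tfm = augmentor.get_transform(img)
new_img = tfm.apply_image(img)
new_img2 = tfm.apply_image(img2)        # the same perturbation applied to a second image
new_coords = tfm.apply_coords(coords)   # and to the matching annotations
```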
@@ -57,8 +57,8 @@ new_img = augmentor.augment(img)
 
 * Reset random seed. Random seed can be reset by
   [reset_state](../../modules/dataflow.imgaug.html#tensorpack.dataflow.imgaug.ImageAugmentor.reset_state).
   This is important for multi-process data loading, to make sure different
   processes get different seeds.
   The reset method is called automatically if you use tensorpack's
   [image augmentation dataflow](../../modules/dataflow.html#tensorpack.dataflow.AugmentImageComponent)
   or if you use Python 3.7+.
   Otherwise, **you are responsible** for calling it by yourself in subprocesses.
@@ -77,7 +77,7 @@ class MyAug(imgaug.ImageAugmentor):
         # Randomly generate a deterministic transformation, to be applied on img
         x = random_parameters()
         return MyTransform(x)
 
 class MyTransform(imgaug.Transform):
     def apply_image(self, img):
         return new_img
...
@@ -80,11 +80,11 @@ You can overwrite any of the following methods in the new callback:
 ```
 
 The training loops would become equivalent to `sess.run([training_op, my_op])`.
 
 However, if you write `my_op.run()` in `_trigger_step`, the training loop would become
 `sess.run(training_op); sess.run(my_op);`.
 Usually the difference matters, please choose carefully.
 
 If you want to run ops that depend on your inputs, it's better to run it
 __along with__ the training iteration, to avoid wasting a datapoint and avoid
 messing up hooks of the `InputSource`.
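A minimal sketch of a callback that evaluates an op together with the training iteration via `_before_run`; the op name `"my_op"` is a placeholder for this sketch:

```python
import tensorflow as tf
from tensorpack.callbacks import Callback

class RunMyOpWithTraining(Callback):
    def _setup_graph(self):
        # look up the op once; "my_op" is a hypothetical name
        self._my_op = tf.get_default_graph().get_operation_by_name("my_op")

    def _before_run(self, ctx):
        # fetched in the same sess.run(...) call as the training op
        return tf.train.SessionRunArgs(fetches=self._my_op)
```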
@@ -114,7 +114,7 @@ You can overwrite any of the following methods in the new callback:
 * Access the current graph and session by `self.trainer.graph` and
   `self.trainer.sess`, `self.trainer.hooked_sess`.
   Note that calling `(hooked_)sess.run` to evaluate tensors may have unexpected
   effect in certain scenarios.
   In general, use `sess.run` to evaluate tensors that do not depend on the inputs.
   And use `_{before,after}_run` to evaluate tensors together with inputs if the
   tensors depend on the inputs.
@@ -141,5 +141,5 @@ You can overwrite any of the following methods in the new callback:
 ### Examples
 
-Check source code of the [existing tensorpack callbacks](../../modules/callbacks.html).
+Check source code of the [existing tensorpack callbacks](../../modules/callbacks.md).
 Or grep 'Callback' in tensorpack examples for those implemented as extensions.
@@ -32,21 +32,21 @@ class MyDataFlow(DataFlow):
             digit = np.random.rand(28, 28)
             label = np.random.randint(10)
             yield [digit, label]
 
 df = MyDataFlow()
 df.reset_state()
 for datapoint in df:
     print(datapoint[0], datapoint[1])
 ```
 
 Optionally, you can implement the `__len__` and `reset_state` method.
 The detailed semantics of these three methods are explained
 in the [API documentation](../../modules/dataflow.html#tensorpack.dataflow.DataFlow).
 If you're writing a complicated DataFlow, make sure to read the API documentation
 for the semantics.
 
 DataFlow implementations for several well-known datasets are provided in the
-[dataflow.dataset](../../modules/dataflow.dataset.html)
+[dataflow.dataset](../../modules/dataflow.dataset)
 module. You can take them as examples.
 
 #### More Data Processing
@@ -58,7 +58,7 @@ processing on top of the source DataFlow, e.g.:
 class ProcessingDataFlow(DataFlow):
     def __init__(self, ds):
         self.ds = ds
 
     def reset_state(self):
         self.ds.reset_state()
@@ -69,6 +69,6 @@ class ProcessingDataFlow(DataFlow):
 ```
 
 Some built-in dataflows, e.g.
 [MapData](../../modules/dataflow.html#tensorpack.dataflow.MapData) and
 [MapDataComponent](../../modules/dataflow.html#tensorpack.dataflow.MapDataComponent)
 can do common types of data processing for you.
@@ -39,7 +39,7 @@ This is one of the reasons why tensorpack is [faster](https://github.com/tensorp
 The above discussion is valid regardless of what you use to load/preprocess data,
 either Python code or TensorFlow operators, or a mix of two.
 Both are supported in tensorpack, while we recommend using Python.
-See more discussions at [Why DataFlow?](/tutorial/philosophy/dataflow.html)
+See more discussions at [Why DataFlow?](../philosophy/dataflow.md)
 
 ## InputSource
@@ -75,7 +75,7 @@ Refer to the documentation of these `InputSource` for more details.
 `tensorpack.dataflow` is a pure Python library for efficient data loading which can be used
 independently without TensorFlow or tensorpack trainers.
 However, the `InputSource` interface does require tensorpack and cannot be
 used without tensorpack trainers.
 Without tensorpack trainers, you'll have to optimize the copy latency by yourself.
...
@@ -7,7 +7,7 @@ Tensorpack provides some trainer implementations for such tasks.
 These trainers will help you define the graph, with the following arguments:
 
 1. Some `tf.TensorSpec`, the signature of the input.
-2. An `InputSource`, where the input come from. See [Input Pipeline](input-source.html).
+2. An `InputSource`, where the input come from. See [Input Pipeline](./input-source.md).
 3. A function which takes input tensors and returns the cost.
 4. A function which returns an optimizer.
@@ -114,5 +114,5 @@ You will need to do two things for a new Trainer:
 (global steps, StagingArea, summaries) are maintained through `before_run`/`after_run`.
 
 If you want to write a new trainer,
 Tensorpack examples include several different
 [GAN trainers](../../examples/GAN/GAN.py) for a reference.
@@ -34,7 +34,7 @@ Then it is a good time to open an issue.
 3. The [ProgressBar](../modules/callbacks.html#tensorpack.callbacks.ProgressBar)
    callback can print some scalar statistics, though not enabled by default.
-4. Read [Summary and Logging](summary.html) for more options on logging.
+4. Read [Summary and Logging](./summary.md) for more options on logging.
 
 ## How to freeze some variables in training
@@ -62,4 +62,4 @@ In general, you need to implement the model in a way your version of TensorFlow
 ## My training seems slow. Why?
 
-Checkout the [Performance Tuning tutorial](performance-tuning.html)
+Checkout the [Performance Tuning tutorial](./performance-tuning.md)
@@ -8,7 +8,7 @@ There are two ways to do inference during training.
 1. The easiest way is to write a callback, and use
    [self.trainer.get_predictor()](../modules/train.html#tensorpack.train.TowerTrainer.get_predictor)
    to get a callable under inference mode.
-   See [Write a Callback](extend/callback.html).
+   See [Write a Callback](./extend/callback.md).
 
 2. If your inference follows the paradigm of:
    "evaluate some tensors for each input, and aggregate the results in the end".
@@ -58,7 +58,7 @@ output1_array, output2_array = predictor(input1_array, input2_array)
 It's __common to use a different graph for inference__,
 e.g., use NHWC format, support encoded image format, etc.
 You can make these changes inside the `model` or `tower_func` in your `PredictConfig`.
-The example in [examples/basics/export-model.py](../examples/basics/export-model.py) demonstrates such an altered inference graph.
+The example in [examples/basics/export-model.py](../../examples/basics/export-model.py) demonstrates such an altered inference graph.
 
 OfflinePredictor is only for quick demo purposes.
 It runs inference on numpy arrays, therefore may not be the most efficient way.
@@ -98,7 +98,7 @@ you can also save your models into other formats after training, so it may be mo
     tf.import_graph_def(graph_def)
 ```
 
-[examples/basics/export-model.py](../examples/basics/export-model.py)
+[examples/basics/export-model.py](../../examples/basics/export-model.py)
 demonstrates the usage of such a frozen/pruned graph.
 Again, you may often want to use a different graph for inference and you can
 do so by the arguments of `PredictConfig`.
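For context, loading and running such a frozen graph outside tensorpack follows the standard TF1 pattern — a hedged sketch; the file path, tensor names `input:0`/`output:0`, and the input shape are placeholders, not from the docs:

```python
import numpy as np
import tensorflow as tf
from tensorflow.python.platform import gfile

with gfile.GFile("frozen-model.pb", "rb") as f:   # path is a placeholder
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as g:
    tf.import_graph_def(graph_def, name="")
    inp = g.get_tensor_by_name("input:0")    # hypothetical tensor names
    out = g.get_tensor_by_name("output:0")

with tf.Session(graph=g) as sess:
    result = sess.run(out, feed_dict={inp: np.zeros([1, 224, 224, 3], np.float32)})
```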
...
@@ -66,7 +66,7 @@ to learn more details.
 ## Threads & Processes
 
 Both the above two patterns can be used with
 __either multi-threading or multi-processing__, with the following builtin DataFlows:
 
 * [MultiProcessRunnerZMQ](../modules/dataflow.html#tensorpack.dataflow.MultiProcessRunnerZMQ)
@@ -95,9 +95,9 @@ __zero Python threads__: this is a key implementation detail that makes tensorpa
 faster than the alternatives in Keras or PyTorch.
 
 For a new task, you often need to do a quick benchmark to choose the best pattern.
-See [Performance Tuning Tutorial](performance-tuning.html)
+See [Performance Tuning Tutorial](./performance-tuning.md)
 on how to effectively understand the performance of a DataFlow.
-See also [Efficient DataFlow](efficient-dataflow.html)
+See also [Efficient DataFlow](./efficient-dataflow.md)
 for real examples using the above DataFlows.
 # Performance Tuning
 
 __We do not know why your training is slow__
 (and most of the time it's not due to tensorpack),
 unless we can reproduce the slowness with your instructions.
@@ -50,7 +50,7 @@ A benchmark will give you more precise information about which part you should i
 ## Investigate DataFlow
 
-Understand the [Efficient DataFlow](efficient-dataflow.html) tutorial, so you know what your DataFlow is doing.
+Understand the [Efficient DataFlow](./efficient-dataflow.md) tutorial, so you know what your DataFlow is doing.
 
 Then, make modifications and benchmark your modifications to understand which
 part in the data pipeline is your bottleneck.
 Do __NOT__ look at training speed when you benchmark a DataFlow. Only look at the output of `TestDataSpeed`.
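Benchmarking a dataflow in isolation is one line — a sketch of the standard tool (the `size` argument is optional):

```python
from tensorpack.dataflow import TestDataSpeed

# iterates the dataflow alone and prints its raw speed, independent of training
TestDataSpeed(df, size=1000).start()
```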
@@ -67,14 +67,14 @@ dataflow, you can usually do the following:
    includes both reading cost and the multiprocess communication cost.
    You can now let your reader produce only a single integer after reading a large
    amount of data, so that the pipeline contains only parallel reading cost, but negligible
    communication cost any more.
    If this becomes fast enough, it means that communication is the bottleneck.
    If pure parallel reading is still not fast enough, it means your raw reader is the bottleneck.
 1. In practice the dataflow can be more complicated and you'll need to design
    your own strategies to understand its performance.
 
 Once you've understood which part is the bottleneck,
 you can start optimizing the specific part by methods such as:
 
 1. Use single-file database to avoid random read on hard disk.
@@ -85,7 +85,7 @@ you can start optimizing the specific part by methods such as:
 ## Investigate TensorFlow
 
 When you're sure that data is not a bottleneck (e.g. when the logs show that queue is almost full),
 you can investigate and optimize the model.
 A naive but effective way is to remove ops from your model to understand how much time they cost.
...
@@ -12,7 +12,7 @@ Your data pipeline **only needs to be fast enough**.
 In practice, you should always first make sure your data pipeline runs
 asynchronously with your training.
 The method to do so is different in each training framework,
-and in tensorpack this is automatically done by the [InputSource](/tutorial/extend/input-source.html)
+and in tensorpack this is automatically done by the [InputSource](../extend/input-source.md)
 interface.
 
 Once you make sure the data pipeline runs async with your training,
@@ -29,7 +29,7 @@ DataFlow is fast enough for you unless you use
 8 V100s with both FP16 and XLA enabled, which most people don't.
 For tasks that are less data-hungry (e.g., object detection, or most NLP tasks),
 DataFlow is already an overkill.
-See the [Efficient DataFlow](/tutorial/efficient-dataflow.html) tutorial on how
+See the [Efficient DataFlow](../efficient-dataflow.md) tutorial on how
 to build a fast Python loader with DataFlow.
 
 There is no reason to try a more complicated solution,
...
@@ -14,7 +14,7 @@ Tensorpack also provides a small tool to load checkpoints, see
 [load_chkpt_vars](../modules/tfutils.html#tensorpack.tfutils.varmanip.load_chkpt_vars)
 for details.
 
-[scripts/ls-checkpoint.py](../scripts/ls-checkpoint.py)
+[scripts/ls-checkpoint.py](../../scripts/ls-checkpoint.py)
 demos how to print all variables and their shapes in a checkpoint.
 
 Tensorpack includes another tool to save variables to TF checkpoint, see
@@ -26,7 +26,7 @@ Most models provided by tensorpack are in npz (dictionary) format,
 because it's easy to use without TF dependency.
 You can read/write them with `np.load` and `np.savez`.
 
-[scripts/dump-model-params.py](../scripts/dump-model-params.py) can be used to remove unnecessary variables in a checkpoint
+[scripts/dump-model-params.py](../../scripts/dump-model-params.py) can be used to remove unnecessary variables in a checkpoint
 and save results to a npz.
 It takes a metagraph file (which is also saved by `ModelSaver`) and only saves variables that the model needs at inference time.
 It dumps the model to a `var-name: value` dict saved in npz format.
@@ -46,7 +46,7 @@ session_init=SmartInit(["path1", dict2])  # load them sequentially
 ```
 
 [SmartInit](../modules/tfutils.html#tensorpack.tfutils.sessinit.SmartInit)
 is in fact a small helper which uses some heuristics to return you one of
 [SaverRestore](../modules/tfutils.html#tensorpack.tfutils.sessinit.SaverRestore) or
 [DictRestore](../modules/tfutils.html#tensorpack.tfutils.sessinit.DictRestore).
 They are responsible for the actual initialization work.
@@ -58,7 +58,7 @@ Whatever you use in `session_init`, this is what happens during the initializati
 * Variables that appear in only one side will be printed as warning.
 * Variables of the same name but incompatible shapes will cause exceptions.
   If you set `ignore_mismatch=True`, then such errors will only be printed as warnings.
 
 You can also use `SmartInit` to load a model to a session manually,
 without involving the rest of the tensorpack, by `SmartInit(...).init(session)`.
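A compact sketch of that manual path (the checkpoint path and the session are placeholders):

```python
from tensorpack.tfutils.sessinit import SmartInit

init = SmartInit("/path/to/model.npz")  # heuristically picks SaverRestore or DictRestore
init.init(sess)                         # sess: an already-created tf.Session
```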
...
@@ -113,7 +113,7 @@ from calling `tf.add`. You may need to be careful on some issues:
 It is a bit different to use sonnet/Keras.
 sonnet/Keras manages the variable scope by their own model classes, and calling their symbolic functions
-always creates new variable scope. See the [Keras example](../examples/keras) for how to use it within tensorpack.
+always creates new variable scope. See the [Keras example](../../examples/keras) for how to use it within tensorpack.
 
 ```eval_rst
 .. note:: **It's best to not trust others' layers!**
...
@@ -24,7 +24,7 @@ by exploiting some universal patterns.
 In research we do training of various kind.
 Tensorpack trainers avoid making assumptions on what type of training
 you want to do. For example, unlike Keras, tensorpack does not wrongly assume that:
 
 1. Your training data is batched
 2. Your training is gradient-based optimization
 3. Your data has `X`(inputs) and `y`(outputs)
@@ -45,7 +45,7 @@ Users or derived trainers should implement __what the iterations are__.
 2. The concept of __"epoch"__, i.e. we assume that the iterations run in nested for-loops.
    In fact, the steps per epoch can be any number
-   and it only affects the [schedule of callbacks](callback.html).
+   and it only affects the [schedule of callbacks](./callback.md).
    In other words, an "epoch" in tensorpack is the __default period to run
    callbacks__ (validation, summary, checkpoint, etc.).
    So this assumption effectively puts no extra constraints.
@@ -56,20 +56,20 @@ So this assumption effectively puts no extra constraints.
 Tensorpack implements a few builtin trainers for __single-cost gradient-based optimization__,
 as this is the most common type of task.
 If your training follows this pattern, you only need to __select a trainer__,
-and use it with its [training interface](training-interface.html).
+and use it with its [training interface](./training-interface.md).
 
 The simplest example of such a trainer is
 [SimpleTrainer](../modules/train.html#tensorpack.train.SimpleTrainer).
 All it does is building your model (which you have to provide) once
 (or twice if inference is needed by callbacks) and minimizing its cost.
 
 ### Multi-GPU Trainers
 
-For data-parallel multi-GPU training, different [multi-GPU trainers](../modules/train.html)
+For data-parallel multi-GPU training, different [multi-GPU trainers](../modules/train)
 implement different distribution strategies.
 They take care of device placement, gradient averaging and synchronization
 in the efficient way, which is why multi-GPU training in tensorpack
 is up to
 [5x faster than Keras](https://github.com/tensorpack/benchmarks/tree/master/other-wrappers).
 It takes only one line of code change to use them, e.g. `trainer=SyncMultiGPUTrainerReplicated(...)`.
...
@@ -33,7 +33,7 @@ class MyModel(ModelDesc):
 You can use any symbolic functions in `build_graph`, including TensorFlow core library
 functions and other symbolic libraries.
-`build_graph` will be the tower function, so you need to follow [some rules](trainer.md#tower-trainer).
+`build_graph` will be the tower function, so you need to follow [some rules](trainer.html#tower-trainer).
 Because this interface is specialized for single-cost training, you need to return the cost tensor.
 
 After defining such a model, use it with `TrainConfig` and `launch_train_with_config`:
@@ -84,7 +84,7 @@ The function `launch_train_with_config` exists mainly for historical reasons.
 ### Keras Interface
 
 Some wrappers were made on top of tensorpack trainers, to create a Keras-like interface.
-See [Tensorpack+Keras examples](../examples/keras) for details.
+See the experimental [Tensorpack+Keras examples](../../examples/keras) for details.
 
 ### Raw Trainer Interface
@@ -102,5 +102,5 @@ training, or call
 which applies some default options for common use cases.
 
 Read their API documentation and the
-[advanced trainer tutorial](extend/trainer.html)
+[advanced trainer tutorial](./extend/trainer.md)
 for more details.
@@ -87,7 +87,9 @@ class CascadeRCNNHead(object):
         with tf.name_scope('match_box_with_gt_{}'.format(iou_threshold)):
             iou = pairwise_iou(boxes, self.gt_boxes)  # NxM
             max_iou_per_box = tf.reduce_max(iou, axis=1)  # N
-            best_iou_ind = tf.argmax(iou, axis=1)  # N
+            best_iou_ind = tf.cond(tf.shape(iou)[1] > 0,
+                                   lambda: tf.argmax(iou, axis=1),  # #proposal, each in 0~m-1
+                                   lambda: tf.zeros([tf.shape(iou)[0]], dtype=tf.int64))
             labels_per_box = tf.gather(self.gt_labels, best_iou_ind)
             fg_mask = max_iou_per_box >= iou_threshold
             fg_inds_wrt_gt = tf.boolean_mask(best_iou_ind, fg_mask)
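The change above guards against an empty ground-truth axis: `tf.argmax` over a dimension of size 0 fails at runtime, so when there are no gt boxes the index tensor is filled with zeros instead. A standalone illustration of the guard (the shapes are hypothetical, not from the repo):

```python
import tensorflow as tf

iou = tf.zeros([5, 0])  # 5 proposals, zero ground-truth boxes
best = tf.cond(tf.shape(iou)[1] > 0,
               lambda: tf.argmax(iou, axis=1),                        # normal path
               lambda: tf.zeros([tf.shape(iou)[0]], dtype=tf.int64))  # empty-gt fallback

with tf.Session() as sess:
    print(sess.run(best))  # -> [0 0 0 0 0]
```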
...
@@ -164,7 +164,8 @@ class BatchData(ProxyDataFlow):
             use_list (bool): whether to batch data into a list or a numpy array.
 
         Returns:
-            dp: either a list or a dict, depend on the inputs.
+            dp:
+                either a list or a dict, depend on the inputs.
                 Each item is a batched version of the corresponding inputs.
         """
         first_dp = data_holder[0]
...
@@ -12,7 +12,7 @@ from tensorflow.python.framework import graph_util
 from tensorflow.python.platform import gfile
 from tensorflow.python.tools import optimize_for_inference_lib
 
-from ..compat import is_tfv2, tfv1
+from ..compat import tfv1
 from ..input_source import PlaceholderInput
 from ..tfutils.common import get_tensors_by_names, get_tf_version_tuple
 from ..tfutils.tower import PredictTowerContext
@@ -89,7 +89,7 @@ class ModelExporter(object):
         logger.info("Output graph written to {}.".format(filename))
 
     def export_serving(self, filename,
-                       tags=(tf.saved_model.SERVING if is_tfv2() else tf.saved_model.tag_constants.SERVING,),
+                       tags=None,
                        signature_name='prediction_pipeline'):
         """
         Converts a checkpoint and graph to a servable for TensorFlow Serving.
@@ -97,7 +97,7 @@ class ModelExporter(object):
         Args:
             filename (str): path for export directory
-            tags (tuple): tuple of user specified tags
+            tags (tuple): tuple of user specified tags. Defaults to "SERVING".
             signature_name (str): name of signature for prediction
 
         Note:
@@ -113,6 +113,9 @@ class ModelExporter(object):
             Currently, we only support a single signature, which is the general PredictSignatureDef:
             https://github.com/tensorflow/serving/blob/master/tensorflow_serving/g3doc/signature_defs.md
         """
+        if tags is None:
+            tags = (tf.saved_model.SERVING if get_tf_version_tuple() >= (1, 12)
+                    else tf.saved_model.tag_constants.SERVING)
         self.graph = self.config._maybe_create_graph()
         with self.graph.as_default():
...
@@ -211,7 +211,8 @@ def add_moving_summary(*args, **kwargs):
             summary op. Default is TF's default (`tf.GraphKeys.SUMMARIES`).
 
     Returns:
-        [tf.Tensor]: list of tensors returned by assign_moving_average,
+        [tf.Tensor]:
+            list of tensors returned by assign_moving_average,
             which can be used to maintain the EMA.
     """
     decay = kwargs.pop('decay', 0.95)
...
@@ -95,8 +95,8 @@ def freeze_variables(stop_gradient=True, skip_collection=False):
     1. If a variable is created, or reused outside of the context, it can still contribute to the
        gradient of other tensors.
     2. If a freezed variable is accessed by other approaches (e.g., by names, by collections),
        it can still contribute to the gradient of other tensors.
        For example, weight decay cannot be stopped by a `stop_gradient` context.
 
     `skip_collection` has to be used the first time the variable is created.
     Once `skip_collection` is used, the variable is not a trainable variable anymore,
...
@@ -126,7 +126,7 @@ class Trainer(object):
         2. Increase the global_step
         3. Evaluate some summaries
 
-        Typically you __should not__ use ``hooked_sess.run`` in callbacks,
+        Typically you **should not** use ``hooked_sess.run`` in callbacks,
         because it is for the "training iteration". If you just want to evaluate
         some tensors, use ``sess.run`` if the tensors does not depend on the inputs,
         or more generally, use `before_run/after_run` to evaluate the tensors **along with**
...