Commit 4f52bcfd authored by Yuxin Wu

small update & bump version

parent 9972b150
......@@ -26,10 +26,10 @@ matrix:
env: TF_VERSION=1.3.0 TF_TYPE=release
- os: linux
python: 2.7
env: TF_VERSION=1.5.0 TF_TYPE=release
env: TF_VERSION=1.6.0 TF_TYPE=release
- os: linux
python: 3.5
env: TF_VERSION=1.5.0 TF_TYPE=release
env: TF_VERSION=1.6.0 TF_TYPE=release
- os: linux
python: 2.7
env: TF_VERSION=1.head TF_TYPE=nightly
......
......@@ -8,8 +8,9 @@ you'll use mostly tensorpack high-level APIs to do training, rather than TensorF
Why tensorpack?
~~~~~~~~~~~~~~~~~~~
TensorFlow is powerful, but at the same time too complicated for a lot of people, especially when **speed** is a concern.
Users can often write slow code with low-level APIs or other existing high-level wrappers.
TensorFlow is powerful, but at the same time too complicated for a lot of people.
Users will have to worry a lot about things unrelated to the model, especially when **speed** is a concern.
Code written with low-level APIs or other existing high-level wrappers is often suboptimal in speed.
Even many official TensorFlow examples are written for simplicity rather than efficiency,
which in turn leads people to think TensorFlow is slow.
......@@ -22,7 +23,7 @@ However you can have them both in tensorpack.
Tensorpack uses TensorFlow efficiently, and hides performance details under its APIs.
You no longer need to write
data prefetching, multi-GPU replication, device placement, or variable synchronization -- anything that's unrelated to the model itself.
You still need to learn to write models with TF, but performance is all taken care of by tensorpack.
You still need to understand the TensorFlow graph and learn to write models with TF, but performance is all taken care of by tensorpack.
A High Level Glance
~~~~~~~~~~~~~~~~~~~
......
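The docs hunk above promises that prefetching, replication, and synchronization are handled for you. As a rough illustration only, here is a minimal sketch of what a training script built on these high-level APIs might look like around tensorpack 0.8; the toy model, shapes, and FakeData placeholder input are assumptions for the example, not code from this commit.

```python
import tensorflow as tf
from tensorpack import (InputDesc, ModelDesc, SimpleTrainer,
                        TrainConfig, launch_train_with_config)
from tensorpack.dataflow import FakeData
from tensorpack.utils import logger


class ToyModel(ModelDesc):
    def _get_inputs(self):
        # Placeholders the graph expects; names are used to match datapoints.
        return [InputDesc(tf.float32, (None, 28, 28, 1), 'image'),
                InputDesc(tf.int32, (None,), 'label')]

    def _build_graph(self, inputs):
        image, label = inputs
        logits = tf.layers.dense(tf.reshape(image, [-1, 28 * 28]), 10)
        # The trainer picks up self.cost as the objective to minimize.
        self.cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=label, logits=logits), name='cost')

    def _get_optimizer(self):
        return tf.train.AdamOptimizer(1e-3)


if __name__ == '__main__':
    logger.auto_set_dir()  # default log directory for checkpoints/summaries
    # FakeData stands in for a real DataFlow; prefetching, summaries and
    # checkpointing come from the trainer and the default callbacks.
    df = FakeData([[64, 28, 28, 1], [64]], size=1000,
                  dtype=['float32', 'int32'])
    launch_train_with_config(
        TrainConfig(model=ToyModel(), dataflow=df, max_epoch=1),
        SimpleTrainer())
```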
......@@ -8,13 +8,14 @@ import copy
from tensorpack.utils.argtools import memoized, log_once
from tensorpack.dataflow import (
imgaug, TestDataSpeed, PrefetchDataZMQ, MultiProcessMapData,
imgaug, TestDataSpeed, PrefetchDataZMQ, MapData,
MapDataComponent, DataFromList)
# import tensorpack.utils.viz as tpviz
from coco import COCODetection
from utils.generate_anchors import generate_anchors
from utils.np_box_ops import iou as np_iou
from utils.np_box_ops import area as np_area
from common import (
DataFromListOfDict, CustomResize,
box_to_point8, point8_to_box, segmentation_to_mask)
......@@ -231,6 +232,7 @@ def get_train_dataflow(add_mask=False):
points = box_to_point8(boxes)
points = aug.augment_coords(points, params)
boxes = point8_to_box(points)
assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
# rpn anchor:
try:
......@@ -267,7 +269,8 @@ def get_train_dataflow(add_mask=False):
# tpviz.interactive_imshow(viz)
return ret
ds = MultiProcessMapData(ds, 3, preprocess)
ds = MapData(ds, preprocess)
ds = PrefetchDataZMQ(ds, 1)
return ds
......@@ -286,9 +289,10 @@ def get_eval_dataflow():
if __name__ == '__main__':
config.BASEDIR = '/private/home/yuxinwu/data/coco'
config.TRAIN_DATASET = ['train2014']
import os
from tensorpack.dataflow import PrintData
config.BASEDIR = os.path.expanduser('~/data/coco')
config.TRAIN_DATASET = ['train2014']
ds = get_train_dataflow(add_mask=config.MODE_MASK)
ds = PrintData(ds, 100)
TestDataSpeed(ds, 50000).start()
......
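For readers comparing the two pipeline variants in the hunks above: MultiProcessMapData runs the mapping function in several worker processes, while the replacement maps in-process and then forks the whole dataflow once behind PrefetchDataZMQ. The sketch below contrasts the two compositions with a trivial placeholder map function; it illustrates the DataFlow API as of tensorpack 0.8 and is not code from this commit.

```python
from tensorpack.dataflow import (DataFromList, MapData,
                                 MultiProcessMapData, PrefetchDataZMQ)


def preprocess(dp):
    # Placeholder for the per-datapoint work (augmentation, anchor targets, ...).
    return dp


ds = DataFromList([[i] for i in range(100)], shuffle=False)

# Variant A: parallelize the map function itself across 3 worker processes;
# may not preserve datapoint order.
# ds = MultiProcessMapData(ds, 3, preprocess)

# Variant B (used above): map in the parent process, then run the whole
# dataflow in one forked process and stream datapoints back over ZMQ.
ds = MapData(ds, preprocess)
ds = PrefetchDataZMQ(ds, 1)
```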
......@@ -28,7 +28,7 @@ To train, first decompress ImageNet data into [this structure](http://tensorpack
You should be able to see good GPU utilization (95%~99%) if your data is fast enough.
It can finish training [within 20 hours](http://dawn.cs.stanford.edu/benchmark/ImageNet/train.html) on AWS p3.16xlarge.
The default data pipeline is probably OK for machines with SSD + E5 CPUs.
The default data pipeline is probably OK for machines with an SSD and 20 CPU cores.
See the [tutorial](http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html) on other options to speed up your data.
![imagenet](imagenet-resnet.png)
......
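Whether the pipeline is "fast enough" is easy to measure directly. A possible check, assuming the get_imagenet_dataflow and fbresnet_augmentor helpers from this example's imagenet_utils.py (their exact signatures are assumptions here), could look like this:

```python
from tensorpack.dataflow import TestDataSpeed
from imagenet_utils import fbresnet_augmentor, get_imagenet_dataflow

# Build the same training dataflow the example uses, then time it.
df = get_imagenet_dataflow(
    '/path/to/ILSVRC12', 'train', batch_size=64,
    augmentors=fbresnet_augmentor(isTrain=True))

# Iterate 5000 datapoints (batches here) and print the throughput; if this
# rate cannot keep the GPUs busy, the input pipeline is the bottleneck.
TestDataSpeed(df, 5000).start()
```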
......@@ -94,7 +94,7 @@ def get_imagenet_dataflow(
assert isinstance(augmentors, list)
isTrain = name == 'train'
if parallel is None:
parallel = min(40, multiprocessing.cpu_count())
parallel = min(40, multiprocessing.cpu_count() // 2) # assuming hyperthreading
if isTrain:
ds = dataset.ILSVRC12(datadir, name, shuffle=True)
ds = AugmentImageComponent(ds, augmentors, copy=False)
......
......@@ -46,6 +46,8 @@ class MergeAllSummaries_RunAlone(Callback):
self._key = key
def _setup_graph(self):
size = len(tf.get_collection(self._key))
logger.info("Summarizing collection '{}' of size {}".format(self._key, size))
self.summary_op = tf.summary.merge_all(self._key)
def _trigger_step(self):
......@@ -65,6 +67,8 @@ class MergeAllSummaries_RunWithOp(Callback):
self._key = key
def _setup_graph(self):
size = len(tf.get_collection(self._key))
logger.info("Summarizing collection '{}' of size {}".format(self._key, size))
self.summary_op = tf.summary.merge_all(self._key)
if self.summary_op is not None:
self._fetches = tf.train.SessionRunArgs(self.summary_op)
......
......@@ -43,4 +43,4 @@ except ImportError:
_HAS_TF = False
__version__ = '0.8.1'
__version__ = '0.8.2'
......@@ -354,5 +354,5 @@ try:
except ImportError:
HorovodTrainer = create_dummy_class('HorovodTrainer', 'horovod') # noqa
except Exception: # could be other than ImportError, e.g. NCCL not found
print("Horovod is installed but cannot be imported.")
print("Horovod is installed but cannot be imported. Check `python -c 'import horovod.tensorflow'`.")
HorovodTrainer = create_dummy_class('HorovodTrainer', 'horovod') # noqa