Commit 4f52bcfd authored by Yuxin Wu

small update & bump version

parent 9972b150
...@@ -26,10 +26,10 @@ matrix: ...@@ -26,10 +26,10 @@ matrix:
env: TF_VERSION=1.3.0 TF_TYPE=release env: TF_VERSION=1.3.0 TF_TYPE=release
- os: linux - os: linux
python: 2.7 python: 2.7
env: TF_VERSION=1.5.0 TF_TYPE=release env: TF_VERSION=1.6.0 TF_TYPE=release
- os: linux - os: linux
python: 3.5 python: 3.5
env: TF_VERSION=1.5.0 TF_TYPE=release env: TF_VERSION=1.6.0 TF_TYPE=release
- os: linux - os: linux
python: 2.7 python: 2.7
env: TF_VERSION=1.head TF_TYPE=nightly env: TF_VERSION=1.head TF_TYPE=nightly
......
...@@ -8,8 +8,9 @@ you'll use mostly tensorpack high-level APIs to do training, rather than TensorF ...@@ -8,8 +8,9 @@ you'll use mostly tensorpack high-level APIs to do training, rather than TensorF
Why tensorpack? Why tensorpack?
~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~
TensorFlow is powerful, but at the same time too complicated for a lot of people, especially when **speed** is a concern. TensorFlow is powerful, but at the same time too complicated for a lot of people.
Users can often write slow code with low-level APIs or other existing high-level wrappers. Users will have to worry a lot about things unrelated to the model, especially when **speed** is a concern.
Code written with low-level APIs or other existing high-level wrappers is often suboptimal in speed.
Even a lot of official TensorFlow examples are written for simplicity rather than efficiency, Even a lot of official TensorFlow examples are written for simplicity rather than efficiency,
which as a result makes people think TensorFlow is slow. which as a result makes people think TensorFlow is slow.
...@@ -22,7 +23,7 @@ However you can have them both in tensorpack. ...@@ -22,7 +23,7 @@ However you can have them both in tensorpack.
Tensorpack uses TensorFlow efficiently, and hides performance details under its APIs. Tensorpack uses TensorFlow efficiently, and hides performance details under its APIs.
You no longer need to write You no longer need to write
data prefetch, multi-GPU replication, device placement, variables synchronization -- anything that's unrelated to the model itself. data prefetch, multi-GPU replication, device placement, variables synchronization -- anything that's unrelated to the model itself.
You still need to learn to write models with TF, but performance is all taken care of by tensorpack. You still need to understand graph and learn to write models with TF, but performance is all taken care of by tensorpack.
A High Level Glance A High Level Glance
~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~
......
...@@ -8,13 +8,14 @@ import copy ...@@ -8,13 +8,14 @@ import copy
from tensorpack.utils.argtools import memoized, log_once from tensorpack.utils.argtools import memoized, log_once
from tensorpack.dataflow import ( from tensorpack.dataflow import (
imgaug, TestDataSpeed, PrefetchDataZMQ, MultiProcessMapData, imgaug, TestDataSpeed, PrefetchDataZMQ, MapData,
MapDataComponent, DataFromList) MapDataComponent, DataFromList)
# import tensorpack.utils.viz as tpviz # import tensorpack.utils.viz as tpviz
from coco import COCODetection from coco import COCODetection
from utils.generate_anchors import generate_anchors from utils.generate_anchors import generate_anchors
from utils.np_box_ops import iou as np_iou from utils.np_box_ops import iou as np_iou
from utils.np_box_ops import area as np_area
from common import ( from common import (
DataFromListOfDict, CustomResize, DataFromListOfDict, CustomResize,
box_to_point8, point8_to_box, segmentation_to_mask) box_to_point8, point8_to_box, segmentation_to_mask)
...@@ -231,6 +232,7 @@ def get_train_dataflow(add_mask=False): ...@@ -231,6 +232,7 @@ def get_train_dataflow(add_mask=False):
points = box_to_point8(boxes) points = box_to_point8(boxes)
points = aug.augment_coords(points, params) points = aug.augment_coords(points, params)
boxes = point8_to_box(points) boxes = point8_to_box(points)
assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
# rpn anchor: # rpn anchor:
try: try:
...@@ -267,7 +269,8 @@ def get_train_dataflow(add_mask=False): ...@@ -267,7 +269,8 @@ def get_train_dataflow(add_mask=False):
# tpviz.interactive_imshow(viz) # tpviz.interactive_imshow(viz)
return ret return ret
ds = MultiProcessMapData(ds, 3, preprocess) ds = MapData(ds, preprocess)
ds = PrefetchDataZMQ(ds, 1)
return ds return ds
...@@ -286,9 +289,10 @@ def get_eval_dataflow(): ...@@ -286,9 +289,10 @@ def get_eval_dataflow():
if __name__ == '__main__': if __name__ == '__main__':
config.BASEDIR = '/private/home/yuxinwu/data/coco' import os
config.TRAIN_DATASET = ['train2014']
from tensorpack.dataflow import PrintData from tensorpack.dataflow import PrintData
config.BASEDIR = os.path.expanduser('~/data/coco')
config.TRAIN_DATASET = ['train2014']
ds = get_train_dataflow(add_mask=config.MODE_MASK) ds = get_train_dataflow(add_mask=config.MODE_MASK)
ds = PrintData(ds, 100) ds = PrintData(ds, 100)
TestDataSpeed(ds, 50000).start() TestDataSpeed(ds, 50000).start()
......
...@@ -28,7 +28,7 @@ To train, first decompress ImageNet data into [this structure](http://tensorpack ...@@ -28,7 +28,7 @@ To train, first decompress ImageNet data into [this structure](http://tensorpack
You should be able to see good GPU utilization (95%~99%), if your data is fast enough. You should be able to see good GPU utilization (95%~99%), if your data is fast enough.
It can finish training [within 20 hours](http://dawn.cs.stanford.edu/benchmark/ImageNet/train.html) on AWS p3.16xlarge. It can finish training [within 20 hours](http://dawn.cs.stanford.edu/benchmark/ImageNet/train.html) on AWS p3.16xlarge.
The default data pipeline is probably OK for machines with SSD + E5 CPUs. The default data pipeline is probably OK for machines with SSD & 20 CPU cores.
See the [tutorial](http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html) on other options to speed up your data. See the [tutorial](http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html) on other options to speed up your data.
![imagenet](imagenet-resnet.png) ![imagenet](imagenet-resnet.png)
......
...@@ -94,7 +94,7 @@ def get_imagenet_dataflow( ...@@ -94,7 +94,7 @@ def get_imagenet_dataflow(
assert isinstance(augmentors, list) assert isinstance(augmentors, list)
isTrain = name == 'train' isTrain = name == 'train'
if parallel is None: if parallel is None:
parallel = min(40, multiprocessing.cpu_count()) parallel = min(40, multiprocessing.cpu_count() // 2) # assuming hyperthreading
if isTrain: if isTrain:
ds = dataset.ILSVRC12(datadir, name, shuffle=True) ds = dataset.ILSVRC12(datadir, name, shuffle=True)
ds = AugmentImageComponent(ds, augmentors, copy=False) ds = AugmentImageComponent(ds, augmentors, copy=False)
......
...@@ -46,6 +46,8 @@ class MergeAllSummaries_RunAlone(Callback): ...@@ -46,6 +46,8 @@ class MergeAllSummaries_RunAlone(Callback):
self._key = key self._key = key
def _setup_graph(self): def _setup_graph(self):
size = len(tf.get_collection(self._key))
logger.info("Summarizing collection '{}' of size {}".format(self._key, size))
self.summary_op = tf.summary.merge_all(self._key) self.summary_op = tf.summary.merge_all(self._key)
def _trigger_step(self): def _trigger_step(self):
...@@ -65,6 +67,8 @@ class MergeAllSummaries_RunWithOp(Callback): ...@@ -65,6 +67,8 @@ class MergeAllSummaries_RunWithOp(Callback):
self._key = key self._key = key
def _setup_graph(self): def _setup_graph(self):
size = len(tf.get_collection(self._key))
logger.info("Summarizing collection '{}' of size {}".format(self._key, size))
self.summary_op = tf.summary.merge_all(self._key) self.summary_op = tf.summary.merge_all(self._key)
if self.summary_op is not None: if self.summary_op is not None:
self._fetches = tf.train.SessionRunArgs(self.summary_op) self._fetches = tf.train.SessionRunArgs(self.summary_op)
......
...@@ -43,4 +43,4 @@ except ImportError: ...@@ -43,4 +43,4 @@ except ImportError:
_HAS_TF = False _HAS_TF = False
__version__ = '0.8.1' __version__ = '0.8.2'
...@@ -354,5 +354,5 @@ try: ...@@ -354,5 +354,5 @@ try:
except ImportError: except ImportError:
HorovodTrainer = create_dummy_class('HovorodTrainer', 'horovod') # noqa HorovodTrainer = create_dummy_class('HovorodTrainer', 'horovod') # noqa
except Exception: # could be other than ImportError, e.g. NCCL not found except Exception: # could be other than ImportError, e.g. NCCL not found
print("Horovod is installed but cannot be imported.") print("Horovod is installed but cannot be imported. Check `python -c 'import horovod.tensorflow'`.")
HorovodTrainer = create_dummy_class('HovorodTrainer', 'horovod') # noqa HorovodTrainer = create_dummy_class('HovorodTrainer', 'horovod') # noqa
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment