Commit 4f52bcfd authored by Yuxin Wu

small update & bump version

parent 9972b150
......@@ -26,10 +26,10 @@ matrix:
env: TF_VERSION=1.3.0 TF_TYPE=release
- os: linux
python: 2.7
env: TF_VERSION=1.5.0 TF_TYPE=release
env: TF_VERSION=1.6.0 TF_TYPE=release
- os: linux
python: 3.5
env: TF_VERSION=1.5.0 TF_TYPE=release
env: TF_VERSION=1.6.0 TF_TYPE=release
- os: linux
python: 2.7
env: TF_VERSION=1.head TF_TYPE=nightly
......
......@@ -8,8 +8,9 @@ you'll use mostly tensorpack high-level APIs to do training, rather than TensorF
Why tensorpack?
~~~~~~~~~~~~~~~~~~~
TensorFlow is powerful, but at the same time too complicated for a lot of people, especially when **speed** is a concern.
Users can often write slow code with low-level APIs or other existing high-level wrappers.
TensorFlow is powerful, but at the same time too complicated for a lot of people.
Users will have to worry a lot about things unrelated to the model, especially when **speed** is a concern.
Code written with low-level APIs or other existing high-level wrappers is often suboptimal in speed.
Even many official TensorFlow examples are written for simplicity rather than efficiency,
which in turn leads people to think TensorFlow is slow.
......@@ -22,7 +23,7 @@ However you can have them both in tensorpack.
Tensorpack uses TensorFlow efficiently, and hides performance details under its APIs.
You no longer need to write
data prefetching, multi-GPU replication, device placement, or variable synchronization -- anything that's unrelated to the model itself.
You still need to learn to write models with TF, but performance is all taken care of by tensorpack.
You still need to understand the TensorFlow graph and learn to write models with TF, but performance is all taken care of by tensorpack.
A High Level Glance
~~~~~~~~~~~~~~~~~~~
......
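The docs hunk above promises that prefetching, replication, and synchronization are handled for you. As a rough illustration only, here is a minimal sketch of what a training script built on these high-level APIs might look like around tensorpack 0.8; the toy model, shapes, and FakeData placeholder input are assumptions for the example, not code from this commit.

```python
import tensorflow as tf
from tensorpack import (InputDesc, ModelDesc, SimpleTrainer,
                        TrainConfig, launch_train_with_config)
from tensorpack.dataflow import FakeData
from tensorpack.utils import logger


class ToyModel(ModelDesc):
    def _get_inputs(self):
        # Placeholders the graph expects; names are used to match datapoints.
        return [InputDesc(tf.float32, (None, 28, 28, 1), 'image'),
                InputDesc(tf.int32, (None,), 'label')]

    def _build_graph(self, inputs):
        image, label = inputs
        logits = tf.layers.dense(tf.reshape(image, [-1, 28 * 28]), 10)
        # The trainer picks up self.cost as the objective to minimize.
        self.cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=label, logits=logits), name='cost')

    def _get_optimizer(self):
        return tf.train.AdamOptimizer(1e-3)


if __name__ == '__main__':
    logger.auto_set_dir()  # default log directory for checkpoints/summaries
    # FakeData stands in for a real DataFlow; prefetching, summaries and
    # checkpointing come from the trainer and the default callbacks.
    df = FakeData([[64, 28, 28, 1], [64]], size=1000,
                  dtype=['float32', 'int32'])
    launch_train_with_config(
        TrainConfig(model=ToyModel(), dataflow=df, max_epoch=1),
        SimpleTrainer())
```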
......@@ -8,13 +8,14 @@ import copy
from tensorpack.utils.argtools import memoized, log_once
from tensorpack.dataflow import (
imgaug, TestDataSpeed, PrefetchDataZMQ, MultiProcessMapData,
imgaug, TestDataSpeed, PrefetchDataZMQ, MapData,
MapDataComponent, DataFromList)
# import tensorpack.utils.viz as tpviz
from coco import COCODetection
from utils.generate_anchors import generate_anchors
from utils.np_box_ops import iou as np_iou
from utils.np_box_ops import area as np_area
from common import (
DataFromListOfDict, CustomResize,
box_to_point8, point8_to_box, segmentation_to_mask)
......@@ -231,6 +232,7 @@ def get_train_dataflow(add_mask=False):
points = box_to_point8(boxes)
points = aug.augment_coords(points, params)
boxes = point8_to_box(points)
assert np.min(np_area(boxes)) > 0, "Some boxes have zero area!"
# rpn anchor:
try:
......@@ -267,7 +269,8 @@ def get_train_dataflow(add_mask=False):
# tpviz.interactive_imshow(viz)
return ret
ds = MultiProcessMapData(ds, 3, preprocess)
ds = MapData(ds, preprocess)
ds = PrefetchDataZMQ(ds, 1)
return ds
......@@ -286,9 +289,10 @@ def get_eval_dataflow():
if __name__ == '__main__':
config.BASEDIR = '/private/home/yuxinwu/data/coco'
config.TRAIN_DATASET = ['train2014']
import os
from tensorpack.dataflow import PrintData
config.BASEDIR = os.path.expanduser('~/data/coco')
config.TRAIN_DATASET = ['train2014']
ds = get_train_dataflow(add_mask=config.MODE_MASK)
ds = PrintData(ds, 100)
TestDataSpeed(ds, 50000).start()
......
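For readers comparing the two pipeline variants in the hunks above: MultiProcessMapData runs the mapping function in several worker processes, while the replacement maps in-process and then forks the whole dataflow once behind PrefetchDataZMQ. The sketch below contrasts the two compositions with a trivial placeholder map function; it illustrates the DataFlow API as of tensorpack 0.8 and is not code from this commit.

```python
from tensorpack.dataflow import (DataFromList, MapData,
                                 MultiProcessMapData, PrefetchDataZMQ)


def preprocess(dp):
    # Placeholder for the per-datapoint work (augmentation, anchor targets, ...).
    return dp


ds = DataFromList([[i] for i in range(100)], shuffle=False)

# Variant A: parallelize the map function itself across 3 worker processes;
# may not preserve datapoint order.
# ds = MultiProcessMapData(ds, 3, preprocess)

# Variant B (used above): map in the parent process, then run the whole
# dataflow in one forked process and stream datapoints back over ZMQ.
ds = MapData(ds, preprocess)
ds = PrefetchDataZMQ(ds, 1)
```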
......@@ -28,7 +28,7 @@ To train, first decompress ImageNet data into [this structure](http://tensorpack
You should be able to see good GPU utilization (95%~99%) if your data is fast enough.
It can finish training [within 20 hours](http://dawn.cs.stanford.edu/benchmark/ImageNet/train.html) on AWS p3.16xlarge.
The default data pipeline is probably OK for machines with SSD + E5 CPUs.
The default data pipeline is probably OK for machines with an SSD and 20 CPU cores.
See the [tutorial](http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html) on other options to speed up your data.
![imagenet](imagenet-resnet.png)
......
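Whether the pipeline is "fast enough" is easy to measure directly. A possible check, assuming the get_imagenet_dataflow and fbresnet_augmentor helpers from this example's imagenet_utils.py (their exact signatures are assumptions here), could look like this:

```python
from tensorpack.dataflow import TestDataSpeed
from imagenet_utils import fbresnet_augmentor, get_imagenet_dataflow

# Build the same training dataflow the example uses, then time it.
df = get_imagenet_dataflow(
    '/path/to/ILSVRC12', 'train', batch_size=64,
    augmentors=fbresnet_augmentor(isTrain=True))

# Iterate 5000 datapoints (batches here) and print the throughput; if this
# rate cannot keep the GPUs busy, the input pipeline is the bottleneck.
TestDataSpeed(df, 5000).start()
```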
......@@ -94,7 +94,7 @@ def get_imagenet_dataflow(
assert isinstance(augmentors, list)
isTrain = name == 'train'
if parallel is None:
parallel = min(40, multiprocessing.cpu_count())
parallel = min(40, multiprocessing.cpu_count() // 2) # assuming hyperthreading
if isTrain:
ds = dataset.ILSVRC12(datadir, name, shuffle=True)
ds = AugmentImageComponent(ds, augmentors, copy=False)
......
......@@ -46,6 +46,8 @@ class MergeAllSummaries_RunAlone(Callback):
self._key = key
def _setup_graph(self):
size = len(tf.get_collection(self._key))
logger.info("Summarizing collection '{}' of size {}".format(self._key, size))
self.summary_op = tf.summary.merge_all(self._key)
def _trigger_step(self):
......@@ -65,6 +67,8 @@ class MergeAllSummaries_RunWithOp(Callback):
self._key = key
def _setup_graph(self):
size = len(tf.get_collection(self._key))
logger.info("Summarizing collection '{}' of size {}".format(self._key, size))
self.summary_op = tf.summary.merge_all(self._key)
if self.summary_op is not None:
self._fetches = tf.train.SessionRunArgs(self.summary_op)
......
......@@ -43,4 +43,4 @@ except ImportError:
_HAS_TF = False
__version__ = '0.8.1'
__version__ = '0.8.2'
......@@ -354,5 +354,5 @@ try:
except ImportError:
HorovodTrainer = create_dummy_class('HorovodTrainer', 'horovod') # noqa
except Exception: # could be other than ImportError, e.g. NCCL not found
print("Horovod is installed but cannot be imported.")
print("Horovod is installed but cannot be imported. Check `python -c 'import horovod.tensorflow'`.")
HorovodTrainer = create_dummy_class('HorovodTrainer', 'horovod') # noqa