Commit 910cfaec authored by Yuxin Wu

update docs

parent fa1dccdd
......@@ -17,6 +17,7 @@ Data:
1. It's easy to train on your own data. Just replace `COCODetection.load_many` in `data.py` with your own loader.
Also remember to change `config.NUM_CLASS` and `config.CLASS_NAMES`.
The current evaluation code is also COCO-specific, and you need to change it to use your data and metrics.
2. You can easily add more augmentations such as rotation, but be careful how a box should be
augmented. The code now will always use the minimal axis-aligned bounding box of the 4 corners,
......
......@@ -2,6 +2,7 @@
# -*- coding: utf-8 -*-
# File: basemodel.py
from contextlib import contextmanager
import tensorflow as tf
from tensorpack.tfutils.argscope import argscope, get_arg_scope
from tensorpack.tfutils.scope_utils import auto_reuse_variable_scope
......@@ -9,6 +10,14 @@ from tensorpack.models import (
Conv2D, MaxPooling, BatchNorm, BNReLU)
@contextmanager
def resnet_argscope():
    """Context manager that installs the argument scopes shared by the ResNet layers.

    While the context is active:
      * ``Conv2D``, ``MaxPooling`` and ``BatchNorm`` get ``data_format='NCHW'``;
      * ``Conv2D`` additionally gets ``use_bias=False``;
      * ``BatchNorm`` additionally gets ``use_local_stat=False``.
    """
    # The scopes are entered outermost-first and exited in reverse order,
    # exactly as a single multi-item ``with`` statement would do.
    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='NCHW'):
        with argscope(Conv2D, use_bias=False):
            with argscope(BatchNorm, use_local_stat=False):
                yield
def image_preprocess(image, bgr=True):
with tf.name_scope('image_preprocess'):
if image.dtype.base_dtype != tf.float32:
......@@ -71,29 +80,25 @@ def resnet_group(l, name, block_func, features, count, stride):
return l
def pretrained_resnet_conv4(image, num_blocks):
def pretrained_resnet_conv4(image, num_blocks, freeze_c2=True):
assert len(num_blocks) == 3
with argscope([Conv2D, MaxPooling, BatchNorm], data_format='NCHW'), \
argscope(Conv2D, nl=tf.identity, use_bias=False), \
argscope(BatchNorm, use_local_stat=False):
with resnet_argscope():
l = tf.pad(image, [[0, 0], [0, 0], [2, 3], [2, 3]])
l = Conv2D('conv0', l, 64, 7, stride=2, nl=BNReLU, padding='VALID')
l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
l = MaxPooling('pool0', l, shape=3, stride=2, padding='VALID')
l = resnet_group(l, 'group0', resnet_bottleneck, 64, num_blocks[0], 1)
# TODO replace var by const to enable folding
l = tf.stop_gradient(l)
l = resnet_group(l, 'group1', resnet_bottleneck, 128, num_blocks[1], 2)
l = resnet_group(l, 'group2', resnet_bottleneck, 256, num_blocks[2], 2)
c2 = resnet_group(l, 'group0', resnet_bottleneck, 64, num_blocks[0], 1)
# TODO replace var by const to enable optimization
if freeze_c2:
c2 = tf.stop_gradient(c2)
c3 = resnet_group(c2, 'group1', resnet_bottleneck, 128, num_blocks[1], 2)
c4 = resnet_group(c3, 'group2', resnet_bottleneck, 256, num_blocks[2], 2)
# 16x downsampling up to now
return l
return c4
@auto_reuse_variable_scope
def resnet_conv5(image, num_block):
with argscope([Conv2D, BatchNorm], data_format='NCHW'), \
argscope(Conv2D, nl=tf.identity, use_bias=False), \
argscope(BatchNorm, use_local_stat=False):
# 14x14:
l = resnet_group(image, 'group3', resnet_bottleneck, 512, num_block, stride=2)
with resnet_argscope():
l = resnet_group(image, 'group3', resnet_bottleneck, 512, num_block, 2)
return l
......@@ -196,7 +196,7 @@ def get_train_dataflow(add_mask=False):
config.BASEDIR, config.TRAIN_DATASET, add_gt=True, add_mask=add_mask)
"""
To train on your own data, change this to your loader.
Produce "igms" as a list of dict, in the dict the following keys are needed for training:
Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
height, width: integer
file_name: str
boxes: kx4 floats
......@@ -247,7 +247,7 @@ def get_train_dataflow(add_mask=False):
if add_mask:
# augmentation will modify the polys in-place
segmentation = copy.deepcopy(img.get('segmentation', None))
segmentation = copy.deepcopy(img['segmentation'])
segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
assert len(segmentation) == len(boxes)
......
......@@ -22,8 +22,10 @@ are the only two tools I know that can scale the training of a large Keras model
reproduce exactly the same setting of [tensorpack ResNet example](../ResNet) on ImageNet.
It has:
+ ResNet-50 model modified from [keras.applications](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/_impl/keras/applications/resnet50.py)
+ ResNet-50 model modified from [keras.applications](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/_impl/keras/applications/resnet50.py).
(We put stride on 3x3 conv in each bottleneck, which is different from some other implementations).
+ Multi-GPU data-parallel __training and validation__ which scales
+ With 8 V100s, still has >90% GPU utilization and finished 100 epochs in 19.5 hours
+ Finished 100 epochs in 19.5 hours on 8 V100s, with >90% GPU utilization.
+ Still slightly slower than native tensorpack examples.
+ Good accuracy (same as [tensorpack ResNet example](../ResNet))
......@@ -50,7 +50,7 @@ def humanize_time_delta(sec):
vals[-1] = sec
def _format(v, u):
return "{} {}{}".format(v, u, "s" if v > 1 else "")
return "{:.3g} {}{}".format(v, u, "s" if v > 1 else "")
required = False
ans = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment