Commit 883ef79f authored by Yuxin Wu

update docs

parent 87059de5
......@@ -50,3 +50,14 @@ Speed:
3. This implementation is about 14% slower than Detectron,
probably due to the lack of specialized ops (e.g. AffineChannel, ROIAlign) in TensorFlow.
It's certainly faster than other TF implementations.
Possible Future Enhancements:
1. Data-parallel evaluation during training.
2. Define an interface to load custom datasets.
3. Support batch>1 per GPU.
4. Use dedicated ops to improve speed (e.g. a TF implementation of the ROIAlign op
can be found in [light-head RCNN](https://github.com/zengarden/light_head_rcnn/tree/master/lib/lib_kernel)).
......@@ -70,7 +70,7 @@ MaskRCNN results contain both bbox and segm mAP.
The two R50-C4 360k models have the same configuration __and mAP__
as the `R50-C4-2x` entries in
[Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines).
So far this is the only TensorFlow implementation that can reproduce mAP in Detectron.
So far this is the only public TensorFlow implementation that can reproduce mAP in Detectron.
The other models listed here do not correspond to any configurations in Detectron.
## Notes
......
......@@ -121,13 +121,11 @@ def resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
mult = config.FPN_RESOLUTION_REQUIREMENT * 1.
new_shape2d = tf.to_int32(tf.ceil(tf.to_float(shape2d) / mult) * mult)
pad_shape2d = new_shape2d - shape2d
assert len(num_blocks) == 4
# TODO pad 1 at each stage
assert len(num_blocks) == 4, num_blocks
with resnet_argscope():
chan = image.shape[1]
l = tf.pad(image,
tf.stack([[0, 0], [0, 0],
[2, 3 + pad_shape2d[0]], [2, 3 + pad_shape2d[1]]]))
l = tf.pad(image, tf.stack(
[[0, 0], [0, 0], [2, 3 + pad_shape2d[0]], [2, 3 + pad_shape2d[1]]]))
l.set_shape([None, chan, None, None])
l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID')
l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
......
......@@ -9,10 +9,10 @@ MODE_FPN = False
# dataset -----------------------
BASEDIR = '/path/to/your/COCO/DIR'
TRAIN_DATASET = ['train2014', 'valminusminival2014']
VAL_DATASET = 'minival2014' # only support evaluation on single dataset
NUM_CLASS = 81
CLASS_NAMES = [] # NUM_CLASS strings. Will be populated later by coco loader
TRAIN_DATASET = ['train2014', 'valminusminival2014'] # i.e., trainval35k
VAL_DATASET = 'minival2014' # For now, only support evaluation on single dataset
NUM_CLASS = 81 # 1 background + 80 categories
CLASS_NAMES = [] # NUM_CLASS strings. Needs to be populated later by data loader
# basemodel ----------------------
RESNET_NUM_BLOCK = [3, 4, 6, 3] # for resnet50
......
......@@ -243,7 +243,7 @@ def get_multilevel_rpn_anchor_input(im, boxes, is_crowd):
return multilevel_inputs
def get_train_dataflow(add_mask=False):
def get_train_dataflow():
"""
Return a training dataflow. Each datapoint consists of the following:
......@@ -260,7 +260,7 @@ def get_train_dataflow(add_mask=False):
"""
imgs = COCODetection.load_many(
config.BASEDIR, config.TRAIN_DATASET, add_gt=True, add_mask=add_mask)
config.BASEDIR, config.TRAIN_DATASET, add_gt=True, add_mask=config.MODE_MASK)
"""
To train on your own data, change this to your loader.
Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
......@@ -291,7 +291,7 @@ def get_train_dataflow(add_mask=False):
assert im is not None, fname
im = im.astype('float32')
# assume floatbox as input
assert boxes.dtype == np.float32
assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"
# augmentation:
im, params = aug.augment_return_params(im)
......@@ -319,9 +319,8 @@ def get_train_dataflow(add_mask=False):
return None
ret = [im] + list(anchor_inputs) + [boxes, klass]
# TODO pad im when FPN
if add_mask:
if config.MODE_MASK:
# augmentation will modify the polys in-place
segmentation = copy.deepcopy(img['segmentation'])
segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
......@@ -365,7 +364,7 @@ if __name__ == '__main__':
import os
from tensorpack.dataflow import PrintData
config.BASEDIR = os.path.expanduser('~/data/coco')
ds = get_train_dataflow(add_mask=config.MODE_MASK)
ds = get_train_dataflow()
ds = PrintData(ds, 100)
TestDataSpeed(ds, 50000).start()
ds.reset_state()
......
......@@ -639,7 +639,7 @@ def fpn_map_rois_to_levels(boxes):
Assign boxes to level 2~5.
Args:
boxes (nx4)
boxes (nx4):
Returns:
[tf.Tensor]: 4 tensors for level 2-5. Each tensor is a vector of indices of boxes in its level.
......
......@@ -519,7 +519,7 @@ class EvalCallback(Callback):
def _before_train(self):
EVAL_TIMES = 5 # eval 5 times during training
interval = self.trainer.max_epoch // (EVAL_TIMES + 1)
self.epochs_to_eval = set([interval * k for k in range(1, EVAL_TIMES)])
self.epochs_to_eval = set([interval * k for k in range(1, EVAL_TIMES + 1)])
self.epochs_to_eval.add(self.trainer.max_epoch)
def _eval(self):
......@@ -600,19 +600,20 @@ if __name__ == '__main__':
cfg = TrainConfig(
model=get_model(),
data=QueueInput(get_train_dataflow(add_mask=config.MODE_MASK)),
data=QueueInput(get_train_dataflow()),
callbacks=[
PeriodicCallback(
ModelSaver(max_to_keep=10, keep_checkpoint_every_n_hours=1),
every_k_epochs=20),
SessionRunTimeout(60000), # 1 minute timeout
# linear warmup
ScheduledHyperParamSetter(
'learning_rate', warmup_schedule, interp='linear', step_based=True),
ScheduledHyperParamSetter('learning_rate', lr_schedule),
EvalCallback(),
GPUUtilizationTracker(),
PeakMemoryTracker(),
EstimatedTimeLeft(),
SessionRunTimeout(60000), # 1 minute timeout
],
steps_per_epoch=stepnum,
max_epoch=config.LR_SCHEDULE[-1] * factor // stepnum,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment