Commit 883ef79f authored by Yuxin Wu

update docs

parent 87059de5
...@@ -50,3 +50,14 @@ Speed: ...@@ -50,3 +50,14 @@ Speed:
3. This implementation is about 14% slower than Detectron, 3. This implementation is about 14% slower than Detectron,
probably due to the lack of specialized ops (e.g. AffineChannel, ROIAlign) in TensorFlow. probably due to the lack of specialized ops (e.g. AffineChannel, ROIAlign) in TensorFlow.
It's certainly faster than other TF implementations. It's certainly faster than other TF implementations.
Possible Future Enhancements:
1. Data-parallel evaluation during training.
2. Define an interface to load custom datasets.
3. Support batch>1 per GPU.
4. Use dedicated ops to improve speed (e.g. a TF implementation of the ROIAlign op
can be found in [light-head RCNN](https://github.com/zengarden/light_head_rcnn/tree/master/lib/lib_kernel)).
...@@ -70,7 +70,7 @@ MaskRCNN results contain both bbox and segm mAP. ...@@ -70,7 +70,7 @@ MaskRCNN results contain both bbox and segm mAP.
The two R50-C4 360k models have the same configuration __and mAP__ The two R50-C4 360k models have the same configuration __and mAP__
as the `R50-C4-2x` entries in as the `R50-C4-2x` entries in
[Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines). [Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines).
So far this is the only TensorFlow implementation that can reproduce mAP in Detectron. So far this is the only public TensorFlow implementation that can reproduce mAP in Detectron.
The other models listed here do not correspond to any configurations in Detectron. The other models listed here do not correspond to any configurations in Detectron.
## Notes ## Notes
......
...@@ -121,13 +121,11 @@ def resnet_fpn_backbone(image, num_blocks, freeze_c2=True): ...@@ -121,13 +121,11 @@ def resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
mult = config.FPN_RESOLUTION_REQUIREMENT * 1. mult = config.FPN_RESOLUTION_REQUIREMENT * 1.
new_shape2d = tf.to_int32(tf.ceil(tf.to_float(shape2d) / mult) * mult) new_shape2d = tf.to_int32(tf.ceil(tf.to_float(shape2d) / mult) * mult)
pad_shape2d = new_shape2d - shape2d pad_shape2d = new_shape2d - shape2d
assert len(num_blocks) == 4 assert len(num_blocks) == 4, num_blocks
# TODO pad 1 at each stage
with resnet_argscope(): with resnet_argscope():
chan = image.shape[1] chan = image.shape[1]
l = tf.pad(image, l = tf.pad(image, tf.stack(
tf.stack([[0, 0], [0, 0], [[0, 0], [0, 0], [2, 3 + pad_shape2d[0]], [2, 3 + pad_shape2d[1]]]))
[2, 3 + pad_shape2d[0]], [2, 3 + pad_shape2d[1]]]))
l.set_shape([None, chan, None, None]) l.set_shape([None, chan, None, None])
l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID') l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID')
l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]]) l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
......
...@@ -9,10 +9,10 @@ MODE_FPN = False ...@@ -9,10 +9,10 @@ MODE_FPN = False
# dataset ----------------------- # dataset -----------------------
BASEDIR = '/path/to/your/COCO/DIR' BASEDIR = '/path/to/your/COCO/DIR'
TRAIN_DATASET = ['train2014', 'valminusminival2014'] TRAIN_DATASET = ['train2014', 'valminusminival2014'] # i.e., trainval35k
VAL_DATASET = 'minival2014' # only support evaluation on single dataset VAL_DATASET = 'minival2014' # For now, only support evaluation on single dataset
NUM_CLASS = 81 NUM_CLASS = 81 # 1 background + 80 categories
CLASS_NAMES = [] # NUM_CLASS strings. Will be populated later by coco loader CLASS_NAMES = [] # NUM_CLASS strings. Needs to be populated later by data loader
# basemodel ---------------------- # basemodel ----------------------
RESNET_NUM_BLOCK = [3, 4, 6, 3] # for resnet50 RESNET_NUM_BLOCK = [3, 4, 6, 3] # for resnet50
......
...@@ -243,7 +243,7 @@ def get_multilevel_rpn_anchor_input(im, boxes, is_crowd): ...@@ -243,7 +243,7 @@ def get_multilevel_rpn_anchor_input(im, boxes, is_crowd):
return multilevel_inputs return multilevel_inputs
def get_train_dataflow(add_mask=False): def get_train_dataflow():
""" """
Return a training dataflow. Each datapoint consists of the following: Return a training dataflow. Each datapoint consists of the following:
...@@ -260,7 +260,7 @@ def get_train_dataflow(add_mask=False): ...@@ -260,7 +260,7 @@ def get_train_dataflow(add_mask=False):
""" """
imgs = COCODetection.load_many( imgs = COCODetection.load_many(
config.BASEDIR, config.TRAIN_DATASET, add_gt=True, add_mask=add_mask) config.BASEDIR, config.TRAIN_DATASET, add_gt=True, add_mask=config.MODE_MASK)
""" """
To train on your own data, change this to your loader. To train on your own data, change this to your loader.
Produce "imgs" as a list of dict, in the dict the following keys are needed for training: Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
...@@ -291,7 +291,7 @@ def get_train_dataflow(add_mask=False): ...@@ -291,7 +291,7 @@ def get_train_dataflow(add_mask=False):
assert im is not None, fname assert im is not None, fname
im = im.astype('float32') im = im.astype('float32')
# assume floatbox as input # assume floatbox as input
assert boxes.dtype == np.float32 assert boxes.dtype == np.float32, "Loader has to return floating point boxes!"
# augmentation: # augmentation:
im, params = aug.augment_return_params(im) im, params = aug.augment_return_params(im)
...@@ -319,9 +319,8 @@ def get_train_dataflow(add_mask=False): ...@@ -319,9 +319,8 @@ def get_train_dataflow(add_mask=False):
return None return None
ret = [im] + list(anchor_inputs) + [boxes, klass] ret = [im] + list(anchor_inputs) + [boxes, klass]
# TODO pad im when FPN
if add_mask: if config.MODE_MASK:
# augmentation will modify the polys in-place # augmentation will modify the polys in-place
segmentation = copy.deepcopy(img['segmentation']) segmentation = copy.deepcopy(img['segmentation'])
segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]] segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
...@@ -365,7 +364,7 @@ if __name__ == '__main__': ...@@ -365,7 +364,7 @@ if __name__ == '__main__':
import os import os
from tensorpack.dataflow import PrintData from tensorpack.dataflow import PrintData
config.BASEDIR = os.path.expanduser('~/data/coco') config.BASEDIR = os.path.expanduser('~/data/coco')
ds = get_train_dataflow(add_mask=config.MODE_MASK) ds = get_train_dataflow()
ds = PrintData(ds, 100) ds = PrintData(ds, 100)
TestDataSpeed(ds, 50000).start() TestDataSpeed(ds, 50000).start()
ds.reset_state() ds.reset_state()
......
...@@ -639,7 +639,7 @@ def fpn_map_rois_to_levels(boxes): ...@@ -639,7 +639,7 @@ def fpn_map_rois_to_levels(boxes):
Assign boxes to level 2~5. Assign boxes to level 2~5.
Args: Args:
boxes (nx4) boxes (nx4):
Returns: Returns:
[tf.Tensor]: 4 tensors for level 2-5. Each tensor is a vector of indices of boxes in its level. [tf.Tensor]: 4 tensors for level 2-5. Each tensor is a vector of indices of boxes in its level.
......
...@@ -519,7 +519,7 @@ class EvalCallback(Callback): ...@@ -519,7 +519,7 @@ class EvalCallback(Callback):
def _before_train(self): def _before_train(self):
EVAL_TIMES = 5 # eval 5 times during training EVAL_TIMES = 5 # eval 5 times during training
interval = self.trainer.max_epoch // (EVAL_TIMES + 1) interval = self.trainer.max_epoch // (EVAL_TIMES + 1)
self.epochs_to_eval = set([interval * k for k in range(1, EVAL_TIMES)]) self.epochs_to_eval = set([interval * k for k in range(1, EVAL_TIMES + 1)])
self.epochs_to_eval.add(self.trainer.max_epoch) self.epochs_to_eval.add(self.trainer.max_epoch)
def _eval(self): def _eval(self):
...@@ -600,19 +600,20 @@ if __name__ == '__main__': ...@@ -600,19 +600,20 @@ if __name__ == '__main__':
cfg = TrainConfig( cfg = TrainConfig(
model=get_model(), model=get_model(),
data=QueueInput(get_train_dataflow(add_mask=config.MODE_MASK)), data=QueueInput(get_train_dataflow()),
callbacks=[ callbacks=[
PeriodicCallback( PeriodicCallback(
ModelSaver(max_to_keep=10, keep_checkpoint_every_n_hours=1), ModelSaver(max_to_keep=10, keep_checkpoint_every_n_hours=1),
every_k_epochs=20), every_k_epochs=20),
SessionRunTimeout(60000), # 1 minute timeout
# linear warmup # linear warmup
ScheduledHyperParamSetter( ScheduledHyperParamSetter(
'learning_rate', warmup_schedule, interp='linear', step_based=True), 'learning_rate', warmup_schedule, interp='linear', step_based=True),
ScheduledHyperParamSetter('learning_rate', lr_schedule), ScheduledHyperParamSetter('learning_rate', lr_schedule),
EvalCallback(), EvalCallback(),
GPUUtilizationTracker(), GPUUtilizationTracker(),
PeakMemoryTracker(),
EstimatedTimeLeft(), EstimatedTimeLeft(),
SessionRunTimeout(60000), # 1 minute timeout
], ],
steps_per_epoch=stepnum, steps_per_epoch=stepnum,
max_epoch=config.LR_SCHEDULE[-1] * factor // stepnum, max_epoch=config.LR_SCHEDULE[-1] * factor // stepnum,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment