Commit 271ffad1 authored by Yuxin Wu's avatar Yuxin Wu

[FasterRCNN] add notes about using different dataset (#632)

parent 5a9d1362
......@@ -16,6 +16,7 @@ This is a minimal implementation that simply contains these files:
Data:
1. It's easy to train on your own data. Just replace `COCODetection.load_many` in `data.py` by your own loader.
Also remember to change `config.NUM_CLASS` and `config.CLASS_NAMES`.
2. You can easily add more augmentations such as rotation, but be careful how a box should be
augmented. The code now will always use the minimal axis-aligned bounding box of the 4 corners,
......
......@@ -82,10 +82,12 @@ class COCODetection(object):
Args:
add_gt: whether to add ground truth bounding box annotations to the dicts
add_mask: whether to also add ground truth mask
Returns:
a list of dict, each has keys including:
height, width, id, file_name,
and (if add_gt is True) boxes, class, is_crowd
'height', 'width', 'id', 'file_name',
and (if add_gt is True) 'boxes', 'class', 'is_crowd', and optionally
'segmentation'.
"""
if add_mask:
assert add_gt
......@@ -112,6 +114,7 @@ class COCODetection(object):
def _add_detection_gt(self, img, add_mask):
"""
Add 'boxes', 'class', 'is_crowd' of this image to the dict, used by detection.
If add_mask is True, also add 'segmentation' in coco poly format.
"""
ann_ids = self.coco.getAnnIds(imgIds=img['id'], iscrowd=None)
objs = self.coco.loadAnns(ann_ids)
......@@ -184,6 +187,8 @@ class COCODetection(object):
def load_many(basedir, names, add_gt=True, add_mask=False):
"""
Load and merge several instance files together.
Returns the same format as :meth:`COCODetection.load`.
"""
if not isinstance(names, (list, tuple)):
names = [names]
......
......@@ -191,8 +191,24 @@ def get_train_dataflow(add_mask=False):
Return a training dataflow. Each datapoint is:
image, fm_labels, fm_boxes, gt_boxes, gt_class [, masks]
"""
imgs = COCODetection.load_many(
config.BASEDIR, config.TRAIN_DATASET, add_gt=True, add_mask=add_mask)
"""
To train on your own data, change this to your loader.
Produce "imgs" as a list of dict; each dict must contain the following keys for training:
height, width: integer
file_name: str
boxes: kx4 floats
class: k integers
is_crowd: k booleans. Use a list of k False values if you don't know what this means.
segmentation: k numpy arrays. Each array is a polygon of shape Nx2.
If your segmentation annotations are masks rather than polygons,
either convert it, or the augmentation code below will need to be
changed or skipped accordingly.
"""
# Valid training images should have at least one fg box.
# But this filter shall not be applied for testing.
imgs = list(filter(lambda img: len(img['boxes']) > 0, imgs)) # log invalid training
......@@ -236,7 +252,8 @@ def get_train_dataflow(add_mask=False):
segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
assert len(segmentation) == len(boxes)
# one image-sized binary mask per box
# Apply augmentation on polygon coordinates.
# And produce one image-sized binary mask per box.
masks = []
for polys in segmentation:
polys = [aug.augment_coords(p, params) for p in polys]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment