Commit 81a1cb92 authored by Yuxin Wu's avatar Yuxin Wu

Add balloon demo

parent ab229670
## Balloon Demo
This is a demo of how to train tensorpack's Mask R-CNN on a custom dataset.
We use the [balloon dataset](https://github.com/matterport/Mask_RCNN/tree/master/samples/balloon)
as an example.
1. Download and unzip the dataset:
```
wget https://github.com/matterport/Mask_RCNN/releases/download/v2.1/balloon_dataset.zip
unzip balloon_dataset.zip
```
2. (included already) Since this dataset is not in COCO format, we add a new file
[dataset/balloon.py](dataset/balloon.py) to load the dataset.
Refer to [dataset/dataset.py](dataset/dataset.py) on the required interface of a new dataset.
3. (included already) Register the names of the new dataset in `train.py` and `predict.py`, by calling `register_balloon("/path/to/balloon_dataset")`
4. Download a model pretrained on COCO from tensorpack model zoo:
```
wget http://models.tensorpack.com/FasterRCNN/COCO-MaskRCNN-R50FPN2x.npz
```
5. Start fine-tuning on the new dataset:
```
./train.py --config DATA.BASEDIR=~/data/balloon MODE_FPN=True \
"DATA.VAL=('balloon_val',)" "DATA.TRAIN=('balloon_train',)" \
TRAIN.BASE_LR=1e-3 TRAIN.EVAL_PERIOD=0 "TRAIN.LR_SCHEDULE=[1000]" \
"PREPROC.TRAIN_SHORT_EDGE_SIZE=[600,1200]" TRAIN.CHECKPOINT_PERIOD=1 \
--load COCO-MaskRCNN-R50FPN2x.npz --logdir train_log/balloon
```
6. You can train as long as you want, but it only takes __a few minutes__ to produce nice results.
You can visualize the results of the latest model by:
```
./predict.py --config DATA.BASEDIR=~/data/balloon MODE_FPN=True \
"DATA.VAL=('balloon_val',)" "DATA.TRAIN=('balloon_train',)" \
--load train_log/balloon/checkpoint --predict ~/data/balloon/val/*.jpg
```
This command will produce images like this in your window:
![demo](https://user-images.githubusercontent.com/1381301/62665002-915ff880-b932-11e9-9f7e-f24f83d5d69c.jpg)
......@@ -31,6 +31,8 @@ Data:
In this class you'll implement the logic to load your dataset and evaluate predictions.
The documentation is in the docstring of `DatasetSplit`.
See [BALLOON.md](BALLOON.md) for an example of fine-tuning on a different dataset.
1. If you load a COCO-trained model on a different dataset, you may see error messages
complaining about unmatched number of categories for certain weights in the checkpoint.
You can either remove those weights in checkpoint, or rename them in the model.
......
......@@ -121,6 +121,6 @@ Performance in [Detectron](https://github.com/facebookresearch/Detectron/) can b
Note that our training strategy is slightly different: we enable cascade throughout the entire training.
As far as I know, this model is the __best open source TF model__ on COCO dataset.
## Notes
## Other Datasets / Implementation Details / Speed:
[NOTES.md](NOTES.md) has some notes about implementation details & speed.
See [BALLOON.md](BALLOON.md) and [NOTES.md](NOTES.md) for more details.
......@@ -141,6 +141,7 @@ _C.TRAIN.STARTING_EPOCH = 1 # the first epoch to start with, useful to continue
_C.TRAIN.LR_SCHEDULE = "1x" # "1x" schedule in detectron
_C.TRAIN.EVAL_PERIOD = 25 # period (epochs) to run evaluation
_C.TRAIN.CHECKPOINT_PERIOD = 20 # period (epochs) to save model
# preprocessing --------------------
# Alternative old (worse & faster) setting: 600
......
......@@ -49,8 +49,8 @@ def print_class_histogram(roidbs):
gt_inds = np.where((entry["class"] > 0) & (entry["is_crowd"] == 0))[0]
gt_classes = entry["class"][gt_inds]
gt_hist += np.histogram(gt_classes, bins=hist_bins)[0]
COL = 6
data = list(itertools.chain(*[[class_names[i + 1], v] for i, v in enumerate(gt_hist[1:])]))
COL = max(6, len(data))
total_instances = sum(data[1::2])
data.extend([None] * (COL - len(data) % COL))
data.extend(["total", total_instances])
......
from .dataset import *
from .coco import *
from .balloon import *
import os
import numpy as np
import json
from dataset import DatasetSplit, DatasetRegistry
__all__ = ["register_balloon"]
class BalloonDemo(DatasetSplit):
    """The balloon demo dataset, stored in VIA (VGG Image Annotator) JSON format."""

    def __init__(self, base_dir, split):
        """
        Args:
            base_dir (str): root directory of the balloon dataset ("~" is expanded).
            split (str): one of "train" / "val".
        """
        assert split in ["train", "val"]
        expanded = os.path.expanduser(base_dir)
        self.imgdir = os.path.join(expanded, split)
        assert os.path.isdir(self.imgdir), self.imgdir

    def training_roidbs(self):
        """
        Returns:
            list[dict]: one roidb per image with keys
                "file_name", "boxes", "segmentation", "class", "is_crowd".
        """
        json_file = os.path.join(self.imgdir, "via_region_data.json")
        with open(json_file) as f:
            annotations = json.load(f)
        return [self._make_roidb(record) for record in annotations.values()]

    def _make_roidb(self, record):
        """Convert one VIA image record into a roidb dict."""
        fname = os.path.join(self.imgdir, record["filename"])
        regions = record["regions"]

        boxes, segs = [], []
        for region in regions.values():
            assert not region["region_attributes"]
            shape = region["shape_attributes"]
            # NOTE(review): +0.5 presumably shifts integer pixel indices to
            # pixel-center coordinates to match the codebase's box convention.
            poly = np.stack((shape["all_points_x"], shape["all_points_y"]), axis=1) + 0.5
            minxy = poly.min(axis=0)
            maxxy = poly.max(axis=0)
            boxes.append([minxy[0], minxy[1], maxxy[0], maxxy[1]])
            segs.append([poly])

        num = len(regions)
        return {
            "file_name": fname,
            "boxes": np.asarray(boxes, dtype=np.float32),
            "segmentation": segs,
            # single foreground category: everything is class 1 ("balloon")
            "class": np.ones((num, ), dtype=np.int32),
            "is_crowd": np.zeros((num, ), dtype=np.int8),
        }
def register_balloon(basedir):
    """
    Register the "balloon_train" / "balloon_val" splits and their
    class-name metadata in DatasetRegistry.

    Args:
        basedir (str): root directory of the balloon dataset.
    """
    for split in ("train", "val"):
        name = "balloon_" + split
        # bind `split` as a default argument so each lambda keeps its own value
        DatasetRegistry.register(name, lambda s=split: BalloonDemo(basedir, s))
        DatasetRegistry.register_metadata(name, "class_names", ["BG", "balloon"])
if __name__ == '__main__':
    # Quick sanity check: load the train split and eyeball the annotations.
    basedir = '~/data/balloon'
    roidbs = BalloonDemo(basedir, "train").training_roidbs()
    print("#images:", len(roidbs))

    from viz import draw_annotation
    from tensorpack.utils.viz import interactive_imshow as imshow
    import cv2

    for roidb in roidbs:
        img = cv2.imread(roidb["file_name"])
        rendered = draw_annotation(img, roidb["boxes"], roidb["class"], roidb["segmentation"])
        imshow(rendered)
......@@ -18,13 +18,15 @@ from tensorpack.tfutils import get_model_loader, get_tf_version_tuple
from tensorpack.tfutils.export import ModelExporter
from tensorpack.utils import fs, logger
from dataset import DatasetRegistry, register_coco
from dataset import DatasetRegistry, register_coco, register_balloon
from config import config as cfg
from config import finalize_configs
from data import get_eval_dataflow, get_train_dataflow
from eval import DetectionResult, multithread_predict_dataflow, predict_image
from modeling.generalized_rcnn import ResNetC4Model, ResNetFPNModel
from viz import draw_annotation, draw_final_outputs, draw_predictions, draw_proposal_recall
from viz import (
draw_annotation, draw_final_outputs, draw_predictions,
draw_proposal_recall, draw_final_outputs_blackwhite)
def do_visualize(model, model_path, nr_visualize=100, output_dir='output'):
......@@ -97,6 +99,9 @@ def do_evaluate(pred_config, output_file):
def do_predict(pred_func, input_file):
img = cv2.imread(input_file, cv2.IMREAD_COLOR)
results = predict_image(img, pred_func)
if cfg.MODE_MASK:
final = draw_final_outputs_blackwhite(img, results)
else:
final = draw_final_outputs(img, results)
viz = np.concatenate((img, final), axis=1)
cv2.imwrite("output.png", viz)
......@@ -122,6 +127,8 @@ if __name__ == '__main__':
if args.config:
cfg.update_args(args.config)
register_coco(cfg.DATA.BASEDIR) # add COCO datasets to the registry
register_balloon(cfg.DATA.BASEDIR)
MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model()
if not tf.test.is_gpu_available():
......
......@@ -10,7 +10,7 @@ from tensorpack import *
from tensorpack.tfutils import collect_env_info
from tensorpack.tfutils.common import get_tf_version_tuple
from dataset import register_coco
from dataset import register_coco, register_balloon
from config import config as cfg
from config import finalize_configs
from data import get_train_dataflow
......@@ -43,6 +43,7 @@ if __name__ == '__main__':
if args.config:
cfg.update_args(args.config)
register_coco(cfg.DATA.BASEDIR) # add COCO datasets to the registry
register_balloon(cfg.DATA.BASEDIR) # add the demo balloon datasets to the registry
# Setup logger ...
is_horovod = cfg.TRAINER == 'horovod'
......@@ -82,7 +83,7 @@ if __name__ == '__main__':
callbacks = [
PeriodicCallback(
ModelSaver(max_to_keep=10, keep_checkpoint_every_n_hours=1),
every_k_epochs=20),
every_k_epochs=cfg.TRAIN.CHECKPOINT_PERIOD),
# linear warmup
ScheduledHyperParamSetter(
'learning_rate', warmup_schedule, interp='linear', step_based=True),
......@@ -105,7 +106,8 @@ if __name__ == '__main__':
session_init = None
else:
if args.load:
session_init = get_model_loader(args.load)
# ignore mismatched values, so you can `--load` a model for fine-tuning
session_init = get_model_loader(args.load, ignore_mismatch=True)
else:
session_init = get_model_loader(cfg.BACKBONE.WEIGHTS) if cfg.BACKBONE.WEIGHTS else None
......
......@@ -97,6 +97,39 @@ def draw_final_outputs(img, results):
return ret
def draw_final_outputs_blackwhite(img, results):
    """
    Draw detection outputs on a desaturated copy of the image: pixels inside
    any instance mask keep their original colors, the rest become gray.

    Args:
        img: the input image (H, W, 3), as loaded by cv2.
        results: [DetectionResult]

    Returns:
        The input image unchanged if there are no results; otherwise a new
        image with boxes and "class,score" tags drawn on it.
    """
    if len(results) == 0:
        return img

    # Largest-to-smallest ordering; for the mask union below the order is
    # irrelevant, but it mirrors the occlusion-reduction order used elsewhere.
    boxes = np.asarray([r.box for r in results])
    areas = np_area(boxes)
    sorted_inds = np.argsort(-areas)

    # Desaturate by replicating the per-pixel channel mean into 3 channels.
    img_bw = img.mean(axis=2)
    img_bw = np.stack([img_bw] * 3, axis=2)

    # Restore original colors inside the union of all instance masks.
    # (Masks are assumed all-present or all-absent; only the first is checked.)
    all_masks = [results[rid].mask for rid in sorted_inds]
    if all_masks[0] is not None:
        union = all_masks[0] > 0
        for m in all_masks[1:]:
            union = union | (m > 0)
        # fix: removed leftover debug `print(m, m.sum())` that spammed stdout
        img_bw[union] = img[union]

    tags = []
    for r in results:
        tags.append(
            "{},{:.2f}".format(cfg.DATA.CLASS_NAMES[r.class_id], r.score))
    ret = viz.draw_boxes(img_bw, boxes, tags)
    return ret
def draw_mask(im, mask, alpha=0.5, color=None):
"""
Overlay a mask on top of the image.
......
......@@ -172,14 +172,18 @@ class SaverRestoreRelaxed(SaverRestore):
logger.info(
"Restoring checkpoint from {} ...".format(self.path))
matched_pairs = []
def f(reader, name, v):
val = reader.get_tensor(name)
val = SessionUpdate.relaxed_value_for_var(val, v, ignore_mismatch=True)
if val is not None:
v.load(val)
matched_pairs.append((v, val))
with sess.as_default():
self._match_vars(f)
upd = SessionUpdate(sess, [x[0] for x in matched_pairs])
upd.update({x[0].name: x[1] for x in matched_pairs})
class DictRestore(SessionInit):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment