Commit 9b1d1095 authored by Yuxin Wu

[MaskRCNN] use attrdict for config

parent 4f1efe74
@@ -9,20 +9,20 @@ from tensorpack.tfutils.varreplace import custom_getter_scope
from tensorpack.models import (
Conv2D, MaxPooling, BatchNorm, BNReLU)
import config
from config import config as cfg
def maybe_freeze_affine(getter, *args, **kwargs):
# custom getter to freeze affine params inside bn
name = args[0] if len(args) else kwargs.get('name')
if name.endswith('/gamma') or name.endswith('/beta'):
if config.FREEZE_AFFINE:
if cfg.BACKBONE.FREEZE_AFFINE:
kwargs['trainable'] = False
return getter(*args, **kwargs)
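For context, a minimal sketch (not part of this diff) of how such a getter is typically applied, using the custom_getter_scope imported at the top of this file; the wrapper function name is hypothetical:

def backbone_sketch(image):
    # Any '/gamma' or '/beta' variable created inside this scope becomes
    # non-trainable when cfg.BACKBONE.FREEZE_AFFINE is True.
    with custom_getter_scope(maybe_freeze_affine):
        return Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)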
def maybe_reverse_pad(topleft, bottomright):
if config.TF_PAD_MODE:
if cfg.BACKBONE.TF_PAD_MODE:
return [topleft, bottomright]
return [bottomright, topleft]
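A sketch of the intended call pattern (the call site itself is outside the hunks shown): pad a stride-2 convolution's input asymmetrically, with the flag deciding which side receives the extra pixel.

# Hypothetical NCHW example: a 7x7/stride-2 conv wants 5 padding pixels per
# spatial dim, split 2/3; TF_PAD_MODE puts the larger share on bottom/right.
padded = tf.pad(image, [[0, 0], [0, 0],
                        maybe_reverse_pad(2, 3), maybe_reverse_pad(2, 3)])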
@@ -65,7 +65,7 @@ def resnet_shortcut(l, n_out, stride, activation=tf.identity):
n_in = l.get_shape().as_list()[1 if data_format in ['NCHW', 'channels_first'] else 3]
if n_in != n_out: # change dimension when channel is not the same
# TF's SAME mode outputs ceil(x/stride), which is NOT what we want when x is odd and stride is 2
if not config.MODE_FPN and stride == 2:
if not cfg.MODE_FPN and stride == 2:
l = l[:, :, :-1, :-1]
return Conv2D('convshortcut', l, n_out, 1,
strides=stride, padding='VALID', activation=activation)
@@ -124,7 +124,7 @@ def resnet_conv5(image, num_block):
def resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
shape2d = tf.shape(image)[2:]
mult = float(config.FPN_RESOLUTION_REQUIREMENT)
mult = float(cfg.FPN.RESOLUTION_REQUIREMENT)
new_shape2d = tf.to_int32(tf.ceil(tf.to_float(shape2d) / mult) * mult)
pad_shape2d = new_shape2d - shape2d
assert len(num_blocks) == 4, num_blocks
@@ -12,13 +12,13 @@ from tensorpack.utils.timer import timed_operation
from tensorpack.utils.argtools import log_once
from pycocotools.coco import COCO
import config
from config import config as cfg
__all__ = ['COCODetection', 'COCOMeta']
COCO_NUM_CATEGORY = 80
config.NUM_CLASS = COCO_NUM_CATEGORY + 1
cfg.DATA.NUM_CLASS = COCO_NUM_CATEGORY + 1
class _COCOMeta(object):
@@ -48,7 +48,7 @@ class _COCOMeta(object):
v: i + 1 for i, v in enumerate(cat_ids)}
self.class_id_to_category_id = {
v: k for k, v in self.category_id_to_class_id.items()}
config.CLASS_NAMES = self.class_names
cfg.DATA.CLASS_NAMES = self.class_names
COCOMeta = _COCOMeta()
@@ -200,7 +200,7 @@ class COCODetection(object):
if __name__ == '__main__':
c = COCODetection(config.BASEDIR, 'train2014')
c = COCODetection(cfg.DATA.BASEDIR, 'train2014')
gt_boxes = c.load(add_gt=True, add_mask=True)
print("#Images:", len(gt_boxes))
c.print_class_histogram(gt_boxes)
@@ -2,17 +2,13 @@
# File: common.py
import numpy as np
import six
import cv2
from tensorpack.dataflow import RNGDataFlow
from tensorpack.dataflow.imgaug import transform
from tensorpack.utils import logger
import pycocotools.mask as cocomask
import config
class DataFromListOfDict(RNGDataFlow):
def __init__(self, lst, keys, shuffle=False):
@@ -138,21 +134,3 @@ def filter_boxes_inside_shape(boxes, shape):
(boxes[:, 2] <= w) &
(boxes[:, 3] <= h))[0]
return indices, boxes[indices, :]
def write_config_from_args(configs):
for cfg in configs:
k, v = cfg.split('=', maxsplit=1)
assert k in dir(config), "Unknown config key: {}".format(k)
oldv = getattr(config, k)
if not isinstance(oldv, six.text_type):
v = eval(v)
setattr(config, k, v)
def print_config():
logger.info("Config: ------------------------------------------")
for k in dir(config):
if k == k.upper():
logger.info("{} = {}".format(k, getattr(config, k)))
logger.info("--------------------------------------------------")
# -*- coding: utf-8 -*-
# File: config.py
import numpy as np
import pprint
__all__ = ['config']
class AttrDict():
def __getattr__(self, name):
ret = AttrDict()
setattr(self, name, ret)
return ret
def __str__(self):
return pprint.pformat(self.to_dict(), indent=1)
__repr__ = __str__
def to_dict(self):
"""Convert to a nested dict. """
return {k: v.to_dict() if isinstance(v, AttrDict) else v
for k, v in self.__dict__.items()}
def update_args(self, args):
"""Update from command line args. """
for cfg in args:
keys, v = cfg.split('=', maxsplit=1)
keylist = keys.split('.')
dic = self
for i, k in enumerate(keylist[:-1]):
assert k in dir(dic), "Unknown config key: {}".format(keys)
dic = getattr(dic, k)
key = keylist[-1]
oldv = getattr(dic, key)
if not isinstance(oldv, str):
v = eval(v)
setattr(dic, key, v)
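A quick usage sketch of the class above: attribute access silently creates nested namespaces, and update_args applies dotted KEY=value overrides, eval()-ing each value unless the existing value is a string.

c = AttrDict()
c.TRAIN.BASE_LR = 1e-2    # first access to c.TRAIN auto-creates a nested AttrDict
c.MODE_MASK = True
c.update_args(["TRAIN.BASE_LR=0.02", "MODE_MASK=False"])
print(c.TRAIN.BASE_LR, c.MODE_MASK)   # -> 0.02 False
print(c)                              # __str__ pretty-prints the nested dict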
config = AttrDict()
_C = config # short alias to save typing
# mode flags ---------------------
TRAINER = 'replicated' # options: 'horovod', 'replicated'
NUM_GPUS = None # by default, will be set from code
MODE_MASK = True
MODE_FPN = False
_C.TRAINER = 'replicated' # options: 'horovod', 'replicated'
_C.MODE_MASK = True
_C.MODE_FPN = False
# dataset -----------------------
BASEDIR = '/path/to/your/COCO/DIR'
TRAIN_DATASET = ['train2014', 'valminusminival2014'] # i.e., trainval35k
VAL_DATASET = 'minival2014' # For now, only support evaluation on single dataset
NUM_CLASS = 81 # 1 background + 80 categories
CLASS_NAMES = [] # NUM_CLASS strings. Needs to be populated later by data loader
_C.DATA.BASEDIR = '/path/to/your/COCO/DIR'
_C.DATA.TRAIN = ['train2014', 'valminusminival2014'] # i.e., trainval35k
_C.DATA.VAL = 'minival2014' # For now, only support evaluation on single dataset
_C.DATA.NUM_CLASS = 81 # 1 background + 80 categories
_C.DATA.CLASS_NAMES = [] # NUM_CLASS strings. Needs to be populated later by data loader
# basemodel ----------------------
RESNET_NUM_BLOCK = [3, 4, 6, 3] # for resnet50
_C.BACKBONE.RESNET_NUM_BLOCK = [3, 4, 6, 3] # for resnet50
# RESNET_NUM_BLOCK = [3, 4, 23, 3] # for resnet101
FREEZE_AFFINE = False # do not train affine parameters inside BN
_C.BACKBONE.FREEZE_AFFINE = False # do not train affine parameters inside BN
# Use a base model with TF-preferred pad mode,
# which may pad more pixels on right/bottom than top/left.
# TF_PAD_MODE=False is better for performance but will require a different base model.
# See https://github.com/tensorflow/tensorflow/issues/18213
TF_PAD_MODE = True
_C.BACKBONE.TF_PAD_MODE = True
# schedule -----------------------
BASE_LR = 1e-2
WARMUP = 1000 # in steps
STEPS_PER_EPOCH = 500
# The schedule and learning rate here are defined for a total batch size of 8.
# If not running with 8 GPUs, they will be adjusted automatically in code.
_C.TRAIN.NUM_GPUS = None # by default, will be set from code
_C.TRAIN.WEIGHT_DECAY = 1e-4
_C.TRAIN.BASE_LR = 1e-2
_C.TRAIN.WARMUP = 1000 # in steps
_C.TRAIN.STEPS_PER_EPOCH = 500
# LR_SCHEDULE = [120000, 160000, 180000] # "1x" schedule in detectron
# LR_SCHEDULE = [150000, 230000, 280000] # roughly a "1.5x" schedule
LR_SCHEDULE = [240000, 320000, 360000] # "2x" schedule in detectron
_C.TRAIN.LR_SCHEDULE = [240000, 320000, 360000] # "2x" schedule in detectron
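For scale: at STEPS_PER_EPOCH = 500, the 360k-step "2x" endpoint corresponds to 720 of tensorpack's 500-step "epochs".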
# image resolution --------------------
SHORT_EDGE_SIZE = 800
MAX_SIZE = 1333
# Alternative (worse & faster) setting: 600, 1024
# preprocessing --------------------
_C.PREPROC.SHORT_EDGE_SIZE = 800
_C.PREPROC.MAX_SIZE = 1333
# Alternative old (worse & faster) setting: 600, 1024
# anchors -------------------------
ANCHOR_STRIDE = 16
ANCHOR_STRIDES_FPN = (4, 8, 16, 32, 64) # strides for each FPN level. Must be the same length as ANCHOR_SIZES
FPN_RESOLUTION_REQUIREMENT = 32 # image size into the backbone has to be multiple of this number
ANCHOR_SIZES = (32, 64, 128, 256, 512) # sqrt(area) of the anchor box
ANCHOR_RATIOS = (0.5, 1., 2.)
NUM_ANCHOR = len(ANCHOR_SIZES) * len(ANCHOR_RATIOS)
POSITIVE_ANCHOR_THRES = 0.7
NEGATIVE_ANCHOR_THRES = 0.3
BBOX_DECODE_CLIP = np.log(MAX_SIZE / 16.0) # to avoid too large numbers.
_C.RPN.ANCHOR_STRIDE = 16
_C.RPN.ANCHOR_SIZES = (32, 64, 128, 256, 512) # sqrt(area) of the anchor box
_C.RPN.ANCHOR_RATIOS = (0.5, 1., 2.)
_C.RPN.NUM_ANCHOR = len(_C.RPN.ANCHOR_SIZES) * len(_C.RPN.ANCHOR_RATIOS)
_C.RPN.POSITIVE_ANCHOR_THRES = 0.7
_C.RPN.NEGATIVE_ANCHOR_THRES = 0.3
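With these defaults, NUM_ANCHOR = 5 * 3 = 15 anchors per feature-map cell in C4 mode; in FPN mode each level instead carries a single size combined with the three ratios.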
# rpn training -------------------------
RPN_FG_RATIO = 0.5 # fg ratio among selected RPN anchors
RPN_BATCH_PER_IM = 256 # total (across FPN levels) number of anchors that are marked valid
RPN_MIN_SIZE = 0
RPN_PROPOSAL_NMS_THRESH = 0.7
TRAIN_PRE_NMS_TOPK = 12000
TRAIN_POST_NMS_TOPK = 2000
TRAIN_FPN_NMS_TOPK = 2000
CROWD_OVERLAP_THRES = 0.7 # boxes overlapping crowd will be ignored.
_C.RPN.FG_RATIO = 0.5 # fg ratio among selected RPN anchors
_C.RPN.BATCH_PER_IM = 256 # total (across FPN levels) number of anchors that are marked valid
_C.RPN.MIN_SIZE = 0
_C.RPN.PROPOSAL_NMS_THRESH = 0.7
_C.RPN.TRAIN_PRE_NMS_TOPK = 12000
_C.RPN.TRAIN_POST_NMS_TOPK = 2000
_C.RPN.CROWD_OVERLAP_THRES = 0.7 # boxes overlapping crowd will be ignored.
# fastrcnn training ---------------------
FASTRCNN_BATCH_PER_IM = 512
FASTRCNN_BBOX_REG_WEIGHTS = [10., 10., 5., 5.] # Better but non-standard setting: [20, 20, 10, 10]
FASTRCNN_FG_THRESH = 0.5
FASTRCNN_FG_RATIO = 0.25 # fg ratio in a ROI batch
_C.FRCNN.BATCH_PER_IM = 512
_C.FRCNN.BBOX_REG_WEIGHTS = [10., 10., 5., 5.] # Better but non-standard setting: [20, 20, 10, 10]
_C.FRCNN.FG_THRESH = 0.5
_C.FRCNN.FG_RATIO = 0.25 # fg ratio in a ROI batch
# modeling -------------------------
FPN_NUM_CHANNEL = 256
# FPN -------------------------
_C.FPN.ANCHOR_STRIDES = (4, 8, 16, 32, 64) # strides for each FPN level. Must be the same length as ANCHOR_SIZES
_C.FPN.RESOLUTION_REQUIREMENT = 32 # image size into the backbone has to be multiple of this number
_C.FPN.NUM_CHANNEL = 256
# conv head and fc head are only used in FPN.
# For C4 models, the head is C5
FPN_FASTRCNN_HEAD_FUNC = 'fastrcnn_2fc_head' # choices: fastrcnn_2fc_head, fastrcnn_4conv1fc_head
FASTRCNN_CONV_HEAD_DIM = 256
FASTRCNN_FC_HEAD_DIM = 1024
MASKRCNN_HEAD_DIM = 256
_C.FPN.FRCNN_HEAD_FUNC = 'fastrcnn_2fc_head' # choices: fastrcnn_2fc_head, fastrcnn_4conv1fc_head
_C.FPN.FRCNN_CONV_HEAD_DIM = 256
_C.FPN.FRCNN_FC_HEAD_DIM = 1024
_C.RPN.TRAIN_FPN_NMS_TOPK = 2000
_C.RPN.TEST_FPN_NMS_TOPK = 1000
# Mask-RCNN
_C.MRCNN.HEAD_DIM = 256
# testing -----------------------
TEST_PRE_NMS_TOPK = 6000
TEST_POST_NMS_TOPK = 1000 # if you encounter OOM in inference, set this to a smaller number
TEST_FPN_NMS_TOPK = 1000
FASTRCNN_NMS_THRESH = 0.5
RESULT_SCORE_THRESH = 0.05
RESULT_SCORE_THRESH_VIS = 0.3 # only visualize confident results
RESULTS_PER_IM = 100
_C.RPN.TEST_PRE_NMS_TOPK = 6000
_C.RPN.TEST_POST_NMS_TOPK = 1000 # if you encounter OOM in inference, set this to a smaller number
_C.TEST.FRCNN_NMS_THRESH = 0.5
_C.TEST.RESULT_SCORE_THRESH = 0.05
_C.TEST.RESULT_SCORE_THRESH_VIS = 0.3 # only visualize confident results
_C.TEST.RESULTS_PER_IM = 100
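A hypothetical sketch of how a training script consumes this config (the actual wiring lives in a file not expanded in this view): parse repeated KEY.SUBKEY=value tokens and hand them to update_args.

import argparse
from config import config as cfg

parser = argparse.ArgumentParser()
parser.add_argument('--config', nargs='+', default=[],
                    help='overrides, e.g. MODE_FPN=True TRAIN.BASE_LR=0.02')
args = parser.parse_args()
cfg.update_args(args.config)   # applies dotted KEY=value overrides to the defaults above
print(cfg)                     # inspect the final nested config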
@@ -21,7 +21,7 @@ from utils.np_box_ops import area as np_area
from common import (
DataFromListOfDict, CustomResize, filter_boxes_inside_shape,
box_to_point8, point8_to_box, segmentation_to_mask)
import config
from config import config as cfg
class MalformedData(BaseException):
@@ -30,8 +30,8 @@ class MalformedData(BaseException):
@memoized
def get_all_anchors(
stride=config.ANCHOR_STRIDE,
sizes=config.ANCHOR_SIZES):
stride=cfg.RPN.ANCHOR_STRIDE,
sizes=cfg.RPN.ANCHOR_SIZES):
"""
Get all anchors in the largest possible image, shifted, as floatbox
Args:
@@ -49,14 +49,14 @@ def get_all_anchors(
cell_anchors = generate_anchors(
stride,
scales=np.array(sizes, dtype=np.float) / stride,
ratios=np.array(config.ANCHOR_RATIOS, dtype=np.float))
ratios=np.array(cfg.RPN.ANCHOR_RATIOS, dtype=np.float))
# anchors are intbox here.
# anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)
max_size = config.MAX_SIZE
if config.MODE_FPN:
max_size = cfg.PREPROC.MAX_SIZE
if cfg.MODE_FPN:
# TODO setting this in config is perhaps better
size_mult = config.FPN_RESOLUTION_REQUIREMENT * 1.
size_mult = cfg.FPN.RESOLUTION_REQUIREMENT * 1.
max_size = np.ceil(max_size / size_mult) * size_mult
field_size = int(np.ceil(max_size / stride))
shifts = np.arange(0, field_size) * stride
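A worked example of the rounding above, using this commit's defaults (not code from the diff):

import numpy as np
max_size = 1333                                        # cfg.PREPROC.MAX_SIZE
size_mult = 32.                                        # cfg.FPN.RESOLUTION_REQUIREMENT
max_size = np.ceil(max_size / size_mult) * size_mult   # ceil(41.66) * 32 = 1344.0
field_size = int(np.ceil(max_size / 16))               # stride 16 -> 84 cells per side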
@@ -81,8 +81,8 @@ def get_all_anchors(
@memoized
def get_all_anchors_fpn(
strides=config.ANCHOR_STRIDES_FPN,
sizes=config.ANCHOR_SIZES):
strides=cfg.FPN.ANCHOR_STRIDES,
sizes=cfg.RPN.ANCHOR_SIZES):
"""
Returns:
[anchors]: each element is an S x S x NUM_ANCHOR_RATIOS x 4 array.
@@ -132,8 +132,8 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
# the order of setting neg/pos labels matters
anchor_labels[anchors_with_max_iou_per_gt] = 1
anchor_labels[ious_max_per_anchor >= config.POSITIVE_ANCHOR_THRES] = 1
anchor_labels[ious_max_per_anchor < config.NEGATIVE_ANCHOR_THRES] = 0
anchor_labels[ious_max_per_anchor >= cfg.RPN.POSITIVE_ANCHOR_THRES] = 1
anchor_labels[ious_max_per_anchor < cfg.RPN.NEGATIVE_ANCHOR_THRES] = 0
# We can label all non-ignore candidate boxes which overlap crowd as ignore
# But Detectron did not do this.
@@ -141,11 +141,11 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
# cand_inds = np.where(anchor_labels >= 0)[0]
# cand_anchors = anchors[cand_inds]
# ious = np_iou(cand_anchors, crowd_boxes)
# overlap_with_crowd = cand_inds[ious.max(axis=1) > config.CROWD_OVERLAP_THRES]
# overlap_with_crowd = cand_inds[ious.max(axis=1) > cfg.RPN.CROWD_OVERLAP_THRES]
# anchor_labels[overlap_with_crowd] = -1
# Subsample fg labels: ignore some fg if fg is too many
target_num_fg = int(config.RPN_BATCH_PER_IM * config.RPN_FG_RATIO)
target_num_fg = int(cfg.RPN.BATCH_PER_IM * cfg.RPN.FG_RATIO)
fg_inds = filter_box_label(anchor_labels, 1, target_num_fg)
# Keep an image even if there are no foreground anchors
# if len(fg_inds) == 0:
@@ -156,14 +156,14 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
if old_num_bg == 0:
# No valid bg in this image, skip.
raise MalformedData("No valid background for RPN!")
target_num_bg = config.RPN_BATCH_PER_IM - len(fg_inds)
target_num_bg = cfg.RPN.BATCH_PER_IM - len(fg_inds)
filter_box_label(anchor_labels, 0, target_num_bg) # ignore return values
# Set anchor boxes: the best gt_box for each fg anchor
anchor_boxes = np.zeros((NA, 4), dtype='float32')
fg_boxes = gt_boxes[ious_argmax_per_anchor[fg_inds], :]
anchor_boxes[fg_inds, :] = fg_boxes
# assert len(fg_inds) + np.sum(anchor_labels == 0) == config.RPN_BATCH_PER_IM
# assert len(fg_inds) + np.sum(anchor_labels == 0) == cfg.RPN.BATCH_PER_IM
return anchor_labels, anchor_boxes
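With the defaults above, the sampler aims for int(256 * 0.5) = 128 foreground anchors; background labels then fill the remainder of the 256-anchor budget, and everything else stays ignored.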
@@ -192,12 +192,12 @@ def get_rpn_anchor_input(im, boxes, is_crowd):
# Fill them back to original size: fHxfWx1, fHxfWx4
anchorH, anchorW = all_anchors.shape[:2]
featuremap_labels = -np.ones((anchorH * anchorW * config.NUM_ANCHOR, ), dtype='int32')
featuremap_labels = -np.ones((anchorH * anchorW * cfg.RPN.NUM_ANCHOR, ), dtype='int32')
featuremap_labels[inside_ind] = anchor_labels
featuremap_labels = featuremap_labels.reshape((anchorH, anchorW, config.NUM_ANCHOR))
featuremap_boxes = np.zeros((anchorH * anchorW * config.NUM_ANCHOR, 4), dtype='float32')
featuremap_labels = featuremap_labels.reshape((anchorH, anchorW, cfg.RPN.NUM_ANCHOR))
featuremap_boxes = np.zeros((anchorH * anchorW * cfg.RPN.NUM_ANCHOR, 4), dtype='float32')
featuremap_boxes[inside_ind, :] = anchor_gt_boxes
featuremap_boxes = featuremap_boxes.reshape((anchorH, anchorW, config.NUM_ANCHOR, 4))
featuremap_boxes = featuremap_boxes.reshape((anchorH, anchorW, cfg.RPN.NUM_ANCHOR, 4))
return featuremap_labels, featuremap_boxes
@@ -233,7 +233,7 @@ def get_multilevel_rpn_anchor_input(im, boxes, is_crowd):
start = 0
multilevel_inputs = []
for level_anchor in anchors_per_level:
assert level_anchor.shape[2] == len(config.ANCHOR_RATIOS)
assert level_anchor.shape[2] == len(cfg.RPN.ANCHOR_RATIOS)
anchor_shape = level_anchor.shape[:3] # fHxfWxNUM_ANCHOR_RATIOS
num_anchor_this_level = np.prod(anchor_shape)
end = start + num_anchor_this_level
@@ -263,7 +263,7 @@ def get_train_dataflow():
"""
imgs = COCODetection.load_many(
config.BASEDIR, config.TRAIN_DATASET, add_gt=True, add_mask=config.MODE_MASK)
cfg.DATA.BASEDIR, cfg.DATA.TRAIN, add_gt=True, add_mask=cfg.MODE_MASK)
"""
To train on your own data, change this to your loader.
Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
@@ -292,7 +292,7 @@ def get_train_dataflow():
ds = DataFromList(imgs, shuffle=True)
aug = imgaug.AugmentorList(
[CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE),
[CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE),
imgaug.Flip(horiz=True)])
def preprocess(img):
@@ -313,7 +313,7 @@ def get_train_dataflow():
# rpn anchor:
try:
if config.MODE_FPN:
if cfg.MODE_FPN:
multilevel_anchor_inputs = get_multilevel_rpn_anchor_input(im, boxes, is_crowd)
anchor_inputs = itertools.chain.from_iterable(multilevel_anchor_inputs)
else:
@@ -331,7 +331,7 @@ def get_train_dataflow():
ret = [im] + list(anchor_inputs) + [boxes, klass]
if config.MODE_MASK:
if cfg.MODE_MASK:
# augmentation will modify the polys in-place
segmentation = copy.deepcopy(img['segmentation'])
segmentation = [segmentation[k] for k in range(len(segmentation)) if not is_crowd[k]]
@@ -353,7 +353,7 @@ def get_train_dataflow():
# tpviz.interactive_imshow(viz)
return ret
if config.TRAINER == 'horovod':
if cfg.TRAINER == 'horovod':
ds = MultiThreadMapData(ds, 5, preprocess)
# MPI does not like fork()
else:
@@ -362,7 +362,7 @@ def get_train_dataflow():
def get_eval_dataflow():
imgs = COCODetection.load_many(config.BASEDIR, config.VAL_DATASET, add_gt=False)
imgs = COCODetection.load_many(cfg.DATA.BASEDIR, cfg.DATA.VAL, add_gt=False)
# unlike training, do not filter out any images
ds = DataFromListOfDict(imgs, ['file_name', 'id'])
@@ -371,7 +371,7 @@ def get_eval_dataflow():
assert im is not None, fname
return im
ds = MapDataComponent(ds, f, 0)
if config.TRAINER != 'horovod':
if cfg.TRAINER != 'horovod':
ds = PrefetchDataZMQ(ds, 1)
return ds
@@ -379,7 +379,7 @@ def get_eval_dataflow():
if __name__ == '__main__':
import os
from tensorpack.dataflow import PrintData
config.BASEDIR = os.path.expanduser('~/data/coco')
cfg.DATA.BASEDIR = os.path.expanduser('~/data/coco')
ds = get_train_dataflow()
ds = PrintData(ds, 100)
TestDataSpeed(ds, 50000).start()
@@ -15,7 +15,7 @@ import pycocotools.mask as cocomask
from coco import COCOMeta
from common import CustomResize, clip_boxes
import config
from config import config as cfg
DetectionResult = namedtuple(
'DetectionResult',
@@ -69,7 +69,7 @@ def detect_one_image(img, model_func):
"""
orig_shape = img.shape[:2]
resizer = CustomResize(config.SHORT_EDGE_SIZE, config.MAX_SIZE)
resizer = CustomResize(cfg.PREPROC.SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)
resized_img = resizer.augment(img)
scale = (resized_img.shape[0] * 1.0 / img.shape[0] + resized_img.shape[1] * 1.0 / img.shape[1]) / 2
boxes, probs, labels, *masks = model_func(resized_img)
@@ -131,10 +131,10 @@ def eval_coco(df, detect_func):
# https://github.com/pdollar/coco/blob/master/PythonAPI/pycocoEvalDemo.ipynb
def print_evaluation_scores(json_file):
ret = {}
assert config.BASEDIR and os.path.isdir(config.BASEDIR)
assert cfg.DATA.BASEDIR and os.path.isdir(cfg.DATA.BASEDIR)
annofile = os.path.join(
config.BASEDIR, 'annotations',
'instances_{}.json'.format(config.VAL_DATASET))
cfg.DATA.BASEDIR, 'annotations',
'instances_{}.json'.format(cfg.DATA.VAL))
coco = COCO(annofile)
cocoDt = coco.loadRes(json_file)
cocoEval = COCOeval(coco, cocoDt, 'bbox')
@@ -145,7 +145,7 @@ def print_evaluation_scores(json_file):
for k in range(6):
ret['mAP(bbox)/' + fields[k]] = cocoEval.stats[k]
if config.MODE_MASK:
if cfg.MODE_MASK:
cocoEval = COCOeval(coco, cocoDt, 'segm')
cocoEval.evaluate()
cocoEval.accumulate()
@@ -15,7 +15,7 @@ from tensorpack.models import (
from utils.box_ops import pairwise_iou
from utils.box_ops import area as tf_area
from model_box import roi_align, clip_boxes
import config
from config import config as cfg
@layer_register(log_shape=True)
@@ -91,7 +91,7 @@ def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
placeholder = 0.
label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits)
label_loss = tf.reduce_sum(label_loss) * (1. / config.RPN_BATCH_PER_IM)
label_loss = tf.reduce_sum(label_loss) * (1. / cfg.RPN.BATCH_PER_IM)
label_loss = tf.where(tf.equal(nr_valid, 0), placeholder, label_loss, name='label_loss')
pos_anchor_boxes = tf.boolean_mask(anchor_boxes, pos_mask)
@@ -100,7 +100,7 @@ def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
box_loss = tf.losses.huber_loss(
pos_anchor_boxes, pos_box_logits, delta=delta,
reduction=tf.losses.Reduction.SUM) / delta
box_loss = box_loss * (1. / config.RPN_BATCH_PER_IM)
box_loss = box_loss * (1. / cfg.RPN.BATCH_PER_IM)
box_loss = tf.where(tf.equal(nr_pos, 0), placeholder, box_loss, name='box_loss')
add_moving_summary(label_loss, box_loss, nr_valid, nr_pos)
@@ -139,7 +139,7 @@ def generate_rpn_proposals(boxes, scores, img_shape,
topk_boxes_x1y1, topk_boxes_x2y2 = tf.split(topk_boxes_x1y1x2y2, 2, axis=1)
# nx1x2 each
wbhb = tf.squeeze(topk_boxes_x2y2 - topk_boxes_x1y1, axis=1)
valid = tf.reduce_all(wbhb > config.RPN_MIN_SIZE, axis=1) # n,
valid = tf.reduce_all(wbhb > cfg.RPN.MIN_SIZE, axis=1) # n,
topk_valid_boxes_x1y1x2y2 = tf.boolean_mask(topk_boxes_x1y1x2y2, valid)
topk_valid_scores = tf.boolean_mask(topk_scores, valid)
@@ -152,7 +152,7 @@
# TODO use exp to work around a bug in TF1.9: https://github.com/tensorflow/tensorflow/issues/19578
tf.exp(topk_valid_scores),
max_output_size=post_nms_topk,
iou_threshold=config.RPN_PROPOSAL_NMS_THRESH)
iou_threshold=cfg.RPN.PROPOSAL_NMS_THRESH)
topk_valid_boxes = tf.reshape(topk_valid_boxes_x1y1x2y2, (-1, 4))
final_boxes = tf.gather(topk_valid_boxes, nms_indices)
@@ -209,17 +209,17 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
# #proposal=n+m from now on
def sample_fg_bg(iou):
fg_mask = tf.reduce_max(iou, axis=1) >= config.FASTRCNN_FG_THRESH
fg_mask = tf.reduce_max(iou, axis=1) >= cfg.FRCNN.FG_THRESH
fg_inds = tf.reshape(tf.where(fg_mask), [-1])
num_fg = tf.minimum(int(
config.FASTRCNN_BATCH_PER_IM * config.FASTRCNN_FG_RATIO),
cfg.FRCNN.BATCH_PER_IM * cfg.FRCNN.FG_RATIO),
tf.size(fg_inds), name='num_fg')
fg_inds = tf.random_shuffle(fg_inds)[:num_fg]
bg_inds = tf.reshape(tf.where(tf.logical_not(fg_mask)), [-1])
num_bg = tf.minimum(
config.FASTRCNN_BATCH_PER_IM - num_fg,
cfg.FRCNN.BATCH_PER_IM - num_fg,
tf.size(bg_inds), name='num_bg')
bg_inds = tf.random_shuffle(bg_inds)[:num_bg]
@@ -274,7 +274,7 @@ def fastrcnn_2fc_head(feature, num_classes):
Returns:
cls_logits (N x num_class), reg_logits (N x (num_class-1) x 4)
"""
dim = config.FASTRCNN_FC_HEAD_DIM
dim = cfg.FPN.FRCNN_FC_HEAD_DIM
init = tf.variance_scaling_initializer()
hidden = FullyConnected('fc6', feature, dim, kernel_initializer=init, activation=tf.nn.relu)
hidden = FullyConnected('fc7', hidden, dim, kernel_initializer=init, activation=tf.nn.relu)
@@ -297,8 +297,8 @@ def fastrcnn_Xconv1fc_head(feature, num_classes, num_convs):
kernel_initializer=tf.variance_scaling_initializer(
scale=2.0, mode='fan_out', distribution='normal')):
for k in range(num_convs):
l = Conv2D('conv{}'.format(k), l, config.FASTRCNN_CONV_HEAD_DIM, 3, activation=tf.nn.relu)
l = FullyConnected('fc', l, config.FASTRCNN_FC_HEAD_DIM,
l = Conv2D('conv{}'.format(k), l, cfg.FPN.FRCNN_CONV_HEAD_DIM, 3, activation=tf.nn.relu)
l = FullyConnected('fc', l, cfg.FPN.FRCNN_FC_HEAD_DIM,
kernel_initializer=tf.variance_scaling_initializer(), activation=tf.nn.relu)
return fastrcnn_outputs('outputs', l, num_classes)
@@ -356,8 +356,8 @@ def fastrcnn_predictions(boxes, probs):
boxes: n x #cat x 4 floatbox in float32
probs: n x #class
"""
assert boxes.shape[1] == config.NUM_CLASS - 1
assert probs.shape[1] == config.NUM_CLASS
assert boxes.shape[1] == cfg.DATA.NUM_CLASS - 1
assert probs.shape[1] == cfg.DATA.NUM_CLASS
boxes = tf.transpose(boxes, [1, 0, 2]) # #catxnx4
probs = tf.transpose(probs[:, 1:], [1, 0]) # #catxn
@@ -371,12 +371,12 @@ def fastrcnn_predictions(boxes, probs):
prob, box = X
output_shape = tf.shape(prob)
# filter by score threshold
ids = tf.reshape(tf.where(prob > config.RESULT_SCORE_THRESH), [-1])
ids = tf.reshape(tf.where(prob > cfg.TEST.RESULT_SCORE_THRESH), [-1])
prob = tf.gather(prob, ids)
box = tf.gather(box, ids)
# NMS within each class
selection = tf.image.non_max_suppression(
box, prob, config.RESULTS_PER_IM, config.FASTRCNN_NMS_THRESH)
box, prob, cfg.TEST.RESULTS_PER_IM, cfg.TEST.FRCNN_NMS_THRESH)
selection = tf.to_int32(tf.gather(ids, selection))
# sort available in TF>1.4.0
# sorted_selection = tf.contrib.framework.sort(selection, direction='ASCENDING')
@@ -396,7 +396,7 @@ def fastrcnn_predictions(boxes, probs):
# filter again by sorting scores
topk_probs, topk_indices = tf.nn.top_k(
probs,
tf.minimum(config.RESULTS_PER_IM, tf.size(probs)),
tf.minimum(cfg.TEST.RESULTS_PER_IM, tf.size(probs)),
sorted=False)
filtered_selection = tf.gather(selected_indices, topk_indices)
filtered_selection = tf.reverse(filtered_selection, axis=[1], name='filtered_indices')
@@ -420,8 +420,8 @@ def maskrcnn_upXconv_head(feature, num_class, num_convs):
scale=2.0, mode='fan_out', distribution='normal')):
# c2's MSRAFill is fan_out
for k in range(num_convs):
l = Conv2D('fcn{}'.format(k), l, config.MASKRCNN_HEAD_DIM, 3, activation=tf.nn.relu)
l = Conv2DTranspose('deconv', l, config.MASKRCNN_HEAD_DIM, 2, strides=2, activation=tf.nn.relu)
l = Conv2D('fcn{}'.format(k), l, cfg.MRCNN.HEAD_DIM, 3, activation=tf.nn.relu)
l = Conv2DTranspose('deconv', l, cfg.MRCNN.HEAD_DIM, 2, strides=2, activation=tf.nn.relu)
l = Conv2D('conv', l, num_class - 1, 1)
return l
@@ -475,7 +475,7 @@ def fpn_model(features):
[tf.Tensor]: FPN features p2-p6
"""
assert len(features) == 4, features
num_channel = config.FPN_NUM_CHANNEL
num_channel = cfg.FPN.NUM_CHANNEL
def upsample2x(name, x):
return FixedUnPooling(
@@ -560,7 +560,7 @@ def multilevel_roi_align(features, rcnn_boxes, resolution):
# Crop patches from corresponding levels
for i, boxes, featuremap in zip(itertools.count(), level_boxes, features):
with tf.name_scope('roi_level{}'.format(i + 2)):
boxes_on_featuremap = boxes * (1.0 / config.ANCHOR_STRIDES_FPN[i])
boxes_on_featuremap = boxes * (1.0 / cfg.FPN.ANCHOR_STRIDES[i])
all_rois.append(roi_align(featuremap, boxes_on_featuremap, resolution))
all_rois = tf.concat(all_rois, axis=0) # NCHW
# -*- coding: utf-8 -*-
# File: model_box.py
import numpy as np
import tensorflow as tf
from tensorpack.tfutils.scope_utils import under_name_scope
import config
from config import config
@under_name_scope()
@@ -41,8 +42,8 @@ def decode_bbox_target(box_predictions, anchors):
waha = anchors_x2y2 - anchors_x1y1
xaya = (anchors_x2y2 + anchors_x1y1) * 0.5
wbhb = tf.exp(tf.minimum(
box_pred_twth, config.BBOX_DECODE_CLIP)) * waha
clip = np.log(config.PREPROC.MAX_SIZE / 16.)
wbhb = tf.exp(tf.minimum(box_pred_twth, clip)) * waha
xbyb = box_pred_txty * waha + xaya
x1y1 = xbyb - wbhb * 0.5
x2y2 = xbyb + wbhb * 0.5 # (...)x1x2
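With MAX_SIZE = 1333, clip = log(1333 / 16) ≈ 4.42, so the exp() can enlarge an anchor's width/height by a factor of at most about 83, keeping decoded boxes within one maximum-size image.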
@@ -174,7 +175,6 @@ if __name__ == '__main__':
Demonstrate what's wrong with tf.image.crop_and_resize:
"""
import tensorflow.contrib.eager as tfe
import numpy as np
tfe.enable_eager_execution()
# want to crop 2x2 out of a 5x5 image, and resize to 4x4
[One file's diff is collapsed in this view and not shown.]
@@ -8,7 +8,7 @@ from tensorpack.utils import viz
from tensorpack.utils.palette import PALETTE_RGB
from utils.np_box_ops import iou as np_iou
import config
from config import config as cfg
def draw_annotation(img, boxes, klass, is_crowd=None):
@@ -17,13 +17,13 @@ def draw_annotation(img, boxes, klass, is_crowd=None):
if is_crowd is not None:
assert len(boxes) == len(is_crowd)
for cls, crd in zip(klass, is_crowd):
clsname = config.CLASS_NAMES[cls]
clsname = cfg.DATA.CLASS_NAMES[cls]
if crd == 1:
clsname += ';Crowd'
labels.append(clsname)
else:
for cls in klass:
labels.append(config.CLASS_NAMES[cls])
labels.append(cfg.DATA.CLASS_NAMES[cls])
img = viz.draw_boxes(img, boxes, labels)
return img
@@ -57,7 +57,7 @@ def draw_predictions(img, boxes, scores):
return img
labels = scores.argmax(axis=1)
scores = scores.max(axis=1)
tags = ["{},{:.2f}".format(config.CLASS_NAMES[lb], score) for lb, score in zip(labels, scores)]
tags = ["{},{:.2f}".format(cfg.DATA.CLASS_NAMES[lb], score) for lb, score in zip(labels, scores)]
return viz.draw_boxes(img, boxes, tags)
@@ -72,7 +72,7 @@ def draw_final_outputs(img, results):
tags = []
for r in results:
tags.append(
"{},{:.2f}".format(config.CLASS_NAMES[r.class_id], r.score))
"{},{:.2f}".format(cfg.DATA.CLASS_NAMES[r.class_id], r.score))
boxes = np.asarray([r.box for r in results])
ret = viz.draw_boxes(img, boxes, tags)