Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
ccda3790
Commit
ccda3790
authored
Jul 09, 2018
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[MaskRCNN] MaskRCNN head with GN
parent
a50f2952
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
56 additions
and
36 deletions
+56
-36
examples/FasterRCNN/config.py
examples/FasterRCNN/config.py
+9
-6
examples/FasterRCNN/data.py
examples/FasterRCNN/data.py
+3
-3
examples/FasterRCNN/model_frcnn.py
examples/FasterRCNN/model_frcnn.py
+1
-0
examples/FasterRCNN/model_mrcnn.py
examples/FasterRCNN/model_mrcnn.py
+36
-23
examples/FasterRCNN/train.py
examples/FasterRCNN/train.py
+7
-4
No files found.
examples/FasterRCNN/config.py
View file @
ccda3790
...
@@ -63,8 +63,8 @@ _C.DATA.CLASS_NAMES = [] # NUM_CLASS strings. Needs to be populated later by da
...
@@ -63,8 +63,8 @@ _C.DATA.CLASS_NAMES = [] # NUM_CLASS strings. Needs to be populated later by da
_C
.
BACKBONE
.
WEIGHTS
=
''
# /path/to/weights.npz
_C
.
BACKBONE
.
WEIGHTS
=
''
# /path/to/weights.npz
_C
.
BACKBONE
.
RESNET_NUM_BLOCK
=
[
3
,
4
,
6
,
3
]
# for resnet50
_C
.
BACKBONE
.
RESNET_NUM_BLOCK
=
[
3
,
4
,
6
,
3
]
# for resnet50
# RESNET_NUM_BLOCK = [3, 4, 23, 3] # for resnet101
# RESNET_NUM_BLOCK = [3, 4, 23, 3] # for resnet101
_C
.
BACKBONE
.
FREEZE_AFFINE
=
False
# do not train affine parameters inside BN
_C
.
BACKBONE
.
FREEZE_AFFINE
=
False
# do not train affine parameters inside norm layers
_C
.
BACKBONE
.
NORM
=
'FreezeBN'
# options: FreezeBN, SyncBN
_C
.
BACKBONE
.
NORM
=
'FreezeBN'
# options: FreezeBN, SyncBN, GN
# Use a base model with TF-preferred padding mode,
# Use a base model with TF-preferred padding mode,
# which may pad more pixels on right/bottom than top/left.
# which may pad more pixels on right/bottom than top/left.
...
@@ -99,15 +99,15 @@ _C.PREPROC.PIXEL_STD = [58.395, 57.12, 57.375]
...
@@ -99,15 +99,15 @@ _C.PREPROC.PIXEL_STD = [58.395, 57.12, 57.375]
_C
.
RPN
.
ANCHOR_STRIDE
=
16
_C
.
RPN
.
ANCHOR_STRIDE
=
16
_C
.
RPN
.
ANCHOR_SIZES
=
(
32
,
64
,
128
,
256
,
512
)
# sqrtarea of the anchor box
_C
.
RPN
.
ANCHOR_SIZES
=
(
32
,
64
,
128
,
256
,
512
)
# sqrtarea of the anchor box
_C
.
RPN
.
ANCHOR_RATIOS
=
(
0.5
,
1.
,
2.
)
_C
.
RPN
.
ANCHOR_RATIOS
=
(
0.5
,
1.
,
2.
)
_C
.
RPN
.
POSITIVE_ANCHOR_THRES
=
0.7
_C
.
RPN
.
POSITIVE_ANCHOR_THRES
H
=
0.7
_C
.
RPN
.
NEGATIVE_ANCHOR_THRES
=
0.3
_C
.
RPN
.
NEGATIVE_ANCHOR_THRES
H
=
0.3
# rpn training -------------------------
# rpn training -------------------------
_C
.
RPN
.
FG_RATIO
=
0.5
# fg ratio among selected RPN anchors
_C
.
RPN
.
FG_RATIO
=
0.5
# fg ratio among selected RPN anchors
_C
.
RPN
.
BATCH_PER_IM
=
256
# total (across FPN levels) number of anchors that are marked valid
_C
.
RPN
.
BATCH_PER_IM
=
256
# total (across FPN levels) number of anchors that are marked valid
_C
.
RPN
.
MIN_SIZE
=
0
_C
.
RPN
.
MIN_SIZE
=
0
_C
.
RPN
.
PROPOSAL_NMS_THRESH
=
0.7
_C
.
RPN
.
PROPOSAL_NMS_THRESH
=
0.7
_C
.
RPN
.
CROWD_OVERLAP_THRES
=
0.7
# boxes overlapping crowd will be ignored.
_C
.
RPN
.
CROWD_OVERLAP_THRES
H
=
0.7
# boxes overlapping crowd will be ignored.
_C
.
RPN
.
HEAD_DIM
=
1024
# used in C4 only
_C
.
RPN
.
HEAD_DIM
=
1024
# used in C4 only
# RPN proposal selection -------------------------------
# RPN proposal selection -------------------------------
...
@@ -134,9 +134,11 @@ _C.FPN.NUM_CHANNEL = 256
...
@@ -134,9 +134,11 @@ _C.FPN.NUM_CHANNEL = 256
# conv head and fc head are only used in FPN.
# conv head and fc head are only used in FPN.
# For C4 models, the head is C5
# For C4 models, the head is C5
_C
.
FPN
.
FRCNN_HEAD_FUNC
=
'fastrcnn_2fc_head'
_C
.
FPN
.
FRCNN_HEAD_FUNC
=
'fastrcnn_2fc_head'
# choices: fastrcnn_2fc_head, fastrcnn_4conv1fc_head, fastrcnn_4conv1fc_gn_head
# choices: fastrcnn_2fc_head, fastrcnn_4conv1fc_{,gn_}head
_C
.
FPN
.
FRCNN_CONV_HEAD_DIM
=
256
_C
.
FPN
.
FRCNN_CONV_HEAD_DIM
=
256
_C
.
FPN
.
FRCNN_FC_HEAD_DIM
=
1024
_C
.
FPN
.
FRCNN_FC_HEAD_DIM
=
1024
_C
.
FPN
.
MRCNN_HEAD_FUNC
=
'maskrcnn_up4conv_head'
# choices: maskrcnn_up4conv_{,gn_}head
# Mask-RCNN
# Mask-RCNN
_C
.
MRCNN
.
HEAD_DIM
=
256
_C
.
MRCNN
.
HEAD_DIM
=
256
...
@@ -168,6 +170,7 @@ def finalize_configs(is_training):
...
@@ -168,6 +170,7 @@ def finalize_configs(is_training):
_C
.
PREPROC
.
MAX_SIZE
=
np
.
ceil
(
_C
.
PREPROC
.
MAX_SIZE
/
size_mult
)
*
size_mult
_C
.
PREPROC
.
MAX_SIZE
=
np
.
ceil
(
_C
.
PREPROC
.
MAX_SIZE
/
size_mult
)
*
size_mult
assert
_C
.
FPN
.
PROPOSAL_MODE
in
[
'Level'
,
'Joint'
]
assert
_C
.
FPN
.
PROPOSAL_MODE
in
[
'Level'
,
'Joint'
]
assert
_C
.
FPN
.
FRCNN_HEAD_FUNC
.
endswith
(
'_head'
)
assert
_C
.
FPN
.
FRCNN_HEAD_FUNC
.
endswith
(
'_head'
)
assert
_C
.
FPN
.
MRCNN_HEAD_FUNC
.
endswith
(
'_head'
)
if
is_training
:
if
is_training
:
os
.
environ
[
'TF_AUTOTUNE_THRESHOLD'
]
=
'1'
os
.
environ
[
'TF_AUTOTUNE_THRESHOLD'
]
=
'1'
...
...
examples/FasterRCNN/data.py
View file @
ccda3790
...
@@ -128,8 +128,8 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
...
@@ -128,8 +128,8 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
# the order of setting neg/pos labels matter
# the order of setting neg/pos labels matter
anchor_labels
[
anchors_with_max_iou_per_gt
]
=
1
anchor_labels
[
anchors_with_max_iou_per_gt
]
=
1
anchor_labels
[
ious_max_per_anchor
>=
cfg
.
RPN
.
POSITIVE_ANCHOR_THRES
]
=
1
anchor_labels
[
ious_max_per_anchor
>=
cfg
.
RPN
.
POSITIVE_ANCHOR_THRES
H
]
=
1
anchor_labels
[
ious_max_per_anchor
<
cfg
.
RPN
.
NEGATIVE_ANCHOR_THRES
]
=
0
anchor_labels
[
ious_max_per_anchor
<
cfg
.
RPN
.
NEGATIVE_ANCHOR_THRES
H
]
=
0
# We can label all non-ignore candidate boxes which overlap crowd as ignore
# We can label all non-ignore candidate boxes which overlap crowd as ignore
# But detectron did not do this.
# But detectron did not do this.
...
@@ -137,7 +137,7 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
...
@@ -137,7 +137,7 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
# cand_inds = np.where(anchor_labels >= 0)[0]
# cand_inds = np.where(anchor_labels >= 0)[0]
# cand_anchors = anchors[cand_inds]
# cand_anchors = anchors[cand_inds]
# ious = np_iou(cand_anchors, crowd_boxes)
# ious = np_iou(cand_anchors, crowd_boxes)
# overlap_with_crowd = cand_inds[ious.max(axis=1) > cfg.RPN.CROWD_OVERLAP_THRES]
# overlap_with_crowd = cand_inds[ious.max(axis=1) > cfg.RPN.CROWD_OVERLAP_THRESH]
# anchor_labels[overlap_with_crowd] = -1
# anchor_labels[overlap_with_crowd] = -1
# Subsample fg labels: ignore some fg if fg is too many
# Subsample fg labels: ignore some fg if fg is too many
...
...
examples/FasterRCNN/model_frcnn.py
View file @
ccda3790
...
@@ -247,6 +247,7 @@ def fastrcnn_Xconv1fc_head(feature, num_classes, num_convs, norm=None):
...
@@ -247,6 +247,7 @@ def fastrcnn_Xconv1fc_head(feature, num_classes, num_convs, norm=None):
Returns:
Returns:
cls_logits (Nxnum_class), reg_logits (Nx num_class-1 x 4)
cls_logits (Nxnum_class), reg_logits (Nx num_class-1 x 4)
"""
"""
assert
norm
in
[
None
,
'GN'
],
norm
l
=
feature
l
=
feature
with
argscope
(
Conv2D
,
data_format
=
'channels_first'
,
with
argscope
(
Conv2D
,
data_format
=
'channels_first'
,
kernel_initializer
=
tf
.
variance_scaling_initializer
(
kernel_initializer
=
tf
.
variance_scaling_initializer
(
...
...
examples/FasterRCNN/model_mrcnn.py
View file @
ccda3790
...
@@ -8,32 +8,10 @@ from tensorpack.models import (
...
@@ -8,32 +8,10 @@ from tensorpack.models import (
from
tensorpack.tfutils.scope_utils
import
under_name_scope
from
tensorpack.tfutils.scope_utils
import
under_name_scope
from
tensorpack.tfutils.summary
import
add_moving_summary
from
tensorpack.tfutils.summary
import
add_moving_summary
from
basemodel
import
GroupNorm
from
config
import
config
as
cfg
from
config
import
config
as
cfg
@layer_register(log_shape=True)
def maskrcnn_upXconv_head(feature, num_category, num_convs):
    """
    Mask head: `num_convs` 3x3 convolutions, one stride-2 transposed
    convolution, then a 1x1 convolution that emits per-category logits.

    Args:
        feature (NxCx s x s): size is 7 in C4 models and 14 in FPN models.
        num_category(int):
        num_convs (int): number of convolution layers

    Returns:
        mask_logits (N x num_category x 2s x 2s):
    """
    # c2's MSRAFill is fan_out
    msra_fill = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_out', distribution='normal')
    net = feature
    with argscope([Conv2D, Conv2DTranspose],
                  data_format='channels_first',
                  kernel_initializer=msra_fill):
        for idx in range(num_convs):
            net = Conv2D('fcn{}'.format(idx), net, cfg.MRCNN.HEAD_DIM, 3,
                         activation=tf.nn.relu)
        # Upsample by 2 before the final per-category projection.
        net = Conv2DTranspose('deconv', net, cfg.MRCNN.HEAD_DIM, 2,
                              strides=2, activation=tf.nn.relu)
        net = Conv2D('conv', net, num_category, 1)
    return net
@
under_name_scope
()
@
under_name_scope
()
def
maskrcnn_loss
(
mask_logits
,
fg_labels
,
fg_target_masks
):
def
maskrcnn_loss
(
mask_logits
,
fg_labels
,
fg_target_masks
):
"""
"""
...
@@ -71,3 +49,38 @@ def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks):
...
@@ -71,3 +49,38 @@ def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks):
add_moving_summary
(
loss
,
accuracy
,
fg_pixel_ratio
,
pos_accuracy
)
add_moving_summary
(
loss
,
accuracy
,
fg_pixel_ratio
,
pos_accuracy
)
return
loss
return
loss
@layer_register(log_shape=True)
def maskrcnn_upXconv_head(feature, num_category, num_convs, norm=None):
    """
    Mask head: `num_convs` 3x3 convolutions (each optionally followed by
    GroupNorm), one stride-2 transposed convolution, then a 1x1 convolution
    that emits per-category logits.

    Args:
        feature (NxCx s x s): size is 7 in C4 models and 14 in FPN models.
        num_category(int):
        num_convs (int): number of convolution layers
        norm (str or None): either None or 'GN'

    Returns:
        mask_logits (N x num_category x 2s x 2s):
    """
    assert norm in (None, 'GN'), norm
    use_group_norm = norm is not None
    # c2's MSRAFill is fan_out
    msra_fill = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_out', distribution='normal')
    net = feature
    with argscope([Conv2D, Conv2DTranspose],
                  data_format='channels_first',
                  kernel_initializer=msra_fill):
        for idx in range(num_convs):
            net = Conv2D('fcn{}'.format(idx), net, cfg.MRCNN.HEAD_DIM, 3,
                         activation=tf.nn.relu)
            # GroupNorm is applied on the already-activated conv output.
            if use_group_norm:
                net = GroupNorm('gn{}'.format(idx), net)
        # Upsample by 2 before the final per-category projection.
        net = Conv2DTranspose('deconv', net, cfg.MRCNN.HEAD_DIM, 2,
                              strides=2, activation=tf.nn.relu)
        net = Conv2D('conv', net, num_category, 1)
    return net
def maskrcnn_up4conv_head(*args, **kwargs):
    """
    Mask head with 4 convolution layers.

    Forwards all positional and keyword arguments to
    `maskrcnn_upXconv_head` with `num_convs` fixed to 4.
    """
    mask_logits = maskrcnn_upXconv_head(*args, num_convs=4, **kwargs)
    return mask_logits
def maskrcnn_up4conv_gn_head(*args, **kwargs):
    """
    Mask head with 4 convolution layers and GroupNorm.

    Forwards all positional and keyword arguments to
    `maskrcnn_upXconv_head` with `num_convs` fixed to 4 and `norm` fixed
    to 'GN'.
    """
    mask_logits = maskrcnn_upXconv_head(
        *args, num_convs=4, norm='GN', **kwargs)
    return mask_logits
examples/FasterRCNN/train.py
View file @
ccda3790
...
@@ -31,6 +31,7 @@ from basemodel import (
...
@@ -31,6 +31,7 @@ from basemodel import (
resnet_fpn_backbone
)
resnet_fpn_backbone
)
import
model_frcnn
import
model_frcnn
import
model_mrcnn
from
model_frcnn
import
(
from
model_frcnn
import
(
sample_fast_rcnn_targets
,
sample_fast_rcnn_targets
,
fastrcnn_outputs
,
fastrcnn_losses
,
fastrcnn_predictions
)
fastrcnn_outputs
,
fastrcnn_losses
,
fastrcnn_predictions
)
...
@@ -357,8 +358,9 @@ class ResNetFPNModel(DetectionModel):
...
@@ -357,8 +358,9 @@ class ResNetFPNModel(DetectionModel):
roi_feature_maskrcnn
=
multilevel_roi_align
(
roi_feature_maskrcnn
=
multilevel_roi_align
(
p23456
[:
4
],
fg_sampled_boxes
,
14
,
p23456
[:
4
],
fg_sampled_boxes
,
14
,
name_scope
=
'multilevel_roi_align_mask'
)
name_scope
=
'multilevel_roi_align_mask'
)
mask_logits
=
maskrcnn_upXconv_head
(
maskrcnn_head_func
=
getattr
(
model_mrcnn
,
cfg
.
FPN
.
MRCNN_HEAD_FUNC
)
'maskrcnn'
,
roi_feature_maskrcnn
,
cfg
.
DATA
.
NUM_CATEGORY
,
4
)
# #fg x #cat x 28 x 28
mask_logits
=
maskrcnn_head_func
(
'maskrcnn'
,
roi_feature_maskrcnn
,
cfg
.
DATA
.
NUM_CATEGORY
)
# #fg x #cat x 28 x 28
target_masks_for_fg
=
crop_and_resize
(
target_masks_for_fg
=
crop_and_resize
(
tf
.
expand_dims
(
gt_masks
,
1
),
tf
.
expand_dims
(
gt_masks
,
1
),
...
@@ -386,8 +388,9 @@ class ResNetFPNModel(DetectionModel):
...
@@ -386,8 +388,9 @@ class ResNetFPNModel(DetectionModel):
if
cfg
.
MODE_MASK
:
if
cfg
.
MODE_MASK
:
# Cascade inference needs roi transform with refined boxes.
# Cascade inference needs roi transform with refined boxes.
roi_feature_maskrcnn
=
multilevel_roi_align
(
p23456
[:
4
],
final_boxes
,
14
)
roi_feature_maskrcnn
=
multilevel_roi_align
(
p23456
[:
4
],
final_boxes
,
14
)
mask_logits
=
maskrcnn_upXconv_head
(
maskrcnn_head_func
=
getattr
(
model_mrcnn
,
cfg
.
FPN
.
MRCNN_HEAD_FUNC
)
'maskrcnn'
,
roi_feature_maskrcnn
,
cfg
.
DATA
.
NUM_CATEGORY
,
4
)
# #fg x #cat x 28 x 28
mask_logits
=
maskrcnn_head_func
(
'maskrcnn'
,
roi_feature_maskrcnn
,
cfg
.
DATA
.
NUM_CATEGORY
)
# #fg x #cat x 28 x 28
indices
=
tf
.
stack
([
tf
.
range
(
tf
.
size
(
final_labels
)),
tf
.
to_int32
(
final_labels
)
-
1
],
axis
=
1
)
indices
=
tf
.
stack
([
tf
.
range
(
tf
.
size
(
final_labels
)),
tf
.
to_int32
(
final_labels
)
-
1
],
axis
=
1
)
final_mask_logits
=
tf
.
gather_nd
(
mask_logits
,
indices
)
# #resultx28x28
final_mask_logits
=
tf
.
gather_nd
(
mask_logits
,
indices
)
# #resultx28x28
tf
.
sigmoid
(
final_mask_logits
,
name
=
'final_masks'
)
tf
.
sigmoid
(
final_mask_logits
,
name
=
'final_masks'
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment