Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
6041a1a4
Commit
6041a1a4
authored
Aug 25, 2018
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[MaskRCNN] BoxProposals struct to manage proposals; rename probs->scores
parent
cf97218c
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
122 additions
and
116 deletions
+122
-116
examples/FasterRCNN/README.md
examples/FasterRCNN/README.md
+1
-1
examples/FasterRCNN/model_frcnn.py
examples/FasterRCNN/model_frcnn.py
+78
-50
examples/FasterRCNN/train.py
examples/FasterRCNN/train.py
+43
-65
No files found.
examples/FasterRCNN/README.md
View file @
6041a1a4
...
@@ -59,7 +59,7 @@ To predict on an image (and show output in a window):
...
@@ -59,7 +59,7 @@ To predict on an image (and show output in a window):
./train.py --predict input.jpg --load /path/to/model --config SAME-AS-TRAINING
./train.py --predict input.jpg --load /path/to/model --config SAME-AS-TRAINING
```
```
To
E
valuate the performance of a model on COCO:
To
e
valuate the performance of a model on COCO:
```
```
./train.py --evaluate output.json --load /path/to/COCO-R50C4-MaskRCNN-Standard.npz \
./train.py --evaluate output.json --load /path/to/COCO-R50C4-MaskRCNN-Standard.npz \
--config SAME-AS-TRAINING
--config SAME-AS-TRAINING
...
...
examples/FasterRCNN/model_frcnn.py
View file @
6041a1a4
...
@@ -50,8 +50,9 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
...
@@ -50,8 +50,9 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
gt_labels: m, int32
gt_labels: m, int32
Returns:
Returns:
A BoxProposals instance.
sampled_boxes: tx4 floatbox, the rois
sampled_boxes: tx4 floatbox, the rois
sampled_labels: t int64 labels, in [0, #class
-1]
. Positive means foreground.
sampled_labels: t int64 labels, in [0, #class
)
. Positive means foreground.
fg_inds_wrt_gt: #fg indices, each in range [0, m-1].
fg_inds_wrt_gt: #fg indices, each in range [0, m-1].
It contains the matching GT of each foreground roi.
It contains the matching GT of each foreground roi.
"""
"""
...
@@ -94,9 +95,11 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
...
@@ -94,9 +95,11 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
[
tf
.
gather
(
gt_labels
,
fg_inds_wrt_gt
),
[
tf
.
gather
(
gt_labels
,
fg_inds_wrt_gt
),
tf
.
zeros_like
(
bg_inds
,
dtype
=
tf
.
int64
)],
axis
=
0
)
tf
.
zeros_like
(
bg_inds
,
dtype
=
tf
.
int64
)],
axis
=
0
)
# stop the gradient -- they are meant to be training targets
# stop the gradient -- they are meant to be training targets
return
tf
.
stop_gradient
(
ret_boxes
,
name
=
'sampled_proposal_boxes'
),
\
return
BoxProposals
(
tf
.
stop_gradient
(
ret_labels
,
name
=
'sampled_labels'
),
\
tf
.
stop_gradient
(
ret_boxes
,
name
=
'sampled_proposal_boxes'
),
tf
.
stop_gradient
(
fg_inds_wrt_gt
)
tf
.
stop_gradient
(
ret_labels
,
name
=
'sampled_labels'
),
tf
.
stop_gradient
(
fg_inds_wrt_gt
),
gt_boxes
,
gt_labels
)
@
layer_register
(
log_shape
=
True
)
@
layer_register
(
log_shape
=
True
)
...
@@ -168,23 +171,24 @@ def fastrcnn_losses(labels, label_logits, fg_boxes, fg_box_logits):
...
@@ -168,23 +171,24 @@ def fastrcnn_losses(labels, label_logits, fg_boxes, fg_box_logits):
@
under_name_scope
()
@
under_name_scope
()
def
fastrcnn_predictions
(
boxes
,
prob
s
):
def
fastrcnn_predictions
(
boxes
,
score
s
):
"""
"""
Generate final results from predictions of all proposals.
Generate final results from predictions of all proposals.
Args:
Args:
boxes: n#classx4 floatbox in float32
boxes: n#classx4 floatbox in float32
prob
s: nx#class
score
s: nx#class
Returns:
Returns:
indices: Kx2. Each is (box_id, class_id)
boxes: Kx4
probs: K floats
scores: K
labels: K
"""
"""
assert
boxes
.
shape
[
1
]
==
cfg
.
DATA
.
NUM_CLASS
assert
boxes
.
shape
[
1
]
==
cfg
.
DATA
.
NUM_CLASS
assert
prob
s
.
shape
[
1
]
==
cfg
.
DATA
.
NUM_CLASS
assert
score
s
.
shape
[
1
]
==
cfg
.
DATA
.
NUM_CLASS
boxes
=
tf
.
transpose
(
boxes
,
[
1
,
0
,
2
])[
1
:,
:,
:]
# #catxnx4
boxes
=
tf
.
transpose
(
boxes
,
[
1
,
0
,
2
])[
1
:,
:,
:]
# #catxnx4
boxes
.
set_shape
([
None
,
cfg
.
DATA
.
NUM_CATEGORY
,
None
])
boxes
.
set_shape
([
None
,
cfg
.
DATA
.
NUM_CATEGORY
,
None
])
probs
=
tf
.
transpose
(
prob
s
[:,
1
:],
[
1
,
0
])
# #catxn
scores
=
tf
.
transpose
(
score
s
[:,
1
:],
[
1
,
0
])
# #catxn
def
f
(
X
):
def
f
(
X
):
"""
"""
...
@@ -213,20 +217,24 @@ def fastrcnn_predictions(boxes, probs):
...
@@ -213,20 +217,24 @@ def fastrcnn_predictions(boxes, probs):
default_value
=
False
)
default_value
=
False
)
return
mask
return
mask
masks
=
tf
.
map_fn
(
f
,
(
prob
s
,
boxes
),
dtype
=
tf
.
bool
,
masks
=
tf
.
map_fn
(
f
,
(
score
s
,
boxes
),
dtype
=
tf
.
bool
,
parallel_iterations
=
10
)
# #cat x N
parallel_iterations
=
10
)
# #cat x N
selected_indices
=
tf
.
where
(
masks
)
# #selection x 2, each is (cat_id, box_id)
selected_indices
=
tf
.
where
(
masks
)
# #selection x 2, each is (cat_id, box_id)
probs
=
tf
.
boolean_mask
(
prob
s
,
masks
)
scores
=
tf
.
boolean_mask
(
score
s
,
masks
)
# filter again by sorting scores
# filter again by sorting scores
topk_
prob
s
,
topk_indices
=
tf
.
nn
.
top_k
(
topk_
score
s
,
topk_indices
=
tf
.
nn
.
top_k
(
prob
s
,
score
s
,
tf
.
minimum
(
cfg
.
TEST
.
RESULTS_PER_IM
,
tf
.
size
(
prob
s
)),
tf
.
minimum
(
cfg
.
TEST
.
RESULTS_PER_IM
,
tf
.
size
(
score
s
)),
sorted
=
False
)
sorted
=
False
)
filtered_selection
=
tf
.
gather
(
selected_indices
,
topk_indices
)
filtered_selection
=
tf
.
gather
(
selected_indices
,
topk_indices
)
cat_ids
,
box_ids
=
tf
.
unstack
(
filtered_selection
,
axis
=
1
)
cat_ids
,
box_ids
=
tf
.
unstack
(
filtered_selection
,
axis
=
1
)
final_ids
=
tf
.
stack
([
box_ids
,
cat_ids
+
1
],
axis
=
1
,
name
=
'final_ids'
)
# Kx2, each is (box_id, class_id)
return
final_ids
,
topk_probs
final_scores
=
tf
.
identity
(
topk_scores
,
name
=
'scores'
)
final_labels
=
tf
.
add
(
cat_ids
,
1
,
name
=
'labels'
)
final_ids
=
tf
.
stack
([
cat_ids
,
box_ids
],
axis
=
1
,
name
=
'all_ids'
)
final_boxes
=
tf
.
gather_nd
(
boxes
,
final_ids
,
name
=
'boxes'
)
return
final_boxes
,
final_scores
,
final_labels
"""
"""
...
@@ -284,63 +292,84 @@ def fastrcnn_4conv1fc_gn_head(*args, **kwargs):
...
@@ -284,63 +292,84 @@ def fastrcnn_4conv1fc_gn_head(*args, **kwargs):
return
fastrcnn_Xconv1fc_head
(
*
args
,
num_convs
=
4
,
norm
=
'GN'
,
**
kwargs
)
return
fastrcnn_Xconv1fc_head
(
*
args
,
num_convs
=
4
,
norm
=
'GN'
,
**
kwargs
)
class
FastRCNNHead
(
object
):
class
BoxProposals
(
object
):
"""
"""
A
class to process & decode inputs/outputs of a fastrcnn classification+regression head
.
A
structure to manage box proposals and their relation with ground truth
.
"""
"""
def
__init__
(
self
,
input_boxes
,
box_logits
,
label_logits
,
bbox_regression_weights
,
def
__init__
(
self
,
boxes
,
labels
=
None
,
matched_gt_boxes_per_fg
=
None
):
labels
=
None
,
fg_inds_wrt_gt
=
None
,
gt_boxes
=
None
,
gt_labels
=
None
):
"""
"""
Args:
Args:
input_boxes: Nx4, inputs to the head
boxes: Nx4
box_logits: Nx#classx4 or Nx1x4, the output of the head
label_logits: Nx#class, the output of the head
bbox_regression_weights: a 4 element tensor
labels: N, each in [0, #class), the true label for each input box
labels: N, each in [0, #class), the true label for each input box
matched_gt_boxes_per_fg: #fgx4, the matching gt boxes for each fg input box
fg_inds_wrt_gt: #fg, each in [0, M)
gt_boxes: Mx4
gt_labels: M
The last
two
arguments could be None when not training.
The last
four
arguments could be None when not training.
"""
"""
for
k
,
v
in
locals
()
.
items
():
for
k
,
v
in
locals
()
.
items
():
if
k
!=
'self'
:
if
k
!=
'self'
and
v
is
not
None
:
setattr
(
self
,
k
,
v
)
setattr
(
self
,
k
,
v
)
self
.
_bbox_class_agnostic
=
int
(
box_logits
.
shape
[
1
])
==
1
@
memoized
@
memoized
def
fg_inds
_in_inputs
(
self
):
def
fg_inds
(
self
):
""" Returns: #fg indices in [0, N-1] """
""" Returns: #fg indices in [0, N-1] """
assert
self
.
labels
is
not
None
return
tf
.
reshape
(
tf
.
where
(
self
.
labels
>
0
),
[
-
1
],
name
=
'fg_inds'
)
return
tf
.
reshape
(
tf
.
where
(
self
.
labels
>
0
),
[
-
1
],
name
=
'fg_inds_in_inputs'
)
@
memoized
@
memoized
def
fg_
input_
boxes
(
self
):
def
fg_boxes
(
self
):
""" Returns: #fg
x4
"""
""" Returns: #fg
x4
"""
return
tf
.
gather
(
self
.
input_boxes
,
self
.
fg_inds_in_inputs
(),
name
=
'fg_input
_boxes'
)
return
tf
.
gather
(
self
.
boxes
,
self
.
fg_inds
(),
name
=
'fg
_boxes'
)
@
memoized
@
memoized
def
fg_
box_logit
s
(
self
):
def
fg_
label
s
(
self
):
""" Returns: #fg
x ? x 4
"""
""" Returns: #fg"""
return
tf
.
gather
(
self
.
box_logits
,
self
.
fg_inds_in_inputs
(),
name
=
'fg_box_logit
s'
)
return
tf
.
gather
(
self
.
labels
,
self
.
fg_inds
(),
name
=
'fg_label
s'
)
@
memoized
@
memoized
def
fg_labels
(
self
):
def
matched_gt_boxes
(
self
):
""" Returns: #fg """
""" Returns: #fg x 4"""
return
tf
.
gather
(
self
.
labels
,
self
.
fg_inds_in_inputs
(),
name
=
'fg_labels'
)
return
tf
.
gather
(
self
.
gt_boxes
,
self
.
fg_inds_wrt_gt
)
class
FastRCNNHead
(
object
):
"""
A class to process & decode inputs/outputs of a fastrcnn classification+regression head.
"""
def
__init__
(
self
,
proposals
,
box_logits
,
label_logits
,
bbox_regression_weights
):
"""
Args:
proposals: BoxProposals
box_logits: Nx#classx4 or Nx1x4, the output of the head
label_logits: Nx#class, the output of the head
bbox_regression_weights: a 4 element tensor
"""
for
k
,
v
in
locals
()
.
items
():
if
k
!=
'self'
and
v
is
not
None
:
setattr
(
self
,
k
,
v
)
self
.
_bbox_class_agnostic
=
int
(
box_logits
.
shape
[
1
])
==
1
@
memoized
def
fg_box_logits
(
self
):
""" Returns: #fg x ? x 4 """
return
tf
.
gather
(
self
.
box_logits
,
self
.
proposals
.
fg_inds
(),
name
=
'fg_box_logits'
)
@
memoized
@
memoized
def
losses
(
self
):
def
losses
(
self
):
encoded_fg_gt_boxes
=
encode_bbox_target
(
encoded_fg_gt_boxes
=
encode_bbox_target
(
self
.
matched_gt_boxes_per_fg
,
self
.
proposals
.
matched_gt_boxes
()
,
self
.
fg_input
_boxes
())
*
self
.
bbox_regression_weights
self
.
proposals
.
fg
_boxes
())
*
self
.
bbox_regression_weights
return
fastrcnn_losses
(
return
fastrcnn_losses
(
self
.
labels
,
self
.
label_logits
,
self
.
proposals
.
labels
,
self
.
label_logits
,
encoded_fg_gt_boxes
,
self
.
fg_box_logits
()
encoded_fg_gt_boxes
,
self
.
fg_box_logits
()
)
)
@
memoized
@
memoized
def
decoded_output_boxes
(
self
):
def
decoded_output_boxes
(
self
):
""" Returns: N x #class x 4 """
""" Returns: N x #class x 4 """
anchors
=
tf
.
tile
(
tf
.
expand_dims
(
self
.
input_
boxes
,
1
),
anchors
=
tf
.
tile
(
tf
.
expand_dims
(
self
.
proposals
.
boxes
,
1
),
[
1
,
cfg
.
DATA
.
NUM_CLASS
,
1
])
# N x #class x 4
[
1
,
cfg
.
DATA
.
NUM_CLASS
,
1
])
# N x #class x 4
decoded_boxes
=
decode_bbox_target
(
decoded_boxes
=
decode_bbox_target
(
self
.
box_logits
/
self
.
bbox_regression_weights
,
self
.
box_logits
/
self
.
bbox_regression_weights
,
...
@@ -351,8 +380,7 @@ class FastRCNNHead(object):
...
@@ -351,8 +380,7 @@ class FastRCNNHead(object):
@
memoized
@
memoized
def
decoded_output_boxes_for_true_label
(
self
):
def
decoded_output_boxes_for_true_label
(
self
):
""" Returns: Nx4 decoded boxes """
""" Returns: Nx4 decoded boxes """
assert
self
.
labels
is
not
None
return
self
.
_decoded_output_boxes_for_label
(
self
.
proposals
.
labels
)
return
self
.
_decoded_output_boxes_for_label
(
self
.
labels
)
@
memoized
@
memoized
def
decoded_output_boxes_for_predicted_label
(
self
):
def
decoded_output_boxes_for_predicted_label
(
self
):
...
@@ -363,13 +391,13 @@ class FastRCNNHead(object):
...
@@ -363,13 +391,13 @@ class FastRCNNHead(object):
def
decoded_output_boxes_for_label
(
self
,
labels
):
def
decoded_output_boxes_for_label
(
self
,
labels
):
assert
not
self
.
_bbox_class_agnostic
assert
not
self
.
_bbox_class_agnostic
indices
=
tf
.
stack
([
indices
=
tf
.
stack
([
tf
.
range
(
tf
.
size
(
self
.
labels
,
out_type
=
tf
.
int64
)),
tf
.
range
(
tf
.
size
(
labels
,
out_type
=
tf
.
int64
)),
labels
labels
])
])
needed_logits
=
tf
.
gather_nd
(
self
.
box_logits
,
indices
)
needed_logits
=
tf
.
gather_nd
(
self
.
box_logits
,
indices
)
decoded
=
decode_bbox_target
(
decoded
=
decode_bbox_target
(
needed_logits
/
self
.
bbox_regression_weights
,
needed_logits
/
self
.
bbox_regression_weights
,
self
.
input_
boxes
self
.
proposals
.
boxes
)
)
return
decoded
return
decoded
...
@@ -379,7 +407,7 @@ class FastRCNNHead(object):
...
@@ -379,7 +407,7 @@ class FastRCNNHead(object):
box_logits
=
tf
.
reshape
(
self
.
box_logits
,
[
-
1
,
4
])
box_logits
=
tf
.
reshape
(
self
.
box_logits
,
[
-
1
,
4
])
decoded
=
decode_bbox_target
(
decoded
=
decode_bbox_target
(
box_logits
/
self
.
bbox_regression_weights
,
box_logits
/
self
.
bbox_regression_weights
,
self
.
input_
boxes
self
.
proposals
.
boxes
)
)
return
decoded
return
decoded
...
...
examples/FasterRCNN/train.py
View file @
6041a1a4
...
@@ -34,8 +34,8 @@ from basemodel import (
...
@@ -34,8 +34,8 @@ from basemodel import (
import
model_frcnn
import
model_frcnn
import
model_mrcnn
import
model_mrcnn
from
model_frcnn
import
(
from
model_frcnn
import
(
sample_fast_rcnn_targets
,
sample_fast_rcnn_targets
,
fastrcnn_outputs
,
fastrcnn_
outputs
,
fastrcnn_prediction
s
,
FastRCNNHead
)
fastrcnn_
predictions
,
BoxProposal
s
,
FastRCNNHead
)
from
model_mrcnn
import
maskrcnn_upXconv_head
,
maskrcnn_loss
from
model_mrcnn
import
maskrcnn_upXconv_head
,
maskrcnn_loss
from
model_rpn
import
rpn_head
,
rpn_losses
,
generate_rpn_proposals
from
model_rpn
import
rpn_head
,
rpn_losses
,
generate_rpn_proposals
from
model_fpn
import
(
from
model_fpn
import
(
...
@@ -72,27 +72,6 @@ class DetectionModel(ModelDesc):
...
@@ -72,27 +72,6 @@ class DetectionModel(ModelDesc):
opt
=
optimizer
.
AccumGradOptimizer
(
opt
,
8
//
cfg
.
TRAIN
.
NUM_GPUS
)
opt
=
optimizer
.
AccumGradOptimizer
(
opt
,
8
//
cfg
.
TRAIN
.
NUM_GPUS
)
return
opt
return
opt
def
fastrcnn_inference
(
self
,
image_shape2d
,
fastrcnn_head
):
"""
Args:
image_shape2d: h, w
fastrcnn_head (FastRCNNHead):
Returns:
boxes (mx4):
labels (m): each >= 1
"""
decoded_boxes
=
fastrcnn_head
.
decoded_output_boxes
()
decoded_boxes
=
clip_boxes
(
decoded_boxes
,
image_shape2d
,
name
=
'fastrcnn_all_boxes'
)
label_probs
=
fastrcnn_head
.
output_scores
(
name
=
'fastrcnn_all_probs'
)
# indices: Nx2. Each index into (#box, #class)
pred_indices
,
final_probs
=
fastrcnn_predictions
(
decoded_boxes
,
label_probs
)
final_probs
=
tf
.
identity
(
final_probs
,
'final_probs'
)
final_boxes
=
tf
.
gather_nd
(
decoded_boxes
,
pred_indices
,
name
=
'final_boxes'
)
final_labels
=
tf
.
gather
(
pred_indices
,
1
,
axis
=
1
,
name
=
'final_labels'
)
return
final_boxes
,
final_labels
def
get_inference_tensor_names
(
self
):
def
get_inference_tensor_names
(
self
):
"""
"""
Returns two lists of tensor names to be used to create an inference callable.
Returns two lists of tensor names to be used to create an inference callable.
...
@@ -101,9 +80,9 @@ class DetectionModel(ModelDesc):
...
@@ -101,9 +80,9 @@ class DetectionModel(ModelDesc):
[str]: input names
[str]: input names
[str]: output names
[str]: output names
"""
"""
out
=
[
'
final_boxes'
,
'final_probs'
,
'final_
labels'
]
out
=
[
'
output/boxes'
,
'output/scores'
,
'output/
labels'
]
if
cfg
.
MODE_MASK
:
if
cfg
.
MODE_MASK
:
out
.
append
(
'
final_
masks'
)
out
.
append
(
'
output/
masks'
)
return
[
'image'
],
out
return
[
'image'
],
out
...
@@ -144,16 +123,13 @@ class ResNetC4Model(DetectionModel):
...
@@ -144,16 +123,13 @@ class ResNetC4Model(DetectionModel):
gt_boxes
,
gt_labels
=
inputs
[
'gt_boxes'
],
inputs
[
'gt_labels'
]
gt_boxes
,
gt_labels
=
inputs
[
'gt_boxes'
],
inputs
[
'gt_labels'
]
if
is_training
:
if
is_training
:
# sample proposal boxes in training
# sample proposal boxes in training
rcnn_boxes
,
rcnn_labels
,
fg_inds_wrt_gt
=
sample_fast_rcnn_targets
(
proposals
=
sample_fast_rcnn_targets
(
proposal_boxes
,
gt_boxes
,
gt_labels
)
proposal_boxes
,
gt_boxes
,
gt_labels
)
matched_gt_boxes
=
tf
.
gather
(
gt_boxes
,
fg_inds_wrt_gt
,
name
=
'gt_boxes_per_fg_proposal'
)
else
:
else
:
# The boxes to be used to crop RoIs.
# The boxes to be used to crop RoIs.
# Use all proposal boxes in inference
# Use all proposal boxes in inference
rcnn_boxes
=
proposal_boxes
proposals
=
BoxProposals
(
proposal_boxes
)
rcnn_labels
,
matched_gt_boxes
=
None
,
None
boxes_on_featuremap
=
rcnn_
boxes
*
(
1.0
/
cfg
.
RPN
.
ANCHOR_STRIDE
)
boxes_on_featuremap
=
proposals
.
boxes
*
(
1.0
/
cfg
.
RPN
.
ANCHOR_STRIDE
)
roi_resized
=
roi_align
(
featuremap
,
boxes_on_featuremap
,
14
)
roi_resized
=
roi_align
(
featuremap
,
boxes_on_featuremap
,
14
)
feature_fastrcnn
=
resnet_conv5
(
roi_resized
,
cfg
.
BACKBONE
.
RESNET_NUM_BLOCK
[
-
1
])
# nxcx7x7
feature_fastrcnn
=
resnet_conv5
(
roi_resized
,
cfg
.
BACKBONE
.
RESNET_NUM_BLOCK
[
-
1
])
# nxcx7x7
...
@@ -161,9 +137,8 @@ class ResNetC4Model(DetectionModel):
...
@@ -161,9 +137,8 @@ class ResNetC4Model(DetectionModel):
feature_gap
=
GlobalAvgPooling
(
'gap'
,
feature_fastrcnn
,
data_format
=
'channels_first'
)
feature_gap
=
GlobalAvgPooling
(
'gap'
,
feature_fastrcnn
,
data_format
=
'channels_first'
)
fastrcnn_label_logits
,
fastrcnn_box_logits
=
fastrcnn_outputs
(
'fastrcnn'
,
feature_gap
,
cfg
.
DATA
.
NUM_CLASS
)
fastrcnn_label_logits
,
fastrcnn_box_logits
=
fastrcnn_outputs
(
'fastrcnn'
,
feature_gap
,
cfg
.
DATA
.
NUM_CLASS
)
fastrcnn_head
=
FastRCNNHead
(
rcnn_boxes
,
fastrcnn_box_logits
,
fastrcnn_label_logits
,
fastrcnn_head
=
FastRCNNHead
(
proposals
,
fastrcnn_box_logits
,
fastrcnn_label_logits
,
tf
.
constant
(
cfg
.
FRCNN
.
BBOX_REG_WEIGHTS
,
dtype
=
tf
.
float32
),
tf
.
constant
(
cfg
.
FRCNN
.
BBOX_REG_WEIGHTS
,
dtype
=
tf
.
float32
))
rcnn_labels
,
matched_gt_boxes
)
if
is_training
:
if
is_training
:
# rpn loss
# rpn loss
...
@@ -176,17 +151,17 @@ class ResNetC4Model(DetectionModel):
...
@@ -176,17 +151,17 @@ class ResNetC4Model(DetectionModel):
if
cfg
.
MODE_MASK
:
if
cfg
.
MODE_MASK
:
# maskrcnn loss
# maskrcnn loss
# In training, mask branch shares the same C5 feature.
# In training, mask branch shares the same C5 feature.
fg_feature
=
tf
.
gather
(
feature_fastrcnn
,
fastrcnn_head
.
fg_inds_in_input
s
())
fg_feature
=
tf
.
gather
(
feature_fastrcnn
,
proposals
.
fg_ind
s
())
mask_logits
=
maskrcnn_upXconv_head
(
mask_logits
=
maskrcnn_upXconv_head
(
'maskrcnn'
,
fg_feature
,
cfg
.
DATA
.
NUM_CATEGORY
,
num_convs
=
0
)
# #fg x #cat x 14x14
'maskrcnn'
,
fg_feature
,
cfg
.
DATA
.
NUM_CATEGORY
,
num_convs
=
0
)
# #fg x #cat x 14x14
target_masks_for_fg
=
crop_and_resize
(
target_masks_for_fg
=
crop_and_resize
(
tf
.
expand_dims
(
inputs
[
'gt_masks'
],
1
),
tf
.
expand_dims
(
inputs
[
'gt_masks'
],
1
),
fastrcnn_head
.
fg_input
_boxes
(),
proposals
.
fg
_boxes
(),
fg_inds_wrt_gt
,
14
,
proposals
.
fg_inds_wrt_gt
,
14
,
pad_border
=
False
)
# nfg x 1x14x14
pad_border
=
False
)
# nfg x 1x14x14
target_masks_for_fg
=
tf
.
squeeze
(
target_masks_for_fg
,
1
,
'sampled_fg_mask_targets'
)
target_masks_for_fg
=
tf
.
squeeze
(
target_masks_for_fg
,
1
,
'sampled_fg_mask_targets'
)
mrcnn_loss
=
maskrcnn_loss
(
mask_logits
,
fastrcnn_head
.
fg_labels
(),
target_masks_for_fg
)
mrcnn_loss
=
maskrcnn_loss
(
mask_logits
,
proposals
.
fg_labels
(),
target_masks_for_fg
)
else
:
else
:
mrcnn_loss
=
0.0
mrcnn_loss
=
0.0
...
@@ -201,7 +176,11 @@ class ResNetC4Model(DetectionModel):
...
@@ -201,7 +176,11 @@ class ResNetC4Model(DetectionModel):
add_moving_summary
(
total_cost
,
wd_cost
)
add_moving_summary
(
total_cost
,
wd_cost
)
return
total_cost
return
total_cost
else
:
else
:
final_boxes
,
final_labels
=
self
.
fastrcnn_inference
(
image_shape2d
,
fastrcnn_head
)
decoded_boxes
=
fastrcnn_head
.
decoded_output_boxes
()
decoded_boxes
=
clip_boxes
(
decoded_boxes
,
image_shape2d
,
name
=
'fastrcnn_all_boxes'
)
label_scores
=
fastrcnn_head
.
output_scores
(
name
=
'fastrcnn_all_scores'
)
final_boxes
,
final_scores
,
final_labels
=
fastrcnn_predictions
(
decoded_boxes
,
label_scores
,
name_scope
=
'output'
)
if
cfg
.
MODE_MASK
:
if
cfg
.
MODE_MASK
:
roi_resized
=
roi_align
(
featuremap
,
final_boxes
*
(
1.0
/
cfg
.
RPN
.
ANCHOR_STRIDE
),
14
)
roi_resized
=
roi_align
(
featuremap
,
final_boxes
*
(
1.0
/
cfg
.
RPN
.
ANCHOR_STRIDE
),
14
)
...
@@ -210,7 +189,7 @@ class ResNetC4Model(DetectionModel):
...
@@ -210,7 +189,7 @@ class ResNetC4Model(DetectionModel):
'maskrcnn'
,
feature_maskrcnn
,
cfg
.
DATA
.
NUM_CATEGORY
,
0
)
# #result x #cat x 14x14
'maskrcnn'
,
feature_maskrcnn
,
cfg
.
DATA
.
NUM_CATEGORY
,
0
)
# #result x #cat x 14x14
indices
=
tf
.
stack
([
tf
.
range
(
tf
.
size
(
final_labels
)),
tf
.
to_int32
(
final_labels
)
-
1
],
axis
=
1
)
indices
=
tf
.
stack
([
tf
.
range
(
tf
.
size
(
final_labels
)),
tf
.
to_int32
(
final_labels
)
-
1
],
axis
=
1
)
final_mask_logits
=
tf
.
gather_nd
(
mask_logits
,
indices
)
# #resultx14x14
final_mask_logits
=
tf
.
gather_nd
(
mask_logits
,
indices
)
# #resultx14x14
tf
.
sigmoid
(
final_mask_logits
,
name
=
'
final_
masks'
)
tf
.
sigmoid
(
final_mask_logits
,
name
=
'
output/
masks'
)
class
ResNetFPNModel
(
DetectionModel
):
class
ResNetFPNModel
(
DetectionModel
):
...
@@ -279,23 +258,18 @@ class ResNetFPNModel(DetectionModel):
...
@@ -279,23 +258,18 @@ class ResNetFPNModel(DetectionModel):
gt_boxes
,
gt_labels
=
inputs
[
'gt_boxes'
],
inputs
[
'gt_labels'
]
gt_boxes
,
gt_labels
=
inputs
[
'gt_boxes'
],
inputs
[
'gt_labels'
]
if
is_training
:
if
is_training
:
rcnn_boxes
,
rcnn_labels
,
fg_inds_wrt_gt
=
sample_fast_rcnn_targets
(
proposals
=
sample_fast_rcnn_targets
(
proposal_boxes
,
gt_boxes
,
gt_labels
)
proposal_boxes
,
gt_boxes
,
gt_labels
)
matched_gt_boxes
=
tf
.
gather
(
gt_boxes
,
fg_inds_wrt_gt
)
else
:
else
:
# The boxes to be used to crop RoIs.
proposals
=
BoxProposals
(
proposal_boxes
)
rcnn_boxes
=
proposal_boxes
rcnn_labels
,
matched_gt_boxes
=
None
,
None
roi_feature_fastrcnn
=
multilevel_roi_align
(
p23456
[:
4
],
rcnn_
boxes
,
7
)
roi_feature_fastrcnn
=
multilevel_roi_align
(
p23456
[:
4
],
proposals
.
boxes
,
7
)
fastrcnn_head_func
=
getattr
(
model_frcnn
,
cfg
.
FPN
.
FRCNN_HEAD_FUNC
)
fastrcnn_head_func
=
getattr
(
model_frcnn
,
cfg
.
FPN
.
FRCNN_HEAD_FUNC
)
head_feature
=
fastrcnn_head_func
(
'fastrcnn'
,
roi_feature_fastrcnn
)
head_feature
=
fastrcnn_head_func
(
'fastrcnn'
,
roi_feature_fastrcnn
)
fastrcnn_label_logits
,
fastrcnn_box_logits
=
fastrcnn_outputs
(
fastrcnn_label_logits
,
fastrcnn_box_logits
=
fastrcnn_outputs
(
'fastrcnn/outputs'
,
head_feature
,
cfg
.
DATA
.
NUM_CLASS
)
'fastrcnn/outputs'
,
head_feature
,
cfg
.
DATA
.
NUM_CLASS
)
fastrcnn_head
=
FastRCNNHead
(
rcnn_boxes
,
fastrcnn_box_logits
,
fastrcnn_label_logits
,
fastrcnn_head
=
FastRCNNHead
(
proposals
,
fastrcnn_box_logits
,
fastrcnn_label_logits
,
tf
.
constant
(
cfg
.
FRCNN
.
BBOX_REG_WEIGHTS
,
dtype
=
tf
.
float32
),
tf
.
constant
(
cfg
.
FRCNN
.
BBOX_REG_WEIGHTS
,
dtype
=
tf
.
float32
))
rcnn_labels
,
matched_gt_boxes
)
if
is_training
:
if
is_training
:
# rpn loss:
# rpn loss:
...
@@ -307,7 +281,7 @@ class ResNetFPNModel(DetectionModel):
...
@@ -307,7 +281,7 @@ class ResNetFPNModel(DetectionModel):
if
cfg
.
MODE_MASK
:
if
cfg
.
MODE_MASK
:
# maskrcnn loss
# maskrcnn loss
roi_feature_maskrcnn
=
multilevel_roi_align
(
roi_feature_maskrcnn
=
multilevel_roi_align
(
p23456
[:
4
],
fastrcnn_head
.
fg_input
_boxes
(),
14
,
p23456
[:
4
],
proposals
.
fg
_boxes
(),
14
,
name_scope
=
'multilevel_roi_align_mask'
)
name_scope
=
'multilevel_roi_align_mask'
)
maskrcnn_head_func
=
getattr
(
model_mrcnn
,
cfg
.
FPN
.
MRCNN_HEAD_FUNC
)
maskrcnn_head_func
=
getattr
(
model_mrcnn
,
cfg
.
FPN
.
MRCNN_HEAD_FUNC
)
mask_logits
=
maskrcnn_head_func
(
mask_logits
=
maskrcnn_head_func
(
...
@@ -315,11 +289,11 @@ class ResNetFPNModel(DetectionModel):
...
@@ -315,11 +289,11 @@ class ResNetFPNModel(DetectionModel):
target_masks_for_fg
=
crop_and_resize
(
target_masks_for_fg
=
crop_and_resize
(
tf
.
expand_dims
(
inputs
[
'gt_masks'
],
1
),
tf
.
expand_dims
(
inputs
[
'gt_masks'
],
1
),
fastrcnn_head
.
fg_input
_boxes
(),
proposals
.
fg
_boxes
(),
fg_inds_wrt_gt
,
28
,
proposals
.
fg_inds_wrt_gt
,
28
,
pad_border
=
False
)
# fg x 1x28x28
pad_border
=
False
)
# fg x 1x28x28
target_masks_for_fg
=
tf
.
squeeze
(
target_masks_for_fg
,
1
,
'sampled_fg_mask_targets'
)
target_masks_for_fg
=
tf
.
squeeze
(
target_masks_for_fg
,
1
,
'sampled_fg_mask_targets'
)
mrcnn_loss
=
maskrcnn_loss
(
mask_logits
,
fastrcnn_head
.
fg_labels
(),
target_masks_for_fg
)
mrcnn_loss
=
maskrcnn_loss
(
mask_logits
,
proposals
.
fg_labels
(),
target_masks_for_fg
)
else
:
else
:
mrcnn_loss
=
0.0
mrcnn_loss
=
0.0
...
@@ -333,7 +307,11 @@ class ResNetFPNModel(DetectionModel):
...
@@ -333,7 +307,11 @@ class ResNetFPNModel(DetectionModel):
add_moving_summary
(
total_cost
,
wd_cost
)
add_moving_summary
(
total_cost
,
wd_cost
)
return
total_cost
return
total_cost
else
:
else
:
final_boxes
,
final_labels
=
self
.
fastrcnn_inference
(
image_shape2d
,
fastrcnn_head
)
decoded_boxes
=
fastrcnn_head
.
decoded_output_boxes
()
decoded_boxes
=
clip_boxes
(
decoded_boxes
,
image_shape2d
,
name
=
'fastrcnn_all_boxes'
)
label_scores
=
fastrcnn_head
.
output_scores
(
name
=
'fastrcnn_all_scores'
)
final_boxes
,
final_scores
,
final_labels
=
fastrcnn_predictions
(
decoded_boxes
,
label_scores
,
name_scope
=
'output'
)
if
cfg
.
MODE_MASK
:
if
cfg
.
MODE_MASK
:
# Cascade inference needs roi transform with refined boxes.
# Cascade inference needs roi transform with refined boxes.
roi_feature_maskrcnn
=
multilevel_roi_align
(
p23456
[:
4
],
final_boxes
,
14
)
roi_feature_maskrcnn
=
multilevel_roi_align
(
p23456
[:
4
],
final_boxes
,
14
)
...
@@ -342,7 +320,7 @@ class ResNetFPNModel(DetectionModel):
...
@@ -342,7 +320,7 @@ class ResNetFPNModel(DetectionModel):
'maskrcnn'
,
roi_feature_maskrcnn
,
cfg
.
DATA
.
NUM_CATEGORY
)
# #fg x #cat x 28 x 28
'maskrcnn'
,
roi_feature_maskrcnn
,
cfg
.
DATA
.
NUM_CATEGORY
)
# #fg x #cat x 28 x 28
indices
=
tf
.
stack
([
tf
.
range
(
tf
.
size
(
final_labels
)),
tf
.
to_int32
(
final_labels
)
-
1
],
axis
=
1
)
indices
=
tf
.
stack
([
tf
.
range
(
tf
.
size
(
final_labels
)),
tf
.
to_int32
(
final_labels
)
-
1
],
axis
=
1
)
final_mask_logits
=
tf
.
gather_nd
(
mask_logits
,
indices
)
# #resultx28x28
final_mask_logits
=
tf
.
gather_nd
(
mask_logits
,
indices
)
# #resultx28x28
tf
.
sigmoid
(
final_mask_logits
,
name
=
'
final_
masks'
)
tf
.
sigmoid
(
final_mask_logits
,
name
=
'
output/
masks'
)
def
visualize
(
model
,
model_path
,
nr_visualize
=
100
,
output_dir
=
'output'
):
def
visualize
(
model
,
model_path
,
nr_visualize
=
100
,
output_dir
=
'output'
):
...
@@ -358,11 +336,11 @@ def visualize(model, model_path, nr_visualize=100, output_dir='output'):
...
@@ -358,11 +336,11 @@ def visualize(model, model_path, nr_visualize=100, output_dir='output'):
input_names
=
[
'image'
,
'gt_boxes'
,
'gt_labels'
],
input_names
=
[
'image'
,
'gt_boxes'
,
'gt_labels'
],
output_names
=
[
output_names
=
[
'generate_{}_proposals/boxes'
.
format
(
'fpn'
if
cfg
.
MODE_FPN
else
'rpn'
),
'generate_{}_proposals/boxes'
.
format
(
'fpn'
if
cfg
.
MODE_FPN
else
'rpn'
),
'generate_{}_proposals/
prob
s'
.
format
(
'fpn'
if
cfg
.
MODE_FPN
else
'rpn'
),
'generate_{}_proposals/
score
s'
.
format
(
'fpn'
if
cfg
.
MODE_FPN
else
'rpn'
),
'fastrcnn_all_
prob
s'
,
'fastrcnn_all_
score
s'
,
'
final_
boxes'
,
'
output/
boxes'
,
'
final_prob
s'
,
'
output/score
s'
,
'
final_
labels'
,
'
output/
labels'
,
]))
]))
if
os
.
path
.
isdir
(
output_dir
):
if
os
.
path
.
isdir
(
output_dir
):
...
@@ -376,18 +354,18 @@ def visualize(model, model_path, nr_visualize=100, output_dir='output'):
...
@@ -376,18 +354,18 @@ def visualize(model, model_path, nr_visualize=100, output_dir='output'):
else
:
else
:
gt_boxes
,
gt_labels
=
dp
[
-
2
:]
gt_boxes
,
gt_labels
=
dp
[
-
2
:]
rpn_boxes
,
rpn_scores
,
all_
prob
s
,
\
rpn_boxes
,
rpn_scores
,
all_
score
s
,
\
final_boxes
,
final_
prob
s
,
final_labels
=
pred
(
img
,
gt_boxes
,
gt_labels
)
final_boxes
,
final_
score
s
,
final_labels
=
pred
(
img
,
gt_boxes
,
gt_labels
)
# draw groundtruth boxes
# draw groundtruth boxes
gt_viz
=
draw_annotation
(
img
,
gt_boxes
,
gt_labels
)
gt_viz
=
draw_annotation
(
img
,
gt_boxes
,
gt_labels
)
# draw best proposals for each groundtruth, to show recall
# draw best proposals for each groundtruth, to show recall
proposal_viz
,
good_proposals_ind
=
draw_proposal_recall
(
img
,
rpn_boxes
,
rpn_scores
,
gt_boxes
)
proposal_viz
,
good_proposals_ind
=
draw_proposal_recall
(
img
,
rpn_boxes
,
rpn_scores
,
gt_boxes
)
# draw the scores for the above proposals
# draw the scores for the above proposals
score_viz
=
draw_predictions
(
img
,
rpn_boxes
[
good_proposals_ind
],
all_
prob
s
[
good_proposals_ind
])
score_viz
=
draw_predictions
(
img
,
rpn_boxes
[
good_proposals_ind
],
all_
score
s
[
good_proposals_ind
])
results
=
[
DetectionResult
(
*
args
)
for
args
in
results
=
[
DetectionResult
(
*
args
)
for
args
in
zip
(
final_boxes
,
final_
prob
s
,
final_labels
,
zip
(
final_boxes
,
final_
score
s
,
final_labels
,
[
None
]
*
len
(
final_labels
))]
[
None
]
*
len
(
final_labels
))]
final_viz
=
draw_final_outputs
(
img
,
results
)
final_viz
=
draw_final_outputs
(
img
,
results
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment