Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
9b1d1095
Commit
9b1d1095
authored
Jun 26, 2018
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[MaskRCNN] use attrdict for config
parent
4f1efe74
Changes
10
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
242 additions
and
219 deletions
+242
-219
examples/FasterRCNN/basemodel.py
examples/FasterRCNN/basemodel.py
+5
-5
examples/FasterRCNN/coco.py
examples/FasterRCNN/coco.py
+4
-4
examples/FasterRCNN/common.py
examples/FasterRCNN/common.py
+0
-22
examples/FasterRCNN/config.py
examples/FasterRCNN/config.py
+101
-55
examples/FasterRCNN/data.py
examples/FasterRCNN/data.py
+28
-28
examples/FasterRCNN/eval.py
examples/FasterRCNN/eval.py
+6
-6
examples/FasterRCNN/model.py
examples/FasterRCNN/model.py
+20
-20
examples/FasterRCNN/model_box.py
examples/FasterRCNN/model_box.py
+4
-4
examples/FasterRCNN/train.py
examples/FasterRCNN/train.py
+69
-70
examples/FasterRCNN/viz.py
examples/FasterRCNN/viz.py
+5
-5
No files found.
examples/FasterRCNN/basemodel.py
View file @
9b1d1095
...
@@ -9,20 +9,20 @@ from tensorpack.tfutils.varreplace import custom_getter_scope
...
@@ -9,20 +9,20 @@ from tensorpack.tfutils.varreplace import custom_getter_scope
from
tensorpack.models
import
(
from
tensorpack.models
import
(
Conv2D
,
MaxPooling
,
BatchNorm
,
BNReLU
)
Conv2D
,
MaxPooling
,
BatchNorm
,
BNReLU
)
import
confi
g
from
config
import
config
as
cf
g
def
maybe_freeze_affine
(
getter
,
*
args
,
**
kwargs
):
def
maybe_freeze_affine
(
getter
,
*
args
,
**
kwargs
):
# custom getter to freeze affine params inside bn
# custom getter to freeze affine params inside bn
name
=
args
[
0
]
if
len
(
args
)
else
kwargs
.
get
(
'name'
)
name
=
args
[
0
]
if
len
(
args
)
else
kwargs
.
get
(
'name'
)
if
name
.
endswith
(
'/gamma'
)
or
name
.
endswith
(
'/beta'
):
if
name
.
endswith
(
'/gamma'
)
or
name
.
endswith
(
'/beta'
):
if
c
onfig
.
FREEZE_AFFINE
:
if
c
fg
.
BACKBONE
.
FREEZE_AFFINE
:
kwargs
[
'trainable'
]
=
False
kwargs
[
'trainable'
]
=
False
return
getter
(
*
args
,
**
kwargs
)
return
getter
(
*
args
,
**
kwargs
)
def
maybe_reverse_pad
(
topleft
,
bottomright
):
def
maybe_reverse_pad
(
topleft
,
bottomright
):
if
c
onfig
.
TF_PAD_MODE
:
if
c
fg
.
BACKBONE
.
TF_PAD_MODE
:
return
[
topleft
,
bottomright
]
return
[
topleft
,
bottomright
]
return
[
bottomright
,
topleft
]
return
[
bottomright
,
topleft
]
...
@@ -65,7 +65,7 @@ def resnet_shortcut(l, n_out, stride, activation=tf.identity):
...
@@ -65,7 +65,7 @@ def resnet_shortcut(l, n_out, stride, activation=tf.identity):
n_in
=
l
.
get_shape
()
.
as_list
()[
1
if
data_format
in
[
'NCHW'
,
'channels_first'
]
else
3
]
n_in
=
l
.
get_shape
()
.
as_list
()[
1
if
data_format
in
[
'NCHW'
,
'channels_first'
]
else
3
]
if
n_in
!=
n_out
:
# change dimension when channel is not the same
if
n_in
!=
n_out
:
# change dimension when channel is not the same
# TF's SAME mode output ceil(x/stride), which is NOT what we want when x is odd and stride is 2
# TF's SAME mode output ceil(x/stride), which is NOT what we want when x is odd and stride is 2
if
not
c
onfi
g
.
MODE_FPN
and
stride
==
2
:
if
not
c
f
g
.
MODE_FPN
and
stride
==
2
:
l
=
l
[:,
:,
:
-
1
,
:
-
1
]
l
=
l
[:,
:,
:
-
1
,
:
-
1
]
return
Conv2D
(
'convshortcut'
,
l
,
n_out
,
1
,
return
Conv2D
(
'convshortcut'
,
l
,
n_out
,
1
,
strides
=
stride
,
padding
=
'VALID'
,
activation
=
activation
)
strides
=
stride
,
padding
=
'VALID'
,
activation
=
activation
)
...
@@ -124,7 +124,7 @@ def resnet_conv5(image, num_block):
...
@@ -124,7 +124,7 @@ def resnet_conv5(image, num_block):
def
resnet_fpn_backbone
(
image
,
num_blocks
,
freeze_c2
=
True
):
def
resnet_fpn_backbone
(
image
,
num_blocks
,
freeze_c2
=
True
):
shape2d
=
tf
.
shape
(
image
)[
2
:]
shape2d
=
tf
.
shape
(
image
)[
2
:]
mult
=
float
(
c
onfig
.
FPN_
RESOLUTION_REQUIREMENT
)
mult
=
float
(
c
fg
.
FPN
.
RESOLUTION_REQUIREMENT
)
new_shape2d
=
tf
.
to_int32
(
tf
.
ceil
(
tf
.
to_float
(
shape2d
)
/
mult
)
*
mult
)
new_shape2d
=
tf
.
to_int32
(
tf
.
ceil
(
tf
.
to_float
(
shape2d
)
/
mult
)
*
mult
)
pad_shape2d
=
new_shape2d
-
shape2d
pad_shape2d
=
new_shape2d
-
shape2d
assert
len
(
num_blocks
)
==
4
,
num_blocks
assert
len
(
num_blocks
)
==
4
,
num_blocks
...
...
examples/FasterRCNN/coco.py
View file @
9b1d1095
...
@@ -12,13 +12,13 @@ from tensorpack.utils.timer import timed_operation
...
@@ -12,13 +12,13 @@ from tensorpack.utils.timer import timed_operation
from
tensorpack.utils.argtools
import
log_once
from
tensorpack.utils.argtools
import
log_once
from
pycocotools.coco
import
COCO
from
pycocotools.coco
import
COCO
import
confi
g
from
config
import
config
as
cf
g
__all__
=
[
'COCODetection'
,
'COCOMeta'
]
__all__
=
[
'COCODetection'
,
'COCOMeta'
]
COCO_NUM_CATEGORY
=
80
COCO_NUM_CATEGORY
=
80
c
onfig
.
NUM_CLASS
=
COCO_NUM_CATEGORY
+
1
c
fg
.
DATA
.
NUM_CLASS
=
COCO_NUM_CATEGORY
+
1
class
_COCOMeta
(
object
):
class
_COCOMeta
(
object
):
...
@@ -48,7 +48,7 @@ class _COCOMeta(object):
...
@@ -48,7 +48,7 @@ class _COCOMeta(object):
v
:
i
+
1
for
i
,
v
in
enumerate
(
cat_ids
)}
v
:
i
+
1
for
i
,
v
in
enumerate
(
cat_ids
)}
self
.
class_id_to_category_id
=
{
self
.
class_id_to_category_id
=
{
v
:
k
for
k
,
v
in
self
.
category_id_to_class_id
.
items
()}
v
:
k
for
k
,
v
in
self
.
category_id_to_class_id
.
items
()}
c
onfig
.
CLASS_NAMES
=
self
.
class_names
c
fg
.
DATA
.
CLASS_NAMES
=
self
.
class_names
COCOMeta
=
_COCOMeta
()
COCOMeta
=
_COCOMeta
()
...
@@ -200,7 +200,7 @@ class COCODetection(object):
...
@@ -200,7 +200,7 @@ class COCODetection(object):
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
c
=
COCODetection
(
c
onfig
.
BASEDIR
,
'train2014'
)
c
=
COCODetection
(
c
fg
.
DATA
.
BASEDIR
,
'train2014'
)
gt_boxes
=
c
.
load
(
add_gt
=
True
,
add_mask
=
True
)
gt_boxes
=
c
.
load
(
add_gt
=
True
,
add_mask
=
True
)
print
(
"#Images:"
,
len
(
gt_boxes
))
print
(
"#Images:"
,
len
(
gt_boxes
))
c
.
print_class_histogram
(
gt_boxes
)
c
.
print_class_histogram
(
gt_boxes
)
examples/FasterRCNN/common.py
View file @
9b1d1095
...
@@ -2,17 +2,13 @@
...
@@ -2,17 +2,13 @@
# File: common.py
# File: common.py
import
numpy
as
np
import
numpy
as
np
import
six
import
cv2
import
cv2
from
tensorpack.dataflow
import
RNGDataFlow
from
tensorpack.dataflow
import
RNGDataFlow
from
tensorpack.dataflow.imgaug
import
transform
from
tensorpack.dataflow.imgaug
import
transform
from
tensorpack.utils
import
logger
import
pycocotools.mask
as
cocomask
import
pycocotools.mask
as
cocomask
import
config
class
DataFromListOfDict
(
RNGDataFlow
):
class
DataFromListOfDict
(
RNGDataFlow
):
def
__init__
(
self
,
lst
,
keys
,
shuffle
=
False
):
def
__init__
(
self
,
lst
,
keys
,
shuffle
=
False
):
...
@@ -138,21 +134,3 @@ def filter_boxes_inside_shape(boxes, shape):
...
@@ -138,21 +134,3 @@ def filter_boxes_inside_shape(boxes, shape):
(
boxes
[:,
2
]
<=
w
)
&
(
boxes
[:,
2
]
<=
w
)
&
(
boxes
[:,
3
]
<=
h
))[
0
]
(
boxes
[:,
3
]
<=
h
))[
0
]
return
indices
,
boxes
[
indices
,
:]
return
indices
,
boxes
[
indices
,
:]
def
write_config_from_args
(
configs
):
for
cfg
in
configs
:
k
,
v
=
cfg
.
split
(
'='
,
maxsplit
=
1
)
assert
k
in
dir
(
config
),
"Unknown config key: {}"
.
format
(
k
)
oldv
=
getattr
(
config
,
k
)
if
not
isinstance
(
oldv
,
six
.
text_type
):
v
=
eval
(
v
)
setattr
(
config
,
k
,
v
)
def
print_config
():
logger
.
info
(
"Config: ------------------------------------------"
)
for
k
in
dir
(
config
):
if
k
==
k
.
upper
():
logger
.
info
(
"{} = {}"
.
format
(
k
,
getattr
(
config
,
k
)))
logger
.
info
(
"--------------------------------------------------"
)
examples/FasterRCNN/config.py
View file @
9b1d1095
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
# File: config.py
# File: config.py
import
numpy
as
np
import
pprint
__all__
=
[
'config'
]
class
AttrDict
():
def
__getattr__
(
self
,
name
):
ret
=
AttrDict
()
setattr
(
self
,
name
,
ret
)
return
ret
def
__str__
(
self
):
return
pprint
.
pformat
(
self
.
to_dict
(),
indent
=
1
)
__repr__
=
__str__
def
to_dict
(
self
):
"""Convert to a nested dict. """
return
{
k
:
v
.
to_dict
()
if
isinstance
(
v
,
AttrDict
)
else
v
for
k
,
v
in
self
.
__dict__
.
items
()}
def
update_args
(
self
,
args
):
"""Update from command line args. """
for
cfg
in
args
:
keys
,
v
=
cfg
.
split
(
'='
,
maxsplit
=
1
)
keylist
=
keys
.
split
(
'.'
)
dic
=
self
for
i
,
k
in
enumerate
(
keylist
[:
-
1
]):
assert
k
in
dir
(
dic
),
"Unknown config key: {}"
.
format
(
keys
)
dic
=
getattr
(
dic
,
k
)
key
=
keylist
[
-
1
]
oldv
=
getattr
(
dic
,
key
)
if
not
isinstance
(
oldv
,
str
):
v
=
eval
(
v
)
setattr
(
dic
,
key
,
v
)
config
=
AttrDict
()
_C
=
config
# short alias to avoid coding
# mode flags ---------------------
# mode flags ---------------------
TRAINER
=
'replicated'
# options: 'horovod', 'replicated'
_C
.
TRAINER
=
'replicated'
# options: 'horovod', 'replicated'
NUM_GPUS
=
None
# by default, will be set from code
_C
.
MODE_MASK
=
True
MODE_MASK
=
True
_C
.
MODE_FPN
=
False
MODE_FPN
=
False
# dataset -----------------------
# dataset -----------------------
BASEDIR
=
'/path/to/your/COCO/DIR'
_C
.
DATA
.
BASEDIR
=
'/path/to/your/COCO/DIR'
TRAIN_DATASET
=
[
'train2014'
,
'valminusminival2014'
]
# i.e., trainval35k
_C
.
DATA
.
TRAIN
=
[
'train2014'
,
'valminusminival2014'
]
# i.e., trainval35k
VAL_DATASET
=
'minival2014'
# For now, only support evaluation on single dataset
_C
.
DATA
.
VAL
=
'minival2014'
# For now, only support evaluation on single dataset
NUM_CLASS
=
81
# 1 background + 80 categories
_C
.
DATA
.
NUM_CLASS
=
81
# 1 background + 80 categories
CLASS_NAMES
=
[]
# NUM_CLASS strings. Needs to be populated later by data loader
_C
.
DATA
.
CLASS_NAMES
=
[]
# NUM_CLASS strings. Needs to be populated later by data loader
# basemodel ----------------------
# basemodel ----------------------
RESNET_NUM_BLOCK
=
[
3
,
4
,
6
,
3
]
# for resnet50
_C
.
BACKBONE
.
RESNET_NUM_BLOCK
=
[
3
,
4
,
6
,
3
]
# for resnet50
# RESNET_NUM_BLOCK = [3, 4, 23, 3] # for resnet101
# RESNET_NUM_BLOCK = [3, 4, 23, 3] # for resnet101
FREEZE_AFFINE
=
False
# do not train affine parameters inside BN
_C
.
BACKBONE
.
FREEZE_AFFINE
=
False
# do not train affine parameters inside BN
# Use a base model with TF-preferred pad mode,
# Use a base model with TF-preferred pad mode,
# which may pad more pixels on right/bottom than top/left.
# which may pad more pixels on right/bottom than top/left.
# TF_PAD_MODE=False is better for performance but will require a different base model.
# TF_PAD_MODE=False is better for performance but will require a different base model.
# See https://github.com/tensorflow/tensorflow/issues/18213
# See https://github.com/tensorflow/tensorflow/issues/18213
TF_PAD_MODE
=
True
_C
.
BACKBONE
.
TF_PAD_MODE
=
True
# schedule -----------------------
# schedule -----------------------
BASE_LR
=
1e-2
# The schedule and learning rate here is defined for a total batch size of 8.
WARMUP
=
1000
# in steps
# If not running with 8 GPUs, they will be adjusted automatically in code.
STEPS_PER_EPOCH
=
500
_C
.
TRAIN
.
NUM_GPUS
=
None
# by default, will be set from code
_C
.
TRAIN
.
WEIGHT_DECAY
=
1e-4
_C
.
TRAIN
.
BASE_LR
=
1e-2
_C
.
TRAIN
.
WARMUP
=
1000
# in steps
_C
.
TRAIN
.
STEPS_PER_EPOCH
=
500
# LR_SCHEDULE = [120000, 160000, 180000] # "1x" schedule in detectron
# LR_SCHEDULE = [120000, 160000, 180000] # "1x" schedule in detectron
# LR_SCHEDULE = [150000, 230000, 280000] # roughly a "1.5x" schedule
# LR_SCHEDULE = [150000, 230000, 280000] # roughly a "1.5x" schedule
LR_SCHEDULE
=
[
240000
,
320000
,
360000
]
# "2x" schedule in detectron
_C
.
TRAIN
.
LR_SCHEDULE
=
[
240000
,
320000
,
360000
]
# "2x" schedule in detectron
#
image resolution
--------------------
#
preprocessing
--------------------
SHORT_EDGE_SIZE
=
800
_C
.
PREPROC
.
SHORT_EDGE_SIZE
=
800
MAX_SIZE
=
1333
_C
.
PREPROC
.
MAX_SIZE
=
1333
# Alternative (worse & faster) setting: 600, 1024
# Alternative
old
(worse & faster) setting: 600, 1024
# anchors -------------------------
# anchors -------------------------
ANCHOR_STRIDE
=
16
_C
.
RPN
.
ANCHOR_STRIDE
=
16
ANCHOR_STRIDES_FPN
=
(
4
,
8
,
16
,
32
,
64
)
# strides for each FPN level. Must be the same length as ANCHOR_SIZES
_C
.
RPN
.
ANCHOR_SIZES
=
(
32
,
64
,
128
,
256
,
512
)
# sqrtarea of the anchor box
FPN_RESOLUTION_REQUIREMENT
=
32
# image size into the backbone has to be multiple of this number
_C
.
RPN
.
ANCHOR_RATIOS
=
(
0.5
,
1.
,
2.
)
ANCHOR_SIZES
=
(
32
,
64
,
128
,
256
,
512
)
# sqrtarea of the anchor box
_C
.
RPN
.
NUM_ANCHOR
=
len
(
_C
.
RPN
.
ANCHOR_SIZES
)
*
len
(
_C
.
RPN
.
ANCHOR_RATIOS
)
ANCHOR_RATIOS
=
(
0.5
,
1.
,
2.
)
_C
.
RPN
.
POSITIVE_ANCHOR_THRES
=
0.7
NUM_ANCHOR
=
len
(
ANCHOR_SIZES
)
*
len
(
ANCHOR_RATIOS
)
_C
.
RPN
.
NEGATIVE_ANCHOR_THRES
=
0.3
POSITIVE_ANCHOR_THRES
=
0.7
NEGATIVE_ANCHOR_THRES
=
0.3
BBOX_DECODE_CLIP
=
np
.
log
(
MAX_SIZE
/
16.0
)
# to avoid too large numbers.
# rpn training -------------------------
# rpn training -------------------------
RPN_FG_RATIO
=
0.5
# fg ratio among selected RPN anchors
_C
.
RPN
.
FG_RATIO
=
0.5
# fg ratio among selected RPN anchors
RPN_BATCH_PER_IM
=
256
# total (across FPN levels) number of anchors that are marked valid
_C
.
RPN
.
BATCH_PER_IM
=
256
# total (across FPN levels) number of anchors that are marked valid
RPN_MIN_SIZE
=
0
_C
.
RPN
.
MIN_SIZE
=
0
RPN_PROPOSAL_NMS_THRESH
=
0.7
_C
.
RPN
.
PROPOSAL_NMS_THRESH
=
0.7
TRAIN_PRE_NMS_TOPK
=
12000
_C
.
RPN
.
TRAIN_PRE_NMS_TOPK
=
12000
TRAIN_POST_NMS_TOPK
=
2000
_C
.
RPN
.
TRAIN_POST_NMS_TOPK
=
2000
TRAIN_FPN_NMS_TOPK
=
2000
_C
.
RPN
.
CROWD_OVERLAP_THRES
=
0.7
# boxes overlapping crowd will be ignored.
CROWD_OVERLAP_THRES
=
0.7
# boxes overlapping crowd will be ignored.
# fastrcnn training ---------------------
# fastrcnn training ---------------------
FASTRCNN_
BATCH_PER_IM
=
512
_C
.
FRCNN
.
BATCH_PER_IM
=
512
FASTRCNN_
BBOX_REG_WEIGHTS
=
[
10.
,
10.
,
5.
,
5.
]
# Better but non-standard setting: [20, 20, 10, 10]
_C
.
FRCNN
.
BBOX_REG_WEIGHTS
=
[
10.
,
10.
,
5.
,
5.
]
# Better but non-standard setting: [20, 20, 10, 10]
FASTRCNN_
FG_THRESH
=
0.5
_C
.
FRCNN
.
FG_THRESH
=
0.5
FASTRCNN_
FG_RATIO
=
0.25
# fg ratio in a ROI batch
_C
.
FRCNN
.
FG_RATIO
=
0.25
# fg ratio in a ROI batch
# modeling -------------------------
# FPN -------------------------
FPN_NUM_CHANNEL
=
256
_C
.
FPN
.
ANCHOR_STRIDES
=
(
4
,
8
,
16
,
32
,
64
)
# strides for each FPN level. Must be the same length as ANCHOR_SIZES
_C
.
FPN
.
RESOLUTION_REQUIREMENT
=
32
# image size into the backbone has to be multiple of this number
_C
.
FPN
.
NUM_CHANNEL
=
256
# conv head and fc head are only used in FPN.
# conv head and fc head are only used in FPN.
# For C4 models, the head is C5
# For C4 models, the head is C5
FPN_FASTRCNN_HEAD_FUNC
=
'fastrcnn_2fc_head'
# choices: fastrcnn_2fc_head, fastrcnn_4conv1fc_head
_C
.
FPN
.
FRCNN_HEAD_FUNC
=
'fastrcnn_2fc_head'
# choices: fastrcnn_2fc_head, fastrcnn_4conv1fc_head
FASTRCNN_CONV_HEAD_DIM
=
256
_C
.
FPN
.
FRCNN_CONV_HEAD_DIM
=
256
FASTRCNN_FC_HEAD_DIM
=
1024
_C
.
FPN
.
FRCNN_FC_HEAD_DIM
=
1024
MASKRCNN_HEAD_DIM
=
256
_C
.
RPN
.
TRAIN_FPN_NMS_TOPK
=
2000
_C
.
RPN
.
TEST_FPN_NMS_TOPK
=
1000
# Mask-RCNN
_C
.
MRCNN
.
HEAD_DIM
=
256
# testing -----------------------
# testing -----------------------
TEST_PRE_NMS_TOPK
=
6000
_C
.
RPN
.
TEST_PRE_NMS_TOPK
=
6000
TEST_POST_NMS_TOPK
=
1000
# if you encounter OOM in inference, set this to a smaller number
_C
.
RPN
.
TEST_POST_NMS_TOPK
=
1000
# if you encounter OOM in inference, set this to a smaller number
TEST_FPN_NMS_TOPK
=
1000
_C
.
TEST
.
FRCNN_NMS_THRESH
=
0.5
FASTRCNN_NMS_THRESH
=
0.5
_C
.
TEST
.
RESULT_SCORE_THRESH
=
0.05
RESULT_SCORE_THRESH
=
0.05
_C
.
TEST
.
RESULT_SCORE_THRESH_VIS
=
0.3
# only visualize confident results
RESULT_SCORE_THRESH_VIS
=
0.3
# only visualize confident results
_C
.
TEST
.
RESULTS_PER_IM
=
100
RESULTS_PER_IM
=
100
examples/FasterRCNN/data.py
View file @
9b1d1095
...
@@ -21,7 +21,7 @@ from utils.np_box_ops import area as np_area
...
@@ -21,7 +21,7 @@ from utils.np_box_ops import area as np_area
from
common
import
(
from
common
import
(
DataFromListOfDict
,
CustomResize
,
filter_boxes_inside_shape
,
DataFromListOfDict
,
CustomResize
,
filter_boxes_inside_shape
,
box_to_point8
,
point8_to_box
,
segmentation_to_mask
)
box_to_point8
,
point8_to_box
,
segmentation_to_mask
)
import
confi
g
from
config
import
config
as
cf
g
class
MalformedData
(
BaseException
):
class
MalformedData
(
BaseException
):
...
@@ -30,8 +30,8 @@ class MalformedData(BaseException):
...
@@ -30,8 +30,8 @@ class MalformedData(BaseException):
@
memoized
@
memoized
def
get_all_anchors
(
def
get_all_anchors
(
stride
=
c
onfig
.
ANCHOR_STRIDE
,
stride
=
c
fg
.
RPN
.
ANCHOR_STRIDE
,
sizes
=
c
onfig
.
ANCHOR_SIZES
):
sizes
=
c
fg
.
RPN
.
ANCHOR_SIZES
):
"""
"""
Get all anchors in the largest possible image, shifted, floatbox
Get all anchors in the largest possible image, shifted, floatbox
Args:
Args:
...
@@ -49,14 +49,14 @@ def get_all_anchors(
...
@@ -49,14 +49,14 @@ def get_all_anchors(
cell_anchors
=
generate_anchors
(
cell_anchors
=
generate_anchors
(
stride
,
stride
,
scales
=
np
.
array
(
sizes
,
dtype
=
np
.
float
)
/
stride
,
scales
=
np
.
array
(
sizes
,
dtype
=
np
.
float
)
/
stride
,
ratios
=
np
.
array
(
c
onfig
.
ANCHOR_RATIOS
,
dtype
=
np
.
float
))
ratios
=
np
.
array
(
c
fg
.
RPN
.
ANCHOR_RATIOS
,
dtype
=
np
.
float
))
# anchors are intbox here.
# anchors are intbox here.
# anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)
# anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)
max_size
=
c
onfig
.
MAX_SIZE
max_size
=
c
fg
.
PREPROC
.
MAX_SIZE
if
c
onfi
g
.
MODE_FPN
:
if
c
f
g
.
MODE_FPN
:
# TODO setting this in config is perhaps better
# TODO setting this in config is perhaps better
size_mult
=
c
onfig
.
FPN_
RESOLUTION_REQUIREMENT
*
1.
size_mult
=
c
fg
.
FPN
.
RESOLUTION_REQUIREMENT
*
1.
max_size
=
np
.
ceil
(
max_size
/
size_mult
)
*
size_mult
max_size
=
np
.
ceil
(
max_size
/
size_mult
)
*
size_mult
field_size
=
int
(
np
.
ceil
(
max_size
/
stride
))
field_size
=
int
(
np
.
ceil
(
max_size
/
stride
))
shifts
=
np
.
arange
(
0
,
field_size
)
*
stride
shifts
=
np
.
arange
(
0
,
field_size
)
*
stride
...
@@ -81,8 +81,8 @@ def get_all_anchors(
...
@@ -81,8 +81,8 @@ def get_all_anchors(
@
memoized
@
memoized
def
get_all_anchors_fpn
(
def
get_all_anchors_fpn
(
strides
=
c
onfig
.
ANCHOR_STRIDES_FPN
,
strides
=
c
fg
.
FPN
.
ANCHOR_STRIDES
,
sizes
=
c
onfig
.
ANCHOR_SIZES
):
sizes
=
c
fg
.
RPN
.
ANCHOR_SIZES
):
"""
"""
Returns:
Returns:
[anchors]: each anchors is a SxSx NUM_ANCHOR_RATIOS x4 array.
[anchors]: each anchors is a SxSx NUM_ANCHOR_RATIOS x4 array.
...
@@ -132,8 +132,8 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
...
@@ -132,8 +132,8 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
# the order of setting neg/pos labels matter
# the order of setting neg/pos labels matter
anchor_labels
[
anchors_with_max_iou_per_gt
]
=
1
anchor_labels
[
anchors_with_max_iou_per_gt
]
=
1
anchor_labels
[
ious_max_per_anchor
>=
c
onfig
.
POSITIVE_ANCHOR_THRES
]
=
1
anchor_labels
[
ious_max_per_anchor
>=
c
fg
.
RPN
.
POSITIVE_ANCHOR_THRES
]
=
1
anchor_labels
[
ious_max_per_anchor
<
c
onfig
.
NEGATIVE_ANCHOR_THRES
]
=
0
anchor_labels
[
ious_max_per_anchor
<
c
fg
.
RPN
.
NEGATIVE_ANCHOR_THRES
]
=
0
# We can label all non-ignore candidate boxes which overlap crowd as ignore
# We can label all non-ignore candidate boxes which overlap crowd as ignore
# But detectron did not do this.
# But detectron did not do this.
...
@@ -141,11 +141,11 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
...
@@ -141,11 +141,11 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
# cand_inds = np.where(anchor_labels >= 0)[0]
# cand_inds = np.where(anchor_labels >= 0)[0]
# cand_anchors = anchors[cand_inds]
# cand_anchors = anchors[cand_inds]
# ious = np_iou(cand_anchors, crowd_boxes)
# ious = np_iou(cand_anchors, crowd_boxes)
# overlap_with_crowd = cand_inds[ious.max(axis=1) > c
onfig
.CROWD_OVERLAP_THRES]
# overlap_with_crowd = cand_inds[ious.max(axis=1) > c
fg.RPN
.CROWD_OVERLAP_THRES]
# anchor_labels[overlap_with_crowd] = -1
# anchor_labels[overlap_with_crowd] = -1
# Subsample fg labels: ignore some fg if fg is too many
# Subsample fg labels: ignore some fg if fg is too many
target_num_fg
=
int
(
c
onfig
.
RPN_BATCH_PER_IM
*
config
.
RPN_
FG_RATIO
)
target_num_fg
=
int
(
c
fg
.
RPN
.
BATCH_PER_IM
*
cfg
.
RPN
.
FG_RATIO
)
fg_inds
=
filter_box_label
(
anchor_labels
,
1
,
target_num_fg
)
fg_inds
=
filter_box_label
(
anchor_labels
,
1
,
target_num_fg
)
# Keep an image even if there is no foreground anchors
# Keep an image even if there is no foreground anchors
# if len(fg_inds) == 0:
# if len(fg_inds) == 0:
...
@@ -156,14 +156,14 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
...
@@ -156,14 +156,14 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
if
old_num_bg
==
0
:
if
old_num_bg
==
0
:
# No valid bg in this image, skip.
# No valid bg in this image, skip.
raise
MalformedData
(
"No valid background for RPN!"
)
raise
MalformedData
(
"No valid background for RPN!"
)
target_num_bg
=
c
onfig
.
RPN_
BATCH_PER_IM
-
len
(
fg_inds
)
target_num_bg
=
c
fg
.
RPN
.
BATCH_PER_IM
-
len
(
fg_inds
)
filter_box_label
(
anchor_labels
,
0
,
target_num_bg
)
# ignore return values
filter_box_label
(
anchor_labels
,
0
,
target_num_bg
)
# ignore return values
# Set anchor boxes: the best gt_box for each fg anchor
# Set anchor boxes: the best gt_box for each fg anchor
anchor_boxes
=
np
.
zeros
((
NA
,
4
),
dtype
=
'float32'
)
anchor_boxes
=
np
.
zeros
((
NA
,
4
),
dtype
=
'float32'
)
fg_boxes
=
gt_boxes
[
ious_argmax_per_anchor
[
fg_inds
],
:]
fg_boxes
=
gt_boxes
[
ious_argmax_per_anchor
[
fg_inds
],
:]
anchor_boxes
[
fg_inds
,
:]
=
fg_boxes
anchor_boxes
[
fg_inds
,
:]
=
fg_boxes
# assert len(fg_inds) + np.sum(anchor_labels == 0) == c
onfig.RPN_
BATCH_PER_IM
# assert len(fg_inds) + np.sum(anchor_labels == 0) == c
fg.RPN.
BATCH_PER_IM
return
anchor_labels
,
anchor_boxes
return
anchor_labels
,
anchor_boxes
...
@@ -192,12 +192,12 @@ def get_rpn_anchor_input(im, boxes, is_crowd):
...
@@ -192,12 +192,12 @@ def get_rpn_anchor_input(im, boxes, is_crowd):
# Fill them back to original size: fHxfWx1, fHxfWx4
# Fill them back to original size: fHxfWx1, fHxfWx4
anchorH
,
anchorW
=
all_anchors
.
shape
[:
2
]
anchorH
,
anchorW
=
all_anchors
.
shape
[:
2
]
featuremap_labels
=
-
np
.
ones
((
anchorH
*
anchorW
*
c
onfig
.
NUM_ANCHOR
,
),
dtype
=
'int32'
)
featuremap_labels
=
-
np
.
ones
((
anchorH
*
anchorW
*
c
fg
.
RPN
.
NUM_ANCHOR
,
),
dtype
=
'int32'
)
featuremap_labels
[
inside_ind
]
=
anchor_labels
featuremap_labels
[
inside_ind
]
=
anchor_labels
featuremap_labels
=
featuremap_labels
.
reshape
((
anchorH
,
anchorW
,
c
onfig
.
NUM_ANCHOR
))
featuremap_labels
=
featuremap_labels
.
reshape
((
anchorH
,
anchorW
,
c
fg
.
RPN
.
NUM_ANCHOR
))
featuremap_boxes
=
np
.
zeros
((
anchorH
*
anchorW
*
c
onfig
.
NUM_ANCHOR
,
4
),
dtype
=
'float32'
)
featuremap_boxes
=
np
.
zeros
((
anchorH
*
anchorW
*
c
fg
.
RPN
.
NUM_ANCHOR
,
4
),
dtype
=
'float32'
)
featuremap_boxes
[
inside_ind
,
:]
=
anchor_gt_boxes
featuremap_boxes
[
inside_ind
,
:]
=
anchor_gt_boxes
featuremap_boxes
=
featuremap_boxes
.
reshape
((
anchorH
,
anchorW
,
c
onfig
.
NUM_ANCHOR
,
4
))
featuremap_boxes
=
featuremap_boxes
.
reshape
((
anchorH
,
anchorW
,
c
fg
.
RPN
.
NUM_ANCHOR
,
4
))
return
featuremap_labels
,
featuremap_boxes
return
featuremap_labels
,
featuremap_boxes
...
@@ -233,7 +233,7 @@ def get_multilevel_rpn_anchor_input(im, boxes, is_crowd):
...
@@ -233,7 +233,7 @@ def get_multilevel_rpn_anchor_input(im, boxes, is_crowd):
start
=
0
start
=
0
multilevel_inputs
=
[]
multilevel_inputs
=
[]
for
level_anchor
in
anchors_per_level
:
for
level_anchor
in
anchors_per_level
:
assert
level_anchor
.
shape
[
2
]
==
len
(
c
onfig
.
ANCHOR_RATIOS
)
assert
level_anchor
.
shape
[
2
]
==
len
(
c
fg
.
RPN
.
ANCHOR_RATIOS
)
anchor_shape
=
level_anchor
.
shape
[:
3
]
# fHxfWxNUM_ANCHOR_RATIOS
anchor_shape
=
level_anchor
.
shape
[:
3
]
# fHxfWxNUM_ANCHOR_RATIOS
num_anchor_this_level
=
np
.
prod
(
anchor_shape
)
num_anchor_this_level
=
np
.
prod
(
anchor_shape
)
end
=
start
+
num_anchor_this_level
end
=
start
+
num_anchor_this_level
...
@@ -263,7 +263,7 @@ def get_train_dataflow():
...
@@ -263,7 +263,7 @@ def get_train_dataflow():
"""
"""
imgs
=
COCODetection
.
load_many
(
imgs
=
COCODetection
.
load_many
(
c
onfig
.
BASEDIR
,
config
.
TRAIN_DATASET
,
add_gt
=
True
,
add_mask
=
confi
g
.
MODE_MASK
)
c
fg
.
DATA
.
BASEDIR
,
cfg
.
DATA
.
TRAIN
,
add_gt
=
True
,
add_mask
=
cf
g
.
MODE_MASK
)
"""
"""
To train on your own data, change this to your loader.
To train on your own data, change this to your loader.
Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
Produce "imgs" as a list of dict, in the dict the following keys are needed for training:
...
@@ -292,7 +292,7 @@ def get_train_dataflow():
...
@@ -292,7 +292,7 @@ def get_train_dataflow():
ds
=
DataFromList
(
imgs
,
shuffle
=
True
)
ds
=
DataFromList
(
imgs
,
shuffle
=
True
)
aug
=
imgaug
.
AugmentorList
(
aug
=
imgaug
.
AugmentorList
(
[
CustomResize
(
c
onfig
.
SHORT_EDGE_SIZE
,
config
.
MAX_SIZE
),
[
CustomResize
(
c
fg
.
PREPROC
.
SHORT_EDGE_SIZE
,
cfg
.
PREPROC
.
MAX_SIZE
),
imgaug
.
Flip
(
horiz
=
True
)])
imgaug
.
Flip
(
horiz
=
True
)])
def
preprocess
(
img
):
def
preprocess
(
img
):
...
@@ -313,7 +313,7 @@ def get_train_dataflow():
...
@@ -313,7 +313,7 @@ def get_train_dataflow():
# rpn anchor:
# rpn anchor:
try
:
try
:
if
c
onfi
g
.
MODE_FPN
:
if
c
f
g
.
MODE_FPN
:
multilevel_anchor_inputs
=
get_multilevel_rpn_anchor_input
(
im
,
boxes
,
is_crowd
)
multilevel_anchor_inputs
=
get_multilevel_rpn_anchor_input
(
im
,
boxes
,
is_crowd
)
anchor_inputs
=
itertools
.
chain
.
from_iterable
(
multilevel_anchor_inputs
)
anchor_inputs
=
itertools
.
chain
.
from_iterable
(
multilevel_anchor_inputs
)
else
:
else
:
...
@@ -331,7 +331,7 @@ def get_train_dataflow():
...
@@ -331,7 +331,7 @@ def get_train_dataflow():
ret
=
[
im
]
+
list
(
anchor_inputs
)
+
[
boxes
,
klass
]
ret
=
[
im
]
+
list
(
anchor_inputs
)
+
[
boxes
,
klass
]
if
c
onfi
g
.
MODE_MASK
:
if
c
f
g
.
MODE_MASK
:
# augmentation will modify the polys in-place
# augmentation will modify the polys in-place
segmentation
=
copy
.
deepcopy
(
img
[
'segmentation'
])
segmentation
=
copy
.
deepcopy
(
img
[
'segmentation'
])
segmentation
=
[
segmentation
[
k
]
for
k
in
range
(
len
(
segmentation
))
if
not
is_crowd
[
k
]]
segmentation
=
[
segmentation
[
k
]
for
k
in
range
(
len
(
segmentation
))
if
not
is_crowd
[
k
]]
...
@@ -353,7 +353,7 @@ def get_train_dataflow():
...
@@ -353,7 +353,7 @@ def get_train_dataflow():
# tpviz.interactive_imshow(viz)
# tpviz.interactive_imshow(viz)
return
ret
return
ret
if
c
onfi
g
.
TRAINER
==
'horovod'
:
if
c
f
g
.
TRAINER
==
'horovod'
:
ds
=
MultiThreadMapData
(
ds
,
5
,
preprocess
)
ds
=
MultiThreadMapData
(
ds
,
5
,
preprocess
)
# MPI does not like fork()
# MPI does not like fork()
else
:
else
:
...
@@ -362,7 +362,7 @@ def get_train_dataflow():
...
@@ -362,7 +362,7 @@ def get_train_dataflow():
def
get_eval_dataflow
():
def
get_eval_dataflow
():
imgs
=
COCODetection
.
load_many
(
c
onfig
.
BASEDIR
,
config
.
VAL_DATASET
,
add_gt
=
False
)
imgs
=
COCODetection
.
load_many
(
c
fg
.
DATA
.
BASEDIR
,
cfg
.
DATA
.
VAL
,
add_gt
=
False
)
# no filter for training
# no filter for training
ds
=
DataFromListOfDict
(
imgs
,
[
'file_name'
,
'id'
])
ds
=
DataFromListOfDict
(
imgs
,
[
'file_name'
,
'id'
])
...
@@ -371,7 +371,7 @@ def get_eval_dataflow():
...
@@ -371,7 +371,7 @@ def get_eval_dataflow():
assert
im
is
not
None
,
fname
assert
im
is
not
None
,
fname
return
im
return
im
ds
=
MapDataComponent
(
ds
,
f
,
0
)
ds
=
MapDataComponent
(
ds
,
f
,
0
)
if
c
onfi
g
.
TRAINER
!=
'horovod'
:
if
c
f
g
.
TRAINER
!=
'horovod'
:
ds
=
PrefetchDataZMQ
(
ds
,
1
)
ds
=
PrefetchDataZMQ
(
ds
,
1
)
return
ds
return
ds
...
@@ -379,7 +379,7 @@ def get_eval_dataflow():
...
@@ -379,7 +379,7 @@ def get_eval_dataflow():
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
import
os
import
os
from
tensorpack.dataflow
import
PrintData
from
tensorpack.dataflow
import
PrintData
c
onfig
.
BASEDIR
=
os
.
path
.
expanduser
(
'~/data/coco'
)
c
fg
.
DATA
.
BASEDIR
=
os
.
path
.
expanduser
(
'~/data/coco'
)
ds
=
get_train_dataflow
()
ds
=
get_train_dataflow
()
ds
=
PrintData
(
ds
,
100
)
ds
=
PrintData
(
ds
,
100
)
TestDataSpeed
(
ds
,
50000
)
.
start
()
TestDataSpeed
(
ds
,
50000
)
.
start
()
...
...
examples/FasterRCNN/eval.py
View file @
9b1d1095
...
@@ -15,7 +15,7 @@ import pycocotools.mask as cocomask
...
@@ -15,7 +15,7 @@ import pycocotools.mask as cocomask
from
coco
import
COCOMeta
from
coco
import
COCOMeta
from
common
import
CustomResize
,
clip_boxes
from
common
import
CustomResize
,
clip_boxes
import
confi
g
from
config
import
config
as
cf
g
DetectionResult
=
namedtuple
(
DetectionResult
=
namedtuple
(
'DetectionResult'
,
'DetectionResult'
,
...
@@ -69,7 +69,7 @@ def detect_one_image(img, model_func):
...
@@ -69,7 +69,7 @@ def detect_one_image(img, model_func):
"""
"""
orig_shape
=
img
.
shape
[:
2
]
orig_shape
=
img
.
shape
[:
2
]
resizer
=
CustomResize
(
c
onfig
.
SHORT_EDGE_SIZE
,
config
.
MAX_SIZE
)
resizer
=
CustomResize
(
c
fg
.
PREPROC
.
SHORT_EDGE_SIZE
,
cfg
.
PREPROC
.
MAX_SIZE
)
resized_img
=
resizer
.
augment
(
img
)
resized_img
=
resizer
.
augment
(
img
)
scale
=
(
resized_img
.
shape
[
0
]
*
1.0
/
img
.
shape
[
0
]
+
resized_img
.
shape
[
1
]
*
1.0
/
img
.
shape
[
1
])
/
2
scale
=
(
resized_img
.
shape
[
0
]
*
1.0
/
img
.
shape
[
0
]
+
resized_img
.
shape
[
1
]
*
1.0
/
img
.
shape
[
1
])
/
2
boxes
,
probs
,
labels
,
*
masks
=
model_func
(
resized_img
)
boxes
,
probs
,
labels
,
*
masks
=
model_func
(
resized_img
)
...
@@ -131,10 +131,10 @@ def eval_coco(df, detect_func):
...
@@ -131,10 +131,10 @@ def eval_coco(df, detect_func):
# https://github.com/pdollar/coco/blob/master/PythonAPI/pycocoEvalDemo.ipynb
# https://github.com/pdollar/coco/blob/master/PythonAPI/pycocoEvalDemo.ipynb
def
print_evaluation_scores
(
json_file
):
def
print_evaluation_scores
(
json_file
):
ret
=
{}
ret
=
{}
assert
c
onfig
.
BASEDIR
and
os
.
path
.
isdir
(
config
.
BASEDIR
)
assert
c
fg
.
DATA
.
BASEDIR
and
os
.
path
.
isdir
(
cfg
.
DATA
.
BASEDIR
)
annofile
=
os
.
path
.
join
(
annofile
=
os
.
path
.
join
(
c
onfig
.
BASEDIR
,
'annotations'
,
c
fg
.
DATA
.
BASEDIR
,
'annotations'
,
'instances_{}.json'
.
format
(
c
onfig
.
VAL_DATASET
))
'instances_{}.json'
.
format
(
c
fg
.
DATA
.
VAL
))
coco
=
COCO
(
annofile
)
coco
=
COCO
(
annofile
)
cocoDt
=
coco
.
loadRes
(
json_file
)
cocoDt
=
coco
.
loadRes
(
json_file
)
cocoEval
=
COCOeval
(
coco
,
cocoDt
,
'bbox'
)
cocoEval
=
COCOeval
(
coco
,
cocoDt
,
'bbox'
)
...
@@ -145,7 +145,7 @@ def print_evaluation_scores(json_file):
...
@@ -145,7 +145,7 @@ def print_evaluation_scores(json_file):
for
k
in
range
(
6
):
for
k
in
range
(
6
):
ret
[
'mAP(bbox)/'
+
fields
[
k
]]
=
cocoEval
.
stats
[
k
]
ret
[
'mAP(bbox)/'
+
fields
[
k
]]
=
cocoEval
.
stats
[
k
]
if
c
onfi
g
.
MODE_MASK
:
if
c
f
g
.
MODE_MASK
:
cocoEval
=
COCOeval
(
coco
,
cocoDt
,
'segm'
)
cocoEval
=
COCOeval
(
coco
,
cocoDt
,
'segm'
)
cocoEval
.
evaluate
()
cocoEval
.
evaluate
()
cocoEval
.
accumulate
()
cocoEval
.
accumulate
()
...
...
examples/FasterRCNN/model.py
View file @
9b1d1095
...
@@ -15,7 +15,7 @@ from tensorpack.models import (
...
@@ -15,7 +15,7 @@ from tensorpack.models import (
from
utils.box_ops
import
pairwise_iou
from
utils.box_ops
import
pairwise_iou
from
utils.box_ops
import
area
as
tf_area
from
utils.box_ops
import
area
as
tf_area
from
model_box
import
roi_align
,
clip_boxes
from
model_box
import
roi_align
,
clip_boxes
import
confi
g
from
config
import
config
as
cf
g
@
layer_register
(
log_shape
=
True
)
@
layer_register
(
log_shape
=
True
)
...
@@ -91,7 +91,7 @@ def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
...
@@ -91,7 +91,7 @@ def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
placeholder
=
0.
placeholder
=
0.
label_loss
=
tf
.
nn
.
sigmoid_cross_entropy_with_logits
(
label_loss
=
tf
.
nn
.
sigmoid_cross_entropy_with_logits
(
labels
=
tf
.
to_float
(
valid_anchor_labels
),
logits
=
valid_label_logits
)
labels
=
tf
.
to_float
(
valid_anchor_labels
),
logits
=
valid_label_logits
)
label_loss
=
tf
.
reduce_sum
(
label_loss
)
*
(
1.
/
c
onfig
.
RPN_
BATCH_PER_IM
)
label_loss
=
tf
.
reduce_sum
(
label_loss
)
*
(
1.
/
c
fg
.
RPN
.
BATCH_PER_IM
)
label_loss
=
tf
.
where
(
tf
.
equal
(
nr_valid
,
0
),
placeholder
,
label_loss
,
name
=
'label_loss'
)
label_loss
=
tf
.
where
(
tf
.
equal
(
nr_valid
,
0
),
placeholder
,
label_loss
,
name
=
'label_loss'
)
pos_anchor_boxes
=
tf
.
boolean_mask
(
anchor_boxes
,
pos_mask
)
pos_anchor_boxes
=
tf
.
boolean_mask
(
anchor_boxes
,
pos_mask
)
...
@@ -100,7 +100,7 @@ def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
...
@@ -100,7 +100,7 @@ def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
box_loss
=
tf
.
losses
.
huber_loss
(
box_loss
=
tf
.
losses
.
huber_loss
(
pos_anchor_boxes
,
pos_box_logits
,
delta
=
delta
,
pos_anchor_boxes
,
pos_box_logits
,
delta
=
delta
,
reduction
=
tf
.
losses
.
Reduction
.
SUM
)
/
delta
reduction
=
tf
.
losses
.
Reduction
.
SUM
)
/
delta
box_loss
=
box_loss
*
(
1.
/
c
onfig
.
RPN_
BATCH_PER_IM
)
box_loss
=
box_loss
*
(
1.
/
c
fg
.
RPN
.
BATCH_PER_IM
)
box_loss
=
tf
.
where
(
tf
.
equal
(
nr_pos
,
0
),
placeholder
,
box_loss
,
name
=
'box_loss'
)
box_loss
=
tf
.
where
(
tf
.
equal
(
nr_pos
,
0
),
placeholder
,
box_loss
,
name
=
'box_loss'
)
add_moving_summary
(
label_loss
,
box_loss
,
nr_valid
,
nr_pos
)
add_moving_summary
(
label_loss
,
box_loss
,
nr_valid
,
nr_pos
)
...
@@ -139,7 +139,7 @@ def generate_rpn_proposals(boxes, scores, img_shape,
...
@@ -139,7 +139,7 @@ def generate_rpn_proposals(boxes, scores, img_shape,
topk_boxes_x1y1
,
topk_boxes_x2y2
=
tf
.
split
(
topk_boxes_x1y1x2y2
,
2
,
axis
=
1
)
topk_boxes_x1y1
,
topk_boxes_x2y2
=
tf
.
split
(
topk_boxes_x1y1x2y2
,
2
,
axis
=
1
)
# nx1x2 each
# nx1x2 each
wbhb
=
tf
.
squeeze
(
topk_boxes_x2y2
-
topk_boxes_x1y1
,
axis
=
1
)
wbhb
=
tf
.
squeeze
(
topk_boxes_x2y2
-
topk_boxes_x1y1
,
axis
=
1
)
valid
=
tf
.
reduce_all
(
wbhb
>
c
onfig
.
RPN_
MIN_SIZE
,
axis
=
1
)
# n,
valid
=
tf
.
reduce_all
(
wbhb
>
c
fg
.
RPN
.
MIN_SIZE
,
axis
=
1
)
# n,
topk_valid_boxes_x1y1x2y2
=
tf
.
boolean_mask
(
topk_boxes_x1y1x2y2
,
valid
)
topk_valid_boxes_x1y1x2y2
=
tf
.
boolean_mask
(
topk_boxes_x1y1x2y2
,
valid
)
topk_valid_scores
=
tf
.
boolean_mask
(
topk_scores
,
valid
)
topk_valid_scores
=
tf
.
boolean_mask
(
topk_scores
,
valid
)
...
@@ -152,7 +152,7 @@ def generate_rpn_proposals(boxes, scores, img_shape,
...
@@ -152,7 +152,7 @@ def generate_rpn_proposals(boxes, scores, img_shape,
# TODO use exp to work around a bug in TF1.9: https://github.com/tensorflow/tensorflow/issues/19578
# TODO use exp to work around a bug in TF1.9: https://github.com/tensorflow/tensorflow/issues/19578
tf
.
exp
(
topk_valid_scores
),
tf
.
exp
(
topk_valid_scores
),
max_output_size
=
post_nms_topk
,
max_output_size
=
post_nms_topk
,
iou_threshold
=
c
onfig
.
RPN_
PROPOSAL_NMS_THRESH
)
iou_threshold
=
c
fg
.
RPN
.
PROPOSAL_NMS_THRESH
)
topk_valid_boxes
=
tf
.
reshape
(
topk_valid_boxes_x1y1x2y2
,
(
-
1
,
4
))
topk_valid_boxes
=
tf
.
reshape
(
topk_valid_boxes_x1y1x2y2
,
(
-
1
,
4
))
final_boxes
=
tf
.
gather
(
topk_valid_boxes
,
nms_indices
)
final_boxes
=
tf
.
gather
(
topk_valid_boxes
,
nms_indices
)
...
@@ -209,17 +209,17 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
...
@@ -209,17 +209,17 @@ def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
# #proposal=n+m from now on
# #proposal=n+m from now on
def
sample_fg_bg
(
iou
):
def
sample_fg_bg
(
iou
):
fg_mask
=
tf
.
reduce_max
(
iou
,
axis
=
1
)
>=
c
onfig
.
FASTRCNN_
FG_THRESH
fg_mask
=
tf
.
reduce_max
(
iou
,
axis
=
1
)
>=
c
fg
.
FRCNN
.
FG_THRESH
fg_inds
=
tf
.
reshape
(
tf
.
where
(
fg_mask
),
[
-
1
])
fg_inds
=
tf
.
reshape
(
tf
.
where
(
fg_mask
),
[
-
1
])
num_fg
=
tf
.
minimum
(
int
(
num_fg
=
tf
.
minimum
(
int
(
c
onfig
.
FASTRCNN_BATCH_PER_IM
*
config
.
FASTRCNN_
FG_RATIO
),
c
fg
.
FRCNN
.
BATCH_PER_IM
*
cfg
.
FRCNN
.
FG_RATIO
),
tf
.
size
(
fg_inds
),
name
=
'num_fg'
)
tf
.
size
(
fg_inds
),
name
=
'num_fg'
)
fg_inds
=
tf
.
random_shuffle
(
fg_inds
)[:
num_fg
]
fg_inds
=
tf
.
random_shuffle
(
fg_inds
)[:
num_fg
]
bg_inds
=
tf
.
reshape
(
tf
.
where
(
tf
.
logical_not
(
fg_mask
)),
[
-
1
])
bg_inds
=
tf
.
reshape
(
tf
.
where
(
tf
.
logical_not
(
fg_mask
)),
[
-
1
])
num_bg
=
tf
.
minimum
(
num_bg
=
tf
.
minimum
(
c
onfig
.
FASTRCNN_
BATCH_PER_IM
-
num_fg
,
c
fg
.
FRCNN
.
BATCH_PER_IM
-
num_fg
,
tf
.
size
(
bg_inds
),
name
=
'num_bg'
)
tf
.
size
(
bg_inds
),
name
=
'num_bg'
)
bg_inds
=
tf
.
random_shuffle
(
bg_inds
)[:
num_bg
]
bg_inds
=
tf
.
random_shuffle
(
bg_inds
)[:
num_bg
]
...
@@ -274,7 +274,7 @@ def fastrcnn_2fc_head(feature, num_classes):
...
@@ -274,7 +274,7 @@ def fastrcnn_2fc_head(feature, num_classes):
Returns:
Returns:
cls_logits (Nxnum_class), reg_logits (Nx num_class-1 x 4)
cls_logits (Nxnum_class), reg_logits (Nx num_class-1 x 4)
"""
"""
dim
=
c
onfig
.
FAST
RCNN_FC_HEAD_DIM
dim
=
c
fg
.
FPN
.
F
RCNN_FC_HEAD_DIM
init
=
tf
.
variance_scaling_initializer
()
init
=
tf
.
variance_scaling_initializer
()
hidden
=
FullyConnected
(
'fc6'
,
feature
,
dim
,
kernel_initializer
=
init
,
activation
=
tf
.
nn
.
relu
)
hidden
=
FullyConnected
(
'fc6'
,
feature
,
dim
,
kernel_initializer
=
init
,
activation
=
tf
.
nn
.
relu
)
hidden
=
FullyConnected
(
'fc7'
,
hidden
,
dim
,
kernel_initializer
=
init
,
activation
=
tf
.
nn
.
relu
)
hidden
=
FullyConnected
(
'fc7'
,
hidden
,
dim
,
kernel_initializer
=
init
,
activation
=
tf
.
nn
.
relu
)
...
@@ -297,8 +297,8 @@ def fastrcnn_Xconv1fc_head(feature, num_classes, num_convs):
...
@@ -297,8 +297,8 @@ def fastrcnn_Xconv1fc_head(feature, num_classes, num_convs):
kernel_initializer
=
tf
.
variance_scaling_initializer
(
kernel_initializer
=
tf
.
variance_scaling_initializer
(
scale
=
2.0
,
mode
=
'fan_out'
,
distribution
=
'normal'
)):
scale
=
2.0
,
mode
=
'fan_out'
,
distribution
=
'normal'
)):
for
k
in
range
(
num_convs
):
for
k
in
range
(
num_convs
):
l
=
Conv2D
(
'conv{}'
.
format
(
k
),
l
,
c
onfig
.
FAST
RCNN_CONV_HEAD_DIM
,
3
,
activation
=
tf
.
nn
.
relu
)
l
=
Conv2D
(
'conv{}'
.
format
(
k
),
l
,
c
fg
.
FPN
.
F
RCNN_CONV_HEAD_DIM
,
3
,
activation
=
tf
.
nn
.
relu
)
l
=
FullyConnected
(
'fc'
,
l
,
c
onfig
.
FAST
RCNN_FC_HEAD_DIM
,
l
=
FullyConnected
(
'fc'
,
l
,
c
fg
.
FPN
.
F
RCNN_FC_HEAD_DIM
,
kernel_initializer
=
tf
.
variance_scaling_initializer
(),
activation
=
tf
.
nn
.
relu
)
kernel_initializer
=
tf
.
variance_scaling_initializer
(),
activation
=
tf
.
nn
.
relu
)
return
fastrcnn_outputs
(
'outputs'
,
l
,
num_classes
)
return
fastrcnn_outputs
(
'outputs'
,
l
,
num_classes
)
...
@@ -356,8 +356,8 @@ def fastrcnn_predictions(boxes, probs):
...
@@ -356,8 +356,8 @@ def fastrcnn_predictions(boxes, probs):
boxes: n#catx4 floatbox in float32
boxes: n#catx4 floatbox in float32
probs: nx#class
probs: nx#class
"""
"""
assert
boxes
.
shape
[
1
]
==
c
onfig
.
NUM_CLASS
-
1
assert
boxes
.
shape
[
1
]
==
c
fg
.
DATA
.
NUM_CLASS
-
1
assert
probs
.
shape
[
1
]
==
c
onfig
.
NUM_CLASS
assert
probs
.
shape
[
1
]
==
c
fg
.
DATA
.
NUM_CLASS
boxes
=
tf
.
transpose
(
boxes
,
[
1
,
0
,
2
])
# #catxnx4
boxes
=
tf
.
transpose
(
boxes
,
[
1
,
0
,
2
])
# #catxnx4
probs
=
tf
.
transpose
(
probs
[:,
1
:],
[
1
,
0
])
# #catxn
probs
=
tf
.
transpose
(
probs
[:,
1
:],
[
1
,
0
])
# #catxn
...
@@ -371,12 +371,12 @@ def fastrcnn_predictions(boxes, probs):
...
@@ -371,12 +371,12 @@ def fastrcnn_predictions(boxes, probs):
prob
,
box
=
X
prob
,
box
=
X
output_shape
=
tf
.
shape
(
prob
)
output_shape
=
tf
.
shape
(
prob
)
# filter by score threshold
# filter by score threshold
ids
=
tf
.
reshape
(
tf
.
where
(
prob
>
c
onfig
.
RESULT_SCORE_THRESH
),
[
-
1
])
ids
=
tf
.
reshape
(
tf
.
where
(
prob
>
c
fg
.
TEST
.
RESULT_SCORE_THRESH
),
[
-
1
])
prob
=
tf
.
gather
(
prob
,
ids
)
prob
=
tf
.
gather
(
prob
,
ids
)
box
=
tf
.
gather
(
box
,
ids
)
box
=
tf
.
gather
(
box
,
ids
)
# NMS within each class
# NMS within each class
selection
=
tf
.
image
.
non_max_suppression
(
selection
=
tf
.
image
.
non_max_suppression
(
box
,
prob
,
c
onfig
.
RESULTS_PER_IM
,
config
.
FAST
RCNN_NMS_THRESH
)
box
,
prob
,
c
fg
.
TEST
.
RESULTS_PER_IM
,
cfg
.
TEST
.
F
RCNN_NMS_THRESH
)
selection
=
tf
.
to_int32
(
tf
.
gather
(
ids
,
selection
))
selection
=
tf
.
to_int32
(
tf
.
gather
(
ids
,
selection
))
# sort available in TF>1.4.0
# sort available in TF>1.4.0
# sorted_selection = tf.contrib.framework.sort(selection, direction='ASCENDING')
# sorted_selection = tf.contrib.framework.sort(selection, direction='ASCENDING')
...
@@ -396,7 +396,7 @@ def fastrcnn_predictions(boxes, probs):
...
@@ -396,7 +396,7 @@ def fastrcnn_predictions(boxes, probs):
# filter again by sorting scores
# filter again by sorting scores
topk_probs
,
topk_indices
=
tf
.
nn
.
top_k
(
topk_probs
,
topk_indices
=
tf
.
nn
.
top_k
(
probs
,
probs
,
tf
.
minimum
(
c
onfig
.
RESULTS_PER_IM
,
tf
.
size
(
probs
)),
tf
.
minimum
(
c
fg
.
TEST
.
RESULTS_PER_IM
,
tf
.
size
(
probs
)),
sorted
=
False
)
sorted
=
False
)
filtered_selection
=
tf
.
gather
(
selected_indices
,
topk_indices
)
filtered_selection
=
tf
.
gather
(
selected_indices
,
topk_indices
)
filtered_selection
=
tf
.
reverse
(
filtered_selection
,
axis
=
[
1
],
name
=
'filtered_indices'
)
filtered_selection
=
tf
.
reverse
(
filtered_selection
,
axis
=
[
1
],
name
=
'filtered_indices'
)
...
@@ -420,8 +420,8 @@ def maskrcnn_upXconv_head(feature, num_class, num_convs):
...
@@ -420,8 +420,8 @@ def maskrcnn_upXconv_head(feature, num_class, num_convs):
scale
=
2.0
,
mode
=
'fan_out'
,
distribution
=
'normal'
)):
scale
=
2.0
,
mode
=
'fan_out'
,
distribution
=
'normal'
)):
# c2's MSRAFill is fan_out
# c2's MSRAFill is fan_out
for
k
in
range
(
num_convs
):
for
k
in
range
(
num_convs
):
l
=
Conv2D
(
'fcn{}'
.
format
(
k
),
l
,
c
onfig
.
MASKRCNN_
HEAD_DIM
,
3
,
activation
=
tf
.
nn
.
relu
)
l
=
Conv2D
(
'fcn{}'
.
format
(
k
),
l
,
c
fg
.
MRCNN
.
HEAD_DIM
,
3
,
activation
=
tf
.
nn
.
relu
)
l
=
Conv2DTranspose
(
'deconv'
,
l
,
c
onfig
.
MASKRCNN_
HEAD_DIM
,
2
,
strides
=
2
,
activation
=
tf
.
nn
.
relu
)
l
=
Conv2DTranspose
(
'deconv'
,
l
,
c
fg
.
MRCNN
.
HEAD_DIM
,
2
,
strides
=
2
,
activation
=
tf
.
nn
.
relu
)
l
=
Conv2D
(
'conv'
,
l
,
num_class
-
1
,
1
)
l
=
Conv2D
(
'conv'
,
l
,
num_class
-
1
,
1
)
return
l
return
l
...
@@ -475,7 +475,7 @@ def fpn_model(features):
...
@@ -475,7 +475,7 @@ def fpn_model(features):
[tf.Tensor]: FPN features p2-p6
[tf.Tensor]: FPN features p2-p6
"""
"""
assert
len
(
features
)
==
4
,
features
assert
len
(
features
)
==
4
,
features
num_channel
=
c
onfig
.
FPN_
NUM_CHANNEL
num_channel
=
c
fg
.
FPN
.
NUM_CHANNEL
def
upsample2x
(
name
,
x
):
def
upsample2x
(
name
,
x
):
return
FixedUnPooling
(
return
FixedUnPooling
(
...
@@ -560,7 +560,7 @@ def multilevel_roi_align(features, rcnn_boxes, resolution):
...
@@ -560,7 +560,7 @@ def multilevel_roi_align(features, rcnn_boxes, resolution):
# Crop patches from corresponding levels
# Crop patches from corresponding levels
for
i
,
boxes
,
featuremap
in
zip
(
itertools
.
count
(),
level_boxes
,
features
):
for
i
,
boxes
,
featuremap
in
zip
(
itertools
.
count
(),
level_boxes
,
features
):
with
tf
.
name_scope
(
'roi_level{}'
.
format
(
i
+
2
)):
with
tf
.
name_scope
(
'roi_level{}'
.
format
(
i
+
2
)):
boxes_on_featuremap
=
boxes
*
(
1.0
/
c
onfig
.
ANCHOR_STRIDES_FPN
[
i
])
boxes_on_featuremap
=
boxes
*
(
1.0
/
c
fg
.
FPN
.
ANCHOR_STRIDES
[
i
])
all_rois
.
append
(
roi_align
(
featuremap
,
boxes_on_featuremap
,
resolution
))
all_rois
.
append
(
roi_align
(
featuremap
,
boxes_on_featuremap
,
resolution
))
all_rois
=
tf
.
concat
(
all_rois
,
axis
=
0
)
# NCHW
all_rois
=
tf
.
concat
(
all_rois
,
axis
=
0
)
# NCHW
...
...
examples/FasterRCNN/model_box.py
View file @
9b1d1095
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
# File: model_box.py
# File: model_box.py
import
numpy
as
np
import
tensorflow
as
tf
import
tensorflow
as
tf
from
tensorpack.tfutils.scope_utils
import
under_name_scope
from
tensorpack.tfutils.scope_utils
import
under_name_scope
import
config
from
config
import
config
@
under_name_scope
()
@
under_name_scope
()
...
@@ -41,8 +42,8 @@ def decode_bbox_target(box_predictions, anchors):
...
@@ -41,8 +42,8 @@ def decode_bbox_target(box_predictions, anchors):
waha
=
anchors_x2y2
-
anchors_x1y1
waha
=
anchors_x2y2
-
anchors_x1y1
xaya
=
(
anchors_x2y2
+
anchors_x1y1
)
*
0.5
xaya
=
(
anchors_x2y2
+
anchors_x1y1
)
*
0.5
wbhb
=
tf
.
exp
(
tf
.
minimum
(
clip
=
np
.
log
(
config
.
PREPROC
.
MAX_SIZE
/
16.
)
box_pred_twth
,
config
.
BBOX_DECODE_CLIP
))
*
waha
wbhb
=
tf
.
exp
(
tf
.
minimum
(
box_pred_twth
,
clip
))
*
waha
xbyb
=
box_pred_txty
*
waha
+
xaya
xbyb
=
box_pred_txty
*
waha
+
xaya
x1y1
=
xbyb
-
wbhb
*
0.5
x1y1
=
xbyb
-
wbhb
*
0.5
x2y2
=
xbyb
+
wbhb
*
0.5
# (...)x1x2
x2y2
=
xbyb
+
wbhb
*
0.5
# (...)x1x2
...
@@ -174,7 +175,6 @@ if __name__ == '__main__':
...
@@ -174,7 +175,6 @@ if __name__ == '__main__':
Demonstrate what's wrong with tf.image.crop_and_resize:
Demonstrate what's wrong with tf.image.crop_and_resize:
"""
"""
import
tensorflow.contrib.eager
as
tfe
import
tensorflow.contrib.eager
as
tfe
import
numpy
as
np
tfe
.
enable_eager_execution
()
tfe
.
enable_eager_execution
()
# want to crop 2x2 out of a 5x5 image, and resize to 4x4
# want to crop 2x2 out of a 5x5 image, and resize to 4x4
...
...
examples/FasterRCNN/train.py
View file @
9b1d1095
This diff is collapsed.
Click to expand it.
examples/FasterRCNN/viz.py
View file @
9b1d1095
...
@@ -8,7 +8,7 @@ from tensorpack.utils import viz
...
@@ -8,7 +8,7 @@ from tensorpack.utils import viz
from
tensorpack.utils.palette
import
PALETTE_RGB
from
tensorpack.utils.palette
import
PALETTE_RGB
from
utils.np_box_ops
import
iou
as
np_iou
from
utils.np_box_ops
import
iou
as
np_iou
import
confi
g
from
config
import
config
as
cf
g
def
draw_annotation
(
img
,
boxes
,
klass
,
is_crowd
=
None
):
def
draw_annotation
(
img
,
boxes
,
klass
,
is_crowd
=
None
):
...
@@ -17,13 +17,13 @@ def draw_annotation(img, boxes, klass, is_crowd=None):
...
@@ -17,13 +17,13 @@ def draw_annotation(img, boxes, klass, is_crowd=None):
if
is_crowd
is
not
None
:
if
is_crowd
is
not
None
:
assert
len
(
boxes
)
==
len
(
is_crowd
)
assert
len
(
boxes
)
==
len
(
is_crowd
)
for
cls
,
crd
in
zip
(
klass
,
is_crowd
):
for
cls
,
crd
in
zip
(
klass
,
is_crowd
):
clsname
=
c
onfig
.
CLASS_NAMES
[
cls
]
clsname
=
c
fg
.
DATA
.
CLASS_NAMES
[
cls
]
if
crd
==
1
:
if
crd
==
1
:
clsname
+=
';Crowd'
clsname
+=
';Crowd'
labels
.
append
(
clsname
)
labels
.
append
(
clsname
)
else
:
else
:
for
cls
in
klass
:
for
cls
in
klass
:
labels
.
append
(
c
onfig
.
CLASS_NAMES
[
cls
])
labels
.
append
(
c
fg
.
DATA
.
CLASS_NAMES
[
cls
])
img
=
viz
.
draw_boxes
(
img
,
boxes
,
labels
)
img
=
viz
.
draw_boxes
(
img
,
boxes
,
labels
)
return
img
return
img
...
@@ -57,7 +57,7 @@ def draw_predictions(img, boxes, scores):
...
@@ -57,7 +57,7 @@ def draw_predictions(img, boxes, scores):
return
img
return
img
labels
=
scores
.
argmax
(
axis
=
1
)
labels
=
scores
.
argmax
(
axis
=
1
)
scores
=
scores
.
max
(
axis
=
1
)
scores
=
scores
.
max
(
axis
=
1
)
tags
=
[
"{},{:.2f}"
.
format
(
c
onfig
.
CLASS_NAMES
[
lb
],
score
)
for
lb
,
score
in
zip
(
labels
,
scores
)]
tags
=
[
"{},{:.2f}"
.
format
(
c
fg
.
DATA
.
CLASS_NAMES
[
lb
],
score
)
for
lb
,
score
in
zip
(
labels
,
scores
)]
return
viz
.
draw_boxes
(
img
,
boxes
,
tags
)
return
viz
.
draw_boxes
(
img
,
boxes
,
tags
)
...
@@ -72,7 +72,7 @@ def draw_final_outputs(img, results):
...
@@ -72,7 +72,7 @@ def draw_final_outputs(img, results):
tags
=
[]
tags
=
[]
for
r
in
results
:
for
r
in
results
:
tags
.
append
(
tags
.
append
(
"{},{:.2f}"
.
format
(
c
onfig
.
CLASS_NAMES
[
r
.
class_id
],
r
.
score
))
"{},{:.2f}"
.
format
(
c
fg
.
DATA
.
CLASS_NAMES
[
r
.
class_id
],
r
.
score
))
boxes
=
np
.
asarray
([
r
.
box
for
r
in
results
])
boxes
=
np
.
asarray
([
r
.
box
for
r
in
results
])
ret
=
viz
.
draw_boxes
(
img
,
boxes
,
tags
)
ret
=
viz
.
draw_boxes
(
img
,
boxes
,
tags
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment