Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
seminar-breakout
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Shashank Suhas
seminar-breakout
Commits
791c7b45
Commit
791c7b45
authored
May 01, 2019
by
Yuxin Wu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[MaskRCNN] split train.py & predict.py (#1163)
parent
9b1b5f29
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
112 additions
and
224 deletions
+112
-224
examples/FasterRCNN/README.md
examples/FasterRCNN/README.md
+3
-3
examples/FasterRCNN/backbone.py
examples/FasterRCNN/backbone.py
+1
-1
examples/FasterRCNN/coco.py
examples/FasterRCNN/coco.py
+3
-3
examples/FasterRCNN/config.py
examples/FasterRCNN/config.py
+1
-1
examples/FasterRCNN/data.py
examples/FasterRCNN/data.py
+7
-5
examples/FasterRCNN/eval.py
examples/FasterRCNN/eval.py
+4
-4
examples/FasterRCNN/generalized_rcnn.py
examples/FasterRCNN/generalized_rcnn.py
+7
-6
examples/FasterRCNN/model_box.py
examples/FasterRCNN/model_box.py
+1
-1
examples/FasterRCNN/train.py
examples/FasterRCNN/train.py
+84
-199
examples/FasterRCNN/viz.py
examples/FasterRCNN/viz.py
+1
-1
No files found.
examples/FasterRCNN/README.md
View file @
791c7b45
...
...
@@ -63,12 +63,12 @@ Some reasonable configurations are listed in the table below.
To predict on an image (needs DISPLAY to show the outputs):
```
./
train
.py --predict input1.jpg input2.jpg --load /path/to/Trained-Model-Checkpoint --config SAME-AS-TRAINING
./
predict
.py --predict input1.jpg input2.jpg --load /path/to/Trained-Model-Checkpoint --config SAME-AS-TRAINING
```
To evaluate the performance of a model on COCO:
```
./
train
.py --evaluate output.json --load /path/to/Trained-Model-Checkpoint \
./
predict
.py --evaluate output.json --load /path/to/Trained-Model-Checkpoint \
--config SAME-AS-TRAINING
```
...
...
@@ -99,7 +99,7 @@ be approximately reproduced.
| R101-FPN | 40.4;36.6
[
:arrow_down:
][
R101FPN2x
]
| 40.9;36.4 | 37h |
<details><summary>
standard
</summary>
`MODE_FPN=True`
<br/>
`BACKBONE.RESNET_NUM_BLOCKS=[3,4,23,3]`
</details>
|
| R101-FPN | 46.5;40.1
[
:arrow_down:
][
R101FPN3xCasAug
]
<sup>
[
3
](
#ft3
)
</sup>
| | 73h |
<details><summary>
3x+Cascade+TrainAug
</summary>
`MODE_FPN=True FPN.CASCADE=True`
<br/>
`BACKBONE.RESNET_NUM_BLOCKS=[3,4,23,3]`
<br/>
`TEST.RESULT_SCORE_THRESH=1e-4`
<br/>
`PREPROC.TRAIN_SHORT_EDGE_SIZE=[640,800]`
<br/>
`TRAIN.LR_SCHEDULE=[420000,500000,540000]`
</details>
|
| R101-FPN
<br/>
(From Scratch) | 47.5;41.2
[
:arrow_down:
][
R101FPN9xGNCasAugScratch
]
| 47.4;40.5
<sup>
[
4
](
#ft4
)
</sup>
| 45h (on 48 V100s) |
<details><summary>
9x+GN+Cascade+TrainAug
</summary>
`MODE_FPN=True FPN.CASCADE=True`
<br/>
`BACKBONE.RESNET_NUM_BLOCKS=[3,4,23,3]`
<br/>
`FPN.NORM=GN BACKBONE.NORM=GN`
<br/>
`FPN.FRCNN_HEAD_FUNC=fastrcnn_4conv1fc_gn_head`
<br/>
`FPN.MRCNN_HEAD_FUNC=maskrcnn_up4conv_gn_head`
<br/>
`PREPROC.TRAIN_SHORT_EDGE_SIZE=[640,800]`
<br/>
`TRAIN.LR_SCHEDULE=[1500000,1580000,1620000]`
<br/>
`BACKBONE.FREEZE_AT=0`
</details>
|
[
R50C42x
]:
http://models.tensorpack.com/FasterRCNN/COCO-R50C4-MaskRCNN-Standard.npz
[
R50FPN2x
]:
http://models.tensorpack.com/FasterRCNN/COCO-R50FPN-MaskRCNN-Standard.npz
[
R50FPN2xGN
]:
http://models.tensorpack.com/FasterRCNN/COCO-R50FPN-MaskRCNN-StandardGN.npz
...
...
examples/FasterRCNN/backbone.py
View file @
791c7b45
...
...
@@ -2,8 +2,8 @@
# File: backbone.py
import
numpy
as
np
from
contextlib
import
ExitStack
,
contextmanager
import
tensorflow
as
tf
from
contextlib
import
ExitStack
,
contextmanager
from
tensorpack.models
import
BatchNorm
,
Conv2D
,
MaxPooling
,
layer_register
from
tensorpack.tfutils
import
argscope
...
...
examples/FasterRCNN/coco.py
View file @
791c7b45
# -*- coding: utf-8 -*-
import
json
import
numpy
as
np
import
os
import
tqdm
import
json
from
tensorpack.utils
import
logger
from
tensorpack.utils.timer
import
timed_operation
from
config
import
config
as
cfg
from
dataset
import
Dataset
Split
,
DatasetRegistry
from
dataset
import
Dataset
Registry
,
DatasetSplit
__all__
=
[
'register_coco'
]
...
...
@@ -42,7 +42,7 @@ class COCODetection(DatasetSplit):
self
.
name
=
name
self
.
_imgdir
=
os
.
path
.
realpath
(
os
.
path
.
join
(
basedir
,
self
.
_INSTANCE_TO_BASEDIR
.
get
(
name
,
name
)))
assert
os
.
path
.
isdir
(
self
.
_imgdir
),
self
.
_imgdir
assert
os
.
path
.
isdir
(
self
.
_imgdir
),
"{} is not a directory!"
.
format
(
self
.
_imgdir
)
annotation_file
=
os
.
path
.
join
(
basedir
,
'annotations/instances_{}.json'
.
format
(
name
))
assert
os
.
path
.
isfile
(
annotation_file
),
annotation_file
...
...
examples/FasterRCNN/config.py
View file @
791c7b45
...
...
@@ -3,8 +3,8 @@
import
numpy
as
np
import
os
import
six
import
pprint
import
six
from
tensorpack.utils
import
logger
from
tensorpack.utils.gpu
import
get_num_gpu
...
...
examples/FasterRCNN/data.py
View file @
791c7b45
...
...
@@ -2,24 +2,26 @@
# File: data.py
import
copy
import
itertools
import
numpy
as
np
import
cv2
import
itertools
from
tabulate
import
tabulate
from
termcolor
import
colored
from
tensorpack.dataflow
import
(
DataFromList
,
MapDataComponent
,
MapData
,
MultiProcessMapDataZMQ
,
MultiThreadMapData
,
TestDataSpeed
,
imgaug
)
DataFromList
,
MapData
,
MapDataComponent
,
MultiProcessMapDataZMQ
,
MultiThreadMapData
,
TestDataSpeed
,
imgaug
)
from
tensorpack.utils
import
logger
from
tensorpack.utils.argtools
import
log_once
,
memoized
from
common
import
(
CustomResize
,
DataFromListOfDict
,
box_to_point8
,
filter_boxes_inside_shape
,
point8_to_box
,
segmentation_to_mask
,
np_iou
)
CustomResize
,
DataFromListOfDict
,
box_to_point8
,
filter_boxes_inside_shape
,
np_iou
,
point8_to_box
,
segmentation_to_mask
)
from
config
import
config
as
cfg
from
dataset
import
DatasetRegistry
from
utils.generate_anchors
import
generate_anchors
from
utils.np_box_ops
import
area
as
np_area
,
ioa
as
np_ioa
from
utils.np_box_ops
import
area
as
np_area
from
utils.np_box_ops
import
ioa
as
np_ioa
# import tensorpack.utils.viz as tpviz
...
...
examples/FasterRCNN/eval.py
View file @
791c7b45
...
...
@@ -2,17 +2,17 @@
# File: eval.py
import
itertools
import
sys
import
os
import
json
import
numpy
as
np
import
os
import
sys
import
tensorflow
as
tf
from
collections
import
namedtuple
from
concurrent.futures
import
ThreadPoolExecutor
from
contextlib
import
ExitStack
import
cv2
import
pycocotools.mask
as
cocomask
import
tqdm
import
tensorflow
as
tf
from
tensorpack.callbacks
import
Callback
from
tensorpack.tfutils.common
import
get_tf_version_tuple
...
...
@@ -20,9 +20,9 @@ from tensorpack.utils import logger
from
tensorpack.utils.utils
import
get_tqdm
from
common
import
CustomResize
,
clip_boxes
from
config
import
config
as
cfg
from
data
import
get_eval_dataflow
from
dataset
import
DatasetRegistry
from
config
import
config
as
cfg
try
:
import
horovod.tensorflow
as
hvd
...
...
examples/FasterRCNN/generalized_rcnn.py
View file @
791c7b45
...
...
@@ -4,22 +4,23 @@
import
tensorflow
as
tf
from
tensorpack
import
ModelDesc
from
tensorpack.models
import
regularize_cost
,
l2_regularizer
,
GlobalAvgPooling
from
tensorpack.tfutils.tower
import
get_current_tower_context
from
tensorpack.tfutils.summary
import
add_moving_summary
from
tensorpack.models
import
GlobalAvgPooling
,
l2_regularizer
,
regularize_cost
from
tensorpack.tfutils
import
optimizer
from
tensorpack.tfutils.summary
import
add_moving_summary
from
tensorpack.tfutils.tower
import
get_current_tower_context
import
model_frcnn
import
model_mrcnn
from
backbone
import
image_preprocess
,
resnet_c4_backbone
,
resnet_conv5
,
resnet_fpn_backbone
from
config
import
config
as
cfg
from
data
import
get_all_anchors
,
get_all_anchors_fpn
from
model_box
import
RPNAnchors
,
clip_boxes
,
crop_and_resize
,
roi_align
from
model_cascade
import
CascadeRCNNHead
from
model_fpn
import
fpn_model
,
generate_fpn_proposals
,
multilevel_roi_align
,
multilevel_rpn_losses
from
model_frcnn
import
BoxProposals
,
FastRCNNHead
,
fastrcnn_outputs
,
fastrcnn_predictions
,
sample_fast_rcnn_targets
from
model_frcnn
import
(
BoxProposals
,
FastRCNNHead
,
fastrcnn_outputs
,
fastrcnn_predictions
,
sample_fast_rcnn_targets
)
from
model_mrcnn
import
maskrcnn_loss
,
maskrcnn_upXconv_head
from
model_rpn
import
generate_rpn_proposals
,
rpn_head
,
rpn_losses
from
data
import
get_all_anchors
,
get_all_anchors_fpn
from
config
import
config
as
cfg
class
GeneralizedRCNN
(
ModelDesc
):
...
...
examples/FasterRCNN/model_box.py
View file @
791c7b45
...
...
@@ -2,8 +2,8 @@
# File: model_box.py
import
numpy
as
np
from
collections
import
namedtuple
import
tensorflow
as
tf
from
collections
import
namedtuple
from
tensorpack.tfutils.scope_utils
import
under_name_scope
...
...
examples/FasterRCNN/train.py
View file @
791c7b45
...
...
@@ -3,28 +3,20 @@
# File: train.py
import
argparse
import
itertools
import
numpy
as
np
import
os
import
shutil
import
cv2
import
six
assert
six
.
PY3
,
"FasterRCNN requires Python 3!"
import
tensorflow
as
tf
import
tqdm
assert
six
.
PY3
,
"This example requires Python 3!"
import
tensorpack.utils.viz
as
tpviz
from
tensorpack
import
*
from
tensorpack.tfutils
import
collect_env_info
from
tensorpack.tfutils.common
import
get_tf_version_tuple
from
generalized_rcnn
import
ResNetFPNModel
,
ResNetC4Model
from
dataset
import
DatasetRegistry
from
coco
import
register_coco
from
config
import
finalize_configs
,
config
as
cfg
from
data
import
get_eval_dataflow
,
get_train_dataflow
from
eval
import
DetectionResult
,
predict_image
,
multithread_predict_dataflow
,
EvalCallback
from
viz
import
draw_annotation
,
draw_final_outputs
,
draw_predictions
,
draw_proposal_recall
from
config
import
config
as
cfg
from
config
import
finalize_configs
from
data
import
get_train_dataflow
from
eval
import
EvalCallback
from
generalized_rcnn
import
ResNetC4Model
,
ResNetFPNModel
try
:
import
horovod.tensorflow
as
hvd
...
...
@@ -32,94 +24,11 @@ except ImportError:
pass
def
do_visualize
(
model
,
model_path
,
nr_visualize
=
100
,
output_dir
=
'output'
):
"""
Visualize some intermediate results (proposals, raw predictions) inside the pipeline.
"""
df
=
get_train_dataflow
()
# we don't visualize mask stuff
df
.
reset_state
()
pred
=
OfflinePredictor
(
PredictConfig
(
model
=
model
,
session_init
=
get_model_loader
(
model_path
),
input_names
=
[
'image'
,
'gt_boxes'
,
'gt_labels'
],
output_names
=
[
'generate_{}_proposals/boxes'
.
format
(
'fpn'
if
cfg
.
MODE_FPN
else
'rpn'
),
'generate_{}_proposals/scores'
.
format
(
'fpn'
if
cfg
.
MODE_FPN
else
'rpn'
),
'fastrcnn_all_scores'
,
'output/boxes'
,
'output/scores'
,
'output/labels'
,
]))
if
os
.
path
.
isdir
(
output_dir
):
shutil
.
rmtree
(
output_dir
)
utils
.
fs
.
mkdir_p
(
output_dir
)
with
tqdm
.
tqdm
(
total
=
nr_visualize
)
as
pbar
:
for
idx
,
dp
in
itertools
.
islice
(
enumerate
(
df
),
nr_visualize
):
img
,
gt_boxes
,
gt_labels
=
dp
[
'image'
],
dp
[
'gt_boxes'
],
dp
[
'gt_labels'
]
rpn_boxes
,
rpn_scores
,
all_scores
,
\
final_boxes
,
final_scores
,
final_labels
=
pred
(
img
,
gt_boxes
,
gt_labels
)
# draw groundtruth boxes
gt_viz
=
draw_annotation
(
img
,
gt_boxes
,
gt_labels
)
# draw best proposals for each groundtruth, to show recall
proposal_viz
,
good_proposals_ind
=
draw_proposal_recall
(
img
,
rpn_boxes
,
rpn_scores
,
gt_boxes
)
# draw the scores for the above proposals
score_viz
=
draw_predictions
(
img
,
rpn_boxes
[
good_proposals_ind
],
all_scores
[
good_proposals_ind
])
results
=
[
DetectionResult
(
*
args
)
for
args
in
zip
(
final_boxes
,
final_scores
,
final_labels
,
[
None
]
*
len
(
final_labels
))]
final_viz
=
draw_final_outputs
(
img
,
results
)
viz
=
tpviz
.
stack_patches
([
gt_viz
,
proposal_viz
,
score_viz
,
final_viz
],
2
,
2
)
if
os
.
environ
.
get
(
'DISPLAY'
,
None
):
tpviz
.
interactive_imshow
(
viz
)
cv2
.
imwrite
(
"{}/{:03d}.png"
.
format
(
output_dir
,
idx
),
viz
)
pbar
.
update
()
def
do_evaluate
(
pred_config
,
output_file
):
num_gpu
=
cfg
.
TRAIN
.
NUM_GPUS
graph_funcs
=
MultiTowerOfflinePredictor
(
pred_config
,
list
(
range
(
num_gpu
)))
.
get_predictors
()
for
dataset
in
cfg
.
DATA
.
VAL
:
logger
.
info
(
"Evaluating {} ..."
.
format
(
dataset
))
dataflows
=
[
get_eval_dataflow
(
dataset
,
shard
=
k
,
num_shards
=
num_gpu
)
for
k
in
range
(
num_gpu
)]
all_results
=
multithread_predict_dataflow
(
dataflows
,
graph_funcs
)
output
=
output_file
+
'-'
+
dataset
DatasetRegistry
.
get
(
dataset
)
.
eval_inference_results
(
all_results
,
output
)
def
do_predict
(
pred_func
,
input_file
):
img
=
cv2
.
imread
(
input_file
,
cv2
.
IMREAD_COLOR
)
results
=
predict_image
(
img
,
pred_func
)
final
=
draw_final_outputs
(
img
,
results
)
viz
=
np
.
concatenate
((
img
,
final
),
axis
=
1
)
cv2
.
imwrite
(
"output.png"
,
viz
)
logger
.
info
(
"Inference output for {} written to output.png"
.
format
(
input_file
))
tpviz
.
interactive_imshow
(
viz
)
if
__name__
==
'__main__'
:
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
'--load'
,
help
=
'load a model
for evaluation or training
. Can overwrite BACKBONE.WEIGHTS'
)
parser
.
add_argument
(
'--load'
,
help
=
'load a model
to start training from
. Can overwrite BACKBONE.WEIGHTS'
)
parser
.
add_argument
(
'--logdir'
,
help
=
'log directory'
,
default
=
'train_log/maskrcnn'
)
parser
.
add_argument
(
'--visualize'
,
action
=
'store_true'
,
help
=
'visualize intermediate results'
)
parser
.
add_argument
(
'--evaluate'
,
help
=
"Run evaluation. "
"This argument is the path to the output json evaluation file"
)
parser
.
add_argument
(
'--predict'
,
help
=
"Run prediction on a given image. "
"This argument is the path to the input image file"
,
nargs
=
'+'
)
parser
.
add_argument
(
'--config'
,
help
=
"A list of KEY=VALUE to overwrite those defined in config.py"
,
nargs
=
'+'
)
parser
.
add_argument
(
'--config'
,
help
=
"A list of KEY=VALUE to overwrite those defined in config.py"
,
nargs
=
'+'
)
if
get_tf_version_tuple
()
<
(
1
,
6
):
# https://github.com/tensorflow/tensorflow/issues/14657
...
...
@@ -130,106 +39,82 @@ if __name__ == '__main__':
cfg
.
update_args
(
args
.
config
)
register_coco
(
cfg
.
DATA
.
BASEDIR
)
# add COCO datasets to the registry
# Setup logger ...
is_horovod
=
cfg
.
TRAINER
==
'horovod'
if
is_horovod
:
hvd
.
init
()
logger
.
info
(
"Horovod Rank={}, Size={}"
.
format
(
hvd
.
rank
(),
hvd
.
size
()))
if
not
is_horovod
or
hvd
.
rank
()
==
0
:
logger
.
set_logger_dir
(
args
.
logdir
,
'd'
)
logger
.
info
(
"Environment Information:
\n
"
+
collect_env_info
())
finalize_configs
(
is_training
=
True
)
# Compute the training schedule from the number of GPUs ...
stepnum
=
cfg
.
TRAIN
.
STEPS_PER_EPOCH
# warmup is step based, lr is epoch based
init_lr
=
cfg
.
TRAIN
.
WARMUP_INIT_LR
*
min
(
8.
/
cfg
.
TRAIN
.
NUM_GPUS
,
1.
)
warmup_schedule
=
[(
0
,
init_lr
),
(
cfg
.
TRAIN
.
WARMUP
,
cfg
.
TRAIN
.
BASE_LR
)]
warmup_end_epoch
=
cfg
.
TRAIN
.
WARMUP
*
1.
/
stepnum
lr_schedule
=
[(
int
(
warmup_end_epoch
+
0.5
),
cfg
.
TRAIN
.
BASE_LR
)]
factor
=
8.
/
cfg
.
TRAIN
.
NUM_GPUS
for
idx
,
steps
in
enumerate
(
cfg
.
TRAIN
.
LR_SCHEDULE
[:
-
1
]):
mult
=
0.1
**
(
idx
+
1
)
lr_schedule
.
append
(
(
steps
*
factor
//
stepnum
,
cfg
.
TRAIN
.
BASE_LR
*
mult
))
logger
.
info
(
"Warm Up Schedule (steps, value): "
+
str
(
warmup_schedule
))
logger
.
info
(
"LR Schedule (epochs, value): "
+
str
(
lr_schedule
))
train_dataflow
=
get_train_dataflow
()
# This is what's commonly referred to as "epochs"
total_passes
=
cfg
.
TRAIN
.
LR_SCHEDULE
[
-
1
]
*
8
/
train_dataflow
.
size
()
logger
.
info
(
"Total passes of the training set is: {:.5g}"
.
format
(
total_passes
))
# Create model and callbacks ...
MODEL
=
ResNetFPNModel
()
if
cfg
.
MODE_FPN
else
ResNetC4Model
()
if
args
.
visualize
or
args
.
evaluate
or
args
.
predict
:
if
not
tf
.
test
.
is_gpu_available
():
from
tensorflow.python.framework
import
test_util
assert
get_tf_version_tuple
()
>=
(
1
,
7
)
and
test_util
.
IsMklEnabled
(),
\
"Inference requires either GPU support or MKL support!"
assert
args
.
load
finalize_configs
(
is_training
=
False
)
if
args
.
predict
or
args
.
visualize
:
cfg
.
TEST
.
RESULT_SCORE_THRESH
=
cfg
.
TEST
.
RESULT_SCORE_THRESH_VIS
if
args
.
visualize
:
do_visualize
(
MODEL
,
args
.
load
)
else
:
predcfg
=
PredictConfig
(
model
=
MODEL
,
session_init
=
get_model_loader
(
args
.
load
),
input_names
=
MODEL
.
get_inference_tensor_names
()[
0
],
output_names
=
MODEL
.
get_inference_tensor_names
()[
1
])
if
args
.
predict
:
predictor
=
OfflinePredictor
(
predcfg
)
for
image_file
in
args
.
predict
:
do_predict
(
predictor
,
image_file
)
elif
args
.
evaluate
:
assert
args
.
evaluate
.
endswith
(
'.json'
),
args
.
evaluate
do_evaluate
(
predcfg
,
args
.
evaluate
)
callbacks
=
[
PeriodicCallback
(
ModelSaver
(
max_to_keep
=
10
,
keep_checkpoint_every_n_hours
=
1
),
every_k_epochs
=
20
),
# linear warmup
ScheduledHyperParamSetter
(
'learning_rate'
,
warmup_schedule
,
interp
=
'linear'
,
step_based
=
True
),
ScheduledHyperParamSetter
(
'learning_rate'
,
lr_schedule
),
GPUMemoryTracker
(),
HostMemoryTracker
(),
EstimatedTimeLeft
(
median
=
True
),
SessionRunTimeout
(
60000
),
# 1 minute timeout
]
if
cfg
.
TRAIN
.
EVAL_PERIOD
>
0
:
callbacks
.
extend
([
EvalCallback
(
dataset
,
*
MODEL
.
get_inference_tensor_names
(),
args
.
logdir
)
for
dataset
in
cfg
.
DATA
.
VAL
])
if
not
is_horovod
:
callbacks
.
append
(
GPUUtilizationTracker
())
if
is_horovod
and
hvd
.
rank
()
>
0
:
session_init
=
None
else
:
is_horovod
=
cfg
.
TRAINER
==
'horovod'
if
is_horovod
:
hvd
.
init
()
logger
.
info
(
"Horovod Rank={}, Size={}"
.
format
(
hvd
.
rank
(),
hvd
.
size
()))
if
not
is_horovod
or
hvd
.
rank
()
==
0
:
logger
.
set_logger_dir
(
args
.
logdir
,
'd'
)
logger
.
info
(
"Environment Information:
\n
"
+
collect_env_info
())
finalize_configs
(
is_training
=
True
)
stepnum
=
cfg
.
TRAIN
.
STEPS_PER_EPOCH
# warmup is step based, lr is epoch based
init_lr
=
cfg
.
TRAIN
.
WARMUP_INIT_LR
*
min
(
8.
/
cfg
.
TRAIN
.
NUM_GPUS
,
1.
)
warmup_schedule
=
[(
0
,
init_lr
),
(
cfg
.
TRAIN
.
WARMUP
,
cfg
.
TRAIN
.
BASE_LR
)]
warmup_end_epoch
=
cfg
.
TRAIN
.
WARMUP
*
1.
/
stepnum
lr_schedule
=
[(
int
(
warmup_end_epoch
+
0.5
),
cfg
.
TRAIN
.
BASE_LR
)]
factor
=
8.
/
cfg
.
TRAIN
.
NUM_GPUS
for
idx
,
steps
in
enumerate
(
cfg
.
TRAIN
.
LR_SCHEDULE
[:
-
1
]):
mult
=
0.1
**
(
idx
+
1
)
lr_schedule
.
append
(
(
steps
*
factor
//
stepnum
,
cfg
.
TRAIN
.
BASE_LR
*
mult
))
logger
.
info
(
"Warm Up Schedule (steps, value): "
+
str
(
warmup_schedule
))
logger
.
info
(
"LR Schedule (epochs, value): "
+
str
(
lr_schedule
))
train_dataflow
=
get_train_dataflow
()
# This is what's commonly referred to as "epochs"
total_passes
=
cfg
.
TRAIN
.
LR_SCHEDULE
[
-
1
]
*
8
/
train_dataflow
.
size
()
logger
.
info
(
"Total passes of the training set is: {:.5g}"
.
format
(
total_passes
))
callbacks
=
[
PeriodicCallback
(
ModelSaver
(
max_to_keep
=
10
,
keep_checkpoint_every_n_hours
=
1
),
every_k_epochs
=
20
),
# linear warmup
ScheduledHyperParamSetter
(
'learning_rate'
,
warmup_schedule
,
interp
=
'linear'
,
step_based
=
True
),
ScheduledHyperParamSetter
(
'learning_rate'
,
lr_schedule
),
GPUMemoryTracker
(),
HostMemoryTracker
(),
EstimatedTimeLeft
(
median
=
True
),
SessionRunTimeout
(
60000
),
# 1 minute timeout
]
if
cfg
.
TRAIN
.
EVAL_PERIOD
>
0
:
callbacks
.
extend
([
EvalCallback
(
dataset
,
*
MODEL
.
get_inference_tensor_names
(),
args
.
logdir
)
for
dataset
in
cfg
.
DATA
.
VAL
])
if
not
is_horovod
:
callbacks
.
append
(
GPUUtilizationTracker
())
if
is_horovod
and
hvd
.
rank
()
>
0
:
session_init
=
None
if
args
.
load
:
session_init
=
get_model_loader
(
args
.
load
)
else
:
if
args
.
load
:
session_init
=
get_model_loader
(
args
.
load
)
else
:
session_init
=
get_model_loader
(
cfg
.
BACKBONE
.
WEIGHTS
)
if
cfg
.
BACKBONE
.
WEIGHTS
else
None
traincfg
=
TrainConfig
(
model
=
MODEL
,
data
=
QueueInput
(
train_dataflow
),
callbacks
=
callbacks
,
steps_per_epoch
=
stepnum
,
max_epoch
=
cfg
.
TRAIN
.
LR_SCHEDULE
[
-
1
]
*
factor
//
stepnum
,
session_init
=
session_init
,
starting_epoch
=
cfg
.
TRAIN
.
STARTING_EPOCH
)
if
is_horovod
:
trainer
=
HorovodTrainer
(
average
=
False
)
else
:
# nccl mode appears faster than cpu mode
trainer
=
SyncMultiGPUTrainerReplicated
(
cfg
.
TRAIN
.
NUM_GPUS
,
average
=
False
,
mode
=
'nccl'
)
launch_train_with_config
(
traincfg
,
trainer
)
session_init
=
get_model_loader
(
cfg
.
BACKBONE
.
WEIGHTS
)
if
cfg
.
BACKBONE
.
WEIGHTS
else
None
traincfg
=
TrainConfig
(
model
=
MODEL
,
data
=
QueueInput
(
train_dataflow
),
callbacks
=
callbacks
,
steps_per_epoch
=
stepnum
,
max_epoch
=
cfg
.
TRAIN
.
LR_SCHEDULE
[
-
1
]
*
factor
//
stepnum
,
session_init
=
session_init
,
starting_epoch
=
cfg
.
TRAIN
.
STARTING_EPOCH
)
if
is_horovod
:
trainer
=
HorovodTrainer
(
average
=
False
)
else
:
# nccl mode appears faster than cpu mode
trainer
=
SyncMultiGPUTrainerReplicated
(
cfg
.
TRAIN
.
NUM_GPUS
,
average
=
False
,
mode
=
'nccl'
)
launch_train_with_config
(
traincfg
,
trainer
)
examples/FasterRCNN/viz.py
View file @
791c7b45
...
...
@@ -8,8 +8,8 @@ from tensorpack.utils import viz
from
tensorpack.utils.palette
import
PALETTE_RGB
from
config
import
config
as
cfg
from
utils.np_box_ops
import
iou
as
np_iou
from
utils.np_box_ops
import
area
as
np_area
from
utils.np_box_ops
import
iou
as
np_iou
def
draw_annotation
(
img
,
boxes
,
klass
,
is_crowd
=
None
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment