Commit be39dbdf authored by Yuxin Wu

fix deprecation about dropout; fix Keras compatibility in tf1.13

parent 79148350
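For reference, a minimal sketch (not part of this commit) of the TF 1.13 deprecation that the dropout edits below work around: `tf.nn.dropout`'s `keep_prob` argument is deprecated in favor of `rate`, where rate = 1 - keep_prob. Graph-mode TF 1.x and the placeholder shape are assumptions here.

```python
# Minimal sketch of the deprecated vs. current tf.nn.dropout arguments
# (assumes graph-mode TF 1.13+; shapes are illustrative only).
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 128])

# Old style: "keep 50% of the units" -- warns as deprecated on TF 1.13+.
y_old = tf.nn.dropout(x, keep_prob=0.5)

# New style: "drop 50% of the units" (rate = 1 - keep_prob).
y_new = tf.nn.dropout(x, rate=0.5)
```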
@@ -100,7 +100,7 @@ class Model(ModelDesc):
               .apply(fg)
               .BatchNorm('bn5').apply(activate)
               # 5
-              .tf.nn.dropout(0.5 if is_training else 1.0)
+              .Dropout(rate=0.5 if is_training else 0.0)
               .Conv2D('conv6', 512, 5, padding='VALID')
               .apply(fg).BatchNorm('bn6')
               .apply(nonlin)
...
@@ -85,20 +85,28 @@ All models are trained with 8 NVIDIA V100s, unless otherwise noted.
 Performance in [Detectron](https://github.com/facebookresearch/Detectron/) can be roughly reproduced.
 FPN models are sometimes slightly worse, which is mainly due to batch size.
 
 | Backbone | mAP<br/>(box;mask) | Detectron mAP <sup>[1](#ft1)</sup><br/> (box;mask) | Time (on 8 V100s) | Configurations <br/> (click to expand) |
 | - | - | - | - | - |
-| R50-C4 | 33.1 | | 18h | <details><summary>super quick</summary>`MODE_MASK=False FRCNN.BATCH_PER_IM=64`<br/>`PREPROC.TRAIN_SHORT_EDGE_SIZE=600 PREPROC.MAX_SIZE=1024`<br/>`TRAIN.LR_SCHEDULE=[150000,230000,280000]` </details> |
+| R50-C4 | 33.5 | | 17h | <details><summary>super quick</summary>`MODE_MASK=False FRCNN.BATCH_PER_IM=64`<br/>`PREPROC.TRAIN_SHORT_EDGE_SIZE=600 PREPROC.MAX_SIZE=1024`<br/>`TRAIN.LR_SCHEDULE=[150000,230000,280000]` </details> |
 | R50-C4 | 36.6 | 36.5 | 44h | <details><summary>standard</summary>`MODE_MASK=False` </details> |
 | R50-FPN | 37.4 | 37.9 | 23h | <details><summary>standard</summary>`MODE_MASK=False MODE_FPN=True` </details> |
-| R50-C4 | 38.2;33.3 [:arrow_down:](http://models.tensorpack.com/FasterRCNN/COCO-R50C4-MaskRCNN-Standard.npz) | 37.8;32.8 | 49h | <details><summary>standard</summary>this is the default </details> |
-| R50-FPN | 38.4;35.1 [:arrow_down:](http://models.tensorpack.com/FasterRCNN/COCO-R50FPN-MaskRCNN-Standard.npz) | 38.6;34.5 | 27h | <details><summary>standard</summary>`MODE_FPN=True` </details> |
-| R50-FPN | 42.0;36.3 | | 41h | <details><summary>+Cascade</summary>`MODE_FPN=True FPN.CASCADE=True` </details> |
-| R50-FPN | 39.5;35.2 | 39.5;34.4<sup>[2](#ft2)</sup> | 33h | <details><summary>+ConvGNHead</summary>`MODE_FPN=True`<br/>`FPN.FRCNN_HEAD_FUNC=fastrcnn_4conv1fc_gn_head` </details> |
-| R50-FPN | 40.0;36.2 [:arrow_down:](http://models.tensorpack.com/FasterRCNN/COCO-R50FPN-MaskRCNN-StandardGN.npz) | 40.3;35.7 | 40h | <details><summary>+GN</summary>`MODE_FPN=True`<br/>`FPN.NORM=GN BACKBONE.NORM=GN`<br/>`FPN.FRCNN_HEAD_FUNC=fastrcnn_4conv1fc_gn_head`<br/>`FPN.MRCNN_HEAD_FUNC=maskrcnn_up4conv_gn_head` |
-| R101-C4 | 41.4;35.2 [:arrow_down:](http://models.tensorpack.com/FasterRCNN/COCO-R101C4-MaskRCNN-Standard.npz) | | 60h | <details><summary>standard</summary>`BACKBONE.RESNET_NUM_BLOCKS=[3,4,23,3]` </details> |
-| R101-FPN | 40.4;36.6 [:arrow_down:](http://models.tensorpack.com/FasterRCNN/COCO-R101FPN-MaskRCNN-Standard.npz) | 40.9;36.4 | 38h | <details><summary>standard</summary>`MODE_FPN=True`<br/>`BACKBONE.RESNET_NUM_BLOCKS=[3,4,23,3]` </details> |
-| R101-FPN | 46.5;40.1 [:arrow_down:](http://models.tensorpack.com/FasterRCNN/COCO-R101FPN-MaskRCNN-BetterParams.npz) <sup>[3](#ft3)</sup> | | 73h | <details><summary>3x+Cascade+TrainAug</summary>`MODE_FPN=True FPN.CASCADE=True`<br/>`BACKBONE.RESNET_NUM_BLOCKS=[3,4,23,3]`<br/>`TEST.RESULT_SCORE_THRESH=1e-4`<br/>`PREPROC.TRAIN_SHORT_EDGE_SIZE=[640,800]`<br/>`TRAIN.LR_SCHEDULE=[420000,500000,540000]` </details> |
-| R101-FPN<br/>(From Scratch) | 47.5;41.2 [:arrow_down:](http://models.tensorpack.com/FasterRCNN/COCO-R101FPN-MaskRCNN-ScratchGN.npz) | 47.4;40.5<sup>[4](#ft4)</sup> | 45h (on 48 V100s) | <details><summary>9x+GN+Cascade+TrainAug</summary>`MODE_FPN=True FPN.CASCADE=True`<br/>`BACKBONE.RESNET_NUM_BLOCKS=[3,4,23,3]`<br/>`FPN.NORM=GN BACKBONE.NORM=GN`<br/>`FPN.FRCNN_HEAD_FUNC=fastrcnn_4conv1fc_gn_head`<br/>`FPN.MRCNN_HEAD_FUNC=maskrcnn_up4conv_gn_head`<br/>`PREPROC.TRAIN_SHORT_EDGE_SIZE=[640,800]`<br/>`TRAIN.LR_SCHEDULE=[1500000,1580000,1620000]`<br/>`BACKBONE.FREEZE_AT=0`</details> |
+| R50-C4 | 38.2;33.3 [:arrow_down:][R50C42x] | 37.8;32.8 | 49h | <details><summary>standard</summary>this is the default </details> |
+| R50-FPN | 38.4;35.1 [:arrow_down:][R50FPN2x] | 38.6;34.5 | 27h | <details><summary>standard</summary>`MODE_FPN=True` </details> |
+| R50-FPN | 42.0;36.3 | | 36h | <details><summary>+Cascade</summary>`MODE_FPN=True FPN.CASCADE=True` </details> |
+| R50-FPN | 39.5;35.2 | 39.5;34.4<sup>[2](#ft2)</sup> | 31h | <details><summary>+ConvGNHead</summary>`MODE_FPN=True`<br/>`FPN.FRCNN_HEAD_FUNC=fastrcnn_4conv1fc_gn_head` </details> |
+| R50-FPN | 40.0;36.2 [:arrow_down:][R50FPN2xGN] | 40.3;35.7 | 33h | <details><summary>+GN</summary>`MODE_FPN=True`<br/>`FPN.NORM=GN BACKBONE.NORM=GN`<br/>`FPN.FRCNN_HEAD_FUNC=fastrcnn_4conv1fc_gn_head`<br/>`FPN.MRCNN_HEAD_FUNC=maskrcnn_up4conv_gn_head` |
+| R101-C4 | 41.4;35.2 [:arrow_down:][R101C42x] | | 60h | <details><summary>standard</summary>`BACKBONE.RESNET_NUM_BLOCKS=[3,4,23,3]` </details> |
+| R101-FPN | 40.4;36.6 [:arrow_down:][R101FPN2x] | 40.9;36.4 | 37h | <details><summary>standard</summary>`MODE_FPN=True`<br/>`BACKBONE.RESNET_NUM_BLOCKS=[3,4,23,3]` </details> |
+| R101-FPN | 46.5;40.1 [:arrow_down:][R101FPN3xCasAug] <sup>[3](#ft3)</sup> | | 73h | <details><summary>3x+Cascade+TrainAug</summary>`MODE_FPN=True FPN.CASCADE=True`<br/>`BACKBONE.RESNET_NUM_BLOCKS=[3,4,23,3]`<br/>`TEST.RESULT_SCORE_THRESH=1e-4`<br/>`PREPROC.TRAIN_SHORT_EDGE_SIZE=[640,800]`<br/>`TRAIN.LR_SCHEDULE=[420000,500000,540000]` </details> |
+| R101-FPN<br/>(From Scratch) | 47.5;41.2 [:arrow_down:][R101FPN9xGNCasAugScratch] | 47.4;40.5<sup>[4](#ft4)</sup> | 45h (on 48 V100s) | <details><summary>9x+GN+Cascade+TrainAug</summary>`MODE_FPN=True FPN.CASCADE=True`<br/>`BACKBONE.RESNET_NUM_BLOCKS=[3,4,23,3]`<br/>`FPN.NORM=GN BACKBONE.NORM=GN`<br/>`FPN.FRCNN_HEAD_FUNC=fastrcnn_4conv1fc_gn_head`<br/>`FPN.MRCNN_HEAD_FUNC=maskrcnn_up4conv_gn_head`<br/>`PREPROC.TRAIN_SHORT_EDGE_SIZE=[640,800]`<br/>`TRAIN.LR_SCHEDULE=[1500000,1580000,1620000]`<br/>`BACKBONE.FREEZE_AT=0`</details> |
 
+[R50C42x]: http://models.tensorpack.com/FasterRCNN/COCO-R50C4-MaskRCNN-Standard.npz
+[R50FPN2x]: http://models.tensorpack.com/FasterRCNN/COCO-R50FPN-MaskRCNN-Standard.npz
+[R50FPN2xGN]: http://models.tensorpack.com/FasterRCNN/COCO-R50FPN-MaskRCNN-StandardGN.npz
+[R101C42x]: http://models.tensorpack.com/FasterRCNN/COCO-R101C4-MaskRCNN-Standard.npz
+[R101FPN2x]: http://models.tensorpack.com/FasterRCNN/COCO-R101FPN-MaskRCNN-Standard.npz
+[R101FPN3xCasAug]: http://models.tensorpack.com/FasterRCNN/COCO-R101FPN-MaskRCNN-BetterParams.npz
+[R101FPN9xGNCasAugScratch]: http://models.tensorpack.com/FasterRCNN/COCO-R101FPN-MaskRCNN-ScratchGN.npz
+
 <a id="ft1">1</a>: Numbers taken from [Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md).
 We compare models that have identical training & inference cost between the two implementations. However, their numbers can be different due to many small implementation details.
...
@@ -33,7 +33,7 @@ class Model(ModelDesc):
     def build_graph(self, image, label):
         is_training = get_current_tower_context().is_training
-        keep_prob = tf.constant(0.5 if is_training else 1.0)
+        drop_rate = tf.constant(0.5 if is_training else 0.0)
 
         if is_training:
             tf.summary.image("train_image", image, 10)
@@ -56,7 +56,7 @@ class Model(ModelDesc):
                       .Conv2D('conv3.1', filters=128, padding='VALID') \
                       .Conv2D('conv3.2', filters=128, padding='VALID') \
                       .FullyConnected('fc0', 1024 + 512, activation=tf.nn.relu) \
-                      .tf.nn.dropout(keep_prob) \
+                      .Dropout(rate=drop_rate) \
                       .FullyConnected('fc1', 512, activation=tf.nn.relu) \
                       .FullyConnected('linear', out_dim=self.cifar_classnum)()
...
@@ -147,8 +147,8 @@ if __name__ == '__main__':
     num_gpu = get_num_gpu()
     if args.fake:
-        df_train = FakeData([[64, 224, 224, 3], [64, 1000]], 5000, random=False, dtype='uint8')
-        df_val = FakeData([[64, 224, 224, 3], [64, 1000]], 5000, random=False)
+        df_train = FakeData([[32, 224, 224, 3], [32, 1000]], 5000, random=False, dtype='uint8')
+        df_val = FakeData([[32, 224, 224, 3], [32, 1000]], 5000, random=False)
     else:
         batch_size = TOTAL_BATCH_SIZE // num_gpu
         assert args.data is not None
...
@@ -4,7 +4,9 @@
 import tensorflow as tf
 import six
 from tensorflow import keras
+import tensorflow.keras.backend as K
 from tensorflow.python.keras import metrics as metrics_module
+from contextlib import contextmanager
 
 from ..models.regularize import regularize_cost_from_collection
 from ..train import Trainer, SimpleTrainer, SyncMultiGPUTrainerParameterServer
@@ -82,7 +84,19 @@ class KerasModelCaller(object):
         if self.cached_model is None:
             assert not reuse
-            model = self.cached_model = self.get_model(*input_tensors)
+
+            # starting from some versions, tf.keras starts to prepend name scope to variable names ..
+            @contextmanager
+            def clear_tower0_name_scope():
+                ns = tf.get_default_graph().get_name_scope()
+                if ns == 'tower0':
+                    with tf.name_scope('/'):
+                        yield
+                else:
+                    yield
+
+            with clear_tower0_name_scope():
+                model = self.cached_model = self.get_model(*input_tensors)
             outputs = model.outputs
         elif reuse:
             # use the cached Keras model to mimic reuse
@@ -108,7 +122,7 @@ class KerasPhaseCallback(Callback):
     def __init__(self, isTrain):
         assert isinstance(isTrain, bool), isTrain
         self._isTrain = isTrain
-        self._learning_phase = keras.backend.learning_phase()
+        self._learning_phase = K.learning_phase()
 
     def _setup_graph(self):
         logger.info("Using Keras learning phase {} in the graph!".format(
@@ -200,7 +214,8 @@ def setup_keras_trainer(
         input,
         get_cost,
         lambda: optimizer)
-    if model_caller.cached_model.uses_learning_phase:
+    if len(K.learning_phase().consumers()) > 0:
+        # check if learning_phase is used in this model
         trainer.register_callback(KerasPhaseCallback(True))
...
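For reference, a rough sketch (not from this commit) of why the `consumers()` check in `setup_keras_trainer` above can stand in for the removed `uses_learning_phase` attribute: in graph-mode TF 1.x, `K.learning_phase()` is a tensor in the graph, so any layer that reads it (such as `Dropout`) leaves consumer ops attached to it. The toy model below is purely illustrative, and the exact behavior can vary across tf.keras versions.

```python
# Rough sketch: detect whether a tf.keras model actually consumes the
# learning-phase tensor (assumes graph-mode TF 1.x; toy model only).
import tensorflow.keras.backend as K
from tensorflow import keras

inp = keras.layers.Input(shape=(32,))
x = keras.layers.Dense(16, activation='relu')(inp)
out = keras.layers.Dropout(0.5)(x)  # Dropout branches on the learning phase
model = keras.models.Model(inp, out)

# A non-empty consumer list means the graph depends on the learning phase,
# so something like KerasPhaseCallback must feed it during training.
print(len(K.learning_phase().consumers()) > 0)
```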