Commit e83b0797 authored by Yuxin Wu

update docs and sotabench

parent 7c1c9877
@@ -43,6 +43,15 @@ class AttrDict():
         return {k: v.to_dict() if isinstance(v, AttrDict) else v
                 for k, v in self.__dict__.items() if not k.startswith('_')}
 
+    def from_dict(self, d):
+        self.freeze(False)
+        for k, v in d.items():
+            self_v = getattr(self, k)
+            if isinstance(self_v, AttrDict):
+                self_v.from_dict(v)
+            else:
+                setattr(self, k, v)
+
     def update_args(self, args):
         """Update from command line args. """
         for cfg in args:
......
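The new from_dict is the inverse of the existing to_dict, which makes snapshot/restore of the nested config possible. A minimal round-trip sketch (the import path and the TRAIN.BASE_LR field are assumptions based on the FasterRCNN example's config module):

    from config import config as cfg   # assumed import path in examples/FasterRCNN

    snapshot = cfg.to_dict()           # plain nested dict; '_'-prefixed keys excluded
    cfg.TRAIN.BASE_LR = 0.02           # mutate a nested field (assumed field name)
    cfg.from_dict(snapshot)            # unfreezes, then recursively restores values
    assert cfg.to_dict() == snapshot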
@@ -3,6 +3,7 @@
 import os
 import sys
 import tqdm
+from contextlib import contextmanager
 
 from tensorpack.predict import OfflinePredictor, PredictConfig
 from tensorpack.tfutils import SmartInit
@@ -31,6 +32,13 @@ COCO_ROOT = os.path.join(DATA_ROOT, "coco")
 register_coco(COCO_ROOT)
 
 
+@contextmanager
+def backup_cfg():
+    orig_config = cfg.to_dict()
+    yield
+    cfg.from_dict(orig_config)
+
+
 def evaluate_rcnn(model_name, paper_arxiv_id, cfg_list, model_file):
     evaluator = COCOEvaluator(
         root=COCO_ROOT, model_name=model_name, paper_arxiv_id=paper_arxiv_id
@@ -77,16 +85,41 @@ def evaluate_rcnn(model_name, paper_arxiv_id, cfg_list, model_file):
     evaluator.save()
 
+download(
+    "http://models.tensorpack.com/FasterRCNN/COCO-MaskRCNN-R50FPN2x.npz",
+    "./",
+    expect_size=165362754)
+with backup_cfg():
+    evaluate_rcnn(
+        "Mask R-CNN (ResNet-50-FPN, 2x)", "1703.06870", [],
+        "COCO-MaskRCNN-R50FPN2x.npz",
+    )
+
+download(
+    "http://models.tensorpack.com/FasterRCNN/COCO-MaskRCNN-R50FPN2xGN.npz",
+    "./",
+    expect_size=167363872)
+with backup_cfg():
+    evaluate_rcnn(
+        "Mask R-CNN (ResNet-50-FPN, GroupNorm)", "1803.08494",
+        """FPN.NORM=GN BACKBONE.NORM=GN
+        FPN.FRCNN_HEAD_FUNC=fastrcnn_4conv1fc_gn_head
+        FPN.MRCNN_HEAD_FUNC=maskrcnn_up4conv_gn_head""".split(),
+        "COCO-MaskRCNN-R50FPN2xGN.npz",
+    )
+
 download(
     "http://models.tensorpack.com/FasterRCNN/COCO-MaskRCNN-R101FPN9xGNCasAugScratch.npz",
     "./",
     expect_size=355680386)
-evaluate_rcnn(
-    "Mask R-CNN (ResNet-101-FPN, GN, Cascade)",
-    "1811.08883",
-    """
-    FPN.CASCADE=True BACKBONE.RESNET_NUM_BLOCKS=[3,4,23,3] FPN.NORM=GN
-    BACKBONE.NORM=GN FPN.FRCNN_HEAD_FUNC=fastrcnn_4conv1fc_gn_head
-    FPN.MRCNN_HEAD_FUNC=maskrcnn_up4conv_gn_head""".split(),
-    "COCO-MaskRCNN-R101FPN9xGNCasAugScratch.npz",
-)
+with backup_cfg():
+    evaluate_rcnn(
+        "Mask R-CNN (ResNet-101-FPN, GN, Cascade)", "1811.08883",
+        """
+        FPN.CASCADE=True BACKBONE.RESNET_NUM_BLOCKS=[3,4,23,3] FPN.NORM=GN
+        BACKBONE.NORM=GN FPN.FRCNN_HEAD_FUNC=fastrcnn_4conv1fc_gn_head
+        FPN.MRCNN_HEAD_FUNC=maskrcnn_up4conv_gn_head""".split(),
+        "COCO-MaskRCNN-R101FPN9xGNCasAugScratch.npz",
+    )
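The with backup_cfg(): wrapper matters because each evaluate_rcnn call mutates the single global cfg with its model-specific overrides (presumably via cfg.update_args); without a snapshot/restore, options such as FPN.NORM=GN would leak into the next model's evaluation. A minimal sketch of the pattern, with illustrative override strings:

    with backup_cfg():
        cfg.update_args(["FPN.NORM=GN", "BACKBONE.NORM=GN"])  # per-model overrides
        # ... build the predictor and run COCO evaluation here ...
    # after a clean exit, cfg.from_dict() has restored the snapshot,
    # so the next model starts from the default configuration

Note that with a bare yield, the restore runs only on a clean exit; wrapping the yield in try/finally would also restore the snapshot when an evaluation raises.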
@@ -98,7 +98,7 @@ def BatchNorm(inputs, axis=None, training=None, momentum=0.9, epsilon=1e-5,
             This is not a good argument name, but it is what the Tensorflow layer uses.
         ema_update (str): Only effective when ``training=True``. It has the following options:
 
-          * "default": same as "collection". Because this is the default behavior in tensorflow.
+          * "default": same as "collection". Because this is the default behavior in TensorFlow.
           * "skip": do not update EMA. This can be useful when you reuse a batch norm layer in several places
             but do not want them to all update your EMA.
           * "collection": Add EMA update ops to collection `tf.GraphKeys.UPDATE_OPS`.
@@ -106,7 +106,7 @@ def BatchNorm(inputs, axis=None, training=None, momentum=0.9, epsilon=1e-5,
             your training iterations. This can waste compute if your training iterations do not always depend
             on the BatchNorm layer.
           * "internal": EMA is updated inside this layer itself by control dependencies.
-            In common cases, it has similar speed to "collection". But it covers more cases, e.g.:
+            In standard scenarios, it has similar speed to "collection". But it has some more benefits:
 
             1. BatchNorm is used inside dynamic control flow.
                The collection-based update does not support dynamic control flows.
@@ -114,7 +114,9 @@ def BatchNorm(inputs, axis=None, training=None, momentum=0.9, epsilon=1e-5,
                Putting all update ops into a single collection will waste a lot of compute.
             3. Other part of the model relies on the "updated" EMA. The collection-based method does not update
                EMA immediately.
+            4. It has less chance to cause TensorFlow bugs in a graph with complicated control flow.
 
+            Therefore this option is preferred over the TensorFlow default.
             Corresponding TF issue: https://github.com/tensorflow/tensorflow/issues/14699
 
         sync_statistics (str or None): one of None, "nccl", or "horovod". It determines how to compute the
             "per-batch statistics" when ``training==True``.
......
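For readers skimming this docstring change, a hedged usage sketch of the ema_update option (the call convention, where the first positional argument names the variable scope, is assumed from tensorpack's layer registry):

    import tensorflow as tf
    from tensorpack.models import BatchNorm

    def net(image):
        x = tf.layers.conv2d(image, 32, 3)
        # "internal": the EMA update is attached to this layer's output via
        # control dependencies, so it runs whenever the layer runs -- nothing
        # needs to be fetched from tf.GraphKeys.UPDATE_OPS during training.
        return BatchNorm('bn', x, ema_update='internal')

With ema_update='collection', the caller would instead have to run the ops collected in tf.GraphKeys.UPDATE_OPS on every training step, which is exactly the coupling the docstring warns about.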