Commit e83b0797 authored by Yuxin Wu

update docs and sotabench

parent 7c1c9877
@@ -43,6 +43,15 @@ class AttrDict():
        return {k: v.to_dict() if isinstance(v, AttrDict) else v
                for k, v in self.__dict__.items() if not k.startswith('_')}
    def from_dict(self, d):
        self.freeze(False)
        for k, v in d.items():
            self_v = getattr(self, k)
            if isinstance(self_v, AttrDict):
                self_v.from_dict(v)
            else:
                setattr(self, k, v)
    def update_args(self, args):
        """Update from command line args. """
        for cfg in args:
...
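The new `from_dict` is the inverse of the `to_dict` above: it unfreezes the config, then recursively writes a plain nested dict back into the attribute tree. A minimal round-trip sketch, assuming `AttrDict` is imported from the FasterRCNN example's config module (hypothetical usage, not part of this commit):

from config import AttrDict

cfg = AttrDict()
cfg.TRAIN = AttrDict()
cfg.TRAIN.LR = 0.01
snapshot = cfg.to_dict()    # plain nested dict: {'TRAIN': {'LR': 0.01}}
cfg.TRAIN.LR = 0.02         # mutate the config
cfg.from_dict(snapshot)     # recursively restore the saved values
assert cfg.TRAIN.LR == 0.01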
@@ -3,6 +3,7 @@
import os
import sys
import tqdm
from contextlib import contextmanager
from tensorpack.predict import OfflinePredictor, PredictConfig
from tensorpack.tfutils import SmartInit
@@ -31,6 +32,13 @@ COCO_ROOT = os.path.join(DATA_ROOT, "coco")
register_coco(COCO_ROOT)
@contextmanager
def backup_cfg():
    orig_config = cfg.to_dict()
    yield
    cfg.from_dict(orig_config)
def evaluate_rcnn(model_name, paper_arxiv_id, cfg_list, model_file):
    evaluator = COCOEvaluator(
        root=COCO_ROOT, model_name=model_name, paper_arxiv_id=paper_arxiv_id
@@ -77,16 +85,41 @@ def evaluate_rcnn(model_name, paper_arxiv_id, cfg_list, model_file):
    evaluator.save()
download(
    "http://models.tensorpack.com/FasterRCNN/COCO-MaskRCNN-R50FPN2x.npz",
    "./",
    expect_size=165362754)
with backup_cfg():
    evaluate_rcnn(
        "Mask R-CNN (ResNet-50-FPN, 2x)", "1703.06870", [],
        "COCO-MaskRCNN-R50FPN2x.npz",
    )

download(
    "http://models.tensorpack.com/FasterRCNN/COCO-MaskRCNN-R50FPN2xGN.npz",
    "./",
    expect_size=167363872)
with backup_cfg():
    evaluate_rcnn(
        "Mask R-CNN (ResNet-50-FPN, GroupNorm)", "1803.08494",
        """FPN.NORM=GN BACKBONE.NORM=GN
        FPN.FRCNN_HEAD_FUNC=fastrcnn_4conv1fc_gn_head
        FPN.MRCNN_HEAD_FUNC=maskrcnn_up4conv_gn_head""".split(),
        "COCO-MaskRCNN-R50FPN2xGN.npz",
    )
download(
    "http://models.tensorpack.com/FasterRCNN/COCO-MaskRCNN-R101FPN9xGNCasAugScratch.npz",
    "./",
    expect_size=355680386)
with backup_cfg():
    evaluate_rcnn(
        "Mask R-CNN (ResNet-101-FPN, GN, Cascade)", "1811.08883",
        """
        FPN.CASCADE=True BACKBONE.RESNET_NUM_BLOCKS=[3,4,23,3] FPN.NORM=GN
        BACKBONE.NORM=GN FPN.FRCNN_HEAD_FUNC=fastrcnn_4conv1fc_gn_head
        FPN.MRCNN_HEAD_FUNC=maskrcnn_up4conv_gn_head""".split(),
        "COCO-MaskRCNN-R101FPN9xGNCasAugScratch.npz",
    )
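Each evaluation above is wrapped in `backup_cfg()`, which snapshots the global `cfg` via `to_dict()` on entry and restores it via `from_dict()` on exit, so the per-model overrides passed to `evaluate_rcnn` do not leak into the next model. A hypothetical illustration of the pattern (the override string is only an example; `cfg.update_args` is the method from the first hunk):

with backup_cfg():
    cfg.update_args(["FPN.NORM=GN"])  # per-model override
    # ... run the evaluation with the modified cfg ...
# on exit, cfg is restored to its pre-override state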
@@ -98,7 +98,7 @@ def BatchNorm(inputs, axis=None, training=None, momentum=0.9, epsilon=1e-5,
            This is not a good argument name, but it is what the Tensorflow layer uses.
        ema_update (str): Only effective when ``training=True``. It has the following options:
            * "default": same as "collection". Because this is the default behavior in TensorFlow.
            * "skip": do not update EMA. This can be useful when you reuse a batch norm layer in several places
              but do not want them to all update your EMA.
            * "collection": Add EMA update ops to collection `tf.GraphKeys.UPDATE_OPS`.
@@ -106,7 +106,7 @@ def BatchNorm(inputs, axis=None, training=None, momentum=0.9, epsilon=1e-5,
              your training iterations. This can waste compute if your training iterations do not always depend
              on the BatchNorm layer.
            * "internal": EMA is updated inside this layer itself by control dependencies.
              In standard scenarios, it has similar speed to "collection". But it has some more benefits:
              1. BatchNorm is used inside dynamic control flow.
                 The collection-based update does not support dynamic control flows.
@@ -114,7 +114,9 @@ def BatchNorm(inputs, axis=None, training=None, momentum=0.9, epsilon=1e-5,
                 Putting all update ops into a single collection will waste a lot of compute.
              3. Other part of the model relies on the "updated" EMA. The collection-based method does not update
                 EMA immediately.
              4. It is less likely to trigger TensorFlow bugs in a graph with complicated control flow.
              Therefore this option is preferred over the TensorFlow default.
              Corresponding TF issue: https://github.com/tensorflow/tensorflow/issues/14699
        sync_statistics (str or None): one of None, "nccl", or "horovod". It determines how to compute the
            "per-batch statistics" when ``training==True``.
...
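For reference, a minimal sketch of selecting the documented option in a tensorpack model; the input tensor `x` and the surrounding graph setup are assumed:

from tensorpack.models import BatchNorm

# "internal" updates the EMA with control dependencies inside the layer,
# so it also works under dynamic control flow, where the collection-based
# modes (which rely on tf.GraphKeys.UPDATE_OPS) do not.
x = BatchNorm('bn', x, ema_update='internal')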