Commit cb8b48d7 authored by Yuxin Wu

Switch to non-preact resnet

parent cfae6938
## imagenet-resnet.py, imagenet-resnet-se.py ## imagenet-resnet.py, imagenet-resnet-se.py
__Training__ code of ResNet on ImageNet, with pre-activation and squeeze-and-excitation. __Training__ code of three variants of ResNet on ImageNet:
The pre-act ResNet follows the setup in [fb.resnet.torch](https://github.com/facebook/fb.resnet.torch) (except for the weight decay)
* imagenet-resnet.py: [Original ResNet](https://arxiv.org/abs/1512.03385) and [Pre-activation ResNet](https://arxiv.org/abs/1603.05027).
* imagenet-resnet-se.py: [Squeeze-and-Excitation ResNet](https://arxiv.org/abs/1709.01507)
The training mostly follows the setup in [fb.resnet.torch](https://github.com/facebook/fb.resnet.torch)
and gets similar performance (with much fewer lines of code). and gets similar performance (with much fewer lines of code).
Models can be [downloaded here](https://goo.gl/6XjK9V). Models can be [downloaded here](https://goo.gl/6XjK9V).
| Model | Top 5 Error | Top 1 Error | | Model | Top 5 Error | Top 1 Error |
|:-------------------|-------------|------------:| |:-------------------|-------------|------------:|
| ResNet18 | 10.47% | 29.56% | | ResNet18 | 10.50% | 29.66% |
| ResNet34 | 8.51% | 26.50% | | ResNet34 | 8.56% | 26.17% |
| ResNet50 | 7.16% | 23.72% | | ResNet50 | 6.85% | 23.61% |
| ResNet50-SE | TRAINING | TRAINING | | ResNet50-SE | TRAINING | TRAINING |
| ResNet101 | 6.26% | 22.53% | | ResNet101 | 6.04% | 21.95% |
To train, just run: To train, just run:
```bash ```bash
......
...@@ -20,28 +20,31 @@ from tensorpack.tfutils import argscope, get_model_loader ...@@ -20,28 +20,31 @@ from tensorpack.tfutils import argscope, get_model_loader
from tensorpack.utils.gpu import get_nr_gpu from tensorpack.utils.gpu import get_nr_gpu
from imagenet_resnet_utils import ( from imagenet_resnet_utils import (
fbresnet_augmentor, preresnet_group, fbresnet_augmentor, get_imagenet_dataflow,
preresnet_basicblock, preresnet_bottleneck, resnet_backbone, preresnet_group, preresnet_basicblock, preresnet_bottleneck,
eval_on_ILSVRC12, image_preprocess, compute_loss_and_error, resnet_group, resnet_basicblock, resnet_bottleneck,
get_imagenet_dataflow) resnet_backbone,
eval_on_ILSVRC12, image_preprocess, compute_loss_and_error)
TOTAL_BATCH_SIZE = 256 TOTAL_BATCH_SIZE = 256
INPUT_SHAPE = 224 INPUT_SHAPE = 224
DEPTH = None
RESNET_CONFIG = {
18: ([2, 2, 2, 2], preresnet_basicblock),
34: ([3, 4, 6, 3], preresnet_basicblock),
50: ([3, 4, 6, 3], preresnet_bottleneck),
101: ([3, 4, 23, 3], preresnet_bottleneck)
}
class Model(ModelDesc): class Model(ModelDesc):
def __init__(self, data_format='NCHW'): def __init__(self, depth, data_format='NCHW', preact=False):
if data_format == 'NCHW': if data_format == 'NCHW':
assert tf.test.is_gpu_available() assert tf.test.is_gpu_available()
self.data_format = data_format self.data_format = data_format
self.preact = preact
basicblock = preresnet_basicblock if preact else resnet_basicblock
bottleneck = preresnet_bottleneck if preact else resnet_bottleneck
self.num_blocks, self.block_func = {
18: ([2, 2, 2, 2], basicblock),
34: ([3, 4, 6, 3], basicblock),
50: ([3, 4, 6, 3], bottleneck),
101: ([3, 4, 23, 3], bottleneck)
}[depth]
def _get_inputs(self): def _get_inputs(self):
# uint8 instead of float32 is used as input type to reduce copy overhead. # uint8 instead of float32 is used as input type to reduce copy overhead.
...@@ -56,10 +59,11 @@ class Model(ModelDesc): ...@@ -56,10 +59,11 @@ class Model(ModelDesc):
if self.data_format == 'NCHW': if self.data_format == 'NCHW':
image = tf.transpose(image, [0, 3, 1, 2]) image = tf.transpose(image, [0, 3, 1, 2])
defs, block_func = RESNET_CONFIG[DEPTH]
with argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm], data_format=self.data_format): with argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm], data_format=self.data_format):
logits = resnet_backbone(image, defs, preresnet_group, block_func) logits = resnet_backbone(
image, self.num_blocks,
preresnet_group if self.preact else resnet_group, self.block_func)
loss = compute_loss_and_error(logits, label) loss = compute_loss_and_error(logits, label)
...@@ -72,18 +76,17 @@ class Model(ModelDesc): ...@@ -72,18 +76,17 @@ class Model(ModelDesc):
return tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True) return tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
def get_data(name): def get_data(name, batch):
isTrain = name == 'train' isTrain = name == 'train'
augmentors = fbresnet_augmentor(isTrain) augmentors = fbresnet_augmentor(isTrain)
datadir = args.data datadir = args.data
return get_imagenet_dataflow( return get_imagenet_dataflow(
datadir, name, BATCH_SIZE, augmentors, dir_structure='original') datadir, name, batch, augmentors, dir_structure='original')
def get_config(fake=False, data_format='NCHW'): def get_config(model, fake=False):
nr_tower = max(get_nr_gpu(), 1) nr_tower = max(get_nr_gpu(), 1)
global BATCH_SIZE batch = TOTAL_BATCH_SIZE // nr_tower
BATCH_SIZE = TOTAL_BATCH_SIZE // nr_tower
if fake: if fake:
logger.info("For benchmark, batch size is fixed to 64 per tower.") logger.info("For benchmark, batch size is fixed to 64 per tower.")
...@@ -91,9 +94,9 @@ def get_config(fake=False, data_format='NCHW'): ...@@ -91,9 +94,9 @@ def get_config(fake=False, data_format='NCHW'):
[[64, 224, 224, 3], [64]], 1000, random=False, dtype='uint8') [[64, 224, 224, 3], [64]], 1000, random=False, dtype='uint8')
callbacks = [] callbacks = []
else: else:
logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, BATCH_SIZE)) logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch))
dataset_train = get_data('train') dataset_train = get_data('train', batch)
dataset_val = get_data('val') dataset_val = get_data('val', batch)
callbacks = [ callbacks = [
ModelSaver(), ModelSaver(),
ScheduledHyperParamSetter('learning_rate', ScheduledHyperParamSetter('learning_rate',
...@@ -109,7 +112,7 @@ def get_config(fake=False, data_format='NCHW'): ...@@ -109,7 +112,7 @@ def get_config(fake=False, data_format='NCHW'):
dataset_val, infs, list(range(nr_tower)))) dataset_val, infs, list(range(nr_tower))))
return TrainConfig( return TrainConfig(
model=Model(data_format=data_format), model=model,
dataflow=dataset_train, dataflow=dataset_train,
callbacks=callbacks, callbacks=callbacks,
steps_per_epoch=5000, steps_per_epoch=5000,
...@@ -129,21 +132,22 @@ if __name__ == '__main__': ...@@ -129,21 +132,22 @@ if __name__ == '__main__':
parser.add_argument('-d', '--depth', help='resnet depth', parser.add_argument('-d', '--depth', help='resnet depth',
type=int, default=18, choices=[18, 34, 50, 101]) type=int, default=18, choices=[18, 34, 50, 101])
parser.add_argument('--eval', action='store_true') parser.add_argument('--eval', action='store_true')
parser.add_argument('--preact', action='store_true', help='Use pre-activation resnet')
args = parser.parse_args() args = parser.parse_args()
DEPTH = args.depth
if args.gpu: if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
model = Model(args.depth, args.data_format, args.preact)
if args.eval: if args.eval:
BATCH_SIZE = 128 # something that can run on one gpu batch = 128 # something that can run on one gpu
ds = get_data('val') ds = get_data('val', batch)
eval_on_ILSVRC12(Model(), get_model_loader(args.load), ds) eval_on_ILSVRC12(model, get_model_loader(args.load), ds)
sys.exit() else:
logger.set_logger_dir(
logger.set_logger_dir( os.path.join('train_log', 'imagenet-resnet-d' + str(args.depth)))
os.path.join('train_log', 'imagenet-resnet-d' + str(DEPTH)))
config = get_config(fake=args.fake, data_format=args.data_format) config = get_config(model, fake=args.fake)
if args.load: if args.load:
config.session_init = get_model_loader(args.load) config.session_init = get_model_loader(args.load)
SyncMultiGPUTrainerParameterServer(config).train() SyncMultiGPUTrainerParameterServer(config).train()
...@@ -31,19 +31,19 @@ Left to right: ...@@ -31,19 +31,19 @@ Left to right:
+ negative correlated pixels (keep original color) + negative correlated pixels (keep original color)
## CAM ## CAM
`CAM-resnet.py` fine-tunes a variant of ResNet to have 2x larger last-layer feature maps, then produces CAM visualizations. `CAM-resnet.py` fine-tunes a Preact-ResNet to have 2x larger last-layer feature maps, then produces CAM visualizations.
Usage: Usage:
1. Fine tune or retrain the ResNet: 1. Fine tune or retrain the ResNet:
```bash ```bash
./CAM-resnet.py --data /path/to/imagenet [--load ImageNet-ResNet18.npz] [--gpu 0,1,2,3] ./CAM-resnet.py --data /path/to/imagenet [--load ImageNet-ResNet18-Preact.npz] [--gpu 0,1,2,3]
``` ```
Pretrained and fine-tuned ResNet can be downloaded Pretrained and fine-tuned ResNet can be downloaded
[here](https://drive.google.com/open?id=0B9IPQTvr2BBkTXBlZmh1cmlnQ0k) and [here](https://drive.google.com/open?id=0B9IPQTvr2BBkQk9qcmtGSERlNUk). [here](https://drive.google.com/open?id=0B9IPQTvr2BBkTXBlZmh1cmlnQ0k) and [here](https://drive.google.com/open?id=0B9IPQTvr2BBkQk9qcmtGSERlNUk).
2. Generate CAM on ImageNet validation set: 2. Generate CAM on ImageNet validation set:
```bash ```bash
./CAM-resnet.py --data /path/to/imagenet --load ImageNet-ResNet18-2xGAP.npz --cam ./CAM-resnet.py --data /path/to/imagenet --load ImageNet-ResNet18-Preact-2xGAP.npz --cam
``` ```
<p align="center"> <img src="./CAM-demo.jpg" width="900"> </p> <p align="center"> <img src="./CAM-demo.jpg" width="900"> </p>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment