Commit 2507e037 authored by Yuxin Wu

update models

parent 22be6dea
...@@ -2,10 +2,12 @@ This is the official script to train, or run pretrained model for the paper: ...@@ -2,10 +2,12 @@ This is the official script to train, or run pretrained model for the paper:
[DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients](http://arxiv.org/abs/1606.06160), by Zhou et al. [DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients](http://arxiv.org/abs/1606.06160), by Zhou et al.
We hosted a demo at CVPR16 on behalf of Megvii, Inc, running real-time DoReFa-Net on both ARM and FPGA. We hosted a demo at CVPR16 on behalf of Megvii, Inc, running real-time half-VGG size DoReFa-Net on both ARM and FPGA.
But we're not planning to release the runtime bit-op library. But we're not planning to release those runtime bit-op libraries for now. In these examples, bit operations are run in float32.
Pretrained model for 1-2-6-AlexNet will be available shortly. Pretrained model for 1-2-6-AlexNet is available at
[google drive](https://drive.google.com/a/megvii.com/folderview?id=0B308TeQzmFDLa0xOeVQwcXg1ZjQ).
It's provided in the format of a numpy dictionary, so it should be very easy to port into other applications.
## Preparation: ## Preparation:
......
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: UTF-8 -*- # -*- coding: UTF-8 -*-
# File: alexnet-dorefa.py # File: alexnet-dorefa.py
# Author: Yuxin Wu <ppwwyyxx@gmail.com> # Author: Yuxin Wu, Yuheng Zou ({wyx,zyh}@megvii.com)
import cv2 import cv2
import tensorflow as tf import tensorflow as tf
...@@ -25,8 +25,8 @@ The original experiments are performed on a proprietary framework. ...@@ -25,8 +25,8 @@ The original experiments are performed on a proprietary framework.
This is our attempt to reproduce it on tensorpack/tensorflow. This is our attempt to reproduce it on tensorpack/tensorflow.
Accuracy: Accuracy:
Trained with 4 GPUs and (W,A,G)=(1,2,6), it can reach top-1 single-crop validation error of 52%, Trained with 4 GPUs and (W,A,G)=(1,2,6), it can reach top-1 single-crop validation error of 51%,
after 64 epochs. The number is a bit better than what's in the paper after 70 epochs. This number is a bit better than what's in the paper
probably due to more sophisticated augmentors. probably due to more sophisticated augmentors.
Note that the effective batch size in SyncMultiGPUTrainer is actually Note that the effective batch size in SyncMultiGPUTrainer is actually
...@@ -55,12 +55,12 @@ To Train: ...@@ -55,12 +55,12 @@ To Train:
More than 12 CPU cores (for data processing) More than 12 CPU cores (for data processing)
To Run Pretrained Model: To Run Pretrained Model:
./alexnet-dorefa.py --load pretrained126.tfmodel --run a.jpg --dorefa 1,2,6 ./alexnet-dorefa.py --load alexnet-126.npy --run a.jpg --dorefa 1,2,6
""" """
BITW = 1 BITW = 1
BITA = 2 BITA = 2
BITG = 4 BITG = 6
BATCH_SIZE = 32 BATCH_SIZE = 32
class Model(ModelDesc): class Model(ModelDesc):
...@@ -155,7 +155,9 @@ class Model(ModelDesc): ...@@ -155,7 +155,9 @@ class Model(ModelDesc):
def get_data(dataset_name): def get_data(dataset_name):
isTrain = dataset_name == 'train' isTrain = dataset_name == 'train'
ds = dataset.ILSVRC12(args.data, dataset_name, #ds = dataset.ILSVRC12(args.data, dataset_name,
#shuffle=True if isTrain else False)
ds = dataset.ILSVRC12('/home/wyx/data/fake_ilsvrc', dataset_name,
shuffle=True if isTrain else False) shuffle=True if isTrain else False)
meta = dataset.ILSVRCMeta() meta = dataset.ILSVRCMeta()
...@@ -284,12 +286,12 @@ def run_image(model, sess_init, inputs): ...@@ -284,12 +286,12 @@ def run_image(model, sess_init, inputs):
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--gpu', help='the physical ids of GPUs to use') parser.add_argument('--gpu', help='the physical ids of GPUs to use')
parser.add_argument('--load', help='load a checkpoint') parser.add_argument('--load', help='load a checkpoint, or a npy (given as the pretrained model)')
parser.add_argument('--data', help='ILSVRC dataset dir') parser.add_argument('--data', help='ILSVRC dataset dir')
parser.add_argument('--dorefa', parser.add_argument('--dorefa',
help='number of bits for W,A,G, separated by comma. Defaults to \'1,2,4\'', help='number of bits for W,A,G, separated by comma. Defaults to \'1,2,4\'',
default='1,2,4') default='1,2,4')
parser.add_argument('--run', help='run on a list of images', nargs='*') parser.add_argument('--run', help='run on a list of images with the pretrained model', nargs='*')
args = parser.parse_args() args = parser.parse_args()
BITW, BITA, BITG = map(int, args.dorefa.split(',')) BITW, BITA, BITG = map(int, args.dorefa.split(','))
...@@ -298,7 +300,8 @@ if __name__ == '__main__': ...@@ -298,7 +300,8 @@ if __name__ == '__main__':
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
if args.run: if args.run:
run_image(Model(), SaverRestore(args.load), args.run) assert args.load.endswith('.npy')
run_image(Model(), ParamRestore(np.load(args.load, encoding='latin1').item()), args.run)
sys.exit() sys.exit()
config = get_config() config = get_config()
......
...@@ -107,5 +107,6 @@ if __name__ == '__main__': ...@@ -107,5 +107,6 @@ if __name__ == '__main__':
config = get_config() config = get_config()
if args.load: if args.load:
config.session_init = SaverRestore(args.load) config.session_init = SaverRestore(args.load)
QueueInputTrainer(config).train() #QueueInputTrainer(config).train()
SimpleTrainer(config).train()
...@@ -7,7 +7,7 @@ import tensorflow as tf ...@@ -7,7 +7,7 @@ import tensorflow as tf
from copy import copy from copy import copy
import re import re
from ..utils import logger from ..utils import logger, EXTRA_SAVE_VARS_KEY
from ._common import layer_register from ._common import layer_register
__all__ = ['BatchNorm'] __all__ = ['BatchNorm']
...@@ -60,6 +60,8 @@ def BatchNorm(x, use_local_stat=True, decay=0.9, epsilon=1e-5): ...@@ -60,6 +60,8 @@ def BatchNorm(x, use_local_stat=True, decay=0.9, epsilon=1e-5):
ema = tf.train.ExponentialMovingAverage(decay=decay, name=emaname) ema = tf.train.ExponentialMovingAverage(decay=decay, name=emaname)
ema_apply_op = ema.apply([batch_mean, batch_var]) ema_apply_op = ema.apply([batch_mean, batch_var])
ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var) ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var)
tf.add_to_collection(EXTRA_SAVE_VARS_KEY, ema_mean)
tf.add_to_collection(EXTRA_SAVE_VARS_KEY, ema_var)
else: else:
# use training-statistics in prediction # use training-statistics in prediction
assert not use_local_stat assert not use_local_stat
......
...@@ -10,7 +10,7 @@ import re ...@@ -10,7 +10,7 @@ import re
import tensorflow as tf import tensorflow as tf
import six import six
from ..utils import logger from ..utils import logger, EXTRA_SAVE_VARS_KEY
__all__ = ['SessionInit', 'NewSession', 'SaverRestore', __all__ = ['SessionInit', 'NewSession', 'SaverRestore',
'ParamRestore', 'ChainInit', 'ParamRestore', 'ChainInit',
...@@ -188,6 +188,7 @@ def dump_session_params(path): ...@@ -188,6 +188,7 @@ def dump_session_params(path):
npy format, loadable by ParamRestore npy format, loadable by ParamRestore
""" """
var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
var.extend(tf.get_collection(EXTRA_SAVE_VARS_KEY))
result = {} result = {}
for v in var: for v in var:
name = v.name.replace(":0", "") name = v.name.replace(":0", "")
......
...@@ -7,8 +7,13 @@ GLOBAL_STEP_VAR_NAME = 'global_step:0' ...@@ -7,8 +7,13 @@ GLOBAL_STEP_VAR_NAME = 'global_step:0'
# extra variables to summarize during training in a moving-average way # extra variables to summarize during training in a moving-average way
MOVING_SUMMARY_VARS_KEY = 'MOVING_SUMMARY_VARIABLES' MOVING_SUMMARY_VARS_KEY = 'MOVING_SUMMARY_VARIABLES'
# placeholders for input variables
INPUT_VARS_KEY = 'INPUT_VARIABLES' INPUT_VARS_KEY = 'INPUT_VARIABLES'
# variables that need to be saved, apart from trainable variables
EXTRA_SAVE_VARS_KEY = 'EXTRA_SAVE_VARIABLES'
import tensorflow as tf import tensorflow as tf
SUMMARY_BACKUP_KEYS = [tf.GraphKeys.SUMMARIES, MOVING_SUMMARY_VARS_KEY] SUMMARY_BACKUP_KEYS = [tf.GraphKeys.SUMMARIES, MOVING_SUMMARY_VARS_KEY]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment