Commit 2507e037 authored by Yuxin Wu

update models

parent 22be6dea
......@@ -2,10 +2,12 @@ This is the official script to train, or run pretrained model for the paper:
[DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients](http://arxiv.org/abs/1606.06160), by Zhou et al.
We hosted a demo at CVPR16 on behalf of Megvii, Inc, running real-time DoReFa-Net on both ARM and FPGA.
But we're not planning to release the runtime bit-op library.
We hosted a demo at CVPR16 on behalf of Megvii, Inc, running real-time half-VGG size DoReFa-Net on both ARM and FPGA.
But we're not planning to release those runtime bit-op libraries for now. In these examples, bit operations are run in float32.
Pretrained model for 1-2-6-AlexNet will be available shortly.
Pretrained model for 1-2-6-AlexNet is available at
[google drive](https://drive.google.com/a/megvii.com/folderview?id=0B308TeQzmFDLa0xOeVQwcXg1ZjQ).
It's provided as a numpy dictionary, so it should be very easy to port into other applications.
## Preparation:
......
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# File: alexnet-dorefa.py
# Author: Yuxin Wu <ppwwyyxx@gmail.com>
# Author: Yuxin Wu, Yuheng Zou ({wyx,zyh}@megvii.com)
import cv2
import tensorflow as tf
......@@ -25,8 +25,8 @@ The original experiments are performed on a proprietary framework.
This is our attempt to reproduce it on tensorpack/tensorflow.
Accuracy:
Trained with 4 GPUs and (W,A,G)=(1,2,6), it can reach top-1 single-crop validation error of 52%,
after 64 epochs. The number is a bit better than what's in the paper
Trained with 4 GPUs and (W,A,G)=(1,2,6), it can reach top-1 single-crop validation error of 51%,
after 70 epochs. This number is a bit better than what's in the paper
probably due to more sophisticated augmentors.
Note that the effective batch size in SyncMultiGPUTrainer is actually
......@@ -55,12 +55,12 @@ To Train:
More than 12 CPU cores (for data processing)
To Run Pretrained Model:
./alexnet-dorefa.py --load pretrained126.tfmodel --run a.jpg --dorefa 1,2,6
./alexnet-dorefa.py --load alexnet-126.npy --run a.jpg --dorefa 1,2,6
"""
BITW = 1
BITA = 2
BITG = 4
BITG = 6
BATCH_SIZE = 32
class Model(ModelDesc):
......@@ -155,7 +155,9 @@ class Model(ModelDesc):
def get_data(dataset_name):
isTrain = dataset_name == 'train'
ds = dataset.ILSVRC12(args.data, dataset_name,
#ds = dataset.ILSVRC12(args.data, dataset_name,
#shuffle=True if isTrain else False)
ds = dataset.ILSVRC12('/home/wyx/data/fake_ilsvrc', dataset_name,
shuffle=True if isTrain else False)
meta = dataset.ILSVRCMeta()
......@@ -284,12 +286,12 @@ def run_image(model, sess_init, inputs):
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', help='the physical ids of GPUs to use')
parser.add_argument('--load', help='load a checkpoint')
parser.add_argument('--load', help='load a checkpoint, or a npy (given as the pretrained model)')
parser.add_argument('--data', help='ILSVRC dataset dir')
parser.add_argument('--dorefa',
help='number of bits for W,A,G, separated by comma. Defaults to \'1,2,4\'',
default='1,2,4')
parser.add_argument('--run', help='run on a list of images', nargs='*')
parser.add_argument('--run', help='run on a list of images with the pretrained model', nargs='*')
args = parser.parse_args()
BITW, BITA, BITG = map(int, args.dorefa.split(','))
......@@ -298,7 +300,8 @@ if __name__ == '__main__':
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
if args.run:
run_image(Model(), SaverRestore(args.load), args.run)
assert args.load.endswith('.npy')
run_image(Model(), ParamRestore(np.load(args.load, encoding='latin1').item()), args.run)
sys.exit()
config = get_config()
......
......@@ -107,5 +107,6 @@ if __name__ == '__main__':
config = get_config()
if args.load:
config.session_init = SaverRestore(args.load)
QueueInputTrainer(config).train()
#QueueInputTrainer(config).train()
SimpleTrainer(config).train()
......@@ -7,7 +7,7 @@ import tensorflow as tf
from copy import copy
import re
from ..utils import logger
from ..utils import logger, EXTRA_SAVE_VARS_KEY
from ._common import layer_register
__all__ = ['BatchNorm']
......@@ -60,6 +60,8 @@ def BatchNorm(x, use_local_stat=True, decay=0.9, epsilon=1e-5):
ema = tf.train.ExponentialMovingAverage(decay=decay, name=emaname)
ema_apply_op = ema.apply([batch_mean, batch_var])
ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var)
tf.add_to_collection(EXTRA_SAVE_VARS_KEY, ema_mean)
tf.add_to_collection(EXTRA_SAVE_VARS_KEY, ema_var)
else:
# use training-statistics in prediction
assert not use_local_stat
......
......@@ -10,7 +10,7 @@ import re
import tensorflow as tf
import six
from ..utils import logger
from ..utils import logger, EXTRA_SAVE_VARS_KEY
__all__ = ['SessionInit', 'NewSession', 'SaverRestore',
'ParamRestore', 'ChainInit',
......@@ -188,6 +188,7 @@ def dump_session_params(path):
npy format, loadable by ParamRestore
"""
var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
var.extend(tf.get_collection(EXTRA_SAVE_VARS_KEY))
result = {}
for v in var:
name = v.name.replace(":0", "")
......
......@@ -7,8 +7,13 @@ GLOBAL_STEP_VAR_NAME = 'global_step:0'
# extra variables to summarize during training in a moving-average way
MOVING_SUMMARY_VARS_KEY = 'MOVING_SUMMARY_VARIABLES'
# placeholders for input variables
INPUT_VARS_KEY = 'INPUT_VARIABLES'
# variables that need to be saved, apart from trainable variables
EXTRA_SAVE_VARS_KEY = 'EXTRA_SAVE_VARIABLES'
import tensorflow as tf
SUMMARY_BACKUP_KEYS = [tf.GraphKeys.SUMMARIES, MOVING_SUMMARY_VARS_KEY]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment