Commit db573204 authored by Yuxin Wu

update alexnet-dorefa

parent f636a657
@@ -19,6 +19,7 @@ from tensorpack.tfutils.varreplace import remap_variables
 from tensorpack.dataflow import dataset
 from tensorpack.utils.gpu import get_nr_gpu
 
+from imagenet_utils import get_imagenet_dataflow, fbresnet_augmentor
 from dorefa import get_dorefa
 
 """
@@ -30,23 +31,23 @@ The original experiements are performed on a proprietary framework.
 This is our attempt to reproduce it on tensorpack & TensorFlow.
 
 Accuracy:
-    Trained with 4 GPUs and (W,A,G)=(1,2,6), it can reach top-1 single-crop validation error of 51%,
-    after 70 epochs. This number is a bit better than what's in the paper
-    probably due to more sophisticated augmentors.
+    Trained with 4 GPUs and (W,A,G)=(1,2,6), it can reach top-1 single-crop validation error of 47.6%,
+    after 70 epochs. This number is better than what's in the paper
+    due to more sophisticated augmentors.
 
     Note that the effective batch size in SyncMultiGPUTrainer is actually
     BATCH_SIZE * NUM_GPU. With a different number of GPUs in use, things might
     be a bit different, especially for learning rate.
 
-    With (W,A,G)=(32,32,32) -- full precision baseline, 43% error.
-    With (W,A,G)=(1,32,32) -- BWN, 46% error.
-    With (W,A,G)=(1,2,6), 51% error.
-    With (W,A,G)=(1,2,4), 63% error.
+    With (W,A,G)=(32,32,32) -- full precision baseline
+    With (W,A,G)=(1,32,32) -- BWN
+    With (W,A,G)=(1,2,6), 47.6% error
+    With (W,A,G)=(1,2,4)
 
 Speed:
-    About 2.2 iteration/s on 1 TitanX. (Each epoch is set to 10000 iterations)
+    About 11 iteration/s on 4 P100s. (Each epoch is set to 10000 iterations)
     Note that this code was written early without using NCHW format. You
-    should expect a 30% speed up after switching to NCHW format.
+    should expect a speed up after switching to NCHW format.
 
 To Train, for example:
     ./alexnet-dorefa.py --dorefa 1,2,6 --data PATH --gpu 0,1
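For context, the (W,A,G) triples above are the bit widths used to quantize weights, activations and gradients; the script builds its actual TensorFlow quantizers with get_dorefa from dorefa.py. Below is a minimal NumPy sketch of the weight-quantization forward pass as described in the DoReFa-Net paper, for illustration only (the real code works on tensors and needs a straight-through estimator for the gradient):

    import numpy as np

    def quantize(x, k):
        # round x in [0, 1] to the nearest of 2^k - 1 uniform levels
        n = float(2 ** k - 1)
        return np.round(x * n) / n

    def quantize_weights(w, bitW):
        # forward pass only; a sketch of the paper's scheme
        if bitW == 32:                  # full-precision baseline
            return w
        if bitW == 1:                   # BWN: mean(|w|) times sign(w)
            E = np.mean(np.abs(w))
            return np.where(w >= 0, E, -E)
        x = np.tanh(w)
        x = x / np.max(np.abs(x)) * 0.5 + 0.5  # squash into [0, 1]
        return 2 * quantize(x, bitW) - 1       # rescale to [-1, 1]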
@@ -64,7 +65,7 @@ To Train, for example:
 
 And you'll need the following to be able to fetch data efficiently
     Fast disk random access (Not necessarily SSD. I used a RAID of HDD, but not sure if plain HDD is enough)
-    More than 12 CPU cores (for data processing)
+    More than 20 CPU cores (for data processing)
     More than 10G of free memory
 
 To Run Pretrained Model:
@@ -173,62 +174,9 @@ class Model(ModelDesc):
 
 def get_data(dataset_name):
     isTrain = dataset_name == 'train'
-    ds = dataset.ILSVRC12(args.data, dataset_name, shuffle=isTrain)
-
-    meta = dataset.ILSVRCMeta()
-    pp_mean = meta.get_per_pixel_mean()
-    pp_mean_224 = pp_mean[16:-16, 16:-16, :]
-
-    if isTrain:
-        class Resize(imgaug.ImageAugmentor):
-            def __init__(self):
-                self._init(locals())
-
-            def _augment(self, img, _):
-                h, w = img.shape[:2]
-                size = 224
-                scale = self.rng.randint(size, 308) * 1.0 / min(h, w)
-                scaleX = scale * self.rng.uniform(0.85, 1.15)
-                scaleY = scale * self.rng.uniform(0.85, 1.15)
-                desSize = map(int, (max(size, min(w, scaleX * w)),
-                                    max(size, min(h, scaleY * h))))
-                dst = cv2.resize(img, tuple(desSize),
-                                 interpolation=cv2.INTER_CUBIC)
-                return dst
-
-        augmentors = [
-            Resize(),
-            imgaug.Rotation(max_deg=10),
-            imgaug.RandomApplyAug(imgaug.GaussianBlur(3), 0.5),
-            imgaug.Brightness(30, True),
-            imgaug.Gamma(),
-            imgaug.Contrast((0.8, 1.2), True),
-            imgaug.RandomCrop((224, 224)),
-            imgaug.RandomApplyAug(imgaug.JpegNoise(), 0.8),
-            imgaug.RandomApplyAug(imgaug.GaussianDeform(
-                [(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
-                (224, 224), 0.2, 3), 0.1),
-            imgaug.Flip(horiz=True),
-            imgaug.MapImage(lambda x: x - pp_mean_224),
-        ]
-    else:
-        def resize_func(im):
-            h, w = im.shape[:2]
-            scale = 256.0 / min(h, w)
-            desSize = map(int, (max(224, min(w, scale * w)),
-                                max(224, min(h, scale * h))))
-            im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC)
-            return im
-        augmentors = [
-            imgaug.MapImage(resize_func),
-            imgaug.CenterCrop((224, 224)),
-            imgaug.MapImage(lambda x: x - pp_mean_224),
-        ]
-    ds = AugmentImageComponent(ds, augmentors, copy=False)
-    ds = BatchData(ds, BATCH_SIZE, remainder=not isTrain)
-    if isTrain:
-        ds = PrefetchDataZMQ(ds, min(25, multiprocessing.cpu_count()))
-    return ds
+    augmentors = fbresnet_augmentor(isTrain)
+    return get_imagenet_dataflow(
+        args.data, dataset_name, BATCH_SIZE, augmentors)
 
 
 def get_config():
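The rewritten get_data delegates to the shared helpers now pulled in from the ResNet example. As a rough sketch of what that shared pipeline does, assumed here to mirror the hand-written code this commit deletes (the actual get_imagenet_dataflow implementation lives in imagenet_utils.py):

    import multiprocessing
    from tensorpack.dataflow import (
        dataset, AugmentImageComponent, BatchData, PrefetchDataZMQ)

    def imagenet_dataflow_sketch(datadir, name, batch_size, augmentors):
        isTrain = name == 'train'
        # raw (image, label) pairs, shuffled only for training
        ds = dataset.ILSVRC12(datadir, name, shuffle=isTrain)
        # decoding + augmentation is the expensive part
        ds = AugmentImageComponent(ds, augmentors, copy=False)
        if isTrain:
            # parallelize augmentation across subprocesses
            ds = PrefetchDataZMQ(ds, min(25, multiprocessing.cpu_count()))
        ds = BatchData(ds, batch_size, remainder=not isTrain)
        return ds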
@@ -314,7 +262,6 @@ if __name__ == '__main__':
         run_image(Model(), DictRestore(np.load(args.load, encoding='latin1').item()), args.run)
         sys.exit()
 
-    assert args.gpu is not None, "Need to specify a list of gpu for training!"
     nr_tower = max(get_nr_gpu(), 1)
     BATCH_SIZE = TOTAL_BATCH_SIZE // nr_tower
     logger.info("Batch per tower: {}".format(BATCH_SIZE))
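To make the batch arithmetic concrete (hypothetical numbers; TOTAL_BATCH_SIZE is defined elsewhere in the script):

    TOTAL_BATCH_SIZE = 256                     # hypothetical value
    nr_tower = 4                               # e.g. get_nr_gpu() found 4 GPUs
    BATCH_SIZE = TOTAL_BATCH_SIZE // nr_tower  # 64 images per tower
    # SyncMultiGPUTrainer aggregates gradients across towers, so one update
    # still sees BATCH_SIZE * nr_tower = 256 images in total, matching the
    # docstring's note about the effective batch size.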
imagenet_utils.py
@@ -0,0 +1 @@
+../ResNet/imagenet_utils.py
\ No newline at end of file
../ResNet/imagenet_utils.py
@@ -25,7 +25,8 @@ class GoogleNetResize(imgaug.ImageAugmentor):
     See `Going Deeper with Convolutions` by Google.
     """
     def __init__(self, crop_area_fraction=0.08,
-                 aspect_ratio_low=0.75, aspect_ratio_high=1.333):
+                 aspect_ratio_low=0.75, aspect_ratio_high=1.333,
+                 target_shape=224):
         self._init(locals())
 
     def _augment(self, img, _):
@@ -42,10 +43,10 @@ class GoogleNetResize(imgaug.ImageAugmentor):
                x1 = 0 if w == ww else self.rng.randint(0, w - ww)
                y1 = 0 if h == hh else self.rng.randint(0, h - hh)
                out = img[y1:y1 + hh, x1:x1 + ww]
-                out = cv2.resize(out, (224, 224), interpolation=cv2.INTER_CUBIC)
+                out = cv2.resize(out, (self.target_shape, self.target_shape), interpolation=cv2.INTER_CUBIC)
                return out
-        out = imgaug.ResizeShortestEdge(224, interp=cv2.INTER_CUBIC).augment(img)
-        out = imgaug.CenterCrop(224).augment(out)
+        out = imgaug.ResizeShortestEdge(self.target_shape, interp=cv2.INTER_CUBIC).augment(img)
+        out = imgaug.CenterCrop(self.target_shape).augment(out)
         return out
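Since target_shape defaults to 224, existing callers are unaffected, while models with other input resolutions can now reuse the same augmentor, e.g.:

    aug224 = GoogleNetResize()                  # unchanged behavior
    aug112 = GoogleNetResize(target_shape=112)  # hypothetical 112x112 model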