Commit a3581e79 authored by Yuxin Wu

update docs

parent 801e2921
@@ -39,8 +39,10 @@ The tower function needs to follow some conventions:
   To respect variable reuse, use `tf.get_variable` instead of `tf.Variable` in the function.
   On the other hand, for non-trainable variables, it's OK to use
   `tf.Variable` to ensure creation of new variables in each tower even when `reuse=True`.
-4. It will always be called under a `TowerContext`, which can be accessed by `get_current_tower_contxt()`.
+4. It will always be called under a `TowerContext`, which can be accessed by `get_current_tower_context()`.
   The context contains information about training/inference mode, reuse, etc.
+5. It cannot create scopes or variables containing the name 'tower', as it is
+   reserved for special use.

These conventions are easy to follow, and most layer wrappers (e.g.,
tf.layers/slim/tensorlayer) do follow them. Note that certain Keras layers do not
......
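For reference, below is a minimal sketch of a tower function that follows the conventions listed in the hunk above. The input names, layer shapes and the cost used here are illustrative only and are not part of this commit.

```python
# Sketch of a tower function obeying the documented conventions (illustrative only).
import tensorflow as tf
from tensorpack.tfutils.tower import get_current_tower_context


def tower_func(image, label):
    # Convention 4: the function always runs under a TowerContext.
    ctx = get_current_tower_context()
    # Convention 3: create trainable variables with tf.get_variable so that
    # variable reuse across towers works as expected.
    with tf.variable_scope('fc'):
        W = tf.get_variable('W', shape=[784, 10],
                            initializer=tf.random_normal_initializer(stddev=0.01))
        b = tf.get_variable('b', shape=[10], initializer=tf.zeros_initializer())
    logits = tf.matmul(image, W) + b
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label),
        name='cost')
    if ctx.is_training:
        # Training/inference mode is read from the context, not from a flag.
        tf.summary.scalar('train-cost', cost)
    # Convention 5: no scope or variable name here contains the word 'tower'.
    return cost
```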
@@ -19,7 +19,7 @@ from tensorpack import *
from tensorpack.utils.concurrency import ensure_proc_terminate, start_proc_mask_signal
from tensorpack.utils.serialize import dumps
from tensorpack.tfutils.gradproc import MapGradient, SummaryGradient
-from tensorpack.utils.gpu import get_nr_gpu
+from tensorpack.utils.gpu import get_num_gpu

import gym
@@ -144,10 +144,10 @@ class MySimulatorMaster(SimulatorMaster, Callback):
    def _setup_graph(self):
        # create predictors on the available predictor GPUs.
-       nr_gpu = len(self._gpus)
+       num_gpu = len(self._gpus)
        predictors = [self.trainer.get_predictor(
            ['state'], ['policy', 'pred_value'],
-           self._gpus[k % nr_gpu])
+           self._gpus[k % num_gpu])
            for k in range(PREDICTOR_THREAD)]
        self.async_predictor = MultiThreadAsyncPredictor(
            predictors, batch_size=PREDICT_BATCH_SIZE)
@@ -213,16 +213,16 @@ def train():
    logger.set_logger_dir(dirname)

    # assign GPUs for training & inference
-   nr_gpu = get_nr_gpu()
+   num_gpu = get_num_gpu()
    global PREDICTOR_THREAD
-   if nr_gpu > 0:
-       if nr_gpu > 1:
+   if num_gpu > 0:
+       if num_gpu > 1:
            # use half gpus for inference
-           predict_tower = list(range(nr_gpu))[-nr_gpu // 2:]
+           predict_tower = list(range(num_gpu))[-num_gpu // 2:]
        else:
            predict_tower = [0]
        PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
-       train_tower = list(range(nr_gpu))[:-nr_gpu // 2] or [0]
+       train_tower = list(range(num_gpu))[:-num_gpu // 2] or [0]
        logger.info("[Batch-A3C] Train on gpu {} and infer on gpu {}".format(
            ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
    else:
......
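The `get_nr_gpu` to `get_num_gpu` rename above recurs in every example touched below. As a quick reference, here is a sketch of the usage pattern these scripts share; only the helper and trainer names come from the diff, and the total batch size is an illustrative value.

```python
# Typical multi-GPU setup after the rename (values are illustrative).
from tensorpack import SyncMultiGPUTrainerReplicated
from tensorpack.utils.gpu import get_num_gpu   # formerly get_nr_gpu

TOTAL_BATCH_SIZE = 512                          # illustrative, not from the diff

num_gpu = max(get_num_gpu(), 1)                 # at least one tower, even without GPUs
batch_per_tower = TOTAL_BATCH_SIZE // num_gpu   # the split the scripts below perform
trainer = SyncMultiGPUTrainerReplicated(num_gpu)
# The trainer is then passed to launch_train_with_config(config, trainer),
# together with a TrainConfig built elsewhere in each script.
```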
@@ -15,7 +15,7 @@ from tensorpack import *
from tensorpack.tfutils.summary import add_param_summary
from tensorpack.tfutils.varreplace import remap_variables
from tensorpack.dataflow import dataset
-from tensorpack.utils.gpu import get_nr_gpu
+from tensorpack.utils.gpu import get_num_gpu

from imagenet_utils import get_imagenet_dataflow, fbresnet_augmentor, ImageNetModel
from dorefa import get_dorefa, ternarize
@@ -215,7 +215,7 @@ if __name__ == '__main__':
        run_image(Model(), DictRestore(dict(np.load(args.load))), args.run)
        sys.exit()

-   nr_tower = max(get_nr_gpu(), 1)
+   nr_tower = max(get_num_gpu(), 1)
    BATCH_SIZE = TOTAL_BATCH_SIZE // nr_tower
    logger.set_logger_dir(os.path.join(
        'train_log', 'alexnet-dorefa-{}'.format(args.dorefa)))
......
@@ -257,8 +257,8 @@ if __name__ == '__main__':
    args = parser.parse_args()

    with change_gpu(args.gpu):
-       NR_GPU = len(args.gpu.split(','))
+       NGPU = len(args.gpu.split(','))
        config = get_config()
        if args.load:
            config.session_init = SaverRestore(args.load)
-       launch_train_with_config(config, SyncMultiGPUTrainer(NR_GPU))
+       launch_train_with_config(config, SyncMultiGPUTrainer(NGPU))
@@ -5,7 +5,7 @@
from tensorpack import *
from tensorpack.tfutils.summary import add_moving_summary
-from tensorpack.utils.gpu import get_nr_gpu
+from tensorpack.utils.gpu import get_num_gpu
from tensorpack.tfutils.scope_utils import auto_reuse_variable_scope
import tensorflow as tf
@@ -137,7 +137,7 @@ if __name__ == '__main__':
        input = QueueInput(DCGAN.get_data())
        model = Model()
-       nr_tower = max(get_nr_gpu(), 1)
+       nr_tower = max(get_num_gpu(), 1)
        if nr_tower == 1:
            trainer = GANTrainer(input, model)
        else:
......
@@ -149,10 +149,10 @@ class MultiGPUGANTrainer(TowerTrainer):
    """
    A replacement of GANTrainer (optimize d and g one by one) with multi-gpu support.
    """
-   def __init__(self, nr_gpu, input, model):
+   def __init__(self, num_gpu, input, model):
        super(MultiGPUGANTrainer, self).__init__()
-       assert nr_gpu > 1
-       raw_devices = ['/gpu:{}'.format(k) for k in range(nr_gpu)]
+       assert num_gpu > 1
+       raw_devices = ['/gpu:{}'.format(k) for k in range(num_gpu)]
        # Setup input
        input = StagingInput(input)
@@ -167,13 +167,13 @@ class MultiGPUGANTrainer(TowerTrainer):
        self.tower_func = TowerFuncWrapper(get_cost, model.get_inputs_desc())
        devices = [LeastLoadedDeviceSetter(d, raw_devices) for d in raw_devices]
        cost_list = DataParallelBuilder.build_on_towers(
-           list(range(nr_gpu)),
+           list(range(num_gpu)),
            lambda: self.tower_func(*input.get_input_tensors()),
            devices)
        # Simply average the cost here. It might be faster to average the gradients
        with tf.name_scope('optimize'):
-           d_loss = tf.add_n([x[0] for x in cost_list]) * (1.0 / nr_gpu)
-           g_loss = tf.add_n([x[1] for x in cost_list]) * (1.0 / nr_gpu)
+           d_loss = tf.add_n([x[0] for x in cost_list]) * (1.0 / num_gpu)
+           g_loss = tf.add_n([x[1] for x in cost_list]) * (1.0 / num_gpu)
        opt = model.get_optimizer()
        # run one d_min after one g_min
......
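A note on the comment kept in the hunk above ("Simply average the cost here. It might be faster to average the gradients"): the two choices are mathematically equivalent, since the gradient of (d_loss_1 + ... + d_loss_N) / N equals the average of the per-tower gradients. The performance difference the comment alludes to is presumably about where the cross-GPU aggregation happens, not about the result.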
@@ -12,7 +12,7 @@ import os
from tensorpack import *
from tensorpack.dataflow import dataset
-from tensorpack.utils.gpu import get_nr_gpu
+from tensorpack.utils.gpu import get_num_gpu
from tensorpack.tfutils import optimizer, gradproc
from tensorpack.tfutils.summary import add_moving_summary, add_param_summary
@@ -256,4 +256,4 @@ if __name__ == '__main__':
        config.session_init = get_model_loader(args.load)
    launch_train_with_config(
        config,
-       SyncMultiGPUTrainer(max(get_nr_gpu(), 1)))
+       SyncMultiGPUTrainer(max(get_num_gpu(), 1)))
@@ -11,14 +11,14 @@ import tensorflow as tf
from tensorpack import *
from tensorpack.tfutils.summary import add_moving_summary
from tensorpack.dataflow import dataset
-from tensorpack.utils.gpu import get_nr_gpu
+from tensorpack.utils.gpu import get_num_gpu
from imagenet_utils import fbresnet_augmentor, get_imagenet_dataflow

# Change them if using different number of GPUs.
TOTAL_BATCH_SIZE = 64 * 6
-NR_GPU = 6
-BATCH_SIZE = TOTAL_BATCH_SIZE // NR_GPU
+NUM_GPU = 6
+BATCH_SIZE = TOTAL_BATCH_SIZE // NUM_GPU
INPUT_SHAPE = 224
@@ -169,6 +169,6 @@ if __name__ == '__main__':
    config = get_config()
    if args.load:
        config.session_init = SaverRestore(args.load)
-   nr_tower = get_nr_gpu()
-   assert nr_tower == NR_GPU
-   launch_train_with_config(config, SyncMultiGPUTrainer(NR_GPU))
+   nr_tower = get_num_gpu()
+   assert nr_tower == NUM_GPU
+   launch_train_with_config(config, SyncMultiGPUTrainer(NUM_GPU))
@@ -14,7 +14,7 @@ from tensorpack import *
from tensorpack.dataflow import imgaug
from tensorpack.tfutils import argscope, get_model_loader, model_utils
from tensorpack.tfutils.scope_utils import under_name_scope
-from tensorpack.utils.gpu import get_nr_gpu
+from tensorpack.utils.gpu import get_num_gpu

from imagenet_utils import (
    get_imagenet_dataflow,
@@ -212,7 +212,7 @@ if __name__ == '__main__':
    else:
        logger.set_logger_dir(os.path.join('train_log', 'shufflenet'))

-   nr_tower = max(get_nr_gpu(), 1)
+   nr_tower = max(get_num_gpu(), 1)
    config = get_config(model, nr_tower)
    if args.load:
        config.session_init = get_model_loader(args.load)
......
@@ -10,7 +10,7 @@ import tensorflow as tf
from tensorpack import *
from tensorpack.tfutils import argscope
from tensorpack.tfutils.summary import *
-from tensorpack.utils.gpu import get_nr_gpu
+from tensorpack.utils.gpu import get_num_gpu
from imagenet_utils import (
    ImageNetModel, get_imagenet_dataflow, fbresnet_augmentor)
@@ -108,7 +108,7 @@ def get_data(name, batch):
def get_config():
-   nr_tower = max(get_nr_gpu(), 1)
+   nr_tower = max(get_num_gpu(), 1)
    batch = args.batch
    total_batch = batch * nr_tower
    assert total_batch >= 256  # otherwise the learning rate warmup is wrong.
@@ -159,6 +159,6 @@ if __name__ == '__main__':
    logger.set_logger_dir(os.path.join('train_log', 'vgg16-norm={}'.format(args.norm)))

    config = get_config()
-   nr_tower = max(get_nr_gpu(), 1)
+   nr_tower = max(get_num_gpu(), 1)
    trainer = SyncMultiGPUTrainerReplicated(nr_tower)
    launch_train_with_config(config, trainer)
@@ -27,8 +27,8 @@ These are all the toy examples in tensorpack. They are supposed to be just demos
| --- | --- |
| Train [ResNet](ResNet), [ShuffleNet and other models](ImageNetModels) on ImageNet | reproduce paper |
| [Train Faster-RCNN / Mask-RCNN on COCO](FasterRCNN) | reproduce paper |
+| [DoReFa-Net: training binary / low-bitwidth CNN on ImageNet](DoReFa-Net) | reproduce paper |
| [Generative Adversarial Network(GAN) variants](GAN), including DCGAN, InfoGAN, <br/> Conditional GAN, WGAN, BEGAN, DiscoGAN, Image to Image, CycleGAN | visually reproduce |
-| [DoReFa-Net: training binary / low-bitwidth CNN on ImageNet](DoReFa-Net) | reproduce paper |
| [Fully-convolutional Network for Holistically-Nested Edge Detection(HED)](HED) | visually reproduce |
| [Spatial Transformer Networks on MNIST addition](SpatialTransformer) | reproduce paper |
| [Visualize CNN saliency maps](Saliency) | visually reproduce |
......
@@ -9,7 +9,7 @@ import os
from tensorpack import *
from tensorpack.tfutils.summary import add_moving_summary, add_param_summary
-from tensorpack.utils.gpu import get_nr_gpu
+from tensorpack.utils.gpu import get_num_gpu
from tensorpack.dataflow import dataset
import tensorflow as tf
@@ -170,5 +170,5 @@ if __name__ == '__main__':
        max_epoch=400,
        session_init=SaverRestore(args.load) if args.load else None
    )
-   nr_gpu = max(get_nr_gpu(), 1)
-   launch_train_with_config(config, SyncMultiGPUTrainerParameterServer(nr_gpu))
+   num_gpu = max(get_num_gpu(), 1)
+   launch_train_with_config(config, SyncMultiGPUTrainerParameterServer(num_gpu))
@@ -12,7 +12,7 @@ from tensorpack.train import (
    TrainConfig, SyncMultiGPUTrainerReplicated, launch_train_with_config)
from tensorpack.dataflow import FakeData
from tensorpack.tfutils import argscope, get_model_loader
-from tensorpack.utils.gpu import get_nr_gpu
+from tensorpack.utils.gpu import get_num_gpu

from imagenet_utils import (
    fbresnet_augmentor, get_imagenet_dataflow, ImageNetModel,
@@ -57,7 +57,7 @@ def get_data(name, batch):
def get_config(model, fake=False):
-   nr_tower = max(get_nr_gpu(), 1)
+   nr_tower = max(get_num_gpu(), 1)
    assert args.batch % nr_tower == 0
    batch = args.batch // nr_tower
@@ -143,5 +143,5 @@ if __name__ == '__main__':
    config = get_config(model, fake=args.fake)
    if args.load:
        config.session_init = get_model_loader(args.load)
-   trainer = SyncMultiGPUTrainerReplicated(max(get_nr_gpu(), 1))
+   trainer = SyncMultiGPUTrainerReplicated(max(get_num_gpu(), 1))
    launch_train_with_config(config, trainer)
@@ -16,7 +16,7 @@ from tensorpack.dataflow import dataset
from tensorpack.tfutils import optimizer, gradproc
from tensorpack.tfutils.symbolic_functions import *
from tensorpack.tfutils.summary import *
-from tensorpack.utils.gpu import get_nr_gpu
+from tensorpack.utils.gpu import get_num_gpu
from tensorpack.utils import viz

from imagenet_utils import (
@@ -157,8 +157,8 @@ if __name__ == '__main__':
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

-   nr_gpu = get_nr_gpu()
-   BATCH_SIZE = TOTAL_BATCH_SIZE // nr_gpu
+   num_gpu = get_num_gpu()
+   BATCH_SIZE = TOTAL_BATCH_SIZE // num_gpu

    if args.cam:
        BATCH_SIZE = 128    # something that can run on one gpu
@@ -169,4 +169,4 @@ if __name__ == '__main__':
    config = get_config()
    if args.load:
        config.session_init = get_model_loader(args.load)
-   launch_train_with_config(config, SyncMultiGPUTrainerParameterServer(nr_gpu))
+   launch_train_with_config(config, SyncMultiGPUTrainerParameterServer(num_gpu))
@@ -13,6 +13,7 @@ from tensorpack import *
from tensorpack.tfutils.scope_utils import auto_reuse_variable_scope
from tensorpack.tfutils.summary import add_moving_summary
from tensorpack.utils import logger
+from tensorpack.utils.gpu import get_num_gpu

from data_sampler import (
    ImageDecode, ImageDataFromZIPFile,
    RejectTooSmallImages, CenterSquareResize)
@@ -286,7 +287,7 @@ if __name__ == '__main__':
        param_dict = {'VGG19/' + name: value for name, value in six.iteritems(param_dict)}
        session_init = DictRestore(param_dict)

-   nr_tower = max(get_nr_gpu(), 1)
+   nr_tower = max(get_num_gpu(), 1)
    data = QueueInput(get_data(args.data))
    model = Model()
## Keras + Tensorpack

-Use Keras to define a model a train it with efficient tensorpack trainers.
+Use Keras to define a model and train it with efficient tensorpack trainers.

### Why?
-Keras alone has various overhead. In particular, it is not efficient when working on large models.
+Keras alone has various overhead. In particular, it is not efficient with large models.
The article [Towards Efficient Multi-GPU Training in Keras with TensorFlow](https://medium.com/rossum/towards-efficient-multi-gpu-training-in-keras-with-tensorflow-8a0091074fb2)
has mentioned some of it.
......
@@ -11,7 +11,7 @@ import argparse
from tensorpack import InputDesc, SyncMultiGPUTrainerReplicated
from tensorpack.dataflow import FakeData, MapDataComponent
from tensorpack.utils import logger
-from tensorpack.utils.gpu import get_nr_gpu
+from tensorpack.utils.gpu import get_num_gpu
from tensorpack.contrib.keras import KerasModel
from tensorpack.callbacks import *
from tensorflow.python.keras.layers import *
@@ -141,12 +141,12 @@ if __name__ == '__main__':
    tf.keras.backend.set_image_data_format('channels_first')

-   nr_gpu = get_nr_gpu()
+   num_gpu = get_num_gpu()
    if args.fake:
        df_train = FakeData([[64, 224, 224, 3], [64, 1000]], 5000, random=False, dtype='uint8')
        df_val = FakeData([[64, 224, 224, 3], [64, 1000]], 5000, random=False)
    else:
-       batch_size = TOTAL_BATCH_SIZE // nr_gpu
+       batch_size = TOTAL_BATCH_SIZE // num_gpu
        assert args.data is not None
        df_train = get_imagenet_dataflow(
            args.data, 'train', batch_size, fbresnet_augmentor(True))
@@ -164,7 +164,7 @@ if __name__ == '__main__':
        inputs_desc=[InputDesc(tf.uint8, [None, 224, 224, 3], 'images')],
        targets_desc=[InputDesc(tf.float32, [None, 1000], 'labels')],
        input=df_train,
-       trainer=SyncMultiGPUTrainerReplicated(nr_gpu))
+       trainer=SyncMultiGPUTrainerReplicated(num_gpu))

    lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
    tf.summary.scalar('lr', lr)
@@ -188,7 +188,7 @@ if __name__ == '__main__':
    if not args.fake:
        callbacks.append(
            DataParallelInferenceRunner(
-               df_val, ScalarStats(['categorical_accuracy']), nr_gpu))
+               df_val, ScalarStats(['categorical_accuracy']), num_gpu))

    M.fit(
        steps_per_epoch=100 if args.fake else 1281167 // TOTAL_BATCH_SIZE,
......
@@ -96,8 +96,11 @@ def BatchNorm(inputs, axis=None, training=None, momentum=0.9, epsilon=1e-5,
            When set to "nccl", this layer must be used under tensorpack multi-gpu trainers,
            and it then uses per-machine (multiple GPU) statistics to normalize.
+           Note that this implementation averages the per-tower E[x] and E[x^2] among towers to compute
+           global mean&variance. The result is the global mean&variance only if each tower has the same batch size.

            This option has no effect when not training.
-           The option is also known as "Cross-GPU BatchNorm" as mentioned in https://arxiv.org/abs/1711.07240.
+           This option is also known as "Cross-GPU BatchNorm" as mentioned in https://arxiv.org/abs/1711.07240.
            Corresponding TF issue: https://github.com/tensorflow/tensorflow/issues/18222

    Variable Names:
......
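The note added in the hunk above can be checked numerically: averaging per-tower E[x] and E[x^2] and then forming Var = E[x^2] - E[x]^2 reproduces the global mean and variance only when every tower sees the same batch size. A small NumPy sketch, independent of tensorpack:

```python
# Numerical check of the "same batch size" caveat in the docstring above.
import numpy as np

rng = np.random.RandomState(0)
towers_equal = [rng.randn(32) for _ in range(4)]           # equal per-tower batch sizes
towers_unequal = [rng.randn(n) for n in (8, 16, 32, 64)]   # unequal per-tower batch sizes


def synced_stats(towers):
    """Mean/variance computed the way the note above describes."""
    mean = np.mean([t.mean() for t in towers])              # average of per-tower E[x]
    second = np.mean([(t ** 2).mean() for t in towers])     # average of per-tower E[x^2]
    return mean, second - mean ** 2                         # Var = E[x^2] - E[x]^2


def global_stats(towers):
    """Mean/variance over the full (concatenated) batch."""
    x = np.concatenate(towers)
    return x.mean(), x.var()


print(np.allclose(synced_stats(towers_equal), global_stats(towers_equal)))      # True
print(np.allclose(synced_stats(towers_unequal), global_stats(towers_unequal)))  # False in general
```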