Commit 180a3461 authored by Yuxin Wu's avatar Yuxin Wu

update docs

parent 759f54b4
......@@ -9,9 +9,9 @@ For any unexpected problems, __PLEASE ALWAYS INCLUDE__:
+ What's the command you run:
+ Have you made any changes to code? Paste them if any:
+ If not, tell us what you did that may be relevant.
But we may not be able to resolve it if there is no reproducible code.
But we may not investigate it if there is no reproducible code.
+ Better to paste what you did instead of describing it.
2. What you observed, e.g. the entire log:
2. What you observed, including but not limited to the __entire__ logs.
+ Better to paste what you observed instead of describing it.
3. What you expected, if not obvious.
4. Your environment:
......@@ -25,8 +25,7 @@ Feature Requests:
+ You can implement a lot of features by extending tensorpack
(See http://tensorpack.readthedocs.io/en/latest/tutorial/index.html#extend-tensorpack).
It does not have to be added to tensorpack unless you have a good reason.
+ We don't take feature requests for implementing new papers.
If you don't know how, ask it as a usage question.
+ We don't take feature requests for examples or implementing papers.
Usage Questions:
......
......@@ -8,10 +8,10 @@ It also contains an implementation of the following papers:
+ [Binarized Neural Networks](https://arxiv.org/abs/1602.02830), with (W,A,G)=(1,1,32).
This is a good set of baselines for research in model quantization.
These quantization techniques achieves the following ImageNet performance in this implementation:
These quantization techniques, when applied to AlexNet, achieve the following ImageNet performance in this implementation:
| Model | W,A,G | Top 1 Validation Error |
|:---------------|----------|-----------------------:|
| Model | Bit Width <br/> (weights, activations, gradients) | Top 1 Validation Error |
|:---------------|---------------------------------------------------|-----------------------:|
| Full Precision | 32,32,32 | 40.3% |
| TTQ | t,32,32 | 42.0% |
| BWN | 1,32,32 | 44.6% |
......
......@@ -60,7 +60,7 @@ Evaluation or prediction will need the same config used during training.
## Results
These models are trained with different configurations on trainval35k and evaluated on minival using mAP@IoU=0.50:0.95.
MaskRCNN results contain both bbox and segm mAP.
MaskRCNN results contain both box and mask mAP.
| Backbone | mAP<br/>(box/mask) | Detectron mAP <br/> (box/mask) | Time | Configurations <br/> (click to expand) |
| - | - | - | - | - |
......@@ -74,12 +74,11 @@ MaskRCNN results contain both bbox and segm mAP.
| R101-C4 | 40.8/35.1 | | 63h on 8 V100s | <details><summary>standard</summary>`MODE_MASK=True `<br/>`BACKBONE.RESNET_NUM_BLOCK=[3,4,23,3]` </details> |
<a id="ft1">1</a>: Slightly different configurations.
<a id="ft2">2</a>: Number from [Group Normalization](https://arxiv.org/abs/1803.08494)
The two R50-C4 360k models have the same configuration __and mAP__
as the `R50-C4-2x` entries in
[Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines).
The other models listed here do not correspond to any configurations in Detectron.
<a id="ft2">2</a>: Numbers taken from [Group Normalization](https://arxiv.org/abs/1803.08494)
Performance in [Detectron](https://github.com/facebookresearch/Detectron/) can be reproduced.
Note that most of these numbers are better than what's in the paper.
## Notes
......
......@@ -49,7 +49,7 @@ _C = config # short alias to avoid coding
# mode flags ---------------------
_C.TRAINER = 'replicated' # options: 'horovod', 'replicated'
_C.MODE_MASK = True
_C.MODE_MASK = True # FasterRCNN or MaskRCNN
_C.MODE_FPN = False
# dataset -----------------------
......
......@@ -2,8 +2,6 @@
# -*- coding: utf-8 -*-
# File: mnist-convnet.py
import os
import argparse
import tensorflow as tf
"""
MNIST ConvNet example.
......@@ -50,8 +48,6 @@ class Model(ModelDesc):
.Dropout('dropout', rate=0.5)
.FullyConnected('fc1', 10, activation=tf.identity)())
tf.nn.softmax(logits, name='prob') # a Bx10 with probabilities
# a vector of length B with loss of each sample
cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
cost = tf.reduce_mean(cost, name='cross_entropy_loss') # the average cross-entropy loss
......@@ -59,15 +55,16 @@ class Model(ModelDesc):
correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32, name='correct')
accuracy = tf.reduce_mean(correct, name='accuracy')
# This will monitor training error (in a moving_average fashion):
# 1. write the value to tensorboard
# 2. write the value to stat.json
# 3. print the value after each epoch
# This will monitor training error & accuracy (in a moving average fashion). The value will be automatically
# 1. written to tensorboard
# 2. written to stat.json
# 3. printed after each epoch
train_error = tf.reduce_mean(1 - correct, name='train_error')
summary.add_moving_summary(train_error, accuracy)
# Use a regex to find parameters to apply weight decay.
# Here we apply a weight decay on all W (weight matrix) of all fc layers
# If you don't like regex, you can certainly define the cost in any other way.
wd_cost = tf.multiply(1e-5,
regularize_cost('fc.*/W', tf.nn.l2_loss),
name='regularize_loss')
......@@ -76,6 +73,7 @@ class Model(ModelDesc):
# monitor histograms of all weights (of conv and fc layers) in tensorboard
summary.add_param_summary(('.*/W', ['histogram', 'rms']))
# the function should return the total cost to be optimized
return total_cost
def optimizer(self):
......@@ -84,7 +82,7 @@ class Model(ModelDesc):
global_step=get_global_step_var(),
decay_steps=468 * 10,
decay_rate=0.3, staircase=True, name='learning_rate')
# This will also put the summary in tensorboard, stat.json and print in terminal
# This will also put the summary in tensorboard, stat.json and print in terminal,
# but this time without moving average
tf.summary.scalar('lr', lr)
return tf.train.AdamOptimizer(lr)
......@@ -99,16 +97,22 @@ def get_data():
return train, test
def get_config():
if __name__ == '__main__':
# automatically set up the directory train_log/mnist-convnet for logging
logger.auto_set_dir()
dataset_train, dataset_test = get_data()
# How many iterations you want in each epoch.
# This is the default value, don't actually need to set it in the config
# This (data.size()) is the default value.
steps_per_epoch = dataset_train.size()
# get the config which contains everything necessary for training
return TrainConfig(
config = TrainConfig(
model=Model(),
dataflow=dataset_train, # the DataFlow instance for training
# The input source for training. FeedInput is slow; it is used here only for demo purposes.
# In practice it's best to use QueueInput or others. See tutorials for details.
data=FeedInput(dataset_train),
callbacks=[
ModelSaver(), # save the model after every epoch
MaxSaver('validation_accuracy'), # save the model with highest accuracy (prefix 'validation_')
......@@ -119,22 +123,4 @@ def get_config():
steps_per_epoch=steps_per_epoch,
max_epoch=100,
)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
parser.add_argument('--load', help='load model')
args = parser.parse_args()
if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
# automatically setup the directory train_log/mnist-convnet for logging
logger.auto_set_dir()
config = get_config()
if args.load:
config.session_init = SaverRestore(args.load)
# SimpleTrainer is slow, this is just a demo.
# You can use QueueInputTrainer instead
launch_train_with_config(config, SimpleTrainer())
......@@ -2,8 +2,6 @@
# -*- coding: utf-8 -*-
# File: mnist-tflayers.py
import os
import argparse
import tensorflow as tf
"""
MNIST ConvNet example using tf.layers
......@@ -20,25 +18,30 @@ from tensorpack.tfutils import summary, get_current_tower_context
from tensorpack.dataflow import dataset
IMAGE_SIZE = 28
# Monkey-patch tf.layers to support argscope.
enable_argscope_for_module(tf.layers)
class Model(ModelDesc):
def inputs(self):
"""
Define all the inputs (with type, shape, name) that
the graph will need.
Define all the inputs (with type, shape, name) that the graph will need.
"""
return [tf.placeholder(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE), 'input'),
tf.placeholder(tf.int32, (None,), 'label')]
def build_graph(self, image, label):
"""This function should build the model which takes the input variables
and return cost at the end"""
# In tensorflow, inputs to convolution function are assumed to be
# NHWC. Add a single channel here.
image = tf.expand_dims(image, 3)
image = image * 2 - 1 # center the pixels values at zero
# The context manager `argscope` sets the default option for all the layers under
# this context. Here we use 32 channel convolution with shape 3x3
with argscope([tf.layers.conv2d], padding='same', activation=tf.nn.relu):
l = tf.layers.conv2d(image, 32, 3, name='conv0')
l = tf.layers.max_pooling2d(l, 2, 2, padding='valid')
......@@ -52,8 +55,6 @@ class Model(ModelDesc):
training=get_current_tower_context().is_training)
logits = tf.layers.dense(l, 10, activation=tf.identity, name='fc1')
tf.nn.softmax(logits, name='prob') # a Bx10 with probabilities
# a vector of length B with loss of each sample
cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
cost = tf.reduce_mean(cost, name='cross_entropy_loss') # the average cross-entropy loss
......@@ -61,15 +62,16 @@ class Model(ModelDesc):
correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32, name='correct')
accuracy = tf.reduce_mean(correct, name='accuracy')
# This will monitor training error (in a moving_average fashion):
# 1. write the value to tensorboard
# 2. write the value to stat.json
# 3. print the value after each epoch
# This will monitor training error & accuracy (in a moving average fashion). The value will be automatically
# 1. written to tensorboard
# 2. written to stat.json
# 3. printed after each epoch
train_error = tf.reduce_mean(1 - correct, name='train_error')
summary.add_moving_summary(train_error, accuracy)
# Use a regex to find parameters to apply weight decay.
# Here we apply a weight decay on all W (weight matrix) of all fc layers
# If you don't like regex, you can certainly define the cost in any other way.
wd_cost = tf.multiply(1e-5,
regularize_cost('fc.*/kernel', tf.nn.l2_loss),
name='regularize_loss')
......@@ -78,6 +80,7 @@ class Model(ModelDesc):
# monitor histograms of all weights (of conv and fc layers) in tensorboard
summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
# the function should return the total cost to be optimized
return total_cost
def optimizer(self):
......@@ -98,16 +101,22 @@ def get_data():
return train, test
def get_config():
if __name__ == '__main__':
# automatically set up the directory train_log/mnist-tflayers for logging
logger.auto_set_dir()
dataset_train, dataset_test = get_data()
# How many iterations you want in each epoch.
# This is the default value, don't actually need to set it in the config
# This (data.size()) is the default value.
steps_per_epoch = dataset_train.size()
# get the config which contains everything necessary for training
return TrainConfig(
config = TrainConfig(
model=Model(),
dataflow=dataset_train, # the DataFlow instance for training
# The input source for training. FeedInput is slow; it is used here only for demo purposes.
# In practice it's best to use QueueInput or others. See tutorials for details.
data=FeedInput(dataset_train),
callbacks=[
ModelSaver(), # save the model after every epoch
MaxSaver('validation_accuracy'), # save the model with highest accuracy (prefix 'validation_')
......@@ -118,22 +127,4 @@ def get_config():
steps_per_epoch=steps_per_epoch,
max_epoch=100,
)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
parser.add_argument('--load', help='load model')
args = parser.parse_args()
if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
# automatically setup the directory train_log/mnist-convnet for logging
logger.auto_set_dir()
config = get_config()
if args.load:
config.session_init = SaverRestore(args.load)
# SimpleTrainer is slow, this is just a demo.
# You can use QueueInputTrainer instead
launch_train_with_config(config, SimpleTrainer())
......@@ -2,8 +2,6 @@
# -*- coding: utf-8 -*-
# File: mnist-tfslim.py
import os
import argparse
"""
MNIST ConvNet example using TensorFlow-slim.
Mostly the same as 'mnist-convnet.py',
......@@ -45,8 +43,6 @@ class Model(ModelDesc):
l = slim.layers.dropout(l, is_training=is_training)
logits = slim.layers.fully_connected(l, 10, activation_fn=None, scope='fc1')
tf.nn.softmax(logits, name='prob')
cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
cost = tf.reduce_mean(cost, name='cross_entropy_loss')
......@@ -75,10 +71,11 @@ def get_data():
return train, test
def get_config():
if __name__ == '__main__':
logger.auto_set_dir()
dataset_train, dataset_test = get_data()
return TrainConfig(
config = TrainConfig(
model=Model(),
dataflow=dataset_train,
callbacks=[
......@@ -89,14 +86,4 @@ def get_config():
],
max_epoch=100,
)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
args = parser.parse_args()
if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
config = get_config()
launch_train_with_config(config, SimpleTrainer())
......@@ -2,17 +2,13 @@
# -*- coding: utf-8 -*-
# File: mnist-visualizations.py
import os
import argparse
"""
MNIST ConvNet example with weights/activations visualization.
The same MNIST ConvNet example, but with weights/activations visualization.
"""
import tensorflow as tf
from tensorpack import *
from tensorpack.dataflow import dataset
import tensorflow as tf
IMAGE_SIZE = 28
......@@ -124,12 +120,11 @@ def get_data():
return train, test
def get_config():
if __name__ == '__main__':
logger.auto_set_dir()
dataset_train, dataset_test = get_data()
return TrainConfig(
config = TrainConfig(
model=Model(),
dataflow=dataset_train,
callbacks=[
......@@ -141,16 +136,4 @@ def get_config():
max_epoch=100,
)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
parser.add_argument('--load', help='load model')
args = parser.parse_args()
if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
config = get_config()
if args.load:
config.session_init = SaverRestore(args.load)
launch_train_with_config(config, SimpleTrainer())
......@@ -16,8 +16,7 @@ import tensorflow as tf
A very small SVHN convnet model (only 0.8m parameters).
About 2.3% validation error after 70 epochs. 2.15% after 150 epochs.
Each epoch iterates over the whole training set (4721 iterations).
Speed is about 43 it/s on TitanX.
Each epoch iterates over the whole training set (4721 iterations), and takes about 24s on a P100.
"""
......@@ -76,9 +75,6 @@ def get_data():
imgaug.Resize((40, 40)),
imgaug.Brightness(30),
imgaug.Contrast((0.5, 1.5)),
imgaug.GaussianDeform( # this is slow. only use it when you have lots of cpus
[(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(40, 40), 0.2, 3),
]
data_train = AugmentImageComponent(data_train, augmentors)
data_train = BatchData(data_train, 128)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment