Commit bbb2ecc2 authored by Yuxin Wu

bugfix for offline predict

parent f47923c7
...@@ -29,8 +29,8 @@ To train: ...@@ -29,8 +29,8 @@ To train:
``` ```
./train.py --load /path/to/ImageNet-ResNet50.npz ./train.py --load /path/to/ImageNet-ResNet50.npz
``` ```
The code is written for training with __8 GPUs__. The code is only for training with 1, 2, 4 or 8 GPUs.
To run on fewer GPUs, you probably need different hyperparameters for the same performance. Otherwise, you probably need different hyperparameters for the same performance.
To predict on an image (and show output in a window): To predict on an image (and show output in a window):
``` ```
...@@ -39,6 +39,8 @@ To predict on an image (and show output in a window): ...@@ -39,6 +39,8 @@ To predict on an image (and show output in a window):
## Results ## Results
Mean Average Precision @IoU=0.50:0.95:
+ trainval35k/minival, FASTRCNN_BATCH=256: 33.4. Takes 49h on 8 TitanX. + trainval35k/minival, FASTRCNN_BATCH=256: 33.4. Takes 49h on 8 TitanX.
+ trainval35k/minival, FASTRCNN_BATCH=64: 32.2. Takes 31h on 8 TitanX. + trainval35k/minival, FASTRCNN_BATCH=64: 32.2. Takes 31h on 8 TitanX.
......
...@@ -138,8 +138,6 @@ class Model(ModelDesc): ...@@ -138,8 +138,6 @@ class Model(ModelDesc):
else: else:
opt = tf.train.MomentumOptimizer(lr, 0.9) opt = tf.train.MomentumOptimizer(lr, 0.9)
return opt return opt
return optimizer.apply_grad_processors(
opt, [gradproc.ScaleGradient(('.*/b', 2))])
def visualize(model_path, nr_visualize=50, output_dir='output'): def visualize(model_path, nr_visualize=50, output_dir='output'):
......
...@@ -22,40 +22,34 @@ Usage: ...@@ -22,40 +22,34 @@ Usage:
""" """
class Model(ModelDesc): def tower_func(image):
def _get_inputs(self): # img: 227x227x3
return [InputDesc(tf.float32, (None, 227, 227, 3), 'input')] with argscope([Conv2D, FullyConnected], nl=tf.nn.relu):
l = Conv2D('conv1', image, out_channel=96, kernel_shape=11, stride=4, padding='VALID')
l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm1')
l = MaxPooling('pool1', l, 3, stride=2, padding='VALID')
def _build_graph(self, inputs): l = Conv2D('conv2', l, out_channel=256, kernel_shape=5, split=2)
# img: 227x227x3 l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm2')
image = inputs[0] l = MaxPooling('pool2', l, 3, stride=2, padding='VALID')
with argscope([Conv2D, FullyConnected], nl=tf.nn.relu): l = Conv2D('conv3', l, out_channel=384, kernel_shape=3)
l = Conv2D('conv1', image, out_channel=96, kernel_shape=11, stride=4, padding='VALID') l = Conv2D('conv4', l, out_channel=384, kernel_shape=3, split=2)
l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm1') l = Conv2D('conv5', l, out_channel=256, kernel_shape=3, split=2)
l = MaxPooling('pool1', l, 3, stride=2, padding='VALID') l = MaxPooling('pool3', l, 3, stride=2, padding='VALID')
l = Conv2D('conv2', l, out_channel=256, kernel_shape=5, split=2) # This is just a script to load model, so we ignore the dropout layer
l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm2') l = FullyConnected('fc6', l, 4096)
l = MaxPooling('pool2', l, 3, stride=2, padding='VALID') l = FullyConnected('fc7', l, out_dim=4096)
logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
l = Conv2D('conv3', l, out_channel=384, kernel_shape=3) prob = tf.nn.softmax(logits, name='prob')
l = Conv2D('conv4', l, out_channel=384, kernel_shape=3, split=2)
l = Conv2D('conv5', l, out_channel=256, kernel_shape=3, split=2)
l = MaxPooling('pool3', l, 3, stride=2, padding='VALID')
# This is just a script to load model, so we ignore the dropout layer
l = FullyConnected('fc6', l, 4096)
l = FullyConnected('fc7', l, out_dim=4096)
# fc will have activation summary by default. disable this for the output layer
logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
prob = tf.nn.softmax(logits, name='prob')
def run_test(path, input): def run_test(path, input):
param_dict = np.load(path, encoding='latin1').item() param_dict = np.load(path, encoding='latin1').item()
predictor = OfflinePredictor(PredictConfig( predictor = OfflinePredictor(PredictConfig(
model=Model(), inputs_desc=[InputDesc(tf.float32, (None, 227, 227, 3), 'input')],
tower_func=tower_func,
session_init=DictRestore(param_dict), session_init=DictRestore(param_dict),
input_names=['input'], input_names=['input'],
output_names=['prob'] output_names=['prob']
...@@ -63,8 +57,7 @@ def run_test(path, input): ...@@ -63,8 +57,7 @@ def run_test(path, input):
im = cv2.imread(input) im = cv2.imread(input)
assert im is not None, input assert im is not None, input
im = cv2.resize(im, (227, 227))[:, :, ::-1].reshape( im = cv2.resize(im, (227, 227))[None, :, :, ::-1].astype('float32') - 110
(1, 227, 227, 3)).astype('float32') - 110
outputs = predictor(im)[0] outputs = predictor(im)[0]
prob = outputs[0] prob = outputs[0]
ret = prob.argsort()[-10:][::-1] ret = prob.argsort()[-10:][::-1]
......
...@@ -23,49 +23,45 @@ Usage: ...@@ -23,49 +23,45 @@ Usage:
""" """
class Model(ModelDesc): def tower_func(image):
def _get_inputs(self): with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu):
return [InputDesc(tf.float32, (None, 224, 224, 3), 'input')] logits = (LinearWrap(image)
.Conv2D('conv1_1', 64)
def _build_graph(self, inputs): .Conv2D('conv1_2', 64)
image = inputs[0] .MaxPooling('pool1', 2)
with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu): # 112
logits = (LinearWrap(image) .Conv2D('conv2_1', 128)
.Conv2D('conv1_1', 64) .Conv2D('conv2_2', 128)
.Conv2D('conv1_2', 64) .MaxPooling('pool2', 2)
.MaxPooling('pool1', 2) # 56
# 112 .Conv2D('conv3_1', 256)
.Conv2D('conv2_1', 128) .Conv2D('conv3_2', 256)
.Conv2D('conv2_2', 128) .Conv2D('conv3_3', 256)
.MaxPooling('pool2', 2) .MaxPooling('pool3', 2)
# 56 # 28
.Conv2D('conv3_1', 256) .Conv2D('conv4_1', 512)
.Conv2D('conv3_2', 256) .Conv2D('conv4_2', 512)
.Conv2D('conv3_3', 256) .Conv2D('conv4_3', 512)
.MaxPooling('pool3', 2) .MaxPooling('pool4', 2)
# 28 # 14
.Conv2D('conv4_1', 512) .Conv2D('conv5_1', 512)
.Conv2D('conv4_2', 512) .Conv2D('conv5_2', 512)
.Conv2D('conv4_3', 512) .Conv2D('conv5_3', 512)
.MaxPooling('pool4', 2) .MaxPooling('pool5', 2)
# 14 # 7
.Conv2D('conv5_1', 512) .FullyConnected('fc6', 4096, nl=tf.nn.relu)
.Conv2D('conv5_2', 512) .Dropout('drop0', 0.5)
.Conv2D('conv5_3', 512) .FullyConnected('fc7', 4096, nl=tf.nn.relu)
.MaxPooling('pool5', 2) .Dropout('drop1', 0.5)
# 7 .FullyConnected('fc8', out_dim=1000, nl=tf.identity)())
.FullyConnected('fc6', 4096, nl=tf.nn.relu) prob = tf.nn.softmax(logits, name='prob')
.Dropout('drop0', 0.5)
.FullyConnected('fc7', 4096, nl=tf.nn.relu)
.Dropout('drop1', 0.5)
.FullyConnected('fc8', out_dim=1000, nl=tf.identity)())
prob = tf.nn.softmax(logits, name='prob')
def run_test(path, input): def run_test(path, input):
param_dict = np.load(path, encoding='latin1').item() param_dict = np.load(path, encoding='latin1').item()
predict_func = OfflinePredictor(PredictConfig( predict_func = OfflinePredictor(PredictConfig(
model=Model(), inputs_desc=[InputDesc(tf.float32, (None, 224, 224, 3), 'input')],
tower_func=tower_func,
session_init=DictRestore(param_dict), session_init=DictRestore(param_dict),
input_names=['input'], input_names=['input'],
output_names=['prob'] # prob:0 is the probability distribution output_names=['prob'] # prob:0 is the probability distribution
......
...@@ -66,7 +66,7 @@ class TowerContext(object): ...@@ -66,7 +66,7 @@ class TowerContext(object):
""" """
return self.is_main_training_tower or \ return self.is_main_training_tower or \
(self.is_training and len(self._vs_name) > 0) or \ (self.is_training and len(self._vs_name) > 0) or \
(not self.is_training and len(self._vs_name) > 0 and not self._initial_vs_reuse) (not self.is_training and not self._initial_vs_reuse)
@property @property
def name(self): def name(self):
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment