bugfix for offline predict

bbb2ecc2 · Yuxin Wu · f47923c7 · bbb2ecc2 · bbb2ecc2 · bbb2ecc2
Commit bbb2ecc2 authored Oct 31, 2017 by Yuxin Wu
5 changed files
--- a/examples/FasterRCNN/README.md
+++ b/examples/FasterRCNN/README.md
@@ -29,8 +29,8 @@ To train:
 ```
 ./train.py --load /path/to/ImageNet-ResNet50.npz
 ```
-The code is written for training with __8 GPUs__.
-To run on fewer GPUs, you probably need different hyperparameters for the same performance.
+The code is only intended for training with 1, 2, 4 or 8 GPUs.
+With any other number of GPUs, you will probably need different hyperparameters to reach the same performance.

 To predict on an image (and show output in a window):
 ```
@@ -39,6 +39,8 @@ To predict on an image (and show output in a window):

 ## Results

+Mean Average Precision @IoU=0.50:0.95:
+
 + trainval35k/minival, FASTRCNN_BATCH=256: 33.4. Takes 49h on 8 TitanX.
 + trainval35k/minival, FASTRCNN_BATCH=64: 32.2. Takes 31h on 8 TitanX.


--- a/examples/FasterRCNN/train.py
+++ b/examples/FasterRCNN/train.py
@@ -138,8 +138,6 @@ class Model(ModelDesc):
        else:
            opt = tf.train.MomentumOptimizer(lr, 0.9)
        return opt
-        return optimizer.apply_grad_processors(
-            opt, [gradproc.ScaleGradient(('.*/b', 2))])


 def visualize(model_path, nr_visualize=50, output_dir='output'):

--- a/examples/load-alexnet.py
+++ b/examples/load-alexnet.py
@@ -22,40 +22,34 @@ Usage:
 """


-class Model(ModelDesc):
-    def _get_inputs(self):
-        return [InputDesc(tf.float32, (None, 227, 227, 3), 'input')]
+def tower_func(image):
+    # img: 227x227x3
+    with argscope([Conv2D, FullyConnected], nl=tf.nn.relu):
+        l = Conv2D('conv1', image, out_channel=96, kernel_shape=11, stride=4, padding='VALID')
+        l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm1')
+        l = MaxPooling('pool1', l, 3, stride=2, padding='VALID')

-    def _build_graph(self, inputs):
-        # img: 227x227x3
-        image = inputs[0]
+        l = Conv2D('conv2', l, out_channel=256, kernel_shape=5, split=2)
+        l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm2')
+        l = MaxPooling('pool2', l, 3, stride=2, padding='VALID')

-        with argscope([Conv2D, FullyConnected], nl=tf.nn.relu):
-            l = Conv2D('conv1', image, out_channel=96, kernel_shape=11, stride=4, padding='VALID')
-            l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm1')
-            l = MaxPooling('pool1', l, 3, stride=2, padding='VALID')
+        l = Conv2D('conv3', l, out_channel=384, kernel_shape=3)
+        l = Conv2D('conv4', l, out_channel=384, kernel_shape=3, split=2)
+        l = Conv2D('conv5', l, out_channel=256, kernel_shape=3, split=2)
+        l = MaxPooling('pool3', l, 3, stride=2, padding='VALID')

-            l = Conv2D('conv2', l, out_channel=256, kernel_shape=5, split=2)
-            l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm2')
-            l = MaxPooling('pool2', l, 3, stride=2, padding='VALID')
-
-            l = Conv2D('conv3', l, out_channel=384, kernel_shape=3)
-            l = Conv2D('conv4', l, out_channel=384, kernel_shape=3, split=2)
-            l = Conv2D('conv5', l, out_channel=256, kernel_shape=3, split=2)
-            l = MaxPooling('pool3', l, 3, stride=2, padding='VALID')
-
-            # This is just a script to load model, so we ignore the dropout layer
-            l = FullyConnected('fc6', l, 4096)
-            l = FullyConnected('fc7', l, out_dim=4096)
-        # fc will have activation summary by default. disable this for the output layer
-        logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
-        prob = tf.nn.softmax(logits, name='prob')
+        # This is just a script to load a model, so we ignore the dropout layer
+        l = FullyConnected('fc6', l, 4096)
+        l = FullyConnected('fc7', l, out_dim=4096)
+    logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
+    prob = tf.nn.softmax(logits, name='prob')


 def run_test(path, input):
    param_dict = np.load(path, encoding='latin1').item()
    predictor = OfflinePredictor(PredictConfig(
-        model=Model(),
+        inputs_desc=[InputDesc(tf.float32, (None, 227, 227, 3), 'input')],
+        tower_func=tower_func,
        session_init=DictRestore(param_dict),
        input_names=['input'],
        output_names=['prob']
@@ -63,8 +57,7 @@ def run_test(path, input):

    im = cv2.imread(input)
    assert im is not None, input
-    im = cv2.resize(im, (227, 227))[:, :, ::-1].reshape(
-        (1, 227, 227, 3)).astype('float32') - 110
+    im = cv2.resize(im, (227, 227))[None, :, :, ::-1].astype('float32') - 110
    outputs = predictor(im)[0]
    prob = outputs[0]
    ret = prob.argsort()[-10:][::-1]

--- a/examples/load-vgg16.py
+++ b/examples/load-vgg16.py
@@ -23,49 +23,45 @@ Usage:
 """


-class Model(ModelDesc):
-    def _get_inputs(self):
-        return [InputDesc(tf.float32, (None, 224, 224, 3), 'input')]
-
-    def _build_graph(self, inputs):
-        image = inputs[0]
-        with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu):
-            logits = (LinearWrap(image)
-                      .Conv2D('conv1_1', 64)
-                      .Conv2D('conv1_2', 64)
-                      .MaxPooling('pool1', 2)
-                      # 112
-                      .Conv2D('conv2_1', 128)
-                      .Conv2D('conv2_2', 128)
-                      .MaxPooling('pool2', 2)
-                      # 56
-                      .Conv2D('conv3_1', 256)
-                      .Conv2D('conv3_2', 256)
-                      .Conv2D('conv3_3', 256)
-                      .MaxPooling('pool3', 2)
-                      # 28
-                      .Conv2D('conv4_1', 512)
-                      .Conv2D('conv4_2', 512)
-                      .Conv2D('conv4_3', 512)
-                      .MaxPooling('pool4', 2)
-                      # 14
-                      .Conv2D('conv5_1', 512)
-                      .Conv2D('conv5_2', 512)
-                      .Conv2D('conv5_3', 512)
-                      .MaxPooling('pool5', 2)
-                      # 7
-                      .FullyConnected('fc6', 4096, nl=tf.nn.relu)
-                      .Dropout('drop0', 0.5)
-                      .FullyConnected('fc7', 4096, nl=tf.nn.relu)
-                      .Dropout('drop1', 0.5)
-                      .FullyConnected('fc8', out_dim=1000, nl=tf.identity)())
-        prob = tf.nn.softmax(logits, name='prob')
+def tower_func(image):
+    with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu):
+        logits = (LinearWrap(image)
+                  .Conv2D('conv1_1', 64)
+                  .Conv2D('conv1_2', 64)
+                  .MaxPooling('pool1', 2)
+                  # 112
+                  .Conv2D('conv2_1', 128)
+                  .Conv2D('conv2_2', 128)
+                  .MaxPooling('pool2', 2)
+                  # 56
+                  .Conv2D('conv3_1', 256)
+                  .Conv2D('conv3_2', 256)
+                  .Conv2D('conv3_3', 256)
+                  .MaxPooling('pool3', 2)
+                  # 28
+                  .Conv2D('conv4_1', 512)
+                  .Conv2D('conv4_2', 512)
+                  .Conv2D('conv4_3', 512)
+                  .MaxPooling('pool4', 2)
+                  # 14
+                  .Conv2D('conv5_1', 512)
+                  .Conv2D('conv5_2', 512)
+                  .Conv2D('conv5_3', 512)
+                  .MaxPooling('pool5', 2)
+                  # 7
+                  .FullyConnected('fc6', 4096, nl=tf.nn.relu)
+                  .Dropout('drop0', 0.5)
+                  .FullyConnected('fc7', 4096, nl=tf.nn.relu)
+                  .Dropout('drop1', 0.5)
+                  .FullyConnected('fc8', out_dim=1000, nl=tf.identity)())
+    prob = tf.nn.softmax(logits, name='prob')


 def run_test(path, input):
    param_dict = np.load(path, encoding='latin1').item()
    predict_func = OfflinePredictor(PredictConfig(
-        model=Model(),
+        inputs_desc=[InputDesc(tf.float32, (None, 224, 224, 3), 'input')],
+        tower_func=tower_func,
        session_init=DictRestore(param_dict),
        input_names=['input'],
        output_names=['prob']   # prob:0 is the probability distribution

--- a/tensorpack/tfutils/tower.py
+++ b/tensorpack/tfutils/tower.py
@@ -66,7 +66,7 @@ class TowerContext(object):
        """
        return self.is_main_training_tower or \
            (self.is_training and len(self._vs_name) > 0) or \
-            (not self.is_training and len(self._vs_name) > 0 and not self._initial_vs_reuse)
+            (not self.is_training and not self._initial_vs_reuse)

    @property
    def name(self):