move RPN functions around to model_rpn; improve logging

c5a47192 · Yuxin Wu · df82c65a · c5a47192 · c5a47192 · c5a47192
Commit c5a47192 authored Jul 20, 2019 by Yuxin Wu
6 changed files
--- a/examples/FasterRCNN/data.py
+++ b/examples/FasterRCNN/data.py
@@ -13,8 +13,10 @@ from tensorpack.dataflow import (
    MultiProcessMapData, MultiThreadMapData, TestDataSpeed, imgaug,
 )
 from tensorpack.utils import logger
-from tensorpack.utils.argtools import log_once, memoized
+from tensorpack.utils.argtools import log_once
+from modeling.model_rpn import get_all_anchors
+from modeling.model_fpn import get_all_anchors_fpn
 from common import (
    CustomResize, DataFromListOfDict, box_to_point8,
    filter_boxes_inside_shape, np_iou, point8_to_box, segmentation_to_mask,
@@ -57,64 +59,6 @@ def print_class_histogram(roidbs):
    logger.info("Ground-Truth category distribution:\n" + colored(table, "cyan"))
-@memoized
-def get_all_anchors(*, stride, sizes, ratios, max_size):
-    """
-    Get all anchors in the largest possible image, shifted, floatbox
-    Args:
-        stride (int): the stride of anchors.
-        sizes (tuple[int]): the sizes (sqrt area) of anchors
-        ratios (tuple[int]): the aspect ratios of anchors
-        max_size (int): maximum size of input image
-    Returns:
-        anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox
-        The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SIZE.
-    """
-    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
-    # are centered on 0, have sqrt areas equal to the specified sizes, and aspect ratios as given.
-    anchors = []
-    for sz in sizes:
-        for ratio in ratios:
-            w = np.sqrt(sz * sz / ratio)
-            h = ratio * w
-            anchors.append([-w, -h, w, h])
-    cell_anchors = np.asarray(anchors) * 0.5
-    field_size = int(np.ceil(max_size / stride))
-    shifts = (np.arange(0, field_size) * stride).astype("float32")
-    shift_x, shift_y = np.meshgrid(shifts, shifts)
-    shift_x = shift_x.flatten()
-    shift_y = shift_y.flatten()
-    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
-    # Kx4, K = field_size * field_size
-    K = shifts.shape[0]
-    A = cell_anchors.shape[0]
-    field_of_anchors = cell_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
-    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
-    # FSxFSxAx4
-    # Many rounding happens inside the anchor code anyway
-    # assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
-    field_of_anchors = field_of_anchors.astype("float32")
-    return field_of_anchors
-@memoized
-def get_all_anchors_fpn(*, strides, sizes, ratios, max_size):
-    """
-    Returns:
-        [anchors]: each anchors is a SxSx NUM_ANCHOR_RATIOS x4 array.
-    """
-    assert len(strides) == len(sizes)
-    foas = []
-    for stride, size in zip(strides, sizes):
-        foa = get_all_anchors(stride=stride, sizes=(size,), ratios=ratios, max_size=max_size)
-        foas.append(foa)
-    return foas
 class TrainingDataPreprocessor:
    """
    The mapper to preprocess the input data for training.
@@ -248,6 +192,7 @@ class TrainingDataPreprocessor:
        featuremap_boxes = featuremap_boxes.reshape((anchorH, anchorW, num_anchor, 4))
        return featuremap_labels, featuremap_boxes
+    # TODO: can probably merge single-level logic with FPN logic to simplify code
    def get_multilevel_rpn_anchor_input(self, im, boxes, is_crowd):
        """
        Args:

--- a/examples/FasterRCNN/modeling/model_box.py
+++ b/examples/FasterRCNN/modeling/model_box.py
@@ -201,7 +201,8 @@ class RPNAnchors(namedtuple('_RPNAnchors', ['boxes', 'gt_labels', 'gt_boxes'])):
 if __name__ == '__main__':
    """
-    Demonstrate what's wrong with tf.image.crop_and_resize:
+    Demonstrate what's wrong with tf.image.crop_and_resize.
+    Also reported at https://github.com/tensorflow/tensorflow/issues/26278
    """
    import tensorflow.contrib.eager as tfe
    tfe.enable_eager_execution()

--- a/examples/FasterRCNN/modeling/model_fpn.py
+++ b/examples/FasterRCNN/modeling/model_fpn.py
@@ -9,12 +9,13 @@ from tensorpack.tfutils.argscope import argscope
 from tensorpack.tfutils.scope_utils import under_name_scope
 from tensorpack.tfutils.summary import add_moving_summary
 from tensorpack.tfutils.tower import get_current_tower_context
+from tensorpack.utils.argtools import memoized
 from config import config as cfg
 from utils.box_ops import area as tf_area
 from .backbone import GroupNorm
 from .model_box import roi_align
-from .model_rpn import generate_rpn_proposals, rpn_losses
+from .model_rpn import generate_rpn_proposals, rpn_losses, get_all_anchors
 @layer_register(log_shape=True)
@@ -217,3 +218,17 @@ def generate_fpn_proposals(
    tf.sigmoid(proposal_scores, name='probs')  # for visualization
    return tf.stop_gradient(proposal_boxes, name='boxes'), \
        tf.stop_gradient(proposal_scores, name='scores')
+@memoized
+def get_all_anchors_fpn(*, strides, sizes, ratios, max_size):
+    """
+    Returns:
+        list[anchors]: one array per stride; each is an S x S x NUM_ANCHOR_RATIOS x 4 array.
+    """
+    assert len(strides) == len(sizes)
+    foas = []
+    for stride, size in zip(strides, sizes):
+        foa = get_all_anchors(stride=stride, sizes=(size,), ratios=ratios, max_size=max_size)
+        foas.append(foa)
+    return foas
--- a/examples/FasterRCNN/modeling/model_rpn.py
+++ b/examples/FasterRCNN/modeling/model_rpn.py
 # -*- coding: utf-8 -*-
 import tensorflow as tf
+import numpy as np
 from tensorpack.models import Conv2D, layer_register
 from tensorpack.tfutils.argscope import argscope
 from tensorpack.tfutils.scope_utils import auto_reuse_variable_scope, under_name_scope
 from tensorpack.tfutils.summary import add_moving_summary
+from tensorpack.utils.argtools import memoized
 from config import config as cfg
 from .model_box import clip_boxes
@@ -151,3 +153,47 @@ def generate_rpn_proposals(boxes, scores, img_shape,
    proposal_scores = tf.gather(topk_valid_scores, nms_indices)
    tf.sigmoid(proposal_scores, name='probs')  # for visualization
    return tf.stop_gradient(proposal_boxes, name='boxes'), tf.stop_gradient(proposal_scores, name='scores')
+@memoized
+def get_all_anchors(*, stride, sizes, ratios, max_size):
+    """
+    Get all anchors in the largest possible image, shifted, floatbox
+    Args:
+        stride (int): the stride of anchors.
+        sizes (tuple[int]): the sizes (sqrt area) of anchors
+        ratios (tuple[float]): the aspect ratios (height / width) of anchors
+        max_size (int): maximum size of input image
+    Returns:
+        anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox
+        The layout in the NUM_ANCHOR dim is NUM_SIZE x NUM_RATIO (sizes vary slowest).
+    """
+    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
+    # are centered on 0, have sqrt areas equal to the specified sizes, and aspect ratios as given.
+    anchors = []
+    for sz in sizes:
+        for ratio in ratios:
+            w = np.sqrt(sz * sz / ratio)
+            h = ratio * w
+            anchors.append([-w, -h, w, h])
+    cell_anchors = np.asarray(anchors) * 0.5
+    field_size = int(np.ceil(max_size / stride))
+    shifts = (np.arange(0, field_size) * stride).astype("float32")
+    shift_x, shift_y = np.meshgrid(shifts, shifts)
+    shift_x = shift_x.flatten()
+    shift_y = shift_y.flatten()
+    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
+    # Kx4, K = field_size * field_size
+    K = shifts.shape[0]
+    A = cell_anchors.shape[0]
+    field_of_anchors = cell_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
+    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
+    # FSxFSxAx4
+    # A lot of rounding happens inside the anchor code anyway
+    # assert np.all(field_of_anchors == field_of_anchors.astype('int32'))
+    field_of_anchors = field_of_anchors.astype("float32")
+    return field_of_anchors
--- a/tensorpack/tfutils/collection.py
+++ b/tensorpack/tfutils/collection.py
@@ -135,15 +135,15 @@ class CollectionGuard(object):
            if k in self._whitelist or k in self._freeze_keys:
                continue
            if k not in self.original:
-                newly_created.append(self._key_name(k))
+                newly_created.append((self._key_name(k), len(v)))
            else:
                old_v = self.original[k]
                if len(old_v) != len(v):
                    size_change.append((self._key_name(k), len(old_v), len(v)))
        if newly_created:
            logger.info(
-                "New collections created in tower {}: {}".format(
+                "New collections created in tower {}: ".format(self._name) +
-                    self._name, ', '.join(newly_created)))
+                ', '.join(["{} of size {}".format(key, size) for key, size in newly_created]))
        if size_change:
            logger.info(
                "Size of these collections were changed in {}: {}".format(

--- a/tensorpack/tfutils/sessinit.py
+++ b/tensorpack/tfutils/sessinit.py
@@ -196,13 +196,17 @@ class DictRestore(SessionInit):
    def _run_init(self, sess):
        variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
+        variable_names_list = [k.name for k in variables]
-        variable_names = set([k.name for k in variables])
+        variable_names = set(variable_names_list)
        param_names = set(six.iterkeys(self._prms))
-        intersect = variable_names & param_names
+        # intersect has the original ordering of variables
+        intersect = [v for v in variable_names_list if v in param_names]
-        logger.info("Variables to restore from dict: {}".format(', '.join(map(str, intersect))))
+        # use opname (without :0) for clarity in logging
+        logger.info("Variables to restore from dict: {}".format(
+            ', '.join(get_op_tensor_name(x)[0] for x in intersect)))
        mismatch = MismatchLogger('graph', 'dict')
        for k in sorted(variable_names - param_names):