Commit c5a47192 authored by Yuxin Wu

move RPN functions around to model_rpn; improve logging

parent df82c65a
......@@ -13,8 +13,10 @@ from tensorpack.dataflow import (
MultiProcessMapData, MultiThreadMapData, TestDataSpeed, imgaug,
)
from tensorpack.utils import logger
from tensorpack.utils.argtools import log_once, memoized
from tensorpack.utils.argtools import log_once
from modeling.model_rpn import get_all_anchors
from modeling.model_fpn import get_all_anchors_fpn
from common import (
CustomResize, DataFromListOfDict, box_to_point8,
filter_boxes_inside_shape, np_iou, point8_to_box, segmentation_to_mask,
......@@ -57,64 +59,6 @@ def print_class_histogram(roidbs):
logger.info("Ground-Truth category distribution:\n" + colored(table, "cyan"))
@memoized
def get_all_anchors(*, stride, sizes, ratios, max_size):
    """
    Get all anchors in the largest possible image, shifted, floatbox.

    Args:
        stride (int): the stride of anchors. Must be positive.
        sizes (tuple[int]): the sizes (sqrt area) of anchors.
        ratios (tuple[float]): the aspect ratios (height / width) of anchors.
            Every ratio must be positive.
        max_size (int): maximum size of input image.

    Returns:
        anchors: float32 array of shape SxSxNUM_ANCHORx4, where
        S == ceil(max_size / stride). Each box is (x1, y1, x2, y2).
        The first axis follows the y shift and the second the x shift.
        In the NUM_ANCHOR dim the ratio varies fastest:
        anchor index == size_index * len(ratios) + ratio_index.

    Raises:
        ValueError: if ``stride`` or any entry of ``ratios`` is not positive.

    NOTE(review): the result goes through ``memoized``; presumably the same
    array object is handed back on repeated calls, so callers should not
    modify it in place -- confirm against the ``memoized`` implementation.
    """
    if stride <= 0:
        raise ValueError("stride must be positive, got {}".format(stride))
    if any(ratio <= 0 for ratio in ratios):
        # A negative ratio would otherwise silently produce NaN anchors.
        raise ValueError("ratios must all be positive, got {}".format(ratios))

    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on 0, have sqrt areas equal to the specified sizes, and aspect ratios as given.
    anchors = []
    for sz in sizes:
        for ratio in ratios:
            w = np.sqrt(sz * sz / ratio)
            h = ratio * w
            anchors.append([-w, -h, w, h])
    num_anchors = len(anchors)
    # Full extents were collected above; halve them to center each box on 0.
    cell_anchors = np.asarray(anchors) * 0.5

    field_size = int(np.ceil(max_size / stride))
    offsets = (np.arange(0, field_size) * stride).astype("float32")
    shift_x, shift_y = np.meshgrid(offsets, offsets)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    # Kx4, K = field_size * field_size; the same (x, y) shift moves both corners.
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
    num_cells = shifts.shape[0]

    # Broadcast (1, A, 4) + (K, 1, 4) -> (K, A, 4): every anchor at every cell.
    field_of_anchors = (
        cell_anchors.reshape((1, num_anchors, 4)) + shifts.reshape((num_cells, 1, 4))
    )
    # FSxFSxAx4
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, num_anchors, 4))
    # Many rounding happens inside the anchor code anyway, so the boxes are
    # not required to be integral here.
    return field_of_anchors.astype("float32")
@memoized
def get_all_anchors_fpn(*, strides, sizes, ratios, max_size):
    """
    Compute the anchors of every level, one ``get_all_anchors`` call per level.

    Args:
        strides: one anchor stride per level.
        sizes: one anchor size per level; must have the same length as ``strides``.
        ratios: aspect ratios, passed unchanged to ``get_all_anchors`` for every level.
        max_size: passed unchanged to ``get_all_anchors`` for every level.

    Returns:
        [anchors]: one entry per (stride, size) pair, in the given order.
        Each anchors is a SxSx NUM_ANCHOR_RATIOS x4 array.
    """
    assert len(strides) == len(sizes)
    # Each level gets exactly one size, hence the 1-tuple.
    return [
        get_all_anchors(stride=level_stride, sizes=(level_size,), ratios=ratios, max_size=max_size)
        for level_stride, level_size in zip(strides, sizes)
    ]
class TrainingDataPreprocessor:
"""
The mapper to preprocess the input data for training.
......@@ -248,6 +192,7 @@ class TrainingDataPreprocessor:
featuremap_boxes = featuremap_boxes.reshape((anchorH, anchorW, num_anchor, 4))
return featuremap_labels, featuremap_boxes
# TODO: can probably merge single-level logic with FPN logic to simplify code
def get_multilevel_rpn_anchor_input(self, im, boxes, is_crowd):
"""
Args:
......
......@@ -201,7 +201,8 @@ class RPNAnchors(namedtuple('_RPNAnchors', ['boxes', 'gt_labels', 'gt_boxes'])):
if __name__ == '__main__':
"""
Demonstrate what's wrong with tf.image.crop_and_resize:
Demonstrate what's wrong with tf.image.crop_and_resize.
Also reported at https://github.com/tensorflow/tensorflow/issues/26278
"""
import tensorflow.contrib.eager as tfe
tfe.enable_eager_execution()
......
......@@ -9,12 +9,13 @@ from tensorpack.tfutils.argscope import argscope
from tensorpack.tfutils.scope_utils import under_name_scope
from tensorpack.tfutils.summary import add_moving_summary
from tensorpack.tfutils.tower import get_current_tower_context
from tensorpack.utils.argtools import memoized
from config import config as cfg
from utils.box_ops import area as tf_area
from .backbone import GroupNorm
from .model_box import roi_align
from .model_rpn import generate_rpn_proposals, rpn_losses
from .model_rpn import generate_rpn_proposals, rpn_losses, get_all_anchors
@layer_register(log_shape=True)
......@@ -217,3 +218,17 @@ def generate_fpn_proposals(
tf.sigmoid(proposal_scores, name='probs') # for visualization
return tf.stop_gradient(proposal_boxes, name='boxes'), \
tf.stop_gradient(proposal_scores, name='scores')
@memoized
def get_all_anchors_fpn(*, strides, sizes, ratios, max_size):
    """
    Compute the anchors of every level, one ``get_all_anchors`` call per level.

    Args:
        strides: one anchor stride per level.
        sizes: one anchor size per level; must have the same length as ``strides``.
        ratios: aspect ratios, passed unchanged to ``get_all_anchors`` for every level.
        max_size: passed unchanged to ``get_all_anchors`` for every level.

    Returns:
        [anchors]: one entry per (stride, size) pair, in the given order.
        Each anchors is a SxSx NUM_ANCHOR_RATIOS x4 array.
    """
    assert len(strides) == len(sizes)
    # Each level gets exactly one size, hence the 1-tuple.
    return [
        get_all_anchors(stride=level_stride, sizes=(level_size,), ratios=ratios, max_size=max_size)
        for level_stride, level_size in zip(strides, sizes)
    ]
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
from tensorpack.models import Conv2D, layer_register
from tensorpack.tfutils.argscope import argscope
from tensorpack.tfutils.scope_utils import auto_reuse_variable_scope, under_name_scope
from tensorpack.tfutils.summary import add_moving_summary
from tensorpack.utils.argtools import memoized
from config import config as cfg
from .model_box import clip_boxes
......@@ -151,3 +153,47 @@ def generate_rpn_proposals(boxes, scores, img_shape,
proposal_scores = tf.gather(topk_valid_scores, nms_indices)
tf.sigmoid(proposal_scores, name='probs') # for visualization
return tf.stop_gradient(proposal_boxes, name='boxes'), tf.stop_gradient(proposal_scores, name='scores')
@memoized
def get_all_anchors(*, stride, sizes, ratios, max_size):
    """
    Get all anchors in the largest possible image, shifted, floatbox.

    Args:
        stride (int): the stride of anchors. Must be positive.
        sizes (tuple[int]): the sizes (sqrt area) of anchors.
        ratios (tuple[float]): the aspect ratios (height / width) of anchors.
            Every ratio must be positive.
        max_size (int): maximum size of input image.

    Returns:
        anchors: float32 array of shape SxSxNUM_ANCHORx4, where
        S == ceil(max_size / stride). Each box is (x1, y1, x2, y2).
        The first axis follows the y shift and the second the x shift.
        In the NUM_ANCHOR dim the ratio varies fastest:
        anchor index == size_index * len(ratios) + ratio_index.

    Raises:
        ValueError: if ``stride`` or any entry of ``ratios`` is not positive.

    NOTE(review): the result goes through ``memoized``; presumably the same
    array object is handed back on repeated calls, so callers should not
    modify it in place -- confirm against the ``memoized`` implementation.
    """
    if stride <= 0:
        raise ValueError("stride must be positive, got {}".format(stride))
    if any(ratio <= 0 for ratio in ratios):
        # A negative ratio would otherwise silently produce NaN anchors.
        raise ValueError("ratios must all be positive, got {}".format(ratios))

    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on 0, have sqrt areas equal to the specified sizes, and aspect ratios as given.
    anchors = []
    for sz in sizes:
        for ratio in ratios:
            w = np.sqrt(sz * sz / ratio)
            h = ratio * w
            anchors.append([-w, -h, w, h])
    num_anchors = len(anchors)
    # Full extents were collected above; halve them to center each box on 0.
    cell_anchors = np.asarray(anchors) * 0.5

    field_size = int(np.ceil(max_size / stride))
    offsets = (np.arange(0, field_size) * stride).astype("float32")
    shift_x, shift_y = np.meshgrid(offsets, offsets)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    # Kx4, K = field_size * field_size; the same (x, y) shift moves both corners.
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()
    num_cells = shifts.shape[0]

    # Broadcast (1, A, 4) + (K, 1, 4) -> (K, A, 4): every anchor at every cell.
    field_of_anchors = (
        cell_anchors.reshape((1, num_anchors, 4)) + shifts.reshape((num_cells, 1, 4))
    )
    # FSxFSxAx4
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, num_anchors, 4))
    # Many rounding happens inside the anchor code anyway, so the boxes are
    # not required to be integral here.
    return field_of_anchors.astype("float32")
......@@ -135,15 +135,15 @@ class CollectionGuard(object):
if k in self._whitelist or k in self._freeze_keys:
continue
if k not in self.original:
newly_created.append(self._key_name(k))
newly_created.append((self._key_name(k), len(v)))
else:
old_v = self.original[k]
if len(old_v) != len(v):
size_change.append((self._key_name(k), len(old_v), len(v)))
if newly_created:
logger.info(
"New collections created in tower {}: {}".format(
self._name, ', '.join(newly_created)))
"New collections created in tower {}: ".format(self._name) +
', '.join(["{} of size {}".format(key, size) for key, size in newly_created]))
if size_change:
logger.info(
"Size of these collections were changed in {}: {}".format(
......
......@@ -196,13 +196,17 @@ class DictRestore(SessionInit):
def _run_init(self, sess):
variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
variable_names_list = [k.name for k in variables]
variable_names = set([k.name for k in variables])
variable_names = set(variable_names_list)
param_names = set(six.iterkeys(self._prms))
intersect = variable_names & param_names
# intersect has the original ordering of variables
intersect = [v for v in variable_names_list if v in param_names]
logger.info("Variables to restore from dict: {}".format(', '.join(map(str, intersect))))
# use opname (without :0) for clarity in logging
logger.info("Variables to restore from dict: {}".format(
', '.join(get_op_tensor_name(x)[0] for x in intersect)))
mismatch = MismatchLogger('graph', 'dict')
for k in sorted(variable_names - param_names):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment