Commit c5a47192 authored by Yuxin Wu's avatar Yuxin Wu

move RPN functions around to model_rpn; improve logging

parent df82c65a
...@@ -13,8 +13,10 @@ from tensorpack.dataflow import ( ...@@ -13,8 +13,10 @@ from tensorpack.dataflow import (
MultiProcessMapData, MultiThreadMapData, TestDataSpeed, imgaug, MultiProcessMapData, MultiThreadMapData, TestDataSpeed, imgaug,
) )
from tensorpack.utils import logger from tensorpack.utils import logger
from tensorpack.utils.argtools import log_once, memoized from tensorpack.utils.argtools import log_once
from modeling.model_rpn import get_all_anchors
from modeling.model_fpn import get_all_anchors_fpn
from common import ( from common import (
CustomResize, DataFromListOfDict, box_to_point8, CustomResize, DataFromListOfDict, box_to_point8,
filter_boxes_inside_shape, np_iou, point8_to_box, segmentation_to_mask, filter_boxes_inside_shape, np_iou, point8_to_box, segmentation_to_mask,
...@@ -57,64 +59,6 @@ def print_class_histogram(roidbs): ...@@ -57,64 +59,6 @@ def print_class_histogram(roidbs):
logger.info("Ground-Truth category distribution:\n" + colored(table, "cyan")) logger.info("Ground-Truth category distribution:\n" + colored(table, "cyan"))
@memoized
def get_all_anchors(*, stride, sizes, ratios, max_size):
    """
    Get all anchors in the largest possible image, shifted, floatbox

    Args:
        stride (int): the stride of anchors.
        sizes (tuple[int]): the sizes (sqrt area) of anchors
        ratios (tuple[float]): the aspect ratios (height / width) of anchors
        max_size (int): maximum size of input image

    Returns:
        anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox
        (float32), each box in (x1, y1, x2, y2) format.
        The first two axes are indexed as [y, x].
        The layout in the NUM_ANCHOR dim is size-major (NUM_SIZE x NUM_RATIO):
        the anchor for ``sizes[i]`` and ``ratios[j]`` is at index
        ``i * len(ratios) + j``.

    Note:
        The function is wrapped by ``@memoized``; presumably repeated calls
        with the same arguments hand back the same array object, so callers
        should treat the result as read-only (confirm against ``memoized``).
    """
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on 0, have sqrt areas equal to the specified sizes, and aspect ratios as given.
    anchors = []
    for sz in sizes:
        for ratio in ratios:
            # w * h == sz * sz and h / w == ratio
            w = np.sqrt(sz * sz / ratio)
            h = ratio * w
            anchors.append([-w, -h, w, h])
    # Halve the full extents so that each box spans [-w/2, w/2] x [-h/2, h/2].
    cell_anchors = np.asarray(anchors) * 0.5

    field_size = int(np.ceil(max_size / stride))
    offsets = (np.arange(0, field_size) * stride).astype("float32")
    # Default 'xy' meshgrid indexing: x varies along the second axis, so the
    # flattened order is row-major over (y, x).
    shift_x, shift_y = np.meshgrid(offsets, offsets)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    # Kx4, K = field_size * field_size; the same (x, y) shift moves both corners.
    shifts = np.stack((shift_x, shift_y, shift_x, shift_y), axis=1)

    K = shifts.shape[0]
    A = cell_anchors.shape[0]
    # Broadcast (1, A, 4) + (K, 1, 4) -> (K, A, 4): every anchor at every location.
    field_of_anchors = cell_anchors.reshape((1, A, 4)) + shifts.reshape((K, 1, 4))
    # FSxFSxAx4
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
    # Coordinates are not guaranteed to be integral (sqrt is involved above),
    # so the result is kept as a float box.
    return field_of_anchors.astype("float32")
@memoized
def get_all_anchors_fpn(*, strides, sizes, ratios, max_size):
    """
    Compute one field of anchors per level, pairing each stride with the
    size at the same position and sharing ``ratios`` and ``max_size``
    across all levels.

    Returns:
        [anchors]: each anchors is a SxSx NUM_ANCHOR_RATIOS x4 array.
    """
    assert len(strides) == len(sizes)
    # Each level uses exactly one anchor size, passed as a 1-tuple.
    return [
        get_all_anchors(stride=level_stride, sizes=(level_size,), ratios=ratios, max_size=max_size)
        for level_stride, level_size in zip(strides, sizes)
    ]
class TrainingDataPreprocessor: class TrainingDataPreprocessor:
""" """
The mapper to preprocess the input data for training. The mapper to preprocess the input data for training.
...@@ -248,6 +192,7 @@ class TrainingDataPreprocessor: ...@@ -248,6 +192,7 @@ class TrainingDataPreprocessor:
featuremap_boxes = featuremap_boxes.reshape((anchorH, anchorW, num_anchor, 4)) featuremap_boxes = featuremap_boxes.reshape((anchorH, anchorW, num_anchor, 4))
return featuremap_labels, featuremap_boxes return featuremap_labels, featuremap_boxes
# TODO: can probably merge single-level logic with FPN logic to simplify code
def get_multilevel_rpn_anchor_input(self, im, boxes, is_crowd): def get_multilevel_rpn_anchor_input(self, im, boxes, is_crowd):
""" """
Args: Args:
......
...@@ -201,7 +201,8 @@ class RPNAnchors(namedtuple('_RPNAnchors', ['boxes', 'gt_labels', 'gt_boxes'])): ...@@ -201,7 +201,8 @@ class RPNAnchors(namedtuple('_RPNAnchors', ['boxes', 'gt_labels', 'gt_boxes'])):
if __name__ == '__main__': if __name__ == '__main__':
""" """
Demonstrate what's wrong with tf.image.crop_and_resize: Demonstrate what's wrong with tf.image.crop_and_resize.
Also reported at https://github.com/tensorflow/tensorflow/issues/26278
""" """
import tensorflow.contrib.eager as tfe import tensorflow.contrib.eager as tfe
tfe.enable_eager_execution() tfe.enable_eager_execution()
......
...@@ -9,12 +9,13 @@ from tensorpack.tfutils.argscope import argscope ...@@ -9,12 +9,13 @@ from tensorpack.tfutils.argscope import argscope
from tensorpack.tfutils.scope_utils import under_name_scope from tensorpack.tfutils.scope_utils import under_name_scope
from tensorpack.tfutils.summary import add_moving_summary from tensorpack.tfutils.summary import add_moving_summary
from tensorpack.tfutils.tower import get_current_tower_context from tensorpack.tfutils.tower import get_current_tower_context
from tensorpack.utils.argtools import memoized
from config import config as cfg from config import config as cfg
from utils.box_ops import area as tf_area from utils.box_ops import area as tf_area
from .backbone import GroupNorm from .backbone import GroupNorm
from .model_box import roi_align from .model_box import roi_align
from .model_rpn import generate_rpn_proposals, rpn_losses from .model_rpn import generate_rpn_proposals, rpn_losses, get_all_anchors
@layer_register(log_shape=True) @layer_register(log_shape=True)
...@@ -217,3 +218,17 @@ def generate_fpn_proposals( ...@@ -217,3 +218,17 @@ def generate_fpn_proposals(
tf.sigmoid(proposal_scores, name='probs') # for visualization tf.sigmoid(proposal_scores, name='probs') # for visualization
return tf.stop_gradient(proposal_boxes, name='boxes'), \ return tf.stop_gradient(proposal_boxes, name='boxes'), \
tf.stop_gradient(proposal_scores, name='scores') tf.stop_gradient(proposal_scores, name='scores')
@memoized
def get_all_anchors_fpn(*, strides, sizes, ratios, max_size):
    """
    Compute one field of anchors per level, pairing each stride with the
    size at the same position and sharing ``ratios`` and ``max_size``
    across all levels.

    Returns:
        [anchors]: each anchors is a SxSx NUM_ANCHOR_RATIOS x4 array.
    """
    assert len(strides) == len(sizes)
    # Each level uses exactly one anchor size, passed as a 1-tuple.
    return [
        get_all_anchors(stride=level_stride, sizes=(level_size,), ratios=ratios, max_size=max_size)
        for level_stride, level_size in zip(strides, sizes)
    ]
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import tensorflow as tf import tensorflow as tf
import numpy as np
from tensorpack.models import Conv2D, layer_register from tensorpack.models import Conv2D, layer_register
from tensorpack.tfutils.argscope import argscope from tensorpack.tfutils.argscope import argscope
from tensorpack.tfutils.scope_utils import auto_reuse_variable_scope, under_name_scope from tensorpack.tfutils.scope_utils import auto_reuse_variable_scope, under_name_scope
from tensorpack.tfutils.summary import add_moving_summary from tensorpack.tfutils.summary import add_moving_summary
from tensorpack.utils.argtools import memoized
from config import config as cfg from config import config as cfg
from .model_box import clip_boxes from .model_box import clip_boxes
...@@ -151,3 +153,47 @@ def generate_rpn_proposals(boxes, scores, img_shape, ...@@ -151,3 +153,47 @@ def generate_rpn_proposals(boxes, scores, img_shape,
proposal_scores = tf.gather(topk_valid_scores, nms_indices) proposal_scores = tf.gather(topk_valid_scores, nms_indices)
tf.sigmoid(proposal_scores, name='probs') # for visualization tf.sigmoid(proposal_scores, name='probs') # for visualization
return tf.stop_gradient(proposal_boxes, name='boxes'), tf.stop_gradient(proposal_scores, name='scores') return tf.stop_gradient(proposal_boxes, name='boxes'), tf.stop_gradient(proposal_scores, name='scores')
@memoized
def get_all_anchors(*, stride, sizes, ratios, max_size):
    """
    Get all anchors in the largest possible image, shifted, floatbox

    Args:
        stride (int): the stride of anchors.
        sizes (tuple[int]): the sizes (sqrt area) of anchors
        ratios (tuple[float]): the aspect ratios (height / width) of anchors
        max_size (int): maximum size of input image

    Returns:
        anchors: SxSxNUM_ANCHORx4, where S == ceil(MAX_SIZE/STRIDE), floatbox
        (float32), each box in (x1, y1, x2, y2) format.
        The first two axes are indexed as [y, x].
        The layout in the NUM_ANCHOR dim is size-major (NUM_SIZE x NUM_RATIO):
        the anchor for ``sizes[i]`` and ``ratios[j]`` is at index
        ``i * len(ratios) + j``.

    Note:
        The function is wrapped by ``@memoized``; presumably repeated calls
        with the same arguments hand back the same array object, so callers
        should treat the result as read-only (confirm against ``memoized``).
    """
    # Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    # are centered on 0, have sqrt areas equal to the specified sizes, and aspect ratios as given.
    anchors = []
    for sz in sizes:
        for ratio in ratios:
            # w * h == sz * sz and h / w == ratio
            w = np.sqrt(sz * sz / ratio)
            h = ratio * w
            anchors.append([-w, -h, w, h])
    # Halve the full extents so that each box spans [-w/2, w/2] x [-h/2, h/2].
    cell_anchors = np.asarray(anchors) * 0.5

    field_size = int(np.ceil(max_size / stride))
    offsets = (np.arange(0, field_size) * stride).astype("float32")
    # Default 'xy' meshgrid indexing: x varies along the second axis, so the
    # flattened order is row-major over (y, x).
    shift_x, shift_y = np.meshgrid(offsets, offsets)
    shift_x = shift_x.flatten()
    shift_y = shift_y.flatten()
    # Kx4, K = field_size * field_size; the same (x, y) shift moves both corners.
    shifts = np.stack((shift_x, shift_y, shift_x, shift_y), axis=1)

    K = shifts.shape[0]
    A = cell_anchors.shape[0]
    # Broadcast (1, A, 4) + (K, 1, 4) -> (K, A, 4): every anchor at every location.
    field_of_anchors = cell_anchors.reshape((1, A, 4)) + shifts.reshape((K, 1, 4))
    # FSxFSxAx4
    field_of_anchors = field_of_anchors.reshape((field_size, field_size, A, 4))
    # Coordinates are not guaranteed to be integral (sqrt is involved above),
    # so the result is kept as a float box.
    return field_of_anchors.astype("float32")
...@@ -135,15 +135,15 @@ class CollectionGuard(object): ...@@ -135,15 +135,15 @@ class CollectionGuard(object):
if k in self._whitelist or k in self._freeze_keys: if k in self._whitelist or k in self._freeze_keys:
continue continue
if k not in self.original: if k not in self.original:
newly_created.append(self._key_name(k)) newly_created.append((self._key_name(k), len(v)))
else: else:
old_v = self.original[k] old_v = self.original[k]
if len(old_v) != len(v): if len(old_v) != len(v):
size_change.append((self._key_name(k), len(old_v), len(v))) size_change.append((self._key_name(k), len(old_v), len(v)))
if newly_created: if newly_created:
logger.info( logger.info(
"New collections created in tower {}: {}".format( "New collections created in tower {}: ".format(self._name) +
self._name, ', '.join(newly_created))) ', '.join(["{} of size {}".format(key, size) for key, size in newly_created]))
if size_change: if size_change:
logger.info( logger.info(
"Size of these collections were changed in {}: {}".format( "Size of these collections were changed in {}: {}".format(
......
...@@ -196,13 +196,17 @@ class DictRestore(SessionInit): ...@@ -196,13 +196,17 @@ class DictRestore(SessionInit):
def _run_init(self, sess): def _run_init(self, sess):
variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
variable_names_list = [k.name for k in variables]
variable_names = set([k.name for k in variables]) variable_names = set(variable_names_list)
param_names = set(six.iterkeys(self._prms)) param_names = set(six.iterkeys(self._prms))
intersect = variable_names & param_names # intersect has the original ordering of variables
intersect = [v for v in variable_names_list if v in param_names]
logger.info("Variables to restore from dict: {}".format(', '.join(map(str, intersect)))) # use opname (without :0) for clarity in logging
logger.info("Variables to restore from dict: {}".format(
', '.join(get_op_tensor_name(x)[0] for x in intersect)))
mismatch = MismatchLogger('graph', 'dict') mismatch = MismatchLogger('graph', 'dict')
for k in sorted(variable_names - param_names): for k in sorted(variable_names - param_names):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment