Project: Shashank Suhas / seminar-breakout

Commit 880b767d
Authored Feb 18, 2017 by Yuxin Wu
add NCHW for Conv2D,Pooling and BN. fix #150

Parent: 4be41492
Showing 5 changed files with 93 additions and 72 deletions:
examples/ResNet/cifar10-resnet.py   +9  -6
tensorpack/models/batch_norm.py     +35 -25
tensorpack/models/conv2d.py         +12 -10
tensorpack/models/pool.py           +28 -27
tensorpack/utils/argtools.py        +9  -4
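For reference, a minimal usage sketch (not part of the commit) of how a model opts into the new NCHW support across the layers touched here, mirroring the cifar10-resnet change below. The function name build_nchw and the top-level tensorpack imports are assumptions:

import tensorflow as tf
from tensorpack import argscope, Conv2D, AvgPooling, BatchNorm, GlobalAvgPooling

def build_nchw(image_nhwc):
    # Intended to run inside a model/tower context (e.g. _build_graph).
    # Convert the NHWC input once, then run the NCHW-capable layers in NCHW.
    image = tf.transpose(image_nhwc, [0, 3, 1, 2])
    with argscope([Conv2D, AvgPooling, BatchNorm, GlobalAvgPooling], data_format='NCHW'):
        l = Conv2D('conv0', image, 16, kernel_shape=3)
        l = AvgPooling('pool0', l, 2)
        l = BatchNorm('bn0', l)
        return GlobalAvgPooling('gap', l)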
examples/ResNet/cifar10-resnet.py
...
...
@@ -21,9 +21,9 @@ This implementation uses the variants proposed in:
 Identity Mappings in Deep Residual Networks, arxiv:1603.05027
 I can reproduce the results on 2 TitanX for
-n=5, about 7.1% val error after 67k steps (15 step/s)
-n=18, about 5.95% val error after 80k steps (4.2 step/s)
-n=30: a 182-layer network, about 5.6% val error after 51k steps (2.5 step/s)
+n=5, about 7.1% val error after 67k steps (20.4 step/s)
+n=18, about 5.95% val error after 80k steps (5.6 step/s)
+n=30: a 182-layer network, about 5.6% val error after 51k steps (3.4 step/s)
 This model uses the whole training set instead of a train-val split.
 To train:
...
...
@@ -47,10 +47,11 @@ class Model(ModelDesc):
     def _build_graph(self, inputs):
         image, label = inputs
         image = image / 128.0 - 1
+        image = tf.transpose(image, [0, 3, 1, 2])

         def residual(name, l, increase_dim=False, first=False):
             shape = l.get_shape().as_list()
-            in_channel = shape[3]
+            in_channel = shape[1]

             if increase_dim:
                 out_channel = in_channel * 2
...
...
@@ -65,12 +66,14 @@ class Model(ModelDesc):
             c2 = Conv2D('conv2', c1, out_channel)
             if increase_dim:
                 l = AvgPooling('pool', l, 2)
-                l = tf.pad(l, [[0, 0], [0, 0], [0, 0], [in_channel // 2, in_channel // 2]])
+                l = tf.pad(l, [[0, 0], [in_channel // 2, in_channel // 2], [0, 0], [0, 0]])

             l = c2 + l
             return l

-        with argscope(Conv2D, nl=tf.identity, use_bias=False, kernel_shape=3,
+        with argscope([Conv2D, AvgPooling, BatchNorm, GlobalAvgPooling], data_format='NCHW'), \
+                argscope(Conv2D, nl=tf.identity, use_bias=False, kernel_shape=3,
                          W_init=variance_scaling_initializer(mode='FAN_OUT')):
             l = Conv2D('conv0', image, 16, nl=BNReLU)
             l = residual('res1.0', l, first=True)
...
...
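A side note on the tf.pad change above: with NCHW the channel padding for the shortcut moves from the last axis to axis 1. A small self-contained illustration (the shapes and channel count here are made up for the example):

import tensorflow as tf

in_channel = 16                               # hypothetical channel count
l = tf.zeros([8, in_channel, 16, 16])         # an NCHW activation
pad = [in_channel // 2, in_channel // 2]
# NCHW: channels sit on axis 1, so the zero-padding spec goes there
# (under NHWC the same padding would sit on the last axis instead).
l = tf.pad(l, [[0, 0], pad, [0, 0], [0, 0]])  # -> shape [8, 32, 16, 16]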
tensorpack/models/batch_norm.py
...
...
@@ -52,7 +52,7 @@ def BatchNormV1(x, use_local_stat=None, decay=0.9, epsilon=1e-5):
     with tf.variable_scope(tf.get_variable_scope(), reuse=False):
         # BatchNorm in reuse scope can be tricky! Moving mean/variance are not reused
         with tf.name_scope(None):  # https://github.com/tensorflow/tensorflow/issues/2740
-            # TODO if reuse=True, try to find and use the existing statistics
+            # if reuse=True, try to find and use the existing statistics
             # how to use multiple tensors to update one EMA? seems impossbile
            ema = tf.train.ExponentialMovingAverage(decay=decay, name=emaname)
            ema_apply_op = ema.apply([batch_mean, batch_var])
...
...
@@ -71,7 +71,7 @@ def BatchNormV1(x, use_local_stat=None, decay=0.9, epsilon=1e-5):
         var_var_name = ema.average_name(batch_var)
         if ctx.is_main_tower:
             # main tower, but needs to use global stat. global stat must be from outside
-            # TODO when reuse=True, the desired variable name could
+            # when reuse=True, the desired variable name could
             # actually be different, because a different var is created
             # for different reuse tower
             ema_mean = tf.get_variable('mean/' + emaname, [n_out])
...
...
@@ -96,14 +96,7 @@ def BatchNormV1(x, use_local_stat=None, decay=0.9, epsilon=1e-5):
         x, ema_mean, ema_var, beta, gamma, epsilon, 'output')


-def get_bn_variables(x, use_scale, use_bias):
-    shape = x.get_shape().as_list()
-    assert len(shape) in [2, 4]
-    n_out = shape[-1]  # channel
-    assert n_out is not None, "Input to BatchNorm cannot have unknown channels!"
-    if len(shape) == 2:
-        x = tf.reshape(x, [-1, 1, 1, n_out])
-
+def get_bn_variables(n_out, use_scale, use_bias):
     if use_bias:
         beta = tf.get_variable('beta', [n_out], initializer=tf.constant_initializer())
     else:
...
...
@@ -118,11 +111,10 @@ def get_bn_variables(x, use_scale, use_bias):
                                   initializer=tf.constant_initializer(), trainable=False)
     moving_var = tf.get_variable('variance/EMA', [n_out],
                                  initializer=tf.constant_initializer(), trainable=False)
-    return x, beta, gamma, moving_mean, moving_var
+    return beta, gamma, moving_mean, moving_var


 def update_bn_ema(xn, batch_mean, batch_var, moving_mean, moving_var, decay):
-    # TODO update it later (similar to slim) might be faster?
     # TODO is there a way to use zero_debias in multi-GPU?
     update_op1 = moving_averages.assign_moving_average(
         moving_mean, batch_mean, decay, zero_debias=False,
...
...
@@ -138,7 +130,7 @@ def update_bn_ema(xn, batch_mean, batch_var, moving_mean, moving_var, decay):
 @layer_register(log_shape=False)
 def BatchNorm(x, use_local_stat=None, decay=0.9, epsilon=1e-5,
-              use_scale=True, use_bias=True):
+              use_scale=True, use_bias=True, data_format='NHWC'):
     """
     Batch Normalization layer, as described in the paper:
     `Batch Normalization: Accelerating Deep Network Training by
...
...
@@ -171,7 +163,18 @@ def BatchNorm(x, use_local_stat=None, decay=0.9, epsilon=1e-5,
         with the official inceptionv3 example).
     """
     shape = x.get_shape().as_list()
-    x, beta, gamma, moving_mean, moving_var = get_bn_variables(x, use_scale, use_bias)
+    assert len(shape) in [2, 4]
+    if len(shape) == 2:
+        data_format = 'NCHW'
+    if data_format == 'NCHW':
+        n_out = shape[1]
+    else:
+        n_out = shape[-1]  # channel
+    if len(shape) == 2:
+        x = tf.reshape(x, [-1, n_out, 1, 1])
+    assert n_out is not None, "Input to BatchNorm cannot have unknown channels!"
+    beta, gamma, moving_mean, moving_var = get_bn_variables(n_out, use_scale, use_bias)

     ctx = get_current_tower_context()
     if use_local_stat is None:
...
...
@@ -182,22 +185,25 @@ def BatchNorm(x, use_local_stat=None, decay=0.9, epsilon=1e-5,
             logger.warn("[BatchNorm] use_local_stat != is_training")

     if use_local_stat:
-        xn, batch_mean, batch_var = tf.nn.fused_batch_norm(
-            x, gamma, beta, epsilon=epsilon, is_training=True)
+        xn, batch_mean, batch_var = tf.nn.fused_batch_norm(
+            x, gamma, beta, epsilon=epsilon,
+            is_training=True, data_format=data_format)
     else:
         assert not ctx.is_training, "In training, local statistics has to be used!"
-        # fused seems slower in inference
-        # xn, _, _ = tf.nn.fused_batch_norm(x, gamma, beta,
-        #                                   moving_mean, moving_var,
-        #                                   epsilon=epsilon, is_training=False, name='output')
-        xn = tf.nn.batch_normalization(
-            x, moving_mean, moving_var, beta, gamma, epsilon)
+        if data_format == 'NCHW':
+            # fused is slower in inference, but support NCHW
+            xn, _, _ = tf.nn.fused_batch_norm(
+                x, gamma, beta, moving_mean, moving_var,
+                epsilon=epsilon, is_training=False, data_format=data_format)
+        else:
+            xn = tf.nn.batch_normalization(
+                x, moving_mean, moving_var, beta, gamma, epsilon)

     if len(shape) == 2:
-        xn = tf.squeeze(xn, [1, 2])
+        axis = [2, 3] if data_format == 'NCHW' else [1, 2]
+        xn = tf.squeeze(xn, axis)

     # maintain EMA only on one GPU.
     # TODO the first GPU already has too many work, might be faster to update it on a different GPU
     if ctx.is_main_training_tower:
         return update_bn_ema(xn, batch_mean, batch_var, moving_mean, moving_var, decay)
     else:
...
...
@@ -231,7 +237,11 @@ def BatchRenorm(x, rmax, dmax, decay=0.9, epsilon=1e-5,
     """
     shape = x.get_shape().as_list()
-    x, beta, gamma, moving_mean, moving_var = get_bn_variables(x, use_scale, use_bias)
+    assert len(shape) in [2, 4]
+    n_out = shape[-1]
+    if len(shape) == 2:
+        x = tf.reshape(x, [-1, 1, 1, n_out])
+    beta, gamma, moving_mean, moving_var = get_bn_variables(n_out, use_scale, use_bias)

     ctx = get_current_tower_context()
     use_local_stat = ctx.is_training
...
...
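A hedged usage sketch of the updated BatchNorm signature (the helper name bn_block is made up, and the code is meant to run inside a model/tower context such as the _build_graph shown above):

import tensorflow as tf
from tensorpack import Conv2D, BatchNorm

def bn_block(image_nhwc):
    # Intended for use inside a model/tower context (e.g. _build_graph).
    l = tf.transpose(image_nhwc, [0, 3, 1, 2])                      # NHWC -> NCHW
    l = Conv2D('conv0', l, 16, kernel_shape=3, data_format='NCHW')
    # Channels are now read from axis 1; 2D inputs are still accepted and are
    # internally reshaped to [N, C, 1, 1], normalized, then squeezed back.
    l = BatchNorm('bn0', l, data_format='NCHW')
    return l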
tensorpack/models/conv2d.py
...
...
@@ -14,12 +14,13 @@ __all__ = ['Conv2D', 'Deconv2D']
 def Conv2D(x, out_channel, kernel_shape,
            padding='SAME', stride=1,
            W_init=None, b_init=None,
-           nl=tf.identity, split=1, use_bias=True):
+           nl=tf.identity, split=1, use_bias=True, data_format='NHWC'):
     """
     2D convolution on 4D inputs.

     Args:
-        x (tf.Tensor): a tensor of shape NHWC.
+        x (tf.Tensor): a 4D tensor.
             Must have known number of channels, but can have other unknown dimensions.
         out_channel (int): number of output channel.
         kernel_shape: (h, w) tuple or a int.
...
...
@@ -32,7 +33,7 @@ def Conv2D(x, out_channel, kernel_shape,
         use_bias (bool): whether to use bias.

     Returns:
-        tf.Tensor: a NHWC tensor named ``output``.
+        tf.Tensor named ``output``.

     Variable Names:
...
...
@@ -40,7 +41,8 @@ def Conv2D(x, out_channel, kernel_shape,
     * ``b``: bias
     """
     in_shape = x.get_shape().as_list()
-    in_channel = in_shape[-1]
+    channel_axis = 3 if data_format == 'NHWC' else 1
+    in_channel = in_shape[channel_axis]
     assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!"
     assert in_channel % split == 0
     assert out_channel % split == 0
...
...
@@ -48,7 +50,7 @@ def Conv2D(x, out_channel, kernel_shape,
     kernel_shape = shape2d(kernel_shape)
     padding = padding.upper()
     filter_shape = kernel_shape + [in_channel / split, out_channel]
-    stride = shape4d(stride)
+    stride = shape4d(stride, data_format=data_format)

     if W_init is None:
         W_init = tf.contrib.layers.variance_scaling_initializer()
...
...
@@ -60,14 +62,14 @@ def Conv2D(x, out_channel, kernel_shape,
         b = tf.get_variable('b', [out_channel], initializer=b_init)

     if split == 1:
-        conv = tf.nn.conv2d(x, W, stride, padding)
+        conv = tf.nn.conv2d(x, W, stride, padding, data_format=data_format)
     else:
-        inputs = tf.split(x, split, 3)
+        inputs = tf.split(x, split, channel_axis)
         kernels = tf.split(W, split, 3)
-        outputs = [tf.nn.conv2d(i, k, stride, padding)
+        outputs = [tf.nn.conv2d(i, k, stride, padding, data_format=data_format)
                    for i, k in zip(inputs, kernels)]
-        conv = tf.concat(outputs, 3)
-    return nl(tf.nn.bias_add(conv, b) if use_bias else conv, name='output')
+        conv = tf.concat(outputs, channel_axis)
+    return nl(tf.nn.bias_add(conv, b, data_format=data_format) if use_bias else conv, name='output')


 class StaticDynamicShape(object):
...
...
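To make the split-convolution branch above concrete, a small sketch (shapes and values invented for illustration) of the channel-axis bookkeeping under NCHW; kernels stay in HWIO layout, so only the data tensor is split on its channel axis:

import tensorflow as tf

data_format = 'NCHW'
channel_axis = 3 if data_format == 'NHWC' else 1

x = tf.zeros([8, 32, 14, 14])               # NCHW input with 32 channels
W = tf.zeros([3, 3, 16, 64])                # HWIO kernel: [h, w, in_channel/split, out_channel]
split = 2
inputs = tf.split(x, split, channel_axis)   # split the data on its channel axis...
kernels = tf.split(W, split, 3)             # ...but kernels are always split on axis 3
outputs = [tf.nn.conv2d(i, k, [1, 1, 1, 1], 'SAME', data_format=data_format)
           for i, k in zip(inputs, kernels)]
conv = tf.concat(outputs, channel_axis)     # -> shape [8, 64, 14, 14]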
tensorpack/models/pool.py
...
...
@@ -14,59 +14,58 @@ __all__ = ['MaxPooling', 'FixedUnPooling', 'AvgPooling', 'GlobalAvgPooling',
            'BilinearUpSample']


+def _Pooling(func, x, shape, stride, padding, data_format):
+    padding = padding.upper()
+    shape = shape4d(shape, data_format=data_format)
+    if stride is None:
+        stride = shape
+    else:
+        stride = shape4d(stride, data_format=data_format)
+
+    return func(x, ksize=shape, strides=stride, padding=padding,
+                data_format=data_format, name='output')
+
+
 @layer_register()
-def MaxPooling(x, shape, stride=None, padding='VALID'):
+def MaxPooling(x, shape, stride=None, padding='VALID', data_format='NHWC'):
     """
     Max Pooling on 4D tensors.

     Args:
-        x (tf.Tensor): a NHWC tensor.
+        x (tf.Tensor): a 4D tensor.
         shape: int or (h, w) tuple
         stride: int or (h, w) tuple. Defaults to be the same as shape.
         padding (str): 'valid' or 'same'.

     Returns:
-        tf.Tensor: a NHWC tensor named ``output``.
+        tf.Tensor named ``output``.
     """
-    padding = padding.upper()
-    shape = shape4d(shape)
-    if stride is None:
-        stride = shape
-    else:
-        stride = shape4d(stride)
-
-    return tf.nn.max_pool(x, ksize=shape, strides=stride, padding=padding, name='output')
+    return _Pooling(tf.nn.max_pool, x, shape, stride, padding,
+                    data_format=data_format)


 @layer_register()
-def AvgPooling(x, shape, stride=None, padding='VALID'):
+def AvgPooling(x, shape, stride=None, padding='VALID', data_format='NHWC'):
     """
     Average Pooling on 4D tensors.

     Args:
-        x (tf.Tensor): a NHWC tensor.
+        x (tf.Tensor): a 4D tensor.
         shape: int or (h, w) tuple
         stride: int or (h, w) tuple. Defaults to be the same as shape.
         padding (str): 'valid' or 'same'.

     Returns:
-        tf.Tensor: a NHWC tensor named ``output``.
+        tf.Tensor named ``output``.
     """
-    padding = padding.upper()
-    shape = shape4d(shape)
-    if stride is None:
-        stride = shape
-    else:
-        stride = shape4d(stride)
-
-    return tf.nn.avg_pool(x, ksize=shape, strides=stride, padding=padding, name='output')
+    return _Pooling(tf.nn.avg_pool, x, shape, stride, padding,
+                    data_format=data_format)


 @layer_register()
-def GlobalAvgPooling(x):
+def GlobalAvgPooling(x, data_format='NHWC'):
     """
     Global average pooling as in the paper `Network In Network
     <http://arxiv.org/abs/1312.4400>`_.
...
...
@@ -77,7 +76,9 @@ def GlobalAvgPooling(x):
         tf.Tensor: a NC tensor named ``output``.
     """
     assert x.get_shape().ndims == 4
-    return tf.reduce_mean(x, [1, 2], name='output')
+    assert data_format in ['NHWC', 'NCHW']
+    axis = [1, 2] if data_format == 'NHWC' else [2, 3]
+    return tf.reduce_mean(x, axis, name='output')


 def UnPooling2x2ZeroFilled(x):
...
...
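The new _Pooling helper relies on shape4d (changed below) to put the pooling window on the right axes. A raw-TensorFlow sketch of what that amounts to under NCHW (shapes invented for illustration):

import tensorflow as tf

x = tf.zeros([8, 64, 28, 28])                 # NCHW feature map
# With NCHW, the 4D ksize/strides carry the window on the last two entries;
# NHWC would use [1, 2, 2, 1] instead.
y = tf.nn.max_pool(x, ksize=[1, 1, 2, 2], strides=[1, 1, 2, 2],
                   padding='VALID', data_format='NCHW', name='output')
# y has shape [8, 64, 14, 14]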
tensorpack/utils/argtools.py
...
...
@@ -86,17 +86,22 @@ def shape2d(a):
     raise RuntimeError("Illegal shape: {}".format(a))


-def shape4d(a):
+def shape4d(a, data_format='NHWC'):
     """
-    Ensuer a 4D shape, to use with NHWC functions.
+    Ensuer a 4D shape, to use with 4D symbolic functions.

     Args:
         a: a int or tuple/list of length 2

     Returns:
-        list: of length 4. if ``a`` is a int, return ``[1, a, a, 1]``.
+        list: of length 4. if ``a`` is a int, return ``[1, a, a, 1]``
+            or ``[1, 1, a, a]`` depending on data_format.
     """
-    return [1] + shape2d(a) + [1]
+    s2d = shape2d(a)
+    if data_format == 'NHWC':
+        return [1] + s2d + [1]
+    else:
+        return [1, 1] + s2d


 @memoized
...
...
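A quick check of the new shape4d behaviour (assuming the post-commit tensorpack.utils.argtools module is importable):

from tensorpack.utils.argtools import shape4d

print(shape4d(3))                           # [1, 3, 3, 1]  (NHWC default)
print(shape4d(3, data_format='NCHW'))       # [1, 1, 3, 3]
print(shape4d((2, 3), data_format='NCHW'))  # [1, 1, 2, 3]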